{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.17021593487112793, "eval_steps": 0, "global_step": 96000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7730826549075824e-06, "grad_norm": 25.625, "learning_rate": 0.0, "loss": 3.8026, "step": 1 }, { "epoch": 3.546165309815165e-06, "grad_norm": 26.5, "learning_rate": 2e-06, "loss": 4.5357, "step": 2 }, { "epoch": 7.09233061963033e-06, "grad_norm": 21.875, "learning_rate": 6e-06, "loss": 4.3725, "step": 4 }, { "epoch": 1.0638495929445495e-05, "grad_norm": 23.5, "learning_rate": 1e-05, "loss": 4.4881, "step": 6 }, { "epoch": 1.418466123926066e-05, "grad_norm": 18.875, "learning_rate": 1.4e-05, "loss": 4.6479, "step": 8 }, { "epoch": 1.7730826549075824e-05, "grad_norm": 18.375, "learning_rate": 1.8e-05, "loss": 4.6278, "step": 10 }, { "epoch": 2.127699185889099e-05, "grad_norm": 17.75, "learning_rate": 2.2e-05, "loss": 4.3834, "step": 12 }, { "epoch": 2.4823157168706156e-05, "grad_norm": 26.75, "learning_rate": 2.6e-05, "loss": 4.3062, "step": 14 }, { "epoch": 2.836932247852132e-05, "grad_norm": 19.625, "learning_rate": 3e-05, "loss": 4.1917, "step": 16 }, { "epoch": 3.1915487788336485e-05, "grad_norm": 20.625, "learning_rate": 3.4000000000000007e-05, "loss": 3.8393, "step": 18 }, { "epoch": 3.546165309815165e-05, "grad_norm": 16.75, "learning_rate": 3.8e-05, "loss": 4.0825, "step": 20 }, { "epoch": 3.900781840796682e-05, "grad_norm": 16.375, "learning_rate": 4.2000000000000004e-05, "loss": 3.4053, "step": 22 }, { "epoch": 4.255398371778198e-05, "grad_norm": 22.75, "learning_rate": 4.6e-05, "loss": 4.1534, "step": 24 }, { "epoch": 4.610014902759714e-05, "grad_norm": 11.5, "learning_rate": 5e-05, "loss": 3.4799, "step": 26 }, { "epoch": 4.964631433741231e-05, "grad_norm": 10.75, "learning_rate": 5.4e-05, "loss": 2.9566, "step": 28 }, { "epoch": 5.3192479647227476e-05, "grad_norm": 9.6875, "learning_rate": 5.800000000000001e-05, "loss": 3.3893, "step": 30 }, { "epoch": 5.673864495704264e-05, "grad_norm": 14.0, "learning_rate": 6.2e-05, "loss": 3.7127, "step": 32 }, { "epoch": 6.028481026685781e-05, "grad_norm": 7.21875, "learning_rate": 6.6e-05, "loss": 2.708, "step": 34 }, { "epoch": 6.383097557667297e-05, "grad_norm": 7.75, "learning_rate": 7.000000000000001e-05, "loss": 3.3579, "step": 36 }, { "epoch": 6.737714088648813e-05, "grad_norm": 5.8125, "learning_rate": 7.4e-05, "loss": 2.5665, "step": 38 }, { "epoch": 7.09233061963033e-05, "grad_norm": 7.59375, "learning_rate": 7.8e-05, "loss": 2.7873, "step": 40 }, { "epoch": 7.446947150611846e-05, "grad_norm": 6.78125, "learning_rate": 8.2e-05, "loss": 2.4843, "step": 42 }, { "epoch": 7.801563681593364e-05, "grad_norm": 6.28125, "learning_rate": 8.599999999999999e-05, "loss": 3.0413, "step": 44 }, { "epoch": 8.15618021257488e-05, "grad_norm": 4.46875, "learning_rate": 8.999999999999999e-05, "loss": 2.2008, "step": 46 }, { "epoch": 8.510796743556396e-05, "grad_norm": 4.5, "learning_rate": 9.400000000000001e-05, "loss": 2.182, "step": 48 }, { "epoch": 8.865413274537912e-05, "grad_norm": 3.421875, "learning_rate": 9.800000000000001e-05, "loss": 2.0904, "step": 50 }, { "epoch": 9.220029805519429e-05, "grad_norm": 4.84375, "learning_rate": 0.000102, "loss": 2.1894, "step": 52 }, { "epoch": 9.574646336500945e-05, "grad_norm": 4.8125, "learning_rate": 0.000106, "loss": 2.4153, "step": 54 }, { "epoch": 9.929262867482463e-05, "grad_norm": 3.75, "learning_rate": 0.00011, "loss": 2.2583, "step": 56 }, { "epoch": 0.00010283879398463979, "grad_norm": 3.0625, "learning_rate": 0.000114, "loss": 2.08, "step": 58 }, { "epoch": 0.00010638495929445495, "grad_norm": 3.125, "learning_rate": 0.000118, "loss": 2.6455, "step": 60 }, { "epoch": 0.00010993112460427011, "grad_norm": 5.59375, "learning_rate": 0.000122, "loss": 2.1495, "step": 62 }, { "epoch": 0.00011347728991408528, "grad_norm": 4.46875, "learning_rate": 0.000126, "loss": 1.957, "step": 64 }, { "epoch": 0.00011702345522390044, "grad_norm": 4.53125, "learning_rate": 0.00013000000000000002, "loss": 2.142, "step": 66 }, { "epoch": 0.00012056962053371562, "grad_norm": 4.40625, "learning_rate": 0.000134, "loss": 2.088, "step": 68 }, { "epoch": 0.00012411578584353076, "grad_norm": 5.34375, "learning_rate": 0.00013800000000000002, "loss": 2.3496, "step": 70 }, { "epoch": 0.00012766195115334594, "grad_norm": 5.46875, "learning_rate": 0.00014199999999999998, "loss": 2.3067, "step": 72 }, { "epoch": 0.00013120811646316112, "grad_norm": 3.265625, "learning_rate": 0.000146, "loss": 1.7205, "step": 74 }, { "epoch": 0.00013475428177297627, "grad_norm": 3.53125, "learning_rate": 0.00015, "loss": 1.9972, "step": 76 }, { "epoch": 0.00013830044708279144, "grad_norm": 3.53125, "learning_rate": 0.000154, "loss": 1.8041, "step": 78 }, { "epoch": 0.0001418466123926066, "grad_norm": 2.65625, "learning_rate": 0.000158, "loss": 2.2117, "step": 80 }, { "epoch": 0.00014539277770242177, "grad_norm": 4.4375, "learning_rate": 0.000162, "loss": 2.1783, "step": 82 }, { "epoch": 0.00014893894301223692, "grad_norm": 2.140625, "learning_rate": 0.00016600000000000002, "loss": 1.9516, "step": 84 }, { "epoch": 0.0001524851083220521, "grad_norm": 4.3125, "learning_rate": 0.00017, "loss": 2.0596, "step": 86 }, { "epoch": 0.00015603127363186727, "grad_norm": 2.484375, "learning_rate": 0.000174, "loss": 1.9511, "step": 88 }, { "epoch": 0.00015957743894168242, "grad_norm": 2.171875, "learning_rate": 0.000178, "loss": 1.9226, "step": 90 }, { "epoch": 0.0001631236042514976, "grad_norm": 6.15625, "learning_rate": 0.000182, "loss": 2.1279, "step": 92 }, { "epoch": 0.00016666976956131275, "grad_norm": 2.109375, "learning_rate": 0.000186, "loss": 1.6153, "step": 94 }, { "epoch": 0.00017021593487112792, "grad_norm": 3.265625, "learning_rate": 0.00019, "loss": 2.2568, "step": 96 }, { "epoch": 0.0001737621001809431, "grad_norm": 3.0625, "learning_rate": 0.000194, "loss": 1.8394, "step": 98 }, { "epoch": 0.00017730826549075825, "grad_norm": 4.0, "learning_rate": 0.00019800000000000002, "loss": 1.9269, "step": 100 }, { "epoch": 0.00018085443080057342, "grad_norm": 2.328125, "learning_rate": 0.000202, "loss": 1.8469, "step": 102 }, { "epoch": 0.00018440059611038857, "grad_norm": 3.6875, "learning_rate": 0.000206, "loss": 2.1011, "step": 104 }, { "epoch": 0.00018794676142020375, "grad_norm": 8.375, "learning_rate": 0.00021, "loss": 1.9144, "step": 106 }, { "epoch": 0.0001914929267300189, "grad_norm": 2.734375, "learning_rate": 0.000214, "loss": 1.855, "step": 108 }, { "epoch": 0.00019503909203983407, "grad_norm": 1.453125, "learning_rate": 0.000218, "loss": 1.5246, "step": 110 }, { "epoch": 0.00019858525734964925, "grad_norm": 2.71875, "learning_rate": 0.000222, "loss": 1.6882, "step": 112 }, { "epoch": 0.0002021314226594644, "grad_norm": 2.125, "learning_rate": 0.00022600000000000002, "loss": 1.9064, "step": 114 }, { "epoch": 0.00020567758796927958, "grad_norm": 3.53125, "learning_rate": 0.00023, "loss": 1.932, "step": 116 }, { "epoch": 0.00020922375327909473, "grad_norm": 5.28125, "learning_rate": 0.00023400000000000002, "loss": 1.8807, "step": 118 }, { "epoch": 0.0002127699185889099, "grad_norm": 3.015625, "learning_rate": 0.00023799999999999998, "loss": 1.8285, "step": 120 }, { "epoch": 0.00021631608389872508, "grad_norm": 3.0, "learning_rate": 0.000242, "loss": 1.7112, "step": 122 }, { "epoch": 0.00021986224920854023, "grad_norm": 3.390625, "learning_rate": 0.000246, "loss": 1.8058, "step": 124 }, { "epoch": 0.0002234084145183554, "grad_norm": 1.875, "learning_rate": 0.00025, "loss": 1.7402, "step": 126 }, { "epoch": 0.00022695457982817055, "grad_norm": 1.953125, "learning_rate": 0.000254, "loss": 1.7423, "step": 128 }, { "epoch": 0.00023050074513798573, "grad_norm": 1.671875, "learning_rate": 0.00025800000000000004, "loss": 1.9259, "step": 130 }, { "epoch": 0.00023404691044780088, "grad_norm": 1.6796875, "learning_rate": 0.000262, "loss": 1.9277, "step": 132 }, { "epoch": 0.00023759307575761606, "grad_norm": 2.125, "learning_rate": 0.000266, "loss": 2.1548, "step": 134 }, { "epoch": 0.00024113924106743123, "grad_norm": 3.796875, "learning_rate": 0.00027, "loss": 2.0847, "step": 136 }, { "epoch": 0.0002446854063772464, "grad_norm": 3.546875, "learning_rate": 0.00027400000000000005, "loss": 1.9213, "step": 138 }, { "epoch": 0.00024823157168706153, "grad_norm": 1.6796875, "learning_rate": 0.00027800000000000004, "loss": 1.9205, "step": 140 }, { "epoch": 0.00025177773699687673, "grad_norm": 1.765625, "learning_rate": 0.00028199999999999997, "loss": 1.9867, "step": 142 }, { "epoch": 0.0002553239023066919, "grad_norm": 0.8203125, "learning_rate": 0.00028599999999999996, "loss": 1.5982, "step": 144 }, { "epoch": 0.00025887006761650703, "grad_norm": 2.5625, "learning_rate": 0.00029, "loss": 1.7819, "step": 146 }, { "epoch": 0.00026241623292632224, "grad_norm": 1.5234375, "learning_rate": 0.000294, "loss": 1.8248, "step": 148 }, { "epoch": 0.0002659623982361374, "grad_norm": 2.671875, "learning_rate": 0.000298, "loss": 1.8077, "step": 150 }, { "epoch": 0.00026950856354595253, "grad_norm": 1.140625, "learning_rate": 0.000302, "loss": 1.6061, "step": 152 }, { "epoch": 0.0002730547288557677, "grad_norm": 1.5078125, "learning_rate": 0.000306, "loss": 1.865, "step": 154 }, { "epoch": 0.0002766008941655829, "grad_norm": 1.4765625, "learning_rate": 0.00031, "loss": 1.7381, "step": 156 }, { "epoch": 0.00028014705947539804, "grad_norm": 1.109375, "learning_rate": 0.000314, "loss": 1.5453, "step": 158 }, { "epoch": 0.0002836932247852132, "grad_norm": 2.171875, "learning_rate": 0.00031800000000000003, "loss": 1.7776, "step": 160 }, { "epoch": 0.0002872393900950284, "grad_norm": 1.7109375, "learning_rate": 0.000322, "loss": 1.5954, "step": 162 }, { "epoch": 0.00029078555540484354, "grad_norm": 3.59375, "learning_rate": 0.000326, "loss": 1.7511, "step": 164 }, { "epoch": 0.0002943317207146587, "grad_norm": 1.4453125, "learning_rate": 0.00033, "loss": 1.5288, "step": 166 }, { "epoch": 0.00029787788602447384, "grad_norm": 1.40625, "learning_rate": 0.00033400000000000004, "loss": 1.5858, "step": 168 }, { "epoch": 0.00030142405133428904, "grad_norm": 2.015625, "learning_rate": 0.00033800000000000003, "loss": 1.4616, "step": 170 }, { "epoch": 0.0003049702166441042, "grad_norm": 3.53125, "learning_rate": 0.000342, "loss": 1.8694, "step": 172 }, { "epoch": 0.00030851638195391934, "grad_norm": 2.75, "learning_rate": 0.000346, "loss": 1.6349, "step": 174 }, { "epoch": 0.00031206254726373454, "grad_norm": 1.328125, "learning_rate": 0.00035, "loss": 1.5103, "step": 176 }, { "epoch": 0.0003156087125735497, "grad_norm": 1.78125, "learning_rate": 0.000354, "loss": 1.5582, "step": 178 }, { "epoch": 0.00031915487788336484, "grad_norm": 1.015625, "learning_rate": 0.000358, "loss": 1.7065, "step": 180 }, { "epoch": 0.00032270104319318, "grad_norm": 0.890625, "learning_rate": 0.000362, "loss": 1.5301, "step": 182 }, { "epoch": 0.0003262472085029952, "grad_norm": 1.828125, "learning_rate": 0.000366, "loss": 1.5613, "step": 184 }, { "epoch": 0.00032979337381281034, "grad_norm": 3.828125, "learning_rate": 0.00037, "loss": 1.9838, "step": 186 }, { "epoch": 0.0003333395391226255, "grad_norm": 4.875, "learning_rate": 0.000374, "loss": 2.3287, "step": 188 }, { "epoch": 0.0003368857044324407, "grad_norm": 0.9375, "learning_rate": 0.000378, "loss": 1.7776, "step": 190 }, { "epoch": 0.00034043186974225584, "grad_norm": 1.6484375, "learning_rate": 0.000382, "loss": 1.6625, "step": 192 }, { "epoch": 0.000343978035052071, "grad_norm": 0.64453125, "learning_rate": 0.000386, "loss": 1.398, "step": 194 }, { "epoch": 0.0003475242003618862, "grad_norm": 1.8125, "learning_rate": 0.00039000000000000005, "loss": 1.7848, "step": 196 }, { "epoch": 0.00035107036567170135, "grad_norm": 1.390625, "learning_rate": 0.00039400000000000004, "loss": 1.8591, "step": 198 }, { "epoch": 0.0003546165309815165, "grad_norm": 0.8203125, "learning_rate": 0.000398, "loss": 1.3619, "step": 200 }, { "epoch": 0.00035816269629133164, "grad_norm": 1.3203125, "learning_rate": 0.000402, "loss": 1.9058, "step": 202 }, { "epoch": 0.00036170886160114685, "grad_norm": 0.74609375, "learning_rate": 0.00040600000000000006, "loss": 1.4741, "step": 204 }, { "epoch": 0.000365255026910962, "grad_norm": 0.71484375, "learning_rate": 0.00041, "loss": 1.5142, "step": 206 }, { "epoch": 0.00036880119222077715, "grad_norm": 2.171875, "learning_rate": 0.000414, "loss": 1.861, "step": 208 }, { "epoch": 0.00037234735753059235, "grad_norm": 0.82421875, "learning_rate": 0.00041799999999999997, "loss": 1.3935, "step": 210 }, { "epoch": 0.0003758935228404075, "grad_norm": 0.94140625, "learning_rate": 0.000422, "loss": 1.3792, "step": 212 }, { "epoch": 0.00037943968815022265, "grad_norm": 1.5390625, "learning_rate": 0.000426, "loss": 1.7609, "step": 214 }, { "epoch": 0.0003829858534600378, "grad_norm": 0.83203125, "learning_rate": 0.00043, "loss": 1.3885, "step": 216 }, { "epoch": 0.000386532018769853, "grad_norm": 1.5, "learning_rate": 0.00043400000000000003, "loss": 1.4444, "step": 218 }, { "epoch": 0.00039007818407966815, "grad_norm": 1.59375, "learning_rate": 0.000438, "loss": 1.3774, "step": 220 }, { "epoch": 0.0003936243493894833, "grad_norm": 0.85546875, "learning_rate": 0.000442, "loss": 1.698, "step": 222 }, { "epoch": 0.0003971705146992985, "grad_norm": 0.96875, "learning_rate": 0.000446, "loss": 1.4195, "step": 224 }, { "epoch": 0.00040071668000911365, "grad_norm": 0.765625, "learning_rate": 0.00045000000000000004, "loss": 1.6253, "step": 226 }, { "epoch": 0.0004042628453189288, "grad_norm": 1.40625, "learning_rate": 0.00045400000000000003, "loss": 1.5447, "step": 228 }, { "epoch": 0.00040780901062874395, "grad_norm": 0.78125, "learning_rate": 0.000458, "loss": 1.5232, "step": 230 }, { "epoch": 0.00041135517593855915, "grad_norm": 2.140625, "learning_rate": 0.000462, "loss": 1.9986, "step": 232 }, { "epoch": 0.0004149013412483743, "grad_norm": 1.5546875, "learning_rate": 0.00046600000000000005, "loss": 1.3245, "step": 234 }, { "epoch": 0.00041844750655818945, "grad_norm": 0.8671875, "learning_rate": 0.00047, "loss": 2.1678, "step": 236 }, { "epoch": 0.00042199367186800466, "grad_norm": 1.734375, "learning_rate": 0.000474, "loss": 1.6768, "step": 238 }, { "epoch": 0.0004255398371778198, "grad_norm": 0.62890625, "learning_rate": 0.00047799999999999996, "loss": 1.5694, "step": 240 }, { "epoch": 0.00042908600248763495, "grad_norm": 3.15625, "learning_rate": 0.000482, "loss": 1.3832, "step": 242 }, { "epoch": 0.00043263216779745016, "grad_norm": 2.390625, "learning_rate": 0.000486, "loss": 1.4785, "step": 244 }, { "epoch": 0.0004361783331072653, "grad_norm": 1.8671875, "learning_rate": 0.00049, "loss": 1.7814, "step": 246 }, { "epoch": 0.00043972449841708046, "grad_norm": 1.3828125, "learning_rate": 0.000494, "loss": 1.3818, "step": 248 }, { "epoch": 0.0004432706637268956, "grad_norm": 0.890625, "learning_rate": 0.000498, "loss": 1.4585, "step": 250 }, { "epoch": 0.0004468168290367108, "grad_norm": 0.62890625, "learning_rate": 0.0005020000000000001, "loss": 1.4761, "step": 252 }, { "epoch": 0.00045036299434652596, "grad_norm": 1.8203125, "learning_rate": 0.000506, "loss": 2.0438, "step": 254 }, { "epoch": 0.0004539091596563411, "grad_norm": 0.671875, "learning_rate": 0.00051, "loss": 1.7384, "step": 256 }, { "epoch": 0.0004574553249661563, "grad_norm": 2.5625, "learning_rate": 0.000514, "loss": 1.5145, "step": 258 }, { "epoch": 0.00046100149027597146, "grad_norm": 1.5703125, "learning_rate": 0.000518, "loss": 1.5793, "step": 260 }, { "epoch": 0.0004645476555857866, "grad_norm": 0.71484375, "learning_rate": 0.000522, "loss": 1.5587, "step": 262 }, { "epoch": 0.00046809382089560176, "grad_norm": 0.62109375, "learning_rate": 0.000526, "loss": 1.5064, "step": 264 }, { "epoch": 0.00047163998620541696, "grad_norm": 0.6171875, "learning_rate": 0.0005300000000000001, "loss": 1.571, "step": 266 }, { "epoch": 0.0004751861515152321, "grad_norm": 1.859375, "learning_rate": 0.0005340000000000001, "loss": 1.5269, "step": 268 }, { "epoch": 0.00047873231682504726, "grad_norm": 0.89453125, "learning_rate": 0.0005380000000000001, "loss": 1.7835, "step": 270 }, { "epoch": 0.00048227848213486246, "grad_norm": 1.0, "learning_rate": 0.0005420000000000001, "loss": 1.8869, "step": 272 }, { "epoch": 0.0004858246474446776, "grad_norm": 0.55078125, "learning_rate": 0.000546, "loss": 1.5411, "step": 274 }, { "epoch": 0.0004893708127544928, "grad_norm": 0.8203125, "learning_rate": 0.00055, "loss": 1.7809, "step": 276 }, { "epoch": 0.000492916978064308, "grad_norm": 1.3671875, "learning_rate": 0.000554, "loss": 1.6076, "step": 278 }, { "epoch": 0.0004964631433741231, "grad_norm": 0.64453125, "learning_rate": 0.000558, "loss": 1.5887, "step": 280 }, { "epoch": 0.0005000093086839383, "grad_norm": 6.28125, "learning_rate": 0.0005620000000000001, "loss": 1.6691, "step": 282 }, { "epoch": 0.0005035554739937535, "grad_norm": 3.890625, "learning_rate": 0.000566, "loss": 1.4103, "step": 284 }, { "epoch": 0.0005071016393035686, "grad_norm": 0.83203125, "learning_rate": 0.00057, "loss": 1.4613, "step": 286 }, { "epoch": 0.0005106478046133838, "grad_norm": 1.7265625, "learning_rate": 0.000574, "loss": 1.5102, "step": 288 }, { "epoch": 0.000514193969923199, "grad_norm": 1.3203125, "learning_rate": 0.000578, "loss": 1.9516, "step": 290 }, { "epoch": 0.0005177401352330141, "grad_norm": 1.7734375, "learning_rate": 0.0005819999999999999, "loss": 2.0735, "step": 292 }, { "epoch": 0.0005212863005428293, "grad_norm": 0.396484375, "learning_rate": 0.0005859999999999999, "loss": 1.4782, "step": 294 }, { "epoch": 0.0005248324658526445, "grad_norm": 0.72265625, "learning_rate": 0.00059, "loss": 1.5791, "step": 296 }, { "epoch": 0.0005283786311624596, "grad_norm": 1.4140625, "learning_rate": 0.000594, "loss": 1.662, "step": 298 }, { "epoch": 0.0005319247964722748, "grad_norm": 1.15625, "learning_rate": 0.000598, "loss": 1.7771, "step": 300 }, { "epoch": 0.0005354709617820899, "grad_norm": 1.296875, "learning_rate": 0.000602, "loss": 2.0136, "step": 302 }, { "epoch": 0.0005390171270919051, "grad_norm": 1.984375, "learning_rate": 0.000606, "loss": 1.6989, "step": 304 }, { "epoch": 0.0005425632924017203, "grad_norm": 0.84375, "learning_rate": 0.00061, "loss": 1.3634, "step": 306 }, { "epoch": 0.0005461094577115354, "grad_norm": 0.99609375, "learning_rate": 0.000614, "loss": 2.0646, "step": 308 }, { "epoch": 0.0005496556230213506, "grad_norm": 0.56640625, "learning_rate": 0.0006180000000000001, "loss": 1.6761, "step": 310 }, { "epoch": 0.0005532017883311658, "grad_norm": 1.234375, "learning_rate": 0.000622, "loss": 1.5955, "step": 312 }, { "epoch": 0.0005567479536409809, "grad_norm": 1.8203125, "learning_rate": 0.000626, "loss": 1.7073, "step": 314 }, { "epoch": 0.0005602941189507961, "grad_norm": 0.88671875, "learning_rate": 0.00063, "loss": 1.517, "step": 316 }, { "epoch": 0.0005638402842606113, "grad_norm": 0.439453125, "learning_rate": 0.000634, "loss": 1.2879, "step": 318 }, { "epoch": 0.0005673864495704264, "grad_norm": 1.7109375, "learning_rate": 0.000638, "loss": 1.5404, "step": 320 }, { "epoch": 0.0005709326148802416, "grad_norm": 0.79296875, "learning_rate": 0.000642, "loss": 1.289, "step": 322 }, { "epoch": 0.0005744787801900568, "grad_norm": 0.7734375, "learning_rate": 0.000646, "loss": 1.538, "step": 324 }, { "epoch": 0.0005780249454998719, "grad_norm": 0.66796875, "learning_rate": 0.0006500000000000001, "loss": 1.5195, "step": 326 }, { "epoch": 0.0005815711108096871, "grad_norm": 1.421875, "learning_rate": 0.0006540000000000001, "loss": 1.5568, "step": 328 }, { "epoch": 0.0005851172761195023, "grad_norm": 3.703125, "learning_rate": 0.0006580000000000001, "loss": 1.5764, "step": 330 }, { "epoch": 0.0005886634414293174, "grad_norm": 1.109375, "learning_rate": 0.000662, "loss": 1.6677, "step": 332 }, { "epoch": 0.0005922096067391326, "grad_norm": 0.67578125, "learning_rate": 0.000666, "loss": 1.7501, "step": 334 }, { "epoch": 0.0005957557720489477, "grad_norm": 0.90234375, "learning_rate": 0.00067, "loss": 1.2844, "step": 336 }, { "epoch": 0.0005993019373587629, "grad_norm": 3.078125, "learning_rate": 0.000674, "loss": 1.7816, "step": 338 }, { "epoch": 0.0006028481026685781, "grad_norm": 1.1328125, "learning_rate": 0.0006780000000000001, "loss": 1.436, "step": 340 }, { "epoch": 0.0006063942679783932, "grad_norm": 0.73046875, "learning_rate": 0.0006820000000000001, "loss": 1.4653, "step": 342 }, { "epoch": 0.0006099404332882084, "grad_norm": 1.1328125, "learning_rate": 0.0006860000000000001, "loss": 1.8547, "step": 344 }, { "epoch": 0.0006134865985980236, "grad_norm": 0.7265625, "learning_rate": 0.00069, "loss": 1.4515, "step": 346 }, { "epoch": 0.0006170327639078387, "grad_norm": 0.9921875, "learning_rate": 0.000694, "loss": 1.4289, "step": 348 }, { "epoch": 0.0006205789292176539, "grad_norm": 1.4296875, "learning_rate": 0.0006979999999999999, "loss": 1.6906, "step": 350 }, { "epoch": 0.0006241250945274691, "grad_norm": 2.40625, "learning_rate": 0.0007019999999999999, "loss": 1.4184, "step": 352 }, { "epoch": 0.0006276712598372842, "grad_norm": 0.94140625, "learning_rate": 0.0007059999999999999, "loss": 1.2701, "step": 354 }, { "epoch": 0.0006312174251470994, "grad_norm": 4.46875, "learning_rate": 0.00071, "loss": 1.3772, "step": 356 }, { "epoch": 0.0006347635904569146, "grad_norm": 3.5625, "learning_rate": 0.000714, "loss": 1.3526, "step": 358 }, { "epoch": 0.0006383097557667297, "grad_norm": 1.0234375, "learning_rate": 0.000718, "loss": 1.3579, "step": 360 }, { "epoch": 0.0006418559210765449, "grad_norm": 1.7265625, "learning_rate": 0.000722, "loss": 1.4318, "step": 362 }, { "epoch": 0.00064540208638636, "grad_norm": 2.015625, "learning_rate": 0.000726, "loss": 1.355, "step": 364 }, { "epoch": 0.0006489482516961752, "grad_norm": 1.3359375, "learning_rate": 0.00073, "loss": 1.3997, "step": 366 }, { "epoch": 0.0006524944170059904, "grad_norm": 1.4140625, "learning_rate": 0.000734, "loss": 1.0671, "step": 368 }, { "epoch": 0.0006560405823158055, "grad_norm": 1.484375, "learning_rate": 0.000738, "loss": 1.1156, "step": 370 }, { "epoch": 0.0006595867476256207, "grad_norm": 2.421875, "learning_rate": 0.000742, "loss": 1.2581, "step": 372 }, { "epoch": 0.0006631329129354359, "grad_norm": 1.0625, "learning_rate": 0.000746, "loss": 0.8968, "step": 374 }, { "epoch": 0.000666679078245251, "grad_norm": 3.765625, "learning_rate": 0.00075, "loss": 1.1869, "step": 376 }, { "epoch": 0.0006702252435550662, "grad_norm": 1.7734375, "learning_rate": 0.000754, "loss": 0.9751, "step": 378 }, { "epoch": 0.0006737714088648814, "grad_norm": 2.890625, "learning_rate": 0.000758, "loss": 1.2709, "step": 380 }, { "epoch": 0.0006773175741746965, "grad_norm": 1.6171875, "learning_rate": 0.000762, "loss": 1.1142, "step": 382 }, { "epoch": 0.0006808637394845117, "grad_norm": 3.796875, "learning_rate": 0.0007660000000000001, "loss": 0.9331, "step": 384 }, { "epoch": 0.0006844099047943269, "grad_norm": 2.859375, "learning_rate": 0.0007700000000000001, "loss": 0.9445, "step": 386 }, { "epoch": 0.000687956070104142, "grad_norm": 3.40625, "learning_rate": 0.0007740000000000001, "loss": 1.1067, "step": 388 }, { "epoch": 0.0006915022354139572, "grad_norm": 1.234375, "learning_rate": 0.000778, "loss": 0.8197, "step": 390 }, { "epoch": 0.0006950484007237724, "grad_norm": 2.671875, "learning_rate": 0.000782, "loss": 0.9289, "step": 392 }, { "epoch": 0.0006985945660335875, "grad_norm": 5.53125, "learning_rate": 0.000786, "loss": 1.2645, "step": 394 }, { "epoch": 0.0007021407313434027, "grad_norm": 2.96875, "learning_rate": 0.00079, "loss": 0.7802, "step": 396 }, { "epoch": 0.0007056868966532178, "grad_norm": 4.96875, "learning_rate": 0.0007940000000000001, "loss": 0.8851, "step": 398 }, { "epoch": 0.000709233061963033, "grad_norm": 1.5234375, "learning_rate": 0.0007980000000000001, "loss": 0.8057, "step": 400 }, { "epoch": 0.0007127792272728482, "grad_norm": 0.91796875, "learning_rate": 0.0008020000000000001, "loss": 0.8375, "step": 402 }, { "epoch": 0.0007163253925826633, "grad_norm": 0.98828125, "learning_rate": 0.0008060000000000001, "loss": 0.7738, "step": 404 }, { "epoch": 0.0007198715578924785, "grad_norm": 1.921875, "learning_rate": 0.0008100000000000001, "loss": 0.9653, "step": 406 }, { "epoch": 0.0007234177232022937, "grad_norm": 2.0625, "learning_rate": 0.0008139999999999999, "loss": 0.8432, "step": 408 }, { "epoch": 0.0007269638885121088, "grad_norm": 0.8359375, "learning_rate": 0.0008179999999999999, "loss": 0.8312, "step": 410 }, { "epoch": 0.000730510053821924, "grad_norm": 5.0625, "learning_rate": 0.0008219999999999999, "loss": 0.8912, "step": 412 }, { "epoch": 0.0007340562191317392, "grad_norm": 4.0625, "learning_rate": 0.000826, "loss": 0.8716, "step": 414 }, { "epoch": 0.0007376023844415543, "grad_norm": 6.6875, "learning_rate": 0.00083, "loss": 0.7662, "step": 416 }, { "epoch": 0.0007411485497513695, "grad_norm": 0.95703125, "learning_rate": 0.000834, "loss": 0.9265, "step": 418 }, { "epoch": 0.0007446947150611847, "grad_norm": 1.0078125, "learning_rate": 0.000838, "loss": 0.9106, "step": 420 }, { "epoch": 0.0007482408803709998, "grad_norm": 3.0625, "learning_rate": 0.000842, "loss": 0.8929, "step": 422 }, { "epoch": 0.000751787045680815, "grad_norm": 2.34375, "learning_rate": 0.000846, "loss": 0.8809, "step": 424 }, { "epoch": 0.0007553332109906301, "grad_norm": 0.76953125, "learning_rate": 0.00085, "loss": 0.6671, "step": 426 }, { "epoch": 0.0007588793763004453, "grad_norm": 0.7734375, "learning_rate": 0.000854, "loss": 0.7229, "step": 428 }, { "epoch": 0.0007624255416102605, "grad_norm": 2.484375, "learning_rate": 0.000858, "loss": 0.9, "step": 430 }, { "epoch": 0.0007659717069200756, "grad_norm": 2.203125, "learning_rate": 0.000862, "loss": 0.8212, "step": 432 }, { "epoch": 0.0007695178722298908, "grad_norm": 0.66015625, "learning_rate": 0.000866, "loss": 0.5973, "step": 434 }, { "epoch": 0.000773064037539706, "grad_norm": 2.328125, "learning_rate": 0.00087, "loss": 0.5344, "step": 436 }, { "epoch": 0.0007766102028495211, "grad_norm": 1.0, "learning_rate": 0.000874, "loss": 0.5902, "step": 438 }, { "epoch": 0.0007801563681593363, "grad_norm": 0.80078125, "learning_rate": 0.000878, "loss": 0.6295, "step": 440 }, { "epoch": 0.0007837025334691515, "grad_norm": 0.75, "learning_rate": 0.000882, "loss": 0.6326, "step": 442 }, { "epoch": 0.0007872486987789666, "grad_norm": 0.9453125, "learning_rate": 0.0008860000000000001, "loss": 0.5642, "step": 444 }, { "epoch": 0.0007907948640887818, "grad_norm": 0.62890625, "learning_rate": 0.0008900000000000001, "loss": 0.602, "step": 446 }, { "epoch": 0.000794341029398597, "grad_norm": 3.234375, "learning_rate": 0.000894, "loss": 0.7297, "step": 448 }, { "epoch": 0.0007978871947084121, "grad_norm": 0.7890625, "learning_rate": 0.000898, "loss": 0.701, "step": 450 }, { "epoch": 0.0008014333600182273, "grad_norm": 0.69140625, "learning_rate": 0.000902, "loss": 0.5983, "step": 452 }, { "epoch": 0.0008049795253280425, "grad_norm": 1.421875, "learning_rate": 0.000906, "loss": 0.4609, "step": 454 }, { "epoch": 0.0008085256906378576, "grad_norm": 1.1484375, "learning_rate": 0.00091, "loss": 0.5314, "step": 456 }, { "epoch": 0.0008120718559476728, "grad_norm": 4.4375, "learning_rate": 0.0009140000000000001, "loss": 1.0837, "step": 458 }, { "epoch": 0.0008156180212574879, "grad_norm": 2.953125, "learning_rate": 0.0009180000000000001, "loss": 0.7036, "step": 460 }, { "epoch": 0.0008191641865673031, "grad_norm": 0.75, "learning_rate": 0.0009220000000000001, "loss": 0.699, "step": 462 }, { "epoch": 0.0008227103518771183, "grad_norm": 0.6171875, "learning_rate": 0.0009260000000000001, "loss": 0.6727, "step": 464 }, { "epoch": 0.0008262565171869334, "grad_norm": 2.0, "learning_rate": 0.00093, "loss": 0.6781, "step": 466 }, { "epoch": 0.0008298026824967486, "grad_norm": 0.70703125, "learning_rate": 0.000934, "loss": 0.603, "step": 468 }, { "epoch": 0.0008333488478065638, "grad_norm": 0.62890625, "learning_rate": 0.0009379999999999999, "loss": 0.5876, "step": 470 }, { "epoch": 0.0008368950131163789, "grad_norm": 0.5703125, "learning_rate": 0.000942, "loss": 0.5201, "step": 472 }, { "epoch": 0.0008404411784261941, "grad_norm": 3.390625, "learning_rate": 0.000946, "loss": 0.6795, "step": 474 }, { "epoch": 0.0008439873437360093, "grad_norm": 1.125, "learning_rate": 0.00095, "loss": 0.5322, "step": 476 }, { "epoch": 0.0008475335090458244, "grad_norm": 2.15625, "learning_rate": 0.000954, "loss": 0.636, "step": 478 }, { "epoch": 0.0008510796743556396, "grad_norm": 0.66015625, "learning_rate": 0.000958, "loss": 0.5606, "step": 480 }, { "epoch": 0.0008546258396654548, "grad_norm": 0.66796875, "learning_rate": 0.000962, "loss": 0.4997, "step": 482 }, { "epoch": 0.0008581720049752699, "grad_norm": 1.1953125, "learning_rate": 0.000966, "loss": 0.704, "step": 484 }, { "epoch": 0.0008617181702850851, "grad_norm": 0.8828125, "learning_rate": 0.0009699999999999999, "loss": 0.5248, "step": 486 }, { "epoch": 0.0008652643355949003, "grad_norm": 5.75, "learning_rate": 0.000974, "loss": 0.7647, "step": 488 }, { "epoch": 0.0008688105009047154, "grad_norm": 0.59765625, "learning_rate": 0.000978, "loss": 0.54, "step": 490 }, { "epoch": 0.0008723566662145306, "grad_norm": 3.109375, "learning_rate": 0.000982, "loss": 0.6266, "step": 492 }, { "epoch": 0.0008759028315243457, "grad_norm": 2.875, "learning_rate": 0.0009860000000000001, "loss": 0.6661, "step": 494 }, { "epoch": 0.0008794489968341609, "grad_norm": 0.77734375, "learning_rate": 0.00099, "loss": 0.5463, "step": 496 }, { "epoch": 0.0008829951621439761, "grad_norm": 1.078125, "learning_rate": 0.000994, "loss": 0.5743, "step": 498 }, { "epoch": 0.0008865413274537912, "grad_norm": 1.28125, "learning_rate": 0.000998, "loss": 0.6301, "step": 500 }, { "epoch": 0.0008900874927636064, "grad_norm": 0.44921875, "learning_rate": 0.001002, "loss": 0.4796, "step": 502 }, { "epoch": 0.0008936336580734216, "grad_norm": 0.6015625, "learning_rate": 0.001006, "loss": 0.5895, "step": 504 }, { "epoch": 0.0008971798233832367, "grad_norm": 1.875, "learning_rate": 0.00101, "loss": 0.5832, "step": 506 }, { "epoch": 0.0009007259886930519, "grad_norm": 1.296875, "learning_rate": 0.001014, "loss": 0.6405, "step": 508 }, { "epoch": 0.0009042721540028671, "grad_norm": 0.455078125, "learning_rate": 0.001018, "loss": 0.4511, "step": 510 }, { "epoch": 0.0009078183193126822, "grad_norm": 0.515625, "learning_rate": 0.0010220000000000001, "loss": 0.4704, "step": 512 }, { "epoch": 0.0009113644846224974, "grad_norm": 1.1953125, "learning_rate": 0.001026, "loss": 0.55, "step": 514 }, { "epoch": 0.0009149106499323126, "grad_norm": 2.359375, "learning_rate": 0.00103, "loss": 0.6907, "step": 516 }, { "epoch": 0.0009184568152421277, "grad_norm": 1.859375, "learning_rate": 0.001034, "loss": 0.7927, "step": 518 }, { "epoch": 0.0009220029805519429, "grad_norm": 2.484375, "learning_rate": 0.001038, "loss": 0.5656, "step": 520 }, { "epoch": 0.000925549145861758, "grad_norm": 0.78515625, "learning_rate": 0.001042, "loss": 0.6883, "step": 522 }, { "epoch": 0.0009290953111715732, "grad_norm": 1.421875, "learning_rate": 0.001046, "loss": 0.7126, "step": 524 }, { "epoch": 0.0009326414764813884, "grad_norm": 1.328125, "learning_rate": 0.0010500000000000002, "loss": 0.5154, "step": 526 }, { "epoch": 0.0009361876417912035, "grad_norm": 0.71484375, "learning_rate": 0.001054, "loss": 0.5787, "step": 528 }, { "epoch": 0.0009397338071010187, "grad_norm": 0.69140625, "learning_rate": 0.0010580000000000001, "loss": 0.5701, "step": 530 }, { "epoch": 0.0009432799724108339, "grad_norm": 0.640625, "learning_rate": 0.001062, "loss": 0.6469, "step": 532 }, { "epoch": 0.000946826137720649, "grad_norm": 0.9375, "learning_rate": 0.001066, "loss": 0.5711, "step": 534 }, { "epoch": 0.0009503723030304642, "grad_norm": 13.6875, "learning_rate": 0.00107, "loss": 0.559, "step": 536 }, { "epoch": 0.0009539184683402794, "grad_norm": 0.60546875, "learning_rate": 0.001074, "loss": 0.4705, "step": 538 }, { "epoch": 0.0009574646336500945, "grad_norm": 1.2890625, "learning_rate": 0.0010780000000000002, "loss": 0.5083, "step": 540 }, { "epoch": 0.0009610107989599097, "grad_norm": 1.0703125, "learning_rate": 0.001082, "loss": 0.6336, "step": 542 }, { "epoch": 0.0009645569642697249, "grad_norm": 3.21875, "learning_rate": 0.0010860000000000002, "loss": 0.6729, "step": 544 }, { "epoch": 0.00096810312957954, "grad_norm": 0.6640625, "learning_rate": 0.00109, "loss": 0.4936, "step": 546 }, { "epoch": 0.0009716492948893552, "grad_norm": 2.421875, "learning_rate": 0.0010940000000000001, "loss": 0.7861, "step": 548 }, { "epoch": 0.0009751954601991704, "grad_norm": 0.9375, "learning_rate": 0.001098, "loss": 0.5245, "step": 550 }, { "epoch": 0.0009787416255089855, "grad_norm": 1.015625, "learning_rate": 0.0011020000000000001, "loss": 0.4803, "step": 552 }, { "epoch": 0.0009822877908188007, "grad_norm": 4.1875, "learning_rate": 0.0011060000000000002, "loss": 0.5755, "step": 554 }, { "epoch": 0.000985833956128616, "grad_norm": 0.453125, "learning_rate": 0.00111, "loss": 0.4838, "step": 556 }, { "epoch": 0.0009893801214384311, "grad_norm": 1.6171875, "learning_rate": 0.0011140000000000002, "loss": 0.6264, "step": 558 }, { "epoch": 0.0009929262867482461, "grad_norm": 2.25, "learning_rate": 0.001118, "loss": 0.8499, "step": 560 }, { "epoch": 0.0009964724520580613, "grad_norm": 0.76171875, "learning_rate": 0.0011220000000000002, "loss": 0.5218, "step": 562 }, { "epoch": 0.0010000186173678765, "grad_norm": 0.7265625, "learning_rate": 0.0011259999999999998, "loss": 0.6252, "step": 564 }, { "epoch": 0.0010035647826776917, "grad_norm": 0.439453125, "learning_rate": 0.00113, "loss": 0.455, "step": 566 }, { "epoch": 0.001007110947987507, "grad_norm": 0.76953125, "learning_rate": 0.001134, "loss": 0.4322, "step": 568 }, { "epoch": 0.001010657113297322, "grad_norm": 5.59375, "learning_rate": 0.001138, "loss": 1.0676, "step": 570 }, { "epoch": 0.0010142032786071371, "grad_norm": 1.40625, "learning_rate": 0.001142, "loss": 0.5648, "step": 572 }, { "epoch": 0.0010177494439169523, "grad_norm": 0.984375, "learning_rate": 0.0011459999999999999, "loss": 0.8953, "step": 574 }, { "epoch": 0.0010212956092267675, "grad_norm": 0.65234375, "learning_rate": 0.00115, "loss": 0.4995, "step": 576 }, { "epoch": 0.0010248417745365827, "grad_norm": 1.3203125, "learning_rate": 0.0011539999999999999, "loss": 0.5413, "step": 578 }, { "epoch": 0.001028387939846398, "grad_norm": 3.859375, "learning_rate": 0.001158, "loss": 0.7195, "step": 580 }, { "epoch": 0.001031934105156213, "grad_norm": 0.97265625, "learning_rate": 0.0011619999999999998, "loss": 0.6171, "step": 582 }, { "epoch": 0.0010354802704660281, "grad_norm": 0.70703125, "learning_rate": 0.001166, "loss": 0.5062, "step": 584 }, { "epoch": 0.0010390264357758433, "grad_norm": 0.734375, "learning_rate": 0.00117, "loss": 0.5645, "step": 586 }, { "epoch": 0.0010425726010856585, "grad_norm": 0.70703125, "learning_rate": 0.001174, "loss": 0.4871, "step": 588 }, { "epoch": 0.0010461187663954737, "grad_norm": 1.703125, "learning_rate": 0.001178, "loss": 0.4328, "step": 590 }, { "epoch": 0.001049664931705289, "grad_norm": 0.73828125, "learning_rate": 0.0011819999999999999, "loss": 0.5342, "step": 592 }, { "epoch": 0.001053211097015104, "grad_norm": 1.3984375, "learning_rate": 0.001186, "loss": 0.4844, "step": 594 }, { "epoch": 0.0010567572623249191, "grad_norm": 0.9609375, "learning_rate": 0.0011899999999999999, "loss": 0.4575, "step": 596 }, { "epoch": 0.0010603034276347343, "grad_norm": 0.482421875, "learning_rate": 0.001194, "loss": 0.5371, "step": 598 }, { "epoch": 0.0010638495929445495, "grad_norm": 1.59375, "learning_rate": 0.001198, "loss": 0.4835, "step": 600 }, { "epoch": 0.0010673957582543647, "grad_norm": 0.55078125, "learning_rate": 0.001202, "loss": 0.4884, "step": 602 }, { "epoch": 0.0010709419235641797, "grad_norm": 0.5625, "learning_rate": 0.001206, "loss": 0.6999, "step": 604 }, { "epoch": 0.001074488088873995, "grad_norm": 1.03125, "learning_rate": 0.00121, "loss": 0.4397, "step": 606 }, { "epoch": 0.0010780342541838101, "grad_norm": 1.3984375, "learning_rate": 0.001214, "loss": 0.6247, "step": 608 }, { "epoch": 0.0010815804194936253, "grad_norm": 0.91015625, "learning_rate": 0.001218, "loss": 0.3905, "step": 610 }, { "epoch": 0.0010851265848034405, "grad_norm": 0.5546875, "learning_rate": 0.001222, "loss": 0.4631, "step": 612 }, { "epoch": 0.0010886727501132557, "grad_norm": 1.421875, "learning_rate": 0.001226, "loss": 0.4027, "step": 614 }, { "epoch": 0.0010922189154230707, "grad_norm": 0.40234375, "learning_rate": 0.00123, "loss": 0.5303, "step": 616 }, { "epoch": 0.001095765080732886, "grad_norm": 0.7109375, "learning_rate": 0.001234, "loss": 0.4526, "step": 618 }, { "epoch": 0.0010993112460427011, "grad_norm": 2.140625, "learning_rate": 0.001238, "loss": 0.6035, "step": 620 }, { "epoch": 0.0011028574113525163, "grad_norm": 1.15625, "learning_rate": 0.001242, "loss": 0.5793, "step": 622 }, { "epoch": 0.0011064035766623315, "grad_norm": 0.416015625, "learning_rate": 0.001246, "loss": 0.4564, "step": 624 }, { "epoch": 0.0011099497419721467, "grad_norm": 0.58203125, "learning_rate": 0.00125, "loss": 0.4803, "step": 626 }, { "epoch": 0.0011134959072819617, "grad_norm": 0.88671875, "learning_rate": 0.0012540000000000001, "loss": 0.6734, "step": 628 }, { "epoch": 0.001117042072591777, "grad_norm": 0.66015625, "learning_rate": 0.001258, "loss": 0.4855, "step": 630 }, { "epoch": 0.0011205882379015921, "grad_norm": 1.15625, "learning_rate": 0.001262, "loss": 0.4557, "step": 632 }, { "epoch": 0.0011241344032114073, "grad_norm": 1.78125, "learning_rate": 0.001266, "loss": 0.5451, "step": 634 }, { "epoch": 0.0011276805685212225, "grad_norm": 1.7265625, "learning_rate": 0.00127, "loss": 0.5126, "step": 636 }, { "epoch": 0.0011312267338310375, "grad_norm": 1.265625, "learning_rate": 0.001274, "loss": 0.5094, "step": 638 }, { "epoch": 0.0011347728991408527, "grad_norm": 2.21875, "learning_rate": 0.001278, "loss": 0.5792, "step": 640 }, { "epoch": 0.001138319064450668, "grad_norm": 1.796875, "learning_rate": 0.0012820000000000002, "loss": 0.5892, "step": 642 }, { "epoch": 0.0011418652297604831, "grad_norm": 0.69921875, "learning_rate": 0.001286, "loss": 0.5642, "step": 644 }, { "epoch": 0.0011454113950702983, "grad_norm": 0.50390625, "learning_rate": 0.0012900000000000001, "loss": 0.4613, "step": 646 }, { "epoch": 0.0011489575603801136, "grad_norm": 0.4140625, "learning_rate": 0.001294, "loss": 0.5105, "step": 648 }, { "epoch": 0.0011525037256899285, "grad_norm": 0.5234375, "learning_rate": 0.0012980000000000001, "loss": 0.7083, "step": 650 }, { "epoch": 0.0011560498909997437, "grad_norm": 0.6640625, "learning_rate": 0.001302, "loss": 0.4454, "step": 652 }, { "epoch": 0.001159596056309559, "grad_norm": 0.52734375, "learning_rate": 0.001306, "loss": 0.3984, "step": 654 }, { "epoch": 0.0011631422216193741, "grad_norm": 1.7578125, "learning_rate": 0.0013100000000000002, "loss": 0.4624, "step": 656 }, { "epoch": 0.0011666883869291894, "grad_norm": 0.39453125, "learning_rate": 0.001314, "loss": 0.5275, "step": 658 }, { "epoch": 0.0011702345522390046, "grad_norm": 0.62890625, "learning_rate": 0.0013180000000000002, "loss": 0.4649, "step": 660 }, { "epoch": 0.0011737807175488195, "grad_norm": 0.451171875, "learning_rate": 0.001322, "loss": 0.4366, "step": 662 }, { "epoch": 0.0011773268828586347, "grad_norm": 0.41015625, "learning_rate": 0.0013260000000000001, "loss": 0.4079, "step": 664 }, { "epoch": 0.00118087304816845, "grad_norm": 0.466796875, "learning_rate": 0.00133, "loss": 0.537, "step": 666 }, { "epoch": 0.0011844192134782652, "grad_norm": 0.8046875, "learning_rate": 0.0013340000000000001, "loss": 0.4374, "step": 668 }, { "epoch": 0.0011879653787880804, "grad_norm": 0.828125, "learning_rate": 0.0013380000000000002, "loss": 0.5113, "step": 670 }, { "epoch": 0.0011915115440978953, "grad_norm": 0.5546875, "learning_rate": 0.001342, "loss": 0.4182, "step": 672 }, { "epoch": 0.0011950577094077105, "grad_norm": 2.015625, "learning_rate": 0.0013460000000000002, "loss": 0.4994, "step": 674 }, { "epoch": 0.0011986038747175258, "grad_norm": 1.5546875, "learning_rate": 0.00135, "loss": 0.7251, "step": 676 }, { "epoch": 0.001202150040027341, "grad_norm": 2.65625, "learning_rate": 0.0013540000000000002, "loss": 0.8491, "step": 678 }, { "epoch": 0.0012056962053371562, "grad_norm": 0.62890625, "learning_rate": 0.001358, "loss": 0.3866, "step": 680 }, { "epoch": 0.0012092423706469714, "grad_norm": 0.82421875, "learning_rate": 0.0013620000000000001, "loss": 0.4451, "step": 682 }, { "epoch": 0.0012127885359567863, "grad_norm": 0.796875, "learning_rate": 0.001366, "loss": 0.3521, "step": 684 }, { "epoch": 0.0012163347012666016, "grad_norm": 0.392578125, "learning_rate": 0.0013700000000000001, "loss": 0.4155, "step": 686 }, { "epoch": 0.0012198808665764168, "grad_norm": 1.28125, "learning_rate": 0.0013740000000000002, "loss": 0.5518, "step": 688 }, { "epoch": 0.001223427031886232, "grad_norm": 0.462890625, "learning_rate": 0.0013779999999999999, "loss": 0.3655, "step": 690 }, { "epoch": 0.0012269731971960472, "grad_norm": 1.109375, "learning_rate": 0.001382, "loss": 0.3804, "step": 692 }, { "epoch": 0.0012305193625058621, "grad_norm": 2.796875, "learning_rate": 0.0013859999999999999, "loss": 0.663, "step": 694 }, { "epoch": 0.0012340655278156774, "grad_norm": 1.203125, "learning_rate": 0.00139, "loss": 0.5644, "step": 696 }, { "epoch": 0.0012376116931254926, "grad_norm": 4.28125, "learning_rate": 0.0013939999999999998, "loss": 0.5431, "step": 698 }, { "epoch": 0.0012411578584353078, "grad_norm": 0.7578125, "learning_rate": 0.001398, "loss": 0.4251, "step": 700 }, { "epoch": 0.001244704023745123, "grad_norm": 0.390625, "learning_rate": 0.001402, "loss": 0.4524, "step": 702 }, { "epoch": 0.0012482501890549382, "grad_norm": 0.36328125, "learning_rate": 0.001406, "loss": 0.3926, "step": 704 }, { "epoch": 0.0012517963543647532, "grad_norm": 1.1484375, "learning_rate": 0.00141, "loss": 0.4461, "step": 706 }, { "epoch": 0.0012553425196745684, "grad_norm": 0.431640625, "learning_rate": 0.001414, "loss": 0.5012, "step": 708 }, { "epoch": 0.0012588886849843836, "grad_norm": 0.46484375, "learning_rate": 0.001418, "loss": 0.4257, "step": 710 }, { "epoch": 0.0012624348502941988, "grad_norm": 1.75, "learning_rate": 0.0014219999999999999, "loss": 0.4114, "step": 712 }, { "epoch": 0.001265981015604014, "grad_norm": 0.66796875, "learning_rate": 0.001426, "loss": 0.4855, "step": 714 }, { "epoch": 0.0012695271809138292, "grad_norm": 2.171875, "learning_rate": 0.00143, "loss": 0.6086, "step": 716 }, { "epoch": 0.0012730733462236442, "grad_norm": 1.375, "learning_rate": 0.001434, "loss": 0.5398, "step": 718 }, { "epoch": 0.0012766195115334594, "grad_norm": 1.2109375, "learning_rate": 0.001438, "loss": 0.5382, "step": 720 }, { "epoch": 0.0012801656768432746, "grad_norm": 1.9765625, "learning_rate": 0.001442, "loss": 0.4693, "step": 722 }, { "epoch": 0.0012837118421530898, "grad_norm": 1.1015625, "learning_rate": 0.001446, "loss": 0.4705, "step": 724 }, { "epoch": 0.001287258007462905, "grad_norm": 0.83984375, "learning_rate": 0.00145, "loss": 0.4714, "step": 726 }, { "epoch": 0.00129080417277272, "grad_norm": 0.66796875, "learning_rate": 0.001454, "loss": 0.9141, "step": 728 }, { "epoch": 0.0012943503380825352, "grad_norm": 0.77734375, "learning_rate": 0.001458, "loss": 0.6113, "step": 730 }, { "epoch": 0.0012978965033923504, "grad_norm": 2.09375, "learning_rate": 0.001462, "loss": 0.4465, "step": 732 }, { "epoch": 0.0013014426687021656, "grad_norm": 1.1875, "learning_rate": 0.001466, "loss": 0.4927, "step": 734 }, { "epoch": 0.0013049888340119808, "grad_norm": 0.3671875, "learning_rate": 0.00147, "loss": 0.3272, "step": 736 }, { "epoch": 0.001308534999321796, "grad_norm": 0.7578125, "learning_rate": 0.001474, "loss": 0.5498, "step": 738 }, { "epoch": 0.001312081164631611, "grad_norm": 2.6875, "learning_rate": 0.001478, "loss": 0.726, "step": 740 }, { "epoch": 0.0013156273299414262, "grad_norm": 0.97265625, "learning_rate": 0.001482, "loss": 0.4328, "step": 742 }, { "epoch": 0.0013191734952512414, "grad_norm": 3.953125, "learning_rate": 0.0014860000000000001, "loss": 0.4116, "step": 744 }, { "epoch": 0.0013227196605610566, "grad_norm": 1.1015625, "learning_rate": 0.00149, "loss": 0.403, "step": 746 }, { "epoch": 0.0013262658258708718, "grad_norm": 0.671875, "learning_rate": 0.001494, "loss": 0.4467, "step": 748 }, { "epoch": 0.001329811991180687, "grad_norm": 0.96875, "learning_rate": 0.001498, "loss": 0.4754, "step": 750 }, { "epoch": 0.001333358156490502, "grad_norm": 0.9453125, "learning_rate": 0.001502, "loss": 0.5166, "step": 752 }, { "epoch": 0.0013369043218003172, "grad_norm": 1.3125, "learning_rate": 0.001506, "loss": 0.4412, "step": 754 }, { "epoch": 0.0013404504871101324, "grad_norm": 0.83984375, "learning_rate": 0.00151, "loss": 0.3362, "step": 756 }, { "epoch": 0.0013439966524199476, "grad_norm": 0.796875, "learning_rate": 0.001514, "loss": 0.3429, "step": 758 }, { "epoch": 0.0013475428177297628, "grad_norm": 0.470703125, "learning_rate": 0.001518, "loss": 0.4573, "step": 760 }, { "epoch": 0.0013510889830395778, "grad_norm": 0.84375, "learning_rate": 0.0015220000000000001, "loss": 0.5393, "step": 762 }, { "epoch": 0.001354635148349393, "grad_norm": 0.9453125, "learning_rate": 0.001526, "loss": 0.3999, "step": 764 }, { "epoch": 0.0013581813136592082, "grad_norm": 0.447265625, "learning_rate": 0.0015300000000000001, "loss": 0.4177, "step": 766 }, { "epoch": 0.0013617274789690234, "grad_norm": 1.1796875, "learning_rate": 0.001534, "loss": 0.7588, "step": 768 }, { "epoch": 0.0013652736442788386, "grad_norm": 0.66015625, "learning_rate": 0.001538, "loss": 0.4386, "step": 770 }, { "epoch": 0.0013688198095886538, "grad_norm": 1.3359375, "learning_rate": 0.001542, "loss": 0.3225, "step": 772 }, { "epoch": 0.0013723659748984688, "grad_norm": 1.671875, "learning_rate": 0.001546, "loss": 0.4429, "step": 774 }, { "epoch": 0.001375912140208284, "grad_norm": 0.625, "learning_rate": 0.0015500000000000002, "loss": 0.4224, "step": 776 }, { "epoch": 0.0013794583055180992, "grad_norm": 1.0703125, "learning_rate": 0.001554, "loss": 0.4154, "step": 778 }, { "epoch": 0.0013830044708279144, "grad_norm": 0.6015625, "learning_rate": 0.0015580000000000001, "loss": 0.3761, "step": 780 }, { "epoch": 0.0013865506361377296, "grad_norm": 0.515625, "learning_rate": 0.001562, "loss": 0.3326, "step": 782 }, { "epoch": 0.0013900968014475448, "grad_norm": 0.54296875, "learning_rate": 0.0015660000000000001, "loss": 0.4234, "step": 784 }, { "epoch": 0.0013936429667573598, "grad_norm": 0.87109375, "learning_rate": 0.00157, "loss": 0.3707, "step": 786 }, { "epoch": 0.001397189132067175, "grad_norm": 2.421875, "learning_rate": 0.001574, "loss": 0.4198, "step": 788 }, { "epoch": 0.0014007352973769902, "grad_norm": 1.6015625, "learning_rate": 0.0015780000000000002, "loss": 0.6297, "step": 790 }, { "epoch": 0.0014042814626868054, "grad_norm": 2.15625, "learning_rate": 0.001582, "loss": 0.437, "step": 792 }, { "epoch": 0.0014078276279966206, "grad_norm": 0.333984375, "learning_rate": 0.0015860000000000002, "loss": 0.3658, "step": 794 }, { "epoch": 0.0014113737933064356, "grad_norm": 0.4921875, "learning_rate": 0.00159, "loss": 0.423, "step": 796 }, { "epoch": 0.0014149199586162508, "grad_norm": 0.322265625, "learning_rate": 0.0015940000000000001, "loss": 0.353, "step": 798 }, { "epoch": 0.001418466123926066, "grad_norm": 2.28125, "learning_rate": 0.001598, "loss": 0.4482, "step": 800 }, { "epoch": 0.0014220122892358812, "grad_norm": 0.357421875, "learning_rate": 0.0016020000000000001, "loss": 0.5871, "step": 802 }, { "epoch": 0.0014255584545456964, "grad_norm": 0.62109375, "learning_rate": 0.0016060000000000002, "loss": 0.4032, "step": 804 }, { "epoch": 0.0014291046198555116, "grad_norm": 0.7109375, "learning_rate": 0.00161, "loss": 0.3488, "step": 806 }, { "epoch": 0.0014326507851653266, "grad_norm": 1.3828125, "learning_rate": 0.0016140000000000002, "loss": 0.4464, "step": 808 }, { "epoch": 0.0014361969504751418, "grad_norm": 0.70703125, "learning_rate": 0.001618, "loss": 0.427, "step": 810 }, { "epoch": 0.001439743115784957, "grad_norm": 0.796875, "learning_rate": 0.0016220000000000002, "loss": 0.3566, "step": 812 }, { "epoch": 0.0014432892810947722, "grad_norm": 0.3359375, "learning_rate": 0.0016259999999999998, "loss": 0.3749, "step": 814 }, { "epoch": 0.0014468354464045874, "grad_norm": 1.046875, "learning_rate": 0.00163, "loss": 0.4408, "step": 816 }, { "epoch": 0.0014503816117144026, "grad_norm": 0.435546875, "learning_rate": 0.001634, "loss": 0.4197, "step": 818 }, { "epoch": 0.0014539277770242176, "grad_norm": 0.5234375, "learning_rate": 0.001638, "loss": 0.4298, "step": 820 }, { "epoch": 0.0014574739423340328, "grad_norm": 0.59765625, "learning_rate": 0.001642, "loss": 0.3424, "step": 822 }, { "epoch": 0.001461020107643848, "grad_norm": 0.466796875, "learning_rate": 0.001646, "loss": 0.4055, "step": 824 }, { "epoch": 0.0014645662729536632, "grad_norm": 0.3203125, "learning_rate": 0.00165, "loss": 0.387, "step": 826 }, { "epoch": 0.0014681124382634784, "grad_norm": 0.4453125, "learning_rate": 0.0016539999999999999, "loss": 0.4673, "step": 828 }, { "epoch": 0.0014716586035732934, "grad_norm": 1.34375, "learning_rate": 0.001658, "loss": 0.884, "step": 830 }, { "epoch": 0.0014752047688831086, "grad_norm": 0.3828125, "learning_rate": 0.0016619999999999998, "loss": 0.3106, "step": 832 }, { "epoch": 0.0014787509341929238, "grad_norm": 1.09375, "learning_rate": 0.001666, "loss": 0.4596, "step": 834 }, { "epoch": 0.001482297099502739, "grad_norm": 0.875, "learning_rate": 0.00167, "loss": 0.5467, "step": 836 }, { "epoch": 0.0014858432648125542, "grad_norm": 0.68359375, "learning_rate": 0.001674, "loss": 0.3769, "step": 838 }, { "epoch": 0.0014893894301223694, "grad_norm": 0.73828125, "learning_rate": 0.001678, "loss": 0.4614, "step": 840 }, { "epoch": 0.0014929355954321844, "grad_norm": 0.6015625, "learning_rate": 0.001682, "loss": 0.4218, "step": 842 }, { "epoch": 0.0014964817607419996, "grad_norm": 0.98828125, "learning_rate": 0.001686, "loss": 0.3143, "step": 844 }, { "epoch": 0.0015000279260518148, "grad_norm": 1.703125, "learning_rate": 0.0016899999999999999, "loss": 0.6092, "step": 846 }, { "epoch": 0.00150357409136163, "grad_norm": 0.416015625, "learning_rate": 0.001694, "loss": 0.3918, "step": 848 }, { "epoch": 0.0015071202566714452, "grad_norm": 0.47265625, "learning_rate": 0.001698, "loss": 0.3889, "step": 850 }, { "epoch": 0.0015106664219812602, "grad_norm": 0.8515625, "learning_rate": 0.001702, "loss": 0.4317, "step": 852 }, { "epoch": 0.0015142125872910754, "grad_norm": 0.5546875, "learning_rate": 0.001706, "loss": 0.433, "step": 854 }, { "epoch": 0.0015177587526008906, "grad_norm": 0.5, "learning_rate": 0.00171, "loss": 0.402, "step": 856 }, { "epoch": 0.0015213049179107058, "grad_norm": 0.984375, "learning_rate": 0.001714, "loss": 0.3781, "step": 858 }, { "epoch": 0.001524851083220521, "grad_norm": 1.125, "learning_rate": 0.001718, "loss": 0.4915, "step": 860 }, { "epoch": 0.0015283972485303362, "grad_norm": 0.48046875, "learning_rate": 0.001722, "loss": 0.485, "step": 862 }, { "epoch": 0.0015319434138401512, "grad_norm": 0.54296875, "learning_rate": 0.001726, "loss": 0.5497, "step": 864 }, { "epoch": 0.0015354895791499664, "grad_norm": 1.046875, "learning_rate": 0.00173, "loss": 0.4267, "step": 866 }, { "epoch": 0.0015390357444597816, "grad_norm": 2.890625, "learning_rate": 0.001734, "loss": 0.4916, "step": 868 }, { "epoch": 0.0015425819097695968, "grad_norm": 0.7265625, "learning_rate": 0.001738, "loss": 0.4525, "step": 870 }, { "epoch": 0.001546128075079412, "grad_norm": 0.734375, "learning_rate": 0.001742, "loss": 0.466, "step": 872 }, { "epoch": 0.0015496742403892272, "grad_norm": 0.6484375, "learning_rate": 0.001746, "loss": 0.4172, "step": 874 }, { "epoch": 0.0015532204056990422, "grad_norm": 2.1875, "learning_rate": 0.00175, "loss": 0.4612, "step": 876 }, { "epoch": 0.0015567665710088574, "grad_norm": 0.51953125, "learning_rate": 0.0017540000000000001, "loss": 0.3634, "step": 878 }, { "epoch": 0.0015603127363186726, "grad_norm": 0.43359375, "learning_rate": 0.001758, "loss": 0.3293, "step": 880 }, { "epoch": 0.0015638589016284878, "grad_norm": 0.45703125, "learning_rate": 0.0017620000000000001, "loss": 0.3595, "step": 882 }, { "epoch": 0.001567405066938303, "grad_norm": 0.58203125, "learning_rate": 0.001766, "loss": 0.3994, "step": 884 }, { "epoch": 0.001570951232248118, "grad_norm": 0.8359375, "learning_rate": 0.00177, "loss": 0.4265, "step": 886 }, { "epoch": 0.0015744973975579332, "grad_norm": 0.62109375, "learning_rate": 0.001774, "loss": 0.3768, "step": 888 }, { "epoch": 0.0015780435628677484, "grad_norm": 0.75, "learning_rate": 0.001778, "loss": 0.3474, "step": 890 }, { "epoch": 0.0015815897281775636, "grad_norm": 0.337890625, "learning_rate": 0.0017820000000000002, "loss": 0.4295, "step": 892 }, { "epoch": 0.0015851358934873788, "grad_norm": 0.5859375, "learning_rate": 0.001786, "loss": 0.4619, "step": 894 }, { "epoch": 0.001588682058797194, "grad_norm": 2.328125, "learning_rate": 0.0017900000000000001, "loss": 0.3903, "step": 896 }, { "epoch": 0.001592228224107009, "grad_norm": 1.6484375, "learning_rate": 0.001794, "loss": 0.4117, "step": 898 }, { "epoch": 0.0015957743894168242, "grad_norm": 1.0, "learning_rate": 0.0017980000000000001, "loss": 0.4776, "step": 900 }, { "epoch": 0.0015993205547266394, "grad_norm": 0.62890625, "learning_rate": 0.001802, "loss": 0.3239, "step": 902 }, { "epoch": 0.0016028667200364546, "grad_norm": 0.439453125, "learning_rate": 0.001806, "loss": 0.4453, "step": 904 }, { "epoch": 0.0016064128853462698, "grad_norm": 0.421875, "learning_rate": 0.0018100000000000002, "loss": 0.3475, "step": 906 }, { "epoch": 0.001609959050656085, "grad_norm": 4.34375, "learning_rate": 0.001814, "loss": 0.5284, "step": 908 }, { "epoch": 0.0016135052159659, "grad_norm": 0.56640625, "learning_rate": 0.0018180000000000002, "loss": 0.3544, "step": 910 }, { "epoch": 0.0016170513812757152, "grad_norm": 0.8828125, "learning_rate": 0.001822, "loss": 0.3681, "step": 912 }, { "epoch": 0.0016205975465855304, "grad_norm": 1.1953125, "learning_rate": 0.0018260000000000001, "loss": 0.3847, "step": 914 }, { "epoch": 0.0016241437118953456, "grad_norm": 0.68359375, "learning_rate": 0.00183, "loss": 0.5371, "step": 916 }, { "epoch": 0.0016276898772051608, "grad_norm": 0.9453125, "learning_rate": 0.0018340000000000001, "loss": 0.4194, "step": 918 }, { "epoch": 0.0016312360425149758, "grad_norm": 3.125, "learning_rate": 0.0018380000000000002, "loss": 0.6421, "step": 920 }, { "epoch": 0.001634782207824791, "grad_norm": 0.51953125, "learning_rate": 0.001842, "loss": 0.4111, "step": 922 }, { "epoch": 0.0016383283731346062, "grad_norm": 1.5546875, "learning_rate": 0.0018460000000000002, "loss": 0.4762, "step": 924 }, { "epoch": 0.0016418745384444214, "grad_norm": 0.466796875, "learning_rate": 0.00185, "loss": 0.4302, "step": 926 }, { "epoch": 0.0016454207037542366, "grad_norm": 0.5703125, "learning_rate": 0.0018540000000000002, "loss": 0.41, "step": 928 }, { "epoch": 0.0016489668690640518, "grad_norm": 0.404296875, "learning_rate": 0.001858, "loss": 0.5211, "step": 930 }, { "epoch": 0.0016525130343738668, "grad_norm": 0.48828125, "learning_rate": 0.0018620000000000002, "loss": 0.5331, "step": 932 }, { "epoch": 0.001656059199683682, "grad_norm": 1.6484375, "learning_rate": 0.001866, "loss": 0.8214, "step": 934 }, { "epoch": 0.0016596053649934972, "grad_norm": 1.875, "learning_rate": 0.0018700000000000001, "loss": 0.3674, "step": 936 }, { "epoch": 0.0016631515303033124, "grad_norm": 17.0, "learning_rate": 0.0018740000000000002, "loss": 0.5836, "step": 938 }, { "epoch": 0.0016666976956131276, "grad_norm": 1.375, "learning_rate": 0.001878, "loss": 0.4125, "step": 940 }, { "epoch": 0.0016702438609229428, "grad_norm": 0.9140625, "learning_rate": 0.001882, "loss": 0.4659, "step": 942 }, { "epoch": 0.0016737900262327578, "grad_norm": 0.478515625, "learning_rate": 0.0018859999999999999, "loss": 0.38, "step": 944 }, { "epoch": 0.001677336191542573, "grad_norm": 0.6484375, "learning_rate": 0.00189, "loss": 0.4455, "step": 946 }, { "epoch": 0.0016808823568523882, "grad_norm": 0.322265625, "learning_rate": 0.0018939999999999999, "loss": 0.3341, "step": 948 }, { "epoch": 0.0016844285221622034, "grad_norm": 0.392578125, "learning_rate": 0.001898, "loss": 0.4781, "step": 950 }, { "epoch": 0.0016879746874720186, "grad_norm": 0.72265625, "learning_rate": 0.001902, "loss": 0.5476, "step": 952 }, { "epoch": 0.0016915208527818336, "grad_norm": 1.2734375, "learning_rate": 0.001906, "loss": 0.476, "step": 954 }, { "epoch": 0.0016950670180916488, "grad_norm": 0.27734375, "learning_rate": 0.00191, "loss": 0.3611, "step": 956 }, { "epoch": 0.001698613183401464, "grad_norm": 1.859375, "learning_rate": 0.001914, "loss": 0.6158, "step": 958 }, { "epoch": 0.0017021593487112792, "grad_norm": 1.703125, "learning_rate": 0.001918, "loss": 0.4304, "step": 960 }, { "epoch": 0.0017057055140210944, "grad_norm": 0.431640625, "learning_rate": 0.0019219999999999999, "loss": 0.3029, "step": 962 }, { "epoch": 0.0017092516793309096, "grad_norm": 0.63671875, "learning_rate": 0.001926, "loss": 0.5024, "step": 964 }, { "epoch": 0.0017127978446407246, "grad_norm": 10.375, "learning_rate": 0.00193, "loss": 0.4969, "step": 966 }, { "epoch": 0.0017163440099505398, "grad_norm": 0.55859375, "learning_rate": 0.001934, "loss": 0.7104, "step": 968 }, { "epoch": 0.001719890175260355, "grad_norm": 0.57421875, "learning_rate": 0.001938, "loss": 0.5163, "step": 970 }, { "epoch": 0.0017234363405701702, "grad_norm": 0.376953125, "learning_rate": 0.001942, "loss": 0.4006, "step": 972 }, { "epoch": 0.0017269825058799854, "grad_norm": 0.453125, "learning_rate": 0.001946, "loss": 0.3615, "step": 974 }, { "epoch": 0.0017305286711898006, "grad_norm": 0.3828125, "learning_rate": 0.00195, "loss": 0.3554, "step": 976 }, { "epoch": 0.0017340748364996156, "grad_norm": 0.56640625, "learning_rate": 0.001954, "loss": 0.4427, "step": 978 }, { "epoch": 0.0017376210018094308, "grad_norm": 0.9921875, "learning_rate": 0.001958, "loss": 0.4424, "step": 980 }, { "epoch": 0.001741167167119246, "grad_norm": 1.4609375, "learning_rate": 0.001962, "loss": 0.5594, "step": 982 }, { "epoch": 0.0017447133324290612, "grad_norm": 1.0625, "learning_rate": 0.001966, "loss": 0.4296, "step": 984 }, { "epoch": 0.0017482594977388764, "grad_norm": 1.4765625, "learning_rate": 0.00197, "loss": 0.4107, "step": 986 }, { "epoch": 0.0017518056630486914, "grad_norm": 0.3984375, "learning_rate": 0.001974, "loss": 0.38, "step": 988 }, { "epoch": 0.0017553518283585066, "grad_norm": 0.68359375, "learning_rate": 0.001978, "loss": 0.4456, "step": 990 }, { "epoch": 0.0017588979936683218, "grad_norm": 0.33203125, "learning_rate": 0.001982, "loss": 0.4618, "step": 992 }, { "epoch": 0.001762444158978137, "grad_norm": 0.33984375, "learning_rate": 0.001986, "loss": 0.3707, "step": 994 }, { "epoch": 0.0017659903242879522, "grad_norm": 0.255859375, "learning_rate": 0.00199, "loss": 0.3779, "step": 996 }, { "epoch": 0.0017695364895977674, "grad_norm": 0.2734375, "learning_rate": 0.001994, "loss": 0.4004, "step": 998 }, { "epoch": 0.0017730826549075824, "grad_norm": 2.25, "learning_rate": 0.001998, "loss": 0.3643, "step": 1000 }, { "epoch": 0.0017766288202173976, "grad_norm": 0.48046875, "learning_rate": 0.0019999999995079904, "loss": 0.3971, "step": 1002 }, { "epoch": 0.0017801749855272128, "grad_norm": 0.78515625, "learning_rate": 0.0019999999955719132, "loss": 0.3017, "step": 1004 }, { "epoch": 0.001783721150837028, "grad_norm": 0.470703125, "learning_rate": 0.001999999987699759, "loss": 0.3235, "step": 1006 }, { "epoch": 0.0017872673161468432, "grad_norm": 0.90234375, "learning_rate": 0.0019999999758915274, "loss": 0.4187, "step": 1008 }, { "epoch": 0.0017908134814566582, "grad_norm": 1.6484375, "learning_rate": 0.0019999999601472184, "loss": 0.5903, "step": 1010 }, { "epoch": 0.0017943596467664734, "grad_norm": 0.384765625, "learning_rate": 0.0019999999404668326, "loss": 0.6096, "step": 1012 }, { "epoch": 0.0017979058120762886, "grad_norm": 1.3515625, "learning_rate": 0.00199999991685037, "loss": 0.3594, "step": 1014 }, { "epoch": 0.0018014519773861038, "grad_norm": 0.388671875, "learning_rate": 0.001999999889297831, "loss": 0.541, "step": 1016 }, { "epoch": 0.001804998142695919, "grad_norm": 0.40234375, "learning_rate": 0.001999999857809214, "loss": 0.3586, "step": 1018 }, { "epoch": 0.0018085443080057342, "grad_norm": 1.015625, "learning_rate": 0.0019999998223845213, "loss": 0.507, "step": 1020 }, { "epoch": 0.0018120904733155492, "grad_norm": 0.91796875, "learning_rate": 0.001999999783023752, "loss": 0.4054, "step": 1022 }, { "epoch": 0.0018156366386253644, "grad_norm": 0.84375, "learning_rate": 0.0019999997397269066, "loss": 0.3707, "step": 1024 }, { "epoch": 0.0018191828039351796, "grad_norm": 1.8359375, "learning_rate": 0.0019999996924939846, "loss": 0.6278, "step": 1026 }, { "epoch": 0.0018227289692449948, "grad_norm": 0.34765625, "learning_rate": 0.001999999641324987, "loss": 0.3858, "step": 1028 }, { "epoch": 0.00182627513455481, "grad_norm": 0.5859375, "learning_rate": 0.001999999586219914, "loss": 0.3753, "step": 1030 }, { "epoch": 0.0018298212998646252, "grad_norm": 0.6171875, "learning_rate": 0.0019999995271787656, "loss": 0.3808, "step": 1032 }, { "epoch": 0.0018333674651744402, "grad_norm": 0.390625, "learning_rate": 0.0019999994642015415, "loss": 0.2981, "step": 1034 }, { "epoch": 0.0018369136304842554, "grad_norm": 0.734375, "learning_rate": 0.0019999993972882432, "loss": 0.4874, "step": 1036 }, { "epoch": 0.0018404597957940706, "grad_norm": 0.349609375, "learning_rate": 0.00199999932643887, "loss": 0.3889, "step": 1038 }, { "epoch": 0.0018440059611038858, "grad_norm": 0.375, "learning_rate": 0.0019999992516534226, "loss": 0.4473, "step": 1040 }, { "epoch": 0.001847552126413701, "grad_norm": 0.333984375, "learning_rate": 0.001999999172931901, "loss": 0.3383, "step": 1042 }, { "epoch": 0.001851098291723516, "grad_norm": 0.87109375, "learning_rate": 0.001999999090274306, "loss": 0.4581, "step": 1044 }, { "epoch": 0.0018546444570333312, "grad_norm": 0.5234375, "learning_rate": 0.001999999003680638, "loss": 0.3222, "step": 1046 }, { "epoch": 0.0018581906223431464, "grad_norm": 0.62109375, "learning_rate": 0.0019999989131508963, "loss": 0.3686, "step": 1048 }, { "epoch": 0.0018617367876529616, "grad_norm": 0.90234375, "learning_rate": 0.001999998818685083, "loss": 0.5504, "step": 1050 }, { "epoch": 0.0018652829529627768, "grad_norm": 0.3125, "learning_rate": 0.0019999987202831975, "loss": 0.3204, "step": 1052 }, { "epoch": 0.001868829118272592, "grad_norm": 0.361328125, "learning_rate": 0.0019999986179452403, "loss": 0.3595, "step": 1054 }, { "epoch": 0.001872375283582407, "grad_norm": 0.3359375, "learning_rate": 0.0019999985116712117, "loss": 0.3356, "step": 1056 }, { "epoch": 0.0018759214488922222, "grad_norm": 0.40625, "learning_rate": 0.0019999984014611124, "loss": 0.4235, "step": 1058 }, { "epoch": 0.0018794676142020374, "grad_norm": 0.390625, "learning_rate": 0.0019999982873149433, "loss": 0.3783, "step": 1060 }, { "epoch": 0.0018830137795118526, "grad_norm": 0.330078125, "learning_rate": 0.0019999981692327045, "loss": 0.4312, "step": 1062 }, { "epoch": 0.0018865599448216678, "grad_norm": 0.287109375, "learning_rate": 0.001999998047214396, "loss": 0.3751, "step": 1064 }, { "epoch": 0.001890106110131483, "grad_norm": 0.498046875, "learning_rate": 0.0019999979212600187, "loss": 0.4861, "step": 1066 }, { "epoch": 0.001893652275441298, "grad_norm": 0.4921875, "learning_rate": 0.001999997791369574, "loss": 0.4154, "step": 1068 }, { "epoch": 0.0018971984407511132, "grad_norm": 0.392578125, "learning_rate": 0.001999997657543061, "loss": 0.3348, "step": 1070 }, { "epoch": 0.0019007446060609284, "grad_norm": 1.4140625, "learning_rate": 0.0019999975197804816, "loss": 0.5562, "step": 1072 }, { "epoch": 0.0019042907713707436, "grad_norm": 3.984375, "learning_rate": 0.0019999973780818353, "loss": 0.5703, "step": 1074 }, { "epoch": 0.0019078369366805588, "grad_norm": 0.31640625, "learning_rate": 0.0019999972324471235, "loss": 0.3959, "step": 1076 }, { "epoch": 0.0019113831019903738, "grad_norm": 0.43359375, "learning_rate": 0.0019999970828763467, "loss": 0.3247, "step": 1078 }, { "epoch": 0.001914929267300189, "grad_norm": 0.6796875, "learning_rate": 0.0019999969293695054, "loss": 0.439, "step": 1080 }, { "epoch": 0.0019184754326100042, "grad_norm": 0.380859375, "learning_rate": 0.0019999967719266003, "loss": 0.6608, "step": 1082 }, { "epoch": 0.0019220215979198194, "grad_norm": 1.375, "learning_rate": 0.001999996610547632, "loss": 0.4712, "step": 1084 }, { "epoch": 0.0019255677632296346, "grad_norm": 0.8671875, "learning_rate": 0.0019999964452326016, "loss": 0.606, "step": 1086 }, { "epoch": 0.0019291139285394499, "grad_norm": 1.359375, "learning_rate": 0.0019999962759815093, "loss": 0.5051, "step": 1088 }, { "epoch": 0.0019326600938492648, "grad_norm": 0.30078125, "learning_rate": 0.001999996102794356, "loss": 0.3872, "step": 1090 }, { "epoch": 0.00193620625915908, "grad_norm": 0.42578125, "learning_rate": 0.0019999959256711427, "loss": 0.3313, "step": 1092 }, { "epoch": 0.0019397524244688952, "grad_norm": 0.99609375, "learning_rate": 0.0019999957446118696, "loss": 0.4841, "step": 1094 }, { "epoch": 0.0019432985897787104, "grad_norm": 0.92578125, "learning_rate": 0.001999995559616538, "loss": 0.5517, "step": 1096 }, { "epoch": 0.0019468447550885257, "grad_norm": 0.443359375, "learning_rate": 0.0019999953706851493, "loss": 0.3948, "step": 1098 }, { "epoch": 0.0019503909203983409, "grad_norm": 0.451171875, "learning_rate": 0.001999995177817703, "loss": 0.5052, "step": 1100 }, { "epoch": 0.001953937085708156, "grad_norm": 1.3828125, "learning_rate": 0.0019999949810142006, "loss": 0.4857, "step": 1102 }, { "epoch": 0.001957483251017971, "grad_norm": 0.369140625, "learning_rate": 0.0019999947802746432, "loss": 0.3922, "step": 1104 }, { "epoch": 0.001961029416327786, "grad_norm": 2.859375, "learning_rate": 0.0019999945755990313, "loss": 0.4307, "step": 1106 }, { "epoch": 0.0019645755816376015, "grad_norm": 0.62890625, "learning_rate": 0.0019999943669873656, "loss": 0.4164, "step": 1108 }, { "epoch": 0.0019681217469474164, "grad_norm": 0.45703125, "learning_rate": 0.0019999941544396474, "loss": 0.3841, "step": 1110 }, { "epoch": 0.001971667912257232, "grad_norm": 4.0625, "learning_rate": 0.001999993937955878, "loss": 0.5836, "step": 1112 }, { "epoch": 0.001975214077567047, "grad_norm": 0.5625, "learning_rate": 0.0019999937175360577, "loss": 0.3593, "step": 1114 }, { "epoch": 0.0019787602428768623, "grad_norm": 0.380859375, "learning_rate": 0.0019999934931801875, "loss": 0.351, "step": 1116 }, { "epoch": 0.0019823064081866773, "grad_norm": 0.515625, "learning_rate": 0.0019999932648882687, "loss": 0.3285, "step": 1118 }, { "epoch": 0.0019858525734964922, "grad_norm": 0.59375, "learning_rate": 0.001999993032660302, "loss": 0.5993, "step": 1120 }, { "epoch": 0.0019893987388063077, "grad_norm": 0.6875, "learning_rate": 0.0019999927964962885, "loss": 0.3257, "step": 1122 }, { "epoch": 0.0019929449041161226, "grad_norm": 0.6640625, "learning_rate": 0.0019999925563962294, "loss": 0.4925, "step": 1124 }, { "epoch": 0.001996491069425938, "grad_norm": 0.404296875, "learning_rate": 0.001999992312360126, "loss": 0.3838, "step": 1126 }, { "epoch": 0.002000037234735753, "grad_norm": 0.47265625, "learning_rate": 0.001999992064387978, "loss": 0.3545, "step": 1128 }, { "epoch": 0.002003583400045568, "grad_norm": 0.63671875, "learning_rate": 0.0019999918124797883, "loss": 0.4683, "step": 1130 }, { "epoch": 0.0020071295653553835, "grad_norm": 0.76953125, "learning_rate": 0.0019999915566355575, "loss": 0.413, "step": 1132 }, { "epoch": 0.0020106757306651984, "grad_norm": 0.65234375, "learning_rate": 0.0019999912968552856, "loss": 0.4569, "step": 1134 }, { "epoch": 0.002014221895975014, "grad_norm": 0.375, "learning_rate": 0.0019999910331389746, "loss": 0.3477, "step": 1136 }, { "epoch": 0.002017768061284829, "grad_norm": 0.52734375, "learning_rate": 0.001999990765486626, "loss": 0.3038, "step": 1138 }, { "epoch": 0.002021314226594644, "grad_norm": 0.59765625, "learning_rate": 0.00199999049389824, "loss": 0.3498, "step": 1140 }, { "epoch": 0.0020248603919044593, "grad_norm": 0.45703125, "learning_rate": 0.001999990218373819, "loss": 0.563, "step": 1142 }, { "epoch": 0.0020284065572142742, "grad_norm": 0.470703125, "learning_rate": 0.0019999899389133635, "loss": 0.7518, "step": 1144 }, { "epoch": 0.0020319527225240897, "grad_norm": 2.0625, "learning_rate": 0.001999989655516875, "loss": 0.5289, "step": 1146 }, { "epoch": 0.0020354988878339047, "grad_norm": 0.423828125, "learning_rate": 0.001999989368184354, "loss": 0.3241, "step": 1148 }, { "epoch": 0.00203904505314372, "grad_norm": 1.0625, "learning_rate": 0.001999989076915802, "loss": 0.527, "step": 1150 }, { "epoch": 0.002042591218453535, "grad_norm": 1.71875, "learning_rate": 0.001999988781711221, "loss": 0.5642, "step": 1152 }, { "epoch": 0.00204613738376335, "grad_norm": 0.921875, "learning_rate": 0.0019999884825706122, "loss": 0.4935, "step": 1154 }, { "epoch": 0.0020496835490731655, "grad_norm": 0.91796875, "learning_rate": 0.001999988179493976, "loss": 0.3964, "step": 1156 }, { "epoch": 0.0020532297143829805, "grad_norm": 2.359375, "learning_rate": 0.001999987872481314, "loss": 0.4454, "step": 1158 }, { "epoch": 0.002056775879692796, "grad_norm": 0.625, "learning_rate": 0.001999987561532629, "loss": 0.4465, "step": 1160 }, { "epoch": 0.002060322045002611, "grad_norm": 1.0234375, "learning_rate": 0.00199998724664792, "loss": 0.4312, "step": 1162 }, { "epoch": 0.002063868210312426, "grad_norm": 0.427734375, "learning_rate": 0.00199998692782719, "loss": 0.4251, "step": 1164 }, { "epoch": 0.0020674143756222413, "grad_norm": 0.3828125, "learning_rate": 0.00199998660507044, "loss": 0.4131, "step": 1166 }, { "epoch": 0.0020709605409320563, "grad_norm": 0.88671875, "learning_rate": 0.0019999862783776716, "loss": 0.3982, "step": 1168 }, { "epoch": 0.0020745067062418717, "grad_norm": 0.443359375, "learning_rate": 0.0019999859477488856, "loss": 0.3609, "step": 1170 }, { "epoch": 0.0020780528715516867, "grad_norm": 0.447265625, "learning_rate": 0.001999985613184084, "loss": 0.3846, "step": 1172 }, { "epoch": 0.0020815990368615016, "grad_norm": 0.984375, "learning_rate": 0.001999985274683268, "loss": 0.4326, "step": 1174 }, { "epoch": 0.002085145202171317, "grad_norm": 0.26953125, "learning_rate": 0.001999984932246439, "loss": 0.3229, "step": 1176 }, { "epoch": 0.002088691367481132, "grad_norm": 0.298828125, "learning_rate": 0.0019999845858735994, "loss": 0.3905, "step": 1178 }, { "epoch": 0.0020922375327909475, "grad_norm": 0.48828125, "learning_rate": 0.0019999842355647494, "loss": 0.5283, "step": 1180 }, { "epoch": 0.0020957836981007625, "grad_norm": 0.24609375, "learning_rate": 0.001999983881319891, "loss": 0.2918, "step": 1182 }, { "epoch": 0.002099329863410578, "grad_norm": 0.5546875, "learning_rate": 0.0019999835231390263, "loss": 0.3663, "step": 1184 }, { "epoch": 0.002102876028720393, "grad_norm": 0.283203125, "learning_rate": 0.0019999831610221564, "loss": 0.3811, "step": 1186 }, { "epoch": 0.002106422194030208, "grad_norm": 6.90625, "learning_rate": 0.0019999827949692827, "loss": 0.68, "step": 1188 }, { "epoch": 0.0021099683593400233, "grad_norm": 0.7109375, "learning_rate": 0.001999982424980407, "loss": 0.3522, "step": 1190 }, { "epoch": 0.0021135145246498383, "grad_norm": 0.45703125, "learning_rate": 0.0019999820510555313, "loss": 0.3751, "step": 1192 }, { "epoch": 0.0021170606899596537, "grad_norm": 2.09375, "learning_rate": 0.0019999816731946563, "loss": 0.4795, "step": 1194 }, { "epoch": 0.0021206068552694687, "grad_norm": 1.2421875, "learning_rate": 0.0019999812913977844, "loss": 0.3934, "step": 1196 }, { "epoch": 0.0021241530205792837, "grad_norm": 0.38671875, "learning_rate": 0.001999980905664918, "loss": 0.549, "step": 1198 }, { "epoch": 0.002127699185889099, "grad_norm": 1.3125, "learning_rate": 0.001999980515996057, "loss": 0.4363, "step": 1200 }, { "epoch": 0.002131245351198914, "grad_norm": 0.38671875, "learning_rate": 0.001999980122391204, "loss": 0.4437, "step": 1202 }, { "epoch": 0.0021347915165087295, "grad_norm": 0.439453125, "learning_rate": 0.001999979724850361, "loss": 0.3607, "step": 1204 }, { "epoch": 0.0021383376818185445, "grad_norm": 0.345703125, "learning_rate": 0.001999979323373529, "loss": 0.3687, "step": 1206 }, { "epoch": 0.0021418838471283595, "grad_norm": 0.5234375, "learning_rate": 0.001999978917960711, "loss": 0.3193, "step": 1208 }, { "epoch": 0.002145430012438175, "grad_norm": 0.51171875, "learning_rate": 0.0019999785086119073, "loss": 0.4807, "step": 1210 }, { "epoch": 0.00214897617774799, "grad_norm": 0.3359375, "learning_rate": 0.0019999780953271207, "loss": 0.3487, "step": 1212 }, { "epoch": 0.0021525223430578053, "grad_norm": 0.4296875, "learning_rate": 0.0019999776781063523, "loss": 0.4667, "step": 1214 }, { "epoch": 0.0021560685083676203, "grad_norm": 0.26171875, "learning_rate": 0.001999977256949605, "loss": 0.3317, "step": 1216 }, { "epoch": 0.0021596146736774357, "grad_norm": 0.361328125, "learning_rate": 0.001999976831856879, "loss": 0.3814, "step": 1218 }, { "epoch": 0.0021631608389872507, "grad_norm": 0.77734375, "learning_rate": 0.001999976402828178, "loss": 0.3466, "step": 1220 }, { "epoch": 0.0021667070042970657, "grad_norm": 0.62890625, "learning_rate": 0.001999975969863502, "loss": 0.382, "step": 1222 }, { "epoch": 0.002170253169606881, "grad_norm": 0.7109375, "learning_rate": 0.001999975532962855, "loss": 0.5704, "step": 1224 }, { "epoch": 0.002173799334916696, "grad_norm": 3.5625, "learning_rate": 0.0019999750921262374, "loss": 0.6261, "step": 1226 }, { "epoch": 0.0021773455002265115, "grad_norm": 0.921875, "learning_rate": 0.001999974647353651, "loss": 0.408, "step": 1228 }, { "epoch": 0.0021808916655363265, "grad_norm": 0.51953125, "learning_rate": 0.001999974198645099, "loss": 0.3717, "step": 1230 }, { "epoch": 0.0021844378308461415, "grad_norm": 0.287109375, "learning_rate": 0.0019999737460005824, "loss": 0.303, "step": 1232 }, { "epoch": 0.002187983996155957, "grad_norm": 0.62109375, "learning_rate": 0.001999973289420103, "loss": 0.3554, "step": 1234 }, { "epoch": 0.002191530161465772, "grad_norm": 0.326171875, "learning_rate": 0.0019999728289036636, "loss": 0.292, "step": 1236 }, { "epoch": 0.0021950763267755873, "grad_norm": 0.322265625, "learning_rate": 0.0019999723644512656, "loss": 0.3423, "step": 1238 }, { "epoch": 0.0021986224920854023, "grad_norm": 0.72265625, "learning_rate": 0.0019999718960629115, "loss": 0.4001, "step": 1240 }, { "epoch": 0.0022021686573952173, "grad_norm": 0.255859375, "learning_rate": 0.001999971423738603, "loss": 0.4699, "step": 1242 }, { "epoch": 0.0022057148227050327, "grad_norm": 0.306640625, "learning_rate": 0.001999970947478342, "loss": 0.3121, "step": 1244 }, { "epoch": 0.0022092609880148477, "grad_norm": 0.4765625, "learning_rate": 0.0019999704672821307, "loss": 0.3609, "step": 1246 }, { "epoch": 0.002212807153324663, "grad_norm": 0.279296875, "learning_rate": 0.001999969983149972, "loss": 0.3226, "step": 1248 }, { "epoch": 0.002216353318634478, "grad_norm": 0.2734375, "learning_rate": 0.001999969495081867, "loss": 0.3121, "step": 1250 }, { "epoch": 0.0022198994839442935, "grad_norm": 0.408203125, "learning_rate": 0.0019999690030778183, "loss": 0.3878, "step": 1252 }, { "epoch": 0.0022234456492541085, "grad_norm": 0.462890625, "learning_rate": 0.001999968507137828, "loss": 0.3017, "step": 1254 }, { "epoch": 0.0022269918145639235, "grad_norm": 0.69140625, "learning_rate": 0.0019999680072618977, "loss": 0.3342, "step": 1256 }, { "epoch": 0.002230537979873739, "grad_norm": 0.462890625, "learning_rate": 0.001999967503450031, "loss": 0.3295, "step": 1258 }, { "epoch": 0.002234084145183554, "grad_norm": 0.33984375, "learning_rate": 0.0019999669957022283, "loss": 0.3612, "step": 1260 }, { "epoch": 0.0022376303104933693, "grad_norm": 0.404296875, "learning_rate": 0.001999966484018493, "loss": 0.4064, "step": 1262 }, { "epoch": 0.0022411764758031843, "grad_norm": 0.97265625, "learning_rate": 0.0019999659683988275, "loss": 0.3753, "step": 1264 }, { "epoch": 0.0022447226411129993, "grad_norm": 0.5703125, "learning_rate": 0.0019999654488432332, "loss": 0.3581, "step": 1266 }, { "epoch": 0.0022482688064228147, "grad_norm": 0.328125, "learning_rate": 0.0019999649253517127, "loss": 0.3033, "step": 1268 }, { "epoch": 0.0022518149717326297, "grad_norm": 0.59765625, "learning_rate": 0.0019999643979242685, "loss": 0.3, "step": 1270 }, { "epoch": 0.002255361137042445, "grad_norm": 0.7578125, "learning_rate": 0.001999963866560903, "loss": 0.3704, "step": 1272 }, { "epoch": 0.00225890730235226, "grad_norm": 0.310546875, "learning_rate": 0.001999963331261618, "loss": 0.3202, "step": 1274 }, { "epoch": 0.002262453467662075, "grad_norm": 0.83984375, "learning_rate": 0.0019999627920264163, "loss": 0.4499, "step": 1276 }, { "epoch": 0.0022659996329718905, "grad_norm": 1.640625, "learning_rate": 0.0019999622488553, "loss": 0.4349, "step": 1278 }, { "epoch": 0.0022695457982817055, "grad_norm": 0.427734375, "learning_rate": 0.0019999617017482717, "loss": 0.4296, "step": 1280 }, { "epoch": 0.002273091963591521, "grad_norm": 1.0546875, "learning_rate": 0.001999961150705334, "loss": 0.3572, "step": 1282 }, { "epoch": 0.002276638128901336, "grad_norm": 0.515625, "learning_rate": 0.0019999605957264884, "loss": 0.3237, "step": 1284 }, { "epoch": 0.0022801842942111513, "grad_norm": 0.451171875, "learning_rate": 0.0019999600368117384, "loss": 0.3838, "step": 1286 }, { "epoch": 0.0022837304595209663, "grad_norm": 0.466796875, "learning_rate": 0.0019999594739610856, "loss": 0.3879, "step": 1288 }, { "epoch": 0.0022872766248307813, "grad_norm": 0.3046875, "learning_rate": 0.0019999589071745326, "loss": 0.4766, "step": 1290 }, { "epoch": 0.0022908227901405967, "grad_norm": 0.275390625, "learning_rate": 0.001999958336452083, "loss": 0.3154, "step": 1292 }, { "epoch": 0.0022943689554504117, "grad_norm": 0.79296875, "learning_rate": 0.0019999577617937376, "loss": 0.2976, "step": 1294 }, { "epoch": 0.002297915120760227, "grad_norm": 0.255859375, "learning_rate": 0.0019999571831995, "loss": 0.3841, "step": 1296 }, { "epoch": 0.002301461286070042, "grad_norm": 0.56640625, "learning_rate": 0.001999956600669372, "loss": 0.3736, "step": 1298 }, { "epoch": 0.002305007451379857, "grad_norm": 0.8046875, "learning_rate": 0.0019999560142033575, "loss": 0.4563, "step": 1300 }, { "epoch": 0.0023085536166896725, "grad_norm": 0.5078125, "learning_rate": 0.0019999554238014573, "loss": 0.3575, "step": 1302 }, { "epoch": 0.0023120997819994875, "grad_norm": 1.7734375, "learning_rate": 0.0019999548294636752, "loss": 0.465, "step": 1304 }, { "epoch": 0.002315645947309303, "grad_norm": 0.412109375, "learning_rate": 0.0019999542311900133, "loss": 0.3361, "step": 1306 }, { "epoch": 0.002319192112619118, "grad_norm": 0.65234375, "learning_rate": 0.0019999536289804745, "loss": 0.3126, "step": 1308 }, { "epoch": 0.002322738277928933, "grad_norm": 0.44140625, "learning_rate": 0.001999953022835061, "loss": 0.4039, "step": 1310 }, { "epoch": 0.0023262844432387483, "grad_norm": 0.51171875, "learning_rate": 0.001999952412753776, "loss": 0.3648, "step": 1312 }, { "epoch": 0.0023298306085485633, "grad_norm": 0.65625, "learning_rate": 0.0019999517987366222, "loss": 0.3008, "step": 1314 }, { "epoch": 0.0023333767738583787, "grad_norm": 2.546875, "learning_rate": 0.0019999511807836014, "loss": 0.3931, "step": 1316 }, { "epoch": 0.0023369229391681937, "grad_norm": 0.64453125, "learning_rate": 0.0019999505588947173, "loss": 0.3711, "step": 1318 }, { "epoch": 0.002340469104478009, "grad_norm": 0.220703125, "learning_rate": 0.0019999499330699715, "loss": 0.2766, "step": 1320 }, { "epoch": 0.002344015269787824, "grad_norm": 0.349609375, "learning_rate": 0.0019999493033093685, "loss": 0.3488, "step": 1322 }, { "epoch": 0.002347561435097639, "grad_norm": 1.9296875, "learning_rate": 0.001999948669612909, "loss": 0.4454, "step": 1324 }, { "epoch": 0.0023511076004074545, "grad_norm": 0.87109375, "learning_rate": 0.0019999480319805975, "loss": 0.5258, "step": 1326 }, { "epoch": 0.0023546537657172695, "grad_norm": 0.8203125, "learning_rate": 0.001999947390412436, "loss": 0.3523, "step": 1328 }, { "epoch": 0.002358199931027085, "grad_norm": 1.1875, "learning_rate": 0.001999946744908427, "loss": 0.3534, "step": 1330 }, { "epoch": 0.0023617460963369, "grad_norm": 0.56640625, "learning_rate": 0.001999946095468574, "loss": 0.3965, "step": 1332 }, { "epoch": 0.002365292261646715, "grad_norm": 0.86328125, "learning_rate": 0.001999945442092879, "loss": 0.5747, "step": 1334 }, { "epoch": 0.0023688384269565303, "grad_norm": 0.44921875, "learning_rate": 0.001999944784781346, "loss": 0.3435, "step": 1336 }, { "epoch": 0.0023723845922663453, "grad_norm": 1.921875, "learning_rate": 0.001999944123533977, "loss": 0.4215, "step": 1338 }, { "epoch": 0.0023759307575761607, "grad_norm": 1.3671875, "learning_rate": 0.0019999434583507754, "loss": 0.3861, "step": 1340 }, { "epoch": 0.0023794769228859757, "grad_norm": 1.9765625, "learning_rate": 0.0019999427892317435, "loss": 0.4901, "step": 1342 }, { "epoch": 0.0023830230881957907, "grad_norm": 0.44140625, "learning_rate": 0.0019999421161768847, "loss": 0.3331, "step": 1344 }, { "epoch": 0.002386569253505606, "grad_norm": 0.34765625, "learning_rate": 0.001999941439186202, "loss": 0.5539, "step": 1346 }, { "epoch": 0.002390115418815421, "grad_norm": 0.484375, "learning_rate": 0.0019999407582596976, "loss": 0.3413, "step": 1348 }, { "epoch": 0.0023936615841252365, "grad_norm": 1.7578125, "learning_rate": 0.001999940073397376, "loss": 0.46, "step": 1350 }, { "epoch": 0.0023972077494350515, "grad_norm": 1.0078125, "learning_rate": 0.001999939384599239, "loss": 0.5004, "step": 1352 }, { "epoch": 0.002400753914744867, "grad_norm": 0.3828125, "learning_rate": 0.0019999386918652896, "loss": 0.4681, "step": 1354 }, { "epoch": 0.002404300080054682, "grad_norm": 0.431640625, "learning_rate": 0.001999937995195531, "loss": 0.4008, "step": 1356 }, { "epoch": 0.002407846245364497, "grad_norm": 0.39453125, "learning_rate": 0.0019999372945899664, "loss": 0.3076, "step": 1358 }, { "epoch": 0.0024113924106743123, "grad_norm": 0.369140625, "learning_rate": 0.0019999365900485993, "loss": 0.3904, "step": 1360 }, { "epoch": 0.0024149385759841273, "grad_norm": 0.23828125, "learning_rate": 0.0019999358815714317, "loss": 0.3989, "step": 1362 }, { "epoch": 0.0024184847412939427, "grad_norm": 0.5, "learning_rate": 0.0019999351691584676, "loss": 0.3566, "step": 1364 }, { "epoch": 0.0024220309066037577, "grad_norm": 0.66015625, "learning_rate": 0.0019999344528097095, "loss": 0.3267, "step": 1366 }, { "epoch": 0.0024255770719135727, "grad_norm": 0.65234375, "learning_rate": 0.0019999337325251614, "loss": 0.3624, "step": 1368 }, { "epoch": 0.002429123237223388, "grad_norm": 1.296875, "learning_rate": 0.0019999330083048258, "loss": 0.3912, "step": 1370 }, { "epoch": 0.002432669402533203, "grad_norm": 1.078125, "learning_rate": 0.0019999322801487057, "loss": 0.4825, "step": 1372 }, { "epoch": 0.0024362155678430185, "grad_norm": 0.66796875, "learning_rate": 0.0019999315480568047, "loss": 0.3519, "step": 1374 }, { "epoch": 0.0024397617331528335, "grad_norm": 0.6875, "learning_rate": 0.001999930812029126, "loss": 0.4869, "step": 1376 }, { "epoch": 0.0024433078984626485, "grad_norm": 5.1875, "learning_rate": 0.001999930072065672, "loss": 0.6554, "step": 1378 }, { "epoch": 0.002446854063772464, "grad_norm": 0.859375, "learning_rate": 0.0019999293281664473, "loss": 0.4527, "step": 1380 }, { "epoch": 0.002450400229082279, "grad_norm": 0.4140625, "learning_rate": 0.001999928580331454, "loss": 0.4871, "step": 1382 }, { "epoch": 0.0024539463943920943, "grad_norm": 0.255859375, "learning_rate": 0.0019999278285606965, "loss": 0.3651, "step": 1384 }, { "epoch": 0.0024574925597019093, "grad_norm": 0.265625, "learning_rate": 0.0019999270728541766, "loss": 0.3327, "step": 1386 }, { "epoch": 0.0024610387250117243, "grad_norm": 0.59765625, "learning_rate": 0.001999926313211899, "loss": 0.4196, "step": 1388 }, { "epoch": 0.0024645848903215397, "grad_norm": 0.330078125, "learning_rate": 0.0019999255496338663, "loss": 0.3441, "step": 1390 }, { "epoch": 0.0024681310556313547, "grad_norm": 0.61328125, "learning_rate": 0.001999924782120082, "loss": 0.7735, "step": 1392 }, { "epoch": 0.00247167722094117, "grad_norm": 0.19140625, "learning_rate": 0.0019999240106705492, "loss": 0.5843, "step": 1394 }, { "epoch": 0.002475223386250985, "grad_norm": 0.72265625, "learning_rate": 0.0019999232352852715, "loss": 0.4501, "step": 1396 }, { "epoch": 0.0024787695515608005, "grad_norm": 0.453125, "learning_rate": 0.001999922455964253, "loss": 0.4135, "step": 1398 }, { "epoch": 0.0024823157168706155, "grad_norm": 0.478515625, "learning_rate": 0.0019999216727074956, "loss": 0.5371, "step": 1400 }, { "epoch": 0.0024858618821804305, "grad_norm": 1.796875, "learning_rate": 0.001999920885515004, "loss": 0.5403, "step": 1402 }, { "epoch": 0.002489408047490246, "grad_norm": 0.3515625, "learning_rate": 0.0019999200943867806, "loss": 0.4323, "step": 1404 }, { "epoch": 0.002492954212800061, "grad_norm": 0.6875, "learning_rate": 0.00199991929932283, "loss": 0.4454, "step": 1406 }, { "epoch": 0.0024965003781098763, "grad_norm": 0.75390625, "learning_rate": 0.001999918500323155, "loss": 0.339, "step": 1408 }, { "epoch": 0.0025000465434196913, "grad_norm": 0.60546875, "learning_rate": 0.0019999176973877594, "loss": 0.4297, "step": 1410 }, { "epoch": 0.0025035927087295063, "grad_norm": 1.1171875, "learning_rate": 0.001999916890516646, "loss": 0.3729, "step": 1412 }, { "epoch": 0.0025071388740393217, "grad_norm": 1.1875, "learning_rate": 0.0019999160797098195, "loss": 0.346, "step": 1414 }, { "epoch": 0.0025106850393491367, "grad_norm": 0.5859375, "learning_rate": 0.0019999152649672826, "loss": 0.4238, "step": 1416 }, { "epoch": 0.002514231204658952, "grad_norm": 1.1640625, "learning_rate": 0.001999914446289039, "loss": 0.494, "step": 1418 }, { "epoch": 0.002517777369968767, "grad_norm": 0.41796875, "learning_rate": 0.0019999136236750923, "loss": 0.3822, "step": 1420 }, { "epoch": 0.002521323535278582, "grad_norm": 0.46875, "learning_rate": 0.001999912797125446, "loss": 0.4121, "step": 1422 }, { "epoch": 0.0025248697005883975, "grad_norm": 0.56640625, "learning_rate": 0.001999911966640104, "loss": 0.3484, "step": 1424 }, { "epoch": 0.0025284158658982125, "grad_norm": 1.0625, "learning_rate": 0.00199991113221907, "loss": 0.5026, "step": 1426 }, { "epoch": 0.002531962031208028, "grad_norm": 0.97265625, "learning_rate": 0.001999910293862347, "loss": 0.4649, "step": 1428 }, { "epoch": 0.002535508196517843, "grad_norm": 0.458984375, "learning_rate": 0.0019999094515699392, "loss": 0.262, "step": 1430 }, { "epoch": 0.0025390543618276583, "grad_norm": 1.1328125, "learning_rate": 0.0019999086053418503, "loss": 0.3965, "step": 1432 }, { "epoch": 0.0025426005271374733, "grad_norm": 2.046875, "learning_rate": 0.001999907755178084, "loss": 0.5808, "step": 1434 }, { "epoch": 0.0025461466924472883, "grad_norm": 0.56640625, "learning_rate": 0.001999906901078644, "loss": 0.3706, "step": 1436 }, { "epoch": 0.0025496928577571037, "grad_norm": 1.2890625, "learning_rate": 0.0019999060430435337, "loss": 0.309, "step": 1438 }, { "epoch": 0.0025532390230669187, "grad_norm": 0.486328125, "learning_rate": 0.0019999051810727572, "loss": 0.354, "step": 1440 }, { "epoch": 0.002556785188376734, "grad_norm": 0.33984375, "learning_rate": 0.0019999043151663182, "loss": 0.3341, "step": 1442 }, { "epoch": 0.002560331353686549, "grad_norm": 0.796875, "learning_rate": 0.00199990344532422, "loss": 0.3325, "step": 1444 }, { "epoch": 0.002563877518996364, "grad_norm": 1.0703125, "learning_rate": 0.0019999025715464673, "loss": 0.4343, "step": 1446 }, { "epoch": 0.0025674236843061795, "grad_norm": 4.28125, "learning_rate": 0.0019999016938330636, "loss": 0.5948, "step": 1448 }, { "epoch": 0.0025709698496159945, "grad_norm": 0.376953125, "learning_rate": 0.0019999008121840125, "loss": 0.3183, "step": 1450 }, { "epoch": 0.00257451601492581, "grad_norm": 0.482421875, "learning_rate": 0.001999899926599318, "loss": 0.3859, "step": 1452 }, { "epoch": 0.002578062180235625, "grad_norm": 0.388671875, "learning_rate": 0.0019998990370789834, "loss": 0.3776, "step": 1454 }, { "epoch": 0.00258160834554544, "grad_norm": 0.44140625, "learning_rate": 0.0019998981436230136, "loss": 0.3878, "step": 1456 }, { "epoch": 0.0025851545108552553, "grad_norm": 0.65234375, "learning_rate": 0.001999897246231412, "loss": 0.3084, "step": 1458 }, { "epoch": 0.0025887006761650703, "grad_norm": 0.390625, "learning_rate": 0.0019998963449041826, "loss": 0.342, "step": 1460 }, { "epoch": 0.0025922468414748857, "grad_norm": 2.34375, "learning_rate": 0.0019998954396413296, "loss": 0.6345, "step": 1462 }, { "epoch": 0.0025957930067847007, "grad_norm": 0.4609375, "learning_rate": 0.001999894530442856, "loss": 0.3512, "step": 1464 }, { "epoch": 0.002599339172094516, "grad_norm": 0.59765625, "learning_rate": 0.001999893617308767, "loss": 0.3512, "step": 1466 }, { "epoch": 0.002602885337404331, "grad_norm": 0.392578125, "learning_rate": 0.001999892700239066, "loss": 0.2984, "step": 1468 }, { "epoch": 0.002606431502714146, "grad_norm": 0.75390625, "learning_rate": 0.0019998917792337567, "loss": 0.3737, "step": 1470 }, { "epoch": 0.0026099776680239615, "grad_norm": 1.28125, "learning_rate": 0.0019998908542928438, "loss": 0.4438, "step": 1472 }, { "epoch": 0.0026135238333337765, "grad_norm": 0.97265625, "learning_rate": 0.001999889925416331, "loss": 0.3816, "step": 1474 }, { "epoch": 0.002617069998643592, "grad_norm": 0.76171875, "learning_rate": 0.001999888992604222, "loss": 0.335, "step": 1476 }, { "epoch": 0.002620616163953407, "grad_norm": 0.80078125, "learning_rate": 0.0019998880558565217, "loss": 0.4098, "step": 1478 }, { "epoch": 0.002624162329263222, "grad_norm": 0.38671875, "learning_rate": 0.0019998871151732335, "loss": 0.3347, "step": 1480 }, { "epoch": 0.0026277084945730373, "grad_norm": 0.640625, "learning_rate": 0.0019998861705543616, "loss": 0.3202, "step": 1482 }, { "epoch": 0.0026312546598828523, "grad_norm": 1.5859375, "learning_rate": 0.001999885221999911, "loss": 0.4216, "step": 1484 }, { "epoch": 0.0026348008251926677, "grad_norm": 0.8203125, "learning_rate": 0.0019998842695098844, "loss": 0.3309, "step": 1486 }, { "epoch": 0.0026383469905024827, "grad_norm": 0.6015625, "learning_rate": 0.0019998833130842873, "loss": 0.4202, "step": 1488 }, { "epoch": 0.0026418931558122977, "grad_norm": 0.578125, "learning_rate": 0.001999882352723123, "loss": 0.418, "step": 1490 }, { "epoch": 0.002645439321122113, "grad_norm": 0.98828125, "learning_rate": 0.001999881388426396, "loss": 0.4025, "step": 1492 }, { "epoch": 0.002648985486431928, "grad_norm": 1.9296875, "learning_rate": 0.0019998804201941107, "loss": 0.3504, "step": 1494 }, { "epoch": 0.0026525316517417435, "grad_norm": 2.203125, "learning_rate": 0.001999879448026271, "loss": 0.3986, "step": 1496 }, { "epoch": 0.0026560778170515585, "grad_norm": 0.5234375, "learning_rate": 0.0019998784719228815, "loss": 0.3664, "step": 1498 }, { "epoch": 0.002659623982361374, "grad_norm": 0.9296875, "learning_rate": 0.001999877491883946, "loss": 0.4292, "step": 1500 }, { "epoch": 0.002663170147671189, "grad_norm": 0.953125, "learning_rate": 0.0019998765079094695, "loss": 0.4353, "step": 1502 }, { "epoch": 0.002666716312981004, "grad_norm": 2.15625, "learning_rate": 0.0019998755199994553, "loss": 0.4917, "step": 1504 }, { "epoch": 0.0026702624782908193, "grad_norm": 0.412109375, "learning_rate": 0.0019998745281539086, "loss": 0.3018, "step": 1506 }, { "epoch": 0.0026738086436006343, "grad_norm": 0.2890625, "learning_rate": 0.0019998735323728334, "loss": 0.4769, "step": 1508 }, { "epoch": 0.0026773548089104498, "grad_norm": 0.72265625, "learning_rate": 0.001999872532656234, "loss": 0.3584, "step": 1510 }, { "epoch": 0.0026809009742202647, "grad_norm": 5.96875, "learning_rate": 0.0019998715290041147, "loss": 0.3792, "step": 1512 }, { "epoch": 0.0026844471395300797, "grad_norm": 0.6328125, "learning_rate": 0.0019998705214164803, "loss": 0.3475, "step": 1514 }, { "epoch": 0.002687993304839895, "grad_norm": 0.53125, "learning_rate": 0.0019998695098933346, "loss": 0.4474, "step": 1516 }, { "epoch": 0.00269153947014971, "grad_norm": 0.69921875, "learning_rate": 0.0019998684944346826, "loss": 0.3354, "step": 1518 }, { "epoch": 0.0026950856354595256, "grad_norm": 1.4296875, "learning_rate": 0.001999867475040529, "loss": 0.4186, "step": 1520 }, { "epoch": 0.0026986318007693405, "grad_norm": 0.423828125, "learning_rate": 0.001999866451710877, "loss": 0.3646, "step": 1522 }, { "epoch": 0.0027021779660791555, "grad_norm": 1.0546875, "learning_rate": 0.0019998654244457324, "loss": 0.2748, "step": 1524 }, { "epoch": 0.002705724131388971, "grad_norm": 0.59375, "learning_rate": 0.001999864393245099, "loss": 0.4799, "step": 1526 }, { "epoch": 0.002709270296698786, "grad_norm": 0.65234375, "learning_rate": 0.001999863358108981, "loss": 0.4214, "step": 1528 }, { "epoch": 0.0027128164620086014, "grad_norm": 0.435546875, "learning_rate": 0.001999862319037384, "loss": 0.424, "step": 1530 }, { "epoch": 0.0027163626273184163, "grad_norm": 0.9453125, "learning_rate": 0.0019998612760303114, "loss": 0.3688, "step": 1532 }, { "epoch": 0.0027199087926282318, "grad_norm": 0.921875, "learning_rate": 0.0019998602290877687, "loss": 0.4423, "step": 1534 }, { "epoch": 0.0027234549579380467, "grad_norm": 0.69921875, "learning_rate": 0.00199985917820976, "loss": 0.3279, "step": 1536 }, { "epoch": 0.0027270011232478617, "grad_norm": 0.388671875, "learning_rate": 0.0019998581233962895, "loss": 0.3472, "step": 1538 }, { "epoch": 0.002730547288557677, "grad_norm": 0.39453125, "learning_rate": 0.001999857064647363, "loss": 0.2892, "step": 1540 }, { "epoch": 0.002734093453867492, "grad_norm": 0.71484375, "learning_rate": 0.0019998560019629835, "loss": 0.4923, "step": 1542 }, { "epoch": 0.0027376396191773076, "grad_norm": 2.0625, "learning_rate": 0.0019998549353431572, "loss": 0.3521, "step": 1544 }, { "epoch": 0.0027411857844871225, "grad_norm": 1.359375, "learning_rate": 0.0019998538647878882, "loss": 0.4604, "step": 1546 }, { "epoch": 0.0027447319497969375, "grad_norm": 2.28125, "learning_rate": 0.001999852790297181, "loss": 0.4689, "step": 1548 }, { "epoch": 0.002748278115106753, "grad_norm": 4.46875, "learning_rate": 0.00199985171187104, "loss": 0.6894, "step": 1550 }, { "epoch": 0.002751824280416568, "grad_norm": 0.4765625, "learning_rate": 0.0019998506295094707, "loss": 0.3231, "step": 1552 }, { "epoch": 0.0027553704457263834, "grad_norm": 1.7421875, "learning_rate": 0.0019998495432124773, "loss": 0.5036, "step": 1554 }, { "epoch": 0.0027589166110361983, "grad_norm": 0.52734375, "learning_rate": 0.0019998484529800643, "loss": 0.4204, "step": 1556 }, { "epoch": 0.0027624627763460133, "grad_norm": 0.9609375, "learning_rate": 0.0019998473588122376, "loss": 0.4533, "step": 1558 }, { "epoch": 0.0027660089416558288, "grad_norm": 0.80859375, "learning_rate": 0.001999846260709001, "loss": 0.4233, "step": 1560 }, { "epoch": 0.0027695551069656437, "grad_norm": 0.478515625, "learning_rate": 0.001999845158670359, "loss": 0.4601, "step": 1562 }, { "epoch": 0.002773101272275459, "grad_norm": 0.58203125, "learning_rate": 0.0019998440526963175, "loss": 0.3775, "step": 1564 }, { "epoch": 0.002776647437585274, "grad_norm": 0.8984375, "learning_rate": 0.0019998429427868806, "loss": 0.4076, "step": 1566 }, { "epoch": 0.0027801936028950896, "grad_norm": 0.419921875, "learning_rate": 0.0019998418289420535, "loss": 0.3472, "step": 1568 }, { "epoch": 0.0027837397682049046, "grad_norm": 2.078125, "learning_rate": 0.0019998407111618413, "loss": 0.4329, "step": 1570 }, { "epoch": 0.0027872859335147195, "grad_norm": 0.84375, "learning_rate": 0.001999839589446248, "loss": 0.6181, "step": 1572 }, { "epoch": 0.002790832098824535, "grad_norm": 2.671875, "learning_rate": 0.001999838463795279, "loss": 0.4774, "step": 1574 }, { "epoch": 0.00279437826413435, "grad_norm": 2.28125, "learning_rate": 0.0019998373342089396, "loss": 0.4125, "step": 1576 }, { "epoch": 0.0027979244294441654, "grad_norm": 1.4140625, "learning_rate": 0.0019998362006872343, "loss": 0.4688, "step": 1578 }, { "epoch": 0.0028014705947539804, "grad_norm": 0.796875, "learning_rate": 0.001999835063230168, "loss": 0.3709, "step": 1580 }, { "epoch": 0.0028050167600637953, "grad_norm": 0.875, "learning_rate": 0.001999833921837746, "loss": 0.4181, "step": 1582 }, { "epoch": 0.0028085629253736108, "grad_norm": 0.51953125, "learning_rate": 0.0019998327765099726, "loss": 0.3191, "step": 1584 }, { "epoch": 0.0028121090906834257, "grad_norm": 0.38671875, "learning_rate": 0.001999831627246854, "loss": 0.3258, "step": 1586 }, { "epoch": 0.002815655255993241, "grad_norm": 3.21875, "learning_rate": 0.001999830474048394, "loss": 0.6824, "step": 1588 }, { "epoch": 0.002819201421303056, "grad_norm": 0.34375, "learning_rate": 0.0019998293169145986, "loss": 0.3697, "step": 1590 }, { "epoch": 0.002822747586612871, "grad_norm": 0.376953125, "learning_rate": 0.0019998281558454723, "loss": 0.3423, "step": 1592 }, { "epoch": 0.0028262937519226866, "grad_norm": 0.578125, "learning_rate": 0.00199982699084102, "loss": 0.3911, "step": 1594 }, { "epoch": 0.0028298399172325015, "grad_norm": 0.349609375, "learning_rate": 0.0019998258219012474, "loss": 0.4432, "step": 1596 }, { "epoch": 0.002833386082542317, "grad_norm": 1.140625, "learning_rate": 0.0019998246490261595, "loss": 0.4584, "step": 1598 }, { "epoch": 0.002836932247852132, "grad_norm": 0.439453125, "learning_rate": 0.0019998234722157613, "loss": 0.285, "step": 1600 }, { "epoch": 0.0028404784131619474, "grad_norm": 0.87109375, "learning_rate": 0.0019998222914700573, "loss": 0.3189, "step": 1602 }, { "epoch": 0.0028440245784717624, "grad_norm": 0.294921875, "learning_rate": 0.0019998211067890543, "loss": 0.3265, "step": 1604 }, { "epoch": 0.0028475707437815773, "grad_norm": 0.609375, "learning_rate": 0.0019998199181727556, "loss": 0.5614, "step": 1606 }, { "epoch": 0.0028511169090913928, "grad_norm": 0.333984375, "learning_rate": 0.0019998187256211673, "loss": 0.3572, "step": 1608 }, { "epoch": 0.0028546630744012078, "grad_norm": 0.64453125, "learning_rate": 0.001999817529134295, "loss": 0.3555, "step": 1610 }, { "epoch": 0.002858209239711023, "grad_norm": 0.236328125, "learning_rate": 0.001999816328712143, "loss": 0.8434, "step": 1612 }, { "epoch": 0.002861755405020838, "grad_norm": 0.427734375, "learning_rate": 0.0019998151243547178, "loss": 0.3892, "step": 1614 }, { "epoch": 0.002865301570330653, "grad_norm": 0.439453125, "learning_rate": 0.0019998139160620236, "loss": 0.3229, "step": 1616 }, { "epoch": 0.0028688477356404686, "grad_norm": 0.5234375, "learning_rate": 0.001999812703834066, "loss": 0.3374, "step": 1618 }, { "epoch": 0.0028723939009502836, "grad_norm": 0.208984375, "learning_rate": 0.0019998114876708497, "loss": 0.349, "step": 1620 }, { "epoch": 0.002875940066260099, "grad_norm": 0.2578125, "learning_rate": 0.0019998102675723812, "loss": 0.3285, "step": 1622 }, { "epoch": 0.002879486231569914, "grad_norm": 0.39453125, "learning_rate": 0.0019998090435386653, "loss": 0.3325, "step": 1624 }, { "epoch": 0.002883032396879729, "grad_norm": 0.6328125, "learning_rate": 0.001999807815569707, "loss": 0.3392, "step": 1626 }, { "epoch": 0.0028865785621895444, "grad_norm": 2.421875, "learning_rate": 0.0019998065836655124, "loss": 0.3521, "step": 1628 }, { "epoch": 0.0028901247274993594, "grad_norm": 0.97265625, "learning_rate": 0.001999805347826086, "loss": 0.4679, "step": 1630 }, { "epoch": 0.0028936708928091748, "grad_norm": 1.015625, "learning_rate": 0.001999804108051434, "loss": 0.3713, "step": 1632 }, { "epoch": 0.0028972170581189898, "grad_norm": 4.90625, "learning_rate": 0.0019998028643415616, "loss": 0.4651, "step": 1634 }, { "epoch": 0.002900763223428805, "grad_norm": 0.5703125, "learning_rate": 0.0019998016166964737, "loss": 0.4644, "step": 1636 }, { "epoch": 0.00290430938873862, "grad_norm": 0.796875, "learning_rate": 0.001999800365116177, "loss": 0.3905, "step": 1638 }, { "epoch": 0.002907855554048435, "grad_norm": 0.71875, "learning_rate": 0.0019997991096006753, "loss": 0.3075, "step": 1640 }, { "epoch": 0.0029114017193582506, "grad_norm": 0.51953125, "learning_rate": 0.0019997978501499754, "loss": 0.3528, "step": 1642 }, { "epoch": 0.0029149478846680656, "grad_norm": 0.515625, "learning_rate": 0.001999796586764082, "loss": 0.6004, "step": 1644 }, { "epoch": 0.002918494049977881, "grad_norm": 0.56640625, "learning_rate": 0.0019997953194430015, "loss": 0.2975, "step": 1646 }, { "epoch": 0.002922040215287696, "grad_norm": 0.49609375, "learning_rate": 0.0019997940481867385, "loss": 0.3195, "step": 1648 }, { "epoch": 0.002925586380597511, "grad_norm": 0.263671875, "learning_rate": 0.001999792772995299, "loss": 0.3221, "step": 1650 }, { "epoch": 0.0029291325459073264, "grad_norm": 0.37890625, "learning_rate": 0.001999791493868689, "loss": 0.294, "step": 1652 }, { "epoch": 0.0029326787112171414, "grad_norm": 0.47265625, "learning_rate": 0.0019997902108069136, "loss": 0.4062, "step": 1654 }, { "epoch": 0.002936224876526957, "grad_norm": 0.98828125, "learning_rate": 0.001999788923809978, "loss": 0.3383, "step": 1656 }, { "epoch": 0.0029397710418367718, "grad_norm": 0.52734375, "learning_rate": 0.001999787632877889, "loss": 0.4175, "step": 1658 }, { "epoch": 0.0029433172071465868, "grad_norm": 0.58203125, "learning_rate": 0.001999786338010651, "loss": 0.2959, "step": 1660 }, { "epoch": 0.002946863372456402, "grad_norm": 0.423828125, "learning_rate": 0.0019997850392082703, "loss": 0.3044, "step": 1662 }, { "epoch": 0.002950409537766217, "grad_norm": 1.3046875, "learning_rate": 0.0019997837364707526, "loss": 0.4403, "step": 1664 }, { "epoch": 0.0029539557030760326, "grad_norm": 0.4921875, "learning_rate": 0.0019997824297981032, "loss": 0.2723, "step": 1666 }, { "epoch": 0.0029575018683858476, "grad_norm": 0.326171875, "learning_rate": 0.0019997811191903286, "loss": 0.3622, "step": 1668 }, { "epoch": 0.0029610480336956626, "grad_norm": 0.498046875, "learning_rate": 0.0019997798046474335, "loss": 0.3541, "step": 1670 }, { "epoch": 0.002964594199005478, "grad_norm": 0.86328125, "learning_rate": 0.001999778486169425, "loss": 0.2949, "step": 1672 }, { "epoch": 0.002968140364315293, "grad_norm": 1.125, "learning_rate": 0.0019997771637563075, "loss": 0.5068, "step": 1674 }, { "epoch": 0.0029716865296251084, "grad_norm": 0.44140625, "learning_rate": 0.0019997758374080874, "loss": 0.3588, "step": 1676 }, { "epoch": 0.0029752326949349234, "grad_norm": 0.3203125, "learning_rate": 0.0019997745071247703, "loss": 0.3341, "step": 1678 }, { "epoch": 0.002978778860244739, "grad_norm": 0.4453125, "learning_rate": 0.0019997731729063622, "loss": 0.4023, "step": 1680 }, { "epoch": 0.0029823250255545538, "grad_norm": 0.263671875, "learning_rate": 0.0019997718347528693, "loss": 0.3561, "step": 1682 }, { "epoch": 0.0029858711908643688, "grad_norm": 0.65234375, "learning_rate": 0.0019997704926642966, "loss": 0.4754, "step": 1684 }, { "epoch": 0.002989417356174184, "grad_norm": 0.68359375, "learning_rate": 0.0019997691466406503, "loss": 0.435, "step": 1686 }, { "epoch": 0.002992963521483999, "grad_norm": 0.6953125, "learning_rate": 0.001999767796681937, "loss": 0.3984, "step": 1688 }, { "epoch": 0.0029965096867938146, "grad_norm": 0.4296875, "learning_rate": 0.0019997664427881616, "loss": 0.3234, "step": 1690 }, { "epoch": 0.0030000558521036296, "grad_norm": 0.349609375, "learning_rate": 0.0019997650849593305, "loss": 0.3247, "step": 1692 }, { "epoch": 0.0030036020174134446, "grad_norm": 0.302734375, "learning_rate": 0.001999763723195449, "loss": 0.3037, "step": 1694 }, { "epoch": 0.00300714818272326, "grad_norm": 1.0703125, "learning_rate": 0.0019997623574965246, "loss": 0.3523, "step": 1696 }, { "epoch": 0.003010694348033075, "grad_norm": 0.50390625, "learning_rate": 0.001999760987862562, "loss": 0.3611, "step": 1698 }, { "epoch": 0.0030142405133428904, "grad_norm": 0.4296875, "learning_rate": 0.001999759614293567, "loss": 0.285, "step": 1700 }, { "epoch": 0.0030177866786527054, "grad_norm": 0.482421875, "learning_rate": 0.0019997582367895462, "loss": 0.3267, "step": 1702 }, { "epoch": 0.0030213328439625204, "grad_norm": 0.3671875, "learning_rate": 0.0019997568553505062, "loss": 0.3262, "step": 1704 }, { "epoch": 0.003024879009272336, "grad_norm": 0.279296875, "learning_rate": 0.0019997554699764516, "loss": 0.3819, "step": 1706 }, { "epoch": 0.0030284251745821508, "grad_norm": 0.6953125, "learning_rate": 0.0019997540806673897, "loss": 0.4152, "step": 1708 }, { "epoch": 0.003031971339891966, "grad_norm": 0.52734375, "learning_rate": 0.0019997526874233258, "loss": 0.3476, "step": 1710 }, { "epoch": 0.003035517505201781, "grad_norm": 0.7265625, "learning_rate": 0.0019997512902442667, "loss": 0.3321, "step": 1712 }, { "epoch": 0.0030390636705115966, "grad_norm": 0.84375, "learning_rate": 0.0019997498891302177, "loss": 0.5933, "step": 1714 }, { "epoch": 0.0030426098358214116, "grad_norm": 0.3984375, "learning_rate": 0.001999748484081185, "loss": 0.3428, "step": 1716 }, { "epoch": 0.0030461560011312266, "grad_norm": 0.94140625, "learning_rate": 0.0019997470750971755, "loss": 0.5058, "step": 1718 }, { "epoch": 0.003049702166441042, "grad_norm": 0.427734375, "learning_rate": 0.001999745662178195, "loss": 0.2842, "step": 1720 }, { "epoch": 0.003053248331750857, "grad_norm": 0.6484375, "learning_rate": 0.0019997442453242495, "loss": 0.4308, "step": 1722 }, { "epoch": 0.0030567944970606724, "grad_norm": 0.5234375, "learning_rate": 0.0019997428245353455, "loss": 0.4701, "step": 1724 }, { "epoch": 0.0030603406623704874, "grad_norm": 0.79296875, "learning_rate": 0.001999741399811489, "loss": 0.4284, "step": 1726 }, { "epoch": 0.0030638868276803024, "grad_norm": 0.39453125, "learning_rate": 0.001999739971152686, "loss": 0.3371, "step": 1728 }, { "epoch": 0.003067432992990118, "grad_norm": 0.40625, "learning_rate": 0.001999738538558943, "loss": 0.3227, "step": 1730 }, { "epoch": 0.0030709791582999328, "grad_norm": 0.54296875, "learning_rate": 0.0019997371020302663, "loss": 0.404, "step": 1732 }, { "epoch": 0.003074525323609748, "grad_norm": 0.310546875, "learning_rate": 0.001999735661566662, "loss": 0.331, "step": 1734 }, { "epoch": 0.003078071488919563, "grad_norm": 0.435546875, "learning_rate": 0.0019997342171681367, "loss": 0.3667, "step": 1736 }, { "epoch": 0.003081617654229378, "grad_norm": 2.296875, "learning_rate": 0.0019997327688346966, "loss": 0.3231, "step": 1738 }, { "epoch": 0.0030851638195391936, "grad_norm": 1.6640625, "learning_rate": 0.001999731316566348, "loss": 0.5626, "step": 1740 }, { "epoch": 0.0030887099848490086, "grad_norm": 0.359375, "learning_rate": 0.001999729860363097, "loss": 0.3205, "step": 1742 }, { "epoch": 0.003092256150158824, "grad_norm": 0.84375, "learning_rate": 0.001999728400224951, "loss": 0.4312, "step": 1744 }, { "epoch": 0.003095802315468639, "grad_norm": 0.4453125, "learning_rate": 0.0019997269361519145, "loss": 0.4246, "step": 1746 }, { "epoch": 0.0030993484807784544, "grad_norm": 1.84375, "learning_rate": 0.0019997254681439957, "loss": 0.5008, "step": 1748 }, { "epoch": 0.0031028946460882694, "grad_norm": 0.34375, "learning_rate": 0.0019997239962012, "loss": 0.3183, "step": 1750 }, { "epoch": 0.0031064408113980844, "grad_norm": 0.287109375, "learning_rate": 0.0019997225203235345, "loss": 0.3441, "step": 1752 }, { "epoch": 0.0031099869767079, "grad_norm": 3.25, "learning_rate": 0.001999721040511005, "loss": 0.3552, "step": 1754 }, { "epoch": 0.003113533142017715, "grad_norm": 0.9375, "learning_rate": 0.0019997195567636188, "loss": 0.3398, "step": 1756 }, { "epoch": 0.00311707930732753, "grad_norm": 0.365234375, "learning_rate": 0.0019997180690813814, "loss": 0.3092, "step": 1758 }, { "epoch": 0.003120625472637345, "grad_norm": 0.365234375, "learning_rate": 0.0019997165774643, "loss": 0.3321, "step": 1760 }, { "epoch": 0.00312417163794716, "grad_norm": 0.6640625, "learning_rate": 0.0019997150819123805, "loss": 0.3145, "step": 1762 }, { "epoch": 0.0031277178032569756, "grad_norm": 0.462890625, "learning_rate": 0.0019997135824256305, "loss": 0.3727, "step": 1764 }, { "epoch": 0.0031312639685667906, "grad_norm": 0.5234375, "learning_rate": 0.0019997120790040553, "loss": 0.3252, "step": 1766 }, { "epoch": 0.003134810133876606, "grad_norm": 0.396484375, "learning_rate": 0.0019997105716476624, "loss": 0.3073, "step": 1768 }, { "epoch": 0.003138356299186421, "grad_norm": 1.296875, "learning_rate": 0.0019997090603564582, "loss": 0.4655, "step": 1770 }, { "epoch": 0.003141902464496236, "grad_norm": 0.478515625, "learning_rate": 0.001999707545130449, "loss": 0.2859, "step": 1772 }, { "epoch": 0.0031454486298060514, "grad_norm": 4.625, "learning_rate": 0.0019997060259696418, "loss": 0.318, "step": 1774 }, { "epoch": 0.0031489947951158664, "grad_norm": 0.703125, "learning_rate": 0.0019997045028740425, "loss": 0.3593, "step": 1776 }, { "epoch": 0.003152540960425682, "grad_norm": 1.8359375, "learning_rate": 0.001999702975843659, "loss": 0.3372, "step": 1778 }, { "epoch": 0.003156087125735497, "grad_norm": 0.61328125, "learning_rate": 0.001999701444878497, "loss": 0.362, "step": 1780 }, { "epoch": 0.003159633291045312, "grad_norm": 1.25, "learning_rate": 0.001999699909978563, "loss": 0.491, "step": 1782 }, { "epoch": 0.003163179456355127, "grad_norm": 0.6640625, "learning_rate": 0.001999698371143865, "loss": 0.3574, "step": 1784 }, { "epoch": 0.003166725621664942, "grad_norm": 8.1875, "learning_rate": 0.0019996968283744086, "loss": 0.44, "step": 1786 }, { "epoch": 0.0031702717869747576, "grad_norm": 2.765625, "learning_rate": 0.0019996952816702007, "loss": 0.4284, "step": 1788 }, { "epoch": 0.0031738179522845726, "grad_norm": 0.47265625, "learning_rate": 0.0019996937310312484, "loss": 0.3777, "step": 1790 }, { "epoch": 0.003177364117594388, "grad_norm": 0.322265625, "learning_rate": 0.0019996921764575586, "loss": 0.3025, "step": 1792 }, { "epoch": 0.003180910282904203, "grad_norm": 0.6875, "learning_rate": 0.0019996906179491373, "loss": 0.3498, "step": 1794 }, { "epoch": 0.003184456448214018, "grad_norm": 0.98046875, "learning_rate": 0.001999689055505992, "loss": 0.2658, "step": 1796 }, { "epoch": 0.0031880026135238334, "grad_norm": 0.408203125, "learning_rate": 0.00199968748912813, "loss": 0.3541, "step": 1798 }, { "epoch": 0.0031915487788336484, "grad_norm": 1.0703125, "learning_rate": 0.001999685918815557, "loss": 0.3026, "step": 1800 }, { "epoch": 0.003195094944143464, "grad_norm": 0.27734375, "learning_rate": 0.00199968434456828, "loss": 0.3607, "step": 1802 }, { "epoch": 0.003198641109453279, "grad_norm": 0.75390625, "learning_rate": 0.001999682766386307, "loss": 0.8249, "step": 1804 }, { "epoch": 0.003202187274763094, "grad_norm": 0.240234375, "learning_rate": 0.001999681184269644, "loss": 0.3065, "step": 1806 }, { "epoch": 0.003205733440072909, "grad_norm": 0.515625, "learning_rate": 0.0019996795982182977, "loss": 0.31, "step": 1808 }, { "epoch": 0.003209279605382724, "grad_norm": 1.1171875, "learning_rate": 0.0019996780082322755, "loss": 0.4118, "step": 1810 }, { "epoch": 0.0032128257706925396, "grad_norm": 0.357421875, "learning_rate": 0.0019996764143115848, "loss": 0.3238, "step": 1812 }, { "epoch": 0.0032163719360023546, "grad_norm": 0.359375, "learning_rate": 0.0019996748164562315, "loss": 0.3518, "step": 1814 }, { "epoch": 0.00321991810131217, "grad_norm": 1.1484375, "learning_rate": 0.0019996732146662236, "loss": 0.4987, "step": 1816 }, { "epoch": 0.003223464266621985, "grad_norm": 0.48828125, "learning_rate": 0.001999671608941567, "loss": 0.3448, "step": 1818 }, { "epoch": 0.0032270104319318, "grad_norm": 0.26171875, "learning_rate": 0.00199966999928227, "loss": 0.3469, "step": 1820 }, { "epoch": 0.0032305565972416154, "grad_norm": 1.0234375, "learning_rate": 0.001999668385688339, "loss": 0.356, "step": 1822 }, { "epoch": 0.0032341027625514304, "grad_norm": 0.375, "learning_rate": 0.0019996667681597808, "loss": 0.3797, "step": 1824 }, { "epoch": 0.003237648927861246, "grad_norm": 0.4140625, "learning_rate": 0.0019996651466966026, "loss": 0.3039, "step": 1826 }, { "epoch": 0.003241195093171061, "grad_norm": 4.25, "learning_rate": 0.001999663521298812, "loss": 0.3366, "step": 1828 }, { "epoch": 0.003244741258480876, "grad_norm": 0.30078125, "learning_rate": 0.001999661891966416, "loss": 0.3459, "step": 1830 }, { "epoch": 0.0032482874237906912, "grad_norm": 0.56640625, "learning_rate": 0.0019996602586994207, "loss": 0.3181, "step": 1832 }, { "epoch": 0.003251833589100506, "grad_norm": 0.36328125, "learning_rate": 0.0019996586214978346, "loss": 0.2824, "step": 1834 }, { "epoch": 0.0032553797544103216, "grad_norm": 0.28125, "learning_rate": 0.001999656980361664, "loss": 0.2619, "step": 1836 }, { "epoch": 0.0032589259197201366, "grad_norm": 4.125, "learning_rate": 0.0019996553352909165, "loss": 0.5336, "step": 1838 }, { "epoch": 0.0032624720850299516, "grad_norm": 0.37109375, "learning_rate": 0.0019996536862855988, "loss": 0.2924, "step": 1840 }, { "epoch": 0.003266018250339767, "grad_norm": 0.7578125, "learning_rate": 0.0019996520333457187, "loss": 0.3617, "step": 1842 }, { "epoch": 0.003269564415649582, "grad_norm": 0.8046875, "learning_rate": 0.0019996503764712834, "loss": 0.3407, "step": 1844 }, { "epoch": 0.0032731105809593974, "grad_norm": 0.66015625, "learning_rate": 0.0019996487156622995, "loss": 0.3611, "step": 1846 }, { "epoch": 0.0032766567462692124, "grad_norm": 0.41015625, "learning_rate": 0.001999647050918775, "loss": 0.3965, "step": 1848 }, { "epoch": 0.003280202911579028, "grad_norm": 0.55859375, "learning_rate": 0.001999645382240717, "loss": 0.392, "step": 1850 }, { "epoch": 0.003283749076888843, "grad_norm": 0.67578125, "learning_rate": 0.0019996437096281325, "loss": 0.2987, "step": 1852 }, { "epoch": 0.003287295242198658, "grad_norm": 0.373046875, "learning_rate": 0.0019996420330810286, "loss": 0.374, "step": 1854 }, { "epoch": 0.0032908414075084732, "grad_norm": 0.361328125, "learning_rate": 0.0019996403525994137, "loss": 0.3721, "step": 1856 }, { "epoch": 0.003294387572818288, "grad_norm": 1.375, "learning_rate": 0.001999638668183294, "loss": 0.269, "step": 1858 }, { "epoch": 0.0032979337381281036, "grad_norm": 0.484375, "learning_rate": 0.0019996369798326776, "loss": 0.397, "step": 1860 }, { "epoch": 0.0033014799034379186, "grad_norm": 3.0, "learning_rate": 0.0019996352875475717, "loss": 0.3565, "step": 1862 }, { "epoch": 0.0033050260687477336, "grad_norm": 0.61328125, "learning_rate": 0.0019996335913279833, "loss": 0.3263, "step": 1864 }, { "epoch": 0.003308572234057549, "grad_norm": 0.58984375, "learning_rate": 0.0019996318911739202, "loss": 0.3105, "step": 1866 }, { "epoch": 0.003312118399367364, "grad_norm": 0.65234375, "learning_rate": 0.00199963018708539, "loss": 0.3022, "step": 1868 }, { "epoch": 0.0033156645646771794, "grad_norm": 0.5234375, "learning_rate": 0.0019996284790624, "loss": 0.3436, "step": 1870 }, { "epoch": 0.0033192107299869944, "grad_norm": 0.62890625, "learning_rate": 0.0019996267671049573, "loss": 0.3547, "step": 1872 }, { "epoch": 0.0033227568952968094, "grad_norm": 1.140625, "learning_rate": 0.00199962505121307, "loss": 0.4605, "step": 1874 }, { "epoch": 0.003326303060606625, "grad_norm": 1.15625, "learning_rate": 0.0019996233313867447, "loss": 0.3832, "step": 1876 }, { "epoch": 0.00332984922591644, "grad_norm": 0.302734375, "learning_rate": 0.00199962160762599, "loss": 0.3048, "step": 1878 }, { "epoch": 0.0033333953912262552, "grad_norm": 0.53515625, "learning_rate": 0.001999619879930813, "loss": 0.3729, "step": 1880 }, { "epoch": 0.0033369415565360702, "grad_norm": 0.84375, "learning_rate": 0.001999618148301221, "loss": 0.3066, "step": 1882 }, { "epoch": 0.0033404877218458856, "grad_norm": 0.64453125, "learning_rate": 0.0019996164127372216, "loss": 0.367, "step": 1884 }, { "epoch": 0.0033440338871557006, "grad_norm": 0.326171875, "learning_rate": 0.0019996146732388226, "loss": 0.3087, "step": 1886 }, { "epoch": 0.0033475800524655156, "grad_norm": 0.39453125, "learning_rate": 0.001999612929806032, "loss": 0.3728, "step": 1888 }, { "epoch": 0.003351126217775331, "grad_norm": 1.8125, "learning_rate": 0.0019996111824388567, "loss": 0.378, "step": 1890 }, { "epoch": 0.003354672383085146, "grad_norm": 0.5234375, "learning_rate": 0.001999609431137305, "loss": 0.3537, "step": 1892 }, { "epoch": 0.0033582185483949614, "grad_norm": 0.75390625, "learning_rate": 0.0019996076759013835, "loss": 0.3804, "step": 1894 }, { "epoch": 0.0033617647137047764, "grad_norm": 0.609375, "learning_rate": 0.0019996059167311014, "loss": 0.2966, "step": 1896 }, { "epoch": 0.0033653108790145914, "grad_norm": 0.2890625, "learning_rate": 0.001999604153626465, "loss": 0.4085, "step": 1898 }, { "epoch": 0.003368857044324407, "grad_norm": 0.734375, "learning_rate": 0.0019996023865874826, "loss": 0.3374, "step": 1900 }, { "epoch": 0.003372403209634222, "grad_norm": 1.2109375, "learning_rate": 0.0019996006156141623, "loss": 0.3316, "step": 1902 }, { "epoch": 0.0033759493749440372, "grad_norm": 0.392578125, "learning_rate": 0.001999598840706511, "loss": 0.356, "step": 1904 }, { "epoch": 0.0033794955402538522, "grad_norm": 0.265625, "learning_rate": 0.0019995970618645375, "loss": 0.3072, "step": 1906 }, { "epoch": 0.003383041705563667, "grad_norm": 2.484375, "learning_rate": 0.0019995952790882484, "loss": 0.2967, "step": 1908 }, { "epoch": 0.0033865878708734826, "grad_norm": 0.3984375, "learning_rate": 0.0019995934923776526, "loss": 0.2774, "step": 1910 }, { "epoch": 0.0033901340361832976, "grad_norm": 1.7578125, "learning_rate": 0.0019995917017327568, "loss": 0.566, "step": 1912 }, { "epoch": 0.003393680201493113, "grad_norm": 0.98046875, "learning_rate": 0.00199958990715357, "loss": 0.3883, "step": 1914 }, { "epoch": 0.003397226366802928, "grad_norm": 0.734375, "learning_rate": 0.001999588108640099, "loss": 0.3311, "step": 1916 }, { "epoch": 0.0034007725321127434, "grad_norm": 0.53515625, "learning_rate": 0.0019995863061923525, "loss": 0.3409, "step": 1918 }, { "epoch": 0.0034043186974225584, "grad_norm": 11.0, "learning_rate": 0.001999584499810338, "loss": 0.3615, "step": 1920 }, { "epoch": 0.0034078648627323734, "grad_norm": 0.484375, "learning_rate": 0.0019995826894940636, "loss": 0.3205, "step": 1922 }, { "epoch": 0.003411411028042189, "grad_norm": 4.96875, "learning_rate": 0.001999580875243537, "loss": 0.5128, "step": 1924 }, { "epoch": 0.003414957193352004, "grad_norm": 0.90234375, "learning_rate": 0.0019995790570587657, "loss": 0.5527, "step": 1926 }, { "epoch": 0.0034185033586618192, "grad_norm": 0.52734375, "learning_rate": 0.0019995772349397584, "loss": 0.2791, "step": 1928 }, { "epoch": 0.0034220495239716342, "grad_norm": 0.62109375, "learning_rate": 0.001999575408886523, "loss": 0.2989, "step": 1930 }, { "epoch": 0.0034255956892814492, "grad_norm": 1.0078125, "learning_rate": 0.0019995735788990673, "loss": 0.3708, "step": 1932 }, { "epoch": 0.0034291418545912646, "grad_norm": 0.4375, "learning_rate": 0.0019995717449773995, "loss": 0.5294, "step": 1934 }, { "epoch": 0.0034326880199010796, "grad_norm": 1.3046875, "learning_rate": 0.001999569907121527, "loss": 0.5341, "step": 1936 }, { "epoch": 0.003436234185210895, "grad_norm": 4.8125, "learning_rate": 0.0019995680653314583, "loss": 0.5079, "step": 1938 }, { "epoch": 0.00343978035052071, "grad_norm": 0.796875, "learning_rate": 0.0019995662196072017, "loss": 0.336, "step": 1940 }, { "epoch": 0.003443326515830525, "grad_norm": 0.40625, "learning_rate": 0.001999564369948765, "loss": 0.3488, "step": 1942 }, { "epoch": 0.0034468726811403404, "grad_norm": 0.3984375, "learning_rate": 0.0019995625163561556, "loss": 0.3103, "step": 1944 }, { "epoch": 0.0034504188464501554, "grad_norm": 0.484375, "learning_rate": 0.001999560658829383, "loss": 0.277, "step": 1946 }, { "epoch": 0.003453965011759971, "grad_norm": 1.7109375, "learning_rate": 0.001999558797368454, "loss": 0.3605, "step": 1948 }, { "epoch": 0.003457511177069786, "grad_norm": 0.515625, "learning_rate": 0.001999556931973378, "loss": 0.3346, "step": 1950 }, { "epoch": 0.0034610573423796013, "grad_norm": 6.0625, "learning_rate": 0.001999555062644162, "loss": 0.4588, "step": 1952 }, { "epoch": 0.0034646035076894162, "grad_norm": 1.171875, "learning_rate": 0.001999553189380815, "loss": 0.2776, "step": 1954 }, { "epoch": 0.0034681496729992312, "grad_norm": 1.9296875, "learning_rate": 0.0019995513121833447, "loss": 0.3906, "step": 1956 }, { "epoch": 0.0034716958383090466, "grad_norm": 0.462890625, "learning_rate": 0.0019995494310517596, "loss": 0.3159, "step": 1958 }, { "epoch": 0.0034752420036188616, "grad_norm": 0.26953125, "learning_rate": 0.001999547545986068, "loss": 0.2552, "step": 1960 }, { "epoch": 0.003478788168928677, "grad_norm": 1.1015625, "learning_rate": 0.001999545656986277, "loss": 0.3181, "step": 1962 }, { "epoch": 0.003482334334238492, "grad_norm": 0.494140625, "learning_rate": 0.0019995437640523968, "loss": 0.3241, "step": 1964 }, { "epoch": 0.003485880499548307, "grad_norm": 0.52734375, "learning_rate": 0.0019995418671844346, "loss": 0.4052, "step": 1966 }, { "epoch": 0.0034894266648581224, "grad_norm": 1.4609375, "learning_rate": 0.0019995399663823984, "loss": 0.3428, "step": 1968 }, { "epoch": 0.0034929728301679374, "grad_norm": 1.1796875, "learning_rate": 0.0019995380616462974, "loss": 0.3386, "step": 1970 }, { "epoch": 0.003496518995477753, "grad_norm": 0.55859375, "learning_rate": 0.001999536152976139, "loss": 0.2762, "step": 1972 }, { "epoch": 0.003500065160787568, "grad_norm": 0.5078125, "learning_rate": 0.0019995342403719323, "loss": 0.3334, "step": 1974 }, { "epoch": 0.003503611326097383, "grad_norm": 1.171875, "learning_rate": 0.0019995323238336847, "loss": 0.3141, "step": 1976 }, { "epoch": 0.0035071574914071982, "grad_norm": 0.81640625, "learning_rate": 0.001999530403361406, "loss": 0.4356, "step": 1978 }, { "epoch": 0.0035107036567170132, "grad_norm": 0.326171875, "learning_rate": 0.0019995284789551034, "loss": 0.3265, "step": 1980 }, { "epoch": 0.0035142498220268287, "grad_norm": 0.498046875, "learning_rate": 0.001999526550614786, "loss": 0.6316, "step": 1982 }, { "epoch": 0.0035177959873366436, "grad_norm": 0.34375, "learning_rate": 0.0019995246183404617, "loss": 0.322, "step": 1984 }, { "epoch": 0.0035213421526464586, "grad_norm": 1.4375, "learning_rate": 0.001999522682132139, "loss": 0.3813, "step": 1986 }, { "epoch": 0.003524888317956274, "grad_norm": 1.2890625, "learning_rate": 0.0019995207419898275, "loss": 0.2774, "step": 1988 }, { "epoch": 0.003528434483266089, "grad_norm": 3.953125, "learning_rate": 0.001999518797913534, "loss": 0.6197, "step": 1990 }, { "epoch": 0.0035319806485759045, "grad_norm": 0.3125, "learning_rate": 0.001999516849903268, "loss": 0.2728, "step": 1992 }, { "epoch": 0.0035355268138857194, "grad_norm": 0.314453125, "learning_rate": 0.001999514897959038, "loss": 0.3735, "step": 1994 }, { "epoch": 0.003539072979195535, "grad_norm": 3.046875, "learning_rate": 0.001999512942080852, "loss": 0.3896, "step": 1996 }, { "epoch": 0.00354261914450535, "grad_norm": 0.6328125, "learning_rate": 0.001999510982268719, "loss": 0.4228, "step": 1998 }, { "epoch": 0.003546165309815165, "grad_norm": 0.9375, "learning_rate": 0.0019995090185226474, "loss": 0.3514, "step": 2000 }, { "epoch": 0.0035497114751249803, "grad_norm": 0.2890625, "learning_rate": 0.0019995070508426463, "loss": 0.2548, "step": 2002 }, { "epoch": 0.0035532576404347952, "grad_norm": 0.92578125, "learning_rate": 0.0019995050792287234, "loss": 0.3597, "step": 2004 }, { "epoch": 0.0035568038057446107, "grad_norm": 0.6328125, "learning_rate": 0.001999503103680888, "loss": 0.2934, "step": 2006 }, { "epoch": 0.0035603499710544257, "grad_norm": 0.52734375, "learning_rate": 0.0019995011241991482, "loss": 0.363, "step": 2008 }, { "epoch": 0.0035638961363642406, "grad_norm": 0.875, "learning_rate": 0.0019994991407835134, "loss": 0.4091, "step": 2010 }, { "epoch": 0.003567442301674056, "grad_norm": 6.375, "learning_rate": 0.001999497153433991, "loss": 0.4613, "step": 2012 }, { "epoch": 0.003570988466983871, "grad_norm": 0.60546875, "learning_rate": 0.001999495162150591, "loss": 0.2809, "step": 2014 }, { "epoch": 0.0035745346322936865, "grad_norm": 0.458984375, "learning_rate": 0.0019994931669333224, "loss": 0.3114, "step": 2016 }, { "epoch": 0.0035780807976035015, "grad_norm": 0.376953125, "learning_rate": 0.0019994911677821926, "loss": 0.3755, "step": 2018 }, { "epoch": 0.0035816269629133164, "grad_norm": 1.703125, "learning_rate": 0.001999489164697211, "loss": 0.359, "step": 2020 }, { "epoch": 0.003585173128223132, "grad_norm": 0.73828125, "learning_rate": 0.001999487157678386, "loss": 0.4394, "step": 2022 }, { "epoch": 0.003588719293532947, "grad_norm": 0.5625, "learning_rate": 0.0019994851467257267, "loss": 0.368, "step": 2024 }, { "epoch": 0.0035922654588427623, "grad_norm": 0.9453125, "learning_rate": 0.001999483131839242, "loss": 0.5545, "step": 2026 }, { "epoch": 0.0035958116241525773, "grad_norm": 0.8359375, "learning_rate": 0.0019994811130189406, "loss": 0.3449, "step": 2028 }, { "epoch": 0.0035993577894623927, "grad_norm": 1.28125, "learning_rate": 0.0019994790902648314, "loss": 0.2781, "step": 2030 }, { "epoch": 0.0036029039547722077, "grad_norm": 1.296875, "learning_rate": 0.0019994770635769227, "loss": 0.3364, "step": 2032 }, { "epoch": 0.0036064501200820226, "grad_norm": 2.8125, "learning_rate": 0.001999475032955224, "loss": 0.4186, "step": 2034 }, { "epoch": 0.003609996285391838, "grad_norm": 1.2109375, "learning_rate": 0.0019994729983997444, "loss": 0.3297, "step": 2036 }, { "epoch": 0.003613542450701653, "grad_norm": 0.52734375, "learning_rate": 0.001999470959910492, "loss": 0.4499, "step": 2038 }, { "epoch": 0.0036170886160114685, "grad_norm": 1.0703125, "learning_rate": 0.001999468917487476, "loss": 0.3226, "step": 2040 }, { "epoch": 0.0036206347813212835, "grad_norm": 1.5390625, "learning_rate": 0.001999466871130706, "loss": 0.3072, "step": 2042 }, { "epoch": 0.0036241809466310984, "grad_norm": 1.6875, "learning_rate": 0.0019994648208401895, "loss": 0.4056, "step": 2044 }, { "epoch": 0.003627727111940914, "grad_norm": 0.453125, "learning_rate": 0.001999462766615937, "loss": 0.3269, "step": 2046 }, { "epoch": 0.003631273277250729, "grad_norm": 1.3671875, "learning_rate": 0.001999460708457956, "loss": 0.3396, "step": 2048 }, { "epoch": 0.0036348194425605443, "grad_norm": 0.4375, "learning_rate": 0.0019994586463662574, "loss": 0.347, "step": 2050 }, { "epoch": 0.0036383656078703593, "grad_norm": 0.388671875, "learning_rate": 0.0019994565803408484, "loss": 0.3198, "step": 2052 }, { "epoch": 0.0036419117731801742, "grad_norm": 1.0703125, "learning_rate": 0.0019994545103817394, "loss": 0.3497, "step": 2054 }, { "epoch": 0.0036454579384899897, "grad_norm": 0.49609375, "learning_rate": 0.0019994524364889384, "loss": 0.3626, "step": 2056 }, { "epoch": 0.0036490041037998047, "grad_norm": 0.296875, "learning_rate": 0.0019994503586624555, "loss": 0.2683, "step": 2058 }, { "epoch": 0.00365255026910962, "grad_norm": 2.125, "learning_rate": 0.0019994482769022984, "loss": 0.3466, "step": 2060 }, { "epoch": 0.003656096434419435, "grad_norm": 0.353515625, "learning_rate": 0.001999446191208477, "loss": 0.3426, "step": 2062 }, { "epoch": 0.0036596425997292505, "grad_norm": 0.375, "learning_rate": 0.0019994441015810014, "loss": 0.3127, "step": 2064 }, { "epoch": 0.0036631887650390655, "grad_norm": 0.302734375, "learning_rate": 0.0019994420080198787, "loss": 0.2981, "step": 2066 }, { "epoch": 0.0036667349303488805, "grad_norm": 0.41796875, "learning_rate": 0.0019994399105251197, "loss": 0.3194, "step": 2068 }, { "epoch": 0.003670281095658696, "grad_norm": 0.486328125, "learning_rate": 0.0019994378090967325, "loss": 0.3509, "step": 2070 }, { "epoch": 0.003673827260968511, "grad_norm": 1.375, "learning_rate": 0.001999435703734727, "loss": 0.4092, "step": 2072 }, { "epoch": 0.0036773734262783263, "grad_norm": 0.625, "learning_rate": 0.0019994335944391126, "loss": 0.4644, "step": 2074 }, { "epoch": 0.0036809195915881413, "grad_norm": 0.21875, "learning_rate": 0.0019994314812098977, "loss": 0.2814, "step": 2076 }, { "epoch": 0.0036844657568979563, "grad_norm": 1.171875, "learning_rate": 0.001999429364047092, "loss": 0.3216, "step": 2078 }, { "epoch": 0.0036880119222077717, "grad_norm": 7.21875, "learning_rate": 0.001999427242950705, "loss": 0.3708, "step": 2080 }, { "epoch": 0.0036915580875175867, "grad_norm": 0.58203125, "learning_rate": 0.0019994251179207456, "loss": 0.2891, "step": 2082 }, { "epoch": 0.003695104252827402, "grad_norm": 0.5546875, "learning_rate": 0.001999422988957223, "loss": 0.2936, "step": 2084 }, { "epoch": 0.003698650418137217, "grad_norm": 0.4140625, "learning_rate": 0.0019994208560601464, "loss": 0.3614, "step": 2086 }, { "epoch": 0.003702196583447032, "grad_norm": 0.92578125, "learning_rate": 0.0019994187192295255, "loss": 0.3889, "step": 2088 }, { "epoch": 0.0037057427487568475, "grad_norm": 0.6953125, "learning_rate": 0.00199941657846537, "loss": 0.3378, "step": 2090 }, { "epoch": 0.0037092889140666625, "grad_norm": 0.275390625, "learning_rate": 0.0019994144337676886, "loss": 0.3008, "step": 2092 }, { "epoch": 0.003712835079376478, "grad_norm": 0.390625, "learning_rate": 0.001999412285136491, "loss": 0.3034, "step": 2094 }, { "epoch": 0.003716381244686293, "grad_norm": 0.65625, "learning_rate": 0.0019994101325717865, "loss": 0.3028, "step": 2096 }, { "epoch": 0.0037199274099961083, "grad_norm": 2.390625, "learning_rate": 0.0019994079760735847, "loss": 0.4911, "step": 2098 }, { "epoch": 0.0037234735753059233, "grad_norm": 0.515625, "learning_rate": 0.0019994058156418944, "loss": 0.3719, "step": 2100 }, { "epoch": 0.0037270197406157383, "grad_norm": 0.39453125, "learning_rate": 0.001999403651276726, "loss": 0.3539, "step": 2102 }, { "epoch": 0.0037305659059255537, "grad_norm": 0.21875, "learning_rate": 0.001999401482978088, "loss": 0.5215, "step": 2104 }, { "epoch": 0.0037341120712353687, "grad_norm": 0.359375, "learning_rate": 0.0019993993107459904, "loss": 0.3986, "step": 2106 }, { "epoch": 0.003737658236545184, "grad_norm": 1.390625, "learning_rate": 0.001999397134580443, "loss": 0.4519, "step": 2108 }, { "epoch": 0.003741204401854999, "grad_norm": 0.8515625, "learning_rate": 0.0019993949544814546, "loss": 0.4013, "step": 2110 }, { "epoch": 0.003744750567164814, "grad_norm": 0.89453125, "learning_rate": 0.0019993927704490353, "loss": 0.3381, "step": 2112 }, { "epoch": 0.0037482967324746295, "grad_norm": 1.3671875, "learning_rate": 0.0019993905824831943, "loss": 0.3385, "step": 2114 }, { "epoch": 0.0037518428977844445, "grad_norm": 2.703125, "learning_rate": 0.0019993883905839414, "loss": 0.4074, "step": 2116 }, { "epoch": 0.00375538906309426, "grad_norm": 0.5703125, "learning_rate": 0.001999386194751286, "loss": 0.3746, "step": 2118 }, { "epoch": 0.003758935228404075, "grad_norm": 0.46875, "learning_rate": 0.0019993839949852383, "loss": 0.3088, "step": 2120 }, { "epoch": 0.00376248139371389, "grad_norm": 1.0625, "learning_rate": 0.0019993817912858066, "loss": 0.4044, "step": 2122 }, { "epoch": 0.0037660275590237053, "grad_norm": 0.546875, "learning_rate": 0.001999379583653002, "loss": 0.3365, "step": 2124 }, { "epoch": 0.0037695737243335203, "grad_norm": 0.640625, "learning_rate": 0.0019993773720868332, "loss": 0.3101, "step": 2126 }, { "epoch": 0.0037731198896433357, "grad_norm": 0.4609375, "learning_rate": 0.0019993751565873107, "loss": 0.3699, "step": 2128 }, { "epoch": 0.0037766660549531507, "grad_norm": 0.376953125, "learning_rate": 0.001999372937154443, "loss": 0.3317, "step": 2130 }, { "epoch": 0.003780212220262966, "grad_norm": 0.80859375, "learning_rate": 0.001999370713788241, "loss": 0.3458, "step": 2132 }, { "epoch": 0.003783758385572781, "grad_norm": 0.34375, "learning_rate": 0.001999368486488714, "loss": 0.4345, "step": 2134 }, { "epoch": 0.003787304550882596, "grad_norm": 0.546875, "learning_rate": 0.001999366255255871, "loss": 0.2812, "step": 2136 }, { "epoch": 0.0037908507161924115, "grad_norm": 0.35546875, "learning_rate": 0.001999364020089723, "loss": 0.2801, "step": 2138 }, { "epoch": 0.0037943968815022265, "grad_norm": 0.3125, "learning_rate": 0.0019993617809902792, "loss": 0.2712, "step": 2140 }, { "epoch": 0.003797943046812042, "grad_norm": 0.33203125, "learning_rate": 0.0019993595379575488, "loss": 0.2721, "step": 2142 }, { "epoch": 0.003801489212121857, "grad_norm": 0.28515625, "learning_rate": 0.0019993572909915427, "loss": 0.2625, "step": 2144 }, { "epoch": 0.003805035377431672, "grad_norm": 0.392578125, "learning_rate": 0.00199935504009227, "loss": 0.4411, "step": 2146 }, { "epoch": 0.0038085815427414873, "grad_norm": 0.2890625, "learning_rate": 0.001999352785259741, "loss": 0.3142, "step": 2148 }, { "epoch": 0.0038121277080513023, "grad_norm": 0.7265625, "learning_rate": 0.001999350526493965, "loss": 0.3174, "step": 2150 }, { "epoch": 0.0038156738733611177, "grad_norm": 1.1484375, "learning_rate": 0.0019993482637949526, "loss": 0.419, "step": 2152 }, { "epoch": 0.0038192200386709327, "grad_norm": 0.400390625, "learning_rate": 0.001999345997162713, "loss": 0.3591, "step": 2154 }, { "epoch": 0.0038227662039807477, "grad_norm": 0.3203125, "learning_rate": 0.0019993437265972565, "loss": 0.2762, "step": 2156 }, { "epoch": 0.003826312369290563, "grad_norm": 0.75, "learning_rate": 0.001999341452098593, "loss": 0.3903, "step": 2158 }, { "epoch": 0.003829858534600378, "grad_norm": 0.8984375, "learning_rate": 0.001999339173666732, "loss": 0.3826, "step": 2160 }, { "epoch": 0.0038334046999101935, "grad_norm": 0.22265625, "learning_rate": 0.0019993368913016844, "loss": 0.2625, "step": 2162 }, { "epoch": 0.0038369508652200085, "grad_norm": 0.498046875, "learning_rate": 0.0019993346050034594, "loss": 0.3452, "step": 2164 }, { "epoch": 0.003840497030529824, "grad_norm": 0.4375, "learning_rate": 0.0019993323147720673, "loss": 0.3819, "step": 2166 }, { "epoch": 0.003844043195839639, "grad_norm": 0.9375, "learning_rate": 0.001999330020607518, "loss": 0.2753, "step": 2168 }, { "epoch": 0.003847589361149454, "grad_norm": 1.2578125, "learning_rate": 0.0019993277225098215, "loss": 0.4099, "step": 2170 }, { "epoch": 0.0038511355264592693, "grad_norm": 0.3828125, "learning_rate": 0.001999325420478988, "loss": 0.4247, "step": 2172 }, { "epoch": 0.0038546816917690843, "grad_norm": 0.48828125, "learning_rate": 0.0019993231145150276, "loss": 0.3804, "step": 2174 }, { "epoch": 0.0038582278570788997, "grad_norm": 1.6875, "learning_rate": 0.00199932080461795, "loss": 0.4339, "step": 2176 }, { "epoch": 0.0038617740223887147, "grad_norm": 0.451171875, "learning_rate": 0.001999318490787766, "loss": 0.33, "step": 2178 }, { "epoch": 0.0038653201876985297, "grad_norm": 0.2470703125, "learning_rate": 0.001999316173024485, "loss": 0.3843, "step": 2180 }, { "epoch": 0.003868866353008345, "grad_norm": 0.5, "learning_rate": 0.0019993138513281173, "loss": 0.2957, "step": 2182 }, { "epoch": 0.00387241251831816, "grad_norm": 0.2392578125, "learning_rate": 0.0019993115256986735, "loss": 0.345, "step": 2184 }, { "epoch": 0.0038759586836279755, "grad_norm": 0.3125, "learning_rate": 0.001999309196136163, "loss": 0.2717, "step": 2186 }, { "epoch": 0.0038795048489377905, "grad_norm": 0.55859375, "learning_rate": 0.001999306862640597, "loss": 0.2542, "step": 2188 }, { "epoch": 0.0038830510142476055, "grad_norm": 1.1328125, "learning_rate": 0.0019993045252119854, "loss": 0.3122, "step": 2190 }, { "epoch": 0.003886597179557421, "grad_norm": 3.46875, "learning_rate": 0.0019993021838503374, "loss": 0.4333, "step": 2192 }, { "epoch": 0.003890143344867236, "grad_norm": 0.64453125, "learning_rate": 0.0019992998385556645, "loss": 0.3802, "step": 2194 }, { "epoch": 0.0038936895101770513, "grad_norm": 1.421875, "learning_rate": 0.0019992974893279767, "loss": 0.3688, "step": 2196 }, { "epoch": 0.0038972356754868663, "grad_norm": 0.35546875, "learning_rate": 0.001999295136167284, "loss": 0.3194, "step": 2198 }, { "epoch": 0.0039007818407966817, "grad_norm": 0.337890625, "learning_rate": 0.001999292779073596, "loss": 0.266, "step": 2200 }, { "epoch": 0.0039043280061064967, "grad_norm": 1.0078125, "learning_rate": 0.0019992904180469243, "loss": 0.3561, "step": 2202 }, { "epoch": 0.003907874171416312, "grad_norm": 0.3671875, "learning_rate": 0.0019992880530872786, "loss": 0.3423, "step": 2204 }, { "epoch": 0.003911420336726127, "grad_norm": 0.52734375, "learning_rate": 0.001999285684194669, "loss": 0.2538, "step": 2206 }, { "epoch": 0.003914966502035942, "grad_norm": 0.486328125, "learning_rate": 0.0019992833113691073, "loss": 0.3364, "step": 2208 }, { "epoch": 0.0039185126673457575, "grad_norm": 0.546875, "learning_rate": 0.001999280934610602, "loss": 0.3101, "step": 2210 }, { "epoch": 0.003922058832655572, "grad_norm": 0.4765625, "learning_rate": 0.001999278553919164, "loss": 0.337, "step": 2212 }, { "epoch": 0.0039256049979653875, "grad_norm": 1.1796875, "learning_rate": 0.0019992761692948042, "loss": 0.34, "step": 2214 }, { "epoch": 0.003929151163275203, "grad_norm": 0.5078125, "learning_rate": 0.001999273780737533, "loss": 0.3095, "step": 2216 }, { "epoch": 0.003932697328585018, "grad_norm": 0.5078125, "learning_rate": 0.0019992713882473604, "loss": 0.4177, "step": 2218 }, { "epoch": 0.003936243493894833, "grad_norm": 0.470703125, "learning_rate": 0.0019992689918242974, "loss": 0.3188, "step": 2220 }, { "epoch": 0.003939789659204648, "grad_norm": 0.6171875, "learning_rate": 0.001999266591468354, "loss": 0.327, "step": 2222 }, { "epoch": 0.003943335824514464, "grad_norm": 0.314453125, "learning_rate": 0.0019992641871795407, "loss": 0.3127, "step": 2224 }, { "epoch": 0.003946881989824278, "grad_norm": 0.890625, "learning_rate": 0.0019992617789578683, "loss": 0.274, "step": 2226 }, { "epoch": 0.003950428155134094, "grad_norm": 3.5625, "learning_rate": 0.001999259366803347, "loss": 0.3542, "step": 2228 }, { "epoch": 0.003953974320443909, "grad_norm": 1.6640625, "learning_rate": 0.001999256950715988, "loss": 0.4139, "step": 2230 }, { "epoch": 0.0039575204857537245, "grad_norm": 0.267578125, "learning_rate": 0.0019992545306958013, "loss": 0.3966, "step": 2232 }, { "epoch": 0.003961066651063539, "grad_norm": 0.98046875, "learning_rate": 0.0019992521067427977, "loss": 0.2843, "step": 2234 }, { "epoch": 0.0039646128163733545, "grad_norm": 1.046875, "learning_rate": 0.001999249678856988, "loss": 0.3295, "step": 2236 }, { "epoch": 0.00396815898168317, "grad_norm": 0.375, "learning_rate": 0.001999247247038382, "loss": 0.3355, "step": 2238 }, { "epoch": 0.0039717051469929845, "grad_norm": 0.302734375, "learning_rate": 0.001999244811286991, "loss": 0.2689, "step": 2240 }, { "epoch": 0.0039752513123028, "grad_norm": 0.99609375, "learning_rate": 0.0019992423716028262, "loss": 0.285, "step": 2242 }, { "epoch": 0.003978797477612615, "grad_norm": 0.30859375, "learning_rate": 0.0019992399279858968, "loss": 0.3874, "step": 2244 }, { "epoch": 0.00398234364292243, "grad_norm": 0.64453125, "learning_rate": 0.0019992374804362143, "loss": 0.624, "step": 2246 }, { "epoch": 0.003985889808232245, "grad_norm": 0.30859375, "learning_rate": 0.0019992350289537897, "loss": 0.3005, "step": 2248 }, { "epoch": 0.003989435973542061, "grad_norm": 1.4375, "learning_rate": 0.0019992325735386338, "loss": 0.5908, "step": 2250 }, { "epoch": 0.003992982138851876, "grad_norm": 0.6953125, "learning_rate": 0.001999230114190756, "loss": 0.4056, "step": 2252 }, { "epoch": 0.003996528304161691, "grad_norm": 1.0234375, "learning_rate": 0.0019992276509101688, "loss": 0.3623, "step": 2254 }, { "epoch": 0.004000074469471506, "grad_norm": 1.375, "learning_rate": 0.001999225183696882, "loss": 0.2732, "step": 2256 }, { "epoch": 0.0040036206347813215, "grad_norm": 0.2890625, "learning_rate": 0.0019992227125509065, "loss": 0.2714, "step": 2258 }, { "epoch": 0.004007166800091136, "grad_norm": 0.2734375, "learning_rate": 0.001999220237472253, "loss": 0.3026, "step": 2260 }, { "epoch": 0.0040107129654009515, "grad_norm": 2.21875, "learning_rate": 0.001999217758460933, "loss": 0.6627, "step": 2262 }, { "epoch": 0.004014259130710767, "grad_norm": 0.4609375, "learning_rate": 0.0019992152755169564, "loss": 0.2845, "step": 2264 }, { "epoch": 0.004017805296020582, "grad_norm": 0.291015625, "learning_rate": 0.0019992127886403347, "loss": 0.2716, "step": 2266 }, { "epoch": 0.004021351461330397, "grad_norm": 1.4375, "learning_rate": 0.0019992102978310788, "loss": 0.3575, "step": 2268 }, { "epoch": 0.004024897626640212, "grad_norm": 1.109375, "learning_rate": 0.001999207803089199, "loss": 0.4475, "step": 2270 }, { "epoch": 0.004028443791950028, "grad_norm": 0.734375, "learning_rate": 0.0019992053044147065, "loss": 0.5932, "step": 2272 }, { "epoch": 0.004031989957259842, "grad_norm": 1.03125, "learning_rate": 0.0019992028018076128, "loss": 0.4675, "step": 2274 }, { "epoch": 0.004035536122569658, "grad_norm": 0.6953125, "learning_rate": 0.001999200295267928, "loss": 0.3746, "step": 2276 }, { "epoch": 0.004039082287879473, "grad_norm": 0.431640625, "learning_rate": 0.0019991977847956637, "loss": 0.3107, "step": 2278 }, { "epoch": 0.004042628453189288, "grad_norm": 0.546875, "learning_rate": 0.0019991952703908304, "loss": 0.2674, "step": 2280 }, { "epoch": 0.004046174618499103, "grad_norm": 0.83203125, "learning_rate": 0.0019991927520534394, "loss": 0.3235, "step": 2282 }, { "epoch": 0.0040497207838089185, "grad_norm": 1.2109375, "learning_rate": 0.0019991902297835018, "loss": 0.3161, "step": 2284 }, { "epoch": 0.004053266949118734, "grad_norm": 0.369140625, "learning_rate": 0.001999187703581028, "loss": 0.3673, "step": 2286 }, { "epoch": 0.0040568131144285485, "grad_norm": 1.453125, "learning_rate": 0.00199918517344603, "loss": 0.2897, "step": 2288 }, { "epoch": 0.004060359279738364, "grad_norm": 3.078125, "learning_rate": 0.001999182639378518, "loss": 0.4174, "step": 2290 }, { "epoch": 0.004063905445048179, "grad_norm": 0.322265625, "learning_rate": 0.0019991801013785034, "loss": 0.296, "step": 2292 }, { "epoch": 0.004067451610357994, "grad_norm": 0.431640625, "learning_rate": 0.0019991775594459973, "loss": 0.307, "step": 2294 }, { "epoch": 0.004070997775667809, "grad_norm": 2.171875, "learning_rate": 0.0019991750135810115, "loss": 0.4551, "step": 2296 }, { "epoch": 0.004074543940977625, "grad_norm": 0.9921875, "learning_rate": 0.001999172463783556, "loss": 0.4069, "step": 2298 }, { "epoch": 0.00407809010628744, "grad_norm": 0.79296875, "learning_rate": 0.001999169910053642, "loss": 0.3585, "step": 2300 }, { "epoch": 0.004081636271597255, "grad_norm": 0.5703125, "learning_rate": 0.001999167352391282, "loss": 0.2759, "step": 2302 }, { "epoch": 0.00408518243690707, "grad_norm": 0.7421875, "learning_rate": 0.0019991647907964855, "loss": 0.2841, "step": 2304 }, { "epoch": 0.0040887286022168855, "grad_norm": 0.53515625, "learning_rate": 0.001999162225269265, "loss": 0.4041, "step": 2306 }, { "epoch": 0.0040922747675267, "grad_norm": 0.69921875, "learning_rate": 0.0019991596558096313, "loss": 0.5127, "step": 2308 }, { "epoch": 0.0040958209328365155, "grad_norm": 2.125, "learning_rate": 0.0019991570824175954, "loss": 0.414, "step": 2310 }, { "epoch": 0.004099367098146331, "grad_norm": 0.5, "learning_rate": 0.0019991545050931686, "loss": 0.3223, "step": 2312 }, { "epoch": 0.0041029132634561455, "grad_norm": 0.6953125, "learning_rate": 0.0019991519238363622, "loss": 0.3266, "step": 2314 }, { "epoch": 0.004106459428765961, "grad_norm": 0.82421875, "learning_rate": 0.001999149338647188, "loss": 0.4117, "step": 2316 }, { "epoch": 0.004110005594075776, "grad_norm": 0.44921875, "learning_rate": 0.001999146749525656, "loss": 0.2709, "step": 2318 }, { "epoch": 0.004113551759385592, "grad_norm": 0.3125, "learning_rate": 0.0019991441564717796, "loss": 0.3172, "step": 2320 }, { "epoch": 0.004117097924695406, "grad_norm": 0.7109375, "learning_rate": 0.0019991415594855684, "loss": 0.2935, "step": 2322 }, { "epoch": 0.004120644090005222, "grad_norm": 0.470703125, "learning_rate": 0.001999138958567034, "loss": 0.3147, "step": 2324 }, { "epoch": 0.004124190255315037, "grad_norm": 1.0546875, "learning_rate": 0.0019991363537161886, "loss": 0.2743, "step": 2326 }, { "epoch": 0.004127736420624852, "grad_norm": 0.3828125, "learning_rate": 0.001999133744933043, "loss": 0.2952, "step": 2328 }, { "epoch": 0.004131282585934667, "grad_norm": 0.3828125, "learning_rate": 0.0019991311322176083, "loss": 0.2564, "step": 2330 }, { "epoch": 0.0041348287512444825, "grad_norm": 2.125, "learning_rate": 0.0019991285155698964, "loss": 0.309, "step": 2332 }, { "epoch": 0.004138374916554298, "grad_norm": 0.26953125, "learning_rate": 0.0019991258949899186, "loss": 0.2408, "step": 2334 }, { "epoch": 0.0041419210818641125, "grad_norm": 0.578125, "learning_rate": 0.0019991232704776865, "loss": 0.2607, "step": 2336 }, { "epoch": 0.004145467247173928, "grad_norm": 0.69140625, "learning_rate": 0.001999120642033212, "loss": 0.3421, "step": 2338 }, { "epoch": 0.004149013412483743, "grad_norm": 0.3515625, "learning_rate": 0.0019991180096565054, "loss": 0.4084, "step": 2340 }, { "epoch": 0.004152559577793558, "grad_norm": 0.3515625, "learning_rate": 0.001999115373347579, "loss": 0.2662, "step": 2342 }, { "epoch": 0.004156105743103373, "grad_norm": 0.25, "learning_rate": 0.0019991127331064444, "loss": 0.267, "step": 2344 }, { "epoch": 0.004159651908413189, "grad_norm": 0.47265625, "learning_rate": 0.001999110088933113, "loss": 0.2545, "step": 2346 }, { "epoch": 0.004163198073723003, "grad_norm": 3.28125, "learning_rate": 0.001999107440827596, "loss": 0.358, "step": 2348 }, { "epoch": 0.004166744239032819, "grad_norm": 0.3125, "learning_rate": 0.0019991047887899056, "loss": 0.2937, "step": 2350 }, { "epoch": 0.004170290404342634, "grad_norm": 0.490234375, "learning_rate": 0.0019991021328200536, "loss": 0.2853, "step": 2352 }, { "epoch": 0.0041738365696524496, "grad_norm": 0.55078125, "learning_rate": 0.0019990994729180505, "loss": 0.3396, "step": 2354 }, { "epoch": 0.004177382734962264, "grad_norm": 0.4453125, "learning_rate": 0.0019990968090839085, "loss": 0.4468, "step": 2356 }, { "epoch": 0.0041809289002720795, "grad_norm": 0.427734375, "learning_rate": 0.0019990941413176398, "loss": 0.3123, "step": 2358 }, { "epoch": 0.004184475065581895, "grad_norm": 0.36328125, "learning_rate": 0.001999091469619255, "loss": 0.5217, "step": 2360 }, { "epoch": 0.0041880212308917095, "grad_norm": 0.384765625, "learning_rate": 0.0019990887939887667, "loss": 0.3404, "step": 2362 }, { "epoch": 0.004191567396201525, "grad_norm": 0.33984375, "learning_rate": 0.001999086114426186, "loss": 0.3254, "step": 2364 }, { "epoch": 0.00419511356151134, "grad_norm": 3.875, "learning_rate": 0.0019990834309315253, "loss": 0.5084, "step": 2366 }, { "epoch": 0.004198659726821156, "grad_norm": 0.671875, "learning_rate": 0.001999080743504796, "loss": 0.332, "step": 2368 }, { "epoch": 0.00420220589213097, "grad_norm": 0.6328125, "learning_rate": 0.001999078052146009, "loss": 0.3747, "step": 2370 }, { "epoch": 0.004205752057440786, "grad_norm": 0.7265625, "learning_rate": 0.0019990753568551777, "loss": 0.3267, "step": 2372 }, { "epoch": 0.004209298222750601, "grad_norm": 0.392578125, "learning_rate": 0.0019990726576323125, "loss": 0.3301, "step": 2374 }, { "epoch": 0.004212844388060416, "grad_norm": 0.296875, "learning_rate": 0.001999069954477426, "loss": 0.3264, "step": 2376 }, { "epoch": 0.004216390553370231, "grad_norm": 0.310546875, "learning_rate": 0.0019990672473905297, "loss": 0.2649, "step": 2378 }, { "epoch": 0.0042199367186800466, "grad_norm": 0.6328125, "learning_rate": 0.0019990645363716355, "loss": 0.3235, "step": 2380 }, { "epoch": 0.004223482883989861, "grad_norm": 0.58984375, "learning_rate": 0.001999061821420755, "loss": 0.348, "step": 2382 }, { "epoch": 0.0042270290492996765, "grad_norm": 0.734375, "learning_rate": 0.001999059102537901, "loss": 0.3561, "step": 2384 }, { "epoch": 0.004230575214609492, "grad_norm": 0.298828125, "learning_rate": 0.001999056379723084, "loss": 0.2547, "step": 2386 }, { "epoch": 0.004234121379919307, "grad_norm": 0.546875, "learning_rate": 0.001999053652976317, "loss": 0.326, "step": 2388 }, { "epoch": 0.004237667545229122, "grad_norm": 0.8203125, "learning_rate": 0.0019990509222976116, "loss": 0.37, "step": 2390 }, { "epoch": 0.004241213710538937, "grad_norm": 3.0625, "learning_rate": 0.0019990481876869795, "loss": 0.3745, "step": 2392 }, { "epoch": 0.004244759875848753, "grad_norm": 0.56640625, "learning_rate": 0.001999045449144433, "loss": 0.3181, "step": 2394 }, { "epoch": 0.004248306041158567, "grad_norm": 0.482421875, "learning_rate": 0.0019990427066699837, "loss": 0.2918, "step": 2396 }, { "epoch": 0.004251852206468383, "grad_norm": 0.55078125, "learning_rate": 0.001999039960263644, "loss": 0.2809, "step": 2398 }, { "epoch": 0.004255398371778198, "grad_norm": 0.43359375, "learning_rate": 0.0019990372099254255, "loss": 0.3693, "step": 2400 }, { "epoch": 0.004258944537088014, "grad_norm": 1.0078125, "learning_rate": 0.001999034455655341, "loss": 0.3528, "step": 2402 }, { "epoch": 0.004262490702397828, "grad_norm": 0.2294921875, "learning_rate": 0.0019990316974534015, "loss": 0.2559, "step": 2404 }, { "epoch": 0.0042660368677076435, "grad_norm": 0.267578125, "learning_rate": 0.00199902893531962, "loss": 0.245, "step": 2406 }, { "epoch": 0.004269583033017459, "grad_norm": 0.3515625, "learning_rate": 0.001999026169254008, "loss": 0.343, "step": 2408 }, { "epoch": 0.0042731291983272735, "grad_norm": 0.2890625, "learning_rate": 0.0019990233992565774, "loss": 0.2537, "step": 2410 }, { "epoch": 0.004276675363637089, "grad_norm": 0.44140625, "learning_rate": 0.001999020625327341, "loss": 0.2417, "step": 2412 }, { "epoch": 0.004280221528946904, "grad_norm": 2.484375, "learning_rate": 0.0019990178474663107, "loss": 0.4874, "step": 2414 }, { "epoch": 0.004283767694256719, "grad_norm": 0.5234375, "learning_rate": 0.0019990150656734986, "loss": 0.2428, "step": 2416 }, { "epoch": 0.004287313859566534, "grad_norm": 0.337890625, "learning_rate": 0.001999012279948916, "loss": 0.2781, "step": 2418 }, { "epoch": 0.00429086002487635, "grad_norm": 0.298828125, "learning_rate": 0.001999009490292577, "loss": 0.3514, "step": 2420 }, { "epoch": 0.004294406190186165, "grad_norm": 0.3671875, "learning_rate": 0.001999006696704492, "loss": 0.3882, "step": 2422 }, { "epoch": 0.00429795235549598, "grad_norm": 2.59375, "learning_rate": 0.0019990038991846743, "loss": 0.3699, "step": 2424 }, { "epoch": 0.004301498520805795, "grad_norm": 0.267578125, "learning_rate": 0.0019990010977331356, "loss": 0.3473, "step": 2426 }, { "epoch": 0.0043050446861156106, "grad_norm": 0.5, "learning_rate": 0.0019989982923498883, "loss": 0.2783, "step": 2428 }, { "epoch": 0.004308590851425425, "grad_norm": 2.34375, "learning_rate": 0.0019989954830349444, "loss": 0.4462, "step": 2430 }, { "epoch": 0.0043121370167352405, "grad_norm": 0.439453125, "learning_rate": 0.0019989926697883166, "loss": 0.357, "step": 2432 }, { "epoch": 0.004315683182045056, "grad_norm": 0.333984375, "learning_rate": 0.0019989898526100174, "loss": 0.3523, "step": 2434 }, { "epoch": 0.004319229347354871, "grad_norm": 0.30078125, "learning_rate": 0.0019989870315000585, "loss": 0.2859, "step": 2436 }, { "epoch": 0.004322775512664686, "grad_norm": 0.2412109375, "learning_rate": 0.0019989842064584524, "loss": 0.3051, "step": 2438 }, { "epoch": 0.004326321677974501, "grad_norm": 0.2265625, "learning_rate": 0.0019989813774852123, "loss": 0.2355, "step": 2440 }, { "epoch": 0.004329867843284317, "grad_norm": 0.482421875, "learning_rate": 0.001998978544580349, "loss": 0.4937, "step": 2442 }, { "epoch": 0.004333414008594131, "grad_norm": 0.91015625, "learning_rate": 0.001998975707743876, "loss": 0.4194, "step": 2444 }, { "epoch": 0.004336960173903947, "grad_norm": 0.330078125, "learning_rate": 0.0019989728669758053, "loss": 0.3299, "step": 2446 }, { "epoch": 0.004340506339213762, "grad_norm": 0.41796875, "learning_rate": 0.0019989700222761498, "loss": 0.2959, "step": 2448 }, { "epoch": 0.004344052504523577, "grad_norm": 0.8046875, "learning_rate": 0.0019989671736449213, "loss": 0.345, "step": 2450 }, { "epoch": 0.004347598669833392, "grad_norm": 1.046875, "learning_rate": 0.001998964321082133, "loss": 0.32, "step": 2452 }, { "epoch": 0.0043511448351432076, "grad_norm": 0.4453125, "learning_rate": 0.0019989614645877962, "loss": 0.318, "step": 2454 }, { "epoch": 0.004354691000453023, "grad_norm": 2.46875, "learning_rate": 0.001998958604161925, "loss": 0.3437, "step": 2456 }, { "epoch": 0.0043582371657628375, "grad_norm": 0.365234375, "learning_rate": 0.0019989557398045303, "loss": 0.3221, "step": 2458 }, { "epoch": 0.004361783331072653, "grad_norm": 2.65625, "learning_rate": 0.0019989528715156257, "loss": 0.4671, "step": 2460 }, { "epoch": 0.004365329496382468, "grad_norm": 2.015625, "learning_rate": 0.0019989499992952233, "loss": 0.4874, "step": 2462 }, { "epoch": 0.004368875661692283, "grad_norm": 0.486328125, "learning_rate": 0.001998947123143336, "loss": 0.3161, "step": 2464 }, { "epoch": 0.004372421827002098, "grad_norm": 0.91015625, "learning_rate": 0.001998944243059976, "loss": 0.5244, "step": 2466 }, { "epoch": 0.004375967992311914, "grad_norm": 0.63671875, "learning_rate": 0.0019989413590451558, "loss": 0.5607, "step": 2468 }, { "epoch": 0.004379514157621729, "grad_norm": 0.41015625, "learning_rate": 0.0019989384710988886, "loss": 0.2798, "step": 2470 }, { "epoch": 0.004383060322931544, "grad_norm": 0.51953125, "learning_rate": 0.001998935579221187, "loss": 0.3108, "step": 2472 }, { "epoch": 0.004386606488241359, "grad_norm": 2.90625, "learning_rate": 0.0019989326834120628, "loss": 0.4146, "step": 2474 }, { "epoch": 0.004390152653551175, "grad_norm": 0.41015625, "learning_rate": 0.001998929783671529, "loss": 0.3229, "step": 2476 }, { "epoch": 0.004393698818860989, "grad_norm": 0.431640625, "learning_rate": 0.001998926879999599, "loss": 0.3949, "step": 2478 }, { "epoch": 0.0043972449841708046, "grad_norm": 2.015625, "learning_rate": 0.0019989239723962847, "loss": 0.4253, "step": 2480 }, { "epoch": 0.00440079114948062, "grad_norm": 2.0, "learning_rate": 0.0019989210608615995, "loss": 0.503, "step": 2482 }, { "epoch": 0.0044043373147904345, "grad_norm": 1.8828125, "learning_rate": 0.0019989181453955555, "loss": 0.3762, "step": 2484 }, { "epoch": 0.00440788348010025, "grad_norm": 2.046875, "learning_rate": 0.001998915225998166, "loss": 0.4607, "step": 2486 }, { "epoch": 0.004411429645410065, "grad_norm": 0.314453125, "learning_rate": 0.0019989123026694427, "loss": 0.3172, "step": 2488 }, { "epoch": 0.004414975810719881, "grad_norm": 0.4375, "learning_rate": 0.0019989093754094, "loss": 0.2745, "step": 2490 }, { "epoch": 0.004418521976029695, "grad_norm": 0.2734375, "learning_rate": 0.001998906444218049, "loss": 0.3414, "step": 2492 }, { "epoch": 0.004422068141339511, "grad_norm": 0.71484375, "learning_rate": 0.001998903509095404, "loss": 0.434, "step": 2494 }, { "epoch": 0.004425614306649326, "grad_norm": 0.326171875, "learning_rate": 0.0019989005700414768, "loss": 0.4045, "step": 2496 }, { "epoch": 0.004429160471959141, "grad_norm": 0.59375, "learning_rate": 0.0019988976270562805, "loss": 0.2992, "step": 2498 }, { "epoch": 0.004432706637268956, "grad_norm": 1.546875, "learning_rate": 0.0019988946801398288, "loss": 0.4373, "step": 2500 }, { "epoch": 0.004436252802578772, "grad_norm": 0.5078125, "learning_rate": 0.0019988917292921332, "loss": 0.3932, "step": 2502 }, { "epoch": 0.004439798967888587, "grad_norm": 0.34765625, "learning_rate": 0.001998888774513208, "loss": 0.336, "step": 2504 }, { "epoch": 0.0044433451331984015, "grad_norm": 0.62890625, "learning_rate": 0.001998885815803065, "loss": 0.3264, "step": 2506 }, { "epoch": 0.004446891298508217, "grad_norm": 1.3984375, "learning_rate": 0.0019988828531617175, "loss": 0.3519, "step": 2508 }, { "epoch": 0.004450437463818032, "grad_norm": 1.125, "learning_rate": 0.0019988798865891787, "loss": 0.2821, "step": 2510 }, { "epoch": 0.004453983629127847, "grad_norm": 1.3828125, "learning_rate": 0.0019988769160854615, "loss": 0.3554, "step": 2512 }, { "epoch": 0.004457529794437662, "grad_norm": 1.0859375, "learning_rate": 0.0019988739416505787, "loss": 0.4213, "step": 2514 }, { "epoch": 0.004461075959747478, "grad_norm": 3.515625, "learning_rate": 0.0019988709632845435, "loss": 0.5323, "step": 2516 }, { "epoch": 0.004464622125057292, "grad_norm": 0.828125, "learning_rate": 0.0019988679809873687, "loss": 0.3411, "step": 2518 }, { "epoch": 0.004468168290367108, "grad_norm": 0.36328125, "learning_rate": 0.001998864994759067, "loss": 0.4354, "step": 2520 }, { "epoch": 0.004471714455676923, "grad_norm": 0.36328125, "learning_rate": 0.001998862004599653, "loss": 0.2528, "step": 2522 }, { "epoch": 0.004475260620986739, "grad_norm": 0.8125, "learning_rate": 0.0019988590105091382, "loss": 0.3286, "step": 2524 }, { "epoch": 0.004478806786296553, "grad_norm": 0.380859375, "learning_rate": 0.001998856012487536, "loss": 0.8415, "step": 2526 }, { "epoch": 0.004482352951606369, "grad_norm": 1.6171875, "learning_rate": 0.00199885301053486, "loss": 0.3309, "step": 2528 }, { "epoch": 0.004485899116916184, "grad_norm": 1.109375, "learning_rate": 0.0019988500046511227, "loss": 0.3953, "step": 2530 }, { "epoch": 0.0044894452822259985, "grad_norm": 0.44921875, "learning_rate": 0.0019988469948363377, "loss": 0.3209, "step": 2532 }, { "epoch": 0.004492991447535814, "grad_norm": 0.58984375, "learning_rate": 0.0019988439810905184, "loss": 0.3589, "step": 2534 }, { "epoch": 0.004496537612845629, "grad_norm": 0.498046875, "learning_rate": 0.001998840963413678, "loss": 0.3395, "step": 2536 }, { "epoch": 0.004500083778155445, "grad_norm": 0.4609375, "learning_rate": 0.001998837941805828, "loss": 0.2942, "step": 2538 }, { "epoch": 0.004503629943465259, "grad_norm": 0.373046875, "learning_rate": 0.001998834916266984, "loss": 0.3399, "step": 2540 }, { "epoch": 0.004507176108775075, "grad_norm": 2.40625, "learning_rate": 0.001998831886797158, "loss": 0.3492, "step": 2542 }, { "epoch": 0.00451072227408489, "grad_norm": 0.2734375, "learning_rate": 0.0019988288533963634, "loss": 0.2839, "step": 2544 }, { "epoch": 0.004514268439394705, "grad_norm": 0.37109375, "learning_rate": 0.001998825816064613, "loss": 0.2991, "step": 2546 }, { "epoch": 0.00451781460470452, "grad_norm": 0.1953125, "learning_rate": 0.0019988227748019213, "loss": 0.2732, "step": 2548 }, { "epoch": 0.004521360770014336, "grad_norm": 0.96484375, "learning_rate": 0.0019988197296083, "loss": 0.411, "step": 2550 }, { "epoch": 0.00452490693532415, "grad_norm": 0.51953125, "learning_rate": 0.0019988166804837644, "loss": 0.5054, "step": 2552 }, { "epoch": 0.0045284531006339656, "grad_norm": 0.35546875, "learning_rate": 0.001998813627428326, "loss": 0.2754, "step": 2554 }, { "epoch": 0.004531999265943781, "grad_norm": 0.2109375, "learning_rate": 0.0019988105704419994, "loss": 0.271, "step": 2556 }, { "epoch": 0.004535545431253596, "grad_norm": 0.625, "learning_rate": 0.001998807509524797, "loss": 0.3684, "step": 2558 }, { "epoch": 0.004539091596563411, "grad_norm": 0.49609375, "learning_rate": 0.001998804444676733, "loss": 0.3516, "step": 2560 }, { "epoch": 0.004542637761873226, "grad_norm": 0.40625, "learning_rate": 0.0019988013758978204, "loss": 0.3089, "step": 2562 }, { "epoch": 0.004546183927183042, "grad_norm": 0.447265625, "learning_rate": 0.0019987983031880723, "loss": 0.3493, "step": 2564 }, { "epoch": 0.004549730092492856, "grad_norm": 0.365234375, "learning_rate": 0.001998795226547503, "loss": 0.2243, "step": 2566 }, { "epoch": 0.004553276257802672, "grad_norm": 4.3125, "learning_rate": 0.001998792145976125, "loss": 0.413, "step": 2568 }, { "epoch": 0.004556822423112487, "grad_norm": 0.60546875, "learning_rate": 0.001998789061473952, "loss": 0.3219, "step": 2570 }, { "epoch": 0.004560368588422303, "grad_norm": 0.388671875, "learning_rate": 0.0019987859730409984, "loss": 0.2752, "step": 2572 }, { "epoch": 0.004563914753732117, "grad_norm": 0.6328125, "learning_rate": 0.001998782880677277, "loss": 0.4357, "step": 2574 }, { "epoch": 0.004567460919041933, "grad_norm": 0.5390625, "learning_rate": 0.001998779784382801, "loss": 0.3183, "step": 2576 }, { "epoch": 0.004571007084351748, "grad_norm": 0.9609375, "learning_rate": 0.001998776684157584, "loss": 0.4608, "step": 2578 }, { "epoch": 0.0045745532496615626, "grad_norm": 0.337890625, "learning_rate": 0.0019987735800016406, "loss": 0.371, "step": 2580 }, { "epoch": 0.004578099414971378, "grad_norm": 1.4453125, "learning_rate": 0.0019987704719149836, "loss": 0.2915, "step": 2582 }, { "epoch": 0.004581645580281193, "grad_norm": 1.0625, "learning_rate": 0.0019987673598976263, "loss": 0.4373, "step": 2584 }, { "epoch": 0.004585191745591008, "grad_norm": 0.265625, "learning_rate": 0.0019987642439495824, "loss": 0.272, "step": 2586 }, { "epoch": 0.004588737910900823, "grad_norm": 1.53125, "learning_rate": 0.001998761124070866, "loss": 0.484, "step": 2588 }, { "epoch": 0.004592284076210639, "grad_norm": 1.390625, "learning_rate": 0.0019987580002614907, "loss": 0.3142, "step": 2590 }, { "epoch": 0.004595830241520454, "grad_norm": 0.453125, "learning_rate": 0.0019987548725214697, "loss": 0.3708, "step": 2592 }, { "epoch": 0.004599376406830269, "grad_norm": 0.376953125, "learning_rate": 0.0019987517408508173, "loss": 0.3678, "step": 2594 }, { "epoch": 0.004602922572140084, "grad_norm": 1.796875, "learning_rate": 0.001998748605249546, "loss": 0.3708, "step": 2596 }, { "epoch": 0.0046064687374499, "grad_norm": 0.54296875, "learning_rate": 0.001998745465717671, "loss": 0.244, "step": 2598 }, { "epoch": 0.004610014902759714, "grad_norm": 1.1796875, "learning_rate": 0.0019987423222552056, "loss": 0.3241, "step": 2600 }, { "epoch": 0.00461356106806953, "grad_norm": 1.5625, "learning_rate": 0.001998739174862163, "loss": 0.3932, "step": 2602 }, { "epoch": 0.004617107233379345, "grad_norm": 0.3984375, "learning_rate": 0.0019987360235385575, "loss": 0.2386, "step": 2604 }, { "epoch": 0.00462065339868916, "grad_norm": 0.5625, "learning_rate": 0.0019987328682844027, "loss": 0.2481, "step": 2606 }, { "epoch": 0.004624199563998975, "grad_norm": 0.26953125, "learning_rate": 0.0019987297090997124, "loss": 0.3161, "step": 2608 }, { "epoch": 0.00462774572930879, "grad_norm": 1.4296875, "learning_rate": 0.0019987265459845, "loss": 0.658, "step": 2610 }, { "epoch": 0.004631291894618606, "grad_norm": 0.671875, "learning_rate": 0.00199872337893878, "loss": 0.3221, "step": 2612 }, { "epoch": 0.00463483805992842, "grad_norm": 0.9609375, "learning_rate": 0.001998720207962566, "loss": 0.4271, "step": 2614 }, { "epoch": 0.004638384225238236, "grad_norm": 0.21484375, "learning_rate": 0.0019987170330558715, "loss": 0.313, "step": 2616 }, { "epoch": 0.004641930390548051, "grad_norm": 1.046875, "learning_rate": 0.001998713854218711, "loss": 0.3507, "step": 2618 }, { "epoch": 0.004645476555857866, "grad_norm": 0.30859375, "learning_rate": 0.001998710671451098, "loss": 0.2538, "step": 2620 }, { "epoch": 0.004649022721167681, "grad_norm": 1.046875, "learning_rate": 0.001998707484753047, "loss": 0.3987, "step": 2622 }, { "epoch": 0.004652568886477497, "grad_norm": 0.54296875, "learning_rate": 0.001998704294124571, "loss": 0.3039, "step": 2624 }, { "epoch": 0.004656115051787312, "grad_norm": 0.36328125, "learning_rate": 0.001998701099565685, "loss": 0.3143, "step": 2626 }, { "epoch": 0.004659661217097127, "grad_norm": 0.34765625, "learning_rate": 0.001998697901076402, "loss": 0.3227, "step": 2628 }, { "epoch": 0.004663207382406942, "grad_norm": 0.365234375, "learning_rate": 0.0019986946986567363, "loss": 0.335, "step": 2630 }, { "epoch": 0.004666753547716757, "grad_norm": 1.6796875, "learning_rate": 0.0019986914923067027, "loss": 0.3679, "step": 2632 }, { "epoch": 0.004670299713026572, "grad_norm": 0.6328125, "learning_rate": 0.0019986882820263135, "loss": 0.3196, "step": 2634 }, { "epoch": 0.004673845878336387, "grad_norm": 0.369140625, "learning_rate": 0.0019986850678155844, "loss": 0.2708, "step": 2636 }, { "epoch": 0.004677392043646203, "grad_norm": 0.4296875, "learning_rate": 0.001998681849674529, "loss": 0.2926, "step": 2638 }, { "epoch": 0.004680938208956018, "grad_norm": 1.7109375, "learning_rate": 0.0019986786276031607, "loss": 0.2916, "step": 2640 }, { "epoch": 0.004684484374265833, "grad_norm": 0.4375, "learning_rate": 0.0019986754016014944, "loss": 0.3816, "step": 2642 }, { "epoch": 0.004688030539575648, "grad_norm": 0.33203125, "learning_rate": 0.001998672171669544, "loss": 0.4013, "step": 2644 }, { "epoch": 0.004691576704885464, "grad_norm": 0.375, "learning_rate": 0.001998668937807323, "loss": 0.3108, "step": 2646 }, { "epoch": 0.004695122870195278, "grad_norm": 0.197265625, "learning_rate": 0.0019986657000148466, "loss": 0.2387, "step": 2648 }, { "epoch": 0.004698669035505094, "grad_norm": 4.40625, "learning_rate": 0.001998662458292128, "loss": 0.3733, "step": 2650 }, { "epoch": 0.004702215200814909, "grad_norm": 0.51953125, "learning_rate": 0.001998659212639182, "loss": 0.4222, "step": 2652 }, { "epoch": 0.0047057613661247236, "grad_norm": 0.75, "learning_rate": 0.001998655963056023, "loss": 0.2702, "step": 2654 }, { "epoch": 0.004709307531434539, "grad_norm": 0.2734375, "learning_rate": 0.0019986527095426647, "loss": 0.2796, "step": 2656 }, { "epoch": 0.004712853696744354, "grad_norm": 1.46875, "learning_rate": 0.0019986494520991215, "loss": 0.3217, "step": 2658 }, { "epoch": 0.00471639986205417, "grad_norm": 1.015625, "learning_rate": 0.001998646190725407, "loss": 0.3625, "step": 2660 }, { "epoch": 0.004719946027363984, "grad_norm": 0.298828125, "learning_rate": 0.0019986429254215364, "loss": 0.2949, "step": 2662 }, { "epoch": 0.0047234921926738, "grad_norm": 0.328125, "learning_rate": 0.0019986396561875236, "loss": 0.2631, "step": 2664 }, { "epoch": 0.004727038357983615, "grad_norm": 2.890625, "learning_rate": 0.0019986363830233832, "loss": 0.4082, "step": 2666 }, { "epoch": 0.00473058452329343, "grad_norm": 0.734375, "learning_rate": 0.001998633105929129, "loss": 0.4736, "step": 2668 }, { "epoch": 0.004734130688603245, "grad_norm": 0.44921875, "learning_rate": 0.0019986298249047756, "loss": 0.329, "step": 2670 }, { "epoch": 0.004737676853913061, "grad_norm": 0.470703125, "learning_rate": 0.0019986265399503374, "loss": 0.4027, "step": 2672 }, { "epoch": 0.004741223019222876, "grad_norm": 0.5625, "learning_rate": 0.0019986232510658284, "loss": 0.2617, "step": 2674 }, { "epoch": 0.004744769184532691, "grad_norm": 0.5078125, "learning_rate": 0.001998619958251264, "loss": 0.3261, "step": 2676 }, { "epoch": 0.004748315349842506, "grad_norm": 1.3515625, "learning_rate": 0.001998616661506657, "loss": 0.396, "step": 2678 }, { "epoch": 0.004751861515152321, "grad_norm": 0.63671875, "learning_rate": 0.0019986133608320233, "loss": 0.3203, "step": 2680 }, { "epoch": 0.004755407680462136, "grad_norm": 1.3515625, "learning_rate": 0.0019986100562273765, "loss": 0.3113, "step": 2682 }, { "epoch": 0.004758953845771951, "grad_norm": 2.625, "learning_rate": 0.0019986067476927315, "loss": 0.5206, "step": 2684 }, { "epoch": 0.004762500011081767, "grad_norm": 1.515625, "learning_rate": 0.0019986034352281025, "loss": 0.4123, "step": 2686 }, { "epoch": 0.004766046176391581, "grad_norm": 0.33984375, "learning_rate": 0.001998600118833504, "loss": 0.2106, "step": 2688 }, { "epoch": 0.004769592341701397, "grad_norm": 0.48828125, "learning_rate": 0.0019985967985089504, "loss": 0.3059, "step": 2690 }, { "epoch": 0.004773138507011212, "grad_norm": 1.921875, "learning_rate": 0.0019985934742544564, "loss": 0.3459, "step": 2692 }, { "epoch": 0.004776684672321028, "grad_norm": 1.921875, "learning_rate": 0.0019985901460700365, "loss": 0.3537, "step": 2694 }, { "epoch": 0.004780230837630842, "grad_norm": 0.51171875, "learning_rate": 0.0019985868139557055, "loss": 0.3089, "step": 2696 }, { "epoch": 0.004783777002940658, "grad_norm": 1.015625, "learning_rate": 0.0019985834779114777, "loss": 0.4324, "step": 2698 }, { "epoch": 0.004787323168250473, "grad_norm": 1.8671875, "learning_rate": 0.0019985801379373675, "loss": 0.3557, "step": 2700 }, { "epoch": 0.004790869333560288, "grad_norm": 0.77734375, "learning_rate": 0.00199857679403339, "loss": 0.3592, "step": 2702 }, { "epoch": 0.004794415498870103, "grad_norm": 0.4921875, "learning_rate": 0.001998573446199559, "loss": 0.3282, "step": 2704 }, { "epoch": 0.004797961664179918, "grad_norm": 0.423828125, "learning_rate": 0.001998570094435891, "loss": 0.5474, "step": 2706 }, { "epoch": 0.004801507829489734, "grad_norm": 0.4375, "learning_rate": 0.0019985667387423978, "loss": 0.3031, "step": 2708 }, { "epoch": 0.004805053994799548, "grad_norm": 0.82421875, "learning_rate": 0.0019985633791190964, "loss": 0.3576, "step": 2710 }, { "epoch": 0.004808600160109364, "grad_norm": 1.328125, "learning_rate": 0.0019985600155660007, "loss": 0.4023, "step": 2712 }, { "epoch": 0.004812146325419179, "grad_norm": 0.98828125, "learning_rate": 0.001998556648083125, "loss": 0.2578, "step": 2714 }, { "epoch": 0.004815692490728994, "grad_norm": 0.4296875, "learning_rate": 0.001998553276670485, "loss": 0.2773, "step": 2716 }, { "epoch": 0.004819238656038809, "grad_norm": 0.31640625, "learning_rate": 0.0019985499013280947, "loss": 0.3085, "step": 2718 }, { "epoch": 0.004822784821348625, "grad_norm": 0.2890625, "learning_rate": 0.001998546522055969, "loss": 0.3116, "step": 2720 }, { "epoch": 0.004826330986658439, "grad_norm": 0.87109375, "learning_rate": 0.0019985431388541233, "loss": 0.4739, "step": 2722 }, { "epoch": 0.004829877151968255, "grad_norm": 0.69140625, "learning_rate": 0.001998539751722571, "loss": 0.3929, "step": 2724 }, { "epoch": 0.00483342331727807, "grad_norm": 1.2890625, "learning_rate": 0.0019985363606613285, "loss": 0.3235, "step": 2726 }, { "epoch": 0.0048369694825878854, "grad_norm": 0.765625, "learning_rate": 0.0019985329656704094, "loss": 0.2474, "step": 2728 }, { "epoch": 0.0048405156478977, "grad_norm": 0.6015625, "learning_rate": 0.001998529566749829, "loss": 0.354, "step": 2730 }, { "epoch": 0.004844061813207515, "grad_norm": 0.5703125, "learning_rate": 0.0019985261638996027, "loss": 0.2822, "step": 2732 }, { "epoch": 0.004847607978517331, "grad_norm": 0.5390625, "learning_rate": 0.0019985227571197445, "loss": 0.3357, "step": 2734 }, { "epoch": 0.004851154143827145, "grad_norm": 0.4296875, "learning_rate": 0.00199851934641027, "loss": 0.3095, "step": 2736 }, { "epoch": 0.004854700309136961, "grad_norm": 1.2578125, "learning_rate": 0.0019985159317711934, "loss": 0.554, "step": 2738 }, { "epoch": 0.004858246474446776, "grad_norm": 0.306640625, "learning_rate": 0.0019985125132025304, "loss": 0.3431, "step": 2740 }, { "epoch": 0.004861792639756591, "grad_norm": 0.6328125, "learning_rate": 0.001998509090704295, "loss": 0.3463, "step": 2742 }, { "epoch": 0.004865338805066406, "grad_norm": 0.302734375, "learning_rate": 0.001998505664276503, "loss": 0.298, "step": 2744 }, { "epoch": 0.004868884970376222, "grad_norm": 0.498046875, "learning_rate": 0.0019985022339191697, "loss": 0.2978, "step": 2746 }, { "epoch": 0.004872431135686037, "grad_norm": 0.74609375, "learning_rate": 0.0019984987996323087, "loss": 0.3427, "step": 2748 }, { "epoch": 0.004875977300995852, "grad_norm": 0.310546875, "learning_rate": 0.0019984953614159365, "loss": 0.3363, "step": 2750 }, { "epoch": 0.004879523466305667, "grad_norm": 0.2314453125, "learning_rate": 0.0019984919192700674, "loss": 0.2652, "step": 2752 }, { "epoch": 0.0048830696316154824, "grad_norm": 1.0625, "learning_rate": 0.0019984884731947166, "loss": 0.3352, "step": 2754 }, { "epoch": 0.004886615796925297, "grad_norm": 0.4921875, "learning_rate": 0.001998485023189899, "loss": 0.3254, "step": 2756 }, { "epoch": 0.004890161962235112, "grad_norm": 0.97265625, "learning_rate": 0.0019984815692556295, "loss": 0.4867, "step": 2758 }, { "epoch": 0.004893708127544928, "grad_norm": 0.765625, "learning_rate": 0.001998478111391924, "loss": 0.4473, "step": 2760 }, { "epoch": 0.004897254292854743, "grad_norm": 0.296875, "learning_rate": 0.0019984746495987967, "loss": 0.3472, "step": 2762 }, { "epoch": 0.004900800458164558, "grad_norm": 0.98046875, "learning_rate": 0.0019984711838762635, "loss": 0.4279, "step": 2764 }, { "epoch": 0.004904346623474373, "grad_norm": 3.09375, "learning_rate": 0.0019984677142243393, "loss": 0.3315, "step": 2766 }, { "epoch": 0.004907892788784189, "grad_norm": 0.734375, "learning_rate": 0.001998464240643039, "loss": 0.5489, "step": 2768 }, { "epoch": 0.004911438954094003, "grad_norm": 0.62890625, "learning_rate": 0.001998460763132378, "loss": 0.3425, "step": 2770 }, { "epoch": 0.004914985119403819, "grad_norm": 0.44140625, "learning_rate": 0.0019984572816923716, "loss": 0.4468, "step": 2772 }, { "epoch": 0.004918531284713634, "grad_norm": 0.3125, "learning_rate": 0.0019984537963230347, "loss": 0.2923, "step": 2774 }, { "epoch": 0.004922077450023449, "grad_norm": 0.83203125, "learning_rate": 0.001998450307024383, "loss": 0.3882, "step": 2776 }, { "epoch": 0.004925623615333264, "grad_norm": 0.78125, "learning_rate": 0.001998446813796432, "loss": 0.4952, "step": 2778 }, { "epoch": 0.004929169780643079, "grad_norm": 0.59375, "learning_rate": 0.0019984433166391957, "loss": 0.3895, "step": 2780 }, { "epoch": 0.004932715945952895, "grad_norm": 0.2890625, "learning_rate": 0.0019984398155526903, "loss": 0.2522, "step": 2782 }, { "epoch": 0.004936262111262709, "grad_norm": 0.330078125, "learning_rate": 0.001998436310536932, "loss": 0.2946, "step": 2784 }, { "epoch": 0.004939808276572525, "grad_norm": 0.72265625, "learning_rate": 0.001998432801591934, "loss": 0.3808, "step": 2786 }, { "epoch": 0.00494335444188234, "grad_norm": 0.4375, "learning_rate": 0.0019984292887177133, "loss": 0.4355, "step": 2788 }, { "epoch": 0.004946900607192155, "grad_norm": 1.984375, "learning_rate": 0.0019984257719142845, "loss": 0.4189, "step": 2790 }, { "epoch": 0.00495044677250197, "grad_norm": 0.6171875, "learning_rate": 0.0019984222511816633, "loss": 0.3627, "step": 2792 }, { "epoch": 0.004953992937811786, "grad_norm": 0.859375, "learning_rate": 0.001998418726519865, "loss": 0.3463, "step": 2794 }, { "epoch": 0.004957539103121601, "grad_norm": 0.37109375, "learning_rate": 0.0019984151979289054, "loss": 0.343, "step": 2796 }, { "epoch": 0.004961085268431416, "grad_norm": 1.1328125, "learning_rate": 0.0019984116654087995, "loss": 0.3639, "step": 2798 }, { "epoch": 0.004964631433741231, "grad_norm": 0.404296875, "learning_rate": 0.0019984081289595628, "loss": 0.3211, "step": 2800 }, { "epoch": 0.0049681775990510465, "grad_norm": 0.81640625, "learning_rate": 0.0019984045885812107, "loss": 0.4205, "step": 2802 }, { "epoch": 0.004971723764360861, "grad_norm": 0.4453125, "learning_rate": 0.0019984010442737586, "loss": 0.2752, "step": 2804 }, { "epoch": 0.004975269929670676, "grad_norm": 0.671875, "learning_rate": 0.0019983974960372224, "loss": 0.4363, "step": 2806 }, { "epoch": 0.004978816094980492, "grad_norm": 0.455078125, "learning_rate": 0.001998393943871618, "loss": 0.3035, "step": 2808 }, { "epoch": 0.004982362260290306, "grad_norm": 0.4609375, "learning_rate": 0.001998390387776959, "loss": 0.3992, "step": 2810 }, { "epoch": 0.004985908425600122, "grad_norm": 0.59765625, "learning_rate": 0.0019983868277532635, "loss": 0.4628, "step": 2812 }, { "epoch": 0.004989454590909937, "grad_norm": 0.9453125, "learning_rate": 0.001998383263800545, "loss": 0.3996, "step": 2814 }, { "epoch": 0.004993000756219753, "grad_norm": 0.255859375, "learning_rate": 0.0019983796959188206, "loss": 0.3718, "step": 2816 }, { "epoch": 0.004996546921529567, "grad_norm": 3.203125, "learning_rate": 0.001998376124108105, "loss": 0.4216, "step": 2818 }, { "epoch": 0.005000093086839383, "grad_norm": 1.2578125, "learning_rate": 0.001998372548368414, "loss": 0.3556, "step": 2820 }, { "epoch": 0.005003639252149198, "grad_norm": 0.26171875, "learning_rate": 0.001998368968699763, "loss": 0.2456, "step": 2822 }, { "epoch": 0.005007185417459013, "grad_norm": 1.7265625, "learning_rate": 0.0019983653851021687, "loss": 0.4176, "step": 2824 }, { "epoch": 0.005010731582768828, "grad_norm": 0.60546875, "learning_rate": 0.0019983617975756454, "loss": 0.3417, "step": 2826 }, { "epoch": 0.0050142777480786434, "grad_norm": 0.275390625, "learning_rate": 0.00199835820612021, "loss": 0.2935, "step": 2828 }, { "epoch": 0.005017823913388459, "grad_norm": 0.2470703125, "learning_rate": 0.001998354610735877, "loss": 0.2535, "step": 2830 }, { "epoch": 0.005021370078698273, "grad_norm": 0.6875, "learning_rate": 0.0019983510114226634, "loss": 0.2801, "step": 2832 }, { "epoch": 0.005024916244008089, "grad_norm": 0.38671875, "learning_rate": 0.001998347408180584, "loss": 0.2713, "step": 2834 }, { "epoch": 0.005028462409317904, "grad_norm": 0.54296875, "learning_rate": 0.001998343801009655, "loss": 0.3337, "step": 2836 }, { "epoch": 0.005032008574627719, "grad_norm": 0.33203125, "learning_rate": 0.001998340189909892, "loss": 0.2395, "step": 2838 }, { "epoch": 0.005035554739937534, "grad_norm": 0.388671875, "learning_rate": 0.0019983365748813104, "loss": 0.3079, "step": 2840 }, { "epoch": 0.00503910090524735, "grad_norm": 0.4765625, "learning_rate": 0.001998332955923927, "loss": 0.4155, "step": 2842 }, { "epoch": 0.005042647070557164, "grad_norm": 0.470703125, "learning_rate": 0.0019983293330377567, "loss": 0.3849, "step": 2844 }, { "epoch": 0.00504619323586698, "grad_norm": 0.373046875, "learning_rate": 0.001998325706222816, "loss": 0.3271, "step": 2846 }, { "epoch": 0.005049739401176795, "grad_norm": 0.7421875, "learning_rate": 0.00199832207547912, "loss": 0.2931, "step": 2848 }, { "epoch": 0.0050532855664866105, "grad_norm": 0.466796875, "learning_rate": 0.0019983184408066857, "loss": 0.3996, "step": 2850 }, { "epoch": 0.005056831731796425, "grad_norm": 0.63671875, "learning_rate": 0.001998314802205528, "loss": 0.2881, "step": 2852 }, { "epoch": 0.0050603778971062404, "grad_norm": 0.427734375, "learning_rate": 0.001998311159675663, "loss": 0.2577, "step": 2854 }, { "epoch": 0.005063924062416056, "grad_norm": 0.5625, "learning_rate": 0.0019983075132171068, "loss": 0.3623, "step": 2856 }, { "epoch": 0.00506747022772587, "grad_norm": 0.2294921875, "learning_rate": 0.0019983038628298756, "loss": 0.2591, "step": 2858 }, { "epoch": 0.005071016393035686, "grad_norm": 1.046875, "learning_rate": 0.001998300208513985, "loss": 0.3839, "step": 2860 }, { "epoch": 0.005074562558345501, "grad_norm": 0.267578125, "learning_rate": 0.001998296550269451, "loss": 0.3262, "step": 2862 }, { "epoch": 0.005078108723655317, "grad_norm": 0.283203125, "learning_rate": 0.0019982928880962897, "loss": 0.3527, "step": 2864 }, { "epoch": 0.005081654888965131, "grad_norm": 0.4375, "learning_rate": 0.0019982892219945173, "loss": 0.3176, "step": 2866 }, { "epoch": 0.005085201054274947, "grad_norm": 0.248046875, "learning_rate": 0.0019982855519641496, "loss": 0.2781, "step": 2868 }, { "epoch": 0.005088747219584762, "grad_norm": 0.306640625, "learning_rate": 0.0019982818780052026, "loss": 0.2864, "step": 2870 }, { "epoch": 0.005092293384894577, "grad_norm": 0.953125, "learning_rate": 0.0019982782001176924, "loss": 0.3155, "step": 2872 }, { "epoch": 0.005095839550204392, "grad_norm": 0.6015625, "learning_rate": 0.0019982745183016347, "loss": 0.4162, "step": 2874 }, { "epoch": 0.0050993857155142075, "grad_norm": 0.640625, "learning_rate": 0.001998270832557047, "loss": 0.3198, "step": 2876 }, { "epoch": 0.005102931880824022, "grad_norm": 0.546875, "learning_rate": 0.001998267142883944, "loss": 0.287, "step": 2878 }, { "epoch": 0.0051064780461338374, "grad_norm": 0.251953125, "learning_rate": 0.001998263449282342, "loss": 0.2977, "step": 2880 }, { "epoch": 0.005110024211443653, "grad_norm": 0.89453125, "learning_rate": 0.0019982597517522573, "loss": 0.321, "step": 2882 }, { "epoch": 0.005113570376753468, "grad_norm": 0.380859375, "learning_rate": 0.001998256050293707, "loss": 0.4265, "step": 2884 }, { "epoch": 0.005117116542063283, "grad_norm": 0.8203125, "learning_rate": 0.001998252344906706, "loss": 0.3992, "step": 2886 }, { "epoch": 0.005120662707373098, "grad_norm": 0.5, "learning_rate": 0.0019982486355912712, "loss": 0.3643, "step": 2888 }, { "epoch": 0.005124208872682914, "grad_norm": 0.7109375, "learning_rate": 0.0019982449223474186, "loss": 0.3402, "step": 2890 }, { "epoch": 0.005127755037992728, "grad_norm": 0.5703125, "learning_rate": 0.001998241205175164, "loss": 0.5101, "step": 2892 }, { "epoch": 0.005131301203302544, "grad_norm": 0.578125, "learning_rate": 0.001998237484074525, "loss": 0.3634, "step": 2894 }, { "epoch": 0.005134847368612359, "grad_norm": 0.58984375, "learning_rate": 0.0019982337590455164, "loss": 0.3343, "step": 2896 }, { "epoch": 0.0051383935339221745, "grad_norm": 0.40234375, "learning_rate": 0.001998230030088155, "loss": 0.4453, "step": 2898 }, { "epoch": 0.005141939699231989, "grad_norm": 1.2265625, "learning_rate": 0.0019982262972024576, "loss": 0.395, "step": 2900 }, { "epoch": 0.0051454858645418045, "grad_norm": 1.4921875, "learning_rate": 0.00199822256038844, "loss": 0.3722, "step": 2902 }, { "epoch": 0.00514903202985162, "grad_norm": 0.3046875, "learning_rate": 0.0019982188196461187, "loss": 0.3089, "step": 2904 }, { "epoch": 0.005152578195161434, "grad_norm": 0.6796875, "learning_rate": 0.00199821507497551, "loss": 0.3687, "step": 2906 }, { "epoch": 0.00515612436047125, "grad_norm": 0.373046875, "learning_rate": 0.0019982113263766303, "loss": 0.2594, "step": 2908 }, { "epoch": 0.005159670525781065, "grad_norm": 0.796875, "learning_rate": 0.001998207573849496, "loss": 0.4331, "step": 2910 }, { "epoch": 0.00516321669109088, "grad_norm": 0.220703125, "learning_rate": 0.0019982038173941234, "loss": 0.2379, "step": 2912 }, { "epoch": 0.005166762856400695, "grad_norm": 0.546875, "learning_rate": 0.001998200057010529, "loss": 0.395, "step": 2914 }, { "epoch": 0.005170309021710511, "grad_norm": 1.046875, "learning_rate": 0.0019981962926987292, "loss": 0.3564, "step": 2916 }, { "epoch": 0.005173855187020326, "grad_norm": 0.353515625, "learning_rate": 0.001998192524458741, "loss": 0.3461, "step": 2918 }, { "epoch": 0.005177401352330141, "grad_norm": 0.25390625, "learning_rate": 0.0019981887522905802, "loss": 0.298, "step": 2920 }, { "epoch": 0.005180947517639956, "grad_norm": 0.4375, "learning_rate": 0.0019981849761942635, "loss": 0.2868, "step": 2922 }, { "epoch": 0.0051844936829497715, "grad_norm": 0.890625, "learning_rate": 0.0019981811961698077, "loss": 0.3366, "step": 2924 }, { "epoch": 0.005188039848259586, "grad_norm": 0.349609375, "learning_rate": 0.001998177412217229, "loss": 0.4207, "step": 2926 }, { "epoch": 0.0051915860135694014, "grad_norm": 0.361328125, "learning_rate": 0.0019981736243365436, "loss": 0.2418, "step": 2928 }, { "epoch": 0.005195132178879217, "grad_norm": 0.52734375, "learning_rate": 0.0019981698325277687, "loss": 0.3548, "step": 2930 }, { "epoch": 0.005198678344189032, "grad_norm": 0.953125, "learning_rate": 0.0019981660367909206, "loss": 0.505, "step": 2932 }, { "epoch": 0.005202224509498847, "grad_norm": 2.6875, "learning_rate": 0.0019981622371260162, "loss": 0.3292, "step": 2934 }, { "epoch": 0.005205770674808662, "grad_norm": 0.59375, "learning_rate": 0.001998158433533072, "loss": 0.3489, "step": 2936 }, { "epoch": 0.005209316840118478, "grad_norm": 0.75, "learning_rate": 0.0019981546260121044, "loss": 0.3117, "step": 2938 }, { "epoch": 0.005212863005428292, "grad_norm": 1.0234375, "learning_rate": 0.00199815081456313, "loss": 0.4501, "step": 2940 }, { "epoch": 0.005216409170738108, "grad_norm": 0.314453125, "learning_rate": 0.001998146999186166, "loss": 0.2799, "step": 2942 }, { "epoch": 0.005219955336047923, "grad_norm": 0.49609375, "learning_rate": 0.0019981431798812284, "loss": 0.2491, "step": 2944 }, { "epoch": 0.005223501501357738, "grad_norm": 0.94921875, "learning_rate": 0.001998139356648334, "loss": 0.3046, "step": 2946 }, { "epoch": 0.005227047666667553, "grad_norm": 0.37890625, "learning_rate": 0.0019981355294875, "loss": 0.3474, "step": 2948 }, { "epoch": 0.0052305938319773685, "grad_norm": 0.60546875, "learning_rate": 0.001998131698398743, "loss": 0.3788, "step": 2950 }, { "epoch": 0.005234139997287184, "grad_norm": 0.5078125, "learning_rate": 0.0019981278633820795, "loss": 0.3671, "step": 2952 }, { "epoch": 0.0052376861625969984, "grad_norm": 0.35546875, "learning_rate": 0.0019981240244375266, "loss": 0.3066, "step": 2954 }, { "epoch": 0.005241232327906814, "grad_norm": 1.1328125, "learning_rate": 0.001998120181565101, "loss": 0.4496, "step": 2956 }, { "epoch": 0.005244778493216629, "grad_norm": 0.3515625, "learning_rate": 0.001998116334764819, "loss": 0.2959, "step": 2958 }, { "epoch": 0.005248324658526444, "grad_norm": 0.48828125, "learning_rate": 0.0019981124840366985, "loss": 0.3182, "step": 2960 }, { "epoch": 0.005251870823836259, "grad_norm": 1.125, "learning_rate": 0.001998108629380755, "loss": 0.3046, "step": 2962 }, { "epoch": 0.005255416989146075, "grad_norm": 0.97265625, "learning_rate": 0.001998104770797006, "loss": 0.3488, "step": 2964 }, { "epoch": 0.00525896315445589, "grad_norm": 0.380859375, "learning_rate": 0.0019981009082854695, "loss": 0.2864, "step": 2966 }, { "epoch": 0.005262509319765705, "grad_norm": 0.27734375, "learning_rate": 0.00199809704184616, "loss": 0.3037, "step": 2968 }, { "epoch": 0.00526605548507552, "grad_norm": 0.318359375, "learning_rate": 0.0019980931714790964, "loss": 0.3639, "step": 2970 }, { "epoch": 0.0052696016503853355, "grad_norm": 0.69921875, "learning_rate": 0.0019980892971842947, "loss": 0.3356, "step": 2972 }, { "epoch": 0.00527314781569515, "grad_norm": 1.7734375, "learning_rate": 0.0019980854189617725, "loss": 0.4104, "step": 2974 }, { "epoch": 0.0052766939810049655, "grad_norm": 0.4921875, "learning_rate": 0.001998081536811546, "loss": 0.2835, "step": 2976 }, { "epoch": 0.005280240146314781, "grad_norm": 0.2353515625, "learning_rate": 0.0019980776507336324, "loss": 0.2296, "step": 2978 }, { "epoch": 0.0052837863116245954, "grad_norm": 0.265625, "learning_rate": 0.0019980737607280486, "loss": 0.3178, "step": 2980 }, { "epoch": 0.005287332476934411, "grad_norm": 1.03125, "learning_rate": 0.0019980698667948125, "loss": 0.2921, "step": 2982 }, { "epoch": 0.005290878642244226, "grad_norm": 0.4609375, "learning_rate": 0.00199806596893394, "loss": 0.3058, "step": 2984 }, { "epoch": 0.005294424807554042, "grad_norm": 0.291015625, "learning_rate": 0.001998062067145449, "loss": 0.354, "step": 2986 }, { "epoch": 0.005297970972863856, "grad_norm": 0.388671875, "learning_rate": 0.0019980581614293556, "loss": 0.2857, "step": 2988 }, { "epoch": 0.005301517138173672, "grad_norm": 0.296875, "learning_rate": 0.001998054251785678, "loss": 0.2665, "step": 2990 }, { "epoch": 0.005305063303483487, "grad_norm": 0.828125, "learning_rate": 0.0019980503382144325, "loss": 0.3523, "step": 2992 }, { "epoch": 0.005308609468793302, "grad_norm": 0.54296875, "learning_rate": 0.0019980464207156367, "loss": 0.2823, "step": 2994 }, { "epoch": 0.005312155634103117, "grad_norm": 0.4375, "learning_rate": 0.001998042499289307, "loss": 0.2396, "step": 2996 }, { "epoch": 0.0053157017994129325, "grad_norm": 0.2890625, "learning_rate": 0.001998038573935462, "loss": 0.3331, "step": 2998 }, { "epoch": 0.005319247964722748, "grad_norm": 0.2255859375, "learning_rate": 0.001998034644654117, "loss": 0.3299, "step": 3000 }, { "epoch": 0.0053227941300325625, "grad_norm": 0.44140625, "learning_rate": 0.0019980307114452903, "loss": 0.2866, "step": 3002 }, { "epoch": 0.005326340295342378, "grad_norm": 0.369140625, "learning_rate": 0.0019980267743089993, "loss": 0.2317, "step": 3004 }, { "epoch": 0.005329886460652193, "grad_norm": 0.310546875, "learning_rate": 0.0019980228332452605, "loss": 0.3696, "step": 3006 }, { "epoch": 0.005333432625962008, "grad_norm": 0.44140625, "learning_rate": 0.001998018888254091, "loss": 0.3672, "step": 3008 }, { "epoch": 0.005336978791271823, "grad_norm": 0.3984375, "learning_rate": 0.0019980149393355093, "loss": 0.3108, "step": 3010 }, { "epoch": 0.005340524956581639, "grad_norm": 0.3828125, "learning_rate": 0.0019980109864895317, "loss": 0.7665, "step": 3012 }, { "epoch": 0.005344071121891453, "grad_norm": 3.28125, "learning_rate": 0.0019980070297161753, "loss": 0.6211, "step": 3014 }, { "epoch": 0.005347617287201269, "grad_norm": 0.337890625, "learning_rate": 0.001998003069015458, "loss": 0.3374, "step": 3016 }, { "epoch": 0.005351163452511084, "grad_norm": 0.361328125, "learning_rate": 0.0019979991043873975, "loss": 0.4111, "step": 3018 }, { "epoch": 0.0053547096178208995, "grad_norm": 0.58984375, "learning_rate": 0.0019979951358320095, "loss": 0.3796, "step": 3020 }, { "epoch": 0.005358255783130714, "grad_norm": 0.478515625, "learning_rate": 0.001997991163349313, "loss": 0.3128, "step": 3022 }, { "epoch": 0.0053618019484405295, "grad_norm": 0.86328125, "learning_rate": 0.001997987186939325, "loss": 0.5398, "step": 3024 }, { "epoch": 0.005365348113750345, "grad_norm": 0.51171875, "learning_rate": 0.001997983206602062, "loss": 0.3081, "step": 3026 }, { "epoch": 0.0053688942790601595, "grad_norm": 0.52734375, "learning_rate": 0.0019979792223375425, "loss": 0.3187, "step": 3028 }, { "epoch": 0.005372440444369975, "grad_norm": 2.3125, "learning_rate": 0.0019979752341457834, "loss": 0.5208, "step": 3030 }, { "epoch": 0.00537598660967979, "grad_norm": 0.380859375, "learning_rate": 0.001997971242026802, "loss": 0.3044, "step": 3032 }, { "epoch": 0.005379532774989606, "grad_norm": 0.265625, "learning_rate": 0.0019979672459806163, "loss": 0.301, "step": 3034 }, { "epoch": 0.00538307894029942, "grad_norm": 1.7734375, "learning_rate": 0.0019979632460072434, "loss": 0.5821, "step": 3036 }, { "epoch": 0.005386625105609236, "grad_norm": 0.2373046875, "learning_rate": 0.0019979592421067007, "loss": 0.2779, "step": 3038 }, { "epoch": 0.005390171270919051, "grad_norm": 0.38671875, "learning_rate": 0.001997955234279006, "loss": 0.297, "step": 3040 }, { "epoch": 0.005393717436228866, "grad_norm": 0.98828125, "learning_rate": 0.0019979512225241766, "loss": 0.3257, "step": 3042 }, { "epoch": 0.005397263601538681, "grad_norm": 0.228515625, "learning_rate": 0.0019979472068422303, "loss": 0.3346, "step": 3044 }, { "epoch": 0.0054008097668484965, "grad_norm": 0.474609375, "learning_rate": 0.0019979431872331845, "loss": 0.3538, "step": 3046 }, { "epoch": 0.005404355932158311, "grad_norm": 0.421875, "learning_rate": 0.0019979391636970566, "loss": 0.2874, "step": 3048 }, { "epoch": 0.0054079020974681265, "grad_norm": 0.31640625, "learning_rate": 0.0019979351362338646, "loss": 0.3763, "step": 3050 }, { "epoch": 0.005411448262777942, "grad_norm": 0.484375, "learning_rate": 0.001997931104843626, "loss": 0.4304, "step": 3052 }, { "epoch": 0.005414994428087757, "grad_norm": 0.1826171875, "learning_rate": 0.001997927069526358, "loss": 0.3974, "step": 3054 }, { "epoch": 0.005418540593397572, "grad_norm": 0.66796875, "learning_rate": 0.0019979230302820785, "loss": 0.396, "step": 3056 }, { "epoch": 0.005422086758707387, "grad_norm": 2.40625, "learning_rate": 0.0019979189871108054, "loss": 0.262, "step": 3058 }, { "epoch": 0.005425632924017203, "grad_norm": 0.416015625, "learning_rate": 0.0019979149400125564, "loss": 0.3403, "step": 3060 }, { "epoch": 0.005429179089327017, "grad_norm": 0.32421875, "learning_rate": 0.001997910888987349, "loss": 0.407, "step": 3062 }, { "epoch": 0.005432725254636833, "grad_norm": 2.859375, "learning_rate": 0.001997906834035201, "loss": 0.3674, "step": 3064 }, { "epoch": 0.005436271419946648, "grad_norm": 0.427734375, "learning_rate": 0.0019979027751561296, "loss": 0.376, "step": 3066 }, { "epoch": 0.0054398175852564635, "grad_norm": 0.2412109375, "learning_rate": 0.0019978987123501534, "loss": 0.3365, "step": 3068 }, { "epoch": 0.005443363750566278, "grad_norm": 0.53515625, "learning_rate": 0.00199789464561729, "loss": 0.3183, "step": 3070 }, { "epoch": 0.0054469099158760935, "grad_norm": 1.2578125, "learning_rate": 0.001997890574957557, "loss": 0.3286, "step": 3072 }, { "epoch": 0.005450456081185909, "grad_norm": 0.40625, "learning_rate": 0.001997886500370971, "loss": 0.2991, "step": 3074 }, { "epoch": 0.0054540022464957235, "grad_norm": 0.255859375, "learning_rate": 0.0019978824218575522, "loss": 0.2764, "step": 3076 }, { "epoch": 0.005457548411805539, "grad_norm": 0.86328125, "learning_rate": 0.001997878339417317, "loss": 0.3843, "step": 3078 }, { "epoch": 0.005461094577115354, "grad_norm": 2.578125, "learning_rate": 0.001997874253050283, "loss": 0.3829, "step": 3080 }, { "epoch": 0.005464640742425169, "grad_norm": 0.3203125, "learning_rate": 0.0019978701627564694, "loss": 0.2504, "step": 3082 }, { "epoch": 0.005468186907734984, "grad_norm": 4.03125, "learning_rate": 0.0019978660685358927, "loss": 0.372, "step": 3084 }, { "epoch": 0.0054717330730448, "grad_norm": 0.435546875, "learning_rate": 0.0019978619703885712, "loss": 0.2704, "step": 3086 }, { "epoch": 0.005475279238354615, "grad_norm": 2.328125, "learning_rate": 0.0019978578683145236, "loss": 0.3223, "step": 3088 }, { "epoch": 0.00547882540366443, "grad_norm": 0.439453125, "learning_rate": 0.0019978537623137668, "loss": 0.2812, "step": 3090 }, { "epoch": 0.005482371568974245, "grad_norm": 0.318359375, "learning_rate": 0.0019978496523863193, "loss": 0.2967, "step": 3092 }, { "epoch": 0.0054859177342840605, "grad_norm": 0.439453125, "learning_rate": 0.001997845538532199, "loss": 0.2953, "step": 3094 }, { "epoch": 0.005489463899593875, "grad_norm": 0.6875, "learning_rate": 0.001997841420751424, "loss": 0.222, "step": 3096 }, { "epoch": 0.0054930100649036905, "grad_norm": 0.48046875, "learning_rate": 0.001997837299044012, "loss": 0.3668, "step": 3098 }, { "epoch": 0.005496556230213506, "grad_norm": 0.3515625, "learning_rate": 0.001997833173409981, "loss": 0.3087, "step": 3100 }, { "epoch": 0.005500102395523321, "grad_norm": 0.443359375, "learning_rate": 0.001997829043849349, "loss": 0.3671, "step": 3102 }, { "epoch": 0.005503648560833136, "grad_norm": 0.306640625, "learning_rate": 0.001997824910362135, "loss": 0.2986, "step": 3104 }, { "epoch": 0.005507194726142951, "grad_norm": 0.369140625, "learning_rate": 0.001997820772948356, "loss": 0.3142, "step": 3106 }, { "epoch": 0.005510740891452767, "grad_norm": 1.2578125, "learning_rate": 0.0019978166316080305, "loss": 0.324, "step": 3108 }, { "epoch": 0.005514287056762581, "grad_norm": 0.5703125, "learning_rate": 0.0019978124863411764, "loss": 0.2928, "step": 3110 }, { "epoch": 0.005517833222072397, "grad_norm": 0.494140625, "learning_rate": 0.001997808337147812, "loss": 0.2638, "step": 3112 }, { "epoch": 0.005521379387382212, "grad_norm": 0.63671875, "learning_rate": 0.001997804184027956, "loss": 0.3343, "step": 3114 }, { "epoch": 0.005524925552692027, "grad_norm": 0.263671875, "learning_rate": 0.0019978000269816254, "loss": 0.2812, "step": 3116 }, { "epoch": 0.005528471718001842, "grad_norm": 0.349609375, "learning_rate": 0.001997795866008839, "loss": 0.3444, "step": 3118 }, { "epoch": 0.0055320178833116575, "grad_norm": 0.3125, "learning_rate": 0.0019977917011096153, "loss": 0.2748, "step": 3120 }, { "epoch": 0.005535564048621473, "grad_norm": 0.53515625, "learning_rate": 0.0019977875322839717, "loss": 0.3103, "step": 3122 }, { "epoch": 0.0055391102139312875, "grad_norm": 0.294921875, "learning_rate": 0.001997783359531927, "loss": 0.2796, "step": 3124 }, { "epoch": 0.005542656379241103, "grad_norm": 1.2578125, "learning_rate": 0.0019977791828535, "loss": 0.3919, "step": 3126 }, { "epoch": 0.005546202544550918, "grad_norm": 1.3515625, "learning_rate": 0.001997775002248708, "loss": 0.5206, "step": 3128 }, { "epoch": 0.005549748709860733, "grad_norm": 1.6171875, "learning_rate": 0.0019977708177175697, "loss": 0.3797, "step": 3130 }, { "epoch": 0.005553294875170548, "grad_norm": 0.8984375, "learning_rate": 0.001997766629260103, "loss": 0.2913, "step": 3132 }, { "epoch": 0.005556841040480364, "grad_norm": 0.224609375, "learning_rate": 0.001997762436876327, "loss": 0.2958, "step": 3134 }, { "epoch": 0.005560387205790179, "grad_norm": 0.5078125, "learning_rate": 0.0019977582405662593, "loss": 0.3557, "step": 3136 }, { "epoch": 0.005563933371099994, "grad_norm": 0.875, "learning_rate": 0.0019977540403299182, "loss": 0.5528, "step": 3138 }, { "epoch": 0.005567479536409809, "grad_norm": 0.625, "learning_rate": 0.001997749836167323, "loss": 0.4444, "step": 3140 }, { "epoch": 0.0055710257017196245, "grad_norm": 3.328125, "learning_rate": 0.0019977456280784915, "loss": 0.4219, "step": 3142 }, { "epoch": 0.005574571867029439, "grad_norm": 0.98828125, "learning_rate": 0.001997741416063442, "loss": 0.2498, "step": 3144 }, { "epoch": 0.0055781180323392545, "grad_norm": 0.318359375, "learning_rate": 0.0019977372001221926, "loss": 0.3011, "step": 3146 }, { "epoch": 0.00558166419764907, "grad_norm": 1.0859375, "learning_rate": 0.0019977329802547627, "loss": 0.2365, "step": 3148 }, { "epoch": 0.0055852103629588845, "grad_norm": 2.203125, "learning_rate": 0.00199772875646117, "loss": 0.6416, "step": 3150 }, { "epoch": 0.0055887565282687, "grad_norm": 0.640625, "learning_rate": 0.0019977245287414328, "loss": 0.3236, "step": 3152 }, { "epoch": 0.005592302693578515, "grad_norm": 0.3046875, "learning_rate": 0.0019977202970955705, "loss": 0.282, "step": 3154 }, { "epoch": 0.005595848858888331, "grad_norm": 3.296875, "learning_rate": 0.001997716061523601, "loss": 0.4333, "step": 3156 }, { "epoch": 0.005599395024198145, "grad_norm": 1.515625, "learning_rate": 0.001997711822025543, "loss": 0.4657, "step": 3158 }, { "epoch": 0.005602941189507961, "grad_norm": 0.50390625, "learning_rate": 0.0019977075786014147, "loss": 0.2869, "step": 3160 }, { "epoch": 0.005606487354817776, "grad_norm": 0.6875, "learning_rate": 0.0019977033312512348, "loss": 0.2784, "step": 3162 }, { "epoch": 0.005610033520127591, "grad_norm": 0.78515625, "learning_rate": 0.0019976990799750217, "loss": 0.3246, "step": 3164 }, { "epoch": 0.005613579685437406, "grad_norm": 0.3828125, "learning_rate": 0.001997694824772795, "loss": 0.2738, "step": 3166 }, { "epoch": 0.0056171258507472215, "grad_norm": 0.6484375, "learning_rate": 0.0019976905656445727, "loss": 0.2708, "step": 3168 }, { "epoch": 0.005620672016057037, "grad_norm": 1.34375, "learning_rate": 0.0019976863025903723, "loss": 0.4467, "step": 3170 }, { "epoch": 0.0056242181813668515, "grad_norm": 0.1953125, "learning_rate": 0.001997682035610214, "loss": 0.2979, "step": 3172 }, { "epoch": 0.005627764346676667, "grad_norm": 0.1767578125, "learning_rate": 0.0019976777647041162, "loss": 0.207, "step": 3174 }, { "epoch": 0.005631310511986482, "grad_norm": 0.58203125, "learning_rate": 0.001997673489872097, "loss": 0.3519, "step": 3176 }, { "epoch": 0.005634856677296297, "grad_norm": 0.474609375, "learning_rate": 0.0019976692111141753, "loss": 0.2799, "step": 3178 }, { "epoch": 0.005638402842606112, "grad_norm": 0.357421875, "learning_rate": 0.0019976649284303705, "loss": 0.2251, "step": 3180 }, { "epoch": 0.005641949007915928, "grad_norm": 0.2197265625, "learning_rate": 0.0019976606418207004, "loss": 0.3615, "step": 3182 }, { "epoch": 0.005645495173225742, "grad_norm": 0.3828125, "learning_rate": 0.0019976563512851837, "loss": 0.342, "step": 3184 }, { "epoch": 0.005649041338535558, "grad_norm": 1.5234375, "learning_rate": 0.00199765205682384, "loss": 0.3305, "step": 3186 }, { "epoch": 0.005652587503845373, "grad_norm": 0.69921875, "learning_rate": 0.001997647758436687, "loss": 0.2968, "step": 3188 }, { "epoch": 0.0056561336691551885, "grad_norm": 0.4375, "learning_rate": 0.0019976434561237446, "loss": 0.3484, "step": 3190 }, { "epoch": 0.005659679834465003, "grad_norm": 0.404296875, "learning_rate": 0.001997639149885031, "loss": 0.2637, "step": 3192 }, { "epoch": 0.0056632259997748185, "grad_norm": 0.35546875, "learning_rate": 0.001997634839720565, "loss": 0.3236, "step": 3194 }, { "epoch": 0.005666772165084634, "grad_norm": 0.41015625, "learning_rate": 0.0019976305256303663, "loss": 0.3569, "step": 3196 }, { "epoch": 0.0056703183303944485, "grad_norm": 4.46875, "learning_rate": 0.0019976262076144523, "loss": 0.2971, "step": 3198 }, { "epoch": 0.005673864495704264, "grad_norm": 1.3515625, "learning_rate": 0.001997621885672843, "loss": 0.3112, "step": 3200 }, { "epoch": 0.005677410661014079, "grad_norm": 1.6953125, "learning_rate": 0.001997617559805557, "loss": 0.5851, "step": 3202 }, { "epoch": 0.005680956826323895, "grad_norm": 1.5625, "learning_rate": 0.001997613230012613, "loss": 0.3979, "step": 3204 }, { "epoch": 0.005684502991633709, "grad_norm": 0.765625, "learning_rate": 0.001997608896294031, "loss": 0.4134, "step": 3206 }, { "epoch": 0.005688049156943525, "grad_norm": 0.5390625, "learning_rate": 0.001997604558649828, "loss": 0.3292, "step": 3208 }, { "epoch": 0.00569159532225334, "grad_norm": 0.41015625, "learning_rate": 0.001997600217080024, "loss": 0.3304, "step": 3210 }, { "epoch": 0.005695141487563155, "grad_norm": 0.26953125, "learning_rate": 0.0019975958715846387, "loss": 0.3288, "step": 3212 }, { "epoch": 0.00569868765287297, "grad_norm": 0.796875, "learning_rate": 0.0019975915221636903, "loss": 0.3124, "step": 3214 }, { "epoch": 0.0057022338181827855, "grad_norm": 0.263671875, "learning_rate": 0.001997587168817198, "loss": 0.2574, "step": 3216 }, { "epoch": 0.0057057799834926, "grad_norm": 0.27734375, "learning_rate": 0.0019975828115451804, "loss": 0.3256, "step": 3218 }, { "epoch": 0.0057093261488024155, "grad_norm": 2.1875, "learning_rate": 0.0019975784503476575, "loss": 0.3901, "step": 3220 }, { "epoch": 0.005712872314112231, "grad_norm": 2.765625, "learning_rate": 0.0019975740852246474, "loss": 0.3555, "step": 3222 }, { "epoch": 0.005716418479422046, "grad_norm": 0.53515625, "learning_rate": 0.00199756971617617, "loss": 0.3528, "step": 3224 }, { "epoch": 0.005719964644731861, "grad_norm": 1.625, "learning_rate": 0.0019975653432022437, "loss": 0.3324, "step": 3226 }, { "epoch": 0.005723510810041676, "grad_norm": 0.396484375, "learning_rate": 0.001997560966302888, "loss": 0.2853, "step": 3228 }, { "epoch": 0.005727056975351492, "grad_norm": 1.6953125, "learning_rate": 0.001997556585478122, "loss": 0.3712, "step": 3230 }, { "epoch": 0.005730603140661306, "grad_norm": 0.3828125, "learning_rate": 0.001997552200727965, "loss": 0.3184, "step": 3232 }, { "epoch": 0.005734149305971122, "grad_norm": 0.390625, "learning_rate": 0.001997547812052436, "loss": 0.3259, "step": 3234 }, { "epoch": 0.005737695471280937, "grad_norm": 0.58203125, "learning_rate": 0.0019975434194515543, "loss": 0.3406, "step": 3236 }, { "epoch": 0.0057412416365907526, "grad_norm": 0.61328125, "learning_rate": 0.001997539022925339, "loss": 0.2699, "step": 3238 }, { "epoch": 0.005744787801900567, "grad_norm": 0.5, "learning_rate": 0.001997534622473809, "loss": 0.2815, "step": 3240 }, { "epoch": 0.0057483339672103825, "grad_norm": 0.52734375, "learning_rate": 0.0019975302180969844, "loss": 0.2411, "step": 3242 }, { "epoch": 0.005751880132520198, "grad_norm": 0.2421875, "learning_rate": 0.001997525809794884, "loss": 0.3577, "step": 3244 }, { "epoch": 0.0057554262978300125, "grad_norm": 0.46484375, "learning_rate": 0.0019975213975675266, "loss": 0.3507, "step": 3246 }, { "epoch": 0.005758972463139828, "grad_norm": 0.361328125, "learning_rate": 0.0019975169814149324, "loss": 0.2854, "step": 3248 }, { "epoch": 0.005762518628449643, "grad_norm": 0.60546875, "learning_rate": 0.00199751256133712, "loss": 0.359, "step": 3250 }, { "epoch": 0.005766064793759458, "grad_norm": 0.455078125, "learning_rate": 0.001997508137334109, "loss": 0.2627, "step": 3252 }, { "epoch": 0.005769610959069273, "grad_norm": 0.3203125, "learning_rate": 0.001997503709405919, "loss": 0.3197, "step": 3254 }, { "epoch": 0.005773157124379089, "grad_norm": 0.384765625, "learning_rate": 0.001997499277552569, "loss": 0.2707, "step": 3256 }, { "epoch": 0.005776703289688904, "grad_norm": 0.8125, "learning_rate": 0.0019974948417740782, "loss": 0.2898, "step": 3258 }, { "epoch": 0.005780249454998719, "grad_norm": 0.33984375, "learning_rate": 0.001997490402070466, "loss": 0.2489, "step": 3260 }, { "epoch": 0.005783795620308534, "grad_norm": 6.1875, "learning_rate": 0.001997485958441753, "loss": 0.3204, "step": 3262 }, { "epoch": 0.0057873417856183496, "grad_norm": 0.400390625, "learning_rate": 0.001997481510887957, "loss": 0.2608, "step": 3264 }, { "epoch": 0.005790887950928164, "grad_norm": 0.296875, "learning_rate": 0.001997477059409099, "loss": 0.3455, "step": 3266 }, { "epoch": 0.0057944341162379795, "grad_norm": 0.55078125, "learning_rate": 0.001997472604005197, "loss": 0.2793, "step": 3268 }, { "epoch": 0.005797980281547795, "grad_norm": 0.25390625, "learning_rate": 0.001997468144676271, "loss": 0.2685, "step": 3270 }, { "epoch": 0.00580152644685761, "grad_norm": 0.67578125, "learning_rate": 0.0019974636814223414, "loss": 0.302, "step": 3272 }, { "epoch": 0.005805072612167425, "grad_norm": 0.330078125, "learning_rate": 0.0019974592142434264, "loss": 0.3065, "step": 3274 }, { "epoch": 0.00580861877747724, "grad_norm": 0.419921875, "learning_rate": 0.001997454743139546, "loss": 0.2545, "step": 3276 }, { "epoch": 0.005812164942787056, "grad_norm": 0.37890625, "learning_rate": 0.0019974502681107203, "loss": 0.2476, "step": 3278 }, { "epoch": 0.00581571110809687, "grad_norm": 0.294921875, "learning_rate": 0.001997445789156968, "loss": 0.3519, "step": 3280 }, { "epoch": 0.005819257273406686, "grad_norm": 0.287109375, "learning_rate": 0.0019974413062783095, "loss": 0.3906, "step": 3282 }, { "epoch": 0.005822803438716501, "grad_norm": 0.41796875, "learning_rate": 0.0019974368194747637, "loss": 0.2944, "step": 3284 }, { "epoch": 0.005826349604026316, "grad_norm": 0.439453125, "learning_rate": 0.001997432328746351, "loss": 0.2693, "step": 3286 }, { "epoch": 0.005829895769336131, "grad_norm": 0.322265625, "learning_rate": 0.00199742783409309, "loss": 0.3522, "step": 3288 }, { "epoch": 0.0058334419346459465, "grad_norm": 0.3125, "learning_rate": 0.0019974233355150015, "loss": 0.3537, "step": 3290 }, { "epoch": 0.005836988099955762, "grad_norm": 0.341796875, "learning_rate": 0.0019974188330121045, "loss": 0.2557, "step": 3292 }, { "epoch": 0.0058405342652655765, "grad_norm": 0.6953125, "learning_rate": 0.0019974143265844187, "loss": 0.2975, "step": 3294 }, { "epoch": 0.005844080430575392, "grad_norm": 0.52734375, "learning_rate": 0.0019974098162319634, "loss": 0.2782, "step": 3296 }, { "epoch": 0.005847626595885207, "grad_norm": 0.6015625, "learning_rate": 0.001997405301954759, "loss": 0.3137, "step": 3298 }, { "epoch": 0.005851172761195022, "grad_norm": 2.28125, "learning_rate": 0.001997400783752826, "loss": 0.5785, "step": 3300 }, { "epoch": 0.005854718926504837, "grad_norm": 0.65234375, "learning_rate": 0.0019973962616261823, "loss": 0.3086, "step": 3302 }, { "epoch": 0.005858265091814653, "grad_norm": 0.447265625, "learning_rate": 0.001997391735574849, "loss": 0.3966, "step": 3304 }, { "epoch": 0.005861811257124468, "grad_norm": 0.314453125, "learning_rate": 0.0019973872055988454, "loss": 0.2754, "step": 3306 }, { "epoch": 0.005865357422434283, "grad_norm": 0.390625, "learning_rate": 0.001997382671698192, "loss": 0.3123, "step": 3308 }, { "epoch": 0.005868903587744098, "grad_norm": 0.375, "learning_rate": 0.0019973781338729073, "loss": 0.6683, "step": 3310 }, { "epoch": 0.005872449753053914, "grad_norm": 1.0859375, "learning_rate": 0.0019973735921230123, "loss": 0.3723, "step": 3312 }, { "epoch": 0.005875995918363728, "grad_norm": 0.546875, "learning_rate": 0.001997369046448526, "loss": 0.2932, "step": 3314 }, { "epoch": 0.0058795420836735435, "grad_norm": 0.6640625, "learning_rate": 0.0019973644968494693, "loss": 0.4134, "step": 3316 }, { "epoch": 0.005883088248983359, "grad_norm": 0.69921875, "learning_rate": 0.001997359943325861, "loss": 0.4393, "step": 3318 }, { "epoch": 0.0058866344142931735, "grad_norm": 0.4375, "learning_rate": 0.001997355385877722, "loss": 0.3295, "step": 3320 }, { "epoch": 0.005890180579602989, "grad_norm": 0.322265625, "learning_rate": 0.0019973508245050716, "loss": 0.3216, "step": 3322 }, { "epoch": 0.005893726744912804, "grad_norm": 0.5, "learning_rate": 0.00199734625920793, "loss": 0.3491, "step": 3324 }, { "epoch": 0.00589727291022262, "grad_norm": 0.263671875, "learning_rate": 0.0019973416899863173, "loss": 0.2758, "step": 3326 }, { "epoch": 0.005900819075532434, "grad_norm": 0.353515625, "learning_rate": 0.0019973371168402533, "loss": 0.275, "step": 3328 }, { "epoch": 0.00590436524084225, "grad_norm": 0.59375, "learning_rate": 0.0019973325397697576, "loss": 0.5971, "step": 3330 }, { "epoch": 0.005907911406152065, "grad_norm": 0.38671875, "learning_rate": 0.001997327958774851, "loss": 0.3286, "step": 3332 }, { "epoch": 0.00591145757146188, "grad_norm": 0.6875, "learning_rate": 0.0019973233738555525, "loss": 0.3469, "step": 3334 }, { "epoch": 0.005915003736771695, "grad_norm": 0.404296875, "learning_rate": 0.0019973187850118833, "loss": 0.3213, "step": 3336 }, { "epoch": 0.0059185499020815106, "grad_norm": 1.71875, "learning_rate": 0.001997314192243863, "loss": 0.4108, "step": 3338 }, { "epoch": 0.005922096067391325, "grad_norm": 0.46875, "learning_rate": 0.0019973095955515114, "loss": 0.2918, "step": 3340 }, { "epoch": 0.0059256422327011405, "grad_norm": 0.40234375, "learning_rate": 0.001997304994934849, "loss": 0.2826, "step": 3342 }, { "epoch": 0.005929188398010956, "grad_norm": 0.283203125, "learning_rate": 0.0019973003903938956, "loss": 0.305, "step": 3344 }, { "epoch": 0.005932734563320771, "grad_norm": 0.330078125, "learning_rate": 0.0019972957819286716, "loss": 0.288, "step": 3346 }, { "epoch": 0.005936280728630586, "grad_norm": 0.28515625, "learning_rate": 0.0019972911695391973, "loss": 0.2985, "step": 3348 }, { "epoch": 0.005939826893940401, "grad_norm": 4.03125, "learning_rate": 0.0019972865532254924, "loss": 0.3797, "step": 3350 }, { "epoch": 0.005943373059250217, "grad_norm": 0.349609375, "learning_rate": 0.0019972819329875774, "loss": 0.2655, "step": 3352 }, { "epoch": 0.005946919224560031, "grad_norm": 0.2734375, "learning_rate": 0.001997277308825472, "loss": 0.2296, "step": 3354 }, { "epoch": 0.005950465389869847, "grad_norm": 0.6796875, "learning_rate": 0.0019972726807391977, "loss": 0.3465, "step": 3356 }, { "epoch": 0.005954011555179662, "grad_norm": 0.396484375, "learning_rate": 0.001997268048728773, "loss": 0.268, "step": 3358 }, { "epoch": 0.005957557720489478, "grad_norm": 0.28515625, "learning_rate": 0.00199726341279422, "loss": 0.2914, "step": 3360 }, { "epoch": 0.005961103885799292, "grad_norm": 0.330078125, "learning_rate": 0.001997258772935557, "loss": 0.2261, "step": 3362 }, { "epoch": 0.0059646500511091076, "grad_norm": 0.55078125, "learning_rate": 0.001997254129152806, "loss": 0.3199, "step": 3364 }, { "epoch": 0.005968196216418923, "grad_norm": 0.53515625, "learning_rate": 0.0019972494814459864, "loss": 0.2794, "step": 3366 }, { "epoch": 0.0059717423817287375, "grad_norm": 0.416015625, "learning_rate": 0.001997244829815119, "loss": 0.3171, "step": 3368 }, { "epoch": 0.005975288547038553, "grad_norm": 0.34765625, "learning_rate": 0.0019972401742602234, "loss": 0.3427, "step": 3370 }, { "epoch": 0.005978834712348368, "grad_norm": 0.6171875, "learning_rate": 0.001997235514781321, "loss": 0.2825, "step": 3372 }, { "epoch": 0.005982380877658183, "grad_norm": 0.69921875, "learning_rate": 0.0019972308513784313, "loss": 0.2863, "step": 3374 }, { "epoch": 0.005985927042967998, "grad_norm": 0.265625, "learning_rate": 0.001997226184051575, "loss": 0.2753, "step": 3376 }, { "epoch": 0.005989473208277814, "grad_norm": 0.53515625, "learning_rate": 0.0019972215128007727, "loss": 0.2723, "step": 3378 }, { "epoch": 0.005993019373587629, "grad_norm": 2.828125, "learning_rate": 0.0019972168376260445, "loss": 0.4631, "step": 3380 }, { "epoch": 0.005996565538897444, "grad_norm": 2.296875, "learning_rate": 0.0019972121585274116, "loss": 0.4052, "step": 3382 }, { "epoch": 0.006000111704207259, "grad_norm": 0.515625, "learning_rate": 0.0019972074755048932, "loss": 0.3341, "step": 3384 }, { "epoch": 0.006003657869517075, "grad_norm": 0.275390625, "learning_rate": 0.0019972027885585106, "loss": 0.2874, "step": 3386 }, { "epoch": 0.006007204034826889, "grad_norm": 0.39453125, "learning_rate": 0.0019971980976882845, "loss": 0.2982, "step": 3388 }, { "epoch": 0.0060107502001367046, "grad_norm": 0.33984375, "learning_rate": 0.001997193402894235, "loss": 0.2858, "step": 3390 }, { "epoch": 0.00601429636544652, "grad_norm": 0.4140625, "learning_rate": 0.001997188704176382, "loss": 0.2857, "step": 3392 }, { "epoch": 0.006017842530756335, "grad_norm": 0.353515625, "learning_rate": 0.0019971840015347475, "loss": 0.2693, "step": 3394 }, { "epoch": 0.00602138869606615, "grad_norm": 0.45703125, "learning_rate": 0.001997179294969351, "loss": 0.2423, "step": 3396 }, { "epoch": 0.006024934861375965, "grad_norm": 0.416015625, "learning_rate": 0.001997174584480214, "loss": 0.2608, "step": 3398 }, { "epoch": 0.006028481026685781, "grad_norm": 0.27734375, "learning_rate": 0.001997169870067356, "loss": 0.2955, "step": 3400 }, { "epoch": 0.006032027191995595, "grad_norm": 0.91796875, "learning_rate": 0.001997165151730798, "loss": 0.3376, "step": 3402 }, { "epoch": 0.006035573357305411, "grad_norm": 0.306640625, "learning_rate": 0.0019971604294705607, "loss": 0.3282, "step": 3404 }, { "epoch": 0.006039119522615226, "grad_norm": 0.2470703125, "learning_rate": 0.001997155703286665, "loss": 0.2785, "step": 3406 }, { "epoch": 0.006042665687925041, "grad_norm": 0.396484375, "learning_rate": 0.0019971509731791315, "loss": 0.3063, "step": 3408 }, { "epoch": 0.006046211853234856, "grad_norm": 0.38671875, "learning_rate": 0.0019971462391479805, "loss": 0.2914, "step": 3410 }, { "epoch": 0.006049758018544672, "grad_norm": 2.375, "learning_rate": 0.001997141501193233, "loss": 0.5058, "step": 3412 }, { "epoch": 0.006053304183854487, "grad_norm": 0.4296875, "learning_rate": 0.00199713675931491, "loss": 0.3155, "step": 3414 }, { "epoch": 0.0060568503491643015, "grad_norm": 0.333984375, "learning_rate": 0.001997132013513032, "loss": 0.3373, "step": 3416 }, { "epoch": 0.006060396514474117, "grad_norm": 0.359375, "learning_rate": 0.0019971272637876194, "loss": 0.3214, "step": 3418 }, { "epoch": 0.006063942679783932, "grad_norm": 0.375, "learning_rate": 0.001997122510138693, "loss": 0.4549, "step": 3420 }, { "epoch": 0.006067488845093747, "grad_norm": 0.7890625, "learning_rate": 0.0019971177525662746, "loss": 0.3052, "step": 3422 }, { "epoch": 0.006071035010403562, "grad_norm": 0.451171875, "learning_rate": 0.0019971129910703834, "loss": 0.3794, "step": 3424 }, { "epoch": 0.006074581175713378, "grad_norm": 0.5625, "learning_rate": 0.0019971082256510417, "loss": 0.2553, "step": 3426 }, { "epoch": 0.006078127341023193, "grad_norm": 0.82421875, "learning_rate": 0.00199710345630827, "loss": 0.2827, "step": 3428 }, { "epoch": 0.006081673506333008, "grad_norm": 0.51953125, "learning_rate": 0.0019970986830420883, "loss": 0.2815, "step": 3430 }, { "epoch": 0.006085219671642823, "grad_norm": 0.322265625, "learning_rate": 0.001997093905852518, "loss": 0.2938, "step": 3432 }, { "epoch": 0.006088765836952639, "grad_norm": 1.4375, "learning_rate": 0.0019970891247395803, "loss": 0.2535, "step": 3434 }, { "epoch": 0.006092312002262453, "grad_norm": 0.33984375, "learning_rate": 0.0019970843397032955, "loss": 0.2621, "step": 3436 }, { "epoch": 0.006095858167572269, "grad_norm": 0.61328125, "learning_rate": 0.0019970795507436856, "loss": 0.3499, "step": 3438 }, { "epoch": 0.006099404332882084, "grad_norm": 0.828125, "learning_rate": 0.0019970747578607704, "loss": 0.3129, "step": 3440 }, { "epoch": 0.0061029504981918985, "grad_norm": 0.369140625, "learning_rate": 0.001997069961054571, "loss": 0.3029, "step": 3442 }, { "epoch": 0.006106496663501714, "grad_norm": 0.333984375, "learning_rate": 0.001997065160325109, "loss": 0.2923, "step": 3444 }, { "epoch": 0.006110042828811529, "grad_norm": 0.65625, "learning_rate": 0.0019970603556724053, "loss": 0.3006, "step": 3446 }, { "epoch": 0.006113588994121345, "grad_norm": 0.66015625, "learning_rate": 0.0019970555470964803, "loss": 0.3404, "step": 3448 }, { "epoch": 0.006117135159431159, "grad_norm": 0.48828125, "learning_rate": 0.001997050734597356, "loss": 0.2949, "step": 3450 }, { "epoch": 0.006120681324740975, "grad_norm": 0.640625, "learning_rate": 0.001997045918175052, "loss": 0.3468, "step": 3452 }, { "epoch": 0.00612422749005079, "grad_norm": 1.78125, "learning_rate": 0.001997041097829591, "loss": 0.4974, "step": 3454 }, { "epoch": 0.006127773655360605, "grad_norm": 0.318359375, "learning_rate": 0.001997036273560992, "loss": 0.2368, "step": 3456 }, { "epoch": 0.00613131982067042, "grad_norm": 0.9296875, "learning_rate": 0.001997031445369279, "loss": 0.3351, "step": 3458 }, { "epoch": 0.006134865985980236, "grad_norm": 1.40625, "learning_rate": 0.0019970266132544705, "loss": 0.2688, "step": 3460 }, { "epoch": 0.006138412151290051, "grad_norm": 0.248046875, "learning_rate": 0.001997021777216589, "loss": 0.2481, "step": 3462 }, { "epoch": 0.0061419583165998656, "grad_norm": 1.0703125, "learning_rate": 0.0019970169372556554, "loss": 0.3255, "step": 3464 }, { "epoch": 0.006145504481909681, "grad_norm": 0.46875, "learning_rate": 0.001997012093371691, "loss": 0.3865, "step": 3466 }, { "epoch": 0.006149050647219496, "grad_norm": 0.380859375, "learning_rate": 0.001997007245564716, "loss": 0.2771, "step": 3468 }, { "epoch": 0.006152596812529311, "grad_norm": 9.25, "learning_rate": 0.001997002393834753, "loss": 0.3283, "step": 3470 }, { "epoch": 0.006156142977839126, "grad_norm": 0.51953125, "learning_rate": 0.001996997538181822, "loss": 0.3164, "step": 3472 }, { "epoch": 0.006159689143148942, "grad_norm": 0.44140625, "learning_rate": 0.0019969926786059453, "loss": 0.3194, "step": 3474 }, { "epoch": 0.006163235308458756, "grad_norm": 1.25, "learning_rate": 0.0019969878151071432, "loss": 0.2693, "step": 3476 }, { "epoch": 0.006166781473768572, "grad_norm": 0.35546875, "learning_rate": 0.001996982947685438, "loss": 0.2238, "step": 3478 }, { "epoch": 0.006170327639078387, "grad_norm": 0.8125, "learning_rate": 0.00199697807634085, "loss": 0.3017, "step": 3480 }, { "epoch": 0.006173873804388203, "grad_norm": 1.8203125, "learning_rate": 0.001996973201073401, "loss": 0.3029, "step": 3482 }, { "epoch": 0.006177419969698017, "grad_norm": 0.26953125, "learning_rate": 0.001996968321883112, "loss": 0.3359, "step": 3484 }, { "epoch": 0.006180966135007833, "grad_norm": 0.6328125, "learning_rate": 0.001996963438770005, "loss": 0.2878, "step": 3486 }, { "epoch": 0.006184512300317648, "grad_norm": 0.875, "learning_rate": 0.0019969585517341007, "loss": 0.321, "step": 3488 }, { "epoch": 0.0061880584656274626, "grad_norm": 0.40234375, "learning_rate": 0.0019969536607754215, "loss": 0.2913, "step": 3490 }, { "epoch": 0.006191604630937278, "grad_norm": 0.34765625, "learning_rate": 0.0019969487658939872, "loss": 0.2863, "step": 3492 }, { "epoch": 0.006195150796247093, "grad_norm": 0.77734375, "learning_rate": 0.00199694386708982, "loss": 0.5539, "step": 3494 }, { "epoch": 0.006198696961556909, "grad_norm": 0.56640625, "learning_rate": 0.0019969389643629413, "loss": 0.2408, "step": 3496 }, { "epoch": 0.006202243126866723, "grad_norm": 0.515625, "learning_rate": 0.001996934057713373, "loss": 0.2953, "step": 3498 }, { "epoch": 0.006205789292176539, "grad_norm": 0.66015625, "learning_rate": 0.0019969291471411354, "loss": 0.3432, "step": 3500 }, { "epoch": 0.006209335457486354, "grad_norm": 0.357421875, "learning_rate": 0.0019969242326462514, "loss": 0.342, "step": 3502 }, { "epoch": 0.006212881622796169, "grad_norm": 0.326171875, "learning_rate": 0.0019969193142287418, "loss": 0.2239, "step": 3504 }, { "epoch": 0.006216427788105984, "grad_norm": 0.25390625, "learning_rate": 0.0019969143918886277, "loss": 0.2682, "step": 3506 }, { "epoch": 0.0062199739534158, "grad_norm": 0.53125, "learning_rate": 0.0019969094656259313, "loss": 0.3301, "step": 3508 }, { "epoch": 0.006223520118725614, "grad_norm": 0.51953125, "learning_rate": 0.0019969045354406734, "loss": 0.2506, "step": 3510 }, { "epoch": 0.00622706628403543, "grad_norm": 0.49609375, "learning_rate": 0.001996899601332877, "loss": 0.344, "step": 3512 }, { "epoch": 0.006230612449345245, "grad_norm": 0.55859375, "learning_rate": 0.001996894663302562, "loss": 0.3055, "step": 3514 }, { "epoch": 0.00623415861465506, "grad_norm": 0.70703125, "learning_rate": 0.0019968897213497507, "loss": 0.3131, "step": 3516 }, { "epoch": 0.006237704779964875, "grad_norm": 2.0625, "learning_rate": 0.001996884775474465, "loss": 0.298, "step": 3518 }, { "epoch": 0.00624125094527469, "grad_norm": 0.60546875, "learning_rate": 0.0019968798256767262, "loss": 0.2768, "step": 3520 }, { "epoch": 0.006244797110584506, "grad_norm": 76.0, "learning_rate": 0.001996874871956556, "loss": 0.4224, "step": 3522 }, { "epoch": 0.00624834327589432, "grad_norm": 0.51953125, "learning_rate": 0.001996869914313976, "loss": 0.2167, "step": 3524 }, { "epoch": 0.006251889441204136, "grad_norm": 0.3125, "learning_rate": 0.0019968649527490083, "loss": 0.2859, "step": 3526 }, { "epoch": 0.006255435606513951, "grad_norm": 0.4765625, "learning_rate": 0.001996859987261674, "loss": 0.2549, "step": 3528 }, { "epoch": 0.006258981771823767, "grad_norm": 1.546875, "learning_rate": 0.001996855017851995, "loss": 0.3599, "step": 3530 }, { "epoch": 0.006262527937133581, "grad_norm": 0.29296875, "learning_rate": 0.0019968500445199933, "loss": 0.2769, "step": 3532 }, { "epoch": 0.006266074102443397, "grad_norm": 0.234375, "learning_rate": 0.0019968450672656905, "loss": 0.3375, "step": 3534 }, { "epoch": 0.006269620267753212, "grad_norm": 0.6484375, "learning_rate": 0.0019968400860891082, "loss": 0.307, "step": 3536 }, { "epoch": 0.006273166433063027, "grad_norm": 0.5078125, "learning_rate": 0.001996835100990268, "loss": 0.4704, "step": 3538 }, { "epoch": 0.006276712598372842, "grad_norm": 0.78125, "learning_rate": 0.0019968301119691924, "loss": 0.2594, "step": 3540 }, { "epoch": 0.006280258763682657, "grad_norm": 0.51171875, "learning_rate": 0.0019968251190259027, "loss": 0.2916, "step": 3542 }, { "epoch": 0.006283804928992472, "grad_norm": 1.671875, "learning_rate": 0.001996820122160421, "loss": 0.664, "step": 3544 }, { "epoch": 0.006287351094302287, "grad_norm": 2.078125, "learning_rate": 0.001996815121372769, "loss": 0.304, "step": 3546 }, { "epoch": 0.006290897259612103, "grad_norm": 0.57421875, "learning_rate": 0.0019968101166629683, "loss": 0.2742, "step": 3548 }, { "epoch": 0.006294443424921918, "grad_norm": 15.6875, "learning_rate": 0.001996805108031041, "loss": 0.3182, "step": 3550 }, { "epoch": 0.006297989590231733, "grad_norm": 1.6875, "learning_rate": 0.00199680009547701, "loss": 0.4785, "step": 3552 }, { "epoch": 0.006301535755541548, "grad_norm": 2.375, "learning_rate": 0.0019967950790008952, "loss": 0.4236, "step": 3554 }, { "epoch": 0.006305081920851364, "grad_norm": 0.5390625, "learning_rate": 0.0019967900586027204, "loss": 0.2976, "step": 3556 }, { "epoch": 0.006308628086161178, "grad_norm": 2.90625, "learning_rate": 0.0019967850342825066, "loss": 0.3757, "step": 3558 }, { "epoch": 0.006312174251470994, "grad_norm": 0.328125, "learning_rate": 0.0019967800060402756, "loss": 0.1831, "step": 3560 }, { "epoch": 0.006315720416780809, "grad_norm": 0.56640625, "learning_rate": 0.0019967749738760503, "loss": 0.2324, "step": 3562 }, { "epoch": 0.006319266582090624, "grad_norm": 0.3671875, "learning_rate": 0.0019967699377898517, "loss": 0.3373, "step": 3564 }, { "epoch": 0.006322812747400439, "grad_norm": 1.984375, "learning_rate": 0.0019967648977817025, "loss": 0.2453, "step": 3566 }, { "epoch": 0.006326358912710254, "grad_norm": 0.416015625, "learning_rate": 0.0019967598538516247, "loss": 0.3425, "step": 3568 }, { "epoch": 0.00632990507802007, "grad_norm": 0.40234375, "learning_rate": 0.00199675480599964, "loss": 0.2809, "step": 3570 }, { "epoch": 0.006333451243329884, "grad_norm": 1.3203125, "learning_rate": 0.0019967497542257707, "loss": 0.3548, "step": 3572 }, { "epoch": 0.0063369974086397, "grad_norm": 0.478515625, "learning_rate": 0.001996744698530039, "loss": 0.3291, "step": 3574 }, { "epoch": 0.006340543573949515, "grad_norm": 0.50390625, "learning_rate": 0.0019967396389124667, "loss": 0.2642, "step": 3576 }, { "epoch": 0.00634408973925933, "grad_norm": 0.8828125, "learning_rate": 0.001996734575373076, "loss": 0.2874, "step": 3578 }, { "epoch": 0.006347635904569145, "grad_norm": 0.43359375, "learning_rate": 0.0019967295079118897, "loss": 0.3389, "step": 3580 }, { "epoch": 0.006351182069878961, "grad_norm": 0.87109375, "learning_rate": 0.0019967244365289294, "loss": 0.5039, "step": 3582 }, { "epoch": 0.006354728235188776, "grad_norm": 1.2421875, "learning_rate": 0.001996719361224217, "loss": 0.3724, "step": 3584 }, { "epoch": 0.006358274400498591, "grad_norm": 4.96875, "learning_rate": 0.0019967142819977742, "loss": 0.7329, "step": 3586 }, { "epoch": 0.006361820565808406, "grad_norm": 0.357421875, "learning_rate": 0.0019967091988496253, "loss": 0.2947, "step": 3588 }, { "epoch": 0.006365366731118221, "grad_norm": 0.373046875, "learning_rate": 0.0019967041117797905, "loss": 0.5445, "step": 3590 }, { "epoch": 0.006368912896428036, "grad_norm": 0.58984375, "learning_rate": 0.001996699020788293, "loss": 0.2339, "step": 3592 }, { "epoch": 0.006372459061737851, "grad_norm": 0.490234375, "learning_rate": 0.0019966939258751547, "loss": 0.3004, "step": 3594 }, { "epoch": 0.006376005227047667, "grad_norm": 2.234375, "learning_rate": 0.0019966888270403983, "loss": 0.3147, "step": 3596 }, { "epoch": 0.006379551392357482, "grad_norm": 1.2578125, "learning_rate": 0.0019966837242840455, "loss": 0.3833, "step": 3598 }, { "epoch": 0.006383097557667297, "grad_norm": 0.73828125, "learning_rate": 0.0019966786176061195, "loss": 0.4519, "step": 3600 }, { "epoch": 0.006386643722977112, "grad_norm": 0.333984375, "learning_rate": 0.0019966735070066416, "loss": 0.2226, "step": 3602 }, { "epoch": 0.006390189888286928, "grad_norm": 0.70703125, "learning_rate": 0.0019966683924856348, "loss": 0.2774, "step": 3604 }, { "epoch": 0.006393736053596742, "grad_norm": 0.76171875, "learning_rate": 0.001996663274043121, "loss": 0.3601, "step": 3606 }, { "epoch": 0.006397282218906558, "grad_norm": 0.498046875, "learning_rate": 0.001996658151679123, "loss": 0.2697, "step": 3608 }, { "epoch": 0.006400828384216373, "grad_norm": 0.349609375, "learning_rate": 0.0019966530253936634, "loss": 0.2523, "step": 3610 }, { "epoch": 0.006404374549526188, "grad_norm": 2.875, "learning_rate": 0.001996647895186764, "loss": 0.3261, "step": 3612 }, { "epoch": 0.006407920714836003, "grad_norm": 0.478515625, "learning_rate": 0.001996642761058448, "loss": 0.2746, "step": 3614 }, { "epoch": 0.006411466880145818, "grad_norm": 0.27734375, "learning_rate": 0.001996637623008737, "loss": 0.2738, "step": 3616 }, { "epoch": 0.006415013045455634, "grad_norm": 1.1484375, "learning_rate": 0.0019966324810376536, "loss": 0.3113, "step": 3618 }, { "epoch": 0.006418559210765448, "grad_norm": 0.90234375, "learning_rate": 0.001996627335145221, "loss": 0.2843, "step": 3620 }, { "epoch": 0.006422105376075264, "grad_norm": 0.96875, "learning_rate": 0.001996622185331461, "loss": 0.2946, "step": 3622 }, { "epoch": 0.006425651541385079, "grad_norm": 1.734375, "learning_rate": 0.0019966170315963965, "loss": 0.3485, "step": 3624 }, { "epoch": 0.006429197706694894, "grad_norm": 2.046875, "learning_rate": 0.0019966118739400502, "loss": 0.4324, "step": 3626 }, { "epoch": 0.006432743872004709, "grad_norm": 0.4453125, "learning_rate": 0.001996606712362444, "loss": 0.2174, "step": 3628 }, { "epoch": 0.006436290037314525, "grad_norm": 1.5390625, "learning_rate": 0.001996601546863601, "loss": 0.3234, "step": 3630 }, { "epoch": 0.00643983620262434, "grad_norm": 0.451171875, "learning_rate": 0.0019965963774435437, "loss": 0.3029, "step": 3632 }, { "epoch": 0.006443382367934155, "grad_norm": 0.82421875, "learning_rate": 0.0019965912041022943, "loss": 0.2654, "step": 3634 }, { "epoch": 0.00644692853324397, "grad_norm": 0.53515625, "learning_rate": 0.001996586026839876, "loss": 0.2686, "step": 3636 }, { "epoch": 0.0064504746985537854, "grad_norm": 1.1796875, "learning_rate": 0.0019965808456563114, "loss": 0.2771, "step": 3638 }, { "epoch": 0.0064540208638636, "grad_norm": 0.73046875, "learning_rate": 0.001996575660551623, "loss": 0.415, "step": 3640 }, { "epoch": 0.006457567029173415, "grad_norm": 0.66015625, "learning_rate": 0.001996570471525833, "loss": 0.4476, "step": 3642 }, { "epoch": 0.006461113194483231, "grad_norm": 2.5, "learning_rate": 0.001996565278578965, "loss": 0.495, "step": 3644 }, { "epoch": 0.006464659359793045, "grad_norm": 0.55859375, "learning_rate": 0.001996560081711041, "loss": 0.2976, "step": 3646 }, { "epoch": 0.006468205525102861, "grad_norm": 1.0, "learning_rate": 0.001996554880922084, "loss": 0.2428, "step": 3648 }, { "epoch": 0.006471751690412676, "grad_norm": 0.90234375, "learning_rate": 0.001996549676212117, "loss": 0.5618, "step": 3650 }, { "epoch": 0.006475297855722492, "grad_norm": 0.396484375, "learning_rate": 0.0019965444675811624, "loss": 0.2521, "step": 3652 }, { "epoch": 0.006478844021032306, "grad_norm": 1.234375, "learning_rate": 0.001996539255029243, "loss": 0.3476, "step": 3654 }, { "epoch": 0.006482390186342122, "grad_norm": 0.625, "learning_rate": 0.0019965340385563815, "loss": 0.2919, "step": 3656 }, { "epoch": 0.006485936351651937, "grad_norm": 0.83203125, "learning_rate": 0.001996528818162601, "loss": 0.37, "step": 3658 }, { "epoch": 0.006489482516961752, "grad_norm": 0.578125, "learning_rate": 0.001996523593847924, "loss": 0.3794, "step": 3660 }, { "epoch": 0.006493028682271567, "grad_norm": 0.453125, "learning_rate": 0.001996518365612374, "loss": 0.2594, "step": 3662 }, { "epoch": 0.0064965748475813824, "grad_norm": 0.46484375, "learning_rate": 0.0019965131334559734, "loss": 0.4004, "step": 3664 }, { "epoch": 0.006500121012891198, "grad_norm": 0.7734375, "learning_rate": 0.001996507897378745, "loss": 0.321, "step": 3666 }, { "epoch": 0.006503667178201012, "grad_norm": 0.69140625, "learning_rate": 0.0019965026573807113, "loss": 0.3256, "step": 3668 }, { "epoch": 0.006507213343510828, "grad_norm": 0.5234375, "learning_rate": 0.001996497413461896, "loss": 0.2443, "step": 3670 }, { "epoch": 0.006510759508820643, "grad_norm": 1.6484375, "learning_rate": 0.0019964921656223226, "loss": 0.5052, "step": 3672 }, { "epoch": 0.006514305674130458, "grad_norm": 0.4140625, "learning_rate": 0.001996486913862012, "loss": 0.3542, "step": 3674 }, { "epoch": 0.006517851839440273, "grad_norm": 0.365234375, "learning_rate": 0.0019964816581809893, "loss": 0.2876, "step": 3676 }, { "epoch": 0.006521398004750089, "grad_norm": 0.404296875, "learning_rate": 0.0019964763985792764, "loss": 0.3216, "step": 3678 }, { "epoch": 0.006524944170059903, "grad_norm": 0.8515625, "learning_rate": 0.0019964711350568963, "loss": 0.3296, "step": 3680 }, { "epoch": 0.006528490335369719, "grad_norm": 0.287109375, "learning_rate": 0.001996465867613872, "loss": 0.2429, "step": 3682 }, { "epoch": 0.006532036500679534, "grad_norm": 0.71484375, "learning_rate": 0.001996460596250227, "loss": 0.2976, "step": 3684 }, { "epoch": 0.0065355826659893495, "grad_norm": 0.35546875, "learning_rate": 0.0019964553209659837, "loss": 0.247, "step": 3686 }, { "epoch": 0.006539128831299164, "grad_norm": 0.31640625, "learning_rate": 0.0019964500417611656, "loss": 0.2985, "step": 3688 }, { "epoch": 0.006542674996608979, "grad_norm": 0.427734375, "learning_rate": 0.001996444758635796, "loss": 0.3057, "step": 3690 }, { "epoch": 0.006546221161918795, "grad_norm": 1.859375, "learning_rate": 0.001996439471589898, "loss": 0.2865, "step": 3692 }, { "epoch": 0.006549767327228609, "grad_norm": 1.1796875, "learning_rate": 0.001996434180623494, "loss": 0.3899, "step": 3694 }, { "epoch": 0.006553313492538425, "grad_norm": 1.375, "learning_rate": 0.001996428885736608, "loss": 0.5637, "step": 3696 }, { "epoch": 0.00655685965784824, "grad_norm": 0.5234375, "learning_rate": 0.0019964235869292623, "loss": 0.331, "step": 3698 }, { "epoch": 0.006560405823158056, "grad_norm": 0.81640625, "learning_rate": 0.0019964182842014807, "loss": 0.2936, "step": 3700 }, { "epoch": 0.00656395198846787, "grad_norm": 0.46875, "learning_rate": 0.0019964129775532865, "loss": 0.2663, "step": 3702 }, { "epoch": 0.006567498153777686, "grad_norm": 0.98828125, "learning_rate": 0.0019964076669847022, "loss": 0.3022, "step": 3704 }, { "epoch": 0.006571044319087501, "grad_norm": 1.46875, "learning_rate": 0.0019964023524957518, "loss": 0.3109, "step": 3706 }, { "epoch": 0.006574590484397316, "grad_norm": 0.234375, "learning_rate": 0.001996397034086458, "loss": 0.3486, "step": 3708 }, { "epoch": 0.006578136649707131, "grad_norm": 0.369140625, "learning_rate": 0.001996391711756844, "loss": 0.3987, "step": 3710 }, { "epoch": 0.0065816828150169465, "grad_norm": 1.265625, "learning_rate": 0.001996386385506934, "loss": 0.3227, "step": 3712 }, { "epoch": 0.006585228980326761, "grad_norm": 0.36328125, "learning_rate": 0.00199638105533675, "loss": 0.3167, "step": 3714 }, { "epoch": 0.006588775145636576, "grad_norm": 0.494140625, "learning_rate": 0.0019963757212463165, "loss": 0.2572, "step": 3716 }, { "epoch": 0.006592321310946392, "grad_norm": 0.953125, "learning_rate": 0.0019963703832356562, "loss": 0.5886, "step": 3718 }, { "epoch": 0.006595867476256207, "grad_norm": 0.349609375, "learning_rate": 0.0019963650413047924, "loss": 0.2266, "step": 3720 }, { "epoch": 0.006599413641566022, "grad_norm": 1.40625, "learning_rate": 0.0019963596954537485, "loss": 0.4081, "step": 3722 }, { "epoch": 0.006602959806875837, "grad_norm": 0.5546875, "learning_rate": 0.001996354345682548, "loss": 0.2733, "step": 3724 }, { "epoch": 0.006606505972185653, "grad_norm": 0.96484375, "learning_rate": 0.0019963489919912142, "loss": 0.2669, "step": 3726 }, { "epoch": 0.006610052137495467, "grad_norm": 0.400390625, "learning_rate": 0.0019963436343797703, "loss": 0.2175, "step": 3728 }, { "epoch": 0.006613598302805283, "grad_norm": 0.251953125, "learning_rate": 0.001996338272848241, "loss": 0.2582, "step": 3730 }, { "epoch": 0.006617144468115098, "grad_norm": 0.359375, "learning_rate": 0.001996332907396648, "loss": 0.2588, "step": 3732 }, { "epoch": 0.0066206906334249135, "grad_norm": 0.412109375, "learning_rate": 0.001996327538025015, "loss": 0.2742, "step": 3734 }, { "epoch": 0.006624236798734728, "grad_norm": 0.55859375, "learning_rate": 0.0019963221647333667, "loss": 0.2768, "step": 3736 }, { "epoch": 0.0066277829640445434, "grad_norm": 0.361328125, "learning_rate": 0.001996316787521726, "loss": 0.2891, "step": 3738 }, { "epoch": 0.006631329129354359, "grad_norm": 0.765625, "learning_rate": 0.001996311406390116, "loss": 0.3709, "step": 3740 }, { "epoch": 0.006634875294664173, "grad_norm": 2.40625, "learning_rate": 0.0019963060213385605, "loss": 0.2232, "step": 3742 }, { "epoch": 0.006638421459973989, "grad_norm": 0.890625, "learning_rate": 0.0019963006323670835, "loss": 0.2206, "step": 3744 }, { "epoch": 0.006641967625283804, "grad_norm": 0.306640625, "learning_rate": 0.001996295239475708, "loss": 0.3767, "step": 3746 }, { "epoch": 0.006645513790593619, "grad_norm": 3.140625, "learning_rate": 0.0019962898426644574, "loss": 0.4518, "step": 3748 }, { "epoch": 0.006649059955903434, "grad_norm": 1.15625, "learning_rate": 0.001996284441933356, "loss": 0.368, "step": 3750 }, { "epoch": 0.00665260612121325, "grad_norm": 0.55859375, "learning_rate": 0.001996279037282427, "loss": 0.3004, "step": 3752 }, { "epoch": 0.006656152286523065, "grad_norm": 0.6640625, "learning_rate": 0.0019962736287116936, "loss": 0.3034, "step": 3754 }, { "epoch": 0.00665969845183288, "grad_norm": 0.6953125, "learning_rate": 0.001996268216221181, "loss": 0.4052, "step": 3756 }, { "epoch": 0.006663244617142695, "grad_norm": 0.59765625, "learning_rate": 0.0019962627998109115, "loss": 0.2305, "step": 3758 }, { "epoch": 0.0066667907824525105, "grad_norm": 1.765625, "learning_rate": 0.0019962573794809085, "loss": 0.3102, "step": 3760 }, { "epoch": 0.006670336947762325, "grad_norm": 0.87890625, "learning_rate": 0.0019962519552311968, "loss": 0.3272, "step": 3762 }, { "epoch": 0.0066738831130721404, "grad_norm": 0.7265625, "learning_rate": 0.0019962465270617997, "loss": 0.3258, "step": 3764 }, { "epoch": 0.006677429278381956, "grad_norm": 0.80078125, "learning_rate": 0.0019962410949727408, "loss": 0.2817, "step": 3766 }, { "epoch": 0.006680975443691771, "grad_norm": 0.3984375, "learning_rate": 0.0019962356589640438, "loss": 0.3349, "step": 3768 }, { "epoch": 0.006684521609001586, "grad_norm": 0.80859375, "learning_rate": 0.001996230219035733, "loss": 0.292, "step": 3770 }, { "epoch": 0.006688067774311401, "grad_norm": 0.77734375, "learning_rate": 0.0019962247751878315, "loss": 0.3389, "step": 3772 }, { "epoch": 0.006691613939621217, "grad_norm": 0.515625, "learning_rate": 0.001996219327420364, "loss": 0.3023, "step": 3774 }, { "epoch": 0.006695160104931031, "grad_norm": 0.80859375, "learning_rate": 0.0019962138757333527, "loss": 0.3233, "step": 3776 }, { "epoch": 0.006698706270240847, "grad_norm": 0.61328125, "learning_rate": 0.0019962084201268233, "loss": 0.3144, "step": 3778 }, { "epoch": 0.006702252435550662, "grad_norm": 0.478515625, "learning_rate": 0.0019962029606007984, "loss": 0.2663, "step": 3780 }, { "epoch": 0.006705798600860477, "grad_norm": 0.59375, "learning_rate": 0.0019961974971553025, "loss": 0.2852, "step": 3782 }, { "epoch": 0.006709344766170292, "grad_norm": 0.435546875, "learning_rate": 0.0019961920297903593, "loss": 0.2436, "step": 3784 }, { "epoch": 0.0067128909314801075, "grad_norm": 0.328125, "learning_rate": 0.001996186558505993, "loss": 0.4178, "step": 3786 }, { "epoch": 0.006716437096789923, "grad_norm": 0.373046875, "learning_rate": 0.0019961810833022267, "loss": 0.3111, "step": 3788 }, { "epoch": 0.006719983262099737, "grad_norm": 0.48046875, "learning_rate": 0.0019961756041790854, "loss": 0.2805, "step": 3790 }, { "epoch": 0.006723529427409553, "grad_norm": 0.3984375, "learning_rate": 0.0019961701211365927, "loss": 0.3611, "step": 3792 }, { "epoch": 0.006727075592719368, "grad_norm": 0.302734375, "learning_rate": 0.001996164634174772, "loss": 0.2097, "step": 3794 }, { "epoch": 0.006730621758029183, "grad_norm": 0.515625, "learning_rate": 0.0019961591432936477, "loss": 0.3033, "step": 3796 }, { "epoch": 0.006734167923338998, "grad_norm": 7.34375, "learning_rate": 0.0019961536484932444, "loss": 0.2526, "step": 3798 }, { "epoch": 0.006737714088648814, "grad_norm": 1.234375, "learning_rate": 0.001996148149773585, "loss": 0.2675, "step": 3800 }, { "epoch": 0.006741260253958629, "grad_norm": 0.462890625, "learning_rate": 0.0019961426471346946, "loss": 0.3236, "step": 3802 }, { "epoch": 0.006744806419268444, "grad_norm": 0.322265625, "learning_rate": 0.0019961371405765966, "loss": 0.2906, "step": 3804 }, { "epoch": 0.006748352584578259, "grad_norm": 0.59375, "learning_rate": 0.001996131630099315, "loss": 0.3176, "step": 3806 }, { "epoch": 0.0067518987498880745, "grad_norm": 0.373046875, "learning_rate": 0.0019961261157028748, "loss": 0.354, "step": 3808 }, { "epoch": 0.006755444915197889, "grad_norm": 0.2490234375, "learning_rate": 0.001996120597387299, "loss": 0.3218, "step": 3810 }, { "epoch": 0.0067589910805077045, "grad_norm": 0.5, "learning_rate": 0.001996115075152612, "loss": 0.2484, "step": 3812 }, { "epoch": 0.00676253724581752, "grad_norm": 0.400390625, "learning_rate": 0.001996109548998839, "loss": 0.3758, "step": 3814 }, { "epoch": 0.006766083411127334, "grad_norm": 0.72265625, "learning_rate": 0.001996104018926003, "loss": 0.1925, "step": 3816 }, { "epoch": 0.00676962957643715, "grad_norm": 1.859375, "learning_rate": 0.0019960984849341284, "loss": 0.3434, "step": 3818 }, { "epoch": 0.006773175741746965, "grad_norm": 0.81640625, "learning_rate": 0.00199609294702324, "loss": 0.2872, "step": 3820 }, { "epoch": 0.006776721907056781, "grad_norm": 0.416015625, "learning_rate": 0.0019960874051933608, "loss": 0.2461, "step": 3822 }, { "epoch": 0.006780268072366595, "grad_norm": 0.40625, "learning_rate": 0.0019960818594445162, "loss": 0.2654, "step": 3824 }, { "epoch": 0.006783814237676411, "grad_norm": 1.7421875, "learning_rate": 0.00199607630977673, "loss": 0.3872, "step": 3826 }, { "epoch": 0.006787360402986226, "grad_norm": 0.357421875, "learning_rate": 0.001996070756190027, "loss": 0.2594, "step": 3828 }, { "epoch": 0.006790906568296041, "grad_norm": 0.54296875, "learning_rate": 0.0019960651986844304, "loss": 0.2353, "step": 3830 }, { "epoch": 0.006794452733605856, "grad_norm": 0.30859375, "learning_rate": 0.001996059637259965, "loss": 0.3036, "step": 3832 }, { "epoch": 0.0067979988989156715, "grad_norm": 0.271484375, "learning_rate": 0.0019960540719166555, "loss": 0.2459, "step": 3834 }, { "epoch": 0.006801545064225487, "grad_norm": 0.59375, "learning_rate": 0.001996048502654526, "loss": 0.3075, "step": 3836 }, { "epoch": 0.0068050912295353014, "grad_norm": 0.34765625, "learning_rate": 0.001996042929473601, "loss": 0.336, "step": 3838 }, { "epoch": 0.006808637394845117, "grad_norm": 0.31640625, "learning_rate": 0.0019960373523739048, "loss": 0.3007, "step": 3840 }, { "epoch": 0.006812183560154932, "grad_norm": 0.3671875, "learning_rate": 0.0019960317713554614, "loss": 0.2348, "step": 3842 }, { "epoch": 0.006815729725464747, "grad_norm": 1.15625, "learning_rate": 0.0019960261864182954, "loss": 0.3357, "step": 3844 }, { "epoch": 0.006819275890774562, "grad_norm": 0.341796875, "learning_rate": 0.0019960205975624317, "loss": 0.3053, "step": 3846 }, { "epoch": 0.006822822056084378, "grad_norm": 0.44921875, "learning_rate": 0.001996015004787894, "loss": 0.265, "step": 3848 }, { "epoch": 0.006826368221394192, "grad_norm": 0.27734375, "learning_rate": 0.0019960094080947077, "loss": 0.3206, "step": 3850 }, { "epoch": 0.006829914386704008, "grad_norm": 0.333984375, "learning_rate": 0.0019960038074828958, "loss": 0.2849, "step": 3852 }, { "epoch": 0.006833460552013823, "grad_norm": 0.5546875, "learning_rate": 0.0019959982029524844, "loss": 0.2584, "step": 3854 }, { "epoch": 0.0068370067173236385, "grad_norm": 0.341796875, "learning_rate": 0.0019959925945034975, "loss": 0.3027, "step": 3856 }, { "epoch": 0.006840552882633453, "grad_norm": 0.337890625, "learning_rate": 0.001995986982135959, "loss": 0.2491, "step": 3858 }, { "epoch": 0.0068440990479432685, "grad_norm": 1.484375, "learning_rate": 0.0019959813658498938, "loss": 0.6935, "step": 3860 }, { "epoch": 0.006847645213253084, "grad_norm": 0.75, "learning_rate": 0.001995975745645327, "loss": 0.3274, "step": 3862 }, { "epoch": 0.0068511913785628984, "grad_norm": 0.578125, "learning_rate": 0.0019959701215222824, "loss": 0.2932, "step": 3864 }, { "epoch": 0.006854737543872714, "grad_norm": 1.0859375, "learning_rate": 0.001995964493480785, "loss": 0.3154, "step": 3866 }, { "epoch": 0.006858283709182529, "grad_norm": 0.5625, "learning_rate": 0.0019959588615208594, "loss": 0.3343, "step": 3868 }, { "epoch": 0.006861829874492345, "grad_norm": 0.515625, "learning_rate": 0.00199595322564253, "loss": 0.3474, "step": 3870 }, { "epoch": 0.006865376039802159, "grad_norm": 0.5078125, "learning_rate": 0.001995947585845822, "loss": 0.3114, "step": 3872 }, { "epoch": 0.006868922205111975, "grad_norm": 2.171875, "learning_rate": 0.001995941942130759, "loss": 0.4198, "step": 3874 }, { "epoch": 0.00687246837042179, "grad_norm": 0.3671875, "learning_rate": 0.0019959362944973673, "loss": 0.2397, "step": 3876 }, { "epoch": 0.006876014535731605, "grad_norm": 0.255859375, "learning_rate": 0.0019959306429456697, "loss": 0.2356, "step": 3878 }, { "epoch": 0.00687956070104142, "grad_norm": 1.015625, "learning_rate": 0.0019959249874756924, "loss": 0.2489, "step": 3880 }, { "epoch": 0.0068831068663512355, "grad_norm": 2.359375, "learning_rate": 0.0019959193280874596, "loss": 0.4578, "step": 3882 }, { "epoch": 0.00688665303166105, "grad_norm": 0.78515625, "learning_rate": 0.0019959136647809965, "loss": 0.2947, "step": 3884 }, { "epoch": 0.0068901991969708655, "grad_norm": 0.515625, "learning_rate": 0.001995907997556327, "loss": 0.3085, "step": 3886 }, { "epoch": 0.006893745362280681, "grad_norm": 2.15625, "learning_rate": 0.001995902326413476, "loss": 0.3363, "step": 3888 }, { "epoch": 0.006897291527590496, "grad_norm": 0.55859375, "learning_rate": 0.001995896651352469, "loss": 0.2899, "step": 3890 }, { "epoch": 0.006900837692900311, "grad_norm": 0.423828125, "learning_rate": 0.0019958909723733305, "loss": 0.267, "step": 3892 }, { "epoch": 0.006904383858210126, "grad_norm": 0.9375, "learning_rate": 0.0019958852894760852, "loss": 0.3345, "step": 3894 }, { "epoch": 0.006907930023519942, "grad_norm": 0.47265625, "learning_rate": 0.001995879602660758, "loss": 0.3044, "step": 3896 }, { "epoch": 0.006911476188829756, "grad_norm": 0.275390625, "learning_rate": 0.001995873911927374, "loss": 0.2471, "step": 3898 }, { "epoch": 0.006915022354139572, "grad_norm": 0.57421875, "learning_rate": 0.0019958682172759577, "loss": 0.2842, "step": 3900 }, { "epoch": 0.006918568519449387, "grad_norm": 0.306640625, "learning_rate": 0.0019958625187065336, "loss": 0.292, "step": 3902 }, { "epoch": 0.0069221146847592025, "grad_norm": 0.416015625, "learning_rate": 0.001995856816219128, "loss": 0.2522, "step": 3904 }, { "epoch": 0.006925660850069017, "grad_norm": 0.2041015625, "learning_rate": 0.0019958511098137647, "loss": 0.2242, "step": 3906 }, { "epoch": 0.0069292070153788325, "grad_norm": 0.5859375, "learning_rate": 0.0019958453994904693, "loss": 0.2738, "step": 3908 }, { "epoch": 0.006932753180688648, "grad_norm": 0.451171875, "learning_rate": 0.0019958396852492667, "loss": 0.2446, "step": 3910 }, { "epoch": 0.0069362993459984625, "grad_norm": 2.59375, "learning_rate": 0.0019958339670901816, "loss": 0.3562, "step": 3912 }, { "epoch": 0.006939845511308278, "grad_norm": 0.83203125, "learning_rate": 0.0019958282450132387, "loss": 0.2546, "step": 3914 }, { "epoch": 0.006943391676618093, "grad_norm": 0.72265625, "learning_rate": 0.0019958225190184636, "loss": 0.2398, "step": 3916 }, { "epoch": 0.006946937841927908, "grad_norm": 0.6796875, "learning_rate": 0.001995816789105881, "loss": 0.3362, "step": 3918 }, { "epoch": 0.006950484007237723, "grad_norm": 0.486328125, "learning_rate": 0.0019958110552755165, "loss": 0.29, "step": 3920 }, { "epoch": 0.006954030172547539, "grad_norm": 0.75, "learning_rate": 0.0019958053175273944, "loss": 0.2743, "step": 3922 }, { "epoch": 0.006957576337857354, "grad_norm": 1.6875, "learning_rate": 0.0019957995758615402, "loss": 0.3565, "step": 3924 }, { "epoch": 0.006961122503167169, "grad_norm": 0.78515625, "learning_rate": 0.0019957938302779792, "loss": 0.2403, "step": 3926 }, { "epoch": 0.006964668668476984, "grad_norm": 1.0, "learning_rate": 0.0019957880807767365, "loss": 0.3427, "step": 3928 }, { "epoch": 0.0069682148337867995, "grad_norm": 0.318359375, "learning_rate": 0.0019957823273578368, "loss": 0.221, "step": 3930 }, { "epoch": 0.006971760999096614, "grad_norm": 0.255859375, "learning_rate": 0.0019957765700213057, "loss": 0.2437, "step": 3932 }, { "epoch": 0.0069753071644064295, "grad_norm": 1.3828125, "learning_rate": 0.001995770808767168, "loss": 0.3686, "step": 3934 }, { "epoch": 0.006978853329716245, "grad_norm": 0.345703125, "learning_rate": 0.001995765043595449, "loss": 0.2674, "step": 3936 }, { "epoch": 0.0069823994950260595, "grad_norm": 2.109375, "learning_rate": 0.0019957592745061745, "loss": 0.3771, "step": 3938 }, { "epoch": 0.006985945660335875, "grad_norm": 0.8515625, "learning_rate": 0.001995753501499369, "loss": 0.2991, "step": 3940 }, { "epoch": 0.00698949182564569, "grad_norm": 0.828125, "learning_rate": 0.001995747724575058, "loss": 0.357, "step": 3942 }, { "epoch": 0.006993037990955506, "grad_norm": 0.265625, "learning_rate": 0.0019957419437332665, "loss": 0.2131, "step": 3944 }, { "epoch": 0.00699658415626532, "grad_norm": 0.251953125, "learning_rate": 0.0019957361589740203, "loss": 0.3052, "step": 3946 }, { "epoch": 0.007000130321575136, "grad_norm": 0.5546875, "learning_rate": 0.0019957303702973442, "loss": 0.2233, "step": 3948 }, { "epoch": 0.007003676486884951, "grad_norm": 0.765625, "learning_rate": 0.001995724577703264, "loss": 0.3408, "step": 3950 }, { "epoch": 0.007007222652194766, "grad_norm": 1.6015625, "learning_rate": 0.001995718781191805, "loss": 0.4903, "step": 3952 }, { "epoch": 0.007010768817504581, "grad_norm": 1.109375, "learning_rate": 0.001995712980762992, "loss": 0.2679, "step": 3954 }, { "epoch": 0.0070143149828143965, "grad_norm": 0.404296875, "learning_rate": 0.001995707176416851, "loss": 0.3888, "step": 3956 }, { "epoch": 0.007017861148124212, "grad_norm": 0.88671875, "learning_rate": 0.001995701368153407, "loss": 0.2263, "step": 3958 }, { "epoch": 0.0070214073134340265, "grad_norm": 3.015625, "learning_rate": 0.0019956955559726854, "loss": 0.3457, "step": 3960 }, { "epoch": 0.007024953478743842, "grad_norm": 0.369140625, "learning_rate": 0.0019956897398747116, "loss": 0.2678, "step": 3962 }, { "epoch": 0.007028499644053657, "grad_norm": 0.427734375, "learning_rate": 0.0019956839198595117, "loss": 0.3978, "step": 3964 }, { "epoch": 0.007032045809363472, "grad_norm": 0.9375, "learning_rate": 0.00199567809592711, "loss": 0.3307, "step": 3966 }, { "epoch": 0.007035591974673287, "grad_norm": 0.474609375, "learning_rate": 0.0019956722680775324, "loss": 0.2712, "step": 3968 }, { "epoch": 0.007039138139983103, "grad_norm": 0.65625, "learning_rate": 0.0019956664363108053, "loss": 0.3092, "step": 3970 }, { "epoch": 0.007042684305292917, "grad_norm": 0.65625, "learning_rate": 0.001995660600626953, "loss": 0.2978, "step": 3972 }, { "epoch": 0.007046230470602733, "grad_norm": 0.306640625, "learning_rate": 0.0019956547610260017, "loss": 0.251, "step": 3974 }, { "epoch": 0.007049776635912548, "grad_norm": 3.265625, "learning_rate": 0.0019956489175079768, "loss": 0.5655, "step": 3976 }, { "epoch": 0.0070533228012223635, "grad_norm": 0.365234375, "learning_rate": 0.0019956430700729037, "loss": 0.2985, "step": 3978 }, { "epoch": 0.007056868966532178, "grad_norm": 0.8671875, "learning_rate": 0.001995637218720808, "loss": 0.4254, "step": 3980 }, { "epoch": 0.0070604151318419935, "grad_norm": 1.1328125, "learning_rate": 0.0019956313634517152, "loss": 0.3667, "step": 3982 }, { "epoch": 0.007063961297151809, "grad_norm": 1.078125, "learning_rate": 0.0019956255042656514, "loss": 0.3232, "step": 3984 }, { "epoch": 0.0070675074624616235, "grad_norm": 0.54296875, "learning_rate": 0.0019956196411626418, "loss": 0.2812, "step": 3986 }, { "epoch": 0.007071053627771439, "grad_norm": 0.208984375, "learning_rate": 0.001995613774142712, "loss": 0.2535, "step": 3988 }, { "epoch": 0.007074599793081254, "grad_norm": 0.474609375, "learning_rate": 0.0019956079032058876, "loss": 0.3384, "step": 3990 }, { "epoch": 0.00707814595839107, "grad_norm": 0.6640625, "learning_rate": 0.001995602028352195, "loss": 0.2275, "step": 3992 }, { "epoch": 0.007081692123700884, "grad_norm": 0.3828125, "learning_rate": 0.001995596149581659, "loss": 0.2794, "step": 3994 }, { "epoch": 0.0070852382890107, "grad_norm": 0.251953125, "learning_rate": 0.001995590266894306, "loss": 0.2846, "step": 3996 }, { "epoch": 0.007088784454320515, "grad_norm": 0.423828125, "learning_rate": 0.001995584380290161, "loss": 0.328, "step": 3998 }, { "epoch": 0.00709233061963033, "grad_norm": 0.3671875, "learning_rate": 0.0019955784897692504, "loss": 0.2343, "step": 4000 }, { "epoch": 0.007095876784940145, "grad_norm": 0.287109375, "learning_rate": 0.0019955725953315997, "loss": 0.2368, "step": 4002 }, { "epoch": 0.0070994229502499605, "grad_norm": 0.828125, "learning_rate": 0.001995566696977234, "loss": 0.5125, "step": 4004 }, { "epoch": 0.007102969115559775, "grad_norm": 0.353515625, "learning_rate": 0.0019955607947061806, "loss": 0.3207, "step": 4006 }, { "epoch": 0.0071065152808695905, "grad_norm": 0.25390625, "learning_rate": 0.001995554888518464, "loss": 0.2598, "step": 4008 }, { "epoch": 0.007110061446179406, "grad_norm": 0.97265625, "learning_rate": 0.0019955489784141108, "loss": 0.2748, "step": 4010 }, { "epoch": 0.007113607611489221, "grad_norm": 0.345703125, "learning_rate": 0.0019955430643931464, "loss": 0.251, "step": 4012 }, { "epoch": 0.007117153776799036, "grad_norm": 0.376953125, "learning_rate": 0.001995537146455597, "loss": 0.2903, "step": 4014 }, { "epoch": 0.007120699942108851, "grad_norm": 0.65625, "learning_rate": 0.001995531224601488, "loss": 0.3316, "step": 4016 }, { "epoch": 0.007124246107418667, "grad_norm": 0.328125, "learning_rate": 0.001995525298830846, "loss": 0.269, "step": 4018 }, { "epoch": 0.007127792272728481, "grad_norm": 0.478515625, "learning_rate": 0.0019955193691436964, "loss": 0.3467, "step": 4020 }, { "epoch": 0.007131338438038297, "grad_norm": 0.392578125, "learning_rate": 0.0019955134355400654, "loss": 0.2788, "step": 4022 }, { "epoch": 0.007134884603348112, "grad_norm": 0.427734375, "learning_rate": 0.0019955074980199782, "loss": 0.2938, "step": 4024 }, { "epoch": 0.0071384307686579275, "grad_norm": 0.47265625, "learning_rate": 0.001995501556583462, "loss": 0.3235, "step": 4026 }, { "epoch": 0.007141976933967742, "grad_norm": 0.328125, "learning_rate": 0.0019954956112305418, "loss": 0.2632, "step": 4028 }, { "epoch": 0.0071455230992775575, "grad_norm": 1.4765625, "learning_rate": 0.0019954896619612445, "loss": 0.4317, "step": 4030 }, { "epoch": 0.007149069264587373, "grad_norm": 0.310546875, "learning_rate": 0.0019954837087755952, "loss": 0.3593, "step": 4032 }, { "epoch": 0.0071526154298971875, "grad_norm": 0.6484375, "learning_rate": 0.0019954777516736203, "loss": 0.4007, "step": 4034 }, { "epoch": 0.007156161595207003, "grad_norm": 0.271484375, "learning_rate": 0.001995471790655346, "loss": 0.2364, "step": 4036 }, { "epoch": 0.007159707760516818, "grad_norm": 1.2109375, "learning_rate": 0.0019954658257207982, "loss": 0.3287, "step": 4038 }, { "epoch": 0.007163253925826633, "grad_norm": 0.3125, "learning_rate": 0.0019954598568700027, "loss": 0.2472, "step": 4040 }, { "epoch": 0.007166800091136448, "grad_norm": 0.5078125, "learning_rate": 0.0019954538841029865, "loss": 0.2895, "step": 4042 }, { "epoch": 0.007170346256446264, "grad_norm": 0.318359375, "learning_rate": 0.0019954479074197743, "loss": 0.305, "step": 4044 }, { "epoch": 0.007173892421756079, "grad_norm": 0.3203125, "learning_rate": 0.001995441926820394, "loss": 0.2857, "step": 4046 }, { "epoch": 0.007177438587065894, "grad_norm": 0.65234375, "learning_rate": 0.0019954359423048702, "loss": 0.3389, "step": 4048 }, { "epoch": 0.007180984752375709, "grad_norm": 0.578125, "learning_rate": 0.00199542995387323, "loss": 0.3093, "step": 4050 }, { "epoch": 0.0071845309176855245, "grad_norm": 1.8046875, "learning_rate": 0.0019954239615254995, "loss": 0.3945, "step": 4052 }, { "epoch": 0.007188077082995339, "grad_norm": 0.419921875, "learning_rate": 0.0019954179652617045, "loss": 0.2524, "step": 4054 }, { "epoch": 0.0071916232483051545, "grad_norm": 0.291015625, "learning_rate": 0.0019954119650818715, "loss": 0.3658, "step": 4056 }, { "epoch": 0.00719516941361497, "grad_norm": 0.302734375, "learning_rate": 0.001995405960986027, "loss": 0.2964, "step": 4058 }, { "epoch": 0.007198715578924785, "grad_norm": 0.21484375, "learning_rate": 0.001995399952974196, "loss": 0.2584, "step": 4060 }, { "epoch": 0.0072022617442346, "grad_norm": 1.2421875, "learning_rate": 0.0019953939410464064, "loss": 0.4, "step": 4062 }, { "epoch": 0.007205807909544415, "grad_norm": 0.310546875, "learning_rate": 0.001995387925202684, "loss": 0.2817, "step": 4064 }, { "epoch": 0.007209354074854231, "grad_norm": 0.55859375, "learning_rate": 0.0019953819054430544, "loss": 0.3777, "step": 4066 }, { "epoch": 0.007212900240164045, "grad_norm": 0.3984375, "learning_rate": 0.0019953758817675446, "loss": 0.2462, "step": 4068 }, { "epoch": 0.007216446405473861, "grad_norm": 0.375, "learning_rate": 0.001995369854176181, "loss": 0.4524, "step": 4070 }, { "epoch": 0.007219992570783676, "grad_norm": 0.203125, "learning_rate": 0.00199536382266899, "loss": 0.2715, "step": 4072 }, { "epoch": 0.007223538736093491, "grad_norm": 0.453125, "learning_rate": 0.001995357787245997, "loss": 0.2618, "step": 4074 }, { "epoch": 0.007227084901403306, "grad_norm": 0.494140625, "learning_rate": 0.0019953517479072294, "loss": 0.4218, "step": 4076 }, { "epoch": 0.0072306310667131215, "grad_norm": 0.49609375, "learning_rate": 0.0019953457046527133, "loss": 0.2579, "step": 4078 }, { "epoch": 0.007234177232022937, "grad_norm": 0.294921875, "learning_rate": 0.001995339657482475, "loss": 0.2531, "step": 4080 }, { "epoch": 0.0072377233973327515, "grad_norm": 0.421875, "learning_rate": 0.0019953336063965417, "loss": 0.2742, "step": 4082 }, { "epoch": 0.007241269562642567, "grad_norm": 0.53125, "learning_rate": 0.001995327551394939, "loss": 0.2189, "step": 4084 }, { "epoch": 0.007244815727952382, "grad_norm": 0.984375, "learning_rate": 0.0019953214924776936, "loss": 0.3908, "step": 4086 }, { "epoch": 0.007248361893262197, "grad_norm": 0.263671875, "learning_rate": 0.001995315429644832, "loss": 0.2772, "step": 4088 }, { "epoch": 0.007251908058572012, "grad_norm": 1.203125, "learning_rate": 0.001995309362896381, "loss": 0.3453, "step": 4090 }, { "epoch": 0.007255454223881828, "grad_norm": 0.859375, "learning_rate": 0.0019953032922323667, "loss": 0.3992, "step": 4092 }, { "epoch": 0.007259000389191643, "grad_norm": 0.51953125, "learning_rate": 0.001995297217652816, "loss": 0.3004, "step": 4094 }, { "epoch": 0.007262546554501458, "grad_norm": 1.1015625, "learning_rate": 0.0019952911391577554, "loss": 0.5569, "step": 4096 }, { "epoch": 0.007266092719811273, "grad_norm": 0.388671875, "learning_rate": 0.001995285056747211, "loss": 0.3472, "step": 4098 }, { "epoch": 0.0072696388851210885, "grad_norm": 0.77734375, "learning_rate": 0.00199527897042121, "loss": 0.4015, "step": 4100 }, { "epoch": 0.007273185050430903, "grad_norm": 0.365234375, "learning_rate": 0.0019952728801797786, "loss": 0.264, "step": 4102 }, { "epoch": 0.0072767312157407185, "grad_norm": 0.5546875, "learning_rate": 0.001995266786022944, "loss": 0.38, "step": 4104 }, { "epoch": 0.007280277381050534, "grad_norm": 0.259765625, "learning_rate": 0.0019952606879507324, "loss": 0.2326, "step": 4106 }, { "epoch": 0.0072838235463603485, "grad_norm": 0.53515625, "learning_rate": 0.001995254585963171, "loss": 0.2613, "step": 4108 }, { "epoch": 0.007287369711670164, "grad_norm": 0.341796875, "learning_rate": 0.0019952484800602856, "loss": 0.2423, "step": 4110 }, { "epoch": 0.007290915876979979, "grad_norm": 1.046875, "learning_rate": 0.0019952423702421034, "loss": 0.2558, "step": 4112 }, { "epoch": 0.007294462042289795, "grad_norm": 0.30859375, "learning_rate": 0.001995236256508651, "loss": 0.2847, "step": 4114 }, { "epoch": 0.007298008207599609, "grad_norm": 0.470703125, "learning_rate": 0.0019952301388599554, "loss": 0.3737, "step": 4116 }, { "epoch": 0.007301554372909425, "grad_norm": 0.30859375, "learning_rate": 0.0019952240172960434, "loss": 0.2665, "step": 4118 }, { "epoch": 0.00730510053821924, "grad_norm": 0.67578125, "learning_rate": 0.001995217891816941, "loss": 0.2976, "step": 4120 }, { "epoch": 0.007308646703529055, "grad_norm": 0.796875, "learning_rate": 0.001995211762422676, "loss": 0.3633, "step": 4122 }, { "epoch": 0.00731219286883887, "grad_norm": 0.6015625, "learning_rate": 0.001995205629113275, "loss": 0.2471, "step": 4124 }, { "epoch": 0.0073157390341486855, "grad_norm": 0.82421875, "learning_rate": 0.001995199491888764, "loss": 0.3493, "step": 4126 }, { "epoch": 0.007319285199458501, "grad_norm": 0.55078125, "learning_rate": 0.0019951933507491703, "loss": 0.2472, "step": 4128 }, { "epoch": 0.0073228313647683155, "grad_norm": 0.34765625, "learning_rate": 0.001995187205694521, "loss": 0.2807, "step": 4130 }, { "epoch": 0.007326377530078131, "grad_norm": 0.703125, "learning_rate": 0.001995181056724843, "loss": 0.2845, "step": 4132 }, { "epoch": 0.007329923695387946, "grad_norm": 0.57421875, "learning_rate": 0.0019951749038401635, "loss": 0.4949, "step": 4134 }, { "epoch": 0.007333469860697761, "grad_norm": 2.03125, "learning_rate": 0.0019951687470405083, "loss": 0.2666, "step": 4136 }, { "epoch": 0.007337016026007576, "grad_norm": 1.2890625, "learning_rate": 0.0019951625863259053, "loss": 0.3614, "step": 4138 }, { "epoch": 0.007340562191317392, "grad_norm": 0.427734375, "learning_rate": 0.001995156421696381, "loss": 0.3208, "step": 4140 }, { "epoch": 0.007344108356627206, "grad_norm": 0.255859375, "learning_rate": 0.0019951502531519627, "loss": 0.2495, "step": 4142 }, { "epoch": 0.007347654521937022, "grad_norm": 0.58984375, "learning_rate": 0.001995144080692677, "loss": 0.3306, "step": 4144 }, { "epoch": 0.007351200687246837, "grad_norm": 0.4921875, "learning_rate": 0.0019951379043185507, "loss": 0.3835, "step": 4146 }, { "epoch": 0.0073547468525566526, "grad_norm": 0.439453125, "learning_rate": 0.0019951317240296117, "loss": 0.4441, "step": 4148 }, { "epoch": 0.007358293017866467, "grad_norm": 0.6328125, "learning_rate": 0.001995125539825886, "loss": 0.2542, "step": 4150 }, { "epoch": 0.0073618391831762825, "grad_norm": 0.369140625, "learning_rate": 0.0019951193517074015, "loss": 0.3727, "step": 4152 }, { "epoch": 0.007365385348486098, "grad_norm": 0.228515625, "learning_rate": 0.0019951131596741846, "loss": 0.2044, "step": 4154 }, { "epoch": 0.0073689315137959125, "grad_norm": 0.6171875, "learning_rate": 0.0019951069637262633, "loss": 0.2646, "step": 4156 }, { "epoch": 0.007372477679105728, "grad_norm": 0.4296875, "learning_rate": 0.0019951007638636634, "loss": 0.3063, "step": 4158 }, { "epoch": 0.007376023844415543, "grad_norm": 0.80078125, "learning_rate": 0.001995094560086413, "loss": 0.5668, "step": 4160 }, { "epoch": 0.007379570009725359, "grad_norm": 0.28515625, "learning_rate": 0.001995088352394539, "loss": 0.331, "step": 4162 }, { "epoch": 0.007383116175035173, "grad_norm": 0.578125, "learning_rate": 0.0019950821407880683, "loss": 0.2038, "step": 4164 }, { "epoch": 0.007386662340344989, "grad_norm": 0.859375, "learning_rate": 0.001995075925267028, "loss": 0.275, "step": 4166 }, { "epoch": 0.007390208505654804, "grad_norm": 0.39453125, "learning_rate": 0.001995069705831446, "loss": 0.291, "step": 4168 }, { "epoch": 0.007393754670964619, "grad_norm": 0.23046875, "learning_rate": 0.0019950634824813488, "loss": 0.2342, "step": 4170 }, { "epoch": 0.007397300836274434, "grad_norm": 0.3984375, "learning_rate": 0.0019950572552167637, "loss": 0.2628, "step": 4172 }, { "epoch": 0.0074008470015842496, "grad_norm": 0.337890625, "learning_rate": 0.0019950510240377187, "loss": 0.2843, "step": 4174 }, { "epoch": 0.007404393166894064, "grad_norm": 0.54296875, "learning_rate": 0.00199504478894424, "loss": 0.3176, "step": 4176 }, { "epoch": 0.0074079393322038795, "grad_norm": 0.333984375, "learning_rate": 0.0019950385499363553, "loss": 0.2461, "step": 4178 }, { "epoch": 0.007411485497513695, "grad_norm": 0.703125, "learning_rate": 0.0019950323070140915, "loss": 0.4238, "step": 4180 }, { "epoch": 0.00741503166282351, "grad_norm": 0.265625, "learning_rate": 0.0019950260601774767, "loss": 0.1986, "step": 4182 }, { "epoch": 0.007418577828133325, "grad_norm": 0.357421875, "learning_rate": 0.0019950198094265377, "loss": 0.2889, "step": 4184 }, { "epoch": 0.00742212399344314, "grad_norm": 3.609375, "learning_rate": 0.0019950135547613023, "loss": 0.3667, "step": 4186 }, { "epoch": 0.007425670158752956, "grad_norm": 0.4140625, "learning_rate": 0.001995007296181797, "loss": 0.2688, "step": 4188 }, { "epoch": 0.00742921632406277, "grad_norm": 0.2265625, "learning_rate": 0.00199500103368805, "loss": 0.2974, "step": 4190 }, { "epoch": 0.007432762489372586, "grad_norm": 0.408203125, "learning_rate": 0.0019949947672800884, "loss": 0.3078, "step": 4192 }, { "epoch": 0.007436308654682401, "grad_norm": 0.7421875, "learning_rate": 0.0019949884969579393, "loss": 0.2632, "step": 4194 }, { "epoch": 0.007439854819992217, "grad_norm": 0.83984375, "learning_rate": 0.0019949822227216304, "loss": 0.3044, "step": 4196 }, { "epoch": 0.007443400985302031, "grad_norm": 0.64453125, "learning_rate": 0.0019949759445711895, "loss": 0.2697, "step": 4198 }, { "epoch": 0.0074469471506118465, "grad_norm": 0.341796875, "learning_rate": 0.001994969662506643, "loss": 0.2577, "step": 4200 }, { "epoch": 0.007450493315921662, "grad_norm": 0.474609375, "learning_rate": 0.00199496337652802, "loss": 0.3347, "step": 4202 }, { "epoch": 0.0074540394812314765, "grad_norm": 0.35546875, "learning_rate": 0.0019949570866353464, "loss": 0.3018, "step": 4204 }, { "epoch": 0.007457585646541292, "grad_norm": 0.5546875, "learning_rate": 0.0019949507928286505, "loss": 0.2547, "step": 4206 }, { "epoch": 0.007461131811851107, "grad_norm": 0.91796875, "learning_rate": 0.001994944495107959, "loss": 0.4746, "step": 4208 }, { "epoch": 0.007464677977160922, "grad_norm": 0.546875, "learning_rate": 0.001994938193473301, "loss": 0.2632, "step": 4210 }, { "epoch": 0.007468224142470737, "grad_norm": 0.2294921875, "learning_rate": 0.001994931887924703, "loss": 0.2509, "step": 4212 }, { "epoch": 0.007471770307780553, "grad_norm": 0.30859375, "learning_rate": 0.001994925578462193, "loss": 0.2849, "step": 4214 }, { "epoch": 0.007475316473090368, "grad_norm": 0.263671875, "learning_rate": 0.001994919265085798, "loss": 0.2497, "step": 4216 }, { "epoch": 0.007478862638400183, "grad_norm": 0.345703125, "learning_rate": 0.0019949129477955458, "loss": 0.2916, "step": 4218 }, { "epoch": 0.007482408803709998, "grad_norm": 0.26953125, "learning_rate": 0.0019949066265914648, "loss": 0.2228, "step": 4220 }, { "epoch": 0.007485954969019814, "grad_norm": 0.28125, "learning_rate": 0.0019949003014735813, "loss": 0.2561, "step": 4222 }, { "epoch": 0.007489501134329628, "grad_norm": 0.490234375, "learning_rate": 0.001994893972441924, "loss": 0.3268, "step": 4224 }, { "epoch": 0.0074930472996394435, "grad_norm": 1.4609375, "learning_rate": 0.0019948876394965207, "loss": 0.5349, "step": 4226 }, { "epoch": 0.007496593464949259, "grad_norm": 0.38671875, "learning_rate": 0.0019948813026373978, "loss": 0.4591, "step": 4228 }, { "epoch": 0.007500139630259074, "grad_norm": 0.58984375, "learning_rate": 0.0019948749618645843, "loss": 0.2819, "step": 4230 }, { "epoch": 0.007503685795568889, "grad_norm": 0.396484375, "learning_rate": 0.001994868617178108, "loss": 0.2577, "step": 4232 }, { "epoch": 0.007507231960878704, "grad_norm": 1.75, "learning_rate": 0.001994862268577996, "loss": 0.2379, "step": 4234 }, { "epoch": 0.00751077812618852, "grad_norm": 0.4140625, "learning_rate": 0.001994855916064276, "loss": 0.325, "step": 4236 }, { "epoch": 0.007514324291498334, "grad_norm": 0.267578125, "learning_rate": 0.0019948495596369756, "loss": 0.2582, "step": 4238 }, { "epoch": 0.00751787045680815, "grad_norm": 0.259765625, "learning_rate": 0.001994843199296124, "loss": 0.3427, "step": 4240 }, { "epoch": 0.007521416622117965, "grad_norm": 0.6171875, "learning_rate": 0.0019948368350417474, "loss": 0.2594, "step": 4242 }, { "epoch": 0.00752496278742778, "grad_norm": 0.482421875, "learning_rate": 0.001994830466873874, "loss": 0.2235, "step": 4244 }, { "epoch": 0.007528508952737595, "grad_norm": 0.62109375, "learning_rate": 0.0019948240947925324, "loss": 0.3591, "step": 4246 }, { "epoch": 0.0075320551180474106, "grad_norm": 0.5546875, "learning_rate": 0.00199481771879775, "loss": 0.2935, "step": 4248 }, { "epoch": 0.007535601283357226, "grad_norm": 0.333984375, "learning_rate": 0.0019948113388895544, "loss": 0.249, "step": 4250 }, { "epoch": 0.0075391474486670405, "grad_norm": 0.7109375, "learning_rate": 0.0019948049550679737, "loss": 0.2767, "step": 4252 }, { "epoch": 0.007542693613976856, "grad_norm": 0.361328125, "learning_rate": 0.0019947985673330363, "loss": 0.3592, "step": 4254 }, { "epoch": 0.007546239779286671, "grad_norm": 0.46484375, "learning_rate": 0.0019947921756847697, "loss": 0.374, "step": 4256 }, { "epoch": 0.007549785944596486, "grad_norm": 0.291015625, "learning_rate": 0.0019947857801232015, "loss": 0.3037, "step": 4258 }, { "epoch": 0.007553332109906301, "grad_norm": 3.59375, "learning_rate": 0.0019947793806483604, "loss": 0.6666, "step": 4260 }, { "epoch": 0.007556878275216117, "grad_norm": 0.341796875, "learning_rate": 0.0019947729772602737, "loss": 0.221, "step": 4262 }, { "epoch": 0.007560424440525932, "grad_norm": 0.53515625, "learning_rate": 0.00199476656995897, "loss": 0.2994, "step": 4264 }, { "epoch": 0.007563970605835747, "grad_norm": 0.5859375, "learning_rate": 0.001994760158744477, "loss": 0.2465, "step": 4266 }, { "epoch": 0.007567516771145562, "grad_norm": 1.78125, "learning_rate": 0.001994753743616823, "loss": 0.2681, "step": 4268 }, { "epoch": 0.007571062936455378, "grad_norm": 0.484375, "learning_rate": 0.0019947473245760356, "loss": 0.4576, "step": 4270 }, { "epoch": 0.007574609101765192, "grad_norm": 0.5625, "learning_rate": 0.0019947409016221433, "loss": 0.2747, "step": 4272 }, { "epoch": 0.0075781552670750076, "grad_norm": 0.69921875, "learning_rate": 0.0019947344747551737, "loss": 0.4057, "step": 4274 }, { "epoch": 0.007581701432384823, "grad_norm": 1.6796875, "learning_rate": 0.001994728043975156, "loss": 0.3244, "step": 4276 }, { "epoch": 0.0075852475976946375, "grad_norm": 0.578125, "learning_rate": 0.0019947216092821166, "loss": 0.2432, "step": 4278 }, { "epoch": 0.007588793763004453, "grad_norm": 0.494140625, "learning_rate": 0.001994715170676085, "loss": 0.4124, "step": 4280 }, { "epoch": 0.007592339928314268, "grad_norm": 1.8203125, "learning_rate": 0.0019947087281570893, "loss": 0.3114, "step": 4282 }, { "epoch": 0.007595886093624084, "grad_norm": 0.263671875, "learning_rate": 0.001994702281725157, "loss": 0.266, "step": 4284 }, { "epoch": 0.007599432258933898, "grad_norm": 0.625, "learning_rate": 0.0019946958313803165, "loss": 0.2568, "step": 4286 }, { "epoch": 0.007602978424243714, "grad_norm": 0.6328125, "learning_rate": 0.001994689377122596, "loss": 0.3541, "step": 4288 }, { "epoch": 0.007606524589553529, "grad_norm": 0.3125, "learning_rate": 0.001994682918952024, "loss": 0.2765, "step": 4290 }, { "epoch": 0.007610070754863344, "grad_norm": 0.86328125, "learning_rate": 0.0019946764568686288, "loss": 0.3133, "step": 4292 }, { "epoch": 0.007613616920173159, "grad_norm": 0.416015625, "learning_rate": 0.0019946699908724385, "loss": 0.2714, "step": 4294 }, { "epoch": 0.007617163085482975, "grad_norm": 0.31640625, "learning_rate": 0.0019946635209634814, "loss": 0.2529, "step": 4296 }, { "epoch": 0.00762070925079279, "grad_norm": 0.60546875, "learning_rate": 0.0019946570471417856, "loss": 0.4352, "step": 4298 }, { "epoch": 0.0076242554161026046, "grad_norm": 0.28125, "learning_rate": 0.0019946505694073795, "loss": 0.2255, "step": 4300 }, { "epoch": 0.00762780158141242, "grad_norm": 0.58984375, "learning_rate": 0.0019946440877602915, "loss": 0.3296, "step": 4302 }, { "epoch": 0.007631347746722235, "grad_norm": 0.25, "learning_rate": 0.00199463760220055, "loss": 0.3566, "step": 4304 }, { "epoch": 0.00763489391203205, "grad_norm": 0.400390625, "learning_rate": 0.0019946311127281833, "loss": 0.2687, "step": 4306 }, { "epoch": 0.007638440077341865, "grad_norm": 0.21484375, "learning_rate": 0.0019946246193432195, "loss": 0.2209, "step": 4308 }, { "epoch": 0.007641986242651681, "grad_norm": 0.375, "learning_rate": 0.0019946181220456874, "loss": 0.2465, "step": 4310 }, { "epoch": 0.007645532407961495, "grad_norm": 0.73046875, "learning_rate": 0.0019946116208356154, "loss": 0.2641, "step": 4312 }, { "epoch": 0.007649078573271311, "grad_norm": 0.283203125, "learning_rate": 0.001994605115713032, "loss": 0.2838, "step": 4314 }, { "epoch": 0.007652624738581126, "grad_norm": 0.287109375, "learning_rate": 0.0019945986066779654, "loss": 0.3213, "step": 4316 }, { "epoch": 0.007656170903890942, "grad_norm": 0.27734375, "learning_rate": 0.001994592093730444, "loss": 0.2789, "step": 4318 }, { "epoch": 0.007659717069200756, "grad_norm": 1.0390625, "learning_rate": 0.0019945855768704964, "loss": 0.4544, "step": 4320 }, { "epoch": 0.007663263234510572, "grad_norm": 0.38671875, "learning_rate": 0.001994579056098151, "loss": 0.2123, "step": 4322 }, { "epoch": 0.007666809399820387, "grad_norm": 0.33984375, "learning_rate": 0.0019945725314134367, "loss": 0.253, "step": 4324 }, { "epoch": 0.0076703555651302015, "grad_norm": 0.2021484375, "learning_rate": 0.001994566002816382, "loss": 0.2218, "step": 4326 }, { "epoch": 0.007673901730440017, "grad_norm": 0.64453125, "learning_rate": 0.0019945594703070146, "loss": 0.2866, "step": 4328 }, { "epoch": 0.007677447895749832, "grad_norm": 0.80859375, "learning_rate": 0.001994552933885364, "loss": 0.2576, "step": 4330 }, { "epoch": 0.007680994061059648, "grad_norm": 1.015625, "learning_rate": 0.0019945463935514586, "loss": 0.2613, "step": 4332 }, { "epoch": 0.007684540226369462, "grad_norm": 0.453125, "learning_rate": 0.0019945398493053266, "loss": 0.4501, "step": 4334 }, { "epoch": 0.007688086391679278, "grad_norm": 0.98046875, "learning_rate": 0.0019945333011469973, "loss": 0.2772, "step": 4336 }, { "epoch": 0.007691632556989093, "grad_norm": 0.333984375, "learning_rate": 0.0019945267490764987, "loss": 0.247, "step": 4338 }, { "epoch": 0.007695178722298908, "grad_norm": 0.53125, "learning_rate": 0.00199452019309386, "loss": 0.2608, "step": 4340 }, { "epoch": 0.007698724887608723, "grad_norm": 4.09375, "learning_rate": 0.0019945136331991093, "loss": 0.364, "step": 4342 }, { "epoch": 0.007702271052918539, "grad_norm": 0.220703125, "learning_rate": 0.0019945070693922757, "loss": 0.2504, "step": 4344 }, { "epoch": 0.007705817218228353, "grad_norm": 0.3515625, "learning_rate": 0.0019945005016733875, "loss": 0.2802, "step": 4346 }, { "epoch": 0.0077093633835381686, "grad_norm": 0.37890625, "learning_rate": 0.001994493930042474, "loss": 0.2524, "step": 4348 }, { "epoch": 0.007712909548847984, "grad_norm": 0.7265625, "learning_rate": 0.001994487354499563, "loss": 0.2936, "step": 4350 }, { "epoch": 0.007716455714157799, "grad_norm": 1.2890625, "learning_rate": 0.0019944807750446845, "loss": 0.4244, "step": 4352 }, { "epoch": 0.007720001879467614, "grad_norm": 0.68359375, "learning_rate": 0.0019944741916778667, "loss": 0.3, "step": 4354 }, { "epoch": 0.007723548044777429, "grad_norm": 0.279296875, "learning_rate": 0.001994467604399138, "loss": 0.2836, "step": 4356 }, { "epoch": 0.007727094210087245, "grad_norm": 0.57421875, "learning_rate": 0.001994461013208528, "loss": 0.3151, "step": 4358 }, { "epoch": 0.007730640375397059, "grad_norm": 0.37109375, "learning_rate": 0.001994454418106065, "loss": 0.3651, "step": 4360 }, { "epoch": 0.007734186540706875, "grad_norm": 0.87109375, "learning_rate": 0.001994447819091778, "loss": 0.4105, "step": 4362 }, { "epoch": 0.00773773270601669, "grad_norm": 1.25, "learning_rate": 0.001994441216165695, "loss": 0.2935, "step": 4364 }, { "epoch": 0.007741278871326506, "grad_norm": 0.34765625, "learning_rate": 0.0019944346093278466, "loss": 0.2232, "step": 4366 }, { "epoch": 0.00774482503663632, "grad_norm": 0.69921875, "learning_rate": 0.0019944279985782605, "loss": 0.3407, "step": 4368 }, { "epoch": 0.007748371201946136, "grad_norm": 2.015625, "learning_rate": 0.0019944213839169656, "loss": 0.2508, "step": 4370 }, { "epoch": 0.007751917367255951, "grad_norm": 0.625, "learning_rate": 0.0019944147653439917, "loss": 0.3536, "step": 4372 }, { "epoch": 0.0077554635325657656, "grad_norm": 0.345703125, "learning_rate": 0.0019944081428593667, "loss": 0.4257, "step": 4374 }, { "epoch": 0.007759009697875581, "grad_norm": 0.294921875, "learning_rate": 0.00199440151646312, "loss": 0.2985, "step": 4376 }, { "epoch": 0.007762555863185396, "grad_norm": 0.337890625, "learning_rate": 0.0019943948861552807, "loss": 0.2368, "step": 4378 }, { "epoch": 0.007766102028495211, "grad_norm": 0.251953125, "learning_rate": 0.0019943882519358777, "loss": 0.2776, "step": 4380 }, { "epoch": 0.007769648193805026, "grad_norm": 0.3203125, "learning_rate": 0.00199438161380494, "loss": 0.2795, "step": 4382 }, { "epoch": 0.007773194359114842, "grad_norm": 0.330078125, "learning_rate": 0.0019943749717624966, "loss": 0.2514, "step": 4384 }, { "epoch": 0.007776740524424657, "grad_norm": 0.31640625, "learning_rate": 0.0019943683258085766, "loss": 0.2623, "step": 4386 }, { "epoch": 0.007780286689734472, "grad_norm": 0.5078125, "learning_rate": 0.001994361675943209, "loss": 0.5157, "step": 4388 }, { "epoch": 0.007783832855044287, "grad_norm": 0.5390625, "learning_rate": 0.001994355022166423, "loss": 0.374, "step": 4390 }, { "epoch": 0.007787379020354103, "grad_norm": 0.609375, "learning_rate": 0.001994348364478248, "loss": 0.3461, "step": 4392 }, { "epoch": 0.007790925185663917, "grad_norm": 0.318359375, "learning_rate": 0.001994341702878712, "loss": 0.2688, "step": 4394 }, { "epoch": 0.007794471350973733, "grad_norm": 2.609375, "learning_rate": 0.0019943350373678452, "loss": 0.4908, "step": 4396 }, { "epoch": 0.007798017516283548, "grad_norm": 0.1962890625, "learning_rate": 0.0019943283679456766, "loss": 0.2465, "step": 4398 }, { "epoch": 0.007801563681593363, "grad_norm": 0.58984375, "learning_rate": 0.001994321694612235, "loss": 0.2208, "step": 4400 }, { "epoch": 0.007805109846903178, "grad_norm": 2.109375, "learning_rate": 0.00199431501736755, "loss": 0.3789, "step": 4402 }, { "epoch": 0.007808656012212993, "grad_norm": 0.96484375, "learning_rate": 0.0019943083362116507, "loss": 0.2863, "step": 4404 }, { "epoch": 0.007812202177522809, "grad_norm": 0.31640625, "learning_rate": 0.001994301651144566, "loss": 0.2622, "step": 4406 }, { "epoch": 0.007815748342832624, "grad_norm": 0.255859375, "learning_rate": 0.0019942949621663255, "loss": 0.3736, "step": 4408 }, { "epoch": 0.007819294508142439, "grad_norm": 0.2890625, "learning_rate": 0.0019942882692769582, "loss": 0.2169, "step": 4410 }, { "epoch": 0.007822840673452253, "grad_norm": 0.8984375, "learning_rate": 0.0019942815724764934, "loss": 0.3357, "step": 4412 }, { "epoch": 0.00782638683876207, "grad_norm": 0.80078125, "learning_rate": 0.0019942748717649604, "loss": 0.2929, "step": 4414 }, { "epoch": 0.007829933004071884, "grad_norm": 0.271484375, "learning_rate": 0.001994268167142389, "loss": 0.2081, "step": 4416 }, { "epoch": 0.007833479169381699, "grad_norm": 0.306640625, "learning_rate": 0.001994261458608808, "loss": 0.3851, "step": 4418 }, { "epoch": 0.007837025334691515, "grad_norm": 0.267578125, "learning_rate": 0.0019942547461642463, "loss": 0.2196, "step": 4420 }, { "epoch": 0.00784057150000133, "grad_norm": 1.34375, "learning_rate": 0.001994248029808734, "loss": 0.3409, "step": 4422 }, { "epoch": 0.007844117665311144, "grad_norm": 0.9609375, "learning_rate": 0.0019942413095423005, "loss": 0.2777, "step": 4424 }, { "epoch": 0.00784766383062096, "grad_norm": 0.5234375, "learning_rate": 0.001994234585364975, "loss": 0.2788, "step": 4426 }, { "epoch": 0.007851209995930775, "grad_norm": 0.7890625, "learning_rate": 0.0019942278572767863, "loss": 0.2346, "step": 4428 }, { "epoch": 0.007854756161240591, "grad_norm": 0.796875, "learning_rate": 0.0019942211252777647, "loss": 0.5888, "step": 4430 }, { "epoch": 0.007858302326550406, "grad_norm": 0.37890625, "learning_rate": 0.0019942143893679396, "loss": 0.2735, "step": 4432 }, { "epoch": 0.00786184849186022, "grad_norm": 0.392578125, "learning_rate": 0.00199420764954734, "loss": 0.3055, "step": 4434 }, { "epoch": 0.007865394657170037, "grad_norm": 0.443359375, "learning_rate": 0.0019942009058159954, "loss": 0.272, "step": 4436 }, { "epoch": 0.007868940822479851, "grad_norm": 0.1787109375, "learning_rate": 0.0019941941581739357, "loss": 0.2479, "step": 4438 }, { "epoch": 0.007872486987789666, "grad_norm": 0.388671875, "learning_rate": 0.00199418740662119, "loss": 0.3031, "step": 4440 }, { "epoch": 0.007876033153099482, "grad_norm": 0.337890625, "learning_rate": 0.001994180651157788, "loss": 0.2757, "step": 4442 }, { "epoch": 0.007879579318409297, "grad_norm": 0.73046875, "learning_rate": 0.001994173891783759, "loss": 0.4637, "step": 4444 }, { "epoch": 0.007883125483719111, "grad_norm": 0.353515625, "learning_rate": 0.001994167128499133, "loss": 0.3172, "step": 4446 }, { "epoch": 0.007886671649028927, "grad_norm": 1.2890625, "learning_rate": 0.0019941603613039395, "loss": 0.2998, "step": 4448 }, { "epoch": 0.007890217814338742, "grad_norm": 0.2734375, "learning_rate": 0.001994153590198208, "loss": 0.2892, "step": 4450 }, { "epoch": 0.007893763979648557, "grad_norm": 0.6328125, "learning_rate": 0.001994146815181968, "loss": 0.3114, "step": 4452 }, { "epoch": 0.007897310144958373, "grad_norm": 0.3671875, "learning_rate": 0.0019941400362552494, "loss": 0.3865, "step": 4454 }, { "epoch": 0.007900856310268187, "grad_norm": 0.341796875, "learning_rate": 0.0019941332534180816, "loss": 0.2601, "step": 4456 }, { "epoch": 0.007904402475578002, "grad_norm": 0.302734375, "learning_rate": 0.0019941264666704936, "loss": 0.2924, "step": 4458 }, { "epoch": 0.007907948640887818, "grad_norm": 0.62890625, "learning_rate": 0.0019941196760125167, "loss": 0.2698, "step": 4460 }, { "epoch": 0.007911494806197633, "grad_norm": 0.388671875, "learning_rate": 0.001994112881444179, "loss": 0.3651, "step": 4462 }, { "epoch": 0.007915040971507449, "grad_norm": 0.36328125, "learning_rate": 0.0019941060829655115, "loss": 0.2912, "step": 4464 }, { "epoch": 0.007918587136817264, "grad_norm": 0.58203125, "learning_rate": 0.0019940992805765434, "loss": 0.2526, "step": 4466 }, { "epoch": 0.007922133302127078, "grad_norm": 0.2158203125, "learning_rate": 0.001994092474277304, "loss": 0.2764, "step": 4468 }, { "epoch": 0.007925679467436894, "grad_norm": 1.125, "learning_rate": 0.0019940856640678233, "loss": 0.2922, "step": 4470 }, { "epoch": 0.007929225632746709, "grad_norm": 0.263671875, "learning_rate": 0.001994078849948132, "loss": 0.2557, "step": 4472 }, { "epoch": 0.007932771798056524, "grad_norm": 0.61328125, "learning_rate": 0.0019940720319182583, "loss": 0.2532, "step": 4474 }, { "epoch": 0.00793631796336634, "grad_norm": 1.109375, "learning_rate": 0.0019940652099782333, "loss": 0.3192, "step": 4476 }, { "epoch": 0.007939864128676154, "grad_norm": 0.32421875, "learning_rate": 0.0019940583841280865, "loss": 0.2245, "step": 4478 }, { "epoch": 0.007943410293985969, "grad_norm": 0.494140625, "learning_rate": 0.001994051554367848, "loss": 0.3043, "step": 4480 }, { "epoch": 0.007946956459295785, "grad_norm": 1.046875, "learning_rate": 0.0019940447206975467, "loss": 0.2861, "step": 4482 }, { "epoch": 0.0079505026246056, "grad_norm": 0.416015625, "learning_rate": 0.001994037883117213, "loss": 0.2432, "step": 4484 }, { "epoch": 0.007954048789915414, "grad_norm": 0.337890625, "learning_rate": 0.001994031041626877, "loss": 0.2839, "step": 4486 }, { "epoch": 0.00795759495522523, "grad_norm": 0.73828125, "learning_rate": 0.001994024196226569, "loss": 0.3339, "step": 4488 }, { "epoch": 0.007961141120535045, "grad_norm": 0.435546875, "learning_rate": 0.0019940173469163184, "loss": 0.2935, "step": 4490 }, { "epoch": 0.00796468728584486, "grad_norm": 0.37109375, "learning_rate": 0.001994010493696155, "loss": 0.2689, "step": 4492 }, { "epoch": 0.007968233451154676, "grad_norm": 1.1796875, "learning_rate": 0.001994003636566109, "loss": 0.614, "step": 4494 }, { "epoch": 0.00797177961646449, "grad_norm": 0.53125, "learning_rate": 0.0019939967755262106, "loss": 0.4595, "step": 4496 }, { "epoch": 0.007975325781774307, "grad_norm": 0.44921875, "learning_rate": 0.001993989910576489, "loss": 0.3048, "step": 4498 }, { "epoch": 0.007978871947084121, "grad_norm": 0.484375, "learning_rate": 0.0019939830417169757, "loss": 0.3015, "step": 4500 }, { "epoch": 0.007982418112393936, "grad_norm": 0.9921875, "learning_rate": 0.0019939761689476993, "loss": 0.3926, "step": 4502 }, { "epoch": 0.007985964277703752, "grad_norm": 0.30859375, "learning_rate": 0.0019939692922686905, "loss": 0.2454, "step": 4504 }, { "epoch": 0.007989510443013567, "grad_norm": 2.28125, "learning_rate": 0.0019939624116799793, "loss": 0.2455, "step": 4506 }, { "epoch": 0.007993056608323381, "grad_norm": 0.306640625, "learning_rate": 0.001993955527181596, "loss": 0.2571, "step": 4508 }, { "epoch": 0.007996602773633198, "grad_norm": 0.8125, "learning_rate": 0.0019939486387735702, "loss": 0.4529, "step": 4510 }, { "epoch": 0.008000148938943012, "grad_norm": 0.265625, "learning_rate": 0.0019939417464559326, "loss": 0.2354, "step": 4512 }, { "epoch": 0.008003695104252827, "grad_norm": 0.3984375, "learning_rate": 0.001993934850228713, "loss": 0.2899, "step": 4514 }, { "epoch": 0.008007241269562643, "grad_norm": 0.314453125, "learning_rate": 0.0019939279500919417, "loss": 0.2149, "step": 4516 }, { "epoch": 0.008010787434872458, "grad_norm": 0.2734375, "learning_rate": 0.0019939210460456487, "loss": 0.2415, "step": 4518 }, { "epoch": 0.008014333600182272, "grad_norm": 0.29296875, "learning_rate": 0.001993914138089864, "loss": 0.3355, "step": 4520 }, { "epoch": 0.008017879765492088, "grad_norm": 1.1328125, "learning_rate": 0.001993907226224618, "loss": 0.3126, "step": 4522 }, { "epoch": 0.008021425930801903, "grad_norm": 0.69921875, "learning_rate": 0.0019939003104499416, "loss": 0.2445, "step": 4524 }, { "epoch": 0.008024972096111718, "grad_norm": 0.4375, "learning_rate": 0.0019938933907658646, "loss": 0.2574, "step": 4526 }, { "epoch": 0.008028518261421534, "grad_norm": 1.421875, "learning_rate": 0.0019938864671724165, "loss": 0.324, "step": 4528 }, { "epoch": 0.008032064426731348, "grad_norm": 0.38671875, "learning_rate": 0.001993879539669629, "loss": 0.2308, "step": 4530 }, { "epoch": 0.008035610592041165, "grad_norm": 2.21875, "learning_rate": 0.001993872608257531, "loss": 0.2725, "step": 4532 }, { "epoch": 0.00803915675735098, "grad_norm": 0.96484375, "learning_rate": 0.0019938656729361535, "loss": 0.325, "step": 4534 }, { "epoch": 0.008042702922660794, "grad_norm": 0.18359375, "learning_rate": 0.001993858733705527, "loss": 0.3296, "step": 4536 }, { "epoch": 0.00804624908797061, "grad_norm": 0.349609375, "learning_rate": 0.0019938517905656815, "loss": 0.3021, "step": 4538 }, { "epoch": 0.008049795253280425, "grad_norm": 0.35546875, "learning_rate": 0.0019938448435166474, "loss": 0.3478, "step": 4540 }, { "epoch": 0.00805334141859024, "grad_norm": 0.212890625, "learning_rate": 0.001993837892558455, "loss": 0.3006, "step": 4542 }, { "epoch": 0.008056887583900055, "grad_norm": 0.5703125, "learning_rate": 0.001993830937691135, "loss": 0.2103, "step": 4544 }, { "epoch": 0.00806043374920987, "grad_norm": 0.671875, "learning_rate": 0.001993823978914718, "loss": 0.2476, "step": 4546 }, { "epoch": 0.008063979914519685, "grad_norm": 0.4296875, "learning_rate": 0.001993817016229234, "loss": 0.2564, "step": 4548 }, { "epoch": 0.0080675260798295, "grad_norm": 0.396484375, "learning_rate": 0.001993810049634713, "loss": 0.5138, "step": 4550 }, { "epoch": 0.008071072245139315, "grad_norm": 0.359375, "learning_rate": 0.0019938030791311866, "loss": 0.2639, "step": 4552 }, { "epoch": 0.00807461841044913, "grad_norm": 0.490234375, "learning_rate": 0.0019937961047186846, "loss": 0.3402, "step": 4554 }, { "epoch": 0.008078164575758946, "grad_norm": 0.51171875, "learning_rate": 0.0019937891263972374, "loss": 0.3265, "step": 4556 }, { "epoch": 0.00808171074106876, "grad_norm": 1.140625, "learning_rate": 0.0019937821441668763, "loss": 0.3948, "step": 4558 }, { "epoch": 0.008085256906378575, "grad_norm": 0.34765625, "learning_rate": 0.001993775158027631, "loss": 0.2407, "step": 4560 }, { "epoch": 0.008088803071688392, "grad_norm": 0.55859375, "learning_rate": 0.0019937681679795317, "loss": 0.3222, "step": 4562 }, { "epoch": 0.008092349236998206, "grad_norm": 0.2451171875, "learning_rate": 0.0019937611740226103, "loss": 0.2497, "step": 4564 }, { "epoch": 0.008095895402308022, "grad_norm": 0.46875, "learning_rate": 0.0019937541761568963, "loss": 0.2194, "step": 4566 }, { "epoch": 0.008099441567617837, "grad_norm": 0.62109375, "learning_rate": 0.001993747174382421, "loss": 0.3039, "step": 4568 }, { "epoch": 0.008102987732927652, "grad_norm": 0.1806640625, "learning_rate": 0.0019937401686992147, "loss": 0.2621, "step": 4570 }, { "epoch": 0.008106533898237468, "grad_norm": 0.470703125, "learning_rate": 0.0019937331591073078, "loss": 0.2045, "step": 4572 }, { "epoch": 0.008110080063547282, "grad_norm": 0.30078125, "learning_rate": 0.0019937261456067315, "loss": 0.294, "step": 4574 }, { "epoch": 0.008113626228857097, "grad_norm": 0.2734375, "learning_rate": 0.001993719128197516, "loss": 0.2457, "step": 4576 }, { "epoch": 0.008117172394166913, "grad_norm": 0.1962890625, "learning_rate": 0.0019937121068796925, "loss": 0.2523, "step": 4578 }, { "epoch": 0.008120718559476728, "grad_norm": 0.34375, "learning_rate": 0.001993705081653291, "loss": 0.2384, "step": 4580 }, { "epoch": 0.008124264724786542, "grad_norm": 0.208984375, "learning_rate": 0.0019936980525183425, "loss": 0.3318, "step": 4582 }, { "epoch": 0.008127810890096359, "grad_norm": 0.73828125, "learning_rate": 0.001993691019474878, "loss": 0.3099, "step": 4584 }, { "epoch": 0.008131357055406173, "grad_norm": 0.2041015625, "learning_rate": 0.001993683982522928, "loss": 0.3007, "step": 4586 }, { "epoch": 0.008134903220715988, "grad_norm": 0.373046875, "learning_rate": 0.0019936769416625238, "loss": 0.2842, "step": 4588 }, { "epoch": 0.008138449386025804, "grad_norm": 0.388671875, "learning_rate": 0.001993669896893695, "loss": 0.2214, "step": 4590 }, { "epoch": 0.008141995551335619, "grad_norm": 0.50390625, "learning_rate": 0.001993662848216474, "loss": 0.2985, "step": 4592 }, { "epoch": 0.008145541716645433, "grad_norm": 2.015625, "learning_rate": 0.0019936557956308906, "loss": 0.404, "step": 4594 }, { "epoch": 0.00814908788195525, "grad_norm": 2.171875, "learning_rate": 0.0019936487391369754, "loss": 0.3872, "step": 4596 }, { "epoch": 0.008152634047265064, "grad_norm": 2.078125, "learning_rate": 0.00199364167873476, "loss": 0.501, "step": 4598 }, { "epoch": 0.00815618021257488, "grad_norm": 0.55859375, "learning_rate": 0.001993634614424275, "loss": 0.2852, "step": 4600 }, { "epoch": 0.008159726377884695, "grad_norm": 0.63671875, "learning_rate": 0.0019936275462055513, "loss": 0.2374, "step": 4602 }, { "epoch": 0.00816327254319451, "grad_norm": 0.458984375, "learning_rate": 0.0019936204740786194, "loss": 0.2571, "step": 4604 }, { "epoch": 0.008166818708504326, "grad_norm": 0.8515625, "learning_rate": 0.0019936133980435113, "loss": 0.2724, "step": 4606 }, { "epoch": 0.00817036487381414, "grad_norm": 0.439453125, "learning_rate": 0.0019936063181002564, "loss": 0.2986, "step": 4608 }, { "epoch": 0.008173911039123955, "grad_norm": 0.99609375, "learning_rate": 0.0019935992342488876, "loss": 0.2031, "step": 4610 }, { "epoch": 0.008177457204433771, "grad_norm": 0.4296875, "learning_rate": 0.001993592146489434, "loss": 0.2599, "step": 4612 }, { "epoch": 0.008181003369743586, "grad_norm": 0.345703125, "learning_rate": 0.001993585054821928, "loss": 0.2936, "step": 4614 }, { "epoch": 0.0081845495350534, "grad_norm": 0.60546875, "learning_rate": 0.0019935779592463996, "loss": 0.2948, "step": 4616 }, { "epoch": 0.008188095700363216, "grad_norm": 0.2333984375, "learning_rate": 0.0019935708597628803, "loss": 0.329, "step": 4618 }, { "epoch": 0.008191641865673031, "grad_norm": 0.5625, "learning_rate": 0.001993563756371401, "loss": 0.259, "step": 4620 }, { "epoch": 0.008195188030982846, "grad_norm": 0.3671875, "learning_rate": 0.0019935566490719933, "loss": 0.2434, "step": 4622 }, { "epoch": 0.008198734196292662, "grad_norm": 0.306640625, "learning_rate": 0.0019935495378646875, "loss": 0.2276, "step": 4624 }, { "epoch": 0.008202280361602476, "grad_norm": 0.490234375, "learning_rate": 0.001993542422749515, "loss": 0.2696, "step": 4626 }, { "epoch": 0.008205826526912291, "grad_norm": 0.35546875, "learning_rate": 0.0019935353037265073, "loss": 0.2543, "step": 4628 }, { "epoch": 0.008209372692222107, "grad_norm": 1.171875, "learning_rate": 0.001993528180795695, "loss": 0.328, "step": 4630 }, { "epoch": 0.008212918857531922, "grad_norm": 1.296875, "learning_rate": 0.0019935210539571094, "loss": 0.2815, "step": 4632 }, { "epoch": 0.008216465022841738, "grad_norm": 0.369140625, "learning_rate": 0.0019935139232107823, "loss": 0.2748, "step": 4634 }, { "epoch": 0.008220011188151553, "grad_norm": 0.2392578125, "learning_rate": 0.0019935067885567437, "loss": 0.2366, "step": 4636 }, { "epoch": 0.008223557353461367, "grad_norm": 0.50390625, "learning_rate": 0.0019934996499950254, "loss": 0.2969, "step": 4638 }, { "epoch": 0.008227103518771183, "grad_norm": 0.447265625, "learning_rate": 0.001993492507525659, "loss": 0.1919, "step": 4640 }, { "epoch": 0.008230649684080998, "grad_norm": 0.494140625, "learning_rate": 0.0019934853611486753, "loss": 0.4802, "step": 4642 }, { "epoch": 0.008234195849390813, "grad_norm": 0.4921875, "learning_rate": 0.001993478210864105, "loss": 0.28, "step": 4644 }, { "epoch": 0.008237742014700629, "grad_norm": 0.62109375, "learning_rate": 0.0019934710566719806, "loss": 0.3622, "step": 4646 }, { "epoch": 0.008241288180010443, "grad_norm": 0.37109375, "learning_rate": 0.001993463898572333, "loss": 0.2593, "step": 4648 }, { "epoch": 0.008244834345320258, "grad_norm": 0.60546875, "learning_rate": 0.0019934567365651927, "loss": 0.2329, "step": 4650 }, { "epoch": 0.008248380510630074, "grad_norm": 0.38671875, "learning_rate": 0.001993449570650592, "loss": 0.2304, "step": 4652 }, { "epoch": 0.008251926675939889, "grad_norm": 1.0703125, "learning_rate": 0.001993442400828562, "loss": 0.3452, "step": 4654 }, { "epoch": 0.008255472841249703, "grad_norm": 0.515625, "learning_rate": 0.0019934352270991333, "loss": 0.2684, "step": 4656 }, { "epoch": 0.00825901900655952, "grad_norm": 0.30078125, "learning_rate": 0.0019934280494623385, "loss": 0.2466, "step": 4658 }, { "epoch": 0.008262565171869334, "grad_norm": 1.2421875, "learning_rate": 0.001993420867918208, "loss": 0.3082, "step": 4660 }, { "epoch": 0.008266111337179149, "grad_norm": 0.26171875, "learning_rate": 0.0019934136824667735, "loss": 0.2719, "step": 4662 }, { "epoch": 0.008269657502488965, "grad_norm": 0.408203125, "learning_rate": 0.001993406493108067, "loss": 0.2737, "step": 4664 }, { "epoch": 0.00827320366779878, "grad_norm": 0.84765625, "learning_rate": 0.001993399299842119, "loss": 0.2495, "step": 4666 }, { "epoch": 0.008276749833108596, "grad_norm": 0.9453125, "learning_rate": 0.0019933921026689615, "loss": 0.4277, "step": 4668 }, { "epoch": 0.00828029599841841, "grad_norm": 0.302734375, "learning_rate": 0.001993384901588626, "loss": 0.2412, "step": 4670 }, { "epoch": 0.008283842163728225, "grad_norm": 0.37890625, "learning_rate": 0.001993377696601144, "loss": 0.2906, "step": 4672 }, { "epoch": 0.008287388329038041, "grad_norm": 0.82421875, "learning_rate": 0.001993370487706547, "loss": 0.2661, "step": 4674 }, { "epoch": 0.008290934494347856, "grad_norm": 0.5625, "learning_rate": 0.001993363274904866, "loss": 0.2185, "step": 4676 }, { "epoch": 0.00829448065965767, "grad_norm": 0.53515625, "learning_rate": 0.001993356058196133, "loss": 0.3298, "step": 4678 }, { "epoch": 0.008298026824967487, "grad_norm": 0.578125, "learning_rate": 0.00199334883758038, "loss": 0.2402, "step": 4680 }, { "epoch": 0.008301572990277301, "grad_norm": 0.953125, "learning_rate": 0.0019933416130576372, "loss": 0.3477, "step": 4682 }, { "epoch": 0.008305119155587116, "grad_norm": 0.8125, "learning_rate": 0.001993334384627938, "loss": 0.3677, "step": 4684 }, { "epoch": 0.008308665320896932, "grad_norm": 0.384765625, "learning_rate": 0.0019933271522913124, "loss": 0.2586, "step": 4686 }, { "epoch": 0.008312211486206747, "grad_norm": 1.8125, "learning_rate": 0.0019933199160477935, "loss": 0.4084, "step": 4688 }, { "epoch": 0.008315757651516561, "grad_norm": 0.4609375, "learning_rate": 0.001993312675897412, "loss": 0.336, "step": 4690 }, { "epoch": 0.008319303816826377, "grad_norm": 2.234375, "learning_rate": 0.0019933054318401994, "loss": 0.3933, "step": 4692 }, { "epoch": 0.008322849982136192, "grad_norm": 0.326171875, "learning_rate": 0.001993298183876188, "loss": 0.2626, "step": 4694 }, { "epoch": 0.008326396147446007, "grad_norm": 1.0078125, "learning_rate": 0.001993290932005409, "loss": 0.3769, "step": 4696 }, { "epoch": 0.008329942312755823, "grad_norm": 0.9140625, "learning_rate": 0.0019932836762278946, "loss": 0.5396, "step": 4698 }, { "epoch": 0.008333488478065637, "grad_norm": 0.490234375, "learning_rate": 0.001993276416543676, "loss": 0.3614, "step": 4700 }, { "epoch": 0.008337034643375454, "grad_norm": 0.58984375, "learning_rate": 0.001993269152952785, "loss": 0.3298, "step": 4702 }, { "epoch": 0.008340580808685268, "grad_norm": 0.5859375, "learning_rate": 0.0019932618854552543, "loss": 0.2188, "step": 4704 }, { "epoch": 0.008344126973995083, "grad_norm": 0.53515625, "learning_rate": 0.0019932546140511145, "loss": 0.3649, "step": 4706 }, { "epoch": 0.008347673139304899, "grad_norm": 0.466796875, "learning_rate": 0.0019932473387403982, "loss": 0.3457, "step": 4708 }, { "epoch": 0.008351219304614714, "grad_norm": 0.318359375, "learning_rate": 0.0019932400595231367, "loss": 0.2567, "step": 4710 }, { "epoch": 0.008354765469924528, "grad_norm": 0.2373046875, "learning_rate": 0.001993232776399362, "loss": 0.2158, "step": 4712 }, { "epoch": 0.008358311635234345, "grad_norm": 0.80859375, "learning_rate": 0.001993225489369106, "loss": 0.2842, "step": 4714 }, { "epoch": 0.008361857800544159, "grad_norm": 0.3984375, "learning_rate": 0.001993218198432401, "loss": 0.2714, "step": 4716 }, { "epoch": 0.008365403965853974, "grad_norm": 0.427734375, "learning_rate": 0.0019932109035892777, "loss": 0.3672, "step": 4718 }, { "epoch": 0.00836895013116379, "grad_norm": 0.353515625, "learning_rate": 0.0019932036048397692, "loss": 0.2279, "step": 4720 }, { "epoch": 0.008372496296473604, "grad_norm": 0.28515625, "learning_rate": 0.001993196302183907, "loss": 0.3203, "step": 4722 }, { "epoch": 0.008376042461783419, "grad_norm": 0.81640625, "learning_rate": 0.001993188995621723, "loss": 0.3099, "step": 4724 }, { "epoch": 0.008379588627093235, "grad_norm": 0.5546875, "learning_rate": 0.001993181685153249, "loss": 0.3665, "step": 4726 }, { "epoch": 0.00838313479240305, "grad_norm": 0.8203125, "learning_rate": 0.001993174370778517, "loss": 0.2998, "step": 4728 }, { "epoch": 0.008386680957712864, "grad_norm": 1.4453125, "learning_rate": 0.0019931670524975594, "loss": 0.2354, "step": 4730 }, { "epoch": 0.00839022712302268, "grad_norm": 1.3046875, "learning_rate": 0.0019931597303104076, "loss": 0.4875, "step": 4732 }, { "epoch": 0.008393773288332495, "grad_norm": 0.412109375, "learning_rate": 0.0019931524042170945, "loss": 0.3313, "step": 4734 }, { "epoch": 0.008397319453642312, "grad_norm": 1.0078125, "learning_rate": 0.001993145074217651, "loss": 0.4375, "step": 4736 }, { "epoch": 0.008400865618952126, "grad_norm": 0.328125, "learning_rate": 0.00199313774031211, "loss": 0.2734, "step": 4738 }, { "epoch": 0.00840441178426194, "grad_norm": 0.640625, "learning_rate": 0.001993130402500503, "loss": 0.3512, "step": 4740 }, { "epoch": 0.008407957949571757, "grad_norm": 0.234375, "learning_rate": 0.001993123060782863, "loss": 0.3138, "step": 4742 }, { "epoch": 0.008411504114881571, "grad_norm": 0.294921875, "learning_rate": 0.0019931157151592215, "loss": 0.2789, "step": 4744 }, { "epoch": 0.008415050280191386, "grad_norm": 0.50390625, "learning_rate": 0.0019931083656296103, "loss": 0.2437, "step": 4746 }, { "epoch": 0.008418596445501202, "grad_norm": 2.90625, "learning_rate": 0.001993101012194062, "loss": 0.4977, "step": 4748 }, { "epoch": 0.008422142610811017, "grad_norm": 0.490234375, "learning_rate": 0.0019930936548526084, "loss": 0.315, "step": 4750 }, { "epoch": 0.008425688776120831, "grad_norm": 0.60546875, "learning_rate": 0.0019930862936052827, "loss": 0.2915, "step": 4752 }, { "epoch": 0.008429234941430648, "grad_norm": 0.984375, "learning_rate": 0.0019930789284521152, "loss": 0.2825, "step": 4754 }, { "epoch": 0.008432781106740462, "grad_norm": 0.326171875, "learning_rate": 0.0019930715593931402, "loss": 0.2295, "step": 4756 }, { "epoch": 0.008436327272050277, "grad_norm": 0.478515625, "learning_rate": 0.0019930641864283885, "loss": 0.3066, "step": 4758 }, { "epoch": 0.008439873437360093, "grad_norm": 4.25, "learning_rate": 0.001993056809557893, "loss": 0.6235, "step": 4760 }, { "epoch": 0.008443419602669908, "grad_norm": 0.2255859375, "learning_rate": 0.0019930494287816853, "loss": 0.1938, "step": 4762 }, { "epoch": 0.008446965767979722, "grad_norm": 0.390625, "learning_rate": 0.001993042044099799, "loss": 0.2948, "step": 4764 }, { "epoch": 0.008450511933289538, "grad_norm": 0.59375, "learning_rate": 0.0019930346555122646, "loss": 0.2259, "step": 4766 }, { "epoch": 0.008454058098599353, "grad_norm": 0.44921875, "learning_rate": 0.0019930272630191157, "loss": 0.2892, "step": 4768 }, { "epoch": 0.00845760426390917, "grad_norm": 0.84375, "learning_rate": 0.0019930198666203843, "loss": 0.3746, "step": 4770 }, { "epoch": 0.008461150429218984, "grad_norm": 0.498046875, "learning_rate": 0.0019930124663161027, "loss": 0.2798, "step": 4772 }, { "epoch": 0.008464696594528798, "grad_norm": 3.15625, "learning_rate": 0.0019930050621063036, "loss": 0.358, "step": 4774 }, { "epoch": 0.008468242759838615, "grad_norm": 0.333984375, "learning_rate": 0.0019929976539910187, "loss": 0.235, "step": 4776 }, { "epoch": 0.00847178892514843, "grad_norm": 0.26953125, "learning_rate": 0.0019929902419702807, "loss": 0.223, "step": 4778 }, { "epoch": 0.008475335090458244, "grad_norm": 0.53515625, "learning_rate": 0.0019929828260441223, "loss": 0.3083, "step": 4780 }, { "epoch": 0.00847888125576806, "grad_norm": 0.38671875, "learning_rate": 0.001992975406212576, "loss": 0.1993, "step": 4782 }, { "epoch": 0.008482427421077875, "grad_norm": 0.462890625, "learning_rate": 0.0019929679824756733, "loss": 0.2866, "step": 4784 }, { "epoch": 0.00848597358638769, "grad_norm": 2.875, "learning_rate": 0.001992960554833448, "loss": 0.3772, "step": 4786 }, { "epoch": 0.008489519751697506, "grad_norm": 0.890625, "learning_rate": 0.001992953123285932, "loss": 0.391, "step": 4788 }, { "epoch": 0.00849306591700732, "grad_norm": 0.2412109375, "learning_rate": 0.001992945687833157, "loss": 0.4167, "step": 4790 }, { "epoch": 0.008496612082317135, "grad_norm": 0.484375, "learning_rate": 0.001992938248475157, "loss": 0.4974, "step": 4792 }, { "epoch": 0.008500158247626951, "grad_norm": 1.2578125, "learning_rate": 0.001992930805211963, "loss": 0.3321, "step": 4794 }, { "epoch": 0.008503704412936765, "grad_norm": 0.3984375, "learning_rate": 0.0019929233580436093, "loss": 0.2286, "step": 4796 }, { "epoch": 0.00850725057824658, "grad_norm": 0.515625, "learning_rate": 0.0019929159069701267, "loss": 0.3003, "step": 4798 }, { "epoch": 0.008510796743556396, "grad_norm": 0.388671875, "learning_rate": 0.001992908451991549, "loss": 0.2634, "step": 4800 }, { "epoch": 0.00851434290886621, "grad_norm": 1.0390625, "learning_rate": 0.001992900993107908, "loss": 0.3445, "step": 4802 }, { "epoch": 0.008517889074176027, "grad_norm": 1.1640625, "learning_rate": 0.0019928935303192372, "loss": 0.2538, "step": 4804 }, { "epoch": 0.008521435239485842, "grad_norm": 1.078125, "learning_rate": 0.0019928860636255685, "loss": 0.2472, "step": 4806 }, { "epoch": 0.008524981404795656, "grad_norm": 0.486328125, "learning_rate": 0.001992878593026935, "loss": 0.2252, "step": 4808 }, { "epoch": 0.008528527570105473, "grad_norm": 0.46484375, "learning_rate": 0.001992871118523369, "loss": 0.2489, "step": 4810 }, { "epoch": 0.008532073735415287, "grad_norm": 0.40625, "learning_rate": 0.001992863640114903, "loss": 0.3192, "step": 4812 }, { "epoch": 0.008535619900725102, "grad_norm": 0.357421875, "learning_rate": 0.0019928561578015707, "loss": 0.2293, "step": 4814 }, { "epoch": 0.008539166066034918, "grad_norm": 0.32421875, "learning_rate": 0.001992848671583404, "loss": 0.2109, "step": 4816 }, { "epoch": 0.008542712231344732, "grad_norm": 0.52734375, "learning_rate": 0.0019928411814604356, "loss": 0.2383, "step": 4818 }, { "epoch": 0.008546258396654547, "grad_norm": 0.416015625, "learning_rate": 0.0019928336874326987, "loss": 0.2808, "step": 4820 }, { "epoch": 0.008549804561964363, "grad_norm": 2.15625, "learning_rate": 0.0019928261895002263, "loss": 0.3658, "step": 4822 }, { "epoch": 0.008553350727274178, "grad_norm": 0.796875, "learning_rate": 0.00199281868766305, "loss": 0.3208, "step": 4824 }, { "epoch": 0.008556896892583992, "grad_norm": 0.7734375, "learning_rate": 0.001992811181921204, "loss": 0.3594, "step": 4826 }, { "epoch": 0.008560443057893809, "grad_norm": 0.5078125, "learning_rate": 0.00199280367227472, "loss": 0.2765, "step": 4828 }, { "epoch": 0.008563989223203623, "grad_norm": 0.62109375, "learning_rate": 0.0019927961587236313, "loss": 0.2422, "step": 4830 }, { "epoch": 0.008567535388513438, "grad_norm": 1.7734375, "learning_rate": 0.001992788641267971, "loss": 0.4287, "step": 4832 }, { "epoch": 0.008571081553823254, "grad_norm": 1.0234375, "learning_rate": 0.001992781119907772, "loss": 0.3503, "step": 4834 }, { "epoch": 0.008574627719133069, "grad_norm": 0.38671875, "learning_rate": 0.0019927735946430668, "loss": 0.22, "step": 4836 }, { "epoch": 0.008578173884442885, "grad_norm": 0.421875, "learning_rate": 0.0019927660654738884, "loss": 0.2637, "step": 4838 }, { "epoch": 0.0085817200497527, "grad_norm": 0.40234375, "learning_rate": 0.00199275853240027, "loss": 0.3468, "step": 4840 }, { "epoch": 0.008585266215062514, "grad_norm": 0.53125, "learning_rate": 0.001992750995422244, "loss": 0.3158, "step": 4842 }, { "epoch": 0.00858881238037233, "grad_norm": 2.578125, "learning_rate": 0.001992743454539844, "loss": 0.2404, "step": 4844 }, { "epoch": 0.008592358545682145, "grad_norm": 0.470703125, "learning_rate": 0.001992735909753103, "loss": 0.3253, "step": 4846 }, { "epoch": 0.00859590471099196, "grad_norm": 1.2578125, "learning_rate": 0.001992728361062053, "loss": 0.2342, "step": 4848 }, { "epoch": 0.008599450876301776, "grad_norm": 0.640625, "learning_rate": 0.001992720808466728, "loss": 0.2632, "step": 4850 }, { "epoch": 0.00860299704161159, "grad_norm": 0.515625, "learning_rate": 0.001992713251967161, "loss": 0.2621, "step": 4852 }, { "epoch": 0.008606543206921405, "grad_norm": 2.96875, "learning_rate": 0.0019927056915633842, "loss": 0.3666, "step": 4854 }, { "epoch": 0.008610089372231221, "grad_norm": 1.5703125, "learning_rate": 0.0019926981272554317, "loss": 0.4931, "step": 4856 }, { "epoch": 0.008613635537541036, "grad_norm": 0.33984375, "learning_rate": 0.001992690559043336, "loss": 0.2825, "step": 4858 }, { "epoch": 0.00861718170285085, "grad_norm": 0.29296875, "learning_rate": 0.0019926829869271307, "loss": 0.2756, "step": 4860 }, { "epoch": 0.008620727868160667, "grad_norm": 2.09375, "learning_rate": 0.0019926754109068482, "loss": 0.273, "step": 4862 }, { "epoch": 0.008624274033470481, "grad_norm": 0.345703125, "learning_rate": 0.001992667830982522, "loss": 0.2479, "step": 4864 }, { "epoch": 0.008627820198780296, "grad_norm": 1.0703125, "learning_rate": 0.001992660247154185, "loss": 0.2527, "step": 4866 }, { "epoch": 0.008631366364090112, "grad_norm": 0.373046875, "learning_rate": 0.001992652659421871, "loss": 0.2085, "step": 4868 }, { "epoch": 0.008634912529399926, "grad_norm": 1.0703125, "learning_rate": 0.0019926450677856125, "loss": 0.4311, "step": 4870 }, { "epoch": 0.008638458694709743, "grad_norm": 0.609375, "learning_rate": 0.001992637472245443, "loss": 0.4236, "step": 4872 }, { "epoch": 0.008642004860019557, "grad_norm": 0.384765625, "learning_rate": 0.001992629872801396, "loss": 0.2683, "step": 4874 }, { "epoch": 0.008645551025329372, "grad_norm": 4.21875, "learning_rate": 0.001992622269453504, "loss": 0.3981, "step": 4876 }, { "epoch": 0.008649097190639188, "grad_norm": 1.0859375, "learning_rate": 0.001992614662201801, "loss": 0.2772, "step": 4878 }, { "epoch": 0.008652643355949003, "grad_norm": 0.2138671875, "learning_rate": 0.00199260705104632, "loss": 0.2528, "step": 4880 }, { "epoch": 0.008656189521258817, "grad_norm": 0.4296875, "learning_rate": 0.001992599435987094, "loss": 0.2588, "step": 4882 }, { "epoch": 0.008659735686568634, "grad_norm": 0.69140625, "learning_rate": 0.0019925918170241573, "loss": 0.2687, "step": 4884 }, { "epoch": 0.008663281851878448, "grad_norm": 0.92578125, "learning_rate": 0.0019925841941575415, "loss": 0.297, "step": 4886 }, { "epoch": 0.008666828017188263, "grad_norm": 0.349609375, "learning_rate": 0.001992576567387281, "loss": 0.2267, "step": 4888 }, { "epoch": 0.008670374182498079, "grad_norm": 0.287109375, "learning_rate": 0.0019925689367134096, "loss": 0.2558, "step": 4890 }, { "epoch": 0.008673920347807893, "grad_norm": 0.330078125, "learning_rate": 0.00199256130213596, "loss": 0.3102, "step": 4892 }, { "epoch": 0.008677466513117708, "grad_norm": 2.734375, "learning_rate": 0.0019925536636549654, "loss": 0.3242, "step": 4894 }, { "epoch": 0.008681012678427524, "grad_norm": 2.796875, "learning_rate": 0.0019925460212704598, "loss": 0.4111, "step": 4896 }, { "epoch": 0.008684558843737339, "grad_norm": 0.53125, "learning_rate": 0.0019925383749824764, "loss": 0.3217, "step": 4898 }, { "epoch": 0.008688105009047153, "grad_norm": 0.9609375, "learning_rate": 0.001992530724791048, "loss": 0.2641, "step": 4900 }, { "epoch": 0.00869165117435697, "grad_norm": 0.625, "learning_rate": 0.0019925230706962093, "loss": 0.3279, "step": 4902 }, { "epoch": 0.008695197339666784, "grad_norm": 0.21875, "learning_rate": 0.0019925154126979932, "loss": 0.2284, "step": 4904 }, { "epoch": 0.0086987435049766, "grad_norm": 2.6875, "learning_rate": 0.0019925077507964325, "loss": 0.5094, "step": 4906 }, { "epoch": 0.008702289670286415, "grad_norm": 0.49609375, "learning_rate": 0.001992500084991562, "loss": 0.2678, "step": 4908 }, { "epoch": 0.00870583583559623, "grad_norm": 0.318359375, "learning_rate": 0.001992492415283414, "loss": 0.2485, "step": 4910 }, { "epoch": 0.008709382000906046, "grad_norm": 1.1484375, "learning_rate": 0.001992484741672023, "loss": 0.2542, "step": 4912 }, { "epoch": 0.00871292816621586, "grad_norm": 0.29296875, "learning_rate": 0.0019924770641574223, "loss": 0.2881, "step": 4914 }, { "epoch": 0.008716474331525675, "grad_norm": 0.45703125, "learning_rate": 0.001992469382739645, "loss": 0.305, "step": 4916 }, { "epoch": 0.008720020496835491, "grad_norm": 0.30078125, "learning_rate": 0.0019924616974187253, "loss": 0.2519, "step": 4918 }, { "epoch": 0.008723566662145306, "grad_norm": 0.4609375, "learning_rate": 0.0019924540081946956, "loss": 0.3043, "step": 4920 }, { "epoch": 0.00872711282745512, "grad_norm": 0.859375, "learning_rate": 0.0019924463150675915, "loss": 0.2178, "step": 4922 }, { "epoch": 0.008730658992764937, "grad_norm": 0.392578125, "learning_rate": 0.001992438618037445, "loss": 0.2876, "step": 4924 }, { "epoch": 0.008734205158074751, "grad_norm": 0.91015625, "learning_rate": 0.001992430917104291, "loss": 0.2921, "step": 4926 }, { "epoch": 0.008737751323384566, "grad_norm": 8.375, "learning_rate": 0.0019924232122681624, "loss": 0.3517, "step": 4928 }, { "epoch": 0.008741297488694382, "grad_norm": 0.59375, "learning_rate": 0.0019924155035290925, "loss": 0.2663, "step": 4930 }, { "epoch": 0.008744843654004197, "grad_norm": 2.828125, "learning_rate": 0.001992407790887116, "loss": 0.2653, "step": 4932 }, { "epoch": 0.008748389819314011, "grad_norm": 0.59765625, "learning_rate": 0.001992400074342266, "loss": 0.3318, "step": 4934 }, { "epoch": 0.008751935984623828, "grad_norm": 0.4609375, "learning_rate": 0.0019923923538945764, "loss": 0.242, "step": 4936 }, { "epoch": 0.008755482149933642, "grad_norm": 0.33203125, "learning_rate": 0.001992384629544081, "loss": 0.2559, "step": 4938 }, { "epoch": 0.008759028315243458, "grad_norm": 0.671875, "learning_rate": 0.0019923769012908142, "loss": 0.2524, "step": 4940 }, { "epoch": 0.008762574480553273, "grad_norm": 0.61328125, "learning_rate": 0.001992369169134809, "loss": 0.297, "step": 4942 }, { "epoch": 0.008766120645863087, "grad_norm": 0.50390625, "learning_rate": 0.0019923614330760986, "loss": 0.3369, "step": 4944 }, { "epoch": 0.008769666811172904, "grad_norm": 1.0078125, "learning_rate": 0.001992353693114719, "loss": 0.2508, "step": 4946 }, { "epoch": 0.008773212976482718, "grad_norm": 1.96875, "learning_rate": 0.0019923459492507014, "loss": 0.4605, "step": 4948 }, { "epoch": 0.008776759141792533, "grad_norm": 0.72265625, "learning_rate": 0.0019923382014840818, "loss": 0.2632, "step": 4950 }, { "epoch": 0.00878030530710235, "grad_norm": 0.306640625, "learning_rate": 0.001992330449814893, "loss": 0.2353, "step": 4952 }, { "epoch": 0.008783851472412164, "grad_norm": 0.302734375, "learning_rate": 0.0019923226942431685, "loss": 0.197, "step": 4954 }, { "epoch": 0.008787397637721978, "grad_norm": 0.3203125, "learning_rate": 0.0019923149347689435, "loss": 0.2633, "step": 4956 }, { "epoch": 0.008790943803031795, "grad_norm": 0.478515625, "learning_rate": 0.001992307171392251, "loss": 0.237, "step": 4958 }, { "epoch": 0.008794489968341609, "grad_norm": 0.9765625, "learning_rate": 0.0019922994041131257, "loss": 0.3999, "step": 4960 }, { "epoch": 0.008798036133651424, "grad_norm": 0.431640625, "learning_rate": 0.0019922916329316006, "loss": 0.2326, "step": 4962 }, { "epoch": 0.00880158229896124, "grad_norm": 0.79296875, "learning_rate": 0.0019922838578477105, "loss": 0.3141, "step": 4964 }, { "epoch": 0.008805128464271055, "grad_norm": 0.5234375, "learning_rate": 0.0019922760788614892, "loss": 0.2723, "step": 4966 }, { "epoch": 0.008808674629580869, "grad_norm": 1.2265625, "learning_rate": 0.0019922682959729703, "loss": 0.2961, "step": 4968 }, { "epoch": 0.008812220794890685, "grad_norm": 0.60546875, "learning_rate": 0.001992260509182188, "loss": 0.3318, "step": 4970 }, { "epoch": 0.0088157669602005, "grad_norm": 0.6328125, "learning_rate": 0.0019922527184891774, "loss": 0.253, "step": 4972 }, { "epoch": 0.008819313125510316, "grad_norm": 0.265625, "learning_rate": 0.0019922449238939707, "loss": 0.2028, "step": 4974 }, { "epoch": 0.00882285929082013, "grad_norm": 2.640625, "learning_rate": 0.0019922371253966037, "loss": 0.3068, "step": 4976 }, { "epoch": 0.008826405456129945, "grad_norm": 0.37890625, "learning_rate": 0.0019922293229971094, "loss": 0.3086, "step": 4978 }, { "epoch": 0.008829951621439762, "grad_norm": 3.3125, "learning_rate": 0.0019922215166955225, "loss": 0.4288, "step": 4980 }, { "epoch": 0.008833497786749576, "grad_norm": 0.28515625, "learning_rate": 0.0019922137064918768, "loss": 0.2756, "step": 4982 }, { "epoch": 0.00883704395205939, "grad_norm": 0.373046875, "learning_rate": 0.001992205892386207, "loss": 0.222, "step": 4984 }, { "epoch": 0.008840590117369207, "grad_norm": 0.39453125, "learning_rate": 0.001992198074378546, "loss": 0.1995, "step": 4986 }, { "epoch": 0.008844136282679022, "grad_norm": 1.453125, "learning_rate": 0.00199219025246893, "loss": 0.4126, "step": 4988 }, { "epoch": 0.008847682447988836, "grad_norm": 0.419921875, "learning_rate": 0.001992182426657391, "loss": 0.2793, "step": 4990 }, { "epoch": 0.008851228613298652, "grad_norm": 0.66796875, "learning_rate": 0.001992174596943965, "loss": 0.214, "step": 4992 }, { "epoch": 0.008854774778608467, "grad_norm": 0.498046875, "learning_rate": 0.0019921667633286855, "loss": 0.2751, "step": 4994 }, { "epoch": 0.008858320943918281, "grad_norm": 0.28515625, "learning_rate": 0.0019921589258115866, "loss": 0.2328, "step": 4996 }, { "epoch": 0.008861867109228098, "grad_norm": 0.2470703125, "learning_rate": 0.001992151084392703, "loss": 0.2115, "step": 4998 }, { "epoch": 0.008865413274537912, "grad_norm": 1.78125, "learning_rate": 0.0019921432390720686, "loss": 0.355, "step": 5000 }, { "epoch": 0.008868959439847727, "grad_norm": 0.2265625, "learning_rate": 0.001992135389849718, "loss": 0.2089, "step": 5002 }, { "epoch": 0.008872505605157543, "grad_norm": 0.55859375, "learning_rate": 0.001992127536725685, "loss": 0.2244, "step": 5004 }, { "epoch": 0.008876051770467358, "grad_norm": 0.40625, "learning_rate": 0.0019921196797000047, "loss": 0.3244, "step": 5006 }, { "epoch": 0.008879597935777174, "grad_norm": 0.46484375, "learning_rate": 0.0019921118187727115, "loss": 0.2634, "step": 5008 }, { "epoch": 0.008883144101086989, "grad_norm": 0.2158203125, "learning_rate": 0.001992103953943839, "loss": 0.2841, "step": 5010 }, { "epoch": 0.008886690266396803, "grad_norm": 0.328125, "learning_rate": 0.001992096085213422, "loss": 0.2345, "step": 5012 }, { "epoch": 0.00889023643170662, "grad_norm": 0.310546875, "learning_rate": 0.001992088212581495, "loss": 0.3092, "step": 5014 }, { "epoch": 0.008893782597016434, "grad_norm": 0.36328125, "learning_rate": 0.0019920803360480924, "loss": 0.2867, "step": 5016 }, { "epoch": 0.008897328762326248, "grad_norm": 0.423828125, "learning_rate": 0.001992072455613248, "loss": 0.2624, "step": 5018 }, { "epoch": 0.008900874927636065, "grad_norm": 0.392578125, "learning_rate": 0.001992064571276998, "loss": 0.278, "step": 5020 }, { "epoch": 0.00890442109294588, "grad_norm": 5.8125, "learning_rate": 0.001992056683039375, "loss": 0.5674, "step": 5022 }, { "epoch": 0.008907967258255694, "grad_norm": 0.69921875, "learning_rate": 0.0019920487909004143, "loss": 0.3603, "step": 5024 }, { "epoch": 0.00891151342356551, "grad_norm": 0.61328125, "learning_rate": 0.0019920408948601504, "loss": 0.2735, "step": 5026 }, { "epoch": 0.008915059588875325, "grad_norm": 0.37890625, "learning_rate": 0.0019920329949186175, "loss": 0.2716, "step": 5028 }, { "epoch": 0.00891860575418514, "grad_norm": 0.953125, "learning_rate": 0.0019920250910758506, "loss": 0.2975, "step": 5030 }, { "epoch": 0.008922151919494956, "grad_norm": 0.48046875, "learning_rate": 0.001992017183331884, "loss": 0.2184, "step": 5032 }, { "epoch": 0.00892569808480477, "grad_norm": 0.546875, "learning_rate": 0.001992009271686753, "loss": 0.2205, "step": 5034 }, { "epoch": 0.008929244250114585, "grad_norm": 0.62109375, "learning_rate": 0.001992001356140491, "loss": 0.2986, "step": 5036 }, { "epoch": 0.008932790415424401, "grad_norm": 0.30078125, "learning_rate": 0.0019919934366931335, "loss": 0.3347, "step": 5038 }, { "epoch": 0.008936336580734216, "grad_norm": 0.30078125, "learning_rate": 0.0019919855133447148, "loss": 0.2387, "step": 5040 }, { "epoch": 0.008939882746044032, "grad_norm": 0.404296875, "learning_rate": 0.0019919775860952693, "loss": 0.3003, "step": 5042 }, { "epoch": 0.008943428911353846, "grad_norm": 0.5234375, "learning_rate": 0.001991969654944832, "loss": 0.2495, "step": 5044 }, { "epoch": 0.008946975076663661, "grad_norm": 0.423828125, "learning_rate": 0.001991961719893438, "loss": 0.2789, "step": 5046 }, { "epoch": 0.008950521241973477, "grad_norm": 0.40234375, "learning_rate": 0.001991953780941121, "loss": 0.2735, "step": 5048 }, { "epoch": 0.008954067407283292, "grad_norm": 0.474609375, "learning_rate": 0.001991945838087917, "loss": 0.3177, "step": 5050 }, { "epoch": 0.008957613572593106, "grad_norm": 0.259765625, "learning_rate": 0.001991937891333859, "loss": 0.2191, "step": 5052 }, { "epoch": 0.008961159737902923, "grad_norm": 0.421875, "learning_rate": 0.001991929940678984, "loss": 0.2816, "step": 5054 }, { "epoch": 0.008964705903212737, "grad_norm": 0.62890625, "learning_rate": 0.0019919219861233243, "loss": 0.4708, "step": 5056 }, { "epoch": 0.008968252068522552, "grad_norm": 0.5546875, "learning_rate": 0.0019919140276669165, "loss": 0.325, "step": 5058 }, { "epoch": 0.008971798233832368, "grad_norm": 0.6953125, "learning_rate": 0.001991906065309795, "loss": 0.2673, "step": 5060 }, { "epoch": 0.008975344399142183, "grad_norm": 0.404296875, "learning_rate": 0.0019918980990519942, "loss": 0.2394, "step": 5062 }, { "epoch": 0.008978890564451997, "grad_norm": 1.2734375, "learning_rate": 0.001991890128893549, "loss": 0.3693, "step": 5064 }, { "epoch": 0.008982436729761813, "grad_norm": 0.44140625, "learning_rate": 0.0019918821548344946, "loss": 0.265, "step": 5066 }, { "epoch": 0.008985982895071628, "grad_norm": 1.140625, "learning_rate": 0.0019918741768748657, "loss": 0.2911, "step": 5068 }, { "epoch": 0.008989529060381442, "grad_norm": 0.703125, "learning_rate": 0.0019918661950146977, "loss": 0.2495, "step": 5070 }, { "epoch": 0.008993075225691259, "grad_norm": 0.57421875, "learning_rate": 0.0019918582092540247, "loss": 0.2729, "step": 5072 }, { "epoch": 0.008996621391001073, "grad_norm": 0.443359375, "learning_rate": 0.0019918502195928815, "loss": 0.2481, "step": 5074 }, { "epoch": 0.00900016755631089, "grad_norm": 0.400390625, "learning_rate": 0.001991842226031304, "loss": 0.2532, "step": 5076 }, { "epoch": 0.009003713721620704, "grad_norm": 0.515625, "learning_rate": 0.0019918342285693267, "loss": 0.4735, "step": 5078 }, { "epoch": 0.009007259886930519, "grad_norm": 0.33984375, "learning_rate": 0.001991826227206984, "loss": 0.1499, "step": 5080 }, { "epoch": 0.009010806052240335, "grad_norm": 1.453125, "learning_rate": 0.001991818221944312, "loss": 0.4161, "step": 5082 }, { "epoch": 0.00901435221755015, "grad_norm": 0.486328125, "learning_rate": 0.0019918102127813447, "loss": 0.3267, "step": 5084 }, { "epoch": 0.009017898382859964, "grad_norm": 0.41015625, "learning_rate": 0.001991802199718118, "loss": 0.3666, "step": 5086 }, { "epoch": 0.00902144454816978, "grad_norm": 0.4296875, "learning_rate": 0.0019917941827546663, "loss": 0.2611, "step": 5088 }, { "epoch": 0.009024990713479595, "grad_norm": 0.44921875, "learning_rate": 0.0019917861618910246, "loss": 0.2883, "step": 5090 }, { "epoch": 0.00902853687878941, "grad_norm": 1.421875, "learning_rate": 0.0019917781371272284, "loss": 0.4393, "step": 5092 }, { "epoch": 0.009032083044099226, "grad_norm": 0.216796875, "learning_rate": 0.001991770108463313, "loss": 0.2813, "step": 5094 }, { "epoch": 0.00903562920940904, "grad_norm": 0.4921875, "learning_rate": 0.0019917620758993127, "loss": 0.2903, "step": 5096 }, { "epoch": 0.009039175374718855, "grad_norm": 0.734375, "learning_rate": 0.0019917540394352633, "loss": 0.34, "step": 5098 }, { "epoch": 0.009042721540028671, "grad_norm": 0.5625, "learning_rate": 0.0019917459990711995, "loss": 0.2248, "step": 5100 }, { "epoch": 0.009046267705338486, "grad_norm": 0.279296875, "learning_rate": 0.001991737954807157, "loss": 0.3489, "step": 5102 }, { "epoch": 0.0090498138706483, "grad_norm": 1.03125, "learning_rate": 0.0019917299066431705, "loss": 0.1884, "step": 5104 }, { "epoch": 0.009053360035958117, "grad_norm": 0.609375, "learning_rate": 0.001991721854579275, "loss": 0.234, "step": 5106 }, { "epoch": 0.009056906201267931, "grad_norm": 0.46484375, "learning_rate": 0.0019917137986155066, "loss": 0.3182, "step": 5108 }, { "epoch": 0.009060452366577747, "grad_norm": 1.515625, "learning_rate": 0.0019917057387519, "loss": 0.4011, "step": 5110 }, { "epoch": 0.009063998531887562, "grad_norm": 0.294921875, "learning_rate": 0.0019916976749884898, "loss": 0.3163, "step": 5112 }, { "epoch": 0.009067544697197377, "grad_norm": 0.478515625, "learning_rate": 0.001991689607325313, "loss": 0.3905, "step": 5114 }, { "epoch": 0.009071090862507193, "grad_norm": 0.765625, "learning_rate": 0.001991681535762403, "loss": 0.2781, "step": 5116 }, { "epoch": 0.009074637027817007, "grad_norm": 0.37109375, "learning_rate": 0.0019916734602997963, "loss": 0.3354, "step": 5118 }, { "epoch": 0.009078183193126822, "grad_norm": 0.357421875, "learning_rate": 0.0019916653809375273, "loss": 0.2352, "step": 5120 }, { "epoch": 0.009081729358436638, "grad_norm": 0.63671875, "learning_rate": 0.0019916572976756324, "loss": 0.2426, "step": 5122 }, { "epoch": 0.009085275523746453, "grad_norm": 1.484375, "learning_rate": 0.0019916492105141463, "loss": 0.4047, "step": 5124 }, { "epoch": 0.009088821689056267, "grad_norm": 0.53515625, "learning_rate": 0.0019916411194531043, "loss": 0.2283, "step": 5126 }, { "epoch": 0.009092367854366084, "grad_norm": 0.953125, "learning_rate": 0.001991633024492542, "loss": 0.3103, "step": 5128 }, { "epoch": 0.009095914019675898, "grad_norm": 0.796875, "learning_rate": 0.001991624925632495, "loss": 0.2451, "step": 5130 }, { "epoch": 0.009099460184985713, "grad_norm": 0.416015625, "learning_rate": 0.0019916168228729983, "loss": 0.3287, "step": 5132 }, { "epoch": 0.009103006350295529, "grad_norm": 2.125, "learning_rate": 0.001991608716214088, "loss": 0.3232, "step": 5134 }, { "epoch": 0.009106552515605344, "grad_norm": 0.69140625, "learning_rate": 0.0019916006056557986, "loss": 0.1628, "step": 5136 }, { "epoch": 0.009110098680915158, "grad_norm": 0.5703125, "learning_rate": 0.001991592491198166, "loss": 0.3016, "step": 5138 }, { "epoch": 0.009113644846224974, "grad_norm": 0.470703125, "learning_rate": 0.001991584372841226, "loss": 0.2512, "step": 5140 }, { "epoch": 0.009117191011534789, "grad_norm": 6.65625, "learning_rate": 0.001991576250585014, "loss": 0.3315, "step": 5142 }, { "epoch": 0.009120737176844605, "grad_norm": 0.578125, "learning_rate": 0.0019915681244295647, "loss": 0.2593, "step": 5144 }, { "epoch": 0.00912428334215442, "grad_norm": 1.2421875, "learning_rate": 0.001991559994374915, "loss": 0.4136, "step": 5146 }, { "epoch": 0.009127829507464234, "grad_norm": 0.30859375, "learning_rate": 0.001991551860421099, "loss": 0.2256, "step": 5148 }, { "epoch": 0.00913137567277405, "grad_norm": 0.5546875, "learning_rate": 0.001991543722568154, "loss": 0.2684, "step": 5150 }, { "epoch": 0.009134921838083865, "grad_norm": 0.6640625, "learning_rate": 0.001991535580816114, "loss": 0.2399, "step": 5152 }, { "epoch": 0.00913846800339368, "grad_norm": 1.171875, "learning_rate": 0.001991527435165015, "loss": 0.2677, "step": 5154 }, { "epoch": 0.009142014168703496, "grad_norm": 2.421875, "learning_rate": 0.0019915192856148935, "loss": 0.4014, "step": 5156 }, { "epoch": 0.00914556033401331, "grad_norm": 0.4765625, "learning_rate": 0.0019915111321657845, "loss": 0.2408, "step": 5158 }, { "epoch": 0.009149106499323125, "grad_norm": 0.451171875, "learning_rate": 0.0019915029748177235, "loss": 0.5697, "step": 5160 }, { "epoch": 0.009152652664632941, "grad_norm": 1.03125, "learning_rate": 0.001991494813570746, "loss": 0.3591, "step": 5162 }, { "epoch": 0.009156198829942756, "grad_norm": 1.7265625, "learning_rate": 0.001991486648424888, "loss": 0.3965, "step": 5164 }, { "epoch": 0.00915974499525257, "grad_norm": 0.41796875, "learning_rate": 0.001991478479380186, "loss": 0.227, "step": 5166 }, { "epoch": 0.009163291160562387, "grad_norm": 0.484375, "learning_rate": 0.001991470306436674, "loss": 0.2379, "step": 5168 }, { "epoch": 0.009166837325872201, "grad_norm": 2.15625, "learning_rate": 0.0019914621295943893, "loss": 0.373, "step": 5170 }, { "epoch": 0.009170383491182016, "grad_norm": 0.4296875, "learning_rate": 0.001991453948853367, "loss": 0.2354, "step": 5172 }, { "epoch": 0.009173929656491832, "grad_norm": 0.70703125, "learning_rate": 0.001991445764213643, "loss": 0.4654, "step": 5174 }, { "epoch": 0.009177475821801647, "grad_norm": 0.2734375, "learning_rate": 0.001991437575675253, "loss": 0.2566, "step": 5176 }, { "epoch": 0.009181021987111463, "grad_norm": 0.359375, "learning_rate": 0.0019914293832382327, "loss": 0.2169, "step": 5178 }, { "epoch": 0.009184568152421278, "grad_norm": 0.98046875, "learning_rate": 0.001991421186902618, "loss": 0.2468, "step": 5180 }, { "epoch": 0.009188114317731092, "grad_norm": 0.2734375, "learning_rate": 0.001991412986668445, "loss": 0.3031, "step": 5182 }, { "epoch": 0.009191660483040908, "grad_norm": 0.58203125, "learning_rate": 0.0019914047825357493, "loss": 0.3574, "step": 5184 }, { "epoch": 0.009195206648350723, "grad_norm": 0.69140625, "learning_rate": 0.001991396574504567, "loss": 0.2387, "step": 5186 }, { "epoch": 0.009198752813660538, "grad_norm": 0.484375, "learning_rate": 0.0019913883625749342, "loss": 0.2928, "step": 5188 }, { "epoch": 0.009202298978970354, "grad_norm": 1.578125, "learning_rate": 0.0019913801467468855, "loss": 0.4186, "step": 5190 }, { "epoch": 0.009205845144280168, "grad_norm": 0.244140625, "learning_rate": 0.0019913719270204587, "loss": 0.2622, "step": 5192 }, { "epoch": 0.009209391309589983, "grad_norm": 0.328125, "learning_rate": 0.001991363703395689, "loss": 0.2574, "step": 5194 }, { "epoch": 0.0092129374748998, "grad_norm": 0.5078125, "learning_rate": 0.0019913554758726115, "loss": 0.289, "step": 5196 }, { "epoch": 0.009216483640209614, "grad_norm": 1.203125, "learning_rate": 0.001991347244451263, "loss": 0.2643, "step": 5198 }, { "epoch": 0.009220029805519428, "grad_norm": 0.28125, "learning_rate": 0.0019913390091316797, "loss": 0.2406, "step": 5200 }, { "epoch": 0.009223575970829245, "grad_norm": 0.8984375, "learning_rate": 0.0019913307699138973, "loss": 0.4134, "step": 5202 }, { "epoch": 0.00922712213613906, "grad_norm": 0.34375, "learning_rate": 0.001991322526797952, "loss": 0.2647, "step": 5204 }, { "epoch": 0.009230668301448874, "grad_norm": 0.41796875, "learning_rate": 0.0019913142797838793, "loss": 0.2725, "step": 5206 }, { "epoch": 0.00923421446675869, "grad_norm": 0.6953125, "learning_rate": 0.0019913060288717158, "loss": 0.2381, "step": 5208 }, { "epoch": 0.009237760632068505, "grad_norm": 0.7265625, "learning_rate": 0.0019912977740614976, "loss": 0.2676, "step": 5210 }, { "epoch": 0.00924130679737832, "grad_norm": 0.5078125, "learning_rate": 0.0019912895153532608, "loss": 0.3104, "step": 5212 }, { "epoch": 0.009244852962688135, "grad_norm": 0.37109375, "learning_rate": 0.001991281252747041, "loss": 0.3894, "step": 5214 }, { "epoch": 0.00924839912799795, "grad_norm": 1.0546875, "learning_rate": 0.001991272986242875, "loss": 0.297, "step": 5216 }, { "epoch": 0.009251945293307766, "grad_norm": 1.6484375, "learning_rate": 0.0019912647158407985, "loss": 0.3694, "step": 5218 }, { "epoch": 0.00925549145861758, "grad_norm": 0.318359375, "learning_rate": 0.001991256441540848, "loss": 0.3923, "step": 5220 }, { "epoch": 0.009259037623927395, "grad_norm": 0.2099609375, "learning_rate": 0.0019912481633430593, "loss": 0.2637, "step": 5222 }, { "epoch": 0.009262583789237212, "grad_norm": 2.359375, "learning_rate": 0.001991239881247469, "loss": 0.3471, "step": 5224 }, { "epoch": 0.009266129954547026, "grad_norm": 0.91015625, "learning_rate": 0.0019912315952541134, "loss": 0.3024, "step": 5226 }, { "epoch": 0.00926967611985684, "grad_norm": 0.373046875, "learning_rate": 0.001991223305363028, "loss": 0.2688, "step": 5228 }, { "epoch": 0.009273222285166657, "grad_norm": 0.2265625, "learning_rate": 0.00199121501157425, "loss": 0.298, "step": 5230 }, { "epoch": 0.009276768450476472, "grad_norm": 0.53125, "learning_rate": 0.001991206713887815, "loss": 0.3344, "step": 5232 }, { "epoch": 0.009280314615786286, "grad_norm": 1.1640625, "learning_rate": 0.0019911984123037593, "loss": 0.3017, "step": 5234 }, { "epoch": 0.009283860781096102, "grad_norm": 0.376953125, "learning_rate": 0.00199119010682212, "loss": 0.3407, "step": 5236 }, { "epoch": 0.009287406946405917, "grad_norm": 0.2890625, "learning_rate": 0.0019911817974429323, "loss": 0.2423, "step": 5238 }, { "epoch": 0.009290953111715732, "grad_norm": 0.33984375, "learning_rate": 0.001991173484166233, "loss": 0.2364, "step": 5240 }, { "epoch": 0.009294499277025548, "grad_norm": 1.390625, "learning_rate": 0.001991165166992059, "loss": 0.3402, "step": 5242 }, { "epoch": 0.009298045442335362, "grad_norm": 0.267578125, "learning_rate": 0.0019911568459204457, "loss": 0.2299, "step": 5244 }, { "epoch": 0.009301591607645179, "grad_norm": 0.421875, "learning_rate": 0.0019911485209514303, "loss": 0.3193, "step": 5246 }, { "epoch": 0.009305137772954993, "grad_norm": 0.376953125, "learning_rate": 0.001991140192085049, "loss": 0.2265, "step": 5248 }, { "epoch": 0.009308683938264808, "grad_norm": 0.37109375, "learning_rate": 0.0019911318593213378, "loss": 0.2498, "step": 5250 }, { "epoch": 0.009312230103574624, "grad_norm": 0.6171875, "learning_rate": 0.0019911235226603343, "loss": 0.2617, "step": 5252 }, { "epoch": 0.009315776268884439, "grad_norm": 0.58984375, "learning_rate": 0.0019911151821020733, "loss": 0.3511, "step": 5254 }, { "epoch": 0.009319322434194253, "grad_norm": 0.55859375, "learning_rate": 0.001991106837646592, "loss": 0.3287, "step": 5256 }, { "epoch": 0.00932286859950407, "grad_norm": 0.279296875, "learning_rate": 0.0019910984892939276, "loss": 0.3323, "step": 5258 }, { "epoch": 0.009326414764813884, "grad_norm": 0.2294921875, "learning_rate": 0.0019910901370441157, "loss": 0.2309, "step": 5260 }, { "epoch": 0.009329960930123699, "grad_norm": 0.283203125, "learning_rate": 0.0019910817808971933, "loss": 0.3175, "step": 5262 }, { "epoch": 0.009333507095433515, "grad_norm": 0.8203125, "learning_rate": 0.001991073420853197, "loss": 0.2664, "step": 5264 }, { "epoch": 0.00933705326074333, "grad_norm": 0.3515625, "learning_rate": 0.0019910650569121627, "loss": 0.2869, "step": 5266 }, { "epoch": 0.009340599426053144, "grad_norm": 0.220703125, "learning_rate": 0.0019910566890741273, "loss": 0.2393, "step": 5268 }, { "epoch": 0.00934414559136296, "grad_norm": 0.267578125, "learning_rate": 0.001991048317339128, "loss": 0.2711, "step": 5270 }, { "epoch": 0.009347691756672775, "grad_norm": 0.671875, "learning_rate": 0.001991039941707201, "loss": 0.3396, "step": 5272 }, { "epoch": 0.00935123792198259, "grad_norm": 0.55859375, "learning_rate": 0.0019910315621783827, "loss": 0.2058, "step": 5274 }, { "epoch": 0.009354784087292406, "grad_norm": 1.0703125, "learning_rate": 0.00199102317875271, "loss": 0.396, "step": 5276 }, { "epoch": 0.00935833025260222, "grad_norm": 1.046875, "learning_rate": 0.001991014791430219, "loss": 0.2666, "step": 5278 }, { "epoch": 0.009361876417912036, "grad_norm": 0.57421875, "learning_rate": 0.0019910064002109473, "loss": 0.2908, "step": 5280 }, { "epoch": 0.009365422583221851, "grad_norm": 1.5234375, "learning_rate": 0.001990998005094931, "loss": 0.3964, "step": 5282 }, { "epoch": 0.009368968748531666, "grad_norm": 2.296875, "learning_rate": 0.0019909896060822073, "loss": 0.3696, "step": 5284 }, { "epoch": 0.009372514913841482, "grad_norm": 0.3671875, "learning_rate": 0.001990981203172812, "loss": 0.264, "step": 5286 }, { "epoch": 0.009376061079151296, "grad_norm": 0.3515625, "learning_rate": 0.001990972796366783, "loss": 0.2277, "step": 5288 }, { "epoch": 0.009379607244461111, "grad_norm": 0.59765625, "learning_rate": 0.0019909643856641564, "loss": 0.2843, "step": 5290 }, { "epoch": 0.009383153409770927, "grad_norm": 0.314453125, "learning_rate": 0.0019909559710649693, "loss": 0.2127, "step": 5292 }, { "epoch": 0.009386699575080742, "grad_norm": 0.4140625, "learning_rate": 0.0019909475525692576, "loss": 0.2601, "step": 5294 }, { "epoch": 0.009390245740390556, "grad_norm": 0.298828125, "learning_rate": 0.00199093913017706, "loss": 0.2017, "step": 5296 }, { "epoch": 0.009393791905700373, "grad_norm": 0.34375, "learning_rate": 0.0019909307038884112, "loss": 0.2771, "step": 5298 }, { "epoch": 0.009397338071010187, "grad_norm": 0.3671875, "learning_rate": 0.001990922273703349, "loss": 0.1868, "step": 5300 }, { "epoch": 0.009400884236320002, "grad_norm": 0.341796875, "learning_rate": 0.001990913839621911, "loss": 0.3177, "step": 5302 }, { "epoch": 0.009404430401629818, "grad_norm": 0.28125, "learning_rate": 0.0019909054016441327, "loss": 0.2739, "step": 5304 }, { "epoch": 0.009407976566939633, "grad_norm": 0.921875, "learning_rate": 0.001990896959770052, "loss": 0.2774, "step": 5306 }, { "epoch": 0.009411522732249447, "grad_norm": 2.9375, "learning_rate": 0.0019908885139997053, "loss": 0.3488, "step": 5308 }, { "epoch": 0.009415068897559263, "grad_norm": 6.625, "learning_rate": 0.00199088006433313, "loss": 0.5172, "step": 5310 }, { "epoch": 0.009418615062869078, "grad_norm": 0.419921875, "learning_rate": 0.0019908716107703626, "loss": 0.2794, "step": 5312 }, { "epoch": 0.009422161228178894, "grad_norm": 1.3203125, "learning_rate": 0.00199086315331144, "loss": 0.3641, "step": 5314 }, { "epoch": 0.009425707393488709, "grad_norm": 0.54296875, "learning_rate": 0.0019908546919564, "loss": 0.2582, "step": 5316 }, { "epoch": 0.009429253558798523, "grad_norm": 0.380859375, "learning_rate": 0.001990846226705279, "loss": 0.3349, "step": 5318 }, { "epoch": 0.00943279972410834, "grad_norm": 1.1640625, "learning_rate": 0.001990837757558114, "loss": 0.2865, "step": 5320 }, { "epoch": 0.009436345889418154, "grad_norm": 0.486328125, "learning_rate": 0.0019908292845149415, "loss": 0.2807, "step": 5322 }, { "epoch": 0.009439892054727969, "grad_norm": 0.7421875, "learning_rate": 0.0019908208075757996, "loss": 0.2805, "step": 5324 }, { "epoch": 0.009443438220037785, "grad_norm": 0.60546875, "learning_rate": 0.001990812326740725, "loss": 0.28, "step": 5326 }, { "epoch": 0.0094469843853476, "grad_norm": 4.875, "learning_rate": 0.001990803842009755, "loss": 0.3472, "step": 5328 }, { "epoch": 0.009450530550657414, "grad_norm": 0.31640625, "learning_rate": 0.001990795353382926, "loss": 0.2013, "step": 5330 }, { "epoch": 0.00945407671596723, "grad_norm": 0.37890625, "learning_rate": 0.0019907868608602755, "loss": 0.2834, "step": 5332 }, { "epoch": 0.009457622881277045, "grad_norm": 0.416015625, "learning_rate": 0.001990778364441841, "loss": 0.2338, "step": 5334 }, { "epoch": 0.00946116904658686, "grad_norm": 0.56640625, "learning_rate": 0.001990769864127659, "loss": 0.2797, "step": 5336 }, { "epoch": 0.009464715211896676, "grad_norm": 0.61328125, "learning_rate": 0.001990761359917767, "loss": 0.2832, "step": 5338 }, { "epoch": 0.00946826137720649, "grad_norm": 1.453125, "learning_rate": 0.001990752851812203, "loss": 0.3602, "step": 5340 }, { "epoch": 0.009471807542516305, "grad_norm": 0.59765625, "learning_rate": 0.0019907443398110027, "loss": 0.3086, "step": 5342 }, { "epoch": 0.009475353707826121, "grad_norm": 0.6953125, "learning_rate": 0.0019907358239142046, "loss": 0.2671, "step": 5344 }, { "epoch": 0.009478899873135936, "grad_norm": 0.4140625, "learning_rate": 0.001990727304121845, "loss": 0.2537, "step": 5346 }, { "epoch": 0.009482446038445752, "grad_norm": 1.84375, "learning_rate": 0.001990718780433962, "loss": 0.3107, "step": 5348 }, { "epoch": 0.009485992203755567, "grad_norm": 0.28515625, "learning_rate": 0.0019907102528505917, "loss": 0.2356, "step": 5350 }, { "epoch": 0.009489538369065381, "grad_norm": 0.71484375, "learning_rate": 0.0019907017213717727, "loss": 0.241, "step": 5352 }, { "epoch": 0.009493084534375197, "grad_norm": 0.9609375, "learning_rate": 0.0019906931859975416, "loss": 0.2512, "step": 5354 }, { "epoch": 0.009496630699685012, "grad_norm": 0.337890625, "learning_rate": 0.001990684646727936, "loss": 0.3035, "step": 5356 }, { "epoch": 0.009500176864994827, "grad_norm": 0.2421875, "learning_rate": 0.0019906761035629926, "loss": 0.2676, "step": 5358 }, { "epoch": 0.009503723030304643, "grad_norm": 0.28515625, "learning_rate": 0.00199066755650275, "loss": 0.2732, "step": 5360 }, { "epoch": 0.009507269195614457, "grad_norm": 0.890625, "learning_rate": 0.0019906590055472446, "loss": 0.3169, "step": 5362 }, { "epoch": 0.009510815360924272, "grad_norm": 0.259765625, "learning_rate": 0.001990650450696514, "loss": 0.2364, "step": 5364 }, { "epoch": 0.009514361526234088, "grad_norm": 1.6640625, "learning_rate": 0.001990641891950596, "loss": 0.2666, "step": 5366 }, { "epoch": 0.009517907691543903, "grad_norm": 2.109375, "learning_rate": 0.001990633329309527, "loss": 0.4231, "step": 5368 }, { "epoch": 0.009521453856853717, "grad_norm": 0.314453125, "learning_rate": 0.0019906247627733457, "loss": 0.2215, "step": 5370 }, { "epoch": 0.009525000022163534, "grad_norm": 6.75, "learning_rate": 0.001990616192342089, "loss": 0.4135, "step": 5372 }, { "epoch": 0.009528546187473348, "grad_norm": 0.55078125, "learning_rate": 0.0019906076180157945, "loss": 0.3483, "step": 5374 }, { "epoch": 0.009532092352783163, "grad_norm": 0.51171875, "learning_rate": 0.0019905990397944993, "loss": 0.3112, "step": 5376 }, { "epoch": 0.009535638518092979, "grad_norm": 0.67578125, "learning_rate": 0.001990590457678241, "loss": 0.3207, "step": 5378 }, { "epoch": 0.009539184683402794, "grad_norm": 0.80078125, "learning_rate": 0.001990581871667058, "loss": 0.2671, "step": 5380 }, { "epoch": 0.00954273084871261, "grad_norm": 0.45703125, "learning_rate": 0.0019905732817609868, "loss": 0.338, "step": 5382 }, { "epoch": 0.009546277014022424, "grad_norm": 0.5078125, "learning_rate": 0.0019905646879600654, "loss": 0.316, "step": 5384 }, { "epoch": 0.009549823179332239, "grad_norm": 0.7578125, "learning_rate": 0.0019905560902643317, "loss": 0.2966, "step": 5386 }, { "epoch": 0.009553369344642055, "grad_norm": 0.88671875, "learning_rate": 0.001990547488673823, "loss": 0.2656, "step": 5388 }, { "epoch": 0.00955691550995187, "grad_norm": 0.546875, "learning_rate": 0.001990538883188576, "loss": 0.2888, "step": 5390 }, { "epoch": 0.009560461675261684, "grad_norm": 0.498046875, "learning_rate": 0.00199053027380863, "loss": 0.3572, "step": 5392 }, { "epoch": 0.0095640078405715, "grad_norm": 1.515625, "learning_rate": 0.001990521660534022, "loss": 0.5049, "step": 5394 }, { "epoch": 0.009567554005881315, "grad_norm": 0.4140625, "learning_rate": 0.001990513043364789, "loss": 0.3315, "step": 5396 }, { "epoch": 0.00957110017119113, "grad_norm": 0.392578125, "learning_rate": 0.001990504422300969, "loss": 0.2469, "step": 5398 }, { "epoch": 0.009574646336500946, "grad_norm": 1.015625, "learning_rate": 0.0019904957973426005, "loss": 0.3216, "step": 5400 }, { "epoch": 0.00957819250181076, "grad_norm": 0.4921875, "learning_rate": 0.0019904871684897204, "loss": 0.3441, "step": 5402 }, { "epoch": 0.009581738667120575, "grad_norm": 0.373046875, "learning_rate": 0.001990478535742367, "loss": 0.3262, "step": 5404 }, { "epoch": 0.009585284832430391, "grad_norm": 0.55859375, "learning_rate": 0.0019904698991005778, "loss": 0.4011, "step": 5406 }, { "epoch": 0.009588830997740206, "grad_norm": 0.33984375, "learning_rate": 0.0019904612585643897, "loss": 0.289, "step": 5408 }, { "epoch": 0.00959237716305002, "grad_norm": 0.76171875, "learning_rate": 0.001990452614133842, "loss": 0.3158, "step": 5410 }, { "epoch": 0.009595923328359837, "grad_norm": 0.330078125, "learning_rate": 0.0019904439658089716, "loss": 0.5208, "step": 5412 }, { "epoch": 0.009599469493669651, "grad_norm": 0.50390625, "learning_rate": 0.0019904353135898165, "loss": 0.313, "step": 5414 }, { "epoch": 0.009603015658979468, "grad_norm": 0.515625, "learning_rate": 0.0019904266574764145, "loss": 0.2822, "step": 5416 }, { "epoch": 0.009606561824289282, "grad_norm": 2.859375, "learning_rate": 0.0019904179974688033, "loss": 0.4619, "step": 5418 }, { "epoch": 0.009610107989599097, "grad_norm": 0.48828125, "learning_rate": 0.0019904093335670215, "loss": 0.2557, "step": 5420 }, { "epoch": 0.009613654154908913, "grad_norm": 0.76953125, "learning_rate": 0.0019904006657711065, "loss": 0.3193, "step": 5422 }, { "epoch": 0.009617200320218728, "grad_norm": 0.53125, "learning_rate": 0.001990391994081096, "loss": 0.2368, "step": 5424 }, { "epoch": 0.009620746485528542, "grad_norm": 0.26953125, "learning_rate": 0.0019903833184970283, "loss": 0.2528, "step": 5426 }, { "epoch": 0.009624292650838358, "grad_norm": 0.431640625, "learning_rate": 0.0019903746390189407, "loss": 0.2841, "step": 5428 }, { "epoch": 0.009627838816148173, "grad_norm": 0.28125, "learning_rate": 0.0019903659556468715, "loss": 0.27, "step": 5430 }, { "epoch": 0.009631384981457988, "grad_norm": 1.7109375, "learning_rate": 0.0019903572683808595, "loss": 0.2637, "step": 5432 }, { "epoch": 0.009634931146767804, "grad_norm": 0.578125, "learning_rate": 0.001990348577220942, "loss": 0.2954, "step": 5434 }, { "epoch": 0.009638477312077618, "grad_norm": 0.302734375, "learning_rate": 0.0019903398821671564, "loss": 0.2343, "step": 5436 }, { "epoch": 0.009642023477387433, "grad_norm": 0.6328125, "learning_rate": 0.0019903311832195417, "loss": 0.27, "step": 5438 }, { "epoch": 0.00964556964269725, "grad_norm": 0.578125, "learning_rate": 0.0019903224803781354, "loss": 0.2144, "step": 5440 }, { "epoch": 0.009649115808007064, "grad_norm": 0.51171875, "learning_rate": 0.0019903137736429757, "loss": 0.2683, "step": 5442 }, { "epoch": 0.009652661973316878, "grad_norm": 0.75, "learning_rate": 0.001990305063014101, "loss": 0.2296, "step": 5444 }, { "epoch": 0.009656208138626695, "grad_norm": 0.46875, "learning_rate": 0.001990296348491549, "loss": 0.2922, "step": 5446 }, { "epoch": 0.00965975430393651, "grad_norm": 0.486328125, "learning_rate": 0.001990287630075357, "loss": 0.2725, "step": 5448 }, { "epoch": 0.009663300469246324, "grad_norm": 0.431640625, "learning_rate": 0.0019902789077655652, "loss": 0.2249, "step": 5450 }, { "epoch": 0.00966684663455614, "grad_norm": 0.546875, "learning_rate": 0.0019902701815622103, "loss": 0.3001, "step": 5452 }, { "epoch": 0.009670392799865955, "grad_norm": 0.51953125, "learning_rate": 0.001990261451465331, "loss": 0.2404, "step": 5454 }, { "epoch": 0.009673938965175771, "grad_norm": 1.71875, "learning_rate": 0.001990252717474965, "loss": 0.2583, "step": 5456 }, { "epoch": 0.009677485130485585, "grad_norm": 0.408203125, "learning_rate": 0.001990243979591151, "loss": 0.2287, "step": 5458 }, { "epoch": 0.0096810312957954, "grad_norm": 0.296875, "learning_rate": 0.001990235237813926, "loss": 0.2383, "step": 5460 }, { "epoch": 0.009684577461105216, "grad_norm": 1.71875, "learning_rate": 0.00199022649214333, "loss": 0.4918, "step": 5462 }, { "epoch": 0.00968812362641503, "grad_norm": 0.71875, "learning_rate": 0.0019902177425794, "loss": 0.2526, "step": 5464 }, { "epoch": 0.009691669791724845, "grad_norm": 0.2333984375, "learning_rate": 0.0019902089891221755, "loss": 0.2841, "step": 5466 }, { "epoch": 0.009695215957034662, "grad_norm": 0.36328125, "learning_rate": 0.001990200231771693, "loss": 0.3146, "step": 5468 }, { "epoch": 0.009698762122344476, "grad_norm": 0.69140625, "learning_rate": 0.001990191470527992, "loss": 0.2704, "step": 5470 }, { "epoch": 0.00970230828765429, "grad_norm": 0.9921875, "learning_rate": 0.001990182705391111, "loss": 0.345, "step": 5472 }, { "epoch": 0.009705854452964107, "grad_norm": 1.265625, "learning_rate": 0.0019901739363610877, "loss": 0.6601, "step": 5474 }, { "epoch": 0.009709400618273922, "grad_norm": 1.0390625, "learning_rate": 0.0019901651634379606, "loss": 0.2783, "step": 5476 }, { "epoch": 0.009712946783583736, "grad_norm": 1.0859375, "learning_rate": 0.0019901563866217683, "loss": 0.2675, "step": 5478 }, { "epoch": 0.009716492948893552, "grad_norm": 0.30078125, "learning_rate": 0.001990147605912549, "loss": 0.251, "step": 5480 }, { "epoch": 0.009720039114203367, "grad_norm": 0.58984375, "learning_rate": 0.001990138821310341, "loss": 0.2504, "step": 5482 }, { "epoch": 0.009723585279513182, "grad_norm": 2.15625, "learning_rate": 0.001990130032815183, "loss": 0.4118, "step": 5484 }, { "epoch": 0.009727131444822998, "grad_norm": 0.3984375, "learning_rate": 0.001990121240427113, "loss": 0.2073, "step": 5486 }, { "epoch": 0.009730677610132812, "grad_norm": 0.96484375, "learning_rate": 0.0019901124441461704, "loss": 0.6021, "step": 5488 }, { "epoch": 0.009734223775442629, "grad_norm": 0.7578125, "learning_rate": 0.0019901036439723923, "loss": 0.2819, "step": 5490 }, { "epoch": 0.009737769940752443, "grad_norm": 0.28125, "learning_rate": 0.0019900948399058185, "loss": 0.2678, "step": 5492 }, { "epoch": 0.009741316106062258, "grad_norm": 0.408203125, "learning_rate": 0.0019900860319464865, "loss": 0.2186, "step": 5494 }, { "epoch": 0.009744862271372074, "grad_norm": 0.359375, "learning_rate": 0.001990077220094435, "loss": 0.3387, "step": 5496 }, { "epoch": 0.009748408436681889, "grad_norm": 0.76171875, "learning_rate": 0.0019900684043497037, "loss": 0.2981, "step": 5498 }, { "epoch": 0.009751954601991703, "grad_norm": 0.2734375, "learning_rate": 0.001990059584712329, "loss": 0.2693, "step": 5500 }, { "epoch": 0.00975550076730152, "grad_norm": 0.283203125, "learning_rate": 0.0019900507611823517, "loss": 0.2463, "step": 5502 }, { "epoch": 0.009759046932611334, "grad_norm": 0.416015625, "learning_rate": 0.0019900419337598087, "loss": 0.2784, "step": 5504 }, { "epoch": 0.009762593097921149, "grad_norm": 0.283203125, "learning_rate": 0.0019900331024447393, "loss": 0.2042, "step": 5506 }, { "epoch": 0.009766139263230965, "grad_norm": 0.52734375, "learning_rate": 0.001990024267237183, "loss": 0.2527, "step": 5508 }, { "epoch": 0.00976968542854078, "grad_norm": 0.67578125, "learning_rate": 0.0019900154281371762, "loss": 0.2652, "step": 5510 }, { "epoch": 0.009773231593850594, "grad_norm": 0.29296875, "learning_rate": 0.00199000658514476, "loss": 0.3096, "step": 5512 }, { "epoch": 0.00977677775916041, "grad_norm": 0.859375, "learning_rate": 0.0019899977382599712, "loss": 0.2882, "step": 5514 }, { "epoch": 0.009780323924470225, "grad_norm": 0.248046875, "learning_rate": 0.0019899888874828496, "loss": 0.3353, "step": 5516 }, { "epoch": 0.00978387008978004, "grad_norm": 0.2578125, "learning_rate": 0.0019899800328134335, "loss": 0.2215, "step": 5518 }, { "epoch": 0.009787416255089856, "grad_norm": 0.314453125, "learning_rate": 0.0019899711742517616, "loss": 0.241, "step": 5520 }, { "epoch": 0.00979096242039967, "grad_norm": 0.890625, "learning_rate": 0.001989962311797873, "loss": 0.3184, "step": 5522 }, { "epoch": 0.009794508585709487, "grad_norm": 0.59765625, "learning_rate": 0.001989953445451806, "loss": 0.272, "step": 5524 }, { "epoch": 0.009798054751019301, "grad_norm": 0.69921875, "learning_rate": 0.0019899445752136, "loss": 0.1761, "step": 5526 }, { "epoch": 0.009801600916329116, "grad_norm": 1.71875, "learning_rate": 0.001989935701083293, "loss": 0.4969, "step": 5528 }, { "epoch": 0.009805147081638932, "grad_norm": 1.2890625, "learning_rate": 0.001989926823060924, "loss": 0.434, "step": 5530 }, { "epoch": 0.009808693246948746, "grad_norm": 0.306640625, "learning_rate": 0.0019899179411465326, "loss": 0.3315, "step": 5532 }, { "epoch": 0.009812239412258561, "grad_norm": 0.39453125, "learning_rate": 0.0019899090553401563, "loss": 0.451, "step": 5534 }, { "epoch": 0.009815785577568377, "grad_norm": 0.58984375, "learning_rate": 0.001989900165641835, "loss": 0.4802, "step": 5536 }, { "epoch": 0.009819331742878192, "grad_norm": 0.2578125, "learning_rate": 0.001989891272051608, "loss": 0.2513, "step": 5538 }, { "epoch": 0.009822877908188006, "grad_norm": 0.224609375, "learning_rate": 0.0019898823745695127, "loss": 0.2198, "step": 5540 }, { "epoch": 0.009826424073497823, "grad_norm": 5.65625, "learning_rate": 0.0019898734731955887, "loss": 0.3903, "step": 5542 }, { "epoch": 0.009829970238807637, "grad_norm": 0.435546875, "learning_rate": 0.0019898645679298755, "loss": 0.2811, "step": 5544 }, { "epoch": 0.009833516404117452, "grad_norm": 0.515625, "learning_rate": 0.001989855658772411, "loss": 0.258, "step": 5546 }, { "epoch": 0.009837062569427268, "grad_norm": 0.50390625, "learning_rate": 0.0019898467457232353, "loss": 0.2887, "step": 5548 }, { "epoch": 0.009840608734737083, "grad_norm": 1.1015625, "learning_rate": 0.0019898378287823864, "loss": 0.3219, "step": 5550 }, { "epoch": 0.009844154900046897, "grad_norm": 1.1015625, "learning_rate": 0.001989828907949904, "loss": 0.6607, "step": 5552 }, { "epoch": 0.009847701065356713, "grad_norm": 0.322265625, "learning_rate": 0.001989819983225827, "loss": 0.27, "step": 5554 }, { "epoch": 0.009851247230666528, "grad_norm": 0.34765625, "learning_rate": 0.001989811054610194, "loss": 0.2863, "step": 5556 }, { "epoch": 0.009854793395976344, "grad_norm": 0.82421875, "learning_rate": 0.0019898021221030444, "loss": 0.3375, "step": 5558 }, { "epoch": 0.009858339561286159, "grad_norm": 1.046875, "learning_rate": 0.0019897931857044172, "loss": 0.2429, "step": 5560 }, { "epoch": 0.009861885726595973, "grad_norm": 1.0703125, "learning_rate": 0.0019897842454143513, "loss": 0.3023, "step": 5562 }, { "epoch": 0.00986543189190579, "grad_norm": 0.6953125, "learning_rate": 0.001989775301232886, "loss": 0.284, "step": 5564 }, { "epoch": 0.009868978057215604, "grad_norm": 0.51171875, "learning_rate": 0.0019897663531600607, "loss": 0.3629, "step": 5566 }, { "epoch": 0.009872524222525419, "grad_norm": 0.703125, "learning_rate": 0.0019897574011959137, "loss": 0.2543, "step": 5568 }, { "epoch": 0.009876070387835235, "grad_norm": 0.33984375, "learning_rate": 0.001989748445340485, "loss": 0.2965, "step": 5570 }, { "epoch": 0.00987961655314505, "grad_norm": 0.49609375, "learning_rate": 0.0019897394855938133, "loss": 0.2777, "step": 5572 }, { "epoch": 0.009883162718454864, "grad_norm": 0.5546875, "learning_rate": 0.001989730521955938, "loss": 0.2765, "step": 5574 }, { "epoch": 0.00988670888376468, "grad_norm": 0.462890625, "learning_rate": 0.001989721554426898, "loss": 0.2722, "step": 5576 }, { "epoch": 0.009890255049074495, "grad_norm": 0.365234375, "learning_rate": 0.001989712583006733, "loss": 0.2657, "step": 5578 }, { "epoch": 0.00989380121438431, "grad_norm": 0.6328125, "learning_rate": 0.001989703607695482, "loss": 0.2019, "step": 5580 }, { "epoch": 0.009897347379694126, "grad_norm": 0.43359375, "learning_rate": 0.001989694628493184, "loss": 0.3284, "step": 5582 }, { "epoch": 0.00990089354500394, "grad_norm": 0.419921875, "learning_rate": 0.0019896856453998782, "loss": 0.2827, "step": 5584 }, { "epoch": 0.009904439710313755, "grad_norm": 0.73046875, "learning_rate": 0.0019896766584156047, "loss": 0.3349, "step": 5586 }, { "epoch": 0.009907985875623571, "grad_norm": 0.453125, "learning_rate": 0.001989667667540402, "loss": 0.2096, "step": 5588 }, { "epoch": 0.009911532040933386, "grad_norm": 0.99609375, "learning_rate": 0.00198965867277431, "loss": 0.4079, "step": 5590 }, { "epoch": 0.009915078206243202, "grad_norm": 2.984375, "learning_rate": 0.0019896496741173674, "loss": 0.2277, "step": 5592 }, { "epoch": 0.009918624371553017, "grad_norm": 0.70703125, "learning_rate": 0.001989640671569614, "loss": 0.3198, "step": 5594 }, { "epoch": 0.009922170536862831, "grad_norm": 1.5625, "learning_rate": 0.0019896316651310895, "loss": 0.3016, "step": 5596 }, { "epoch": 0.009925716702172648, "grad_norm": 0.294921875, "learning_rate": 0.0019896226548018325, "loss": 0.2796, "step": 5598 }, { "epoch": 0.009929262867482462, "grad_norm": 0.58203125, "learning_rate": 0.0019896136405818826, "loss": 0.2614, "step": 5600 }, { "epoch": 0.009932809032792277, "grad_norm": 0.248046875, "learning_rate": 0.0019896046224712792, "loss": 0.2003, "step": 5602 }, { "epoch": 0.009936355198102093, "grad_norm": 3.09375, "learning_rate": 0.0019895956004700624, "loss": 0.3767, "step": 5604 }, { "epoch": 0.009939901363411907, "grad_norm": 0.416015625, "learning_rate": 0.001989586574578271, "loss": 0.2424, "step": 5606 }, { "epoch": 0.009943447528721722, "grad_norm": 3.9375, "learning_rate": 0.0019895775447959447, "loss": 0.2097, "step": 5608 }, { "epoch": 0.009946993694031538, "grad_norm": 0.546875, "learning_rate": 0.001989568511123123, "loss": 0.3036, "step": 5610 }, { "epoch": 0.009950539859341353, "grad_norm": 1.8125, "learning_rate": 0.001989559473559845, "loss": 0.2509, "step": 5612 }, { "epoch": 0.009954086024651167, "grad_norm": 0.64453125, "learning_rate": 0.0019895504321061513, "loss": 0.224, "step": 5614 }, { "epoch": 0.009957632189960984, "grad_norm": 2.53125, "learning_rate": 0.0019895413867620803, "loss": 0.371, "step": 5616 }, { "epoch": 0.009961178355270798, "grad_norm": 0.5, "learning_rate": 0.0019895323375276716, "loss": 0.2906, "step": 5618 }, { "epoch": 0.009964724520580613, "grad_norm": 0.30078125, "learning_rate": 0.001989523284402966, "loss": 0.2248, "step": 5620 }, { "epoch": 0.009968270685890429, "grad_norm": 0.40625, "learning_rate": 0.0019895142273880016, "loss": 0.2878, "step": 5622 }, { "epoch": 0.009971816851200244, "grad_norm": 0.482421875, "learning_rate": 0.0019895051664828188, "loss": 0.2276, "step": 5624 }, { "epoch": 0.00997536301651006, "grad_norm": 0.8125, "learning_rate": 0.0019894961016874574, "loss": 0.3383, "step": 5626 }, { "epoch": 0.009978909181819874, "grad_norm": 0.90234375, "learning_rate": 0.0019894870330019565, "loss": 0.2168, "step": 5628 }, { "epoch": 0.009982455347129689, "grad_norm": 0.6484375, "learning_rate": 0.0019894779604263555, "loss": 0.23, "step": 5630 }, { "epoch": 0.009986001512439505, "grad_norm": 0.359375, "learning_rate": 0.0019894688839606953, "loss": 0.2882, "step": 5632 }, { "epoch": 0.00998954767774932, "grad_norm": 0.55078125, "learning_rate": 0.0019894598036050144, "loss": 0.3022, "step": 5634 }, { "epoch": 0.009993093843059134, "grad_norm": 0.388671875, "learning_rate": 0.0019894507193593536, "loss": 0.2219, "step": 5636 }, { "epoch": 0.00999664000836895, "grad_norm": 0.6875, "learning_rate": 0.0019894416312237514, "loss": 0.2551, "step": 5638 }, { "epoch": 0.010000186173678765, "grad_norm": 0.296875, "learning_rate": 0.001989432539198248, "loss": 0.5288, "step": 5640 }, { "epoch": 0.01000373233898858, "grad_norm": 1.6953125, "learning_rate": 0.001989423443282884, "loss": 0.4341, "step": 5642 }, { "epoch": 0.010007278504298396, "grad_norm": 0.875, "learning_rate": 0.001989414343477698, "loss": 0.275, "step": 5644 }, { "epoch": 0.01001082466960821, "grad_norm": 0.365234375, "learning_rate": 0.00198940523978273, "loss": 0.2078, "step": 5646 }, { "epoch": 0.010014370834918025, "grad_norm": 0.458984375, "learning_rate": 0.001989396132198021, "loss": 0.4167, "step": 5648 }, { "epoch": 0.010017917000227841, "grad_norm": 0.2265625, "learning_rate": 0.0019893870207236095, "loss": 0.2656, "step": 5650 }, { "epoch": 0.010021463165537656, "grad_norm": 2.546875, "learning_rate": 0.0019893779053595357, "loss": 0.3801, "step": 5652 }, { "epoch": 0.01002500933084747, "grad_norm": 0.408203125, "learning_rate": 0.0019893687861058398, "loss": 0.2986, "step": 5654 }, { "epoch": 0.010028555496157287, "grad_norm": 0.419921875, "learning_rate": 0.001989359662962561, "loss": 0.2402, "step": 5656 }, { "epoch": 0.010032101661467101, "grad_norm": 0.4140625, "learning_rate": 0.00198935053592974, "loss": 0.2696, "step": 5658 }, { "epoch": 0.010035647826776918, "grad_norm": 1.1484375, "learning_rate": 0.001989341405007416, "loss": 0.1741, "step": 5660 }, { "epoch": 0.010039193992086732, "grad_norm": 0.59765625, "learning_rate": 0.00198933227019563, "loss": 0.2502, "step": 5662 }, { "epoch": 0.010042740157396547, "grad_norm": 0.3671875, "learning_rate": 0.0019893231314944207, "loss": 0.3084, "step": 5664 }, { "epoch": 0.010046286322706363, "grad_norm": 0.78515625, "learning_rate": 0.0019893139889038285, "loss": 0.4311, "step": 5666 }, { "epoch": 0.010049832488016178, "grad_norm": 0.4609375, "learning_rate": 0.0019893048424238936, "loss": 0.2815, "step": 5668 }, { "epoch": 0.010053378653325992, "grad_norm": 1.609375, "learning_rate": 0.0019892956920546565, "loss": 0.3271, "step": 5670 }, { "epoch": 0.010056924818635809, "grad_norm": 1.2734375, "learning_rate": 0.001989286537796156, "loss": 0.2578, "step": 5672 }, { "epoch": 0.010060470983945623, "grad_norm": 0.52734375, "learning_rate": 0.0019892773796484323, "loss": 0.2424, "step": 5674 }, { "epoch": 0.010064017149255438, "grad_norm": 0.3828125, "learning_rate": 0.0019892682176115267, "loss": 0.2775, "step": 5676 }, { "epoch": 0.010067563314565254, "grad_norm": 0.3984375, "learning_rate": 0.001989259051685478, "loss": 0.3256, "step": 5678 }, { "epoch": 0.010071109479875068, "grad_norm": 0.578125, "learning_rate": 0.001989249881870327, "loss": 0.3471, "step": 5680 }, { "epoch": 0.010074655645184883, "grad_norm": 0.51171875, "learning_rate": 0.0019892407081661136, "loss": 0.2242, "step": 5682 }, { "epoch": 0.0100782018104947, "grad_norm": 0.42578125, "learning_rate": 0.0019892315305728775, "loss": 0.2751, "step": 5684 }, { "epoch": 0.010081747975804514, "grad_norm": 0.51171875, "learning_rate": 0.0019892223490906597, "loss": 0.3133, "step": 5686 }, { "epoch": 0.010085294141114328, "grad_norm": 1.7265625, "learning_rate": 0.0019892131637195, "loss": 0.2903, "step": 5688 }, { "epoch": 0.010088840306424145, "grad_norm": 0.2314453125, "learning_rate": 0.0019892039744594378, "loss": 0.2409, "step": 5690 }, { "epoch": 0.01009238647173396, "grad_norm": 0.435546875, "learning_rate": 0.001989194781310514, "loss": 0.2348, "step": 5692 }, { "epoch": 0.010095932637043776, "grad_norm": 0.490234375, "learning_rate": 0.0019891855842727687, "loss": 0.2984, "step": 5694 }, { "epoch": 0.01009947880235359, "grad_norm": 1.25, "learning_rate": 0.001989176383346243, "loss": 0.3989, "step": 5696 }, { "epoch": 0.010103024967663405, "grad_norm": 0.640625, "learning_rate": 0.0019891671785309756, "loss": 0.289, "step": 5698 }, { "epoch": 0.010106571132973221, "grad_norm": 1.203125, "learning_rate": 0.0019891579698270074, "loss": 0.4843, "step": 5700 }, { "epoch": 0.010110117298283035, "grad_norm": 1.0390625, "learning_rate": 0.0019891487572343785, "loss": 0.4377, "step": 5702 }, { "epoch": 0.01011366346359285, "grad_norm": 0.380859375, "learning_rate": 0.00198913954075313, "loss": 0.2682, "step": 5704 }, { "epoch": 0.010117209628902666, "grad_norm": 0.69921875, "learning_rate": 0.0019891303203833015, "loss": 0.2555, "step": 5706 }, { "epoch": 0.010120755794212481, "grad_norm": 0.494140625, "learning_rate": 0.001989121096124933, "loss": 0.2466, "step": 5708 }, { "epoch": 0.010124301959522295, "grad_norm": 1.1875, "learning_rate": 0.0019891118679780657, "loss": 0.4772, "step": 5710 }, { "epoch": 0.010127848124832112, "grad_norm": 0.2333984375, "learning_rate": 0.0019891026359427394, "loss": 0.2197, "step": 5712 }, { "epoch": 0.010131394290141926, "grad_norm": 2.859375, "learning_rate": 0.001989093400018995, "loss": 0.3882, "step": 5714 }, { "epoch": 0.01013494045545174, "grad_norm": 0.40234375, "learning_rate": 0.0019890841602068724, "loss": 0.2922, "step": 5716 }, { "epoch": 0.010138486620761557, "grad_norm": 0.66015625, "learning_rate": 0.0019890749165064115, "loss": 0.311, "step": 5718 }, { "epoch": 0.010142032786071372, "grad_norm": 0.2470703125, "learning_rate": 0.0019890656689176543, "loss": 0.2461, "step": 5720 }, { "epoch": 0.010145578951381186, "grad_norm": 0.953125, "learning_rate": 0.0019890564174406397, "loss": 0.2785, "step": 5722 }, { "epoch": 0.010149125116691003, "grad_norm": 0.7578125, "learning_rate": 0.0019890471620754085, "loss": 0.2713, "step": 5724 }, { "epoch": 0.010152671282000817, "grad_norm": 0.85546875, "learning_rate": 0.001989037902822002, "loss": 0.3803, "step": 5726 }, { "epoch": 0.010156217447310633, "grad_norm": 0.4375, "learning_rate": 0.00198902863968046, "loss": 0.2137, "step": 5728 }, { "epoch": 0.010159763612620448, "grad_norm": 0.91015625, "learning_rate": 0.0019890193726508224, "loss": 0.3113, "step": 5730 }, { "epoch": 0.010163309777930262, "grad_norm": 0.275390625, "learning_rate": 0.0019890101017331313, "loss": 0.2358, "step": 5732 }, { "epoch": 0.010166855943240079, "grad_norm": 0.298828125, "learning_rate": 0.0019890008269274266, "loss": 0.2295, "step": 5734 }, { "epoch": 0.010170402108549893, "grad_norm": 0.74609375, "learning_rate": 0.001988991548233748, "loss": 0.2922, "step": 5736 }, { "epoch": 0.010173948273859708, "grad_norm": 0.68359375, "learning_rate": 0.0019889822656521373, "loss": 0.2203, "step": 5738 }, { "epoch": 0.010177494439169524, "grad_norm": 0.57421875, "learning_rate": 0.001988972979182634, "loss": 0.2339, "step": 5740 }, { "epoch": 0.010181040604479339, "grad_norm": 1.453125, "learning_rate": 0.0019889636888252796, "loss": 0.2815, "step": 5742 }, { "epoch": 0.010184586769789153, "grad_norm": 0.33984375, "learning_rate": 0.0019889543945801145, "loss": 0.321, "step": 5744 }, { "epoch": 0.01018813293509897, "grad_norm": 3.9375, "learning_rate": 0.001988945096447179, "loss": 0.2809, "step": 5746 }, { "epoch": 0.010191679100408784, "grad_norm": 2.734375, "learning_rate": 0.0019889357944265144, "loss": 0.4626, "step": 5748 }, { "epoch": 0.010195225265718599, "grad_norm": 0.73828125, "learning_rate": 0.0019889264885181606, "loss": 0.2143, "step": 5750 }, { "epoch": 0.010198771431028415, "grad_norm": 0.44140625, "learning_rate": 0.001988917178722159, "loss": 0.1887, "step": 5752 }, { "epoch": 0.01020231759633823, "grad_norm": 0.34375, "learning_rate": 0.0019889078650385497, "loss": 0.3456, "step": 5754 }, { "epoch": 0.010205863761648044, "grad_norm": 0.408203125, "learning_rate": 0.0019888985474673737, "loss": 0.288, "step": 5756 }, { "epoch": 0.01020940992695786, "grad_norm": 0.640625, "learning_rate": 0.001988889226008672, "loss": 0.2324, "step": 5758 }, { "epoch": 0.010212956092267675, "grad_norm": 0.9453125, "learning_rate": 0.001988879900662485, "loss": 0.2174, "step": 5760 }, { "epoch": 0.010216502257577491, "grad_norm": 0.2490234375, "learning_rate": 0.0019888705714288537, "loss": 0.2459, "step": 5762 }, { "epoch": 0.010220048422887306, "grad_norm": 0.380859375, "learning_rate": 0.001988861238307819, "loss": 0.2782, "step": 5764 }, { "epoch": 0.01022359458819712, "grad_norm": 0.5, "learning_rate": 0.001988851901299421, "loss": 0.2213, "step": 5766 }, { "epoch": 0.010227140753506937, "grad_norm": 0.75, "learning_rate": 0.001988842560403702, "loss": 0.2552, "step": 5768 }, { "epoch": 0.010230686918816751, "grad_norm": 0.3984375, "learning_rate": 0.0019888332156207016, "loss": 0.2348, "step": 5770 }, { "epoch": 0.010234233084126566, "grad_norm": 0.361328125, "learning_rate": 0.0019888238669504604, "loss": 0.2418, "step": 5772 }, { "epoch": 0.010237779249436382, "grad_norm": 0.46875, "learning_rate": 0.0019888145143930206, "loss": 0.2455, "step": 5774 }, { "epoch": 0.010241325414746196, "grad_norm": 0.384765625, "learning_rate": 0.001988805157948422, "loss": 0.2102, "step": 5776 }, { "epoch": 0.010244871580056011, "grad_norm": 0.45703125, "learning_rate": 0.001988795797616706, "loss": 0.2653, "step": 5778 }, { "epoch": 0.010248417745365827, "grad_norm": 0.3671875, "learning_rate": 0.0019887864333979137, "loss": 0.2891, "step": 5780 }, { "epoch": 0.010251963910675642, "grad_norm": 0.416015625, "learning_rate": 0.0019887770652920857, "loss": 0.2714, "step": 5782 }, { "epoch": 0.010255510075985456, "grad_norm": 0.302734375, "learning_rate": 0.001988767693299263, "loss": 0.2319, "step": 5784 }, { "epoch": 0.010259056241295273, "grad_norm": 1.359375, "learning_rate": 0.0019887583174194867, "loss": 0.2893, "step": 5786 }, { "epoch": 0.010262602406605087, "grad_norm": 0.2890625, "learning_rate": 0.0019887489376527977, "loss": 0.5093, "step": 5788 }, { "epoch": 0.010266148571914902, "grad_norm": 0.22265625, "learning_rate": 0.001988739553999237, "loss": 0.2232, "step": 5790 }, { "epoch": 0.010269694737224718, "grad_norm": 0.62109375, "learning_rate": 0.001988730166458846, "loss": 0.2264, "step": 5792 }, { "epoch": 0.010273240902534533, "grad_norm": 1.2578125, "learning_rate": 0.0019887207750316654, "loss": 0.374, "step": 5794 }, { "epoch": 0.010276787067844349, "grad_norm": 1.2421875, "learning_rate": 0.001988711379717736, "loss": 0.4457, "step": 5796 }, { "epoch": 0.010280333233154164, "grad_norm": 0.78125, "learning_rate": 0.0019887019805170996, "loss": 0.2772, "step": 5798 }, { "epoch": 0.010283879398463978, "grad_norm": 0.53125, "learning_rate": 0.001988692577429797, "loss": 0.3339, "step": 5800 }, { "epoch": 0.010287425563773794, "grad_norm": 2.703125, "learning_rate": 0.0019886831704558692, "loss": 0.2457, "step": 5802 }, { "epoch": 0.010290971729083609, "grad_norm": 0.546875, "learning_rate": 0.001988673759595358, "loss": 0.2628, "step": 5804 }, { "epoch": 0.010294517894393423, "grad_norm": 0.388671875, "learning_rate": 0.001988664344848303, "loss": 0.256, "step": 5806 }, { "epoch": 0.01029806405970324, "grad_norm": 1.0546875, "learning_rate": 0.0019886549262147467, "loss": 0.3069, "step": 5808 }, { "epoch": 0.010301610225013054, "grad_norm": 0.515625, "learning_rate": 0.0019886455036947303, "loss": 0.3723, "step": 5810 }, { "epoch": 0.010305156390322869, "grad_norm": 0.81640625, "learning_rate": 0.001988636077288294, "loss": 0.3231, "step": 5812 }, { "epoch": 0.010308702555632685, "grad_norm": 0.42578125, "learning_rate": 0.0019886266469954805, "loss": 0.2946, "step": 5814 }, { "epoch": 0.0103122487209425, "grad_norm": 0.486328125, "learning_rate": 0.0019886172128163295, "loss": 0.2659, "step": 5816 }, { "epoch": 0.010315794886252314, "grad_norm": 0.51953125, "learning_rate": 0.001988607774750883, "loss": 0.2917, "step": 5818 }, { "epoch": 0.01031934105156213, "grad_norm": 0.197265625, "learning_rate": 0.0019885983327991826, "loss": 0.2409, "step": 5820 }, { "epoch": 0.010322887216871945, "grad_norm": 0.5234375, "learning_rate": 0.001988588886961269, "loss": 0.2564, "step": 5822 }, { "epoch": 0.01032643338218176, "grad_norm": 0.3515625, "learning_rate": 0.0019885794372371843, "loss": 0.2172, "step": 5824 }, { "epoch": 0.010329979547491576, "grad_norm": 0.88671875, "learning_rate": 0.001988569983626969, "loss": 0.557, "step": 5826 }, { "epoch": 0.01033352571280139, "grad_norm": 0.87109375, "learning_rate": 0.0019885605261306645, "loss": 0.3127, "step": 5828 }, { "epoch": 0.010337071878111207, "grad_norm": 0.515625, "learning_rate": 0.0019885510647483125, "loss": 0.2379, "step": 5830 }, { "epoch": 0.010340618043421021, "grad_norm": 3.0625, "learning_rate": 0.0019885415994799543, "loss": 0.304, "step": 5832 }, { "epoch": 0.010344164208730836, "grad_norm": 1.171875, "learning_rate": 0.0019885321303256312, "loss": 0.344, "step": 5834 }, { "epoch": 0.010347710374040652, "grad_norm": 1.125, "learning_rate": 0.001988522657285385, "loss": 0.5607, "step": 5836 }, { "epoch": 0.010351256539350467, "grad_norm": 1.4140625, "learning_rate": 0.0019885131803592565, "loss": 0.37, "step": 5838 }, { "epoch": 0.010354802704660281, "grad_norm": 0.33203125, "learning_rate": 0.0019885036995472877, "loss": 0.2129, "step": 5840 }, { "epoch": 0.010358348869970098, "grad_norm": 0.3125, "learning_rate": 0.0019884942148495196, "loss": 0.2877, "step": 5842 }, { "epoch": 0.010361895035279912, "grad_norm": 1.25, "learning_rate": 0.001988484726265994, "loss": 0.3503, "step": 5844 }, { "epoch": 0.010365441200589727, "grad_norm": 0.7265625, "learning_rate": 0.001988475233796752, "loss": 0.3453, "step": 5846 }, { "epoch": 0.010368987365899543, "grad_norm": 0.66796875, "learning_rate": 0.001988465737441836, "loss": 0.3034, "step": 5848 }, { "epoch": 0.010372533531209358, "grad_norm": 0.48828125, "learning_rate": 0.001988456237201287, "loss": 0.2343, "step": 5850 }, { "epoch": 0.010376079696519172, "grad_norm": 0.64453125, "learning_rate": 0.0019884467330751458, "loss": 0.3782, "step": 5852 }, { "epoch": 0.010379625861828988, "grad_norm": 0.326171875, "learning_rate": 0.0019884372250634544, "loss": 0.2747, "step": 5854 }, { "epoch": 0.010383172027138803, "grad_norm": 0.310546875, "learning_rate": 0.0019884277131662553, "loss": 0.2435, "step": 5856 }, { "epoch": 0.010386718192448617, "grad_norm": 0.82421875, "learning_rate": 0.0019884181973835896, "loss": 0.414, "step": 5858 }, { "epoch": 0.010390264357758434, "grad_norm": 0.326171875, "learning_rate": 0.0019884086777154984, "loss": 0.2359, "step": 5860 }, { "epoch": 0.010393810523068248, "grad_norm": 0.41796875, "learning_rate": 0.001988399154162024, "loss": 0.2979, "step": 5862 }, { "epoch": 0.010397356688378065, "grad_norm": 0.62890625, "learning_rate": 0.0019883896267232077, "loss": 0.2331, "step": 5864 }, { "epoch": 0.010400902853687879, "grad_norm": 1.1640625, "learning_rate": 0.001988380095399091, "loss": 0.3314, "step": 5866 }, { "epoch": 0.010404449018997694, "grad_norm": 0.5546875, "learning_rate": 0.001988370560189716, "loss": 0.2767, "step": 5868 }, { "epoch": 0.01040799518430751, "grad_norm": 0.357421875, "learning_rate": 0.0019883610210951236, "loss": 0.2005, "step": 5870 }, { "epoch": 0.010411541349617325, "grad_norm": 0.3203125, "learning_rate": 0.001988351478115357, "loss": 0.2644, "step": 5872 }, { "epoch": 0.010415087514927139, "grad_norm": 1.1796875, "learning_rate": 0.0019883419312504563, "loss": 0.3104, "step": 5874 }, { "epoch": 0.010418633680236955, "grad_norm": 0.375, "learning_rate": 0.0019883323805004647, "loss": 0.2735, "step": 5876 }, { "epoch": 0.01042217984554677, "grad_norm": 0.25, "learning_rate": 0.0019883228258654228, "loss": 0.2543, "step": 5878 }, { "epoch": 0.010425726010856584, "grad_norm": 0.578125, "learning_rate": 0.0019883132673453726, "loss": 0.2897, "step": 5880 }, { "epoch": 0.0104292721761664, "grad_norm": 0.65625, "learning_rate": 0.001988303704940357, "loss": 0.2792, "step": 5882 }, { "epoch": 0.010432818341476215, "grad_norm": 0.80078125, "learning_rate": 0.0019882941386504165, "loss": 0.3873, "step": 5884 }, { "epoch": 0.01043636450678603, "grad_norm": 0.66015625, "learning_rate": 0.0019882845684755933, "loss": 0.2324, "step": 5886 }, { "epoch": 0.010439910672095846, "grad_norm": 1.203125, "learning_rate": 0.0019882749944159293, "loss": 0.3238, "step": 5888 }, { "epoch": 0.01044345683740566, "grad_norm": 0.59375, "learning_rate": 0.001988265416471467, "loss": 0.2852, "step": 5890 }, { "epoch": 0.010447003002715475, "grad_norm": 5.4375, "learning_rate": 0.001988255834642247, "loss": 0.3307, "step": 5892 }, { "epoch": 0.010450549168025292, "grad_norm": 2.125, "learning_rate": 0.001988246248928313, "loss": 0.2993, "step": 5894 }, { "epoch": 0.010454095333335106, "grad_norm": 0.53515625, "learning_rate": 0.001988236659329705, "loss": 0.2431, "step": 5896 }, { "epoch": 0.010457641498644922, "grad_norm": 0.8046875, "learning_rate": 0.001988227065846466, "loss": 0.3667, "step": 5898 }, { "epoch": 0.010461187663954737, "grad_norm": 0.69921875, "learning_rate": 0.0019882174684786374, "loss": 0.3712, "step": 5900 }, { "epoch": 0.010464733829264551, "grad_norm": 3.578125, "learning_rate": 0.001988207867226262, "loss": 0.277, "step": 5902 }, { "epoch": 0.010468279994574368, "grad_norm": 0.5859375, "learning_rate": 0.001988198262089381, "loss": 0.3025, "step": 5904 }, { "epoch": 0.010471826159884182, "grad_norm": 0.796875, "learning_rate": 0.0019881886530680373, "loss": 0.2458, "step": 5906 }, { "epoch": 0.010475372325193997, "grad_norm": 0.326171875, "learning_rate": 0.001988179040162272, "loss": 0.3324, "step": 5908 }, { "epoch": 0.010478918490503813, "grad_norm": 0.80859375, "learning_rate": 0.0019881694233721274, "loss": 0.2186, "step": 5910 }, { "epoch": 0.010482464655813628, "grad_norm": 0.38671875, "learning_rate": 0.0019881598026976455, "loss": 0.1934, "step": 5912 }, { "epoch": 0.010486010821123442, "grad_norm": 0.3125, "learning_rate": 0.001988150178138869, "loss": 0.2527, "step": 5914 }, { "epoch": 0.010489556986433259, "grad_norm": 1.0546875, "learning_rate": 0.001988140549695839, "loss": 0.4183, "step": 5916 }, { "epoch": 0.010493103151743073, "grad_norm": 0.546875, "learning_rate": 0.0019881309173685985, "loss": 0.2469, "step": 5918 }, { "epoch": 0.010496649317052888, "grad_norm": 0.443359375, "learning_rate": 0.0019881212811571894, "loss": 0.3144, "step": 5920 }, { "epoch": 0.010500195482362704, "grad_norm": 0.515625, "learning_rate": 0.0019881116410616533, "loss": 0.2961, "step": 5922 }, { "epoch": 0.010503741647672519, "grad_norm": 0.453125, "learning_rate": 0.0019881019970820328, "loss": 0.2274, "step": 5924 }, { "epoch": 0.010507287812982333, "grad_norm": 1.1796875, "learning_rate": 0.0019880923492183703, "loss": 0.2423, "step": 5926 }, { "epoch": 0.01051083397829215, "grad_norm": 0.66796875, "learning_rate": 0.0019880826974707074, "loss": 0.3151, "step": 5928 }, { "epoch": 0.010514380143601964, "grad_norm": 0.55078125, "learning_rate": 0.001988073041839087, "loss": 0.2301, "step": 5930 }, { "epoch": 0.01051792630891178, "grad_norm": 0.328125, "learning_rate": 0.001988063382323551, "loss": 0.2753, "step": 5932 }, { "epoch": 0.010521472474221595, "grad_norm": 2.671875, "learning_rate": 0.001988053718924141, "loss": 0.4232, "step": 5934 }, { "epoch": 0.01052501863953141, "grad_norm": 3.234375, "learning_rate": 0.0019880440516409003, "loss": 0.3246, "step": 5936 }, { "epoch": 0.010528564804841226, "grad_norm": 0.361328125, "learning_rate": 0.001988034380473871, "loss": 0.2806, "step": 5938 }, { "epoch": 0.01053211097015104, "grad_norm": 0.79296875, "learning_rate": 0.0019880247054230946, "loss": 0.2824, "step": 5940 }, { "epoch": 0.010535657135460855, "grad_norm": 0.453125, "learning_rate": 0.0019880150264886143, "loss": 0.2641, "step": 5942 }, { "epoch": 0.010539203300770671, "grad_norm": 0.390625, "learning_rate": 0.0019880053436704724, "loss": 0.2486, "step": 5944 }, { "epoch": 0.010542749466080486, "grad_norm": 0.5546875, "learning_rate": 0.00198799565696871, "loss": 0.2487, "step": 5946 }, { "epoch": 0.0105462956313903, "grad_norm": 0.640625, "learning_rate": 0.0019879859663833716, "loss": 0.5694, "step": 5948 }, { "epoch": 0.010549841796700116, "grad_norm": 0.74609375, "learning_rate": 0.0019879762719144977, "loss": 0.3262, "step": 5950 }, { "epoch": 0.010553387962009931, "grad_norm": 0.322265625, "learning_rate": 0.0019879665735621312, "loss": 0.2522, "step": 5952 }, { "epoch": 0.010556934127319745, "grad_norm": 0.421875, "learning_rate": 0.0019879568713263153, "loss": 0.2822, "step": 5954 }, { "epoch": 0.010560480292629562, "grad_norm": 0.61328125, "learning_rate": 0.0019879471652070914, "loss": 0.2267, "step": 5956 }, { "epoch": 0.010564026457939376, "grad_norm": 1.578125, "learning_rate": 0.0019879374552045025, "loss": 0.339, "step": 5958 }, { "epoch": 0.010567572623249191, "grad_norm": 0.7890625, "learning_rate": 0.0019879277413185907, "loss": 0.2751, "step": 5960 }, { "epoch": 0.010571118788559007, "grad_norm": 0.96484375, "learning_rate": 0.0019879180235493994, "loss": 0.4019, "step": 5962 }, { "epoch": 0.010574664953868822, "grad_norm": 0.54296875, "learning_rate": 0.0019879083018969697, "loss": 0.2823, "step": 5964 }, { "epoch": 0.010578211119178638, "grad_norm": 0.8828125, "learning_rate": 0.001987898576361345, "loss": 0.2677, "step": 5966 }, { "epoch": 0.010581757284488453, "grad_norm": 0.35546875, "learning_rate": 0.001987888846942568, "loss": 0.246, "step": 5968 }, { "epoch": 0.010585303449798267, "grad_norm": 0.306640625, "learning_rate": 0.0019878791136406808, "loss": 0.2474, "step": 5970 }, { "epoch": 0.010588849615108083, "grad_norm": 0.2890625, "learning_rate": 0.0019878693764557257, "loss": 0.2399, "step": 5972 }, { "epoch": 0.010592395780417898, "grad_norm": 0.51171875, "learning_rate": 0.001987859635387746, "loss": 0.2762, "step": 5974 }, { "epoch": 0.010595941945727713, "grad_norm": 0.54296875, "learning_rate": 0.0019878498904367845, "loss": 0.2992, "step": 5976 }, { "epoch": 0.010599488111037529, "grad_norm": 1.140625, "learning_rate": 0.0019878401416028825, "loss": 0.2313, "step": 5978 }, { "epoch": 0.010603034276347343, "grad_norm": 1.2109375, "learning_rate": 0.001987830388886084, "loss": 0.5073, "step": 5980 }, { "epoch": 0.010606580441657158, "grad_norm": 0.400390625, "learning_rate": 0.0019878206322864306, "loss": 0.2226, "step": 5982 }, { "epoch": 0.010610126606966974, "grad_norm": 0.87109375, "learning_rate": 0.001987810871803966, "loss": 0.3639, "step": 5984 }, { "epoch": 0.010613672772276789, "grad_norm": 0.51953125, "learning_rate": 0.001987801107438732, "loss": 0.2666, "step": 5986 }, { "epoch": 0.010617218937586603, "grad_norm": 0.32421875, "learning_rate": 0.0019877913391907714, "loss": 0.2591, "step": 5988 }, { "epoch": 0.01062076510289642, "grad_norm": 4.6875, "learning_rate": 0.0019877815670601277, "loss": 0.2809, "step": 5990 }, { "epoch": 0.010624311268206234, "grad_norm": 0.494140625, "learning_rate": 0.0019877717910468428, "loss": 0.2693, "step": 5992 }, { "epoch": 0.010627857433516049, "grad_norm": 0.75390625, "learning_rate": 0.00198776201115096, "loss": 0.2601, "step": 5994 }, { "epoch": 0.010631403598825865, "grad_norm": 0.29296875, "learning_rate": 0.0019877522273725216, "loss": 0.2682, "step": 5996 }, { "epoch": 0.01063494976413568, "grad_norm": 1.1015625, "learning_rate": 0.0019877424397115708, "loss": 0.2899, "step": 5998 }, { "epoch": 0.010638495929445496, "grad_norm": 0.451171875, "learning_rate": 0.00198773264816815, "loss": 0.2456, "step": 6000 }, { "epoch": 0.01064204209475531, "grad_norm": 0.498046875, "learning_rate": 0.0019877228527423025, "loss": 0.2319, "step": 6002 }, { "epoch": 0.010645588260065125, "grad_norm": 0.5390625, "learning_rate": 0.0019877130534340704, "loss": 0.2378, "step": 6004 }, { "epoch": 0.010649134425374941, "grad_norm": 0.275390625, "learning_rate": 0.0019877032502434978, "loss": 0.2863, "step": 6006 }, { "epoch": 0.010652680590684756, "grad_norm": 0.390625, "learning_rate": 0.0019876934431706265, "loss": 0.2035, "step": 6008 }, { "epoch": 0.01065622675599457, "grad_norm": 0.51953125, "learning_rate": 0.0019876836322154996, "loss": 0.2592, "step": 6010 }, { "epoch": 0.010659772921304387, "grad_norm": 1.296875, "learning_rate": 0.0019876738173781605, "loss": 0.2982, "step": 6012 }, { "epoch": 0.010663319086614201, "grad_norm": 0.228515625, "learning_rate": 0.001987663998658651, "loss": 0.23, "step": 6014 }, { "epoch": 0.010666865251924016, "grad_norm": 0.2412109375, "learning_rate": 0.0019876541760570155, "loss": 0.2322, "step": 6016 }, { "epoch": 0.010670411417233832, "grad_norm": 0.267578125, "learning_rate": 0.001987644349573296, "loss": 0.2854, "step": 6018 }, { "epoch": 0.010673957582543647, "grad_norm": 0.6171875, "learning_rate": 0.0019876345192075357, "loss": 0.3641, "step": 6020 }, { "epoch": 0.010677503747853461, "grad_norm": 0.55078125, "learning_rate": 0.0019876246849597776, "loss": 0.227, "step": 6022 }, { "epoch": 0.010681049913163277, "grad_norm": 0.34765625, "learning_rate": 0.001987614846830065, "loss": 0.2549, "step": 6024 }, { "epoch": 0.010684596078473092, "grad_norm": 0.46875, "learning_rate": 0.00198760500481844, "loss": 0.2421, "step": 6026 }, { "epoch": 0.010688142243782906, "grad_norm": 0.625, "learning_rate": 0.001987595158924947, "loss": 0.2925, "step": 6028 }, { "epoch": 0.010691688409092723, "grad_norm": 2.703125, "learning_rate": 0.001987585309149628, "loss": 0.205, "step": 6030 }, { "epoch": 0.010695234574402537, "grad_norm": 0.384765625, "learning_rate": 0.001987575455492526, "loss": 0.4531, "step": 6032 }, { "epoch": 0.010698780739712354, "grad_norm": 0.2412109375, "learning_rate": 0.001987565597953685, "loss": 0.2631, "step": 6034 }, { "epoch": 0.010702326905022168, "grad_norm": 5.78125, "learning_rate": 0.0019875557365331476, "loss": 0.2783, "step": 6036 }, { "epoch": 0.010705873070331983, "grad_norm": 0.859375, "learning_rate": 0.001987545871230957, "loss": 0.254, "step": 6038 }, { "epoch": 0.010709419235641799, "grad_norm": 0.314453125, "learning_rate": 0.0019875360020471565, "loss": 0.2089, "step": 6040 }, { "epoch": 0.010712965400951614, "grad_norm": 0.3984375, "learning_rate": 0.0019875261289817887, "loss": 0.225, "step": 6042 }, { "epoch": 0.010716511566261428, "grad_norm": 0.578125, "learning_rate": 0.0019875162520348972, "loss": 0.2282, "step": 6044 }, { "epoch": 0.010720057731571244, "grad_norm": 0.71875, "learning_rate": 0.001987506371206525, "loss": 0.2575, "step": 6046 }, { "epoch": 0.010723603896881059, "grad_norm": 0.322265625, "learning_rate": 0.0019874964864967162, "loss": 0.2463, "step": 6048 }, { "epoch": 0.010727150062190874, "grad_norm": 0.578125, "learning_rate": 0.001987486597905513, "loss": 0.3057, "step": 6050 }, { "epoch": 0.01073069622750069, "grad_norm": 0.298828125, "learning_rate": 0.0019874767054329583, "loss": 0.259, "step": 6052 }, { "epoch": 0.010734242392810504, "grad_norm": 0.224609375, "learning_rate": 0.0019874668090790965, "loss": 0.2948, "step": 6054 }, { "epoch": 0.010737788558120319, "grad_norm": 0.474609375, "learning_rate": 0.0019874569088439704, "loss": 0.3562, "step": 6056 }, { "epoch": 0.010741334723430135, "grad_norm": 0.51171875, "learning_rate": 0.001987447004727623, "loss": 0.286, "step": 6058 }, { "epoch": 0.01074488088873995, "grad_norm": 1.0390625, "learning_rate": 0.001987437096730098, "loss": 0.4945, "step": 6060 }, { "epoch": 0.010748427054049764, "grad_norm": 0.466796875, "learning_rate": 0.001987427184851439, "loss": 0.2475, "step": 6062 }, { "epoch": 0.01075197321935958, "grad_norm": 0.60546875, "learning_rate": 0.0019874172690916886, "loss": 0.2243, "step": 6064 }, { "epoch": 0.010755519384669395, "grad_norm": 0.30859375, "learning_rate": 0.0019874073494508906, "loss": 0.3817, "step": 6066 }, { "epoch": 0.010759065549979211, "grad_norm": 1.015625, "learning_rate": 0.001987397425929088, "loss": 0.3127, "step": 6068 }, { "epoch": 0.010762611715289026, "grad_norm": 1.0703125, "learning_rate": 0.001987387498526325, "loss": 0.3228, "step": 6070 }, { "epoch": 0.01076615788059884, "grad_norm": 0.45703125, "learning_rate": 0.0019873775672426446, "loss": 0.2621, "step": 6072 }, { "epoch": 0.010769704045908657, "grad_norm": 0.48828125, "learning_rate": 0.00198736763207809, "loss": 0.2767, "step": 6074 }, { "epoch": 0.010773250211218471, "grad_norm": 0.6875, "learning_rate": 0.0019873576930327045, "loss": 0.2513, "step": 6076 }, { "epoch": 0.010776796376528286, "grad_norm": 1.03125, "learning_rate": 0.0019873477501065324, "loss": 0.3358, "step": 6078 }, { "epoch": 0.010780342541838102, "grad_norm": 0.734375, "learning_rate": 0.0019873378032996165, "loss": 0.2464, "step": 6080 }, { "epoch": 0.010783888707147917, "grad_norm": 0.4453125, "learning_rate": 0.0019873278526120006, "loss": 0.3333, "step": 6082 }, { "epoch": 0.010787434872457731, "grad_norm": 0.75, "learning_rate": 0.0019873178980437272, "loss": 0.2762, "step": 6084 }, { "epoch": 0.010790981037767548, "grad_norm": 0.48046875, "learning_rate": 0.001987307939594842, "loss": 0.2529, "step": 6086 }, { "epoch": 0.010794527203077362, "grad_norm": 0.41015625, "learning_rate": 0.0019872979772653865, "loss": 0.2608, "step": 6088 }, { "epoch": 0.010798073368387177, "grad_norm": 0.39453125, "learning_rate": 0.001987288011055405, "loss": 0.3697, "step": 6090 }, { "epoch": 0.010801619533696993, "grad_norm": 2.671875, "learning_rate": 0.0019872780409649414, "loss": 0.4219, "step": 6092 }, { "epoch": 0.010805165699006808, "grad_norm": 1.5234375, "learning_rate": 0.001987268066994039, "loss": 0.3257, "step": 6094 }, { "epoch": 0.010808711864316622, "grad_norm": 0.26171875, "learning_rate": 0.001987258089142742, "loss": 0.2178, "step": 6096 }, { "epoch": 0.010812258029626438, "grad_norm": 0.2099609375, "learning_rate": 0.0019872481074110927, "loss": 0.2763, "step": 6098 }, { "epoch": 0.010815804194936253, "grad_norm": 0.48046875, "learning_rate": 0.001987238121799136, "loss": 0.2948, "step": 6100 }, { "epoch": 0.01081935036024607, "grad_norm": 0.4921875, "learning_rate": 0.001987228132306915, "loss": 0.4501, "step": 6102 }, { "epoch": 0.010822896525555884, "grad_norm": 1.0625, "learning_rate": 0.0019872181389344735, "loss": 0.3309, "step": 6104 }, { "epoch": 0.010826442690865698, "grad_norm": 5.03125, "learning_rate": 0.0019872081416818553, "loss": 0.4552, "step": 6106 }, { "epoch": 0.010829988856175515, "grad_norm": 1.21875, "learning_rate": 0.001987198140549104, "loss": 0.2493, "step": 6108 }, { "epoch": 0.01083353502148533, "grad_norm": 0.400390625, "learning_rate": 0.001987188135536263, "loss": 0.2925, "step": 6110 }, { "epoch": 0.010837081186795144, "grad_norm": 0.390625, "learning_rate": 0.0019871781266433772, "loss": 0.2401, "step": 6112 }, { "epoch": 0.01084062735210496, "grad_norm": 0.63671875, "learning_rate": 0.001987168113870489, "loss": 0.2624, "step": 6114 }, { "epoch": 0.010844173517414775, "grad_norm": 0.78125, "learning_rate": 0.001987158097217643, "loss": 0.296, "step": 6116 }, { "epoch": 0.010847719682724589, "grad_norm": 0.24609375, "learning_rate": 0.0019871480766848826, "loss": 0.2174, "step": 6118 }, { "epoch": 0.010851265848034405, "grad_norm": 0.341796875, "learning_rate": 0.0019871380522722523, "loss": 0.2534, "step": 6120 }, { "epoch": 0.01085481201334422, "grad_norm": 1.015625, "learning_rate": 0.001987128023979795, "loss": 0.2794, "step": 6122 }, { "epoch": 0.010858358178654035, "grad_norm": 0.34375, "learning_rate": 0.0019871179918075554, "loss": 0.2548, "step": 6124 }, { "epoch": 0.01086190434396385, "grad_norm": 0.47265625, "learning_rate": 0.001987107955755577, "loss": 0.2993, "step": 6126 }, { "epoch": 0.010865450509273665, "grad_norm": 0.4765625, "learning_rate": 0.001987097915823903, "loss": 0.2443, "step": 6128 }, { "epoch": 0.01086899667458348, "grad_norm": 0.3125, "learning_rate": 0.0019870878720125787, "loss": 0.2306, "step": 6130 }, { "epoch": 0.010872542839893296, "grad_norm": 0.4765625, "learning_rate": 0.001987077824321647, "loss": 0.2146, "step": 6132 }, { "epoch": 0.01087608900520311, "grad_norm": 5.875, "learning_rate": 0.0019870677727511525, "loss": 0.3652, "step": 6134 }, { "epoch": 0.010879635170512927, "grad_norm": 1.203125, "learning_rate": 0.001987057717301138, "loss": 0.2946, "step": 6136 }, { "epoch": 0.010883181335822742, "grad_norm": 0.4921875, "learning_rate": 0.0019870476579716494, "loss": 0.264, "step": 6138 }, { "epoch": 0.010886727501132556, "grad_norm": 0.25390625, "learning_rate": 0.001987037594762729, "loss": 0.2724, "step": 6140 }, { "epoch": 0.010890273666442372, "grad_norm": 0.380859375, "learning_rate": 0.0019870275276744217, "loss": 0.2383, "step": 6142 }, { "epoch": 0.010893819831752187, "grad_norm": 0.625, "learning_rate": 0.0019870174567067716, "loss": 0.2365, "step": 6144 }, { "epoch": 0.010897365997062002, "grad_norm": 0.37890625, "learning_rate": 0.0019870073818598214, "loss": 0.2421, "step": 6146 }, { "epoch": 0.010900912162371818, "grad_norm": 0.65234375, "learning_rate": 0.0019869973031336166, "loss": 0.2806, "step": 6148 }, { "epoch": 0.010904458327681632, "grad_norm": 0.77734375, "learning_rate": 0.001986987220528201, "loss": 0.3103, "step": 6150 }, { "epoch": 0.010908004492991447, "grad_norm": 1.0078125, "learning_rate": 0.0019869771340436187, "loss": 0.2912, "step": 6152 }, { "epoch": 0.010911550658301263, "grad_norm": 1.25, "learning_rate": 0.001986967043679913, "loss": 0.3896, "step": 6154 }, { "epoch": 0.010915096823611078, "grad_norm": 1.59375, "learning_rate": 0.001986956949437129, "loss": 0.4143, "step": 6156 }, { "epoch": 0.010918642988920892, "grad_norm": 0.29296875, "learning_rate": 0.0019869468513153106, "loss": 0.2488, "step": 6158 }, { "epoch": 0.010922189154230709, "grad_norm": 1.1328125, "learning_rate": 0.001986936749314502, "loss": 0.2611, "step": 6160 }, { "epoch": 0.010925735319540523, "grad_norm": 0.435546875, "learning_rate": 0.001986926643434747, "loss": 0.2172, "step": 6162 }, { "epoch": 0.010929281484850338, "grad_norm": 0.67578125, "learning_rate": 0.00198691653367609, "loss": 0.3055, "step": 6164 }, { "epoch": 0.010932827650160154, "grad_norm": 0.416015625, "learning_rate": 0.001986906420038575, "loss": 0.2987, "step": 6166 }, { "epoch": 0.010936373815469969, "grad_norm": 0.796875, "learning_rate": 0.001986896302522247, "loss": 0.2669, "step": 6168 }, { "epoch": 0.010939919980779785, "grad_norm": 0.4765625, "learning_rate": 0.0019868861811271495, "loss": 0.2294, "step": 6170 }, { "epoch": 0.0109434661460896, "grad_norm": 0.5234375, "learning_rate": 0.001986876055853327, "loss": 0.2986, "step": 6172 }, { "epoch": 0.010947012311399414, "grad_norm": 0.49609375, "learning_rate": 0.001986865926700824, "loss": 0.2823, "step": 6174 }, { "epoch": 0.01095055847670923, "grad_norm": 0.48828125, "learning_rate": 0.0019868557936696847, "loss": 0.2529, "step": 6176 }, { "epoch": 0.010954104642019045, "grad_norm": 0.7578125, "learning_rate": 0.001986845656759953, "loss": 0.2319, "step": 6178 }, { "epoch": 0.01095765080732886, "grad_norm": 0.42578125, "learning_rate": 0.0019868355159716735, "loss": 0.2275, "step": 6180 }, { "epoch": 0.010961196972638676, "grad_norm": 0.298828125, "learning_rate": 0.0019868253713048907, "loss": 0.1985, "step": 6182 }, { "epoch": 0.01096474313794849, "grad_norm": 0.5234375, "learning_rate": 0.001986815222759649, "loss": 0.2351, "step": 6184 }, { "epoch": 0.010968289303258305, "grad_norm": 0.3671875, "learning_rate": 0.001986805070335992, "loss": 0.2689, "step": 6186 }, { "epoch": 0.010971835468568121, "grad_norm": 0.259765625, "learning_rate": 0.0019867949140339658, "loss": 0.2128, "step": 6188 }, { "epoch": 0.010975381633877936, "grad_norm": 0.42578125, "learning_rate": 0.001986784753853613, "loss": 0.2121, "step": 6190 }, { "epoch": 0.01097892779918775, "grad_norm": 0.408203125, "learning_rate": 0.001986774589794979, "loss": 0.1796, "step": 6192 }, { "epoch": 0.010982473964497566, "grad_norm": 0.8984375, "learning_rate": 0.001986764421858108, "loss": 0.3173, "step": 6194 }, { "epoch": 0.010986020129807381, "grad_norm": 0.294921875, "learning_rate": 0.0019867542500430447, "loss": 0.2264, "step": 6196 }, { "epoch": 0.010989566295117196, "grad_norm": 2.171875, "learning_rate": 0.001986744074349833, "loss": 0.3214, "step": 6198 }, { "epoch": 0.010993112460427012, "grad_norm": 0.41796875, "learning_rate": 0.0019867338947785183, "loss": 0.2817, "step": 6200 }, { "epoch": 0.010996658625736826, "grad_norm": 0.84765625, "learning_rate": 0.001986723711329144, "loss": 0.206, "step": 6202 }, { "epoch": 0.011000204791046643, "grad_norm": 1.015625, "learning_rate": 0.001986713524001756, "loss": 0.3411, "step": 6204 }, { "epoch": 0.011003750956356457, "grad_norm": 0.515625, "learning_rate": 0.0019867033327963975, "loss": 0.3907, "step": 6206 }, { "epoch": 0.011007297121666272, "grad_norm": 1.5078125, "learning_rate": 0.001986693137713114, "loss": 0.4127, "step": 6208 }, { "epoch": 0.011010843286976088, "grad_norm": 0.482421875, "learning_rate": 0.0019866829387519495, "loss": 0.2162, "step": 6210 }, { "epoch": 0.011014389452285903, "grad_norm": 2.1875, "learning_rate": 0.001986672735912949, "loss": 0.4836, "step": 6212 }, { "epoch": 0.011017935617595717, "grad_norm": 0.9609375, "learning_rate": 0.001986662529196157, "loss": 0.2363, "step": 6214 }, { "epoch": 0.011021481782905533, "grad_norm": 1.1875, "learning_rate": 0.0019866523186016184, "loss": 0.6834, "step": 6216 }, { "epoch": 0.011025027948215348, "grad_norm": 0.45703125, "learning_rate": 0.0019866421041293773, "loss": 0.2378, "step": 6218 }, { "epoch": 0.011028574113525163, "grad_norm": 0.59765625, "learning_rate": 0.001986631885779479, "loss": 0.2674, "step": 6220 }, { "epoch": 0.011032120278834979, "grad_norm": 0.349609375, "learning_rate": 0.0019866216635519673, "loss": 0.3375, "step": 6222 }, { "epoch": 0.011035666444144793, "grad_norm": 0.3125, "learning_rate": 0.001986611437446888, "loss": 0.4332, "step": 6224 }, { "epoch": 0.011039212609454608, "grad_norm": 0.33984375, "learning_rate": 0.0019866012074642846, "loss": 0.3157, "step": 6226 }, { "epoch": 0.011042758774764424, "grad_norm": 0.345703125, "learning_rate": 0.001986590973604203, "loss": 0.2539, "step": 6228 }, { "epoch": 0.011046304940074239, "grad_norm": 0.2451171875, "learning_rate": 0.001986580735866688, "loss": 0.257, "step": 6230 }, { "epoch": 0.011049851105384053, "grad_norm": 0.31640625, "learning_rate": 0.0019865704942517827, "loss": 0.244, "step": 6232 }, { "epoch": 0.01105339727069387, "grad_norm": 0.40234375, "learning_rate": 0.001986560248759534, "loss": 0.2814, "step": 6234 }, { "epoch": 0.011056943436003684, "grad_norm": 0.427734375, "learning_rate": 0.001986549999389985, "loss": 0.2961, "step": 6236 }, { "epoch": 0.0110604896013135, "grad_norm": 0.5703125, "learning_rate": 0.0019865397461431814, "loss": 0.3519, "step": 6238 }, { "epoch": 0.011064035766623315, "grad_norm": 0.1689453125, "learning_rate": 0.0019865294890191684, "loss": 0.1827, "step": 6240 }, { "epoch": 0.01106758193193313, "grad_norm": 0.283203125, "learning_rate": 0.00198651922801799, "loss": 0.2183, "step": 6242 }, { "epoch": 0.011071128097242946, "grad_norm": 1.4609375, "learning_rate": 0.0019865089631396914, "loss": 0.3905, "step": 6244 }, { "epoch": 0.01107467426255276, "grad_norm": 0.4765625, "learning_rate": 0.0019864986943843176, "loss": 0.2733, "step": 6246 }, { "epoch": 0.011078220427862575, "grad_norm": 0.435546875, "learning_rate": 0.0019864884217519136, "loss": 0.2972, "step": 6248 }, { "epoch": 0.011081766593172391, "grad_norm": 0.478515625, "learning_rate": 0.001986478145242524, "loss": 0.329, "step": 6250 }, { "epoch": 0.011085312758482206, "grad_norm": 0.8203125, "learning_rate": 0.0019864678648561945, "loss": 0.363, "step": 6252 }, { "epoch": 0.01108885892379202, "grad_norm": 0.70703125, "learning_rate": 0.0019864575805929687, "loss": 0.3268, "step": 6254 }, { "epoch": 0.011092405089101837, "grad_norm": 0.59765625, "learning_rate": 0.0019864472924528928, "loss": 0.2758, "step": 6256 }, { "epoch": 0.011095951254411651, "grad_norm": 1.3359375, "learning_rate": 0.0019864370004360112, "loss": 0.2462, "step": 6258 }, { "epoch": 0.011099497419721466, "grad_norm": 0.98828125, "learning_rate": 0.0019864267045423692, "loss": 0.2855, "step": 6260 }, { "epoch": 0.011103043585031282, "grad_norm": 0.3515625, "learning_rate": 0.0019864164047720114, "loss": 0.289, "step": 6262 }, { "epoch": 0.011106589750341097, "grad_norm": 0.625, "learning_rate": 0.0019864061011249834, "loss": 0.6329, "step": 6264 }, { "epoch": 0.011110135915650911, "grad_norm": 0.392578125, "learning_rate": 0.00198639579360133, "loss": 0.241, "step": 6266 }, { "epoch": 0.011113682080960727, "grad_norm": 0.26171875, "learning_rate": 0.001986385482201096, "loss": 0.236, "step": 6268 }, { "epoch": 0.011117228246270542, "grad_norm": 1.28125, "learning_rate": 0.001986375166924327, "loss": 0.314, "step": 6270 }, { "epoch": 0.011120774411580358, "grad_norm": 0.365234375, "learning_rate": 0.001986364847771068, "loss": 0.2481, "step": 6272 }, { "epoch": 0.011124320576890173, "grad_norm": 0.369140625, "learning_rate": 0.0019863545247413637, "loss": 0.2299, "step": 6274 }, { "epoch": 0.011127866742199987, "grad_norm": 0.87890625, "learning_rate": 0.00198634419783526, "loss": 0.2389, "step": 6276 }, { "epoch": 0.011131412907509804, "grad_norm": 0.3359375, "learning_rate": 0.0019863338670528014, "loss": 0.2651, "step": 6278 }, { "epoch": 0.011134959072819618, "grad_norm": 4.59375, "learning_rate": 0.001986323532394033, "loss": 0.2686, "step": 6280 }, { "epoch": 0.011138505238129433, "grad_norm": 0.306640625, "learning_rate": 0.0019863131938590004, "loss": 0.2385, "step": 6282 }, { "epoch": 0.011142051403439249, "grad_norm": 0.3046875, "learning_rate": 0.0019863028514477492, "loss": 0.3189, "step": 6284 }, { "epoch": 0.011145597568749064, "grad_norm": 1.1796875, "learning_rate": 0.001986292505160324, "loss": 0.3359, "step": 6286 }, { "epoch": 0.011149143734058878, "grad_norm": 0.8671875, "learning_rate": 0.00198628215499677, "loss": 0.2666, "step": 6288 }, { "epoch": 0.011152689899368694, "grad_norm": 0.625, "learning_rate": 0.0019862718009571326, "loss": 0.3466, "step": 6290 }, { "epoch": 0.011156236064678509, "grad_norm": 0.4375, "learning_rate": 0.0019862614430414573, "loss": 0.2416, "step": 6292 }, { "epoch": 0.011159782229988324, "grad_norm": 0.30859375, "learning_rate": 0.001986251081249789, "loss": 0.209, "step": 6294 }, { "epoch": 0.01116332839529814, "grad_norm": 0.6484375, "learning_rate": 0.0019862407155821736, "loss": 0.259, "step": 6296 }, { "epoch": 0.011166874560607954, "grad_norm": 1.78125, "learning_rate": 0.001986230346038656, "loss": 0.3313, "step": 6298 }, { "epoch": 0.011170420725917769, "grad_norm": 0.6953125, "learning_rate": 0.0019862199726192816, "loss": 0.2995, "step": 6300 }, { "epoch": 0.011173966891227585, "grad_norm": 0.3046875, "learning_rate": 0.001986209595324096, "loss": 0.2362, "step": 6302 }, { "epoch": 0.0111775130565374, "grad_norm": 1.2890625, "learning_rate": 0.001986199214153144, "loss": 0.291, "step": 6304 }, { "epoch": 0.011181059221847216, "grad_norm": 0.79296875, "learning_rate": 0.0019861888291064717, "loss": 0.2559, "step": 6306 }, { "epoch": 0.01118460538715703, "grad_norm": 0.2060546875, "learning_rate": 0.0019861784401841243, "loss": 0.2296, "step": 6308 }, { "epoch": 0.011188151552466845, "grad_norm": 0.439453125, "learning_rate": 0.001986168047386147, "loss": 0.2676, "step": 6310 }, { "epoch": 0.011191697717776661, "grad_norm": 1.6953125, "learning_rate": 0.0019861576507125855, "loss": 0.5058, "step": 6312 }, { "epoch": 0.011195243883086476, "grad_norm": 0.51171875, "learning_rate": 0.001986147250163485, "loss": 0.2751, "step": 6314 }, { "epoch": 0.01119879004839629, "grad_norm": 0.59765625, "learning_rate": 0.0019861368457388916, "loss": 0.3495, "step": 6316 }, { "epoch": 0.011202336213706107, "grad_norm": 0.2353515625, "learning_rate": 0.00198612643743885, "loss": 0.354, "step": 6318 }, { "epoch": 0.011205882379015921, "grad_norm": 0.484375, "learning_rate": 0.001986116025263406, "loss": 0.2894, "step": 6320 }, { "epoch": 0.011209428544325736, "grad_norm": 0.45703125, "learning_rate": 0.0019861056092126054, "loss": 0.2521, "step": 6322 }, { "epoch": 0.011212974709635552, "grad_norm": 0.69140625, "learning_rate": 0.0019860951892864934, "loss": 0.2555, "step": 6324 }, { "epoch": 0.011216520874945367, "grad_norm": 0.8359375, "learning_rate": 0.001986084765485116, "loss": 0.3349, "step": 6326 }, { "epoch": 0.011220067040255181, "grad_norm": 0.30078125, "learning_rate": 0.0019860743378085186, "loss": 0.2099, "step": 6328 }, { "epoch": 0.011223613205564998, "grad_norm": 0.8125, "learning_rate": 0.0019860639062567464, "loss": 0.24, "step": 6330 }, { "epoch": 0.011227159370874812, "grad_norm": 0.51171875, "learning_rate": 0.001986053470829846, "loss": 0.2166, "step": 6332 }, { "epoch": 0.011230705536184627, "grad_norm": 0.68359375, "learning_rate": 0.0019860430315278618, "loss": 0.2391, "step": 6334 }, { "epoch": 0.011234251701494443, "grad_norm": 0.56640625, "learning_rate": 0.00198603258835084, "loss": 0.3065, "step": 6336 }, { "epoch": 0.011237797866804258, "grad_norm": 1.2734375, "learning_rate": 0.0019860221412988264, "loss": 0.3638, "step": 6338 }, { "epoch": 0.011241344032114074, "grad_norm": 0.6875, "learning_rate": 0.0019860116903718666, "loss": 0.2502, "step": 6340 }, { "epoch": 0.011244890197423888, "grad_norm": 0.80859375, "learning_rate": 0.0019860012355700065, "loss": 0.2676, "step": 6342 }, { "epoch": 0.011248436362733703, "grad_norm": 0.91796875, "learning_rate": 0.001985990776893292, "loss": 0.2933, "step": 6344 }, { "epoch": 0.01125198252804352, "grad_norm": 0.70703125, "learning_rate": 0.001985980314341768, "loss": 0.2384, "step": 6346 }, { "epoch": 0.011255528693353334, "grad_norm": 0.4140625, "learning_rate": 0.0019859698479154806, "loss": 0.2574, "step": 6348 }, { "epoch": 0.011259074858663148, "grad_norm": 1.03125, "learning_rate": 0.001985959377614476, "loss": 0.255, "step": 6350 }, { "epoch": 0.011262621023972965, "grad_norm": 1.4296875, "learning_rate": 0.0019859489034387994, "loss": 0.5737, "step": 6352 }, { "epoch": 0.01126616718928278, "grad_norm": 0.384765625, "learning_rate": 0.0019859384253884975, "loss": 0.2619, "step": 6354 }, { "epoch": 0.011269713354592594, "grad_norm": 0.69921875, "learning_rate": 0.001985927943463615, "loss": 0.2124, "step": 6356 }, { "epoch": 0.01127325951990241, "grad_norm": 17.0, "learning_rate": 0.001985917457664199, "loss": 0.3274, "step": 6358 }, { "epoch": 0.011276805685212225, "grad_norm": 0.87890625, "learning_rate": 0.0019859069679902938, "loss": 0.2299, "step": 6360 }, { "epoch": 0.01128035185052204, "grad_norm": 1.15625, "learning_rate": 0.0019858964744419467, "loss": 0.2498, "step": 6362 }, { "epoch": 0.011283898015831855, "grad_norm": 2.28125, "learning_rate": 0.0019858859770192027, "loss": 0.3732, "step": 6364 }, { "epoch": 0.01128744418114167, "grad_norm": 0.53125, "learning_rate": 0.001985875475722108, "loss": 0.3462, "step": 6366 }, { "epoch": 0.011290990346451485, "grad_norm": 0.1943359375, "learning_rate": 0.0019858649705507088, "loss": 0.2156, "step": 6368 }, { "epoch": 0.0112945365117613, "grad_norm": 0.30078125, "learning_rate": 0.0019858544615050503, "loss": 0.3415, "step": 6370 }, { "epoch": 0.011298082677071115, "grad_norm": 1.46875, "learning_rate": 0.0019858439485851793, "loss": 0.2704, "step": 6372 }, { "epoch": 0.011301628842380932, "grad_norm": 0.42578125, "learning_rate": 0.0019858334317911413, "loss": 0.3162, "step": 6374 }, { "epoch": 0.011305175007690746, "grad_norm": 0.5703125, "learning_rate": 0.0019858229111229826, "loss": 0.2298, "step": 6376 }, { "epoch": 0.01130872117300056, "grad_norm": 0.396484375, "learning_rate": 0.0019858123865807487, "loss": 0.2279, "step": 6378 }, { "epoch": 0.011312267338310377, "grad_norm": 0.4609375, "learning_rate": 0.0019858018581644862, "loss": 0.3286, "step": 6380 }, { "epoch": 0.011315813503620192, "grad_norm": 0.53125, "learning_rate": 0.0019857913258742414, "loss": 0.4062, "step": 6382 }, { "epoch": 0.011319359668930006, "grad_norm": 0.55078125, "learning_rate": 0.0019857807897100594, "loss": 0.2201, "step": 6384 }, { "epoch": 0.011322905834239822, "grad_norm": 0.6171875, "learning_rate": 0.0019857702496719866, "loss": 0.2164, "step": 6386 }, { "epoch": 0.011326451999549637, "grad_norm": 0.1630859375, "learning_rate": 0.0019857597057600694, "loss": 0.217, "step": 6388 }, { "epoch": 0.011329998164859452, "grad_norm": 0.318359375, "learning_rate": 0.001985749157974354, "loss": 0.2746, "step": 6390 }, { "epoch": 0.011333544330169268, "grad_norm": 1.2109375, "learning_rate": 0.001985738606314886, "loss": 0.6119, "step": 6392 }, { "epoch": 0.011337090495479082, "grad_norm": 0.5, "learning_rate": 0.0019857280507817117, "loss": 0.2621, "step": 6394 }, { "epoch": 0.011340636660788897, "grad_norm": 0.99609375, "learning_rate": 0.0019857174913748775, "loss": 0.2558, "step": 6396 }, { "epoch": 0.011344182826098713, "grad_norm": 0.58984375, "learning_rate": 0.0019857069280944297, "loss": 0.279, "step": 6398 }, { "epoch": 0.011347728991408528, "grad_norm": 0.7421875, "learning_rate": 0.0019856963609404137, "loss": 0.2397, "step": 6400 }, { "epoch": 0.011351275156718342, "grad_norm": 0.318359375, "learning_rate": 0.001985685789912877, "loss": 0.1866, "step": 6402 }, { "epoch": 0.011354821322028159, "grad_norm": 0.4453125, "learning_rate": 0.0019856752150118648, "loss": 0.3086, "step": 6404 }, { "epoch": 0.011358367487337973, "grad_norm": 0.71875, "learning_rate": 0.0019856646362374237, "loss": 0.2461, "step": 6406 }, { "epoch": 0.01136191365264779, "grad_norm": 0.98828125, "learning_rate": 0.0019856540535896, "loss": 0.3312, "step": 6408 }, { "epoch": 0.011365459817957604, "grad_norm": 0.400390625, "learning_rate": 0.00198564346706844, "loss": 0.2237, "step": 6410 }, { "epoch": 0.011369005983267419, "grad_norm": 0.64453125, "learning_rate": 0.0019856328766739897, "loss": 0.3129, "step": 6412 }, { "epoch": 0.011372552148577235, "grad_norm": 0.400390625, "learning_rate": 0.0019856222824062957, "loss": 0.2855, "step": 6414 }, { "epoch": 0.01137609831388705, "grad_norm": 0.75390625, "learning_rate": 0.0019856116842654043, "loss": 0.2575, "step": 6416 }, { "epoch": 0.011379644479196864, "grad_norm": 1.3828125, "learning_rate": 0.0019856010822513616, "loss": 0.509, "step": 6418 }, { "epoch": 0.01138319064450668, "grad_norm": 1.0234375, "learning_rate": 0.0019855904763642143, "loss": 0.2234, "step": 6420 }, { "epoch": 0.011386736809816495, "grad_norm": 0.5859375, "learning_rate": 0.001985579866604009, "loss": 0.2209, "step": 6422 }, { "epoch": 0.01139028297512631, "grad_norm": 0.337890625, "learning_rate": 0.0019855692529707914, "loss": 0.3473, "step": 6424 }, { "epoch": 0.011393829140436126, "grad_norm": 0.41015625, "learning_rate": 0.0019855586354646086, "loss": 0.272, "step": 6426 }, { "epoch": 0.01139737530574594, "grad_norm": 0.38671875, "learning_rate": 0.0019855480140855064, "loss": 0.2868, "step": 6428 }, { "epoch": 0.011400921471055755, "grad_norm": 0.365234375, "learning_rate": 0.0019855373888335317, "loss": 0.3183, "step": 6430 }, { "epoch": 0.011404467636365571, "grad_norm": 0.53515625, "learning_rate": 0.001985526759708731, "loss": 0.2578, "step": 6432 }, { "epoch": 0.011408013801675386, "grad_norm": 0.67578125, "learning_rate": 0.0019855161267111504, "loss": 0.2703, "step": 6434 }, { "epoch": 0.0114115599669852, "grad_norm": 0.43359375, "learning_rate": 0.0019855054898408366, "loss": 0.2035, "step": 6436 }, { "epoch": 0.011415106132295016, "grad_norm": 0.32421875, "learning_rate": 0.0019854948490978363, "loss": 0.2835, "step": 6438 }, { "epoch": 0.011418652297604831, "grad_norm": 0.3984375, "learning_rate": 0.001985484204482196, "loss": 0.2202, "step": 6440 }, { "epoch": 0.011422198462914647, "grad_norm": 0.546875, "learning_rate": 0.001985473555993962, "loss": 0.2038, "step": 6442 }, { "epoch": 0.011425744628224462, "grad_norm": 0.3671875, "learning_rate": 0.001985462903633181, "loss": 0.2454, "step": 6444 }, { "epoch": 0.011429290793534276, "grad_norm": 0.78515625, "learning_rate": 0.0019854522473998996, "loss": 0.3056, "step": 6446 }, { "epoch": 0.011432836958844093, "grad_norm": 13.25, "learning_rate": 0.0019854415872941644, "loss": 0.3753, "step": 6448 }, { "epoch": 0.011436383124153907, "grad_norm": 0.482421875, "learning_rate": 0.001985430923316022, "loss": 0.2377, "step": 6450 }, { "epoch": 0.011439929289463722, "grad_norm": 0.453125, "learning_rate": 0.001985420255465519, "loss": 0.352, "step": 6452 }, { "epoch": 0.011443475454773538, "grad_norm": 0.43359375, "learning_rate": 0.0019854095837427022, "loss": 0.3862, "step": 6454 }, { "epoch": 0.011447021620083353, "grad_norm": 0.6640625, "learning_rate": 0.001985398908147618, "loss": 0.3506, "step": 6456 }, { "epoch": 0.011450567785393167, "grad_norm": 0.486328125, "learning_rate": 0.0019853882286803137, "loss": 0.2485, "step": 6458 }, { "epoch": 0.011454113950702983, "grad_norm": 0.408203125, "learning_rate": 0.001985377545340835, "loss": 0.2664, "step": 6460 }, { "epoch": 0.011457660116012798, "grad_norm": 1.046875, "learning_rate": 0.0019853668581292297, "loss": 0.2627, "step": 6462 }, { "epoch": 0.011461206281322613, "grad_norm": 1.734375, "learning_rate": 0.001985356167045544, "loss": 0.5401, "step": 6464 }, { "epoch": 0.011464752446632429, "grad_norm": 0.396484375, "learning_rate": 0.0019853454720898246, "loss": 0.2897, "step": 6466 }, { "epoch": 0.011468298611942243, "grad_norm": 0.7890625, "learning_rate": 0.0019853347732621185, "loss": 0.2578, "step": 6468 }, { "epoch": 0.011471844777252058, "grad_norm": 0.3671875, "learning_rate": 0.0019853240705624718, "loss": 0.3041, "step": 6470 }, { "epoch": 0.011475390942561874, "grad_norm": 1.7109375, "learning_rate": 0.0019853133639909327, "loss": 0.2205, "step": 6472 }, { "epoch": 0.011478937107871689, "grad_norm": 10.125, "learning_rate": 0.001985302653547547, "loss": 0.2979, "step": 6474 }, { "epoch": 0.011482483273181505, "grad_norm": 0.392578125, "learning_rate": 0.0019852919392323613, "loss": 0.2948, "step": 6476 }, { "epoch": 0.01148602943849132, "grad_norm": 0.27734375, "learning_rate": 0.001985281221045423, "loss": 0.3383, "step": 6478 }, { "epoch": 0.011489575603801134, "grad_norm": 6.125, "learning_rate": 0.001985270498986779, "loss": 0.5451, "step": 6480 }, { "epoch": 0.01149312176911095, "grad_norm": 2.015625, "learning_rate": 0.001985259773056476, "loss": 0.3665, "step": 6482 }, { "epoch": 0.011496667934420765, "grad_norm": 0.40234375, "learning_rate": 0.0019852490432545615, "loss": 0.2287, "step": 6484 }, { "epoch": 0.01150021409973058, "grad_norm": 0.8828125, "learning_rate": 0.001985238309581081, "loss": 0.2771, "step": 6486 }, { "epoch": 0.011503760265040396, "grad_norm": 0.494140625, "learning_rate": 0.001985227572036083, "loss": 0.2507, "step": 6488 }, { "epoch": 0.01150730643035021, "grad_norm": 1.0546875, "learning_rate": 0.0019852168306196136, "loss": 0.2873, "step": 6490 }, { "epoch": 0.011510852595660025, "grad_norm": 1.4609375, "learning_rate": 0.0019852060853317198, "loss": 0.3437, "step": 6492 }, { "epoch": 0.011514398760969841, "grad_norm": 0.37890625, "learning_rate": 0.001985195336172449, "loss": 0.2354, "step": 6494 }, { "epoch": 0.011517944926279656, "grad_norm": 0.26171875, "learning_rate": 0.001985184583141848, "loss": 0.2171, "step": 6496 }, { "epoch": 0.01152149109158947, "grad_norm": 3.046875, "learning_rate": 0.001985173826239964, "loss": 0.3967, "step": 6498 }, { "epoch": 0.011525037256899287, "grad_norm": 3.34375, "learning_rate": 0.001985163065466843, "loss": 0.6066, "step": 6500 }, { "epoch": 0.011528583422209101, "grad_norm": 1.375, "learning_rate": 0.001985152300822534, "loss": 0.3862, "step": 6502 }, { "epoch": 0.011532129587518916, "grad_norm": 0.39453125, "learning_rate": 0.0019851415323070823, "loss": 0.2662, "step": 6504 }, { "epoch": 0.011535675752828732, "grad_norm": 0.625, "learning_rate": 0.001985130759920536, "loss": 0.2981, "step": 6506 }, { "epoch": 0.011539221918138547, "grad_norm": 1.3203125, "learning_rate": 0.0019851199836629415, "loss": 0.2541, "step": 6508 }, { "epoch": 0.011542768083448363, "grad_norm": 2.515625, "learning_rate": 0.0019851092035343466, "loss": 0.2319, "step": 6510 }, { "epoch": 0.011546314248758177, "grad_norm": 1.015625, "learning_rate": 0.0019850984195347986, "loss": 0.3721, "step": 6512 }, { "epoch": 0.011549860414067992, "grad_norm": 0.42578125, "learning_rate": 0.0019850876316643436, "loss": 0.3427, "step": 6514 }, { "epoch": 0.011553406579377808, "grad_norm": 2.96875, "learning_rate": 0.0019850768399230297, "loss": 0.3732, "step": 6516 }, { "epoch": 0.011556952744687623, "grad_norm": 0.44140625, "learning_rate": 0.0019850660443109036, "loss": 0.3362, "step": 6518 }, { "epoch": 0.011560498909997437, "grad_norm": 0.6171875, "learning_rate": 0.001985055244828013, "loss": 0.3409, "step": 6520 }, { "epoch": 0.011564045075307254, "grad_norm": 0.7265625, "learning_rate": 0.001985044441474405, "loss": 0.3454, "step": 6522 }, { "epoch": 0.011567591240617068, "grad_norm": 0.306640625, "learning_rate": 0.001985033634250126, "loss": 0.3016, "step": 6524 }, { "epoch": 0.011571137405926883, "grad_norm": 0.3359375, "learning_rate": 0.0019850228231552245, "loss": 0.4201, "step": 6526 }, { "epoch": 0.011574683571236699, "grad_norm": 0.76953125, "learning_rate": 0.001985012008189747, "loss": 0.2919, "step": 6528 }, { "epoch": 0.011578229736546514, "grad_norm": 0.50390625, "learning_rate": 0.0019850011893537416, "loss": 0.3108, "step": 6530 }, { "epoch": 0.011581775901856328, "grad_norm": 0.66015625, "learning_rate": 0.0019849903666472545, "loss": 0.3685, "step": 6532 }, { "epoch": 0.011585322067166145, "grad_norm": 0.43359375, "learning_rate": 0.001984979540070334, "loss": 0.3112, "step": 6534 }, { "epoch": 0.011588868232475959, "grad_norm": 3.234375, "learning_rate": 0.0019849687096230267, "loss": 0.4082, "step": 6536 }, { "epoch": 0.011592414397785774, "grad_norm": 1.3515625, "learning_rate": 0.0019849578753053806, "loss": 0.6489, "step": 6538 }, { "epoch": 0.01159596056309559, "grad_norm": 1.0703125, "learning_rate": 0.0019849470371174427, "loss": 0.288, "step": 6540 }, { "epoch": 0.011599506728405404, "grad_norm": 0.68359375, "learning_rate": 0.0019849361950592605, "loss": 0.2855, "step": 6542 }, { "epoch": 0.01160305289371522, "grad_norm": 1.171875, "learning_rate": 0.0019849253491308816, "loss": 0.2629, "step": 6544 }, { "epoch": 0.011606599059025035, "grad_norm": 0.5703125, "learning_rate": 0.0019849144993323528, "loss": 0.1863, "step": 6546 }, { "epoch": 0.01161014522433485, "grad_norm": 0.54296875, "learning_rate": 0.0019849036456637222, "loss": 0.2295, "step": 6548 }, { "epoch": 0.011613691389644666, "grad_norm": 1.6328125, "learning_rate": 0.001984892788125037, "loss": 0.3621, "step": 6550 }, { "epoch": 0.01161723755495448, "grad_norm": 0.59765625, "learning_rate": 0.001984881926716345, "loss": 0.2898, "step": 6552 }, { "epoch": 0.011620783720264295, "grad_norm": 0.5546875, "learning_rate": 0.001984871061437693, "loss": 0.2619, "step": 6554 }, { "epoch": 0.011624329885574112, "grad_norm": 0.365234375, "learning_rate": 0.0019848601922891292, "loss": 0.2608, "step": 6556 }, { "epoch": 0.011627876050883926, "grad_norm": 0.625, "learning_rate": 0.0019848493192707014, "loss": 0.4518, "step": 6558 }, { "epoch": 0.01163142221619374, "grad_norm": 2.171875, "learning_rate": 0.0019848384423824562, "loss": 0.3408, "step": 6560 }, { "epoch": 0.011634968381503557, "grad_norm": 0.462890625, "learning_rate": 0.0019848275616244416, "loss": 0.2635, "step": 6562 }, { "epoch": 0.011638514546813371, "grad_norm": 0.482421875, "learning_rate": 0.001984816676996705, "loss": 0.2266, "step": 6564 }, { "epoch": 0.011642060712123186, "grad_norm": 0.41796875, "learning_rate": 0.0019848057884992946, "loss": 0.2707, "step": 6566 }, { "epoch": 0.011645606877433002, "grad_norm": 0.47265625, "learning_rate": 0.0019847948961322576, "loss": 0.3033, "step": 6568 }, { "epoch": 0.011649153042742817, "grad_norm": 0.416015625, "learning_rate": 0.001984783999895642, "loss": 0.3255, "step": 6570 }, { "epoch": 0.011652699208052631, "grad_norm": 0.515625, "learning_rate": 0.0019847730997894944, "loss": 0.3003, "step": 6572 }, { "epoch": 0.011656245373362448, "grad_norm": 0.63671875, "learning_rate": 0.0019847621958138635, "loss": 0.277, "step": 6574 }, { "epoch": 0.011659791538672262, "grad_norm": 0.32421875, "learning_rate": 0.0019847512879687967, "loss": 0.2551, "step": 6576 }, { "epoch": 0.011663337703982079, "grad_norm": 0.71875, "learning_rate": 0.001984740376254342, "loss": 0.2087, "step": 6578 }, { "epoch": 0.011666883869291893, "grad_norm": 1.3203125, "learning_rate": 0.0019847294606705466, "loss": 0.3965, "step": 6580 }, { "epoch": 0.011670430034601708, "grad_norm": 1.0078125, "learning_rate": 0.0019847185412174583, "loss": 0.3613, "step": 6582 }, { "epoch": 0.011673976199911524, "grad_norm": 0.4375, "learning_rate": 0.001984707617895125, "loss": 0.3577, "step": 6584 }, { "epoch": 0.011677522365221338, "grad_norm": 0.53515625, "learning_rate": 0.0019846966907035943, "loss": 0.1842, "step": 6586 }, { "epoch": 0.011681068530531153, "grad_norm": 0.82421875, "learning_rate": 0.0019846857596429145, "loss": 0.3677, "step": 6588 }, { "epoch": 0.01168461469584097, "grad_norm": 0.310546875, "learning_rate": 0.0019846748247131334, "loss": 0.2998, "step": 6590 }, { "epoch": 0.011688160861150784, "grad_norm": 0.515625, "learning_rate": 0.001984663885914298, "loss": 0.2502, "step": 6592 }, { "epoch": 0.011691707026460598, "grad_norm": 1.3046875, "learning_rate": 0.001984652943246457, "loss": 0.3343, "step": 6594 }, { "epoch": 0.011695253191770415, "grad_norm": 1.4921875, "learning_rate": 0.001984641996709657, "loss": 0.3007, "step": 6596 }, { "epoch": 0.01169879935708023, "grad_norm": 2.171875, "learning_rate": 0.001984631046303948, "loss": 0.4247, "step": 6598 }, { "epoch": 0.011702345522390044, "grad_norm": 0.51953125, "learning_rate": 0.0019846200920293757, "loss": 0.207, "step": 6600 }, { "epoch": 0.01170589168769986, "grad_norm": 5.0625, "learning_rate": 0.0019846091338859896, "loss": 0.5443, "step": 6602 }, { "epoch": 0.011709437853009675, "grad_norm": 0.71484375, "learning_rate": 0.0019845981718738365, "loss": 0.2723, "step": 6604 }, { "epoch": 0.01171298401831949, "grad_norm": 2.15625, "learning_rate": 0.0019845872059929648, "loss": 0.2741, "step": 6606 }, { "epoch": 0.011716530183629306, "grad_norm": 1.875, "learning_rate": 0.001984576236243423, "loss": 0.2785, "step": 6608 }, { "epoch": 0.01172007634893912, "grad_norm": 0.41015625, "learning_rate": 0.0019845652626252585, "loss": 0.2408, "step": 6610 }, { "epoch": 0.011723622514248936, "grad_norm": 0.51171875, "learning_rate": 0.001984554285138519, "loss": 0.357, "step": 6612 }, { "epoch": 0.011727168679558751, "grad_norm": 0.515625, "learning_rate": 0.001984543303783253, "loss": 0.245, "step": 6614 }, { "epoch": 0.011730714844868565, "grad_norm": 0.29296875, "learning_rate": 0.0019845323185595084, "loss": 0.1853, "step": 6616 }, { "epoch": 0.011734261010178382, "grad_norm": 0.859375, "learning_rate": 0.001984521329467333, "loss": 0.2566, "step": 6618 }, { "epoch": 0.011737807175488196, "grad_norm": 0.76953125, "learning_rate": 0.0019845103365067757, "loss": 0.2525, "step": 6620 }, { "epoch": 0.01174135334079801, "grad_norm": 0.61328125, "learning_rate": 0.0019844993396778833, "loss": 0.2193, "step": 6622 }, { "epoch": 0.011744899506107827, "grad_norm": 0.474609375, "learning_rate": 0.001984488338980705, "loss": 0.2799, "step": 6624 }, { "epoch": 0.011748445671417642, "grad_norm": 0.5859375, "learning_rate": 0.0019844773344152885, "loss": 0.2092, "step": 6626 }, { "epoch": 0.011751991836727456, "grad_norm": 0.419921875, "learning_rate": 0.0019844663259816816, "loss": 0.2723, "step": 6628 }, { "epoch": 0.011755538002037273, "grad_norm": 0.6328125, "learning_rate": 0.001984455313679933, "loss": 0.2936, "step": 6630 }, { "epoch": 0.011759084167347087, "grad_norm": 0.890625, "learning_rate": 0.0019844442975100905, "loss": 0.2109, "step": 6632 }, { "epoch": 0.011762630332656902, "grad_norm": 1.28125, "learning_rate": 0.0019844332774722026, "loss": 0.3474, "step": 6634 }, { "epoch": 0.011766176497966718, "grad_norm": 0.4296875, "learning_rate": 0.001984422253566317, "loss": 0.3077, "step": 6636 }, { "epoch": 0.011769722663276532, "grad_norm": 0.40234375, "learning_rate": 0.001984411225792482, "loss": 0.1891, "step": 6638 }, { "epoch": 0.011773268828586347, "grad_norm": 1.6015625, "learning_rate": 0.001984400194150747, "loss": 0.2661, "step": 6640 }, { "epoch": 0.011776814993896163, "grad_norm": 0.349609375, "learning_rate": 0.0019843891586411584, "loss": 0.2582, "step": 6642 }, { "epoch": 0.011780361159205978, "grad_norm": 0.48828125, "learning_rate": 0.0019843781192637657, "loss": 0.2815, "step": 6644 }, { "epoch": 0.011783907324515792, "grad_norm": 0.341796875, "learning_rate": 0.0019843670760186167, "loss": 0.2513, "step": 6646 }, { "epoch": 0.011787453489825609, "grad_norm": 1.671875, "learning_rate": 0.0019843560289057594, "loss": 0.3517, "step": 6648 }, { "epoch": 0.011790999655135423, "grad_norm": 1.0625, "learning_rate": 0.001984344977925243, "loss": 0.217, "step": 6650 }, { "epoch": 0.01179454582044524, "grad_norm": 0.255859375, "learning_rate": 0.001984333923077115, "loss": 0.2576, "step": 6652 }, { "epoch": 0.011798091985755054, "grad_norm": 0.625, "learning_rate": 0.001984322864361424, "loss": 0.2353, "step": 6654 }, { "epoch": 0.011801638151064869, "grad_norm": 0.408203125, "learning_rate": 0.0019843118017782192, "loss": 0.2349, "step": 6656 }, { "epoch": 0.011805184316374685, "grad_norm": 0.25390625, "learning_rate": 0.001984300735327548, "loss": 0.2386, "step": 6658 }, { "epoch": 0.0118087304816845, "grad_norm": 0.4140625, "learning_rate": 0.0019842896650094587, "loss": 0.2554, "step": 6660 }, { "epoch": 0.011812276646994314, "grad_norm": 0.255859375, "learning_rate": 0.0019842785908240003, "loss": 0.239, "step": 6662 }, { "epoch": 0.01181582281230413, "grad_norm": 0.28125, "learning_rate": 0.001984267512771221, "loss": 0.2206, "step": 6664 }, { "epoch": 0.011819368977613945, "grad_norm": 1.90625, "learning_rate": 0.001984256430851169, "loss": 0.3458, "step": 6666 }, { "epoch": 0.01182291514292376, "grad_norm": 0.259765625, "learning_rate": 0.0019842453450638936, "loss": 0.2103, "step": 6668 }, { "epoch": 0.011826461308233576, "grad_norm": 0.8984375, "learning_rate": 0.0019842342554094424, "loss": 0.3074, "step": 6670 }, { "epoch": 0.01183000747354339, "grad_norm": 0.7421875, "learning_rate": 0.001984223161887864, "loss": 0.2499, "step": 6672 }, { "epoch": 0.011833553638853205, "grad_norm": 0.86328125, "learning_rate": 0.001984212064499207, "loss": 0.271, "step": 6674 }, { "epoch": 0.011837099804163021, "grad_norm": 0.435546875, "learning_rate": 0.0019842009632435203, "loss": 0.4066, "step": 6676 }, { "epoch": 0.011840645969472836, "grad_norm": 0.443359375, "learning_rate": 0.0019841898581208525, "loss": 0.2566, "step": 6678 }, { "epoch": 0.01184419213478265, "grad_norm": 0.85546875, "learning_rate": 0.0019841787491312515, "loss": 0.265, "step": 6680 }, { "epoch": 0.011847738300092467, "grad_norm": 1.0390625, "learning_rate": 0.0019841676362747657, "loss": 0.2992, "step": 6682 }, { "epoch": 0.011851284465402281, "grad_norm": 0.54296875, "learning_rate": 0.001984156519551445, "loss": 0.3472, "step": 6684 }, { "epoch": 0.011854830630712097, "grad_norm": 0.396484375, "learning_rate": 0.001984145398961337, "loss": 0.2074, "step": 6686 }, { "epoch": 0.011858376796021912, "grad_norm": 0.265625, "learning_rate": 0.001984134274504491, "loss": 0.2638, "step": 6688 }, { "epoch": 0.011861922961331726, "grad_norm": 0.28125, "learning_rate": 0.001984123146180955, "loss": 0.2428, "step": 6690 }, { "epoch": 0.011865469126641543, "grad_norm": 0.38671875, "learning_rate": 0.0019841120139907774, "loss": 0.2761, "step": 6692 }, { "epoch": 0.011869015291951357, "grad_norm": 0.49609375, "learning_rate": 0.001984100877934008, "loss": 0.3499, "step": 6694 }, { "epoch": 0.011872561457261172, "grad_norm": 0.50390625, "learning_rate": 0.0019840897380106947, "loss": 0.2732, "step": 6696 }, { "epoch": 0.011876107622570988, "grad_norm": 0.8203125, "learning_rate": 0.0019840785942208867, "loss": 0.2414, "step": 6698 }, { "epoch": 0.011879653787880803, "grad_norm": 0.7734375, "learning_rate": 0.001984067446564632, "loss": 0.2762, "step": 6700 }, { "epoch": 0.011883199953190617, "grad_norm": 0.42578125, "learning_rate": 0.00198405629504198, "loss": 0.2606, "step": 6702 }, { "epoch": 0.011886746118500434, "grad_norm": 0.6875, "learning_rate": 0.0019840451396529795, "loss": 0.2388, "step": 6704 }, { "epoch": 0.011890292283810248, "grad_norm": 0.51953125, "learning_rate": 0.001984033980397679, "loss": 0.2446, "step": 6706 }, { "epoch": 0.011893838449120063, "grad_norm": 0.36328125, "learning_rate": 0.0019840228172761268, "loss": 0.3288, "step": 6708 }, { "epoch": 0.011897384614429879, "grad_norm": 0.330078125, "learning_rate": 0.001984011650288373, "loss": 0.1916, "step": 6710 }, { "epoch": 0.011900930779739693, "grad_norm": 0.4140625, "learning_rate": 0.0019840004794344653, "loss": 0.2786, "step": 6712 }, { "epoch": 0.011904476945049508, "grad_norm": 0.55078125, "learning_rate": 0.001983989304714453, "loss": 0.2285, "step": 6714 }, { "epoch": 0.011908023110359324, "grad_norm": 1.3125, "learning_rate": 0.0019839781261283855, "loss": 0.2872, "step": 6716 }, { "epoch": 0.011911569275669139, "grad_norm": 0.6328125, "learning_rate": 0.001983966943676311, "loss": 0.2707, "step": 6718 }, { "epoch": 0.011915115440978955, "grad_norm": 0.57421875, "learning_rate": 0.001983955757358278, "loss": 0.1967, "step": 6720 }, { "epoch": 0.01191866160628877, "grad_norm": 0.45703125, "learning_rate": 0.0019839445671743366, "loss": 0.3742, "step": 6722 }, { "epoch": 0.011922207771598584, "grad_norm": 0.7734375, "learning_rate": 0.001983933373124535, "loss": 0.2414, "step": 6724 }, { "epoch": 0.0119257539369084, "grad_norm": 0.4296875, "learning_rate": 0.001983922175208922, "loss": 0.2942, "step": 6726 }, { "epoch": 0.011929300102218215, "grad_norm": 0.734375, "learning_rate": 0.001983910973427547, "loss": 0.3007, "step": 6728 }, { "epoch": 0.01193284626752803, "grad_norm": 0.462890625, "learning_rate": 0.001983899767780459, "loss": 0.2479, "step": 6730 }, { "epoch": 0.011936392432837846, "grad_norm": 0.6484375, "learning_rate": 0.0019838885582677064, "loss": 0.2173, "step": 6732 }, { "epoch": 0.01193993859814766, "grad_norm": 0.671875, "learning_rate": 0.001983877344889339, "loss": 0.278, "step": 6734 }, { "epoch": 0.011943484763457475, "grad_norm": 0.330078125, "learning_rate": 0.0019838661276454055, "loss": 0.2704, "step": 6736 }, { "epoch": 0.011947030928767291, "grad_norm": 0.72265625, "learning_rate": 0.001983854906535955, "loss": 0.2654, "step": 6738 }, { "epoch": 0.011950577094077106, "grad_norm": 0.7109375, "learning_rate": 0.0019838436815610362, "loss": 0.2121, "step": 6740 }, { "epoch": 0.01195412325938692, "grad_norm": 2.09375, "learning_rate": 0.001983832452720699, "loss": 0.2342, "step": 6742 }, { "epoch": 0.011957669424696737, "grad_norm": 1.515625, "learning_rate": 0.0019838212200149917, "loss": 0.2736, "step": 6744 }, { "epoch": 0.011961215590006551, "grad_norm": 0.470703125, "learning_rate": 0.0019838099834439643, "loss": 0.2863, "step": 6746 }, { "epoch": 0.011964761755316366, "grad_norm": 0.4140625, "learning_rate": 0.0019837987430076647, "loss": 0.2547, "step": 6748 }, { "epoch": 0.011968307920626182, "grad_norm": 4.0, "learning_rate": 0.001983787498706143, "loss": 0.3155, "step": 6750 }, { "epoch": 0.011971854085935997, "grad_norm": 0.80859375, "learning_rate": 0.0019837762505394486, "loss": 0.2375, "step": 6752 }, { "epoch": 0.011975400251245813, "grad_norm": 1.0390625, "learning_rate": 0.0019837649985076297, "loss": 0.3251, "step": 6754 }, { "epoch": 0.011978946416555628, "grad_norm": 0.447265625, "learning_rate": 0.0019837537426107364, "loss": 0.273, "step": 6756 }, { "epoch": 0.011982492581865442, "grad_norm": 0.28125, "learning_rate": 0.001983742482848817, "loss": 0.4972, "step": 6758 }, { "epoch": 0.011986038747175258, "grad_norm": 0.314453125, "learning_rate": 0.0019837312192219217, "loss": 0.3103, "step": 6760 }, { "epoch": 0.011989584912485073, "grad_norm": 1.1171875, "learning_rate": 0.0019837199517301, "loss": 0.2812, "step": 6762 }, { "epoch": 0.011993131077794887, "grad_norm": 0.796875, "learning_rate": 0.0019837086803734, "loss": 0.2744, "step": 6764 }, { "epoch": 0.011996677243104704, "grad_norm": 0.494140625, "learning_rate": 0.0019836974051518712, "loss": 0.2333, "step": 6766 }, { "epoch": 0.012000223408414518, "grad_norm": 0.3984375, "learning_rate": 0.0019836861260655635, "loss": 0.3304, "step": 6768 }, { "epoch": 0.012003769573724333, "grad_norm": 0.2470703125, "learning_rate": 0.001983674843114526, "loss": 0.242, "step": 6770 }, { "epoch": 0.01200731573903415, "grad_norm": 0.359375, "learning_rate": 0.0019836635562988083, "loss": 0.1962, "step": 6772 }, { "epoch": 0.012010861904343964, "grad_norm": 0.5234375, "learning_rate": 0.001983652265618459, "loss": 0.2323, "step": 6774 }, { "epoch": 0.012014408069653778, "grad_norm": 0.31640625, "learning_rate": 0.0019836409710735285, "loss": 0.2398, "step": 6776 }, { "epoch": 0.012017954234963595, "grad_norm": 0.3828125, "learning_rate": 0.0019836296726640653, "loss": 0.2744, "step": 6778 }, { "epoch": 0.012021500400273409, "grad_norm": 0.83984375, "learning_rate": 0.001983618370390119, "loss": 0.2671, "step": 6780 }, { "epoch": 0.012025046565583224, "grad_norm": 1.3359375, "learning_rate": 0.00198360706425174, "loss": 0.5433, "step": 6782 }, { "epoch": 0.01202859273089304, "grad_norm": 3.03125, "learning_rate": 0.0019835957542489765, "loss": 0.4246, "step": 6784 }, { "epoch": 0.012032138896202854, "grad_norm": 0.5234375, "learning_rate": 0.001983584440381878, "loss": 0.3178, "step": 6786 }, { "epoch": 0.01203568506151267, "grad_norm": 0.3828125, "learning_rate": 0.0019835731226504954, "loss": 0.2733, "step": 6788 }, { "epoch": 0.012039231226822485, "grad_norm": 0.8515625, "learning_rate": 0.0019835618010548765, "loss": 0.3158, "step": 6790 }, { "epoch": 0.0120427773921323, "grad_norm": 0.609375, "learning_rate": 0.0019835504755950717, "loss": 0.2384, "step": 6792 }, { "epoch": 0.012046323557442116, "grad_norm": 0.5859375, "learning_rate": 0.00198353914627113, "loss": 0.2731, "step": 6794 }, { "epoch": 0.01204986972275193, "grad_norm": 0.2490234375, "learning_rate": 0.0019835278130831014, "loss": 0.1919, "step": 6796 }, { "epoch": 0.012053415888061745, "grad_norm": 0.251953125, "learning_rate": 0.0019835164760310356, "loss": 0.2322, "step": 6798 }, { "epoch": 0.012056962053371562, "grad_norm": 0.4140625, "learning_rate": 0.001983505135114982, "loss": 0.2973, "step": 6800 }, { "epoch": 0.012060508218681376, "grad_norm": 0.345703125, "learning_rate": 0.00198349379033499, "loss": 0.2675, "step": 6802 }, { "epoch": 0.01206405438399119, "grad_norm": 0.765625, "learning_rate": 0.00198348244169111, "loss": 0.2733, "step": 6804 }, { "epoch": 0.012067600549301007, "grad_norm": 0.33984375, "learning_rate": 0.00198347108918339, "loss": 0.2579, "step": 6806 }, { "epoch": 0.012071146714610822, "grad_norm": 0.32421875, "learning_rate": 0.001983459732811881, "loss": 0.316, "step": 6808 }, { "epoch": 0.012074692879920636, "grad_norm": 0.3046875, "learning_rate": 0.0019834483725766324, "loss": 0.2426, "step": 6810 }, { "epoch": 0.012078239045230452, "grad_norm": 0.609375, "learning_rate": 0.0019834370084776936, "loss": 0.3107, "step": 6812 }, { "epoch": 0.012081785210540267, "grad_norm": 0.462890625, "learning_rate": 0.001983425640515115, "loss": 0.1981, "step": 6814 }, { "epoch": 0.012085331375850081, "grad_norm": 0.6328125, "learning_rate": 0.001983414268688945, "loss": 0.27, "step": 6816 }, { "epoch": 0.012088877541159898, "grad_norm": 0.71875, "learning_rate": 0.0019834028929992344, "loss": 0.2811, "step": 6818 }, { "epoch": 0.012092423706469712, "grad_norm": 0.89453125, "learning_rate": 0.001983391513446033, "loss": 0.2405, "step": 6820 }, { "epoch": 0.012095969871779529, "grad_norm": 0.44921875, "learning_rate": 0.00198338013002939, "loss": 0.2651, "step": 6822 }, { "epoch": 0.012099516037089343, "grad_norm": 0.52734375, "learning_rate": 0.0019833687427493556, "loss": 0.3148, "step": 6824 }, { "epoch": 0.012103062202399158, "grad_norm": 0.87109375, "learning_rate": 0.0019833573516059794, "loss": 0.264, "step": 6826 }, { "epoch": 0.012106608367708974, "grad_norm": 0.3359375, "learning_rate": 0.001983345956599311, "loss": 0.2805, "step": 6828 }, { "epoch": 0.012110154533018789, "grad_norm": 1.21875, "learning_rate": 0.0019833345577294006, "loss": 0.3786, "step": 6830 }, { "epoch": 0.012113700698328603, "grad_norm": 0.2158203125, "learning_rate": 0.001983323154996298, "loss": 0.2356, "step": 6832 }, { "epoch": 0.01211724686363842, "grad_norm": 0.357421875, "learning_rate": 0.0019833117484000535, "loss": 0.2613, "step": 6834 }, { "epoch": 0.012120793028948234, "grad_norm": 0.4140625, "learning_rate": 0.001983300337940716, "loss": 0.2556, "step": 6836 }, { "epoch": 0.012124339194258048, "grad_norm": 6.59375, "learning_rate": 0.0019832889236183356, "loss": 0.3318, "step": 6838 }, { "epoch": 0.012127885359567865, "grad_norm": 0.9765625, "learning_rate": 0.001983277505432963, "loss": 0.1884, "step": 6840 }, { "epoch": 0.01213143152487768, "grad_norm": 0.1826171875, "learning_rate": 0.0019832660833846473, "loss": 0.2034, "step": 6842 }, { "epoch": 0.012134977690187494, "grad_norm": 0.58203125, "learning_rate": 0.0019832546574734397, "loss": 0.2472, "step": 6844 }, { "epoch": 0.01213852385549731, "grad_norm": 0.2421875, "learning_rate": 0.0019832432276993884, "loss": 0.2302, "step": 6846 }, { "epoch": 0.012142070020807125, "grad_norm": 0.392578125, "learning_rate": 0.0019832317940625447, "loss": 0.2369, "step": 6848 }, { "epoch": 0.01214561618611694, "grad_norm": 0.484375, "learning_rate": 0.001983220356562958, "loss": 0.3195, "step": 6850 }, { "epoch": 0.012149162351426756, "grad_norm": 0.380859375, "learning_rate": 0.0019832089152006785, "loss": 0.2562, "step": 6852 }, { "epoch": 0.01215270851673657, "grad_norm": 0.58984375, "learning_rate": 0.001983197469975756, "loss": 0.2764, "step": 6854 }, { "epoch": 0.012156254682046386, "grad_norm": 0.337890625, "learning_rate": 0.001983186020888241, "loss": 0.2632, "step": 6856 }, { "epoch": 0.012159800847356201, "grad_norm": 0.46875, "learning_rate": 0.0019831745679381833, "loss": 0.2197, "step": 6858 }, { "epoch": 0.012163347012666016, "grad_norm": 1.421875, "learning_rate": 0.001983163111125633, "loss": 0.2786, "step": 6860 }, { "epoch": 0.012166893177975832, "grad_norm": 0.68359375, "learning_rate": 0.0019831516504506407, "loss": 0.2758, "step": 6862 }, { "epoch": 0.012170439343285646, "grad_norm": 0.56640625, "learning_rate": 0.0019831401859132553, "loss": 0.2994, "step": 6864 }, { "epoch": 0.012173985508595461, "grad_norm": 0.353515625, "learning_rate": 0.0019831287175135284, "loss": 0.2428, "step": 6866 }, { "epoch": 0.012177531673905277, "grad_norm": 0.55859375, "learning_rate": 0.001983117245251509, "loss": 0.2468, "step": 6868 }, { "epoch": 0.012181077839215092, "grad_norm": 0.439453125, "learning_rate": 0.001983105769127248, "loss": 0.2521, "step": 6870 }, { "epoch": 0.012184624004524906, "grad_norm": 0.251953125, "learning_rate": 0.001983094289140796, "loss": 0.2257, "step": 6872 }, { "epoch": 0.012188170169834723, "grad_norm": 0.93359375, "learning_rate": 0.0019830828052922016, "loss": 0.224, "step": 6874 }, { "epoch": 0.012191716335144537, "grad_norm": 0.70703125, "learning_rate": 0.001983071317581516, "loss": 0.2698, "step": 6876 }, { "epoch": 0.012195262500454352, "grad_norm": 0.38671875, "learning_rate": 0.0019830598260087897, "loss": 0.3066, "step": 6878 }, { "epoch": 0.012198808665764168, "grad_norm": 0.3828125, "learning_rate": 0.0019830483305740727, "loss": 0.2594, "step": 6880 }, { "epoch": 0.012202354831073983, "grad_norm": 0.66796875, "learning_rate": 0.001983036831277415, "loss": 0.2496, "step": 6882 }, { "epoch": 0.012205900996383797, "grad_norm": 0.2236328125, "learning_rate": 0.0019830253281188674, "loss": 0.1695, "step": 6884 }, { "epoch": 0.012209447161693613, "grad_norm": 0.25, "learning_rate": 0.0019830138210984796, "loss": 0.2206, "step": 6886 }, { "epoch": 0.012212993327003428, "grad_norm": 0.71875, "learning_rate": 0.0019830023102163025, "loss": 0.277, "step": 6888 }, { "epoch": 0.012216539492313244, "grad_norm": 2.328125, "learning_rate": 0.0019829907954723863, "loss": 0.3232, "step": 6890 }, { "epoch": 0.012220085657623059, "grad_norm": 0.5703125, "learning_rate": 0.001982979276866781, "loss": 0.1966, "step": 6892 }, { "epoch": 0.012223631822932873, "grad_norm": 0.30859375, "learning_rate": 0.0019829677543995376, "loss": 0.2742, "step": 6894 }, { "epoch": 0.01222717798824269, "grad_norm": 0.369140625, "learning_rate": 0.0019829562280707057, "loss": 0.3344, "step": 6896 }, { "epoch": 0.012230724153552504, "grad_norm": 0.3359375, "learning_rate": 0.0019829446978803364, "loss": 0.2517, "step": 6898 }, { "epoch": 0.012234270318862319, "grad_norm": 11.25, "learning_rate": 0.00198293316382848, "loss": 0.3401, "step": 6900 }, { "epoch": 0.012237816484172135, "grad_norm": 0.318359375, "learning_rate": 0.0019829216259151863, "loss": 0.1887, "step": 6902 }, { "epoch": 0.01224136264948195, "grad_norm": 0.37109375, "learning_rate": 0.0019829100841405067, "loss": 0.1818, "step": 6904 }, { "epoch": 0.012244908814791764, "grad_norm": 4.0, "learning_rate": 0.0019828985385044913, "loss": 0.3568, "step": 6906 }, { "epoch": 0.01224845498010158, "grad_norm": 0.21875, "learning_rate": 0.00198288698900719, "loss": 0.3633, "step": 6908 }, { "epoch": 0.012252001145411395, "grad_norm": 1.4140625, "learning_rate": 0.0019828754356486546, "loss": 0.4123, "step": 6910 }, { "epoch": 0.01225554731072121, "grad_norm": 0.2041015625, "learning_rate": 0.0019828638784289347, "loss": 0.2238, "step": 6912 }, { "epoch": 0.012259093476031026, "grad_norm": 1.0546875, "learning_rate": 0.0019828523173480808, "loss": 0.2899, "step": 6914 }, { "epoch": 0.01226263964134084, "grad_norm": 0.427734375, "learning_rate": 0.0019828407524061435, "loss": 0.2454, "step": 6916 }, { "epoch": 0.012266185806650655, "grad_norm": 0.49609375, "learning_rate": 0.0019828291836031737, "loss": 0.2928, "step": 6918 }, { "epoch": 0.012269731971960471, "grad_norm": 0.25390625, "learning_rate": 0.001982817610939222, "loss": 0.2622, "step": 6920 }, { "epoch": 0.012273278137270286, "grad_norm": 0.67578125, "learning_rate": 0.0019828060344143387, "loss": 0.3946, "step": 6922 }, { "epoch": 0.012276824302580102, "grad_norm": 0.56640625, "learning_rate": 0.0019827944540285747, "loss": 0.215, "step": 6924 }, { "epoch": 0.012280370467889917, "grad_norm": 0.87109375, "learning_rate": 0.0019827828697819806, "loss": 0.4017, "step": 6926 }, { "epoch": 0.012283916633199731, "grad_norm": 0.765625, "learning_rate": 0.0019827712816746067, "loss": 0.3001, "step": 6928 }, { "epoch": 0.012287462798509547, "grad_norm": 0.314453125, "learning_rate": 0.0019827596897065048, "loss": 0.2846, "step": 6930 }, { "epoch": 0.012291008963819362, "grad_norm": 0.83203125, "learning_rate": 0.0019827480938777236, "loss": 0.3703, "step": 6932 }, { "epoch": 0.012294555129129177, "grad_norm": 0.64453125, "learning_rate": 0.001982736494188316, "loss": 0.2309, "step": 6934 }, { "epoch": 0.012298101294438993, "grad_norm": 1.7109375, "learning_rate": 0.0019827248906383317, "loss": 0.3196, "step": 6936 }, { "epoch": 0.012301647459748807, "grad_norm": 0.2177734375, "learning_rate": 0.0019827132832278206, "loss": 0.291, "step": 6938 }, { "epoch": 0.012305193625058622, "grad_norm": 0.322265625, "learning_rate": 0.001982701671956835, "loss": 0.2409, "step": 6940 }, { "epoch": 0.012308739790368438, "grad_norm": 0.55078125, "learning_rate": 0.001982690056825425, "loss": 0.2597, "step": 6942 }, { "epoch": 0.012312285955678253, "grad_norm": 0.27734375, "learning_rate": 0.001982678437833641, "loss": 0.2432, "step": 6944 }, { "epoch": 0.012315832120988067, "grad_norm": 0.361328125, "learning_rate": 0.0019826668149815346, "loss": 0.2968, "step": 6946 }, { "epoch": 0.012319378286297884, "grad_norm": 0.380859375, "learning_rate": 0.001982655188269156, "loss": 0.2664, "step": 6948 }, { "epoch": 0.012322924451607698, "grad_norm": 0.27734375, "learning_rate": 0.001982643557696557, "loss": 0.2904, "step": 6950 }, { "epoch": 0.012326470616917513, "grad_norm": 0.2255859375, "learning_rate": 0.0019826319232637874, "loss": 0.2259, "step": 6952 }, { "epoch": 0.012330016782227329, "grad_norm": 0.365234375, "learning_rate": 0.001982620284970898, "loss": 0.3693, "step": 6954 }, { "epoch": 0.012333562947537144, "grad_norm": 0.6015625, "learning_rate": 0.0019826086428179407, "loss": 0.1981, "step": 6956 }, { "epoch": 0.01233710911284696, "grad_norm": 1.9375, "learning_rate": 0.001982596996804966, "loss": 0.3156, "step": 6958 }, { "epoch": 0.012340655278156774, "grad_norm": 0.294921875, "learning_rate": 0.001982585346932024, "loss": 0.2237, "step": 6960 }, { "epoch": 0.012344201443466589, "grad_norm": 0.4609375, "learning_rate": 0.001982573693199167, "loss": 0.2888, "step": 6962 }, { "epoch": 0.012347747608776405, "grad_norm": 0.3515625, "learning_rate": 0.001982562035606445, "loss": 0.2527, "step": 6964 }, { "epoch": 0.01235129377408622, "grad_norm": 0.287109375, "learning_rate": 0.0019825503741539097, "loss": 0.2258, "step": 6966 }, { "epoch": 0.012354839939396034, "grad_norm": 0.29296875, "learning_rate": 0.0019825387088416115, "loss": 0.2394, "step": 6968 }, { "epoch": 0.01235838610470585, "grad_norm": 0.3828125, "learning_rate": 0.0019825270396696014, "loss": 0.2348, "step": 6970 }, { "epoch": 0.012361932270015665, "grad_norm": 0.48828125, "learning_rate": 0.0019825153666379303, "loss": 0.2296, "step": 6972 }, { "epoch": 0.01236547843532548, "grad_norm": 0.32421875, "learning_rate": 0.0019825036897466505, "loss": 0.1985, "step": 6974 }, { "epoch": 0.012369024600635296, "grad_norm": 0.3203125, "learning_rate": 0.0019824920089958117, "loss": 0.269, "step": 6976 }, { "epoch": 0.01237257076594511, "grad_norm": 0.224609375, "learning_rate": 0.0019824803243854655, "loss": 0.2197, "step": 6978 }, { "epoch": 0.012376116931254925, "grad_norm": 2.546875, "learning_rate": 0.0019824686359156633, "loss": 0.4333, "step": 6980 }, { "epoch": 0.012379663096564741, "grad_norm": 1.1484375, "learning_rate": 0.0019824569435864556, "loss": 0.2689, "step": 6982 }, { "epoch": 0.012383209261874556, "grad_norm": 0.1953125, "learning_rate": 0.001982445247397894, "loss": 0.2146, "step": 6984 }, { "epoch": 0.01238675542718437, "grad_norm": 0.97265625, "learning_rate": 0.001982433547350029, "loss": 0.1743, "step": 6986 }, { "epoch": 0.012390301592494187, "grad_norm": 0.49609375, "learning_rate": 0.0019824218434429126, "loss": 0.2409, "step": 6988 }, { "epoch": 0.012393847757804001, "grad_norm": 0.1650390625, "learning_rate": 0.0019824101356765954, "loss": 0.2394, "step": 6990 }, { "epoch": 0.012397393923113818, "grad_norm": 0.3828125, "learning_rate": 0.001982398424051129, "loss": 0.2411, "step": 6992 }, { "epoch": 0.012400940088423632, "grad_norm": 0.369140625, "learning_rate": 0.001982386708566565, "loss": 0.2816, "step": 6994 }, { "epoch": 0.012404486253733447, "grad_norm": 0.2421875, "learning_rate": 0.0019823749892229534, "loss": 0.2394, "step": 6996 }, { "epoch": 0.012408032419043263, "grad_norm": 0.4375, "learning_rate": 0.001982363266020346, "loss": 0.2617, "step": 6998 }, { "epoch": 0.012411578584353078, "grad_norm": 0.6171875, "learning_rate": 0.0019823515389587945, "loss": 0.2929, "step": 7000 }, { "epoch": 0.012415124749662892, "grad_norm": 0.65625, "learning_rate": 0.0019823398080383503, "loss": 0.4538, "step": 7002 }, { "epoch": 0.012418670914972708, "grad_norm": 8.125, "learning_rate": 0.001982328073259064, "loss": 0.2788, "step": 7004 }, { "epoch": 0.012422217080282523, "grad_norm": 1.9296875, "learning_rate": 0.0019823163346209868, "loss": 0.3532, "step": 7006 }, { "epoch": 0.012425763245592338, "grad_norm": 0.6875, "learning_rate": 0.0019823045921241707, "loss": 0.2222, "step": 7008 }, { "epoch": 0.012429309410902154, "grad_norm": 0.2138671875, "learning_rate": 0.001982292845768667, "loss": 0.2737, "step": 7010 }, { "epoch": 0.012432855576211968, "grad_norm": 0.2734375, "learning_rate": 0.0019822810955545268, "loss": 0.3135, "step": 7012 }, { "epoch": 0.012436401741521783, "grad_norm": 0.365234375, "learning_rate": 0.0019822693414818016, "loss": 0.2874, "step": 7014 }, { "epoch": 0.0124399479068316, "grad_norm": 0.34375, "learning_rate": 0.001982257583550543, "loss": 0.2411, "step": 7016 }, { "epoch": 0.012443494072141414, "grad_norm": 0.26953125, "learning_rate": 0.001982245821760802, "loss": 0.2644, "step": 7018 }, { "epoch": 0.012447040237451228, "grad_norm": 0.72265625, "learning_rate": 0.00198223405611263, "loss": 0.2268, "step": 7020 }, { "epoch": 0.012450586402761045, "grad_norm": 0.6640625, "learning_rate": 0.0019822222866060788, "loss": 0.254, "step": 7022 }, { "epoch": 0.01245413256807086, "grad_norm": 0.251953125, "learning_rate": 0.0019822105132412, "loss": 0.287, "step": 7024 }, { "epoch": 0.012457678733380675, "grad_norm": 1.8515625, "learning_rate": 0.001982198736018045, "loss": 0.2902, "step": 7026 }, { "epoch": 0.01246122489869049, "grad_norm": 0.71484375, "learning_rate": 0.001982186954936665, "loss": 0.5944, "step": 7028 }, { "epoch": 0.012464771064000305, "grad_norm": 2.28125, "learning_rate": 0.001982175169997111, "loss": 0.4291, "step": 7030 }, { "epoch": 0.01246831722931012, "grad_norm": 2.328125, "learning_rate": 0.001982163381199436, "loss": 0.3188, "step": 7032 }, { "epoch": 0.012471863394619935, "grad_norm": 0.31640625, "learning_rate": 0.001982151588543691, "loss": 0.2657, "step": 7034 }, { "epoch": 0.01247540955992975, "grad_norm": 0.52734375, "learning_rate": 0.001982139792029927, "loss": 0.262, "step": 7036 }, { "epoch": 0.012478955725239566, "grad_norm": 0.47265625, "learning_rate": 0.001982127991658196, "loss": 0.2731, "step": 7038 }, { "epoch": 0.01248250189054938, "grad_norm": 0.23046875, "learning_rate": 0.0019821161874285496, "loss": 0.208, "step": 7040 }, { "epoch": 0.012486048055859195, "grad_norm": 0.56640625, "learning_rate": 0.001982104379341039, "loss": 0.3549, "step": 7042 }, { "epoch": 0.012489594221169012, "grad_norm": 0.4921875, "learning_rate": 0.0019820925673957168, "loss": 0.2285, "step": 7044 }, { "epoch": 0.012493140386478826, "grad_norm": 0.306640625, "learning_rate": 0.0019820807515926334, "loss": 0.2439, "step": 7046 }, { "epoch": 0.01249668655178864, "grad_norm": 0.419921875, "learning_rate": 0.0019820689319318416, "loss": 0.2338, "step": 7048 }, { "epoch": 0.012500232717098457, "grad_norm": 0.44140625, "learning_rate": 0.001982057108413393, "loss": 0.4401, "step": 7050 }, { "epoch": 0.012503778882408272, "grad_norm": 0.484375, "learning_rate": 0.001982045281037339, "loss": 0.2782, "step": 7052 }, { "epoch": 0.012507325047718086, "grad_norm": 0.5390625, "learning_rate": 0.0019820334498037305, "loss": 0.2343, "step": 7054 }, { "epoch": 0.012510871213027902, "grad_norm": 0.408203125, "learning_rate": 0.0019820216147126207, "loss": 0.2572, "step": 7056 }, { "epoch": 0.012514417378337717, "grad_norm": 0.302734375, "learning_rate": 0.0019820097757640605, "loss": 0.2594, "step": 7058 }, { "epoch": 0.012517963543647533, "grad_norm": 0.5859375, "learning_rate": 0.0019819979329581015, "loss": 0.2041, "step": 7060 }, { "epoch": 0.012521509708957348, "grad_norm": 0.369140625, "learning_rate": 0.0019819860862947966, "loss": 0.2931, "step": 7062 }, { "epoch": 0.012525055874267162, "grad_norm": 0.41015625, "learning_rate": 0.0019819742357741962, "loss": 0.2811, "step": 7064 }, { "epoch": 0.012528602039576979, "grad_norm": 1.0546875, "learning_rate": 0.001981962381396353, "loss": 0.3539, "step": 7066 }, { "epoch": 0.012532148204886793, "grad_norm": 0.2734375, "learning_rate": 0.0019819505231613186, "loss": 0.2371, "step": 7068 }, { "epoch": 0.012535694370196608, "grad_norm": 0.267578125, "learning_rate": 0.001981938661069145, "loss": 0.2808, "step": 7070 }, { "epoch": 0.012539240535506424, "grad_norm": 0.5390625, "learning_rate": 0.001981926795119884, "loss": 0.4738, "step": 7072 }, { "epoch": 0.012542786700816239, "grad_norm": 0.2734375, "learning_rate": 0.001981914925313588, "loss": 0.3067, "step": 7074 }, { "epoch": 0.012546332866126053, "grad_norm": 0.734375, "learning_rate": 0.001981903051650308, "loss": 0.4865, "step": 7076 }, { "epoch": 0.01254987903143587, "grad_norm": 0.32421875, "learning_rate": 0.001981891174130096, "loss": 0.2807, "step": 7078 }, { "epoch": 0.012553425196745684, "grad_norm": 0.51171875, "learning_rate": 0.0019818792927530043, "loss": 0.2514, "step": 7080 }, { "epoch": 0.012556971362055499, "grad_norm": 0.390625, "learning_rate": 0.0019818674075190853, "loss": 0.2599, "step": 7082 }, { "epoch": 0.012560517527365315, "grad_norm": 0.490234375, "learning_rate": 0.0019818555184283903, "loss": 0.2465, "step": 7084 }, { "epoch": 0.01256406369267513, "grad_norm": 0.99609375, "learning_rate": 0.0019818436254809713, "loss": 0.293, "step": 7086 }, { "epoch": 0.012567609857984944, "grad_norm": 0.3828125, "learning_rate": 0.0019818317286768804, "loss": 0.3002, "step": 7088 }, { "epoch": 0.01257115602329476, "grad_norm": 0.28515625, "learning_rate": 0.0019818198280161705, "loss": 0.3467, "step": 7090 }, { "epoch": 0.012574702188604575, "grad_norm": 0.353515625, "learning_rate": 0.0019818079234988923, "loss": 0.2387, "step": 7092 }, { "epoch": 0.012578248353914391, "grad_norm": 13.4375, "learning_rate": 0.0019817960151250983, "loss": 0.445, "step": 7094 }, { "epoch": 0.012581794519224206, "grad_norm": 0.328125, "learning_rate": 0.0019817841028948414, "loss": 0.2352, "step": 7096 }, { "epoch": 0.01258534068453402, "grad_norm": 0.2314453125, "learning_rate": 0.0019817721868081724, "loss": 0.1747, "step": 7098 }, { "epoch": 0.012588886849843836, "grad_norm": 0.263671875, "learning_rate": 0.001981760266865144, "loss": 0.2832, "step": 7100 }, { "epoch": 0.012592433015153651, "grad_norm": 0.30078125, "learning_rate": 0.001981748343065809, "loss": 0.2656, "step": 7102 }, { "epoch": 0.012595979180463466, "grad_norm": 0.69140625, "learning_rate": 0.0019817364154102184, "loss": 0.2461, "step": 7104 }, { "epoch": 0.012599525345773282, "grad_norm": 0.54296875, "learning_rate": 0.001981724483898425, "loss": 0.2458, "step": 7106 }, { "epoch": 0.012603071511083096, "grad_norm": 3.265625, "learning_rate": 0.001981712548530481, "loss": 0.5001, "step": 7108 }, { "epoch": 0.012606617676392911, "grad_norm": 0.470703125, "learning_rate": 0.0019817006093064385, "loss": 0.3432, "step": 7110 }, { "epoch": 0.012610163841702727, "grad_norm": 1.265625, "learning_rate": 0.0019816886662263494, "loss": 0.2609, "step": 7112 }, { "epoch": 0.012613710007012542, "grad_norm": 0.30078125, "learning_rate": 0.001981676719290267, "loss": 0.2565, "step": 7114 }, { "epoch": 0.012617256172322356, "grad_norm": 0.66796875, "learning_rate": 0.001981664768498242, "loss": 0.2765, "step": 7116 }, { "epoch": 0.012620802337632173, "grad_norm": 0.5234375, "learning_rate": 0.0019816528138503274, "loss": 0.2701, "step": 7118 }, { "epoch": 0.012624348502941987, "grad_norm": 0.6875, "learning_rate": 0.001981640855346576, "loss": 0.2449, "step": 7120 }, { "epoch": 0.012627894668251802, "grad_norm": 0.205078125, "learning_rate": 0.0019816288929870394, "loss": 0.2288, "step": 7122 }, { "epoch": 0.012631440833561618, "grad_norm": 0.52734375, "learning_rate": 0.0019816169267717703, "loss": 0.2604, "step": 7124 }, { "epoch": 0.012634986998871433, "grad_norm": 0.625, "learning_rate": 0.0019816049567008202, "loss": 0.2875, "step": 7126 }, { "epoch": 0.012638533164181249, "grad_norm": 0.609375, "learning_rate": 0.0019815929827742425, "loss": 0.5593, "step": 7128 }, { "epoch": 0.012642079329491063, "grad_norm": 0.240234375, "learning_rate": 0.0019815810049920897, "loss": 0.1914, "step": 7130 }, { "epoch": 0.012645625494800878, "grad_norm": 0.25, "learning_rate": 0.0019815690233544133, "loss": 0.2331, "step": 7132 }, { "epoch": 0.012649171660110694, "grad_norm": 0.380859375, "learning_rate": 0.001981557037861266, "loss": 0.3437, "step": 7134 }, { "epoch": 0.012652717825420509, "grad_norm": 0.32421875, "learning_rate": 0.0019815450485127, "loss": 0.2202, "step": 7136 }, { "epoch": 0.012656263990730323, "grad_norm": 0.50390625, "learning_rate": 0.0019815330553087686, "loss": 0.2587, "step": 7138 }, { "epoch": 0.01265981015604014, "grad_norm": 0.52734375, "learning_rate": 0.0019815210582495232, "loss": 0.2261, "step": 7140 }, { "epoch": 0.012663356321349954, "grad_norm": 0.87890625, "learning_rate": 0.001981509057335017, "loss": 0.383, "step": 7142 }, { "epoch": 0.012666902486659769, "grad_norm": 0.58984375, "learning_rate": 0.001981497052565302, "loss": 0.2801, "step": 7144 }, { "epoch": 0.012670448651969585, "grad_norm": 0.40625, "learning_rate": 0.001981485043940431, "loss": 0.2346, "step": 7146 }, { "epoch": 0.0126739948172794, "grad_norm": 0.85546875, "learning_rate": 0.0019814730314604567, "loss": 0.3033, "step": 7148 }, { "epoch": 0.012677540982589214, "grad_norm": 0.43359375, "learning_rate": 0.001981461015125431, "loss": 0.2223, "step": 7150 }, { "epoch": 0.01268108714789903, "grad_norm": 0.7734375, "learning_rate": 0.0019814489949354073, "loss": 0.2798, "step": 7152 }, { "epoch": 0.012684633313208845, "grad_norm": 0.421875, "learning_rate": 0.0019814369708904375, "loss": 0.2874, "step": 7154 }, { "epoch": 0.01268817947851866, "grad_norm": 0.2275390625, "learning_rate": 0.0019814249429905744, "loss": 0.2074, "step": 7156 }, { "epoch": 0.012691725643828476, "grad_norm": 0.349609375, "learning_rate": 0.0019814129112358703, "loss": 0.3419, "step": 7158 }, { "epoch": 0.01269527180913829, "grad_norm": 0.2734375, "learning_rate": 0.001981400875626378, "loss": 0.2377, "step": 7160 }, { "epoch": 0.012698817974448107, "grad_norm": 0.40234375, "learning_rate": 0.001981388836162151, "loss": 0.256, "step": 7162 }, { "epoch": 0.012702364139757921, "grad_norm": 0.6484375, "learning_rate": 0.0019813767928432407, "loss": 0.2384, "step": 7164 }, { "epoch": 0.012705910305067736, "grad_norm": 0.486328125, "learning_rate": 0.0019813647456697002, "loss": 0.2071, "step": 7166 }, { "epoch": 0.012709456470377552, "grad_norm": 0.55859375, "learning_rate": 0.0019813526946415826, "loss": 0.2576, "step": 7168 }, { "epoch": 0.012713002635687367, "grad_norm": 2.234375, "learning_rate": 0.00198134063975894, "loss": 0.5217, "step": 7170 }, { "epoch": 0.012716548800997181, "grad_norm": 0.28125, "learning_rate": 0.0019813285810218254, "loss": 0.2629, "step": 7172 }, { "epoch": 0.012720094966306997, "grad_norm": 0.337890625, "learning_rate": 0.0019813165184302916, "loss": 0.2286, "step": 7174 }, { "epoch": 0.012723641131616812, "grad_norm": 1.375, "learning_rate": 0.0019813044519843915, "loss": 0.2999, "step": 7176 }, { "epoch": 0.012727187296926627, "grad_norm": 0.6875, "learning_rate": 0.0019812923816841773, "loss": 0.2969, "step": 7178 }, { "epoch": 0.012730733462236443, "grad_norm": 0.416015625, "learning_rate": 0.001981280307529702, "loss": 0.2352, "step": 7180 }, { "epoch": 0.012734279627546257, "grad_norm": 0.3984375, "learning_rate": 0.001981268229521019, "loss": 0.329, "step": 7182 }, { "epoch": 0.012737825792856072, "grad_norm": 0.369140625, "learning_rate": 0.0019812561476581806, "loss": 0.229, "step": 7184 }, { "epoch": 0.012741371958165888, "grad_norm": 1.1328125, "learning_rate": 0.001981244061941239, "loss": 0.2523, "step": 7186 }, { "epoch": 0.012744918123475703, "grad_norm": 2.265625, "learning_rate": 0.001981231972370249, "loss": 0.3817, "step": 7188 }, { "epoch": 0.012748464288785517, "grad_norm": 0.4140625, "learning_rate": 0.0019812198789452614, "loss": 0.2409, "step": 7190 }, { "epoch": 0.012752010454095334, "grad_norm": 0.6796875, "learning_rate": 0.00198120778166633, "loss": 0.3105, "step": 7192 }, { "epoch": 0.012755556619405148, "grad_norm": 0.251953125, "learning_rate": 0.0019811956805335074, "loss": 0.268, "step": 7194 }, { "epoch": 0.012759102784714964, "grad_norm": 0.40625, "learning_rate": 0.0019811835755468472, "loss": 0.2836, "step": 7196 }, { "epoch": 0.012762648950024779, "grad_norm": 0.310546875, "learning_rate": 0.0019811714667064017, "loss": 0.2399, "step": 7198 }, { "epoch": 0.012766195115334594, "grad_norm": 0.33203125, "learning_rate": 0.001981159354012224, "loss": 0.2313, "step": 7200 }, { "epoch": 0.01276974128064441, "grad_norm": 0.3046875, "learning_rate": 0.0019811472374643676, "loss": 0.3612, "step": 7202 }, { "epoch": 0.012773287445954224, "grad_norm": 0.466796875, "learning_rate": 0.0019811351170628847, "loss": 0.2715, "step": 7204 }, { "epoch": 0.012776833611264039, "grad_norm": 0.57421875, "learning_rate": 0.0019811229928078287, "loss": 0.2722, "step": 7206 }, { "epoch": 0.012780379776573855, "grad_norm": 0.3984375, "learning_rate": 0.001981110864699252, "loss": 0.249, "step": 7208 }, { "epoch": 0.01278392594188367, "grad_norm": 0.255859375, "learning_rate": 0.0019810987327372087, "loss": 0.3748, "step": 7210 }, { "epoch": 0.012787472107193484, "grad_norm": 1.5703125, "learning_rate": 0.001981086596921751, "loss": 0.3629, "step": 7212 }, { "epoch": 0.0127910182725033, "grad_norm": 1.4765625, "learning_rate": 0.001981074457252933, "loss": 0.3046, "step": 7214 }, { "epoch": 0.012794564437813115, "grad_norm": 0.49609375, "learning_rate": 0.0019810623137308065, "loss": 0.2893, "step": 7216 }, { "epoch": 0.01279811060312293, "grad_norm": 0.2578125, "learning_rate": 0.001981050166355426, "loss": 0.1819, "step": 7218 }, { "epoch": 0.012801656768432746, "grad_norm": 0.384765625, "learning_rate": 0.001981038015126843, "loss": 0.2163, "step": 7220 }, { "epoch": 0.01280520293374256, "grad_norm": 0.68359375, "learning_rate": 0.0019810258600451115, "loss": 0.2835, "step": 7222 }, { "epoch": 0.012808749099052375, "grad_norm": 0.306640625, "learning_rate": 0.001981013701110285, "loss": 0.2571, "step": 7224 }, { "epoch": 0.012812295264362191, "grad_norm": 0.53125, "learning_rate": 0.001981001538322416, "loss": 0.2347, "step": 7226 }, { "epoch": 0.012815841429672006, "grad_norm": 0.333984375, "learning_rate": 0.001980989371681558, "loss": 0.2317, "step": 7228 }, { "epoch": 0.012819387594981822, "grad_norm": 0.4140625, "learning_rate": 0.001980977201187765, "loss": 0.2647, "step": 7230 }, { "epoch": 0.012822933760291637, "grad_norm": 0.400390625, "learning_rate": 0.0019809650268410887, "loss": 0.3027, "step": 7232 }, { "epoch": 0.012826479925601451, "grad_norm": 0.30078125, "learning_rate": 0.0019809528486415835, "loss": 0.2051, "step": 7234 }, { "epoch": 0.012830026090911268, "grad_norm": 0.3125, "learning_rate": 0.001980940666589302, "loss": 0.326, "step": 7236 }, { "epoch": 0.012833572256221082, "grad_norm": 0.375, "learning_rate": 0.0019809284806842978, "loss": 0.2098, "step": 7238 }, { "epoch": 0.012837118421530897, "grad_norm": 0.1904296875, "learning_rate": 0.001980916290926624, "loss": 0.2324, "step": 7240 }, { "epoch": 0.012840664586840713, "grad_norm": 0.376953125, "learning_rate": 0.001980904097316334, "loss": 0.2191, "step": 7242 }, { "epoch": 0.012844210752150528, "grad_norm": 0.3515625, "learning_rate": 0.001980891899853482, "loss": 0.2297, "step": 7244 }, { "epoch": 0.012847756917460342, "grad_norm": 0.7734375, "learning_rate": 0.0019808796985381193, "loss": 0.2185, "step": 7246 }, { "epoch": 0.012851303082770158, "grad_norm": 0.7578125, "learning_rate": 0.001980867493370301, "loss": 0.247, "step": 7248 }, { "epoch": 0.012854849248079973, "grad_norm": 0.38671875, "learning_rate": 0.00198085528435008, "loss": 0.2162, "step": 7250 }, { "epoch": 0.012858395413389788, "grad_norm": 0.5625, "learning_rate": 0.0019808430714775096, "loss": 0.2796, "step": 7252 }, { "epoch": 0.012861941578699604, "grad_norm": 1.3359375, "learning_rate": 0.0019808308547526435, "loss": 0.2255, "step": 7254 }, { "epoch": 0.012865487744009418, "grad_norm": 0.1826171875, "learning_rate": 0.0019808186341755346, "loss": 0.2234, "step": 7256 }, { "epoch": 0.012869033909319233, "grad_norm": 0.921875, "learning_rate": 0.001980806409746237, "loss": 0.3388, "step": 7258 }, { "epoch": 0.01287258007462905, "grad_norm": 0.341796875, "learning_rate": 0.0019807941814648034, "loss": 0.3057, "step": 7260 }, { "epoch": 0.012876126239938864, "grad_norm": 0.67578125, "learning_rate": 0.0019807819493312877, "loss": 0.292, "step": 7262 }, { "epoch": 0.01287967240524868, "grad_norm": 0.298828125, "learning_rate": 0.0019807697133457434, "loss": 0.2776, "step": 7264 }, { "epoch": 0.012883218570558495, "grad_norm": 2.03125, "learning_rate": 0.001980757473508224, "loss": 0.3523, "step": 7266 }, { "epoch": 0.01288676473586831, "grad_norm": 0.1513671875, "learning_rate": 0.0019807452298187833, "loss": 0.2111, "step": 7268 }, { "epoch": 0.012890310901178125, "grad_norm": 0.6484375, "learning_rate": 0.0019807329822774744, "loss": 0.3608, "step": 7270 }, { "epoch": 0.01289385706648794, "grad_norm": 0.302734375, "learning_rate": 0.0019807207308843505, "loss": 0.5, "step": 7272 }, { "epoch": 0.012897403231797755, "grad_norm": 1.5390625, "learning_rate": 0.0019807084756394665, "loss": 0.3164, "step": 7274 }, { "epoch": 0.012900949397107571, "grad_norm": 0.255859375, "learning_rate": 0.001980696216542875, "loss": 0.2405, "step": 7276 }, { "epoch": 0.012904495562417385, "grad_norm": 0.9140625, "learning_rate": 0.0019806839535946295, "loss": 0.2778, "step": 7278 }, { "epoch": 0.0129080417277272, "grad_norm": 0.2470703125, "learning_rate": 0.001980671686794784, "loss": 0.2086, "step": 7280 }, { "epoch": 0.012911587893037016, "grad_norm": 0.30078125, "learning_rate": 0.0019806594161433924, "loss": 0.3295, "step": 7282 }, { "epoch": 0.01291513405834683, "grad_norm": 1.0078125, "learning_rate": 0.001980647141640508, "loss": 0.301, "step": 7284 }, { "epoch": 0.012918680223656645, "grad_norm": 0.439453125, "learning_rate": 0.001980634863286184, "loss": 0.3033, "step": 7286 }, { "epoch": 0.012922226388966462, "grad_norm": 0.41796875, "learning_rate": 0.0019806225810804754, "loss": 0.2387, "step": 7288 }, { "epoch": 0.012925772554276276, "grad_norm": 0.2216796875, "learning_rate": 0.0019806102950234348, "loss": 0.2494, "step": 7290 }, { "epoch": 0.01292931871958609, "grad_norm": 0.3671875, "learning_rate": 0.0019805980051151163, "loss": 0.2023, "step": 7292 }, { "epoch": 0.012932864884895907, "grad_norm": 0.4453125, "learning_rate": 0.001980585711355574, "loss": 0.2692, "step": 7294 }, { "epoch": 0.012936411050205722, "grad_norm": 0.296875, "learning_rate": 0.0019805734137448607, "loss": 0.2295, "step": 7296 }, { "epoch": 0.012939957215515538, "grad_norm": 0.265625, "learning_rate": 0.001980561112283031, "loss": 0.2222, "step": 7298 }, { "epoch": 0.012943503380825352, "grad_norm": 0.416015625, "learning_rate": 0.0019805488069701387, "loss": 0.3077, "step": 7300 }, { "epoch": 0.012947049546135167, "grad_norm": 0.4453125, "learning_rate": 0.001980536497806237, "loss": 0.2136, "step": 7302 }, { "epoch": 0.012950595711444983, "grad_norm": 0.65625, "learning_rate": 0.0019805241847913805, "loss": 0.257, "step": 7304 }, { "epoch": 0.012954141876754798, "grad_norm": 0.890625, "learning_rate": 0.0019805118679256223, "loss": 0.2409, "step": 7306 }, { "epoch": 0.012957688042064612, "grad_norm": 3.328125, "learning_rate": 0.001980499547209017, "loss": 0.4833, "step": 7308 }, { "epoch": 0.012961234207374429, "grad_norm": 0.2421875, "learning_rate": 0.0019804872226416178, "loss": 0.2097, "step": 7310 }, { "epoch": 0.012964780372684243, "grad_norm": 0.58203125, "learning_rate": 0.0019804748942234794, "loss": 0.3131, "step": 7312 }, { "epoch": 0.012968326537994058, "grad_norm": 0.3984375, "learning_rate": 0.0019804625619546552, "loss": 0.2601, "step": 7314 }, { "epoch": 0.012971872703303874, "grad_norm": 1.21875, "learning_rate": 0.001980450225835199, "loss": 0.2731, "step": 7316 }, { "epoch": 0.012975418868613689, "grad_norm": 0.59375, "learning_rate": 0.001980437885865165, "loss": 0.4425, "step": 7318 }, { "epoch": 0.012978965033923503, "grad_norm": 2.328125, "learning_rate": 0.0019804255420446067, "loss": 0.2614, "step": 7320 }, { "epoch": 0.01298251119923332, "grad_norm": 0.5625, "learning_rate": 0.001980413194373579, "loss": 0.4741, "step": 7322 }, { "epoch": 0.012986057364543134, "grad_norm": 0.455078125, "learning_rate": 0.001980400842852135, "loss": 0.2157, "step": 7324 }, { "epoch": 0.012989603529852949, "grad_norm": 0.3828125, "learning_rate": 0.0019803884874803296, "loss": 0.2669, "step": 7326 }, { "epoch": 0.012993149695162765, "grad_norm": 0.40234375, "learning_rate": 0.0019803761282582164, "loss": 0.209, "step": 7328 }, { "epoch": 0.01299669586047258, "grad_norm": 0.37109375, "learning_rate": 0.001980363765185849, "loss": 0.3217, "step": 7330 }, { "epoch": 0.013000242025782396, "grad_norm": 1.71875, "learning_rate": 0.0019803513982632817, "loss": 0.2535, "step": 7332 }, { "epoch": 0.01300378819109221, "grad_norm": 0.86328125, "learning_rate": 0.001980339027490569, "loss": 0.2472, "step": 7334 }, { "epoch": 0.013007334356402025, "grad_norm": 1.578125, "learning_rate": 0.0019803266528677648, "loss": 0.4076, "step": 7336 }, { "epoch": 0.013010880521711841, "grad_norm": 0.341796875, "learning_rate": 0.0019803142743949234, "loss": 0.1899, "step": 7338 }, { "epoch": 0.013014426687021656, "grad_norm": 0.185546875, "learning_rate": 0.0019803018920720983, "loss": 0.239, "step": 7340 }, { "epoch": 0.01301797285233147, "grad_norm": 1.640625, "learning_rate": 0.001980289505899344, "loss": 0.3933, "step": 7342 }, { "epoch": 0.013021519017641286, "grad_norm": 0.380859375, "learning_rate": 0.0019802771158767152, "loss": 0.2236, "step": 7344 }, { "epoch": 0.013025065182951101, "grad_norm": 0.28125, "learning_rate": 0.001980264722004265, "loss": 0.2797, "step": 7346 }, { "epoch": 0.013028611348260916, "grad_norm": 0.46875, "learning_rate": 0.0019802523242820488, "loss": 0.5111, "step": 7348 }, { "epoch": 0.013032157513570732, "grad_norm": 0.4765625, "learning_rate": 0.0019802399227101205, "loss": 0.293, "step": 7350 }, { "epoch": 0.013035703678880546, "grad_norm": 0.8828125, "learning_rate": 0.0019802275172885334, "loss": 0.279, "step": 7352 }, { "epoch": 0.013039249844190361, "grad_norm": 1.6171875, "learning_rate": 0.0019802151080173425, "loss": 0.3866, "step": 7354 }, { "epoch": 0.013042796009500177, "grad_norm": 0.53125, "learning_rate": 0.0019802026948966024, "loss": 0.2419, "step": 7356 }, { "epoch": 0.013046342174809992, "grad_norm": 0.4375, "learning_rate": 0.001980190277926367, "loss": 0.3264, "step": 7358 }, { "epoch": 0.013049888340119806, "grad_norm": 0.6171875, "learning_rate": 0.0019801778571066904, "loss": 0.2487, "step": 7360 }, { "epoch": 0.013053434505429623, "grad_norm": 0.33203125, "learning_rate": 0.001980165432437627, "loss": 0.2401, "step": 7362 }, { "epoch": 0.013056980670739437, "grad_norm": 0.515625, "learning_rate": 0.0019801530039192314, "loss": 0.3544, "step": 7364 }, { "epoch": 0.013060526836049254, "grad_norm": 0.73046875, "learning_rate": 0.0019801405715515574, "loss": 0.3347, "step": 7366 }, { "epoch": 0.013064073001359068, "grad_norm": 0.83984375, "learning_rate": 0.0019801281353346604, "loss": 0.2402, "step": 7368 }, { "epoch": 0.013067619166668883, "grad_norm": 1.140625, "learning_rate": 0.0019801156952685937, "loss": 0.2637, "step": 7370 }, { "epoch": 0.013071165331978699, "grad_norm": 0.322265625, "learning_rate": 0.0019801032513534125, "loss": 0.2193, "step": 7372 }, { "epoch": 0.013074711497288513, "grad_norm": 2.296875, "learning_rate": 0.001980090803589171, "loss": 0.2314, "step": 7374 }, { "epoch": 0.013078257662598328, "grad_norm": 0.921875, "learning_rate": 0.001980078351975923, "loss": 0.2706, "step": 7376 }, { "epoch": 0.013081803827908144, "grad_norm": 0.2275390625, "learning_rate": 0.001980065896513724, "loss": 0.2573, "step": 7378 }, { "epoch": 0.013085349993217959, "grad_norm": 1.8984375, "learning_rate": 0.001980053437202628, "loss": 0.3453, "step": 7380 }, { "epoch": 0.013088896158527773, "grad_norm": 7.6875, "learning_rate": 0.001980040974042689, "loss": 0.3178, "step": 7382 }, { "epoch": 0.01309244232383759, "grad_norm": 0.55078125, "learning_rate": 0.0019800285070339626, "loss": 0.291, "step": 7384 }, { "epoch": 0.013095988489147404, "grad_norm": 0.53125, "learning_rate": 0.001980016036176502, "loss": 0.1913, "step": 7386 }, { "epoch": 0.013099534654457219, "grad_norm": 0.373046875, "learning_rate": 0.001980003561470363, "loss": 0.1708, "step": 7388 }, { "epoch": 0.013103080819767035, "grad_norm": 1.65625, "learning_rate": 0.0019799910829155993, "loss": 0.2892, "step": 7390 }, { "epoch": 0.01310662698507685, "grad_norm": 1.4375, "learning_rate": 0.001979978600512266, "loss": 0.2598, "step": 7392 }, { "epoch": 0.013110173150386664, "grad_norm": 0.30859375, "learning_rate": 0.001979966114260417, "loss": 0.2125, "step": 7394 }, { "epoch": 0.01311371931569648, "grad_norm": 0.66796875, "learning_rate": 0.0019799536241601077, "loss": 0.2582, "step": 7396 }, { "epoch": 0.013117265481006295, "grad_norm": 0.8828125, "learning_rate": 0.0019799411302113927, "loss": 0.2937, "step": 7398 }, { "epoch": 0.013120811646316111, "grad_norm": 0.306640625, "learning_rate": 0.001979928632414326, "loss": 0.3104, "step": 7400 }, { "epoch": 0.013124357811625926, "grad_norm": 1.125, "learning_rate": 0.0019799161307689625, "loss": 0.2228, "step": 7402 }, { "epoch": 0.01312790397693574, "grad_norm": 0.57421875, "learning_rate": 0.001979903625275357, "loss": 0.3244, "step": 7404 }, { "epoch": 0.013131450142245557, "grad_norm": 0.359375, "learning_rate": 0.001979891115933564, "loss": 0.202, "step": 7406 }, { "epoch": 0.013134996307555371, "grad_norm": 0.349609375, "learning_rate": 0.0019798786027436384, "loss": 0.246, "step": 7408 }, { "epoch": 0.013138542472865186, "grad_norm": 0.51953125, "learning_rate": 0.001979866085705635, "loss": 0.2544, "step": 7410 }, { "epoch": 0.013142088638175002, "grad_norm": 0.5078125, "learning_rate": 0.0019798535648196084, "loss": 0.1871, "step": 7412 }, { "epoch": 0.013145634803484817, "grad_norm": 1.5625, "learning_rate": 0.0019798410400856136, "loss": 0.4191, "step": 7414 }, { "epoch": 0.013149180968794631, "grad_norm": 0.6953125, "learning_rate": 0.0019798285115037047, "loss": 0.3978, "step": 7416 }, { "epoch": 0.013152727134104448, "grad_norm": 1.2734375, "learning_rate": 0.0019798159790739376, "loss": 0.4776, "step": 7418 }, { "epoch": 0.013156273299414262, "grad_norm": 0.4375, "learning_rate": 0.001979803442796366, "loss": 0.2267, "step": 7420 }, { "epoch": 0.013159819464724077, "grad_norm": 0.458984375, "learning_rate": 0.001979790902671045, "loss": 0.2982, "step": 7422 }, { "epoch": 0.013163365630033893, "grad_norm": 0.8828125, "learning_rate": 0.0019797783586980302, "loss": 0.3043, "step": 7424 }, { "epoch": 0.013166911795343707, "grad_norm": 1.078125, "learning_rate": 0.0019797658108773753, "loss": 0.3142, "step": 7426 }, { "epoch": 0.013170457960653522, "grad_norm": 0.4296875, "learning_rate": 0.0019797532592091367, "loss": 0.2354, "step": 7428 }, { "epoch": 0.013174004125963338, "grad_norm": 0.361328125, "learning_rate": 0.0019797407036933677, "loss": 0.406, "step": 7430 }, { "epoch": 0.013177550291273153, "grad_norm": 1.2734375, "learning_rate": 0.001979728144330124, "loss": 0.2033, "step": 7432 }, { "epoch": 0.01318109645658297, "grad_norm": 1.2890625, "learning_rate": 0.0019797155811194602, "loss": 0.2822, "step": 7434 }, { "epoch": 0.013184642621892784, "grad_norm": 0.640625, "learning_rate": 0.0019797030140614315, "loss": 0.2742, "step": 7436 }, { "epoch": 0.013188188787202598, "grad_norm": 1.6328125, "learning_rate": 0.001979690443156093, "loss": 0.5442, "step": 7438 }, { "epoch": 0.013191734952512415, "grad_norm": 0.474609375, "learning_rate": 0.001979677868403499, "loss": 0.2336, "step": 7440 }, { "epoch": 0.013195281117822229, "grad_norm": 0.349609375, "learning_rate": 0.0019796652898037056, "loss": 0.2037, "step": 7442 }, { "epoch": 0.013198827283132044, "grad_norm": 0.341796875, "learning_rate": 0.001979652707356767, "loss": 0.2163, "step": 7444 }, { "epoch": 0.01320237344844186, "grad_norm": 0.310546875, "learning_rate": 0.0019796401210627384, "loss": 0.1865, "step": 7446 }, { "epoch": 0.013205919613751674, "grad_norm": 0.92578125, "learning_rate": 0.0019796275309216745, "loss": 0.4165, "step": 7448 }, { "epoch": 0.013209465779061489, "grad_norm": 0.404296875, "learning_rate": 0.0019796149369336316, "loss": 0.2459, "step": 7450 }, { "epoch": 0.013213011944371305, "grad_norm": 3.234375, "learning_rate": 0.0019796023390986632, "loss": 0.7228, "step": 7452 }, { "epoch": 0.01321655810968112, "grad_norm": 0.3984375, "learning_rate": 0.0019795897374168254, "loss": 0.281, "step": 7454 }, { "epoch": 0.013220104274990934, "grad_norm": 0.455078125, "learning_rate": 0.001979577131888173, "loss": 0.2723, "step": 7456 }, { "epoch": 0.01322365044030075, "grad_norm": 0.734375, "learning_rate": 0.0019795645225127606, "loss": 0.2263, "step": 7458 }, { "epoch": 0.013227196605610565, "grad_norm": 0.4140625, "learning_rate": 0.0019795519092906445, "loss": 0.2266, "step": 7460 }, { "epoch": 0.01323074277092038, "grad_norm": 2.515625, "learning_rate": 0.0019795392922218793, "loss": 0.3231, "step": 7462 }, { "epoch": 0.013234288936230196, "grad_norm": 0.40625, "learning_rate": 0.00197952667130652, "loss": 0.2287, "step": 7464 }, { "epoch": 0.01323783510154001, "grad_norm": 0.443359375, "learning_rate": 0.0019795140465446214, "loss": 0.2105, "step": 7466 }, { "epoch": 0.013241381266849827, "grad_norm": 1.09375, "learning_rate": 0.00197950141793624, "loss": 0.3577, "step": 7468 }, { "epoch": 0.013244927432159641, "grad_norm": 0.97265625, "learning_rate": 0.00197948878548143, "loss": 0.3316, "step": 7470 }, { "epoch": 0.013248473597469456, "grad_norm": 1.203125, "learning_rate": 0.0019794761491802467, "loss": 0.316, "step": 7472 }, { "epoch": 0.013252019762779272, "grad_norm": 0.318359375, "learning_rate": 0.001979463509032746, "loss": 0.2238, "step": 7474 }, { "epoch": 0.013255565928089087, "grad_norm": 1.515625, "learning_rate": 0.0019794508650389825, "loss": 0.2358, "step": 7476 }, { "epoch": 0.013259112093398901, "grad_norm": 1.0703125, "learning_rate": 0.0019794382171990114, "loss": 0.311, "step": 7478 }, { "epoch": 0.013262658258708718, "grad_norm": 0.298828125, "learning_rate": 0.001979425565512889, "loss": 0.3175, "step": 7480 }, { "epoch": 0.013266204424018532, "grad_norm": 0.41796875, "learning_rate": 0.00197941290998067, "loss": 0.2808, "step": 7482 }, { "epoch": 0.013269750589328347, "grad_norm": 0.78125, "learning_rate": 0.0019794002506024096, "loss": 0.1972, "step": 7484 }, { "epoch": 0.013273296754638163, "grad_norm": 0.27734375, "learning_rate": 0.001979387587378163, "loss": 0.1635, "step": 7486 }, { "epoch": 0.013276842919947978, "grad_norm": 1.0703125, "learning_rate": 0.0019793749203079864, "loss": 0.3265, "step": 7488 }, { "epoch": 0.013280389085257792, "grad_norm": 2.90625, "learning_rate": 0.0019793622493919343, "loss": 0.2144, "step": 7490 }, { "epoch": 0.013283935250567609, "grad_norm": 0.349609375, "learning_rate": 0.0019793495746300623, "loss": 0.245, "step": 7492 }, { "epoch": 0.013287481415877423, "grad_norm": 0.466796875, "learning_rate": 0.0019793368960224268, "loss": 0.2353, "step": 7494 }, { "epoch": 0.013291027581187238, "grad_norm": 0.546875, "learning_rate": 0.0019793242135690823, "loss": 0.2539, "step": 7496 }, { "epoch": 0.013294573746497054, "grad_norm": 0.431640625, "learning_rate": 0.001979311527270084, "loss": 0.2414, "step": 7498 }, { "epoch": 0.013298119911806868, "grad_norm": 0.75390625, "learning_rate": 0.0019792988371254883, "loss": 0.3139, "step": 7500 }, { "epoch": 0.013301666077116685, "grad_norm": 0.302734375, "learning_rate": 0.0019792861431353497, "loss": 0.2354, "step": 7502 }, { "epoch": 0.0133052122424265, "grad_norm": 0.451171875, "learning_rate": 0.0019792734452997248, "loss": 0.1684, "step": 7504 }, { "epoch": 0.013308758407736314, "grad_norm": 0.55078125, "learning_rate": 0.0019792607436186684, "loss": 0.2232, "step": 7506 }, { "epoch": 0.01331230457304613, "grad_norm": 0.51953125, "learning_rate": 0.0019792480380922367, "loss": 0.1877, "step": 7508 }, { "epoch": 0.013315850738355945, "grad_norm": 0.91015625, "learning_rate": 0.001979235328720484, "loss": 0.2452, "step": 7510 }, { "epoch": 0.01331939690366576, "grad_norm": 0.5078125, "learning_rate": 0.0019792226155034673, "loss": 0.3374, "step": 7512 }, { "epoch": 0.013322943068975576, "grad_norm": 0.53125, "learning_rate": 0.001979209898441241, "loss": 0.2133, "step": 7514 }, { "epoch": 0.01332648923428539, "grad_norm": 0.458984375, "learning_rate": 0.0019791971775338616, "loss": 0.2446, "step": 7516 }, { "epoch": 0.013330035399595205, "grad_norm": 0.337890625, "learning_rate": 0.0019791844527813846, "loss": 0.201, "step": 7518 }, { "epoch": 0.013333581564905021, "grad_norm": 0.458984375, "learning_rate": 0.0019791717241838657, "loss": 0.2199, "step": 7520 }, { "epoch": 0.013337127730214835, "grad_norm": 0.34375, "learning_rate": 0.00197915899174136, "loss": 0.2111, "step": 7522 }, { "epoch": 0.01334067389552465, "grad_norm": 1.3671875, "learning_rate": 0.001979146255453924, "loss": 0.2488, "step": 7524 }, { "epoch": 0.013344220060834466, "grad_norm": 0.416015625, "learning_rate": 0.0019791335153216123, "loss": 0.3305, "step": 7526 }, { "epoch": 0.013347766226144281, "grad_norm": 1.5234375, "learning_rate": 0.0019791207713444814, "loss": 0.273, "step": 7528 }, { "epoch": 0.013351312391454095, "grad_norm": 0.3046875, "learning_rate": 0.001979108023522587, "loss": 0.1712, "step": 7530 }, { "epoch": 0.013354858556763912, "grad_norm": 0.271484375, "learning_rate": 0.0019790952718559845, "loss": 0.1873, "step": 7532 }, { "epoch": 0.013358404722073726, "grad_norm": 3.9375, "learning_rate": 0.0019790825163447305, "loss": 0.2767, "step": 7534 }, { "epoch": 0.013361950887383543, "grad_norm": 2.265625, "learning_rate": 0.00197906975698888, "loss": 0.5543, "step": 7536 }, { "epoch": 0.013365497052693357, "grad_norm": 0.431640625, "learning_rate": 0.001979056993788489, "loss": 0.2476, "step": 7538 }, { "epoch": 0.013369043218003172, "grad_norm": 0.50390625, "learning_rate": 0.001979044226743613, "loss": 0.2713, "step": 7540 }, { "epoch": 0.013372589383312988, "grad_norm": 0.73046875, "learning_rate": 0.0019790314558543087, "loss": 0.4439, "step": 7542 }, { "epoch": 0.013376135548622803, "grad_norm": 0.9140625, "learning_rate": 0.001979018681120631, "loss": 0.2361, "step": 7544 }, { "epoch": 0.013379681713932617, "grad_norm": 0.66015625, "learning_rate": 0.001979005902542636, "loss": 0.286, "step": 7546 }, { "epoch": 0.013383227879242433, "grad_norm": 0.45703125, "learning_rate": 0.0019789931201203803, "loss": 0.224, "step": 7548 }, { "epoch": 0.013386774044552248, "grad_norm": 0.54296875, "learning_rate": 0.0019789803338539193, "loss": 0.2424, "step": 7550 }, { "epoch": 0.013390320209862062, "grad_norm": 0.71484375, "learning_rate": 0.0019789675437433085, "loss": 0.2961, "step": 7552 }, { "epoch": 0.013393866375171879, "grad_norm": 0.6484375, "learning_rate": 0.0019789547497886043, "loss": 0.2981, "step": 7554 }, { "epoch": 0.013397412540481693, "grad_norm": 0.6953125, "learning_rate": 0.0019789419519898625, "loss": 0.4272, "step": 7556 }, { "epoch": 0.013400958705791508, "grad_norm": 0.71484375, "learning_rate": 0.001978929150347139, "loss": 0.2879, "step": 7558 }, { "epoch": 0.013404504871101324, "grad_norm": 0.5, "learning_rate": 0.0019789163448604907, "loss": 0.2423, "step": 7560 }, { "epoch": 0.013408051036411139, "grad_norm": 0.8984375, "learning_rate": 0.0019789035355299725, "loss": 0.2264, "step": 7562 }, { "epoch": 0.013411597201720953, "grad_norm": 0.3046875, "learning_rate": 0.0019788907223556407, "loss": 0.1948, "step": 7564 }, { "epoch": 0.01341514336703077, "grad_norm": 0.3046875, "learning_rate": 0.001978877905337551, "loss": 0.2547, "step": 7566 }, { "epoch": 0.013418689532340584, "grad_norm": 0.84375, "learning_rate": 0.0019788650844757604, "loss": 0.2852, "step": 7568 }, { "epoch": 0.0134222356976504, "grad_norm": 0.462890625, "learning_rate": 0.0019788522597703243, "loss": 0.2168, "step": 7570 }, { "epoch": 0.013425781862960215, "grad_norm": 0.439453125, "learning_rate": 0.0019788394312212987, "loss": 0.219, "step": 7572 }, { "epoch": 0.01342932802827003, "grad_norm": 0.236328125, "learning_rate": 0.00197882659882874, "loss": 0.2216, "step": 7574 }, { "epoch": 0.013432874193579846, "grad_norm": 0.2412109375, "learning_rate": 0.0019788137625927045, "loss": 0.2031, "step": 7576 }, { "epoch": 0.01343642035888966, "grad_norm": 5.15625, "learning_rate": 0.001978800922513248, "loss": 0.3581, "step": 7578 }, { "epoch": 0.013439966524199475, "grad_norm": 0.94140625, "learning_rate": 0.0019787880785904263, "loss": 0.2773, "step": 7580 }, { "epoch": 0.013443512689509291, "grad_norm": 0.68359375, "learning_rate": 0.0019787752308242966, "loss": 0.2136, "step": 7582 }, { "epoch": 0.013447058854819106, "grad_norm": 0.37109375, "learning_rate": 0.001978762379214914, "loss": 0.2067, "step": 7584 }, { "epoch": 0.01345060502012892, "grad_norm": 0.83984375, "learning_rate": 0.0019787495237623353, "loss": 0.2701, "step": 7586 }, { "epoch": 0.013454151185438737, "grad_norm": 0.84765625, "learning_rate": 0.0019787366644666167, "loss": 0.246, "step": 7588 }, { "epoch": 0.013457697350748551, "grad_norm": 1.0390625, "learning_rate": 0.001978723801327815, "loss": 0.4068, "step": 7590 }, { "epoch": 0.013461243516058366, "grad_norm": 0.39453125, "learning_rate": 0.001978710934345985, "loss": 0.1986, "step": 7592 }, { "epoch": 0.013464789681368182, "grad_norm": 0.306640625, "learning_rate": 0.001978698063521184, "loss": 0.2285, "step": 7594 }, { "epoch": 0.013468335846677996, "grad_norm": 1.5859375, "learning_rate": 0.001978685188853468, "loss": 0.4398, "step": 7596 }, { "epoch": 0.013471882011987811, "grad_norm": 0.41015625, "learning_rate": 0.001978672310342893, "loss": 0.4547, "step": 7598 }, { "epoch": 0.013475428177297627, "grad_norm": 0.474609375, "learning_rate": 0.0019786594279895165, "loss": 0.2128, "step": 7600 }, { "epoch": 0.013478974342607442, "grad_norm": 0.81640625, "learning_rate": 0.0019786465417933937, "loss": 0.2281, "step": 7602 }, { "epoch": 0.013482520507917258, "grad_norm": 0.53125, "learning_rate": 0.0019786336517545813, "loss": 0.3014, "step": 7604 }, { "epoch": 0.013486066673227073, "grad_norm": 6.125, "learning_rate": 0.0019786207578731357, "loss": 0.2493, "step": 7606 }, { "epoch": 0.013489612838536887, "grad_norm": 0.56640625, "learning_rate": 0.0019786078601491132, "loss": 0.2138, "step": 7608 }, { "epoch": 0.013493159003846704, "grad_norm": 0.384765625, "learning_rate": 0.0019785949585825707, "loss": 0.3604, "step": 7610 }, { "epoch": 0.013496705169156518, "grad_norm": 1.4921875, "learning_rate": 0.0019785820531735636, "loss": 0.2503, "step": 7612 }, { "epoch": 0.013500251334466333, "grad_norm": 0.46875, "learning_rate": 0.0019785691439221493, "loss": 0.2542, "step": 7614 }, { "epoch": 0.013503797499776149, "grad_norm": 0.546875, "learning_rate": 0.0019785562308283836, "loss": 0.2516, "step": 7616 }, { "epoch": 0.013507343665085964, "grad_norm": 0.251953125, "learning_rate": 0.0019785433138923233, "loss": 0.2466, "step": 7618 }, { "epoch": 0.013510889830395778, "grad_norm": 0.37890625, "learning_rate": 0.0019785303931140253, "loss": 0.3439, "step": 7620 }, { "epoch": 0.013514435995705594, "grad_norm": 0.357421875, "learning_rate": 0.0019785174684935456, "loss": 0.2255, "step": 7622 }, { "epoch": 0.013517982161015409, "grad_norm": 0.376953125, "learning_rate": 0.0019785045400309404, "loss": 0.3415, "step": 7624 }, { "epoch": 0.013521528326325223, "grad_norm": 0.68359375, "learning_rate": 0.0019784916077262666, "loss": 0.2489, "step": 7626 }, { "epoch": 0.01352507449163504, "grad_norm": 0.484375, "learning_rate": 0.001978478671579581, "loss": 0.2358, "step": 7628 }, { "epoch": 0.013528620656944854, "grad_norm": 0.53125, "learning_rate": 0.00197846573159094, "loss": 0.2224, "step": 7630 }, { "epoch": 0.013532166822254669, "grad_norm": 3.578125, "learning_rate": 0.0019784527877604, "loss": 0.2525, "step": 7632 }, { "epoch": 0.013535712987564485, "grad_norm": 1.6171875, "learning_rate": 0.001978439840088018, "loss": 0.5032, "step": 7634 }, { "epoch": 0.0135392591528743, "grad_norm": 0.57421875, "learning_rate": 0.001978426888573851, "loss": 0.2922, "step": 7636 }, { "epoch": 0.013542805318184116, "grad_norm": 0.55078125, "learning_rate": 0.001978413933217954, "loss": 0.354, "step": 7638 }, { "epoch": 0.01354635148349393, "grad_norm": 1.5234375, "learning_rate": 0.001978400974020385, "loss": 0.53, "step": 7640 }, { "epoch": 0.013549897648803745, "grad_norm": 0.49609375, "learning_rate": 0.0019783880109812005, "loss": 0.222, "step": 7642 }, { "epoch": 0.013553443814113561, "grad_norm": 0.353515625, "learning_rate": 0.001978375044100457, "loss": 0.254, "step": 7644 }, { "epoch": 0.013556989979423376, "grad_norm": 0.59765625, "learning_rate": 0.0019783620733782115, "loss": 0.189, "step": 7646 }, { "epoch": 0.01356053614473319, "grad_norm": 0.53125, "learning_rate": 0.0019783490988145203, "loss": 0.3683, "step": 7648 }, { "epoch": 0.013564082310043007, "grad_norm": 0.455078125, "learning_rate": 0.00197833612040944, "loss": 0.2526, "step": 7650 }, { "epoch": 0.013567628475352821, "grad_norm": 0.267578125, "learning_rate": 0.001978323138163028, "loss": 0.2494, "step": 7652 }, { "epoch": 0.013571174640662636, "grad_norm": 0.6484375, "learning_rate": 0.001978310152075341, "loss": 0.3841, "step": 7654 }, { "epoch": 0.013574720805972452, "grad_norm": 1.1328125, "learning_rate": 0.0019782971621464356, "loss": 0.3076, "step": 7656 }, { "epoch": 0.013578266971282267, "grad_norm": 0.330078125, "learning_rate": 0.0019782841683763683, "loss": 0.2062, "step": 7658 }, { "epoch": 0.013581813136592081, "grad_norm": 0.73046875, "learning_rate": 0.0019782711707651965, "loss": 0.3023, "step": 7660 }, { "epoch": 0.013585359301901898, "grad_norm": 0.5078125, "learning_rate": 0.0019782581693129765, "loss": 0.2293, "step": 7662 }, { "epoch": 0.013588905467211712, "grad_norm": 1.03125, "learning_rate": 0.0019782451640197652, "loss": 0.2587, "step": 7664 }, { "epoch": 0.013592451632521527, "grad_norm": 0.4296875, "learning_rate": 0.0019782321548856203, "loss": 0.2545, "step": 7666 }, { "epoch": 0.013595997797831343, "grad_norm": 1.0390625, "learning_rate": 0.0019782191419105977, "loss": 0.2582, "step": 7668 }, { "epoch": 0.013599543963141158, "grad_norm": 0.322265625, "learning_rate": 0.001978206125094755, "loss": 0.1954, "step": 7670 }, { "epoch": 0.013603090128450974, "grad_norm": 0.47265625, "learning_rate": 0.0019781931044381483, "loss": 0.2613, "step": 7672 }, { "epoch": 0.013606636293760788, "grad_norm": 0.359375, "learning_rate": 0.0019781800799408356, "loss": 0.3365, "step": 7674 }, { "epoch": 0.013610182459070603, "grad_norm": 0.39453125, "learning_rate": 0.0019781670516028733, "loss": 0.1916, "step": 7676 }, { "epoch": 0.01361372862438042, "grad_norm": 0.5546875, "learning_rate": 0.001978154019424318, "loss": 0.2629, "step": 7678 }, { "epoch": 0.013617274789690234, "grad_norm": 0.75, "learning_rate": 0.0019781409834052272, "loss": 0.2863, "step": 7680 }, { "epoch": 0.013620820955000048, "grad_norm": 0.322265625, "learning_rate": 0.0019781279435456584, "loss": 0.2385, "step": 7682 }, { "epoch": 0.013624367120309865, "grad_norm": 0.3984375, "learning_rate": 0.001978114899845667, "loss": 0.2907, "step": 7684 }, { "epoch": 0.013627913285619679, "grad_norm": 0.46875, "learning_rate": 0.001978101852305312, "loss": 0.2181, "step": 7686 }, { "epoch": 0.013631459450929494, "grad_norm": 0.80859375, "learning_rate": 0.0019780888009246493, "loss": 0.2996, "step": 7688 }, { "epoch": 0.01363500561623931, "grad_norm": 0.421875, "learning_rate": 0.0019780757457037363, "loss": 0.2227, "step": 7690 }, { "epoch": 0.013638551781549125, "grad_norm": 0.5078125, "learning_rate": 0.00197806268664263, "loss": 0.2362, "step": 7692 }, { "epoch": 0.013642097946858939, "grad_norm": 0.369140625, "learning_rate": 0.0019780496237413875, "loss": 0.2549, "step": 7694 }, { "epoch": 0.013645644112168755, "grad_norm": 0.58203125, "learning_rate": 0.001978036557000066, "loss": 0.2283, "step": 7696 }, { "epoch": 0.01364919027747857, "grad_norm": 0.3125, "learning_rate": 0.0019780234864187228, "loss": 0.4069, "step": 7698 }, { "epoch": 0.013652736442788384, "grad_norm": 1.625, "learning_rate": 0.0019780104119974146, "loss": 0.2306, "step": 7700 }, { "epoch": 0.0136562826080982, "grad_norm": 1.734375, "learning_rate": 0.001977997333736199, "loss": 0.2526, "step": 7702 }, { "epoch": 0.013659828773408015, "grad_norm": 0.333984375, "learning_rate": 0.0019779842516351328, "loss": 0.2475, "step": 7704 }, { "epoch": 0.013663374938717832, "grad_norm": 0.255859375, "learning_rate": 0.0019779711656942736, "loss": 0.1859, "step": 7706 }, { "epoch": 0.013666921104027646, "grad_norm": 0.439453125, "learning_rate": 0.0019779580759136787, "loss": 0.2434, "step": 7708 }, { "epoch": 0.01367046726933746, "grad_norm": 2.21875, "learning_rate": 0.001977944982293405, "loss": 0.3137, "step": 7710 }, { "epoch": 0.013674013434647277, "grad_norm": 0.474609375, "learning_rate": 0.0019779318848335103, "loss": 0.2193, "step": 7712 }, { "epoch": 0.013677559599957092, "grad_norm": 0.37890625, "learning_rate": 0.0019779187835340514, "loss": 0.2377, "step": 7714 }, { "epoch": 0.013681105765266906, "grad_norm": 1.40625, "learning_rate": 0.001977905678395085, "loss": 0.2706, "step": 7716 }, { "epoch": 0.013684651930576722, "grad_norm": 0.337890625, "learning_rate": 0.0019778925694166703, "loss": 0.2012, "step": 7718 }, { "epoch": 0.013688198095886537, "grad_norm": 0.4140625, "learning_rate": 0.0019778794565988625, "loss": 0.3076, "step": 7720 }, { "epoch": 0.013691744261196351, "grad_norm": 0.88671875, "learning_rate": 0.00197786633994172, "loss": 0.2691, "step": 7722 }, { "epoch": 0.013695290426506168, "grad_norm": 0.271484375, "learning_rate": 0.0019778532194453004, "loss": 0.2075, "step": 7724 }, { "epoch": 0.013698836591815982, "grad_norm": 0.48828125, "learning_rate": 0.0019778400951096604, "loss": 0.5053, "step": 7726 }, { "epoch": 0.013702382757125797, "grad_norm": 1.71875, "learning_rate": 0.001977826966934858, "loss": 0.3419, "step": 7728 }, { "epoch": 0.013705928922435613, "grad_norm": 1.015625, "learning_rate": 0.0019778138349209502, "loss": 0.2519, "step": 7730 }, { "epoch": 0.013709475087745428, "grad_norm": 0.439453125, "learning_rate": 0.0019778006990679945, "loss": 0.212, "step": 7732 }, { "epoch": 0.013713021253055242, "grad_norm": 1.7109375, "learning_rate": 0.0019777875593760485, "loss": 0.2716, "step": 7734 }, { "epoch": 0.013716567418365059, "grad_norm": 2.34375, "learning_rate": 0.0019777744158451698, "loss": 0.225, "step": 7736 }, { "epoch": 0.013720113583674873, "grad_norm": 0.66796875, "learning_rate": 0.0019777612684754153, "loss": 0.273, "step": 7738 }, { "epoch": 0.01372365974898469, "grad_norm": 0.72265625, "learning_rate": 0.001977748117266843, "loss": 0.3076, "step": 7740 }, { "epoch": 0.013727205914294504, "grad_norm": 0.455078125, "learning_rate": 0.0019777349622195103, "loss": 0.2543, "step": 7742 }, { "epoch": 0.013730752079604319, "grad_norm": 0.703125, "learning_rate": 0.001977721803333475, "loss": 0.2176, "step": 7744 }, { "epoch": 0.013734298244914135, "grad_norm": 0.55078125, "learning_rate": 0.001977708640608794, "loss": 0.2822, "step": 7746 }, { "epoch": 0.01373784441022395, "grad_norm": 0.84765625, "learning_rate": 0.0019776954740455257, "loss": 0.2354, "step": 7748 }, { "epoch": 0.013741390575533764, "grad_norm": 2.609375, "learning_rate": 0.0019776823036437266, "loss": 0.2701, "step": 7750 }, { "epoch": 0.01374493674084358, "grad_norm": 2.34375, "learning_rate": 0.0019776691294034554, "loss": 0.3859, "step": 7752 }, { "epoch": 0.013748482906153395, "grad_norm": 0.56640625, "learning_rate": 0.001977655951324769, "loss": 0.306, "step": 7754 }, { "epoch": 0.01375202907146321, "grad_norm": 0.40625, "learning_rate": 0.0019776427694077254, "loss": 0.2485, "step": 7756 }, { "epoch": 0.013755575236773026, "grad_norm": 0.212890625, "learning_rate": 0.001977629583652382, "loss": 0.3498, "step": 7758 }, { "epoch": 0.01375912140208284, "grad_norm": 0.359375, "learning_rate": 0.0019776163940587966, "loss": 0.2204, "step": 7760 }, { "epoch": 0.013762667567392655, "grad_norm": 0.474609375, "learning_rate": 0.0019776032006270272, "loss": 0.1741, "step": 7762 }, { "epoch": 0.013766213732702471, "grad_norm": 0.609375, "learning_rate": 0.001977590003357131, "loss": 0.3222, "step": 7764 }, { "epoch": 0.013769759898012286, "grad_norm": 1.6796875, "learning_rate": 0.001977576802249166, "loss": 0.3199, "step": 7766 }, { "epoch": 0.0137733060633221, "grad_norm": 0.5, "learning_rate": 0.0019775635973031894, "loss": 0.2006, "step": 7768 }, { "epoch": 0.013776852228631916, "grad_norm": 1.0546875, "learning_rate": 0.0019775503885192595, "loss": 0.3139, "step": 7770 }, { "epoch": 0.013780398393941731, "grad_norm": 0.337890625, "learning_rate": 0.001977537175897434, "loss": 0.2011, "step": 7772 }, { "epoch": 0.013783944559251547, "grad_norm": 2.734375, "learning_rate": 0.0019775239594377707, "loss": 0.3913, "step": 7774 }, { "epoch": 0.013787490724561362, "grad_norm": 0.7265625, "learning_rate": 0.001977510739140327, "loss": 0.2466, "step": 7776 }, { "epoch": 0.013791036889871176, "grad_norm": 0.443359375, "learning_rate": 0.0019774975150051617, "loss": 0.3482, "step": 7778 }, { "epoch": 0.013794583055180993, "grad_norm": 1.484375, "learning_rate": 0.0019774842870323313, "loss": 0.3375, "step": 7780 }, { "epoch": 0.013798129220490807, "grad_norm": 0.8125, "learning_rate": 0.001977471055221895, "loss": 0.2647, "step": 7782 }, { "epoch": 0.013801675385800622, "grad_norm": 1.015625, "learning_rate": 0.0019774578195739098, "loss": 0.2908, "step": 7784 }, { "epoch": 0.013805221551110438, "grad_norm": 0.462890625, "learning_rate": 0.0019774445800884335, "loss": 0.2689, "step": 7786 }, { "epoch": 0.013808767716420253, "grad_norm": 0.4453125, "learning_rate": 0.0019774313367655243, "loss": 0.3056, "step": 7788 }, { "epoch": 0.013812313881730067, "grad_norm": 0.490234375, "learning_rate": 0.0019774180896052406, "loss": 0.1835, "step": 7790 }, { "epoch": 0.013815860047039883, "grad_norm": 0.375, "learning_rate": 0.0019774048386076394, "loss": 0.2325, "step": 7792 }, { "epoch": 0.013819406212349698, "grad_norm": 0.294921875, "learning_rate": 0.001977391583772779, "loss": 0.2261, "step": 7794 }, { "epoch": 0.013822952377659512, "grad_norm": 0.412109375, "learning_rate": 0.0019773783251007177, "loss": 0.2229, "step": 7796 }, { "epoch": 0.013826498542969329, "grad_norm": 0.73046875, "learning_rate": 0.001977365062591513, "loss": 0.2722, "step": 7798 }, { "epoch": 0.013830044708279143, "grad_norm": 0.94140625, "learning_rate": 0.0019773517962452234, "loss": 0.2715, "step": 7800 }, { "epoch": 0.013833590873588958, "grad_norm": 1.0390625, "learning_rate": 0.0019773385260619066, "loss": 0.2004, "step": 7802 }, { "epoch": 0.013837137038898774, "grad_norm": 0.5390625, "learning_rate": 0.001977325252041621, "loss": 0.3634, "step": 7804 }, { "epoch": 0.013840683204208589, "grad_norm": 0.87109375, "learning_rate": 0.001977311974184424, "loss": 0.2166, "step": 7806 }, { "epoch": 0.013844229369518405, "grad_norm": 1.7265625, "learning_rate": 0.001977298692490374, "loss": 0.3054, "step": 7808 }, { "epoch": 0.01384777553482822, "grad_norm": 0.5859375, "learning_rate": 0.001977285406959529, "loss": 0.313, "step": 7810 }, { "epoch": 0.013851321700138034, "grad_norm": 0.87109375, "learning_rate": 0.001977272117591948, "loss": 0.2316, "step": 7812 }, { "epoch": 0.01385486786544785, "grad_norm": 0.421875, "learning_rate": 0.0019772588243876874, "loss": 0.2951, "step": 7814 }, { "epoch": 0.013858414030757665, "grad_norm": 0.3046875, "learning_rate": 0.0019772455273468067, "loss": 0.2366, "step": 7816 }, { "epoch": 0.01386196019606748, "grad_norm": 0.294921875, "learning_rate": 0.0019772322264693634, "loss": 0.232, "step": 7818 }, { "epoch": 0.013865506361377296, "grad_norm": 2.3125, "learning_rate": 0.001977218921755416, "loss": 0.3022, "step": 7820 }, { "epoch": 0.01386905252668711, "grad_norm": 0.609375, "learning_rate": 0.001977205613205023, "loss": 0.2661, "step": 7822 }, { "epoch": 0.013872598691996925, "grad_norm": 1.3671875, "learning_rate": 0.0019771923008182414, "loss": 0.2097, "step": 7824 }, { "epoch": 0.013876144857306741, "grad_norm": 1.6328125, "learning_rate": 0.001977178984595131, "loss": 0.3737, "step": 7826 }, { "epoch": 0.013879691022616556, "grad_norm": 0.482421875, "learning_rate": 0.001977165664535749, "loss": 0.1889, "step": 7828 }, { "epoch": 0.01388323718792637, "grad_norm": 0.66796875, "learning_rate": 0.0019771523406401535, "loss": 0.592, "step": 7830 }, { "epoch": 0.013886783353236187, "grad_norm": 1.40625, "learning_rate": 0.0019771390129084037, "loss": 0.2485, "step": 7832 }, { "epoch": 0.013890329518546001, "grad_norm": 3.953125, "learning_rate": 0.001977125681340557, "loss": 0.351, "step": 7834 }, { "epoch": 0.013893875683855816, "grad_norm": 1.28125, "learning_rate": 0.001977112345936672, "loss": 0.3311, "step": 7836 }, { "epoch": 0.013897421849165632, "grad_norm": 1.3671875, "learning_rate": 0.0019770990066968076, "loss": 0.258, "step": 7838 }, { "epoch": 0.013900968014475447, "grad_norm": 0.30078125, "learning_rate": 0.001977085663621021, "loss": 0.2161, "step": 7840 }, { "epoch": 0.013904514179785261, "grad_norm": 0.61328125, "learning_rate": 0.0019770723167093717, "loss": 0.2532, "step": 7842 }, { "epoch": 0.013908060345095077, "grad_norm": 0.7578125, "learning_rate": 0.0019770589659619175, "loss": 0.2677, "step": 7844 }, { "epoch": 0.013911606510404892, "grad_norm": 1.0390625, "learning_rate": 0.0019770456113787165, "loss": 0.2911, "step": 7846 }, { "epoch": 0.013915152675714708, "grad_norm": 4.5625, "learning_rate": 0.0019770322529598277, "loss": 0.2344, "step": 7848 }, { "epoch": 0.013918698841024523, "grad_norm": 0.71484375, "learning_rate": 0.001977018890705309, "loss": 0.4691, "step": 7850 }, { "epoch": 0.013922245006334337, "grad_norm": 1.03125, "learning_rate": 0.0019770055246152193, "loss": 0.2439, "step": 7852 }, { "epoch": 0.013925791171644154, "grad_norm": 1.4140625, "learning_rate": 0.001976992154689617, "loss": 0.2845, "step": 7854 }, { "epoch": 0.013929337336953968, "grad_norm": 0.6015625, "learning_rate": 0.00197697878092856, "loss": 0.402, "step": 7856 }, { "epoch": 0.013932883502263783, "grad_norm": 0.2490234375, "learning_rate": 0.001976965403332108, "loss": 0.2305, "step": 7858 }, { "epoch": 0.013936429667573599, "grad_norm": 0.4765625, "learning_rate": 0.001976952021900318, "loss": 0.2462, "step": 7860 }, { "epoch": 0.013939975832883414, "grad_norm": 0.5078125, "learning_rate": 0.0019769386366332497, "loss": 0.2552, "step": 7862 }, { "epoch": 0.013943521998193228, "grad_norm": 1.8125, "learning_rate": 0.001976925247530961, "loss": 0.2692, "step": 7864 }, { "epoch": 0.013947068163503044, "grad_norm": 0.9375, "learning_rate": 0.0019769118545935106, "loss": 0.2507, "step": 7866 }, { "epoch": 0.013950614328812859, "grad_norm": 0.703125, "learning_rate": 0.001976898457820957, "loss": 0.2629, "step": 7868 }, { "epoch": 0.013954160494122674, "grad_norm": 0.92578125, "learning_rate": 0.0019768850572133593, "loss": 0.2842, "step": 7870 }, { "epoch": 0.01395770665943249, "grad_norm": 0.244140625, "learning_rate": 0.0019768716527707756, "loss": 0.2646, "step": 7872 }, { "epoch": 0.013961252824742304, "grad_norm": 0.431640625, "learning_rate": 0.0019768582444932644, "loss": 0.225, "step": 7874 }, { "epoch": 0.013964798990052119, "grad_norm": 0.396484375, "learning_rate": 0.0019768448323808844, "loss": 0.2122, "step": 7876 }, { "epoch": 0.013968345155361935, "grad_norm": 3.25, "learning_rate": 0.001976831416433695, "loss": 0.2633, "step": 7878 }, { "epoch": 0.01397189132067175, "grad_norm": 0.44140625, "learning_rate": 0.001976817996651754, "loss": 0.3261, "step": 7880 }, { "epoch": 0.013975437485981566, "grad_norm": 0.52734375, "learning_rate": 0.00197680457303512, "loss": 0.2683, "step": 7882 }, { "epoch": 0.01397898365129138, "grad_norm": 5.46875, "learning_rate": 0.0019767911455838526, "loss": 0.339, "step": 7884 }, { "epoch": 0.013982529816601195, "grad_norm": 0.396484375, "learning_rate": 0.00197677771429801, "loss": 0.2727, "step": 7886 }, { "epoch": 0.013986075981911011, "grad_norm": 0.380859375, "learning_rate": 0.001976764279177651, "loss": 0.2702, "step": 7888 }, { "epoch": 0.013989622147220826, "grad_norm": 0.85546875, "learning_rate": 0.0019767508402228347, "loss": 0.1854, "step": 7890 }, { "epoch": 0.01399316831253064, "grad_norm": 1.46875, "learning_rate": 0.0019767373974336187, "loss": 0.2519, "step": 7892 }, { "epoch": 0.013996714477840457, "grad_norm": 0.65625, "learning_rate": 0.001976723950810063, "loss": 0.2901, "step": 7894 }, { "epoch": 0.014000260643150271, "grad_norm": 0.423828125, "learning_rate": 0.001976710500352226, "loss": 0.2789, "step": 7896 }, { "epoch": 0.014003806808460086, "grad_norm": 0.462890625, "learning_rate": 0.0019766970460601664, "loss": 0.3878, "step": 7898 }, { "epoch": 0.014007352973769902, "grad_norm": 0.404296875, "learning_rate": 0.0019766835879339436, "loss": 0.2027, "step": 7900 }, { "epoch": 0.014010899139079717, "grad_norm": 0.56640625, "learning_rate": 0.0019766701259736155, "loss": 0.3059, "step": 7902 }, { "epoch": 0.014014445304389531, "grad_norm": 0.50390625, "learning_rate": 0.001976656660179242, "loss": 0.3784, "step": 7904 }, { "epoch": 0.014017991469699348, "grad_norm": 0.453125, "learning_rate": 0.0019766431905508808, "loss": 0.2456, "step": 7906 }, { "epoch": 0.014021537635009162, "grad_norm": 0.337890625, "learning_rate": 0.0019766297170885918, "loss": 0.2295, "step": 7908 }, { "epoch": 0.014025083800318977, "grad_norm": 0.41015625, "learning_rate": 0.001976616239792434, "loss": 0.2423, "step": 7910 }, { "epoch": 0.014028629965628793, "grad_norm": 0.8828125, "learning_rate": 0.0019766027586624654, "loss": 0.2471, "step": 7912 }, { "epoch": 0.014032176130938608, "grad_norm": 0.439453125, "learning_rate": 0.001976589273698746, "loss": 0.2488, "step": 7914 }, { "epoch": 0.014035722296248424, "grad_norm": 0.53125, "learning_rate": 0.001976575784901334, "loss": 0.2615, "step": 7916 }, { "epoch": 0.014039268461558238, "grad_norm": 0.7890625, "learning_rate": 0.001976562292270289, "loss": 0.2338, "step": 7918 }, { "epoch": 0.014042814626868053, "grad_norm": 0.296875, "learning_rate": 0.001976548795805669, "loss": 0.2023, "step": 7920 }, { "epoch": 0.01404636079217787, "grad_norm": 2.03125, "learning_rate": 0.0019765352955075344, "loss": 0.3934, "step": 7922 }, { "epoch": 0.014049906957487684, "grad_norm": 0.53515625, "learning_rate": 0.0019765217913759433, "loss": 0.2818, "step": 7924 }, { "epoch": 0.014053453122797498, "grad_norm": 1.296875, "learning_rate": 0.001976508283410955, "loss": 0.2061, "step": 7926 }, { "epoch": 0.014056999288107315, "grad_norm": 0.32421875, "learning_rate": 0.001976494771612629, "loss": 0.2473, "step": 7928 }, { "epoch": 0.01406054545341713, "grad_norm": 0.61328125, "learning_rate": 0.0019764812559810237, "loss": 0.2271, "step": 7930 }, { "epoch": 0.014064091618726944, "grad_norm": 2.28125, "learning_rate": 0.0019764677365161987, "loss": 0.2527, "step": 7932 }, { "epoch": 0.01406763778403676, "grad_norm": 1.125, "learning_rate": 0.0019764542132182125, "loss": 0.3249, "step": 7934 }, { "epoch": 0.014071183949346575, "grad_norm": 0.388671875, "learning_rate": 0.001976440686087125, "loss": 0.251, "step": 7936 }, { "epoch": 0.014074730114656389, "grad_norm": 2.828125, "learning_rate": 0.0019764271551229945, "loss": 0.2776, "step": 7938 }, { "epoch": 0.014078276279966205, "grad_norm": 1.3046875, "learning_rate": 0.0019764136203258816, "loss": 0.3305, "step": 7940 }, { "epoch": 0.01408182244527602, "grad_norm": 0.349609375, "learning_rate": 0.0019764000816958442, "loss": 0.2662, "step": 7942 }, { "epoch": 0.014085368610585835, "grad_norm": 2.234375, "learning_rate": 0.001976386539232942, "loss": 0.3798, "step": 7944 }, { "epoch": 0.01408891477589565, "grad_norm": 0.9296875, "learning_rate": 0.001976372992937234, "loss": 0.3456, "step": 7946 }, { "epoch": 0.014092460941205465, "grad_norm": 0.5078125, "learning_rate": 0.0019763594428087792, "loss": 0.2858, "step": 7948 }, { "epoch": 0.014096007106515282, "grad_norm": 0.3984375, "learning_rate": 0.001976345888847638, "loss": 0.3072, "step": 7950 }, { "epoch": 0.014099553271825096, "grad_norm": 0.8671875, "learning_rate": 0.0019763323310538687, "loss": 0.2565, "step": 7952 }, { "epoch": 0.01410309943713491, "grad_norm": 0.6875, "learning_rate": 0.0019763187694275307, "loss": 0.2388, "step": 7954 }, { "epoch": 0.014106645602444727, "grad_norm": 0.271484375, "learning_rate": 0.0019763052039686833, "loss": 0.2174, "step": 7956 }, { "epoch": 0.014110191767754542, "grad_norm": 0.60546875, "learning_rate": 0.001976291634677386, "loss": 0.2882, "step": 7958 }, { "epoch": 0.014113737933064356, "grad_norm": 1.0859375, "learning_rate": 0.001976278061553698, "loss": 0.2814, "step": 7960 }, { "epoch": 0.014117284098374172, "grad_norm": 0.43359375, "learning_rate": 0.0019762644845976794, "loss": 0.2552, "step": 7962 }, { "epoch": 0.014120830263683987, "grad_norm": 0.52734375, "learning_rate": 0.0019762509038093886, "loss": 0.282, "step": 7964 }, { "epoch": 0.014124376428993802, "grad_norm": 3.125, "learning_rate": 0.001976237319188885, "loss": 0.4814, "step": 7966 }, { "epoch": 0.014127922594303618, "grad_norm": 0.275390625, "learning_rate": 0.001976223730736229, "loss": 0.2168, "step": 7968 }, { "epoch": 0.014131468759613432, "grad_norm": 0.396484375, "learning_rate": 0.001976210138451479, "loss": 0.238, "step": 7970 }, { "epoch": 0.014135014924923247, "grad_norm": 0.482421875, "learning_rate": 0.0019761965423346945, "loss": 0.2386, "step": 7972 }, { "epoch": 0.014138561090233063, "grad_norm": 0.73046875, "learning_rate": 0.0019761829423859357, "loss": 0.4044, "step": 7974 }, { "epoch": 0.014142107255542878, "grad_norm": 0.94921875, "learning_rate": 0.0019761693386052613, "loss": 0.2591, "step": 7976 }, { "epoch": 0.014145653420852692, "grad_norm": 0.263671875, "learning_rate": 0.001976155730992731, "loss": 0.3086, "step": 7978 }, { "epoch": 0.014149199586162509, "grad_norm": 1.0859375, "learning_rate": 0.001976142119548405, "loss": 0.3166, "step": 7980 }, { "epoch": 0.014152745751472323, "grad_norm": 0.318359375, "learning_rate": 0.0019761285042723424, "loss": 0.2635, "step": 7982 }, { "epoch": 0.01415629191678214, "grad_norm": 0.3984375, "learning_rate": 0.0019761148851646024, "loss": 0.2934, "step": 7984 }, { "epoch": 0.014159838082091954, "grad_norm": 0.5546875, "learning_rate": 0.0019761012622252446, "loss": 0.2311, "step": 7986 }, { "epoch": 0.014163384247401769, "grad_norm": 0.369140625, "learning_rate": 0.001976087635454329, "loss": 0.2768, "step": 7988 }, { "epoch": 0.014166930412711585, "grad_norm": 1.1328125, "learning_rate": 0.001976074004851915, "loss": 0.3145, "step": 7990 }, { "epoch": 0.0141704765780214, "grad_norm": 0.953125, "learning_rate": 0.001976060370418062, "loss": 0.2376, "step": 7992 }, { "epoch": 0.014174022743331214, "grad_norm": 0.41015625, "learning_rate": 0.00197604673215283, "loss": 0.2532, "step": 7994 }, { "epoch": 0.01417756890864103, "grad_norm": 0.330078125, "learning_rate": 0.0019760330900562783, "loss": 0.2099, "step": 7996 }, { "epoch": 0.014181115073950845, "grad_norm": 0.64453125, "learning_rate": 0.001976019444128467, "loss": 0.3335, "step": 7998 }, { "epoch": 0.01418466123926066, "grad_norm": 0.3359375, "learning_rate": 0.001976005794369455, "loss": 0.2508, "step": 8000 }, { "epoch": 0.014188207404570476, "grad_norm": 0.59375, "learning_rate": 0.001975992140779303, "loss": 0.2161, "step": 8002 }, { "epoch": 0.01419175356988029, "grad_norm": 0.62890625, "learning_rate": 0.00197597848335807, "loss": 0.2582, "step": 8004 }, { "epoch": 0.014195299735190105, "grad_norm": 0.296875, "learning_rate": 0.001975964822105816, "loss": 0.2452, "step": 8006 }, { "epoch": 0.014198845900499921, "grad_norm": 0.32421875, "learning_rate": 0.0019759511570226007, "loss": 0.2437, "step": 8008 }, { "epoch": 0.014202392065809736, "grad_norm": 0.353515625, "learning_rate": 0.001975937488108484, "loss": 0.2512, "step": 8010 }, { "epoch": 0.01420593823111955, "grad_norm": 0.93359375, "learning_rate": 0.001975923815363525, "loss": 0.2971, "step": 8012 }, { "epoch": 0.014209484396429366, "grad_norm": 0.322265625, "learning_rate": 0.0019759101387877846, "loss": 0.2463, "step": 8014 }, { "epoch": 0.014213030561739181, "grad_norm": 3.09375, "learning_rate": 0.0019758964583813216, "loss": 0.3887, "step": 8016 }, { "epoch": 0.014216576727048997, "grad_norm": 0.58984375, "learning_rate": 0.0019758827741441966, "loss": 0.2735, "step": 8018 }, { "epoch": 0.014220122892358812, "grad_norm": 0.392578125, "learning_rate": 0.001975869086076469, "loss": 0.219, "step": 8020 }, { "epoch": 0.014223669057668626, "grad_norm": 1.75, "learning_rate": 0.0019758553941781986, "loss": 0.4032, "step": 8022 }, { "epoch": 0.014227215222978443, "grad_norm": 2.296875, "learning_rate": 0.0019758416984494457, "loss": 0.4561, "step": 8024 }, { "epoch": 0.014230761388288257, "grad_norm": 0.177734375, "learning_rate": 0.0019758279988902703, "loss": 0.2875, "step": 8026 }, { "epoch": 0.014234307553598072, "grad_norm": 0.625, "learning_rate": 0.0019758142955007313, "loss": 0.2761, "step": 8028 }, { "epoch": 0.014237853718907888, "grad_norm": 0.419921875, "learning_rate": 0.0019758005882808895, "loss": 0.3136, "step": 8030 }, { "epoch": 0.014241399884217703, "grad_norm": 0.50390625, "learning_rate": 0.001975786877230805, "loss": 0.2373, "step": 8032 }, { "epoch": 0.014244946049527517, "grad_norm": 0.294921875, "learning_rate": 0.001975773162350537, "loss": 0.2776, "step": 8034 }, { "epoch": 0.014248492214837333, "grad_norm": 2.4375, "learning_rate": 0.001975759443640146, "loss": 0.4026, "step": 8036 }, { "epoch": 0.014252038380147148, "grad_norm": 0.5546875, "learning_rate": 0.0019757457210996918, "loss": 0.2715, "step": 8038 }, { "epoch": 0.014255584545456963, "grad_norm": 0.96875, "learning_rate": 0.001975731994729235, "loss": 0.2807, "step": 8040 }, { "epoch": 0.014259130710766779, "grad_norm": 0.431640625, "learning_rate": 0.0019757182645288346, "loss": 0.2532, "step": 8042 }, { "epoch": 0.014262676876076593, "grad_norm": 0.337890625, "learning_rate": 0.0019757045304985513, "loss": 0.3072, "step": 8044 }, { "epoch": 0.014266223041386408, "grad_norm": 0.43359375, "learning_rate": 0.001975690792638445, "loss": 0.2501, "step": 8046 }, { "epoch": 0.014269769206696224, "grad_norm": 0.47265625, "learning_rate": 0.0019756770509485764, "loss": 0.3048, "step": 8048 }, { "epoch": 0.014273315372006039, "grad_norm": 0.765625, "learning_rate": 0.0019756633054290045, "loss": 0.3172, "step": 8050 }, { "epoch": 0.014276861537315855, "grad_norm": 0.291015625, "learning_rate": 0.00197564955607979, "loss": 0.2048, "step": 8052 }, { "epoch": 0.01428040770262567, "grad_norm": 2.3125, "learning_rate": 0.001975635802900993, "loss": 0.39, "step": 8054 }, { "epoch": 0.014283953867935484, "grad_norm": 0.50390625, "learning_rate": 0.0019756220458926736, "loss": 0.2126, "step": 8056 }, { "epoch": 0.0142875000332453, "grad_norm": 1.140625, "learning_rate": 0.0019756082850548922, "loss": 0.325, "step": 8058 }, { "epoch": 0.014291046198555115, "grad_norm": 0.53515625, "learning_rate": 0.0019755945203877084, "loss": 0.2655, "step": 8060 }, { "epoch": 0.01429459236386493, "grad_norm": 0.359375, "learning_rate": 0.001975580751891183, "loss": 0.2543, "step": 8062 }, { "epoch": 0.014298138529174746, "grad_norm": 0.470703125, "learning_rate": 0.0019755669795653765, "loss": 0.2692, "step": 8064 }, { "epoch": 0.01430168469448456, "grad_norm": 0.51953125, "learning_rate": 0.0019755532034103477, "loss": 0.2488, "step": 8066 }, { "epoch": 0.014305230859794375, "grad_norm": 0.40234375, "learning_rate": 0.001975539423426158, "loss": 0.2301, "step": 8068 }, { "epoch": 0.014308777025104191, "grad_norm": 0.33203125, "learning_rate": 0.001975525639612868, "loss": 0.2632, "step": 8070 }, { "epoch": 0.014312323190414006, "grad_norm": 0.220703125, "learning_rate": 0.001975511851970537, "loss": 0.2281, "step": 8072 }, { "epoch": 0.01431586935572382, "grad_norm": 0.421875, "learning_rate": 0.001975498060499226, "loss": 0.2813, "step": 8074 }, { "epoch": 0.014319415521033637, "grad_norm": 0.265625, "learning_rate": 0.0019754842651989943, "loss": 0.2185, "step": 8076 }, { "epoch": 0.014322961686343451, "grad_norm": 3.625, "learning_rate": 0.0019754704660699036, "loss": 0.3642, "step": 8078 }, { "epoch": 0.014326507851653266, "grad_norm": 0.380859375, "learning_rate": 0.0019754566631120136, "loss": 0.2144, "step": 8080 }, { "epoch": 0.014330054016963082, "grad_norm": 0.302734375, "learning_rate": 0.0019754428563253843, "loss": 0.2512, "step": 8082 }, { "epoch": 0.014333600182272897, "grad_norm": 0.384765625, "learning_rate": 0.001975429045710077, "loss": 0.2246, "step": 8084 }, { "epoch": 0.014337146347582713, "grad_norm": 0.458984375, "learning_rate": 0.001975415231266151, "loss": 0.2908, "step": 8086 }, { "epoch": 0.014340692512892527, "grad_norm": 1.4375, "learning_rate": 0.001975401412993668, "loss": 0.4582, "step": 8088 }, { "epoch": 0.014344238678202342, "grad_norm": 1.0, "learning_rate": 0.001975387590892687, "loss": 0.2912, "step": 8090 }, { "epoch": 0.014347784843512158, "grad_norm": 0.51171875, "learning_rate": 0.0019753737649632697, "loss": 0.2078, "step": 8092 }, { "epoch": 0.014351331008821973, "grad_norm": 0.3828125, "learning_rate": 0.0019753599352054754, "loss": 0.2609, "step": 8094 }, { "epoch": 0.014354877174131787, "grad_norm": 1.5703125, "learning_rate": 0.0019753461016193655, "loss": 0.3123, "step": 8096 }, { "epoch": 0.014358423339441604, "grad_norm": 0.69921875, "learning_rate": 0.0019753322642050005, "loss": 0.2319, "step": 8098 }, { "epoch": 0.014361969504751418, "grad_norm": 0.427734375, "learning_rate": 0.0019753184229624405, "loss": 0.214, "step": 8100 }, { "epoch": 0.014365515670061233, "grad_norm": 0.83203125, "learning_rate": 0.0019753045778917464, "loss": 0.2201, "step": 8102 }, { "epoch": 0.014369061835371049, "grad_norm": 0.30078125, "learning_rate": 0.0019752907289929777, "loss": 0.2869, "step": 8104 }, { "epoch": 0.014372608000680864, "grad_norm": 2.890625, "learning_rate": 0.0019752768762661965, "loss": 0.3829, "step": 8106 }, { "epoch": 0.014376154165990678, "grad_norm": 0.318359375, "learning_rate": 0.001975263019711462, "loss": 0.233, "step": 8108 }, { "epoch": 0.014379700331300494, "grad_norm": 0.6015625, "learning_rate": 0.0019752491593288363, "loss": 0.3708, "step": 8110 }, { "epoch": 0.014383246496610309, "grad_norm": 1.3203125, "learning_rate": 0.0019752352951183786, "loss": 0.4101, "step": 8112 }, { "epoch": 0.014386792661920124, "grad_norm": 2.765625, "learning_rate": 0.0019752214270801504, "loss": 0.2547, "step": 8114 }, { "epoch": 0.01439033882722994, "grad_norm": 0.65625, "learning_rate": 0.001975207555214212, "loss": 0.2971, "step": 8116 }, { "epoch": 0.014393884992539754, "grad_norm": 0.5078125, "learning_rate": 0.0019751936795206243, "loss": 0.2023, "step": 8118 }, { "epoch": 0.01439743115784957, "grad_norm": 0.392578125, "learning_rate": 0.001975179799999448, "loss": 0.2655, "step": 8120 }, { "epoch": 0.014400977323159385, "grad_norm": 0.341796875, "learning_rate": 0.001975165916650743, "loss": 0.4227, "step": 8122 }, { "epoch": 0.0144045234884692, "grad_norm": 0.423828125, "learning_rate": 0.0019751520294745708, "loss": 0.2429, "step": 8124 }, { "epoch": 0.014408069653779016, "grad_norm": 0.5, "learning_rate": 0.001975138138470992, "loss": 0.2966, "step": 8126 }, { "epoch": 0.01441161581908883, "grad_norm": 0.44140625, "learning_rate": 0.0019751242436400673, "loss": 0.3511, "step": 8128 }, { "epoch": 0.014415161984398645, "grad_norm": 0.2578125, "learning_rate": 0.0019751103449818573, "loss": 0.2727, "step": 8130 }, { "epoch": 0.014418708149708461, "grad_norm": 0.365234375, "learning_rate": 0.0019750964424964236, "loss": 0.2762, "step": 8132 }, { "epoch": 0.014422254315018276, "grad_norm": 0.298828125, "learning_rate": 0.001975082536183826, "loss": 0.2352, "step": 8134 }, { "epoch": 0.01442580048032809, "grad_norm": 0.28125, "learning_rate": 0.0019750686260441254, "loss": 0.1915, "step": 8136 }, { "epoch": 0.014429346645637907, "grad_norm": 1.9765625, "learning_rate": 0.0019750547120773836, "loss": 0.312, "step": 8138 }, { "epoch": 0.014432892810947721, "grad_norm": 1.4609375, "learning_rate": 0.0019750407942836605, "loss": 0.5173, "step": 8140 }, { "epoch": 0.014436438976257536, "grad_norm": 0.2890625, "learning_rate": 0.001975026872663017, "loss": 0.2093, "step": 8142 }, { "epoch": 0.014439985141567352, "grad_norm": 0.421875, "learning_rate": 0.0019750129472155143, "loss": 0.2657, "step": 8144 }, { "epoch": 0.014443531306877167, "grad_norm": 0.181640625, "learning_rate": 0.0019749990179412135, "loss": 0.2081, "step": 8146 }, { "epoch": 0.014447077472186981, "grad_norm": 0.6015625, "learning_rate": 0.001974985084840175, "loss": 0.1942, "step": 8148 }, { "epoch": 0.014450623637496798, "grad_norm": 0.50390625, "learning_rate": 0.0019749711479124603, "loss": 0.2726, "step": 8150 }, { "epoch": 0.014454169802806612, "grad_norm": 0.9375, "learning_rate": 0.0019749572071581295, "loss": 0.246, "step": 8152 }, { "epoch": 0.014457715968116428, "grad_norm": 0.39453125, "learning_rate": 0.001974943262577245, "loss": 0.2227, "step": 8154 }, { "epoch": 0.014461262133426243, "grad_norm": 0.35546875, "learning_rate": 0.0019749293141698657, "loss": 0.2184, "step": 8156 }, { "epoch": 0.014464808298736058, "grad_norm": 0.373046875, "learning_rate": 0.0019749153619360547, "loss": 0.2237, "step": 8158 }, { "epoch": 0.014468354464045874, "grad_norm": 1.9375, "learning_rate": 0.0019749014058758722, "loss": 0.3866, "step": 8160 }, { "epoch": 0.014471900629355688, "grad_norm": 0.142578125, "learning_rate": 0.0019748874459893785, "loss": 0.278, "step": 8162 }, { "epoch": 0.014475446794665503, "grad_norm": 0.24609375, "learning_rate": 0.0019748734822766355, "loss": 0.1951, "step": 8164 }, { "epoch": 0.01447899295997532, "grad_norm": 1.671875, "learning_rate": 0.0019748595147377045, "loss": 0.3066, "step": 8166 }, { "epoch": 0.014482539125285134, "grad_norm": 0.75390625, "learning_rate": 0.0019748455433726457, "loss": 0.2686, "step": 8168 }, { "epoch": 0.014486085290594948, "grad_norm": 0.51171875, "learning_rate": 0.0019748315681815207, "loss": 0.312, "step": 8170 }, { "epoch": 0.014489631455904765, "grad_norm": 0.244140625, "learning_rate": 0.001974817589164391, "loss": 0.2182, "step": 8172 }, { "epoch": 0.01449317762121458, "grad_norm": 0.56640625, "learning_rate": 0.001974803606321317, "loss": 0.2493, "step": 8174 }, { "epoch": 0.014496723786524394, "grad_norm": 0.58984375, "learning_rate": 0.0019747896196523605, "loss": 0.4058, "step": 8176 }, { "epoch": 0.01450026995183421, "grad_norm": 0.263671875, "learning_rate": 0.0019747756291575817, "loss": 0.3673, "step": 8178 }, { "epoch": 0.014503816117144025, "grad_norm": 0.8125, "learning_rate": 0.0019747616348370425, "loss": 0.1943, "step": 8180 }, { "epoch": 0.01450736228245384, "grad_norm": 1.7265625, "learning_rate": 0.0019747476366908045, "loss": 0.3337, "step": 8182 }, { "epoch": 0.014510908447763655, "grad_norm": 0.2451171875, "learning_rate": 0.0019747336347189282, "loss": 0.2392, "step": 8184 }, { "epoch": 0.01451445461307347, "grad_norm": 0.67578125, "learning_rate": 0.0019747196289214754, "loss": 0.2143, "step": 8186 }, { "epoch": 0.014518000778383286, "grad_norm": 0.30078125, "learning_rate": 0.001974705619298507, "loss": 0.2118, "step": 8188 }, { "epoch": 0.0145215469436931, "grad_norm": 0.40625, "learning_rate": 0.001974691605850084, "loss": 0.2607, "step": 8190 }, { "epoch": 0.014525093109002915, "grad_norm": 0.44140625, "learning_rate": 0.001974677588576268, "loss": 0.2566, "step": 8192 }, { "epoch": 0.014528639274312732, "grad_norm": 0.41796875, "learning_rate": 0.00197466356747712, "loss": 0.2181, "step": 8194 }, { "epoch": 0.014532185439622546, "grad_norm": 0.234375, "learning_rate": 0.001974649542552702, "loss": 0.2491, "step": 8196 }, { "epoch": 0.01453573160493236, "grad_norm": 1.8984375, "learning_rate": 0.0019746355138030754, "loss": 0.2448, "step": 8198 }, { "epoch": 0.014539277770242177, "grad_norm": 0.984375, "learning_rate": 0.0019746214812283005, "loss": 0.2635, "step": 8200 }, { "epoch": 0.014542823935551992, "grad_norm": 0.345703125, "learning_rate": 0.00197460744482844, "loss": 0.2502, "step": 8202 }, { "epoch": 0.014546370100861806, "grad_norm": 0.421875, "learning_rate": 0.0019745934046035535, "loss": 0.2543, "step": 8204 }, { "epoch": 0.014549916266171622, "grad_norm": 0.7109375, "learning_rate": 0.0019745793605537043, "loss": 0.299, "step": 8206 }, { "epoch": 0.014553462431481437, "grad_norm": 0.43359375, "learning_rate": 0.0019745653126789523, "loss": 0.3053, "step": 8208 }, { "epoch": 0.014557008596791252, "grad_norm": 0.43359375, "learning_rate": 0.0019745512609793603, "loss": 0.4376, "step": 8210 }, { "epoch": 0.014560554762101068, "grad_norm": 0.291015625, "learning_rate": 0.0019745372054549887, "loss": 0.2934, "step": 8212 }, { "epoch": 0.014564100927410882, "grad_norm": 0.267578125, "learning_rate": 0.0019745231461058997, "loss": 0.3122, "step": 8214 }, { "epoch": 0.014567647092720697, "grad_norm": 0.53125, "learning_rate": 0.0019745090829321544, "loss": 0.3878, "step": 8216 }, { "epoch": 0.014571193258030513, "grad_norm": 0.859375, "learning_rate": 0.001974495015933814, "loss": 0.2351, "step": 8218 }, { "epoch": 0.014574739423340328, "grad_norm": 0.32421875, "learning_rate": 0.0019744809451109403, "loss": 0.3699, "step": 8220 }, { "epoch": 0.014578285588650144, "grad_norm": 0.314453125, "learning_rate": 0.001974466870463595, "loss": 0.2613, "step": 8222 }, { "epoch": 0.014581831753959959, "grad_norm": 1.34375, "learning_rate": 0.00197445279199184, "loss": 0.2594, "step": 8224 }, { "epoch": 0.014585377919269773, "grad_norm": 0.6640625, "learning_rate": 0.001974438709695736, "loss": 0.3091, "step": 8226 }, { "epoch": 0.01458892408457959, "grad_norm": 0.330078125, "learning_rate": 0.001974424623575345, "loss": 0.1648, "step": 8228 }, { "epoch": 0.014592470249889404, "grad_norm": 0.306640625, "learning_rate": 0.001974410533630729, "loss": 0.3051, "step": 8230 }, { "epoch": 0.014596016415199219, "grad_norm": 0.3359375, "learning_rate": 0.0019743964398619487, "loss": 0.2379, "step": 8232 }, { "epoch": 0.014599562580509035, "grad_norm": 0.4609375, "learning_rate": 0.0019743823422690666, "loss": 0.2683, "step": 8234 }, { "epoch": 0.01460310874581885, "grad_norm": 0.55859375, "learning_rate": 0.001974368240852144, "loss": 0.2994, "step": 8236 }, { "epoch": 0.014606654911128664, "grad_norm": 0.365234375, "learning_rate": 0.0019743541356112425, "loss": 0.2508, "step": 8238 }, { "epoch": 0.01461020107643848, "grad_norm": 0.3984375, "learning_rate": 0.001974340026546424, "loss": 0.5068, "step": 8240 }, { "epoch": 0.014613747241748295, "grad_norm": 1.8515625, "learning_rate": 0.0019743259136577504, "loss": 0.2735, "step": 8242 }, { "epoch": 0.01461729340705811, "grad_norm": 0.388671875, "learning_rate": 0.001974311796945283, "loss": 0.2804, "step": 8244 }, { "epoch": 0.014620839572367926, "grad_norm": 0.4140625, "learning_rate": 0.0019742976764090835, "loss": 0.2208, "step": 8246 }, { "epoch": 0.01462438573767774, "grad_norm": 0.921875, "learning_rate": 0.001974283552049214, "loss": 0.28, "step": 8248 }, { "epoch": 0.014627931902987555, "grad_norm": 0.29296875, "learning_rate": 0.0019742694238657358, "loss": 0.2355, "step": 8250 }, { "epoch": 0.014631478068297371, "grad_norm": 0.419921875, "learning_rate": 0.001974255291858711, "loss": 0.2687, "step": 8252 }, { "epoch": 0.014635024233607186, "grad_norm": 0.6953125, "learning_rate": 0.0019742411560282015, "loss": 0.2353, "step": 8254 }, { "epoch": 0.014638570398917002, "grad_norm": 0.6953125, "learning_rate": 0.001974227016374269, "loss": 0.2276, "step": 8256 }, { "epoch": 0.014642116564226816, "grad_norm": 1.984375, "learning_rate": 0.001974212872896975, "loss": 0.4946, "step": 8258 }, { "epoch": 0.014645662729536631, "grad_norm": 0.6484375, "learning_rate": 0.0019741987255963824, "loss": 0.2401, "step": 8260 }, { "epoch": 0.014649208894846447, "grad_norm": 0.5546875, "learning_rate": 0.001974184574472552, "loss": 0.2202, "step": 8262 }, { "epoch": 0.014652755060156262, "grad_norm": 1.671875, "learning_rate": 0.0019741704195255457, "loss": 0.4242, "step": 8264 }, { "epoch": 0.014656301225466076, "grad_norm": 0.53125, "learning_rate": 0.0019741562607554263, "loss": 0.2719, "step": 8266 }, { "epoch": 0.014659847390775893, "grad_norm": 1.03125, "learning_rate": 0.0019741420981622547, "loss": 0.2843, "step": 8268 }, { "epoch": 0.014663393556085707, "grad_norm": 0.6171875, "learning_rate": 0.001974127931746094, "loss": 0.21, "step": 8270 }, { "epoch": 0.014666939721395522, "grad_norm": 0.78515625, "learning_rate": 0.001974113761507005, "loss": 0.239, "step": 8272 }, { "epoch": 0.014670485886705338, "grad_norm": 0.3515625, "learning_rate": 0.00197409958744505, "loss": 0.212, "step": 8274 }, { "epoch": 0.014674032052015153, "grad_norm": 0.455078125, "learning_rate": 0.0019740854095602915, "loss": 0.2199, "step": 8276 }, { "epoch": 0.014677578217324967, "grad_norm": 0.9140625, "learning_rate": 0.001974071227852791, "loss": 0.5372, "step": 8278 }, { "epoch": 0.014681124382634783, "grad_norm": 0.58984375, "learning_rate": 0.001974057042322611, "loss": 0.1938, "step": 8280 }, { "epoch": 0.014684670547944598, "grad_norm": 0.62890625, "learning_rate": 0.001974042852969813, "loss": 0.216, "step": 8282 }, { "epoch": 0.014688216713254413, "grad_norm": 0.34375, "learning_rate": 0.001974028659794459, "loss": 0.2577, "step": 8284 }, { "epoch": 0.014691762878564229, "grad_norm": 0.75390625, "learning_rate": 0.0019740144627966114, "loss": 0.2617, "step": 8286 }, { "epoch": 0.014695309043874043, "grad_norm": 0.66015625, "learning_rate": 0.0019740002619763326, "loss": 0.2305, "step": 8288 }, { "epoch": 0.01469885520918386, "grad_norm": 0.359375, "learning_rate": 0.0019739860573336843, "loss": 0.2246, "step": 8290 }, { "epoch": 0.014702401374493674, "grad_norm": 1.4921875, "learning_rate": 0.001973971848868728, "loss": 0.2925, "step": 8292 }, { "epoch": 0.014705947539803489, "grad_norm": 2.09375, "learning_rate": 0.0019739576365815272, "loss": 0.2573, "step": 8294 }, { "epoch": 0.014709493705113305, "grad_norm": 0.69921875, "learning_rate": 0.001973943420472143, "loss": 0.1889, "step": 8296 }, { "epoch": 0.01471303987042312, "grad_norm": 1.2109375, "learning_rate": 0.001973929200540638, "loss": 0.358, "step": 8298 }, { "epoch": 0.014716586035732934, "grad_norm": 0.5078125, "learning_rate": 0.001973914976787074, "loss": 0.2469, "step": 8300 }, { "epoch": 0.01472013220104275, "grad_norm": 0.294921875, "learning_rate": 0.0019739007492115142, "loss": 0.2914, "step": 8302 }, { "epoch": 0.014723678366352565, "grad_norm": 1.046875, "learning_rate": 0.0019738865178140197, "loss": 0.2945, "step": 8304 }, { "epoch": 0.01472722453166238, "grad_norm": 0.2236328125, "learning_rate": 0.001973872282594653, "loss": 0.1866, "step": 8306 }, { "epoch": 0.014730770696972196, "grad_norm": 0.56640625, "learning_rate": 0.001973858043553477, "loss": 0.2336, "step": 8308 }, { "epoch": 0.01473431686228201, "grad_norm": 0.30859375, "learning_rate": 0.0019738438006905537, "loss": 0.3078, "step": 8310 }, { "epoch": 0.014737863027591825, "grad_norm": 0.29296875, "learning_rate": 0.0019738295540059447, "loss": 0.2634, "step": 8312 }, { "epoch": 0.014741409192901641, "grad_norm": 0.65234375, "learning_rate": 0.0019738153034997126, "loss": 0.278, "step": 8314 }, { "epoch": 0.014744955358211456, "grad_norm": 0.279296875, "learning_rate": 0.0019738010491719203, "loss": 0.2528, "step": 8316 }, { "epoch": 0.01474850152352127, "grad_norm": 0.4453125, "learning_rate": 0.0019737867910226297, "loss": 0.2905, "step": 8318 }, { "epoch": 0.014752047688831087, "grad_norm": 0.228515625, "learning_rate": 0.0019737725290519034, "loss": 0.2447, "step": 8320 }, { "epoch": 0.014755593854140901, "grad_norm": 0.470703125, "learning_rate": 0.0019737582632598036, "loss": 0.3212, "step": 8322 }, { "epoch": 0.014759140019450718, "grad_norm": 0.451171875, "learning_rate": 0.0019737439936463926, "loss": 0.2487, "step": 8324 }, { "epoch": 0.014762686184760532, "grad_norm": 0.328125, "learning_rate": 0.001973729720211733, "loss": 0.2462, "step": 8326 }, { "epoch": 0.014766232350070347, "grad_norm": 0.421875, "learning_rate": 0.001973715442955887, "loss": 0.2829, "step": 8328 }, { "epoch": 0.014769778515380163, "grad_norm": 0.396484375, "learning_rate": 0.0019737011618789174, "loss": 0.2193, "step": 8330 }, { "epoch": 0.014773324680689977, "grad_norm": 6.5625, "learning_rate": 0.001973686876980886, "loss": 0.4112, "step": 8332 }, { "epoch": 0.014776870845999792, "grad_norm": 0.28125, "learning_rate": 0.001973672588261856, "loss": 0.3945, "step": 8334 }, { "epoch": 0.014780417011309608, "grad_norm": 0.412109375, "learning_rate": 0.0019736582957218898, "loss": 0.2134, "step": 8336 }, { "epoch": 0.014783963176619423, "grad_norm": 0.291015625, "learning_rate": 0.001973643999361049, "loss": 0.2612, "step": 8338 }, { "epoch": 0.014787509341929237, "grad_norm": 0.36328125, "learning_rate": 0.0019736296991793973, "loss": 0.2279, "step": 8340 }, { "epoch": 0.014791055507239054, "grad_norm": 0.69921875, "learning_rate": 0.0019736153951769968, "loss": 0.2541, "step": 8342 }, { "epoch": 0.014794601672548868, "grad_norm": 0.58203125, "learning_rate": 0.00197360108735391, "loss": 0.1865, "step": 8344 }, { "epoch": 0.014798147837858683, "grad_norm": 0.7890625, "learning_rate": 0.0019735867757101995, "loss": 0.5469, "step": 8346 }, { "epoch": 0.014801694003168499, "grad_norm": 0.8125, "learning_rate": 0.0019735724602459276, "loss": 0.208, "step": 8348 }, { "epoch": 0.014805240168478314, "grad_norm": 0.478515625, "learning_rate": 0.001973558140961157, "loss": 0.2023, "step": 8350 }, { "epoch": 0.014808786333788128, "grad_norm": 0.4375, "learning_rate": 0.001973543817855951, "loss": 0.2787, "step": 8352 }, { "epoch": 0.014812332499097944, "grad_norm": 0.421875, "learning_rate": 0.001973529490930372, "loss": 0.276, "step": 8354 }, { "epoch": 0.014815878664407759, "grad_norm": 0.57421875, "learning_rate": 0.0019735151601844815, "loss": 0.2239, "step": 8356 }, { "epoch": 0.014819424829717575, "grad_norm": 0.87109375, "learning_rate": 0.001973500825618344, "loss": 0.2352, "step": 8358 }, { "epoch": 0.01482297099502739, "grad_norm": 0.78515625, "learning_rate": 0.0019734864872320208, "loss": 0.2223, "step": 8360 }, { "epoch": 0.014826517160337204, "grad_norm": 0.3671875, "learning_rate": 0.0019734721450255753, "loss": 0.2678, "step": 8362 }, { "epoch": 0.01483006332564702, "grad_norm": 0.59375, "learning_rate": 0.0019734577989990697, "loss": 0.2802, "step": 8364 }, { "epoch": 0.014833609490956835, "grad_norm": 0.361328125, "learning_rate": 0.0019734434491525676, "loss": 0.1879, "step": 8366 }, { "epoch": 0.01483715565626665, "grad_norm": 0.3125, "learning_rate": 0.001973429095486131, "loss": 0.3326, "step": 8368 }, { "epoch": 0.014840701821576466, "grad_norm": 0.8125, "learning_rate": 0.0019734147379998224, "loss": 0.3008, "step": 8370 }, { "epoch": 0.01484424798688628, "grad_norm": 0.408203125, "learning_rate": 0.0019734003766937055, "loss": 0.168, "step": 8372 }, { "epoch": 0.014847794152196095, "grad_norm": 0.314453125, "learning_rate": 0.0019733860115678428, "loss": 0.2097, "step": 8374 }, { "epoch": 0.014851340317505912, "grad_norm": 1.234375, "learning_rate": 0.0019733716426222963, "loss": 0.2889, "step": 8376 }, { "epoch": 0.014854886482815726, "grad_norm": 0.60546875, "learning_rate": 0.0019733572698571304, "loss": 0.2187, "step": 8378 }, { "epoch": 0.01485843264812554, "grad_norm": 0.380859375, "learning_rate": 0.001973342893272407, "loss": 0.2115, "step": 8380 }, { "epoch": 0.014861978813435357, "grad_norm": 0.55078125, "learning_rate": 0.0019733285128681888, "loss": 0.2622, "step": 8382 }, { "epoch": 0.014865524978745171, "grad_norm": 1.109375, "learning_rate": 0.001973314128644539, "loss": 0.3458, "step": 8384 }, { "epoch": 0.014869071144054986, "grad_norm": 0.7109375, "learning_rate": 0.00197329974060152, "loss": 0.1967, "step": 8386 }, { "epoch": 0.014872617309364802, "grad_norm": 1.3828125, "learning_rate": 0.001973285348739196, "loss": 0.2537, "step": 8388 }, { "epoch": 0.014876163474674617, "grad_norm": 1.203125, "learning_rate": 0.001973270953057629, "loss": 0.4077, "step": 8390 }, { "epoch": 0.014879709639984433, "grad_norm": 0.5703125, "learning_rate": 0.0019732565535568822, "loss": 0.2731, "step": 8392 }, { "epoch": 0.014883255805294248, "grad_norm": 0.2734375, "learning_rate": 0.0019732421502370178, "loss": 0.2402, "step": 8394 }, { "epoch": 0.014886801970604062, "grad_norm": 0.25390625, "learning_rate": 0.0019732277430981, "loss": 0.2126, "step": 8396 }, { "epoch": 0.014890348135913879, "grad_norm": 0.5703125, "learning_rate": 0.0019732133321401914, "loss": 0.2764, "step": 8398 }, { "epoch": 0.014893894301223693, "grad_norm": 0.265625, "learning_rate": 0.0019731989173633548, "loss": 0.1994, "step": 8400 }, { "epoch": 0.014897440466533508, "grad_norm": 0.7421875, "learning_rate": 0.0019731844987676533, "loss": 0.2315, "step": 8402 }, { "epoch": 0.014900986631843324, "grad_norm": 0.31640625, "learning_rate": 0.00197317007635315, "loss": 0.2386, "step": 8404 }, { "epoch": 0.014904532797153138, "grad_norm": 0.34765625, "learning_rate": 0.001973155650119908, "loss": 0.2045, "step": 8406 }, { "epoch": 0.014908078962462953, "grad_norm": 0.3828125, "learning_rate": 0.00197314122006799, "loss": 0.2094, "step": 8408 }, { "epoch": 0.01491162512777277, "grad_norm": 0.75390625, "learning_rate": 0.0019731267861974604, "loss": 0.3569, "step": 8410 }, { "epoch": 0.014915171293082584, "grad_norm": 0.443359375, "learning_rate": 0.0019731123485083805, "loss": 0.237, "step": 8412 }, { "epoch": 0.014918717458392398, "grad_norm": 0.361328125, "learning_rate": 0.0019730979070008148, "loss": 0.3474, "step": 8414 }, { "epoch": 0.014922263623702215, "grad_norm": 0.74609375, "learning_rate": 0.001973083461674826, "loss": 0.3039, "step": 8416 }, { "epoch": 0.01492580978901203, "grad_norm": 1.203125, "learning_rate": 0.0019730690125304767, "loss": 0.2608, "step": 8418 }, { "epoch": 0.014929355954321844, "grad_norm": 0.7265625, "learning_rate": 0.0019730545595678314, "loss": 0.2046, "step": 8420 }, { "epoch": 0.01493290211963166, "grad_norm": 0.400390625, "learning_rate": 0.0019730401027869523, "loss": 0.2329, "step": 8422 }, { "epoch": 0.014936448284941475, "grad_norm": 0.53515625, "learning_rate": 0.001973025642187903, "loss": 0.2868, "step": 8424 }, { "epoch": 0.014939994450251291, "grad_norm": 0.400390625, "learning_rate": 0.001973011177770747, "loss": 0.2596, "step": 8426 }, { "epoch": 0.014943540615561106, "grad_norm": 1.8984375, "learning_rate": 0.0019729967095355465, "loss": 0.461, "step": 8428 }, { "epoch": 0.01494708678087092, "grad_norm": 4.15625, "learning_rate": 0.0019729822374823657, "loss": 0.3654, "step": 8430 }, { "epoch": 0.014950632946180736, "grad_norm": 0.55859375, "learning_rate": 0.001972967761611268, "loss": 0.4703, "step": 8432 }, { "epoch": 0.014954179111490551, "grad_norm": 0.81640625, "learning_rate": 0.001972953281922316, "loss": 0.2539, "step": 8434 }, { "epoch": 0.014957725276800365, "grad_norm": 0.3671875, "learning_rate": 0.0019729387984155737, "loss": 0.2693, "step": 8436 }, { "epoch": 0.014961271442110182, "grad_norm": 0.69140625, "learning_rate": 0.0019729243110911043, "loss": 0.3123, "step": 8438 }, { "epoch": 0.014964817607419996, "grad_norm": 1.0234375, "learning_rate": 0.001972909819948971, "loss": 0.2053, "step": 8440 }, { "epoch": 0.01496836377272981, "grad_norm": 0.5703125, "learning_rate": 0.0019728953249892366, "loss": 0.2043, "step": 8442 }, { "epoch": 0.014971909938039627, "grad_norm": 2.28125, "learning_rate": 0.0019728808262119654, "loss": 0.3579, "step": 8444 }, { "epoch": 0.014975456103349442, "grad_norm": 0.412109375, "learning_rate": 0.001972866323617221, "loss": 0.2023, "step": 8446 }, { "epoch": 0.014979002268659256, "grad_norm": 3.296875, "learning_rate": 0.0019728518172050656, "loss": 0.5548, "step": 8448 }, { "epoch": 0.014982548433969073, "grad_norm": 0.392578125, "learning_rate": 0.0019728373069755637, "loss": 0.2251, "step": 8450 }, { "epoch": 0.014986094599278887, "grad_norm": 0.8515625, "learning_rate": 0.001972822792928778, "loss": 0.2292, "step": 8452 }, { "epoch": 0.014989640764588702, "grad_norm": 0.69140625, "learning_rate": 0.001972808275064773, "loss": 0.2682, "step": 8454 }, { "epoch": 0.014993186929898518, "grad_norm": 0.80859375, "learning_rate": 0.0019727937533836116, "loss": 0.253, "step": 8456 }, { "epoch": 0.014996733095208332, "grad_norm": 0.5625, "learning_rate": 0.001972779227885357, "loss": 0.2328, "step": 8458 }, { "epoch": 0.015000279260518149, "grad_norm": 0.96875, "learning_rate": 0.001972764698570073, "loss": 0.2995, "step": 8460 }, { "epoch": 0.015003825425827963, "grad_norm": 0.349609375, "learning_rate": 0.0019727501654378233, "loss": 0.3771, "step": 8462 }, { "epoch": 0.015007371591137778, "grad_norm": 0.70703125, "learning_rate": 0.0019727356284886715, "loss": 0.2372, "step": 8464 }, { "epoch": 0.015010917756447594, "grad_norm": 0.5546875, "learning_rate": 0.0019727210877226804, "loss": 0.2373, "step": 8466 }, { "epoch": 0.015014463921757409, "grad_norm": 0.26953125, "learning_rate": 0.001972706543139915, "loss": 0.247, "step": 8468 }, { "epoch": 0.015018010087067223, "grad_norm": 1.4609375, "learning_rate": 0.0019726919947404375, "loss": 0.2755, "step": 8470 }, { "epoch": 0.01502155625237704, "grad_norm": 0.458984375, "learning_rate": 0.0019726774425243123, "loss": 0.1957, "step": 8472 }, { "epoch": 0.015025102417686854, "grad_norm": 0.828125, "learning_rate": 0.001972662886491603, "loss": 0.2928, "step": 8474 }, { "epoch": 0.015028648582996669, "grad_norm": 0.5546875, "learning_rate": 0.0019726483266423733, "loss": 0.1797, "step": 8476 }, { "epoch": 0.015032194748306485, "grad_norm": 0.37109375, "learning_rate": 0.001972633762976686, "loss": 0.2013, "step": 8478 }, { "epoch": 0.0150357409136163, "grad_norm": 0.337890625, "learning_rate": 0.0019726191954946063, "loss": 0.2781, "step": 8480 }, { "epoch": 0.015039287078926114, "grad_norm": 1.6640625, "learning_rate": 0.0019726046241961967, "loss": 0.2612, "step": 8482 }, { "epoch": 0.01504283324423593, "grad_norm": 2.296875, "learning_rate": 0.0019725900490815216, "loss": 0.2983, "step": 8484 }, { "epoch": 0.015046379409545745, "grad_norm": 0.35546875, "learning_rate": 0.0019725754701506444, "loss": 0.186, "step": 8486 }, { "epoch": 0.01504992557485556, "grad_norm": 1.796875, "learning_rate": 0.001972560887403629, "loss": 0.2253, "step": 8488 }, { "epoch": 0.015053471740165376, "grad_norm": 0.2490234375, "learning_rate": 0.0019725463008405386, "loss": 0.2858, "step": 8490 }, { "epoch": 0.01505701790547519, "grad_norm": 0.80078125, "learning_rate": 0.001972531710461438, "loss": 0.2459, "step": 8492 }, { "epoch": 0.015060564070785007, "grad_norm": 0.458984375, "learning_rate": 0.00197251711626639, "loss": 0.2563, "step": 8494 }, { "epoch": 0.015064110236094821, "grad_norm": 0.79296875, "learning_rate": 0.0019725025182554595, "loss": 0.3505, "step": 8496 }, { "epoch": 0.015067656401404636, "grad_norm": 1.6953125, "learning_rate": 0.0019724879164287096, "loss": 0.2718, "step": 8498 }, { "epoch": 0.015071202566714452, "grad_norm": 0.4453125, "learning_rate": 0.0019724733107862047, "loss": 0.3746, "step": 8500 }, { "epoch": 0.015074748732024267, "grad_norm": 0.88671875, "learning_rate": 0.001972458701328008, "loss": 0.2796, "step": 8502 }, { "epoch": 0.015078294897334081, "grad_norm": 0.396484375, "learning_rate": 0.001972444088054184, "loss": 0.1974, "step": 8504 }, { "epoch": 0.015081841062643897, "grad_norm": 1.4765625, "learning_rate": 0.001972429470964796, "loss": 0.259, "step": 8506 }, { "epoch": 0.015085387227953712, "grad_norm": 0.73828125, "learning_rate": 0.0019724148500599083, "loss": 0.3854, "step": 8508 }, { "epoch": 0.015088933393263526, "grad_norm": 0.78515625, "learning_rate": 0.001972400225339585, "loss": 0.2912, "step": 8510 }, { "epoch": 0.015092479558573343, "grad_norm": 2.046875, "learning_rate": 0.00197238559680389, "loss": 0.2514, "step": 8512 }, { "epoch": 0.015096025723883157, "grad_norm": 0.5234375, "learning_rate": 0.0019723709644528867, "loss": 0.2807, "step": 8514 }, { "epoch": 0.015099571889192972, "grad_norm": 0.458984375, "learning_rate": 0.00197235632828664, "loss": 0.251, "step": 8516 }, { "epoch": 0.015103118054502788, "grad_norm": 0.478515625, "learning_rate": 0.001972341688305213, "loss": 0.328, "step": 8518 }, { "epoch": 0.015106664219812603, "grad_norm": 0.5078125, "learning_rate": 0.00197232704450867, "loss": 0.2192, "step": 8520 }, { "epoch": 0.015110210385122417, "grad_norm": 1.5078125, "learning_rate": 0.001972312396897076, "loss": 0.2817, "step": 8522 }, { "epoch": 0.015113756550432234, "grad_norm": 0.8984375, "learning_rate": 0.001972297745470494, "loss": 0.221, "step": 8524 }, { "epoch": 0.015117302715742048, "grad_norm": 0.2734375, "learning_rate": 0.001972283090228988, "loss": 0.1854, "step": 8526 }, { "epoch": 0.015120848881051864, "grad_norm": 0.302734375, "learning_rate": 0.0019722684311726225, "loss": 0.2156, "step": 8528 }, { "epoch": 0.015124395046361679, "grad_norm": 0.45703125, "learning_rate": 0.0019722537683014617, "loss": 0.2754, "step": 8530 }, { "epoch": 0.015127941211671493, "grad_norm": 0.236328125, "learning_rate": 0.0019722391016155695, "loss": 0.2803, "step": 8532 }, { "epoch": 0.01513148737698131, "grad_norm": 0.439453125, "learning_rate": 0.0019722244311150103, "loss": 0.3082, "step": 8534 }, { "epoch": 0.015135033542291124, "grad_norm": 0.404296875, "learning_rate": 0.001972209756799848, "loss": 0.2524, "step": 8536 }, { "epoch": 0.015138579707600939, "grad_norm": 0.64453125, "learning_rate": 0.001972195078670147, "loss": 0.2449, "step": 8538 }, { "epoch": 0.015142125872910755, "grad_norm": 0.44140625, "learning_rate": 0.0019721803967259707, "loss": 0.251, "step": 8540 }, { "epoch": 0.01514567203822057, "grad_norm": 0.326171875, "learning_rate": 0.001972165710967385, "loss": 0.2039, "step": 8542 }, { "epoch": 0.015149218203530384, "grad_norm": 1.1171875, "learning_rate": 0.0019721510213944523, "loss": 0.2846, "step": 8544 }, { "epoch": 0.0151527643688402, "grad_norm": 0.333984375, "learning_rate": 0.001972136328007238, "loss": 0.2438, "step": 8546 }, { "epoch": 0.015156310534150015, "grad_norm": 1.1640625, "learning_rate": 0.001972121630805806, "loss": 0.2857, "step": 8548 }, { "epoch": 0.01515985669945983, "grad_norm": 0.326171875, "learning_rate": 0.0019721069297902205, "loss": 0.1888, "step": 8550 }, { "epoch": 0.015163402864769646, "grad_norm": 2.4375, "learning_rate": 0.001972092224960546, "loss": 0.3156, "step": 8552 }, { "epoch": 0.01516694903007946, "grad_norm": 0.431640625, "learning_rate": 0.001972077516316846, "loss": 0.2829, "step": 8554 }, { "epoch": 0.015170495195389275, "grad_norm": 0.55859375, "learning_rate": 0.0019720628038591864, "loss": 0.2099, "step": 8556 }, { "epoch": 0.015174041360699091, "grad_norm": 0.6484375, "learning_rate": 0.0019720480875876304, "loss": 0.287, "step": 8558 }, { "epoch": 0.015177587526008906, "grad_norm": 0.26953125, "learning_rate": 0.0019720333675022424, "loss": 0.2322, "step": 8560 }, { "epoch": 0.015181133691318722, "grad_norm": 0.474609375, "learning_rate": 0.001972018643603087, "loss": 0.2953, "step": 8562 }, { "epoch": 0.015184679856628537, "grad_norm": 0.22265625, "learning_rate": 0.0019720039158902286, "loss": 0.2044, "step": 8564 }, { "epoch": 0.015188226021938351, "grad_norm": 0.51953125, "learning_rate": 0.0019719891843637317, "loss": 0.2271, "step": 8566 }, { "epoch": 0.015191772187248168, "grad_norm": 1.0, "learning_rate": 0.0019719744490236607, "loss": 0.298, "step": 8568 }, { "epoch": 0.015195318352557982, "grad_norm": 0.412109375, "learning_rate": 0.0019719597098700795, "loss": 0.2512, "step": 8570 }, { "epoch": 0.015198864517867797, "grad_norm": 0.451171875, "learning_rate": 0.001971944966903054, "loss": 0.224, "step": 8572 }, { "epoch": 0.015202410683177613, "grad_norm": 2.53125, "learning_rate": 0.0019719302201226464, "loss": 0.3056, "step": 8574 }, { "epoch": 0.015205956848487428, "grad_norm": 0.435546875, "learning_rate": 0.0019719154695289234, "loss": 0.3421, "step": 8576 }, { "epoch": 0.015209503013797242, "grad_norm": 0.458984375, "learning_rate": 0.001971900715121948, "loss": 0.295, "step": 8578 }, { "epoch": 0.015213049179107058, "grad_norm": 0.494140625, "learning_rate": 0.001971885956901786, "loss": 0.2454, "step": 8580 }, { "epoch": 0.015216595344416873, "grad_norm": 0.4296875, "learning_rate": 0.0019718711948685007, "loss": 0.2786, "step": 8582 }, { "epoch": 0.015220141509726687, "grad_norm": 0.392578125, "learning_rate": 0.0019718564290221578, "loss": 0.2791, "step": 8584 }, { "epoch": 0.015223687675036504, "grad_norm": 0.5234375, "learning_rate": 0.001971841659362821, "loss": 0.2626, "step": 8586 }, { "epoch": 0.015227233840346318, "grad_norm": 0.34375, "learning_rate": 0.0019718268858905557, "loss": 0.1955, "step": 8588 }, { "epoch": 0.015230780005656133, "grad_norm": 0.58203125, "learning_rate": 0.001971812108605425, "loss": 0.2952, "step": 8590 }, { "epoch": 0.01523432617096595, "grad_norm": 0.66796875, "learning_rate": 0.001971797327507495, "loss": 0.2773, "step": 8592 }, { "epoch": 0.015237872336275764, "grad_norm": 0.5390625, "learning_rate": 0.0019717825425968304, "loss": 0.2793, "step": 8594 }, { "epoch": 0.01524141850158558, "grad_norm": 0.90625, "learning_rate": 0.001971767753873495, "loss": 0.2537, "step": 8596 }, { "epoch": 0.015244964666895395, "grad_norm": 0.474609375, "learning_rate": 0.0019717529613375536, "loss": 0.2958, "step": 8598 }, { "epoch": 0.015248510832205209, "grad_norm": 1.7578125, "learning_rate": 0.0019717381649890712, "loss": 0.3074, "step": 8600 }, { "epoch": 0.015252056997515025, "grad_norm": 0.2890625, "learning_rate": 0.0019717233648281125, "loss": 0.3694, "step": 8602 }, { "epoch": 0.01525560316282484, "grad_norm": 0.4296875, "learning_rate": 0.0019717085608547424, "loss": 0.2201, "step": 8604 }, { "epoch": 0.015259149328134654, "grad_norm": 0.265625, "learning_rate": 0.001971693753069025, "loss": 0.2749, "step": 8606 }, { "epoch": 0.01526269549344447, "grad_norm": 0.58203125, "learning_rate": 0.001971678941471026, "loss": 0.2294, "step": 8608 }, { "epoch": 0.015266241658754285, "grad_norm": 0.59375, "learning_rate": 0.0019716641260608086, "loss": 0.2263, "step": 8610 }, { "epoch": 0.0152697878240641, "grad_norm": 0.412109375, "learning_rate": 0.0019716493068384394, "loss": 0.2387, "step": 8612 }, { "epoch": 0.015273333989373916, "grad_norm": 0.326171875, "learning_rate": 0.0019716344838039824, "loss": 0.2777, "step": 8614 }, { "epoch": 0.01527688015468373, "grad_norm": 4.28125, "learning_rate": 0.0019716196569575027, "loss": 0.3567, "step": 8616 }, { "epoch": 0.015280426319993545, "grad_norm": 1.0234375, "learning_rate": 0.001971604826299065, "loss": 0.3221, "step": 8618 }, { "epoch": 0.015283972485303362, "grad_norm": 0.4296875, "learning_rate": 0.001971589991828733, "loss": 0.1985, "step": 8620 }, { "epoch": 0.015287518650613176, "grad_norm": 0.86328125, "learning_rate": 0.001971575153546574, "loss": 0.2859, "step": 8622 }, { "epoch": 0.01529106481592299, "grad_norm": 0.490234375, "learning_rate": 0.001971560311452651, "loss": 0.254, "step": 8624 }, { "epoch": 0.015294610981232807, "grad_norm": 1.828125, "learning_rate": 0.001971545465547029, "loss": 0.343, "step": 8626 }, { "epoch": 0.015298157146542622, "grad_norm": 0.49609375, "learning_rate": 0.001971530615829774, "loss": 0.3985, "step": 8628 }, { "epoch": 0.015301703311852438, "grad_norm": 0.3125, "learning_rate": 0.0019715157623009503, "loss": 0.2491, "step": 8630 }, { "epoch": 0.015305249477162252, "grad_norm": 0.353515625, "learning_rate": 0.0019715009049606227, "loss": 0.2225, "step": 8632 }, { "epoch": 0.015308795642472067, "grad_norm": 0.353515625, "learning_rate": 0.001971486043808856, "loss": 0.2721, "step": 8634 }, { "epoch": 0.015312341807781883, "grad_norm": 0.9375, "learning_rate": 0.001971471178845716, "loss": 0.2457, "step": 8636 }, { "epoch": 0.015315887973091698, "grad_norm": 0.369140625, "learning_rate": 0.001971456310071267, "loss": 0.2807, "step": 8638 }, { "epoch": 0.015319434138401512, "grad_norm": 0.439453125, "learning_rate": 0.001971441437485575, "loss": 0.2984, "step": 8640 }, { "epoch": 0.015322980303711329, "grad_norm": 0.296875, "learning_rate": 0.001971426561088704, "loss": 0.246, "step": 8642 }, { "epoch": 0.015326526469021143, "grad_norm": 0.5625, "learning_rate": 0.001971411680880719, "loss": 0.2864, "step": 8644 }, { "epoch": 0.015330072634330958, "grad_norm": 1.734375, "learning_rate": 0.001971396796861686, "loss": 0.2798, "step": 8646 }, { "epoch": 0.015333618799640774, "grad_norm": 0.4765625, "learning_rate": 0.0019713819090316693, "loss": 0.2598, "step": 8648 }, { "epoch": 0.015337164964950589, "grad_norm": 0.326171875, "learning_rate": 0.001971367017390734, "loss": 0.2185, "step": 8650 }, { "epoch": 0.015340711130260403, "grad_norm": 0.2265625, "learning_rate": 0.001971352121938946, "loss": 0.2564, "step": 8652 }, { "epoch": 0.01534425729557022, "grad_norm": 0.26953125, "learning_rate": 0.00197133722267637, "loss": 0.2426, "step": 8654 }, { "epoch": 0.015347803460880034, "grad_norm": 0.384765625, "learning_rate": 0.0019713223196030707, "loss": 0.2428, "step": 8656 }, { "epoch": 0.015351349626189848, "grad_norm": 1.2578125, "learning_rate": 0.001971307412719114, "loss": 0.5143, "step": 8658 }, { "epoch": 0.015354895791499665, "grad_norm": 0.6640625, "learning_rate": 0.0019712925020245646, "loss": 0.2882, "step": 8660 }, { "epoch": 0.01535844195680948, "grad_norm": 0.294921875, "learning_rate": 0.001971277587519488, "loss": 0.255, "step": 8662 }, { "epoch": 0.015361988122119296, "grad_norm": 0.53125, "learning_rate": 0.0019712626692039493, "loss": 0.2761, "step": 8664 }, { "epoch": 0.01536553428742911, "grad_norm": 0.314453125, "learning_rate": 0.001971247747078014, "loss": 0.2327, "step": 8666 }, { "epoch": 0.015369080452738925, "grad_norm": 0.345703125, "learning_rate": 0.001971232821141747, "loss": 0.2087, "step": 8668 }, { "epoch": 0.015372626618048741, "grad_norm": 0.36328125, "learning_rate": 0.0019712178913952137, "loss": 0.2459, "step": 8670 }, { "epoch": 0.015376172783358556, "grad_norm": 0.2236328125, "learning_rate": 0.0019712029578384796, "loss": 0.1898, "step": 8672 }, { "epoch": 0.01537971894866837, "grad_norm": 0.365234375, "learning_rate": 0.0019711880204716092, "loss": 0.2762, "step": 8674 }, { "epoch": 0.015383265113978186, "grad_norm": 0.416015625, "learning_rate": 0.001971173079294669, "loss": 0.3124, "step": 8676 }, { "epoch": 0.015386811279288001, "grad_norm": 0.50390625, "learning_rate": 0.0019711581343077236, "loss": 0.2829, "step": 8678 }, { "epoch": 0.015390357444597816, "grad_norm": 0.67578125, "learning_rate": 0.0019711431855108387, "loss": 0.2668, "step": 8680 }, { "epoch": 0.015393903609907632, "grad_norm": 0.8984375, "learning_rate": 0.001971128232904079, "loss": 0.2707, "step": 8682 }, { "epoch": 0.015397449775217446, "grad_norm": 1.8984375, "learning_rate": 0.001971113276487511, "loss": 0.3612, "step": 8684 }, { "epoch": 0.015400995940527261, "grad_norm": 0.76171875, "learning_rate": 0.001971098316261199, "loss": 0.2707, "step": 8686 }, { "epoch": 0.015404542105837077, "grad_norm": 0.3828125, "learning_rate": 0.0019710833522252097, "loss": 0.2478, "step": 8688 }, { "epoch": 0.015408088271146892, "grad_norm": 0.6953125, "learning_rate": 0.0019710683843796074, "loss": 0.2291, "step": 8690 }, { "epoch": 0.015411634436456706, "grad_norm": 0.703125, "learning_rate": 0.0019710534127244583, "loss": 0.2109, "step": 8692 }, { "epoch": 0.015415180601766523, "grad_norm": 0.47265625, "learning_rate": 0.001971038437259827, "loss": 0.231, "step": 8694 }, { "epoch": 0.015418726767076337, "grad_norm": 1.1640625, "learning_rate": 0.0019710234579857796, "loss": 0.3979, "step": 8696 }, { "epoch": 0.015422272932386153, "grad_norm": 0.640625, "learning_rate": 0.001971008474902382, "loss": 0.2348, "step": 8698 }, { "epoch": 0.015425819097695968, "grad_norm": 0.310546875, "learning_rate": 0.0019709934880096985, "loss": 0.2488, "step": 8700 }, { "epoch": 0.015429365263005783, "grad_norm": 0.26953125, "learning_rate": 0.001970978497307796, "loss": 0.2801, "step": 8702 }, { "epoch": 0.015432911428315599, "grad_norm": 0.462890625, "learning_rate": 0.0019709635027967396, "loss": 0.2071, "step": 8704 }, { "epoch": 0.015436457593625413, "grad_norm": 0.625, "learning_rate": 0.0019709485044765943, "loss": 0.2892, "step": 8706 }, { "epoch": 0.015440003758935228, "grad_norm": 0.28125, "learning_rate": 0.0019709335023474265, "loss": 0.1877, "step": 8708 }, { "epoch": 0.015443549924245044, "grad_norm": 1.875, "learning_rate": 0.0019709184964093016, "loss": 0.4012, "step": 8710 }, { "epoch": 0.015447096089554859, "grad_norm": 0.7421875, "learning_rate": 0.0019709034866622847, "loss": 0.2192, "step": 8712 }, { "epoch": 0.015450642254864673, "grad_norm": 1.3984375, "learning_rate": 0.0019708884731064417, "loss": 0.4603, "step": 8714 }, { "epoch": 0.01545418842017449, "grad_norm": 0.2373046875, "learning_rate": 0.0019708734557418385, "loss": 0.2656, "step": 8716 }, { "epoch": 0.015457734585484304, "grad_norm": 0.5234375, "learning_rate": 0.001970858434568541, "loss": 0.2632, "step": 8718 }, { "epoch": 0.015461280750794119, "grad_norm": 0.59375, "learning_rate": 0.0019708434095866145, "loss": 0.213, "step": 8720 }, { "epoch": 0.015464826916103935, "grad_norm": 1.3203125, "learning_rate": 0.0019708283807961247, "loss": 0.2399, "step": 8722 }, { "epoch": 0.01546837308141375, "grad_norm": 0.453125, "learning_rate": 0.001970813348197137, "loss": 0.6017, "step": 8724 }, { "epoch": 0.015471919246723564, "grad_norm": 0.73828125, "learning_rate": 0.0019707983117897183, "loss": 0.2586, "step": 8726 }, { "epoch": 0.01547546541203338, "grad_norm": 0.8359375, "learning_rate": 0.0019707832715739333, "loss": 0.4973, "step": 8728 }, { "epoch": 0.015479011577343195, "grad_norm": 1.2265625, "learning_rate": 0.001970768227549848, "loss": 0.3071, "step": 8730 }, { "epoch": 0.015482557742653011, "grad_norm": 0.412109375, "learning_rate": 0.001970753179717528, "loss": 0.2667, "step": 8732 }, { "epoch": 0.015486103907962826, "grad_norm": 0.443359375, "learning_rate": 0.0019707381280770395, "loss": 0.2113, "step": 8734 }, { "epoch": 0.01548965007327264, "grad_norm": 0.9453125, "learning_rate": 0.0019707230726284486, "loss": 0.2516, "step": 8736 }, { "epoch": 0.015493196238582457, "grad_norm": 0.287109375, "learning_rate": 0.0019707080133718204, "loss": 0.2966, "step": 8738 }, { "epoch": 0.015496742403892271, "grad_norm": 0.458984375, "learning_rate": 0.0019706929503072214, "loss": 0.2895, "step": 8740 }, { "epoch": 0.015500288569202086, "grad_norm": 0.369140625, "learning_rate": 0.001970677883434717, "loss": 0.2919, "step": 8742 }, { "epoch": 0.015503834734511902, "grad_norm": 1.828125, "learning_rate": 0.001970662812754373, "loss": 0.365, "step": 8744 }, { "epoch": 0.015507380899821717, "grad_norm": 0.6015625, "learning_rate": 0.001970647738266256, "loss": 0.1899, "step": 8746 }, { "epoch": 0.015510927065131531, "grad_norm": 0.7734375, "learning_rate": 0.0019706326599704315, "loss": 0.253, "step": 8748 }, { "epoch": 0.015514473230441347, "grad_norm": 0.33984375, "learning_rate": 0.0019706175778669652, "loss": 0.1949, "step": 8750 }, { "epoch": 0.015518019395751162, "grad_norm": 0.48046875, "learning_rate": 0.0019706024919559236, "loss": 0.2643, "step": 8752 }, { "epoch": 0.015521565561060977, "grad_norm": 0.484375, "learning_rate": 0.001970587402237372, "loss": 0.2585, "step": 8754 }, { "epoch": 0.015525111726370793, "grad_norm": 1.0, "learning_rate": 0.0019705723087113775, "loss": 0.3023, "step": 8756 }, { "epoch": 0.015528657891680607, "grad_norm": 0.37109375, "learning_rate": 0.001970557211378005, "loss": 0.2703, "step": 8758 }, { "epoch": 0.015532204056990422, "grad_norm": 0.359375, "learning_rate": 0.001970542110237321, "loss": 0.3066, "step": 8760 }, { "epoch": 0.015535750222300238, "grad_norm": 2.828125, "learning_rate": 0.0019705270052893914, "loss": 0.3463, "step": 8762 }, { "epoch": 0.015539296387610053, "grad_norm": 1.2734375, "learning_rate": 0.0019705118965342825, "loss": 0.3042, "step": 8764 }, { "epoch": 0.015542842552919869, "grad_norm": 2.359375, "learning_rate": 0.00197049678397206, "loss": 0.3152, "step": 8766 }, { "epoch": 0.015546388718229684, "grad_norm": 1.6640625, "learning_rate": 0.0019704816676027904, "loss": 0.4442, "step": 8768 }, { "epoch": 0.015549934883539498, "grad_norm": 3.171875, "learning_rate": 0.001970466547426539, "loss": 0.45, "step": 8770 }, { "epoch": 0.015553481048849314, "grad_norm": 1.703125, "learning_rate": 0.0019704514234433735, "loss": 0.2297, "step": 8772 }, { "epoch": 0.015557027214159129, "grad_norm": 1.28125, "learning_rate": 0.001970436295653359, "loss": 0.2423, "step": 8774 }, { "epoch": 0.015560573379468944, "grad_norm": 1.734375, "learning_rate": 0.0019704211640565613, "loss": 0.2454, "step": 8776 }, { "epoch": 0.01556411954477876, "grad_norm": 1.0234375, "learning_rate": 0.001970406028653047, "loss": 0.2548, "step": 8778 }, { "epoch": 0.015567665710088574, "grad_norm": 0.2490234375, "learning_rate": 0.0019703908894428824, "loss": 0.3037, "step": 8780 }, { "epoch": 0.015571211875398389, "grad_norm": 1.15625, "learning_rate": 0.001970375746426134, "loss": 0.2951, "step": 8782 }, { "epoch": 0.015574758040708205, "grad_norm": 0.40234375, "learning_rate": 0.001970360599602867, "loss": 0.2847, "step": 8784 }, { "epoch": 0.01557830420601802, "grad_norm": 0.431640625, "learning_rate": 0.001970345448973149, "loss": 0.1947, "step": 8786 }, { "epoch": 0.015581850371327834, "grad_norm": 0.42578125, "learning_rate": 0.001970330294537045, "loss": 0.2164, "step": 8788 }, { "epoch": 0.01558539653663765, "grad_norm": 0.6875, "learning_rate": 0.001970315136294622, "loss": 0.4597, "step": 8790 }, { "epoch": 0.015588942701947465, "grad_norm": 0.47265625, "learning_rate": 0.0019702999742459465, "loss": 0.2569, "step": 8792 }, { "epoch": 0.01559248886725728, "grad_norm": 1.8125, "learning_rate": 0.001970284808391084, "loss": 0.3938, "step": 8794 }, { "epoch": 0.015596035032567096, "grad_norm": 0.287109375, "learning_rate": 0.0019702696387301015, "loss": 0.1909, "step": 8796 }, { "epoch": 0.01559958119787691, "grad_norm": 1.015625, "learning_rate": 0.0019702544652630653, "loss": 0.2431, "step": 8798 }, { "epoch": 0.015603127363186727, "grad_norm": 0.40234375, "learning_rate": 0.001970239287990041, "loss": 0.2457, "step": 8800 }, { "epoch": 0.015606673528496541, "grad_norm": 1.4140625, "learning_rate": 0.001970224106911096, "loss": 0.2552, "step": 8802 }, { "epoch": 0.015610219693806356, "grad_norm": 0.4765625, "learning_rate": 0.001970208922026296, "loss": 0.2023, "step": 8804 }, { "epoch": 0.015613765859116172, "grad_norm": 0.52734375, "learning_rate": 0.001970193733335708, "loss": 0.2378, "step": 8806 }, { "epoch": 0.015617312024425987, "grad_norm": 0.55859375, "learning_rate": 0.001970178540839398, "loss": 0.3996, "step": 8808 }, { "epoch": 0.015620858189735801, "grad_norm": 0.50390625, "learning_rate": 0.0019701633445374325, "loss": 0.1683, "step": 8810 }, { "epoch": 0.015624404355045618, "grad_norm": 1.1875, "learning_rate": 0.0019701481444298775, "loss": 0.3738, "step": 8812 }, { "epoch": 0.01562795052035543, "grad_norm": 0.73828125, "learning_rate": 0.0019701329405168, "loss": 0.521, "step": 8814 }, { "epoch": 0.01563149668566525, "grad_norm": 0.7265625, "learning_rate": 0.001970117732798267, "loss": 0.4277, "step": 8816 }, { "epoch": 0.015635042850975063, "grad_norm": 0.314453125, "learning_rate": 0.001970102521274344, "loss": 0.2433, "step": 8818 }, { "epoch": 0.015638589016284878, "grad_norm": 0.322265625, "learning_rate": 0.0019700873059450984, "loss": 0.2382, "step": 8820 }, { "epoch": 0.015642135181594692, "grad_norm": 0.2431640625, "learning_rate": 0.0019700720868105963, "loss": 0.2892, "step": 8822 }, { "epoch": 0.015645681346904507, "grad_norm": 0.392578125, "learning_rate": 0.001970056863870904, "loss": 0.2265, "step": 8824 }, { "epoch": 0.015649227512214325, "grad_norm": 0.357421875, "learning_rate": 0.0019700416371260885, "loss": 0.2546, "step": 8826 }, { "epoch": 0.01565277367752414, "grad_norm": 2.359375, "learning_rate": 0.001970026406576216, "loss": 0.3298, "step": 8828 }, { "epoch": 0.015656319842833954, "grad_norm": 0.42578125, "learning_rate": 0.0019700111722213537, "loss": 0.304, "step": 8830 }, { "epoch": 0.01565986600814377, "grad_norm": 0.62890625, "learning_rate": 0.001969995934061568, "loss": 0.2647, "step": 8832 }, { "epoch": 0.015663412173453583, "grad_norm": 0.291015625, "learning_rate": 0.0019699806920969254, "loss": 0.2051, "step": 8834 }, { "epoch": 0.015666958338763397, "grad_norm": 1.1171875, "learning_rate": 0.0019699654463274925, "loss": 0.3798, "step": 8836 }, { "epoch": 0.015670504504073215, "grad_norm": 3.078125, "learning_rate": 0.0019699501967533357, "loss": 0.2796, "step": 8838 }, { "epoch": 0.01567405066938303, "grad_norm": 3.953125, "learning_rate": 0.0019699349433745226, "loss": 0.3843, "step": 8840 }, { "epoch": 0.015677596834692845, "grad_norm": 0.447265625, "learning_rate": 0.001969919686191119, "loss": 0.3353, "step": 8842 }, { "epoch": 0.01568114300000266, "grad_norm": 0.58984375, "learning_rate": 0.0019699044252031923, "loss": 0.193, "step": 8844 }, { "epoch": 0.015684689165312474, "grad_norm": 0.77734375, "learning_rate": 0.001969889160410809, "loss": 0.4398, "step": 8846 }, { "epoch": 0.015688235330622288, "grad_norm": 0.546875, "learning_rate": 0.0019698738918140354, "loss": 0.2342, "step": 8848 }, { "epoch": 0.015691781495932106, "grad_norm": 0.84765625, "learning_rate": 0.001969858619412939, "loss": 0.3275, "step": 8850 }, { "epoch": 0.01569532766124192, "grad_norm": 0.3828125, "learning_rate": 0.001969843343207586, "loss": 0.2707, "step": 8852 }, { "epoch": 0.015698873826551735, "grad_norm": 0.75390625, "learning_rate": 0.001969828063198044, "loss": 0.2012, "step": 8854 }, { "epoch": 0.01570241999186155, "grad_norm": 0.54296875, "learning_rate": 0.0019698127793843787, "loss": 0.2342, "step": 8856 }, { "epoch": 0.015705966157171364, "grad_norm": 0.6875, "learning_rate": 0.001969797491766658, "loss": 0.2103, "step": 8858 }, { "epoch": 0.015709512322481183, "grad_norm": 0.5546875, "learning_rate": 0.001969782200344948, "loss": 0.2587, "step": 8860 }, { "epoch": 0.015713058487790997, "grad_norm": 0.421875, "learning_rate": 0.0019697669051193163, "loss": 0.1839, "step": 8862 }, { "epoch": 0.01571660465310081, "grad_norm": 0.419921875, "learning_rate": 0.001969751606089829, "loss": 0.2522, "step": 8864 }, { "epoch": 0.015720150818410626, "grad_norm": 0.60546875, "learning_rate": 0.0019697363032565533, "loss": 0.3043, "step": 8866 }, { "epoch": 0.01572369698372044, "grad_norm": 1.171875, "learning_rate": 0.0019697209966195563, "loss": 0.3253, "step": 8868 }, { "epoch": 0.015727243149030255, "grad_norm": 0.376953125, "learning_rate": 0.001969705686178905, "loss": 0.3684, "step": 8870 }, { "epoch": 0.015730789314340073, "grad_norm": 0.2197265625, "learning_rate": 0.001969690371934666, "loss": 0.2541, "step": 8872 }, { "epoch": 0.015734335479649888, "grad_norm": 0.365234375, "learning_rate": 0.001969675053886907, "loss": 0.2577, "step": 8874 }, { "epoch": 0.015737881644959702, "grad_norm": 0.70703125, "learning_rate": 0.001969659732035694, "loss": 0.3146, "step": 8876 }, { "epoch": 0.015741427810269517, "grad_norm": 0.427734375, "learning_rate": 0.0019696444063810946, "loss": 0.2843, "step": 8878 }, { "epoch": 0.01574497397557933, "grad_norm": 0.365234375, "learning_rate": 0.0019696290769231754, "loss": 0.1831, "step": 8880 }, { "epoch": 0.015748520140889146, "grad_norm": 1.5234375, "learning_rate": 0.001969613743662004, "loss": 0.2378, "step": 8882 }, { "epoch": 0.015752066306198964, "grad_norm": 1.84375, "learning_rate": 0.0019695984065976474, "loss": 0.4394, "step": 8884 }, { "epoch": 0.01575561247150878, "grad_norm": 0.6015625, "learning_rate": 0.0019695830657301726, "loss": 0.2568, "step": 8886 }, { "epoch": 0.015759158636818593, "grad_norm": 0.55859375, "learning_rate": 0.0019695677210596466, "loss": 0.226, "step": 8888 }, { "epoch": 0.015762704802128408, "grad_norm": 0.2373046875, "learning_rate": 0.0019695523725861363, "loss": 0.1982, "step": 8890 }, { "epoch": 0.015766250967438222, "grad_norm": 0.35546875, "learning_rate": 0.0019695370203097087, "loss": 0.431, "step": 8892 }, { "epoch": 0.01576979713274804, "grad_norm": 0.37890625, "learning_rate": 0.0019695216642304316, "loss": 0.2, "step": 8894 }, { "epoch": 0.015773343298057855, "grad_norm": 0.83984375, "learning_rate": 0.001969506304348372, "loss": 0.2671, "step": 8896 }, { "epoch": 0.01577688946336767, "grad_norm": 0.404296875, "learning_rate": 0.0019694909406635965, "loss": 0.3697, "step": 8898 }, { "epoch": 0.015780435628677484, "grad_norm": 0.318359375, "learning_rate": 0.001969475573176173, "loss": 0.2599, "step": 8900 }, { "epoch": 0.0157839817939873, "grad_norm": 0.447265625, "learning_rate": 0.001969460201886168, "loss": 0.2766, "step": 8902 }, { "epoch": 0.015787527959297113, "grad_norm": 0.486328125, "learning_rate": 0.0019694448267936495, "loss": 0.2613, "step": 8904 }, { "epoch": 0.01579107412460693, "grad_norm": 0.37890625, "learning_rate": 0.0019694294478986843, "loss": 0.2224, "step": 8906 }, { "epoch": 0.015794620289916746, "grad_norm": 0.70703125, "learning_rate": 0.0019694140652013396, "loss": 0.2584, "step": 8908 }, { "epoch": 0.01579816645522656, "grad_norm": 0.66796875, "learning_rate": 0.0019693986787016828, "loss": 0.2002, "step": 8910 }, { "epoch": 0.015801712620536375, "grad_norm": 0.318359375, "learning_rate": 0.0019693832883997814, "loss": 0.1634, "step": 8912 }, { "epoch": 0.01580525878584619, "grad_norm": 0.76171875, "learning_rate": 0.0019693678942957022, "loss": 0.2095, "step": 8914 }, { "epoch": 0.015808804951156004, "grad_norm": 0.56640625, "learning_rate": 0.001969352496389513, "loss": 0.2213, "step": 8916 }, { "epoch": 0.015812351116465822, "grad_norm": 0.28125, "learning_rate": 0.001969337094681281, "loss": 0.183, "step": 8918 }, { "epoch": 0.015815897281775636, "grad_norm": 0.416015625, "learning_rate": 0.001969321689171073, "loss": 0.2282, "step": 8920 }, { "epoch": 0.01581944344708545, "grad_norm": 0.73046875, "learning_rate": 0.0019693062798589577, "loss": 0.3324, "step": 8922 }, { "epoch": 0.015822989612395266, "grad_norm": 0.5078125, "learning_rate": 0.0019692908667450016, "loss": 0.1827, "step": 8924 }, { "epoch": 0.01582653577770508, "grad_norm": 0.494140625, "learning_rate": 0.0019692754498292714, "loss": 0.2467, "step": 8926 }, { "epoch": 0.015830081943014898, "grad_norm": 0.671875, "learning_rate": 0.001969260029111836, "loss": 0.2548, "step": 8928 }, { "epoch": 0.015833628108324713, "grad_norm": 0.890625, "learning_rate": 0.001969244604592762, "loss": 0.2927, "step": 8930 }, { "epoch": 0.015837174273634527, "grad_norm": 0.23046875, "learning_rate": 0.001969229176272117, "loss": 0.2603, "step": 8932 }, { "epoch": 0.015840720438944342, "grad_norm": 0.2119140625, "learning_rate": 0.0019692137441499682, "loss": 0.2697, "step": 8934 }, { "epoch": 0.015844266604254156, "grad_norm": 0.419921875, "learning_rate": 0.0019691983082263838, "loss": 0.2379, "step": 8936 }, { "epoch": 0.01584781276956397, "grad_norm": 0.8203125, "learning_rate": 0.001969182868501431, "loss": 0.2241, "step": 8938 }, { "epoch": 0.01585135893487379, "grad_norm": 0.376953125, "learning_rate": 0.0019691674249751765, "loss": 0.2037, "step": 8940 }, { "epoch": 0.015854905100183603, "grad_norm": 0.63671875, "learning_rate": 0.0019691519776476895, "loss": 0.2639, "step": 8942 }, { "epoch": 0.015858451265493418, "grad_norm": 0.33984375, "learning_rate": 0.0019691365265190356, "loss": 0.191, "step": 8944 }, { "epoch": 0.015861997430803233, "grad_norm": 0.421875, "learning_rate": 0.001969121071589284, "loss": 0.2542, "step": 8946 }, { "epoch": 0.015865543596113047, "grad_norm": 0.73046875, "learning_rate": 0.001969105612858502, "loss": 0.2814, "step": 8948 }, { "epoch": 0.01586908976142286, "grad_norm": 0.765625, "learning_rate": 0.0019690901503267564, "loss": 0.2825, "step": 8950 }, { "epoch": 0.01587263592673268, "grad_norm": 0.76953125, "learning_rate": 0.001969074683994115, "loss": 0.3389, "step": 8952 }, { "epoch": 0.015876182092042494, "grad_norm": 0.40234375, "learning_rate": 0.0019690592138606466, "loss": 0.2813, "step": 8954 }, { "epoch": 0.01587972825735231, "grad_norm": 0.33203125, "learning_rate": 0.001969043739926417, "loss": 0.2227, "step": 8956 }, { "epoch": 0.015883274422662123, "grad_norm": 0.3203125, "learning_rate": 0.0019690282621914958, "loss": 0.3896, "step": 8958 }, { "epoch": 0.015886820587971938, "grad_norm": 2.5, "learning_rate": 0.0019690127806559496, "loss": 0.3129, "step": 8960 }, { "epoch": 0.015890366753281756, "grad_norm": 0.458984375, "learning_rate": 0.001968997295319846, "loss": 0.2696, "step": 8962 }, { "epoch": 0.01589391291859157, "grad_norm": 0.65625, "learning_rate": 0.0019689818061832533, "loss": 0.2344, "step": 8964 }, { "epoch": 0.015897459083901385, "grad_norm": 0.40234375, "learning_rate": 0.0019689663132462384, "loss": 0.2639, "step": 8966 }, { "epoch": 0.0159010052492112, "grad_norm": 0.98046875, "learning_rate": 0.00196895081650887, "loss": 0.259, "step": 8968 }, { "epoch": 0.015904551414521014, "grad_norm": 0.6640625, "learning_rate": 0.0019689353159712156, "loss": 0.1795, "step": 8970 }, { "epoch": 0.01590809757983083, "grad_norm": 0.671875, "learning_rate": 0.0019689198116333425, "loss": 0.2184, "step": 8972 }, { "epoch": 0.015911643745140647, "grad_norm": 1.03125, "learning_rate": 0.001968904303495319, "loss": 0.2089, "step": 8974 }, { "epoch": 0.01591518991045046, "grad_norm": 0.73046875, "learning_rate": 0.0019688887915572132, "loss": 0.2451, "step": 8976 }, { "epoch": 0.015918736075760276, "grad_norm": 0.4375, "learning_rate": 0.001968873275819092, "loss": 0.2544, "step": 8978 }, { "epoch": 0.01592228224107009, "grad_norm": 6.65625, "learning_rate": 0.001968857756281024, "loss": 0.4497, "step": 8980 }, { "epoch": 0.015925828406379905, "grad_norm": 0.546875, "learning_rate": 0.001968842232943077, "loss": 0.1765, "step": 8982 }, { "epoch": 0.01592937457168972, "grad_norm": 0.359375, "learning_rate": 0.0019688267058053186, "loss": 0.2274, "step": 8984 }, { "epoch": 0.015932920736999538, "grad_norm": 0.9296875, "learning_rate": 0.0019688111748678164, "loss": 0.2432, "step": 8986 }, { "epoch": 0.015936466902309352, "grad_norm": 4.28125, "learning_rate": 0.0019687956401306396, "loss": 0.2341, "step": 8988 }, { "epoch": 0.015940013067619167, "grad_norm": 4.1875, "learning_rate": 0.0019687801015938547, "loss": 0.3296, "step": 8990 }, { "epoch": 0.01594355923292898, "grad_norm": 0.470703125, "learning_rate": 0.001968764559257531, "loss": 0.213, "step": 8992 }, { "epoch": 0.015947105398238796, "grad_norm": 0.87890625, "learning_rate": 0.001968749013121735, "loss": 0.2537, "step": 8994 }, { "epoch": 0.015950651563548614, "grad_norm": 0.41796875, "learning_rate": 0.001968733463186536, "loss": 0.21, "step": 8996 }, { "epoch": 0.01595419772885843, "grad_norm": 0.400390625, "learning_rate": 0.001968717909452001, "loss": 0.2433, "step": 8998 }, { "epoch": 0.015957743894168243, "grad_norm": 0.515625, "learning_rate": 0.0019687023519181987, "loss": 0.3763, "step": 9000 }, { "epoch": 0.015961290059478057, "grad_norm": 0.6015625, "learning_rate": 0.001968686790585197, "loss": 0.2646, "step": 9002 }, { "epoch": 0.015964836224787872, "grad_norm": 0.59765625, "learning_rate": 0.0019686712254530637, "loss": 0.2571, "step": 9004 }, { "epoch": 0.015968382390097687, "grad_norm": 2.140625, "learning_rate": 0.001968655656521867, "loss": 0.4143, "step": 9006 }, { "epoch": 0.015971928555407505, "grad_norm": 0.28515625, "learning_rate": 0.0019686400837916752, "loss": 0.2215, "step": 9008 }, { "epoch": 0.01597547472071732, "grad_norm": 0.41015625, "learning_rate": 0.001968624507262556, "loss": 0.2064, "step": 9010 }, { "epoch": 0.015979020886027134, "grad_norm": 0.51953125, "learning_rate": 0.0019686089269345787, "loss": 0.2513, "step": 9012 }, { "epoch": 0.015982567051336948, "grad_norm": 0.498046875, "learning_rate": 0.0019685933428078097, "loss": 0.276, "step": 9014 }, { "epoch": 0.015986113216646763, "grad_norm": 0.37109375, "learning_rate": 0.0019685777548823177, "loss": 0.1765, "step": 9016 }, { "epoch": 0.015989659381956577, "grad_norm": 0.30859375, "learning_rate": 0.0019685621631581715, "loss": 0.1829, "step": 9018 }, { "epoch": 0.015993205547266395, "grad_norm": 2.296875, "learning_rate": 0.001968546567635439, "loss": 0.3764, "step": 9020 }, { "epoch": 0.01599675171257621, "grad_norm": 0.65234375, "learning_rate": 0.001968530968314188, "loss": 0.2513, "step": 9022 }, { "epoch": 0.016000297877886024, "grad_norm": 0.5, "learning_rate": 0.001968515365194487, "loss": 0.3134, "step": 9024 }, { "epoch": 0.01600384404319584, "grad_norm": 0.291015625, "learning_rate": 0.0019684997582764046, "loss": 0.1954, "step": 9026 }, { "epoch": 0.016007390208505654, "grad_norm": 0.29296875, "learning_rate": 0.001968484147560009, "loss": 0.1838, "step": 9028 }, { "epoch": 0.01601093637381547, "grad_norm": 0.498046875, "learning_rate": 0.001968468533045368, "loss": 0.347, "step": 9030 }, { "epoch": 0.016014482539125286, "grad_norm": 0.390625, "learning_rate": 0.0019684529147325496, "loss": 0.1951, "step": 9032 }, { "epoch": 0.0160180287044351, "grad_norm": 0.375, "learning_rate": 0.001968437292621623, "loss": 0.419, "step": 9034 }, { "epoch": 0.016021574869744915, "grad_norm": 2.125, "learning_rate": 0.0019684216667126566, "loss": 0.2516, "step": 9036 }, { "epoch": 0.01602512103505473, "grad_norm": 0.2373046875, "learning_rate": 0.0019684060370057177, "loss": 0.2799, "step": 9038 }, { "epoch": 0.016028667200364544, "grad_norm": 0.419921875, "learning_rate": 0.001968390403500875, "loss": 0.2165, "step": 9040 }, { "epoch": 0.016032213365674362, "grad_norm": 0.3046875, "learning_rate": 0.0019683747661981975, "loss": 0.2137, "step": 9042 }, { "epoch": 0.016035759530984177, "grad_norm": 0.7578125, "learning_rate": 0.001968359125097753, "loss": 0.3507, "step": 9044 }, { "epoch": 0.01603930569629399, "grad_norm": 0.462890625, "learning_rate": 0.00196834348019961, "loss": 0.2477, "step": 9046 }, { "epoch": 0.016042851861603806, "grad_norm": 0.7734375, "learning_rate": 0.0019683278315038378, "loss": 0.2222, "step": 9048 }, { "epoch": 0.01604639802691362, "grad_norm": 0.3671875, "learning_rate": 0.0019683121790105033, "loss": 0.2, "step": 9050 }, { "epoch": 0.016049944192223435, "grad_norm": 0.75390625, "learning_rate": 0.0019682965227196757, "loss": 0.2866, "step": 9052 }, { "epoch": 0.016053490357533253, "grad_norm": 0.2353515625, "learning_rate": 0.0019682808626314235, "loss": 0.2183, "step": 9054 }, { "epoch": 0.016057036522843068, "grad_norm": 1.75, "learning_rate": 0.0019682651987458157, "loss": 0.2317, "step": 9056 }, { "epoch": 0.016060582688152882, "grad_norm": 0.953125, "learning_rate": 0.00196824953106292, "loss": 0.2602, "step": 9058 }, { "epoch": 0.016064128853462697, "grad_norm": 0.4765625, "learning_rate": 0.0019682338595828045, "loss": 0.2026, "step": 9060 }, { "epoch": 0.01606767501877251, "grad_norm": 0.306640625, "learning_rate": 0.0019682181843055395, "loss": 0.2367, "step": 9062 }, { "epoch": 0.01607122118408233, "grad_norm": 1.203125, "learning_rate": 0.0019682025052311916, "loss": 0.3423, "step": 9064 }, { "epoch": 0.016074767349392144, "grad_norm": 0.365234375, "learning_rate": 0.001968186822359831, "loss": 0.2734, "step": 9066 }, { "epoch": 0.01607831351470196, "grad_norm": 0.55859375, "learning_rate": 0.001968171135691525, "loss": 0.1992, "step": 9068 }, { "epoch": 0.016081859680011773, "grad_norm": 0.9140625, "learning_rate": 0.001968155445226343, "loss": 0.3029, "step": 9070 }, { "epoch": 0.016085405845321588, "grad_norm": 0.578125, "learning_rate": 0.001968139750964353, "loss": 0.2207, "step": 9072 }, { "epoch": 0.016088952010631402, "grad_norm": 0.6640625, "learning_rate": 0.0019681240529056247, "loss": 0.3986, "step": 9074 }, { "epoch": 0.01609249817594122, "grad_norm": 0.267578125, "learning_rate": 0.0019681083510502254, "loss": 0.5211, "step": 9076 }, { "epoch": 0.016096044341251035, "grad_norm": 0.65625, "learning_rate": 0.0019680926453982243, "loss": 0.2527, "step": 9078 }, { "epoch": 0.01609959050656085, "grad_norm": 0.2734375, "learning_rate": 0.001968076935949691, "loss": 0.204, "step": 9080 }, { "epoch": 0.016103136671870664, "grad_norm": 1.6484375, "learning_rate": 0.001968061222704693, "loss": 0.3984, "step": 9082 }, { "epoch": 0.01610668283718048, "grad_norm": 0.52734375, "learning_rate": 0.0019680455056632993, "loss": 0.2289, "step": 9084 }, { "epoch": 0.016110229002490293, "grad_norm": 2.1875, "learning_rate": 0.001968029784825579, "loss": 0.3513, "step": 9086 }, { "epoch": 0.01611377516780011, "grad_norm": 1.2890625, "learning_rate": 0.0019680140601916005, "loss": 0.2703, "step": 9088 }, { "epoch": 0.016117321333109925, "grad_norm": 0.37890625, "learning_rate": 0.001967998331761433, "loss": 0.229, "step": 9090 }, { "epoch": 0.01612086749841974, "grad_norm": 0.6484375, "learning_rate": 0.0019679825995351447, "loss": 0.3204, "step": 9092 }, { "epoch": 0.016124413663729555, "grad_norm": 0.36328125, "learning_rate": 0.001967966863512805, "loss": 0.224, "step": 9094 }, { "epoch": 0.01612795982903937, "grad_norm": 0.57421875, "learning_rate": 0.0019679511236944816, "loss": 0.2061, "step": 9096 }, { "epoch": 0.016131505994349187, "grad_norm": 0.48828125, "learning_rate": 0.001967935380080245, "loss": 0.2472, "step": 9098 }, { "epoch": 0.016135052159659, "grad_norm": 0.466796875, "learning_rate": 0.0019679196326701626, "loss": 0.2555, "step": 9100 }, { "epoch": 0.016138598324968816, "grad_norm": 0.474609375, "learning_rate": 0.0019679038814643043, "loss": 0.1926, "step": 9102 }, { "epoch": 0.01614214449027863, "grad_norm": 0.38671875, "learning_rate": 0.001967888126462739, "loss": 0.2364, "step": 9104 }, { "epoch": 0.016145690655588445, "grad_norm": 0.35546875, "learning_rate": 0.001967872367665534, "loss": 0.2426, "step": 9106 }, { "epoch": 0.01614923682089826, "grad_norm": 0.90625, "learning_rate": 0.0019678566050727606, "loss": 0.2446, "step": 9108 }, { "epoch": 0.016152782986208078, "grad_norm": 0.455078125, "learning_rate": 0.001967840838684486, "loss": 0.2825, "step": 9110 }, { "epoch": 0.016156329151517893, "grad_norm": 0.515625, "learning_rate": 0.001967825068500779, "loss": 0.2445, "step": 9112 }, { "epoch": 0.016159875316827707, "grad_norm": 1.3046875, "learning_rate": 0.00196780929452171, "loss": 0.2415, "step": 9114 }, { "epoch": 0.01616342148213752, "grad_norm": 0.58203125, "learning_rate": 0.001967793516747347, "loss": 0.3694, "step": 9116 }, { "epoch": 0.016166967647447336, "grad_norm": 0.359375, "learning_rate": 0.0019677777351777594, "loss": 0.1842, "step": 9118 }, { "epoch": 0.01617051381275715, "grad_norm": 0.419921875, "learning_rate": 0.001967761949813016, "loss": 0.3075, "step": 9120 }, { "epoch": 0.01617405997806697, "grad_norm": 0.7734375, "learning_rate": 0.001967746160653186, "loss": 0.2139, "step": 9122 }, { "epoch": 0.016177606143376783, "grad_norm": 0.62890625, "learning_rate": 0.001967730367698338, "loss": 0.281, "step": 9124 }, { "epoch": 0.016181152308686598, "grad_norm": 0.78515625, "learning_rate": 0.0019677145709485417, "loss": 0.2777, "step": 9126 }, { "epoch": 0.016184698473996412, "grad_norm": 0.376953125, "learning_rate": 0.0019676987704038656, "loss": 0.2851, "step": 9128 }, { "epoch": 0.016188244639306227, "grad_norm": 0.61328125, "learning_rate": 0.001967682966064379, "loss": 0.2513, "step": 9130 }, { "epoch": 0.016191790804616045, "grad_norm": 0.50390625, "learning_rate": 0.001967667157930152, "loss": 0.2483, "step": 9132 }, { "epoch": 0.01619533696992586, "grad_norm": 0.88671875, "learning_rate": 0.001967651346001252, "loss": 0.2562, "step": 9134 }, { "epoch": 0.016198883135235674, "grad_norm": 0.85546875, "learning_rate": 0.0019676355302777494, "loss": 0.4384, "step": 9136 }, { "epoch": 0.01620242930054549, "grad_norm": 0.4140625, "learning_rate": 0.001967619710759713, "loss": 0.26, "step": 9138 }, { "epoch": 0.016205975465855303, "grad_norm": 0.365234375, "learning_rate": 0.0019676038874472117, "loss": 0.264, "step": 9140 }, { "epoch": 0.016209521631165118, "grad_norm": 0.6796875, "learning_rate": 0.001967588060340315, "loss": 0.3224, "step": 9142 }, { "epoch": 0.016213067796474936, "grad_norm": 0.248046875, "learning_rate": 0.0019675722294390918, "loss": 0.2915, "step": 9144 }, { "epoch": 0.01621661396178475, "grad_norm": 0.59765625, "learning_rate": 0.0019675563947436124, "loss": 0.2036, "step": 9146 }, { "epoch": 0.016220160127094565, "grad_norm": 1.3125, "learning_rate": 0.0019675405562539446, "loss": 0.2886, "step": 9148 }, { "epoch": 0.01622370629240438, "grad_norm": 0.8671875, "learning_rate": 0.0019675247139701586, "loss": 0.2406, "step": 9150 }, { "epoch": 0.016227252457714194, "grad_norm": 0.58984375, "learning_rate": 0.0019675088678923233, "loss": 0.2157, "step": 9152 }, { "epoch": 0.01623079862302401, "grad_norm": 1.4296875, "learning_rate": 0.001967493018020508, "loss": 0.2927, "step": 9154 }, { "epoch": 0.016234344788333827, "grad_norm": 0.64453125, "learning_rate": 0.001967477164354783, "loss": 0.2413, "step": 9156 }, { "epoch": 0.01623789095364364, "grad_norm": 0.77734375, "learning_rate": 0.0019674613068952156, "loss": 0.2759, "step": 9158 }, { "epoch": 0.016241437118953456, "grad_norm": 0.451171875, "learning_rate": 0.001967445445641877, "loss": 0.2807, "step": 9160 }, { "epoch": 0.01624498328426327, "grad_norm": 0.3984375, "learning_rate": 0.0019674295805948354, "loss": 0.2622, "step": 9162 }, { "epoch": 0.016248529449573085, "grad_norm": 2.921875, "learning_rate": 0.001967413711754161, "loss": 0.2902, "step": 9164 }, { "epoch": 0.016252075614882903, "grad_norm": 0.45703125, "learning_rate": 0.001967397839119923, "loss": 0.2736, "step": 9166 }, { "epoch": 0.016255621780192717, "grad_norm": 1.7265625, "learning_rate": 0.0019673819626921905, "loss": 0.3685, "step": 9168 }, { "epoch": 0.016259167945502532, "grad_norm": 0.57421875, "learning_rate": 0.0019673660824710334, "loss": 0.1939, "step": 9170 }, { "epoch": 0.016262714110812346, "grad_norm": 1.7265625, "learning_rate": 0.0019673501984565204, "loss": 0.3006, "step": 9172 }, { "epoch": 0.01626626027612216, "grad_norm": 1.0625, "learning_rate": 0.001967334310648722, "loss": 0.2636, "step": 9174 }, { "epoch": 0.016269806441431976, "grad_norm": 0.33203125, "learning_rate": 0.0019673184190477067, "loss": 0.3767, "step": 9176 }, { "epoch": 0.016273352606741794, "grad_norm": 1.171875, "learning_rate": 0.0019673025236535448, "loss": 0.2422, "step": 9178 }, { "epoch": 0.016276898772051608, "grad_norm": 0.45703125, "learning_rate": 0.0019672866244663054, "loss": 0.1636, "step": 9180 }, { "epoch": 0.016280444937361423, "grad_norm": 0.32421875, "learning_rate": 0.0019672707214860577, "loss": 0.2745, "step": 9182 }, { "epoch": 0.016283991102671237, "grad_norm": 1.2734375, "learning_rate": 0.001967254814712872, "loss": 0.2408, "step": 9184 }, { "epoch": 0.016287537267981052, "grad_norm": 0.412109375, "learning_rate": 0.0019672389041468174, "loss": 0.2722, "step": 9186 }, { "epoch": 0.016291083433290866, "grad_norm": 0.83984375, "learning_rate": 0.0019672229897879636, "loss": 0.2187, "step": 9188 }, { "epoch": 0.016294629598600684, "grad_norm": 0.40234375, "learning_rate": 0.00196720707163638, "loss": 0.3051, "step": 9190 }, { "epoch": 0.0162981757639105, "grad_norm": 0.3671875, "learning_rate": 0.0019671911496921364, "loss": 0.2302, "step": 9192 }, { "epoch": 0.016301721929220313, "grad_norm": 0.46484375, "learning_rate": 0.0019671752239553025, "loss": 0.3003, "step": 9194 }, { "epoch": 0.016305268094530128, "grad_norm": 1.2265625, "learning_rate": 0.001967159294425948, "loss": 0.3314, "step": 9196 }, { "epoch": 0.016308814259839943, "grad_norm": 1.03125, "learning_rate": 0.0019671433611041424, "loss": 0.2185, "step": 9198 }, { "epoch": 0.01631236042514976, "grad_norm": 0.494140625, "learning_rate": 0.0019671274239899555, "loss": 0.2651, "step": 9200 }, { "epoch": 0.016315906590459575, "grad_norm": 0.52734375, "learning_rate": 0.001967111483083457, "loss": 0.2236, "step": 9202 }, { "epoch": 0.01631945275576939, "grad_norm": 0.87890625, "learning_rate": 0.001967095538384716, "loss": 0.2779, "step": 9204 }, { "epoch": 0.016322998921079204, "grad_norm": 0.39453125, "learning_rate": 0.0019670795898938036, "loss": 0.3884, "step": 9206 }, { "epoch": 0.01632654508638902, "grad_norm": 2.5, "learning_rate": 0.0019670636376107886, "loss": 0.2762, "step": 9208 }, { "epoch": 0.016330091251698833, "grad_norm": 0.53125, "learning_rate": 0.0019670476815357406, "loss": 0.2428, "step": 9210 }, { "epoch": 0.01633363741700865, "grad_norm": 0.38671875, "learning_rate": 0.0019670317216687297, "loss": 0.2458, "step": 9212 }, { "epoch": 0.016337183582318466, "grad_norm": 0.384765625, "learning_rate": 0.0019670157580098254, "loss": 0.2737, "step": 9214 }, { "epoch": 0.01634072974762828, "grad_norm": 2.3125, "learning_rate": 0.0019669997905590983, "loss": 0.2491, "step": 9216 }, { "epoch": 0.016344275912938095, "grad_norm": 1.2421875, "learning_rate": 0.0019669838193166174, "loss": 0.2095, "step": 9218 }, { "epoch": 0.01634782207824791, "grad_norm": 0.369140625, "learning_rate": 0.0019669678442824534, "loss": 0.2204, "step": 9220 }, { "epoch": 0.016351368243557724, "grad_norm": 0.3046875, "learning_rate": 0.0019669518654566753, "loss": 0.258, "step": 9222 }, { "epoch": 0.016354914408867542, "grad_norm": 0.357421875, "learning_rate": 0.0019669358828393532, "loss": 0.1901, "step": 9224 }, { "epoch": 0.016358460574177357, "grad_norm": 0.302734375, "learning_rate": 0.001966919896430557, "loss": 0.2416, "step": 9226 }, { "epoch": 0.01636200673948717, "grad_norm": 2.5625, "learning_rate": 0.001966903906230357, "loss": 0.3124, "step": 9228 }, { "epoch": 0.016365552904796986, "grad_norm": 0.29296875, "learning_rate": 0.001966887912238823, "loss": 0.5054, "step": 9230 }, { "epoch": 0.0163690990701068, "grad_norm": 0.443359375, "learning_rate": 0.0019668719144560246, "loss": 0.2961, "step": 9232 }, { "epoch": 0.01637264523541662, "grad_norm": 0.703125, "learning_rate": 0.0019668559128820317, "loss": 0.2606, "step": 9234 }, { "epoch": 0.016376191400726433, "grad_norm": 0.80078125, "learning_rate": 0.001966839907516915, "loss": 0.2821, "step": 9236 }, { "epoch": 0.016379737566036248, "grad_norm": 0.62109375, "learning_rate": 0.0019668238983607442, "loss": 0.2284, "step": 9238 }, { "epoch": 0.016383283731346062, "grad_norm": 3.140625, "learning_rate": 0.001966807885413589, "loss": 0.2976, "step": 9240 }, { "epoch": 0.016386829896655877, "grad_norm": 1.546875, "learning_rate": 0.0019667918686755194, "loss": 0.3194, "step": 9242 }, { "epoch": 0.01639037606196569, "grad_norm": 0.92578125, "learning_rate": 0.001966775848146606, "loss": 0.1863, "step": 9244 }, { "epoch": 0.01639392222727551, "grad_norm": 0.279296875, "learning_rate": 0.0019667598238269184, "loss": 0.1665, "step": 9246 }, { "epoch": 0.016397468392585324, "grad_norm": 0.490234375, "learning_rate": 0.0019667437957165265, "loss": 0.2047, "step": 9248 }, { "epoch": 0.01640101455789514, "grad_norm": 0.62109375, "learning_rate": 0.001966727763815501, "loss": 0.3, "step": 9250 }, { "epoch": 0.016404560723204953, "grad_norm": 1.6953125, "learning_rate": 0.0019667117281239118, "loss": 0.3029, "step": 9252 }, { "epoch": 0.016408106888514767, "grad_norm": 0.84375, "learning_rate": 0.001966695688641829, "loss": 0.3038, "step": 9254 }, { "epoch": 0.016411653053824582, "grad_norm": 0.75390625, "learning_rate": 0.0019666796453693225, "loss": 0.2118, "step": 9256 }, { "epoch": 0.0164151992191344, "grad_norm": 0.8046875, "learning_rate": 0.0019666635983064625, "loss": 0.2733, "step": 9258 }, { "epoch": 0.016418745384444215, "grad_norm": 0.443359375, "learning_rate": 0.0019666475474533194, "loss": 0.1609, "step": 9260 }, { "epoch": 0.01642229154975403, "grad_norm": 0.353515625, "learning_rate": 0.0019666314928099638, "loss": 0.2794, "step": 9262 }, { "epoch": 0.016425837715063844, "grad_norm": 0.94140625, "learning_rate": 0.001966615434376465, "loss": 0.2715, "step": 9264 }, { "epoch": 0.016429383880373658, "grad_norm": 0.45703125, "learning_rate": 0.0019665993721528934, "loss": 0.2159, "step": 9266 }, { "epoch": 0.016432930045683476, "grad_norm": 0.8359375, "learning_rate": 0.00196658330613932, "loss": 0.2003, "step": 9268 }, { "epoch": 0.01643647621099329, "grad_norm": 0.353515625, "learning_rate": 0.0019665672363358144, "loss": 0.1921, "step": 9270 }, { "epoch": 0.016440022376303105, "grad_norm": 0.55859375, "learning_rate": 0.0019665511627424475, "loss": 0.2469, "step": 9272 }, { "epoch": 0.01644356854161292, "grad_norm": 0.3203125, "learning_rate": 0.001966535085359289, "loss": 0.2532, "step": 9274 }, { "epoch": 0.016447114706922734, "grad_norm": 0.65234375, "learning_rate": 0.001966519004186409, "loss": 0.2338, "step": 9276 }, { "epoch": 0.01645066087223255, "grad_norm": 0.396484375, "learning_rate": 0.001966502919223878, "loss": 0.2488, "step": 9278 }, { "epoch": 0.016454207037542367, "grad_norm": 0.53515625, "learning_rate": 0.0019664868304717672, "loss": 0.1875, "step": 9280 }, { "epoch": 0.01645775320285218, "grad_norm": 0.79296875, "learning_rate": 0.001966470737930146, "loss": 0.2337, "step": 9282 }, { "epoch": 0.016461299368161996, "grad_norm": 1.4453125, "learning_rate": 0.0019664546415990854, "loss": 0.3458, "step": 9284 }, { "epoch": 0.01646484553347181, "grad_norm": 0.56640625, "learning_rate": 0.0019664385414786554, "loss": 0.2514, "step": 9286 }, { "epoch": 0.016468391698781625, "grad_norm": 0.369140625, "learning_rate": 0.001966422437568926, "loss": 0.2683, "step": 9288 }, { "epoch": 0.01647193786409144, "grad_norm": 0.421875, "learning_rate": 0.001966406329869969, "loss": 0.2173, "step": 9290 }, { "epoch": 0.016475484029401258, "grad_norm": 0.447265625, "learning_rate": 0.001966390218381853, "loss": 0.2834, "step": 9292 }, { "epoch": 0.016479030194711072, "grad_norm": 1.390625, "learning_rate": 0.00196637410310465, "loss": 0.2814, "step": 9294 }, { "epoch": 0.016482576360020887, "grad_norm": 0.8125, "learning_rate": 0.0019663579840384303, "loss": 0.2642, "step": 9296 }, { "epoch": 0.0164861225253307, "grad_norm": 0.53125, "learning_rate": 0.0019663418611832635, "loss": 0.2865, "step": 9298 }, { "epoch": 0.016489668690640516, "grad_norm": 0.419921875, "learning_rate": 0.001966325734539221, "loss": 0.212, "step": 9300 }, { "epoch": 0.016493214855950334, "grad_norm": 0.6953125, "learning_rate": 0.001966309604106372, "loss": 0.3237, "step": 9302 }, { "epoch": 0.01649676102126015, "grad_norm": 0.36328125, "learning_rate": 0.001966293469884789, "loss": 0.2307, "step": 9304 }, { "epoch": 0.016500307186569963, "grad_norm": 0.42578125, "learning_rate": 0.001966277331874541, "loss": 0.2543, "step": 9306 }, { "epoch": 0.016503853351879778, "grad_norm": 0.82421875, "learning_rate": 0.0019662611900756995, "loss": 0.2334, "step": 9308 }, { "epoch": 0.016507399517189592, "grad_norm": 0.9609375, "learning_rate": 0.001966245044488335, "loss": 0.2651, "step": 9310 }, { "epoch": 0.016510945682499407, "grad_norm": 0.412109375, "learning_rate": 0.0019662288951125175, "loss": 0.1734, "step": 9312 }, { "epoch": 0.016514491847809225, "grad_norm": 0.359375, "learning_rate": 0.0019662127419483176, "loss": 0.2603, "step": 9314 }, { "epoch": 0.01651803801311904, "grad_norm": 0.59375, "learning_rate": 0.001966196584995807, "loss": 0.2612, "step": 9316 }, { "epoch": 0.016521584178428854, "grad_norm": 0.375, "learning_rate": 0.0019661804242550552, "loss": 0.2759, "step": 9318 }, { "epoch": 0.01652513034373867, "grad_norm": 0.6484375, "learning_rate": 0.0019661642597261337, "loss": 0.2949, "step": 9320 }, { "epoch": 0.016528676509048483, "grad_norm": 1.2265625, "learning_rate": 0.0019661480914091125, "loss": 0.2587, "step": 9322 }, { "epoch": 0.016532222674358298, "grad_norm": 0.6875, "learning_rate": 0.001966131919304063, "loss": 0.282, "step": 9324 }, { "epoch": 0.016535768839668116, "grad_norm": 0.3828125, "learning_rate": 0.0019661157434110554, "loss": 0.2139, "step": 9326 }, { "epoch": 0.01653931500497793, "grad_norm": 0.259765625, "learning_rate": 0.0019660995637301604, "loss": 0.3594, "step": 9328 }, { "epoch": 0.016542861170287745, "grad_norm": 0.328125, "learning_rate": 0.0019660833802614495, "loss": 0.2163, "step": 9330 }, { "epoch": 0.01654640733559756, "grad_norm": 0.373046875, "learning_rate": 0.001966067193004993, "loss": 0.2012, "step": 9332 }, { "epoch": 0.016549953500907374, "grad_norm": 0.423828125, "learning_rate": 0.001966051001960861, "loss": 0.2219, "step": 9334 }, { "epoch": 0.016553499666217192, "grad_norm": 0.6875, "learning_rate": 0.0019660348071291254, "loss": 0.2263, "step": 9336 }, { "epoch": 0.016557045831527006, "grad_norm": 0.28515625, "learning_rate": 0.001966018608509857, "loss": 0.2248, "step": 9338 }, { "epoch": 0.01656059199683682, "grad_norm": 0.609375, "learning_rate": 0.001966002406103126, "loss": 0.2741, "step": 9340 }, { "epoch": 0.016564138162146635, "grad_norm": 0.255859375, "learning_rate": 0.0019659861999090033, "loss": 0.2063, "step": 9342 }, { "epoch": 0.01656768432745645, "grad_norm": 0.40234375, "learning_rate": 0.00196596998992756, "loss": 0.2669, "step": 9344 }, { "epoch": 0.016571230492766265, "grad_norm": 0.75, "learning_rate": 0.001965953776158867, "loss": 0.2563, "step": 9346 }, { "epoch": 0.016574776658076083, "grad_norm": 0.404296875, "learning_rate": 0.001965937558602995, "loss": 0.4689, "step": 9348 }, { "epoch": 0.016578322823385897, "grad_norm": 0.380859375, "learning_rate": 0.0019659213372600157, "loss": 0.2028, "step": 9350 }, { "epoch": 0.01658186898869571, "grad_norm": 0.435546875, "learning_rate": 0.001965905112129999, "loss": 0.2586, "step": 9352 }, { "epoch": 0.016585415154005526, "grad_norm": 0.3671875, "learning_rate": 0.0019658888832130164, "loss": 0.2505, "step": 9354 }, { "epoch": 0.01658896131931534, "grad_norm": 0.30859375, "learning_rate": 0.0019658726505091388, "loss": 0.2558, "step": 9356 }, { "epoch": 0.016592507484625155, "grad_norm": 3.703125, "learning_rate": 0.0019658564140184374, "loss": 0.2322, "step": 9358 }, { "epoch": 0.016596053649934973, "grad_norm": 0.34765625, "learning_rate": 0.0019658401737409825, "loss": 0.2673, "step": 9360 }, { "epoch": 0.016599599815244788, "grad_norm": 0.84375, "learning_rate": 0.001965823929676846, "loss": 0.2613, "step": 9362 }, { "epoch": 0.016603145980554603, "grad_norm": 0.365234375, "learning_rate": 0.0019658076818260986, "loss": 0.2405, "step": 9364 }, { "epoch": 0.016606692145864417, "grad_norm": 0.80078125, "learning_rate": 0.001965791430188811, "loss": 0.2341, "step": 9366 }, { "epoch": 0.01661023831117423, "grad_norm": 0.2333984375, "learning_rate": 0.0019657751747650548, "loss": 0.2252, "step": 9368 }, { "epoch": 0.01661378447648405, "grad_norm": 0.376953125, "learning_rate": 0.0019657589155549007, "loss": 0.2062, "step": 9370 }, { "epoch": 0.016617330641793864, "grad_norm": 0.6796875, "learning_rate": 0.0019657426525584204, "loss": 0.3197, "step": 9372 }, { "epoch": 0.01662087680710368, "grad_norm": 0.314453125, "learning_rate": 0.0019657263857756845, "loss": 0.2255, "step": 9374 }, { "epoch": 0.016624422972413493, "grad_norm": 0.3359375, "learning_rate": 0.001965710115206764, "loss": 0.2125, "step": 9376 }, { "epoch": 0.016627969137723308, "grad_norm": 0.52734375, "learning_rate": 0.001965693840851731, "loss": 0.2745, "step": 9378 }, { "epoch": 0.016631515303033122, "grad_norm": 0.337890625, "learning_rate": 0.0019656775627106553, "loss": 0.2305, "step": 9380 }, { "epoch": 0.01663506146834294, "grad_norm": 0.40625, "learning_rate": 0.001965661280783609, "loss": 0.2013, "step": 9382 }, { "epoch": 0.016638607633652755, "grad_norm": 0.50390625, "learning_rate": 0.001965644995070663, "loss": 0.2655, "step": 9384 }, { "epoch": 0.01664215379896257, "grad_norm": 0.396484375, "learning_rate": 0.0019656287055718883, "loss": 0.338, "step": 9386 }, { "epoch": 0.016645699964272384, "grad_norm": 0.46484375, "learning_rate": 0.0019656124122873573, "loss": 0.1839, "step": 9388 }, { "epoch": 0.0166492461295822, "grad_norm": 0.470703125, "learning_rate": 0.0019655961152171397, "loss": 0.2716, "step": 9390 }, { "epoch": 0.016652792294892013, "grad_norm": 0.462890625, "learning_rate": 0.001965579814361308, "loss": 0.2017, "step": 9392 }, { "epoch": 0.01665633846020183, "grad_norm": 1.9375, "learning_rate": 0.0019655635097199325, "loss": 0.3478, "step": 9394 }, { "epoch": 0.016659884625511646, "grad_norm": 0.25390625, "learning_rate": 0.0019655472012930853, "loss": 0.2572, "step": 9396 }, { "epoch": 0.01666343079082146, "grad_norm": 0.8828125, "learning_rate": 0.0019655308890808372, "loss": 0.2865, "step": 9398 }, { "epoch": 0.016666976956131275, "grad_norm": 0.41796875, "learning_rate": 0.00196551457308326, "loss": 0.2431, "step": 9400 }, { "epoch": 0.01667052312144109, "grad_norm": 0.298828125, "learning_rate": 0.0019654982533004245, "loss": 0.4089, "step": 9402 }, { "epoch": 0.016674069286750907, "grad_norm": 0.89453125, "learning_rate": 0.0019654819297324024, "loss": 0.2949, "step": 9404 }, { "epoch": 0.016677615452060722, "grad_norm": 1.125, "learning_rate": 0.0019654656023792654, "loss": 0.4328, "step": 9406 }, { "epoch": 0.016681161617370537, "grad_norm": 0.373046875, "learning_rate": 0.0019654492712410845, "loss": 0.2589, "step": 9408 }, { "epoch": 0.01668470778268035, "grad_norm": 0.451171875, "learning_rate": 0.001965432936317931, "loss": 0.3978, "step": 9410 }, { "epoch": 0.016688253947990166, "grad_norm": 0.70703125, "learning_rate": 0.0019654165976098767, "loss": 0.2142, "step": 9412 }, { "epoch": 0.01669180011329998, "grad_norm": 0.408203125, "learning_rate": 0.0019654002551169926, "loss": 0.208, "step": 9414 }, { "epoch": 0.016695346278609798, "grad_norm": 0.59375, "learning_rate": 0.001965383908839351, "loss": 0.2061, "step": 9416 }, { "epoch": 0.016698892443919613, "grad_norm": 0.8828125, "learning_rate": 0.0019653675587770225, "loss": 0.4743, "step": 9418 }, { "epoch": 0.016702438609229427, "grad_norm": 1.0625, "learning_rate": 0.001965351204930079, "loss": 0.2348, "step": 9420 }, { "epoch": 0.016705984774539242, "grad_norm": 0.7734375, "learning_rate": 0.001965334847298592, "loss": 0.2371, "step": 9422 }, { "epoch": 0.016709530939849056, "grad_norm": 2.40625, "learning_rate": 0.0019653184858826334, "loss": 0.2486, "step": 9424 }, { "epoch": 0.01671307710515887, "grad_norm": 0.3828125, "learning_rate": 0.0019653021206822736, "loss": 0.2321, "step": 9426 }, { "epoch": 0.01671662327046869, "grad_norm": 0.404296875, "learning_rate": 0.0019652857516975855, "loss": 0.1857, "step": 9428 }, { "epoch": 0.016720169435778504, "grad_norm": 0.5234375, "learning_rate": 0.00196526937892864, "loss": 0.2349, "step": 9430 }, { "epoch": 0.016723715601088318, "grad_norm": 0.3984375, "learning_rate": 0.0019652530023755088, "loss": 0.1952, "step": 9432 }, { "epoch": 0.016727261766398133, "grad_norm": 0.453125, "learning_rate": 0.0019652366220382638, "loss": 0.2145, "step": 9434 }, { "epoch": 0.016730807931707947, "grad_norm": 0.54296875, "learning_rate": 0.001965220237916976, "loss": 0.1916, "step": 9436 }, { "epoch": 0.016734354097017765, "grad_norm": 0.63671875, "learning_rate": 0.0019652038500117177, "loss": 0.2237, "step": 9438 }, { "epoch": 0.01673790026232758, "grad_norm": 0.6484375, "learning_rate": 0.00196518745832256, "loss": 0.2424, "step": 9440 }, { "epoch": 0.016741446427637394, "grad_norm": 0.71484375, "learning_rate": 0.0019651710628495757, "loss": 0.276, "step": 9442 }, { "epoch": 0.01674499259294721, "grad_norm": 0.62109375, "learning_rate": 0.0019651546635928354, "loss": 0.3992, "step": 9444 }, { "epoch": 0.016748538758257023, "grad_norm": 0.3046875, "learning_rate": 0.0019651382605524108, "loss": 0.1902, "step": 9446 }, { "epoch": 0.016752084923566838, "grad_norm": 0.84375, "learning_rate": 0.0019651218537283742, "loss": 0.232, "step": 9448 }, { "epoch": 0.016755631088876656, "grad_norm": 1.234375, "learning_rate": 0.0019651054431207974, "loss": 0.1949, "step": 9450 }, { "epoch": 0.01675917725418647, "grad_norm": 1.0078125, "learning_rate": 0.0019650890287297514, "loss": 0.3047, "step": 9452 }, { "epoch": 0.016762723419496285, "grad_norm": 0.48046875, "learning_rate": 0.0019650726105553086, "loss": 0.2615, "step": 9454 }, { "epoch": 0.0167662695848061, "grad_norm": 0.61328125, "learning_rate": 0.001965056188597541, "loss": 0.2362, "step": 9456 }, { "epoch": 0.016769815750115914, "grad_norm": 0.302734375, "learning_rate": 0.00196503976285652, "loss": 0.2349, "step": 9458 }, { "epoch": 0.01677336191542573, "grad_norm": 1.03125, "learning_rate": 0.0019650233333323172, "loss": 0.2014, "step": 9460 }, { "epoch": 0.016776908080735547, "grad_norm": 0.337890625, "learning_rate": 0.001965006900025005, "loss": 0.2017, "step": 9462 }, { "epoch": 0.01678045424604536, "grad_norm": 0.5, "learning_rate": 0.0019649904629346557, "loss": 0.2151, "step": 9464 }, { "epoch": 0.016784000411355176, "grad_norm": 0.40234375, "learning_rate": 0.0019649740220613393, "loss": 0.2648, "step": 9466 }, { "epoch": 0.01678754657666499, "grad_norm": 0.55859375, "learning_rate": 0.00196495757740513, "loss": 0.2362, "step": 9468 }, { "epoch": 0.016791092741974805, "grad_norm": 1.71875, "learning_rate": 0.001964941128966098, "loss": 0.3285, "step": 9470 }, { "epoch": 0.016794638907284623, "grad_norm": 1.1484375, "learning_rate": 0.0019649246767443167, "loss": 0.3969, "step": 9472 }, { "epoch": 0.016798185072594438, "grad_norm": 0.388671875, "learning_rate": 0.001964908220739857, "loss": 0.2377, "step": 9474 }, { "epoch": 0.016801731237904252, "grad_norm": 0.58984375, "learning_rate": 0.001964891760952791, "loss": 0.4679, "step": 9476 }, { "epoch": 0.016805277403214067, "grad_norm": 0.369140625, "learning_rate": 0.0019648752973831906, "loss": 0.1752, "step": 9478 }, { "epoch": 0.01680882356852388, "grad_norm": 0.5625, "learning_rate": 0.0019648588300311283, "loss": 0.2766, "step": 9480 }, { "epoch": 0.016812369733833696, "grad_norm": 0.2412109375, "learning_rate": 0.001964842358896676, "loss": 0.2533, "step": 9482 }, { "epoch": 0.016815915899143514, "grad_norm": 1.0546875, "learning_rate": 0.0019648258839799052, "loss": 0.2178, "step": 9484 }, { "epoch": 0.01681946206445333, "grad_norm": 0.5625, "learning_rate": 0.0019648094052808884, "loss": 0.2029, "step": 9486 }, { "epoch": 0.016823008229763143, "grad_norm": 1.140625, "learning_rate": 0.0019647929227996977, "loss": 0.2834, "step": 9488 }, { "epoch": 0.016826554395072957, "grad_norm": 0.384765625, "learning_rate": 0.0019647764365364052, "loss": 0.4014, "step": 9490 }, { "epoch": 0.016830100560382772, "grad_norm": 0.3125, "learning_rate": 0.001964759946491083, "loss": 0.2392, "step": 9492 }, { "epoch": 0.016833646725692587, "grad_norm": 0.263671875, "learning_rate": 0.001964743452663803, "loss": 0.2035, "step": 9494 }, { "epoch": 0.016837192891002405, "grad_norm": 1.21875, "learning_rate": 0.0019647269550546373, "loss": 0.2188, "step": 9496 }, { "epoch": 0.01684073905631222, "grad_norm": 0.5, "learning_rate": 0.001964710453663658, "loss": 0.2276, "step": 9498 }, { "epoch": 0.016844285221622034, "grad_norm": 0.51953125, "learning_rate": 0.0019646939484909377, "loss": 0.2155, "step": 9500 }, { "epoch": 0.01684783138693185, "grad_norm": 0.8828125, "learning_rate": 0.0019646774395365484, "loss": 0.2257, "step": 9502 }, { "epoch": 0.016851377552241663, "grad_norm": 0.3671875, "learning_rate": 0.001964660926800562, "loss": 0.3609, "step": 9504 }, { "epoch": 0.01685492371755148, "grad_norm": 0.87890625, "learning_rate": 0.0019646444102830512, "loss": 0.2938, "step": 9506 }, { "epoch": 0.016858469882861295, "grad_norm": 2.21875, "learning_rate": 0.001964627889984088, "loss": 0.4673, "step": 9508 }, { "epoch": 0.01686201604817111, "grad_norm": 0.63671875, "learning_rate": 0.0019646113659037446, "loss": 0.2517, "step": 9510 }, { "epoch": 0.016865562213480925, "grad_norm": 0.68359375, "learning_rate": 0.0019645948380420937, "loss": 0.268, "step": 9512 }, { "epoch": 0.01686910837879074, "grad_norm": 0.341796875, "learning_rate": 0.0019645783063992066, "loss": 0.2256, "step": 9514 }, { "epoch": 0.016872654544100554, "grad_norm": 1.8828125, "learning_rate": 0.001964561770975156, "loss": 0.3528, "step": 9516 }, { "epoch": 0.01687620070941037, "grad_norm": 2.6875, "learning_rate": 0.0019645452317700153, "loss": 0.3423, "step": 9518 }, { "epoch": 0.016879746874720186, "grad_norm": 0.2470703125, "learning_rate": 0.001964528688783855, "loss": 0.1844, "step": 9520 }, { "epoch": 0.01688329304003, "grad_norm": 0.58984375, "learning_rate": 0.001964512142016749, "loss": 0.1722, "step": 9522 }, { "epoch": 0.016886839205339815, "grad_norm": 0.63671875, "learning_rate": 0.0019644955914687686, "loss": 0.2366, "step": 9524 }, { "epoch": 0.01689038537064963, "grad_norm": 0.671875, "learning_rate": 0.001964479037139987, "loss": 0.1995, "step": 9526 }, { "epoch": 0.016893931535959444, "grad_norm": 0.3203125, "learning_rate": 0.0019644624790304765, "loss": 0.2399, "step": 9528 }, { "epoch": 0.016897477701269262, "grad_norm": 0.7265625, "learning_rate": 0.001964445917140309, "loss": 0.2286, "step": 9530 }, { "epoch": 0.016901023866579077, "grad_norm": 0.66796875, "learning_rate": 0.001964429351469557, "loss": 0.3129, "step": 9532 }, { "epoch": 0.01690457003188889, "grad_norm": 0.5390625, "learning_rate": 0.0019644127820182935, "loss": 0.2849, "step": 9534 }, { "epoch": 0.016908116197198706, "grad_norm": 1.7265625, "learning_rate": 0.00196439620878659, "loss": 0.2212, "step": 9536 }, { "epoch": 0.01691166236250852, "grad_norm": 0.76171875, "learning_rate": 0.00196437963177452, "loss": 0.319, "step": 9538 }, { "epoch": 0.01691520852781834, "grad_norm": 0.5625, "learning_rate": 0.001964363050982156, "loss": 0.2335, "step": 9540 }, { "epoch": 0.016918754693128153, "grad_norm": 0.2490234375, "learning_rate": 0.001964346466409569, "loss": 0.2173, "step": 9542 }, { "epoch": 0.016922300858437968, "grad_norm": 0.53515625, "learning_rate": 0.0019643298780568333, "loss": 0.2494, "step": 9544 }, { "epoch": 0.016925847023747782, "grad_norm": 1.0859375, "learning_rate": 0.001964313285924021, "loss": 0.2045, "step": 9546 }, { "epoch": 0.016929393189057597, "grad_norm": 0.41796875, "learning_rate": 0.001964296690011204, "loss": 0.1743, "step": 9548 }, { "epoch": 0.01693293935436741, "grad_norm": 2.140625, "learning_rate": 0.0019642800903184555, "loss": 0.4388, "step": 9550 }, { "epoch": 0.01693648551967723, "grad_norm": 0.330078125, "learning_rate": 0.001964263486845848, "loss": 0.2515, "step": 9552 }, { "epoch": 0.016940031684987044, "grad_norm": 1.5703125, "learning_rate": 0.0019642468795934534, "loss": 0.3165, "step": 9554 }, { "epoch": 0.01694357785029686, "grad_norm": 0.326171875, "learning_rate": 0.001964230268561346, "loss": 0.2904, "step": 9556 }, { "epoch": 0.016947124015606673, "grad_norm": 0.490234375, "learning_rate": 0.001964213653749597, "loss": 0.2365, "step": 9558 }, { "epoch": 0.016950670180916488, "grad_norm": 0.53125, "learning_rate": 0.001964197035158279, "loss": 0.2333, "step": 9560 }, { "epoch": 0.016954216346226302, "grad_norm": 0.38671875, "learning_rate": 0.001964180412787466, "loss": 0.2503, "step": 9562 }, { "epoch": 0.01695776251153612, "grad_norm": 0.337890625, "learning_rate": 0.0019641637866372294, "loss": 0.32, "step": 9564 }, { "epoch": 0.016961308676845935, "grad_norm": 0.51953125, "learning_rate": 0.0019641471567076424, "loss": 0.3051, "step": 9566 }, { "epoch": 0.01696485484215575, "grad_norm": 0.55859375, "learning_rate": 0.0019641305229987772, "loss": 0.2374, "step": 9568 }, { "epoch": 0.016968401007465564, "grad_norm": 0.42578125, "learning_rate": 0.0019641138855107073, "loss": 0.2053, "step": 9570 }, { "epoch": 0.01697194717277538, "grad_norm": 1.21875, "learning_rate": 0.0019640972442435058, "loss": 0.3455, "step": 9572 }, { "epoch": 0.016975493338085196, "grad_norm": 0.4765625, "learning_rate": 0.0019640805991972443, "loss": 0.237, "step": 9574 }, { "epoch": 0.01697903950339501, "grad_norm": 0.341796875, "learning_rate": 0.001964063950371996, "loss": 0.1935, "step": 9576 }, { "epoch": 0.016982585668704826, "grad_norm": 1.1640625, "learning_rate": 0.0019640472977678347, "loss": 0.2765, "step": 9578 }, { "epoch": 0.01698613183401464, "grad_norm": 0.40625, "learning_rate": 0.001964030641384832, "loss": 0.1984, "step": 9580 }, { "epoch": 0.016989677999324455, "grad_norm": 0.8671875, "learning_rate": 0.001964013981223061, "loss": 0.2682, "step": 9582 }, { "epoch": 0.01699322416463427, "grad_norm": 1.671875, "learning_rate": 0.001963997317282595, "loss": 0.4339, "step": 9584 }, { "epoch": 0.016996770329944087, "grad_norm": 2.640625, "learning_rate": 0.001963980649563506, "loss": 0.285, "step": 9586 }, { "epoch": 0.017000316495253902, "grad_norm": 0.71484375, "learning_rate": 0.0019639639780658683, "loss": 0.2807, "step": 9588 }, { "epoch": 0.017003862660563716, "grad_norm": 0.44921875, "learning_rate": 0.0019639473027897535, "loss": 0.1932, "step": 9590 }, { "epoch": 0.01700740882587353, "grad_norm": 0.62890625, "learning_rate": 0.0019639306237352354, "loss": 0.2416, "step": 9592 }, { "epoch": 0.017010954991183345, "grad_norm": 0.6171875, "learning_rate": 0.0019639139409023862, "loss": 0.2396, "step": 9594 }, { "epoch": 0.01701450115649316, "grad_norm": 0.875, "learning_rate": 0.0019638972542912795, "loss": 0.2727, "step": 9596 }, { "epoch": 0.017018047321802978, "grad_norm": 0.408203125, "learning_rate": 0.001963880563901988, "loss": 0.2144, "step": 9598 }, { "epoch": 0.017021593487112793, "grad_norm": 1.0625, "learning_rate": 0.001963863869734585, "loss": 0.2307, "step": 9600 }, { "epoch": 0.017025139652422607, "grad_norm": 0.423828125, "learning_rate": 0.0019638471717891423, "loss": 0.2664, "step": 9602 }, { "epoch": 0.01702868581773242, "grad_norm": 0.5546875, "learning_rate": 0.0019638304700657343, "loss": 0.1799, "step": 9604 }, { "epoch": 0.017032231983042236, "grad_norm": 1.0859375, "learning_rate": 0.001963813764564434, "loss": 0.2072, "step": 9606 }, { "epoch": 0.017035778148352054, "grad_norm": 1.1640625, "learning_rate": 0.001963797055285314, "loss": 0.2371, "step": 9608 }, { "epoch": 0.01703932431366187, "grad_norm": 1.25, "learning_rate": 0.001963780342228447, "loss": 0.2137, "step": 9610 }, { "epoch": 0.017042870478971683, "grad_norm": 0.63671875, "learning_rate": 0.0019637636253939067, "loss": 0.1847, "step": 9612 }, { "epoch": 0.017046416644281498, "grad_norm": 1.7578125, "learning_rate": 0.001963746904781766, "loss": 0.2864, "step": 9614 }, { "epoch": 0.017049962809591312, "grad_norm": 6.8125, "learning_rate": 0.0019637301803920983, "loss": 0.3559, "step": 9616 }, { "epoch": 0.017053508974901127, "grad_norm": 0.8046875, "learning_rate": 0.0019637134522249764, "loss": 0.2836, "step": 9618 }, { "epoch": 0.017057055140210945, "grad_norm": 0.35546875, "learning_rate": 0.0019636967202804733, "loss": 0.4623, "step": 9620 }, { "epoch": 0.01706060130552076, "grad_norm": 0.369140625, "learning_rate": 0.0019636799845586628, "loss": 0.2132, "step": 9622 }, { "epoch": 0.017064147470830574, "grad_norm": 1.03125, "learning_rate": 0.0019636632450596176, "loss": 0.2257, "step": 9624 }, { "epoch": 0.01706769363614039, "grad_norm": 0.40234375, "learning_rate": 0.0019636465017834107, "loss": 0.2445, "step": 9626 }, { "epoch": 0.017071239801450203, "grad_norm": 0.83203125, "learning_rate": 0.0019636297547301162, "loss": 0.2282, "step": 9628 }, { "epoch": 0.017074785966760018, "grad_norm": 0.498046875, "learning_rate": 0.0019636130038998066, "loss": 0.2248, "step": 9630 }, { "epoch": 0.017078332132069836, "grad_norm": 0.3984375, "learning_rate": 0.0019635962492925555, "loss": 0.2048, "step": 9632 }, { "epoch": 0.01708187829737965, "grad_norm": 0.427734375, "learning_rate": 0.001963579490908436, "loss": 0.2065, "step": 9634 }, { "epoch": 0.017085424462689465, "grad_norm": 0.421875, "learning_rate": 0.001963562728747521, "loss": 0.2628, "step": 9636 }, { "epoch": 0.01708897062799928, "grad_norm": 0.703125, "learning_rate": 0.001963545962809885, "loss": 0.1987, "step": 9638 }, { "epoch": 0.017092516793309094, "grad_norm": 0.54296875, "learning_rate": 0.0019635291930955997, "loss": 0.2323, "step": 9640 }, { "epoch": 0.017096062958618912, "grad_norm": 0.5703125, "learning_rate": 0.00196351241960474, "loss": 0.2546, "step": 9642 }, { "epoch": 0.017099609123928727, "grad_norm": 0.341796875, "learning_rate": 0.001963495642337378, "loss": 0.212, "step": 9644 }, { "epoch": 0.01710315528923854, "grad_norm": 0.6484375, "learning_rate": 0.0019634788612935884, "loss": 0.2484, "step": 9646 }, { "epoch": 0.017106701454548356, "grad_norm": 0.6484375, "learning_rate": 0.001963462076473443, "loss": 0.2394, "step": 9648 }, { "epoch": 0.01711024761985817, "grad_norm": 0.443359375, "learning_rate": 0.001963445287877017, "loss": 0.2231, "step": 9650 }, { "epoch": 0.017113793785167985, "grad_norm": 0.56640625, "learning_rate": 0.0019634284955043816, "loss": 0.2258, "step": 9652 }, { "epoch": 0.017117339950477803, "grad_norm": 0.8671875, "learning_rate": 0.0019634116993556125, "loss": 0.2194, "step": 9654 }, { "epoch": 0.017120886115787617, "grad_norm": 0.45703125, "learning_rate": 0.001963394899430782, "loss": 0.2386, "step": 9656 }, { "epoch": 0.017124432281097432, "grad_norm": 0.9453125, "learning_rate": 0.0019633780957299635, "loss": 0.2901, "step": 9658 }, { "epoch": 0.017127978446407247, "grad_norm": 0.416015625, "learning_rate": 0.0019633612882532306, "loss": 0.2492, "step": 9660 }, { "epoch": 0.01713152461171706, "grad_norm": 0.4609375, "learning_rate": 0.001963344477000657, "loss": 0.2727, "step": 9662 }, { "epoch": 0.017135070777026876, "grad_norm": 0.255859375, "learning_rate": 0.0019633276619723163, "loss": 0.2561, "step": 9664 }, { "epoch": 0.017138616942336694, "grad_norm": 0.875, "learning_rate": 0.001963310843168282, "loss": 0.2272, "step": 9666 }, { "epoch": 0.017142163107646508, "grad_norm": 0.330078125, "learning_rate": 0.0019632940205886276, "loss": 0.2567, "step": 9668 }, { "epoch": 0.017145709272956323, "grad_norm": 0.890625, "learning_rate": 0.001963277194233426, "loss": 0.3432, "step": 9670 }, { "epoch": 0.017149255438266137, "grad_norm": 2.359375, "learning_rate": 0.001963260364102752, "loss": 0.3546, "step": 9672 }, { "epoch": 0.017152801603575952, "grad_norm": 0.46484375, "learning_rate": 0.0019632435301966786, "loss": 0.2367, "step": 9674 }, { "epoch": 0.01715634776888577, "grad_norm": 0.7109375, "learning_rate": 0.0019632266925152793, "loss": 0.2208, "step": 9676 }, { "epoch": 0.017159893934195584, "grad_norm": 0.2490234375, "learning_rate": 0.0019632098510586277, "loss": 0.1687, "step": 9678 }, { "epoch": 0.0171634400995054, "grad_norm": 0.279296875, "learning_rate": 0.0019631930058267976, "loss": 0.2545, "step": 9680 }, { "epoch": 0.017166986264815214, "grad_norm": 0.427734375, "learning_rate": 0.001963176156819863, "loss": 0.2024, "step": 9682 }, { "epoch": 0.017170532430125028, "grad_norm": 0.349609375, "learning_rate": 0.001963159304037897, "loss": 0.2608, "step": 9684 }, { "epoch": 0.017174078595434843, "grad_norm": 0.52734375, "learning_rate": 0.0019631424474809735, "loss": 0.2846, "step": 9686 }, { "epoch": 0.01717762476074466, "grad_norm": 5.6875, "learning_rate": 0.0019631255871491666, "loss": 0.385, "step": 9688 }, { "epoch": 0.017181170926054475, "grad_norm": 0.7890625, "learning_rate": 0.0019631087230425493, "loss": 0.1695, "step": 9690 }, { "epoch": 0.01718471709136429, "grad_norm": 0.4609375, "learning_rate": 0.0019630918551611963, "loss": 0.2207, "step": 9692 }, { "epoch": 0.017188263256674104, "grad_norm": 1.0078125, "learning_rate": 0.001963074983505181, "loss": 0.2661, "step": 9694 }, { "epoch": 0.01719180942198392, "grad_norm": 0.43359375, "learning_rate": 0.001963058108074577, "loss": 0.3198, "step": 9696 }, { "epoch": 0.017195355587293733, "grad_norm": 0.240234375, "learning_rate": 0.0019630412288694577, "loss": 0.19, "step": 9698 }, { "epoch": 0.01719890175260355, "grad_norm": 0.287109375, "learning_rate": 0.0019630243458898977, "loss": 0.1994, "step": 9700 }, { "epoch": 0.017202447917913366, "grad_norm": 0.419921875, "learning_rate": 0.0019630074591359702, "loss": 0.2371, "step": 9702 }, { "epoch": 0.01720599408322318, "grad_norm": 0.65625, "learning_rate": 0.00196299056860775, "loss": 0.2823, "step": 9704 }, { "epoch": 0.017209540248532995, "grad_norm": 0.71484375, "learning_rate": 0.0019629736743053097, "loss": 0.2987, "step": 9706 }, { "epoch": 0.01721308641384281, "grad_norm": 0.41796875, "learning_rate": 0.001962956776228724, "loss": 0.2761, "step": 9708 }, { "epoch": 0.017216632579152628, "grad_norm": 0.404296875, "learning_rate": 0.001962939874378067, "loss": 0.2157, "step": 9710 }, { "epoch": 0.017220178744462442, "grad_norm": 0.6875, "learning_rate": 0.0019629229687534114, "loss": 0.2217, "step": 9712 }, { "epoch": 0.017223724909772257, "grad_norm": 0.298828125, "learning_rate": 0.0019629060593548326, "loss": 0.276, "step": 9714 }, { "epoch": 0.01722727107508207, "grad_norm": 0.314453125, "learning_rate": 0.001962889146182404, "loss": 0.2515, "step": 9716 }, { "epoch": 0.017230817240391886, "grad_norm": 0.404296875, "learning_rate": 0.0019628722292361995, "loss": 0.246, "step": 9718 }, { "epoch": 0.0172343634057017, "grad_norm": 0.431640625, "learning_rate": 0.0019628553085162927, "loss": 0.2115, "step": 9720 }, { "epoch": 0.01723790957101152, "grad_norm": 0.515625, "learning_rate": 0.001962838384022758, "loss": 0.1892, "step": 9722 }, { "epoch": 0.017241455736321333, "grad_norm": 0.40625, "learning_rate": 0.0019628214557556698, "loss": 0.2051, "step": 9724 }, { "epoch": 0.017245001901631148, "grad_norm": 0.9140625, "learning_rate": 0.0019628045237151015, "loss": 0.1695, "step": 9726 }, { "epoch": 0.017248548066940962, "grad_norm": 0.9765625, "learning_rate": 0.0019627875879011276, "loss": 0.3314, "step": 9728 }, { "epoch": 0.017252094232250777, "grad_norm": 6.09375, "learning_rate": 0.001962770648313822, "loss": 0.4471, "step": 9730 }, { "epoch": 0.01725564039756059, "grad_norm": 0.435546875, "learning_rate": 0.0019627537049532583, "loss": 0.2151, "step": 9732 }, { "epoch": 0.01725918656287041, "grad_norm": 0.61328125, "learning_rate": 0.0019627367578195112, "loss": 0.2691, "step": 9734 }, { "epoch": 0.017262732728180224, "grad_norm": 0.28515625, "learning_rate": 0.0019627198069126547, "loss": 0.3818, "step": 9736 }, { "epoch": 0.01726627889349004, "grad_norm": 0.44921875, "learning_rate": 0.001962702852232763, "loss": 0.3112, "step": 9738 }, { "epoch": 0.017269825058799853, "grad_norm": 0.419921875, "learning_rate": 0.0019626858937799104, "loss": 0.2058, "step": 9740 }, { "epoch": 0.017273371224109667, "grad_norm": 0.353515625, "learning_rate": 0.0019626689315541705, "loss": 0.2172, "step": 9742 }, { "epoch": 0.017276917389419486, "grad_norm": 0.88671875, "learning_rate": 0.0019626519655556174, "loss": 0.3053, "step": 9744 }, { "epoch": 0.0172804635547293, "grad_norm": 0.2890625, "learning_rate": 0.001962634995784326, "loss": 0.2005, "step": 9746 }, { "epoch": 0.017284009720039115, "grad_norm": 0.330078125, "learning_rate": 0.0019626180222403703, "loss": 0.2145, "step": 9748 }, { "epoch": 0.01728755588534893, "grad_norm": 0.703125, "learning_rate": 0.0019626010449238247, "loss": 0.3239, "step": 9750 }, { "epoch": 0.017291102050658744, "grad_norm": 0.62890625, "learning_rate": 0.001962584063834763, "loss": 0.2424, "step": 9752 }, { "epoch": 0.01729464821596856, "grad_norm": 0.3515625, "learning_rate": 0.001962567078973259, "loss": 0.2768, "step": 9754 }, { "epoch": 0.017298194381278376, "grad_norm": 2.96875, "learning_rate": 0.0019625500903393883, "loss": 0.2033, "step": 9756 }, { "epoch": 0.01730174054658819, "grad_norm": 0.6953125, "learning_rate": 0.0019625330979332247, "loss": 0.2733, "step": 9758 }, { "epoch": 0.017305286711898005, "grad_norm": 0.64453125, "learning_rate": 0.001962516101754842, "loss": 0.346, "step": 9760 }, { "epoch": 0.01730883287720782, "grad_norm": 0.353515625, "learning_rate": 0.001962499101804315, "loss": 0.2294, "step": 9762 }, { "epoch": 0.017312379042517635, "grad_norm": 1.0859375, "learning_rate": 0.001962482098081718, "loss": 0.2946, "step": 9764 }, { "epoch": 0.01731592520782745, "grad_norm": 0.322265625, "learning_rate": 0.0019624650905871246, "loss": 0.2107, "step": 9766 }, { "epoch": 0.017319471373137267, "grad_norm": 1.1171875, "learning_rate": 0.001962448079320611, "loss": 0.2931, "step": 9768 }, { "epoch": 0.01732301753844708, "grad_norm": 0.435546875, "learning_rate": 0.0019624310642822494, "loss": 0.3014, "step": 9770 }, { "epoch": 0.017326563703756896, "grad_norm": 4.71875, "learning_rate": 0.001962414045472116, "loss": 0.3846, "step": 9772 }, { "epoch": 0.01733010986906671, "grad_norm": 0.9765625, "learning_rate": 0.001962397022890284, "loss": 0.2053, "step": 9774 }, { "epoch": 0.017333656034376525, "grad_norm": 0.478515625, "learning_rate": 0.0019623799965368294, "loss": 0.2185, "step": 9776 }, { "epoch": 0.017337202199686343, "grad_norm": 0.92578125, "learning_rate": 0.0019623629664118247, "loss": 0.3544, "step": 9778 }, { "epoch": 0.017340748364996158, "grad_norm": 0.42578125, "learning_rate": 0.0019623459325153456, "loss": 0.2414, "step": 9780 }, { "epoch": 0.017344294530305972, "grad_norm": 5.65625, "learning_rate": 0.001962328894847466, "loss": 0.3282, "step": 9782 }, { "epoch": 0.017347840695615787, "grad_norm": 0.93359375, "learning_rate": 0.0019623118534082607, "loss": 0.3129, "step": 9784 }, { "epoch": 0.0173513868609256, "grad_norm": 0.703125, "learning_rate": 0.0019622948081978045, "loss": 0.2256, "step": 9786 }, { "epoch": 0.017354933026235416, "grad_norm": 0.380859375, "learning_rate": 0.001962277759216172, "loss": 0.2163, "step": 9788 }, { "epoch": 0.017358479191545234, "grad_norm": 0.353515625, "learning_rate": 0.001962260706463437, "loss": 0.2617, "step": 9790 }, { "epoch": 0.01736202535685505, "grad_norm": 0.423828125, "learning_rate": 0.0019622436499396744, "loss": 0.2128, "step": 9792 }, { "epoch": 0.017365571522164863, "grad_norm": 0.3125, "learning_rate": 0.0019622265896449592, "loss": 0.2186, "step": 9794 }, { "epoch": 0.017369117687474678, "grad_norm": 0.8984375, "learning_rate": 0.001962209525579366, "loss": 0.28, "step": 9796 }, { "epoch": 0.017372663852784492, "grad_norm": 0.78125, "learning_rate": 0.0019621924577429687, "loss": 0.2019, "step": 9798 }, { "epoch": 0.017376210018094307, "grad_norm": 0.67578125, "learning_rate": 0.0019621753861358425, "loss": 0.2743, "step": 9800 }, { "epoch": 0.017379756183404125, "grad_norm": 0.61328125, "learning_rate": 0.0019621583107580624, "loss": 0.2726, "step": 9802 }, { "epoch": 0.01738330234871394, "grad_norm": 2.09375, "learning_rate": 0.001962141231609702, "loss": 0.3481, "step": 9804 }, { "epoch": 0.017386848514023754, "grad_norm": 0.2890625, "learning_rate": 0.001962124148690837, "loss": 0.1917, "step": 9806 }, { "epoch": 0.01739039467933357, "grad_norm": 0.63671875, "learning_rate": 0.001962107062001542, "loss": 0.1999, "step": 9808 }, { "epoch": 0.017393940844643383, "grad_norm": 0.51171875, "learning_rate": 0.0019620899715418905, "loss": 0.2412, "step": 9810 }, { "epoch": 0.0173974870099532, "grad_norm": 0.91796875, "learning_rate": 0.0019620728773119592, "loss": 0.2498, "step": 9812 }, { "epoch": 0.017401033175263016, "grad_norm": 0.41796875, "learning_rate": 0.0019620557793118215, "loss": 0.247, "step": 9814 }, { "epoch": 0.01740457934057283, "grad_norm": 0.48046875, "learning_rate": 0.001962038677541553, "loss": 0.2054, "step": 9816 }, { "epoch": 0.017408125505882645, "grad_norm": 0.93359375, "learning_rate": 0.0019620215720012276, "loss": 0.329, "step": 9818 }, { "epoch": 0.01741167167119246, "grad_norm": 0.55078125, "learning_rate": 0.0019620044626909206, "loss": 0.2947, "step": 9820 }, { "epoch": 0.017415217836502274, "grad_norm": 0.52734375, "learning_rate": 0.001961987349610707, "loss": 0.1959, "step": 9822 }, { "epoch": 0.017418764001812092, "grad_norm": 10.0, "learning_rate": 0.0019619702327606614, "loss": 0.335, "step": 9824 }, { "epoch": 0.017422310167121906, "grad_norm": 0.318359375, "learning_rate": 0.0019619531121408585, "loss": 0.2384, "step": 9826 }, { "epoch": 0.01742585633243172, "grad_norm": 0.84375, "learning_rate": 0.0019619359877513735, "loss": 0.3579, "step": 9828 }, { "epoch": 0.017429402497741536, "grad_norm": 0.314453125, "learning_rate": 0.001961918859592281, "loss": 0.2646, "step": 9830 }, { "epoch": 0.01743294866305135, "grad_norm": 0.388671875, "learning_rate": 0.001961901727663656, "loss": 0.216, "step": 9832 }, { "epoch": 0.017436494828361165, "grad_norm": 0.60546875, "learning_rate": 0.001961884591965574, "loss": 0.2314, "step": 9834 }, { "epoch": 0.017440040993670983, "grad_norm": 0.251953125, "learning_rate": 0.0019618674524981092, "loss": 0.253, "step": 9836 }, { "epoch": 0.017443587158980797, "grad_norm": 0.53125, "learning_rate": 0.0019618503092613364, "loss": 0.3197, "step": 9838 }, { "epoch": 0.017447133324290612, "grad_norm": 2.8125, "learning_rate": 0.001961833162255331, "loss": 0.3736, "step": 9840 }, { "epoch": 0.017450679489600426, "grad_norm": 0.4609375, "learning_rate": 0.0019618160114801687, "loss": 0.2202, "step": 9842 }, { "epoch": 0.01745422565491024, "grad_norm": 0.455078125, "learning_rate": 0.001961798856935923, "loss": 0.2819, "step": 9844 }, { "epoch": 0.01745777182022006, "grad_norm": 0.5625, "learning_rate": 0.0019617816986226702, "loss": 0.2464, "step": 9846 }, { "epoch": 0.017461317985529873, "grad_norm": 0.56640625, "learning_rate": 0.0019617645365404847, "loss": 0.2757, "step": 9848 }, { "epoch": 0.017464864150839688, "grad_norm": 0.83203125, "learning_rate": 0.0019617473706894416, "loss": 0.2038, "step": 9850 }, { "epoch": 0.017468410316149503, "grad_norm": 1.15625, "learning_rate": 0.0019617302010696163, "loss": 0.4924, "step": 9852 }, { "epoch": 0.017471956481459317, "grad_norm": 0.384765625, "learning_rate": 0.0019617130276810835, "loss": 0.242, "step": 9854 }, { "epoch": 0.01747550264676913, "grad_norm": 1.1953125, "learning_rate": 0.001961695850523918, "loss": 0.3336, "step": 9856 }, { "epoch": 0.01747904881207895, "grad_norm": 0.376953125, "learning_rate": 0.001961678669598196, "loss": 0.1924, "step": 9858 }, { "epoch": 0.017482594977388764, "grad_norm": 0.310546875, "learning_rate": 0.001961661484903992, "loss": 0.2198, "step": 9860 }, { "epoch": 0.01748614114269858, "grad_norm": 0.400390625, "learning_rate": 0.001961644296441381, "loss": 0.3027, "step": 9862 }, { "epoch": 0.017489687308008393, "grad_norm": 0.359375, "learning_rate": 0.0019616271042104385, "loss": 0.2166, "step": 9864 }, { "epoch": 0.017493233473318208, "grad_norm": 0.515625, "learning_rate": 0.0019616099082112393, "loss": 0.2122, "step": 9866 }, { "epoch": 0.017496779638628022, "grad_norm": 0.4375, "learning_rate": 0.0019615927084438585, "loss": 0.3945, "step": 9868 }, { "epoch": 0.01750032580393784, "grad_norm": 16.0, "learning_rate": 0.001961575504908372, "loss": 0.3347, "step": 9870 }, { "epoch": 0.017503871969247655, "grad_norm": 1.59375, "learning_rate": 0.0019615582976048552, "loss": 0.3852, "step": 9872 }, { "epoch": 0.01750741813455747, "grad_norm": 1.015625, "learning_rate": 0.0019615410865333823, "loss": 0.35, "step": 9874 }, { "epoch": 0.017510964299867284, "grad_norm": 0.416015625, "learning_rate": 0.0019615238716940296, "loss": 0.2599, "step": 9876 }, { "epoch": 0.0175145104651771, "grad_norm": 1.328125, "learning_rate": 0.0019615066530868715, "loss": 0.3203, "step": 9878 }, { "epoch": 0.017518056630486917, "grad_norm": 0.5703125, "learning_rate": 0.0019614894307119837, "loss": 0.3224, "step": 9880 }, { "epoch": 0.01752160279579673, "grad_norm": 0.83203125, "learning_rate": 0.0019614722045694414, "loss": 0.2866, "step": 9882 }, { "epoch": 0.017525148961106546, "grad_norm": 0.4609375, "learning_rate": 0.0019614549746593208, "loss": 0.2413, "step": 9884 }, { "epoch": 0.01752869512641636, "grad_norm": 0.46484375, "learning_rate": 0.001961437740981696, "loss": 0.2427, "step": 9886 }, { "epoch": 0.017532241291726175, "grad_norm": 0.40625, "learning_rate": 0.001961420503536643, "loss": 0.2393, "step": 9888 }, { "epoch": 0.01753578745703599, "grad_norm": 0.28125, "learning_rate": 0.001961403262324237, "loss": 0.221, "step": 9890 }, { "epoch": 0.017539333622345808, "grad_norm": 0.357421875, "learning_rate": 0.0019613860173445536, "loss": 0.2751, "step": 9892 }, { "epoch": 0.017542879787655622, "grad_norm": 0.302734375, "learning_rate": 0.001961368768597668, "loss": 0.2411, "step": 9894 }, { "epoch": 0.017546425952965437, "grad_norm": 1.9140625, "learning_rate": 0.0019613515160836563, "loss": 0.4987, "step": 9896 }, { "epoch": 0.01754997211827525, "grad_norm": 0.40625, "learning_rate": 0.0019613342598025925, "loss": 0.1701, "step": 9898 }, { "epoch": 0.017553518283585066, "grad_norm": 0.66796875, "learning_rate": 0.0019613169997545533, "loss": 0.2124, "step": 9900 }, { "epoch": 0.01755706444889488, "grad_norm": 0.9140625, "learning_rate": 0.001961299735939614, "loss": 0.2289, "step": 9902 }, { "epoch": 0.0175606106142047, "grad_norm": 0.439453125, "learning_rate": 0.00196128246835785, "loss": 0.2478, "step": 9904 }, { "epoch": 0.017564156779514513, "grad_norm": 0.48828125, "learning_rate": 0.0019612651970093366, "loss": 0.2447, "step": 9906 }, { "epoch": 0.017567702944824327, "grad_norm": 0.314453125, "learning_rate": 0.0019612479218941497, "loss": 0.2238, "step": 9908 }, { "epoch": 0.017571249110134142, "grad_norm": 0.4765625, "learning_rate": 0.0019612306430123647, "loss": 0.2221, "step": 9910 }, { "epoch": 0.017574795275443957, "grad_norm": 0.482421875, "learning_rate": 0.0019612133603640566, "loss": 0.3662, "step": 9912 }, { "epoch": 0.017578341440753775, "grad_norm": 0.37109375, "learning_rate": 0.001961196073949302, "loss": 0.2078, "step": 9914 }, { "epoch": 0.01758188760606359, "grad_norm": 1.4140625, "learning_rate": 0.001961178783768176, "loss": 0.3762, "step": 9916 }, { "epoch": 0.017585433771373404, "grad_norm": 0.625, "learning_rate": 0.001961161489820754, "loss": 0.3405, "step": 9918 }, { "epoch": 0.017588979936683218, "grad_norm": 0.455078125, "learning_rate": 0.001961144192107112, "loss": 0.3045, "step": 9920 }, { "epoch": 0.017592526101993033, "grad_norm": 1.5859375, "learning_rate": 0.001961126890627326, "loss": 0.2774, "step": 9922 }, { "epoch": 0.017596072267302847, "grad_norm": 0.37109375, "learning_rate": 0.0019611095853814702, "loss": 0.2138, "step": 9924 }, { "epoch": 0.017599618432612665, "grad_norm": 0.46484375, "learning_rate": 0.0019610922763696223, "loss": 0.2308, "step": 9926 }, { "epoch": 0.01760316459792248, "grad_norm": 0.87890625, "learning_rate": 0.001961074963591856, "loss": 0.2296, "step": 9928 }, { "epoch": 0.017606710763232294, "grad_norm": 1.671875, "learning_rate": 0.0019610576470482487, "loss": 0.4829, "step": 9930 }, { "epoch": 0.01761025692854211, "grad_norm": 0.5546875, "learning_rate": 0.001961040326738875, "loss": 0.3005, "step": 9932 }, { "epoch": 0.017613803093851924, "grad_norm": 0.2578125, "learning_rate": 0.001961023002663811, "loss": 0.1953, "step": 9934 }, { "epoch": 0.017617349259161738, "grad_norm": 0.95703125, "learning_rate": 0.0019610056748231327, "loss": 0.2426, "step": 9936 }, { "epoch": 0.017620895424471556, "grad_norm": 0.51171875, "learning_rate": 0.001960988343216916, "loss": 0.1865, "step": 9938 }, { "epoch": 0.01762444158978137, "grad_norm": 2.046875, "learning_rate": 0.0019609710078452363, "loss": 0.4257, "step": 9940 }, { "epoch": 0.017627987755091185, "grad_norm": 0.74609375, "learning_rate": 0.0019609536687081692, "loss": 0.223, "step": 9942 }, { "epoch": 0.017631533920401, "grad_norm": 0.20703125, "learning_rate": 0.001960936325805791, "loss": 0.1712, "step": 9944 }, { "epoch": 0.017635080085710814, "grad_norm": 0.56640625, "learning_rate": 0.0019609189791381775, "loss": 0.2821, "step": 9946 }, { "epoch": 0.017638626251020632, "grad_norm": 0.443359375, "learning_rate": 0.0019609016287054043, "loss": 0.2669, "step": 9948 }, { "epoch": 0.017642172416330447, "grad_norm": 2.484375, "learning_rate": 0.0019608842745075477, "loss": 0.2853, "step": 9950 }, { "epoch": 0.01764571858164026, "grad_norm": 1.7109375, "learning_rate": 0.0019608669165446834, "loss": 0.2976, "step": 9952 }, { "epoch": 0.017649264746950076, "grad_norm": 0.73046875, "learning_rate": 0.001960849554816887, "loss": 0.271, "step": 9954 }, { "epoch": 0.01765281091225989, "grad_norm": 0.455078125, "learning_rate": 0.0019608321893242343, "loss": 0.2153, "step": 9956 }, { "epoch": 0.017656357077569705, "grad_norm": 0.458984375, "learning_rate": 0.001960814820066802, "loss": 0.2775, "step": 9958 }, { "epoch": 0.017659903242879523, "grad_norm": 0.46484375, "learning_rate": 0.0019607974470446663, "loss": 0.2958, "step": 9960 }, { "epoch": 0.017663449408189338, "grad_norm": 0.267578125, "learning_rate": 0.001960780070257902, "loss": 0.2829, "step": 9962 }, { "epoch": 0.017666995573499152, "grad_norm": 0.41015625, "learning_rate": 0.0019607626897065857, "loss": 0.2188, "step": 9964 }, { "epoch": 0.017670541738808967, "grad_norm": 0.71484375, "learning_rate": 0.0019607453053907932, "loss": 0.3304, "step": 9966 }, { "epoch": 0.01767408790411878, "grad_norm": 0.337890625, "learning_rate": 0.001960727917310601, "loss": 0.2613, "step": 9968 }, { "epoch": 0.017677634069428596, "grad_norm": 0.28515625, "learning_rate": 0.0019607105254660848, "loss": 0.3164, "step": 9970 }, { "epoch": 0.017681180234738414, "grad_norm": 0.72265625, "learning_rate": 0.0019606931298573205, "loss": 0.2424, "step": 9972 }, { "epoch": 0.01768472640004823, "grad_norm": 0.5390625, "learning_rate": 0.0019606757304843846, "loss": 0.2003, "step": 9974 }, { "epoch": 0.017688272565358043, "grad_norm": 0.314453125, "learning_rate": 0.0019606583273473533, "loss": 0.1778, "step": 9976 }, { "epoch": 0.017691818730667858, "grad_norm": 0.458984375, "learning_rate": 0.001960640920446302, "loss": 0.2982, "step": 9978 }, { "epoch": 0.017695364895977672, "grad_norm": 0.36328125, "learning_rate": 0.0019606235097813073, "loss": 0.2776, "step": 9980 }, { "epoch": 0.01769891106128749, "grad_norm": 0.416015625, "learning_rate": 0.0019606060953524453, "loss": 0.208, "step": 9982 }, { "epoch": 0.017702457226597305, "grad_norm": 0.328125, "learning_rate": 0.0019605886771597923, "loss": 0.2513, "step": 9984 }, { "epoch": 0.01770600339190712, "grad_norm": 0.38671875, "learning_rate": 0.001960571255203424, "loss": 0.2291, "step": 9986 }, { "epoch": 0.017709549557216934, "grad_norm": 0.6171875, "learning_rate": 0.001960553829483417, "loss": 0.2553, "step": 9988 }, { "epoch": 0.01771309572252675, "grad_norm": 0.40234375, "learning_rate": 0.0019605363999998476, "loss": 0.1867, "step": 9990 }, { "epoch": 0.017716641887836563, "grad_norm": 0.244140625, "learning_rate": 0.001960518966752792, "loss": 0.2054, "step": 9992 }, { "epoch": 0.01772018805314638, "grad_norm": 0.46875, "learning_rate": 0.001960501529742326, "loss": 0.3144, "step": 9994 }, { "epoch": 0.017723734218456196, "grad_norm": 0.62109375, "learning_rate": 0.001960484088968526, "loss": 0.2266, "step": 9996 }, { "epoch": 0.01772728038376601, "grad_norm": 0.6953125, "learning_rate": 0.001960466644431469, "loss": 0.2959, "step": 9998 }, { "epoch": 0.017730826549075825, "grad_norm": 3.296875, "learning_rate": 0.00196044919613123, "loss": 0.3539, "step": 10000 }, { "epoch": 0.01773437271438564, "grad_norm": 0.3515625, "learning_rate": 0.001960431744067886, "loss": 0.2411, "step": 10002 }, { "epoch": 0.017737918879695454, "grad_norm": 0.64453125, "learning_rate": 0.0019604142882415137, "loss": 0.2444, "step": 10004 }, { "epoch": 0.017741465045005272, "grad_norm": 0.40625, "learning_rate": 0.001960396828652189, "loss": 0.2599, "step": 10006 }, { "epoch": 0.017745011210315086, "grad_norm": 0.5625, "learning_rate": 0.0019603793652999886, "loss": 0.2996, "step": 10008 }, { "epoch": 0.0177485573756249, "grad_norm": 0.458984375, "learning_rate": 0.0019603618981849885, "loss": 0.413, "step": 10010 }, { "epoch": 0.017752103540934715, "grad_norm": 1.1953125, "learning_rate": 0.0019603444273072652, "loss": 0.3284, "step": 10012 }, { "epoch": 0.01775564970624453, "grad_norm": 0.369140625, "learning_rate": 0.001960326952666895, "loss": 0.2157, "step": 10014 }, { "epoch": 0.017759195871554348, "grad_norm": 1.6171875, "learning_rate": 0.001960309474263955, "loss": 0.2594, "step": 10016 }, { "epoch": 0.017762742036864163, "grad_norm": 0.57421875, "learning_rate": 0.0019602919920985204, "loss": 0.2838, "step": 10018 }, { "epoch": 0.017766288202173977, "grad_norm": 0.7578125, "learning_rate": 0.0019602745061706684, "loss": 0.2199, "step": 10020 }, { "epoch": 0.01776983436748379, "grad_norm": 0.48828125, "learning_rate": 0.001960257016480476, "loss": 0.2057, "step": 10022 }, { "epoch": 0.017773380532793606, "grad_norm": 0.40625, "learning_rate": 0.0019602395230280184, "loss": 0.1974, "step": 10024 }, { "epoch": 0.01777692669810342, "grad_norm": 0.69921875, "learning_rate": 0.0019602220258133733, "loss": 0.2449, "step": 10026 }, { "epoch": 0.01778047286341324, "grad_norm": 0.28125, "learning_rate": 0.001960204524836616, "loss": 0.2083, "step": 10028 }, { "epoch": 0.017784019028723053, "grad_norm": 0.80859375, "learning_rate": 0.001960187020097825, "loss": 0.3281, "step": 10030 }, { "epoch": 0.017787565194032868, "grad_norm": 0.31640625, "learning_rate": 0.0019601695115970745, "loss": 0.2249, "step": 10032 }, { "epoch": 0.017791111359342682, "grad_norm": 0.43359375, "learning_rate": 0.0019601519993344427, "loss": 0.2312, "step": 10034 }, { "epoch": 0.017794657524652497, "grad_norm": 0.318359375, "learning_rate": 0.001960134483310006, "loss": 0.3007, "step": 10036 }, { "epoch": 0.01779820368996231, "grad_norm": 0.392578125, "learning_rate": 0.00196011696352384, "loss": 0.2588, "step": 10038 }, { "epoch": 0.01780174985527213, "grad_norm": 0.36328125, "learning_rate": 0.0019600994399760225, "loss": 0.2709, "step": 10040 }, { "epoch": 0.017805296020581944, "grad_norm": 0.474609375, "learning_rate": 0.0019600819126666296, "loss": 0.2179, "step": 10042 }, { "epoch": 0.01780884218589176, "grad_norm": 0.54296875, "learning_rate": 0.0019600643815957377, "loss": 0.1617, "step": 10044 }, { "epoch": 0.017812388351201573, "grad_norm": 0.341796875, "learning_rate": 0.0019600468467634237, "loss": 0.2976, "step": 10046 }, { "epoch": 0.017815934516511388, "grad_norm": 0.50390625, "learning_rate": 0.0019600293081697647, "loss": 0.2313, "step": 10048 }, { "epoch": 0.017819480681821206, "grad_norm": 0.27734375, "learning_rate": 0.001960011765814837, "loss": 0.2177, "step": 10050 }, { "epoch": 0.01782302684713102, "grad_norm": 0.345703125, "learning_rate": 0.0019599942196987176, "loss": 0.2227, "step": 10052 }, { "epoch": 0.017826573012440835, "grad_norm": 2.71875, "learning_rate": 0.001959976669821483, "loss": 0.4067, "step": 10054 }, { "epoch": 0.01783011917775065, "grad_norm": 1.4921875, "learning_rate": 0.0019599591161832096, "loss": 0.2393, "step": 10056 }, { "epoch": 0.017833665343060464, "grad_norm": 0.61328125, "learning_rate": 0.001959941558783975, "loss": 0.2976, "step": 10058 }, { "epoch": 0.01783721150837028, "grad_norm": 0.291015625, "learning_rate": 0.001959923997623855, "loss": 0.2025, "step": 10060 }, { "epoch": 0.017840757673680097, "grad_norm": 0.82421875, "learning_rate": 0.0019599064327029273, "loss": 0.2785, "step": 10062 }, { "epoch": 0.01784430383898991, "grad_norm": 0.8828125, "learning_rate": 0.0019598888640212685, "loss": 0.22, "step": 10064 }, { "epoch": 0.017847850004299726, "grad_norm": 0.29296875, "learning_rate": 0.001959871291578955, "loss": 0.2273, "step": 10066 }, { "epoch": 0.01785139616960954, "grad_norm": 0.31640625, "learning_rate": 0.001959853715376064, "loss": 0.1614, "step": 10068 }, { "epoch": 0.017854942334919355, "grad_norm": 0.49609375, "learning_rate": 0.001959836135412673, "loss": 0.2047, "step": 10070 }, { "epoch": 0.01785848850022917, "grad_norm": 1.2265625, "learning_rate": 0.001959818551688857, "loss": 0.3237, "step": 10072 }, { "epoch": 0.017862034665538987, "grad_norm": 0.375, "learning_rate": 0.0019598009642046946, "loss": 0.284, "step": 10074 }, { "epoch": 0.017865580830848802, "grad_norm": 0.69140625, "learning_rate": 0.001959783372960262, "loss": 0.2585, "step": 10076 }, { "epoch": 0.017869126996158616, "grad_norm": 0.64453125, "learning_rate": 0.001959765777955637, "loss": 0.2346, "step": 10078 }, { "epoch": 0.01787267316146843, "grad_norm": 0.8046875, "learning_rate": 0.0019597481791908955, "loss": 0.2129, "step": 10080 }, { "epoch": 0.017876219326778246, "grad_norm": 0.40234375, "learning_rate": 0.001959730576666115, "loss": 0.1958, "step": 10082 }, { "epoch": 0.017879765492088064, "grad_norm": 0.71484375, "learning_rate": 0.001959712970381372, "loss": 0.1828, "step": 10084 }, { "epoch": 0.017883311657397878, "grad_norm": 0.375, "learning_rate": 0.0019596953603367444, "loss": 0.2684, "step": 10086 }, { "epoch": 0.017886857822707693, "grad_norm": 1.4296875, "learning_rate": 0.001959677746532308, "loss": 0.2528, "step": 10088 }, { "epoch": 0.017890403988017507, "grad_norm": 0.41796875, "learning_rate": 0.0019596601289681406, "loss": 0.2253, "step": 10090 }, { "epoch": 0.017893950153327322, "grad_norm": 0.59765625, "learning_rate": 0.0019596425076443195, "loss": 0.2649, "step": 10092 }, { "epoch": 0.017897496318637136, "grad_norm": 0.37109375, "learning_rate": 0.0019596248825609216, "loss": 0.2368, "step": 10094 }, { "epoch": 0.017901042483946954, "grad_norm": 0.76953125, "learning_rate": 0.0019596072537180235, "loss": 0.22, "step": 10096 }, { "epoch": 0.01790458864925677, "grad_norm": 6.125, "learning_rate": 0.001959589621115702, "loss": 0.444, "step": 10098 }, { "epoch": 0.017908134814566583, "grad_norm": 1.984375, "learning_rate": 0.0019595719847540355, "loss": 0.3407, "step": 10100 }, { "epoch": 0.017911680979876398, "grad_norm": 0.62109375, "learning_rate": 0.0019595543446331008, "loss": 0.2183, "step": 10102 }, { "epoch": 0.017915227145186213, "grad_norm": 0.88671875, "learning_rate": 0.001959536700752974, "loss": 0.2608, "step": 10104 }, { "epoch": 0.017918773310496027, "grad_norm": 0.6875, "learning_rate": 0.0019595190531137333, "loss": 0.3895, "step": 10106 }, { "epoch": 0.017922319475805845, "grad_norm": 3.53125, "learning_rate": 0.001959501401715455, "loss": 0.3692, "step": 10108 }, { "epoch": 0.01792586564111566, "grad_norm": 0.6328125, "learning_rate": 0.0019594837465582173, "loss": 0.2468, "step": 10110 }, { "epoch": 0.017929411806425474, "grad_norm": 0.453125, "learning_rate": 0.001959466087642097, "loss": 0.2157, "step": 10112 }, { "epoch": 0.01793295797173529, "grad_norm": 1.3359375, "learning_rate": 0.001959448424967171, "loss": 0.2063, "step": 10114 }, { "epoch": 0.017936504137045103, "grad_norm": 0.3515625, "learning_rate": 0.001959430758533517, "loss": 0.2479, "step": 10116 }, { "epoch": 0.01794005030235492, "grad_norm": 0.83203125, "learning_rate": 0.001959413088341212, "loss": 0.2144, "step": 10118 }, { "epoch": 0.017943596467664736, "grad_norm": 0.55859375, "learning_rate": 0.0019593954143903333, "loss": 0.2429, "step": 10120 }, { "epoch": 0.01794714263297455, "grad_norm": 0.5078125, "learning_rate": 0.0019593777366809584, "loss": 0.2281, "step": 10122 }, { "epoch": 0.017950688798284365, "grad_norm": 3.78125, "learning_rate": 0.0019593600552131646, "loss": 0.1981, "step": 10124 }, { "epoch": 0.01795423496359418, "grad_norm": 1.890625, "learning_rate": 0.0019593423699870286, "loss": 0.3672, "step": 10126 }, { "epoch": 0.017957781128903994, "grad_norm": 0.3359375, "learning_rate": 0.0019593246810026286, "loss": 0.2159, "step": 10128 }, { "epoch": 0.017961327294213812, "grad_norm": 0.3046875, "learning_rate": 0.0019593069882600416, "loss": 0.2239, "step": 10130 }, { "epoch": 0.017964873459523627, "grad_norm": 0.3828125, "learning_rate": 0.001959289291759345, "loss": 0.2239, "step": 10132 }, { "epoch": 0.01796841962483344, "grad_norm": 2.21875, "learning_rate": 0.0019592715915006157, "loss": 0.2045, "step": 10134 }, { "epoch": 0.017971965790143256, "grad_norm": 0.375, "learning_rate": 0.0019592538874839325, "loss": 0.2109, "step": 10136 }, { "epoch": 0.01797551195545307, "grad_norm": 0.439453125, "learning_rate": 0.001959236179709371, "loss": 0.2654, "step": 10138 }, { "epoch": 0.017979058120762885, "grad_norm": 0.357421875, "learning_rate": 0.00195921846817701, "loss": 0.1866, "step": 10140 }, { "epoch": 0.017982604286072703, "grad_norm": 0.4765625, "learning_rate": 0.0019592007528869263, "loss": 0.2253, "step": 10142 }, { "epoch": 0.017986150451382518, "grad_norm": 3.5625, "learning_rate": 0.0019591830338391977, "loss": 0.3771, "step": 10144 }, { "epoch": 0.017989696616692332, "grad_norm": 0.431640625, "learning_rate": 0.001959165311033902, "loss": 0.2116, "step": 10146 }, { "epoch": 0.017993242782002147, "grad_norm": 0.341796875, "learning_rate": 0.0019591475844711157, "loss": 0.244, "step": 10148 }, { "epoch": 0.01799678894731196, "grad_norm": 0.208984375, "learning_rate": 0.001959129854150917, "loss": 0.2034, "step": 10150 }, { "epoch": 0.01800033511262178, "grad_norm": 0.447265625, "learning_rate": 0.001959112120073384, "loss": 0.2257, "step": 10152 }, { "epoch": 0.018003881277931594, "grad_norm": 0.482421875, "learning_rate": 0.0019590943822385925, "loss": 0.1996, "step": 10154 }, { "epoch": 0.01800742744324141, "grad_norm": 0.484375, "learning_rate": 0.001959076640646622, "loss": 0.2547, "step": 10156 }, { "epoch": 0.018010973608551223, "grad_norm": 0.34765625, "learning_rate": 0.001959058895297549, "loss": 0.1912, "step": 10158 }, { "epoch": 0.018014519773861037, "grad_norm": 0.431640625, "learning_rate": 0.0019590411461914516, "loss": 0.2222, "step": 10160 }, { "epoch": 0.018018065939170852, "grad_norm": 0.373046875, "learning_rate": 0.001959023393328407, "loss": 0.1483, "step": 10162 }, { "epoch": 0.01802161210448067, "grad_norm": 0.88671875, "learning_rate": 0.001959005636708493, "loss": 0.1871, "step": 10164 }, { "epoch": 0.018025158269790485, "grad_norm": 0.94140625, "learning_rate": 0.0019589878763317872, "loss": 0.3038, "step": 10166 }, { "epoch": 0.0180287044351003, "grad_norm": 0.64453125, "learning_rate": 0.001958970112198368, "loss": 0.2151, "step": 10168 }, { "epoch": 0.018032250600410114, "grad_norm": 0.75, "learning_rate": 0.0019589523443083122, "loss": 0.2968, "step": 10170 }, { "epoch": 0.018035796765719928, "grad_norm": 0.44140625, "learning_rate": 0.0019589345726616974, "loss": 0.2659, "step": 10172 }, { "epoch": 0.018039342931029743, "grad_norm": 1.6953125, "learning_rate": 0.0019589167972586022, "loss": 0.2288, "step": 10174 }, { "epoch": 0.01804288909633956, "grad_norm": 3.46875, "learning_rate": 0.001958899018099104, "loss": 0.2663, "step": 10176 }, { "epoch": 0.018046435261649375, "grad_norm": 0.8359375, "learning_rate": 0.0019588812351832795, "loss": 0.1911, "step": 10178 }, { "epoch": 0.01804998142695919, "grad_norm": 1.1015625, "learning_rate": 0.001958863448511208, "loss": 0.2843, "step": 10180 }, { "epoch": 0.018053527592269004, "grad_norm": 0.9140625, "learning_rate": 0.001958845658082967, "loss": 0.2965, "step": 10182 }, { "epoch": 0.01805707375757882, "grad_norm": 0.87109375, "learning_rate": 0.0019588278638986334, "loss": 0.1668, "step": 10184 }, { "epoch": 0.018060619922888637, "grad_norm": 1.03125, "learning_rate": 0.0019588100659582858, "loss": 0.4837, "step": 10186 }, { "epoch": 0.01806416608819845, "grad_norm": 0.451171875, "learning_rate": 0.0019587922642620016, "loss": 0.1943, "step": 10188 }, { "epoch": 0.018067712253508266, "grad_norm": 0.306640625, "learning_rate": 0.0019587744588098594, "loss": 0.1822, "step": 10190 }, { "epoch": 0.01807125841881808, "grad_norm": 0.3828125, "learning_rate": 0.001958756649601936, "loss": 0.169, "step": 10192 }, { "epoch": 0.018074804584127895, "grad_norm": 0.466796875, "learning_rate": 0.00195873883663831, "loss": 0.201, "step": 10194 }, { "epoch": 0.01807835074943771, "grad_norm": 1.3828125, "learning_rate": 0.0019587210199190595, "loss": 0.2239, "step": 10196 }, { "epoch": 0.018081896914747528, "grad_norm": 0.2275390625, "learning_rate": 0.0019587031994442615, "loss": 0.2852, "step": 10198 }, { "epoch": 0.018085443080057342, "grad_norm": 0.5625, "learning_rate": 0.0019586853752139952, "loss": 0.2208, "step": 10200 }, { "epoch": 0.018088989245367157, "grad_norm": 1.7265625, "learning_rate": 0.001958667547228337, "loss": 0.2622, "step": 10202 }, { "epoch": 0.01809253541067697, "grad_norm": 0.96875, "learning_rate": 0.001958649715487366, "loss": 0.176, "step": 10204 }, { "epoch": 0.018096081575986786, "grad_norm": 0.5859375, "learning_rate": 0.0019586318799911604, "loss": 0.2102, "step": 10206 }, { "epoch": 0.0180996277412966, "grad_norm": 0.80859375, "learning_rate": 0.0019586140407397974, "loss": 0.4113, "step": 10208 }, { "epoch": 0.01810317390660642, "grad_norm": 0.42578125, "learning_rate": 0.0019585961977333552, "loss": 0.245, "step": 10210 }, { "epoch": 0.018106720071916233, "grad_norm": 3.78125, "learning_rate": 0.001958578350971912, "loss": 0.3333, "step": 10212 }, { "epoch": 0.018110266237226048, "grad_norm": 0.6796875, "learning_rate": 0.001958560500455546, "loss": 0.2492, "step": 10214 }, { "epoch": 0.018113812402535862, "grad_norm": 0.453125, "learning_rate": 0.001958542646184335, "loss": 0.2149, "step": 10216 }, { "epoch": 0.018117358567845677, "grad_norm": 0.73828125, "learning_rate": 0.001958524788158357, "loss": 0.2814, "step": 10218 }, { "epoch": 0.018120904733155495, "grad_norm": 0.65234375, "learning_rate": 0.0019585069263776903, "loss": 0.277, "step": 10220 }, { "epoch": 0.01812445089846531, "grad_norm": 0.51171875, "learning_rate": 0.001958489060842413, "loss": 0.3089, "step": 10222 }, { "epoch": 0.018127997063775124, "grad_norm": 1.40625, "learning_rate": 0.0019584711915526035, "loss": 0.2348, "step": 10224 }, { "epoch": 0.01813154322908494, "grad_norm": 0.73828125, "learning_rate": 0.0019584533185083393, "loss": 0.2089, "step": 10226 }, { "epoch": 0.018135089394394753, "grad_norm": 0.63671875, "learning_rate": 0.0019584354417096993, "loss": 0.2093, "step": 10228 }, { "epoch": 0.018138635559704568, "grad_norm": 0.76953125, "learning_rate": 0.001958417561156761, "loss": 0.3352, "step": 10230 }, { "epoch": 0.018142181725014386, "grad_norm": 0.71484375, "learning_rate": 0.0019583996768496033, "loss": 0.2888, "step": 10232 }, { "epoch": 0.0181457278903242, "grad_norm": 1.0, "learning_rate": 0.0019583817887883037, "loss": 0.328, "step": 10234 }, { "epoch": 0.018149274055634015, "grad_norm": 0.255859375, "learning_rate": 0.0019583638969729407, "loss": 0.1834, "step": 10236 }, { "epoch": 0.01815282022094383, "grad_norm": 1.7265625, "learning_rate": 0.0019583460014035927, "loss": 0.2001, "step": 10238 }, { "epoch": 0.018156366386253644, "grad_norm": 0.326171875, "learning_rate": 0.0019583281020803377, "loss": 0.2567, "step": 10240 }, { "epoch": 0.01815991255156346, "grad_norm": 0.73828125, "learning_rate": 0.0019583101990032544, "loss": 0.2188, "step": 10242 }, { "epoch": 0.018163458716873276, "grad_norm": 1.8671875, "learning_rate": 0.0019582922921724207, "loss": 0.3756, "step": 10244 }, { "epoch": 0.01816700488218309, "grad_norm": 1.2265625, "learning_rate": 0.001958274381587915, "loss": 0.2334, "step": 10246 }, { "epoch": 0.018170551047492906, "grad_norm": 2.109375, "learning_rate": 0.0019582564672498163, "loss": 0.4077, "step": 10248 }, { "epoch": 0.01817409721280272, "grad_norm": 0.44140625, "learning_rate": 0.001958238549158202, "loss": 0.1877, "step": 10250 }, { "epoch": 0.018177643378112535, "grad_norm": 0.59375, "learning_rate": 0.0019582206273131507, "loss": 0.2476, "step": 10252 }, { "epoch": 0.018181189543422353, "grad_norm": 0.51171875, "learning_rate": 0.0019582027017147406, "loss": 0.176, "step": 10254 }, { "epoch": 0.018184735708732167, "grad_norm": 0.4375, "learning_rate": 0.001958184772363051, "loss": 0.36, "step": 10256 }, { "epoch": 0.018188281874041982, "grad_norm": 0.6171875, "learning_rate": 0.0019581668392581594, "loss": 0.2001, "step": 10258 }, { "epoch": 0.018191828039351796, "grad_norm": 0.353515625, "learning_rate": 0.0019581489024001445, "loss": 0.215, "step": 10260 }, { "epoch": 0.01819537420466161, "grad_norm": 1.6875, "learning_rate": 0.0019581309617890848, "loss": 0.2457, "step": 10262 }, { "epoch": 0.018198920369971425, "grad_norm": 0.3359375, "learning_rate": 0.001958113017425059, "loss": 0.2749, "step": 10264 }, { "epoch": 0.018202466535281243, "grad_norm": 1.0234375, "learning_rate": 0.001958095069308145, "loss": 0.235, "step": 10266 }, { "epoch": 0.018206012700591058, "grad_norm": 1.4921875, "learning_rate": 0.0019580771174384217, "loss": 0.2405, "step": 10268 }, { "epoch": 0.018209558865900873, "grad_norm": 0.216796875, "learning_rate": 0.001958059161815967, "loss": 0.2308, "step": 10270 }, { "epoch": 0.018213105031210687, "grad_norm": 0.474609375, "learning_rate": 0.0019580412024408607, "loss": 0.4021, "step": 10272 }, { "epoch": 0.0182166511965205, "grad_norm": 0.51171875, "learning_rate": 0.0019580232393131805, "loss": 0.1867, "step": 10274 }, { "epoch": 0.018220197361830316, "grad_norm": 0.28515625, "learning_rate": 0.001958005272433005, "loss": 0.2155, "step": 10276 }, { "epoch": 0.018223743527140134, "grad_norm": 0.21875, "learning_rate": 0.001957987301800413, "loss": 0.2168, "step": 10278 }, { "epoch": 0.01822728969244995, "grad_norm": 0.478515625, "learning_rate": 0.0019579693274154823, "loss": 0.1634, "step": 10280 }, { "epoch": 0.018230835857759763, "grad_norm": 0.54296875, "learning_rate": 0.0019579513492782924, "loss": 0.2428, "step": 10282 }, { "epoch": 0.018234382023069578, "grad_norm": 0.7578125, "learning_rate": 0.0019579333673889216, "loss": 0.2462, "step": 10284 }, { "epoch": 0.018237928188379392, "grad_norm": 0.51953125, "learning_rate": 0.0019579153817474485, "loss": 0.2854, "step": 10286 }, { "epoch": 0.01824147435368921, "grad_norm": 1.4609375, "learning_rate": 0.0019578973923539524, "loss": 0.368, "step": 10288 }, { "epoch": 0.018245020518999025, "grad_norm": 0.2734375, "learning_rate": 0.001957879399208511, "loss": 0.2404, "step": 10290 }, { "epoch": 0.01824856668430884, "grad_norm": 0.3046875, "learning_rate": 0.001957861402311204, "loss": 0.3172, "step": 10292 }, { "epoch": 0.018252112849618654, "grad_norm": 0.453125, "learning_rate": 0.0019578434016621084, "loss": 0.1834, "step": 10294 }, { "epoch": 0.01825565901492847, "grad_norm": 0.70703125, "learning_rate": 0.001957825397261305, "loss": 0.2276, "step": 10296 }, { "epoch": 0.018259205180238283, "grad_norm": 0.70703125, "learning_rate": 0.001957807389108871, "loss": 0.2495, "step": 10298 }, { "epoch": 0.0182627513455481, "grad_norm": 0.31640625, "learning_rate": 0.0019577893772048864, "loss": 0.244, "step": 10300 }, { "epoch": 0.018266297510857916, "grad_norm": 0.46484375, "learning_rate": 0.001957771361549429, "loss": 0.2325, "step": 10302 }, { "epoch": 0.01826984367616773, "grad_norm": 0.5625, "learning_rate": 0.0019577533421425777, "loss": 0.2792, "step": 10304 }, { "epoch": 0.018273389841477545, "grad_norm": 0.6328125, "learning_rate": 0.0019577353189844117, "loss": 0.2202, "step": 10306 }, { "epoch": 0.01827693600678736, "grad_norm": 0.546875, "learning_rate": 0.00195771729207501, "loss": 0.1957, "step": 10308 }, { "epoch": 0.018280482172097174, "grad_norm": 1.046875, "learning_rate": 0.0019576992614144507, "loss": 0.1921, "step": 10310 }, { "epoch": 0.018284028337406992, "grad_norm": 0.314453125, "learning_rate": 0.001957681227002813, "loss": 0.2769, "step": 10312 }, { "epoch": 0.018287574502716807, "grad_norm": 1.0234375, "learning_rate": 0.001957663188840176, "loss": 0.2295, "step": 10314 }, { "epoch": 0.01829112066802662, "grad_norm": 0.3984375, "learning_rate": 0.0019576451469266185, "loss": 0.2137, "step": 10316 }, { "epoch": 0.018294666833336436, "grad_norm": 0.55078125, "learning_rate": 0.001957627101262219, "loss": 0.4195, "step": 10318 }, { "epoch": 0.01829821299864625, "grad_norm": 0.57421875, "learning_rate": 0.001957609051847057, "loss": 0.3452, "step": 10320 }, { "epoch": 0.018301759163956068, "grad_norm": 0.3984375, "learning_rate": 0.001957590998681211, "loss": 0.3118, "step": 10322 }, { "epoch": 0.018305305329265883, "grad_norm": 0.9765625, "learning_rate": 0.0019575729417647602, "loss": 0.2397, "step": 10324 }, { "epoch": 0.018308851494575697, "grad_norm": 0.6015625, "learning_rate": 0.001957554881097783, "loss": 0.2115, "step": 10326 }, { "epoch": 0.018312397659885512, "grad_norm": 0.78125, "learning_rate": 0.0019575368166803594, "loss": 0.2911, "step": 10328 }, { "epoch": 0.018315943825195326, "grad_norm": 1.2734375, "learning_rate": 0.001957518748512568, "loss": 0.251, "step": 10330 }, { "epoch": 0.01831948999050514, "grad_norm": 0.5390625, "learning_rate": 0.0019575006765944875, "loss": 0.2515, "step": 10332 }, { "epoch": 0.01832303615581496, "grad_norm": 0.8828125, "learning_rate": 0.001957482600926197, "loss": 0.2356, "step": 10334 }, { "epoch": 0.018326582321124774, "grad_norm": 1.5859375, "learning_rate": 0.0019574645215077757, "loss": 0.2841, "step": 10336 }, { "epoch": 0.018330128486434588, "grad_norm": 0.373046875, "learning_rate": 0.001957446438339303, "loss": 0.2069, "step": 10338 }, { "epoch": 0.018333674651744403, "grad_norm": 0.349609375, "learning_rate": 0.0019574283514208575, "loss": 0.2113, "step": 10340 }, { "epoch": 0.018337220817054217, "grad_norm": 5.59375, "learning_rate": 0.001957410260752518, "loss": 0.2449, "step": 10342 }, { "epoch": 0.018340766982364032, "grad_norm": 0.515625, "learning_rate": 0.0019573921663343648, "loss": 0.1909, "step": 10344 }, { "epoch": 0.01834431314767385, "grad_norm": 0.63671875, "learning_rate": 0.0019573740681664755, "loss": 0.2399, "step": 10346 }, { "epoch": 0.018347859312983664, "grad_norm": 0.5703125, "learning_rate": 0.0019573559662489303, "loss": 0.2411, "step": 10348 }, { "epoch": 0.01835140547829348, "grad_norm": 2.9375, "learning_rate": 0.0019573378605818085, "loss": 0.4718, "step": 10350 }, { "epoch": 0.018354951643603293, "grad_norm": 0.26953125, "learning_rate": 0.001957319751165189, "loss": 0.2428, "step": 10352 }, { "epoch": 0.018358497808913108, "grad_norm": 2.59375, "learning_rate": 0.0019573016379991503, "loss": 0.215, "step": 10354 }, { "epoch": 0.018362043974222926, "grad_norm": 0.6875, "learning_rate": 0.0019572835210837727, "loss": 0.2647, "step": 10356 }, { "epoch": 0.01836559013953274, "grad_norm": 0.6015625, "learning_rate": 0.0019572654004191346, "loss": 0.2356, "step": 10358 }, { "epoch": 0.018369136304842555, "grad_norm": 0.5859375, "learning_rate": 0.001957247276005316, "loss": 0.1624, "step": 10360 }, { "epoch": 0.01837268247015237, "grad_norm": 0.72265625, "learning_rate": 0.0019572291478423954, "loss": 0.1786, "step": 10362 }, { "epoch": 0.018376228635462184, "grad_norm": 0.671875, "learning_rate": 0.001957211015930452, "loss": 0.2001, "step": 10364 }, { "epoch": 0.018379774800772, "grad_norm": 0.90625, "learning_rate": 0.001957192880269567, "loss": 0.3353, "step": 10366 }, { "epoch": 0.018383320966081817, "grad_norm": 0.5546875, "learning_rate": 0.001957174740859817, "loss": 0.1889, "step": 10368 }, { "epoch": 0.01838686713139163, "grad_norm": 1.1171875, "learning_rate": 0.001957156597701283, "loss": 0.3182, "step": 10370 }, { "epoch": 0.018390413296701446, "grad_norm": 0.53515625, "learning_rate": 0.0019571384507940438, "loss": 0.2011, "step": 10372 }, { "epoch": 0.01839395946201126, "grad_norm": 0.259765625, "learning_rate": 0.0019571203001381788, "loss": 0.215, "step": 10374 }, { "epoch": 0.018397505627321075, "grad_norm": 0.3984375, "learning_rate": 0.001957102145733768, "loss": 0.1834, "step": 10376 }, { "epoch": 0.01840105179263089, "grad_norm": 0.69140625, "learning_rate": 0.0019570839875808895, "loss": 0.5054, "step": 10378 }, { "epoch": 0.018404597957940708, "grad_norm": 0.32421875, "learning_rate": 0.001957065825679624, "loss": 0.1915, "step": 10380 }, { "epoch": 0.018408144123250522, "grad_norm": 0.82421875, "learning_rate": 0.0019570476600300505, "loss": 0.2373, "step": 10382 }, { "epoch": 0.018411690288560337, "grad_norm": 0.6171875, "learning_rate": 0.0019570294906322483, "loss": 0.2582, "step": 10384 }, { "epoch": 0.01841523645387015, "grad_norm": 0.388671875, "learning_rate": 0.0019570113174862966, "loss": 0.2506, "step": 10386 }, { "epoch": 0.018418782619179966, "grad_norm": 1.1640625, "learning_rate": 0.0019569931405922754, "loss": 0.2311, "step": 10388 }, { "epoch": 0.018422328784489784, "grad_norm": 0.7109375, "learning_rate": 0.0019569749599502645, "loss": 0.2086, "step": 10390 }, { "epoch": 0.0184258749497996, "grad_norm": 0.60546875, "learning_rate": 0.0019569567755603422, "loss": 0.2449, "step": 10392 }, { "epoch": 0.018429421115109413, "grad_norm": 0.3515625, "learning_rate": 0.001956938587422589, "loss": 0.1607, "step": 10394 }, { "epoch": 0.018432967280419228, "grad_norm": 0.65625, "learning_rate": 0.0019569203955370844, "loss": 0.2871, "step": 10396 }, { "epoch": 0.018436513445729042, "grad_norm": 1.3515625, "learning_rate": 0.001956902199903907, "loss": 0.2812, "step": 10398 }, { "epoch": 0.018440059611038857, "grad_norm": 0.484375, "learning_rate": 0.0019568840005231383, "loss": 0.223, "step": 10400 }, { "epoch": 0.018443605776348675, "grad_norm": 0.6796875, "learning_rate": 0.001956865797394856, "loss": 0.2565, "step": 10402 }, { "epoch": 0.01844715194165849, "grad_norm": 0.45703125, "learning_rate": 0.0019568475905191404, "loss": 0.2083, "step": 10404 }, { "epoch": 0.018450698106968304, "grad_norm": 0.59375, "learning_rate": 0.0019568293798960714, "loss": 0.2588, "step": 10406 }, { "epoch": 0.01845424427227812, "grad_norm": 0.75390625, "learning_rate": 0.0019568111655257282, "loss": 0.2578, "step": 10408 }, { "epoch": 0.018457790437587933, "grad_norm": 0.7890625, "learning_rate": 0.001956792947408191, "loss": 0.2195, "step": 10410 }, { "epoch": 0.018461336602897747, "grad_norm": 0.21484375, "learning_rate": 0.0019567747255435385, "loss": 0.2523, "step": 10412 }, { "epoch": 0.018464882768207565, "grad_norm": 0.546875, "learning_rate": 0.0019567564999318516, "loss": 0.2143, "step": 10414 }, { "epoch": 0.01846842893351738, "grad_norm": 0.353515625, "learning_rate": 0.001956738270573209, "loss": 0.248, "step": 10416 }, { "epoch": 0.018471975098827195, "grad_norm": 1.0703125, "learning_rate": 0.001956720037467691, "loss": 0.2287, "step": 10418 }, { "epoch": 0.01847552126413701, "grad_norm": 0.94921875, "learning_rate": 0.0019567018006153777, "loss": 0.2996, "step": 10420 }, { "epoch": 0.018479067429446824, "grad_norm": 0.8125, "learning_rate": 0.001956683560016348, "loss": 0.1991, "step": 10422 }, { "epoch": 0.01848261359475664, "grad_norm": 0.271484375, "learning_rate": 0.001956665315670682, "loss": 0.1997, "step": 10424 }, { "epoch": 0.018486159760066456, "grad_norm": 0.435546875, "learning_rate": 0.0019566470675784595, "loss": 0.2157, "step": 10426 }, { "epoch": 0.01848970592537627, "grad_norm": 0.765625, "learning_rate": 0.00195662881573976, "loss": 0.2223, "step": 10428 }, { "epoch": 0.018493252090686085, "grad_norm": 1.109375, "learning_rate": 0.001956610560154664, "loss": 0.1762, "step": 10430 }, { "epoch": 0.0184967982559959, "grad_norm": 0.6328125, "learning_rate": 0.001956592300823251, "loss": 0.2002, "step": 10432 }, { "epoch": 0.018500344421305714, "grad_norm": 0.62109375, "learning_rate": 0.001956574037745601, "loss": 0.2455, "step": 10434 }, { "epoch": 0.018503890586615532, "grad_norm": 0.494140625, "learning_rate": 0.0019565557709217934, "loss": 0.2304, "step": 10436 }, { "epoch": 0.018507436751925347, "grad_norm": 1.5625, "learning_rate": 0.0019565375003519087, "loss": 0.5566, "step": 10438 }, { "epoch": 0.01851098291723516, "grad_norm": 1.1640625, "learning_rate": 0.001956519226036026, "loss": 0.3678, "step": 10440 }, { "epoch": 0.018514529082544976, "grad_norm": 0.55859375, "learning_rate": 0.0019565009479742264, "loss": 0.2132, "step": 10442 }, { "epoch": 0.01851807524785479, "grad_norm": 0.546875, "learning_rate": 0.0019564826661665887, "loss": 0.2474, "step": 10444 }, { "epoch": 0.018521621413164605, "grad_norm": 0.435546875, "learning_rate": 0.0019564643806131935, "loss": 0.2052, "step": 10446 }, { "epoch": 0.018525167578474423, "grad_norm": 1.3046875, "learning_rate": 0.0019564460913141205, "loss": 0.2361, "step": 10448 }, { "epoch": 0.018528713743784238, "grad_norm": 0.3828125, "learning_rate": 0.0019564277982694494, "loss": 0.2359, "step": 10450 }, { "epoch": 0.018532259909094052, "grad_norm": 0.65625, "learning_rate": 0.0019564095014792614, "loss": 0.2016, "step": 10452 }, { "epoch": 0.018535806074403867, "grad_norm": 0.65234375, "learning_rate": 0.0019563912009436355, "loss": 0.3994, "step": 10454 }, { "epoch": 0.01853935223971368, "grad_norm": 0.6015625, "learning_rate": 0.0019563728966626517, "loss": 0.2731, "step": 10456 }, { "epoch": 0.0185428984050235, "grad_norm": 0.2490234375, "learning_rate": 0.00195635458863639, "loss": 0.2361, "step": 10458 }, { "epoch": 0.018546444570333314, "grad_norm": 0.52734375, "learning_rate": 0.0019563362768649315, "loss": 0.2998, "step": 10460 }, { "epoch": 0.01854999073564313, "grad_norm": 0.6015625, "learning_rate": 0.001956317961348355, "loss": 0.2047, "step": 10462 }, { "epoch": 0.018553536900952943, "grad_norm": 2.0625, "learning_rate": 0.0019562996420867413, "loss": 0.3433, "step": 10464 }, { "epoch": 0.018557083066262758, "grad_norm": 0.61328125, "learning_rate": 0.0019562813190801705, "loss": 0.1959, "step": 10466 }, { "epoch": 0.018560629231572572, "grad_norm": 0.478515625, "learning_rate": 0.001956262992328722, "loss": 0.2983, "step": 10468 }, { "epoch": 0.01856417539688239, "grad_norm": 0.2890625, "learning_rate": 0.001956244661832477, "loss": 0.1771, "step": 10470 }, { "epoch": 0.018567721562192205, "grad_norm": 0.365234375, "learning_rate": 0.0019562263275915157, "loss": 0.2537, "step": 10472 }, { "epoch": 0.01857126772750202, "grad_norm": 0.53515625, "learning_rate": 0.001956207989605917, "loss": 0.1784, "step": 10474 }, { "epoch": 0.018574813892811834, "grad_norm": 0.5859375, "learning_rate": 0.0019561896478757623, "loss": 0.1929, "step": 10476 }, { "epoch": 0.01857836005812165, "grad_norm": 1.8515625, "learning_rate": 0.0019561713024011315, "loss": 0.3238, "step": 10478 }, { "epoch": 0.018581906223431463, "grad_norm": 0.3203125, "learning_rate": 0.0019561529531821045, "loss": 0.2495, "step": 10480 }, { "epoch": 0.01858545238874128, "grad_norm": 0.41015625, "learning_rate": 0.001956134600218762, "loss": 0.202, "step": 10482 }, { "epoch": 0.018588998554051096, "grad_norm": 0.7578125, "learning_rate": 0.001956116243511184, "loss": 0.208, "step": 10484 }, { "epoch": 0.01859254471936091, "grad_norm": 0.80078125, "learning_rate": 0.0019560978830594506, "loss": 0.2025, "step": 10486 }, { "epoch": 0.018596090884670725, "grad_norm": 1.53125, "learning_rate": 0.0019560795188636425, "loss": 0.2007, "step": 10488 }, { "epoch": 0.01859963704998054, "grad_norm": 0.44921875, "learning_rate": 0.0019560611509238397, "loss": 0.2096, "step": 10490 }, { "epoch": 0.018603183215290357, "grad_norm": 1.5546875, "learning_rate": 0.001956042779240123, "loss": 0.2777, "step": 10492 }, { "epoch": 0.018606729380600172, "grad_norm": 0.58984375, "learning_rate": 0.001956024403812572, "loss": 0.3095, "step": 10494 }, { "epoch": 0.018610275545909986, "grad_norm": 1.296875, "learning_rate": 0.001956006024641268, "loss": 0.41, "step": 10496 }, { "epoch": 0.0186138217112198, "grad_norm": 0.5859375, "learning_rate": 0.0019559876417262908, "loss": 0.1939, "step": 10498 }, { "epoch": 0.018617367876529616, "grad_norm": 2.125, "learning_rate": 0.0019559692550677205, "loss": 0.2805, "step": 10500 }, { "epoch": 0.01862091404183943, "grad_norm": 0.5859375, "learning_rate": 0.0019559508646656383, "loss": 0.2347, "step": 10502 }, { "epoch": 0.018624460207149248, "grad_norm": 0.83203125, "learning_rate": 0.0019559324705201242, "loss": 0.2676, "step": 10504 }, { "epoch": 0.018628006372459063, "grad_norm": 0.69140625, "learning_rate": 0.001955914072631258, "loss": 0.2571, "step": 10506 }, { "epoch": 0.018631552537768877, "grad_norm": 0.248046875, "learning_rate": 0.0019558956709991217, "loss": 0.1935, "step": 10508 }, { "epoch": 0.01863509870307869, "grad_norm": 0.478515625, "learning_rate": 0.0019558772656237946, "loss": 0.2045, "step": 10510 }, { "epoch": 0.018638644868388506, "grad_norm": 0.3203125, "learning_rate": 0.001955858856505357, "loss": 0.1858, "step": 10512 }, { "epoch": 0.01864219103369832, "grad_norm": 0.44140625, "learning_rate": 0.0019558404436438906, "loss": 0.1759, "step": 10514 }, { "epoch": 0.01864573719900814, "grad_norm": 0.322265625, "learning_rate": 0.0019558220270394747, "loss": 0.2053, "step": 10516 }, { "epoch": 0.018649283364317953, "grad_norm": 0.3125, "learning_rate": 0.0019558036066921907, "loss": 0.2209, "step": 10518 }, { "epoch": 0.018652829529627768, "grad_norm": 0.90234375, "learning_rate": 0.0019557851826021186, "loss": 0.2606, "step": 10520 }, { "epoch": 0.018656375694937583, "grad_norm": 0.40625, "learning_rate": 0.0019557667547693393, "loss": 0.2002, "step": 10522 }, { "epoch": 0.018659921860247397, "grad_norm": 0.3359375, "learning_rate": 0.0019557483231939336, "loss": 0.1602, "step": 10524 }, { "epoch": 0.018663468025557215, "grad_norm": 0.375, "learning_rate": 0.0019557298878759813, "loss": 0.2976, "step": 10526 }, { "epoch": 0.01866701419086703, "grad_norm": 0.68359375, "learning_rate": 0.001955711448815564, "loss": 0.2863, "step": 10528 }, { "epoch": 0.018670560356176844, "grad_norm": 8.375, "learning_rate": 0.001955693006012762, "loss": 0.3923, "step": 10530 }, { "epoch": 0.01867410652148666, "grad_norm": 0.515625, "learning_rate": 0.001955674559467655, "loss": 0.1933, "step": 10532 }, { "epoch": 0.018677652686796473, "grad_norm": 0.77734375, "learning_rate": 0.0019556561091803254, "loss": 0.1943, "step": 10534 }, { "epoch": 0.018681198852106288, "grad_norm": 0.392578125, "learning_rate": 0.0019556376551508525, "loss": 0.1805, "step": 10536 }, { "epoch": 0.018684745017416106, "grad_norm": 0.83984375, "learning_rate": 0.0019556191973793178, "loss": 0.2526, "step": 10538 }, { "epoch": 0.01868829118272592, "grad_norm": 1.328125, "learning_rate": 0.0019556007358658015, "loss": 0.2645, "step": 10540 }, { "epoch": 0.018691837348035735, "grad_norm": 0.3828125, "learning_rate": 0.0019555822706103843, "loss": 0.2595, "step": 10542 }, { "epoch": 0.01869538351334555, "grad_norm": 0.4375, "learning_rate": 0.001955563801613148, "loss": 0.2784, "step": 10544 }, { "epoch": 0.018698929678655364, "grad_norm": 4.9375, "learning_rate": 0.001955545328874172, "loss": 0.388, "step": 10546 }, { "epoch": 0.01870247584396518, "grad_norm": 0.55859375, "learning_rate": 0.001955526852393538, "loss": 0.2246, "step": 10548 }, { "epoch": 0.018706022009274997, "grad_norm": 0.439453125, "learning_rate": 0.001955508372171326, "loss": 0.258, "step": 10550 }, { "epoch": 0.01870956817458481, "grad_norm": 1.265625, "learning_rate": 0.001955489888207618, "loss": 0.4772, "step": 10552 }, { "epoch": 0.018713114339894626, "grad_norm": 1.03125, "learning_rate": 0.001955471400502494, "loss": 0.207, "step": 10554 }, { "epoch": 0.01871666050520444, "grad_norm": 0.84765625, "learning_rate": 0.0019554529090560348, "loss": 0.2586, "step": 10556 }, { "epoch": 0.018720206670514255, "grad_norm": 0.56640625, "learning_rate": 0.0019554344138683214, "loss": 0.2212, "step": 10558 }, { "epoch": 0.018723752835824073, "grad_norm": 1.875, "learning_rate": 0.001955415914939435, "loss": 0.3548, "step": 10560 }, { "epoch": 0.018727299001133887, "grad_norm": 0.435546875, "learning_rate": 0.001955397412269456, "loss": 0.1997, "step": 10562 }, { "epoch": 0.018730845166443702, "grad_norm": 1.125, "learning_rate": 0.0019553789058584657, "loss": 0.2845, "step": 10564 }, { "epoch": 0.018734391331753517, "grad_norm": 0.9453125, "learning_rate": 0.0019553603957065454, "loss": 0.1796, "step": 10566 }, { "epoch": 0.01873793749706333, "grad_norm": 0.2373046875, "learning_rate": 0.001955341881813775, "loss": 0.247, "step": 10568 }, { "epoch": 0.018741483662373146, "grad_norm": 0.349609375, "learning_rate": 0.0019553233641802364, "loss": 0.2784, "step": 10570 }, { "epoch": 0.018745029827682964, "grad_norm": 0.490234375, "learning_rate": 0.00195530484280601, "loss": 0.1938, "step": 10572 }, { "epoch": 0.018748575992992778, "grad_norm": 0.62109375, "learning_rate": 0.001955286317691177, "loss": 0.2325, "step": 10574 }, { "epoch": 0.018752122158302593, "grad_norm": 0.486328125, "learning_rate": 0.0019552677888358184, "loss": 0.277, "step": 10576 }, { "epoch": 0.018755668323612407, "grad_norm": 0.439453125, "learning_rate": 0.001955249256240016, "loss": 0.1885, "step": 10578 }, { "epoch": 0.018759214488922222, "grad_norm": 0.85546875, "learning_rate": 0.0019552307199038493, "loss": 0.2588, "step": 10580 }, { "epoch": 0.018762760654232036, "grad_norm": 0.458984375, "learning_rate": 0.0019552121798274004, "loss": 0.1862, "step": 10582 }, { "epoch": 0.018766306819541854, "grad_norm": 1.671875, "learning_rate": 0.00195519363601075, "loss": 0.2548, "step": 10584 }, { "epoch": 0.01876985298485167, "grad_norm": 0.345703125, "learning_rate": 0.00195517508845398, "loss": 0.2568, "step": 10586 }, { "epoch": 0.018773399150161484, "grad_norm": 0.236328125, "learning_rate": 0.0019551565371571707, "loss": 0.2379, "step": 10588 }, { "epoch": 0.018776945315471298, "grad_norm": 0.455078125, "learning_rate": 0.0019551379821204033, "loss": 0.2563, "step": 10590 }, { "epoch": 0.018780491480781113, "grad_norm": 0.8828125, "learning_rate": 0.001955119423343759, "loss": 0.2551, "step": 10592 }, { "epoch": 0.01878403764609093, "grad_norm": 0.359375, "learning_rate": 0.001955100860827319, "loss": 0.22, "step": 10594 }, { "epoch": 0.018787583811400745, "grad_norm": 0.76953125, "learning_rate": 0.001955082294571165, "loss": 0.2162, "step": 10596 }, { "epoch": 0.01879112997671056, "grad_norm": 0.515625, "learning_rate": 0.0019550637245753775, "loss": 0.3297, "step": 10598 }, { "epoch": 0.018794676142020374, "grad_norm": 1.0390625, "learning_rate": 0.001955045150840038, "loss": 0.2616, "step": 10600 }, { "epoch": 0.01879822230733019, "grad_norm": 0.6796875, "learning_rate": 0.0019550265733652276, "loss": 0.2574, "step": 10602 }, { "epoch": 0.018801768472640003, "grad_norm": 0.83203125, "learning_rate": 0.0019550079921510275, "loss": 0.2556, "step": 10604 }, { "epoch": 0.01880531463794982, "grad_norm": 0.306640625, "learning_rate": 0.001954989407197519, "loss": 0.2032, "step": 10606 }, { "epoch": 0.018808860803259636, "grad_norm": 1.4375, "learning_rate": 0.001954970818504784, "loss": 0.2785, "step": 10608 }, { "epoch": 0.01881240696856945, "grad_norm": 0.92578125, "learning_rate": 0.001954952226072903, "loss": 0.2119, "step": 10610 }, { "epoch": 0.018815953133879265, "grad_norm": 0.376953125, "learning_rate": 0.0019549336299019573, "loss": 0.2636, "step": 10612 }, { "epoch": 0.01881949929918908, "grad_norm": 0.310546875, "learning_rate": 0.0019549150299920286, "loss": 0.3351, "step": 10614 }, { "epoch": 0.018823045464498894, "grad_norm": 0.57421875, "learning_rate": 0.0019548964263431984, "loss": 0.1953, "step": 10616 }, { "epoch": 0.018826591629808712, "grad_norm": 0.41796875, "learning_rate": 0.0019548778189555477, "loss": 0.2252, "step": 10618 }, { "epoch": 0.018830137795118527, "grad_norm": 1.4453125, "learning_rate": 0.001954859207829158, "loss": 0.2535, "step": 10620 }, { "epoch": 0.01883368396042834, "grad_norm": 0.46484375, "learning_rate": 0.0019548405929641108, "loss": 0.2568, "step": 10622 }, { "epoch": 0.018837230125738156, "grad_norm": 0.76171875, "learning_rate": 0.001954821974360487, "loss": 0.2194, "step": 10624 }, { "epoch": 0.01884077629104797, "grad_norm": 3.609375, "learning_rate": 0.0019548033520183686, "loss": 0.3385, "step": 10626 }, { "epoch": 0.01884432245635779, "grad_norm": 0.64453125, "learning_rate": 0.001954784725937837, "loss": 0.1935, "step": 10628 }, { "epoch": 0.018847868621667603, "grad_norm": 0.625, "learning_rate": 0.0019547660961189736, "loss": 0.344, "step": 10630 }, { "epoch": 0.018851414786977418, "grad_norm": 0.31640625, "learning_rate": 0.0019547474625618596, "loss": 0.1781, "step": 10632 }, { "epoch": 0.018854960952287232, "grad_norm": 0.7578125, "learning_rate": 0.0019547288252665766, "loss": 0.2042, "step": 10634 }, { "epoch": 0.018858507117597047, "grad_norm": 0.515625, "learning_rate": 0.0019547101842332065, "loss": 0.2229, "step": 10636 }, { "epoch": 0.01886205328290686, "grad_norm": 0.59375, "learning_rate": 0.0019546915394618304, "loss": 0.2343, "step": 10638 }, { "epoch": 0.01886559944821668, "grad_norm": 1.8046875, "learning_rate": 0.0019546728909525302, "loss": 0.4387, "step": 10640 }, { "epoch": 0.018869145613526494, "grad_norm": 0.365234375, "learning_rate": 0.0019546542387053867, "loss": 0.1795, "step": 10642 }, { "epoch": 0.01887269177883631, "grad_norm": 0.60546875, "learning_rate": 0.0019546355827204827, "loss": 0.2605, "step": 10644 }, { "epoch": 0.018876237944146123, "grad_norm": 1.03125, "learning_rate": 0.0019546169229978983, "loss": 0.3554, "step": 10646 }, { "epoch": 0.018879784109455938, "grad_norm": 0.68359375, "learning_rate": 0.0019545982595377165, "loss": 0.2676, "step": 10648 }, { "epoch": 0.018883330274765752, "grad_norm": 0.72265625, "learning_rate": 0.001954579592340018, "loss": 0.2549, "step": 10650 }, { "epoch": 0.01888687644007557, "grad_norm": 1.9453125, "learning_rate": 0.001954560921404885, "loss": 0.2426, "step": 10652 }, { "epoch": 0.018890422605385385, "grad_norm": 0.96484375, "learning_rate": 0.001954542246732399, "loss": 0.303, "step": 10654 }, { "epoch": 0.0188939687706952, "grad_norm": 0.5078125, "learning_rate": 0.0019545235683226412, "loss": 0.2656, "step": 10656 }, { "epoch": 0.018897514936005014, "grad_norm": 0.3046875, "learning_rate": 0.0019545048861756937, "loss": 0.2267, "step": 10658 }, { "epoch": 0.01890106110131483, "grad_norm": 0.7421875, "learning_rate": 0.0019544862002916384, "loss": 0.3153, "step": 10660 }, { "epoch": 0.018904607266624646, "grad_norm": 0.31640625, "learning_rate": 0.001954467510670557, "loss": 0.1953, "step": 10662 }, { "epoch": 0.01890815343193446, "grad_norm": 0.953125, "learning_rate": 0.0019544488173125307, "loss": 0.291, "step": 10664 }, { "epoch": 0.018911699597244275, "grad_norm": 0.244140625, "learning_rate": 0.001954430120217642, "loss": 0.283, "step": 10666 }, { "epoch": 0.01891524576255409, "grad_norm": 0.65234375, "learning_rate": 0.0019544114193859713, "loss": 0.2134, "step": 10668 }, { "epoch": 0.018918791927863905, "grad_norm": 0.51953125, "learning_rate": 0.0019543927148176024, "loss": 0.2538, "step": 10670 }, { "epoch": 0.01892233809317372, "grad_norm": 0.4609375, "learning_rate": 0.0019543740065126156, "loss": 0.2825, "step": 10672 }, { "epoch": 0.018925884258483537, "grad_norm": 0.51953125, "learning_rate": 0.001954355294471093, "loss": 0.2298, "step": 10674 }, { "epoch": 0.01892943042379335, "grad_norm": 0.8984375, "learning_rate": 0.0019543365786931174, "loss": 0.2322, "step": 10676 }, { "epoch": 0.018932976589103166, "grad_norm": 0.546875, "learning_rate": 0.001954317859178769, "loss": 0.2352, "step": 10678 }, { "epoch": 0.01893652275441298, "grad_norm": 0.8828125, "learning_rate": 0.0019542991359281312, "loss": 0.2686, "step": 10680 }, { "epoch": 0.018940068919722795, "grad_norm": 0.36328125, "learning_rate": 0.0019542804089412846, "loss": 0.2246, "step": 10682 }, { "epoch": 0.01894361508503261, "grad_norm": 0.4609375, "learning_rate": 0.001954261678218312, "loss": 0.2252, "step": 10684 }, { "epoch": 0.018947161250342428, "grad_norm": 0.353515625, "learning_rate": 0.001954242943759295, "loss": 0.2093, "step": 10686 }, { "epoch": 0.018950707415652242, "grad_norm": 0.29296875, "learning_rate": 0.0019542242055643156, "loss": 0.2244, "step": 10688 }, { "epoch": 0.018954253580962057, "grad_norm": 0.75, "learning_rate": 0.001954205463633456, "loss": 0.2106, "step": 10690 }, { "epoch": 0.01895779974627187, "grad_norm": 0.5234375, "learning_rate": 0.0019541867179667972, "loss": 0.24, "step": 10692 }, { "epoch": 0.018961345911581686, "grad_norm": 1.046875, "learning_rate": 0.0019541679685644224, "loss": 0.2106, "step": 10694 }, { "epoch": 0.018964892076891504, "grad_norm": 1.3515625, "learning_rate": 0.001954149215426413, "loss": 0.2169, "step": 10696 }, { "epoch": 0.01896843824220132, "grad_norm": 0.60546875, "learning_rate": 0.0019541304585528507, "loss": 0.2906, "step": 10698 }, { "epoch": 0.018971984407511133, "grad_norm": 1.6953125, "learning_rate": 0.0019541116979438183, "loss": 0.2548, "step": 10700 }, { "epoch": 0.018975530572820948, "grad_norm": 0.48046875, "learning_rate": 0.001954092933599397, "loss": 0.1975, "step": 10702 }, { "epoch": 0.018979076738130762, "grad_norm": 1.265625, "learning_rate": 0.00195407416551967, "loss": 0.2005, "step": 10704 }, { "epoch": 0.018982622903440577, "grad_norm": 0.6015625, "learning_rate": 0.0019540553937047187, "loss": 0.3157, "step": 10706 }, { "epoch": 0.018986169068750395, "grad_norm": 0.482421875, "learning_rate": 0.0019540366181546244, "loss": 0.2572, "step": 10708 }, { "epoch": 0.01898971523406021, "grad_norm": 1.0, "learning_rate": 0.001954017838869471, "loss": 0.2349, "step": 10710 }, { "epoch": 0.018993261399370024, "grad_norm": 1.578125, "learning_rate": 0.001953999055849339, "loss": 0.2347, "step": 10712 }, { "epoch": 0.01899680756467984, "grad_norm": 0.5390625, "learning_rate": 0.001953980269094311, "loss": 0.2762, "step": 10714 }, { "epoch": 0.019000353729989653, "grad_norm": 1.7890625, "learning_rate": 0.00195396147860447, "loss": 0.2047, "step": 10716 }, { "epoch": 0.019003899895299468, "grad_norm": 0.390625, "learning_rate": 0.001953942684379897, "loss": 0.2115, "step": 10718 }, { "epoch": 0.019007446060609286, "grad_norm": 2.1875, "learning_rate": 0.0019539238864206753, "loss": 0.2352, "step": 10720 }, { "epoch": 0.0190109922259191, "grad_norm": 0.45703125, "learning_rate": 0.0019539050847268862, "loss": 0.1856, "step": 10722 }, { "epoch": 0.019014538391228915, "grad_norm": 1.6796875, "learning_rate": 0.001953886279298612, "loss": 0.2969, "step": 10724 }, { "epoch": 0.01901808455653873, "grad_norm": 0.828125, "learning_rate": 0.001953867470135936, "loss": 0.2008, "step": 10726 }, { "epoch": 0.019021630721848544, "grad_norm": 1.5703125, "learning_rate": 0.001953848657238939, "loss": 0.2502, "step": 10728 }, { "epoch": 0.019025176887158362, "grad_norm": 0.47265625, "learning_rate": 0.001953829840607704, "loss": 0.3565, "step": 10730 }, { "epoch": 0.019028723052468176, "grad_norm": 0.52734375, "learning_rate": 0.001953811020242313, "loss": 0.2566, "step": 10732 }, { "epoch": 0.01903226921777799, "grad_norm": 3.265625, "learning_rate": 0.001953792196142849, "loss": 0.3169, "step": 10734 }, { "epoch": 0.019035815383087806, "grad_norm": 0.95703125, "learning_rate": 0.001953773368309394, "loss": 0.2331, "step": 10736 }, { "epoch": 0.01903936154839762, "grad_norm": 0.61328125, "learning_rate": 0.00195375453674203, "loss": 0.1891, "step": 10738 }, { "epoch": 0.019042907713707435, "grad_norm": 3.265625, "learning_rate": 0.0019537357014408393, "loss": 0.2587, "step": 10740 }, { "epoch": 0.019046453879017253, "grad_norm": 0.486328125, "learning_rate": 0.001953716862405905, "loss": 0.2695, "step": 10742 }, { "epoch": 0.019050000044327067, "grad_norm": 0.302734375, "learning_rate": 0.001953698019637309, "loss": 0.3266, "step": 10744 }, { "epoch": 0.019053546209636882, "grad_norm": 1.140625, "learning_rate": 0.0019536791731351337, "loss": 0.2231, "step": 10746 }, { "epoch": 0.019057092374946696, "grad_norm": 0.796875, "learning_rate": 0.0019536603228994614, "loss": 0.296, "step": 10748 }, { "epoch": 0.01906063854025651, "grad_norm": 0.640625, "learning_rate": 0.0019536414689303745, "loss": 0.2146, "step": 10750 }, { "epoch": 0.019064184705566325, "grad_norm": 1.0625, "learning_rate": 0.001953622611227956, "loss": 0.3058, "step": 10752 }, { "epoch": 0.019067730870876144, "grad_norm": 0.30859375, "learning_rate": 0.001953603749792288, "loss": 0.3183, "step": 10754 }, { "epoch": 0.019071277036185958, "grad_norm": 0.310546875, "learning_rate": 0.001953584884623453, "loss": 0.199, "step": 10756 }, { "epoch": 0.019074823201495773, "grad_norm": 0.6640625, "learning_rate": 0.0019535660157215338, "loss": 0.2279, "step": 10758 }, { "epoch": 0.019078369366805587, "grad_norm": 0.359375, "learning_rate": 0.0019535471430866124, "loss": 0.2577, "step": 10760 }, { "epoch": 0.0190819155321154, "grad_norm": 0.3828125, "learning_rate": 0.0019535282667187716, "loss": 0.2329, "step": 10762 }, { "epoch": 0.01908546169742522, "grad_norm": 0.9375, "learning_rate": 0.001953509386618094, "loss": 0.2759, "step": 10764 }, { "epoch": 0.019089007862735034, "grad_norm": 1.2421875, "learning_rate": 0.001953490502784662, "loss": 0.2235, "step": 10766 }, { "epoch": 0.01909255402804485, "grad_norm": 0.490234375, "learning_rate": 0.0019534716152185584, "loss": 0.3044, "step": 10768 }, { "epoch": 0.019096100193354663, "grad_norm": 0.86328125, "learning_rate": 0.0019534527239198655, "loss": 0.3898, "step": 10770 }, { "epoch": 0.019099646358664478, "grad_norm": 0.59375, "learning_rate": 0.0019534338288886666, "loss": 0.2063, "step": 10772 }, { "epoch": 0.019103192523974293, "grad_norm": 1.5859375, "learning_rate": 0.0019534149301250435, "loss": 0.2357, "step": 10774 }, { "epoch": 0.01910673868928411, "grad_norm": 0.45703125, "learning_rate": 0.0019533960276290796, "loss": 0.2266, "step": 10776 }, { "epoch": 0.019110284854593925, "grad_norm": 0.40234375, "learning_rate": 0.0019533771214008564, "loss": 0.1959, "step": 10778 }, { "epoch": 0.01911383101990374, "grad_norm": 0.376953125, "learning_rate": 0.001953358211440458, "loss": 0.2363, "step": 10780 }, { "epoch": 0.019117377185213554, "grad_norm": 0.3203125, "learning_rate": 0.0019533392977479666, "loss": 0.191, "step": 10782 }, { "epoch": 0.01912092335052337, "grad_norm": 0.5546875, "learning_rate": 0.0019533203803234643, "loss": 0.2211, "step": 10784 }, { "epoch": 0.019124469515833183, "grad_norm": 0.6640625, "learning_rate": 0.0019533014591670344, "loss": 0.2336, "step": 10786 }, { "epoch": 0.019128015681143, "grad_norm": 1.125, "learning_rate": 0.0019532825342787603, "loss": 0.2508, "step": 10788 }, { "epoch": 0.019131561846452816, "grad_norm": 0.65625, "learning_rate": 0.001953263605658723, "loss": 0.2136, "step": 10790 }, { "epoch": 0.01913510801176263, "grad_norm": 4.5, "learning_rate": 0.001953244673307007, "loss": 0.301, "step": 10792 }, { "epoch": 0.019138654177072445, "grad_norm": 0.39453125, "learning_rate": 0.0019532257372236943, "loss": 0.2218, "step": 10794 }, { "epoch": 0.01914220034238226, "grad_norm": 0.462890625, "learning_rate": 0.001953206797408868, "loss": 0.2472, "step": 10796 }, { "epoch": 0.019145746507692078, "grad_norm": 0.66796875, "learning_rate": 0.0019531878538626103, "loss": 0.2439, "step": 10798 }, { "epoch": 0.019149292673001892, "grad_norm": 1.828125, "learning_rate": 0.001953168906585005, "loss": 0.2473, "step": 10800 }, { "epoch": 0.019152838838311707, "grad_norm": 0.357421875, "learning_rate": 0.001953149955576134, "loss": 0.1757, "step": 10802 }, { "epoch": 0.01915638500362152, "grad_norm": 0.6015625, "learning_rate": 0.0019531310008360806, "loss": 0.2036, "step": 10804 }, { "epoch": 0.019159931168931336, "grad_norm": 2.375, "learning_rate": 0.0019531120423649283, "loss": 0.2246, "step": 10806 }, { "epoch": 0.01916347733424115, "grad_norm": 0.71484375, "learning_rate": 0.0019530930801627594, "loss": 0.1578, "step": 10808 }, { "epoch": 0.01916702349955097, "grad_norm": 0.44921875, "learning_rate": 0.0019530741142296565, "loss": 0.2797, "step": 10810 }, { "epoch": 0.019170569664860783, "grad_norm": 0.55078125, "learning_rate": 0.0019530551445657031, "loss": 0.2009, "step": 10812 }, { "epoch": 0.019174115830170597, "grad_norm": 0.6640625, "learning_rate": 0.0019530361711709823, "loss": 0.3717, "step": 10814 }, { "epoch": 0.019177661995480412, "grad_norm": 1.90625, "learning_rate": 0.0019530171940455763, "loss": 0.3369, "step": 10816 }, { "epoch": 0.019181208160790227, "grad_norm": 0.671875, "learning_rate": 0.0019529982131895691, "loss": 0.2041, "step": 10818 }, { "epoch": 0.01918475432610004, "grad_norm": 0.318359375, "learning_rate": 0.001952979228603043, "loss": 0.2526, "step": 10820 }, { "epoch": 0.01918830049140986, "grad_norm": 0.453125, "learning_rate": 0.001952960240286081, "loss": 0.2245, "step": 10822 }, { "epoch": 0.019191846656719674, "grad_norm": 1.3359375, "learning_rate": 0.0019529412482387667, "loss": 0.2152, "step": 10824 }, { "epoch": 0.019195392822029488, "grad_norm": 0.64453125, "learning_rate": 0.0019529222524611825, "loss": 0.2138, "step": 10826 }, { "epoch": 0.019198938987339303, "grad_norm": 0.279296875, "learning_rate": 0.0019529032529534123, "loss": 0.2304, "step": 10828 }, { "epoch": 0.019202485152649117, "grad_norm": 0.98828125, "learning_rate": 0.0019528842497155382, "loss": 0.2946, "step": 10830 }, { "epoch": 0.019206031317958935, "grad_norm": 0.4375, "learning_rate": 0.0019528652427476438, "loss": 0.3555, "step": 10832 }, { "epoch": 0.01920957748326875, "grad_norm": 0.314453125, "learning_rate": 0.0019528462320498126, "loss": 0.4899, "step": 10834 }, { "epoch": 0.019213123648578564, "grad_norm": 1.8671875, "learning_rate": 0.0019528272176221272, "loss": 0.3046, "step": 10836 }, { "epoch": 0.01921666981388838, "grad_norm": 2.109375, "learning_rate": 0.001952808199464671, "loss": 0.2421, "step": 10838 }, { "epoch": 0.019220215979198194, "grad_norm": 0.49609375, "learning_rate": 0.0019527891775775268, "loss": 0.4896, "step": 10840 }, { "epoch": 0.019223762144508008, "grad_norm": 0.953125, "learning_rate": 0.0019527701519607783, "loss": 0.3056, "step": 10842 }, { "epoch": 0.019227308309817826, "grad_norm": 0.98046875, "learning_rate": 0.0019527511226145088, "loss": 0.2108, "step": 10844 }, { "epoch": 0.01923085447512764, "grad_norm": 0.578125, "learning_rate": 0.001952732089538801, "loss": 0.3534, "step": 10846 }, { "epoch": 0.019234400640437455, "grad_norm": 0.8046875, "learning_rate": 0.0019527130527337382, "loss": 0.2177, "step": 10848 }, { "epoch": 0.01923794680574727, "grad_norm": 0.51171875, "learning_rate": 0.0019526940121994036, "loss": 0.3468, "step": 10850 }, { "epoch": 0.019241492971057084, "grad_norm": 0.376953125, "learning_rate": 0.0019526749679358813, "loss": 0.3032, "step": 10852 }, { "epoch": 0.0192450391363669, "grad_norm": 0.255859375, "learning_rate": 0.0019526559199432537, "loss": 0.212, "step": 10854 }, { "epoch": 0.019248585301676717, "grad_norm": 0.78125, "learning_rate": 0.0019526368682216044, "loss": 0.2494, "step": 10856 }, { "epoch": 0.01925213146698653, "grad_norm": 0.34375, "learning_rate": 0.0019526178127710165, "loss": 0.1968, "step": 10858 }, { "epoch": 0.019255677632296346, "grad_norm": 0.443359375, "learning_rate": 0.001952598753591574, "loss": 0.2657, "step": 10860 }, { "epoch": 0.01925922379760616, "grad_norm": 0.59375, "learning_rate": 0.0019525796906833598, "loss": 0.2805, "step": 10862 }, { "epoch": 0.019262769962915975, "grad_norm": 0.85546875, "learning_rate": 0.0019525606240464565, "loss": 0.2177, "step": 10864 }, { "epoch": 0.01926631612822579, "grad_norm": 0.26953125, "learning_rate": 0.0019525415536809493, "loss": 0.2162, "step": 10866 }, { "epoch": 0.019269862293535608, "grad_norm": 0.25, "learning_rate": 0.0019525224795869196, "loss": 0.3353, "step": 10868 }, { "epoch": 0.019273408458845422, "grad_norm": 0.400390625, "learning_rate": 0.0019525034017644522, "loss": 0.2781, "step": 10870 }, { "epoch": 0.019276954624155237, "grad_norm": 0.87890625, "learning_rate": 0.0019524843202136303, "loss": 0.2958, "step": 10872 }, { "epoch": 0.01928050078946505, "grad_norm": 0.31640625, "learning_rate": 0.0019524652349345371, "loss": 0.2147, "step": 10874 }, { "epoch": 0.019284046954774866, "grad_norm": 0.412109375, "learning_rate": 0.0019524461459272562, "loss": 0.2359, "step": 10876 }, { "epoch": 0.019287593120084684, "grad_norm": 0.81640625, "learning_rate": 0.0019524270531918707, "loss": 0.2246, "step": 10878 }, { "epoch": 0.0192911392853945, "grad_norm": 0.37109375, "learning_rate": 0.0019524079567284644, "loss": 0.2572, "step": 10880 }, { "epoch": 0.019294685450704313, "grad_norm": 0.462890625, "learning_rate": 0.001952388856537121, "loss": 0.211, "step": 10882 }, { "epoch": 0.019298231616014128, "grad_norm": 0.671875, "learning_rate": 0.001952369752617924, "loss": 0.2447, "step": 10884 }, { "epoch": 0.019301777781323942, "grad_norm": 0.54296875, "learning_rate": 0.0019523506449709567, "loss": 0.2415, "step": 10886 }, { "epoch": 0.019305323946633757, "grad_norm": 0.349609375, "learning_rate": 0.0019523315335963026, "loss": 0.2132, "step": 10888 }, { "epoch": 0.019308870111943575, "grad_norm": 0.7890625, "learning_rate": 0.0019523124184940457, "loss": 0.2173, "step": 10890 }, { "epoch": 0.01931241627725339, "grad_norm": 0.58203125, "learning_rate": 0.0019522932996642694, "loss": 0.2229, "step": 10892 }, { "epoch": 0.019315962442563204, "grad_norm": 0.83203125, "learning_rate": 0.0019522741771070573, "loss": 0.2663, "step": 10894 }, { "epoch": 0.01931950860787302, "grad_norm": 0.375, "learning_rate": 0.0019522550508224929, "loss": 0.2241, "step": 10896 }, { "epoch": 0.019323054773182833, "grad_norm": 0.298828125, "learning_rate": 0.0019522359208106598, "loss": 0.2131, "step": 10898 }, { "epoch": 0.019326600938492648, "grad_norm": 0.46484375, "learning_rate": 0.0019522167870716421, "loss": 0.2677, "step": 10900 }, { "epoch": 0.019330147103802466, "grad_norm": 2.15625, "learning_rate": 0.001952197649605523, "loss": 0.3025, "step": 10902 }, { "epoch": 0.01933369326911228, "grad_norm": 0.515625, "learning_rate": 0.0019521785084123864, "loss": 0.2222, "step": 10904 }, { "epoch": 0.019337239434422095, "grad_norm": 0.453125, "learning_rate": 0.0019521593634923163, "loss": 0.2296, "step": 10906 }, { "epoch": 0.01934078559973191, "grad_norm": 0.5390625, "learning_rate": 0.0019521402148453958, "loss": 0.2072, "step": 10908 }, { "epoch": 0.019344331765041724, "grad_norm": 0.41796875, "learning_rate": 0.0019521210624717093, "loss": 0.2531, "step": 10910 }, { "epoch": 0.019347877930351542, "grad_norm": 0.353515625, "learning_rate": 0.0019521019063713397, "loss": 0.2032, "step": 10912 }, { "epoch": 0.019351424095661356, "grad_norm": 0.314453125, "learning_rate": 0.0019520827465443718, "loss": 0.2349, "step": 10914 }, { "epoch": 0.01935497026097117, "grad_norm": 1.375, "learning_rate": 0.0019520635829908886, "loss": 0.4852, "step": 10916 }, { "epoch": 0.019358516426280985, "grad_norm": 0.302734375, "learning_rate": 0.0019520444157109745, "loss": 0.1609, "step": 10918 }, { "epoch": 0.0193620625915908, "grad_norm": 0.99609375, "learning_rate": 0.001952025244704713, "loss": 0.2318, "step": 10920 }, { "epoch": 0.019365608756900615, "grad_norm": 1.7421875, "learning_rate": 0.0019520060699721878, "loss": 0.3149, "step": 10922 }, { "epoch": 0.019369154922210433, "grad_norm": 0.625, "learning_rate": 0.001951986891513483, "loss": 0.222, "step": 10924 }, { "epoch": 0.019372701087520247, "grad_norm": 0.53515625, "learning_rate": 0.0019519677093286826, "loss": 0.2297, "step": 10926 }, { "epoch": 0.01937624725283006, "grad_norm": 0.84375, "learning_rate": 0.0019519485234178705, "loss": 0.2908, "step": 10928 }, { "epoch": 0.019379793418139876, "grad_norm": 0.9453125, "learning_rate": 0.00195192933378113, "loss": 0.2186, "step": 10930 }, { "epoch": 0.01938333958344969, "grad_norm": 0.53125, "learning_rate": 0.0019519101404185456, "loss": 0.3206, "step": 10932 }, { "epoch": 0.019386885748759505, "grad_norm": 0.8671875, "learning_rate": 0.0019518909433302012, "loss": 0.2265, "step": 10934 }, { "epoch": 0.019390431914069323, "grad_norm": 0.78125, "learning_rate": 0.0019518717425161807, "loss": 0.1825, "step": 10936 }, { "epoch": 0.019393978079379138, "grad_norm": 0.40625, "learning_rate": 0.0019518525379765676, "loss": 0.193, "step": 10938 }, { "epoch": 0.019397524244688952, "grad_norm": 0.408203125, "learning_rate": 0.0019518333297114468, "loss": 0.2487, "step": 10940 }, { "epoch": 0.019401070409998767, "grad_norm": 0.3984375, "learning_rate": 0.0019518141177209015, "loss": 0.3763, "step": 10942 }, { "epoch": 0.01940461657530858, "grad_norm": 2.046875, "learning_rate": 0.0019517949020050162, "loss": 0.2061, "step": 10944 }, { "epoch": 0.0194081627406184, "grad_norm": 0.51171875, "learning_rate": 0.0019517756825638748, "loss": 0.2514, "step": 10946 }, { "epoch": 0.019411708905928214, "grad_norm": 0.96484375, "learning_rate": 0.0019517564593975615, "loss": 0.177, "step": 10948 }, { "epoch": 0.01941525507123803, "grad_norm": 0.7109375, "learning_rate": 0.0019517372325061598, "loss": 0.2142, "step": 10950 }, { "epoch": 0.019418801236547843, "grad_norm": 0.29296875, "learning_rate": 0.0019517180018897543, "loss": 0.1947, "step": 10952 }, { "epoch": 0.019422347401857658, "grad_norm": 0.3125, "learning_rate": 0.001951698767548429, "loss": 0.2146, "step": 10954 }, { "epoch": 0.019425893567167472, "grad_norm": 0.7890625, "learning_rate": 0.0019516795294822681, "loss": 0.2465, "step": 10956 }, { "epoch": 0.01942943973247729, "grad_norm": 0.58984375, "learning_rate": 0.0019516602876913558, "loss": 0.286, "step": 10958 }, { "epoch": 0.019432985897787105, "grad_norm": 0.5546875, "learning_rate": 0.0019516410421757757, "loss": 0.2414, "step": 10960 }, { "epoch": 0.01943653206309692, "grad_norm": 0.7890625, "learning_rate": 0.0019516217929356127, "loss": 0.2892, "step": 10962 }, { "epoch": 0.019440078228406734, "grad_norm": 1.5625, "learning_rate": 0.0019516025399709507, "loss": 0.3304, "step": 10964 }, { "epoch": 0.01944362439371655, "grad_norm": 0.7578125, "learning_rate": 0.0019515832832818739, "loss": 0.2137, "step": 10966 }, { "epoch": 0.019447170559026363, "grad_norm": 0.76171875, "learning_rate": 0.001951564022868466, "loss": 0.1856, "step": 10968 }, { "epoch": 0.01945071672433618, "grad_norm": 0.349609375, "learning_rate": 0.0019515447587308123, "loss": 0.2202, "step": 10970 }, { "epoch": 0.019454262889645996, "grad_norm": 0.671875, "learning_rate": 0.001951525490868996, "loss": 0.225, "step": 10972 }, { "epoch": 0.01945780905495581, "grad_norm": 0.43359375, "learning_rate": 0.001951506219283102, "loss": 0.263, "step": 10974 }, { "epoch": 0.019461355220265625, "grad_norm": 1.2109375, "learning_rate": 0.0019514869439732146, "loss": 0.2425, "step": 10976 }, { "epoch": 0.01946490138557544, "grad_norm": 1.5625, "learning_rate": 0.001951467664939418, "loss": 0.2397, "step": 10978 }, { "epoch": 0.019468447550885257, "grad_norm": 1.71875, "learning_rate": 0.001951448382181796, "loss": 0.527, "step": 10980 }, { "epoch": 0.019471993716195072, "grad_norm": 0.357421875, "learning_rate": 0.0019514290957004334, "loss": 0.24, "step": 10982 }, { "epoch": 0.019475539881504886, "grad_norm": 0.69140625, "learning_rate": 0.0019514098054954146, "loss": 0.2465, "step": 10984 }, { "epoch": 0.0194790860468147, "grad_norm": 0.53515625, "learning_rate": 0.0019513905115668237, "loss": 0.2122, "step": 10986 }, { "epoch": 0.019482632212124516, "grad_norm": 1.4296875, "learning_rate": 0.0019513712139147456, "loss": 0.4023, "step": 10988 }, { "epoch": 0.01948617837743433, "grad_norm": 0.28125, "learning_rate": 0.0019513519125392645, "loss": 0.2558, "step": 10990 }, { "epoch": 0.019489724542744148, "grad_norm": 1.78125, "learning_rate": 0.001951332607440464, "loss": 0.3743, "step": 10992 }, { "epoch": 0.019493270708053963, "grad_norm": 0.330078125, "learning_rate": 0.0019513132986184296, "loss": 0.26, "step": 10994 }, { "epoch": 0.019496816873363777, "grad_norm": 0.88671875, "learning_rate": 0.0019512939860732452, "loss": 0.2315, "step": 10996 }, { "epoch": 0.019500363038673592, "grad_norm": 0.21484375, "learning_rate": 0.0019512746698049958, "loss": 0.2222, "step": 10998 }, { "epoch": 0.019503909203983406, "grad_norm": 0.625, "learning_rate": 0.001951255349813765, "loss": 0.2595, "step": 11000 }, { "epoch": 0.01950745536929322, "grad_norm": 0.349609375, "learning_rate": 0.001951236026099638, "loss": 0.3253, "step": 11002 }, { "epoch": 0.01951100153460304, "grad_norm": 0.39453125, "learning_rate": 0.0019512166986626989, "loss": 0.2113, "step": 11004 }, { "epoch": 0.019514547699912854, "grad_norm": 0.32421875, "learning_rate": 0.0019511973675030326, "loss": 0.221, "step": 11006 }, { "epoch": 0.019518093865222668, "grad_norm": 0.86328125, "learning_rate": 0.0019511780326207234, "loss": 0.191, "step": 11008 }, { "epoch": 0.019521640030532483, "grad_norm": 0.36328125, "learning_rate": 0.0019511586940158556, "loss": 0.2271, "step": 11010 }, { "epoch": 0.019525186195842297, "grad_norm": 0.9140625, "learning_rate": 0.0019511393516885142, "loss": 0.2208, "step": 11012 }, { "epoch": 0.019528732361152115, "grad_norm": 0.98046875, "learning_rate": 0.001951120005638784, "loss": 0.2405, "step": 11014 }, { "epoch": 0.01953227852646193, "grad_norm": 0.58203125, "learning_rate": 0.0019511006558667492, "loss": 0.2341, "step": 11016 }, { "epoch": 0.019535824691771744, "grad_norm": 1.6796875, "learning_rate": 0.0019510813023724944, "loss": 0.5106, "step": 11018 }, { "epoch": 0.01953937085708156, "grad_norm": 0.890625, "learning_rate": 0.0019510619451561042, "loss": 0.2823, "step": 11020 }, { "epoch": 0.019542917022391373, "grad_norm": 0.54296875, "learning_rate": 0.0019510425842176639, "loss": 0.2425, "step": 11022 }, { "epoch": 0.019546463187701188, "grad_norm": 0.36328125, "learning_rate": 0.001951023219557257, "loss": 0.2201, "step": 11024 }, { "epoch": 0.019550009353011006, "grad_norm": 0.734375, "learning_rate": 0.0019510038511749692, "loss": 0.281, "step": 11026 }, { "epoch": 0.01955355551832082, "grad_norm": 0.484375, "learning_rate": 0.0019509844790708848, "loss": 0.2588, "step": 11028 }, { "epoch": 0.019557101683630635, "grad_norm": 0.43359375, "learning_rate": 0.0019509651032450887, "loss": 0.2392, "step": 11030 }, { "epoch": 0.01956064784894045, "grad_norm": 0.73828125, "learning_rate": 0.0019509457236976657, "loss": 0.3737, "step": 11032 }, { "epoch": 0.019564194014250264, "grad_norm": 0.375, "learning_rate": 0.0019509263404287004, "loss": 0.3482, "step": 11034 }, { "epoch": 0.01956774017956008, "grad_norm": 0.82421875, "learning_rate": 0.0019509069534382772, "loss": 0.2544, "step": 11036 }, { "epoch": 0.019571286344869897, "grad_norm": 0.40234375, "learning_rate": 0.0019508875627264814, "loss": 0.2289, "step": 11038 }, { "epoch": 0.01957483251017971, "grad_norm": 0.302734375, "learning_rate": 0.0019508681682933978, "loss": 0.2791, "step": 11040 }, { "epoch": 0.019578378675489526, "grad_norm": 0.423828125, "learning_rate": 0.001950848770139111, "loss": 0.2202, "step": 11042 }, { "epoch": 0.01958192484079934, "grad_norm": 0.41015625, "learning_rate": 0.0019508293682637056, "loss": 0.3792, "step": 11044 }, { "epoch": 0.019585471006109155, "grad_norm": 0.3984375, "learning_rate": 0.0019508099626672673, "loss": 0.1706, "step": 11046 }, { "epoch": 0.019589017171418973, "grad_norm": 0.953125, "learning_rate": 0.0019507905533498802, "loss": 0.229, "step": 11048 }, { "epoch": 0.019592563336728788, "grad_norm": 0.3359375, "learning_rate": 0.0019507711403116293, "loss": 0.2952, "step": 11050 }, { "epoch": 0.019596109502038602, "grad_norm": 0.259765625, "learning_rate": 0.0019507517235525997, "loss": 0.4037, "step": 11052 }, { "epoch": 0.019599655667348417, "grad_norm": 0.34765625, "learning_rate": 0.0019507323030728762, "loss": 0.2403, "step": 11054 }, { "epoch": 0.01960320183265823, "grad_norm": 0.310546875, "learning_rate": 0.0019507128788725438, "loss": 0.3066, "step": 11056 }, { "epoch": 0.019606747997968046, "grad_norm": 0.4921875, "learning_rate": 0.0019506934509516875, "loss": 0.2749, "step": 11058 }, { "epoch": 0.019610294163277864, "grad_norm": 12.125, "learning_rate": 0.001950674019310392, "loss": 0.2871, "step": 11060 }, { "epoch": 0.01961384032858768, "grad_norm": 10.8125, "learning_rate": 0.0019506545839487427, "loss": 0.3001, "step": 11062 }, { "epoch": 0.019617386493897493, "grad_norm": 0.95703125, "learning_rate": 0.001950635144866824, "loss": 0.2572, "step": 11064 }, { "epoch": 0.019620932659207307, "grad_norm": 0.47265625, "learning_rate": 0.0019506157020647216, "loss": 0.2778, "step": 11066 }, { "epoch": 0.019624478824517122, "grad_norm": 0.373046875, "learning_rate": 0.0019505962555425205, "loss": 0.2602, "step": 11068 }, { "epoch": 0.019628024989826937, "grad_norm": 0.66015625, "learning_rate": 0.0019505768053003048, "loss": 0.4013, "step": 11070 }, { "epoch": 0.019631571155136755, "grad_norm": 0.328125, "learning_rate": 0.001950557351338161, "loss": 0.2023, "step": 11072 }, { "epoch": 0.01963511732044657, "grad_norm": 1.3671875, "learning_rate": 0.001950537893656173, "loss": 0.2618, "step": 11074 }, { "epoch": 0.019638663485756384, "grad_norm": 0.3046875, "learning_rate": 0.0019505184322544259, "loss": 0.2211, "step": 11076 }, { "epoch": 0.019642209651066198, "grad_norm": 0.94921875, "learning_rate": 0.0019504989671330056, "loss": 0.2719, "step": 11078 }, { "epoch": 0.019645755816376013, "grad_norm": 0.6171875, "learning_rate": 0.001950479498291997, "loss": 0.3977, "step": 11080 }, { "epoch": 0.01964930198168583, "grad_norm": 0.64453125, "learning_rate": 0.0019504600257314849, "loss": 0.2069, "step": 11082 }, { "epoch": 0.019652848146995645, "grad_norm": 0.396484375, "learning_rate": 0.0019504405494515545, "loss": 0.2656, "step": 11084 }, { "epoch": 0.01965639431230546, "grad_norm": 1.6875, "learning_rate": 0.0019504210694522913, "loss": 0.3066, "step": 11086 }, { "epoch": 0.019659940477615274, "grad_norm": 7.4375, "learning_rate": 0.0019504015857337803, "loss": 0.2919, "step": 11088 }, { "epoch": 0.01966348664292509, "grad_norm": 0.287109375, "learning_rate": 0.0019503820982961068, "loss": 0.2172, "step": 11090 }, { "epoch": 0.019667032808234904, "grad_norm": 2.203125, "learning_rate": 0.001950362607139356, "loss": 0.6718, "step": 11092 }, { "epoch": 0.01967057897354472, "grad_norm": 0.9765625, "learning_rate": 0.0019503431122636131, "loss": 0.2758, "step": 11094 }, { "epoch": 0.019674125138854536, "grad_norm": 0.671875, "learning_rate": 0.0019503236136689632, "loss": 0.2416, "step": 11096 }, { "epoch": 0.01967767130416435, "grad_norm": 0.56640625, "learning_rate": 0.0019503041113554918, "loss": 0.2775, "step": 11098 }, { "epoch": 0.019681217469474165, "grad_norm": 0.39453125, "learning_rate": 0.0019502846053232844, "loss": 0.2401, "step": 11100 }, { "epoch": 0.01968476363478398, "grad_norm": 0.490234375, "learning_rate": 0.0019502650955724255, "loss": 0.2443, "step": 11102 }, { "epoch": 0.019688309800093794, "grad_norm": 2.03125, "learning_rate": 0.0019502455821030014, "loss": 0.24, "step": 11104 }, { "epoch": 0.019691855965403612, "grad_norm": 0.4453125, "learning_rate": 0.001950226064915097, "loss": 0.2128, "step": 11106 }, { "epoch": 0.019695402130713427, "grad_norm": 4.28125, "learning_rate": 0.0019502065440087975, "loss": 0.4816, "step": 11108 }, { "epoch": 0.01969894829602324, "grad_norm": 0.54296875, "learning_rate": 0.0019501870193841884, "loss": 0.1832, "step": 11110 }, { "epoch": 0.019702494461333056, "grad_norm": 1.734375, "learning_rate": 0.0019501674910413554, "loss": 0.2731, "step": 11112 }, { "epoch": 0.01970604062664287, "grad_norm": 0.515625, "learning_rate": 0.0019501479589803831, "loss": 0.2385, "step": 11114 }, { "epoch": 0.01970958679195269, "grad_norm": 0.390625, "learning_rate": 0.001950128423201358, "loss": 0.2106, "step": 11116 }, { "epoch": 0.019713132957262503, "grad_norm": 1.453125, "learning_rate": 0.0019501088837043648, "loss": 0.3689, "step": 11118 }, { "epoch": 0.019716679122572318, "grad_norm": 0.4921875, "learning_rate": 0.0019500893404894892, "loss": 0.2387, "step": 11120 }, { "epoch": 0.019720225287882132, "grad_norm": 0.96484375, "learning_rate": 0.0019500697935568166, "loss": 0.277, "step": 11122 }, { "epoch": 0.019723771453191947, "grad_norm": 1.3359375, "learning_rate": 0.0019500502429064324, "loss": 0.34, "step": 11124 }, { "epoch": 0.01972731761850176, "grad_norm": 0.53125, "learning_rate": 0.001950030688538422, "loss": 0.2054, "step": 11126 }, { "epoch": 0.01973086378381158, "grad_norm": 1.0546875, "learning_rate": 0.0019500111304528716, "loss": 0.2229, "step": 11128 }, { "epoch": 0.019734409949121394, "grad_norm": 3.0625, "learning_rate": 0.001949991568649866, "loss": 0.2168, "step": 11130 }, { "epoch": 0.01973795611443121, "grad_norm": 0.890625, "learning_rate": 0.001949972003129491, "loss": 0.3146, "step": 11132 }, { "epoch": 0.019741502279741023, "grad_norm": 1.375, "learning_rate": 0.0019499524338918322, "loss": 0.308, "step": 11134 }, { "epoch": 0.019745048445050838, "grad_norm": 0.62109375, "learning_rate": 0.001949932860936975, "loss": 0.2132, "step": 11136 }, { "epoch": 0.019748594610360652, "grad_norm": 1.2265625, "learning_rate": 0.0019499132842650056, "loss": 0.2605, "step": 11138 }, { "epoch": 0.01975214077567047, "grad_norm": 0.3828125, "learning_rate": 0.001949893703876009, "loss": 0.2386, "step": 11140 }, { "epoch": 0.019755686940980285, "grad_norm": 0.330078125, "learning_rate": 0.001949874119770071, "loss": 0.2523, "step": 11142 }, { "epoch": 0.0197592331062901, "grad_norm": 0.30859375, "learning_rate": 0.0019498545319472772, "loss": 0.2623, "step": 11144 }, { "epoch": 0.019762779271599914, "grad_norm": 0.48046875, "learning_rate": 0.0019498349404077132, "loss": 0.2426, "step": 11146 }, { "epoch": 0.01976632543690973, "grad_norm": 0.396484375, "learning_rate": 0.001949815345151465, "loss": 0.283, "step": 11148 }, { "epoch": 0.019769871602219546, "grad_norm": 0.447265625, "learning_rate": 0.0019497957461786183, "loss": 0.2172, "step": 11150 }, { "epoch": 0.01977341776752936, "grad_norm": 0.408203125, "learning_rate": 0.001949776143489258, "loss": 0.2157, "step": 11152 }, { "epoch": 0.019776963932839176, "grad_norm": 0.29296875, "learning_rate": 0.0019497565370834712, "loss": 0.3706, "step": 11154 }, { "epoch": 0.01978051009814899, "grad_norm": 0.375, "learning_rate": 0.0019497369269613424, "loss": 0.2533, "step": 11156 }, { "epoch": 0.019784056263458805, "grad_norm": 0.984375, "learning_rate": 0.001949717313122958, "loss": 0.4503, "step": 11158 }, { "epoch": 0.01978760242876862, "grad_norm": 0.2314453125, "learning_rate": 0.0019496976955684037, "loss": 0.2002, "step": 11160 }, { "epoch": 0.019791148594078437, "grad_norm": 0.7109375, "learning_rate": 0.0019496780742977653, "loss": 0.4008, "step": 11162 }, { "epoch": 0.019794694759388252, "grad_norm": 0.81640625, "learning_rate": 0.0019496584493111282, "loss": 0.2809, "step": 11164 }, { "epoch": 0.019798240924698066, "grad_norm": 1.828125, "learning_rate": 0.001949638820608579, "loss": 0.285, "step": 11166 }, { "epoch": 0.01980178709000788, "grad_norm": 1.5390625, "learning_rate": 0.0019496191881902033, "loss": 0.3015, "step": 11168 }, { "epoch": 0.019805333255317695, "grad_norm": 0.39453125, "learning_rate": 0.0019495995520560864, "loss": 0.2502, "step": 11170 }, { "epoch": 0.01980887942062751, "grad_norm": 0.359375, "learning_rate": 0.0019495799122063143, "loss": 0.2487, "step": 11172 }, { "epoch": 0.019812425585937328, "grad_norm": 1.0078125, "learning_rate": 0.0019495602686409738, "loss": 0.326, "step": 11174 }, { "epoch": 0.019815971751247143, "grad_norm": 0.2412109375, "learning_rate": 0.0019495406213601496, "loss": 0.1826, "step": 11176 }, { "epoch": 0.019819517916556957, "grad_norm": 2.0, "learning_rate": 0.0019495209703639287, "loss": 0.2762, "step": 11178 }, { "epoch": 0.01982306408186677, "grad_norm": 1.0546875, "learning_rate": 0.0019495013156523961, "loss": 0.3166, "step": 11180 }, { "epoch": 0.019826610247176586, "grad_norm": 0.267578125, "learning_rate": 0.0019494816572256384, "loss": 0.1833, "step": 11182 }, { "epoch": 0.019830156412486404, "grad_norm": 0.57421875, "learning_rate": 0.0019494619950837413, "loss": 0.43, "step": 11184 }, { "epoch": 0.01983370257779622, "grad_norm": 1.7109375, "learning_rate": 0.001949442329226791, "loss": 0.2684, "step": 11186 }, { "epoch": 0.019837248743106033, "grad_norm": 0.7109375, "learning_rate": 0.0019494226596548734, "loss": 0.1878, "step": 11188 }, { "epoch": 0.019840794908415848, "grad_norm": 0.3359375, "learning_rate": 0.0019494029863680743, "loss": 0.2336, "step": 11190 }, { "epoch": 0.019844341073725662, "grad_norm": 2.328125, "learning_rate": 0.00194938330936648, "loss": 0.3088, "step": 11192 }, { "epoch": 0.019847887239035477, "grad_norm": 10.6875, "learning_rate": 0.0019493636286501766, "loss": 0.2701, "step": 11194 }, { "epoch": 0.019851433404345295, "grad_norm": 5.875, "learning_rate": 0.00194934394421925, "loss": 0.207, "step": 11196 }, { "epoch": 0.01985497956965511, "grad_norm": 1.0859375, "learning_rate": 0.001949324256073786, "loss": 0.2564, "step": 11198 }, { "epoch": 0.019858525734964924, "grad_norm": 0.5546875, "learning_rate": 0.0019493045642138714, "loss": 0.2302, "step": 11200 }, { "epoch": 0.01986207190027474, "grad_norm": 1.734375, "learning_rate": 0.001949284868639592, "loss": 0.2123, "step": 11202 }, { "epoch": 0.019865618065584553, "grad_norm": 0.396484375, "learning_rate": 0.0019492651693510338, "loss": 0.2276, "step": 11204 }, { "epoch": 0.019869164230894368, "grad_norm": 0.6171875, "learning_rate": 0.0019492454663482832, "loss": 0.2275, "step": 11206 }, { "epoch": 0.019872710396204186, "grad_norm": 0.33984375, "learning_rate": 0.001949225759631426, "loss": 0.2468, "step": 11208 }, { "epoch": 0.019876256561514, "grad_norm": 0.79296875, "learning_rate": 0.0019492060492005488, "loss": 0.2124, "step": 11210 }, { "epoch": 0.019879802726823815, "grad_norm": 0.63671875, "learning_rate": 0.0019491863350557378, "loss": 0.2968, "step": 11212 }, { "epoch": 0.01988334889213363, "grad_norm": 0.3125, "learning_rate": 0.0019491666171970786, "loss": 0.2171, "step": 11214 }, { "epoch": 0.019886895057443444, "grad_norm": 0.451171875, "learning_rate": 0.001949146895624658, "loss": 0.1843, "step": 11216 }, { "epoch": 0.019890441222753262, "grad_norm": 0.2890625, "learning_rate": 0.0019491271703385622, "loss": 0.2565, "step": 11218 }, { "epoch": 0.019893987388063077, "grad_norm": 0.515625, "learning_rate": 0.0019491074413388774, "loss": 0.2432, "step": 11220 }, { "epoch": 0.01989753355337289, "grad_norm": 0.89453125, "learning_rate": 0.0019490877086256898, "loss": 0.2286, "step": 11222 }, { "epoch": 0.019901079718682706, "grad_norm": 0.6640625, "learning_rate": 0.0019490679721990858, "loss": 0.2235, "step": 11224 }, { "epoch": 0.01990462588399252, "grad_norm": 0.267578125, "learning_rate": 0.0019490482320591515, "loss": 0.1426, "step": 11226 }, { "epoch": 0.019908172049302335, "grad_norm": 0.482421875, "learning_rate": 0.001949028488205974, "loss": 0.2499, "step": 11228 }, { "epoch": 0.019911718214612153, "grad_norm": 0.47265625, "learning_rate": 0.0019490087406396387, "loss": 0.256, "step": 11230 }, { "epoch": 0.019915264379921967, "grad_norm": 0.8125, "learning_rate": 0.0019489889893602322, "loss": 0.2721, "step": 11232 }, { "epoch": 0.019918810545231782, "grad_norm": 0.54296875, "learning_rate": 0.001948969234367841, "loss": 0.2636, "step": 11234 }, { "epoch": 0.019922356710541596, "grad_norm": 0.345703125, "learning_rate": 0.0019489494756625516, "loss": 0.2813, "step": 11236 }, { "epoch": 0.01992590287585141, "grad_norm": 0.494140625, "learning_rate": 0.0019489297132444505, "loss": 0.3022, "step": 11238 }, { "epoch": 0.019929449041161226, "grad_norm": 0.2265625, "learning_rate": 0.0019489099471136238, "loss": 0.4236, "step": 11240 }, { "epoch": 0.019932995206471044, "grad_norm": 2.25, "learning_rate": 0.0019488901772701582, "loss": 0.265, "step": 11242 }, { "epoch": 0.019936541371780858, "grad_norm": 0.337890625, "learning_rate": 0.0019488704037141397, "loss": 0.2515, "step": 11244 }, { "epoch": 0.019940087537090673, "grad_norm": 0.21484375, "learning_rate": 0.0019488506264456556, "loss": 0.4747, "step": 11246 }, { "epoch": 0.019943633702400487, "grad_norm": 0.451171875, "learning_rate": 0.0019488308454647916, "loss": 0.3037, "step": 11248 }, { "epoch": 0.019947179867710302, "grad_norm": 1.0625, "learning_rate": 0.0019488110607716346, "loss": 0.2295, "step": 11250 }, { "epoch": 0.01995072603302012, "grad_norm": 1.7890625, "learning_rate": 0.0019487912723662715, "loss": 0.2905, "step": 11252 }, { "epoch": 0.019954272198329934, "grad_norm": 0.28515625, "learning_rate": 0.001948771480248788, "loss": 0.2631, "step": 11254 }, { "epoch": 0.01995781836363975, "grad_norm": 0.462890625, "learning_rate": 0.001948751684419271, "loss": 0.2534, "step": 11256 }, { "epoch": 0.019961364528949564, "grad_norm": 0.482421875, "learning_rate": 0.0019487318848778073, "loss": 0.1893, "step": 11258 }, { "epoch": 0.019964910694259378, "grad_norm": 4.65625, "learning_rate": 0.0019487120816244834, "loss": 0.4082, "step": 11260 }, { "epoch": 0.019968456859569193, "grad_norm": 0.455078125, "learning_rate": 0.0019486922746593856, "loss": 0.216, "step": 11262 }, { "epoch": 0.01997200302487901, "grad_norm": 0.4609375, "learning_rate": 0.001948672463982601, "loss": 0.2413, "step": 11264 }, { "epoch": 0.019975549190188825, "grad_norm": 0.328125, "learning_rate": 0.0019486526495942158, "loss": 0.3381, "step": 11266 }, { "epoch": 0.01997909535549864, "grad_norm": 0.73828125, "learning_rate": 0.001948632831494317, "loss": 0.2167, "step": 11268 }, { "epoch": 0.019982641520808454, "grad_norm": 0.228515625, "learning_rate": 0.001948613009682991, "loss": 0.221, "step": 11270 }, { "epoch": 0.01998618768611827, "grad_norm": 0.498046875, "learning_rate": 0.001948593184160325, "loss": 0.2228, "step": 11272 }, { "epoch": 0.019989733851428083, "grad_norm": 0.451171875, "learning_rate": 0.001948573354926405, "loss": 0.3056, "step": 11274 }, { "epoch": 0.0199932800167379, "grad_norm": 0.3671875, "learning_rate": 0.0019485535219813182, "loss": 0.2726, "step": 11276 }, { "epoch": 0.019996826182047716, "grad_norm": 0.1884765625, "learning_rate": 0.0019485336853251511, "loss": 0.2645, "step": 11278 }, { "epoch": 0.02000037234735753, "grad_norm": 0.3125, "learning_rate": 0.001948513844957991, "loss": 0.2255, "step": 11280 }, { "epoch": 0.020003918512667345, "grad_norm": 1.765625, "learning_rate": 0.0019484940008799236, "loss": 0.2621, "step": 11282 }, { "epoch": 0.02000746467797716, "grad_norm": 0.380859375, "learning_rate": 0.0019484741530910363, "loss": 0.1661, "step": 11284 }, { "epoch": 0.020011010843286978, "grad_norm": 0.291015625, "learning_rate": 0.0019484543015914162, "loss": 0.3313, "step": 11286 }, { "epoch": 0.020014557008596792, "grad_norm": 0.490234375, "learning_rate": 0.0019484344463811501, "loss": 0.1959, "step": 11288 }, { "epoch": 0.020018103173906607, "grad_norm": 0.859375, "learning_rate": 0.001948414587460324, "loss": 0.2805, "step": 11290 }, { "epoch": 0.02002164933921642, "grad_norm": 0.52734375, "learning_rate": 0.0019483947248290256, "loss": 0.2232, "step": 11292 }, { "epoch": 0.020025195504526236, "grad_norm": 1.921875, "learning_rate": 0.0019483748584873412, "loss": 0.2612, "step": 11294 }, { "epoch": 0.02002874166983605, "grad_norm": 0.8046875, "learning_rate": 0.001948354988435358, "loss": 0.5627, "step": 11296 }, { "epoch": 0.02003228783514587, "grad_norm": 0.25, "learning_rate": 0.0019483351146731634, "loss": 0.215, "step": 11298 }, { "epoch": 0.020035834000455683, "grad_norm": 0.546875, "learning_rate": 0.0019483152372008433, "loss": 0.2519, "step": 11300 }, { "epoch": 0.020039380165765498, "grad_norm": 0.2578125, "learning_rate": 0.0019482953560184854, "loss": 0.2748, "step": 11302 }, { "epoch": 0.020042926331075312, "grad_norm": 0.8515625, "learning_rate": 0.001948275471126176, "loss": 0.2328, "step": 11304 }, { "epoch": 0.020046472496385127, "grad_norm": 0.26171875, "learning_rate": 0.0019482555825240026, "loss": 0.2138, "step": 11306 }, { "epoch": 0.02005001866169494, "grad_norm": 0.2578125, "learning_rate": 0.0019482356902120522, "loss": 0.2024, "step": 11308 }, { "epoch": 0.02005356482700476, "grad_norm": 0.49609375, "learning_rate": 0.0019482157941904112, "loss": 0.235, "step": 11310 }, { "epoch": 0.020057110992314574, "grad_norm": 1.9453125, "learning_rate": 0.0019481958944591675, "loss": 0.4309, "step": 11312 }, { "epoch": 0.02006065715762439, "grad_norm": 0.50390625, "learning_rate": 0.0019481759910184072, "loss": 0.2425, "step": 11314 }, { "epoch": 0.020064203322934203, "grad_norm": 0.33984375, "learning_rate": 0.0019481560838682183, "loss": 0.2217, "step": 11316 }, { "epoch": 0.020067749488244017, "grad_norm": 0.357421875, "learning_rate": 0.001948136173008687, "loss": 0.2315, "step": 11318 }, { "epoch": 0.020071295653553835, "grad_norm": 0.46484375, "learning_rate": 0.0019481162584399005, "loss": 0.2198, "step": 11320 }, { "epoch": 0.02007484181886365, "grad_norm": 0.80078125, "learning_rate": 0.0019480963401619464, "loss": 0.3774, "step": 11322 }, { "epoch": 0.020078387984173465, "grad_norm": 1.15625, "learning_rate": 0.0019480764181749118, "loss": 0.1891, "step": 11324 }, { "epoch": 0.02008193414948328, "grad_norm": 0.345703125, "learning_rate": 0.001948056492478883, "loss": 0.1933, "step": 11326 }, { "epoch": 0.020085480314793094, "grad_norm": 1.0859375, "learning_rate": 0.0019480365630739484, "loss": 0.42, "step": 11328 }, { "epoch": 0.020089026480102908, "grad_norm": 0.828125, "learning_rate": 0.001948016629960194, "loss": 0.1988, "step": 11330 }, { "epoch": 0.020092572645412726, "grad_norm": 1.0234375, "learning_rate": 0.0019479966931377076, "loss": 0.408, "step": 11332 }, { "epoch": 0.02009611881072254, "grad_norm": 0.5703125, "learning_rate": 0.001947976752606576, "loss": 0.2296, "step": 11334 }, { "epoch": 0.020099664976032355, "grad_norm": 0.25, "learning_rate": 0.0019479568083668871, "loss": 0.2747, "step": 11336 }, { "epoch": 0.02010321114134217, "grad_norm": 0.31640625, "learning_rate": 0.0019479368604187273, "loss": 0.2475, "step": 11338 }, { "epoch": 0.020106757306651984, "grad_norm": 2.40625, "learning_rate": 0.0019479169087621843, "loss": 0.3176, "step": 11340 }, { "epoch": 0.0201103034719618, "grad_norm": 0.49609375, "learning_rate": 0.0019478969533973452, "loss": 0.2713, "step": 11342 }, { "epoch": 0.020113849637271617, "grad_norm": 0.326171875, "learning_rate": 0.0019478769943242975, "loss": 0.2049, "step": 11344 }, { "epoch": 0.02011739580258143, "grad_norm": 0.3125, "learning_rate": 0.0019478570315431282, "loss": 0.217, "step": 11346 }, { "epoch": 0.020120941967891246, "grad_norm": 2.140625, "learning_rate": 0.0019478370650539247, "loss": 0.2857, "step": 11348 }, { "epoch": 0.02012448813320106, "grad_norm": 2.265625, "learning_rate": 0.001947817094856775, "loss": 0.3395, "step": 11350 }, { "epoch": 0.020128034298510875, "grad_norm": 0.484375, "learning_rate": 0.001947797120951765, "loss": 0.3197, "step": 11352 }, { "epoch": 0.020131580463820693, "grad_norm": 0.91015625, "learning_rate": 0.001947777143338983, "loss": 0.1779, "step": 11354 }, { "epoch": 0.020135126629130508, "grad_norm": 0.83203125, "learning_rate": 0.0019477571620185165, "loss": 0.201, "step": 11356 }, { "epoch": 0.020138672794440322, "grad_norm": 0.35546875, "learning_rate": 0.0019477371769904522, "loss": 0.2357, "step": 11358 }, { "epoch": 0.020142218959750137, "grad_norm": 1.0625, "learning_rate": 0.0019477171882548781, "loss": 0.5784, "step": 11360 }, { "epoch": 0.02014576512505995, "grad_norm": 1.5859375, "learning_rate": 0.0019476971958118817, "loss": 0.2947, "step": 11362 }, { "epoch": 0.020149311290369766, "grad_norm": 0.61328125, "learning_rate": 0.00194767719966155, "loss": 0.2103, "step": 11364 }, { "epoch": 0.020152857455679584, "grad_norm": 0.9296875, "learning_rate": 0.0019476571998039707, "loss": 0.1959, "step": 11366 }, { "epoch": 0.0201564036209894, "grad_norm": 0.5078125, "learning_rate": 0.0019476371962392307, "loss": 0.1873, "step": 11368 }, { "epoch": 0.020159949786299213, "grad_norm": 0.34765625, "learning_rate": 0.0019476171889674185, "loss": 0.2362, "step": 11370 }, { "epoch": 0.020163495951609028, "grad_norm": 0.57421875, "learning_rate": 0.0019475971779886207, "loss": 0.2522, "step": 11372 }, { "epoch": 0.020167042116918842, "grad_norm": 0.57421875, "learning_rate": 0.0019475771633029255, "loss": 0.2236, "step": 11374 }, { "epoch": 0.020170588282228657, "grad_norm": 0.5703125, "learning_rate": 0.0019475571449104202, "loss": 0.2101, "step": 11376 }, { "epoch": 0.020174134447538475, "grad_norm": 0.7109375, "learning_rate": 0.001947537122811192, "loss": 0.2019, "step": 11378 }, { "epoch": 0.02017768061284829, "grad_norm": 0.328125, "learning_rate": 0.0019475170970053289, "loss": 0.2312, "step": 11380 }, { "epoch": 0.020181226778158104, "grad_norm": 1.6875, "learning_rate": 0.0019474970674929182, "loss": 0.2084, "step": 11382 }, { "epoch": 0.02018477294346792, "grad_norm": 0.62890625, "learning_rate": 0.0019474770342740478, "loss": 0.2404, "step": 11384 }, { "epoch": 0.020188319108777733, "grad_norm": 0.546875, "learning_rate": 0.0019474569973488049, "loss": 0.184, "step": 11386 }, { "epoch": 0.02019186527408755, "grad_norm": 0.54296875, "learning_rate": 0.001947436956717277, "loss": 0.4145, "step": 11388 }, { "epoch": 0.020195411439397366, "grad_norm": 0.451171875, "learning_rate": 0.001947416912379553, "loss": 0.2548, "step": 11390 }, { "epoch": 0.02019895760470718, "grad_norm": 0.78125, "learning_rate": 0.001947396864335719, "loss": 0.2814, "step": 11392 }, { "epoch": 0.020202503770016995, "grad_norm": 0.59765625, "learning_rate": 0.0019473768125858632, "loss": 0.3115, "step": 11394 }, { "epoch": 0.02020604993532681, "grad_norm": 0.83984375, "learning_rate": 0.0019473567571300738, "loss": 0.2221, "step": 11396 }, { "epoch": 0.020209596100636624, "grad_norm": 0.408203125, "learning_rate": 0.001947336697968438, "loss": 0.325, "step": 11398 }, { "epoch": 0.020213142265946442, "grad_norm": 0.55859375, "learning_rate": 0.0019473166351010442, "loss": 0.1654, "step": 11400 }, { "epoch": 0.020216688431256256, "grad_norm": 0.5546875, "learning_rate": 0.001947296568527979, "loss": 0.2179, "step": 11402 }, { "epoch": 0.02022023459656607, "grad_norm": 0.26171875, "learning_rate": 0.0019472764982493309, "loss": 0.2067, "step": 11404 }, { "epoch": 0.020223780761875886, "grad_norm": 0.63671875, "learning_rate": 0.0019472564242651877, "loss": 0.2715, "step": 11406 }, { "epoch": 0.0202273269271857, "grad_norm": 0.421875, "learning_rate": 0.001947236346575637, "loss": 0.177, "step": 11408 }, { "epoch": 0.020230873092495515, "grad_norm": 1.375, "learning_rate": 0.0019472162651807668, "loss": 0.298, "step": 11410 }, { "epoch": 0.020234419257805333, "grad_norm": 0.208984375, "learning_rate": 0.0019471961800806646, "loss": 0.2611, "step": 11412 }, { "epoch": 0.020237965423115147, "grad_norm": 0.65625, "learning_rate": 0.0019471760912754185, "loss": 0.1969, "step": 11414 }, { "epoch": 0.020241511588424962, "grad_norm": 0.671875, "learning_rate": 0.001947155998765116, "loss": 0.2581, "step": 11416 }, { "epoch": 0.020245057753734776, "grad_norm": 0.33203125, "learning_rate": 0.0019471359025498454, "loss": 0.218, "step": 11418 }, { "epoch": 0.02024860391904459, "grad_norm": 3.015625, "learning_rate": 0.0019471158026296946, "loss": 0.4829, "step": 11420 }, { "epoch": 0.02025215008435441, "grad_norm": 0.482421875, "learning_rate": 0.0019470956990047512, "loss": 0.1781, "step": 11422 }, { "epoch": 0.020255696249664223, "grad_norm": 2.5, "learning_rate": 0.0019470755916751034, "loss": 0.3491, "step": 11424 }, { "epoch": 0.020259242414974038, "grad_norm": 0.578125, "learning_rate": 0.001947055480640839, "loss": 0.7109, "step": 11426 }, { "epoch": 0.020262788580283853, "grad_norm": 0.2470703125, "learning_rate": 0.0019470353659020456, "loss": 0.2406, "step": 11428 }, { "epoch": 0.020266334745593667, "grad_norm": 0.8515625, "learning_rate": 0.0019470152474588118, "loss": 0.2212, "step": 11430 }, { "epoch": 0.02026988091090348, "grad_norm": 0.474609375, "learning_rate": 0.0019469951253112253, "loss": 0.2106, "step": 11432 }, { "epoch": 0.0202734270762133, "grad_norm": 4.71875, "learning_rate": 0.001946974999459374, "loss": 0.3944, "step": 11434 }, { "epoch": 0.020276973241523114, "grad_norm": 0.275390625, "learning_rate": 0.0019469548699033463, "loss": 0.1671, "step": 11436 }, { "epoch": 0.02028051940683293, "grad_norm": 0.390625, "learning_rate": 0.0019469347366432297, "loss": 0.2384, "step": 11438 }, { "epoch": 0.020284065572142743, "grad_norm": 0.3203125, "learning_rate": 0.0019469145996791127, "loss": 0.2598, "step": 11440 }, { "epoch": 0.020287611737452558, "grad_norm": 0.76171875, "learning_rate": 0.001946894459011083, "loss": 0.436, "step": 11442 }, { "epoch": 0.020291157902762372, "grad_norm": 0.31640625, "learning_rate": 0.001946874314639229, "loss": 0.2505, "step": 11444 }, { "epoch": 0.02029470406807219, "grad_norm": 0.353515625, "learning_rate": 0.0019468541665636388, "loss": 0.2663, "step": 11446 }, { "epoch": 0.020298250233382005, "grad_norm": 0.68359375, "learning_rate": 0.0019468340147844004, "loss": 0.1851, "step": 11448 }, { "epoch": 0.02030179639869182, "grad_norm": 0.61328125, "learning_rate": 0.0019468138593016016, "loss": 0.2056, "step": 11450 }, { "epoch": 0.020305342564001634, "grad_norm": 0.65625, "learning_rate": 0.001946793700115331, "loss": 0.2989, "step": 11452 }, { "epoch": 0.02030888872931145, "grad_norm": 0.74609375, "learning_rate": 0.0019467735372256764, "loss": 0.3187, "step": 11454 }, { "epoch": 0.020312434894621267, "grad_norm": 0.3515625, "learning_rate": 0.0019467533706327268, "loss": 0.2322, "step": 11456 }, { "epoch": 0.02031598105993108, "grad_norm": 20.875, "learning_rate": 0.0019467332003365694, "loss": 0.256, "step": 11458 }, { "epoch": 0.020319527225240896, "grad_norm": 0.65234375, "learning_rate": 0.001946713026337293, "loss": 0.2115, "step": 11460 }, { "epoch": 0.02032307339055071, "grad_norm": 0.44140625, "learning_rate": 0.0019466928486349855, "loss": 0.2224, "step": 11462 }, { "epoch": 0.020326619555860525, "grad_norm": 1.03125, "learning_rate": 0.0019466726672297354, "loss": 0.2574, "step": 11464 }, { "epoch": 0.02033016572117034, "grad_norm": 1.1484375, "learning_rate": 0.001946652482121631, "loss": 0.2771, "step": 11466 }, { "epoch": 0.020333711886480157, "grad_norm": 0.5546875, "learning_rate": 0.0019466322933107602, "loss": 0.2037, "step": 11468 }, { "epoch": 0.020337258051789972, "grad_norm": 0.388671875, "learning_rate": 0.0019466121007972112, "loss": 0.2151, "step": 11470 }, { "epoch": 0.020340804217099787, "grad_norm": 0.3359375, "learning_rate": 0.0019465919045810734, "loss": 0.1725, "step": 11472 }, { "epoch": 0.0203443503824096, "grad_norm": 0.32421875, "learning_rate": 0.001946571704662434, "loss": 0.374, "step": 11474 }, { "epoch": 0.020347896547719416, "grad_norm": 0.423828125, "learning_rate": 0.0019465515010413817, "loss": 0.157, "step": 11476 }, { "epoch": 0.02035144271302923, "grad_norm": 0.84765625, "learning_rate": 0.0019465312937180048, "loss": 0.4223, "step": 11478 }, { "epoch": 0.02035498887833905, "grad_norm": 0.2275390625, "learning_rate": 0.001946511082692392, "loss": 0.1701, "step": 11480 }, { "epoch": 0.020358535043648863, "grad_norm": 1.4296875, "learning_rate": 0.0019464908679646309, "loss": 0.2756, "step": 11482 }, { "epoch": 0.020362081208958677, "grad_norm": 0.85546875, "learning_rate": 0.001946470649534811, "loss": 0.274, "step": 11484 }, { "epoch": 0.020365627374268492, "grad_norm": 0.4921875, "learning_rate": 0.0019464504274030198, "loss": 0.2443, "step": 11486 }, { "epoch": 0.020369173539578306, "grad_norm": 0.57421875, "learning_rate": 0.0019464302015693464, "loss": 0.1874, "step": 11488 }, { "epoch": 0.020372719704888125, "grad_norm": 0.33203125, "learning_rate": 0.0019464099720338788, "loss": 0.1898, "step": 11490 }, { "epoch": 0.02037626587019794, "grad_norm": 0.279296875, "learning_rate": 0.0019463897387967059, "loss": 0.2622, "step": 11492 }, { "epoch": 0.020379812035507754, "grad_norm": 0.455078125, "learning_rate": 0.0019463695018579158, "loss": 0.1773, "step": 11494 }, { "epoch": 0.020383358200817568, "grad_norm": 0.265625, "learning_rate": 0.0019463492612175968, "loss": 0.2106, "step": 11496 }, { "epoch": 0.020386904366127383, "grad_norm": 1.4453125, "learning_rate": 0.001946329016875838, "loss": 0.2889, "step": 11498 }, { "epoch": 0.020390450531437197, "grad_norm": 0.44140625, "learning_rate": 0.0019463087688327276, "loss": 0.2176, "step": 11500 }, { "epoch": 0.020393996696747015, "grad_norm": 0.78515625, "learning_rate": 0.0019462885170883544, "loss": 0.182, "step": 11502 }, { "epoch": 0.02039754286205683, "grad_norm": 0.3046875, "learning_rate": 0.0019462682616428067, "loss": 0.19, "step": 11504 }, { "epoch": 0.020401089027366644, "grad_norm": 1.234375, "learning_rate": 0.0019462480024961732, "loss": 0.2702, "step": 11506 }, { "epoch": 0.02040463519267646, "grad_norm": 0.88671875, "learning_rate": 0.0019462277396485423, "loss": 0.2376, "step": 11508 }, { "epoch": 0.020408181357986274, "grad_norm": 1.21875, "learning_rate": 0.0019462074731000033, "loss": 0.2597, "step": 11510 }, { "epoch": 0.020411727523296088, "grad_norm": 0.5, "learning_rate": 0.0019461872028506439, "loss": 0.1977, "step": 11512 }, { "epoch": 0.020415273688605906, "grad_norm": 0.58203125, "learning_rate": 0.0019461669289005535, "loss": 0.1813, "step": 11514 }, { "epoch": 0.02041881985391572, "grad_norm": 0.43359375, "learning_rate": 0.0019461466512498203, "loss": 0.2265, "step": 11516 }, { "epoch": 0.020422366019225535, "grad_norm": 4.8125, "learning_rate": 0.0019461263698985333, "loss": 0.2465, "step": 11518 }, { "epoch": 0.02042591218453535, "grad_norm": 6.5, "learning_rate": 0.0019461060848467806, "loss": 0.2049, "step": 11520 }, { "epoch": 0.020429458349845164, "grad_norm": 0.87890625, "learning_rate": 0.001946085796094652, "loss": 0.2118, "step": 11522 }, { "epoch": 0.020433004515154982, "grad_norm": 3.5625, "learning_rate": 0.0019460655036422352, "loss": 0.2437, "step": 11524 }, { "epoch": 0.020436550680464797, "grad_norm": 0.423828125, "learning_rate": 0.0019460452074896194, "loss": 0.1848, "step": 11526 }, { "epoch": 0.02044009684577461, "grad_norm": 0.6328125, "learning_rate": 0.0019460249076368934, "loss": 0.2134, "step": 11528 }, { "epoch": 0.020443643011084426, "grad_norm": 1.28125, "learning_rate": 0.0019460046040841459, "loss": 0.3412, "step": 11530 }, { "epoch": 0.02044718917639424, "grad_norm": 1.0390625, "learning_rate": 0.0019459842968314654, "loss": 0.2589, "step": 11532 }, { "epoch": 0.020450735341704055, "grad_norm": 1.1171875, "learning_rate": 0.0019459639858789414, "loss": 0.3058, "step": 11534 }, { "epoch": 0.020454281507013873, "grad_norm": 1.328125, "learning_rate": 0.0019459436712266618, "loss": 0.2385, "step": 11536 }, { "epoch": 0.020457827672323688, "grad_norm": 1.0625, "learning_rate": 0.0019459233528747164, "loss": 0.322, "step": 11538 }, { "epoch": 0.020461373837633502, "grad_norm": 0.55078125, "learning_rate": 0.0019459030308231935, "loss": 0.4301, "step": 11540 }, { "epoch": 0.020464920002943317, "grad_norm": 2.203125, "learning_rate": 0.0019458827050721824, "loss": 0.1894, "step": 11542 }, { "epoch": 0.02046846616825313, "grad_norm": 0.57421875, "learning_rate": 0.0019458623756217713, "loss": 0.2129, "step": 11544 }, { "epoch": 0.020472012333562946, "grad_norm": 1.203125, "learning_rate": 0.0019458420424720492, "loss": 0.2497, "step": 11546 }, { "epoch": 0.020475558498872764, "grad_norm": 0.33984375, "learning_rate": 0.001945821705623106, "loss": 0.2217, "step": 11548 }, { "epoch": 0.02047910466418258, "grad_norm": 0.5390625, "learning_rate": 0.0019458013650750297, "loss": 0.2514, "step": 11550 }, { "epoch": 0.020482650829492393, "grad_norm": 3.578125, "learning_rate": 0.0019457810208279097, "loss": 0.3662, "step": 11552 }, { "epoch": 0.020486196994802208, "grad_norm": 0.271484375, "learning_rate": 0.0019457606728818342, "loss": 0.2547, "step": 11554 }, { "epoch": 0.020489743160112022, "grad_norm": 0.416015625, "learning_rate": 0.0019457403212368935, "loss": 0.1947, "step": 11556 }, { "epoch": 0.02049328932542184, "grad_norm": 0.30078125, "learning_rate": 0.0019457199658931756, "loss": 0.232, "step": 11558 }, { "epoch": 0.020496835490731655, "grad_norm": 1.0859375, "learning_rate": 0.0019456996068507697, "loss": 0.2397, "step": 11560 }, { "epoch": 0.02050038165604147, "grad_norm": 0.6171875, "learning_rate": 0.001945679244109765, "loss": 0.2404, "step": 11562 }, { "epoch": 0.020503927821351284, "grad_norm": 0.41796875, "learning_rate": 0.0019456588776702508, "loss": 0.1804, "step": 11564 }, { "epoch": 0.0205074739866611, "grad_norm": 0.77734375, "learning_rate": 0.0019456385075323158, "loss": 0.2784, "step": 11566 }, { "epoch": 0.020511020151970913, "grad_norm": 0.9609375, "learning_rate": 0.0019456181336960491, "loss": 0.2522, "step": 11568 }, { "epoch": 0.02051456631728073, "grad_norm": 0.42578125, "learning_rate": 0.0019455977561615397, "loss": 0.2078, "step": 11570 }, { "epoch": 0.020518112482590545, "grad_norm": 0.5234375, "learning_rate": 0.0019455773749288772, "loss": 0.1963, "step": 11572 }, { "epoch": 0.02052165864790036, "grad_norm": 0.59765625, "learning_rate": 0.0019455569899981503, "loss": 0.2886, "step": 11574 }, { "epoch": 0.020525204813210175, "grad_norm": 0.55859375, "learning_rate": 0.0019455366013694483, "loss": 0.2404, "step": 11576 }, { "epoch": 0.02052875097851999, "grad_norm": 0.306640625, "learning_rate": 0.0019455162090428603, "loss": 0.3266, "step": 11578 }, { "epoch": 0.020532297143829804, "grad_norm": 1.25, "learning_rate": 0.0019454958130184755, "loss": 0.2087, "step": 11580 }, { "epoch": 0.02053584330913962, "grad_norm": 0.419921875, "learning_rate": 0.0019454754132963831, "loss": 0.2971, "step": 11582 }, { "epoch": 0.020539389474449436, "grad_norm": 0.61328125, "learning_rate": 0.0019454550098766726, "loss": 0.2213, "step": 11584 }, { "epoch": 0.02054293563975925, "grad_norm": 1.375, "learning_rate": 0.0019454346027594327, "loss": 0.2925, "step": 11586 }, { "epoch": 0.020546481805069065, "grad_norm": 1.453125, "learning_rate": 0.001945414191944753, "loss": 0.2987, "step": 11588 }, { "epoch": 0.02055002797037888, "grad_norm": 0.306640625, "learning_rate": 0.001945393777432723, "loss": 0.2941, "step": 11590 }, { "epoch": 0.020553574135688698, "grad_norm": 0.62109375, "learning_rate": 0.0019453733592234312, "loss": 0.2371, "step": 11592 }, { "epoch": 0.020557120300998512, "grad_norm": 0.796875, "learning_rate": 0.0019453529373169678, "loss": 0.3297, "step": 11594 }, { "epoch": 0.020560666466308327, "grad_norm": 0.60546875, "learning_rate": 0.0019453325117134216, "loss": 0.2031, "step": 11596 }, { "epoch": 0.02056421263161814, "grad_norm": 0.55078125, "learning_rate": 0.0019453120824128817, "loss": 0.281, "step": 11598 }, { "epoch": 0.020567758796927956, "grad_norm": 0.291015625, "learning_rate": 0.001945291649415438, "loss": 0.2049, "step": 11600 }, { "epoch": 0.02057130496223777, "grad_norm": 0.875, "learning_rate": 0.0019452712127211796, "loss": 0.3167, "step": 11602 }, { "epoch": 0.02057485112754759, "grad_norm": 0.451171875, "learning_rate": 0.001945250772330196, "loss": 0.2726, "step": 11604 }, { "epoch": 0.020578397292857403, "grad_norm": 0.24609375, "learning_rate": 0.0019452303282425766, "loss": 0.1802, "step": 11606 }, { "epoch": 0.020581943458167218, "grad_norm": 1.375, "learning_rate": 0.0019452098804584104, "loss": 0.2708, "step": 11608 }, { "epoch": 0.020585489623477032, "grad_norm": 0.625, "learning_rate": 0.0019451894289777873, "loss": 0.2571, "step": 11610 }, { "epoch": 0.020589035788786847, "grad_norm": 0.6484375, "learning_rate": 0.0019451689738007965, "loss": 0.2276, "step": 11612 }, { "epoch": 0.02059258195409666, "grad_norm": 0.44921875, "learning_rate": 0.0019451485149275278, "loss": 0.2533, "step": 11614 }, { "epoch": 0.02059612811940648, "grad_norm": 0.73828125, "learning_rate": 0.00194512805235807, "loss": 0.2683, "step": 11616 }, { "epoch": 0.020599674284716294, "grad_norm": 0.462890625, "learning_rate": 0.0019451075860925135, "loss": 0.3028, "step": 11618 }, { "epoch": 0.02060322045002611, "grad_norm": 0.65625, "learning_rate": 0.001945087116130947, "loss": 0.2313, "step": 11620 }, { "epoch": 0.020606766615335923, "grad_norm": 0.6953125, "learning_rate": 0.0019450666424734601, "loss": 0.2631, "step": 11622 }, { "epoch": 0.020610312780645738, "grad_norm": 0.38671875, "learning_rate": 0.001945046165120143, "loss": 0.2482, "step": 11624 }, { "epoch": 0.020613858945955556, "grad_norm": 0.85546875, "learning_rate": 0.0019450256840710847, "loss": 0.2074, "step": 11626 }, { "epoch": 0.02061740511126537, "grad_norm": 0.25390625, "learning_rate": 0.001945005199326375, "loss": 0.204, "step": 11628 }, { "epoch": 0.020620951276575185, "grad_norm": 0.380859375, "learning_rate": 0.0019449847108861033, "loss": 0.2374, "step": 11630 }, { "epoch": 0.020624497441885, "grad_norm": 0.2353515625, "learning_rate": 0.0019449642187503594, "loss": 0.22, "step": 11632 }, { "epoch": 0.020628043607194814, "grad_norm": 0.4375, "learning_rate": 0.001944943722919233, "loss": 0.2157, "step": 11634 }, { "epoch": 0.02063158977250463, "grad_norm": 0.76953125, "learning_rate": 0.0019449232233928128, "loss": 0.2011, "step": 11636 }, { "epoch": 0.020635135937814447, "grad_norm": 0.466796875, "learning_rate": 0.00194490272017119, "loss": 0.209, "step": 11638 }, { "epoch": 0.02063868210312426, "grad_norm": 1.3828125, "learning_rate": 0.0019448822132544531, "loss": 0.4375, "step": 11640 }, { "epoch": 0.020642228268434076, "grad_norm": 0.3671875, "learning_rate": 0.0019448617026426923, "loss": 0.175, "step": 11642 }, { "epoch": 0.02064577443374389, "grad_norm": 3.53125, "learning_rate": 0.0019448411883359969, "loss": 0.6556, "step": 11644 }, { "epoch": 0.020649320599053705, "grad_norm": 0.4453125, "learning_rate": 0.001944820670334457, "loss": 0.2209, "step": 11646 }, { "epoch": 0.02065286676436352, "grad_norm": 1.328125, "learning_rate": 0.0019448001486381625, "loss": 0.2888, "step": 11648 }, { "epoch": 0.020656412929673337, "grad_norm": 1.1171875, "learning_rate": 0.0019447796232472025, "loss": 0.3793, "step": 11650 }, { "epoch": 0.020659959094983152, "grad_norm": 0.34375, "learning_rate": 0.0019447590941616675, "loss": 0.2187, "step": 11652 }, { "epoch": 0.020663505260292966, "grad_norm": 0.4375, "learning_rate": 0.0019447385613816466, "loss": 0.1953, "step": 11654 }, { "epoch": 0.02066705142560278, "grad_norm": 0.322265625, "learning_rate": 0.0019447180249072304, "loss": 0.2351, "step": 11656 }, { "epoch": 0.020670597590912596, "grad_norm": 0.59765625, "learning_rate": 0.0019446974847385076, "loss": 0.2192, "step": 11658 }, { "epoch": 0.020674143756222414, "grad_norm": 0.72265625, "learning_rate": 0.0019446769408755689, "loss": 0.2595, "step": 11660 }, { "epoch": 0.020677689921532228, "grad_norm": 0.35546875, "learning_rate": 0.0019446563933185042, "loss": 0.2263, "step": 11662 }, { "epoch": 0.020681236086842043, "grad_norm": 0.3984375, "learning_rate": 0.0019446358420674027, "loss": 0.2359, "step": 11664 }, { "epoch": 0.020684782252151857, "grad_norm": 0.9453125, "learning_rate": 0.0019446152871223548, "loss": 0.25, "step": 11666 }, { "epoch": 0.020688328417461672, "grad_norm": 0.421875, "learning_rate": 0.0019445947284834502, "loss": 0.2671, "step": 11668 }, { "epoch": 0.020691874582771486, "grad_norm": 1.9296875, "learning_rate": 0.0019445741661507788, "loss": 0.6032, "step": 11670 }, { "epoch": 0.020695420748081304, "grad_norm": 0.58203125, "learning_rate": 0.0019445536001244305, "loss": 0.4077, "step": 11672 }, { "epoch": 0.02069896691339112, "grad_norm": 0.58203125, "learning_rate": 0.0019445330304044958, "loss": 0.2392, "step": 11674 }, { "epoch": 0.020702513078700933, "grad_norm": 1.7109375, "learning_rate": 0.0019445124569910637, "loss": 0.4682, "step": 11676 }, { "epoch": 0.020706059244010748, "grad_norm": 0.51953125, "learning_rate": 0.0019444918798842247, "loss": 0.2496, "step": 11678 }, { "epoch": 0.020709605409320563, "grad_norm": 0.484375, "learning_rate": 0.001944471299084069, "loss": 0.5108, "step": 11680 }, { "epoch": 0.020713151574630377, "grad_norm": 0.41015625, "learning_rate": 0.0019444507145906862, "loss": 0.2027, "step": 11682 }, { "epoch": 0.020716697739940195, "grad_norm": 0.57421875, "learning_rate": 0.0019444301264041667, "loss": 0.2185, "step": 11684 }, { "epoch": 0.02072024390525001, "grad_norm": 1.34375, "learning_rate": 0.0019444095345246002, "loss": 0.2453, "step": 11686 }, { "epoch": 0.020723790070559824, "grad_norm": 0.53125, "learning_rate": 0.0019443889389520767, "loss": 0.2386, "step": 11688 }, { "epoch": 0.02072733623586964, "grad_norm": 0.34765625, "learning_rate": 0.0019443683396866867, "loss": 0.2393, "step": 11690 }, { "epoch": 0.020730882401179453, "grad_norm": 0.86328125, "learning_rate": 0.00194434773672852, "loss": 0.2187, "step": 11692 }, { "epoch": 0.02073442856648927, "grad_norm": 0.578125, "learning_rate": 0.0019443271300776666, "loss": 0.2132, "step": 11694 }, { "epoch": 0.020737974731799086, "grad_norm": 0.50390625, "learning_rate": 0.0019443065197342168, "loss": 0.2072, "step": 11696 }, { "epoch": 0.0207415208971089, "grad_norm": 0.69140625, "learning_rate": 0.0019442859056982612, "loss": 0.2375, "step": 11698 }, { "epoch": 0.020745067062418715, "grad_norm": 0.27734375, "learning_rate": 0.001944265287969889, "loss": 0.2856, "step": 11700 }, { "epoch": 0.02074861322772853, "grad_norm": 0.5625, "learning_rate": 0.0019442446665491905, "loss": 0.2713, "step": 11702 }, { "epoch": 0.020752159393038344, "grad_norm": 0.359375, "learning_rate": 0.0019442240414362568, "loss": 0.2755, "step": 11704 }, { "epoch": 0.020755705558348162, "grad_norm": 0.419921875, "learning_rate": 0.0019442034126311773, "loss": 0.2994, "step": 11706 }, { "epoch": 0.020759251723657977, "grad_norm": 0.25, "learning_rate": 0.0019441827801340427, "loss": 0.2344, "step": 11708 }, { "epoch": 0.02076279788896779, "grad_norm": 0.86328125, "learning_rate": 0.001944162143944943, "loss": 0.3913, "step": 11710 }, { "epoch": 0.020766344054277606, "grad_norm": 1.640625, "learning_rate": 0.001944141504063968, "loss": 0.2525, "step": 11712 }, { "epoch": 0.02076989021958742, "grad_norm": 2.828125, "learning_rate": 0.0019441208604912088, "loss": 0.3243, "step": 11714 }, { "epoch": 0.020773436384897235, "grad_norm": 0.33984375, "learning_rate": 0.0019441002132267549, "loss": 0.2193, "step": 11716 }, { "epoch": 0.020776982550207053, "grad_norm": 0.314453125, "learning_rate": 0.0019440795622706975, "loss": 0.2688, "step": 11718 }, { "epoch": 0.020780528715516867, "grad_norm": 0.44921875, "learning_rate": 0.0019440589076231258, "loss": 0.2419, "step": 11720 }, { "epoch": 0.020784074880826682, "grad_norm": 0.314453125, "learning_rate": 0.001944038249284131, "loss": 0.2258, "step": 11722 }, { "epoch": 0.020787621046136497, "grad_norm": 0.953125, "learning_rate": 0.0019440175872538032, "loss": 0.2392, "step": 11724 }, { "epoch": 0.02079116721144631, "grad_norm": 0.55078125, "learning_rate": 0.0019439969215322328, "loss": 0.2375, "step": 11726 }, { "epoch": 0.02079471337675613, "grad_norm": 0.30859375, "learning_rate": 0.00194397625211951, "loss": 0.2131, "step": 11728 }, { "epoch": 0.020798259542065944, "grad_norm": 1.1484375, "learning_rate": 0.0019439555790157254, "loss": 0.2618, "step": 11730 }, { "epoch": 0.020801805707375758, "grad_norm": 0.330078125, "learning_rate": 0.0019439349022209692, "loss": 0.2294, "step": 11732 }, { "epoch": 0.020805351872685573, "grad_norm": 0.48828125, "learning_rate": 0.001943914221735332, "loss": 0.1592, "step": 11734 }, { "epoch": 0.020808898037995387, "grad_norm": 0.42578125, "learning_rate": 0.0019438935375589044, "loss": 0.1544, "step": 11736 }, { "epoch": 0.020812444203305202, "grad_norm": 2.0, "learning_rate": 0.0019438728496917763, "loss": 0.4247, "step": 11738 }, { "epoch": 0.02081599036861502, "grad_norm": 0.359375, "learning_rate": 0.0019438521581340387, "loss": 0.2018, "step": 11740 }, { "epoch": 0.020819536533924834, "grad_norm": 0.36328125, "learning_rate": 0.001943831462885782, "loss": 0.1906, "step": 11742 }, { "epoch": 0.02082308269923465, "grad_norm": 3.078125, "learning_rate": 0.0019438107639470966, "loss": 0.3867, "step": 11744 }, { "epoch": 0.020826628864544464, "grad_norm": 0.59375, "learning_rate": 0.001943790061318073, "loss": 0.2404, "step": 11746 }, { "epoch": 0.020830175029854278, "grad_norm": 0.43359375, "learning_rate": 0.0019437693549988018, "loss": 0.2527, "step": 11748 }, { "epoch": 0.020833721195164093, "grad_norm": 1.546875, "learning_rate": 0.0019437486449893737, "loss": 0.3323, "step": 11750 }, { "epoch": 0.02083726736047391, "grad_norm": 0.248046875, "learning_rate": 0.0019437279312898791, "loss": 0.2413, "step": 11752 }, { "epoch": 0.020840813525783725, "grad_norm": 0.279296875, "learning_rate": 0.0019437072139004087, "loss": 0.2021, "step": 11754 }, { "epoch": 0.02084435969109354, "grad_norm": 0.8359375, "learning_rate": 0.0019436864928210527, "loss": 0.1949, "step": 11756 }, { "epoch": 0.020847905856403354, "grad_norm": 0.515625, "learning_rate": 0.0019436657680519023, "loss": 0.2339, "step": 11758 }, { "epoch": 0.02085145202171317, "grad_norm": 0.46875, "learning_rate": 0.0019436450395930477, "loss": 0.2197, "step": 11760 }, { "epoch": 0.020854998187022987, "grad_norm": 0.27734375, "learning_rate": 0.0019436243074445801, "loss": 0.2461, "step": 11762 }, { "epoch": 0.0208585443523328, "grad_norm": 1.1640625, "learning_rate": 0.0019436035716065897, "loss": 0.3151, "step": 11764 }, { "epoch": 0.020862090517642616, "grad_norm": 1.53125, "learning_rate": 0.0019435828320791668, "loss": 0.3223, "step": 11766 }, { "epoch": 0.02086563668295243, "grad_norm": 3.875, "learning_rate": 0.0019435620888624031, "loss": 0.4664, "step": 11768 }, { "epoch": 0.020869182848262245, "grad_norm": 0.7578125, "learning_rate": 0.0019435413419563888, "loss": 0.3204, "step": 11770 }, { "epoch": 0.02087272901357206, "grad_norm": 0.51171875, "learning_rate": 0.0019435205913612146, "loss": 0.2213, "step": 11772 }, { "epoch": 0.020876275178881878, "grad_norm": 0.423828125, "learning_rate": 0.0019434998370769713, "loss": 0.2593, "step": 11774 }, { "epoch": 0.020879821344191692, "grad_norm": 0.3828125, "learning_rate": 0.0019434790791037495, "loss": 0.2493, "step": 11776 }, { "epoch": 0.020883367509501507, "grad_norm": 0.306640625, "learning_rate": 0.0019434583174416402, "loss": 0.1507, "step": 11778 }, { "epoch": 0.02088691367481132, "grad_norm": 0.341796875, "learning_rate": 0.0019434375520907346, "loss": 0.264, "step": 11780 }, { "epoch": 0.020890459840121136, "grad_norm": 1.203125, "learning_rate": 0.0019434167830511228, "loss": 0.2504, "step": 11782 }, { "epoch": 0.02089400600543095, "grad_norm": 0.435546875, "learning_rate": 0.0019433960103228958, "loss": 0.1792, "step": 11784 }, { "epoch": 0.02089755217074077, "grad_norm": 0.255859375, "learning_rate": 0.0019433752339061442, "loss": 0.2101, "step": 11786 }, { "epoch": 0.020901098336050583, "grad_norm": 1.1640625, "learning_rate": 0.0019433544538009597, "loss": 0.1699, "step": 11788 }, { "epoch": 0.020904644501360398, "grad_norm": 0.59375, "learning_rate": 0.0019433336700074328, "loss": 0.2343, "step": 11790 }, { "epoch": 0.020908190666670212, "grad_norm": 0.52734375, "learning_rate": 0.0019433128825256541, "loss": 0.2101, "step": 11792 }, { "epoch": 0.020911736831980027, "grad_norm": 0.4765625, "learning_rate": 0.0019432920913557148, "loss": 0.2671, "step": 11794 }, { "epoch": 0.020915282997289845, "grad_norm": 1.7890625, "learning_rate": 0.0019432712964977058, "loss": 0.2376, "step": 11796 }, { "epoch": 0.02091882916259966, "grad_norm": 0.302734375, "learning_rate": 0.0019432504979517177, "loss": 0.2463, "step": 11798 }, { "epoch": 0.020922375327909474, "grad_norm": 0.9921875, "learning_rate": 0.001943229695717842, "loss": 0.3419, "step": 11800 }, { "epoch": 0.02092592149321929, "grad_norm": 0.5546875, "learning_rate": 0.0019432088897961693, "loss": 0.1954, "step": 11802 }, { "epoch": 0.020929467658529103, "grad_norm": 0.41796875, "learning_rate": 0.0019431880801867908, "loss": 0.1908, "step": 11804 }, { "epoch": 0.020933013823838918, "grad_norm": 0.359375, "learning_rate": 0.0019431672668897974, "loss": 0.2249, "step": 11806 }, { "epoch": 0.020936559989148736, "grad_norm": 0.71484375, "learning_rate": 0.00194314644990528, "loss": 0.2014, "step": 11808 }, { "epoch": 0.02094010615445855, "grad_norm": 0.76953125, "learning_rate": 0.0019431256292333297, "loss": 0.277, "step": 11810 }, { "epoch": 0.020943652319768365, "grad_norm": 0.388671875, "learning_rate": 0.0019431048048740378, "loss": 0.4032, "step": 11812 }, { "epoch": 0.02094719848507818, "grad_norm": 0.267578125, "learning_rate": 0.0019430839768274954, "loss": 0.2798, "step": 11814 }, { "epoch": 0.020950744650387994, "grad_norm": 2.34375, "learning_rate": 0.001943063145093793, "loss": 0.5086, "step": 11816 }, { "epoch": 0.02095429081569781, "grad_norm": 0.546875, "learning_rate": 0.0019430423096730223, "loss": 0.194, "step": 11818 }, { "epoch": 0.020957836981007626, "grad_norm": 0.359375, "learning_rate": 0.0019430214705652745, "loss": 0.1704, "step": 11820 }, { "epoch": 0.02096138314631744, "grad_norm": 0.5703125, "learning_rate": 0.0019430006277706402, "loss": 0.1989, "step": 11822 }, { "epoch": 0.020964929311627255, "grad_norm": 2.0, "learning_rate": 0.0019429797812892107, "loss": 0.2876, "step": 11824 }, { "epoch": 0.02096847547693707, "grad_norm": 0.3515625, "learning_rate": 0.0019429589311210776, "loss": 0.2928, "step": 11826 }, { "epoch": 0.020972021642246885, "grad_norm": 0.92578125, "learning_rate": 0.0019429380772663317, "loss": 0.2403, "step": 11828 }, { "epoch": 0.020975567807556703, "grad_norm": 1.328125, "learning_rate": 0.0019429172197250645, "loss": 0.2353, "step": 11830 }, { "epoch": 0.020979113972866517, "grad_norm": 0.70703125, "learning_rate": 0.0019428963584973665, "loss": 0.1783, "step": 11832 }, { "epoch": 0.02098266013817633, "grad_norm": 1.1328125, "learning_rate": 0.0019428754935833297, "loss": 0.2238, "step": 11834 }, { "epoch": 0.020986206303486146, "grad_norm": 0.5390625, "learning_rate": 0.001942854624983045, "loss": 0.2795, "step": 11836 }, { "epoch": 0.02098975246879596, "grad_norm": 0.80859375, "learning_rate": 0.0019428337526966038, "loss": 0.252, "step": 11838 }, { "epoch": 0.020993298634105775, "grad_norm": 1.0625, "learning_rate": 0.0019428128767240973, "loss": 0.2096, "step": 11840 }, { "epoch": 0.020996844799415593, "grad_norm": 0.65234375, "learning_rate": 0.0019427919970656168, "loss": 0.1612, "step": 11842 }, { "epoch": 0.021000390964725408, "grad_norm": 0.52734375, "learning_rate": 0.001942771113721254, "loss": 0.3048, "step": 11844 }, { "epoch": 0.021003937130035222, "grad_norm": 0.29296875, "learning_rate": 0.0019427502266910997, "loss": 0.239, "step": 11846 }, { "epoch": 0.021007483295345037, "grad_norm": 0.3671875, "learning_rate": 0.0019427293359752453, "loss": 0.2246, "step": 11848 }, { "epoch": 0.02101102946065485, "grad_norm": 0.625, "learning_rate": 0.0019427084415737826, "loss": 0.1583, "step": 11850 }, { "epoch": 0.021014575625964666, "grad_norm": 0.470703125, "learning_rate": 0.0019426875434868022, "loss": 0.2764, "step": 11852 }, { "epoch": 0.021018121791274484, "grad_norm": 0.451171875, "learning_rate": 0.0019426666417143965, "loss": 0.1836, "step": 11854 }, { "epoch": 0.0210216679565843, "grad_norm": 0.78515625, "learning_rate": 0.0019426457362566561, "loss": 0.2434, "step": 11856 }, { "epoch": 0.021025214121894113, "grad_norm": 0.380859375, "learning_rate": 0.001942624827113673, "loss": 0.2406, "step": 11858 }, { "epoch": 0.021028760287203928, "grad_norm": 0.353515625, "learning_rate": 0.001942603914285538, "loss": 0.2339, "step": 11860 }, { "epoch": 0.021032306452513742, "grad_norm": 1.1875, "learning_rate": 0.0019425829977723428, "loss": 0.2634, "step": 11862 }, { "epoch": 0.02103585261782356, "grad_norm": 0.318359375, "learning_rate": 0.0019425620775741792, "loss": 0.1601, "step": 11864 }, { "epoch": 0.021039398783133375, "grad_norm": 0.38671875, "learning_rate": 0.001942541153691139, "loss": 0.2079, "step": 11866 }, { "epoch": 0.02104294494844319, "grad_norm": 0.53125, "learning_rate": 0.0019425202261233124, "loss": 0.2634, "step": 11868 }, { "epoch": 0.021046491113753004, "grad_norm": 1.4296875, "learning_rate": 0.001942499294870792, "loss": 0.2744, "step": 11870 }, { "epoch": 0.02105003727906282, "grad_norm": 0.40625, "learning_rate": 0.0019424783599336693, "loss": 0.2494, "step": 11872 }, { "epoch": 0.021053583444372633, "grad_norm": 0.380859375, "learning_rate": 0.0019424574213120355, "loss": 0.2023, "step": 11874 }, { "epoch": 0.02105712960968245, "grad_norm": 0.37890625, "learning_rate": 0.001942436479005982, "loss": 0.2424, "step": 11876 }, { "epoch": 0.021060675774992266, "grad_norm": 0.95703125, "learning_rate": 0.0019424155330156011, "loss": 0.2661, "step": 11878 }, { "epoch": 0.02106422194030208, "grad_norm": 2.546875, "learning_rate": 0.0019423945833409839, "loss": 0.4265, "step": 11880 }, { "epoch": 0.021067768105611895, "grad_norm": 0.310546875, "learning_rate": 0.0019423736299822218, "loss": 0.149, "step": 11882 }, { "epoch": 0.02107131427092171, "grad_norm": 0.609375, "learning_rate": 0.001942352672939407, "loss": 0.1588, "step": 11884 }, { "epoch": 0.021074860436231524, "grad_norm": 0.53125, "learning_rate": 0.001942331712212631, "loss": 0.205, "step": 11886 }, { "epoch": 0.021078406601541342, "grad_norm": 0.46484375, "learning_rate": 0.0019423107478019853, "loss": 0.1862, "step": 11888 }, { "epoch": 0.021081952766851157, "grad_norm": 0.66015625, "learning_rate": 0.0019422897797075616, "loss": 0.2353, "step": 11890 }, { "epoch": 0.02108549893216097, "grad_norm": 0.37109375, "learning_rate": 0.0019422688079294517, "loss": 0.1906, "step": 11892 }, { "epoch": 0.021089045097470786, "grad_norm": 1.3203125, "learning_rate": 0.0019422478324677473, "loss": 0.2766, "step": 11894 }, { "epoch": 0.0210925912627806, "grad_norm": 0.375, "learning_rate": 0.00194222685332254, "loss": 0.3606, "step": 11896 }, { "epoch": 0.021096137428090418, "grad_norm": 1.1796875, "learning_rate": 0.0019422058704939218, "loss": 0.4156, "step": 11898 }, { "epoch": 0.021099683593400233, "grad_norm": 0.7890625, "learning_rate": 0.0019421848839819844, "loss": 0.17, "step": 11900 }, { "epoch": 0.021103229758710047, "grad_norm": 0.37109375, "learning_rate": 0.0019421638937868193, "loss": 0.2583, "step": 11902 }, { "epoch": 0.021106775924019862, "grad_norm": 1.9140625, "learning_rate": 0.0019421428999085188, "loss": 0.438, "step": 11904 }, { "epoch": 0.021110322089329676, "grad_norm": 1.71875, "learning_rate": 0.0019421219023471742, "loss": 0.3924, "step": 11906 }, { "epoch": 0.02111386825463949, "grad_norm": 1.1015625, "learning_rate": 0.0019421009011028776, "loss": 0.2796, "step": 11908 }, { "epoch": 0.02111741441994931, "grad_norm": 1.1328125, "learning_rate": 0.0019420798961757206, "loss": 0.2339, "step": 11910 }, { "epoch": 0.021120960585259124, "grad_norm": 1.90625, "learning_rate": 0.0019420588875657958, "loss": 0.2266, "step": 11912 }, { "epoch": 0.021124506750568938, "grad_norm": 0.81640625, "learning_rate": 0.0019420378752731942, "loss": 0.2492, "step": 11914 }, { "epoch": 0.021128052915878753, "grad_norm": 2.21875, "learning_rate": 0.0019420168592980082, "loss": 0.4295, "step": 11916 }, { "epoch": 0.021131599081188567, "grad_norm": 1.5859375, "learning_rate": 0.0019419958396403294, "loss": 0.3756, "step": 11918 }, { "epoch": 0.021135145246498382, "grad_norm": 2.25, "learning_rate": 0.0019419748163002498, "loss": 0.2401, "step": 11920 }, { "epoch": 0.0211386914118082, "grad_norm": 0.703125, "learning_rate": 0.0019419537892778618, "loss": 0.2321, "step": 11922 }, { "epoch": 0.021142237577118014, "grad_norm": 0.333984375, "learning_rate": 0.0019419327585732565, "loss": 0.2658, "step": 11924 }, { "epoch": 0.02114578374242783, "grad_norm": 1.203125, "learning_rate": 0.0019419117241865267, "loss": 0.5251, "step": 11926 }, { "epoch": 0.021149329907737643, "grad_norm": 1.8828125, "learning_rate": 0.001941890686117764, "loss": 0.3023, "step": 11928 }, { "epoch": 0.021152876073047458, "grad_norm": 0.5546875, "learning_rate": 0.0019418696443670605, "loss": 0.2206, "step": 11930 }, { "epoch": 0.021156422238357276, "grad_norm": 0.3359375, "learning_rate": 0.001941848598934508, "loss": 0.2282, "step": 11932 }, { "epoch": 0.02115996840366709, "grad_norm": 0.306640625, "learning_rate": 0.0019418275498201988, "loss": 0.2521, "step": 11934 }, { "epoch": 0.021163514568976905, "grad_norm": 0.53125, "learning_rate": 0.001941806497024225, "loss": 0.2321, "step": 11936 }, { "epoch": 0.02116706073428672, "grad_norm": 0.359375, "learning_rate": 0.0019417854405466787, "loss": 0.248, "step": 11938 }, { "epoch": 0.021170606899596534, "grad_norm": 0.7109375, "learning_rate": 0.0019417643803876516, "loss": 0.2089, "step": 11940 }, { "epoch": 0.02117415306490635, "grad_norm": 0.51953125, "learning_rate": 0.001941743316547236, "loss": 0.1851, "step": 11942 }, { "epoch": 0.021177699230216167, "grad_norm": 0.37890625, "learning_rate": 0.0019417222490255247, "loss": 0.205, "step": 11944 }, { "epoch": 0.02118124539552598, "grad_norm": 0.357421875, "learning_rate": 0.0019417011778226083, "loss": 0.2196, "step": 11946 }, { "epoch": 0.021184791560835796, "grad_norm": 0.6796875, "learning_rate": 0.0019416801029385805, "loss": 0.2138, "step": 11948 }, { "epoch": 0.02118833772614561, "grad_norm": 0.328125, "learning_rate": 0.0019416590243735328, "loss": 0.2103, "step": 11950 }, { "epoch": 0.021191883891455425, "grad_norm": 0.6640625, "learning_rate": 0.001941637942127557, "loss": 0.1775, "step": 11952 }, { "epoch": 0.02119543005676524, "grad_norm": 1.0546875, "learning_rate": 0.001941616856200746, "loss": 0.3328, "step": 11954 }, { "epoch": 0.021198976222075058, "grad_norm": 0.408203125, "learning_rate": 0.0019415957665931917, "loss": 0.2312, "step": 11956 }, { "epoch": 0.021202522387384872, "grad_norm": 0.26953125, "learning_rate": 0.0019415746733049864, "loss": 0.1949, "step": 11958 }, { "epoch": 0.021206068552694687, "grad_norm": 0.57421875, "learning_rate": 0.0019415535763362224, "loss": 0.2523, "step": 11960 }, { "epoch": 0.0212096147180045, "grad_norm": 0.275390625, "learning_rate": 0.0019415324756869917, "loss": 0.2221, "step": 11962 }, { "epoch": 0.021213160883314316, "grad_norm": 0.66796875, "learning_rate": 0.001941511371357387, "loss": 0.2071, "step": 11964 }, { "epoch": 0.021216707048624134, "grad_norm": 0.275390625, "learning_rate": 0.0019414902633475002, "loss": 0.1605, "step": 11966 }, { "epoch": 0.02122025321393395, "grad_norm": 0.447265625, "learning_rate": 0.0019414691516574237, "loss": 0.2008, "step": 11968 }, { "epoch": 0.021223799379243763, "grad_norm": 0.41796875, "learning_rate": 0.0019414480362872502, "loss": 0.1891, "step": 11970 }, { "epoch": 0.021227345544553577, "grad_norm": 0.490234375, "learning_rate": 0.0019414269172370715, "loss": 0.2498, "step": 11972 }, { "epoch": 0.021230891709863392, "grad_norm": 0.42578125, "learning_rate": 0.0019414057945069804, "loss": 0.2774, "step": 11974 }, { "epoch": 0.021234437875173207, "grad_norm": 0.265625, "learning_rate": 0.001941384668097069, "loss": 0.1612, "step": 11976 }, { "epoch": 0.021237984040483025, "grad_norm": 0.9921875, "learning_rate": 0.0019413635380074296, "loss": 0.2101, "step": 11978 }, { "epoch": 0.02124153020579284, "grad_norm": 0.69140625, "learning_rate": 0.001941342404238155, "loss": 0.3062, "step": 11980 }, { "epoch": 0.021245076371102654, "grad_norm": 0.50390625, "learning_rate": 0.0019413212667893376, "loss": 0.2146, "step": 11982 }, { "epoch": 0.021248622536412468, "grad_norm": 0.4921875, "learning_rate": 0.0019413001256610696, "loss": 0.1958, "step": 11984 }, { "epoch": 0.021252168701722283, "grad_norm": 3.40625, "learning_rate": 0.0019412789808534434, "loss": 0.1789, "step": 11986 }, { "epoch": 0.021255714867032097, "grad_norm": 0.703125, "learning_rate": 0.0019412578323665518, "loss": 0.241, "step": 11988 }, { "epoch": 0.021259261032341915, "grad_norm": 0.466796875, "learning_rate": 0.0019412366802004871, "loss": 0.2686, "step": 11990 }, { "epoch": 0.02126280719765173, "grad_norm": 0.734375, "learning_rate": 0.0019412155243553415, "loss": 0.2405, "step": 11992 }, { "epoch": 0.021266353362961544, "grad_norm": 0.5, "learning_rate": 0.001941194364831208, "loss": 0.2495, "step": 11994 }, { "epoch": 0.02126989952827136, "grad_norm": 0.1669921875, "learning_rate": 0.001941173201628179, "loss": 0.1895, "step": 11996 }, { "epoch": 0.021273445693581174, "grad_norm": 0.361328125, "learning_rate": 0.001941152034746347, "loss": 0.1647, "step": 11998 }, { "epoch": 0.02127699185889099, "grad_norm": 0.68359375, "learning_rate": 0.0019411308641858046, "loss": 0.2204, "step": 12000 }, { "epoch": 0.021280538024200806, "grad_norm": 0.3046875, "learning_rate": 0.0019411096899466444, "loss": 0.1912, "step": 12002 }, { "epoch": 0.02128408418951062, "grad_norm": 0.49609375, "learning_rate": 0.001941088512028959, "loss": 0.2513, "step": 12004 }, { "epoch": 0.021287630354820435, "grad_norm": 0.41796875, "learning_rate": 0.0019410673304328411, "loss": 0.2244, "step": 12006 }, { "epoch": 0.02129117652013025, "grad_norm": 0.68359375, "learning_rate": 0.0019410461451583832, "loss": 0.2104, "step": 12008 }, { "epoch": 0.021294722685440064, "grad_norm": 0.2578125, "learning_rate": 0.0019410249562056782, "loss": 0.1873, "step": 12010 }, { "epoch": 0.021298268850749882, "grad_norm": 0.3203125, "learning_rate": 0.0019410037635748181, "loss": 0.4049, "step": 12012 }, { "epoch": 0.021301815016059697, "grad_norm": 0.9375, "learning_rate": 0.001940982567265896, "loss": 0.2183, "step": 12014 }, { "epoch": 0.02130536118136951, "grad_norm": 0.482421875, "learning_rate": 0.0019409613672790051, "loss": 0.2753, "step": 12016 }, { "epoch": 0.021308907346679326, "grad_norm": 0.53125, "learning_rate": 0.0019409401636142375, "loss": 0.2376, "step": 12018 }, { "epoch": 0.02131245351198914, "grad_norm": 0.365234375, "learning_rate": 0.001940918956271686, "loss": 0.2325, "step": 12020 }, { "epoch": 0.021315999677298955, "grad_norm": 0.75390625, "learning_rate": 0.0019408977452514437, "loss": 0.2987, "step": 12022 }, { "epoch": 0.021319545842608773, "grad_norm": 0.90625, "learning_rate": 0.0019408765305536032, "loss": 0.2472, "step": 12024 }, { "epoch": 0.021323092007918588, "grad_norm": 1.9765625, "learning_rate": 0.0019408553121782566, "loss": 0.2271, "step": 12026 }, { "epoch": 0.021326638173228402, "grad_norm": 0.5, "learning_rate": 0.0019408340901254978, "loss": 0.3015, "step": 12028 }, { "epoch": 0.021330184338538217, "grad_norm": 0.98046875, "learning_rate": 0.001940812864395419, "loss": 0.2415, "step": 12030 }, { "epoch": 0.02133373050384803, "grad_norm": 0.95703125, "learning_rate": 0.0019407916349881132, "loss": 0.1871, "step": 12032 }, { "epoch": 0.02133727666915785, "grad_norm": 0.2578125, "learning_rate": 0.001940770401903673, "loss": 0.2125, "step": 12034 }, { "epoch": 0.021340822834467664, "grad_norm": 0.3515625, "learning_rate": 0.0019407491651421917, "loss": 0.3204, "step": 12036 }, { "epoch": 0.02134436899977748, "grad_norm": 0.5546875, "learning_rate": 0.0019407279247037614, "loss": 0.1831, "step": 12038 }, { "epoch": 0.021347915165087293, "grad_norm": 0.69921875, "learning_rate": 0.001940706680588476, "loss": 0.1985, "step": 12040 }, { "epoch": 0.021351461330397108, "grad_norm": 0.42578125, "learning_rate": 0.0019406854327964275, "loss": 0.2091, "step": 12042 }, { "epoch": 0.021355007495706922, "grad_norm": 0.44140625, "learning_rate": 0.0019406641813277097, "loss": 0.2764, "step": 12044 }, { "epoch": 0.02135855366101674, "grad_norm": 0.6953125, "learning_rate": 0.0019406429261824149, "loss": 0.2291, "step": 12046 }, { "epoch": 0.021362099826326555, "grad_norm": 1.1484375, "learning_rate": 0.0019406216673606364, "loss": 0.3149, "step": 12048 }, { "epoch": 0.02136564599163637, "grad_norm": 0.6328125, "learning_rate": 0.0019406004048624665, "loss": 0.2321, "step": 12050 }, { "epoch": 0.021369192156946184, "grad_norm": 0.34375, "learning_rate": 0.0019405791386879992, "loss": 0.2101, "step": 12052 }, { "epoch": 0.021372738322256, "grad_norm": 0.66796875, "learning_rate": 0.0019405578688373268, "loss": 0.2775, "step": 12054 }, { "epoch": 0.021376284487565813, "grad_norm": 0.49609375, "learning_rate": 0.0019405365953105427, "loss": 0.1969, "step": 12056 }, { "epoch": 0.02137983065287563, "grad_norm": 0.6953125, "learning_rate": 0.0019405153181077394, "loss": 0.234, "step": 12058 }, { "epoch": 0.021383376818185446, "grad_norm": 0.36328125, "learning_rate": 0.0019404940372290107, "loss": 0.2024, "step": 12060 }, { "epoch": 0.02138692298349526, "grad_norm": 1.1640625, "learning_rate": 0.0019404727526744492, "loss": 0.2882, "step": 12062 }, { "epoch": 0.021390469148805075, "grad_norm": 0.55078125, "learning_rate": 0.0019404514644441482, "loss": 0.3102, "step": 12064 }, { "epoch": 0.02139401531411489, "grad_norm": 0.439453125, "learning_rate": 0.0019404301725382005, "loss": 0.234, "step": 12066 }, { "epoch": 0.021397561479424707, "grad_norm": 0.5703125, "learning_rate": 0.0019404088769566993, "loss": 0.2715, "step": 12068 }, { "epoch": 0.021401107644734522, "grad_norm": 0.73046875, "learning_rate": 0.001940387577699738, "loss": 0.1932, "step": 12070 }, { "epoch": 0.021404653810044336, "grad_norm": 2.5625, "learning_rate": 0.0019403662747674098, "loss": 0.221, "step": 12072 }, { "epoch": 0.02140819997535415, "grad_norm": 0.87109375, "learning_rate": 0.0019403449681598076, "loss": 0.1921, "step": 12074 }, { "epoch": 0.021411746140663965, "grad_norm": 1.9140625, "learning_rate": 0.0019403236578770244, "loss": 0.24, "step": 12076 }, { "epoch": 0.02141529230597378, "grad_norm": 0.44140625, "learning_rate": 0.0019403023439191539, "loss": 0.2054, "step": 12078 }, { "epoch": 0.021418838471283598, "grad_norm": 0.365234375, "learning_rate": 0.001940281026286289, "loss": 0.2254, "step": 12080 }, { "epoch": 0.021422384636593413, "grad_norm": 0.53515625, "learning_rate": 0.0019402597049785226, "loss": 0.2484, "step": 12082 }, { "epoch": 0.021425930801903227, "grad_norm": 0.37109375, "learning_rate": 0.0019402383799959487, "loss": 0.2282, "step": 12084 }, { "epoch": 0.02142947696721304, "grad_norm": 0.6640625, "learning_rate": 0.00194021705133866, "loss": 0.251, "step": 12086 }, { "epoch": 0.021433023132522856, "grad_norm": 0.30859375, "learning_rate": 0.00194019571900675, "loss": 0.1941, "step": 12088 }, { "epoch": 0.02143656929783267, "grad_norm": 0.365234375, "learning_rate": 0.0019401743830003123, "loss": 0.2044, "step": 12090 }, { "epoch": 0.02144011546314249, "grad_norm": 0.4921875, "learning_rate": 0.0019401530433194394, "loss": 0.2134, "step": 12092 }, { "epoch": 0.021443661628452303, "grad_norm": 0.7890625, "learning_rate": 0.0019401316999642256, "loss": 0.2312, "step": 12094 }, { "epoch": 0.021447207793762118, "grad_norm": 0.99609375, "learning_rate": 0.0019401103529347635, "loss": 0.2354, "step": 12096 }, { "epoch": 0.021450753959071932, "grad_norm": 1.015625, "learning_rate": 0.0019400890022311466, "loss": 0.3034, "step": 12098 }, { "epoch": 0.021454300124381747, "grad_norm": 1.4375, "learning_rate": 0.0019400676478534685, "loss": 0.2345, "step": 12100 }, { "epoch": 0.021457846289691565, "grad_norm": 0.53515625, "learning_rate": 0.0019400462898018223, "loss": 0.2946, "step": 12102 }, { "epoch": 0.02146139245500138, "grad_norm": 0.63671875, "learning_rate": 0.0019400249280763018, "loss": 0.2005, "step": 12104 }, { "epoch": 0.021464938620311194, "grad_norm": 1.1796875, "learning_rate": 0.0019400035626770003, "loss": 0.374, "step": 12106 }, { "epoch": 0.02146848478562101, "grad_norm": 0.58984375, "learning_rate": 0.001939982193604011, "loss": 0.2566, "step": 12108 }, { "epoch": 0.021472030950930823, "grad_norm": 0.220703125, "learning_rate": 0.0019399608208574273, "loss": 0.1892, "step": 12110 }, { "epoch": 0.021475577116240638, "grad_norm": 0.58203125, "learning_rate": 0.0019399394444373432, "loss": 0.2316, "step": 12112 }, { "epoch": 0.021479123281550456, "grad_norm": 0.431640625, "learning_rate": 0.0019399180643438518, "loss": 0.2632, "step": 12114 }, { "epoch": 0.02148266944686027, "grad_norm": 0.5, "learning_rate": 0.0019398966805770465, "loss": 0.2702, "step": 12116 }, { "epoch": 0.021486215612170085, "grad_norm": 0.326171875, "learning_rate": 0.0019398752931370214, "loss": 0.2617, "step": 12118 }, { "epoch": 0.0214897617774799, "grad_norm": 1.171875, "learning_rate": 0.0019398539020238693, "loss": 0.2637, "step": 12120 }, { "epoch": 0.021493307942789714, "grad_norm": 0.322265625, "learning_rate": 0.001939832507237684, "loss": 0.2221, "step": 12122 }, { "epoch": 0.02149685410809953, "grad_norm": 0.6484375, "learning_rate": 0.0019398111087785593, "loss": 0.2877, "step": 12124 }, { "epoch": 0.021500400273409347, "grad_norm": 0.42578125, "learning_rate": 0.0019397897066465886, "loss": 0.1861, "step": 12126 }, { "epoch": 0.02150394643871916, "grad_norm": 0.59375, "learning_rate": 0.0019397683008418654, "loss": 0.2165, "step": 12128 }, { "epoch": 0.021507492604028976, "grad_norm": 0.3984375, "learning_rate": 0.0019397468913644835, "loss": 0.2441, "step": 12130 }, { "epoch": 0.02151103876933879, "grad_norm": 0.37890625, "learning_rate": 0.0019397254782145365, "loss": 0.2458, "step": 12132 }, { "epoch": 0.021514584934648605, "grad_norm": 1.0, "learning_rate": 0.0019397040613921182, "loss": 0.4961, "step": 12134 }, { "epoch": 0.021518131099958423, "grad_norm": 0.97265625, "learning_rate": 0.001939682640897322, "loss": 0.1737, "step": 12136 }, { "epoch": 0.021521677265268237, "grad_norm": 0.2890625, "learning_rate": 0.001939661216730242, "loss": 0.2835, "step": 12138 }, { "epoch": 0.021525223430578052, "grad_norm": 0.486328125, "learning_rate": 0.001939639788890971, "loss": 0.2059, "step": 12140 }, { "epoch": 0.021528769595887867, "grad_norm": 0.4609375, "learning_rate": 0.0019396183573796033, "loss": 0.1787, "step": 12142 }, { "epoch": 0.02153231576119768, "grad_norm": 0.9453125, "learning_rate": 0.001939596922196233, "loss": 0.2147, "step": 12144 }, { "epoch": 0.021535861926507496, "grad_norm": 1.2109375, "learning_rate": 0.0019395754833409532, "loss": 0.3419, "step": 12146 }, { "epoch": 0.021539408091817314, "grad_norm": 0.296875, "learning_rate": 0.0019395540408138582, "loss": 0.2073, "step": 12148 }, { "epoch": 0.021542954257127128, "grad_norm": 0.51953125, "learning_rate": 0.0019395325946150411, "loss": 0.2068, "step": 12150 }, { "epoch": 0.021546500422436943, "grad_norm": 0.5234375, "learning_rate": 0.0019395111447445962, "loss": 0.2172, "step": 12152 }, { "epoch": 0.021550046587746757, "grad_norm": 0.82421875, "learning_rate": 0.0019394896912026172, "loss": 0.2032, "step": 12154 }, { "epoch": 0.021553592753056572, "grad_norm": 0.451171875, "learning_rate": 0.001939468233989198, "loss": 0.2949, "step": 12156 }, { "epoch": 0.021557138918366386, "grad_norm": 0.7109375, "learning_rate": 0.0019394467731044325, "loss": 0.2129, "step": 12158 }, { "epoch": 0.021560685083676204, "grad_norm": 0.69921875, "learning_rate": 0.0019394253085484143, "loss": 0.2023, "step": 12160 }, { "epoch": 0.02156423124898602, "grad_norm": 2.28125, "learning_rate": 0.0019394038403212372, "loss": 0.2987, "step": 12162 }, { "epoch": 0.021567777414295834, "grad_norm": 1.4453125, "learning_rate": 0.0019393823684229954, "loss": 0.2498, "step": 12164 }, { "epoch": 0.021571323579605648, "grad_norm": 0.3515625, "learning_rate": 0.0019393608928537829, "loss": 0.1891, "step": 12166 }, { "epoch": 0.021574869744915463, "grad_norm": 0.53515625, "learning_rate": 0.0019393394136136934, "loss": 0.2225, "step": 12168 }, { "epoch": 0.02157841591022528, "grad_norm": 0.392578125, "learning_rate": 0.0019393179307028207, "loss": 0.1897, "step": 12170 }, { "epoch": 0.021581962075535095, "grad_norm": 0.2099609375, "learning_rate": 0.0019392964441212592, "loss": 0.2067, "step": 12172 }, { "epoch": 0.02158550824084491, "grad_norm": 0.54296875, "learning_rate": 0.001939274953869102, "loss": 0.1903, "step": 12174 }, { "epoch": 0.021589054406154724, "grad_norm": 0.671875, "learning_rate": 0.001939253459946444, "loss": 0.2438, "step": 12176 }, { "epoch": 0.02159260057146454, "grad_norm": 0.6171875, "learning_rate": 0.0019392319623533787, "loss": 0.2248, "step": 12178 }, { "epoch": 0.021596146736774353, "grad_norm": 0.310546875, "learning_rate": 0.0019392104610900006, "loss": 0.1917, "step": 12180 }, { "epoch": 0.02159969290208417, "grad_norm": 0.48828125, "learning_rate": 0.001939188956156403, "loss": 0.1941, "step": 12182 }, { "epoch": 0.021603239067393986, "grad_norm": 1.28125, "learning_rate": 0.0019391674475526808, "loss": 0.252, "step": 12184 }, { "epoch": 0.0216067852327038, "grad_norm": 0.55078125, "learning_rate": 0.0019391459352789274, "loss": 0.4299, "step": 12186 }, { "epoch": 0.021610331398013615, "grad_norm": 0.3828125, "learning_rate": 0.001939124419335237, "loss": 0.2168, "step": 12188 }, { "epoch": 0.02161387756332343, "grad_norm": 0.318359375, "learning_rate": 0.001939102899721704, "loss": 0.2389, "step": 12190 }, { "epoch": 0.021617423728633244, "grad_norm": 0.58984375, "learning_rate": 0.0019390813764384224, "loss": 0.1927, "step": 12192 }, { "epoch": 0.021620969893943062, "grad_norm": 1.375, "learning_rate": 0.001939059849485486, "loss": 0.2345, "step": 12194 }, { "epoch": 0.021624516059252877, "grad_norm": 0.55078125, "learning_rate": 0.0019390383188629897, "loss": 0.1983, "step": 12196 }, { "epoch": 0.02162806222456269, "grad_norm": 0.49609375, "learning_rate": 0.0019390167845710264, "loss": 0.2746, "step": 12198 }, { "epoch": 0.021631608389872506, "grad_norm": 0.49609375, "learning_rate": 0.0019389952466096917, "loss": 0.1553, "step": 12200 }, { "epoch": 0.02163515455518232, "grad_norm": 1.21875, "learning_rate": 0.0019389737049790785, "loss": 0.2208, "step": 12202 }, { "epoch": 0.02163870072049214, "grad_norm": 0.24609375, "learning_rate": 0.0019389521596792822, "loss": 0.1653, "step": 12204 }, { "epoch": 0.021642246885801953, "grad_norm": 0.546875, "learning_rate": 0.0019389306107103963, "loss": 0.3346, "step": 12206 }, { "epoch": 0.021645793051111768, "grad_norm": 0.2451171875, "learning_rate": 0.0019389090580725153, "loss": 0.2473, "step": 12208 }, { "epoch": 0.021649339216421582, "grad_norm": 0.51171875, "learning_rate": 0.0019388875017657332, "loss": 0.2674, "step": 12210 }, { "epoch": 0.021652885381731397, "grad_norm": 0.255859375, "learning_rate": 0.0019388659417901447, "loss": 0.1683, "step": 12212 }, { "epoch": 0.02165643154704121, "grad_norm": 0.40625, "learning_rate": 0.0019388443781458437, "loss": 0.1817, "step": 12214 }, { "epoch": 0.02165997771235103, "grad_norm": 0.37890625, "learning_rate": 0.0019388228108329244, "loss": 0.22, "step": 12216 }, { "epoch": 0.021663523877660844, "grad_norm": 0.6875, "learning_rate": 0.0019388012398514818, "loss": 0.2446, "step": 12218 }, { "epoch": 0.02166707004297066, "grad_norm": 1.5078125, "learning_rate": 0.0019387796652016096, "loss": 0.3187, "step": 12220 }, { "epoch": 0.021670616208280473, "grad_norm": 0.341796875, "learning_rate": 0.0019387580868834022, "loss": 0.2163, "step": 12222 }, { "epoch": 0.021674162373590287, "grad_norm": 0.376953125, "learning_rate": 0.0019387365048969545, "loss": 0.1586, "step": 12224 }, { "epoch": 0.021677708538900102, "grad_norm": 0.421875, "learning_rate": 0.0019387149192423606, "loss": 0.2063, "step": 12226 }, { "epoch": 0.02168125470420992, "grad_norm": 0.48046875, "learning_rate": 0.0019386933299197144, "loss": 0.2062, "step": 12228 }, { "epoch": 0.021684800869519735, "grad_norm": 1.2578125, "learning_rate": 0.001938671736929111, "loss": 0.2413, "step": 12230 }, { "epoch": 0.02168834703482955, "grad_norm": 1.3515625, "learning_rate": 0.0019386501402706442, "loss": 0.2209, "step": 12232 }, { "epoch": 0.021691893200139364, "grad_norm": 0.82421875, "learning_rate": 0.0019386285399444092, "loss": 0.3747, "step": 12234 }, { "epoch": 0.021695439365449178, "grad_norm": 0.6796875, "learning_rate": 0.0019386069359505003, "loss": 0.2395, "step": 12236 }, { "epoch": 0.021698985530758996, "grad_norm": 0.359375, "learning_rate": 0.0019385853282890113, "loss": 0.1763, "step": 12238 }, { "epoch": 0.02170253169606881, "grad_norm": 0.419921875, "learning_rate": 0.0019385637169600375, "loss": 0.3008, "step": 12240 }, { "epoch": 0.021706077861378625, "grad_norm": 1.625, "learning_rate": 0.001938542101963673, "loss": 0.3692, "step": 12242 }, { "epoch": 0.02170962402668844, "grad_norm": 0.4765625, "learning_rate": 0.0019385204833000122, "loss": 0.1671, "step": 12244 }, { "epoch": 0.021713170191998254, "grad_norm": 0.8515625, "learning_rate": 0.00193849886096915, "loss": 0.2634, "step": 12246 }, { "epoch": 0.02171671635730807, "grad_norm": 1.40625, "learning_rate": 0.0019384772349711813, "loss": 0.3233, "step": 12248 }, { "epoch": 0.021720262522617887, "grad_norm": 0.408203125, "learning_rate": 0.0019384556053061996, "loss": 0.2088, "step": 12250 }, { "epoch": 0.0217238086879277, "grad_norm": 0.447265625, "learning_rate": 0.0019384339719743008, "loss": 0.1729, "step": 12252 }, { "epoch": 0.021727354853237516, "grad_norm": 0.462890625, "learning_rate": 0.001938412334975578, "loss": 0.2285, "step": 12254 }, { "epoch": 0.02173090101854733, "grad_norm": 1.1640625, "learning_rate": 0.001938390694310127, "loss": 0.33, "step": 12256 }, { "epoch": 0.021734447183857145, "grad_norm": 3.125, "learning_rate": 0.0019383690499780424, "loss": 0.3129, "step": 12258 }, { "epoch": 0.02173799334916696, "grad_norm": 0.5234375, "learning_rate": 0.0019383474019794183, "loss": 0.3055, "step": 12260 }, { "epoch": 0.021741539514476778, "grad_norm": 1.1640625, "learning_rate": 0.0019383257503143494, "loss": 0.2927, "step": 12262 }, { "epoch": 0.021745085679786592, "grad_norm": 1.71875, "learning_rate": 0.001938304094982931, "loss": 0.3187, "step": 12264 }, { "epoch": 0.021748631845096407, "grad_norm": 2.5, "learning_rate": 0.0019382824359852574, "loss": 0.2836, "step": 12266 }, { "epoch": 0.02175217801040622, "grad_norm": 1.0078125, "learning_rate": 0.0019382607733214232, "loss": 0.2343, "step": 12268 }, { "epoch": 0.021755724175716036, "grad_norm": 3.6875, "learning_rate": 0.0019382391069915233, "loss": 0.4043, "step": 12270 }, { "epoch": 0.021759270341025854, "grad_norm": 0.84765625, "learning_rate": 0.0019382174369956527, "loss": 0.1895, "step": 12272 }, { "epoch": 0.02176281650633567, "grad_norm": 0.41796875, "learning_rate": 0.0019381957633339058, "loss": 0.2292, "step": 12274 }, { "epoch": 0.021766362671645483, "grad_norm": 0.61328125, "learning_rate": 0.0019381740860063773, "loss": 0.2785, "step": 12276 }, { "epoch": 0.021769908836955298, "grad_norm": 0.341796875, "learning_rate": 0.0019381524050131622, "loss": 0.2036, "step": 12278 }, { "epoch": 0.021773455002265112, "grad_norm": 1.265625, "learning_rate": 0.0019381307203543557, "loss": 0.2037, "step": 12280 }, { "epoch": 0.021777001167574927, "grad_norm": 0.5234375, "learning_rate": 0.0019381090320300521, "loss": 0.2789, "step": 12282 }, { "epoch": 0.021780547332884745, "grad_norm": 0.474609375, "learning_rate": 0.0019380873400403466, "loss": 0.2316, "step": 12284 }, { "epoch": 0.02178409349819456, "grad_norm": 1.2734375, "learning_rate": 0.0019380656443853336, "loss": 0.336, "step": 12286 }, { "epoch": 0.021787639663504374, "grad_norm": 0.58203125, "learning_rate": 0.0019380439450651082, "loss": 0.2047, "step": 12288 }, { "epoch": 0.02179118582881419, "grad_norm": 0.70703125, "learning_rate": 0.0019380222420797655, "loss": 0.2368, "step": 12290 }, { "epoch": 0.021794731994124003, "grad_norm": 2.421875, "learning_rate": 0.0019380005354294003, "loss": 0.5012, "step": 12292 }, { "epoch": 0.021798278159433818, "grad_norm": 0.69921875, "learning_rate": 0.0019379788251141078, "loss": 0.2229, "step": 12294 }, { "epoch": 0.021801824324743636, "grad_norm": 1.8984375, "learning_rate": 0.0019379571111339824, "loss": 0.2811, "step": 12296 }, { "epoch": 0.02180537049005345, "grad_norm": 0.84375, "learning_rate": 0.0019379353934891193, "loss": 0.2322, "step": 12298 }, { "epoch": 0.021808916655363265, "grad_norm": 0.62890625, "learning_rate": 0.0019379136721796137, "loss": 0.2054, "step": 12300 }, { "epoch": 0.02181246282067308, "grad_norm": 0.4296875, "learning_rate": 0.0019378919472055605, "loss": 0.2031, "step": 12302 }, { "epoch": 0.021816008985982894, "grad_norm": 0.490234375, "learning_rate": 0.0019378702185670542, "loss": 0.3056, "step": 12304 }, { "epoch": 0.021819555151292712, "grad_norm": 0.46484375, "learning_rate": 0.0019378484862641909, "loss": 0.2262, "step": 12306 }, { "epoch": 0.021823101316602526, "grad_norm": 0.45703125, "learning_rate": 0.0019378267502970644, "loss": 0.2083, "step": 12308 }, { "epoch": 0.02182664748191234, "grad_norm": 0.6015625, "learning_rate": 0.0019378050106657705, "loss": 0.2274, "step": 12310 }, { "epoch": 0.021830193647222156, "grad_norm": 0.8046875, "learning_rate": 0.0019377832673704042, "loss": 0.2745, "step": 12312 }, { "epoch": 0.02183373981253197, "grad_norm": 0.44140625, "learning_rate": 0.0019377615204110605, "loss": 0.2309, "step": 12314 }, { "epoch": 0.021837285977841785, "grad_norm": 0.337890625, "learning_rate": 0.0019377397697878346, "loss": 0.253, "step": 12316 }, { "epoch": 0.021840832143151603, "grad_norm": 1.828125, "learning_rate": 0.0019377180155008217, "loss": 0.2683, "step": 12318 }, { "epoch": 0.021844378308461417, "grad_norm": 0.7421875, "learning_rate": 0.0019376962575501165, "loss": 0.1883, "step": 12320 }, { "epoch": 0.021847924473771232, "grad_norm": 0.6484375, "learning_rate": 0.0019376744959358147, "loss": 0.2341, "step": 12322 }, { "epoch": 0.021851470639081046, "grad_norm": 0.51171875, "learning_rate": 0.0019376527306580109, "loss": 0.2922, "step": 12324 }, { "epoch": 0.02185501680439086, "grad_norm": 1.3671875, "learning_rate": 0.0019376309617168011, "loss": 0.2842, "step": 12326 }, { "epoch": 0.021858562969700675, "grad_norm": 0.33203125, "learning_rate": 0.0019376091891122795, "loss": 0.2821, "step": 12328 }, { "epoch": 0.021862109135010493, "grad_norm": 0.466796875, "learning_rate": 0.0019375874128445424, "loss": 0.1806, "step": 12330 }, { "epoch": 0.021865655300320308, "grad_norm": 0.72265625, "learning_rate": 0.0019375656329136842, "loss": 0.2354, "step": 12332 }, { "epoch": 0.021869201465630123, "grad_norm": 0.49609375, "learning_rate": 0.0019375438493198006, "loss": 0.2346, "step": 12334 }, { "epoch": 0.021872747630939937, "grad_norm": 0.447265625, "learning_rate": 0.0019375220620629864, "loss": 0.1906, "step": 12336 }, { "epoch": 0.02187629379624975, "grad_norm": 1.6328125, "learning_rate": 0.0019375002711433374, "loss": 0.2535, "step": 12338 }, { "epoch": 0.02187983996155957, "grad_norm": 3.984375, "learning_rate": 0.0019374784765609488, "loss": 0.3205, "step": 12340 }, { "epoch": 0.021883386126869384, "grad_norm": 0.51953125, "learning_rate": 0.0019374566783159155, "loss": 0.2011, "step": 12342 }, { "epoch": 0.0218869322921792, "grad_norm": 0.2734375, "learning_rate": 0.0019374348764083333, "loss": 0.209, "step": 12344 }, { "epoch": 0.021890478457489013, "grad_norm": 0.7421875, "learning_rate": 0.0019374130708382975, "loss": 0.2034, "step": 12346 }, { "epoch": 0.021894024622798828, "grad_norm": 0.3828125, "learning_rate": 0.0019373912616059033, "loss": 0.2628, "step": 12348 }, { "epoch": 0.021897570788108642, "grad_norm": 0.56640625, "learning_rate": 0.0019373694487112459, "loss": 0.3466, "step": 12350 }, { "epoch": 0.02190111695341846, "grad_norm": 1.1015625, "learning_rate": 0.0019373476321544213, "loss": 0.2858, "step": 12352 }, { "epoch": 0.021904663118728275, "grad_norm": 0.515625, "learning_rate": 0.0019373258119355244, "loss": 0.251, "step": 12354 }, { "epoch": 0.02190820928403809, "grad_norm": 0.361328125, "learning_rate": 0.0019373039880546505, "loss": 0.2294, "step": 12356 }, { "epoch": 0.021911755449347904, "grad_norm": 0.44921875, "learning_rate": 0.0019372821605118957, "loss": 0.1893, "step": 12358 }, { "epoch": 0.02191530161465772, "grad_norm": 0.3203125, "learning_rate": 0.001937260329307355, "loss": 0.2073, "step": 12360 }, { "epoch": 0.021918847779967533, "grad_norm": 3.015625, "learning_rate": 0.0019372384944411239, "loss": 0.2401, "step": 12362 }, { "epoch": 0.02192239394527735, "grad_norm": 0.375, "learning_rate": 0.001937216655913298, "loss": 0.1999, "step": 12364 }, { "epoch": 0.021925940110587166, "grad_norm": 0.416015625, "learning_rate": 0.0019371948137239725, "loss": 0.1976, "step": 12366 }, { "epoch": 0.02192948627589698, "grad_norm": 0.396484375, "learning_rate": 0.0019371729678732436, "loss": 0.2518, "step": 12368 }, { "epoch": 0.021933032441206795, "grad_norm": 1.2421875, "learning_rate": 0.0019371511183612063, "loss": 0.1864, "step": 12370 }, { "epoch": 0.02193657860651661, "grad_norm": 0.53125, "learning_rate": 0.0019371292651879561, "loss": 0.2521, "step": 12372 }, { "epoch": 0.021940124771826428, "grad_norm": 0.439453125, "learning_rate": 0.0019371074083535892, "loss": 0.203, "step": 12374 }, { "epoch": 0.021943670937136242, "grad_norm": 1.8046875, "learning_rate": 0.0019370855478582, "loss": 0.3093, "step": 12376 }, { "epoch": 0.021947217102446057, "grad_norm": 0.421875, "learning_rate": 0.0019370636837018854, "loss": 0.2231, "step": 12378 }, { "epoch": 0.02195076326775587, "grad_norm": 0.306640625, "learning_rate": 0.0019370418158847402, "loss": 0.2516, "step": 12380 }, { "epoch": 0.021954309433065686, "grad_norm": 0.515625, "learning_rate": 0.0019370199444068606, "loss": 0.197, "step": 12382 }, { "epoch": 0.0219578555983755, "grad_norm": 0.84765625, "learning_rate": 0.0019369980692683418, "loss": 0.1843, "step": 12384 }, { "epoch": 0.02196140176368532, "grad_norm": 0.5546875, "learning_rate": 0.0019369761904692794, "loss": 0.2833, "step": 12386 }, { "epoch": 0.021964947928995133, "grad_norm": 0.51171875, "learning_rate": 0.0019369543080097699, "loss": 0.2317, "step": 12388 }, { "epoch": 0.021968494094304947, "grad_norm": 1.7890625, "learning_rate": 0.0019369324218899081, "loss": 0.2449, "step": 12390 }, { "epoch": 0.021972040259614762, "grad_norm": 0.330078125, "learning_rate": 0.0019369105321097896, "loss": 0.2416, "step": 12392 }, { "epoch": 0.021975586424924577, "grad_norm": 0.94140625, "learning_rate": 0.001936888638669511, "loss": 0.2719, "step": 12394 }, { "epoch": 0.02197913259023439, "grad_norm": 0.65625, "learning_rate": 0.0019368667415691678, "loss": 0.1933, "step": 12396 }, { "epoch": 0.02198267875554421, "grad_norm": 0.7421875, "learning_rate": 0.0019368448408088553, "loss": 0.1717, "step": 12398 }, { "epoch": 0.021986224920854024, "grad_norm": 1.25, "learning_rate": 0.0019368229363886699, "loss": 0.2045, "step": 12400 }, { "epoch": 0.021989771086163838, "grad_norm": 1.2734375, "learning_rate": 0.0019368010283087067, "loss": 0.2901, "step": 12402 }, { "epoch": 0.021993317251473653, "grad_norm": 0.384765625, "learning_rate": 0.001936779116569062, "loss": 0.1717, "step": 12404 }, { "epoch": 0.021996863416783467, "grad_norm": 0.83984375, "learning_rate": 0.0019367572011698313, "loss": 0.2326, "step": 12406 }, { "epoch": 0.022000409582093285, "grad_norm": 0.333984375, "learning_rate": 0.001936735282111111, "loss": 0.2094, "step": 12408 }, { "epoch": 0.0220039557474031, "grad_norm": 0.373046875, "learning_rate": 0.0019367133593929963, "loss": 0.1992, "step": 12410 }, { "epoch": 0.022007501912712914, "grad_norm": 0.81640625, "learning_rate": 0.0019366914330155836, "loss": 0.2278, "step": 12412 }, { "epoch": 0.02201104807802273, "grad_norm": 1.390625, "learning_rate": 0.0019366695029789684, "loss": 0.226, "step": 12414 }, { "epoch": 0.022014594243332544, "grad_norm": 0.6640625, "learning_rate": 0.001936647569283247, "loss": 0.2568, "step": 12416 }, { "epoch": 0.022018140408642358, "grad_norm": 0.39453125, "learning_rate": 0.001936625631928515, "loss": 0.2123, "step": 12418 }, { "epoch": 0.022021686573952176, "grad_norm": 0.54296875, "learning_rate": 0.0019366036909148686, "loss": 0.2135, "step": 12420 }, { "epoch": 0.02202523273926199, "grad_norm": 0.400390625, "learning_rate": 0.0019365817462424033, "loss": 0.2936, "step": 12422 }, { "epoch": 0.022028778904571805, "grad_norm": 0.36328125, "learning_rate": 0.0019365597979112154, "loss": 0.2172, "step": 12424 }, { "epoch": 0.02203232506988162, "grad_norm": 2.75, "learning_rate": 0.0019365378459214014, "loss": 0.279, "step": 12426 }, { "epoch": 0.022035871235191434, "grad_norm": 0.396484375, "learning_rate": 0.0019365158902730562, "loss": 0.287, "step": 12428 }, { "epoch": 0.02203941740050125, "grad_norm": 0.333984375, "learning_rate": 0.0019364939309662768, "loss": 0.1919, "step": 12430 }, { "epoch": 0.022042963565811067, "grad_norm": 0.5390625, "learning_rate": 0.0019364719680011586, "loss": 0.3153, "step": 12432 }, { "epoch": 0.02204650973112088, "grad_norm": 2.5, "learning_rate": 0.0019364500013777982, "loss": 0.2378, "step": 12434 }, { "epoch": 0.022050055896430696, "grad_norm": 1.875, "learning_rate": 0.0019364280310962907, "loss": 0.268, "step": 12436 }, { "epoch": 0.02205360206174051, "grad_norm": 1.265625, "learning_rate": 0.0019364060571567334, "loss": 0.4786, "step": 12438 }, { "epoch": 0.022057148227050325, "grad_norm": 0.44921875, "learning_rate": 0.0019363840795592218, "loss": 0.243, "step": 12440 }, { "epoch": 0.022060694392360143, "grad_norm": 0.197265625, "learning_rate": 0.0019363620983038519, "loss": 0.1825, "step": 12442 }, { "epoch": 0.022064240557669958, "grad_norm": 1.0234375, "learning_rate": 0.0019363401133907204, "loss": 0.2716, "step": 12444 }, { "epoch": 0.022067786722979772, "grad_norm": 0.57421875, "learning_rate": 0.0019363181248199224, "loss": 0.2539, "step": 12446 }, { "epoch": 0.022071332888289587, "grad_norm": 0.380859375, "learning_rate": 0.001936296132591555, "loss": 0.5329, "step": 12448 }, { "epoch": 0.0220748790535994, "grad_norm": 1.9296875, "learning_rate": 0.0019362741367057142, "loss": 0.3482, "step": 12450 }, { "epoch": 0.022078425218909216, "grad_norm": 0.451171875, "learning_rate": 0.0019362521371624958, "loss": 0.2587, "step": 12452 }, { "epoch": 0.022081971384219034, "grad_norm": 0.578125, "learning_rate": 0.0019362301339619968, "loss": 0.2146, "step": 12454 }, { "epoch": 0.02208551754952885, "grad_norm": 1.59375, "learning_rate": 0.0019362081271043126, "loss": 0.2371, "step": 12456 }, { "epoch": 0.022089063714838663, "grad_norm": 0.69921875, "learning_rate": 0.0019361861165895398, "loss": 0.186, "step": 12458 }, { "epoch": 0.022092609880148478, "grad_norm": 0.34375, "learning_rate": 0.0019361641024177748, "loss": 0.167, "step": 12460 }, { "epoch": 0.022096156045458292, "grad_norm": 0.56640625, "learning_rate": 0.0019361420845891136, "loss": 0.2413, "step": 12462 }, { "epoch": 0.022099702210768107, "grad_norm": 0.66796875, "learning_rate": 0.0019361200631036527, "loss": 0.2264, "step": 12464 }, { "epoch": 0.022103248376077925, "grad_norm": 0.326171875, "learning_rate": 0.0019360980379614884, "loss": 0.2081, "step": 12466 }, { "epoch": 0.02210679454138774, "grad_norm": 0.373046875, "learning_rate": 0.0019360760091627165, "loss": 0.2218, "step": 12468 }, { "epoch": 0.022110340706697554, "grad_norm": 0.21875, "learning_rate": 0.0019360539767074342, "loss": 0.1746, "step": 12470 }, { "epoch": 0.02211388687200737, "grad_norm": 0.7890625, "learning_rate": 0.001936031940595737, "loss": 0.2122, "step": 12472 }, { "epoch": 0.022117433037317183, "grad_norm": 0.77734375, "learning_rate": 0.0019360099008277221, "loss": 0.2285, "step": 12474 }, { "epoch": 0.022120979202627, "grad_norm": 1.3125, "learning_rate": 0.0019359878574034856, "loss": 0.3159, "step": 12476 }, { "epoch": 0.022124525367936815, "grad_norm": 0.4921875, "learning_rate": 0.0019359658103231237, "loss": 0.2407, "step": 12478 }, { "epoch": 0.02212807153324663, "grad_norm": 0.365234375, "learning_rate": 0.0019359437595867328, "loss": 0.21, "step": 12480 }, { "epoch": 0.022131617698556445, "grad_norm": 1.2734375, "learning_rate": 0.0019359217051944095, "loss": 0.2898, "step": 12482 }, { "epoch": 0.02213516386386626, "grad_norm": 1.25, "learning_rate": 0.0019358996471462505, "loss": 0.4589, "step": 12484 }, { "epoch": 0.022138710029176074, "grad_norm": 0.294921875, "learning_rate": 0.0019358775854423516, "loss": 0.1939, "step": 12486 }, { "epoch": 0.02214225619448589, "grad_norm": 2.765625, "learning_rate": 0.00193585552008281, "loss": 0.3736, "step": 12488 }, { "epoch": 0.022145802359795706, "grad_norm": 0.357421875, "learning_rate": 0.0019358334510677217, "loss": 0.272, "step": 12490 }, { "epoch": 0.02214934852510552, "grad_norm": 0.478515625, "learning_rate": 0.0019358113783971831, "loss": 0.2606, "step": 12492 }, { "epoch": 0.022152894690415335, "grad_norm": 3.109375, "learning_rate": 0.0019357893020712914, "loss": 0.4371, "step": 12494 }, { "epoch": 0.02215644085572515, "grad_norm": 0.267578125, "learning_rate": 0.0019357672220901428, "loss": 0.2283, "step": 12496 }, { "epoch": 0.022159987021034964, "grad_norm": 0.2578125, "learning_rate": 0.001935745138453834, "loss": 0.2839, "step": 12498 }, { "epoch": 0.022163533186344783, "grad_norm": 1.4140625, "learning_rate": 0.0019357230511624609, "loss": 0.3252, "step": 12500 }, { "epoch": 0.022167079351654597, "grad_norm": 0.35546875, "learning_rate": 0.001935700960216121, "loss": 0.2162, "step": 12502 }, { "epoch": 0.02217062551696441, "grad_norm": 0.54296875, "learning_rate": 0.0019356788656149104, "loss": 0.2722, "step": 12504 }, { "epoch": 0.022174171682274226, "grad_norm": 0.34375, "learning_rate": 0.0019356567673589258, "loss": 0.2602, "step": 12506 }, { "epoch": 0.02217771784758404, "grad_norm": 1.5703125, "learning_rate": 0.001935634665448264, "loss": 0.3855, "step": 12508 }, { "epoch": 0.02218126401289386, "grad_norm": 0.26953125, "learning_rate": 0.0019356125598830216, "loss": 0.2396, "step": 12510 }, { "epoch": 0.022184810178203673, "grad_norm": 0.51953125, "learning_rate": 0.0019355904506632954, "loss": 0.2376, "step": 12512 }, { "epoch": 0.022188356343513488, "grad_norm": 0.91015625, "learning_rate": 0.0019355683377891815, "loss": 0.3056, "step": 12514 }, { "epoch": 0.022191902508823302, "grad_norm": 1.28125, "learning_rate": 0.0019355462212607775, "loss": 0.2295, "step": 12516 }, { "epoch": 0.022195448674133117, "grad_norm": 0.296875, "learning_rate": 0.0019355241010781793, "loss": 0.2017, "step": 12518 }, { "epoch": 0.02219899483944293, "grad_norm": 0.2333984375, "learning_rate": 0.0019355019772414844, "loss": 0.225, "step": 12520 }, { "epoch": 0.02220254100475275, "grad_norm": 0.31640625, "learning_rate": 0.0019354798497507888, "loss": 0.214, "step": 12522 }, { "epoch": 0.022206087170062564, "grad_norm": 1.1796875, "learning_rate": 0.00193545771860619, "loss": 0.3216, "step": 12524 }, { "epoch": 0.02220963333537238, "grad_norm": 1.296875, "learning_rate": 0.001935435583807784, "loss": 0.2132, "step": 12526 }, { "epoch": 0.022213179500682193, "grad_norm": 0.3984375, "learning_rate": 0.0019354134453556681, "loss": 0.2195, "step": 12528 }, { "epoch": 0.022216725665992008, "grad_norm": 0.212890625, "learning_rate": 0.0019353913032499393, "loss": 0.197, "step": 12530 }, { "epoch": 0.022220271831301822, "grad_norm": 0.30859375, "learning_rate": 0.0019353691574906942, "loss": 0.3062, "step": 12532 }, { "epoch": 0.02222381799661164, "grad_norm": 0.39453125, "learning_rate": 0.0019353470080780296, "loss": 0.3812, "step": 12534 }, { "epoch": 0.022227364161921455, "grad_norm": 0.6171875, "learning_rate": 0.0019353248550120423, "loss": 0.4081, "step": 12536 }, { "epoch": 0.02223091032723127, "grad_norm": 0.47265625, "learning_rate": 0.0019353026982928294, "loss": 0.214, "step": 12538 }, { "epoch": 0.022234456492541084, "grad_norm": 0.287109375, "learning_rate": 0.0019352805379204876, "loss": 0.2167, "step": 12540 }, { "epoch": 0.0222380026578509, "grad_norm": 3.171875, "learning_rate": 0.001935258373895114, "loss": 0.2288, "step": 12542 }, { "epoch": 0.022241548823160717, "grad_norm": 1.1015625, "learning_rate": 0.0019352362062168055, "loss": 0.2162, "step": 12544 }, { "epoch": 0.02224509498847053, "grad_norm": 1.0234375, "learning_rate": 0.0019352140348856588, "loss": 0.2131, "step": 12546 }, { "epoch": 0.022248641153780346, "grad_norm": 0.396484375, "learning_rate": 0.0019351918599017713, "loss": 0.1651, "step": 12548 }, { "epoch": 0.02225218731909016, "grad_norm": 1.140625, "learning_rate": 0.0019351696812652396, "loss": 0.2031, "step": 12550 }, { "epoch": 0.022255733484399975, "grad_norm": 1.0, "learning_rate": 0.0019351474989761609, "loss": 0.2361, "step": 12552 }, { "epoch": 0.02225927964970979, "grad_norm": 0.734375, "learning_rate": 0.001935125313034632, "loss": 0.2252, "step": 12554 }, { "epoch": 0.022262825815019607, "grad_norm": 0.50390625, "learning_rate": 0.0019351031234407501, "loss": 0.2152, "step": 12556 }, { "epoch": 0.022266371980329422, "grad_norm": 0.28125, "learning_rate": 0.0019350809301946126, "loss": 0.2363, "step": 12558 }, { "epoch": 0.022269918145639236, "grad_norm": 0.796875, "learning_rate": 0.001935058733296316, "loss": 0.2747, "step": 12560 }, { "epoch": 0.02227346431094905, "grad_norm": 1.0859375, "learning_rate": 0.0019350365327459572, "loss": 0.2693, "step": 12562 }, { "epoch": 0.022277010476258866, "grad_norm": 0.384765625, "learning_rate": 0.001935014328543634, "loss": 0.2107, "step": 12564 }, { "epoch": 0.02228055664156868, "grad_norm": 0.515625, "learning_rate": 0.001934992120689443, "loss": 0.2243, "step": 12566 }, { "epoch": 0.022284102806878498, "grad_norm": 0.373046875, "learning_rate": 0.001934969909183482, "loss": 0.1968, "step": 12568 }, { "epoch": 0.022287648972188313, "grad_norm": 1.4140625, "learning_rate": 0.0019349476940258468, "loss": 0.2083, "step": 12570 }, { "epoch": 0.022291195137498127, "grad_norm": 0.5234375, "learning_rate": 0.0019349254752166359, "loss": 0.2048, "step": 12572 }, { "epoch": 0.022294741302807942, "grad_norm": 0.310546875, "learning_rate": 0.0019349032527559457, "loss": 0.2374, "step": 12574 }, { "epoch": 0.022298287468117756, "grad_norm": 0.33203125, "learning_rate": 0.0019348810266438738, "loss": 0.2298, "step": 12576 }, { "epoch": 0.022301833633427574, "grad_norm": 0.2080078125, "learning_rate": 0.0019348587968805173, "loss": 0.2126, "step": 12578 }, { "epoch": 0.02230537979873739, "grad_norm": 0.265625, "learning_rate": 0.001934836563465973, "loss": 0.2324, "step": 12580 }, { "epoch": 0.022308925964047203, "grad_norm": 0.1689453125, "learning_rate": 0.001934814326400339, "loss": 0.1943, "step": 12582 }, { "epoch": 0.022312472129357018, "grad_norm": 0.439453125, "learning_rate": 0.0019347920856837117, "loss": 0.1817, "step": 12584 }, { "epoch": 0.022316018294666833, "grad_norm": 0.78125, "learning_rate": 0.001934769841316189, "loss": 0.1601, "step": 12586 }, { "epoch": 0.022319564459976647, "grad_norm": 1.4765625, "learning_rate": 0.0019347475932978678, "loss": 0.2859, "step": 12588 }, { "epoch": 0.022323110625286465, "grad_norm": 0.5546875, "learning_rate": 0.0019347253416288452, "loss": 0.2455, "step": 12590 }, { "epoch": 0.02232665679059628, "grad_norm": 0.75390625, "learning_rate": 0.0019347030863092192, "loss": 0.3191, "step": 12592 }, { "epoch": 0.022330202955906094, "grad_norm": 3.296875, "learning_rate": 0.0019346808273390866, "loss": 0.4205, "step": 12594 }, { "epoch": 0.02233374912121591, "grad_norm": 0.55859375, "learning_rate": 0.001934658564718545, "loss": 0.2514, "step": 12596 }, { "epoch": 0.022337295286525723, "grad_norm": 0.53125, "learning_rate": 0.0019346362984476917, "loss": 0.2415, "step": 12598 }, { "epoch": 0.022340841451835538, "grad_norm": 0.62890625, "learning_rate": 0.001934614028526624, "loss": 0.4898, "step": 12600 }, { "epoch": 0.022344387617145356, "grad_norm": 0.2890625, "learning_rate": 0.0019345917549554394, "loss": 0.2184, "step": 12602 }, { "epoch": 0.02234793378245517, "grad_norm": 0.4140625, "learning_rate": 0.0019345694777342351, "loss": 0.2711, "step": 12604 }, { "epoch": 0.022351479947764985, "grad_norm": 0.357421875, "learning_rate": 0.0019345471968631087, "loss": 0.2013, "step": 12606 }, { "epoch": 0.0223550261130748, "grad_norm": 2.265625, "learning_rate": 0.0019345249123421575, "loss": 0.1935, "step": 12608 }, { "epoch": 0.022358572278384614, "grad_norm": 0.33203125, "learning_rate": 0.0019345026241714793, "loss": 0.2628, "step": 12610 }, { "epoch": 0.022362118443694432, "grad_norm": 0.80078125, "learning_rate": 0.0019344803323511715, "loss": 0.2409, "step": 12612 }, { "epoch": 0.022365664609004247, "grad_norm": 0.86328125, "learning_rate": 0.0019344580368813311, "loss": 0.3114, "step": 12614 }, { "epoch": 0.02236921077431406, "grad_norm": 0.27734375, "learning_rate": 0.001934435737762056, "loss": 0.2277, "step": 12616 }, { "epoch": 0.022372756939623876, "grad_norm": 0.1689453125, "learning_rate": 0.0019344134349934439, "loss": 0.2605, "step": 12618 }, { "epoch": 0.02237630310493369, "grad_norm": 0.95703125, "learning_rate": 0.001934391128575592, "loss": 0.3065, "step": 12620 }, { "epoch": 0.022379849270243505, "grad_norm": 1.375, "learning_rate": 0.0019343688185085978, "loss": 0.1976, "step": 12622 }, { "epoch": 0.022383395435553323, "grad_norm": 0.90234375, "learning_rate": 0.0019343465047925593, "loss": 0.2209, "step": 12624 }, { "epoch": 0.022386941600863138, "grad_norm": 0.28515625, "learning_rate": 0.0019343241874275737, "loss": 0.1795, "step": 12626 }, { "epoch": 0.022390487766172952, "grad_norm": 0.435546875, "learning_rate": 0.0019343018664137387, "loss": 0.235, "step": 12628 }, { "epoch": 0.022394033931482767, "grad_norm": 0.97265625, "learning_rate": 0.0019342795417511523, "loss": 0.1741, "step": 12630 }, { "epoch": 0.02239758009679258, "grad_norm": 0.376953125, "learning_rate": 0.0019342572134399114, "loss": 0.2232, "step": 12632 }, { "epoch": 0.022401126262102396, "grad_norm": 0.6171875, "learning_rate": 0.0019342348814801143, "loss": 0.2044, "step": 12634 }, { "epoch": 0.022404672427412214, "grad_norm": 0.83203125, "learning_rate": 0.0019342125458718584, "loss": 0.307, "step": 12636 }, { "epoch": 0.02240821859272203, "grad_norm": 2.875, "learning_rate": 0.001934190206615241, "loss": 0.2746, "step": 12638 }, { "epoch": 0.022411764758031843, "grad_norm": 0.70703125, "learning_rate": 0.0019341678637103605, "loss": 0.2264, "step": 12640 }, { "epoch": 0.022415310923341657, "grad_norm": 0.400390625, "learning_rate": 0.0019341455171573143, "loss": 0.2504, "step": 12642 }, { "epoch": 0.022418857088651472, "grad_norm": 0.88671875, "learning_rate": 0.0019341231669562002, "loss": 0.2102, "step": 12644 }, { "epoch": 0.02242240325396129, "grad_norm": 0.60546875, "learning_rate": 0.0019341008131071162, "loss": 0.1765, "step": 12646 }, { "epoch": 0.022425949419271105, "grad_norm": 0.9296875, "learning_rate": 0.0019340784556101589, "loss": 0.251, "step": 12648 }, { "epoch": 0.02242949558458092, "grad_norm": 1.015625, "learning_rate": 0.0019340560944654274, "loss": 0.2228, "step": 12650 }, { "epoch": 0.022433041749890734, "grad_norm": 0.58203125, "learning_rate": 0.001934033729673019, "loss": 0.2078, "step": 12652 }, { "epoch": 0.022436587915200548, "grad_norm": 0.75, "learning_rate": 0.0019340113612330316, "loss": 0.3198, "step": 12654 }, { "epoch": 0.022440134080510363, "grad_norm": 0.57421875, "learning_rate": 0.0019339889891455632, "loss": 0.2347, "step": 12656 }, { "epoch": 0.02244368024582018, "grad_norm": 0.365234375, "learning_rate": 0.001933966613410711, "loss": 0.245, "step": 12658 }, { "epoch": 0.022447226411129995, "grad_norm": 0.373046875, "learning_rate": 0.0019339442340285734, "loss": 0.3191, "step": 12660 }, { "epoch": 0.02245077257643981, "grad_norm": 0.32421875, "learning_rate": 0.0019339218509992478, "loss": 0.1804, "step": 12662 }, { "epoch": 0.022454318741749624, "grad_norm": 0.7421875, "learning_rate": 0.001933899464322833, "loss": 0.3906, "step": 12664 }, { "epoch": 0.02245786490705944, "grad_norm": 0.2236328125, "learning_rate": 0.0019338770739994258, "loss": 0.2101, "step": 12666 }, { "epoch": 0.022461411072369254, "grad_norm": 0.328125, "learning_rate": 0.001933854680029125, "loss": 0.2814, "step": 12668 }, { "epoch": 0.02246495723767907, "grad_norm": 0.45703125, "learning_rate": 0.001933832282412028, "loss": 0.2235, "step": 12670 }, { "epoch": 0.022468503402988886, "grad_norm": 0.54296875, "learning_rate": 0.001933809881148233, "loss": 0.2009, "step": 12672 }, { "epoch": 0.0224720495682987, "grad_norm": 0.5078125, "learning_rate": 0.0019337874762378378, "loss": 0.1984, "step": 12674 }, { "epoch": 0.022475595733608515, "grad_norm": 0.609375, "learning_rate": 0.0019337650676809408, "loss": 0.2714, "step": 12676 }, { "epoch": 0.02247914189891833, "grad_norm": 1.03125, "learning_rate": 0.0019337426554776396, "loss": 0.1851, "step": 12678 }, { "epoch": 0.022482688064228148, "grad_norm": 0.263671875, "learning_rate": 0.001933720239628032, "loss": 0.1721, "step": 12680 }, { "epoch": 0.022486234229537962, "grad_norm": 0.4375, "learning_rate": 0.0019336978201322168, "loss": 0.232, "step": 12682 }, { "epoch": 0.022489780394847777, "grad_norm": 0.396484375, "learning_rate": 0.0019336753969902915, "loss": 0.1737, "step": 12684 }, { "epoch": 0.02249332656015759, "grad_norm": 0.6953125, "learning_rate": 0.001933652970202354, "loss": 0.3207, "step": 12686 }, { "epoch": 0.022496872725467406, "grad_norm": 1.1484375, "learning_rate": 0.001933630539768503, "loss": 0.2332, "step": 12688 }, { "epoch": 0.02250041889077722, "grad_norm": 0.40234375, "learning_rate": 0.001933608105688836, "loss": 0.2303, "step": 12690 }, { "epoch": 0.02250396505608704, "grad_norm": 0.75, "learning_rate": 0.0019335856679634515, "loss": 0.1993, "step": 12692 }, { "epoch": 0.022507511221396853, "grad_norm": 0.7109375, "learning_rate": 0.0019335632265924474, "loss": 0.2327, "step": 12694 }, { "epoch": 0.022511057386706668, "grad_norm": 0.77734375, "learning_rate": 0.0019335407815759221, "loss": 0.2747, "step": 12696 }, { "epoch": 0.022514603552016482, "grad_norm": 0.6796875, "learning_rate": 0.0019335183329139735, "loss": 0.2459, "step": 12698 }, { "epoch": 0.022518149717326297, "grad_norm": 1.0703125, "learning_rate": 0.0019334958806067, "loss": 0.2798, "step": 12700 }, { "epoch": 0.02252169588263611, "grad_norm": 0.890625, "learning_rate": 0.0019334734246541992, "loss": 0.1991, "step": 12702 }, { "epoch": 0.02252524204794593, "grad_norm": 0.51171875, "learning_rate": 0.0019334509650565702, "loss": 0.24, "step": 12704 }, { "epoch": 0.022528788213255744, "grad_norm": 2.109375, "learning_rate": 0.0019334285018139108, "loss": 0.2288, "step": 12706 }, { "epoch": 0.02253233437856556, "grad_norm": 0.51171875, "learning_rate": 0.001933406034926319, "loss": 0.2194, "step": 12708 }, { "epoch": 0.022535880543875373, "grad_norm": 2.53125, "learning_rate": 0.0019333835643938937, "loss": 0.2483, "step": 12710 }, { "epoch": 0.022539426709185188, "grad_norm": 0.62109375, "learning_rate": 0.0019333610902167325, "loss": 0.2336, "step": 12712 }, { "epoch": 0.022542972874495006, "grad_norm": 1.3046875, "learning_rate": 0.001933338612394934, "loss": 0.2901, "step": 12714 }, { "epoch": 0.02254651903980482, "grad_norm": 0.640625, "learning_rate": 0.0019333161309285966, "loss": 0.268, "step": 12716 }, { "epoch": 0.022550065205114635, "grad_norm": 0.466796875, "learning_rate": 0.001933293645817818, "loss": 0.2504, "step": 12718 }, { "epoch": 0.02255361137042445, "grad_norm": 0.490234375, "learning_rate": 0.0019332711570626973, "loss": 0.1955, "step": 12720 }, { "epoch": 0.022557157535734264, "grad_norm": 0.322265625, "learning_rate": 0.0019332486646633327, "loss": 0.1811, "step": 12722 }, { "epoch": 0.02256070370104408, "grad_norm": 0.85546875, "learning_rate": 0.0019332261686198224, "loss": 0.3394, "step": 12724 }, { "epoch": 0.022564249866353896, "grad_norm": 0.58203125, "learning_rate": 0.001933203668932265, "loss": 0.2198, "step": 12726 }, { "epoch": 0.02256779603166371, "grad_norm": 0.96484375, "learning_rate": 0.0019331811656007588, "loss": 0.2544, "step": 12728 }, { "epoch": 0.022571342196973525, "grad_norm": 0.8515625, "learning_rate": 0.0019331586586254018, "loss": 0.2869, "step": 12730 }, { "epoch": 0.02257488836228334, "grad_norm": 2.03125, "learning_rate": 0.0019331361480062928, "loss": 0.1893, "step": 12732 }, { "epoch": 0.022578434527593155, "grad_norm": 0.8671875, "learning_rate": 0.0019331136337435305, "loss": 0.2712, "step": 12734 }, { "epoch": 0.02258198069290297, "grad_norm": 1.234375, "learning_rate": 0.001933091115837213, "loss": 0.2521, "step": 12736 }, { "epoch": 0.022585526858212787, "grad_norm": 0.7578125, "learning_rate": 0.001933068594287439, "loss": 0.1989, "step": 12738 }, { "epoch": 0.0225890730235226, "grad_norm": 0.2216796875, "learning_rate": 0.0019330460690943066, "loss": 0.3316, "step": 12740 }, { "epoch": 0.022592619188832416, "grad_norm": 0.52734375, "learning_rate": 0.0019330235402579147, "loss": 0.2238, "step": 12742 }, { "epoch": 0.02259616535414223, "grad_norm": 0.890625, "learning_rate": 0.0019330010077783615, "loss": 0.2815, "step": 12744 }, { "epoch": 0.022599711519452045, "grad_norm": 6.75, "learning_rate": 0.0019329784716557458, "loss": 0.392, "step": 12746 }, { "epoch": 0.022603257684761863, "grad_norm": 0.546875, "learning_rate": 0.0019329559318901665, "loss": 0.2155, "step": 12748 }, { "epoch": 0.022606803850071678, "grad_norm": 0.44921875, "learning_rate": 0.0019329333884817216, "loss": 0.2363, "step": 12750 }, { "epoch": 0.022610350015381492, "grad_norm": 0.466796875, "learning_rate": 0.0019329108414305098, "loss": 0.1871, "step": 12752 }, { "epoch": 0.022613896180691307, "grad_norm": 0.6484375, "learning_rate": 0.0019328882907366299, "loss": 0.223, "step": 12754 }, { "epoch": 0.02261744234600112, "grad_norm": 0.62890625, "learning_rate": 0.0019328657364001802, "loss": 0.227, "step": 12756 }, { "epoch": 0.022620988511310936, "grad_norm": 1.046875, "learning_rate": 0.0019328431784212598, "loss": 0.2606, "step": 12758 }, { "epoch": 0.022624534676620754, "grad_norm": 0.54296875, "learning_rate": 0.001932820616799967, "loss": 0.2807, "step": 12760 }, { "epoch": 0.02262808084193057, "grad_norm": 0.69921875, "learning_rate": 0.0019327980515364005, "loss": 0.2303, "step": 12762 }, { "epoch": 0.022631627007240383, "grad_norm": 0.65625, "learning_rate": 0.0019327754826306593, "loss": 0.2385, "step": 12764 }, { "epoch": 0.022635173172550198, "grad_norm": 0.98046875, "learning_rate": 0.0019327529100828418, "loss": 0.2364, "step": 12766 }, { "epoch": 0.022638719337860012, "grad_norm": 0.5859375, "learning_rate": 0.0019327303338930466, "loss": 0.179, "step": 12768 }, { "epoch": 0.022642265503169827, "grad_norm": 0.609375, "learning_rate": 0.0019327077540613727, "loss": 0.2257, "step": 12770 }, { "epoch": 0.022645811668479645, "grad_norm": 0.5859375, "learning_rate": 0.001932685170587919, "loss": 0.2471, "step": 12772 }, { "epoch": 0.02264935783378946, "grad_norm": 3.359375, "learning_rate": 0.0019326625834727837, "loss": 0.2343, "step": 12774 }, { "epoch": 0.022652903999099274, "grad_norm": 2.140625, "learning_rate": 0.0019326399927160661, "loss": 0.3141, "step": 12776 }, { "epoch": 0.02265645016440909, "grad_norm": 0.451171875, "learning_rate": 0.001932617398317865, "loss": 0.1911, "step": 12778 }, { "epoch": 0.022659996329718903, "grad_norm": 0.89453125, "learning_rate": 0.001932594800278279, "loss": 0.2355, "step": 12780 }, { "epoch": 0.02266354249502872, "grad_norm": 2.078125, "learning_rate": 0.0019325721985974068, "loss": 0.3804, "step": 12782 }, { "epoch": 0.022667088660338536, "grad_norm": 0.357421875, "learning_rate": 0.0019325495932753475, "loss": 0.232, "step": 12784 }, { "epoch": 0.02267063482564835, "grad_norm": 0.392578125, "learning_rate": 0.0019325269843121998, "loss": 0.2189, "step": 12786 }, { "epoch": 0.022674180990958165, "grad_norm": 3.703125, "learning_rate": 0.0019325043717080626, "loss": 0.4226, "step": 12788 }, { "epoch": 0.02267772715626798, "grad_norm": 0.439453125, "learning_rate": 0.0019324817554630352, "loss": 0.2359, "step": 12790 }, { "epoch": 0.022681273321577794, "grad_norm": 0.66015625, "learning_rate": 0.0019324591355772156, "loss": 0.25, "step": 12792 }, { "epoch": 0.022684819486887612, "grad_norm": 2.390625, "learning_rate": 0.0019324365120507038, "loss": 0.2969, "step": 12794 }, { "epoch": 0.022688365652197427, "grad_norm": 0.38671875, "learning_rate": 0.001932413884883598, "loss": 0.1732, "step": 12796 }, { "epoch": 0.02269191181750724, "grad_norm": 0.2314453125, "learning_rate": 0.0019323912540759973, "loss": 0.2004, "step": 12798 }, { "epoch": 0.022695457982817056, "grad_norm": 0.255859375, "learning_rate": 0.001932368619628001, "loss": 0.3216, "step": 12800 }, { "epoch": 0.02269900414812687, "grad_norm": 0.9375, "learning_rate": 0.0019323459815397078, "loss": 0.2472, "step": 12802 }, { "epoch": 0.022702550313436685, "grad_norm": 0.63671875, "learning_rate": 0.0019323233398112168, "loss": 0.2268, "step": 12804 }, { "epoch": 0.022706096478746503, "grad_norm": 0.79296875, "learning_rate": 0.0019323006944426267, "loss": 0.2901, "step": 12806 }, { "epoch": 0.022709642644056317, "grad_norm": 0.88671875, "learning_rate": 0.0019322780454340371, "loss": 0.2921, "step": 12808 }, { "epoch": 0.022713188809366132, "grad_norm": 0.431640625, "learning_rate": 0.0019322553927855468, "loss": 0.188, "step": 12810 }, { "epoch": 0.022716734974675946, "grad_norm": 0.921875, "learning_rate": 0.0019322327364972548, "loss": 0.1867, "step": 12812 }, { "epoch": 0.02272028113998576, "grad_norm": 0.796875, "learning_rate": 0.00193221007656926, "loss": 0.279, "step": 12814 }, { "epoch": 0.02272382730529558, "grad_norm": 0.275390625, "learning_rate": 0.0019321874130016619, "loss": 0.2007, "step": 12816 }, { "epoch": 0.022727373470605394, "grad_norm": 0.87890625, "learning_rate": 0.0019321647457945595, "loss": 0.2951, "step": 12818 }, { "epoch": 0.022730919635915208, "grad_norm": 0.41015625, "learning_rate": 0.0019321420749480519, "loss": 0.1994, "step": 12820 }, { "epoch": 0.022734465801225023, "grad_norm": 0.671875, "learning_rate": 0.0019321194004622382, "loss": 0.1762, "step": 12822 }, { "epoch": 0.022738011966534837, "grad_norm": 4.3125, "learning_rate": 0.0019320967223372175, "loss": 0.3836, "step": 12824 }, { "epoch": 0.022741558131844652, "grad_norm": 0.68359375, "learning_rate": 0.0019320740405730891, "loss": 0.246, "step": 12826 }, { "epoch": 0.02274510429715447, "grad_norm": 0.53515625, "learning_rate": 0.0019320513551699524, "loss": 0.1992, "step": 12828 }, { "epoch": 0.022748650462464284, "grad_norm": 0.98828125, "learning_rate": 0.001932028666127906, "loss": 0.2541, "step": 12830 }, { "epoch": 0.0227521966277741, "grad_norm": 0.369140625, "learning_rate": 0.0019320059734470496, "loss": 0.2374, "step": 12832 }, { "epoch": 0.022755742793083913, "grad_norm": 0.88671875, "learning_rate": 0.0019319832771274826, "loss": 0.1668, "step": 12834 }, { "epoch": 0.022759288958393728, "grad_norm": 0.71875, "learning_rate": 0.0019319605771693038, "loss": 0.3272, "step": 12836 }, { "epoch": 0.022762835123703543, "grad_norm": 1.6796875, "learning_rate": 0.0019319378735726128, "loss": 0.2246, "step": 12838 }, { "epoch": 0.02276638128901336, "grad_norm": 0.40625, "learning_rate": 0.0019319151663375088, "loss": 0.2337, "step": 12840 }, { "epoch": 0.022769927454323175, "grad_norm": 2.65625, "learning_rate": 0.001931892455464091, "loss": 0.2668, "step": 12842 }, { "epoch": 0.02277347361963299, "grad_norm": 0.2236328125, "learning_rate": 0.001931869740952459, "loss": 0.247, "step": 12844 }, { "epoch": 0.022777019784942804, "grad_norm": 7.03125, "learning_rate": 0.0019318470228027115, "loss": 0.1666, "step": 12846 }, { "epoch": 0.02278056595025262, "grad_norm": 0.486328125, "learning_rate": 0.001931824301014949, "loss": 0.1994, "step": 12848 }, { "epoch": 0.022784112115562437, "grad_norm": 1.65625, "learning_rate": 0.00193180157558927, "loss": 0.2013, "step": 12850 }, { "epoch": 0.02278765828087225, "grad_norm": 0.625, "learning_rate": 0.0019317788465257738, "loss": 0.2737, "step": 12852 }, { "epoch": 0.022791204446182066, "grad_norm": 0.640625, "learning_rate": 0.00193175611382456, "loss": 0.2346, "step": 12854 }, { "epoch": 0.02279475061149188, "grad_norm": 1.0546875, "learning_rate": 0.0019317333774857284, "loss": 0.2299, "step": 12856 }, { "epoch": 0.022798296776801695, "grad_norm": 0.40625, "learning_rate": 0.001931710637509378, "loss": 0.2206, "step": 12858 }, { "epoch": 0.02280184294211151, "grad_norm": 2.421875, "learning_rate": 0.0019316878938956084, "loss": 0.2176, "step": 12860 }, { "epoch": 0.022805389107421328, "grad_norm": 1.6328125, "learning_rate": 0.0019316651466445194, "loss": 0.3634, "step": 12862 }, { "epoch": 0.022808935272731142, "grad_norm": 0.78125, "learning_rate": 0.0019316423957562096, "loss": 0.2359, "step": 12864 }, { "epoch": 0.022812481438040957, "grad_norm": 0.92578125, "learning_rate": 0.0019316196412307793, "loss": 0.2436, "step": 12866 }, { "epoch": 0.02281602760335077, "grad_norm": 0.69140625, "learning_rate": 0.0019315968830683277, "loss": 0.212, "step": 12868 }, { "epoch": 0.022819573768660586, "grad_norm": 0.55078125, "learning_rate": 0.0019315741212689544, "loss": 0.2726, "step": 12870 }, { "epoch": 0.0228231199339704, "grad_norm": 0.451171875, "learning_rate": 0.001931551355832759, "loss": 0.1419, "step": 12872 }, { "epoch": 0.02282666609928022, "grad_norm": 0.5625, "learning_rate": 0.0019315285867598409, "loss": 0.1984, "step": 12874 }, { "epoch": 0.022830212264590033, "grad_norm": 0.81640625, "learning_rate": 0.0019315058140502996, "loss": 0.2207, "step": 12876 }, { "epoch": 0.022833758429899847, "grad_norm": 0.61328125, "learning_rate": 0.0019314830377042352, "loss": 0.1625, "step": 12878 }, { "epoch": 0.022837304595209662, "grad_norm": 0.212890625, "learning_rate": 0.0019314602577217468, "loss": 0.1847, "step": 12880 }, { "epoch": 0.022840850760519477, "grad_norm": 0.28125, "learning_rate": 0.001931437474102934, "loss": 0.2147, "step": 12882 }, { "epoch": 0.022844396925829295, "grad_norm": 0.337890625, "learning_rate": 0.001931414686847897, "loss": 0.3228, "step": 12884 }, { "epoch": 0.02284794309113911, "grad_norm": 1.0859375, "learning_rate": 0.001931391895956735, "loss": 0.1632, "step": 12886 }, { "epoch": 0.022851489256448924, "grad_norm": 0.6484375, "learning_rate": 0.0019313691014295478, "loss": 0.2619, "step": 12888 }, { "epoch": 0.02285503542175874, "grad_norm": 0.4296875, "learning_rate": 0.0019313463032664352, "loss": 0.1789, "step": 12890 }, { "epoch": 0.022858581587068553, "grad_norm": 0.703125, "learning_rate": 0.0019313235014674966, "loss": 0.2279, "step": 12892 }, { "epoch": 0.022862127752378367, "grad_norm": 0.65234375, "learning_rate": 0.0019313006960328318, "loss": 0.2073, "step": 12894 }, { "epoch": 0.022865673917688185, "grad_norm": 0.62890625, "learning_rate": 0.0019312778869625404, "loss": 0.251, "step": 12896 }, { "epoch": 0.022869220082998, "grad_norm": 1.078125, "learning_rate": 0.001931255074256723, "loss": 0.3197, "step": 12898 }, { "epoch": 0.022872766248307815, "grad_norm": 1.5625, "learning_rate": 0.0019312322579154784, "loss": 0.2321, "step": 12900 }, { "epoch": 0.02287631241361763, "grad_norm": 0.44140625, "learning_rate": 0.0019312094379389066, "loss": 0.2021, "step": 12902 }, { "epoch": 0.022879858578927444, "grad_norm": 0.80859375, "learning_rate": 0.001931186614327108, "loss": 0.2541, "step": 12904 }, { "epoch": 0.022883404744237258, "grad_norm": 0.65625, "learning_rate": 0.0019311637870801815, "loss": 0.2278, "step": 12906 }, { "epoch": 0.022886950909547076, "grad_norm": 1.390625, "learning_rate": 0.0019311409561982276, "loss": 0.4176, "step": 12908 }, { "epoch": 0.02289049707485689, "grad_norm": 1.1484375, "learning_rate": 0.001931118121681346, "loss": 0.2303, "step": 12910 }, { "epoch": 0.022894043240166705, "grad_norm": 0.96875, "learning_rate": 0.0019310952835296365, "loss": 0.1868, "step": 12912 }, { "epoch": 0.02289758940547652, "grad_norm": 5.625, "learning_rate": 0.001931072441743199, "loss": 0.2724, "step": 12914 }, { "epoch": 0.022901135570786334, "grad_norm": 3.296875, "learning_rate": 0.0019310495963221334, "loss": 0.4192, "step": 12916 }, { "epoch": 0.022904681736096152, "grad_norm": 0.9375, "learning_rate": 0.0019310267472665392, "loss": 0.251, "step": 12918 }, { "epoch": 0.022908227901405967, "grad_norm": 0.490234375, "learning_rate": 0.0019310038945765172, "loss": 0.2704, "step": 12920 }, { "epoch": 0.02291177406671578, "grad_norm": 0.78515625, "learning_rate": 0.0019309810382521666, "loss": 0.2699, "step": 12922 }, { "epoch": 0.022915320232025596, "grad_norm": 1.1328125, "learning_rate": 0.0019309581782935876, "loss": 0.2919, "step": 12924 }, { "epoch": 0.02291886639733541, "grad_norm": 0.482421875, "learning_rate": 0.0019309353147008805, "loss": 0.1962, "step": 12926 }, { "epoch": 0.022922412562645225, "grad_norm": 4.84375, "learning_rate": 0.0019309124474741449, "loss": 0.2168, "step": 12928 }, { "epoch": 0.022925958727955043, "grad_norm": 1.015625, "learning_rate": 0.0019308895766134806, "loss": 0.208, "step": 12930 }, { "epoch": 0.022929504893264858, "grad_norm": 5.0, "learning_rate": 0.0019308667021189884, "loss": 0.2692, "step": 12932 }, { "epoch": 0.022933051058574672, "grad_norm": 0.64453125, "learning_rate": 0.0019308438239907676, "loss": 0.2011, "step": 12934 }, { "epoch": 0.022936597223884487, "grad_norm": 5.125, "learning_rate": 0.0019308209422289186, "loss": 0.2225, "step": 12936 }, { "epoch": 0.0229401433891943, "grad_norm": 2.0625, "learning_rate": 0.0019307980568335414, "loss": 0.3926, "step": 12938 }, { "epoch": 0.022943689554504116, "grad_norm": 1.8125, "learning_rate": 0.0019307751678047358, "loss": 0.3128, "step": 12940 }, { "epoch": 0.022947235719813934, "grad_norm": 0.5390625, "learning_rate": 0.0019307522751426027, "loss": 0.2158, "step": 12942 }, { "epoch": 0.02295078188512375, "grad_norm": 0.875, "learning_rate": 0.0019307293788472415, "loss": 0.2262, "step": 12944 }, { "epoch": 0.022954328050433563, "grad_norm": 1.0859375, "learning_rate": 0.0019307064789187522, "loss": 0.3075, "step": 12946 }, { "epoch": 0.022957874215743378, "grad_norm": 1.1953125, "learning_rate": 0.001930683575357236, "loss": 0.2198, "step": 12948 }, { "epoch": 0.022961420381053192, "grad_norm": 0.640625, "learning_rate": 0.0019306606681627916, "loss": 0.216, "step": 12950 }, { "epoch": 0.02296496654636301, "grad_norm": 0.91796875, "learning_rate": 0.0019306377573355203, "loss": 0.2853, "step": 12952 }, { "epoch": 0.022968512711672825, "grad_norm": 0.515625, "learning_rate": 0.0019306148428755222, "loss": 0.2414, "step": 12954 }, { "epoch": 0.02297205887698264, "grad_norm": 0.9375, "learning_rate": 0.001930591924782897, "loss": 0.2223, "step": 12956 }, { "epoch": 0.022975605042292454, "grad_norm": 0.328125, "learning_rate": 0.001930569003057745, "loss": 0.1855, "step": 12958 }, { "epoch": 0.02297915120760227, "grad_norm": 0.93359375, "learning_rate": 0.0019305460777001666, "loss": 0.2487, "step": 12960 }, { "epoch": 0.022982697372912083, "grad_norm": 0.396484375, "learning_rate": 0.0019305231487102624, "loss": 0.206, "step": 12962 }, { "epoch": 0.0229862435382219, "grad_norm": 0.373046875, "learning_rate": 0.0019305002160881323, "loss": 0.2376, "step": 12964 }, { "epoch": 0.022989789703531716, "grad_norm": 0.75, "learning_rate": 0.0019304772798338769, "loss": 0.2325, "step": 12966 }, { "epoch": 0.02299333586884153, "grad_norm": 0.39453125, "learning_rate": 0.0019304543399475957, "loss": 0.2152, "step": 12968 }, { "epoch": 0.022996882034151345, "grad_norm": 0.294921875, "learning_rate": 0.00193043139642939, "loss": 0.1903, "step": 12970 }, { "epoch": 0.02300042819946116, "grad_norm": 1.1875, "learning_rate": 0.0019304084492793596, "loss": 0.269, "step": 12972 }, { "epoch": 0.023003974364770974, "grad_norm": 1.1328125, "learning_rate": 0.0019303854984976053, "loss": 0.2906, "step": 12974 }, { "epoch": 0.023007520530080792, "grad_norm": 0.390625, "learning_rate": 0.0019303625440842268, "loss": 0.2932, "step": 12976 }, { "epoch": 0.023011066695390606, "grad_norm": 0.58984375, "learning_rate": 0.0019303395860393246, "loss": 0.1753, "step": 12978 }, { "epoch": 0.02301461286070042, "grad_norm": 1.4375, "learning_rate": 0.0019303166243630002, "loss": 0.2325, "step": 12980 }, { "epoch": 0.023018159026010235, "grad_norm": 0.55859375, "learning_rate": 0.0019302936590553526, "loss": 0.3821, "step": 12982 }, { "epoch": 0.02302170519132005, "grad_norm": 2.375, "learning_rate": 0.001930270690116483, "loss": 0.3146, "step": 12984 }, { "epoch": 0.023025251356629868, "grad_norm": 3.296875, "learning_rate": 0.0019302477175464916, "loss": 0.4533, "step": 12986 }, { "epoch": 0.023028797521939683, "grad_norm": 3.28125, "learning_rate": 0.0019302247413454793, "loss": 0.3294, "step": 12988 }, { "epoch": 0.023032343687249497, "grad_norm": 0.5078125, "learning_rate": 0.001930201761513546, "loss": 0.2068, "step": 12990 }, { "epoch": 0.02303588985255931, "grad_norm": 0.93359375, "learning_rate": 0.001930178778050792, "loss": 0.2309, "step": 12992 }, { "epoch": 0.023039436017869126, "grad_norm": 0.578125, "learning_rate": 0.0019301557909573185, "loss": 0.2445, "step": 12994 }, { "epoch": 0.02304298218317894, "grad_norm": 0.5703125, "learning_rate": 0.001930132800233226, "loss": 0.1791, "step": 12996 }, { "epoch": 0.02304652834848876, "grad_norm": 2.359375, "learning_rate": 0.0019301098058786149, "loss": 0.6387, "step": 12998 }, { "epoch": 0.023050074513798573, "grad_norm": 0.75390625, "learning_rate": 0.0019300868078935853, "loss": 0.2871, "step": 13000 }, { "epoch": 0.023053620679108388, "grad_norm": 0.400390625, "learning_rate": 0.0019300638062782385, "loss": 0.2078, "step": 13002 }, { "epoch": 0.023057166844418202, "grad_norm": 0.326171875, "learning_rate": 0.0019300408010326747, "loss": 0.2409, "step": 13004 }, { "epoch": 0.023060713009728017, "grad_norm": 0.65625, "learning_rate": 0.0019300177921569947, "loss": 0.2867, "step": 13006 }, { "epoch": 0.02306425917503783, "grad_norm": 0.97265625, "learning_rate": 0.0019299947796512988, "loss": 0.5307, "step": 13008 }, { "epoch": 0.02306780534034765, "grad_norm": 1.671875, "learning_rate": 0.001929971763515688, "loss": 0.2227, "step": 13010 }, { "epoch": 0.023071351505657464, "grad_norm": 1.2109375, "learning_rate": 0.0019299487437502627, "loss": 0.3788, "step": 13012 }, { "epoch": 0.02307489767096728, "grad_norm": 0.33203125, "learning_rate": 0.0019299257203551237, "loss": 0.2514, "step": 13014 }, { "epoch": 0.023078443836277093, "grad_norm": 3.046875, "learning_rate": 0.0019299026933303717, "loss": 0.2558, "step": 13016 }, { "epoch": 0.023081990001586908, "grad_norm": 1.28125, "learning_rate": 0.0019298796626761076, "loss": 0.2056, "step": 13018 }, { "epoch": 0.023085536166896726, "grad_norm": 0.6328125, "learning_rate": 0.0019298566283924314, "loss": 0.1677, "step": 13020 }, { "epoch": 0.02308908233220654, "grad_norm": 0.5625, "learning_rate": 0.0019298335904794446, "loss": 0.1941, "step": 13022 }, { "epoch": 0.023092628497516355, "grad_norm": 0.60546875, "learning_rate": 0.001929810548937248, "loss": 0.1634, "step": 13024 }, { "epoch": 0.02309617466282617, "grad_norm": 0.484375, "learning_rate": 0.0019297875037659416, "loss": 0.1903, "step": 13026 }, { "epoch": 0.023099720828135984, "grad_norm": 0.71875, "learning_rate": 0.0019297644549656268, "loss": 0.1982, "step": 13028 }, { "epoch": 0.0231032669934458, "grad_norm": 0.7578125, "learning_rate": 0.0019297414025364044, "loss": 0.1951, "step": 13030 }, { "epoch": 0.023106813158755617, "grad_norm": 2.359375, "learning_rate": 0.001929718346478375, "loss": 0.2912, "step": 13032 }, { "epoch": 0.02311035932406543, "grad_norm": 0.3046875, "learning_rate": 0.0019296952867916396, "loss": 0.2001, "step": 13034 }, { "epoch": 0.023113905489375246, "grad_norm": 1.7734375, "learning_rate": 0.001929672223476299, "loss": 0.2551, "step": 13036 }, { "epoch": 0.02311745165468506, "grad_norm": 0.36328125, "learning_rate": 0.001929649156532454, "loss": 0.2102, "step": 13038 }, { "epoch": 0.023120997819994875, "grad_norm": 0.65625, "learning_rate": 0.0019296260859602054, "loss": 0.2956, "step": 13040 }, { "epoch": 0.02312454398530469, "grad_norm": 0.58984375, "learning_rate": 0.001929603011759654, "loss": 0.2675, "step": 13042 }, { "epoch": 0.023128090150614507, "grad_norm": 1.1796875, "learning_rate": 0.001929579933930901, "loss": 0.2036, "step": 13044 }, { "epoch": 0.023131636315924322, "grad_norm": 0.50390625, "learning_rate": 0.0019295568524740475, "loss": 0.1797, "step": 13046 }, { "epoch": 0.023135182481234137, "grad_norm": 0.65234375, "learning_rate": 0.0019295337673891936, "loss": 0.2802, "step": 13048 }, { "epoch": 0.02313872864654395, "grad_norm": 0.734375, "learning_rate": 0.0019295106786764418, "loss": 0.2052, "step": 13050 }, { "epoch": 0.023142274811853766, "grad_norm": 0.6171875, "learning_rate": 0.0019294875863358916, "loss": 0.1848, "step": 13052 }, { "epoch": 0.023145820977163584, "grad_norm": 2.1875, "learning_rate": 0.0019294644903676444, "loss": 0.2691, "step": 13054 }, { "epoch": 0.023149367142473398, "grad_norm": 1.4765625, "learning_rate": 0.0019294413907718017, "loss": 0.2286, "step": 13056 }, { "epoch": 0.023152913307783213, "grad_norm": 1.234375, "learning_rate": 0.0019294182875484638, "loss": 0.1945, "step": 13058 }, { "epoch": 0.023156459473093027, "grad_norm": 1.109375, "learning_rate": 0.0019293951806977324, "loss": 0.3339, "step": 13060 }, { "epoch": 0.023160005638402842, "grad_norm": 0.5390625, "learning_rate": 0.0019293720702197078, "loss": 0.2257, "step": 13062 }, { "epoch": 0.023163551803712656, "grad_norm": 1.4375, "learning_rate": 0.0019293489561144918, "loss": 0.1969, "step": 13064 }, { "epoch": 0.023167097969022474, "grad_norm": 0.52734375, "learning_rate": 0.0019293258383821853, "loss": 0.1827, "step": 13066 }, { "epoch": 0.02317064413433229, "grad_norm": 0.31640625, "learning_rate": 0.0019293027170228891, "loss": 0.3714, "step": 13068 }, { "epoch": 0.023174190299642104, "grad_norm": 0.365234375, "learning_rate": 0.0019292795920367047, "loss": 0.2114, "step": 13070 }, { "epoch": 0.023177736464951918, "grad_norm": 0.466796875, "learning_rate": 0.0019292564634237333, "loss": 0.237, "step": 13072 }, { "epoch": 0.023181282630261733, "grad_norm": 0.90234375, "learning_rate": 0.0019292333311840754, "loss": 0.2124, "step": 13074 }, { "epoch": 0.023184828795571547, "grad_norm": 0.306640625, "learning_rate": 0.0019292101953178327, "loss": 0.236, "step": 13076 }, { "epoch": 0.023188374960881365, "grad_norm": 0.8125, "learning_rate": 0.0019291870558251064, "loss": 0.2994, "step": 13078 }, { "epoch": 0.02319192112619118, "grad_norm": 0.6015625, "learning_rate": 0.0019291639127059974, "loss": 0.2153, "step": 13080 }, { "epoch": 0.023195467291500994, "grad_norm": 2.09375, "learning_rate": 0.0019291407659606072, "loss": 0.2804, "step": 13082 }, { "epoch": 0.02319901345681081, "grad_norm": 0.255859375, "learning_rate": 0.0019291176155890367, "loss": 0.2051, "step": 13084 }, { "epoch": 0.023202559622120623, "grad_norm": 0.27734375, "learning_rate": 0.0019290944615913874, "loss": 0.2339, "step": 13086 }, { "epoch": 0.02320610578743044, "grad_norm": 4.1875, "learning_rate": 0.0019290713039677608, "loss": 0.2361, "step": 13088 }, { "epoch": 0.023209651952740256, "grad_norm": 0.84375, "learning_rate": 0.0019290481427182576, "loss": 0.2502, "step": 13090 }, { "epoch": 0.02321319811805007, "grad_norm": 0.7109375, "learning_rate": 0.0019290249778429797, "loss": 0.4189, "step": 13092 }, { "epoch": 0.023216744283359885, "grad_norm": 4.71875, "learning_rate": 0.001929001809342028, "loss": 0.3903, "step": 13094 }, { "epoch": 0.0232202904486697, "grad_norm": 0.765625, "learning_rate": 0.001928978637215504, "loss": 0.2088, "step": 13096 }, { "epoch": 0.023223836613979514, "grad_norm": 0.51171875, "learning_rate": 0.0019289554614635085, "loss": 0.2329, "step": 13098 }, { "epoch": 0.023227382779289332, "grad_norm": 0.8125, "learning_rate": 0.0019289322820861437, "loss": 0.2424, "step": 13100 }, { "epoch": 0.023230928944599147, "grad_norm": 1.4296875, "learning_rate": 0.0019289090990835106, "loss": 0.2687, "step": 13102 }, { "epoch": 0.02323447510990896, "grad_norm": 0.59375, "learning_rate": 0.0019288859124557106, "loss": 0.1787, "step": 13104 }, { "epoch": 0.023238021275218776, "grad_norm": 0.302734375, "learning_rate": 0.001928862722202845, "loss": 0.2024, "step": 13106 }, { "epoch": 0.02324156744052859, "grad_norm": 0.55078125, "learning_rate": 0.0019288395283250154, "loss": 0.1788, "step": 13108 }, { "epoch": 0.023245113605838405, "grad_norm": 1.5234375, "learning_rate": 0.001928816330822323, "loss": 0.2198, "step": 13110 }, { "epoch": 0.023248659771148223, "grad_norm": 0.859375, "learning_rate": 0.0019287931296948695, "loss": 0.1872, "step": 13112 }, { "epoch": 0.023252205936458038, "grad_norm": 0.734375, "learning_rate": 0.0019287699249427562, "loss": 0.2031, "step": 13114 }, { "epoch": 0.023255752101767852, "grad_norm": 0.435546875, "learning_rate": 0.001928746716566085, "loss": 0.2621, "step": 13116 }, { "epoch": 0.023259298267077667, "grad_norm": 0.76171875, "learning_rate": 0.0019287235045649565, "loss": 0.4895, "step": 13118 }, { "epoch": 0.02326284443238748, "grad_norm": 2.515625, "learning_rate": 0.0019287002889394735, "loss": 0.451, "step": 13120 }, { "epoch": 0.0232663905976973, "grad_norm": 1.171875, "learning_rate": 0.001928677069689736, "loss": 0.1918, "step": 13122 }, { "epoch": 0.023269936763007114, "grad_norm": 1.8046875, "learning_rate": 0.0019286538468158468, "loss": 0.2922, "step": 13124 }, { "epoch": 0.02327348292831693, "grad_norm": 0.23828125, "learning_rate": 0.0019286306203179066, "loss": 0.1963, "step": 13126 }, { "epoch": 0.023277029093626743, "grad_norm": 1.2265625, "learning_rate": 0.001928607390196018, "loss": 0.3841, "step": 13128 }, { "epoch": 0.023280575258936557, "grad_norm": 0.84765625, "learning_rate": 0.0019285841564502817, "loss": 0.2603, "step": 13130 }, { "epoch": 0.023284121424246372, "grad_norm": 0.7265625, "learning_rate": 0.0019285609190807995, "loss": 0.2258, "step": 13132 }, { "epoch": 0.02328766758955619, "grad_norm": 1.9296875, "learning_rate": 0.0019285376780876734, "loss": 0.309, "step": 13134 }, { "epoch": 0.023291213754866005, "grad_norm": 0.98828125, "learning_rate": 0.0019285144334710046, "loss": 0.2555, "step": 13136 }, { "epoch": 0.02329475992017582, "grad_norm": 5.0625, "learning_rate": 0.001928491185230895, "loss": 0.2059, "step": 13138 }, { "epoch": 0.023298306085485634, "grad_norm": 1.125, "learning_rate": 0.001928467933367446, "loss": 0.2046, "step": 13140 }, { "epoch": 0.02330185225079545, "grad_norm": 0.388671875, "learning_rate": 0.0019284446778807596, "loss": 0.2326, "step": 13142 }, { "epoch": 0.023305398416105263, "grad_norm": 0.41796875, "learning_rate": 0.0019284214187709375, "loss": 0.2009, "step": 13144 }, { "epoch": 0.02330894458141508, "grad_norm": 0.91796875, "learning_rate": 0.001928398156038081, "loss": 0.2004, "step": 13146 }, { "epoch": 0.023312490746724895, "grad_norm": 1.796875, "learning_rate": 0.0019283748896822923, "loss": 0.2838, "step": 13148 }, { "epoch": 0.02331603691203471, "grad_norm": 0.5703125, "learning_rate": 0.0019283516197036732, "loss": 0.237, "step": 13150 }, { "epoch": 0.023319583077344525, "grad_norm": 1.875, "learning_rate": 0.0019283283461023254, "loss": 0.2416, "step": 13152 }, { "epoch": 0.02332312924265434, "grad_norm": 1.796875, "learning_rate": 0.00192830506887835, "loss": 0.2245, "step": 13154 }, { "epoch": 0.023326675407964157, "grad_norm": 1.1796875, "learning_rate": 0.0019282817880318498, "loss": 0.2192, "step": 13156 }, { "epoch": 0.02333022157327397, "grad_norm": 1.40625, "learning_rate": 0.0019282585035629264, "loss": 0.2011, "step": 13158 }, { "epoch": 0.023333767738583786, "grad_norm": 0.435546875, "learning_rate": 0.001928235215471681, "loss": 0.2881, "step": 13160 }, { "epoch": 0.0233373139038936, "grad_norm": 0.40234375, "learning_rate": 0.0019282119237582157, "loss": 0.1893, "step": 13162 }, { "epoch": 0.023340860069203415, "grad_norm": 1.046875, "learning_rate": 0.0019281886284226327, "loss": 0.2091, "step": 13164 }, { "epoch": 0.02334440623451323, "grad_norm": 0.259765625, "learning_rate": 0.001928165329465034, "loss": 0.1681, "step": 13166 }, { "epoch": 0.023347952399823048, "grad_norm": 0.447265625, "learning_rate": 0.0019281420268855212, "loss": 0.2459, "step": 13168 }, { "epoch": 0.023351498565132862, "grad_norm": 0.8359375, "learning_rate": 0.0019281187206841958, "loss": 0.2593, "step": 13170 }, { "epoch": 0.023355044730442677, "grad_norm": 0.32421875, "learning_rate": 0.0019280954108611605, "loss": 0.1949, "step": 13172 }, { "epoch": 0.02335859089575249, "grad_norm": 0.66796875, "learning_rate": 0.0019280720974165166, "loss": 0.1931, "step": 13174 }, { "epoch": 0.023362137061062306, "grad_norm": 1.0703125, "learning_rate": 0.0019280487803503665, "loss": 0.2145, "step": 13176 }, { "epoch": 0.02336568322637212, "grad_norm": 0.62109375, "learning_rate": 0.001928025459662812, "loss": 0.2072, "step": 13178 }, { "epoch": 0.02336922939168194, "grad_norm": 0.6796875, "learning_rate": 0.001928002135353955, "loss": 0.2401, "step": 13180 }, { "epoch": 0.023372775556991753, "grad_norm": 0.6796875, "learning_rate": 0.001927978807423898, "loss": 0.2412, "step": 13182 }, { "epoch": 0.023376321722301568, "grad_norm": 0.70703125, "learning_rate": 0.0019279554758727423, "loss": 0.2462, "step": 13184 }, { "epoch": 0.023379867887611382, "grad_norm": 2.328125, "learning_rate": 0.0019279321407005903, "loss": 0.2282, "step": 13186 }, { "epoch": 0.023383414052921197, "grad_norm": 0.55859375, "learning_rate": 0.0019279088019075442, "loss": 0.344, "step": 13188 }, { "epoch": 0.023386960218231015, "grad_norm": 6.5, "learning_rate": 0.001927885459493706, "loss": 0.2536, "step": 13190 }, { "epoch": 0.02339050638354083, "grad_norm": 0.8359375, "learning_rate": 0.0019278621134591776, "loss": 0.1924, "step": 13192 }, { "epoch": 0.023394052548850644, "grad_norm": 1.75, "learning_rate": 0.0019278387638040609, "loss": 0.3228, "step": 13194 }, { "epoch": 0.02339759871416046, "grad_norm": 0.380859375, "learning_rate": 0.0019278154105284587, "loss": 0.2052, "step": 13196 }, { "epoch": 0.023401144879470273, "grad_norm": 1.0078125, "learning_rate": 0.0019277920536324722, "loss": 0.2303, "step": 13198 }, { "epoch": 0.023404691044780088, "grad_norm": 0.453125, "learning_rate": 0.0019277686931162047, "loss": 0.283, "step": 13200 }, { "epoch": 0.023408237210089906, "grad_norm": 0.75, "learning_rate": 0.0019277453289797575, "loss": 0.2098, "step": 13202 }, { "epoch": 0.02341178337539972, "grad_norm": 0.447265625, "learning_rate": 0.001927721961223233, "loss": 0.2524, "step": 13204 }, { "epoch": 0.023415329540709535, "grad_norm": 0.61328125, "learning_rate": 0.0019276985898467336, "loss": 0.2307, "step": 13206 }, { "epoch": 0.02341887570601935, "grad_norm": 0.32421875, "learning_rate": 0.0019276752148503612, "loss": 0.227, "step": 13208 }, { "epoch": 0.023422421871329164, "grad_norm": 0.640625, "learning_rate": 0.0019276518362342184, "loss": 0.3369, "step": 13210 }, { "epoch": 0.02342596803663898, "grad_norm": 0.5234375, "learning_rate": 0.001927628453998407, "loss": 0.3536, "step": 13212 }, { "epoch": 0.023429514201948796, "grad_norm": 8.25, "learning_rate": 0.0019276050681430294, "loss": 0.244, "step": 13214 }, { "epoch": 0.02343306036725861, "grad_norm": 0.890625, "learning_rate": 0.0019275816786681883, "loss": 0.1726, "step": 13216 }, { "epoch": 0.023436606532568426, "grad_norm": 1.2109375, "learning_rate": 0.0019275582855739855, "loss": 0.2719, "step": 13218 }, { "epoch": 0.02344015269787824, "grad_norm": 0.640625, "learning_rate": 0.0019275348888605232, "loss": 0.2649, "step": 13220 }, { "epoch": 0.023443698863188055, "grad_norm": 1.5, "learning_rate": 0.0019275114885279046, "loss": 0.2871, "step": 13222 }, { "epoch": 0.023447245028497873, "grad_norm": 0.384765625, "learning_rate": 0.001927488084576231, "loss": 0.3739, "step": 13224 }, { "epoch": 0.023450791193807687, "grad_norm": 1.5546875, "learning_rate": 0.0019274646770056053, "loss": 0.2474, "step": 13226 }, { "epoch": 0.023454337359117502, "grad_norm": 2.859375, "learning_rate": 0.0019274412658161293, "loss": 0.3691, "step": 13228 }, { "epoch": 0.023457883524427316, "grad_norm": 0.392578125, "learning_rate": 0.0019274178510079064, "loss": 0.256, "step": 13230 }, { "epoch": 0.02346142968973713, "grad_norm": 1.5078125, "learning_rate": 0.0019273944325810383, "loss": 0.4181, "step": 13232 }, { "epoch": 0.023464975855046945, "grad_norm": 0.5546875, "learning_rate": 0.0019273710105356275, "loss": 0.1921, "step": 13234 }, { "epoch": 0.023468522020356763, "grad_norm": 0.62890625, "learning_rate": 0.0019273475848717765, "loss": 0.2186, "step": 13236 }, { "epoch": 0.023472068185666578, "grad_norm": 0.5625, "learning_rate": 0.0019273241555895877, "loss": 0.2411, "step": 13238 }, { "epoch": 0.023475614350976393, "grad_norm": 0.49609375, "learning_rate": 0.001927300722689164, "loss": 0.2295, "step": 13240 }, { "epoch": 0.023479160516286207, "grad_norm": 0.4921875, "learning_rate": 0.001927277286170607, "loss": 0.2778, "step": 13242 }, { "epoch": 0.02348270668159602, "grad_norm": 0.703125, "learning_rate": 0.0019272538460340198, "loss": 0.2496, "step": 13244 }, { "epoch": 0.023486252846905836, "grad_norm": 0.27734375, "learning_rate": 0.001927230402279505, "loss": 0.2786, "step": 13246 }, { "epoch": 0.023489799012215654, "grad_norm": 0.53125, "learning_rate": 0.0019272069549071645, "loss": 0.2335, "step": 13248 }, { "epoch": 0.02349334517752547, "grad_norm": 0.49609375, "learning_rate": 0.0019271835039171017, "loss": 0.2211, "step": 13250 }, { "epoch": 0.023496891342835283, "grad_norm": 0.5546875, "learning_rate": 0.0019271600493094186, "loss": 0.234, "step": 13252 }, { "epoch": 0.023500437508145098, "grad_norm": 1.0390625, "learning_rate": 0.001927136591084218, "loss": 0.2217, "step": 13254 }, { "epoch": 0.023503983673454912, "grad_norm": 1.1171875, "learning_rate": 0.0019271131292416025, "loss": 0.2276, "step": 13256 }, { "epoch": 0.023507529838764727, "grad_norm": 0.439453125, "learning_rate": 0.0019270896637816743, "loss": 0.1903, "step": 13258 }, { "epoch": 0.023511076004074545, "grad_norm": 0.859375, "learning_rate": 0.0019270661947045362, "loss": 0.1778, "step": 13260 }, { "epoch": 0.02351462216938436, "grad_norm": 0.71484375, "learning_rate": 0.0019270427220102915, "loss": 0.2327, "step": 13262 }, { "epoch": 0.023518168334694174, "grad_norm": 0.609375, "learning_rate": 0.0019270192456990421, "loss": 0.21, "step": 13264 }, { "epoch": 0.02352171450000399, "grad_norm": 0.40625, "learning_rate": 0.001926995765770891, "loss": 0.2364, "step": 13266 }, { "epoch": 0.023525260665313803, "grad_norm": 0.93359375, "learning_rate": 0.0019269722822259406, "loss": 0.2732, "step": 13268 }, { "epoch": 0.02352880683062362, "grad_norm": 0.72265625, "learning_rate": 0.0019269487950642939, "loss": 0.2324, "step": 13270 }, { "epoch": 0.023532352995933436, "grad_norm": 0.59765625, "learning_rate": 0.0019269253042860537, "loss": 0.1911, "step": 13272 }, { "epoch": 0.02353589916124325, "grad_norm": 2.015625, "learning_rate": 0.0019269018098913224, "loss": 0.2773, "step": 13274 }, { "epoch": 0.023539445326553065, "grad_norm": 0.56640625, "learning_rate": 0.0019268783118802026, "loss": 0.1592, "step": 13276 }, { "epoch": 0.02354299149186288, "grad_norm": 1.9609375, "learning_rate": 0.0019268548102527976, "loss": 0.3859, "step": 13278 }, { "epoch": 0.023546537657172694, "grad_norm": 0.291015625, "learning_rate": 0.00192683130500921, "loss": 0.2417, "step": 13280 }, { "epoch": 0.023550083822482512, "grad_norm": 0.55078125, "learning_rate": 0.001926807796149543, "loss": 0.2277, "step": 13282 }, { "epoch": 0.023553629987792327, "grad_norm": 0.55078125, "learning_rate": 0.0019267842836738986, "loss": 0.2861, "step": 13284 }, { "epoch": 0.02355717615310214, "grad_norm": 0.875, "learning_rate": 0.0019267607675823796, "loss": 0.2087, "step": 13286 }, { "epoch": 0.023560722318411956, "grad_norm": 0.56640625, "learning_rate": 0.0019267372478750898, "loss": 0.2332, "step": 13288 }, { "epoch": 0.02356426848372177, "grad_norm": 0.953125, "learning_rate": 0.0019267137245521312, "loss": 0.1913, "step": 13290 }, { "epoch": 0.023567814649031585, "grad_norm": 0.349609375, "learning_rate": 0.001926690197613607, "loss": 0.2295, "step": 13292 }, { "epoch": 0.023571360814341403, "grad_norm": 0.390625, "learning_rate": 0.0019266666670596203, "loss": 0.2056, "step": 13294 }, { "epoch": 0.023574906979651217, "grad_norm": 0.2890625, "learning_rate": 0.0019266431328902735, "loss": 0.1952, "step": 13296 }, { "epoch": 0.023578453144961032, "grad_norm": 0.83984375, "learning_rate": 0.0019266195951056697, "loss": 0.2857, "step": 13298 }, { "epoch": 0.023581999310270847, "grad_norm": 1.09375, "learning_rate": 0.0019265960537059124, "loss": 0.2769, "step": 13300 }, { "epoch": 0.02358554547558066, "grad_norm": 0.515625, "learning_rate": 0.0019265725086911039, "loss": 0.2274, "step": 13302 }, { "epoch": 0.02358909164089048, "grad_norm": 0.6328125, "learning_rate": 0.0019265489600613472, "loss": 0.2535, "step": 13304 }, { "epoch": 0.023592637806200294, "grad_norm": 1.171875, "learning_rate": 0.0019265254078167458, "loss": 0.2675, "step": 13306 }, { "epoch": 0.023596183971510108, "grad_norm": 0.953125, "learning_rate": 0.0019265018519574021, "loss": 0.2195, "step": 13308 }, { "epoch": 0.023599730136819923, "grad_norm": 0.5546875, "learning_rate": 0.0019264782924834193, "loss": 0.2241, "step": 13310 }, { "epoch": 0.023603276302129737, "grad_norm": 0.41015625, "learning_rate": 0.0019264547293949008, "loss": 0.2474, "step": 13312 }, { "epoch": 0.023606822467439552, "grad_norm": 0.302734375, "learning_rate": 0.0019264311626919493, "loss": 0.1809, "step": 13314 }, { "epoch": 0.02361036863274937, "grad_norm": 1.1015625, "learning_rate": 0.001926407592374668, "loss": 0.1892, "step": 13316 }, { "epoch": 0.023613914798059184, "grad_norm": 0.7265625, "learning_rate": 0.00192638401844316, "loss": 0.2106, "step": 13318 }, { "epoch": 0.023617460963369, "grad_norm": 0.330078125, "learning_rate": 0.0019263604408975281, "loss": 0.1831, "step": 13320 }, { "epoch": 0.023621007128678814, "grad_norm": 0.423828125, "learning_rate": 0.0019263368597378756, "loss": 0.1624, "step": 13322 }, { "epoch": 0.023624553293988628, "grad_norm": 0.396484375, "learning_rate": 0.0019263132749643057, "loss": 0.6868, "step": 13324 }, { "epoch": 0.023628099459298443, "grad_norm": 0.353515625, "learning_rate": 0.0019262896865769217, "loss": 0.2172, "step": 13326 }, { "epoch": 0.02363164562460826, "grad_norm": 0.392578125, "learning_rate": 0.0019262660945758266, "loss": 0.1942, "step": 13328 }, { "epoch": 0.023635191789918075, "grad_norm": 0.26953125, "learning_rate": 0.0019262424989611234, "loss": 0.2103, "step": 13330 }, { "epoch": 0.02363873795522789, "grad_norm": 0.82421875, "learning_rate": 0.0019262188997329155, "loss": 0.2087, "step": 13332 }, { "epoch": 0.023642284120537704, "grad_norm": 0.470703125, "learning_rate": 0.0019261952968913061, "loss": 0.187, "step": 13334 }, { "epoch": 0.02364583028584752, "grad_norm": 6.0, "learning_rate": 0.0019261716904363981, "loss": 0.3445, "step": 13336 }, { "epoch": 0.023649376451157337, "grad_norm": 0.5, "learning_rate": 0.0019261480803682953, "loss": 0.3638, "step": 13338 }, { "epoch": 0.02365292261646715, "grad_norm": 0.296875, "learning_rate": 0.0019261244666871008, "loss": 0.2022, "step": 13340 }, { "epoch": 0.023656468781776966, "grad_norm": 0.91015625, "learning_rate": 0.0019261008493929176, "loss": 0.2352, "step": 13342 }, { "epoch": 0.02366001494708678, "grad_norm": 0.86328125, "learning_rate": 0.001926077228485849, "loss": 0.1972, "step": 13344 }, { "epoch": 0.023663561112396595, "grad_norm": 0.30078125, "learning_rate": 0.0019260536039659986, "loss": 0.1952, "step": 13346 }, { "epoch": 0.02366710727770641, "grad_norm": 3.546875, "learning_rate": 0.0019260299758334695, "loss": 0.2752, "step": 13348 }, { "epoch": 0.023670653443016228, "grad_norm": 0.89453125, "learning_rate": 0.001926006344088365, "loss": 0.2709, "step": 13350 }, { "epoch": 0.023674199608326042, "grad_norm": 1.765625, "learning_rate": 0.0019259827087307888, "loss": 0.2542, "step": 13352 }, { "epoch": 0.023677745773635857, "grad_norm": 0.31640625, "learning_rate": 0.0019259590697608437, "loss": 0.219, "step": 13354 }, { "epoch": 0.02368129193894567, "grad_norm": 0.765625, "learning_rate": 0.0019259354271786338, "loss": 0.2112, "step": 13356 }, { "epoch": 0.023684838104255486, "grad_norm": 0.56640625, "learning_rate": 0.0019259117809842618, "loss": 0.237, "step": 13358 }, { "epoch": 0.0236883842695653, "grad_norm": 4.875, "learning_rate": 0.0019258881311778318, "loss": 0.2161, "step": 13360 }, { "epoch": 0.02369193043487512, "grad_norm": 0.64453125, "learning_rate": 0.0019258644777594464, "loss": 0.2566, "step": 13362 }, { "epoch": 0.023695476600184933, "grad_norm": 0.99609375, "learning_rate": 0.0019258408207292095, "loss": 0.3512, "step": 13364 }, { "epoch": 0.023699022765494748, "grad_norm": 0.365234375, "learning_rate": 0.001925817160087225, "loss": 0.2509, "step": 13366 }, { "epoch": 0.023702568930804562, "grad_norm": 4.25, "learning_rate": 0.0019257934958335956, "loss": 0.4774, "step": 13368 }, { "epoch": 0.023706115096114377, "grad_norm": 0.349609375, "learning_rate": 0.0019257698279684253, "loss": 0.2623, "step": 13370 }, { "epoch": 0.023709661261424195, "grad_norm": 0.81640625, "learning_rate": 0.001925746156491817, "loss": 0.234, "step": 13372 }, { "epoch": 0.02371320742673401, "grad_norm": 10.0, "learning_rate": 0.0019257224814038753, "loss": 0.2755, "step": 13374 }, { "epoch": 0.023716753592043824, "grad_norm": 0.671875, "learning_rate": 0.0019256988027047028, "loss": 0.3044, "step": 13376 }, { "epoch": 0.02372029975735364, "grad_norm": 0.90234375, "learning_rate": 0.0019256751203944032, "loss": 0.2837, "step": 13378 }, { "epoch": 0.023723845922663453, "grad_norm": 0.69140625, "learning_rate": 0.0019256514344730804, "loss": 0.2667, "step": 13380 }, { "epoch": 0.023727392087973267, "grad_norm": 0.625, "learning_rate": 0.0019256277449408378, "loss": 0.2156, "step": 13382 }, { "epoch": 0.023730938253283086, "grad_norm": 1.203125, "learning_rate": 0.001925604051797779, "loss": 0.2001, "step": 13384 }, { "epoch": 0.0237344844185929, "grad_norm": 0.47265625, "learning_rate": 0.0019255803550440077, "loss": 0.5126, "step": 13386 }, { "epoch": 0.023738030583902715, "grad_norm": 0.494140625, "learning_rate": 0.0019255566546796275, "loss": 0.2626, "step": 13388 }, { "epoch": 0.02374157674921253, "grad_norm": 0.7578125, "learning_rate": 0.001925532950704742, "loss": 0.2491, "step": 13390 }, { "epoch": 0.023745122914522344, "grad_norm": 0.416015625, "learning_rate": 0.0019255092431194549, "loss": 0.1858, "step": 13392 }, { "epoch": 0.02374866907983216, "grad_norm": 1.0703125, "learning_rate": 0.0019254855319238695, "loss": 0.2308, "step": 13394 }, { "epoch": 0.023752215245141976, "grad_norm": 0.578125, "learning_rate": 0.0019254618171180905, "loss": 0.1833, "step": 13396 }, { "epoch": 0.02375576141045179, "grad_norm": 0.380859375, "learning_rate": 0.001925438098702221, "loss": 0.1903, "step": 13398 }, { "epoch": 0.023759307575761605, "grad_norm": 1.0703125, "learning_rate": 0.0019254143766763642, "loss": 0.2423, "step": 13400 }, { "epoch": 0.02376285374107142, "grad_norm": 0.3359375, "learning_rate": 0.0019253906510406248, "loss": 0.2154, "step": 13402 }, { "epoch": 0.023766399906381235, "grad_norm": 0.490234375, "learning_rate": 0.0019253669217951057, "loss": 0.4263, "step": 13404 }, { "epoch": 0.023769946071691053, "grad_norm": 0.59765625, "learning_rate": 0.0019253431889399116, "loss": 0.286, "step": 13406 }, { "epoch": 0.023773492237000867, "grad_norm": 3.421875, "learning_rate": 0.0019253194524751457, "loss": 0.3845, "step": 13408 }, { "epoch": 0.02377703840231068, "grad_norm": 1.0859375, "learning_rate": 0.0019252957124009115, "loss": 0.1887, "step": 13410 }, { "epoch": 0.023780584567620496, "grad_norm": 0.515625, "learning_rate": 0.0019252719687173137, "loss": 0.196, "step": 13412 }, { "epoch": 0.02378413073293031, "grad_norm": 0.51171875, "learning_rate": 0.0019252482214244554, "loss": 0.2476, "step": 13414 }, { "epoch": 0.023787676898240125, "grad_norm": 0.78125, "learning_rate": 0.001925224470522441, "loss": 0.2232, "step": 13416 }, { "epoch": 0.023791223063549943, "grad_norm": 0.416015625, "learning_rate": 0.0019252007160113743, "loss": 0.2808, "step": 13418 }, { "epoch": 0.023794769228859758, "grad_norm": 0.55859375, "learning_rate": 0.0019251769578913585, "loss": 0.253, "step": 13420 }, { "epoch": 0.023798315394169572, "grad_norm": 0.55859375, "learning_rate": 0.0019251531961624981, "loss": 0.3107, "step": 13422 }, { "epoch": 0.023801861559479387, "grad_norm": 0.298828125, "learning_rate": 0.001925129430824897, "loss": 0.2207, "step": 13424 }, { "epoch": 0.0238054077247892, "grad_norm": 0.76953125, "learning_rate": 0.0019251056618786594, "loss": 0.2049, "step": 13426 }, { "epoch": 0.023808953890099016, "grad_norm": 1.0234375, "learning_rate": 0.0019250818893238885, "loss": 0.2018, "step": 13428 }, { "epoch": 0.023812500055408834, "grad_norm": 0.3203125, "learning_rate": 0.0019250581131606888, "loss": 0.2755, "step": 13430 }, { "epoch": 0.02381604622071865, "grad_norm": 0.427734375, "learning_rate": 0.001925034333389164, "loss": 0.2266, "step": 13432 }, { "epoch": 0.023819592386028463, "grad_norm": 1.421875, "learning_rate": 0.0019250105500094184, "loss": 0.222, "step": 13434 }, { "epoch": 0.023823138551338278, "grad_norm": 1.140625, "learning_rate": 0.0019249867630215562, "loss": 0.188, "step": 13436 }, { "epoch": 0.023826684716648092, "grad_norm": 0.6640625, "learning_rate": 0.0019249629724256805, "loss": 0.2158, "step": 13438 }, { "epoch": 0.02383023088195791, "grad_norm": 1.1875, "learning_rate": 0.0019249391782218963, "loss": 0.192, "step": 13440 }, { "epoch": 0.023833777047267725, "grad_norm": 0.703125, "learning_rate": 0.0019249153804103076, "loss": 0.1922, "step": 13442 }, { "epoch": 0.02383732321257754, "grad_norm": 0.62890625, "learning_rate": 0.0019248915789910174, "loss": 0.2622, "step": 13444 }, { "epoch": 0.023840869377887354, "grad_norm": 0.451171875, "learning_rate": 0.0019248677739641313, "loss": 0.2145, "step": 13446 }, { "epoch": 0.02384441554319717, "grad_norm": 1.0703125, "learning_rate": 0.0019248439653297524, "loss": 0.2629, "step": 13448 }, { "epoch": 0.023847961708506983, "grad_norm": 3.296875, "learning_rate": 0.0019248201530879853, "loss": 0.4618, "step": 13450 }, { "epoch": 0.0238515078738168, "grad_norm": 1.890625, "learning_rate": 0.0019247963372389337, "loss": 0.2324, "step": 13452 }, { "epoch": 0.023855054039126616, "grad_norm": 0.5234375, "learning_rate": 0.0019247725177827023, "loss": 0.238, "step": 13454 }, { "epoch": 0.02385860020443643, "grad_norm": 0.31640625, "learning_rate": 0.0019247486947193946, "loss": 0.1965, "step": 13456 }, { "epoch": 0.023862146369746245, "grad_norm": 0.765625, "learning_rate": 0.0019247248680491155, "loss": 0.4042, "step": 13458 }, { "epoch": 0.02386569253505606, "grad_norm": 0.8984375, "learning_rate": 0.001924701037771969, "loss": 0.2071, "step": 13460 }, { "epoch": 0.023869238700365874, "grad_norm": 0.828125, "learning_rate": 0.001924677203888059, "loss": 0.2464, "step": 13462 }, { "epoch": 0.023872784865675692, "grad_norm": 0.21484375, "learning_rate": 0.00192465336639749, "loss": 0.1901, "step": 13464 }, { "epoch": 0.023876331030985506, "grad_norm": 0.298828125, "learning_rate": 0.001924629525300366, "loss": 0.1725, "step": 13466 }, { "epoch": 0.02387987719629532, "grad_norm": 1.6015625, "learning_rate": 0.0019246056805967916, "loss": 0.2647, "step": 13468 }, { "epoch": 0.023883423361605136, "grad_norm": 0.84375, "learning_rate": 0.0019245818322868708, "loss": 0.2205, "step": 13470 }, { "epoch": 0.02388696952691495, "grad_norm": 0.75, "learning_rate": 0.0019245579803707086, "loss": 0.3807, "step": 13472 }, { "epoch": 0.023890515692224768, "grad_norm": 1.234375, "learning_rate": 0.001924534124848408, "loss": 0.3317, "step": 13474 }, { "epoch": 0.023894061857534583, "grad_norm": 1.0078125, "learning_rate": 0.0019245102657200746, "loss": 0.2225, "step": 13476 }, { "epoch": 0.023897608022844397, "grad_norm": 0.41015625, "learning_rate": 0.001924486402985812, "loss": 0.173, "step": 13478 }, { "epoch": 0.023901154188154212, "grad_norm": 0.62109375, "learning_rate": 0.0019244625366457249, "loss": 0.2371, "step": 13480 }, { "epoch": 0.023904700353464026, "grad_norm": 0.7265625, "learning_rate": 0.0019244386666999174, "loss": 0.22, "step": 13482 }, { "epoch": 0.02390824651877384, "grad_norm": 0.625, "learning_rate": 0.001924414793148494, "loss": 0.4865, "step": 13484 }, { "epoch": 0.02391179268408366, "grad_norm": 0.29296875, "learning_rate": 0.0019243909159915597, "loss": 0.2268, "step": 13486 }, { "epoch": 0.023915338849393473, "grad_norm": 0.6796875, "learning_rate": 0.0019243670352292182, "loss": 0.2388, "step": 13488 }, { "epoch": 0.023918885014703288, "grad_norm": 0.515625, "learning_rate": 0.0019243431508615737, "loss": 0.193, "step": 13490 }, { "epoch": 0.023922431180013103, "grad_norm": 1.625, "learning_rate": 0.0019243192628887315, "loss": 0.2962, "step": 13492 }, { "epoch": 0.023925977345322917, "grad_norm": 1.546875, "learning_rate": 0.0019242953713107954, "loss": 0.2897, "step": 13494 }, { "epoch": 0.02392952351063273, "grad_norm": 0.283203125, "learning_rate": 0.0019242714761278704, "loss": 0.2944, "step": 13496 }, { "epoch": 0.02393306967594255, "grad_norm": 0.69921875, "learning_rate": 0.0019242475773400608, "loss": 0.1719, "step": 13498 }, { "epoch": 0.023936615841252364, "grad_norm": 0.478515625, "learning_rate": 0.0019242236749474706, "loss": 0.2781, "step": 13500 }, { "epoch": 0.02394016200656218, "grad_norm": 0.400390625, "learning_rate": 0.0019241997689502053, "loss": 0.2557, "step": 13502 }, { "epoch": 0.023943708171871993, "grad_norm": 0.400390625, "learning_rate": 0.001924175859348369, "loss": 0.2425, "step": 13504 }, { "epoch": 0.023947254337181808, "grad_norm": 0.77734375, "learning_rate": 0.0019241519461420658, "loss": 0.4107, "step": 13506 }, { "epoch": 0.023950800502491626, "grad_norm": 0.98828125, "learning_rate": 0.0019241280293314012, "loss": 0.2165, "step": 13508 }, { "epoch": 0.02395434666780144, "grad_norm": 0.423828125, "learning_rate": 0.0019241041089164787, "loss": 0.2631, "step": 13510 }, { "epoch": 0.023957892833111255, "grad_norm": 0.75, "learning_rate": 0.0019240801848974041, "loss": 0.2622, "step": 13512 }, { "epoch": 0.02396143899842107, "grad_norm": 0.265625, "learning_rate": 0.0019240562572742812, "loss": 0.2273, "step": 13514 }, { "epoch": 0.023964985163730884, "grad_norm": 5.40625, "learning_rate": 0.0019240323260472148, "loss": 0.3926, "step": 13516 }, { "epoch": 0.0239685313290407, "grad_norm": 1.7890625, "learning_rate": 0.00192400839121631, "loss": 0.4296, "step": 13518 }, { "epoch": 0.023972077494350517, "grad_norm": 0.46875, "learning_rate": 0.001923984452781671, "loss": 0.3169, "step": 13520 }, { "epoch": 0.02397562365966033, "grad_norm": 2.71875, "learning_rate": 0.0019239605107434024, "loss": 0.2553, "step": 13522 }, { "epoch": 0.023979169824970146, "grad_norm": 0.69140625, "learning_rate": 0.0019239365651016092, "loss": 0.1952, "step": 13524 }, { "epoch": 0.02398271599027996, "grad_norm": 0.75, "learning_rate": 0.001923912615856396, "loss": 0.253, "step": 13526 }, { "epoch": 0.023986262155589775, "grad_norm": 0.55859375, "learning_rate": 0.0019238886630078681, "loss": 0.179, "step": 13528 }, { "epoch": 0.02398980832089959, "grad_norm": 0.326171875, "learning_rate": 0.0019238647065561295, "loss": 0.2136, "step": 13530 }, { "epoch": 0.023993354486209408, "grad_norm": 0.2060546875, "learning_rate": 0.001923840746501285, "loss": 0.184, "step": 13532 }, { "epoch": 0.023996900651519222, "grad_norm": 20.5, "learning_rate": 0.0019238167828434399, "loss": 0.2101, "step": 13534 }, { "epoch": 0.024000446816829037, "grad_norm": 0.71484375, "learning_rate": 0.0019237928155826985, "loss": 0.1452, "step": 13536 }, { "epoch": 0.02400399298213885, "grad_norm": 2.53125, "learning_rate": 0.001923768844719166, "loss": 0.2051, "step": 13538 }, { "epoch": 0.024007539147448666, "grad_norm": 2.671875, "learning_rate": 0.001923744870252947, "loss": 0.3055, "step": 13540 }, { "epoch": 0.024011085312758484, "grad_norm": 0.49609375, "learning_rate": 0.0019237208921841465, "loss": 0.2392, "step": 13542 }, { "epoch": 0.0240146314780683, "grad_norm": 0.51171875, "learning_rate": 0.0019236969105128692, "loss": 0.6113, "step": 13544 }, { "epoch": 0.024018177643378113, "grad_norm": 0.64453125, "learning_rate": 0.00192367292523922, "loss": 0.2406, "step": 13546 }, { "epoch": 0.024021723808687927, "grad_norm": 0.5, "learning_rate": 0.001923648936363304, "loss": 0.2404, "step": 13548 }, { "epoch": 0.024025269973997742, "grad_norm": 0.41796875, "learning_rate": 0.001923624943885226, "loss": 0.2664, "step": 13550 }, { "epoch": 0.024028816139307557, "grad_norm": 3.453125, "learning_rate": 0.001923600947805091, "loss": 0.2874, "step": 13552 }, { "epoch": 0.024032362304617375, "grad_norm": 0.88671875, "learning_rate": 0.0019235769481230037, "loss": 0.244, "step": 13554 }, { "epoch": 0.02403590846992719, "grad_norm": 0.1953125, "learning_rate": 0.0019235529448390694, "loss": 0.1692, "step": 13556 }, { "epoch": 0.024039454635237004, "grad_norm": 0.365234375, "learning_rate": 0.0019235289379533928, "loss": 0.2704, "step": 13558 }, { "epoch": 0.024043000800546818, "grad_norm": 0.2470703125, "learning_rate": 0.0019235049274660787, "loss": 0.181, "step": 13560 }, { "epoch": 0.024046546965856633, "grad_norm": 0.296875, "learning_rate": 0.0019234809133772326, "loss": 0.1587, "step": 13562 }, { "epoch": 0.024050093131166447, "grad_norm": 0.48046875, "learning_rate": 0.0019234568956869595, "loss": 0.3816, "step": 13564 }, { "epoch": 0.024053639296476265, "grad_norm": 0.984375, "learning_rate": 0.0019234328743953642, "loss": 0.3782, "step": 13566 }, { "epoch": 0.02405718546178608, "grad_norm": 0.443359375, "learning_rate": 0.0019234088495025518, "loss": 0.2962, "step": 13568 }, { "epoch": 0.024060731627095894, "grad_norm": 0.490234375, "learning_rate": 0.0019233848210086272, "loss": 0.1817, "step": 13570 }, { "epoch": 0.02406427779240571, "grad_norm": 1.15625, "learning_rate": 0.0019233607889136957, "loss": 0.2801, "step": 13572 }, { "epoch": 0.024067823957715524, "grad_norm": 1.0, "learning_rate": 0.0019233367532178625, "loss": 0.3423, "step": 13574 }, { "epoch": 0.02407137012302534, "grad_norm": 0.36328125, "learning_rate": 0.0019233127139212326, "loss": 0.2034, "step": 13576 }, { "epoch": 0.024074916288335156, "grad_norm": 4.71875, "learning_rate": 0.0019232886710239111, "loss": 0.4523, "step": 13578 }, { "epoch": 0.02407846245364497, "grad_norm": 0.40234375, "learning_rate": 0.001923264624526003, "loss": 0.2173, "step": 13580 }, { "epoch": 0.024082008618954785, "grad_norm": 0.34765625, "learning_rate": 0.0019232405744276139, "loss": 0.2106, "step": 13582 }, { "epoch": 0.0240855547842646, "grad_norm": 1.5390625, "learning_rate": 0.0019232165207288487, "loss": 0.2558, "step": 13584 }, { "epoch": 0.024089100949574414, "grad_norm": 0.98046875, "learning_rate": 0.0019231924634298123, "loss": 0.2368, "step": 13586 }, { "epoch": 0.024092647114884232, "grad_norm": 0.3359375, "learning_rate": 0.0019231684025306107, "loss": 0.3319, "step": 13588 }, { "epoch": 0.024096193280194047, "grad_norm": 0.7109375, "learning_rate": 0.0019231443380313483, "loss": 0.3243, "step": 13590 }, { "epoch": 0.02409973944550386, "grad_norm": 1.4375, "learning_rate": 0.0019231202699321309, "loss": 0.2435, "step": 13592 }, { "epoch": 0.024103285610813676, "grad_norm": 0.57421875, "learning_rate": 0.0019230961982330634, "loss": 0.2668, "step": 13594 }, { "epoch": 0.02410683177612349, "grad_norm": 0.470703125, "learning_rate": 0.0019230721229342512, "loss": 0.212, "step": 13596 }, { "epoch": 0.024110377941433305, "grad_norm": 0.34375, "learning_rate": 0.0019230480440358, "loss": 0.2123, "step": 13598 }, { "epoch": 0.024113924106743123, "grad_norm": 0.470703125, "learning_rate": 0.0019230239615378145, "loss": 0.2428, "step": 13600 }, { "epoch": 0.024117470272052938, "grad_norm": 0.2734375, "learning_rate": 0.0019229998754404002, "loss": 0.466, "step": 13602 }, { "epoch": 0.024121016437362752, "grad_norm": 0.53125, "learning_rate": 0.0019229757857436628, "loss": 0.2363, "step": 13604 }, { "epoch": 0.024124562602672567, "grad_norm": 0.357421875, "learning_rate": 0.0019229516924477068, "loss": 0.2132, "step": 13606 }, { "epoch": 0.02412810876798238, "grad_norm": 0.73046875, "learning_rate": 0.0019229275955526387, "loss": 0.2369, "step": 13608 }, { "epoch": 0.0241316549332922, "grad_norm": 0.52734375, "learning_rate": 0.001922903495058563, "loss": 0.1827, "step": 13610 }, { "epoch": 0.024135201098602014, "grad_norm": 0.66015625, "learning_rate": 0.0019228793909655856, "loss": 0.2831, "step": 13612 }, { "epoch": 0.02413874726391183, "grad_norm": 0.4453125, "learning_rate": 0.0019228552832738115, "loss": 0.2283, "step": 13614 }, { "epoch": 0.024142293429221643, "grad_norm": 0.326171875, "learning_rate": 0.0019228311719833465, "loss": 0.1682, "step": 13616 }, { "epoch": 0.024145839594531458, "grad_norm": 0.3984375, "learning_rate": 0.0019228070570942958, "loss": 0.1725, "step": 13618 }, { "epoch": 0.024149385759841272, "grad_norm": 0.90234375, "learning_rate": 0.001922782938606765, "loss": 0.1719, "step": 13620 }, { "epoch": 0.02415293192515109, "grad_norm": 0.64453125, "learning_rate": 0.00192275881652086, "loss": 0.2025, "step": 13622 }, { "epoch": 0.024156478090460905, "grad_norm": 1.2109375, "learning_rate": 0.0019227346908366851, "loss": 0.2257, "step": 13624 }, { "epoch": 0.02416002425577072, "grad_norm": 0.55859375, "learning_rate": 0.001922710561554347, "loss": 0.2611, "step": 13626 }, { "epoch": 0.024163570421080534, "grad_norm": 0.94921875, "learning_rate": 0.0019226864286739508, "loss": 0.2491, "step": 13628 }, { "epoch": 0.02416711658639035, "grad_norm": 1.6484375, "learning_rate": 0.0019226622921956018, "loss": 0.1742, "step": 13630 }, { "epoch": 0.024170662751700163, "grad_norm": 0.640625, "learning_rate": 0.0019226381521194058, "loss": 0.2892, "step": 13632 }, { "epoch": 0.02417420891700998, "grad_norm": 0.87890625, "learning_rate": 0.0019226140084454687, "loss": 0.2418, "step": 13634 }, { "epoch": 0.024177755082319796, "grad_norm": 0.330078125, "learning_rate": 0.0019225898611738954, "loss": 0.1863, "step": 13636 }, { "epoch": 0.02418130124762961, "grad_norm": 0.408203125, "learning_rate": 0.0019225657103047922, "loss": 0.1864, "step": 13638 }, { "epoch": 0.024184847412939425, "grad_norm": 0.6484375, "learning_rate": 0.0019225415558382641, "loss": 0.2033, "step": 13640 }, { "epoch": 0.02418839357824924, "grad_norm": 0.8828125, "learning_rate": 0.0019225173977744172, "loss": 0.2221, "step": 13642 }, { "epoch": 0.024191939743559057, "grad_norm": 0.75, "learning_rate": 0.001922493236113357, "loss": 0.2803, "step": 13644 }, { "epoch": 0.024195485908868872, "grad_norm": 0.498046875, "learning_rate": 0.0019224690708551891, "loss": 0.2529, "step": 13646 }, { "epoch": 0.024199032074178686, "grad_norm": 0.59375, "learning_rate": 0.0019224449020000189, "loss": 0.2156, "step": 13648 }, { "epoch": 0.0242025782394885, "grad_norm": 2.078125, "learning_rate": 0.001922420729547953, "loss": 0.2818, "step": 13650 }, { "epoch": 0.024206124404798315, "grad_norm": 0.2578125, "learning_rate": 0.0019223965534990962, "loss": 0.3445, "step": 13652 }, { "epoch": 0.02420967057010813, "grad_norm": 1.9296875, "learning_rate": 0.0019223723738535546, "loss": 0.2911, "step": 13654 }, { "epoch": 0.024213216735417948, "grad_norm": 1.9765625, "learning_rate": 0.001922348190611434, "loss": 0.4146, "step": 13656 }, { "epoch": 0.024216762900727763, "grad_norm": 0.6875, "learning_rate": 0.00192232400377284, "loss": 0.2885, "step": 13658 }, { "epoch": 0.024220309066037577, "grad_norm": 0.77734375, "learning_rate": 0.0019222998133378785, "loss": 0.2697, "step": 13660 }, { "epoch": 0.02422385523134739, "grad_norm": 3.90625, "learning_rate": 0.0019222756193066552, "loss": 0.1908, "step": 13662 }, { "epoch": 0.024227401396657206, "grad_norm": 0.439453125, "learning_rate": 0.0019222514216792762, "loss": 0.3814, "step": 13664 }, { "epoch": 0.02423094756196702, "grad_norm": 2.34375, "learning_rate": 0.001922227220455847, "loss": 0.3804, "step": 13666 }, { "epoch": 0.02423449372727684, "grad_norm": 0.275390625, "learning_rate": 0.0019222030156364736, "loss": 0.158, "step": 13668 }, { "epoch": 0.024238039892586653, "grad_norm": 1.40625, "learning_rate": 0.0019221788072212615, "loss": 0.2145, "step": 13670 }, { "epoch": 0.024241586057896468, "grad_norm": 0.6171875, "learning_rate": 0.0019221545952103174, "loss": 0.2379, "step": 13672 }, { "epoch": 0.024245132223206282, "grad_norm": 0.5703125, "learning_rate": 0.0019221303796037464, "loss": 0.2329, "step": 13674 }, { "epoch": 0.024248678388516097, "grad_norm": 0.34765625, "learning_rate": 0.0019221061604016544, "loss": 0.2512, "step": 13676 }, { "epoch": 0.024252224553825915, "grad_norm": 1.4375, "learning_rate": 0.001922081937604148, "loss": 0.1933, "step": 13678 }, { "epoch": 0.02425577071913573, "grad_norm": 0.404296875, "learning_rate": 0.0019220577112113325, "loss": 0.2023, "step": 13680 }, { "epoch": 0.024259316884445544, "grad_norm": 1.21875, "learning_rate": 0.001922033481223314, "loss": 0.277, "step": 13682 }, { "epoch": 0.02426286304975536, "grad_norm": 2.71875, "learning_rate": 0.0019220092476401988, "loss": 0.2647, "step": 13684 }, { "epoch": 0.024266409215065173, "grad_norm": 1.1015625, "learning_rate": 0.0019219850104620925, "loss": 0.281, "step": 13686 }, { "epoch": 0.024269955380374988, "grad_norm": 1.7890625, "learning_rate": 0.0019219607696891012, "loss": 0.3359, "step": 13688 }, { "epoch": 0.024273501545684806, "grad_norm": 0.74609375, "learning_rate": 0.001921936525321331, "loss": 0.3584, "step": 13690 }, { "epoch": 0.02427704771099462, "grad_norm": 0.1513671875, "learning_rate": 0.001921912277358888, "loss": 0.1822, "step": 13692 }, { "epoch": 0.024280593876304435, "grad_norm": 0.5, "learning_rate": 0.0019218880258018778, "loss": 0.2041, "step": 13694 }, { "epoch": 0.02428414004161425, "grad_norm": 0.70703125, "learning_rate": 0.0019218637706504072, "loss": 0.2228, "step": 13696 }, { "epoch": 0.024287686206924064, "grad_norm": 0.30859375, "learning_rate": 0.0019218395119045815, "loss": 0.1759, "step": 13698 }, { "epoch": 0.02429123237223388, "grad_norm": 0.828125, "learning_rate": 0.0019218152495645074, "loss": 0.2733, "step": 13700 }, { "epoch": 0.024294778537543697, "grad_norm": 1.0859375, "learning_rate": 0.0019217909836302905, "loss": 0.1805, "step": 13702 }, { "epoch": 0.02429832470285351, "grad_norm": 0.3046875, "learning_rate": 0.0019217667141020374, "loss": 0.2562, "step": 13704 }, { "epoch": 0.024301870868163326, "grad_norm": 0.359375, "learning_rate": 0.0019217424409798541, "loss": 0.247, "step": 13706 }, { "epoch": 0.02430541703347314, "grad_norm": 0.251953125, "learning_rate": 0.0019217181642638467, "loss": 0.2947, "step": 13708 }, { "epoch": 0.024308963198782955, "grad_norm": 0.7890625, "learning_rate": 0.001921693883954121, "loss": 0.2822, "step": 13710 }, { "epoch": 0.024312509364092773, "grad_norm": 1.5234375, "learning_rate": 0.0019216696000507838, "loss": 0.2057, "step": 13712 }, { "epoch": 0.024316055529402587, "grad_norm": 0.515625, "learning_rate": 0.001921645312553941, "loss": 0.2415, "step": 13714 }, { "epoch": 0.024319601694712402, "grad_norm": 0.228515625, "learning_rate": 0.001921621021463699, "loss": 0.1979, "step": 13716 }, { "epoch": 0.024323147860022216, "grad_norm": 0.5546875, "learning_rate": 0.0019215967267801636, "loss": 0.2427, "step": 13718 }, { "epoch": 0.02432669402533203, "grad_norm": 3.890625, "learning_rate": 0.0019215724285034418, "loss": 0.6283, "step": 13720 }, { "epoch": 0.024330240190641846, "grad_norm": 0.326171875, "learning_rate": 0.001921548126633639, "loss": 0.2413, "step": 13722 }, { "epoch": 0.024333786355951664, "grad_norm": 0.76171875, "learning_rate": 0.001921523821170862, "loss": 0.283, "step": 13724 }, { "epoch": 0.024337332521261478, "grad_norm": 0.40625, "learning_rate": 0.0019214995121152173, "loss": 0.2244, "step": 13726 }, { "epoch": 0.024340878686571293, "grad_norm": 1.09375, "learning_rate": 0.0019214751994668107, "loss": 0.207, "step": 13728 }, { "epoch": 0.024344424851881107, "grad_norm": 0.6015625, "learning_rate": 0.0019214508832257487, "loss": 0.2805, "step": 13730 }, { "epoch": 0.024347971017190922, "grad_norm": 0.73828125, "learning_rate": 0.0019214265633921378, "loss": 0.2206, "step": 13732 }, { "epoch": 0.024351517182500736, "grad_norm": 0.47265625, "learning_rate": 0.0019214022399660841, "loss": 0.3517, "step": 13734 }, { "epoch": 0.024355063347810554, "grad_norm": 0.453125, "learning_rate": 0.0019213779129476944, "loss": 0.2401, "step": 13736 }, { "epoch": 0.02435860951312037, "grad_norm": 0.8515625, "learning_rate": 0.0019213535823370744, "loss": 0.2, "step": 13738 }, { "epoch": 0.024362155678430183, "grad_norm": 0.478515625, "learning_rate": 0.0019213292481343315, "loss": 0.2087, "step": 13740 }, { "epoch": 0.024365701843739998, "grad_norm": 0.8671875, "learning_rate": 0.001921304910339571, "loss": 0.2439, "step": 13742 }, { "epoch": 0.024369248009049813, "grad_norm": 0.515625, "learning_rate": 0.0019212805689529002, "loss": 0.2096, "step": 13744 }, { "epoch": 0.02437279417435963, "grad_norm": 0.9296875, "learning_rate": 0.001921256223974425, "loss": 0.2542, "step": 13746 }, { "epoch": 0.024376340339669445, "grad_norm": 0.63671875, "learning_rate": 0.0019212318754042524, "loss": 0.3706, "step": 13748 }, { "epoch": 0.02437988650497926, "grad_norm": 1.4453125, "learning_rate": 0.0019212075232424887, "loss": 0.3073, "step": 13750 }, { "epoch": 0.024383432670289074, "grad_norm": 1.2890625, "learning_rate": 0.00192118316748924, "loss": 0.2408, "step": 13752 }, { "epoch": 0.02438697883559889, "grad_norm": 0.68359375, "learning_rate": 0.0019211588081446131, "loss": 0.2567, "step": 13754 }, { "epoch": 0.024390525000908703, "grad_norm": 0.474609375, "learning_rate": 0.0019211344452087149, "loss": 0.2415, "step": 13756 }, { "epoch": 0.02439407116621852, "grad_norm": 1.53125, "learning_rate": 0.0019211100786816514, "loss": 0.3627, "step": 13758 }, { "epoch": 0.024397617331528336, "grad_norm": 0.65625, "learning_rate": 0.0019210857085635294, "loss": 0.2069, "step": 13760 }, { "epoch": 0.02440116349683815, "grad_norm": 0.60546875, "learning_rate": 0.0019210613348544555, "loss": 0.1754, "step": 13762 }, { "epoch": 0.024404709662147965, "grad_norm": 1.6640625, "learning_rate": 0.001921036957554536, "loss": 0.254, "step": 13764 }, { "epoch": 0.02440825582745778, "grad_norm": 1.4765625, "learning_rate": 0.001921012576663878, "loss": 0.4525, "step": 13766 }, { "epoch": 0.024411801992767594, "grad_norm": 0.46875, "learning_rate": 0.0019209881921825878, "loss": 0.2479, "step": 13768 }, { "epoch": 0.024415348158077412, "grad_norm": 0.384765625, "learning_rate": 0.0019209638041107722, "loss": 0.2775, "step": 13770 }, { "epoch": 0.024418894323387227, "grad_norm": 0.5625, "learning_rate": 0.0019209394124485378, "loss": 0.3512, "step": 13772 }, { "epoch": 0.02442244048869704, "grad_norm": 0.58203125, "learning_rate": 0.001920915017195991, "loss": 0.2219, "step": 13774 }, { "epoch": 0.024425986654006856, "grad_norm": 0.251953125, "learning_rate": 0.001920890618353239, "loss": 0.2009, "step": 13776 }, { "epoch": 0.02442953281931667, "grad_norm": 0.56640625, "learning_rate": 0.0019208662159203884, "loss": 0.2186, "step": 13778 }, { "epoch": 0.02443307898462649, "grad_norm": 0.82421875, "learning_rate": 0.0019208418098975457, "loss": 0.2147, "step": 13780 }, { "epoch": 0.024436625149936303, "grad_norm": 0.57421875, "learning_rate": 0.0019208174002848177, "loss": 0.2648, "step": 13782 }, { "epoch": 0.024440171315246118, "grad_norm": 0.380859375, "learning_rate": 0.001920792987082311, "loss": 0.1901, "step": 13784 }, { "epoch": 0.024443717480555932, "grad_norm": 0.63671875, "learning_rate": 0.001920768570290133, "loss": 0.1862, "step": 13786 }, { "epoch": 0.024447263645865747, "grad_norm": 0.5078125, "learning_rate": 0.0019207441499083896, "loss": 0.1555, "step": 13788 }, { "epoch": 0.02445080981117556, "grad_norm": 0.470703125, "learning_rate": 0.0019207197259371882, "loss": 0.1781, "step": 13790 }, { "epoch": 0.02445435597648538, "grad_norm": 1.0859375, "learning_rate": 0.0019206952983766353, "loss": 0.2094, "step": 13792 }, { "epoch": 0.024457902141795194, "grad_norm": 1.484375, "learning_rate": 0.0019206708672268378, "loss": 0.3294, "step": 13794 }, { "epoch": 0.02446144830710501, "grad_norm": 0.3359375, "learning_rate": 0.001920646432487903, "loss": 0.3318, "step": 13796 }, { "epoch": 0.024464994472414823, "grad_norm": 0.365234375, "learning_rate": 0.0019206219941599373, "loss": 0.2351, "step": 13798 }, { "epoch": 0.024468540637724637, "grad_norm": 0.69140625, "learning_rate": 0.0019205975522430475, "loss": 0.2617, "step": 13800 }, { "epoch": 0.024472086803034452, "grad_norm": 0.306640625, "learning_rate": 0.0019205731067373406, "loss": 0.231, "step": 13802 }, { "epoch": 0.02447563296834427, "grad_norm": 5.03125, "learning_rate": 0.001920548657642924, "loss": 0.2353, "step": 13804 }, { "epoch": 0.024479179133654085, "grad_norm": 0.8671875, "learning_rate": 0.001920524204959904, "loss": 0.2762, "step": 13806 }, { "epoch": 0.0244827252989639, "grad_norm": 0.267578125, "learning_rate": 0.0019204997486883875, "loss": 0.1902, "step": 13808 }, { "epoch": 0.024486271464273714, "grad_norm": 0.6171875, "learning_rate": 0.0019204752888284818, "loss": 0.296, "step": 13810 }, { "epoch": 0.024489817629583528, "grad_norm": 0.345703125, "learning_rate": 0.0019204508253802936, "loss": 0.1988, "step": 13812 }, { "epoch": 0.024493363794893346, "grad_norm": 0.58984375, "learning_rate": 0.0019204263583439305, "loss": 0.2514, "step": 13814 }, { "epoch": 0.02449690996020316, "grad_norm": 0.341796875, "learning_rate": 0.001920401887719499, "loss": 0.2111, "step": 13816 }, { "epoch": 0.024500456125512975, "grad_norm": 0.40625, "learning_rate": 0.0019203774135071062, "loss": 0.2105, "step": 13818 }, { "epoch": 0.02450400229082279, "grad_norm": 1.09375, "learning_rate": 0.001920352935706859, "loss": 0.3845, "step": 13820 }, { "epoch": 0.024507548456132604, "grad_norm": 0.486328125, "learning_rate": 0.0019203284543188647, "loss": 0.2376, "step": 13822 }, { "epoch": 0.02451109462144242, "grad_norm": 0.455078125, "learning_rate": 0.00192030396934323, "loss": 0.1809, "step": 13824 }, { "epoch": 0.024514640786752237, "grad_norm": 0.29296875, "learning_rate": 0.0019202794807800622, "loss": 0.2303, "step": 13826 }, { "epoch": 0.02451818695206205, "grad_norm": 1.3046875, "learning_rate": 0.0019202549886294686, "loss": 0.2005, "step": 13828 }, { "epoch": 0.024521733117371866, "grad_norm": 0.90625, "learning_rate": 0.0019202304928915561, "loss": 0.1902, "step": 13830 }, { "epoch": 0.02452527928268168, "grad_norm": 0.341796875, "learning_rate": 0.001920205993566432, "loss": 0.1995, "step": 13832 }, { "epoch": 0.024528825447991495, "grad_norm": 0.53515625, "learning_rate": 0.0019201814906542032, "loss": 0.1956, "step": 13834 }, { "epoch": 0.02453237161330131, "grad_norm": 0.33203125, "learning_rate": 0.0019201569841549768, "loss": 0.2358, "step": 13836 }, { "epoch": 0.024535917778611128, "grad_norm": 1.3984375, "learning_rate": 0.0019201324740688605, "loss": 0.3039, "step": 13838 }, { "epoch": 0.024539463943920942, "grad_norm": 0.40234375, "learning_rate": 0.001920107960395961, "loss": 0.1798, "step": 13840 }, { "epoch": 0.024543010109230757, "grad_norm": 0.86328125, "learning_rate": 0.0019200834431363856, "loss": 0.3259, "step": 13842 }, { "epoch": 0.02454655627454057, "grad_norm": 0.2109375, "learning_rate": 0.0019200589222902417, "loss": 0.2328, "step": 13844 }, { "epoch": 0.024550102439850386, "grad_norm": 0.1435546875, "learning_rate": 0.0019200343978576363, "loss": 0.3027, "step": 13846 }, { "epoch": 0.024553648605160204, "grad_norm": 0.5703125, "learning_rate": 0.0019200098698386769, "loss": 0.2139, "step": 13848 }, { "epoch": 0.02455719477047002, "grad_norm": 1.2578125, "learning_rate": 0.0019199853382334702, "loss": 0.338, "step": 13850 }, { "epoch": 0.024560740935779833, "grad_norm": 0.51953125, "learning_rate": 0.0019199608030421244, "loss": 0.2449, "step": 13852 }, { "epoch": 0.024564287101089648, "grad_norm": 0.58984375, "learning_rate": 0.0019199362642647465, "loss": 0.1955, "step": 13854 }, { "epoch": 0.024567833266399462, "grad_norm": 0.9453125, "learning_rate": 0.0019199117219014432, "loss": 0.2068, "step": 13856 }, { "epoch": 0.024571379431709277, "grad_norm": 0.26953125, "learning_rate": 0.0019198871759523225, "loss": 0.2273, "step": 13858 }, { "epoch": 0.024574925597019095, "grad_norm": 0.703125, "learning_rate": 0.0019198626264174915, "loss": 0.235, "step": 13860 }, { "epoch": 0.02457847176232891, "grad_norm": 0.46484375, "learning_rate": 0.0019198380732970578, "loss": 0.2033, "step": 13862 }, { "epoch": 0.024582017927638724, "grad_norm": 1.1015625, "learning_rate": 0.0019198135165911282, "loss": 0.2702, "step": 13864 }, { "epoch": 0.02458556409294854, "grad_norm": 0.443359375, "learning_rate": 0.0019197889562998108, "loss": 0.1868, "step": 13866 }, { "epoch": 0.024589110258258353, "grad_norm": 0.97265625, "learning_rate": 0.0019197643924232127, "loss": 0.2834, "step": 13868 }, { "epoch": 0.024592656423568168, "grad_norm": 0.341796875, "learning_rate": 0.0019197398249614411, "loss": 0.3313, "step": 13870 }, { "epoch": 0.024596202588877986, "grad_norm": 0.474609375, "learning_rate": 0.001919715253914604, "loss": 0.2846, "step": 13872 }, { "epoch": 0.0245997487541878, "grad_norm": 0.59375, "learning_rate": 0.0019196906792828081, "loss": 0.2002, "step": 13874 }, { "epoch": 0.024603294919497615, "grad_norm": 0.5703125, "learning_rate": 0.0019196661010661615, "loss": 0.2345, "step": 13876 }, { "epoch": 0.02460684108480743, "grad_norm": 0.9765625, "learning_rate": 0.0019196415192647715, "loss": 0.3253, "step": 13878 }, { "epoch": 0.024610387250117244, "grad_norm": 0.283203125, "learning_rate": 0.0019196169338787457, "loss": 0.1902, "step": 13880 }, { "epoch": 0.024613933415427062, "grad_norm": 0.6171875, "learning_rate": 0.0019195923449081913, "loss": 0.2404, "step": 13882 }, { "epoch": 0.024617479580736876, "grad_norm": 1.0234375, "learning_rate": 0.0019195677523532162, "loss": 0.4937, "step": 13884 }, { "epoch": 0.02462102574604669, "grad_norm": 0.41796875, "learning_rate": 0.0019195431562139278, "loss": 0.1941, "step": 13886 }, { "epoch": 0.024624571911356505, "grad_norm": 0.314453125, "learning_rate": 0.0019195185564904336, "loss": 0.2128, "step": 13888 }, { "epoch": 0.02462811807666632, "grad_norm": 0.341796875, "learning_rate": 0.0019194939531828414, "loss": 0.2343, "step": 13890 }, { "epoch": 0.024631664241976135, "grad_norm": 0.2734375, "learning_rate": 0.0019194693462912585, "loss": 0.2266, "step": 13892 }, { "epoch": 0.024635210407285953, "grad_norm": 0.53515625, "learning_rate": 0.001919444735815793, "loss": 0.2116, "step": 13894 }, { "epoch": 0.024638756572595767, "grad_norm": 0.470703125, "learning_rate": 0.001919420121756552, "loss": 0.2475, "step": 13896 }, { "epoch": 0.02464230273790558, "grad_norm": 0.263671875, "learning_rate": 0.0019193955041136434, "loss": 0.2494, "step": 13898 }, { "epoch": 0.024645848903215396, "grad_norm": 0.96484375, "learning_rate": 0.0019193708828871747, "loss": 0.2323, "step": 13900 }, { "epoch": 0.02464939506852521, "grad_norm": 0.8671875, "learning_rate": 0.0019193462580772537, "loss": 0.1967, "step": 13902 }, { "epoch": 0.024652941233835025, "grad_norm": 0.52734375, "learning_rate": 0.0019193216296839884, "loss": 0.2507, "step": 13904 }, { "epoch": 0.024656487399144843, "grad_norm": 1.0625, "learning_rate": 0.0019192969977074859, "loss": 0.221, "step": 13906 }, { "epoch": 0.024660033564454658, "grad_norm": 1.4765625, "learning_rate": 0.0019192723621478545, "loss": 0.2676, "step": 13908 }, { "epoch": 0.024663579729764473, "grad_norm": 0.345703125, "learning_rate": 0.0019192477230052015, "loss": 0.2427, "step": 13910 }, { "epoch": 0.024667125895074287, "grad_norm": 0.85546875, "learning_rate": 0.0019192230802796348, "loss": 0.2562, "step": 13912 }, { "epoch": 0.0246706720603841, "grad_norm": 0.5078125, "learning_rate": 0.0019191984339712626, "loss": 0.2307, "step": 13914 }, { "epoch": 0.02467421822569392, "grad_norm": 0.6328125, "learning_rate": 0.0019191737840801918, "loss": 0.3262, "step": 13916 }, { "epoch": 0.024677764391003734, "grad_norm": 0.30078125, "learning_rate": 0.0019191491306065308, "loss": 0.2705, "step": 13918 }, { "epoch": 0.02468131055631355, "grad_norm": 0.251953125, "learning_rate": 0.0019191244735503872, "loss": 0.1889, "step": 13920 }, { "epoch": 0.024684856721623363, "grad_norm": 0.375, "learning_rate": 0.0019190998129118694, "loss": 0.1917, "step": 13922 }, { "epoch": 0.024688402886933178, "grad_norm": 0.7734375, "learning_rate": 0.0019190751486910845, "loss": 0.2384, "step": 13924 }, { "epoch": 0.024691949052242992, "grad_norm": 0.3671875, "learning_rate": 0.0019190504808881406, "loss": 0.2819, "step": 13926 }, { "epoch": 0.02469549521755281, "grad_norm": 1.03125, "learning_rate": 0.001919025809503146, "loss": 0.3895, "step": 13928 }, { "epoch": 0.024699041382862625, "grad_norm": 0.21875, "learning_rate": 0.001919001134536208, "loss": 0.2031, "step": 13930 }, { "epoch": 0.02470258754817244, "grad_norm": 1.359375, "learning_rate": 0.0019189764559874346, "loss": 0.3032, "step": 13932 }, { "epoch": 0.024706133713482254, "grad_norm": 0.52734375, "learning_rate": 0.0019189517738569344, "loss": 0.2921, "step": 13934 }, { "epoch": 0.02470967987879207, "grad_norm": 0.51953125, "learning_rate": 0.0019189270881448143, "loss": 0.2276, "step": 13936 }, { "epoch": 0.024713226044101883, "grad_norm": 0.2470703125, "learning_rate": 0.001918902398851183, "loss": 0.1609, "step": 13938 }, { "epoch": 0.0247167722094117, "grad_norm": 0.376953125, "learning_rate": 0.0019188777059761485, "loss": 0.2039, "step": 13940 }, { "epoch": 0.024720318374721516, "grad_norm": 0.380859375, "learning_rate": 0.0019188530095198184, "loss": 0.2135, "step": 13942 }, { "epoch": 0.02472386454003133, "grad_norm": 0.6484375, "learning_rate": 0.001918828309482301, "loss": 0.2362, "step": 13944 }, { "epoch": 0.024727410705341145, "grad_norm": 0.85546875, "learning_rate": 0.0019188036058637042, "loss": 0.2801, "step": 13946 }, { "epoch": 0.02473095687065096, "grad_norm": 0.34765625, "learning_rate": 0.0019187788986641358, "loss": 0.2272, "step": 13948 }, { "epoch": 0.024734503035960777, "grad_norm": 2.171875, "learning_rate": 0.0019187541878837042, "loss": 0.4132, "step": 13950 }, { "epoch": 0.024738049201270592, "grad_norm": 0.2392578125, "learning_rate": 0.0019187294735225175, "loss": 0.162, "step": 13952 }, { "epoch": 0.024741595366580407, "grad_norm": 0.208984375, "learning_rate": 0.0019187047555806835, "loss": 0.1601, "step": 13954 }, { "epoch": 0.02474514153189022, "grad_norm": 0.7578125, "learning_rate": 0.0019186800340583105, "loss": 0.3078, "step": 13956 }, { "epoch": 0.024748687697200036, "grad_norm": 0.703125, "learning_rate": 0.0019186553089555068, "loss": 0.2143, "step": 13958 }, { "epoch": 0.02475223386250985, "grad_norm": 0.6171875, "learning_rate": 0.0019186305802723802, "loss": 0.2658, "step": 13960 }, { "epoch": 0.024755780027819668, "grad_norm": 0.412109375, "learning_rate": 0.001918605848009039, "loss": 0.3714, "step": 13962 }, { "epoch": 0.024759326193129483, "grad_norm": 0.63671875, "learning_rate": 0.001918581112165591, "loss": 0.181, "step": 13964 }, { "epoch": 0.024762872358439297, "grad_norm": 0.9765625, "learning_rate": 0.001918556372742145, "loss": 0.2423, "step": 13966 }, { "epoch": 0.024766418523749112, "grad_norm": 0.546875, "learning_rate": 0.0019185316297388086, "loss": 0.3429, "step": 13968 }, { "epoch": 0.024769964689058926, "grad_norm": 0.45703125, "learning_rate": 0.0019185068831556907, "loss": 0.3132, "step": 13970 }, { "epoch": 0.02477351085436874, "grad_norm": 0.3984375, "learning_rate": 0.001918482132992899, "loss": 0.2819, "step": 13972 }, { "epoch": 0.02477705701967856, "grad_norm": 0.7734375, "learning_rate": 0.001918457379250542, "loss": 0.2544, "step": 13974 }, { "epoch": 0.024780603184988374, "grad_norm": 1.6953125, "learning_rate": 0.0019184326219287276, "loss": 0.401, "step": 13976 }, { "epoch": 0.024784149350298188, "grad_norm": 1.09375, "learning_rate": 0.0019184078610275644, "loss": 0.1855, "step": 13978 }, { "epoch": 0.024787695515608003, "grad_norm": 0.5390625, "learning_rate": 0.0019183830965471605, "loss": 0.2471, "step": 13980 }, { "epoch": 0.024791241680917817, "grad_norm": 0.400390625, "learning_rate": 0.0019183583284876247, "loss": 0.2138, "step": 13982 }, { "epoch": 0.024794787846227635, "grad_norm": 0.61328125, "learning_rate": 0.0019183335568490646, "loss": 0.229, "step": 13984 }, { "epoch": 0.02479833401153745, "grad_norm": 0.33984375, "learning_rate": 0.0019183087816315887, "loss": 0.1648, "step": 13986 }, { "epoch": 0.024801880176847264, "grad_norm": 1.7890625, "learning_rate": 0.001918284002835306, "loss": 0.2413, "step": 13988 }, { "epoch": 0.02480542634215708, "grad_norm": 0.37890625, "learning_rate": 0.0019182592204603239, "loss": 0.1872, "step": 13990 }, { "epoch": 0.024808972507466893, "grad_norm": 0.33984375, "learning_rate": 0.0019182344345067515, "loss": 0.2338, "step": 13992 }, { "epoch": 0.024812518672776708, "grad_norm": 0.453125, "learning_rate": 0.001918209644974697, "loss": 0.2351, "step": 13994 }, { "epoch": 0.024816064838086526, "grad_norm": 0.62109375, "learning_rate": 0.0019181848518642689, "loss": 0.2204, "step": 13996 }, { "epoch": 0.02481961100339634, "grad_norm": 0.48046875, "learning_rate": 0.0019181600551755752, "loss": 0.1808, "step": 13998 }, { "epoch": 0.024823157168706155, "grad_norm": 1.390625, "learning_rate": 0.0019181352549087248, "loss": 0.2362, "step": 14000 }, { "epoch": 0.02482670333401597, "grad_norm": 0.267578125, "learning_rate": 0.0019181104510638263, "loss": 0.1877, "step": 14002 }, { "epoch": 0.024830249499325784, "grad_norm": 0.3828125, "learning_rate": 0.0019180856436409874, "loss": 0.2181, "step": 14004 }, { "epoch": 0.0248337956646356, "grad_norm": 0.482421875, "learning_rate": 0.0019180608326403174, "loss": 0.2647, "step": 14006 }, { "epoch": 0.024837341829945417, "grad_norm": 0.72265625, "learning_rate": 0.0019180360180619243, "loss": 0.2128, "step": 14008 }, { "epoch": 0.02484088799525523, "grad_norm": 0.400390625, "learning_rate": 0.0019180111999059172, "loss": 0.2545, "step": 14010 }, { "epoch": 0.024844434160565046, "grad_norm": 1.2265625, "learning_rate": 0.001917986378172404, "loss": 0.2386, "step": 14012 }, { "epoch": 0.02484798032587486, "grad_norm": 0.81640625, "learning_rate": 0.0019179615528614935, "loss": 0.2461, "step": 14014 }, { "epoch": 0.024851526491184675, "grad_norm": 0.232421875, "learning_rate": 0.0019179367239732946, "loss": 0.1619, "step": 14016 }, { "epoch": 0.024855072656494493, "grad_norm": 0.54296875, "learning_rate": 0.0019179118915079152, "loss": 0.5902, "step": 14018 }, { "epoch": 0.024858618821804308, "grad_norm": 0.9609375, "learning_rate": 0.0019178870554654643, "loss": 0.4067, "step": 14020 }, { "epoch": 0.024862164987114122, "grad_norm": 0.38671875, "learning_rate": 0.001917862215846051, "loss": 0.4312, "step": 14022 }, { "epoch": 0.024865711152423937, "grad_norm": 0.46875, "learning_rate": 0.001917837372649783, "loss": 0.1862, "step": 14024 }, { "epoch": 0.02486925731773375, "grad_norm": 2.984375, "learning_rate": 0.0019178125258767693, "loss": 0.5017, "step": 14026 }, { "epoch": 0.024872803483043566, "grad_norm": 0.296875, "learning_rate": 0.0019177876755271188, "loss": 0.1826, "step": 14028 }, { "epoch": 0.024876349648353384, "grad_norm": 0.79296875, "learning_rate": 0.0019177628216009402, "loss": 0.2232, "step": 14030 }, { "epoch": 0.0248798958136632, "grad_norm": 0.51171875, "learning_rate": 0.0019177379640983418, "loss": 0.2982, "step": 14032 }, { "epoch": 0.024883441978973013, "grad_norm": 0.57421875, "learning_rate": 0.0019177131030194328, "loss": 0.2894, "step": 14034 }, { "epoch": 0.024886988144282828, "grad_norm": 0.6015625, "learning_rate": 0.0019176882383643214, "loss": 0.3256, "step": 14036 }, { "epoch": 0.024890534309592642, "grad_norm": 1.484375, "learning_rate": 0.0019176633701331167, "loss": 0.2414, "step": 14038 }, { "epoch": 0.024894080474902457, "grad_norm": 0.30078125, "learning_rate": 0.0019176384983259273, "loss": 0.3114, "step": 14040 }, { "epoch": 0.024897626640212275, "grad_norm": 1.28125, "learning_rate": 0.0019176136229428625, "loss": 0.2717, "step": 14042 }, { "epoch": 0.02490117280552209, "grad_norm": 0.6171875, "learning_rate": 0.00191758874398403, "loss": 0.1346, "step": 14044 }, { "epoch": 0.024904718970831904, "grad_norm": 0.703125, "learning_rate": 0.0019175638614495397, "loss": 0.3922, "step": 14046 }, { "epoch": 0.02490826513614172, "grad_norm": 1.34375, "learning_rate": 0.0019175389753394998, "loss": 0.3447, "step": 14048 }, { "epoch": 0.024911811301451533, "grad_norm": 0.345703125, "learning_rate": 0.0019175140856540192, "loss": 0.1935, "step": 14050 }, { "epoch": 0.02491535746676135, "grad_norm": 0.57421875, "learning_rate": 0.0019174891923932071, "loss": 0.2704, "step": 14052 }, { "epoch": 0.024918903632071165, "grad_norm": 0.365234375, "learning_rate": 0.0019174642955571719, "loss": 0.2024, "step": 14054 }, { "epoch": 0.02492244979738098, "grad_norm": 0.41015625, "learning_rate": 0.0019174393951460226, "loss": 0.181, "step": 14056 }, { "epoch": 0.024925995962690795, "grad_norm": 3.6875, "learning_rate": 0.0019174144911598687, "loss": 0.2252, "step": 14058 }, { "epoch": 0.02492954212800061, "grad_norm": 1.2890625, "learning_rate": 0.0019173895835988184, "loss": 0.2413, "step": 14060 }, { "epoch": 0.024933088293310424, "grad_norm": 1.0, "learning_rate": 0.0019173646724629807, "loss": 0.3171, "step": 14062 }, { "epoch": 0.02493663445862024, "grad_norm": 0.57421875, "learning_rate": 0.0019173397577524648, "loss": 0.2648, "step": 14064 }, { "epoch": 0.024940180623930056, "grad_norm": 2.234375, "learning_rate": 0.0019173148394673795, "loss": 0.3527, "step": 14066 }, { "epoch": 0.02494372678923987, "grad_norm": 3.765625, "learning_rate": 0.001917289917607834, "loss": 0.3515, "step": 14068 }, { "epoch": 0.024947272954549685, "grad_norm": 0.47265625, "learning_rate": 0.0019172649921739369, "loss": 0.192, "step": 14070 }, { "epoch": 0.0249508191198595, "grad_norm": 0.439453125, "learning_rate": 0.0019172400631657979, "loss": 0.2277, "step": 14072 }, { "epoch": 0.024954365285169314, "grad_norm": 0.361328125, "learning_rate": 0.0019172151305835252, "loss": 0.198, "step": 14074 }, { "epoch": 0.024957911450479132, "grad_norm": 0.353515625, "learning_rate": 0.0019171901944272283, "loss": 0.1811, "step": 14076 }, { "epoch": 0.024961457615788947, "grad_norm": 0.43359375, "learning_rate": 0.001917165254697016, "loss": 0.2093, "step": 14078 }, { "epoch": 0.02496500378109876, "grad_norm": 0.6875, "learning_rate": 0.0019171403113929977, "loss": 0.2266, "step": 14080 }, { "epoch": 0.024968549946408576, "grad_norm": 0.1953125, "learning_rate": 0.0019171153645152826, "loss": 0.1848, "step": 14082 }, { "epoch": 0.02497209611171839, "grad_norm": 1.1484375, "learning_rate": 0.001917090414063979, "loss": 0.2748, "step": 14084 }, { "epoch": 0.02497564227702821, "grad_norm": 0.33984375, "learning_rate": 0.001917065460039197, "loss": 0.1671, "step": 14086 }, { "epoch": 0.024979188442338023, "grad_norm": 1.0234375, "learning_rate": 0.0019170405024410452, "loss": 0.3008, "step": 14088 }, { "epoch": 0.024982734607647838, "grad_norm": 0.59375, "learning_rate": 0.0019170155412696326, "loss": 0.252, "step": 14090 }, { "epoch": 0.024986280772957652, "grad_norm": 0.3671875, "learning_rate": 0.0019169905765250685, "loss": 0.3065, "step": 14092 }, { "epoch": 0.024989826938267467, "grad_norm": 0.70703125, "learning_rate": 0.0019169656082074628, "loss": 0.2121, "step": 14094 }, { "epoch": 0.02499337310357728, "grad_norm": 1.1796875, "learning_rate": 0.0019169406363169234, "loss": 0.3891, "step": 14096 }, { "epoch": 0.0249969192688871, "grad_norm": 1.0859375, "learning_rate": 0.0019169156608535608, "loss": 0.2253, "step": 14098 }, { "epoch": 0.025000465434196914, "grad_norm": 0.50390625, "learning_rate": 0.0019168906818174832, "loss": 0.2216, "step": 14100 }, { "epoch": 0.02500401159950673, "grad_norm": 0.31640625, "learning_rate": 0.0019168656992088003, "loss": 0.1991, "step": 14102 }, { "epoch": 0.025007557764816543, "grad_norm": 1.4375, "learning_rate": 0.0019168407130276213, "loss": 0.2448, "step": 14104 }, { "epoch": 0.025011103930126358, "grad_norm": 0.369140625, "learning_rate": 0.0019168157232740558, "loss": 0.2675, "step": 14106 }, { "epoch": 0.025014650095436172, "grad_norm": 9.375, "learning_rate": 0.0019167907299482126, "loss": 0.2919, "step": 14108 }, { "epoch": 0.02501819626074599, "grad_norm": 1.8125, "learning_rate": 0.0019167657330502012, "loss": 0.2866, "step": 14110 }, { "epoch": 0.025021742426055805, "grad_norm": 0.5625, "learning_rate": 0.001916740732580131, "loss": 0.2347, "step": 14112 }, { "epoch": 0.02502528859136562, "grad_norm": 0.5546875, "learning_rate": 0.001916715728538111, "loss": 0.2305, "step": 14114 }, { "epoch": 0.025028834756675434, "grad_norm": 0.380859375, "learning_rate": 0.001916690720924251, "loss": 0.2971, "step": 14116 }, { "epoch": 0.02503238092198525, "grad_norm": 0.341796875, "learning_rate": 0.0019166657097386603, "loss": 0.2291, "step": 14118 }, { "epoch": 0.025035927087295066, "grad_norm": 0.35546875, "learning_rate": 0.0019166406949814479, "loss": 0.2293, "step": 14120 }, { "epoch": 0.02503947325260488, "grad_norm": 1.2890625, "learning_rate": 0.0019166156766527236, "loss": 0.2367, "step": 14122 }, { "epoch": 0.025043019417914696, "grad_norm": 0.392578125, "learning_rate": 0.0019165906547525965, "loss": 0.1856, "step": 14124 }, { "epoch": 0.02504656558322451, "grad_norm": 4.21875, "learning_rate": 0.0019165656292811767, "loss": 0.3748, "step": 14126 }, { "epoch": 0.025050111748534325, "grad_norm": 2.546875, "learning_rate": 0.0019165406002385726, "loss": 0.2931, "step": 14128 }, { "epoch": 0.02505365791384414, "grad_norm": 0.361328125, "learning_rate": 0.0019165155676248945, "loss": 0.1808, "step": 14130 }, { "epoch": 0.025057204079153957, "grad_norm": 0.63671875, "learning_rate": 0.0019164905314402515, "loss": 0.2894, "step": 14132 }, { "epoch": 0.025060750244463772, "grad_norm": 0.35546875, "learning_rate": 0.0019164654916847533, "loss": 0.1998, "step": 14134 }, { "epoch": 0.025064296409773586, "grad_norm": 0.66796875, "learning_rate": 0.001916440448358509, "loss": 0.2136, "step": 14136 }, { "epoch": 0.0250678425750834, "grad_norm": 0.365234375, "learning_rate": 0.0019164154014616288, "loss": 0.2767, "step": 14138 }, { "epoch": 0.025071388740393215, "grad_norm": 2.8125, "learning_rate": 0.0019163903509942216, "loss": 0.3425, "step": 14140 }, { "epoch": 0.02507493490570303, "grad_norm": 0.490234375, "learning_rate": 0.0019163652969563971, "loss": 0.2439, "step": 14142 }, { "epoch": 0.025078481071012848, "grad_norm": 0.2578125, "learning_rate": 0.0019163402393482657, "loss": 0.1865, "step": 14144 }, { "epoch": 0.025082027236322663, "grad_norm": 0.306640625, "learning_rate": 0.0019163151781699354, "loss": 0.2019, "step": 14146 }, { "epoch": 0.025085573401632477, "grad_norm": 0.796875, "learning_rate": 0.0019162901134215172, "loss": 0.2086, "step": 14148 }, { "epoch": 0.02508911956694229, "grad_norm": 0.78515625, "learning_rate": 0.0019162650451031201, "loss": 0.2189, "step": 14150 }, { "epoch": 0.025092665732252106, "grad_norm": 1.3046875, "learning_rate": 0.0019162399732148537, "loss": 0.3601, "step": 14152 }, { "epoch": 0.025096211897561924, "grad_norm": 0.51953125, "learning_rate": 0.001916214897756828, "loss": 0.2762, "step": 14154 }, { "epoch": 0.02509975806287174, "grad_norm": 1.0546875, "learning_rate": 0.0019161898187291522, "loss": 0.1849, "step": 14156 }, { "epoch": 0.025103304228181553, "grad_norm": 1.265625, "learning_rate": 0.0019161647361319366, "loss": 0.2427, "step": 14158 }, { "epoch": 0.025106850393491368, "grad_norm": 2.1875, "learning_rate": 0.00191613964996529, "loss": 0.326, "step": 14160 }, { "epoch": 0.025110396558801183, "grad_norm": 0.466796875, "learning_rate": 0.001916114560229323, "loss": 0.3915, "step": 14162 }, { "epoch": 0.025113942724110997, "grad_norm": 1.5546875, "learning_rate": 0.0019160894669241453, "loss": 0.2227, "step": 14164 }, { "epoch": 0.025117488889420815, "grad_norm": 0.94921875, "learning_rate": 0.0019160643700498656, "loss": 0.2625, "step": 14166 }, { "epoch": 0.02512103505473063, "grad_norm": 1.515625, "learning_rate": 0.0019160392696065949, "loss": 0.2359, "step": 14168 }, { "epoch": 0.025124581220040444, "grad_norm": 0.3203125, "learning_rate": 0.0019160141655944424, "loss": 0.205, "step": 14170 }, { "epoch": 0.02512812738535026, "grad_norm": 0.34375, "learning_rate": 0.0019159890580135175, "loss": 0.1611, "step": 14172 }, { "epoch": 0.025131673550660073, "grad_norm": 0.3359375, "learning_rate": 0.0019159639468639307, "loss": 0.2296, "step": 14174 }, { "epoch": 0.025135219715969888, "grad_norm": 0.95703125, "learning_rate": 0.0019159388321457918, "loss": 0.2817, "step": 14176 }, { "epoch": 0.025138765881279706, "grad_norm": 0.71484375, "learning_rate": 0.00191591371385921, "loss": 0.178, "step": 14178 }, { "epoch": 0.02514231204658952, "grad_norm": 0.376953125, "learning_rate": 0.001915888592004296, "loss": 0.1538, "step": 14180 }, { "epoch": 0.025145858211899335, "grad_norm": 0.6015625, "learning_rate": 0.001915863466581159, "loss": 0.1971, "step": 14182 }, { "epoch": 0.02514940437720915, "grad_norm": 0.921875, "learning_rate": 0.0019158383375899092, "loss": 0.2822, "step": 14184 }, { "epoch": 0.025152950542518964, "grad_norm": 0.36328125, "learning_rate": 0.001915813205030656, "loss": 0.1973, "step": 14186 }, { "epoch": 0.025156496707828782, "grad_norm": 0.3203125, "learning_rate": 0.0019157880689035102, "loss": 0.2598, "step": 14188 }, { "epoch": 0.025160042873138597, "grad_norm": 0.31640625, "learning_rate": 0.001915762929208581, "loss": 0.2675, "step": 14190 }, { "epoch": 0.02516358903844841, "grad_norm": 0.431640625, "learning_rate": 0.0019157377859459785, "loss": 0.1687, "step": 14192 }, { "epoch": 0.025167135203758226, "grad_norm": 0.71875, "learning_rate": 0.001915712639115813, "loss": 0.2638, "step": 14194 }, { "epoch": 0.02517068136906804, "grad_norm": 0.61328125, "learning_rate": 0.0019156874887181943, "loss": 0.2039, "step": 14196 }, { "epoch": 0.025174227534377855, "grad_norm": 0.447265625, "learning_rate": 0.0019156623347532323, "loss": 0.2796, "step": 14198 }, { "epoch": 0.025177773699687673, "grad_norm": 0.67578125, "learning_rate": 0.0019156371772210369, "loss": 0.2175, "step": 14200 }, { "epoch": 0.025181319864997487, "grad_norm": 0.30859375, "learning_rate": 0.0019156120161217182, "loss": 0.2446, "step": 14202 }, { "epoch": 0.025184866030307302, "grad_norm": 0.330078125, "learning_rate": 0.0019155868514553864, "loss": 0.2009, "step": 14204 }, { "epoch": 0.025188412195617117, "grad_norm": 0.486328125, "learning_rate": 0.0019155616832221515, "loss": 0.2326, "step": 14206 }, { "epoch": 0.02519195836092693, "grad_norm": 0.3125, "learning_rate": 0.0019155365114221234, "loss": 0.2416, "step": 14208 }, { "epoch": 0.025195504526236746, "grad_norm": 0.44140625, "learning_rate": 0.0019155113360554124, "loss": 0.1995, "step": 14210 }, { "epoch": 0.025199050691546564, "grad_norm": 2.21875, "learning_rate": 0.0019154861571221288, "loss": 0.5416, "step": 14212 }, { "epoch": 0.025202596856856378, "grad_norm": 0.28125, "learning_rate": 0.001915460974622382, "loss": 0.1937, "step": 14214 }, { "epoch": 0.025206143022166193, "grad_norm": 3.65625, "learning_rate": 0.0019154357885562828, "loss": 0.2904, "step": 14216 }, { "epoch": 0.025209689187476007, "grad_norm": 0.390625, "learning_rate": 0.001915410598923941, "loss": 0.1829, "step": 14218 }, { "epoch": 0.025213235352785822, "grad_norm": 0.39453125, "learning_rate": 0.0019153854057254672, "loss": 0.2214, "step": 14220 }, { "epoch": 0.02521678151809564, "grad_norm": 1.3515625, "learning_rate": 0.0019153602089609708, "loss": 0.2305, "step": 14222 }, { "epoch": 0.025220327683405454, "grad_norm": 1.65625, "learning_rate": 0.0019153350086305628, "loss": 0.2849, "step": 14224 }, { "epoch": 0.02522387384871527, "grad_norm": 0.306640625, "learning_rate": 0.001915309804734353, "loss": 0.161, "step": 14226 }, { "epoch": 0.025227420014025084, "grad_norm": 2.015625, "learning_rate": 0.0019152845972724517, "loss": 0.2227, "step": 14228 }, { "epoch": 0.025230966179334898, "grad_norm": 0.76171875, "learning_rate": 0.001915259386244969, "loss": 0.2329, "step": 14230 }, { "epoch": 0.025234512344644713, "grad_norm": 5.9375, "learning_rate": 0.0019152341716520153, "loss": 0.3652, "step": 14232 }, { "epoch": 0.02523805850995453, "grad_norm": 0.36328125, "learning_rate": 0.0019152089534937014, "loss": 0.2174, "step": 14234 }, { "epoch": 0.025241604675264345, "grad_norm": 0.61328125, "learning_rate": 0.0019151837317701364, "loss": 0.1968, "step": 14236 }, { "epoch": 0.02524515084057416, "grad_norm": 0.94921875, "learning_rate": 0.0019151585064814316, "loss": 0.216, "step": 14238 }, { "epoch": 0.025248697005883974, "grad_norm": 0.279296875, "learning_rate": 0.001915133277627697, "loss": 0.2583, "step": 14240 }, { "epoch": 0.02525224317119379, "grad_norm": 0.30078125, "learning_rate": 0.0019151080452090429, "loss": 0.1993, "step": 14242 }, { "epoch": 0.025255789336503603, "grad_norm": 0.8359375, "learning_rate": 0.0019150828092255794, "loss": 0.2012, "step": 14244 }, { "epoch": 0.02525933550181342, "grad_norm": 0.671875, "learning_rate": 0.0019150575696774176, "loss": 0.2384, "step": 14246 }, { "epoch": 0.025262881667123236, "grad_norm": 0.431640625, "learning_rate": 0.001915032326564667, "loss": 0.2082, "step": 14248 }, { "epoch": 0.02526642783243305, "grad_norm": 0.453125, "learning_rate": 0.001915007079887439, "loss": 0.2235, "step": 14250 }, { "epoch": 0.025269973997742865, "grad_norm": 0.482421875, "learning_rate": 0.0019149818296458428, "loss": 0.3525, "step": 14252 }, { "epoch": 0.02527352016305268, "grad_norm": 0.8203125, "learning_rate": 0.0019149565758399898, "loss": 0.2393, "step": 14254 }, { "epoch": 0.025277066328362498, "grad_norm": 0.7421875, "learning_rate": 0.0019149313184699903, "loss": 0.305, "step": 14256 }, { "epoch": 0.025280612493672312, "grad_norm": 1.546875, "learning_rate": 0.001914906057535954, "loss": 0.31, "step": 14258 }, { "epoch": 0.025284158658982127, "grad_norm": 0.376953125, "learning_rate": 0.0019148807930379924, "loss": 0.2632, "step": 14260 }, { "epoch": 0.02528770482429194, "grad_norm": 0.73046875, "learning_rate": 0.0019148555249762155, "loss": 0.247, "step": 14262 }, { "epoch": 0.025291250989601756, "grad_norm": 0.40234375, "learning_rate": 0.001914830253350734, "loss": 0.227, "step": 14264 }, { "epoch": 0.02529479715491157, "grad_norm": 0.45703125, "learning_rate": 0.0019148049781616582, "loss": 0.1652, "step": 14266 }, { "epoch": 0.02529834332022139, "grad_norm": 1.8515625, "learning_rate": 0.0019147796994090987, "loss": 0.2517, "step": 14268 }, { "epoch": 0.025301889485531203, "grad_norm": 0.67578125, "learning_rate": 0.0019147544170931658, "loss": 0.2481, "step": 14270 }, { "epoch": 0.025305435650841018, "grad_norm": 0.8515625, "learning_rate": 0.0019147291312139707, "loss": 0.2564, "step": 14272 }, { "epoch": 0.025308981816150832, "grad_norm": 0.9375, "learning_rate": 0.0019147038417716236, "loss": 0.234, "step": 14274 }, { "epoch": 0.025312527981460647, "grad_norm": 0.38671875, "learning_rate": 0.0019146785487662349, "loss": 0.2343, "step": 14276 }, { "epoch": 0.02531607414677046, "grad_norm": 0.205078125, "learning_rate": 0.0019146532521979156, "loss": 0.2061, "step": 14278 }, { "epoch": 0.02531962031208028, "grad_norm": 0.359375, "learning_rate": 0.0019146279520667762, "loss": 0.1956, "step": 14280 }, { "epoch": 0.025323166477390094, "grad_norm": 0.984375, "learning_rate": 0.0019146026483729272, "loss": 0.3258, "step": 14282 }, { "epoch": 0.02532671264269991, "grad_norm": 0.63671875, "learning_rate": 0.0019145773411164795, "loss": 0.2872, "step": 14284 }, { "epoch": 0.025330258808009723, "grad_norm": 0.40625, "learning_rate": 0.0019145520302975437, "loss": 0.1759, "step": 14286 }, { "epoch": 0.025333804973319538, "grad_norm": 0.390625, "learning_rate": 0.0019145267159162303, "loss": 0.211, "step": 14288 }, { "epoch": 0.025337351138629356, "grad_norm": 1.171875, "learning_rate": 0.0019145013979726502, "loss": 0.2766, "step": 14290 }, { "epoch": 0.02534089730393917, "grad_norm": 0.671875, "learning_rate": 0.0019144760764669144, "loss": 0.2262, "step": 14292 }, { "epoch": 0.025344443469248985, "grad_norm": 0.4765625, "learning_rate": 0.001914450751399133, "loss": 0.2265, "step": 14294 }, { "epoch": 0.0253479896345588, "grad_norm": 0.466796875, "learning_rate": 0.0019144254227694173, "loss": 0.2133, "step": 14296 }, { "epoch": 0.025351535799868614, "grad_norm": 0.416015625, "learning_rate": 0.001914400090577878, "loss": 0.2247, "step": 14298 }, { "epoch": 0.02535508196517843, "grad_norm": 0.46484375, "learning_rate": 0.0019143747548246255, "loss": 0.1978, "step": 14300 }, { "epoch": 0.025358628130488246, "grad_norm": 0.408203125, "learning_rate": 0.0019143494155097712, "loss": 0.2178, "step": 14302 }, { "epoch": 0.02536217429579806, "grad_norm": 0.42578125, "learning_rate": 0.0019143240726334253, "loss": 0.216, "step": 14304 }, { "epoch": 0.025365720461107875, "grad_norm": 0.244140625, "learning_rate": 0.001914298726195699, "loss": 0.2115, "step": 14306 }, { "epoch": 0.02536926662641769, "grad_norm": 0.984375, "learning_rate": 0.001914273376196703, "loss": 0.3008, "step": 14308 }, { "epoch": 0.025372812791727505, "grad_norm": 0.53125, "learning_rate": 0.001914248022636548, "loss": 0.2185, "step": 14310 }, { "epoch": 0.02537635895703732, "grad_norm": 1.0859375, "learning_rate": 0.0019142226655153457, "loss": 0.2335, "step": 14312 }, { "epoch": 0.025379905122347137, "grad_norm": 1.2578125, "learning_rate": 0.0019141973048332063, "loss": 0.2434, "step": 14314 }, { "epoch": 0.02538345128765695, "grad_norm": 0.41796875, "learning_rate": 0.0019141719405902406, "loss": 0.1836, "step": 14316 }, { "epoch": 0.025386997452966766, "grad_norm": 4.0625, "learning_rate": 0.0019141465727865595, "loss": 0.3423, "step": 14318 }, { "epoch": 0.02539054361827658, "grad_norm": 0.234375, "learning_rate": 0.0019141212014222747, "loss": 0.1716, "step": 14320 }, { "epoch": 0.025394089783586395, "grad_norm": 0.4140625, "learning_rate": 0.0019140958264974963, "loss": 0.2725, "step": 14322 }, { "epoch": 0.025397635948896213, "grad_norm": 0.59375, "learning_rate": 0.001914070448012336, "loss": 0.2923, "step": 14324 }, { "epoch": 0.025401182114206028, "grad_norm": 0.76953125, "learning_rate": 0.0019140450659669039, "loss": 0.2703, "step": 14326 }, { "epoch": 0.025404728279515842, "grad_norm": 0.404296875, "learning_rate": 0.0019140196803613119, "loss": 0.2744, "step": 14328 }, { "epoch": 0.025408274444825657, "grad_norm": 0.5703125, "learning_rate": 0.001913994291195671, "loss": 0.2087, "step": 14330 }, { "epoch": 0.02541182061013547, "grad_norm": 1.1328125, "learning_rate": 0.0019139688984700916, "loss": 0.3415, "step": 14332 }, { "epoch": 0.025415366775445286, "grad_norm": 1.0234375, "learning_rate": 0.0019139435021846849, "loss": 0.1995, "step": 14334 }, { "epoch": 0.025418912940755104, "grad_norm": 0.62890625, "learning_rate": 0.001913918102339562, "loss": 0.2535, "step": 14336 }, { "epoch": 0.02542245910606492, "grad_norm": 0.515625, "learning_rate": 0.0019138926989348342, "loss": 0.4705, "step": 14338 }, { "epoch": 0.025426005271374733, "grad_norm": 0.59765625, "learning_rate": 0.0019138672919706127, "loss": 0.2064, "step": 14340 }, { "epoch": 0.025429551436684548, "grad_norm": 1.0234375, "learning_rate": 0.0019138418814470082, "loss": 0.2218, "step": 14342 }, { "epoch": 0.025433097601994362, "grad_norm": 1.109375, "learning_rate": 0.0019138164673641322, "loss": 0.279, "step": 14344 }, { "epoch": 0.025436643767304177, "grad_norm": 0.287109375, "learning_rate": 0.0019137910497220956, "loss": 0.2018, "step": 14346 }, { "epoch": 0.025440189932613995, "grad_norm": 0.314453125, "learning_rate": 0.00191376562852101, "loss": 0.2745, "step": 14348 }, { "epoch": 0.02544373609792381, "grad_norm": 0.640625, "learning_rate": 0.0019137402037609857, "loss": 0.2219, "step": 14350 }, { "epoch": 0.025447282263233624, "grad_norm": 0.447265625, "learning_rate": 0.001913714775442135, "loss": 0.234, "step": 14352 }, { "epoch": 0.02545082842854344, "grad_norm": 0.470703125, "learning_rate": 0.001913689343564568, "loss": 0.2991, "step": 14354 }, { "epoch": 0.025454374593853253, "grad_norm": 0.66796875, "learning_rate": 0.0019136639081283964, "loss": 0.2025, "step": 14356 }, { "epoch": 0.02545792075916307, "grad_norm": 0.45703125, "learning_rate": 0.0019136384691337321, "loss": 0.2744, "step": 14358 }, { "epoch": 0.025461466924472886, "grad_norm": 1.0390625, "learning_rate": 0.0019136130265806853, "loss": 0.1906, "step": 14360 }, { "epoch": 0.0254650130897827, "grad_norm": 3.71875, "learning_rate": 0.0019135875804693677, "loss": 0.3135, "step": 14362 }, { "epoch": 0.025468559255092515, "grad_norm": 0.38671875, "learning_rate": 0.0019135621307998908, "loss": 0.2412, "step": 14364 }, { "epoch": 0.02547210542040233, "grad_norm": 0.388671875, "learning_rate": 0.0019135366775723658, "loss": 0.1991, "step": 14366 }, { "epoch": 0.025475651585712144, "grad_norm": 0.39453125, "learning_rate": 0.0019135112207869036, "loss": 0.3452, "step": 14368 }, { "epoch": 0.025479197751021962, "grad_norm": 0.37890625, "learning_rate": 0.001913485760443616, "loss": 0.2382, "step": 14370 }, { "epoch": 0.025482743916331776, "grad_norm": 1.734375, "learning_rate": 0.0019134602965426143, "loss": 0.2199, "step": 14372 }, { "epoch": 0.02548629008164159, "grad_norm": 0.46484375, "learning_rate": 0.00191343482908401, "loss": 0.2345, "step": 14374 }, { "epoch": 0.025489836246951406, "grad_norm": 0.70703125, "learning_rate": 0.0019134093580679137, "loss": 0.2167, "step": 14376 }, { "epoch": 0.02549338241226122, "grad_norm": 0.61328125, "learning_rate": 0.0019133838834944376, "loss": 0.1742, "step": 14378 }, { "epoch": 0.025496928577571035, "grad_norm": 1.96875, "learning_rate": 0.001913358405363693, "loss": 0.2832, "step": 14380 }, { "epoch": 0.025500474742880853, "grad_norm": 1.03125, "learning_rate": 0.0019133329236757912, "loss": 0.2628, "step": 14382 }, { "epoch": 0.025504020908190667, "grad_norm": 0.259765625, "learning_rate": 0.0019133074384308434, "loss": 0.2282, "step": 14384 }, { "epoch": 0.025507567073500482, "grad_norm": 3.046875, "learning_rate": 0.0019132819496289616, "loss": 0.3004, "step": 14386 }, { "epoch": 0.025511113238810296, "grad_norm": 0.384765625, "learning_rate": 0.0019132564572702567, "loss": 0.2918, "step": 14388 }, { "epoch": 0.02551465940412011, "grad_norm": 0.578125, "learning_rate": 0.0019132309613548405, "loss": 0.2561, "step": 14390 }, { "epoch": 0.02551820556942993, "grad_norm": 1.0625, "learning_rate": 0.0019132054618828246, "loss": 0.3287, "step": 14392 }, { "epoch": 0.025521751734739744, "grad_norm": 0.36328125, "learning_rate": 0.0019131799588543203, "loss": 0.2029, "step": 14394 }, { "epoch": 0.025525297900049558, "grad_norm": 0.310546875, "learning_rate": 0.001913154452269439, "loss": 0.1439, "step": 14396 }, { "epoch": 0.025528844065359373, "grad_norm": 1.0234375, "learning_rate": 0.0019131289421282927, "loss": 0.2456, "step": 14398 }, { "epoch": 0.025532390230669187, "grad_norm": 0.263671875, "learning_rate": 0.0019131034284309925, "loss": 0.1774, "step": 14400 }, { "epoch": 0.025535936395979, "grad_norm": 0.251953125, "learning_rate": 0.0019130779111776507, "loss": 0.2214, "step": 14402 }, { "epoch": 0.02553948256128882, "grad_norm": 0.4453125, "learning_rate": 0.0019130523903683782, "loss": 0.2823, "step": 14404 }, { "epoch": 0.025543028726598634, "grad_norm": 0.435546875, "learning_rate": 0.0019130268660032866, "loss": 0.2331, "step": 14406 }, { "epoch": 0.02554657489190845, "grad_norm": 0.62890625, "learning_rate": 0.0019130013380824875, "loss": 0.2679, "step": 14408 }, { "epoch": 0.025550121057218263, "grad_norm": 0.6328125, "learning_rate": 0.0019129758066060935, "loss": 0.2774, "step": 14410 }, { "epoch": 0.025553667222528078, "grad_norm": 0.9453125, "learning_rate": 0.001912950271574215, "loss": 0.172, "step": 14412 }, { "epoch": 0.025557213387837893, "grad_norm": 1.9765625, "learning_rate": 0.0019129247329869644, "loss": 0.2489, "step": 14414 }, { "epoch": 0.02556075955314771, "grad_norm": 0.46484375, "learning_rate": 0.0019128991908444533, "loss": 0.1763, "step": 14416 }, { "epoch": 0.025564305718457525, "grad_norm": 0.25390625, "learning_rate": 0.0019128736451467935, "loss": 0.2252, "step": 14418 }, { "epoch": 0.02556785188376734, "grad_norm": 0.3671875, "learning_rate": 0.0019128480958940962, "loss": 0.2655, "step": 14420 }, { "epoch": 0.025571398049077154, "grad_norm": 0.62109375, "learning_rate": 0.0019128225430864734, "loss": 0.4226, "step": 14422 }, { "epoch": 0.02557494421438697, "grad_norm": 1.0234375, "learning_rate": 0.0019127969867240371, "loss": 0.2347, "step": 14424 }, { "epoch": 0.025578490379696787, "grad_norm": 1.5625, "learning_rate": 0.0019127714268068992, "loss": 0.2773, "step": 14426 }, { "epoch": 0.0255820365450066, "grad_norm": 0.65625, "learning_rate": 0.001912745863335171, "loss": 0.2183, "step": 14428 }, { "epoch": 0.025585582710316416, "grad_norm": 1.1640625, "learning_rate": 0.0019127202963089643, "loss": 0.2333, "step": 14430 }, { "epoch": 0.02558912887562623, "grad_norm": 0.9296875, "learning_rate": 0.0019126947257283914, "loss": 0.2336, "step": 14432 }, { "epoch": 0.025592675040936045, "grad_norm": 0.84375, "learning_rate": 0.0019126691515935635, "loss": 0.2532, "step": 14434 }, { "epoch": 0.02559622120624586, "grad_norm": 0.53515625, "learning_rate": 0.001912643573904593, "loss": 0.3615, "step": 14436 }, { "epoch": 0.025599767371555678, "grad_norm": 0.87890625, "learning_rate": 0.0019126179926615917, "loss": 0.1932, "step": 14438 }, { "epoch": 0.025603313536865492, "grad_norm": 1.2109375, "learning_rate": 0.0019125924078646714, "loss": 0.2298, "step": 14440 }, { "epoch": 0.025606859702175307, "grad_norm": 0.5390625, "learning_rate": 0.0019125668195139433, "loss": 0.2653, "step": 14442 }, { "epoch": 0.02561040586748512, "grad_norm": 0.376953125, "learning_rate": 0.0019125412276095204, "loss": 0.2084, "step": 14444 }, { "epoch": 0.025613952032794936, "grad_norm": 0.32421875, "learning_rate": 0.0019125156321515141, "loss": 0.174, "step": 14446 }, { "epoch": 0.02561749819810475, "grad_norm": 0.3671875, "learning_rate": 0.0019124900331400363, "loss": 0.2152, "step": 14448 }, { "epoch": 0.02562104436341457, "grad_norm": 0.3359375, "learning_rate": 0.0019124644305751992, "loss": 0.2205, "step": 14450 }, { "epoch": 0.025624590528724383, "grad_norm": 8.1875, "learning_rate": 0.0019124388244571146, "loss": 0.2619, "step": 14452 }, { "epoch": 0.025628136694034197, "grad_norm": 2.453125, "learning_rate": 0.0019124132147858941, "loss": 0.2973, "step": 14454 }, { "epoch": 0.025631682859344012, "grad_norm": 6.1875, "learning_rate": 0.0019123876015616505, "loss": 0.509, "step": 14456 }, { "epoch": 0.025635229024653827, "grad_norm": 0.494140625, "learning_rate": 0.0019123619847844953, "loss": 0.3097, "step": 14458 }, { "epoch": 0.025638775189963645, "grad_norm": 1.4765625, "learning_rate": 0.001912336364454541, "loss": 0.2535, "step": 14460 }, { "epoch": 0.02564232135527346, "grad_norm": 0.490234375, "learning_rate": 0.001912310740571899, "loss": 0.1607, "step": 14462 }, { "epoch": 0.025645867520583274, "grad_norm": 0.447265625, "learning_rate": 0.0019122851131366813, "loss": 0.2405, "step": 14464 }, { "epoch": 0.025649413685893088, "grad_norm": 3.046875, "learning_rate": 0.001912259482149001, "loss": 0.3025, "step": 14466 }, { "epoch": 0.025652959851202903, "grad_norm": 0.5546875, "learning_rate": 0.001912233847608969, "loss": 0.2178, "step": 14468 }, { "epoch": 0.025656506016512717, "grad_norm": 0.93359375, "learning_rate": 0.0019122082095166984, "loss": 0.2059, "step": 14470 }, { "epoch": 0.025660052181822535, "grad_norm": 2.828125, "learning_rate": 0.0019121825678723005, "loss": 0.3008, "step": 14472 }, { "epoch": 0.02566359834713235, "grad_norm": 0.400390625, "learning_rate": 0.001912156922675888, "loss": 0.3689, "step": 14474 }, { "epoch": 0.025667144512442164, "grad_norm": 0.416015625, "learning_rate": 0.001912131273927573, "loss": 0.4026, "step": 14476 }, { "epoch": 0.02567069067775198, "grad_norm": 0.302734375, "learning_rate": 0.0019121056216274673, "loss": 0.1851, "step": 14478 }, { "epoch": 0.025674236843061794, "grad_norm": 0.353515625, "learning_rate": 0.0019120799657756833, "loss": 0.2009, "step": 14480 }, { "epoch": 0.025677783008371608, "grad_norm": 0.416015625, "learning_rate": 0.0019120543063723336, "loss": 0.2027, "step": 14482 }, { "epoch": 0.025681329173681426, "grad_norm": 0.251953125, "learning_rate": 0.0019120286434175298, "loss": 0.1606, "step": 14484 }, { "epoch": 0.02568487533899124, "grad_norm": 0.63671875, "learning_rate": 0.0019120029769113846, "loss": 0.1858, "step": 14486 }, { "epoch": 0.025688421504301055, "grad_norm": 0.34765625, "learning_rate": 0.00191197730685401, "loss": 0.2374, "step": 14488 }, { "epoch": 0.02569196766961087, "grad_norm": 0.6640625, "learning_rate": 0.001911951633245518, "loss": 0.2286, "step": 14490 }, { "epoch": 0.025695513834920684, "grad_norm": 1.2265625, "learning_rate": 0.0019119259560860216, "loss": 0.2667, "step": 14492 }, { "epoch": 0.025699060000230502, "grad_norm": 0.4765625, "learning_rate": 0.0019119002753756323, "loss": 0.2498, "step": 14494 }, { "epoch": 0.025702606165540317, "grad_norm": 1.1484375, "learning_rate": 0.0019118745911144632, "loss": 0.2345, "step": 14496 }, { "epoch": 0.02570615233085013, "grad_norm": 2.03125, "learning_rate": 0.0019118489033026261, "loss": 0.3078, "step": 14498 }, { "epoch": 0.025709698496159946, "grad_norm": 0.365234375, "learning_rate": 0.0019118232119402332, "loss": 0.2554, "step": 14500 }, { "epoch": 0.02571324466146976, "grad_norm": 0.4765625, "learning_rate": 0.0019117975170273973, "loss": 0.2464, "step": 14502 }, { "epoch": 0.025716790826779575, "grad_norm": 0.45703125, "learning_rate": 0.0019117718185642308, "loss": 0.2298, "step": 14504 }, { "epoch": 0.025720336992089393, "grad_norm": 0.58203125, "learning_rate": 0.0019117461165508458, "loss": 0.2165, "step": 14506 }, { "epoch": 0.025723883157399208, "grad_norm": 0.419921875, "learning_rate": 0.0019117204109873544, "loss": 0.1989, "step": 14508 }, { "epoch": 0.025727429322709022, "grad_norm": 0.39453125, "learning_rate": 0.0019116947018738702, "loss": 0.2287, "step": 14510 }, { "epoch": 0.025730975488018837, "grad_norm": 0.412109375, "learning_rate": 0.0019116689892105044, "loss": 0.2387, "step": 14512 }, { "epoch": 0.02573452165332865, "grad_norm": 0.56640625, "learning_rate": 0.00191164327299737, "loss": 0.2285, "step": 14514 }, { "epoch": 0.025738067818638466, "grad_norm": 0.388671875, "learning_rate": 0.0019116175532345792, "loss": 0.2518, "step": 14516 }, { "epoch": 0.025741613983948284, "grad_norm": 1.9453125, "learning_rate": 0.001911591829922245, "loss": 0.1906, "step": 14518 }, { "epoch": 0.0257451601492581, "grad_norm": 3.3125, "learning_rate": 0.0019115661030604792, "loss": 0.2865, "step": 14520 }, { "epoch": 0.025748706314567913, "grad_norm": 0.828125, "learning_rate": 0.001911540372649395, "loss": 0.1852, "step": 14522 }, { "epoch": 0.025752252479877728, "grad_norm": 2.15625, "learning_rate": 0.0019115146386891042, "loss": 0.5389, "step": 14524 }, { "epoch": 0.025755798645187542, "grad_norm": 0.4921875, "learning_rate": 0.00191148890117972, "loss": 0.3038, "step": 14526 }, { "epoch": 0.02575934481049736, "grad_norm": 0.32421875, "learning_rate": 0.001911463160121355, "loss": 0.1592, "step": 14528 }, { "epoch": 0.025762890975807175, "grad_norm": 0.9609375, "learning_rate": 0.001911437415514121, "loss": 0.29, "step": 14530 }, { "epoch": 0.02576643714111699, "grad_norm": 0.322265625, "learning_rate": 0.0019114116673581317, "loss": 0.1901, "step": 14532 }, { "epoch": 0.025769983306426804, "grad_norm": 4.03125, "learning_rate": 0.0019113859156534986, "loss": 0.3074, "step": 14534 }, { "epoch": 0.02577352947173662, "grad_norm": 1.15625, "learning_rate": 0.0019113601604003347, "loss": 0.1938, "step": 14536 }, { "epoch": 0.025777075637046433, "grad_norm": 0.244140625, "learning_rate": 0.001911334401598753, "loss": 0.2131, "step": 14538 }, { "epoch": 0.02578062180235625, "grad_norm": 0.67578125, "learning_rate": 0.0019113086392488659, "loss": 0.222, "step": 14540 }, { "epoch": 0.025784167967666066, "grad_norm": 0.578125, "learning_rate": 0.001911282873350786, "loss": 0.2063, "step": 14542 }, { "epoch": 0.02578771413297588, "grad_norm": 1.3125, "learning_rate": 0.0019112571039046262, "loss": 0.4433, "step": 14544 }, { "epoch": 0.025791260298285695, "grad_norm": 0.85546875, "learning_rate": 0.0019112313309104989, "loss": 0.2687, "step": 14546 }, { "epoch": 0.02579480646359551, "grad_norm": 0.65625, "learning_rate": 0.001911205554368517, "loss": 0.2466, "step": 14548 }, { "epoch": 0.025798352628905324, "grad_norm": 0.28515625, "learning_rate": 0.0019111797742787933, "loss": 0.2469, "step": 14550 }, { "epoch": 0.025801898794215142, "grad_norm": 1.4140625, "learning_rate": 0.0019111539906414403, "loss": 0.2228, "step": 14552 }, { "epoch": 0.025805444959524956, "grad_norm": 0.8671875, "learning_rate": 0.001911128203456571, "loss": 0.3096, "step": 14554 }, { "epoch": 0.02580899112483477, "grad_norm": 0.92578125, "learning_rate": 0.0019111024127242983, "loss": 0.2732, "step": 14556 }, { "epoch": 0.025812537290144585, "grad_norm": 0.69140625, "learning_rate": 0.0019110766184447345, "loss": 0.2657, "step": 14558 }, { "epoch": 0.0258160834554544, "grad_norm": 1.03125, "learning_rate": 0.0019110508206179931, "loss": 0.2445, "step": 14560 }, { "epoch": 0.025819629620764218, "grad_norm": 0.2294921875, "learning_rate": 0.0019110250192441861, "loss": 0.1686, "step": 14562 }, { "epoch": 0.025823175786074033, "grad_norm": 0.26953125, "learning_rate": 0.0019109992143234269, "loss": 0.1855, "step": 14564 }, { "epoch": 0.025826721951383847, "grad_norm": 0.72265625, "learning_rate": 0.0019109734058558284, "loss": 0.2585, "step": 14566 }, { "epoch": 0.02583026811669366, "grad_norm": 0.625, "learning_rate": 0.0019109475938415027, "loss": 0.3157, "step": 14568 }, { "epoch": 0.025833814282003476, "grad_norm": 1.78125, "learning_rate": 0.0019109217782805638, "loss": 0.4324, "step": 14570 }, { "epoch": 0.02583736044731329, "grad_norm": 0.443359375, "learning_rate": 0.001910895959173124, "loss": 0.2578, "step": 14572 }, { "epoch": 0.02584090661262311, "grad_norm": 0.498046875, "learning_rate": 0.0019108701365192964, "loss": 0.1843, "step": 14574 }, { "epoch": 0.025844452777932923, "grad_norm": 0.462890625, "learning_rate": 0.0019108443103191938, "loss": 0.1911, "step": 14576 }, { "epoch": 0.025847998943242738, "grad_norm": 0.392578125, "learning_rate": 0.0019108184805729292, "loss": 0.2157, "step": 14578 }, { "epoch": 0.025851545108552552, "grad_norm": 0.59765625, "learning_rate": 0.0019107926472806153, "loss": 0.3567, "step": 14580 }, { "epoch": 0.025855091273862367, "grad_norm": 0.56640625, "learning_rate": 0.0019107668104423655, "loss": 0.2774, "step": 14582 }, { "epoch": 0.02585863743917218, "grad_norm": 0.380859375, "learning_rate": 0.0019107409700582926, "loss": 0.2489, "step": 14584 }, { "epoch": 0.025862183604482, "grad_norm": 0.546875, "learning_rate": 0.0019107151261285094, "loss": 0.2831, "step": 14586 }, { "epoch": 0.025865729769791814, "grad_norm": 0.390625, "learning_rate": 0.0019106892786531294, "loss": 0.1998, "step": 14588 }, { "epoch": 0.02586927593510163, "grad_norm": 0.3125, "learning_rate": 0.0019106634276322655, "loss": 0.2331, "step": 14590 }, { "epoch": 0.025872822100411443, "grad_norm": 0.365234375, "learning_rate": 0.0019106375730660306, "loss": 0.2044, "step": 14592 }, { "epoch": 0.025876368265721258, "grad_norm": 0.453125, "learning_rate": 0.0019106117149545376, "loss": 0.3099, "step": 14594 }, { "epoch": 0.025879914431031076, "grad_norm": 0.5703125, "learning_rate": 0.0019105858532979002, "loss": 0.1866, "step": 14596 }, { "epoch": 0.02588346059634089, "grad_norm": 0.6875, "learning_rate": 0.0019105599880962307, "loss": 0.2265, "step": 14598 }, { "epoch": 0.025887006761650705, "grad_norm": 0.42578125, "learning_rate": 0.0019105341193496428, "loss": 0.2651, "step": 14600 }, { "epoch": 0.02589055292696052, "grad_norm": 1.359375, "learning_rate": 0.0019105082470582495, "loss": 0.2419, "step": 14602 }, { "epoch": 0.025894099092270334, "grad_norm": 0.546875, "learning_rate": 0.0019104823712221641, "loss": 0.2241, "step": 14604 }, { "epoch": 0.02589764525758015, "grad_norm": 0.27734375, "learning_rate": 0.0019104564918414994, "loss": 0.1834, "step": 14606 }, { "epoch": 0.025901191422889967, "grad_norm": 1.578125, "learning_rate": 0.001910430608916369, "loss": 0.5238, "step": 14608 }, { "epoch": 0.02590473758819978, "grad_norm": 0.25390625, "learning_rate": 0.0019104047224468856, "loss": 0.2118, "step": 14610 }, { "epoch": 0.025908283753509596, "grad_norm": 1.3984375, "learning_rate": 0.0019103788324331629, "loss": 0.2461, "step": 14612 }, { "epoch": 0.02591182991881941, "grad_norm": 0.52734375, "learning_rate": 0.0019103529388753138, "loss": 0.2201, "step": 14614 }, { "epoch": 0.025915376084129225, "grad_norm": 0.283203125, "learning_rate": 0.0019103270417734515, "loss": 0.169, "step": 14616 }, { "epoch": 0.02591892224943904, "grad_norm": 0.30859375, "learning_rate": 0.0019103011411276893, "loss": 0.1584, "step": 14618 }, { "epoch": 0.025922468414748857, "grad_norm": 0.412109375, "learning_rate": 0.001910275236938141, "loss": 0.2714, "step": 14620 }, { "epoch": 0.025926014580058672, "grad_norm": 0.5546875, "learning_rate": 0.0019102493292049193, "loss": 0.2209, "step": 14622 }, { "epoch": 0.025929560745368486, "grad_norm": 0.37890625, "learning_rate": 0.0019102234179281378, "loss": 0.217, "step": 14624 }, { "epoch": 0.0259331069106783, "grad_norm": 1.4296875, "learning_rate": 0.0019101975031079095, "loss": 0.2033, "step": 14626 }, { "epoch": 0.025936653075988116, "grad_norm": 1.7578125, "learning_rate": 0.001910171584744348, "loss": 0.3697, "step": 14628 }, { "epoch": 0.025940199241297934, "grad_norm": 0.392578125, "learning_rate": 0.0019101456628375665, "loss": 0.1862, "step": 14630 }, { "epoch": 0.025943745406607748, "grad_norm": 0.5859375, "learning_rate": 0.001910119737387679, "loss": 0.3081, "step": 14632 }, { "epoch": 0.025947291571917563, "grad_norm": 0.65625, "learning_rate": 0.0019100938083947976, "loss": 0.2581, "step": 14634 }, { "epoch": 0.025950837737227377, "grad_norm": 18.75, "learning_rate": 0.0019100678758590369, "loss": 0.2683, "step": 14636 }, { "epoch": 0.025954383902537192, "grad_norm": 1.0625, "learning_rate": 0.0019100419397805096, "loss": 0.2786, "step": 14638 }, { "epoch": 0.025957930067847006, "grad_norm": 0.3984375, "learning_rate": 0.0019100160001593299, "loss": 0.1847, "step": 14640 }, { "epoch": 0.025961476233156824, "grad_norm": 0.322265625, "learning_rate": 0.0019099900569956101, "loss": 0.385, "step": 14642 }, { "epoch": 0.02596502239846664, "grad_norm": 1.7109375, "learning_rate": 0.0019099641102894645, "loss": 0.2356, "step": 14644 }, { "epoch": 0.025968568563776454, "grad_norm": 0.6875, "learning_rate": 0.0019099381600410064, "loss": 0.2214, "step": 14646 }, { "epoch": 0.025972114729086268, "grad_norm": 0.326171875, "learning_rate": 0.0019099122062503494, "loss": 0.2698, "step": 14648 }, { "epoch": 0.025975660894396083, "grad_norm": 0.55859375, "learning_rate": 0.0019098862489176065, "loss": 0.2492, "step": 14650 }, { "epoch": 0.025979207059705897, "grad_norm": 0.4296875, "learning_rate": 0.0019098602880428916, "loss": 0.2228, "step": 14652 }, { "epoch": 0.025982753225015715, "grad_norm": 1.5703125, "learning_rate": 0.0019098343236263187, "loss": 0.2568, "step": 14654 }, { "epoch": 0.02598629939032553, "grad_norm": 2.234375, "learning_rate": 0.0019098083556680005, "loss": 0.4226, "step": 14656 }, { "epoch": 0.025989845555635344, "grad_norm": 0.255859375, "learning_rate": 0.0019097823841680512, "loss": 0.2181, "step": 14658 }, { "epoch": 0.02599339172094516, "grad_norm": 0.2578125, "learning_rate": 0.001909756409126584, "loss": 0.1788, "step": 14660 }, { "epoch": 0.025996937886254973, "grad_norm": 0.484375, "learning_rate": 0.0019097304305437123, "loss": 0.2516, "step": 14662 }, { "epoch": 0.02600048405156479, "grad_norm": 0.65625, "learning_rate": 0.0019097044484195508, "loss": 0.2304, "step": 14664 }, { "epoch": 0.026004030216874606, "grad_norm": 0.2099609375, "learning_rate": 0.0019096784627542116, "loss": 0.183, "step": 14666 }, { "epoch": 0.02600757638218442, "grad_norm": 7.3125, "learning_rate": 0.0019096524735478094, "loss": 0.5231, "step": 14668 }, { "epoch": 0.026011122547494235, "grad_norm": 0.359375, "learning_rate": 0.0019096264808004578, "loss": 0.2108, "step": 14670 }, { "epoch": 0.02601466871280405, "grad_norm": 0.3515625, "learning_rate": 0.0019096004845122702, "loss": 0.2743, "step": 14672 }, { "epoch": 0.026018214878113864, "grad_norm": 0.2734375, "learning_rate": 0.00190957448468336, "loss": 0.1845, "step": 14674 }, { "epoch": 0.026021761043423682, "grad_norm": 0.53125, "learning_rate": 0.0019095484813138415, "loss": 0.2807, "step": 14676 }, { "epoch": 0.026025307208733497, "grad_norm": 0.71484375, "learning_rate": 0.0019095224744038282, "loss": 0.194, "step": 14678 }, { "epoch": 0.02602885337404331, "grad_norm": 0.5546875, "learning_rate": 0.001909496463953434, "loss": 0.1754, "step": 14680 }, { "epoch": 0.026032399539353126, "grad_norm": 0.400390625, "learning_rate": 0.0019094704499627723, "loss": 0.2059, "step": 14682 }, { "epoch": 0.02603594570466294, "grad_norm": 0.28515625, "learning_rate": 0.001909444432431957, "loss": 0.2378, "step": 14684 }, { "epoch": 0.026039491869972755, "grad_norm": 0.2578125, "learning_rate": 0.0019094184113611018, "loss": 0.2093, "step": 14686 }, { "epoch": 0.026043038035282573, "grad_norm": 1.2421875, "learning_rate": 0.001909392386750321, "loss": 0.2315, "step": 14688 }, { "epoch": 0.026046584200592388, "grad_norm": 0.67578125, "learning_rate": 0.0019093663585997276, "loss": 0.2437, "step": 14690 }, { "epoch": 0.026050130365902202, "grad_norm": 0.357421875, "learning_rate": 0.0019093403269094362, "loss": 0.2245, "step": 14692 }, { "epoch": 0.026053676531212017, "grad_norm": 0.94140625, "learning_rate": 0.0019093142916795603, "loss": 0.2478, "step": 14694 }, { "epoch": 0.02605722269652183, "grad_norm": 2.953125, "learning_rate": 0.0019092882529102135, "loss": 0.2911, "step": 14696 }, { "epoch": 0.02606076886183165, "grad_norm": 0.63671875, "learning_rate": 0.0019092622106015105, "loss": 0.1966, "step": 14698 }, { "epoch": 0.026064315027141464, "grad_norm": 0.578125, "learning_rate": 0.0019092361647535642, "loss": 0.2857, "step": 14700 }, { "epoch": 0.02606786119245128, "grad_norm": 1.5390625, "learning_rate": 0.0019092101153664893, "loss": 0.2704, "step": 14702 }, { "epoch": 0.026071407357761093, "grad_norm": 0.439453125, "learning_rate": 0.0019091840624403994, "loss": 0.2451, "step": 14704 }, { "epoch": 0.026074953523070907, "grad_norm": 0.298828125, "learning_rate": 0.001909158005975408, "loss": 0.2542, "step": 14706 }, { "epoch": 0.026078499688380722, "grad_norm": 0.5859375, "learning_rate": 0.0019091319459716301, "loss": 0.1703, "step": 14708 }, { "epoch": 0.02608204585369054, "grad_norm": 0.1962890625, "learning_rate": 0.0019091058824291787, "loss": 0.2259, "step": 14710 }, { "epoch": 0.026085592019000355, "grad_norm": 0.333984375, "learning_rate": 0.0019090798153481683, "loss": 0.2256, "step": 14712 }, { "epoch": 0.02608913818431017, "grad_norm": 0.408203125, "learning_rate": 0.0019090537447287127, "loss": 0.1817, "step": 14714 }, { "epoch": 0.026092684349619984, "grad_norm": 0.65234375, "learning_rate": 0.001909027670570926, "loss": 0.2133, "step": 14716 }, { "epoch": 0.026096230514929798, "grad_norm": 0.7421875, "learning_rate": 0.0019090015928749223, "loss": 0.2392, "step": 14718 }, { "epoch": 0.026099776680239613, "grad_norm": 1.0859375, "learning_rate": 0.0019089755116408155, "loss": 0.2425, "step": 14720 }, { "epoch": 0.02610332284554943, "grad_norm": 1.3359375, "learning_rate": 0.0019089494268687195, "loss": 0.6104, "step": 14722 }, { "epoch": 0.026106869010859245, "grad_norm": 0.37109375, "learning_rate": 0.0019089233385587488, "loss": 0.2047, "step": 14724 }, { "epoch": 0.02611041517616906, "grad_norm": 1.1171875, "learning_rate": 0.001908897246711017, "loss": 0.2606, "step": 14726 }, { "epoch": 0.026113961341478874, "grad_norm": 0.84375, "learning_rate": 0.001908871151325639, "loss": 0.2366, "step": 14728 }, { "epoch": 0.02611750750678869, "grad_norm": 0.8203125, "learning_rate": 0.0019088450524027281, "loss": 0.2255, "step": 14730 }, { "epoch": 0.026121053672098507, "grad_norm": 0.76953125, "learning_rate": 0.0019088189499423987, "loss": 0.22, "step": 14732 }, { "epoch": 0.02612459983740832, "grad_norm": 0.6953125, "learning_rate": 0.0019087928439447653, "loss": 0.1648, "step": 14734 }, { "epoch": 0.026128146002718136, "grad_norm": 0.59765625, "learning_rate": 0.0019087667344099416, "loss": 0.1796, "step": 14736 }, { "epoch": 0.02613169216802795, "grad_norm": 0.4921875, "learning_rate": 0.0019087406213380421, "loss": 0.2897, "step": 14738 }, { "epoch": 0.026135238333337765, "grad_norm": 0.97265625, "learning_rate": 0.001908714504729181, "loss": 0.2544, "step": 14740 }, { "epoch": 0.02613878449864758, "grad_norm": 0.470703125, "learning_rate": 0.0019086883845834722, "loss": 0.1746, "step": 14742 }, { "epoch": 0.026142330663957398, "grad_norm": 0.333984375, "learning_rate": 0.0019086622609010304, "loss": 0.2445, "step": 14744 }, { "epoch": 0.026145876829267212, "grad_norm": 0.291015625, "learning_rate": 0.0019086361336819694, "loss": 0.2983, "step": 14746 }, { "epoch": 0.026149422994577027, "grad_norm": 0.82421875, "learning_rate": 0.0019086100029264035, "loss": 0.2083, "step": 14748 }, { "epoch": 0.02615296915988684, "grad_norm": 10.5625, "learning_rate": 0.0019085838686344475, "loss": 0.3305, "step": 14750 }, { "epoch": 0.026156515325196656, "grad_norm": 1.859375, "learning_rate": 0.0019085577308062152, "loss": 0.264, "step": 14752 }, { "epoch": 0.02616006149050647, "grad_norm": 0.4921875, "learning_rate": 0.0019085315894418209, "loss": 0.1988, "step": 14754 }, { "epoch": 0.02616360765581629, "grad_norm": 1.390625, "learning_rate": 0.0019085054445413792, "loss": 0.201, "step": 14756 }, { "epoch": 0.026167153821126103, "grad_norm": 0.56640625, "learning_rate": 0.0019084792961050047, "loss": 0.2224, "step": 14758 }, { "epoch": 0.026170699986435918, "grad_norm": 0.6953125, "learning_rate": 0.0019084531441328108, "loss": 0.2322, "step": 14760 }, { "epoch": 0.026174246151745732, "grad_norm": 0.54296875, "learning_rate": 0.0019084269886249128, "loss": 0.2054, "step": 14762 }, { "epoch": 0.026177792317055547, "grad_norm": 0.423828125, "learning_rate": 0.0019084008295814244, "loss": 0.3416, "step": 14764 }, { "epoch": 0.026181338482365365, "grad_norm": 0.32421875, "learning_rate": 0.0019083746670024606, "loss": 0.2573, "step": 14766 }, { "epoch": 0.02618488464767518, "grad_norm": 0.9375, "learning_rate": 0.0019083485008881355, "loss": 0.2149, "step": 14768 }, { "epoch": 0.026188430812984994, "grad_norm": 0.37109375, "learning_rate": 0.0019083223312385635, "loss": 0.2298, "step": 14770 }, { "epoch": 0.02619197697829481, "grad_norm": 1.234375, "learning_rate": 0.0019082961580538596, "loss": 0.2629, "step": 14772 }, { "epoch": 0.026195523143604623, "grad_norm": 0.330078125, "learning_rate": 0.0019082699813341374, "loss": 0.2463, "step": 14774 }, { "epoch": 0.026199069308914438, "grad_norm": 0.77734375, "learning_rate": 0.001908243801079512, "loss": 0.3223, "step": 14776 }, { "epoch": 0.026202615474224256, "grad_norm": 1.2734375, "learning_rate": 0.0019082176172900973, "loss": 0.4925, "step": 14778 }, { "epoch": 0.02620616163953407, "grad_norm": 0.55859375, "learning_rate": 0.0019081914299660086, "loss": 0.259, "step": 14780 }, { "epoch": 0.026209707804843885, "grad_norm": 1.9921875, "learning_rate": 0.00190816523910736, "loss": 0.3621, "step": 14782 }, { "epoch": 0.0262132539701537, "grad_norm": 0.41796875, "learning_rate": 0.001908139044714266, "loss": 0.3189, "step": 14784 }, { "epoch": 0.026216800135463514, "grad_norm": 0.353515625, "learning_rate": 0.0019081128467868414, "loss": 0.1443, "step": 14786 }, { "epoch": 0.02622034630077333, "grad_norm": 0.70703125, "learning_rate": 0.0019080866453252004, "loss": 0.185, "step": 14788 }, { "epoch": 0.026223892466083146, "grad_norm": 1.0390625, "learning_rate": 0.0019080604403294578, "loss": 0.241, "step": 14790 }, { "epoch": 0.02622743863139296, "grad_norm": 0.51171875, "learning_rate": 0.0019080342317997286, "loss": 0.2265, "step": 14792 }, { "epoch": 0.026230984796702776, "grad_norm": 0.765625, "learning_rate": 0.0019080080197361265, "loss": 0.4541, "step": 14794 }, { "epoch": 0.02623453096201259, "grad_norm": 0.43359375, "learning_rate": 0.0019079818041387669, "loss": 0.1784, "step": 14796 }, { "epoch": 0.026238077127322405, "grad_norm": 0.3515625, "learning_rate": 0.0019079555850077643, "loss": 0.1908, "step": 14798 }, { "epoch": 0.026241623292632223, "grad_norm": 0.35546875, "learning_rate": 0.0019079293623432332, "loss": 0.3, "step": 14800 }, { "epoch": 0.026245169457942037, "grad_norm": 0.49609375, "learning_rate": 0.0019079031361452885, "loss": 0.167, "step": 14802 }, { "epoch": 0.026248715623251852, "grad_norm": 0.8203125, "learning_rate": 0.0019078769064140447, "loss": 0.2549, "step": 14804 }, { "epoch": 0.026252261788561666, "grad_norm": 0.46484375, "learning_rate": 0.0019078506731496163, "loss": 0.2131, "step": 14806 }, { "epoch": 0.02625580795387148, "grad_norm": 0.474609375, "learning_rate": 0.0019078244363521185, "loss": 0.2364, "step": 14808 }, { "epoch": 0.026259354119181295, "grad_norm": 2.96875, "learning_rate": 0.001907798196021666, "loss": 0.2523, "step": 14810 }, { "epoch": 0.026262900284491113, "grad_norm": 0.349609375, "learning_rate": 0.0019077719521583731, "loss": 0.2722, "step": 14812 }, { "epoch": 0.026266446449800928, "grad_norm": 0.4296875, "learning_rate": 0.0019077457047623552, "loss": 0.3053, "step": 14814 }, { "epoch": 0.026269992615110743, "grad_norm": 0.400390625, "learning_rate": 0.0019077194538337269, "loss": 0.2543, "step": 14816 }, { "epoch": 0.026273538780420557, "grad_norm": 0.474609375, "learning_rate": 0.0019076931993726026, "loss": 0.2537, "step": 14818 }, { "epoch": 0.02627708494573037, "grad_norm": 3.375, "learning_rate": 0.0019076669413790974, "loss": 0.3939, "step": 14820 }, { "epoch": 0.026280631111040186, "grad_norm": 0.82421875, "learning_rate": 0.0019076406798533264, "loss": 0.2492, "step": 14822 }, { "epoch": 0.026284177276350004, "grad_norm": 0.5625, "learning_rate": 0.001907614414795404, "loss": 0.2189, "step": 14824 }, { "epoch": 0.02628772344165982, "grad_norm": 2.53125, "learning_rate": 0.001907588146205445, "loss": 0.1692, "step": 14826 }, { "epoch": 0.026291269606969633, "grad_norm": 0.578125, "learning_rate": 0.0019075618740835652, "loss": 0.2354, "step": 14828 }, { "epoch": 0.026294815772279448, "grad_norm": 0.625, "learning_rate": 0.0019075355984298783, "loss": 0.1972, "step": 14830 }, { "epoch": 0.026298361937589262, "grad_norm": 0.625, "learning_rate": 0.0019075093192445001, "loss": 0.1916, "step": 14832 }, { "epoch": 0.02630190810289908, "grad_norm": 0.353515625, "learning_rate": 0.001907483036527545, "loss": 0.2462, "step": 14834 }, { "epoch": 0.026305454268208895, "grad_norm": 1.078125, "learning_rate": 0.0019074567502791282, "loss": 0.3043, "step": 14836 }, { "epoch": 0.02630900043351871, "grad_norm": 4.40625, "learning_rate": 0.0019074304604993647, "loss": 0.3007, "step": 14838 }, { "epoch": 0.026312546598828524, "grad_norm": 0.314453125, "learning_rate": 0.0019074041671883692, "loss": 0.1707, "step": 14840 }, { "epoch": 0.02631609276413834, "grad_norm": 0.52734375, "learning_rate": 0.001907377870346257, "loss": 0.3716, "step": 14842 }, { "epoch": 0.026319638929448153, "grad_norm": 0.7265625, "learning_rate": 0.001907351569973143, "loss": 0.1962, "step": 14844 }, { "epoch": 0.02632318509475797, "grad_norm": 0.390625, "learning_rate": 0.001907325266069142, "loss": 0.2329, "step": 14846 }, { "epoch": 0.026326731260067786, "grad_norm": 1.359375, "learning_rate": 0.0019072989586343693, "loss": 0.2513, "step": 14848 }, { "epoch": 0.0263302774253776, "grad_norm": 1.609375, "learning_rate": 0.0019072726476689402, "loss": 0.6849, "step": 14850 }, { "epoch": 0.026333823590687415, "grad_norm": 0.7578125, "learning_rate": 0.001907246333172969, "loss": 0.2384, "step": 14852 }, { "epoch": 0.02633736975599723, "grad_norm": 0.55859375, "learning_rate": 0.0019072200151465715, "loss": 0.181, "step": 14854 }, { "epoch": 0.026340915921307044, "grad_norm": 1.2578125, "learning_rate": 0.0019071936935898624, "loss": 0.5526, "step": 14856 }, { "epoch": 0.026344462086616862, "grad_norm": 0.4296875, "learning_rate": 0.001907167368502957, "loss": 0.2215, "step": 14858 }, { "epoch": 0.026348008251926677, "grad_norm": 0.5234375, "learning_rate": 0.0019071410398859706, "loss": 0.2278, "step": 14860 }, { "epoch": 0.02635155441723649, "grad_norm": 0.5546875, "learning_rate": 0.0019071147077390181, "loss": 0.2034, "step": 14862 }, { "epoch": 0.026355100582546306, "grad_norm": 0.60546875, "learning_rate": 0.0019070883720622145, "loss": 0.2976, "step": 14864 }, { "epoch": 0.02635864674785612, "grad_norm": 4.53125, "learning_rate": 0.0019070620328556754, "loss": 0.2985, "step": 14866 }, { "epoch": 0.02636219291316594, "grad_norm": 0.609375, "learning_rate": 0.0019070356901195154, "loss": 0.2863, "step": 14868 }, { "epoch": 0.026365739078475753, "grad_norm": 0.2734375, "learning_rate": 0.0019070093438538502, "loss": 0.2001, "step": 14870 }, { "epoch": 0.026369285243785567, "grad_norm": 0.416015625, "learning_rate": 0.0019069829940587951, "loss": 0.3035, "step": 14872 }, { "epoch": 0.026372831409095382, "grad_norm": 0.443359375, "learning_rate": 0.0019069566407344653, "loss": 0.222, "step": 14874 }, { "epoch": 0.026376377574405196, "grad_norm": 0.396484375, "learning_rate": 0.0019069302838809754, "loss": 0.222, "step": 14876 }, { "epoch": 0.02637992373971501, "grad_norm": 6.21875, "learning_rate": 0.0019069039234984413, "loss": 0.3695, "step": 14878 }, { "epoch": 0.02638346990502483, "grad_norm": 0.62890625, "learning_rate": 0.0019068775595869783, "loss": 0.2371, "step": 14880 }, { "epoch": 0.026387016070334644, "grad_norm": 0.384765625, "learning_rate": 0.0019068511921467015, "loss": 0.2246, "step": 14882 }, { "epoch": 0.026390562235644458, "grad_norm": 2.984375, "learning_rate": 0.0019068248211777262, "loss": 0.2463, "step": 14884 }, { "epoch": 0.026394108400954273, "grad_norm": 0.345703125, "learning_rate": 0.0019067984466801678, "loss": 0.1926, "step": 14886 }, { "epoch": 0.026397654566264087, "grad_norm": 0.64453125, "learning_rate": 0.0019067720686541416, "loss": 0.1887, "step": 14888 }, { "epoch": 0.026401200731573902, "grad_norm": 0.435546875, "learning_rate": 0.0019067456870997628, "loss": 0.1676, "step": 14890 }, { "epoch": 0.02640474689688372, "grad_norm": 0.53515625, "learning_rate": 0.0019067193020171475, "loss": 0.242, "step": 14892 }, { "epoch": 0.026408293062193534, "grad_norm": 0.4453125, "learning_rate": 0.0019066929134064104, "loss": 0.2627, "step": 14894 }, { "epoch": 0.02641183922750335, "grad_norm": 0.306640625, "learning_rate": 0.0019066665212676668, "loss": 0.1893, "step": 14896 }, { "epoch": 0.026415385392813164, "grad_norm": 0.251953125, "learning_rate": 0.0019066401256010326, "loss": 0.2064, "step": 14898 }, { "epoch": 0.026418931558122978, "grad_norm": 0.4140625, "learning_rate": 0.001906613726406623, "loss": 0.2684, "step": 14900 }, { "epoch": 0.026422477723432796, "grad_norm": 0.66015625, "learning_rate": 0.0019065873236845535, "loss": 0.2618, "step": 14902 }, { "epoch": 0.02642602388874261, "grad_norm": 0.32421875, "learning_rate": 0.00190656091743494, "loss": 0.1992, "step": 14904 }, { "epoch": 0.026429570054052425, "grad_norm": 0.83984375, "learning_rate": 0.0019065345076578971, "loss": 0.3719, "step": 14906 }, { "epoch": 0.02643311621936224, "grad_norm": 0.5703125, "learning_rate": 0.0019065080943535405, "loss": 0.2528, "step": 14908 }, { "epoch": 0.026436662384672054, "grad_norm": 1.34375, "learning_rate": 0.0019064816775219862, "loss": 0.2649, "step": 14910 }, { "epoch": 0.02644020854998187, "grad_norm": 0.5859375, "learning_rate": 0.0019064552571633497, "loss": 0.1754, "step": 14912 }, { "epoch": 0.026443754715291687, "grad_norm": 0.7890625, "learning_rate": 0.0019064288332777462, "loss": 0.2324, "step": 14914 }, { "epoch": 0.0264473008806015, "grad_norm": 0.78125, "learning_rate": 0.0019064024058652914, "loss": 0.2982, "step": 14916 }, { "epoch": 0.026450847045911316, "grad_norm": 0.34375, "learning_rate": 0.0019063759749261009, "loss": 0.2612, "step": 14918 }, { "epoch": 0.02645439321122113, "grad_norm": 0.64453125, "learning_rate": 0.0019063495404602903, "loss": 0.225, "step": 14920 }, { "epoch": 0.026457939376530945, "grad_norm": 0.76953125, "learning_rate": 0.0019063231024679752, "loss": 0.2348, "step": 14922 }, { "epoch": 0.02646148554184076, "grad_norm": 0.310546875, "learning_rate": 0.0019062966609492713, "loss": 0.2207, "step": 14924 }, { "epoch": 0.026465031707150578, "grad_norm": 0.6953125, "learning_rate": 0.0019062702159042938, "loss": 0.297, "step": 14926 }, { "epoch": 0.026468577872460392, "grad_norm": 2.1875, "learning_rate": 0.0019062437673331592, "loss": 0.3689, "step": 14928 }, { "epoch": 0.026472124037770207, "grad_norm": 0.89453125, "learning_rate": 0.0019062173152359825, "loss": 0.2048, "step": 14930 }, { "epoch": 0.02647567020308002, "grad_norm": 0.40625, "learning_rate": 0.0019061908596128795, "loss": 0.2138, "step": 14932 }, { "epoch": 0.026479216368389836, "grad_norm": 0.515625, "learning_rate": 0.001906164400463966, "loss": 0.2556, "step": 14934 }, { "epoch": 0.026482762533699654, "grad_norm": 0.259765625, "learning_rate": 0.0019061379377893577, "loss": 0.1824, "step": 14936 }, { "epoch": 0.02648630869900947, "grad_norm": 0.5703125, "learning_rate": 0.0019061114715891705, "loss": 0.2387, "step": 14938 }, { "epoch": 0.026489854864319283, "grad_norm": 0.279296875, "learning_rate": 0.0019060850018635196, "loss": 0.1963, "step": 14940 }, { "epoch": 0.026493401029629098, "grad_norm": 0.5078125, "learning_rate": 0.0019060585286125213, "loss": 0.2328, "step": 14942 }, { "epoch": 0.026496947194938912, "grad_norm": 0.80078125, "learning_rate": 0.0019060320518362915, "loss": 0.315, "step": 14944 }, { "epoch": 0.026500493360248727, "grad_norm": 1.5859375, "learning_rate": 0.0019060055715349454, "loss": 0.3271, "step": 14946 }, { "epoch": 0.026504039525558545, "grad_norm": 0.345703125, "learning_rate": 0.0019059790877085992, "loss": 0.204, "step": 14948 }, { "epoch": 0.02650758569086836, "grad_norm": 0.41796875, "learning_rate": 0.0019059526003573685, "loss": 0.3085, "step": 14950 }, { "epoch": 0.026511131856178174, "grad_norm": 1.390625, "learning_rate": 0.0019059261094813693, "loss": 0.242, "step": 14952 }, { "epoch": 0.02651467802148799, "grad_norm": 0.640625, "learning_rate": 0.0019058996150807174, "loss": 0.3998, "step": 14954 }, { "epoch": 0.026518224186797803, "grad_norm": 1.75, "learning_rate": 0.001905873117155529, "loss": 0.3869, "step": 14956 }, { "epoch": 0.026521770352107617, "grad_norm": 1.234375, "learning_rate": 0.0019058466157059193, "loss": 0.33, "step": 14958 }, { "epoch": 0.026525316517417435, "grad_norm": 0.7578125, "learning_rate": 0.001905820110732005, "loss": 0.3124, "step": 14960 }, { "epoch": 0.02652886268272725, "grad_norm": 0.8125, "learning_rate": 0.0019057936022339014, "loss": 0.2348, "step": 14962 }, { "epoch": 0.026532408848037065, "grad_norm": 1.984375, "learning_rate": 0.0019057670902117249, "loss": 0.3382, "step": 14964 }, { "epoch": 0.02653595501334688, "grad_norm": 0.9375, "learning_rate": 0.0019057405746655907, "loss": 0.2266, "step": 14966 }, { "epoch": 0.026539501178656694, "grad_norm": 0.39453125, "learning_rate": 0.0019057140555956155, "loss": 0.2307, "step": 14968 }, { "epoch": 0.02654304734396651, "grad_norm": 0.255859375, "learning_rate": 0.001905687533001915, "loss": 0.3062, "step": 14970 }, { "epoch": 0.026546593509276326, "grad_norm": 0.515625, "learning_rate": 0.0019056610068846054, "loss": 0.2401, "step": 14972 }, { "epoch": 0.02655013967458614, "grad_norm": 0.373046875, "learning_rate": 0.0019056344772438024, "loss": 0.1732, "step": 14974 }, { "epoch": 0.026553685839895955, "grad_norm": 1.015625, "learning_rate": 0.0019056079440796221, "loss": 0.3149, "step": 14976 }, { "epoch": 0.02655723200520577, "grad_norm": 1.2890625, "learning_rate": 0.0019055814073921807, "loss": 0.1881, "step": 14978 }, { "epoch": 0.026560778170515584, "grad_norm": 0.388671875, "learning_rate": 0.0019055548671815942, "loss": 0.2802, "step": 14980 }, { "epoch": 0.026564324335825402, "grad_norm": 0.93359375, "learning_rate": 0.0019055283234479786, "loss": 0.2396, "step": 14982 }, { "epoch": 0.026567870501135217, "grad_norm": 0.62890625, "learning_rate": 0.0019055017761914499, "loss": 0.1673, "step": 14984 }, { "epoch": 0.02657141666644503, "grad_norm": 1.046875, "learning_rate": 0.0019054752254121243, "loss": 0.3285, "step": 14986 }, { "epoch": 0.026574962831754846, "grad_norm": 0.326171875, "learning_rate": 0.0019054486711101183, "loss": 0.1991, "step": 14988 }, { "epoch": 0.02657850899706466, "grad_norm": 1.0078125, "learning_rate": 0.0019054221132855473, "loss": 0.1926, "step": 14990 }, { "epoch": 0.026582055162374475, "grad_norm": 0.7734375, "learning_rate": 0.001905395551938528, "loss": 0.3694, "step": 14992 }, { "epoch": 0.026585601327684293, "grad_norm": 0.451171875, "learning_rate": 0.0019053689870691763, "loss": 0.1863, "step": 14994 }, { "epoch": 0.026589147492994108, "grad_norm": 0.396484375, "learning_rate": 0.0019053424186776084, "loss": 0.233, "step": 14996 }, { "epoch": 0.026592693658303922, "grad_norm": 0.66015625, "learning_rate": 0.001905315846763941, "loss": 0.2588, "step": 14998 }, { "epoch": 0.026596239823613737, "grad_norm": 0.306640625, "learning_rate": 0.0019052892713282892, "loss": 0.2344, "step": 15000 }, { "epoch": 0.02659978598892355, "grad_norm": 0.376953125, "learning_rate": 0.0019052626923707703, "loss": 0.1813, "step": 15002 }, { "epoch": 0.02660333215423337, "grad_norm": 0.3671875, "learning_rate": 0.0019052361098915002, "loss": 0.2422, "step": 15004 }, { "epoch": 0.026606878319543184, "grad_norm": 0.59375, "learning_rate": 0.001905209523890595, "loss": 0.2422, "step": 15006 }, { "epoch": 0.026610424484853, "grad_norm": 0.71875, "learning_rate": 0.0019051829343681708, "loss": 0.2918, "step": 15008 }, { "epoch": 0.026613970650162813, "grad_norm": 0.357421875, "learning_rate": 0.0019051563413243447, "loss": 0.168, "step": 15010 }, { "epoch": 0.026617516815472628, "grad_norm": 0.75, "learning_rate": 0.001905129744759232, "loss": 0.2462, "step": 15012 }, { "epoch": 0.026621062980782442, "grad_norm": 0.60546875, "learning_rate": 0.0019051031446729494, "loss": 0.2822, "step": 15014 }, { "epoch": 0.02662460914609226, "grad_norm": 0.3515625, "learning_rate": 0.0019050765410656137, "loss": 0.2151, "step": 15016 }, { "epoch": 0.026628155311402075, "grad_norm": 0.4765625, "learning_rate": 0.0019050499339373407, "loss": 0.1869, "step": 15018 }, { "epoch": 0.02663170147671189, "grad_norm": 0.49609375, "learning_rate": 0.0019050233232882467, "loss": 0.2133, "step": 15020 }, { "epoch": 0.026635247642021704, "grad_norm": 2.671875, "learning_rate": 0.0019049967091184484, "loss": 0.2579, "step": 15022 }, { "epoch": 0.02663879380733152, "grad_norm": 2.484375, "learning_rate": 0.0019049700914280621, "loss": 0.2855, "step": 15024 }, { "epoch": 0.026642339972641333, "grad_norm": 0.80078125, "learning_rate": 0.0019049434702172045, "loss": 0.232, "step": 15026 }, { "epoch": 0.02664588613795115, "grad_norm": 0.419921875, "learning_rate": 0.0019049168454859915, "loss": 0.2858, "step": 15028 }, { "epoch": 0.026649432303260966, "grad_norm": 0.578125, "learning_rate": 0.0019048902172345396, "loss": 0.363, "step": 15030 }, { "epoch": 0.02665297846857078, "grad_norm": 1.0390625, "learning_rate": 0.0019048635854629658, "loss": 0.2173, "step": 15032 }, { "epoch": 0.026656524633880595, "grad_norm": 0.9296875, "learning_rate": 0.0019048369501713856, "loss": 0.175, "step": 15034 }, { "epoch": 0.02666007079919041, "grad_norm": 0.37109375, "learning_rate": 0.0019048103113599166, "loss": 0.2386, "step": 15036 }, { "epoch": 0.026663616964500227, "grad_norm": 2.875, "learning_rate": 0.0019047836690286746, "loss": 0.2733, "step": 15038 }, { "epoch": 0.026667163129810042, "grad_norm": 0.62109375, "learning_rate": 0.0019047570231777765, "loss": 0.2928, "step": 15040 }, { "epoch": 0.026670709295119856, "grad_norm": 0.279296875, "learning_rate": 0.0019047303738073385, "loss": 0.3489, "step": 15042 }, { "epoch": 0.02667425546042967, "grad_norm": 1.9375, "learning_rate": 0.0019047037209174774, "loss": 0.2277, "step": 15044 }, { "epoch": 0.026677801625739486, "grad_norm": 2.203125, "learning_rate": 0.0019046770645083093, "loss": 0.4361, "step": 15046 }, { "epoch": 0.0266813477910493, "grad_norm": 0.3828125, "learning_rate": 0.0019046504045799513, "loss": 0.1842, "step": 15048 }, { "epoch": 0.026684893956359118, "grad_norm": 0.66015625, "learning_rate": 0.00190462374113252, "loss": 0.3395, "step": 15050 }, { "epoch": 0.026688440121668933, "grad_norm": 1.0390625, "learning_rate": 0.0019045970741661318, "loss": 0.512, "step": 15052 }, { "epoch": 0.026691986286978747, "grad_norm": 0.98828125, "learning_rate": 0.0019045704036809033, "loss": 0.308, "step": 15054 }, { "epoch": 0.026695532452288562, "grad_norm": 0.86328125, "learning_rate": 0.001904543729676951, "loss": 0.2244, "step": 15056 }, { "epoch": 0.026699078617598376, "grad_norm": 0.87890625, "learning_rate": 0.001904517052154392, "loss": 0.199, "step": 15058 }, { "epoch": 0.02670262478290819, "grad_norm": 3.6875, "learning_rate": 0.0019044903711133427, "loss": 0.3927, "step": 15060 }, { "epoch": 0.02670617094821801, "grad_norm": 0.76953125, "learning_rate": 0.00190446368655392, "loss": 0.2784, "step": 15062 }, { "epoch": 0.026709717113527823, "grad_norm": 15.75, "learning_rate": 0.0019044369984762403, "loss": 0.3221, "step": 15064 }, { "epoch": 0.026713263278837638, "grad_norm": 0.55859375, "learning_rate": 0.0019044103068804204, "loss": 0.3118, "step": 15066 }, { "epoch": 0.026716809444147453, "grad_norm": 0.69921875, "learning_rate": 0.001904383611766577, "loss": 0.2349, "step": 15068 }, { "epoch": 0.026720355609457267, "grad_norm": 0.423828125, "learning_rate": 0.001904356913134827, "loss": 0.2012, "step": 15070 }, { "epoch": 0.026723901774767085, "grad_norm": 0.3828125, "learning_rate": 0.0019043302109852868, "loss": 0.2411, "step": 15072 }, { "epoch": 0.0267274479400769, "grad_norm": 0.51171875, "learning_rate": 0.001904303505318074, "loss": 0.2508, "step": 15074 }, { "epoch": 0.026730994105386714, "grad_norm": 0.443359375, "learning_rate": 0.0019042767961333046, "loss": 0.2092, "step": 15076 }, { "epoch": 0.02673454027069653, "grad_norm": 0.458984375, "learning_rate": 0.0019042500834310955, "loss": 0.2352, "step": 15078 }, { "epoch": 0.026738086436006343, "grad_norm": 0.71484375, "learning_rate": 0.0019042233672115641, "loss": 0.2208, "step": 15080 }, { "epoch": 0.026741632601316158, "grad_norm": 0.4375, "learning_rate": 0.0019041966474748265, "loss": 0.2644, "step": 15082 }, { "epoch": 0.026745178766625976, "grad_norm": 0.45703125, "learning_rate": 0.0019041699242209997, "loss": 0.2331, "step": 15084 }, { "epoch": 0.02674872493193579, "grad_norm": 1.0390625, "learning_rate": 0.0019041431974502012, "loss": 0.2221, "step": 15086 }, { "epoch": 0.026752271097245605, "grad_norm": 0.5703125, "learning_rate": 0.0019041164671625478, "loss": 0.2456, "step": 15088 }, { "epoch": 0.02675581726255542, "grad_norm": 0.388671875, "learning_rate": 0.0019040897333581555, "loss": 0.2733, "step": 15090 }, { "epoch": 0.026759363427865234, "grad_norm": 0.5625, "learning_rate": 0.0019040629960371416, "loss": 0.3393, "step": 15092 }, { "epoch": 0.02676290959317505, "grad_norm": 1.140625, "learning_rate": 0.0019040362551996236, "loss": 0.2042, "step": 15094 }, { "epoch": 0.026766455758484867, "grad_norm": 1.25, "learning_rate": 0.0019040095108457179, "loss": 0.2111, "step": 15096 }, { "epoch": 0.02677000192379468, "grad_norm": 0.71484375, "learning_rate": 0.0019039827629755417, "loss": 0.1714, "step": 15098 }, { "epoch": 0.026773548089104496, "grad_norm": 0.9140625, "learning_rate": 0.0019039560115892118, "loss": 0.178, "step": 15100 }, { "epoch": 0.02677709425441431, "grad_norm": 0.34375, "learning_rate": 0.0019039292566868453, "loss": 0.2248, "step": 15102 }, { "epoch": 0.026780640419724125, "grad_norm": 1.1171875, "learning_rate": 0.001903902498268559, "loss": 0.271, "step": 15104 }, { "epoch": 0.026784186585033943, "grad_norm": 0.5703125, "learning_rate": 0.0019038757363344708, "loss": 0.2073, "step": 15106 }, { "epoch": 0.026787732750343757, "grad_norm": 0.5703125, "learning_rate": 0.0019038489708846967, "loss": 0.2431, "step": 15108 }, { "epoch": 0.026791278915653572, "grad_norm": 1.1640625, "learning_rate": 0.0019038222019193537, "loss": 0.2159, "step": 15110 }, { "epoch": 0.026794825080963387, "grad_norm": 0.67578125, "learning_rate": 0.00190379542943856, "loss": 0.2053, "step": 15112 }, { "epoch": 0.0267983712462732, "grad_norm": 0.46484375, "learning_rate": 0.0019037686534424316, "loss": 0.2193, "step": 15114 }, { "epoch": 0.026801917411583016, "grad_norm": 0.55078125, "learning_rate": 0.0019037418739310862, "loss": 0.3041, "step": 15116 }, { "epoch": 0.026805463576892834, "grad_norm": 0.55859375, "learning_rate": 0.0019037150909046406, "loss": 0.2518, "step": 15118 }, { "epoch": 0.026809009742202648, "grad_norm": 0.32421875, "learning_rate": 0.001903688304363212, "loss": 0.1878, "step": 15120 }, { "epoch": 0.026812555907512463, "grad_norm": 0.51171875, "learning_rate": 0.0019036615143069179, "loss": 0.2409, "step": 15122 }, { "epoch": 0.026816102072822277, "grad_norm": 0.349609375, "learning_rate": 0.0019036347207358748, "loss": 0.177, "step": 15124 }, { "epoch": 0.026819648238132092, "grad_norm": 1.9609375, "learning_rate": 0.0019036079236502005, "loss": 0.2686, "step": 15126 }, { "epoch": 0.026823194403441906, "grad_norm": 3.328125, "learning_rate": 0.0019035811230500117, "loss": 0.5553, "step": 15128 }, { "epoch": 0.026826740568751724, "grad_norm": 0.99609375, "learning_rate": 0.0019035543189354258, "loss": 0.4466, "step": 15130 }, { "epoch": 0.02683028673406154, "grad_norm": 0.451171875, "learning_rate": 0.0019035275113065602, "loss": 0.2988, "step": 15132 }, { "epoch": 0.026833832899371354, "grad_norm": 0.7578125, "learning_rate": 0.001903500700163532, "loss": 0.2313, "step": 15134 }, { "epoch": 0.026837379064681168, "grad_norm": 1.328125, "learning_rate": 0.0019034738855064587, "loss": 0.2671, "step": 15136 }, { "epoch": 0.026840925229990983, "grad_norm": 0.66796875, "learning_rate": 0.0019034470673354568, "loss": 0.1987, "step": 15138 }, { "epoch": 0.0268444713953008, "grad_norm": 1.109375, "learning_rate": 0.0019034202456506446, "loss": 0.2458, "step": 15140 }, { "epoch": 0.026848017560610615, "grad_norm": 0.1748046875, "learning_rate": 0.001903393420452139, "loss": 0.236, "step": 15142 }, { "epoch": 0.02685156372592043, "grad_norm": 0.67578125, "learning_rate": 0.0019033665917400568, "loss": 0.2526, "step": 15144 }, { "epoch": 0.026855109891230244, "grad_norm": 1.359375, "learning_rate": 0.001903339759514516, "loss": 0.3702, "step": 15146 }, { "epoch": 0.02685865605654006, "grad_norm": 0.58984375, "learning_rate": 0.0019033129237756338, "loss": 0.268, "step": 15148 }, { "epoch": 0.026862202221849873, "grad_norm": 0.3984375, "learning_rate": 0.0019032860845235274, "loss": 0.2284, "step": 15150 }, { "epoch": 0.02686574838715969, "grad_norm": 0.259765625, "learning_rate": 0.0019032592417583143, "loss": 0.1844, "step": 15152 }, { "epoch": 0.026869294552469506, "grad_norm": 0.734375, "learning_rate": 0.001903232395480112, "loss": 0.2287, "step": 15154 }, { "epoch": 0.02687284071777932, "grad_norm": 1.984375, "learning_rate": 0.0019032055456890374, "loss": 0.2131, "step": 15156 }, { "epoch": 0.026876386883089135, "grad_norm": 0.390625, "learning_rate": 0.0019031786923852084, "loss": 0.1591, "step": 15158 }, { "epoch": 0.02687993304839895, "grad_norm": 0.515625, "learning_rate": 0.0019031518355687425, "loss": 0.1689, "step": 15160 }, { "epoch": 0.026883479213708764, "grad_norm": 3.40625, "learning_rate": 0.001903124975239757, "loss": 0.2431, "step": 15162 }, { "epoch": 0.026887025379018582, "grad_norm": 0.478515625, "learning_rate": 0.0019030981113983692, "loss": 0.1768, "step": 15164 }, { "epoch": 0.026890571544328397, "grad_norm": 0.314453125, "learning_rate": 0.0019030712440446969, "loss": 0.2345, "step": 15166 }, { "epoch": 0.02689411770963821, "grad_norm": 1.0546875, "learning_rate": 0.0019030443731788576, "loss": 0.2273, "step": 15168 }, { "epoch": 0.026897663874948026, "grad_norm": 0.85546875, "learning_rate": 0.0019030174988009685, "loss": 0.1511, "step": 15170 }, { "epoch": 0.02690121004025784, "grad_norm": 0.451171875, "learning_rate": 0.0019029906209111474, "loss": 0.2022, "step": 15172 }, { "epoch": 0.02690475620556766, "grad_norm": 0.69140625, "learning_rate": 0.0019029637395095117, "loss": 0.4968, "step": 15174 }, { "epoch": 0.026908302370877473, "grad_norm": 0.294921875, "learning_rate": 0.001902936854596179, "loss": 0.3302, "step": 15176 }, { "epoch": 0.026911848536187288, "grad_norm": 0.71484375, "learning_rate": 0.0019029099661712668, "loss": 0.2483, "step": 15178 }, { "epoch": 0.026915394701497102, "grad_norm": 1.2421875, "learning_rate": 0.0019028830742348927, "loss": 0.415, "step": 15180 }, { "epoch": 0.026918940866806917, "grad_norm": 0.357421875, "learning_rate": 0.0019028561787871748, "loss": 0.2501, "step": 15182 }, { "epoch": 0.02692248703211673, "grad_norm": 0.890625, "learning_rate": 0.00190282927982823, "loss": 0.2902, "step": 15184 }, { "epoch": 0.02692603319742655, "grad_norm": 0.44921875, "learning_rate": 0.0019028023773581765, "loss": 0.3006, "step": 15186 }, { "epoch": 0.026929579362736364, "grad_norm": 0.53125, "learning_rate": 0.0019027754713771316, "loss": 0.2204, "step": 15188 }, { "epoch": 0.02693312552804618, "grad_norm": 0.4765625, "learning_rate": 0.0019027485618852132, "loss": 0.1845, "step": 15190 }, { "epoch": 0.026936671693355993, "grad_norm": 0.53515625, "learning_rate": 0.0019027216488825386, "loss": 0.2787, "step": 15192 }, { "epoch": 0.026940217858665808, "grad_norm": 0.8828125, "learning_rate": 0.0019026947323692262, "loss": 0.4492, "step": 15194 }, { "epoch": 0.026943764023975622, "grad_norm": 2.125, "learning_rate": 0.001902667812345393, "loss": 0.3507, "step": 15196 }, { "epoch": 0.02694731018928544, "grad_norm": 0.27734375, "learning_rate": 0.0019026408888111572, "loss": 0.238, "step": 15198 }, { "epoch": 0.026950856354595255, "grad_norm": 2.34375, "learning_rate": 0.0019026139617666362, "loss": 0.2794, "step": 15200 }, { "epoch": 0.02695440251990507, "grad_norm": 0.462890625, "learning_rate": 0.001902587031211948, "loss": 0.2536, "step": 15202 }, { "epoch": 0.026957948685214884, "grad_norm": 0.55078125, "learning_rate": 0.0019025600971472104, "loss": 0.2051, "step": 15204 }, { "epoch": 0.0269614948505247, "grad_norm": 0.8359375, "learning_rate": 0.0019025331595725413, "loss": 0.203, "step": 15206 }, { "epoch": 0.026965041015834516, "grad_norm": 0.2080078125, "learning_rate": 0.0019025062184880581, "loss": 0.1757, "step": 15208 }, { "epoch": 0.02696858718114433, "grad_norm": 0.400390625, "learning_rate": 0.0019024792738938786, "loss": 0.2133, "step": 15210 }, { "epoch": 0.026972133346454145, "grad_norm": 0.640625, "learning_rate": 0.0019024523257901214, "loss": 0.4925, "step": 15212 }, { "epoch": 0.02697567951176396, "grad_norm": 0.498046875, "learning_rate": 0.0019024253741769033, "loss": 0.1979, "step": 15214 }, { "epoch": 0.026979225677073775, "grad_norm": 0.921875, "learning_rate": 0.0019023984190543431, "loss": 0.2097, "step": 15216 }, { "epoch": 0.02698277184238359, "grad_norm": 0.63671875, "learning_rate": 0.0019023714604225583, "loss": 0.203, "step": 15218 }, { "epoch": 0.026986318007693407, "grad_norm": 0.40234375, "learning_rate": 0.0019023444982816666, "loss": 0.21, "step": 15220 }, { "epoch": 0.02698986417300322, "grad_norm": 0.390625, "learning_rate": 0.0019023175326317863, "loss": 0.257, "step": 15222 }, { "epoch": 0.026993410338313036, "grad_norm": 1.015625, "learning_rate": 0.0019022905634730352, "loss": 0.2313, "step": 15224 }, { "epoch": 0.02699695650362285, "grad_norm": 0.53515625, "learning_rate": 0.001902263590805531, "loss": 0.2813, "step": 15226 }, { "epoch": 0.027000502668932665, "grad_norm": 0.50390625, "learning_rate": 0.0019022366146293922, "loss": 0.2477, "step": 15228 }, { "epoch": 0.02700404883424248, "grad_norm": 1.6875, "learning_rate": 0.001902209634944736, "loss": 0.3164, "step": 15230 }, { "epoch": 0.027007594999552298, "grad_norm": 0.8203125, "learning_rate": 0.0019021826517516811, "loss": 0.2341, "step": 15232 }, { "epoch": 0.027011141164862112, "grad_norm": 1.0234375, "learning_rate": 0.0019021556650503453, "loss": 0.2046, "step": 15234 }, { "epoch": 0.027014687330171927, "grad_norm": 0.6484375, "learning_rate": 0.0019021286748408463, "loss": 0.197, "step": 15236 }, { "epoch": 0.02701823349548174, "grad_norm": 0.609375, "learning_rate": 0.0019021016811233027, "loss": 0.273, "step": 15238 }, { "epoch": 0.027021779660791556, "grad_norm": 1.484375, "learning_rate": 0.001902074683897832, "loss": 0.2518, "step": 15240 }, { "epoch": 0.027025325826101374, "grad_norm": 1.921875, "learning_rate": 0.0019020476831645527, "loss": 0.4678, "step": 15242 }, { "epoch": 0.02702887199141119, "grad_norm": 0.2890625, "learning_rate": 0.0019020206789235826, "loss": 0.1867, "step": 15244 }, { "epoch": 0.027032418156721003, "grad_norm": 7.03125, "learning_rate": 0.0019019936711750403, "loss": 0.3129, "step": 15246 }, { "epoch": 0.027035964322030818, "grad_norm": 0.451171875, "learning_rate": 0.001901966659919043, "loss": 0.2265, "step": 15248 }, { "epoch": 0.027039510487340632, "grad_norm": 0.3203125, "learning_rate": 0.0019019396451557095, "loss": 0.3175, "step": 15250 }, { "epoch": 0.027043056652650447, "grad_norm": 0.54296875, "learning_rate": 0.001901912626885158, "loss": 0.2248, "step": 15252 }, { "epoch": 0.027046602817960265, "grad_norm": 0.25390625, "learning_rate": 0.0019018856051075065, "loss": 0.2404, "step": 15254 }, { "epoch": 0.02705014898327008, "grad_norm": 0.89453125, "learning_rate": 0.001901858579822873, "loss": 0.2238, "step": 15256 }, { "epoch": 0.027053695148579894, "grad_norm": 0.69921875, "learning_rate": 0.0019018315510313758, "loss": 0.2281, "step": 15258 }, { "epoch": 0.02705724131388971, "grad_norm": 0.20703125, "learning_rate": 0.0019018045187331333, "loss": 0.2019, "step": 15260 }, { "epoch": 0.027060787479199523, "grad_norm": 0.302734375, "learning_rate": 0.0019017774829282635, "loss": 0.16, "step": 15262 }, { "epoch": 0.027064333644509338, "grad_norm": 0.4765625, "learning_rate": 0.0019017504436168846, "loss": 0.1872, "step": 15264 }, { "epoch": 0.027067879809819156, "grad_norm": 0.392578125, "learning_rate": 0.0019017234007991152, "loss": 0.2284, "step": 15266 }, { "epoch": 0.02707142597512897, "grad_norm": 0.6953125, "learning_rate": 0.0019016963544750732, "loss": 0.2375, "step": 15268 }, { "epoch": 0.027074972140438785, "grad_norm": 3.078125, "learning_rate": 0.001901669304644877, "loss": 0.2851, "step": 15270 }, { "epoch": 0.0270785183057486, "grad_norm": 0.6015625, "learning_rate": 0.0019016422513086454, "loss": 0.2036, "step": 15272 }, { "epoch": 0.027082064471058414, "grad_norm": 0.484375, "learning_rate": 0.0019016151944664956, "loss": 0.2692, "step": 15274 }, { "epoch": 0.027085610636368232, "grad_norm": 0.375, "learning_rate": 0.0019015881341185468, "loss": 0.2084, "step": 15276 }, { "epoch": 0.027089156801678047, "grad_norm": 0.66015625, "learning_rate": 0.0019015610702649173, "loss": 0.3184, "step": 15278 }, { "epoch": 0.02709270296698786, "grad_norm": 0.6875, "learning_rate": 0.0019015340029057248, "loss": 0.221, "step": 15280 }, { "epoch": 0.027096249132297676, "grad_norm": 0.890625, "learning_rate": 0.0019015069320410887, "loss": 0.1924, "step": 15282 }, { "epoch": 0.02709979529760749, "grad_norm": 0.345703125, "learning_rate": 0.0019014798576711266, "loss": 0.2075, "step": 15284 }, { "epoch": 0.027103341462917305, "grad_norm": 0.78515625, "learning_rate": 0.0019014527797959573, "loss": 0.3273, "step": 15286 }, { "epoch": 0.027106887628227123, "grad_norm": 0.3359375, "learning_rate": 0.001901425698415699, "loss": 0.3972, "step": 15288 }, { "epoch": 0.027110433793536937, "grad_norm": 1.046875, "learning_rate": 0.00190139861353047, "loss": 0.2948, "step": 15290 }, { "epoch": 0.027113979958846752, "grad_norm": 2.140625, "learning_rate": 0.0019013715251403893, "loss": 0.2245, "step": 15292 }, { "epoch": 0.027117526124156566, "grad_norm": 0.40234375, "learning_rate": 0.001901344433245575, "loss": 0.2347, "step": 15294 }, { "epoch": 0.02712107228946638, "grad_norm": 0.498046875, "learning_rate": 0.0019013173378461452, "loss": 0.2032, "step": 15296 }, { "epoch": 0.027124618454776196, "grad_norm": 0.86328125, "learning_rate": 0.0019012902389422192, "loss": 0.2114, "step": 15298 }, { "epoch": 0.027128164620086014, "grad_norm": 0.3515625, "learning_rate": 0.0019012631365339153, "loss": 0.2238, "step": 15300 }, { "epoch": 0.027131710785395828, "grad_norm": 1.15625, "learning_rate": 0.0019012360306213518, "loss": 0.2421, "step": 15302 }, { "epoch": 0.027135256950705643, "grad_norm": 0.38671875, "learning_rate": 0.0019012089212046468, "loss": 0.1971, "step": 15304 }, { "epoch": 0.027138803116015457, "grad_norm": 0.65234375, "learning_rate": 0.0019011818082839199, "loss": 0.2091, "step": 15306 }, { "epoch": 0.027142349281325272, "grad_norm": 0.283203125, "learning_rate": 0.001901154691859289, "loss": 0.2308, "step": 15308 }, { "epoch": 0.02714589544663509, "grad_norm": 0.7421875, "learning_rate": 0.001901127571930873, "loss": 0.2504, "step": 15310 }, { "epoch": 0.027149441611944904, "grad_norm": 0.48828125, "learning_rate": 0.0019011004484987902, "loss": 0.1844, "step": 15312 }, { "epoch": 0.02715298777725472, "grad_norm": 0.83984375, "learning_rate": 0.001901073321563159, "loss": 0.2676, "step": 15314 }, { "epoch": 0.027156533942564533, "grad_norm": 0.451171875, "learning_rate": 0.001901046191124099, "loss": 0.2291, "step": 15316 }, { "epoch": 0.027160080107874348, "grad_norm": 0.83203125, "learning_rate": 0.001901019057181728, "loss": 0.4434, "step": 15318 }, { "epoch": 0.027163626273184163, "grad_norm": 1.015625, "learning_rate": 0.0019009919197361651, "loss": 0.5163, "step": 15320 }, { "epoch": 0.02716717243849398, "grad_norm": 0.404296875, "learning_rate": 0.0019009647787875288, "loss": 0.2067, "step": 15322 }, { "epoch": 0.027170718603803795, "grad_norm": 0.244140625, "learning_rate": 0.0019009376343359374, "loss": 0.1615, "step": 15324 }, { "epoch": 0.02717426476911361, "grad_norm": 0.462890625, "learning_rate": 0.0019009104863815106, "loss": 0.197, "step": 15326 }, { "epoch": 0.027177810934423424, "grad_norm": 1.0390625, "learning_rate": 0.001900883334924366, "loss": 0.248, "step": 15328 }, { "epoch": 0.02718135709973324, "grad_norm": 0.404296875, "learning_rate": 0.0019008561799646233, "loss": 0.217, "step": 15330 }, { "epoch": 0.027184903265043053, "grad_norm": 0.51171875, "learning_rate": 0.001900829021502401, "loss": 0.2013, "step": 15332 }, { "epoch": 0.02718844943035287, "grad_norm": 0.5078125, "learning_rate": 0.0019008018595378174, "loss": 0.2051, "step": 15334 }, { "epoch": 0.027191995595662686, "grad_norm": 0.50390625, "learning_rate": 0.0019007746940709917, "loss": 0.2385, "step": 15336 }, { "epoch": 0.0271955417609725, "grad_norm": 0.42578125, "learning_rate": 0.0019007475251020425, "loss": 0.2572, "step": 15338 }, { "epoch": 0.027199087926282315, "grad_norm": 2.34375, "learning_rate": 0.001900720352631089, "loss": 0.2728, "step": 15340 }, { "epoch": 0.02720263409159213, "grad_norm": 0.7265625, "learning_rate": 0.0019006931766582498, "loss": 0.2144, "step": 15342 }, { "epoch": 0.027206180256901948, "grad_norm": 0.341796875, "learning_rate": 0.0019006659971836436, "loss": 0.237, "step": 15344 }, { "epoch": 0.027209726422211762, "grad_norm": 0.58203125, "learning_rate": 0.0019006388142073893, "loss": 0.2356, "step": 15346 }, { "epoch": 0.027213272587521577, "grad_norm": 0.6015625, "learning_rate": 0.0019006116277296061, "loss": 0.2006, "step": 15348 }, { "epoch": 0.02721681875283139, "grad_norm": 1.515625, "learning_rate": 0.0019005844377504124, "loss": 0.2658, "step": 15350 }, { "epoch": 0.027220364918141206, "grad_norm": 0.66796875, "learning_rate": 0.0019005572442699278, "loss": 0.2551, "step": 15352 }, { "epoch": 0.02722391108345102, "grad_norm": 0.7578125, "learning_rate": 0.0019005300472882706, "loss": 0.1957, "step": 15354 }, { "epoch": 0.02722745724876084, "grad_norm": 0.640625, "learning_rate": 0.00190050284680556, "loss": 0.2783, "step": 15356 }, { "epoch": 0.027231003414070653, "grad_norm": 1.4453125, "learning_rate": 0.0019004756428219153, "loss": 0.2551, "step": 15358 }, { "epoch": 0.027234549579380467, "grad_norm": 2.21875, "learning_rate": 0.0019004484353374548, "loss": 0.2885, "step": 15360 }, { "epoch": 0.027238095744690282, "grad_norm": 0.1982421875, "learning_rate": 0.0019004212243522976, "loss": 0.2348, "step": 15362 }, { "epoch": 0.027241641910000097, "grad_norm": 0.330078125, "learning_rate": 0.0019003940098665628, "loss": 0.2123, "step": 15364 }, { "epoch": 0.02724518807530991, "grad_norm": 1.0390625, "learning_rate": 0.00190036679188037, "loss": 0.2455, "step": 15366 }, { "epoch": 0.02724873424061973, "grad_norm": 1.0078125, "learning_rate": 0.0019003395703938374, "loss": 0.3356, "step": 15368 }, { "epoch": 0.027252280405929544, "grad_norm": 0.5, "learning_rate": 0.001900312345407085, "loss": 0.1785, "step": 15370 }, { "epoch": 0.027255826571239358, "grad_norm": 1.796875, "learning_rate": 0.0019002851169202307, "loss": 0.4332, "step": 15372 }, { "epoch": 0.027259372736549173, "grad_norm": 1.5, "learning_rate": 0.0019002578849333942, "loss": 0.2256, "step": 15374 }, { "epoch": 0.027262918901858987, "grad_norm": 0.765625, "learning_rate": 0.001900230649446695, "loss": 0.2256, "step": 15376 }, { "epoch": 0.027266465067168805, "grad_norm": 0.61328125, "learning_rate": 0.0019002034104602513, "loss": 0.1571, "step": 15378 }, { "epoch": 0.02727001123247862, "grad_norm": 0.43359375, "learning_rate": 0.001900176167974183, "loss": 0.3175, "step": 15380 }, { "epoch": 0.027273557397788434, "grad_norm": 2.109375, "learning_rate": 0.0019001489219886087, "loss": 0.3228, "step": 15382 }, { "epoch": 0.02727710356309825, "grad_norm": 0.72265625, "learning_rate": 0.0019001216725036479, "loss": 0.2183, "step": 15384 }, { "epoch": 0.027280649728408064, "grad_norm": 0.365234375, "learning_rate": 0.0019000944195194196, "loss": 0.1846, "step": 15386 }, { "epoch": 0.027284195893717878, "grad_norm": 0.494140625, "learning_rate": 0.001900067163036043, "loss": 0.2067, "step": 15388 }, { "epoch": 0.027287742059027696, "grad_norm": 1.1484375, "learning_rate": 0.0019000399030536378, "loss": 0.2178, "step": 15390 }, { "epoch": 0.02729128822433751, "grad_norm": 1.0625, "learning_rate": 0.0019000126395723224, "loss": 0.2557, "step": 15392 }, { "epoch": 0.027294834389647325, "grad_norm": 0.609375, "learning_rate": 0.0018999853725922163, "loss": 0.5676, "step": 15394 }, { "epoch": 0.02729838055495714, "grad_norm": 0.80859375, "learning_rate": 0.0018999581021134392, "loss": 0.2546, "step": 15396 }, { "epoch": 0.027301926720266954, "grad_norm": 0.47265625, "learning_rate": 0.0018999308281361096, "loss": 0.2156, "step": 15398 }, { "epoch": 0.02730547288557677, "grad_norm": 1.203125, "learning_rate": 0.0018999035506603477, "loss": 0.4945, "step": 15400 }, { "epoch": 0.027309019050886587, "grad_norm": 0.310546875, "learning_rate": 0.0018998762696862722, "loss": 0.2199, "step": 15402 }, { "epoch": 0.0273125652161964, "grad_norm": 1.3046875, "learning_rate": 0.0018998489852140022, "loss": 0.2495, "step": 15404 }, { "epoch": 0.027316111381506216, "grad_norm": 0.255859375, "learning_rate": 0.0018998216972436577, "loss": 0.4403, "step": 15406 }, { "epoch": 0.02731965754681603, "grad_norm": 0.6640625, "learning_rate": 0.0018997944057753575, "loss": 0.2146, "step": 15408 }, { "epoch": 0.027323203712125845, "grad_norm": 0.890625, "learning_rate": 0.001899767110809221, "loss": 0.2135, "step": 15410 }, { "epoch": 0.027326749877435663, "grad_norm": 0.69921875, "learning_rate": 0.001899739812345368, "loss": 0.2371, "step": 15412 }, { "epoch": 0.027330296042745478, "grad_norm": 0.6875, "learning_rate": 0.0018997125103839177, "loss": 0.2115, "step": 15414 }, { "epoch": 0.027333842208055292, "grad_norm": 4.75, "learning_rate": 0.0018996852049249892, "loss": 0.3307, "step": 15416 }, { "epoch": 0.027337388373365107, "grad_norm": 0.53515625, "learning_rate": 0.0018996578959687021, "loss": 0.1585, "step": 15418 }, { "epoch": 0.02734093453867492, "grad_norm": 0.5859375, "learning_rate": 0.0018996305835151759, "loss": 0.2031, "step": 15420 }, { "epoch": 0.027344480703984736, "grad_norm": 0.5625, "learning_rate": 0.00189960326756453, "loss": 0.2385, "step": 15422 }, { "epoch": 0.027348026869294554, "grad_norm": 0.5703125, "learning_rate": 0.001899575948116884, "loss": 0.2273, "step": 15424 }, { "epoch": 0.02735157303460437, "grad_norm": 0.443359375, "learning_rate": 0.001899548625172357, "loss": 0.2049, "step": 15426 }, { "epoch": 0.027355119199914183, "grad_norm": 0.5625, "learning_rate": 0.001899521298731069, "loss": 0.2159, "step": 15428 }, { "epoch": 0.027358665365223998, "grad_norm": 1.96875, "learning_rate": 0.0018994939687931392, "loss": 0.3147, "step": 15430 }, { "epoch": 0.027362211530533812, "grad_norm": 0.53125, "learning_rate": 0.001899466635358687, "loss": 0.2018, "step": 15432 }, { "epoch": 0.027365757695843627, "grad_norm": 0.30859375, "learning_rate": 0.0018994392984278323, "loss": 0.2216, "step": 15434 }, { "epoch": 0.027369303861153445, "grad_norm": 2.421875, "learning_rate": 0.0018994119580006946, "loss": 0.3849, "step": 15436 }, { "epoch": 0.02737285002646326, "grad_norm": 0.80078125, "learning_rate": 0.001899384614077393, "loss": 0.2914, "step": 15438 }, { "epoch": 0.027376396191773074, "grad_norm": 0.87109375, "learning_rate": 0.0018993572666580476, "loss": 0.3477, "step": 15440 }, { "epoch": 0.02737994235708289, "grad_norm": 0.318359375, "learning_rate": 0.001899329915742778, "loss": 0.1767, "step": 15442 }, { "epoch": 0.027383488522392703, "grad_norm": 0.5703125, "learning_rate": 0.0018993025613317034, "loss": 0.1951, "step": 15444 }, { "epoch": 0.02738703468770252, "grad_norm": 2.109375, "learning_rate": 0.0018992752034249437, "loss": 0.4441, "step": 15446 }, { "epoch": 0.027390580853012336, "grad_norm": 0.376953125, "learning_rate": 0.0018992478420226187, "loss": 0.1852, "step": 15448 }, { "epoch": 0.02739412701832215, "grad_norm": 0.49609375, "learning_rate": 0.0018992204771248477, "loss": 0.1859, "step": 15450 }, { "epoch": 0.027397673183631965, "grad_norm": 2.140625, "learning_rate": 0.0018991931087317508, "loss": 0.2127, "step": 15452 }, { "epoch": 0.02740121934894178, "grad_norm": 0.3359375, "learning_rate": 0.0018991657368434476, "loss": 0.3461, "step": 15454 }, { "epoch": 0.027404765514251594, "grad_norm": 1.0859375, "learning_rate": 0.0018991383614600575, "loss": 0.2311, "step": 15456 }, { "epoch": 0.027408311679561412, "grad_norm": 1.6640625, "learning_rate": 0.0018991109825817001, "loss": 0.3202, "step": 15458 }, { "epoch": 0.027411857844871226, "grad_norm": 0.3515625, "learning_rate": 0.0018990836002084957, "loss": 0.2116, "step": 15460 }, { "epoch": 0.02741540401018104, "grad_norm": 0.67578125, "learning_rate": 0.0018990562143405638, "loss": 0.2348, "step": 15462 }, { "epoch": 0.027418950175490855, "grad_norm": 0.37890625, "learning_rate": 0.0018990288249780243, "loss": 0.1984, "step": 15464 }, { "epoch": 0.02742249634080067, "grad_norm": 0.98046875, "learning_rate": 0.0018990014321209965, "loss": 0.2039, "step": 15466 }, { "epoch": 0.027426042506110485, "grad_norm": 0.95703125, "learning_rate": 0.0018989740357696011, "loss": 0.2799, "step": 15468 }, { "epoch": 0.027429588671420303, "grad_norm": 0.6328125, "learning_rate": 0.0018989466359239566, "loss": 0.1798, "step": 15470 }, { "epoch": 0.027433134836730117, "grad_norm": 1.5078125, "learning_rate": 0.0018989192325841841, "loss": 0.2245, "step": 15472 }, { "epoch": 0.02743668100203993, "grad_norm": 1.0546875, "learning_rate": 0.001898891825750403, "loss": 0.2221, "step": 15474 }, { "epoch": 0.027440227167349746, "grad_norm": 0.4140625, "learning_rate": 0.001898864415422733, "loss": 0.3451, "step": 15476 }, { "epoch": 0.02744377333265956, "grad_norm": 1.015625, "learning_rate": 0.001898837001601294, "loss": 0.2383, "step": 15478 }, { "epoch": 0.02744731949796938, "grad_norm": 0.85546875, "learning_rate": 0.001898809584286206, "loss": 0.2777, "step": 15480 }, { "epoch": 0.027450865663279193, "grad_norm": 0.53125, "learning_rate": 0.001898782163477589, "loss": 0.2234, "step": 15482 }, { "epoch": 0.027454411828589008, "grad_norm": 2.0, "learning_rate": 0.0018987547391755627, "loss": 0.2381, "step": 15484 }, { "epoch": 0.027457957993898822, "grad_norm": 1.796875, "learning_rate": 0.0018987273113802473, "loss": 0.2826, "step": 15486 }, { "epoch": 0.027461504159208637, "grad_norm": 0.490234375, "learning_rate": 0.001898699880091762, "loss": 0.4324, "step": 15488 }, { "epoch": 0.02746505032451845, "grad_norm": 0.52734375, "learning_rate": 0.001898672445310228, "loss": 0.1801, "step": 15490 }, { "epoch": 0.02746859648982827, "grad_norm": 1.234375, "learning_rate": 0.0018986450070357646, "loss": 0.1696, "step": 15492 }, { "epoch": 0.027472142655138084, "grad_norm": 0.345703125, "learning_rate": 0.0018986175652684917, "loss": 0.2298, "step": 15494 }, { "epoch": 0.0274756888204479, "grad_norm": 1.6015625, "learning_rate": 0.0018985901200085293, "loss": 0.2746, "step": 15496 }, { "epoch": 0.027479234985757713, "grad_norm": 3.890625, "learning_rate": 0.001898562671255998, "loss": 0.1679, "step": 15498 }, { "epoch": 0.027482781151067528, "grad_norm": 0.86328125, "learning_rate": 0.001898535219011017, "loss": 0.2745, "step": 15500 }, { "epoch": 0.027486327316377342, "grad_norm": 0.5078125, "learning_rate": 0.0018985077632737073, "loss": 0.2252, "step": 15502 }, { "epoch": 0.02748987348168716, "grad_norm": 0.63671875, "learning_rate": 0.0018984803040441877, "loss": 0.2374, "step": 15504 }, { "epoch": 0.027493419646996975, "grad_norm": 0.365234375, "learning_rate": 0.0018984528413225799, "loss": 0.2092, "step": 15506 }, { "epoch": 0.02749696581230679, "grad_norm": 1.3359375, "learning_rate": 0.001898425375109003, "loss": 0.1766, "step": 15508 }, { "epoch": 0.027500511977616604, "grad_norm": 0.65625, "learning_rate": 0.0018983979054035768, "loss": 0.218, "step": 15510 }, { "epoch": 0.02750405814292642, "grad_norm": 0.435546875, "learning_rate": 0.0018983704322064223, "loss": 0.2114, "step": 15512 }, { "epoch": 0.027507604308236237, "grad_norm": 2.234375, "learning_rate": 0.001898342955517659, "loss": 0.2277, "step": 15514 }, { "epoch": 0.02751115047354605, "grad_norm": 1.890625, "learning_rate": 0.0018983154753374077, "loss": 0.2003, "step": 15516 }, { "epoch": 0.027514696638855866, "grad_norm": 7.65625, "learning_rate": 0.0018982879916657881, "loss": 0.2852, "step": 15518 }, { "epoch": 0.02751824280416568, "grad_norm": 0.51171875, "learning_rate": 0.0018982605045029205, "loss": 0.2034, "step": 15520 }, { "epoch": 0.027521788969475495, "grad_norm": 3.109375, "learning_rate": 0.0018982330138489253, "loss": 0.4603, "step": 15522 }, { "epoch": 0.02752533513478531, "grad_norm": 1.921875, "learning_rate": 0.001898205519703922, "loss": 0.2909, "step": 15524 }, { "epoch": 0.027528881300095127, "grad_norm": 2.78125, "learning_rate": 0.0018981780220680318, "loss": 0.3515, "step": 15526 }, { "epoch": 0.027532427465404942, "grad_norm": 0.8125, "learning_rate": 0.0018981505209413748, "loss": 0.2402, "step": 15528 }, { "epoch": 0.027535973630714757, "grad_norm": 0.3125, "learning_rate": 0.0018981230163240708, "loss": 0.2306, "step": 15530 }, { "epoch": 0.02753951979602457, "grad_norm": 0.43359375, "learning_rate": 0.0018980955082162402, "loss": 0.2403, "step": 15532 }, { "epoch": 0.027543065961334386, "grad_norm": 0.90234375, "learning_rate": 0.0018980679966180039, "loss": 0.2921, "step": 15534 }, { "epoch": 0.0275466121266442, "grad_norm": 0.58984375, "learning_rate": 0.0018980404815294811, "loss": 0.2111, "step": 15536 }, { "epoch": 0.027550158291954018, "grad_norm": 0.6953125, "learning_rate": 0.0018980129629507932, "loss": 0.1853, "step": 15538 }, { "epoch": 0.027553704457263833, "grad_norm": 0.279296875, "learning_rate": 0.0018979854408820603, "loss": 0.1969, "step": 15540 }, { "epoch": 0.027557250622573647, "grad_norm": 0.298828125, "learning_rate": 0.0018979579153234024, "loss": 0.2102, "step": 15542 }, { "epoch": 0.027560796787883462, "grad_norm": 1.0078125, "learning_rate": 0.00189793038627494, "loss": 0.2929, "step": 15544 }, { "epoch": 0.027564342953193276, "grad_norm": 1.4375, "learning_rate": 0.0018979028537367934, "loss": 0.2484, "step": 15546 }, { "epoch": 0.027567889118503094, "grad_norm": 0.357421875, "learning_rate": 0.0018978753177090837, "loss": 0.2442, "step": 15548 }, { "epoch": 0.02757143528381291, "grad_norm": 0.6640625, "learning_rate": 0.0018978477781919306, "loss": 0.21, "step": 15550 }, { "epoch": 0.027574981449122724, "grad_norm": 0.69140625, "learning_rate": 0.0018978202351854546, "loss": 0.2478, "step": 15552 }, { "epoch": 0.027578527614432538, "grad_norm": 0.625, "learning_rate": 0.0018977926886897766, "loss": 0.2219, "step": 15554 }, { "epoch": 0.027582073779742353, "grad_norm": 0.2041015625, "learning_rate": 0.0018977651387050165, "loss": 0.2007, "step": 15556 }, { "epoch": 0.027585619945052167, "grad_norm": 0.30078125, "learning_rate": 0.0018977375852312955, "loss": 0.2055, "step": 15558 }, { "epoch": 0.027589166110361985, "grad_norm": 0.91015625, "learning_rate": 0.0018977100282687332, "loss": 0.2607, "step": 15560 }, { "epoch": 0.0275927122756718, "grad_norm": 0.369140625, "learning_rate": 0.0018976824678174507, "loss": 0.1687, "step": 15562 }, { "epoch": 0.027596258440981614, "grad_norm": 0.71484375, "learning_rate": 0.0018976549038775686, "loss": 0.1937, "step": 15564 }, { "epoch": 0.02759980460629143, "grad_norm": 2.0625, "learning_rate": 0.0018976273364492073, "loss": 0.1824, "step": 15566 }, { "epoch": 0.027603350771601243, "grad_norm": 1.0859375, "learning_rate": 0.0018975997655324873, "loss": 0.2877, "step": 15568 }, { "epoch": 0.027606896936911058, "grad_norm": 0.404296875, "learning_rate": 0.001897572191127529, "loss": 0.2208, "step": 15570 }, { "epoch": 0.027610443102220876, "grad_norm": 0.37890625, "learning_rate": 0.0018975446132344536, "loss": 0.1956, "step": 15572 }, { "epoch": 0.02761398926753069, "grad_norm": 5.5, "learning_rate": 0.0018975170318533813, "loss": 0.322, "step": 15574 }, { "epoch": 0.027617535432840505, "grad_norm": 1.2734375, "learning_rate": 0.0018974894469844326, "loss": 0.1873, "step": 15576 }, { "epoch": 0.02762108159815032, "grad_norm": 0.3046875, "learning_rate": 0.0018974618586277284, "loss": 0.3496, "step": 15578 }, { "epoch": 0.027624627763460134, "grad_norm": 0.4375, "learning_rate": 0.0018974342667833891, "loss": 0.2293, "step": 15580 }, { "epoch": 0.027628173928769952, "grad_norm": 0.81640625, "learning_rate": 0.0018974066714515354, "loss": 0.1992, "step": 15582 }, { "epoch": 0.027631720094079767, "grad_norm": 2.375, "learning_rate": 0.0018973790726322885, "loss": 0.4927, "step": 15584 }, { "epoch": 0.02763526625938958, "grad_norm": 0.80078125, "learning_rate": 0.0018973514703257684, "loss": 0.2591, "step": 15586 }, { "epoch": 0.027638812424699396, "grad_norm": 0.72265625, "learning_rate": 0.001897323864532096, "loss": 0.2868, "step": 15588 }, { "epoch": 0.02764235859000921, "grad_norm": 1.421875, "learning_rate": 0.0018972962552513926, "loss": 0.2461, "step": 15590 }, { "epoch": 0.027645904755319025, "grad_norm": 1.3046875, "learning_rate": 0.0018972686424837781, "loss": 0.2519, "step": 15592 }, { "epoch": 0.027649450920628843, "grad_norm": 0.640625, "learning_rate": 0.001897241026229374, "loss": 0.1797, "step": 15594 }, { "epoch": 0.027652997085938658, "grad_norm": 0.291015625, "learning_rate": 0.0018972134064883005, "loss": 0.2251, "step": 15596 }, { "epoch": 0.027656543251248472, "grad_norm": 1.6953125, "learning_rate": 0.0018971857832606788, "loss": 0.2488, "step": 15598 }, { "epoch": 0.027660089416558287, "grad_norm": 0.53125, "learning_rate": 0.0018971581565466293, "loss": 0.2194, "step": 15600 }, { "epoch": 0.0276636355818681, "grad_norm": 0.80078125, "learning_rate": 0.0018971305263462733, "loss": 0.2385, "step": 15602 }, { "epoch": 0.027667181747177916, "grad_norm": 1.4453125, "learning_rate": 0.0018971028926597316, "loss": 0.2466, "step": 15604 }, { "epoch": 0.027670727912487734, "grad_norm": 0.8515625, "learning_rate": 0.0018970752554871245, "loss": 0.2331, "step": 15606 }, { "epoch": 0.02767427407779755, "grad_norm": 1.8203125, "learning_rate": 0.0018970476148285735, "loss": 0.1913, "step": 15608 }, { "epoch": 0.027677820243107363, "grad_norm": 0.87890625, "learning_rate": 0.0018970199706841988, "loss": 0.2529, "step": 15610 }, { "epoch": 0.027681366408417177, "grad_norm": 1.125, "learning_rate": 0.0018969923230541222, "loss": 0.3198, "step": 15612 }, { "epoch": 0.027684912573726992, "grad_norm": 0.40234375, "learning_rate": 0.0018969646719384638, "loss": 0.2148, "step": 15614 }, { "epoch": 0.02768845873903681, "grad_norm": 0.609375, "learning_rate": 0.001896937017337345, "loss": 0.216, "step": 15616 }, { "epoch": 0.027692004904346625, "grad_norm": 0.58203125, "learning_rate": 0.0018969093592508864, "loss": 0.2154, "step": 15618 }, { "epoch": 0.02769555106965644, "grad_norm": 3.734375, "learning_rate": 0.0018968816976792093, "loss": 0.2969, "step": 15620 }, { "epoch": 0.027699097234966254, "grad_norm": 0.40625, "learning_rate": 0.0018968540326224346, "loss": 0.1977, "step": 15622 }, { "epoch": 0.027702643400276068, "grad_norm": 0.51171875, "learning_rate": 0.001896826364080683, "loss": 0.297, "step": 15624 }, { "epoch": 0.027706189565585883, "grad_norm": 0.462890625, "learning_rate": 0.0018967986920540763, "loss": 0.2014, "step": 15626 }, { "epoch": 0.0277097357308957, "grad_norm": 2.046875, "learning_rate": 0.0018967710165427344, "loss": 0.2084, "step": 15628 }, { "epoch": 0.027713281896205515, "grad_norm": 0.39453125, "learning_rate": 0.0018967433375467792, "loss": 0.2166, "step": 15630 }, { "epoch": 0.02771682806151533, "grad_norm": 0.359375, "learning_rate": 0.0018967156550663312, "loss": 0.2063, "step": 15632 }, { "epoch": 0.027720374226825144, "grad_norm": 0.640625, "learning_rate": 0.0018966879691015117, "loss": 0.2084, "step": 15634 }, { "epoch": 0.02772392039213496, "grad_norm": 0.65234375, "learning_rate": 0.0018966602796524421, "loss": 0.2782, "step": 15636 }, { "epoch": 0.027727466557444774, "grad_norm": 1.078125, "learning_rate": 0.0018966325867192432, "loss": 0.1774, "step": 15638 }, { "epoch": 0.02773101272275459, "grad_norm": 4.03125, "learning_rate": 0.001896604890302036, "loss": 0.3807, "step": 15640 }, { "epoch": 0.027734558888064406, "grad_norm": 0.1953125, "learning_rate": 0.0018965771904009415, "loss": 0.2192, "step": 15642 }, { "epoch": 0.02773810505337422, "grad_norm": 0.4921875, "learning_rate": 0.001896549487016081, "loss": 0.2476, "step": 15644 }, { "epoch": 0.027741651218684035, "grad_norm": 0.67578125, "learning_rate": 0.001896521780147576, "loss": 0.2919, "step": 15646 }, { "epoch": 0.02774519738399385, "grad_norm": 0.482421875, "learning_rate": 0.0018964940697955475, "loss": 0.176, "step": 15648 }, { "epoch": 0.027748743549303664, "grad_norm": 1.1796875, "learning_rate": 0.0018964663559601163, "loss": 0.2488, "step": 15650 }, { "epoch": 0.027752289714613482, "grad_norm": 0.44921875, "learning_rate": 0.0018964386386414042, "loss": 0.1735, "step": 15652 }, { "epoch": 0.027755835879923297, "grad_norm": 0.4375, "learning_rate": 0.001896410917839532, "loss": 0.1998, "step": 15654 }, { "epoch": 0.02775938204523311, "grad_norm": 0.48828125, "learning_rate": 0.0018963831935546209, "loss": 0.2088, "step": 15656 }, { "epoch": 0.027762928210542926, "grad_norm": 0.2255859375, "learning_rate": 0.0018963554657867926, "loss": 0.2592, "step": 15658 }, { "epoch": 0.02776647437585274, "grad_norm": 0.50390625, "learning_rate": 0.0018963277345361677, "loss": 0.1769, "step": 15660 }, { "epoch": 0.02777002054116256, "grad_norm": 1.4140625, "learning_rate": 0.0018962999998028683, "loss": 0.2791, "step": 15662 }, { "epoch": 0.027773566706472373, "grad_norm": 0.466796875, "learning_rate": 0.001896272261587015, "loss": 0.2015, "step": 15664 }, { "epoch": 0.027777112871782188, "grad_norm": 3.015625, "learning_rate": 0.0018962445198887296, "loss": 0.2435, "step": 15666 }, { "epoch": 0.027780659037092002, "grad_norm": 4.0625, "learning_rate": 0.001896216774708133, "loss": 0.237, "step": 15668 }, { "epoch": 0.027784205202401817, "grad_norm": 0.5703125, "learning_rate": 0.0018961890260453464, "loss": 0.2398, "step": 15670 }, { "epoch": 0.02778775136771163, "grad_norm": 0.6796875, "learning_rate": 0.0018961612739004919, "loss": 0.1938, "step": 15672 }, { "epoch": 0.02779129753302145, "grad_norm": 0.73046875, "learning_rate": 0.0018961335182736904, "loss": 0.2593, "step": 15674 }, { "epoch": 0.027794843698331264, "grad_norm": 1.2265625, "learning_rate": 0.001896105759165063, "loss": 0.243, "step": 15676 }, { "epoch": 0.02779838986364108, "grad_norm": 1.921875, "learning_rate": 0.001896077996574732, "loss": 0.2006, "step": 15678 }, { "epoch": 0.027801936028950893, "grad_norm": 0.875, "learning_rate": 0.0018960502305028178, "loss": 0.3268, "step": 15680 }, { "epoch": 0.027805482194260708, "grad_norm": 0.412109375, "learning_rate": 0.0018960224609494423, "loss": 0.1961, "step": 15682 }, { "epoch": 0.027809028359570522, "grad_norm": 2.484375, "learning_rate": 0.0018959946879147274, "loss": 0.3464, "step": 15684 }, { "epoch": 0.02781257452488034, "grad_norm": 0.59375, "learning_rate": 0.0018959669113987937, "loss": 0.2466, "step": 15686 }, { "epoch": 0.027816120690190155, "grad_norm": 0.6796875, "learning_rate": 0.001895939131401763, "loss": 0.2505, "step": 15688 }, { "epoch": 0.02781966685549997, "grad_norm": 0.66015625, "learning_rate": 0.001895911347923757, "loss": 0.2469, "step": 15690 }, { "epoch": 0.027823213020809784, "grad_norm": 0.431640625, "learning_rate": 0.0018958835609648973, "loss": 0.2324, "step": 15692 }, { "epoch": 0.0278267591861196, "grad_norm": 0.44921875, "learning_rate": 0.0018958557705253051, "loss": 0.256, "step": 15694 }, { "epoch": 0.027830305351429416, "grad_norm": 1.625, "learning_rate": 0.0018958279766051018, "loss": 0.198, "step": 15696 }, { "epoch": 0.02783385151673923, "grad_norm": 0.369140625, "learning_rate": 0.0018958001792044096, "loss": 0.1609, "step": 15698 }, { "epoch": 0.027837397682049046, "grad_norm": 1.1484375, "learning_rate": 0.0018957723783233494, "loss": 0.3339, "step": 15700 }, { "epoch": 0.02784094384735886, "grad_norm": 0.26953125, "learning_rate": 0.0018957445739620432, "loss": 0.1605, "step": 15702 }, { "epoch": 0.027844490012668675, "grad_norm": 2.015625, "learning_rate": 0.0018957167661206125, "loss": 0.2186, "step": 15704 }, { "epoch": 0.02784803617797849, "grad_norm": 0.419921875, "learning_rate": 0.0018956889547991787, "loss": 0.2542, "step": 15706 }, { "epoch": 0.027851582343288307, "grad_norm": 0.80078125, "learning_rate": 0.0018956611399978637, "loss": 0.2654, "step": 15708 }, { "epoch": 0.027855128508598122, "grad_norm": 3.84375, "learning_rate": 0.001895633321716789, "loss": 0.3302, "step": 15710 }, { "epoch": 0.027858674673907936, "grad_norm": 1.25, "learning_rate": 0.0018956054999560762, "loss": 0.1935, "step": 15712 }, { "epoch": 0.02786222083921775, "grad_norm": 1.3046875, "learning_rate": 0.001895577674715847, "loss": 0.2726, "step": 15714 }, { "epoch": 0.027865767004527565, "grad_norm": 0.68359375, "learning_rate": 0.0018955498459962234, "loss": 0.1816, "step": 15716 }, { "epoch": 0.02786931316983738, "grad_norm": 0.55078125, "learning_rate": 0.001895522013797327, "loss": 0.1816, "step": 15718 }, { "epoch": 0.027872859335147198, "grad_norm": 0.248046875, "learning_rate": 0.0018954941781192795, "loss": 0.217, "step": 15720 }, { "epoch": 0.027876405500457013, "grad_norm": 0.416015625, "learning_rate": 0.0018954663389622022, "loss": 0.1722, "step": 15722 }, { "epoch": 0.027879951665766827, "grad_norm": 0.71875, "learning_rate": 0.0018954384963262175, "loss": 0.198, "step": 15724 }, { "epoch": 0.02788349783107664, "grad_norm": 2.3125, "learning_rate": 0.0018954106502114465, "loss": 0.3218, "step": 15726 }, { "epoch": 0.027887043996386456, "grad_norm": 1.8671875, "learning_rate": 0.0018953828006180117, "loss": 0.3218, "step": 15728 }, { "epoch": 0.027890590161696274, "grad_norm": 0.310546875, "learning_rate": 0.0018953549475460342, "loss": 0.1864, "step": 15730 }, { "epoch": 0.02789413632700609, "grad_norm": 0.6953125, "learning_rate": 0.0018953270909956364, "loss": 0.2432, "step": 15732 }, { "epoch": 0.027897682492315903, "grad_norm": 0.330078125, "learning_rate": 0.0018952992309669396, "loss": 0.1888, "step": 15734 }, { "epoch": 0.027901228657625718, "grad_norm": 0.76171875, "learning_rate": 0.001895271367460066, "loss": 0.2592, "step": 15736 }, { "epoch": 0.027904774822935532, "grad_norm": 0.298828125, "learning_rate": 0.001895243500475138, "loss": 0.1659, "step": 15738 }, { "epoch": 0.027908320988245347, "grad_norm": 0.484375, "learning_rate": 0.0018952156300122763, "loss": 0.2086, "step": 15740 }, { "epoch": 0.027911867153555165, "grad_norm": 0.6640625, "learning_rate": 0.0018951877560716033, "loss": 0.2274, "step": 15742 }, { "epoch": 0.02791541331886498, "grad_norm": 0.421875, "learning_rate": 0.001895159878653241, "loss": 0.1829, "step": 15744 }, { "epoch": 0.027918959484174794, "grad_norm": 0.72265625, "learning_rate": 0.0018951319977573114, "loss": 0.2438, "step": 15746 }, { "epoch": 0.02792250564948461, "grad_norm": 0.796875, "learning_rate": 0.0018951041133839365, "loss": 0.1785, "step": 15748 }, { "epoch": 0.027926051814794423, "grad_norm": 8.75, "learning_rate": 0.0018950762255332378, "loss": 0.2103, "step": 15750 }, { "epoch": 0.027929597980104238, "grad_norm": 0.50390625, "learning_rate": 0.0018950483342053373, "loss": 0.2072, "step": 15752 }, { "epoch": 0.027933144145414056, "grad_norm": 2.015625, "learning_rate": 0.0018950204394003573, "loss": 0.2162, "step": 15754 }, { "epoch": 0.02793669031072387, "grad_norm": 0.5859375, "learning_rate": 0.0018949925411184201, "loss": 0.2156, "step": 15756 }, { "epoch": 0.027940236476033685, "grad_norm": 0.828125, "learning_rate": 0.0018949646393596469, "loss": 0.1922, "step": 15758 }, { "epoch": 0.0279437826413435, "grad_norm": 0.7578125, "learning_rate": 0.0018949367341241606, "loss": 0.1897, "step": 15760 }, { "epoch": 0.027947328806653314, "grad_norm": 0.43359375, "learning_rate": 0.0018949088254120825, "loss": 0.2115, "step": 15762 }, { "epoch": 0.027950874971963132, "grad_norm": 0.84375, "learning_rate": 0.0018948809132235348, "loss": 0.1647, "step": 15764 }, { "epoch": 0.027954421137272947, "grad_norm": 1.6328125, "learning_rate": 0.00189485299755864, "loss": 0.3455, "step": 15766 }, { "epoch": 0.02795796730258276, "grad_norm": 1.0234375, "learning_rate": 0.0018948250784175198, "loss": 0.2627, "step": 15768 }, { "epoch": 0.027961513467892576, "grad_norm": 0.396484375, "learning_rate": 0.0018947971558002962, "loss": 0.2093, "step": 15770 }, { "epoch": 0.02796505963320239, "grad_norm": 0.26953125, "learning_rate": 0.0018947692297070916, "loss": 0.1956, "step": 15772 }, { "epoch": 0.027968605798512205, "grad_norm": 0.5234375, "learning_rate": 0.001894741300138028, "loss": 0.1643, "step": 15774 }, { "epoch": 0.027972151963822023, "grad_norm": 0.734375, "learning_rate": 0.0018947133670932277, "loss": 0.1987, "step": 15776 }, { "epoch": 0.027975698129131837, "grad_norm": 0.2021484375, "learning_rate": 0.001894685430572813, "loss": 0.1331, "step": 15778 }, { "epoch": 0.027979244294441652, "grad_norm": 0.6484375, "learning_rate": 0.0018946574905769052, "loss": 0.2511, "step": 15780 }, { "epoch": 0.027982790459751467, "grad_norm": 1.3984375, "learning_rate": 0.0018946295471056276, "loss": 0.2689, "step": 15782 }, { "epoch": 0.02798633662506128, "grad_norm": 0.77734375, "learning_rate": 0.0018946016001591018, "loss": 0.281, "step": 15784 }, { "epoch": 0.027989882790371096, "grad_norm": 2.171875, "learning_rate": 0.0018945736497374501, "loss": 0.3021, "step": 15786 }, { "epoch": 0.027993428955680914, "grad_norm": 0.26953125, "learning_rate": 0.0018945456958407952, "loss": 0.2186, "step": 15788 }, { "epoch": 0.027996975120990728, "grad_norm": 0.8828125, "learning_rate": 0.0018945177384692587, "loss": 0.2289, "step": 15790 }, { "epoch": 0.028000521286300543, "grad_norm": 1.0234375, "learning_rate": 0.001894489777622963, "loss": 0.2135, "step": 15792 }, { "epoch": 0.028004067451610357, "grad_norm": 1.5625, "learning_rate": 0.0018944618133020306, "loss": 0.4, "step": 15794 }, { "epoch": 0.028007613616920172, "grad_norm": 0.69921875, "learning_rate": 0.0018944338455065836, "loss": 0.1867, "step": 15796 }, { "epoch": 0.02801115978222999, "grad_norm": 1.1015625, "learning_rate": 0.0018944058742367445, "loss": 0.181, "step": 15798 }, { "epoch": 0.028014705947539804, "grad_norm": 0.89453125, "learning_rate": 0.0018943778994926356, "loss": 0.2815, "step": 15800 }, { "epoch": 0.02801825211284962, "grad_norm": 0.64453125, "learning_rate": 0.001894349921274379, "loss": 0.2253, "step": 15802 }, { "epoch": 0.028021798278159434, "grad_norm": 0.5625, "learning_rate": 0.0018943219395820977, "loss": 0.1856, "step": 15804 }, { "epoch": 0.028025344443469248, "grad_norm": 0.29296875, "learning_rate": 0.0018942939544159133, "loss": 0.1999, "step": 15806 }, { "epoch": 0.028028890608779063, "grad_norm": 0.6796875, "learning_rate": 0.0018942659657759484, "loss": 0.2594, "step": 15808 }, { "epoch": 0.02803243677408888, "grad_norm": 1.5859375, "learning_rate": 0.001894237973662326, "loss": 0.2131, "step": 15810 }, { "epoch": 0.028035982939398695, "grad_norm": 0.365234375, "learning_rate": 0.0018942099780751675, "loss": 0.2195, "step": 15812 }, { "epoch": 0.02803952910470851, "grad_norm": 1.5, "learning_rate": 0.0018941819790145962, "loss": 0.3423, "step": 15814 }, { "epoch": 0.028043075270018324, "grad_norm": 5.9375, "learning_rate": 0.0018941539764807344, "loss": 0.4074, "step": 15816 }, { "epoch": 0.02804662143532814, "grad_norm": 0.69140625, "learning_rate": 0.0018941259704737042, "loss": 0.1996, "step": 15818 }, { "epoch": 0.028050167600637953, "grad_norm": 0.83984375, "learning_rate": 0.001894097960993628, "loss": 0.2494, "step": 15820 }, { "epoch": 0.02805371376594777, "grad_norm": 0.44921875, "learning_rate": 0.001894069948040629, "loss": 0.2007, "step": 15822 }, { "epoch": 0.028057259931257586, "grad_norm": 1.0390625, "learning_rate": 0.0018940419316148289, "loss": 0.2276, "step": 15824 }, { "epoch": 0.0280608060965674, "grad_norm": 0.90234375, "learning_rate": 0.001894013911716351, "loss": 0.2376, "step": 15826 }, { "epoch": 0.028064352261877215, "grad_norm": 0.66015625, "learning_rate": 0.0018939858883453174, "loss": 0.1771, "step": 15828 }, { "epoch": 0.02806789842718703, "grad_norm": 7.84375, "learning_rate": 0.0018939578615018507, "loss": 0.2792, "step": 15830 }, { "epoch": 0.028071444592496848, "grad_norm": 0.5546875, "learning_rate": 0.0018939298311860732, "loss": 0.1768, "step": 15832 }, { "epoch": 0.028074990757806662, "grad_norm": 18.25, "learning_rate": 0.0018939017973981084, "loss": 0.2214, "step": 15834 }, { "epoch": 0.028078536923116477, "grad_norm": 2.109375, "learning_rate": 0.001893873760138078, "loss": 0.333, "step": 15836 }, { "epoch": 0.02808208308842629, "grad_norm": 0.58203125, "learning_rate": 0.0018938457194061047, "loss": 0.2659, "step": 15838 }, { "epoch": 0.028085629253736106, "grad_norm": 0.98046875, "learning_rate": 0.0018938176752023115, "loss": 0.1937, "step": 15840 }, { "epoch": 0.02808917541904592, "grad_norm": 0.67578125, "learning_rate": 0.001893789627526821, "loss": 0.2785, "step": 15842 }, { "epoch": 0.02809272158435574, "grad_norm": 0.3046875, "learning_rate": 0.0018937615763797557, "loss": 0.2623, "step": 15844 }, { "epoch": 0.028096267749665553, "grad_norm": 0.54296875, "learning_rate": 0.0018937335217612385, "loss": 0.2229, "step": 15846 }, { "epoch": 0.028099813914975368, "grad_norm": 0.56640625, "learning_rate": 0.0018937054636713915, "loss": 0.179, "step": 15848 }, { "epoch": 0.028103360080285182, "grad_norm": 1.359375, "learning_rate": 0.001893677402110338, "loss": 0.2584, "step": 15850 }, { "epoch": 0.028106906245594997, "grad_norm": 0.439453125, "learning_rate": 0.0018936493370782007, "loss": 0.2035, "step": 15852 }, { "epoch": 0.02811045241090481, "grad_norm": 0.6015625, "learning_rate": 0.001893621268575102, "loss": 0.2927, "step": 15854 }, { "epoch": 0.02811399857621463, "grad_norm": 0.291015625, "learning_rate": 0.001893593196601165, "loss": 0.1962, "step": 15856 }, { "epoch": 0.028117544741524444, "grad_norm": 0.640625, "learning_rate": 0.0018935651211565126, "loss": 0.2311, "step": 15858 }, { "epoch": 0.02812109090683426, "grad_norm": 0.59375, "learning_rate": 0.001893537042241267, "loss": 0.2132, "step": 15860 }, { "epoch": 0.028124637072144073, "grad_norm": 0.65625, "learning_rate": 0.0018935089598555513, "loss": 0.2103, "step": 15862 }, { "epoch": 0.028128183237453887, "grad_norm": 1.0078125, "learning_rate": 0.0018934808739994884, "loss": 0.1775, "step": 15864 }, { "epoch": 0.028131729402763705, "grad_norm": 0.6953125, "learning_rate": 0.001893452784673201, "loss": 0.3314, "step": 15866 }, { "epoch": 0.02813527556807352, "grad_norm": 0.31640625, "learning_rate": 0.0018934246918768122, "loss": 0.1797, "step": 15868 }, { "epoch": 0.028138821733383335, "grad_norm": 0.76953125, "learning_rate": 0.0018933965956104443, "loss": 0.2096, "step": 15870 }, { "epoch": 0.02814236789869315, "grad_norm": 0.625, "learning_rate": 0.001893368495874221, "loss": 0.1255, "step": 15872 }, { "epoch": 0.028145914064002964, "grad_norm": 0.6484375, "learning_rate": 0.0018933403926682647, "loss": 0.2218, "step": 15874 }, { "epoch": 0.028149460229312778, "grad_norm": 1.3203125, "learning_rate": 0.0018933122859926981, "loss": 0.2418, "step": 15876 }, { "epoch": 0.028153006394622596, "grad_norm": 0.5390625, "learning_rate": 0.0018932841758476445, "loss": 0.1882, "step": 15878 }, { "epoch": 0.02815655255993241, "grad_norm": 0.79296875, "learning_rate": 0.0018932560622332267, "loss": 0.3185, "step": 15880 }, { "epoch": 0.028160098725242225, "grad_norm": 0.90234375, "learning_rate": 0.0018932279451495675, "loss": 0.2732, "step": 15882 }, { "epoch": 0.02816364489055204, "grad_norm": 1.484375, "learning_rate": 0.0018931998245967903, "loss": 0.365, "step": 15884 }, { "epoch": 0.028167191055861854, "grad_norm": 0.6875, "learning_rate": 0.0018931717005750176, "loss": 0.1813, "step": 15886 }, { "epoch": 0.02817073722117167, "grad_norm": 0.55078125, "learning_rate": 0.0018931435730843726, "loss": 0.2858, "step": 15888 }, { "epoch": 0.028174283386481487, "grad_norm": 1.84375, "learning_rate": 0.0018931154421249787, "loss": 0.3423, "step": 15890 }, { "epoch": 0.0281778295517913, "grad_norm": 1.5, "learning_rate": 0.001893087307696958, "loss": 0.2913, "step": 15892 }, { "epoch": 0.028181375717101116, "grad_norm": 0.68359375, "learning_rate": 0.0018930591698004347, "loss": 0.2432, "step": 15894 }, { "epoch": 0.02818492188241093, "grad_norm": 1.6875, "learning_rate": 0.0018930310284355305, "loss": 0.2646, "step": 15896 }, { "epoch": 0.028188468047720745, "grad_norm": 0.7265625, "learning_rate": 0.0018930028836023698, "loss": 0.2917, "step": 15898 }, { "epoch": 0.028192014213030563, "grad_norm": 0.46875, "learning_rate": 0.0018929747353010752, "loss": 0.2206, "step": 15900 }, { "epoch": 0.028195560378340378, "grad_norm": 0.515625, "learning_rate": 0.0018929465835317695, "loss": 0.2153, "step": 15902 }, { "epoch": 0.028199106543650192, "grad_norm": 0.5546875, "learning_rate": 0.001892918428294576, "loss": 0.2367, "step": 15904 }, { "epoch": 0.028202652708960007, "grad_norm": 0.5234375, "learning_rate": 0.0018928902695896177, "loss": 0.1805, "step": 15906 }, { "epoch": 0.02820619887426982, "grad_norm": 0.93359375, "learning_rate": 0.0018928621074170184, "loss": 0.203, "step": 15908 }, { "epoch": 0.028209745039579636, "grad_norm": 1.21875, "learning_rate": 0.0018928339417769002, "loss": 0.2046, "step": 15910 }, { "epoch": 0.028213291204889454, "grad_norm": 0.78515625, "learning_rate": 0.0018928057726693872, "loss": 0.2524, "step": 15912 }, { "epoch": 0.02821683737019927, "grad_norm": 0.396484375, "learning_rate": 0.0018927776000946025, "loss": 0.1715, "step": 15914 }, { "epoch": 0.028220383535509083, "grad_norm": 1.609375, "learning_rate": 0.0018927494240526688, "loss": 0.2176, "step": 15916 }, { "epoch": 0.028223929700818898, "grad_norm": 0.7421875, "learning_rate": 0.0018927212445437097, "loss": 0.213, "step": 15918 }, { "epoch": 0.028227475866128712, "grad_norm": 0.50390625, "learning_rate": 0.0018926930615678482, "loss": 0.1996, "step": 15920 }, { "epoch": 0.028231022031438527, "grad_norm": 0.55859375, "learning_rate": 0.0018926648751252078, "loss": 0.2574, "step": 15922 }, { "epoch": 0.028234568196748345, "grad_norm": 0.27734375, "learning_rate": 0.0018926366852159118, "loss": 0.1654, "step": 15924 }, { "epoch": 0.02823811436205816, "grad_norm": 0.69140625, "learning_rate": 0.001892608491840083, "loss": 0.1936, "step": 15926 }, { "epoch": 0.028241660527367974, "grad_norm": 0.29296875, "learning_rate": 0.0018925802949978454, "loss": 0.2084, "step": 15928 }, { "epoch": 0.02824520669267779, "grad_norm": 1.109375, "learning_rate": 0.001892552094689322, "loss": 0.3007, "step": 15930 }, { "epoch": 0.028248752857987603, "grad_norm": 0.90234375, "learning_rate": 0.0018925238909146358, "loss": 0.2479, "step": 15932 }, { "epoch": 0.02825229902329742, "grad_norm": 0.51171875, "learning_rate": 0.0018924956836739106, "loss": 0.2443, "step": 15934 }, { "epoch": 0.028255845188607236, "grad_norm": 0.47265625, "learning_rate": 0.0018924674729672694, "loss": 0.1874, "step": 15936 }, { "epoch": 0.02825939135391705, "grad_norm": 0.62109375, "learning_rate": 0.0018924392587948366, "loss": 0.2762, "step": 15938 }, { "epoch": 0.028262937519226865, "grad_norm": 0.353515625, "learning_rate": 0.001892411041156734, "loss": 0.2239, "step": 15940 }, { "epoch": 0.02826648368453668, "grad_norm": 1.75, "learning_rate": 0.001892382820053086, "loss": 0.2542, "step": 15942 }, { "epoch": 0.028270029849846494, "grad_norm": 0.390625, "learning_rate": 0.0018923545954840156, "loss": 0.2308, "step": 15944 }, { "epoch": 0.028273576015156312, "grad_norm": 0.64453125, "learning_rate": 0.0018923263674496466, "loss": 0.3358, "step": 15946 }, { "epoch": 0.028277122180466126, "grad_norm": 0.703125, "learning_rate": 0.0018922981359501025, "loss": 0.2517, "step": 15948 }, { "epoch": 0.02828066834577594, "grad_norm": 0.435546875, "learning_rate": 0.0018922699009855065, "loss": 0.2051, "step": 15950 }, { "epoch": 0.028284214511085756, "grad_norm": 0.2392578125, "learning_rate": 0.0018922416625559819, "loss": 0.2164, "step": 15952 }, { "epoch": 0.02828776067639557, "grad_norm": 0.94921875, "learning_rate": 0.0018922134206616529, "loss": 0.1689, "step": 15954 }, { "epoch": 0.028291306841705385, "grad_norm": 0.3125, "learning_rate": 0.0018921851753026424, "loss": 0.1976, "step": 15956 }, { "epoch": 0.028294853007015203, "grad_norm": 3.046875, "learning_rate": 0.001892156926479074, "loss": 0.2118, "step": 15958 }, { "epoch": 0.028298399172325017, "grad_norm": 0.46484375, "learning_rate": 0.0018921286741910713, "loss": 0.2075, "step": 15960 }, { "epoch": 0.028301945337634832, "grad_norm": 0.51171875, "learning_rate": 0.001892100418438758, "loss": 0.2397, "step": 15962 }, { "epoch": 0.028305491502944646, "grad_norm": 0.5859375, "learning_rate": 0.0018920721592222574, "loss": 0.1644, "step": 15964 }, { "epoch": 0.02830903766825446, "grad_norm": 0.26171875, "learning_rate": 0.0018920438965416935, "loss": 0.2419, "step": 15966 }, { "epoch": 0.02831258383356428, "grad_norm": 0.5234375, "learning_rate": 0.0018920156303971897, "loss": 0.2436, "step": 15968 }, { "epoch": 0.028316129998874093, "grad_norm": 0.5703125, "learning_rate": 0.0018919873607888694, "loss": 0.2288, "step": 15970 }, { "epoch": 0.028319676164183908, "grad_norm": 2.015625, "learning_rate": 0.0018919590877168567, "loss": 0.1821, "step": 15972 }, { "epoch": 0.028323222329493723, "grad_norm": 0.408203125, "learning_rate": 0.0018919308111812745, "loss": 0.179, "step": 15974 }, { "epoch": 0.028326768494803537, "grad_norm": 0.3828125, "learning_rate": 0.0018919025311822475, "loss": 0.2558, "step": 15976 }, { "epoch": 0.02833031466011335, "grad_norm": 0.5546875, "learning_rate": 0.0018918742477198984, "loss": 0.1427, "step": 15978 }, { "epoch": 0.02833386082542317, "grad_norm": 0.51953125, "learning_rate": 0.0018918459607943515, "loss": 0.1755, "step": 15980 }, { "epoch": 0.028337406990732984, "grad_norm": 0.59375, "learning_rate": 0.0018918176704057301, "loss": 0.2999, "step": 15982 }, { "epoch": 0.0283409531560428, "grad_norm": 0.60546875, "learning_rate": 0.0018917893765541586, "loss": 0.2466, "step": 15984 }, { "epoch": 0.028344499321352613, "grad_norm": 0.2890625, "learning_rate": 0.00189176107923976, "loss": 0.3597, "step": 15986 }, { "epoch": 0.028348045486662428, "grad_norm": 0.6171875, "learning_rate": 0.0018917327784626584, "loss": 0.2457, "step": 15988 }, { "epoch": 0.028351591651972242, "grad_norm": 0.796875, "learning_rate": 0.0018917044742229772, "loss": 0.1995, "step": 15990 }, { "epoch": 0.02835513781728206, "grad_norm": 0.6484375, "learning_rate": 0.0018916761665208409, "loss": 0.1778, "step": 15992 }, { "epoch": 0.028358683982591875, "grad_norm": 0.5625, "learning_rate": 0.001891647855356373, "loss": 0.3216, "step": 15994 }, { "epoch": 0.02836223014790169, "grad_norm": 0.2021484375, "learning_rate": 0.0018916195407296967, "loss": 0.2082, "step": 15996 }, { "epoch": 0.028365776313211504, "grad_norm": 1.171875, "learning_rate": 0.001891591222640937, "loss": 0.2779, "step": 15998 }, { "epoch": 0.02836932247852132, "grad_norm": 0.474609375, "learning_rate": 0.0018915629010902166, "loss": 0.2353, "step": 16000 }, { "epoch": 0.028372868643831137, "grad_norm": 0.53125, "learning_rate": 0.00189153457607766, "loss": 0.2481, "step": 16002 }, { "epoch": 0.02837641480914095, "grad_norm": 1.046875, "learning_rate": 0.0018915062476033914, "loss": 0.2794, "step": 16004 }, { "epoch": 0.028379960974450766, "grad_norm": 0.322265625, "learning_rate": 0.0018914779156675335, "loss": 0.1641, "step": 16006 }, { "epoch": 0.02838350713976058, "grad_norm": 4.40625, "learning_rate": 0.0018914495802702113, "loss": 0.3629, "step": 16008 }, { "epoch": 0.028387053305070395, "grad_norm": 0.326171875, "learning_rate": 0.0018914212414115486, "loss": 0.2027, "step": 16010 }, { "epoch": 0.02839059947038021, "grad_norm": 0.546875, "learning_rate": 0.0018913928990916687, "loss": 0.2024, "step": 16012 }, { "epoch": 0.028394145635690028, "grad_norm": 1.015625, "learning_rate": 0.001891364553310696, "loss": 0.2727, "step": 16014 }, { "epoch": 0.028397691800999842, "grad_norm": 0.52734375, "learning_rate": 0.0018913362040687547, "loss": 0.1945, "step": 16016 }, { "epoch": 0.028401237966309657, "grad_norm": 1.078125, "learning_rate": 0.0018913078513659682, "loss": 0.3466, "step": 16018 }, { "epoch": 0.02840478413161947, "grad_norm": 0.90625, "learning_rate": 0.001891279495202461, "loss": 0.1855, "step": 16020 }, { "epoch": 0.028408330296929286, "grad_norm": 0.7265625, "learning_rate": 0.0018912511355783567, "loss": 0.1622, "step": 16022 }, { "epoch": 0.0284118764622391, "grad_norm": 1.1953125, "learning_rate": 0.0018912227724937798, "loss": 0.2448, "step": 16024 }, { "epoch": 0.02841542262754892, "grad_norm": 0.53515625, "learning_rate": 0.0018911944059488539, "loss": 0.2216, "step": 16026 }, { "epoch": 0.028418968792858733, "grad_norm": 0.69921875, "learning_rate": 0.0018911660359437034, "loss": 0.3516, "step": 16028 }, { "epoch": 0.028422514958168547, "grad_norm": 1.0625, "learning_rate": 0.0018911376624784525, "loss": 0.204, "step": 16030 }, { "epoch": 0.028426061123478362, "grad_norm": 0.515625, "learning_rate": 0.0018911092855532243, "loss": 0.2476, "step": 16032 }, { "epoch": 0.028429607288788177, "grad_norm": 0.609375, "learning_rate": 0.001891080905168144, "loss": 0.2303, "step": 16034 }, { "epoch": 0.028433153454097995, "grad_norm": 0.421875, "learning_rate": 0.0018910525213233355, "loss": 0.2817, "step": 16036 }, { "epoch": 0.02843669961940781, "grad_norm": 0.59765625, "learning_rate": 0.0018910241340189225, "loss": 0.1912, "step": 16038 }, { "epoch": 0.028440245784717624, "grad_norm": 0.8203125, "learning_rate": 0.0018909957432550297, "loss": 0.2838, "step": 16040 }, { "epoch": 0.028443791950027438, "grad_norm": 0.271484375, "learning_rate": 0.0018909673490317806, "loss": 0.1876, "step": 16042 }, { "epoch": 0.028447338115337253, "grad_norm": 0.8515625, "learning_rate": 0.0018909389513493, "loss": 0.1592, "step": 16044 }, { "epoch": 0.028450884280647067, "grad_norm": 0.421875, "learning_rate": 0.0018909105502077118, "loss": 0.1832, "step": 16046 }, { "epoch": 0.028454430445956885, "grad_norm": 5.53125, "learning_rate": 0.0018908821456071403, "loss": 0.2691, "step": 16048 }, { "epoch": 0.0284579766112667, "grad_norm": 0.5546875, "learning_rate": 0.0018908537375477098, "loss": 0.2313, "step": 16050 }, { "epoch": 0.028461522776576514, "grad_norm": 6.125, "learning_rate": 0.0018908253260295443, "loss": 0.5124, "step": 16052 }, { "epoch": 0.02846506894188633, "grad_norm": 1.046875, "learning_rate": 0.001890796911052768, "loss": 0.2686, "step": 16054 }, { "epoch": 0.028468615107196144, "grad_norm": 0.84375, "learning_rate": 0.0018907684926175057, "loss": 0.2473, "step": 16056 }, { "epoch": 0.028472161272505958, "grad_norm": 0.59765625, "learning_rate": 0.0018907400707238813, "loss": 0.2157, "step": 16058 }, { "epoch": 0.028475707437815776, "grad_norm": 1.1171875, "learning_rate": 0.001890711645372019, "loss": 0.3402, "step": 16060 }, { "epoch": 0.02847925360312559, "grad_norm": 0.30078125, "learning_rate": 0.001890683216562043, "loss": 0.309, "step": 16062 }, { "epoch": 0.028482799768435405, "grad_norm": 0.486328125, "learning_rate": 0.0018906547842940785, "loss": 0.1839, "step": 16064 }, { "epoch": 0.02848634593374522, "grad_norm": 0.80859375, "learning_rate": 0.001890626348568249, "loss": 0.2789, "step": 16066 }, { "epoch": 0.028489892099055034, "grad_norm": 0.72265625, "learning_rate": 0.001890597909384679, "loss": 0.186, "step": 16068 }, { "epoch": 0.028493438264364852, "grad_norm": 0.609375, "learning_rate": 0.0018905694667434932, "loss": 0.2678, "step": 16070 }, { "epoch": 0.028496984429674667, "grad_norm": 0.8359375, "learning_rate": 0.0018905410206448154, "loss": 0.2975, "step": 16072 }, { "epoch": 0.02850053059498448, "grad_norm": 1.65625, "learning_rate": 0.0018905125710887707, "loss": 0.2584, "step": 16074 }, { "epoch": 0.028504076760294296, "grad_norm": 0.5390625, "learning_rate": 0.001890484118075483, "loss": 0.2225, "step": 16076 }, { "epoch": 0.02850762292560411, "grad_norm": 1.84375, "learning_rate": 0.0018904556616050772, "loss": 0.4534, "step": 16078 }, { "epoch": 0.028511169090913925, "grad_norm": 1.25, "learning_rate": 0.001890427201677677, "loss": 0.2263, "step": 16080 }, { "epoch": 0.028514715256223743, "grad_norm": 0.373046875, "learning_rate": 0.001890398738293408, "loss": 0.1873, "step": 16082 }, { "epoch": 0.028518261421533558, "grad_norm": 0.84375, "learning_rate": 0.0018903702714523933, "loss": 0.1838, "step": 16084 }, { "epoch": 0.028521807586843372, "grad_norm": 4.15625, "learning_rate": 0.0018903418011547589, "loss": 0.3164, "step": 16086 }, { "epoch": 0.028525353752153187, "grad_norm": 0.640625, "learning_rate": 0.0018903133274006281, "loss": 0.251, "step": 16088 }, { "epoch": 0.028528899917463, "grad_norm": 0.3671875, "learning_rate": 0.0018902848501901257, "loss": 0.2652, "step": 16090 }, { "epoch": 0.028532446082772816, "grad_norm": 0.671875, "learning_rate": 0.0018902563695233768, "loss": 0.2756, "step": 16092 }, { "epoch": 0.028535992248082634, "grad_norm": 1.984375, "learning_rate": 0.0018902278854005057, "loss": 0.3013, "step": 16094 }, { "epoch": 0.02853953841339245, "grad_norm": 0.48828125, "learning_rate": 0.0018901993978216363, "loss": 0.2504, "step": 16096 }, { "epoch": 0.028543084578702263, "grad_norm": 1.5859375, "learning_rate": 0.001890170906786894, "loss": 0.2205, "step": 16098 }, { "epoch": 0.028546630744012078, "grad_norm": 0.7890625, "learning_rate": 0.0018901424122964032, "loss": 0.2133, "step": 16100 }, { "epoch": 0.028550176909321892, "grad_norm": 0.65625, "learning_rate": 0.0018901139143502885, "loss": 0.2287, "step": 16102 }, { "epoch": 0.02855372307463171, "grad_norm": 4.375, "learning_rate": 0.0018900854129486745, "loss": 0.2745, "step": 16104 }, { "epoch": 0.028557269239941525, "grad_norm": 0.5546875, "learning_rate": 0.0018900569080916858, "loss": 0.2275, "step": 16106 }, { "epoch": 0.02856081540525134, "grad_norm": 0.82421875, "learning_rate": 0.001890028399779447, "loss": 0.3168, "step": 16108 }, { "epoch": 0.028564361570561154, "grad_norm": 1.546875, "learning_rate": 0.001889999888012083, "loss": 0.2457, "step": 16110 }, { "epoch": 0.02856790773587097, "grad_norm": 0.326171875, "learning_rate": 0.001889971372789718, "loss": 0.188, "step": 16112 }, { "epoch": 0.028571453901180783, "grad_norm": 0.703125, "learning_rate": 0.0018899428541124777, "loss": 0.2551, "step": 16114 }, { "epoch": 0.0285750000664906, "grad_norm": 0.54296875, "learning_rate": 0.0018899143319804858, "loss": 0.1573, "step": 16116 }, { "epoch": 0.028578546231800415, "grad_norm": 0.5, "learning_rate": 0.0018898858063938677, "loss": 0.1726, "step": 16118 }, { "epoch": 0.02858209239711023, "grad_norm": 0.373046875, "learning_rate": 0.0018898572773527478, "loss": 0.1967, "step": 16120 }, { "epoch": 0.028585638562420045, "grad_norm": 0.48828125, "learning_rate": 0.0018898287448572508, "loss": 0.2627, "step": 16122 }, { "epoch": 0.02858918472772986, "grad_norm": 0.384765625, "learning_rate": 0.0018898002089075018, "loss": 0.1974, "step": 16124 }, { "epoch": 0.028592730893039674, "grad_norm": 0.55859375, "learning_rate": 0.0018897716695036257, "loss": 0.236, "step": 16126 }, { "epoch": 0.02859627705834949, "grad_norm": 1.53125, "learning_rate": 0.0018897431266457466, "loss": 0.2361, "step": 16128 }, { "epoch": 0.028599823223659306, "grad_norm": 0.78125, "learning_rate": 0.00188971458033399, "loss": 0.2677, "step": 16130 }, { "epoch": 0.02860336938896912, "grad_norm": 0.9921875, "learning_rate": 0.0018896860305684807, "loss": 0.2073, "step": 16132 }, { "epoch": 0.028606915554278935, "grad_norm": 3.109375, "learning_rate": 0.001889657477349343, "loss": 0.3756, "step": 16134 }, { "epoch": 0.02861046171958875, "grad_norm": 0.40234375, "learning_rate": 0.0018896289206767028, "loss": 0.2062, "step": 16136 }, { "epoch": 0.028614007884898568, "grad_norm": 1.703125, "learning_rate": 0.0018896003605506839, "loss": 0.299, "step": 16138 }, { "epoch": 0.028617554050208382, "grad_norm": 0.41015625, "learning_rate": 0.0018895717969714119, "loss": 0.2244, "step": 16140 }, { "epoch": 0.028621100215518197, "grad_norm": 1.2421875, "learning_rate": 0.0018895432299390112, "loss": 0.2021, "step": 16142 }, { "epoch": 0.02862464638082801, "grad_norm": 0.5703125, "learning_rate": 0.0018895146594536075, "loss": 0.2252, "step": 16144 }, { "epoch": 0.028628192546137826, "grad_norm": 0.75390625, "learning_rate": 0.0018894860855153252, "loss": 0.2229, "step": 16146 }, { "epoch": 0.02863173871144764, "grad_norm": 15.9375, "learning_rate": 0.001889457508124289, "loss": 0.5262, "step": 16148 }, { "epoch": 0.02863528487675746, "grad_norm": 0.58203125, "learning_rate": 0.0018894289272806245, "loss": 0.1838, "step": 16150 }, { "epoch": 0.028638831042067273, "grad_norm": 0.98046875, "learning_rate": 0.0018894003429844562, "loss": 0.3266, "step": 16152 }, { "epoch": 0.028642377207377088, "grad_norm": 8.1875, "learning_rate": 0.0018893717552359096, "loss": 0.2512, "step": 16154 }, { "epoch": 0.028645923372686902, "grad_norm": 0.7578125, "learning_rate": 0.0018893431640351092, "loss": 0.2521, "step": 16156 }, { "epoch": 0.028649469537996717, "grad_norm": 0.9609375, "learning_rate": 0.0018893145693821805, "loss": 0.2877, "step": 16158 }, { "epoch": 0.02865301570330653, "grad_norm": 1.1484375, "learning_rate": 0.0018892859712772484, "loss": 0.2224, "step": 16160 }, { "epoch": 0.02865656186861635, "grad_norm": 2.34375, "learning_rate": 0.0018892573697204377, "loss": 0.222, "step": 16162 }, { "epoch": 0.028660108033926164, "grad_norm": 0.447265625, "learning_rate": 0.001889228764711874, "loss": 0.2235, "step": 16164 }, { "epoch": 0.02866365419923598, "grad_norm": 0.484375, "learning_rate": 0.0018892001562516821, "loss": 0.2232, "step": 16166 }, { "epoch": 0.028667200364545793, "grad_norm": 0.39453125, "learning_rate": 0.0018891715443399868, "loss": 0.2098, "step": 16168 }, { "epoch": 0.028670746529855608, "grad_norm": 0.400390625, "learning_rate": 0.0018891429289769137, "loss": 0.1835, "step": 16170 }, { "epoch": 0.028674292695165426, "grad_norm": 2.671875, "learning_rate": 0.0018891143101625878, "loss": 0.3569, "step": 16172 }, { "epoch": 0.02867783886047524, "grad_norm": 1.2421875, "learning_rate": 0.0018890856878971346, "loss": 0.2325, "step": 16174 }, { "epoch": 0.028681385025785055, "grad_norm": 0.796875, "learning_rate": 0.0018890570621806786, "loss": 0.2588, "step": 16176 }, { "epoch": 0.02868493119109487, "grad_norm": 0.447265625, "learning_rate": 0.0018890284330133455, "loss": 0.2173, "step": 16178 }, { "epoch": 0.028688477356404684, "grad_norm": 2.96875, "learning_rate": 0.0018889998003952604, "loss": 0.1701, "step": 16180 }, { "epoch": 0.0286920235217145, "grad_norm": 0.828125, "learning_rate": 0.0018889711643265484, "loss": 0.1776, "step": 16182 }, { "epoch": 0.028695569687024317, "grad_norm": 0.80078125, "learning_rate": 0.001888942524807335, "loss": 0.1984, "step": 16184 }, { "epoch": 0.02869911585233413, "grad_norm": 0.83203125, "learning_rate": 0.0018889138818377448, "loss": 0.2511, "step": 16186 }, { "epoch": 0.028702662017643946, "grad_norm": 0.90234375, "learning_rate": 0.001888885235417904, "loss": 0.2487, "step": 16188 }, { "epoch": 0.02870620818295376, "grad_norm": 0.2216796875, "learning_rate": 0.0018888565855479373, "loss": 0.2029, "step": 16190 }, { "epoch": 0.028709754348263575, "grad_norm": 1.5859375, "learning_rate": 0.00188882793222797, "loss": 0.2647, "step": 16192 }, { "epoch": 0.02871330051357339, "grad_norm": 1.1484375, "learning_rate": 0.0018887992754581275, "loss": 0.2147, "step": 16194 }, { "epoch": 0.028716846678883207, "grad_norm": 1.2890625, "learning_rate": 0.0018887706152385356, "loss": 0.2279, "step": 16196 }, { "epoch": 0.028720392844193022, "grad_norm": 0.5234375, "learning_rate": 0.0018887419515693188, "loss": 0.2022, "step": 16198 }, { "epoch": 0.028723939009502836, "grad_norm": 0.8203125, "learning_rate": 0.001888713284450603, "loss": 0.2689, "step": 16200 }, { "epoch": 0.02872748517481265, "grad_norm": 0.94921875, "learning_rate": 0.0018886846138825132, "loss": 0.22, "step": 16202 }, { "epoch": 0.028731031340122466, "grad_norm": 0.75390625, "learning_rate": 0.0018886559398651752, "loss": 0.3533, "step": 16204 }, { "epoch": 0.028734577505432284, "grad_norm": 0.298828125, "learning_rate": 0.0018886272623987142, "loss": 0.1996, "step": 16206 }, { "epoch": 0.028738123670742098, "grad_norm": 1.7734375, "learning_rate": 0.0018885985814832559, "loss": 0.2396, "step": 16208 }, { "epoch": 0.028741669836051913, "grad_norm": 0.6640625, "learning_rate": 0.0018885698971189248, "loss": 0.2406, "step": 16210 }, { "epoch": 0.028745216001361727, "grad_norm": 1.5703125, "learning_rate": 0.0018885412093058477, "loss": 0.2215, "step": 16212 }, { "epoch": 0.028748762166671542, "grad_norm": 0.57421875, "learning_rate": 0.0018885125180441492, "loss": 0.3141, "step": 16214 }, { "epoch": 0.028752308331981356, "grad_norm": 1.046875, "learning_rate": 0.001888483823333955, "loss": 0.3075, "step": 16216 }, { "epoch": 0.028755854497291174, "grad_norm": 0.703125, "learning_rate": 0.0018884551251753903, "loss": 0.3037, "step": 16218 }, { "epoch": 0.02875940066260099, "grad_norm": 0.6953125, "learning_rate": 0.0018884264235685812, "loss": 0.1991, "step": 16220 }, { "epoch": 0.028762946827910803, "grad_norm": 0.2109375, "learning_rate": 0.0018883977185136526, "loss": 0.1997, "step": 16222 }, { "epoch": 0.028766492993220618, "grad_norm": 0.498046875, "learning_rate": 0.0018883690100107307, "loss": 0.1624, "step": 16224 }, { "epoch": 0.028770039158530433, "grad_norm": 0.78125, "learning_rate": 0.0018883402980599403, "loss": 0.2061, "step": 16226 }, { "epoch": 0.028773585323840247, "grad_norm": 0.375, "learning_rate": 0.0018883115826614078, "loss": 0.2027, "step": 16228 }, { "epoch": 0.028777131489150065, "grad_norm": 0.84375, "learning_rate": 0.0018882828638152578, "loss": 0.2728, "step": 16230 }, { "epoch": 0.02878067765445988, "grad_norm": 1.2421875, "learning_rate": 0.0018882541415216167, "loss": 0.2691, "step": 16232 }, { "epoch": 0.028784223819769694, "grad_norm": 0.6640625, "learning_rate": 0.0018882254157806101, "loss": 0.2265, "step": 16234 }, { "epoch": 0.02878776998507951, "grad_norm": 0.84375, "learning_rate": 0.0018881966865923632, "loss": 0.2075, "step": 16236 }, { "epoch": 0.028791316150389323, "grad_norm": 0.8125, "learning_rate": 0.0018881679539570018, "loss": 0.2381, "step": 16238 }, { "epoch": 0.02879486231569914, "grad_norm": 0.427734375, "learning_rate": 0.0018881392178746513, "loss": 0.1985, "step": 16240 }, { "epoch": 0.028798408481008956, "grad_norm": 0.671875, "learning_rate": 0.001888110478345438, "loss": 0.2827, "step": 16242 }, { "epoch": 0.02880195464631877, "grad_norm": 0.60546875, "learning_rate": 0.0018880817353694873, "loss": 0.2171, "step": 16244 }, { "epoch": 0.028805500811628585, "grad_norm": 2.875, "learning_rate": 0.0018880529889469249, "loss": 0.2308, "step": 16246 }, { "epoch": 0.0288090469769384, "grad_norm": 0.9921875, "learning_rate": 0.001888024239077876, "loss": 0.365, "step": 16248 }, { "epoch": 0.028812593142248214, "grad_norm": 1.953125, "learning_rate": 0.0018879954857624675, "loss": 0.3029, "step": 16250 }, { "epoch": 0.028816139307558032, "grad_norm": 0.609375, "learning_rate": 0.001887966729000824, "loss": 0.1876, "step": 16252 }, { "epoch": 0.028819685472867847, "grad_norm": 0.82421875, "learning_rate": 0.0018879379687930717, "loss": 0.1694, "step": 16254 }, { "epoch": 0.02882323163817766, "grad_norm": 0.546875, "learning_rate": 0.0018879092051393366, "loss": 0.185, "step": 16256 }, { "epoch": 0.028826777803487476, "grad_norm": 0.314453125, "learning_rate": 0.0018878804380397444, "loss": 0.2309, "step": 16258 }, { "epoch": 0.02883032396879729, "grad_norm": 1.1328125, "learning_rate": 0.001887851667494421, "loss": 0.1924, "step": 16260 }, { "epoch": 0.028833870134107105, "grad_norm": 0.73046875, "learning_rate": 0.0018878228935034918, "loss": 0.2033, "step": 16262 }, { "epoch": 0.028837416299416923, "grad_norm": 0.734375, "learning_rate": 0.0018877941160670825, "loss": 0.2618, "step": 16264 }, { "epoch": 0.028840962464726737, "grad_norm": 0.78125, "learning_rate": 0.0018877653351853196, "loss": 0.314, "step": 16266 }, { "epoch": 0.028844508630036552, "grad_norm": 1.0859375, "learning_rate": 0.0018877365508583289, "loss": 0.2583, "step": 16268 }, { "epoch": 0.028848054795346367, "grad_norm": 0.640625, "learning_rate": 0.001887707763086236, "loss": 0.2073, "step": 16270 }, { "epoch": 0.02885160096065618, "grad_norm": 3.890625, "learning_rate": 0.0018876789718691672, "loss": 0.3067, "step": 16272 }, { "epoch": 0.028855147125966, "grad_norm": 1.46875, "learning_rate": 0.0018876501772072474, "loss": 0.2392, "step": 16274 }, { "epoch": 0.028858693291275814, "grad_norm": 0.34765625, "learning_rate": 0.001887621379100604, "loss": 0.2292, "step": 16276 }, { "epoch": 0.02886223945658563, "grad_norm": 0.55078125, "learning_rate": 0.0018875925775493619, "loss": 0.3087, "step": 16278 }, { "epoch": 0.028865785621895443, "grad_norm": 0.57421875, "learning_rate": 0.0018875637725536472, "loss": 0.2272, "step": 16280 }, { "epoch": 0.028869331787205257, "grad_norm": 0.53515625, "learning_rate": 0.001887534964113586, "loss": 0.5092, "step": 16282 }, { "epoch": 0.028872877952515072, "grad_norm": 0.345703125, "learning_rate": 0.0018875061522293044, "loss": 0.2413, "step": 16284 }, { "epoch": 0.02887642411782489, "grad_norm": 1.1640625, "learning_rate": 0.0018874773369009286, "loss": 0.3652, "step": 16286 }, { "epoch": 0.028879970283134705, "grad_norm": 1.15625, "learning_rate": 0.001887448518128584, "loss": 0.2412, "step": 16288 }, { "epoch": 0.02888351644844452, "grad_norm": 0.76953125, "learning_rate": 0.0018874196959123974, "loss": 0.2275, "step": 16290 }, { "epoch": 0.028887062613754334, "grad_norm": 1.1328125, "learning_rate": 0.001887390870252494, "loss": 0.3363, "step": 16292 }, { "epoch": 0.028890608779064148, "grad_norm": 0.28515625, "learning_rate": 0.0018873620411490005, "loss": 0.1917, "step": 16294 }, { "epoch": 0.028894154944373963, "grad_norm": 0.96875, "learning_rate": 0.0018873332086020425, "loss": 0.2718, "step": 16296 }, { "epoch": 0.02889770110968378, "grad_norm": 0.9609375, "learning_rate": 0.0018873043726117466, "loss": 0.2529, "step": 16298 }, { "epoch": 0.028901247274993595, "grad_norm": 0.263671875, "learning_rate": 0.0018872755331782389, "loss": 0.3467, "step": 16300 }, { "epoch": 0.02890479344030341, "grad_norm": 0.2060546875, "learning_rate": 0.0018872466903016452, "loss": 0.2387, "step": 16302 }, { "epoch": 0.028908339605613224, "grad_norm": 0.44921875, "learning_rate": 0.0018872178439820915, "loss": 0.288, "step": 16304 }, { "epoch": 0.02891188577092304, "grad_norm": 0.486328125, "learning_rate": 0.0018871889942197045, "loss": 0.2158, "step": 16306 }, { "epoch": 0.028915431936232857, "grad_norm": 1.1953125, "learning_rate": 0.0018871601410146103, "loss": 0.1974, "step": 16308 }, { "epoch": 0.02891897810154267, "grad_norm": 1.234375, "learning_rate": 0.0018871312843669343, "loss": 0.2197, "step": 16310 }, { "epoch": 0.028922524266852486, "grad_norm": 1.3671875, "learning_rate": 0.001887102424276804, "loss": 0.2766, "step": 16312 }, { "epoch": 0.0289260704321623, "grad_norm": 0.361328125, "learning_rate": 0.0018870735607443441, "loss": 0.2176, "step": 16314 }, { "epoch": 0.028929616597472115, "grad_norm": 0.7734375, "learning_rate": 0.0018870446937696822, "loss": 0.2404, "step": 16316 }, { "epoch": 0.02893316276278193, "grad_norm": 0.47265625, "learning_rate": 0.001887015823352944, "loss": 0.2791, "step": 16318 }, { "epoch": 0.028936708928091748, "grad_norm": 1.0703125, "learning_rate": 0.0018869869494942556, "loss": 0.2242, "step": 16320 }, { "epoch": 0.028940255093401562, "grad_norm": 0.9375, "learning_rate": 0.0018869580721937432, "loss": 0.2457, "step": 16322 }, { "epoch": 0.028943801258711377, "grad_norm": 2.328125, "learning_rate": 0.0018869291914515333, "loss": 0.5809, "step": 16324 }, { "epoch": 0.02894734742402119, "grad_norm": 0.6171875, "learning_rate": 0.0018869003072677526, "loss": 0.2402, "step": 16326 }, { "epoch": 0.028950893589331006, "grad_norm": 1.9609375, "learning_rate": 0.0018868714196425267, "loss": 0.3535, "step": 16328 }, { "epoch": 0.02895443975464082, "grad_norm": 0.50390625, "learning_rate": 0.0018868425285759824, "loss": 0.1926, "step": 16330 }, { "epoch": 0.02895798591995064, "grad_norm": 0.50390625, "learning_rate": 0.0018868136340682458, "loss": 0.1949, "step": 16332 }, { "epoch": 0.028961532085260453, "grad_norm": 0.93359375, "learning_rate": 0.0018867847361194437, "loss": 0.265, "step": 16334 }, { "epoch": 0.028965078250570268, "grad_norm": 0.4609375, "learning_rate": 0.001886755834729702, "loss": 0.1774, "step": 16336 }, { "epoch": 0.028968624415880082, "grad_norm": 8.5625, "learning_rate": 0.0018867269298991475, "loss": 0.4013, "step": 16338 }, { "epoch": 0.028972170581189897, "grad_norm": 1.03125, "learning_rate": 0.001886698021627906, "loss": 0.2338, "step": 16340 }, { "epoch": 0.028975716746499715, "grad_norm": 2.515625, "learning_rate": 0.0018866691099161045, "loss": 0.2397, "step": 16342 }, { "epoch": 0.02897926291180953, "grad_norm": 0.392578125, "learning_rate": 0.0018866401947638693, "loss": 0.2269, "step": 16344 }, { "epoch": 0.028982809077119344, "grad_norm": 1.4140625, "learning_rate": 0.001886611276171327, "loss": 0.3197, "step": 16346 }, { "epoch": 0.02898635524242916, "grad_norm": 0.6875, "learning_rate": 0.0018865823541386036, "loss": 0.1638, "step": 16348 }, { "epoch": 0.028989901407738973, "grad_norm": 0.419921875, "learning_rate": 0.0018865534286658262, "loss": 0.213, "step": 16350 }, { "epoch": 0.028993447573048788, "grad_norm": 1.2265625, "learning_rate": 0.0018865244997531205, "loss": 0.2, "step": 16352 }, { "epoch": 0.028996993738358606, "grad_norm": 0.57421875, "learning_rate": 0.0018864955674006139, "loss": 0.2437, "step": 16354 }, { "epoch": 0.02900053990366842, "grad_norm": 0.369140625, "learning_rate": 0.0018864666316084325, "loss": 0.205, "step": 16356 }, { "epoch": 0.029004086068978235, "grad_norm": 0.63671875, "learning_rate": 0.0018864376923767026, "loss": 0.2274, "step": 16358 }, { "epoch": 0.02900763223428805, "grad_norm": 3.890625, "learning_rate": 0.0018864087497055513, "loss": 0.3339, "step": 16360 }, { "epoch": 0.029011178399597864, "grad_norm": 0.53125, "learning_rate": 0.0018863798035951053, "loss": 0.2139, "step": 16362 }, { "epoch": 0.02901472456490768, "grad_norm": 1.8828125, "learning_rate": 0.00188635085404549, "loss": 0.3722, "step": 16364 }, { "epoch": 0.029018270730217496, "grad_norm": 0.515625, "learning_rate": 0.0018863219010568336, "loss": 0.1834, "step": 16366 }, { "epoch": 0.02902181689552731, "grad_norm": 0.8125, "learning_rate": 0.0018862929446292614, "loss": 0.205, "step": 16368 }, { "epoch": 0.029025363060837125, "grad_norm": 4.71875, "learning_rate": 0.0018862639847629008, "loss": 0.3994, "step": 16370 }, { "epoch": 0.02902890922614694, "grad_norm": 0.443359375, "learning_rate": 0.0018862350214578782, "loss": 0.2011, "step": 16372 }, { "epoch": 0.029032455391456755, "grad_norm": 1.0546875, "learning_rate": 0.0018862060547143204, "loss": 0.3336, "step": 16374 }, { "epoch": 0.029036001556766573, "grad_norm": 0.416015625, "learning_rate": 0.001886177084532354, "loss": 0.1906, "step": 16376 }, { "epoch": 0.029039547722076387, "grad_norm": 1.765625, "learning_rate": 0.0018861481109121057, "loss": 0.3205, "step": 16378 }, { "epoch": 0.0290430938873862, "grad_norm": 4.09375, "learning_rate": 0.0018861191338537021, "loss": 0.1958, "step": 16380 }, { "epoch": 0.029046640052696016, "grad_norm": 6.375, "learning_rate": 0.00188609015335727, "loss": 0.4469, "step": 16382 }, { "epoch": 0.02905018621800583, "grad_norm": 0.59765625, "learning_rate": 0.0018860611694229365, "loss": 0.271, "step": 16384 }, { "epoch": 0.029053732383315645, "grad_norm": 0.396484375, "learning_rate": 0.0018860321820508277, "loss": 0.2677, "step": 16386 }, { "epoch": 0.029057278548625463, "grad_norm": 0.921875, "learning_rate": 0.001886003191241071, "loss": 0.2044, "step": 16388 }, { "epoch": 0.029060824713935278, "grad_norm": 0.36328125, "learning_rate": 0.0018859741969937927, "loss": 0.1816, "step": 16390 }, { "epoch": 0.029064370879245092, "grad_norm": 1.234375, "learning_rate": 0.00188594519930912, "loss": 0.2402, "step": 16392 }, { "epoch": 0.029067917044554907, "grad_norm": 0.55078125, "learning_rate": 0.0018859161981871792, "loss": 0.1972, "step": 16394 }, { "epoch": 0.02907146320986472, "grad_norm": 0.2431640625, "learning_rate": 0.0018858871936280979, "loss": 0.1519, "step": 16396 }, { "epoch": 0.029075009375174536, "grad_norm": 0.5078125, "learning_rate": 0.0018858581856320022, "loss": 0.167, "step": 16398 }, { "epoch": 0.029078555540484354, "grad_norm": 3.125, "learning_rate": 0.0018858291741990192, "loss": 0.322, "step": 16400 }, { "epoch": 0.02908210170579417, "grad_norm": 0.416015625, "learning_rate": 0.001885800159329276, "loss": 0.1824, "step": 16402 }, { "epoch": 0.029085647871103983, "grad_norm": 0.34375, "learning_rate": 0.0018857711410228998, "loss": 0.1885, "step": 16404 }, { "epoch": 0.029089194036413798, "grad_norm": 0.310546875, "learning_rate": 0.0018857421192800164, "loss": 0.1803, "step": 16406 }, { "epoch": 0.029092740201723612, "grad_norm": 0.80859375, "learning_rate": 0.0018857130941007535, "loss": 0.25, "step": 16408 }, { "epoch": 0.02909628636703343, "grad_norm": 1.390625, "learning_rate": 0.001885684065485238, "loss": 0.2244, "step": 16410 }, { "epoch": 0.029099832532343245, "grad_norm": 0.62109375, "learning_rate": 0.0018856550334335972, "loss": 0.2335, "step": 16412 }, { "epoch": 0.02910337869765306, "grad_norm": 4.96875, "learning_rate": 0.001885625997945957, "loss": 0.4523, "step": 16414 }, { "epoch": 0.029106924862962874, "grad_norm": 0.953125, "learning_rate": 0.001885596959022445, "loss": 0.2302, "step": 16416 }, { "epoch": 0.02911047102827269, "grad_norm": 0.79296875, "learning_rate": 0.0018855679166631888, "loss": 0.2314, "step": 16418 }, { "epoch": 0.029114017193582503, "grad_norm": 0.578125, "learning_rate": 0.0018855388708683145, "loss": 0.2205, "step": 16420 }, { "epoch": 0.02911756335889232, "grad_norm": 0.69921875, "learning_rate": 0.0018855098216379494, "loss": 0.1928, "step": 16422 }, { "epoch": 0.029121109524202136, "grad_norm": 0.369140625, "learning_rate": 0.001885480768972221, "loss": 0.1918, "step": 16424 }, { "epoch": 0.02912465568951195, "grad_norm": 0.6328125, "learning_rate": 0.0018854517128712555, "loss": 0.1821, "step": 16426 }, { "epoch": 0.029128201854821765, "grad_norm": 0.474609375, "learning_rate": 0.0018854226533351808, "loss": 0.2326, "step": 16428 }, { "epoch": 0.02913174802013158, "grad_norm": 0.5859375, "learning_rate": 0.0018853935903641234, "loss": 0.2422, "step": 16430 }, { "epoch": 0.029135294185441394, "grad_norm": 0.396484375, "learning_rate": 0.0018853645239582109, "loss": 0.2677, "step": 16432 }, { "epoch": 0.029138840350751212, "grad_norm": 0.400390625, "learning_rate": 0.0018853354541175703, "loss": 0.2216, "step": 16434 }, { "epoch": 0.029142386516061027, "grad_norm": 0.31640625, "learning_rate": 0.0018853063808423282, "loss": 0.1685, "step": 16436 }, { "epoch": 0.02914593268137084, "grad_norm": 0.515625, "learning_rate": 0.0018852773041326122, "loss": 0.2739, "step": 16438 }, { "epoch": 0.029149478846680656, "grad_norm": 0.546875, "learning_rate": 0.0018852482239885492, "loss": 0.2305, "step": 16440 }, { "epoch": 0.02915302501199047, "grad_norm": 0.431640625, "learning_rate": 0.0018852191404102672, "loss": 0.2419, "step": 16442 }, { "epoch": 0.029156571177300288, "grad_norm": 1.09375, "learning_rate": 0.0018851900533978926, "loss": 0.2139, "step": 16444 }, { "epoch": 0.029160117342610103, "grad_norm": 0.71875, "learning_rate": 0.0018851609629515526, "loss": 0.2155, "step": 16446 }, { "epoch": 0.029163663507919917, "grad_norm": 1.21875, "learning_rate": 0.0018851318690713745, "loss": 0.1672, "step": 16448 }, { "epoch": 0.029167209673229732, "grad_norm": 0.384765625, "learning_rate": 0.001885102771757486, "loss": 0.2126, "step": 16450 }, { "epoch": 0.029170755838539546, "grad_norm": 1.1796875, "learning_rate": 0.001885073671010014, "loss": 0.2298, "step": 16452 }, { "epoch": 0.02917430200384936, "grad_norm": 0.625, "learning_rate": 0.0018850445668290856, "loss": 0.2557, "step": 16454 }, { "epoch": 0.02917784816915918, "grad_norm": 0.84765625, "learning_rate": 0.0018850154592148284, "loss": 0.4395, "step": 16456 }, { "epoch": 0.029181394334468994, "grad_norm": 1.2734375, "learning_rate": 0.0018849863481673697, "loss": 0.2591, "step": 16458 }, { "epoch": 0.029184940499778808, "grad_norm": 0.53125, "learning_rate": 0.0018849572336868364, "loss": 0.1886, "step": 16460 }, { "epoch": 0.029188486665088623, "grad_norm": 0.6015625, "learning_rate": 0.001884928115773356, "loss": 0.1877, "step": 16462 }, { "epoch": 0.029192032830398437, "grad_norm": 0.65234375, "learning_rate": 0.0018848989944270564, "loss": 0.1752, "step": 16464 }, { "epoch": 0.029195578995708252, "grad_norm": 1.65625, "learning_rate": 0.0018848698696480645, "loss": 0.203, "step": 16466 }, { "epoch": 0.02919912516101807, "grad_norm": 0.80859375, "learning_rate": 0.0018848407414365075, "loss": 0.1898, "step": 16468 }, { "epoch": 0.029202671326327884, "grad_norm": 0.54296875, "learning_rate": 0.001884811609792513, "loss": 0.2323, "step": 16470 }, { "epoch": 0.0292062174916377, "grad_norm": 0.2314453125, "learning_rate": 0.0018847824747162082, "loss": 0.2808, "step": 16472 }, { "epoch": 0.029209763656947513, "grad_norm": 0.5234375, "learning_rate": 0.001884753336207721, "loss": 0.1978, "step": 16474 }, { "epoch": 0.029213309822257328, "grad_norm": 0.4140625, "learning_rate": 0.0018847241942671785, "loss": 0.2062, "step": 16476 }, { "epoch": 0.029216855987567146, "grad_norm": 0.6015625, "learning_rate": 0.001884695048894708, "loss": 0.2442, "step": 16478 }, { "epoch": 0.02922040215287696, "grad_norm": 1.8359375, "learning_rate": 0.001884665900090437, "loss": 0.3183, "step": 16480 }, { "epoch": 0.029223948318186775, "grad_norm": 4.625, "learning_rate": 0.0018846367478544937, "loss": 0.2864, "step": 16482 }, { "epoch": 0.02922749448349659, "grad_norm": 4.375, "learning_rate": 0.001884607592187005, "loss": 0.2509, "step": 16484 }, { "epoch": 0.029231040648806404, "grad_norm": 1.8359375, "learning_rate": 0.0018845784330880982, "loss": 0.3211, "step": 16486 }, { "epoch": 0.02923458681411622, "grad_norm": 0.44140625, "learning_rate": 0.001884549270557901, "loss": 0.271, "step": 16488 }, { "epoch": 0.029238132979426037, "grad_norm": 0.306640625, "learning_rate": 0.0018845201045965413, "loss": 0.2279, "step": 16490 }, { "epoch": 0.02924167914473585, "grad_norm": 0.83203125, "learning_rate": 0.0018844909352041458, "loss": 0.2755, "step": 16492 }, { "epoch": 0.029245225310045666, "grad_norm": 0.58203125, "learning_rate": 0.0018844617623808432, "loss": 0.23, "step": 16494 }, { "epoch": 0.02924877147535548, "grad_norm": 0.67578125, "learning_rate": 0.00188443258612676, "loss": 0.2595, "step": 16496 }, { "epoch": 0.029252317640665295, "grad_norm": 0.58203125, "learning_rate": 0.001884403406442025, "loss": 0.2312, "step": 16498 }, { "epoch": 0.02925586380597511, "grad_norm": 0.30078125, "learning_rate": 0.001884374223326765, "loss": 0.1869, "step": 16500 }, { "epoch": 0.029259409971284928, "grad_norm": 1.8203125, "learning_rate": 0.0018843450367811072, "loss": 0.2256, "step": 16502 }, { "epoch": 0.029262956136594742, "grad_norm": 0.578125, "learning_rate": 0.0018843158468051804, "loss": 0.2081, "step": 16504 }, { "epoch": 0.029266502301904557, "grad_norm": 2.5625, "learning_rate": 0.0018842866533991115, "loss": 0.4354, "step": 16506 }, { "epoch": 0.02927004846721437, "grad_norm": 0.427734375, "learning_rate": 0.0018842574565630281, "loss": 0.2215, "step": 16508 }, { "epoch": 0.029273594632524186, "grad_norm": 1.3125, "learning_rate": 0.0018842282562970584, "loss": 0.3376, "step": 16510 }, { "epoch": 0.029277140797834004, "grad_norm": 1.3203125, "learning_rate": 0.0018841990526013298, "loss": 0.2389, "step": 16512 }, { "epoch": 0.02928068696314382, "grad_norm": 0.625, "learning_rate": 0.0018841698454759698, "loss": 0.2105, "step": 16514 }, { "epoch": 0.029284233128453633, "grad_norm": 0.33203125, "learning_rate": 0.001884140634921107, "loss": 0.1605, "step": 16516 }, { "epoch": 0.029287779293763447, "grad_norm": 1.7109375, "learning_rate": 0.001884111420936868, "loss": 0.4578, "step": 16518 }, { "epoch": 0.029291325459073262, "grad_norm": 1.1015625, "learning_rate": 0.0018840822035233811, "loss": 0.2545, "step": 16520 }, { "epoch": 0.029294871624383077, "grad_norm": 0.8828125, "learning_rate": 0.0018840529826807744, "loss": 0.2587, "step": 16522 }, { "epoch": 0.029298417789692895, "grad_norm": 0.91015625, "learning_rate": 0.0018840237584091752, "loss": 0.3245, "step": 16524 }, { "epoch": 0.02930196395500271, "grad_norm": 0.84375, "learning_rate": 0.0018839945307087117, "loss": 0.2438, "step": 16526 }, { "epoch": 0.029305510120312524, "grad_norm": 1.125, "learning_rate": 0.0018839652995795114, "loss": 0.3821, "step": 16528 }, { "epoch": 0.02930905628562234, "grad_norm": 1.0546875, "learning_rate": 0.0018839360650217021, "loss": 0.2183, "step": 16530 }, { "epoch": 0.029312602450932153, "grad_norm": 0.73046875, "learning_rate": 0.0018839068270354118, "loss": 0.1633, "step": 16532 }, { "epoch": 0.029316148616241967, "grad_norm": 5.09375, "learning_rate": 0.0018838775856207686, "loss": 0.2708, "step": 16534 }, { "epoch": 0.029319694781551785, "grad_norm": 0.6796875, "learning_rate": 0.0018838483407779, "loss": 0.2068, "step": 16536 }, { "epoch": 0.0293232409468616, "grad_norm": 0.35546875, "learning_rate": 0.0018838190925069345, "loss": 0.1839, "step": 16538 }, { "epoch": 0.029326787112171415, "grad_norm": 0.61328125, "learning_rate": 0.0018837898408079993, "loss": 0.2545, "step": 16540 }, { "epoch": 0.02933033327748123, "grad_norm": 2.46875, "learning_rate": 0.0018837605856812224, "loss": 0.3261, "step": 16542 }, { "epoch": 0.029333879442791044, "grad_norm": 1.046875, "learning_rate": 0.0018837313271267322, "loss": 0.2515, "step": 16544 }, { "epoch": 0.02933742560810086, "grad_norm": 0.404296875, "learning_rate": 0.0018837020651446563, "loss": 0.2496, "step": 16546 }, { "epoch": 0.029340971773410676, "grad_norm": 0.40234375, "learning_rate": 0.0018836727997351228, "loss": 0.22, "step": 16548 }, { "epoch": 0.02934451793872049, "grad_norm": 1.1015625, "learning_rate": 0.0018836435308982598, "loss": 0.1889, "step": 16550 }, { "epoch": 0.029348064104030305, "grad_norm": 1.1875, "learning_rate": 0.001883614258634195, "loss": 0.2547, "step": 16552 }, { "epoch": 0.02935161026934012, "grad_norm": 0.5390625, "learning_rate": 0.0018835849829430568, "loss": 0.2459, "step": 16554 }, { "epoch": 0.029355156434649934, "grad_norm": 1.125, "learning_rate": 0.001883555703824973, "loss": 0.2455, "step": 16556 }, { "epoch": 0.029358702599959752, "grad_norm": 0.32421875, "learning_rate": 0.0018835264212800715, "loss": 0.2801, "step": 16558 }, { "epoch": 0.029362248765269567, "grad_norm": 0.55859375, "learning_rate": 0.001883497135308481, "loss": 0.2469, "step": 16560 }, { "epoch": 0.02936579493057938, "grad_norm": 0.470703125, "learning_rate": 0.001883467845910329, "loss": 0.1964, "step": 16562 }, { "epoch": 0.029369341095889196, "grad_norm": 1.40625, "learning_rate": 0.0018834385530857435, "loss": 0.2267, "step": 16564 }, { "epoch": 0.02937288726119901, "grad_norm": 0.66796875, "learning_rate": 0.001883409256834853, "loss": 0.2216, "step": 16566 }, { "epoch": 0.029376433426508825, "grad_norm": 0.451171875, "learning_rate": 0.0018833799571577852, "loss": 0.2015, "step": 16568 }, { "epoch": 0.029379979591818643, "grad_norm": 0.396484375, "learning_rate": 0.0018833506540546687, "loss": 0.1955, "step": 16570 }, { "epoch": 0.029383525757128458, "grad_norm": 0.62109375, "learning_rate": 0.0018833213475256316, "loss": 0.2211, "step": 16572 }, { "epoch": 0.029387071922438272, "grad_norm": 0.36328125, "learning_rate": 0.0018832920375708019, "loss": 0.2667, "step": 16574 }, { "epoch": 0.029390618087748087, "grad_norm": 4.3125, "learning_rate": 0.0018832627241903077, "loss": 0.2771, "step": 16576 }, { "epoch": 0.0293941642530579, "grad_norm": 1.515625, "learning_rate": 0.0018832334073842774, "loss": 0.3164, "step": 16578 }, { "epoch": 0.02939771041836772, "grad_norm": 0.72265625, "learning_rate": 0.0018832040871528393, "loss": 0.2078, "step": 16580 }, { "epoch": 0.029401256583677534, "grad_norm": 0.298828125, "learning_rate": 0.0018831747634961212, "loss": 0.2292, "step": 16582 }, { "epoch": 0.02940480274898735, "grad_norm": 0.322265625, "learning_rate": 0.0018831454364142514, "loss": 0.2135, "step": 16584 }, { "epoch": 0.029408348914297163, "grad_norm": 3.296875, "learning_rate": 0.0018831161059073586, "loss": 0.2987, "step": 16586 }, { "epoch": 0.029411895079606978, "grad_norm": 1.1328125, "learning_rate": 0.0018830867719755709, "loss": 0.2286, "step": 16588 }, { "epoch": 0.029415441244916792, "grad_norm": 0.609375, "learning_rate": 0.0018830574346190166, "loss": 0.2297, "step": 16590 }, { "epoch": 0.02941898741022661, "grad_norm": 0.29296875, "learning_rate": 0.001883028093837824, "loss": 0.2936, "step": 16592 }, { "epoch": 0.029422533575536425, "grad_norm": 1.0625, "learning_rate": 0.001882998749632121, "loss": 0.2445, "step": 16594 }, { "epoch": 0.02942607974084624, "grad_norm": 0.4296875, "learning_rate": 0.0018829694020020364, "loss": 0.2801, "step": 16596 }, { "epoch": 0.029429625906156054, "grad_norm": 0.30859375, "learning_rate": 0.0018829400509476986, "loss": 0.1658, "step": 16598 }, { "epoch": 0.02943317207146587, "grad_norm": 0.373046875, "learning_rate": 0.0018829106964692358, "loss": 0.218, "step": 16600 }, { "epoch": 0.029436718236775683, "grad_norm": 2.96875, "learning_rate": 0.0018828813385667765, "loss": 0.2704, "step": 16602 }, { "epoch": 0.0294402644020855, "grad_norm": 0.87109375, "learning_rate": 0.0018828519772404483, "loss": 0.2337, "step": 16604 }, { "epoch": 0.029443810567395316, "grad_norm": 0.75, "learning_rate": 0.0018828226124903808, "loss": 0.2671, "step": 16606 }, { "epoch": 0.02944735673270513, "grad_norm": 0.43359375, "learning_rate": 0.0018827932443167019, "loss": 0.2349, "step": 16608 }, { "epoch": 0.029450902898014945, "grad_norm": 0.49609375, "learning_rate": 0.0018827638727195403, "loss": 0.2073, "step": 16610 }, { "epoch": 0.02945444906332476, "grad_norm": 1.1328125, "learning_rate": 0.001882734497699024, "loss": 0.2541, "step": 16612 }, { "epoch": 0.029457995228634577, "grad_norm": 0.75390625, "learning_rate": 0.0018827051192552816, "loss": 0.2349, "step": 16614 }, { "epoch": 0.029461541393944392, "grad_norm": 3.578125, "learning_rate": 0.0018826757373884416, "loss": 0.2521, "step": 16616 }, { "epoch": 0.029465087559254206, "grad_norm": 1.4375, "learning_rate": 0.0018826463520986326, "loss": 0.2359, "step": 16618 }, { "epoch": 0.02946863372456402, "grad_norm": 0.83203125, "learning_rate": 0.0018826169633859831, "loss": 0.282, "step": 16620 }, { "epoch": 0.029472179889873835, "grad_norm": 0.578125, "learning_rate": 0.0018825875712506215, "loss": 0.1747, "step": 16622 }, { "epoch": 0.02947572605518365, "grad_norm": 0.63671875, "learning_rate": 0.0018825581756926764, "loss": 0.1783, "step": 16624 }, { "epoch": 0.029479272220493468, "grad_norm": 0.9921875, "learning_rate": 0.0018825287767122768, "loss": 0.1752, "step": 16626 }, { "epoch": 0.029482818385803283, "grad_norm": 0.859375, "learning_rate": 0.0018824993743095507, "loss": 0.3269, "step": 16628 }, { "epoch": 0.029486364551113097, "grad_norm": 0.341796875, "learning_rate": 0.0018824699684846264, "loss": 0.2186, "step": 16630 }, { "epoch": 0.02948991071642291, "grad_norm": 0.6953125, "learning_rate": 0.0018824405592376334, "loss": 0.1926, "step": 16632 }, { "epoch": 0.029493456881732726, "grad_norm": 0.95703125, "learning_rate": 0.0018824111465687, "loss": 0.3023, "step": 16634 }, { "epoch": 0.02949700304704254, "grad_norm": 0.671875, "learning_rate": 0.0018823817304779546, "loss": 0.1736, "step": 16636 }, { "epoch": 0.02950054921235236, "grad_norm": 0.609375, "learning_rate": 0.001882352310965526, "loss": 0.1993, "step": 16638 }, { "epoch": 0.029504095377662173, "grad_norm": 1.71875, "learning_rate": 0.001882322888031543, "loss": 0.3678, "step": 16640 }, { "epoch": 0.029507641542971988, "grad_norm": 0.486328125, "learning_rate": 0.0018822934616761337, "loss": 0.2318, "step": 16642 }, { "epoch": 0.029511187708281802, "grad_norm": 2.09375, "learning_rate": 0.0018822640318994274, "loss": 0.3982, "step": 16644 }, { "epoch": 0.029514733873591617, "grad_norm": 0.8671875, "learning_rate": 0.0018822345987015523, "loss": 0.3106, "step": 16646 }, { "epoch": 0.029518280038901435, "grad_norm": 0.79296875, "learning_rate": 0.001882205162082638, "loss": 0.2578, "step": 16648 }, { "epoch": 0.02952182620421125, "grad_norm": 0.84765625, "learning_rate": 0.001882175722042812, "loss": 0.2203, "step": 16650 }, { "epoch": 0.029525372369521064, "grad_norm": 0.9140625, "learning_rate": 0.0018821462785822045, "loss": 0.331, "step": 16652 }, { "epoch": 0.02952891853483088, "grad_norm": 0.60546875, "learning_rate": 0.0018821168317009432, "loss": 0.2047, "step": 16654 }, { "epoch": 0.029532464700140693, "grad_norm": 0.6953125, "learning_rate": 0.0018820873813991572, "loss": 0.2383, "step": 16656 }, { "epoch": 0.029536010865450508, "grad_norm": 0.55078125, "learning_rate": 0.001882057927676975, "loss": 0.2129, "step": 16658 }, { "epoch": 0.029539557030760326, "grad_norm": 10.9375, "learning_rate": 0.001882028470534526, "loss": 0.3519, "step": 16660 }, { "epoch": 0.02954310319607014, "grad_norm": 0.9296875, "learning_rate": 0.001881999009971939, "loss": 0.2205, "step": 16662 }, { "epoch": 0.029546649361379955, "grad_norm": 0.470703125, "learning_rate": 0.0018819695459893421, "loss": 0.2093, "step": 16664 }, { "epoch": 0.02955019552668977, "grad_norm": 1.359375, "learning_rate": 0.0018819400785868646, "loss": 0.2302, "step": 16666 }, { "epoch": 0.029553741691999584, "grad_norm": 0.65625, "learning_rate": 0.0018819106077646356, "loss": 0.2871, "step": 16668 }, { "epoch": 0.0295572878573094, "grad_norm": 0.640625, "learning_rate": 0.001881881133522784, "loss": 0.2343, "step": 16670 }, { "epoch": 0.029560834022619217, "grad_norm": 1.953125, "learning_rate": 0.0018818516558614382, "loss": 0.3144, "step": 16672 }, { "epoch": 0.02956438018792903, "grad_norm": 0.455078125, "learning_rate": 0.0018818221747807274, "loss": 0.2072, "step": 16674 }, { "epoch": 0.029567926353238846, "grad_norm": 6.0, "learning_rate": 0.0018817926902807804, "loss": 0.2485, "step": 16676 }, { "epoch": 0.02957147251854866, "grad_norm": 1.75, "learning_rate": 0.0018817632023617266, "loss": 0.2983, "step": 16678 }, { "epoch": 0.029575018683858475, "grad_norm": 2.296875, "learning_rate": 0.0018817337110236946, "loss": 0.2936, "step": 16680 }, { "epoch": 0.029578564849168293, "grad_norm": 0.291015625, "learning_rate": 0.0018817042162668132, "loss": 0.1842, "step": 16682 }, { "epoch": 0.029582111014478107, "grad_norm": 0.482421875, "learning_rate": 0.001881674718091212, "loss": 0.1882, "step": 16684 }, { "epoch": 0.029585657179787922, "grad_norm": 0.37890625, "learning_rate": 0.0018816452164970192, "loss": 0.2116, "step": 16686 }, { "epoch": 0.029589203345097737, "grad_norm": 0.486328125, "learning_rate": 0.0018816157114843646, "loss": 0.1892, "step": 16688 }, { "epoch": 0.02959274951040755, "grad_norm": 0.8515625, "learning_rate": 0.0018815862030533768, "loss": 0.2035, "step": 16690 }, { "epoch": 0.029596295675717366, "grad_norm": 0.58203125, "learning_rate": 0.001881556691204185, "loss": 0.3968, "step": 16692 }, { "epoch": 0.029599841841027184, "grad_norm": 0.51171875, "learning_rate": 0.001881527175936918, "loss": 0.217, "step": 16694 }, { "epoch": 0.029603388006336998, "grad_norm": 0.474609375, "learning_rate": 0.0018814976572517053, "loss": 0.2913, "step": 16696 }, { "epoch": 0.029606934171646813, "grad_norm": 7.4375, "learning_rate": 0.001881468135148676, "loss": 0.4673, "step": 16698 }, { "epoch": 0.029610480336956627, "grad_norm": 0.84765625, "learning_rate": 0.0018814386096279586, "loss": 0.2237, "step": 16700 }, { "epoch": 0.029614026502266442, "grad_norm": 1.0, "learning_rate": 0.0018814090806896827, "loss": 0.2352, "step": 16702 }, { "epoch": 0.029617572667576256, "grad_norm": 0.48046875, "learning_rate": 0.0018813795483339775, "loss": 0.174, "step": 16704 }, { "epoch": 0.029621118832886074, "grad_norm": 0.49609375, "learning_rate": 0.001881350012560972, "loss": 0.241, "step": 16706 }, { "epoch": 0.02962466499819589, "grad_norm": 0.5546875, "learning_rate": 0.0018813204733707954, "loss": 0.232, "step": 16708 }, { "epoch": 0.029628211163505704, "grad_norm": 0.458984375, "learning_rate": 0.0018812909307635768, "loss": 0.1704, "step": 16710 }, { "epoch": 0.029631757328815518, "grad_norm": 1.09375, "learning_rate": 0.0018812613847394458, "loss": 0.2548, "step": 16712 }, { "epoch": 0.029635303494125333, "grad_norm": 1.078125, "learning_rate": 0.0018812318352985312, "loss": 0.2749, "step": 16714 }, { "epoch": 0.02963884965943515, "grad_norm": 2.65625, "learning_rate": 0.0018812022824409623, "loss": 0.2695, "step": 16716 }, { "epoch": 0.029642395824744965, "grad_norm": 1.03125, "learning_rate": 0.0018811727261668681, "loss": 0.2257, "step": 16718 }, { "epoch": 0.02964594199005478, "grad_norm": 0.70703125, "learning_rate": 0.0018811431664763787, "loss": 0.243, "step": 16720 }, { "epoch": 0.029649488155364594, "grad_norm": 1.25, "learning_rate": 0.0018811136033696223, "loss": 0.4543, "step": 16722 }, { "epoch": 0.02965303432067441, "grad_norm": 0.5234375, "learning_rate": 0.0018810840368467289, "loss": 0.2042, "step": 16724 }, { "epoch": 0.029656580485984223, "grad_norm": 1.046875, "learning_rate": 0.0018810544669078278, "loss": 0.2378, "step": 16726 }, { "epoch": 0.02966012665129404, "grad_norm": 0.515625, "learning_rate": 0.0018810248935530478, "loss": 0.1971, "step": 16728 }, { "epoch": 0.029663672816603856, "grad_norm": 0.4375, "learning_rate": 0.0018809953167825192, "loss": 0.2117, "step": 16730 }, { "epoch": 0.02966721898191367, "grad_norm": 0.8046875, "learning_rate": 0.0018809657365963703, "loss": 0.2126, "step": 16732 }, { "epoch": 0.029670765147223485, "grad_norm": 0.431640625, "learning_rate": 0.001880936152994731, "loss": 0.2501, "step": 16734 }, { "epoch": 0.0296743113125333, "grad_norm": 1.875, "learning_rate": 0.0018809065659777305, "loss": 0.2054, "step": 16736 }, { "epoch": 0.029677857477843114, "grad_norm": 0.41796875, "learning_rate": 0.0018808769755454982, "loss": 0.2013, "step": 16738 }, { "epoch": 0.029681403643152932, "grad_norm": 0.5625, "learning_rate": 0.001880847381698164, "loss": 0.2365, "step": 16740 }, { "epoch": 0.029684949808462747, "grad_norm": 0.734375, "learning_rate": 0.0018808177844358565, "loss": 0.204, "step": 16742 }, { "epoch": 0.02968849597377256, "grad_norm": 4.8125, "learning_rate": 0.001880788183758706, "loss": 0.2949, "step": 16744 }, { "epoch": 0.029692042139082376, "grad_norm": 0.314453125, "learning_rate": 0.0018807585796668412, "loss": 0.1435, "step": 16746 }, { "epoch": 0.02969558830439219, "grad_norm": 0.56640625, "learning_rate": 0.0018807289721603918, "loss": 0.2879, "step": 16748 }, { "epoch": 0.02969913446970201, "grad_norm": 0.431640625, "learning_rate": 0.0018806993612394873, "loss": 0.2119, "step": 16750 }, { "epoch": 0.029702680635011823, "grad_norm": 0.72265625, "learning_rate": 0.0018806697469042578, "loss": 0.1907, "step": 16752 }, { "epoch": 0.029706226800321638, "grad_norm": 0.5234375, "learning_rate": 0.0018806401291548318, "loss": 0.3089, "step": 16754 }, { "epoch": 0.029709772965631452, "grad_norm": 0.416015625, "learning_rate": 0.0018806105079913393, "loss": 0.1845, "step": 16756 }, { "epoch": 0.029713319130941267, "grad_norm": 0.671875, "learning_rate": 0.0018805808834139101, "loss": 0.2809, "step": 16758 }, { "epoch": 0.02971686529625108, "grad_norm": 0.796875, "learning_rate": 0.0018805512554226737, "loss": 0.1991, "step": 16760 }, { "epoch": 0.0297204114615609, "grad_norm": 0.77734375, "learning_rate": 0.0018805216240177594, "loss": 0.2467, "step": 16762 }, { "epoch": 0.029723957626870714, "grad_norm": 0.578125, "learning_rate": 0.0018804919891992969, "loss": 0.2788, "step": 16764 }, { "epoch": 0.02972750379218053, "grad_norm": 1.7890625, "learning_rate": 0.001880462350967416, "loss": 0.2334, "step": 16766 }, { "epoch": 0.029731049957490343, "grad_norm": 1.5625, "learning_rate": 0.0018804327093222455, "loss": 0.1918, "step": 16768 }, { "epoch": 0.029734596122800157, "grad_norm": 0.33984375, "learning_rate": 0.0018804030642639161, "loss": 0.2141, "step": 16770 }, { "epoch": 0.029738142288109972, "grad_norm": 0.337890625, "learning_rate": 0.001880373415792557, "loss": 0.1788, "step": 16772 }, { "epoch": 0.02974168845341979, "grad_norm": 0.380859375, "learning_rate": 0.0018803437639082975, "loss": 0.1849, "step": 16774 }, { "epoch": 0.029745234618729605, "grad_norm": 0.484375, "learning_rate": 0.001880314108611268, "loss": 0.2101, "step": 16776 }, { "epoch": 0.02974878078403942, "grad_norm": 0.78125, "learning_rate": 0.0018802844499015978, "loss": 0.4142, "step": 16778 }, { "epoch": 0.029752326949349234, "grad_norm": 1.359375, "learning_rate": 0.0018802547877794168, "loss": 0.3349, "step": 16780 }, { "epoch": 0.02975587311465905, "grad_norm": 0.8828125, "learning_rate": 0.0018802251222448543, "loss": 0.2832, "step": 16782 }, { "epoch": 0.029759419279968866, "grad_norm": 0.86328125, "learning_rate": 0.0018801954532980405, "loss": 0.2191, "step": 16784 }, { "epoch": 0.02976296544527868, "grad_norm": 0.357421875, "learning_rate": 0.0018801657809391049, "loss": 0.2201, "step": 16786 }, { "epoch": 0.029766511610588495, "grad_norm": 0.65625, "learning_rate": 0.0018801361051681775, "loss": 0.1762, "step": 16788 }, { "epoch": 0.02977005777589831, "grad_norm": 0.5859375, "learning_rate": 0.0018801064259853877, "loss": 0.2584, "step": 16790 }, { "epoch": 0.029773603941208125, "grad_norm": 0.6640625, "learning_rate": 0.0018800767433908658, "loss": 0.2513, "step": 16792 }, { "epoch": 0.02977715010651794, "grad_norm": 1.0234375, "learning_rate": 0.0018800470573847412, "loss": 0.1866, "step": 16794 }, { "epoch": 0.029780696271827757, "grad_norm": 1.3984375, "learning_rate": 0.001880017367967144, "loss": 0.2667, "step": 16796 }, { "epoch": 0.02978424243713757, "grad_norm": 0.40234375, "learning_rate": 0.0018799876751382037, "loss": 0.1656, "step": 16798 }, { "epoch": 0.029787788602447386, "grad_norm": 1.2890625, "learning_rate": 0.0018799579788980503, "loss": 0.2954, "step": 16800 }, { "epoch": 0.0297913347677572, "grad_norm": 0.6328125, "learning_rate": 0.001879928279246814, "loss": 0.1839, "step": 16802 }, { "epoch": 0.029794880933067015, "grad_norm": 0.734375, "learning_rate": 0.0018798985761846246, "loss": 0.228, "step": 16804 }, { "epoch": 0.02979842709837683, "grad_norm": 0.357421875, "learning_rate": 0.0018798688697116116, "loss": 0.1738, "step": 16806 }, { "epoch": 0.029801973263686648, "grad_norm": 0.396484375, "learning_rate": 0.0018798391598279052, "loss": 0.3059, "step": 16808 }, { "epoch": 0.029805519428996462, "grad_norm": 3.078125, "learning_rate": 0.0018798094465336352, "loss": 0.3257, "step": 16810 }, { "epoch": 0.029809065594306277, "grad_norm": 1.015625, "learning_rate": 0.0018797797298289317, "loss": 0.2544, "step": 16812 }, { "epoch": 0.02981261175961609, "grad_norm": 0.7265625, "learning_rate": 0.0018797500097139247, "loss": 0.1742, "step": 16814 }, { "epoch": 0.029816157924925906, "grad_norm": 0.73046875, "learning_rate": 0.0018797202861887442, "loss": 0.1878, "step": 16816 }, { "epoch": 0.029819704090235724, "grad_norm": 0.50390625, "learning_rate": 0.0018796905592535196, "loss": 0.1701, "step": 16818 }, { "epoch": 0.02982325025554554, "grad_norm": 0.78125, "learning_rate": 0.0018796608289083818, "loss": 0.2311, "step": 16820 }, { "epoch": 0.029826796420855353, "grad_norm": 0.75, "learning_rate": 0.0018796310951534603, "loss": 0.1479, "step": 16822 }, { "epoch": 0.029830342586165168, "grad_norm": 0.453125, "learning_rate": 0.0018796013579888853, "loss": 0.1896, "step": 16824 }, { "epoch": 0.029833888751474982, "grad_norm": 0.84765625, "learning_rate": 0.001879571617414787, "loss": 0.1928, "step": 16826 }, { "epoch": 0.029837434916784797, "grad_norm": 0.314453125, "learning_rate": 0.001879541873431295, "loss": 0.2392, "step": 16828 }, { "epoch": 0.029840981082094615, "grad_norm": 0.82421875, "learning_rate": 0.0018795121260385397, "loss": 0.2058, "step": 16830 }, { "epoch": 0.02984452724740443, "grad_norm": 0.61328125, "learning_rate": 0.0018794823752366512, "loss": 0.1877, "step": 16832 }, { "epoch": 0.029848073412714244, "grad_norm": 0.65234375, "learning_rate": 0.0018794526210257595, "loss": 0.2212, "step": 16834 }, { "epoch": 0.02985161957802406, "grad_norm": 0.51171875, "learning_rate": 0.0018794228634059949, "loss": 0.2698, "step": 16836 }, { "epoch": 0.029855165743333873, "grad_norm": 0.55078125, "learning_rate": 0.0018793931023774874, "loss": 0.1771, "step": 16838 }, { "epoch": 0.029858711908643688, "grad_norm": 1.59375, "learning_rate": 0.001879363337940367, "loss": 0.3661, "step": 16840 }, { "epoch": 0.029862258073953506, "grad_norm": 0.40625, "learning_rate": 0.0018793335700947643, "loss": 0.2074, "step": 16842 }, { "epoch": 0.02986580423926332, "grad_norm": 0.875, "learning_rate": 0.0018793037988408094, "loss": 0.2325, "step": 16844 }, { "epoch": 0.029869350404573135, "grad_norm": 0.259765625, "learning_rate": 0.001879274024178632, "loss": 0.2228, "step": 16846 }, { "epoch": 0.02987289656988295, "grad_norm": 0.45703125, "learning_rate": 0.0018792442461083628, "loss": 0.2294, "step": 16848 }, { "epoch": 0.029876442735192764, "grad_norm": 0.2412109375, "learning_rate": 0.001879214464630132, "loss": 0.2239, "step": 16850 }, { "epoch": 0.029879988900502582, "grad_norm": 0.345703125, "learning_rate": 0.0018791846797440697, "loss": 0.2419, "step": 16852 }, { "epoch": 0.029883535065812396, "grad_norm": 1.578125, "learning_rate": 0.001879154891450306, "loss": 0.2174, "step": 16854 }, { "epoch": 0.02988708123112221, "grad_norm": 3.0, "learning_rate": 0.0018791250997489718, "loss": 0.4148, "step": 16856 }, { "epoch": 0.029890627396432026, "grad_norm": 1.046875, "learning_rate": 0.0018790953046401969, "loss": 0.2759, "step": 16858 }, { "epoch": 0.02989417356174184, "grad_norm": 0.3203125, "learning_rate": 0.0018790655061241112, "loss": 0.3068, "step": 16860 }, { "epoch": 0.029897719727051655, "grad_norm": 0.3984375, "learning_rate": 0.0018790357042008461, "loss": 0.1669, "step": 16862 }, { "epoch": 0.029901265892361473, "grad_norm": 0.29296875, "learning_rate": 0.0018790058988705312, "loss": 0.2189, "step": 16864 }, { "epoch": 0.029904812057671287, "grad_norm": 0.369140625, "learning_rate": 0.0018789760901332967, "loss": 0.2027, "step": 16866 }, { "epoch": 0.029908358222981102, "grad_norm": 0.99609375, "learning_rate": 0.0018789462779892736, "loss": 0.1999, "step": 16868 }, { "epoch": 0.029911904388290916, "grad_norm": 0.6875, "learning_rate": 0.001878916462438592, "loss": 0.3361, "step": 16870 }, { "epoch": 0.02991545055360073, "grad_norm": 0.392578125, "learning_rate": 0.0018788866434813819, "loss": 0.1945, "step": 16872 }, { "epoch": 0.029918996718910545, "grad_norm": 0.7734375, "learning_rate": 0.0018788568211177744, "loss": 0.196, "step": 16874 }, { "epoch": 0.029922542884220363, "grad_norm": 0.78515625, "learning_rate": 0.0018788269953478993, "loss": 0.2187, "step": 16876 }, { "epoch": 0.029926089049530178, "grad_norm": 0.85546875, "learning_rate": 0.0018787971661718874, "loss": 0.2111, "step": 16878 }, { "epoch": 0.029929635214839993, "grad_norm": 2.28125, "learning_rate": 0.0018787673335898692, "loss": 0.2855, "step": 16880 }, { "epoch": 0.029933181380149807, "grad_norm": 1.015625, "learning_rate": 0.0018787374976019751, "loss": 0.2007, "step": 16882 }, { "epoch": 0.02993672754545962, "grad_norm": 0.984375, "learning_rate": 0.0018787076582083351, "loss": 0.2528, "step": 16884 }, { "epoch": 0.02994027371076944, "grad_norm": 1.9765625, "learning_rate": 0.0018786778154090804, "loss": 0.2743, "step": 16886 }, { "epoch": 0.029943819876079254, "grad_norm": 0.53125, "learning_rate": 0.0018786479692043411, "loss": 0.2032, "step": 16888 }, { "epoch": 0.02994736604138907, "grad_norm": 0.349609375, "learning_rate": 0.0018786181195942482, "loss": 0.1751, "step": 16890 }, { "epoch": 0.029950912206698883, "grad_norm": 1.546875, "learning_rate": 0.0018785882665789317, "loss": 0.2956, "step": 16892 }, { "epoch": 0.029954458372008698, "grad_norm": 0.875, "learning_rate": 0.0018785584101585224, "loss": 0.2187, "step": 16894 }, { "epoch": 0.029958004537318512, "grad_norm": 0.4375, "learning_rate": 0.001878528550333151, "loss": 0.2531, "step": 16896 }, { "epoch": 0.02996155070262833, "grad_norm": 0.431640625, "learning_rate": 0.0018784986871029478, "loss": 0.2083, "step": 16898 }, { "epoch": 0.029965096867938145, "grad_norm": 0.388671875, "learning_rate": 0.0018784688204680437, "loss": 0.2307, "step": 16900 }, { "epoch": 0.02996864303324796, "grad_norm": 0.44140625, "learning_rate": 0.0018784389504285688, "loss": 0.2447, "step": 16902 }, { "epoch": 0.029972189198557774, "grad_norm": 0.486328125, "learning_rate": 0.0018784090769846545, "loss": 0.2985, "step": 16904 }, { "epoch": 0.02997573536386759, "grad_norm": 0.55859375, "learning_rate": 0.0018783792001364308, "loss": 0.2225, "step": 16906 }, { "epoch": 0.029979281529177403, "grad_norm": 0.95703125, "learning_rate": 0.0018783493198840288, "loss": 0.1836, "step": 16908 }, { "epoch": 0.02998282769448722, "grad_norm": 0.625, "learning_rate": 0.0018783194362275788, "loss": 0.2482, "step": 16910 }, { "epoch": 0.029986373859797036, "grad_norm": 1.625, "learning_rate": 0.001878289549167212, "loss": 0.2458, "step": 16912 }, { "epoch": 0.02998992002510685, "grad_norm": 2.15625, "learning_rate": 0.0018782596587030582, "loss": 0.2405, "step": 16914 }, { "epoch": 0.029993466190416665, "grad_norm": 0.78125, "learning_rate": 0.0018782297648352493, "loss": 0.3013, "step": 16916 }, { "epoch": 0.02999701235572648, "grad_norm": 0.390625, "learning_rate": 0.0018781998675639153, "loss": 0.1412, "step": 16918 }, { "epoch": 0.030000558521036298, "grad_norm": 0.5390625, "learning_rate": 0.0018781699668891873, "loss": 0.2485, "step": 16920 }, { "epoch": 0.030004104686346112, "grad_norm": 1.234375, "learning_rate": 0.0018781400628111956, "loss": 0.2366, "step": 16922 }, { "epoch": 0.030007650851655927, "grad_norm": 0.296875, "learning_rate": 0.0018781101553300712, "loss": 0.1848, "step": 16924 }, { "epoch": 0.03001119701696574, "grad_norm": 0.56640625, "learning_rate": 0.0018780802444459451, "loss": 0.2282, "step": 16926 }, { "epoch": 0.030014743182275556, "grad_norm": 0.609375, "learning_rate": 0.0018780503301589477, "loss": 0.2358, "step": 16928 }, { "epoch": 0.03001828934758537, "grad_norm": 1.4140625, "learning_rate": 0.0018780204124692107, "loss": 0.2081, "step": 16930 }, { "epoch": 0.03002183551289519, "grad_norm": 0.71875, "learning_rate": 0.0018779904913768643, "loss": 0.194, "step": 16932 }, { "epoch": 0.030025381678205003, "grad_norm": 5.4375, "learning_rate": 0.0018779605668820388, "loss": 0.1854, "step": 16934 }, { "epoch": 0.030028927843514817, "grad_norm": 1.265625, "learning_rate": 0.0018779306389848658, "loss": 0.2488, "step": 16936 }, { "epoch": 0.030032474008824632, "grad_norm": 0.80078125, "learning_rate": 0.0018779007076854765, "loss": 0.2456, "step": 16938 }, { "epoch": 0.030036020174134447, "grad_norm": 0.439453125, "learning_rate": 0.001877870772984001, "loss": 0.2359, "step": 16940 }, { "epoch": 0.03003956633944426, "grad_norm": 0.5859375, "learning_rate": 0.0018778408348805707, "loss": 0.2924, "step": 16942 }, { "epoch": 0.03004311250475408, "grad_norm": 0.74609375, "learning_rate": 0.0018778108933753162, "loss": 0.2236, "step": 16944 }, { "epoch": 0.030046658670063894, "grad_norm": 2.859375, "learning_rate": 0.001877780948468369, "loss": 0.2881, "step": 16946 }, { "epoch": 0.030050204835373708, "grad_norm": 1.8984375, "learning_rate": 0.0018777510001598595, "loss": 0.3003, "step": 16948 }, { "epoch": 0.030053751000683523, "grad_norm": 0.423828125, "learning_rate": 0.001877721048449919, "loss": 0.3232, "step": 16950 }, { "epoch": 0.030057297165993337, "grad_norm": 0.267578125, "learning_rate": 0.0018776910933386783, "loss": 0.1536, "step": 16952 }, { "epoch": 0.030060843331303155, "grad_norm": 2.09375, "learning_rate": 0.0018776611348262687, "loss": 0.2436, "step": 16954 }, { "epoch": 0.03006438949661297, "grad_norm": 1.6640625, "learning_rate": 0.0018776311729128208, "loss": 0.1865, "step": 16956 }, { "epoch": 0.030067935661922784, "grad_norm": 0.546875, "learning_rate": 0.0018776012075984658, "loss": 0.2413, "step": 16958 }, { "epoch": 0.0300714818272326, "grad_norm": 0.37890625, "learning_rate": 0.001877571238883335, "loss": 0.208, "step": 16960 }, { "epoch": 0.030075027992542414, "grad_norm": 0.8359375, "learning_rate": 0.0018775412667675591, "loss": 0.2029, "step": 16962 }, { "epoch": 0.030078574157852228, "grad_norm": 0.470703125, "learning_rate": 0.0018775112912512697, "loss": 0.1643, "step": 16964 }, { "epoch": 0.030082120323162046, "grad_norm": 1.328125, "learning_rate": 0.001877481312334597, "loss": 0.3411, "step": 16966 }, { "epoch": 0.03008566648847186, "grad_norm": 0.61328125, "learning_rate": 0.001877451330017673, "loss": 0.2141, "step": 16968 }, { "epoch": 0.030089212653781675, "grad_norm": 0.70703125, "learning_rate": 0.0018774213443006284, "loss": 0.1575, "step": 16970 }, { "epoch": 0.03009275881909149, "grad_norm": 0.37890625, "learning_rate": 0.0018773913551835945, "loss": 0.2291, "step": 16972 }, { "epoch": 0.030096304984401304, "grad_norm": 0.6875, "learning_rate": 0.0018773613626667024, "loss": 0.2575, "step": 16974 }, { "epoch": 0.03009985114971112, "grad_norm": 0.90234375, "learning_rate": 0.0018773313667500833, "loss": 0.2712, "step": 16976 }, { "epoch": 0.030103397315020937, "grad_norm": 0.390625, "learning_rate": 0.0018773013674338681, "loss": 0.2503, "step": 16978 }, { "epoch": 0.03010694348033075, "grad_norm": 0.60546875, "learning_rate": 0.0018772713647181882, "loss": 0.217, "step": 16980 }, { "epoch": 0.030110489645640566, "grad_norm": 0.380859375, "learning_rate": 0.0018772413586031751, "loss": 0.2303, "step": 16982 }, { "epoch": 0.03011403581095038, "grad_norm": 0.279296875, "learning_rate": 0.0018772113490889595, "loss": 0.2836, "step": 16984 }, { "epoch": 0.030117581976260195, "grad_norm": 1.53125, "learning_rate": 0.0018771813361756731, "loss": 0.3995, "step": 16986 }, { "epoch": 0.030121128141570013, "grad_norm": 0.71875, "learning_rate": 0.0018771513198634468, "loss": 0.1998, "step": 16988 }, { "epoch": 0.030124674306879828, "grad_norm": 0.380859375, "learning_rate": 0.001877121300152412, "loss": 0.2062, "step": 16990 }, { "epoch": 0.030128220472189642, "grad_norm": 0.3515625, "learning_rate": 0.0018770912770427006, "loss": 0.2045, "step": 16992 }, { "epoch": 0.030131766637499457, "grad_norm": 1.7421875, "learning_rate": 0.001877061250534443, "loss": 0.2022, "step": 16994 }, { "epoch": 0.03013531280280927, "grad_norm": 0.330078125, "learning_rate": 0.0018770312206277708, "loss": 0.1698, "step": 16996 }, { "epoch": 0.030138858968119086, "grad_norm": 0.349609375, "learning_rate": 0.0018770011873228151, "loss": 0.3099, "step": 16998 }, { "epoch": 0.030142405133428904, "grad_norm": 0.75390625, "learning_rate": 0.001876971150619708, "loss": 0.3465, "step": 17000 }, { "epoch": 0.03014595129873872, "grad_norm": 0.98046875, "learning_rate": 0.0018769411105185802, "loss": 0.2811, "step": 17002 }, { "epoch": 0.030149497464048533, "grad_norm": 1.1328125, "learning_rate": 0.0018769110670195633, "loss": 0.1728, "step": 17004 }, { "epoch": 0.030153043629358348, "grad_norm": 0.85546875, "learning_rate": 0.0018768810201227888, "loss": 0.2316, "step": 17006 }, { "epoch": 0.030156589794668162, "grad_norm": 0.423828125, "learning_rate": 0.001876850969828388, "loss": 0.2049, "step": 17008 }, { "epoch": 0.030160135959977977, "grad_norm": 0.333984375, "learning_rate": 0.0018768209161364922, "loss": 0.1935, "step": 17010 }, { "epoch": 0.030163682125287795, "grad_norm": 0.42578125, "learning_rate": 0.0018767908590472328, "loss": 0.2269, "step": 17012 }, { "epoch": 0.03016722829059761, "grad_norm": 0.5703125, "learning_rate": 0.0018767607985607417, "loss": 0.2079, "step": 17014 }, { "epoch": 0.030170774455907424, "grad_norm": 0.3515625, "learning_rate": 0.00187673073467715, "loss": 0.2292, "step": 17016 }, { "epoch": 0.03017432062121724, "grad_norm": 0.421875, "learning_rate": 0.001876700667396589, "loss": 0.1944, "step": 17018 }, { "epoch": 0.030177866786527053, "grad_norm": 0.2119140625, "learning_rate": 0.0018766705967191905, "loss": 0.2205, "step": 17020 }, { "epoch": 0.03018141295183687, "grad_norm": 1.1484375, "learning_rate": 0.0018766405226450862, "loss": 0.2445, "step": 17022 }, { "epoch": 0.030184959117146686, "grad_norm": 0.34765625, "learning_rate": 0.0018766104451744072, "loss": 0.1733, "step": 17024 }, { "epoch": 0.0301885052824565, "grad_norm": 0.306640625, "learning_rate": 0.0018765803643072852, "loss": 0.1993, "step": 17026 }, { "epoch": 0.030192051447766315, "grad_norm": 0.423828125, "learning_rate": 0.0018765502800438519, "loss": 0.245, "step": 17028 }, { "epoch": 0.03019559761307613, "grad_norm": 0.92578125, "learning_rate": 0.0018765201923842388, "loss": 0.2859, "step": 17030 }, { "epoch": 0.030199143778385944, "grad_norm": 1.6328125, "learning_rate": 0.0018764901013285774, "loss": 0.2456, "step": 17032 }, { "epoch": 0.030202689943695762, "grad_norm": 0.51171875, "learning_rate": 0.0018764600068769991, "loss": 0.1842, "step": 17034 }, { "epoch": 0.030206236109005576, "grad_norm": 1.8046875, "learning_rate": 0.0018764299090296356, "loss": 0.2503, "step": 17036 }, { "epoch": 0.03020978227431539, "grad_norm": 3.109375, "learning_rate": 0.0018763998077866194, "loss": 0.2284, "step": 17038 }, { "epoch": 0.030213328439625205, "grad_norm": 0.287109375, "learning_rate": 0.001876369703148081, "loss": 0.2129, "step": 17040 }, { "epoch": 0.03021687460493502, "grad_norm": 0.498046875, "learning_rate": 0.0018763395951141525, "loss": 0.179, "step": 17042 }, { "epoch": 0.030220420770244835, "grad_norm": 1.1328125, "learning_rate": 0.0018763094836849654, "loss": 0.1709, "step": 17044 }, { "epoch": 0.030223966935554653, "grad_norm": 0.796875, "learning_rate": 0.0018762793688606517, "loss": 0.2237, "step": 17046 }, { "epoch": 0.030227513100864467, "grad_norm": 0.55859375, "learning_rate": 0.0018762492506413432, "loss": 0.26, "step": 17048 }, { "epoch": 0.03023105926617428, "grad_norm": 1.6484375, "learning_rate": 0.001876219129027171, "loss": 0.2591, "step": 17050 }, { "epoch": 0.030234605431484096, "grad_norm": 1.25, "learning_rate": 0.0018761890040182676, "loss": 0.3108, "step": 17052 }, { "epoch": 0.03023815159679391, "grad_norm": 0.67578125, "learning_rate": 0.001876158875614764, "loss": 0.3266, "step": 17054 }, { "epoch": 0.03024169776210373, "grad_norm": 1.40625, "learning_rate": 0.0018761287438167924, "loss": 0.3167, "step": 17056 }, { "epoch": 0.030245243927413543, "grad_norm": 1.359375, "learning_rate": 0.0018760986086244844, "loss": 0.2147, "step": 17058 }, { "epoch": 0.030248790092723358, "grad_norm": 0.78125, "learning_rate": 0.0018760684700379721, "loss": 0.2478, "step": 17060 }, { "epoch": 0.030252336258033172, "grad_norm": 0.45703125, "learning_rate": 0.0018760383280573875, "loss": 0.2234, "step": 17062 }, { "epoch": 0.030255882423342987, "grad_norm": 0.87109375, "learning_rate": 0.0018760081826828612, "loss": 0.2975, "step": 17064 }, { "epoch": 0.0302594285886528, "grad_norm": 0.93359375, "learning_rate": 0.0018759780339145265, "loss": 0.3501, "step": 17066 }, { "epoch": 0.03026297475396262, "grad_norm": 0.5234375, "learning_rate": 0.001875947881752514, "loss": 0.2559, "step": 17068 }, { "epoch": 0.030266520919272434, "grad_norm": 0.462890625, "learning_rate": 0.001875917726196957, "loss": 0.2329, "step": 17070 }, { "epoch": 0.03027006708458225, "grad_norm": 0.80859375, "learning_rate": 0.0018758875672479861, "loss": 0.2273, "step": 17072 }, { "epoch": 0.030273613249892063, "grad_norm": 0.48046875, "learning_rate": 0.0018758574049057336, "loss": 0.2218, "step": 17074 }, { "epoch": 0.030277159415201878, "grad_norm": 0.52734375, "learning_rate": 0.0018758272391703318, "loss": 0.2829, "step": 17076 }, { "epoch": 0.030280705580511692, "grad_norm": 0.3359375, "learning_rate": 0.001875797070041912, "loss": 0.233, "step": 17078 }, { "epoch": 0.03028425174582151, "grad_norm": 0.96484375, "learning_rate": 0.0018757668975206065, "loss": 0.2369, "step": 17080 }, { "epoch": 0.030287797911131325, "grad_norm": 0.87890625, "learning_rate": 0.0018757367216065473, "loss": 0.2397, "step": 17082 }, { "epoch": 0.03029134407644114, "grad_norm": 0.400390625, "learning_rate": 0.001875706542299866, "loss": 0.219, "step": 17084 }, { "epoch": 0.030294890241750954, "grad_norm": 0.361328125, "learning_rate": 0.0018756763596006953, "loss": 0.1815, "step": 17086 }, { "epoch": 0.03029843640706077, "grad_norm": 0.5078125, "learning_rate": 0.0018756461735091666, "loss": 0.1752, "step": 17088 }, { "epoch": 0.030301982572370587, "grad_norm": 0.341796875, "learning_rate": 0.0018756159840254121, "loss": 0.2077, "step": 17090 }, { "epoch": 0.0303055287376804, "grad_norm": 0.490234375, "learning_rate": 0.0018755857911495638, "loss": 0.1905, "step": 17092 }, { "epoch": 0.030309074902990216, "grad_norm": 0.6875, "learning_rate": 0.0018755555948817536, "loss": 0.2014, "step": 17094 }, { "epoch": 0.03031262106830003, "grad_norm": 6.96875, "learning_rate": 0.0018755253952221138, "loss": 0.3129, "step": 17096 }, { "epoch": 0.030316167233609845, "grad_norm": 0.96875, "learning_rate": 0.0018754951921707765, "loss": 0.1956, "step": 17098 }, { "epoch": 0.03031971339891966, "grad_norm": 1.140625, "learning_rate": 0.0018754649857278735, "loss": 0.1567, "step": 17100 }, { "epoch": 0.030323259564229477, "grad_norm": 2.015625, "learning_rate": 0.0018754347758935376, "loss": 0.2174, "step": 17102 }, { "epoch": 0.030326805729539292, "grad_norm": 1.2109375, "learning_rate": 0.0018754045626679, "loss": 0.1959, "step": 17104 }, { "epoch": 0.030330351894849106, "grad_norm": 0.59375, "learning_rate": 0.0018753743460510933, "loss": 0.2569, "step": 17106 }, { "epoch": 0.03033389806015892, "grad_norm": 0.74609375, "learning_rate": 0.0018753441260432498, "loss": 0.1682, "step": 17108 }, { "epoch": 0.030337444225468736, "grad_norm": 1.71875, "learning_rate": 0.0018753139026445012, "loss": 0.378, "step": 17110 }, { "epoch": 0.03034099039077855, "grad_norm": 0.87109375, "learning_rate": 0.0018752836758549798, "loss": 0.2137, "step": 17112 }, { "epoch": 0.030344536556088368, "grad_norm": 0.69140625, "learning_rate": 0.0018752534456748186, "loss": 0.1817, "step": 17114 }, { "epoch": 0.030348082721398183, "grad_norm": 0.328125, "learning_rate": 0.0018752232121041486, "loss": 0.3175, "step": 17116 }, { "epoch": 0.030351628886707997, "grad_norm": 0.35546875, "learning_rate": 0.0018751929751431025, "loss": 0.1888, "step": 17118 }, { "epoch": 0.030355175052017812, "grad_norm": 0.34765625, "learning_rate": 0.001875162734791813, "loss": 0.2529, "step": 17120 }, { "epoch": 0.030358721217327626, "grad_norm": 0.3671875, "learning_rate": 0.0018751324910504118, "loss": 0.3258, "step": 17122 }, { "epoch": 0.030362267382637444, "grad_norm": 0.921875, "learning_rate": 0.0018751022439190313, "loss": 0.2126, "step": 17124 }, { "epoch": 0.03036581354794726, "grad_norm": 6.03125, "learning_rate": 0.0018750719933978038, "loss": 0.2061, "step": 17126 }, { "epoch": 0.030369359713257073, "grad_norm": 0.65234375, "learning_rate": 0.0018750417394868618, "loss": 0.2403, "step": 17128 }, { "epoch": 0.030372905878566888, "grad_norm": 1.6015625, "learning_rate": 0.001875011482186337, "loss": 0.2513, "step": 17130 }, { "epoch": 0.030376452043876703, "grad_norm": 1.1640625, "learning_rate": 0.0018749812214963627, "loss": 0.2021, "step": 17132 }, { "epoch": 0.030379998209186517, "grad_norm": 1.140625, "learning_rate": 0.0018749509574170703, "loss": 0.2372, "step": 17134 }, { "epoch": 0.030383544374496335, "grad_norm": 2.171875, "learning_rate": 0.0018749206899485926, "loss": 0.2956, "step": 17136 }, { "epoch": 0.03038709053980615, "grad_norm": 0.349609375, "learning_rate": 0.0018748904190910621, "loss": 0.2667, "step": 17138 }, { "epoch": 0.030390636705115964, "grad_norm": 4.375, "learning_rate": 0.001874860144844611, "loss": 0.4426, "step": 17140 }, { "epoch": 0.03039418287042578, "grad_norm": 0.55078125, "learning_rate": 0.0018748298672093718, "loss": 0.2179, "step": 17142 }, { "epoch": 0.030397729035735593, "grad_norm": 0.56640625, "learning_rate": 0.0018747995861854766, "loss": 0.227, "step": 17144 }, { "epoch": 0.030401275201045408, "grad_norm": 0.65625, "learning_rate": 0.001874769301773058, "loss": 0.2886, "step": 17146 }, { "epoch": 0.030404821366355226, "grad_norm": 0.35546875, "learning_rate": 0.0018747390139722488, "loss": 0.1827, "step": 17148 }, { "epoch": 0.03040836753166504, "grad_norm": 2.453125, "learning_rate": 0.001874708722783181, "loss": 0.2352, "step": 17150 }, { "epoch": 0.030411913696974855, "grad_norm": 0.54296875, "learning_rate": 0.0018746784282059874, "loss": 0.2116, "step": 17152 }, { "epoch": 0.03041545986228467, "grad_norm": 0.458984375, "learning_rate": 0.0018746481302408004, "loss": 0.2293, "step": 17154 }, { "epoch": 0.030419006027594484, "grad_norm": 0.8203125, "learning_rate": 0.001874617828887752, "loss": 0.2005, "step": 17156 }, { "epoch": 0.030422552192904302, "grad_norm": 0.671875, "learning_rate": 0.0018745875241469756, "loss": 0.1953, "step": 17158 }, { "epoch": 0.030426098358214117, "grad_norm": 0.34765625, "learning_rate": 0.001874557216018603, "loss": 0.2324, "step": 17160 }, { "epoch": 0.03042964452352393, "grad_norm": 0.30859375, "learning_rate": 0.0018745269045027672, "loss": 0.2548, "step": 17162 }, { "epoch": 0.030433190688833746, "grad_norm": 0.69921875, "learning_rate": 0.0018744965895996007, "loss": 0.1941, "step": 17164 }, { "epoch": 0.03043673685414356, "grad_norm": 0.408203125, "learning_rate": 0.001874466271309236, "loss": 0.2761, "step": 17166 }, { "epoch": 0.030440283019453375, "grad_norm": 0.6640625, "learning_rate": 0.0018744359496318055, "loss": 0.2193, "step": 17168 }, { "epoch": 0.030443829184763193, "grad_norm": 0.546875, "learning_rate": 0.0018744056245674422, "loss": 0.2135, "step": 17170 }, { "epoch": 0.030447375350073008, "grad_norm": 0.6328125, "learning_rate": 0.0018743752961162788, "loss": 0.2046, "step": 17172 }, { "epoch": 0.030450921515382822, "grad_norm": 0.69140625, "learning_rate": 0.001874344964278447, "loss": 0.3431, "step": 17174 }, { "epoch": 0.030454467680692637, "grad_norm": 0.486328125, "learning_rate": 0.0018743146290540805, "loss": 0.1866, "step": 17176 }, { "epoch": 0.03045801384600245, "grad_norm": 0.244140625, "learning_rate": 0.0018742842904433115, "loss": 0.1631, "step": 17178 }, { "epoch": 0.030461560011312266, "grad_norm": 0.53515625, "learning_rate": 0.001874253948446273, "loss": 0.2105, "step": 17180 }, { "epoch": 0.030465106176622084, "grad_norm": 0.59765625, "learning_rate": 0.0018742236030630972, "loss": 0.269, "step": 17182 }, { "epoch": 0.0304686523419319, "grad_norm": 0.390625, "learning_rate": 0.001874193254293917, "loss": 0.2082, "step": 17184 }, { "epoch": 0.030472198507241713, "grad_norm": 0.51171875, "learning_rate": 0.0018741629021388653, "loss": 0.1461, "step": 17186 }, { "epoch": 0.030475744672551527, "grad_norm": 0.77734375, "learning_rate": 0.0018741325465980747, "loss": 0.1725, "step": 17188 }, { "epoch": 0.030479290837861342, "grad_norm": 0.91015625, "learning_rate": 0.0018741021876716784, "loss": 0.2368, "step": 17190 }, { "epoch": 0.03048283700317116, "grad_norm": 0.671875, "learning_rate": 0.0018740718253598087, "loss": 0.2017, "step": 17192 }, { "epoch": 0.030486383168480975, "grad_norm": 0.39453125, "learning_rate": 0.0018740414596625978, "loss": 0.1784, "step": 17194 }, { "epoch": 0.03048992933379079, "grad_norm": 0.3515625, "learning_rate": 0.00187401109058018, "loss": 0.2186, "step": 17196 }, { "epoch": 0.030493475499100604, "grad_norm": 0.72265625, "learning_rate": 0.0018739807181126866, "loss": 0.1943, "step": 17198 }, { "epoch": 0.030497021664410418, "grad_norm": 0.5546875, "learning_rate": 0.0018739503422602517, "loss": 0.2272, "step": 17200 }, { "epoch": 0.030500567829720233, "grad_norm": 0.4140625, "learning_rate": 0.0018739199630230072, "loss": 0.1759, "step": 17202 }, { "epoch": 0.03050411399503005, "grad_norm": 0.419921875, "learning_rate": 0.0018738895804010865, "loss": 0.2049, "step": 17204 }, { "epoch": 0.030507660160339865, "grad_norm": 0.359375, "learning_rate": 0.0018738591943946224, "loss": 0.2916, "step": 17206 }, { "epoch": 0.03051120632564968, "grad_norm": 1.8515625, "learning_rate": 0.0018738288050037478, "loss": 0.2168, "step": 17208 }, { "epoch": 0.030514752490959494, "grad_norm": 0.87109375, "learning_rate": 0.001873798412228595, "loss": 0.1849, "step": 17210 }, { "epoch": 0.03051829865626931, "grad_norm": 1.859375, "learning_rate": 0.0018737680160692976, "loss": 0.2193, "step": 17212 }, { "epoch": 0.030521844821579124, "grad_norm": 0.4296875, "learning_rate": 0.0018737376165259886, "loss": 0.1575, "step": 17214 }, { "epoch": 0.03052539098688894, "grad_norm": 1.3671875, "learning_rate": 0.0018737072135988004, "loss": 0.3244, "step": 17216 }, { "epoch": 0.030528937152198756, "grad_norm": 0.61328125, "learning_rate": 0.0018736768072878662, "loss": 0.2045, "step": 17218 }, { "epoch": 0.03053248331750857, "grad_norm": 0.275390625, "learning_rate": 0.0018736463975933195, "loss": 0.2313, "step": 17220 }, { "epoch": 0.030536029482818385, "grad_norm": 1.2890625, "learning_rate": 0.0018736159845152928, "loss": 0.3612, "step": 17222 }, { "epoch": 0.0305395756481282, "grad_norm": 0.6171875, "learning_rate": 0.0018735855680539187, "loss": 0.2265, "step": 17224 }, { "epoch": 0.030543121813438018, "grad_norm": 0.408203125, "learning_rate": 0.001873555148209331, "loss": 0.3208, "step": 17226 }, { "epoch": 0.030546667978747832, "grad_norm": 0.345703125, "learning_rate": 0.0018735247249816623, "loss": 0.2595, "step": 17228 }, { "epoch": 0.030550214144057647, "grad_norm": 1.015625, "learning_rate": 0.0018734942983710458, "loss": 0.2208, "step": 17230 }, { "epoch": 0.03055376030936746, "grad_norm": 0.265625, "learning_rate": 0.0018734638683776145, "loss": 0.297, "step": 17232 }, { "epoch": 0.030557306474677276, "grad_norm": 0.486328125, "learning_rate": 0.001873433435001502, "loss": 0.2394, "step": 17234 }, { "epoch": 0.03056085263998709, "grad_norm": 0.73828125, "learning_rate": 0.0018734029982428403, "loss": 0.1875, "step": 17236 }, { "epoch": 0.03056439880529691, "grad_norm": 0.376953125, "learning_rate": 0.001873372558101763, "loss": 0.1928, "step": 17238 }, { "epoch": 0.030567944970606723, "grad_norm": 0.81640625, "learning_rate": 0.001873342114578404, "loss": 0.2573, "step": 17240 }, { "epoch": 0.030571491135916538, "grad_norm": 1.0625, "learning_rate": 0.0018733116676728954, "loss": 0.2527, "step": 17242 }, { "epoch": 0.030575037301226352, "grad_norm": 0.32421875, "learning_rate": 0.0018732812173853709, "loss": 0.1357, "step": 17244 }, { "epoch": 0.030578583466536167, "grad_norm": 0.40625, "learning_rate": 0.0018732507637159634, "loss": 0.2199, "step": 17246 }, { "epoch": 0.03058212963184598, "grad_norm": 0.37109375, "learning_rate": 0.0018732203066648063, "loss": 0.2373, "step": 17248 }, { "epoch": 0.0305856757971558, "grad_norm": 1.03125, "learning_rate": 0.0018731898462320327, "loss": 0.2899, "step": 17250 }, { "epoch": 0.030589221962465614, "grad_norm": 0.90234375, "learning_rate": 0.001873159382417776, "loss": 0.2681, "step": 17252 }, { "epoch": 0.03059276812777543, "grad_norm": 1.2734375, "learning_rate": 0.001873128915222169, "loss": 0.2425, "step": 17254 }, { "epoch": 0.030596314293085243, "grad_norm": 0.5546875, "learning_rate": 0.0018730984446453454, "loss": 0.1893, "step": 17256 }, { "epoch": 0.030599860458395058, "grad_norm": 0.43359375, "learning_rate": 0.0018730679706874384, "loss": 0.1932, "step": 17258 }, { "epoch": 0.030603406623704876, "grad_norm": 0.8828125, "learning_rate": 0.0018730374933485807, "loss": 0.2455, "step": 17260 }, { "epoch": 0.03060695278901469, "grad_norm": 0.77734375, "learning_rate": 0.001873007012628906, "loss": 0.2182, "step": 17262 }, { "epoch": 0.030610498954324505, "grad_norm": 4.59375, "learning_rate": 0.0018729765285285481, "loss": 0.4682, "step": 17264 }, { "epoch": 0.03061404511963432, "grad_norm": 1.6640625, "learning_rate": 0.00187294604104764, "loss": 0.212, "step": 17266 }, { "epoch": 0.030617591284944134, "grad_norm": 0.65625, "learning_rate": 0.0018729155501863142, "loss": 0.2242, "step": 17268 }, { "epoch": 0.03062113745025395, "grad_norm": 2.578125, "learning_rate": 0.0018728850559447052, "loss": 0.2564, "step": 17270 }, { "epoch": 0.030624683615563766, "grad_norm": 0.5859375, "learning_rate": 0.001872854558322946, "loss": 0.2246, "step": 17272 }, { "epoch": 0.03062822978087358, "grad_norm": 1.4453125, "learning_rate": 0.0018728240573211697, "loss": 0.2896, "step": 17274 }, { "epoch": 0.030631775946183395, "grad_norm": 1.84375, "learning_rate": 0.0018727935529395097, "loss": 0.5221, "step": 17276 }, { "epoch": 0.03063532211149321, "grad_norm": 0.47265625, "learning_rate": 0.0018727630451781, "loss": 0.2076, "step": 17278 }, { "epoch": 0.030638868276803025, "grad_norm": 0.7578125, "learning_rate": 0.0018727325340370732, "loss": 0.2596, "step": 17280 }, { "epoch": 0.03064241444211284, "grad_norm": 0.77734375, "learning_rate": 0.0018727020195165637, "loss": 0.1966, "step": 17282 }, { "epoch": 0.030645960607422657, "grad_norm": 0.71484375, "learning_rate": 0.0018726715016167037, "loss": 0.2053, "step": 17284 }, { "epoch": 0.03064950677273247, "grad_norm": 1.4921875, "learning_rate": 0.001872640980337628, "loss": 0.239, "step": 17286 }, { "epoch": 0.030653052938042286, "grad_norm": 0.71484375, "learning_rate": 0.0018726104556794692, "loss": 0.2036, "step": 17288 }, { "epoch": 0.0306565991033521, "grad_norm": 0.470703125, "learning_rate": 0.0018725799276423612, "loss": 0.2449, "step": 17290 }, { "epoch": 0.030660145268661915, "grad_norm": 0.26171875, "learning_rate": 0.0018725493962264371, "loss": 0.2187, "step": 17292 }, { "epoch": 0.030663691433971733, "grad_norm": 0.4375, "learning_rate": 0.001872518861431831, "loss": 0.295, "step": 17294 }, { "epoch": 0.030667237599281548, "grad_norm": 0.373046875, "learning_rate": 0.0018724883232586757, "loss": 0.1794, "step": 17296 }, { "epoch": 0.030670783764591363, "grad_norm": 1.3671875, "learning_rate": 0.0018724577817071054, "loss": 0.3304, "step": 17298 }, { "epoch": 0.030674329929901177, "grad_norm": 1.3046875, "learning_rate": 0.0018724272367772537, "loss": 0.3189, "step": 17300 }, { "epoch": 0.03067787609521099, "grad_norm": 1.0703125, "learning_rate": 0.0018723966884692535, "loss": 0.2513, "step": 17302 }, { "epoch": 0.030681422260520806, "grad_norm": 0.4609375, "learning_rate": 0.0018723661367832391, "loss": 0.2062, "step": 17304 }, { "epoch": 0.030684968425830624, "grad_norm": 0.93359375, "learning_rate": 0.0018723355817193438, "loss": 0.2779, "step": 17306 }, { "epoch": 0.03068851459114044, "grad_norm": 0.8984375, "learning_rate": 0.0018723050232777015, "loss": 0.2425, "step": 17308 }, { "epoch": 0.030692060756450253, "grad_norm": 0.8125, "learning_rate": 0.0018722744614584454, "loss": 0.2628, "step": 17310 }, { "epoch": 0.030695606921760068, "grad_norm": 1.171875, "learning_rate": 0.0018722438962617096, "loss": 0.193, "step": 17312 }, { "epoch": 0.030699153087069882, "grad_norm": 0.431640625, "learning_rate": 0.0018722133276876272, "loss": 0.2005, "step": 17314 }, { "epoch": 0.030702699252379697, "grad_norm": 0.36328125, "learning_rate": 0.0018721827557363327, "loss": 0.1877, "step": 17316 }, { "epoch": 0.030706245417689515, "grad_norm": 0.38671875, "learning_rate": 0.0018721521804079592, "loss": 0.3564, "step": 17318 }, { "epoch": 0.03070979158299933, "grad_norm": 3.09375, "learning_rate": 0.0018721216017026406, "loss": 0.3906, "step": 17320 }, { "epoch": 0.030713337748309144, "grad_norm": 0.6953125, "learning_rate": 0.0018720910196205103, "loss": 0.1994, "step": 17322 }, { "epoch": 0.03071688391361896, "grad_norm": 0.408203125, "learning_rate": 0.0018720604341617027, "loss": 0.1929, "step": 17324 }, { "epoch": 0.030720430078928773, "grad_norm": 1.609375, "learning_rate": 0.0018720298453263511, "loss": 0.2712, "step": 17326 }, { "epoch": 0.03072397624423859, "grad_norm": 0.80078125, "learning_rate": 0.0018719992531145897, "loss": 0.1947, "step": 17328 }, { "epoch": 0.030727522409548406, "grad_norm": 0.443359375, "learning_rate": 0.0018719686575265515, "loss": 0.207, "step": 17330 }, { "epoch": 0.03073106857485822, "grad_norm": 0.25390625, "learning_rate": 0.0018719380585623712, "loss": 0.2166, "step": 17332 }, { "epoch": 0.030734614740168035, "grad_norm": 2.0625, "learning_rate": 0.0018719074562221821, "loss": 0.2616, "step": 17334 }, { "epoch": 0.03073816090547785, "grad_norm": 0.314453125, "learning_rate": 0.0018718768505061182, "loss": 0.18, "step": 17336 }, { "epoch": 0.030741707070787664, "grad_norm": 0.8203125, "learning_rate": 0.0018718462414143132, "loss": 0.222, "step": 17338 }, { "epoch": 0.030745253236097482, "grad_norm": 0.384765625, "learning_rate": 0.001871815628946901, "loss": 0.2071, "step": 17340 }, { "epoch": 0.030748799401407297, "grad_norm": 0.7109375, "learning_rate": 0.0018717850131040159, "loss": 0.1489, "step": 17342 }, { "epoch": 0.03075234556671711, "grad_norm": 0.99609375, "learning_rate": 0.0018717543938857912, "loss": 0.3018, "step": 17344 }, { "epoch": 0.030755891732026926, "grad_norm": 0.43359375, "learning_rate": 0.0018717237712923612, "loss": 0.2209, "step": 17346 }, { "epoch": 0.03075943789733674, "grad_norm": 1.34375, "learning_rate": 0.0018716931453238597, "loss": 0.5243, "step": 17348 }, { "epoch": 0.030762984062646555, "grad_norm": 1.5078125, "learning_rate": 0.0018716625159804206, "loss": 0.2587, "step": 17350 }, { "epoch": 0.030766530227956373, "grad_norm": 0.353515625, "learning_rate": 0.0018716318832621777, "loss": 0.1936, "step": 17352 }, { "epoch": 0.030770076393266187, "grad_norm": 0.63671875, "learning_rate": 0.0018716012471692652, "loss": 0.2319, "step": 17354 }, { "epoch": 0.030773622558576002, "grad_norm": 0.61328125, "learning_rate": 0.0018715706077018174, "loss": 0.2333, "step": 17356 }, { "epoch": 0.030777168723885816, "grad_norm": 1.03125, "learning_rate": 0.0018715399648599677, "loss": 0.2349, "step": 17358 }, { "epoch": 0.03078071488919563, "grad_norm": 15.9375, "learning_rate": 0.0018715093186438502, "loss": 0.1792, "step": 17360 }, { "epoch": 0.03078426105450545, "grad_norm": 0.1845703125, "learning_rate": 0.0018714786690535992, "loss": 0.181, "step": 17362 }, { "epoch": 0.030787807219815264, "grad_norm": 3.40625, "learning_rate": 0.0018714480160893488, "loss": 0.3829, "step": 17364 }, { "epoch": 0.030791353385125078, "grad_norm": 0.484375, "learning_rate": 0.0018714173597512327, "loss": 0.3335, "step": 17366 }, { "epoch": 0.030794899550434893, "grad_norm": 0.60546875, "learning_rate": 0.0018713867000393852, "loss": 0.2416, "step": 17368 }, { "epoch": 0.030798445715744707, "grad_norm": 0.7109375, "learning_rate": 0.0018713560369539404, "loss": 0.2032, "step": 17370 }, { "epoch": 0.030801991881054522, "grad_norm": 2.796875, "learning_rate": 0.001871325370495032, "loss": 0.3384, "step": 17372 }, { "epoch": 0.03080553804636434, "grad_norm": 0.91796875, "learning_rate": 0.0018712947006627953, "loss": 0.3237, "step": 17374 }, { "epoch": 0.030809084211674154, "grad_norm": 0.67578125, "learning_rate": 0.0018712640274573629, "loss": 0.1685, "step": 17376 }, { "epoch": 0.03081263037698397, "grad_norm": 0.5234375, "learning_rate": 0.00187123335087887, "loss": 0.2009, "step": 17378 }, { "epoch": 0.030816176542293783, "grad_norm": 0.310546875, "learning_rate": 0.00187120267092745, "loss": 0.1704, "step": 17380 }, { "epoch": 0.030819722707603598, "grad_norm": 0.49609375, "learning_rate": 0.0018711719876032377, "loss": 0.1788, "step": 17382 }, { "epoch": 0.030823268872913413, "grad_norm": 0.4375, "learning_rate": 0.0018711413009063673, "loss": 0.1816, "step": 17384 }, { "epoch": 0.03082681503822323, "grad_norm": 0.462890625, "learning_rate": 0.0018711106108369728, "loss": 0.2069, "step": 17386 }, { "epoch": 0.030830361203533045, "grad_norm": 0.65625, "learning_rate": 0.001871079917395188, "loss": 0.1887, "step": 17388 }, { "epoch": 0.03083390736884286, "grad_norm": 0.5546875, "learning_rate": 0.0018710492205811478, "loss": 0.3078, "step": 17390 }, { "epoch": 0.030837453534152674, "grad_norm": 0.953125, "learning_rate": 0.0018710185203949861, "loss": 0.2971, "step": 17392 }, { "epoch": 0.03084099969946249, "grad_norm": 0.54296875, "learning_rate": 0.0018709878168368373, "loss": 0.2023, "step": 17394 }, { "epoch": 0.030844545864772307, "grad_norm": 0.31640625, "learning_rate": 0.0018709571099068357, "loss": 0.2332, "step": 17396 }, { "epoch": 0.03084809203008212, "grad_norm": 0.3046875, "learning_rate": 0.0018709263996051152, "loss": 0.1837, "step": 17398 }, { "epoch": 0.030851638195391936, "grad_norm": 0.462890625, "learning_rate": 0.001870895685931811, "loss": 0.2283, "step": 17400 }, { "epoch": 0.03085518436070175, "grad_norm": 0.87890625, "learning_rate": 0.0018708649688870562, "loss": 0.2538, "step": 17402 }, { "epoch": 0.030858730526011565, "grad_norm": 0.6015625, "learning_rate": 0.0018708342484709862, "loss": 0.2414, "step": 17404 }, { "epoch": 0.03086227669132138, "grad_norm": 0.248046875, "learning_rate": 0.001870803524683735, "loss": 0.302, "step": 17406 }, { "epoch": 0.030865822856631198, "grad_norm": 0.478515625, "learning_rate": 0.001870772797525437, "loss": 0.2277, "step": 17408 }, { "epoch": 0.030869369021941012, "grad_norm": 0.54296875, "learning_rate": 0.001870742066996226, "loss": 0.262, "step": 17410 }, { "epoch": 0.030872915187250827, "grad_norm": 0.396484375, "learning_rate": 0.0018707113330962374, "loss": 0.2552, "step": 17412 }, { "epoch": 0.03087646135256064, "grad_norm": 0.84375, "learning_rate": 0.0018706805958256048, "loss": 0.2257, "step": 17414 }, { "epoch": 0.030880007517870456, "grad_norm": 1.34375, "learning_rate": 0.001870649855184463, "loss": 0.3411, "step": 17416 }, { "epoch": 0.03088355368318027, "grad_norm": 0.369140625, "learning_rate": 0.0018706191111729465, "loss": 0.2146, "step": 17418 }, { "epoch": 0.03088709984849009, "grad_norm": 0.322265625, "learning_rate": 0.0018705883637911895, "loss": 0.184, "step": 17420 }, { "epoch": 0.030890646013799903, "grad_norm": 3.03125, "learning_rate": 0.0018705576130393267, "loss": 0.3681, "step": 17422 }, { "epoch": 0.030894192179109718, "grad_norm": 1.1015625, "learning_rate": 0.0018705268589174924, "loss": 0.249, "step": 17424 }, { "epoch": 0.030897738344419532, "grad_norm": 0.1875, "learning_rate": 0.0018704961014258212, "loss": 0.2316, "step": 17426 }, { "epoch": 0.030901284509729347, "grad_norm": 0.79296875, "learning_rate": 0.001870465340564448, "loss": 0.2682, "step": 17428 }, { "epoch": 0.030904830675039165, "grad_norm": 0.609375, "learning_rate": 0.0018704345763335061, "loss": 0.2361, "step": 17430 }, { "epoch": 0.03090837684034898, "grad_norm": 0.65625, "learning_rate": 0.0018704038087331315, "loss": 0.2588, "step": 17432 }, { "epoch": 0.030911923005658794, "grad_norm": 0.734375, "learning_rate": 0.001870373037763458, "loss": 0.2163, "step": 17434 }, { "epoch": 0.03091546917096861, "grad_norm": 0.265625, "learning_rate": 0.0018703422634246205, "loss": 0.2092, "step": 17436 }, { "epoch": 0.030919015336278423, "grad_norm": 0.50390625, "learning_rate": 0.0018703114857167534, "loss": 0.2253, "step": 17438 }, { "epoch": 0.030922561501588237, "grad_norm": 0.640625, "learning_rate": 0.001870280704639991, "loss": 0.3098, "step": 17440 }, { "epoch": 0.030926107666898055, "grad_norm": 0.2294921875, "learning_rate": 0.0018702499201944684, "loss": 0.1965, "step": 17442 }, { "epoch": 0.03092965383220787, "grad_norm": 0.1904296875, "learning_rate": 0.0018702191323803203, "loss": 0.2075, "step": 17444 }, { "epoch": 0.030933199997517685, "grad_norm": 0.451171875, "learning_rate": 0.0018701883411976806, "loss": 0.2383, "step": 17446 }, { "epoch": 0.0309367461628275, "grad_norm": 0.43359375, "learning_rate": 0.001870157546646685, "loss": 0.2165, "step": 17448 }, { "epoch": 0.030940292328137314, "grad_norm": 0.609375, "learning_rate": 0.0018701267487274673, "loss": 0.3288, "step": 17450 }, { "epoch": 0.030943838493447128, "grad_norm": 1.828125, "learning_rate": 0.0018700959474401626, "loss": 0.3086, "step": 17452 }, { "epoch": 0.030947384658756946, "grad_norm": 0.494140625, "learning_rate": 0.0018700651427849056, "loss": 0.2196, "step": 17454 }, { "epoch": 0.03095093082406676, "grad_norm": 5.3125, "learning_rate": 0.001870034334761831, "loss": 0.3595, "step": 17456 }, { "epoch": 0.030954476989376575, "grad_norm": 0.58984375, "learning_rate": 0.0018700035233710735, "loss": 0.2255, "step": 17458 }, { "epoch": 0.03095802315468639, "grad_norm": 0.6015625, "learning_rate": 0.001869972708612768, "loss": 0.1879, "step": 17460 }, { "epoch": 0.030961569319996204, "grad_norm": 1.59375, "learning_rate": 0.001869941890487049, "loss": 0.2213, "step": 17462 }, { "epoch": 0.030965115485306022, "grad_norm": 0.88671875, "learning_rate": 0.0018699110689940513, "loss": 0.177, "step": 17464 }, { "epoch": 0.030968661650615837, "grad_norm": 0.2412109375, "learning_rate": 0.0018698802441339097, "loss": 0.1349, "step": 17466 }, { "epoch": 0.03097220781592565, "grad_norm": 0.322265625, "learning_rate": 0.0018698494159067594, "loss": 0.2248, "step": 17468 }, { "epoch": 0.030975753981235466, "grad_norm": 0.55078125, "learning_rate": 0.0018698185843127348, "loss": 0.1855, "step": 17470 }, { "epoch": 0.03097930014654528, "grad_norm": 0.6015625, "learning_rate": 0.001869787749351971, "loss": 0.1833, "step": 17472 }, { "epoch": 0.030982846311855095, "grad_norm": 0.56640625, "learning_rate": 0.0018697569110246027, "loss": 0.222, "step": 17474 }, { "epoch": 0.030986392477164913, "grad_norm": 0.63671875, "learning_rate": 0.0018697260693307648, "loss": 0.2189, "step": 17476 }, { "epoch": 0.030989938642474728, "grad_norm": 1.4921875, "learning_rate": 0.0018696952242705921, "loss": 0.2365, "step": 17478 }, { "epoch": 0.030993484807784542, "grad_norm": 0.515625, "learning_rate": 0.0018696643758442193, "loss": 0.1738, "step": 17480 }, { "epoch": 0.030997030973094357, "grad_norm": 0.5703125, "learning_rate": 0.001869633524051782, "loss": 0.2623, "step": 17482 }, { "epoch": 0.03100057713840417, "grad_norm": 0.734375, "learning_rate": 0.0018696026688934146, "loss": 0.2507, "step": 17484 }, { "epoch": 0.031004123303713986, "grad_norm": 2.09375, "learning_rate": 0.0018695718103692524, "loss": 0.2423, "step": 17486 }, { "epoch": 0.031007669469023804, "grad_norm": 0.412109375, "learning_rate": 0.0018695409484794296, "loss": 0.2604, "step": 17488 }, { "epoch": 0.03101121563433362, "grad_norm": 0.77734375, "learning_rate": 0.0018695100832240821, "loss": 0.3385, "step": 17490 }, { "epoch": 0.031014761799643433, "grad_norm": 0.455078125, "learning_rate": 0.0018694792146033443, "loss": 0.2156, "step": 17492 }, { "epoch": 0.031018307964953248, "grad_norm": 0.796875, "learning_rate": 0.0018694483426173517, "loss": 0.1883, "step": 17494 }, { "epoch": 0.031021854130263062, "grad_norm": 2.03125, "learning_rate": 0.0018694174672662387, "loss": 0.1886, "step": 17496 }, { "epoch": 0.03102540029557288, "grad_norm": 0.412109375, "learning_rate": 0.0018693865885501407, "loss": 0.2398, "step": 17498 }, { "epoch": 0.031028946460882695, "grad_norm": 0.408203125, "learning_rate": 0.0018693557064691927, "loss": 0.2397, "step": 17500 }, { "epoch": 0.03103249262619251, "grad_norm": 0.4921875, "learning_rate": 0.0018693248210235297, "loss": 0.1971, "step": 17502 }, { "epoch": 0.031036038791502324, "grad_norm": 0.466796875, "learning_rate": 0.0018692939322132868, "loss": 0.2357, "step": 17504 }, { "epoch": 0.03103958495681214, "grad_norm": 1.1953125, "learning_rate": 0.0018692630400385989, "loss": 0.2297, "step": 17506 }, { "epoch": 0.031043131122121953, "grad_norm": 0.77734375, "learning_rate": 0.0018692321444996017, "loss": 0.2307, "step": 17508 }, { "epoch": 0.03104667728743177, "grad_norm": 0.73046875, "learning_rate": 0.0018692012455964295, "loss": 0.1995, "step": 17510 }, { "epoch": 0.031050223452741586, "grad_norm": 0.86328125, "learning_rate": 0.0018691703433292182, "loss": 0.241, "step": 17512 }, { "epoch": 0.0310537696180514, "grad_norm": 0.98828125, "learning_rate": 0.0018691394376981025, "loss": 0.2082, "step": 17514 }, { "epoch": 0.031057315783361215, "grad_norm": 0.53125, "learning_rate": 0.001869108528703218, "loss": 0.218, "step": 17516 }, { "epoch": 0.03106086194867103, "grad_norm": 0.2431640625, "learning_rate": 0.001869077616344699, "loss": 0.1956, "step": 17518 }, { "epoch": 0.031064408113980844, "grad_norm": 1.09375, "learning_rate": 0.0018690467006226813, "loss": 0.4033, "step": 17520 }, { "epoch": 0.031067954279290662, "grad_norm": 0.65234375, "learning_rate": 0.0018690157815373, "loss": 0.2213, "step": 17522 }, { "epoch": 0.031071500444600476, "grad_norm": 1.296875, "learning_rate": 0.0018689848590886908, "loss": 0.1714, "step": 17524 }, { "epoch": 0.03107504660991029, "grad_norm": 0.53125, "learning_rate": 0.001868953933276988, "loss": 0.2766, "step": 17526 }, { "epoch": 0.031078592775220105, "grad_norm": 0.5234375, "learning_rate": 0.001868923004102328, "loss": 0.2182, "step": 17528 }, { "epoch": 0.03108213894052992, "grad_norm": 0.33984375, "learning_rate": 0.001868892071564845, "loss": 0.2309, "step": 17530 }, { "epoch": 0.031085685105839738, "grad_norm": 0.62109375, "learning_rate": 0.0018688611356646745, "loss": 0.198, "step": 17532 }, { "epoch": 0.031089231271149553, "grad_norm": 1.78125, "learning_rate": 0.0018688301964019524, "loss": 0.3087, "step": 17534 }, { "epoch": 0.031092777436459367, "grad_norm": 0.412109375, "learning_rate": 0.0018687992537768136, "loss": 0.2131, "step": 17536 }, { "epoch": 0.03109632360176918, "grad_norm": 3.71875, "learning_rate": 0.0018687683077893932, "loss": 0.5201, "step": 17538 }, { "epoch": 0.031099869767078996, "grad_norm": 0.66015625, "learning_rate": 0.0018687373584398267, "loss": 0.2124, "step": 17540 }, { "epoch": 0.03110341593238881, "grad_norm": 0.3671875, "learning_rate": 0.00186870640572825, "loss": 0.2241, "step": 17542 }, { "epoch": 0.03110696209769863, "grad_norm": 1.2109375, "learning_rate": 0.0018686754496547975, "loss": 0.2391, "step": 17544 }, { "epoch": 0.031110508263008443, "grad_norm": 0.62890625, "learning_rate": 0.0018686444902196054, "loss": 0.2052, "step": 17546 }, { "epoch": 0.031114054428318258, "grad_norm": 0.5, "learning_rate": 0.0018686135274228086, "loss": 0.2078, "step": 17548 }, { "epoch": 0.031117600593628073, "grad_norm": 0.30078125, "learning_rate": 0.0018685825612645428, "loss": 0.2223, "step": 17550 }, { "epoch": 0.031121146758937887, "grad_norm": 1.3046875, "learning_rate": 0.0018685515917449432, "loss": 0.2463, "step": 17552 }, { "epoch": 0.0311246929242477, "grad_norm": 5.96875, "learning_rate": 0.0018685206188641455, "loss": 0.3168, "step": 17554 }, { "epoch": 0.03112823908955752, "grad_norm": 1.8359375, "learning_rate": 0.0018684896426222853, "loss": 0.4675, "step": 17556 }, { "epoch": 0.031131785254867334, "grad_norm": 0.3359375, "learning_rate": 0.0018684586630194973, "loss": 0.2195, "step": 17558 }, { "epoch": 0.03113533142017715, "grad_norm": 2.53125, "learning_rate": 0.0018684276800559174, "loss": 0.3469, "step": 17560 }, { "epoch": 0.031138877585486963, "grad_norm": 0.2236328125, "learning_rate": 0.0018683966937316817, "loss": 0.2229, "step": 17562 }, { "epoch": 0.031142423750796778, "grad_norm": 0.69140625, "learning_rate": 0.0018683657040469248, "loss": 0.1791, "step": 17564 }, { "epoch": 0.031145969916106596, "grad_norm": 0.59765625, "learning_rate": 0.0018683347110017828, "loss": 0.2017, "step": 17566 }, { "epoch": 0.03114951608141641, "grad_norm": 0.36328125, "learning_rate": 0.001868303714596391, "loss": 0.222, "step": 17568 }, { "epoch": 0.031153062246726225, "grad_norm": 1.2109375, "learning_rate": 0.0018682727148308853, "loss": 0.2837, "step": 17570 }, { "epoch": 0.03115660841203604, "grad_norm": 0.7421875, "learning_rate": 0.0018682417117054006, "loss": 0.3787, "step": 17572 }, { "epoch": 0.031160154577345854, "grad_norm": 0.5390625, "learning_rate": 0.0018682107052200732, "loss": 0.2609, "step": 17574 }, { "epoch": 0.03116370074265567, "grad_norm": 0.8046875, "learning_rate": 0.0018681796953750383, "loss": 0.2596, "step": 17576 }, { "epoch": 0.031167246907965487, "grad_norm": 0.7421875, "learning_rate": 0.0018681486821704317, "loss": 0.2354, "step": 17578 }, { "epoch": 0.0311707930732753, "grad_norm": 0.31640625, "learning_rate": 0.0018681176656063887, "loss": 0.173, "step": 17580 }, { "epoch": 0.031174339238585116, "grad_norm": 1.59375, "learning_rate": 0.0018680866456830454, "loss": 0.3466, "step": 17582 }, { "epoch": 0.03117788540389493, "grad_norm": 0.76953125, "learning_rate": 0.0018680556224005371, "loss": 0.2364, "step": 17584 }, { "epoch": 0.031181431569204745, "grad_norm": 0.408203125, "learning_rate": 0.001868024595759, "loss": 0.2513, "step": 17586 }, { "epoch": 0.03118497773451456, "grad_norm": 0.9921875, "learning_rate": 0.0018679935657585694, "loss": 0.2596, "step": 17588 }, { "epoch": 0.031188523899824377, "grad_norm": 3.140625, "learning_rate": 0.001867962532399381, "loss": 0.3123, "step": 17590 }, { "epoch": 0.031192070065134192, "grad_norm": 0.5703125, "learning_rate": 0.0018679314956815702, "loss": 0.2948, "step": 17592 }, { "epoch": 0.031195616230444007, "grad_norm": 0.416015625, "learning_rate": 0.0018679004556052734, "loss": 0.275, "step": 17594 }, { "epoch": 0.03119916239575382, "grad_norm": 0.34765625, "learning_rate": 0.001867869412170626, "loss": 0.1929, "step": 17596 }, { "epoch": 0.031202708561063636, "grad_norm": 1.1953125, "learning_rate": 0.001867838365377764, "loss": 0.2254, "step": 17598 }, { "epoch": 0.031206254726373454, "grad_norm": 0.5390625, "learning_rate": 0.0018678073152268227, "loss": 0.2186, "step": 17600 }, { "epoch": 0.031209800891683268, "grad_norm": 0.65625, "learning_rate": 0.0018677762617179384, "loss": 0.2129, "step": 17602 }, { "epoch": 0.031213347056993083, "grad_norm": 0.337890625, "learning_rate": 0.0018677452048512465, "loss": 0.2509, "step": 17604 }, { "epoch": 0.031216893222302897, "grad_norm": 0.390625, "learning_rate": 0.0018677141446268833, "loss": 0.246, "step": 17606 }, { "epoch": 0.031220439387612712, "grad_norm": 0.466796875, "learning_rate": 0.0018676830810449843, "loss": 0.195, "step": 17608 }, { "epoch": 0.031223985552922526, "grad_norm": 0.64453125, "learning_rate": 0.0018676520141056854, "loss": 0.1648, "step": 17610 }, { "epoch": 0.031227531718232344, "grad_norm": 0.609375, "learning_rate": 0.0018676209438091226, "loss": 0.2633, "step": 17612 }, { "epoch": 0.03123107788354216, "grad_norm": 1.515625, "learning_rate": 0.0018675898701554314, "loss": 0.2349, "step": 17614 }, { "epoch": 0.031234624048851974, "grad_norm": 0.5859375, "learning_rate": 0.0018675587931447479, "loss": 0.1571, "step": 17616 }, { "epoch": 0.031238170214161788, "grad_norm": 4.125, "learning_rate": 0.0018675277127772085, "loss": 0.2977, "step": 17618 }, { "epoch": 0.031241716379471603, "grad_norm": 1.25, "learning_rate": 0.0018674966290529488, "loss": 0.2097, "step": 17620 }, { "epoch": 0.031245262544781417, "grad_norm": 0.73828125, "learning_rate": 0.0018674655419721044, "loss": 0.2524, "step": 17622 }, { "epoch": 0.031248808710091235, "grad_norm": 0.625, "learning_rate": 0.0018674344515348111, "loss": 0.1837, "step": 17624 }, { "epoch": 0.031252354875401046, "grad_norm": 0.44921875, "learning_rate": 0.001867403357741206, "loss": 0.1841, "step": 17626 }, { "epoch": 0.03125590104071086, "grad_norm": 2.171875, "learning_rate": 0.001867372260591424, "loss": 0.1978, "step": 17628 }, { "epoch": 0.03125944720602068, "grad_norm": 2.421875, "learning_rate": 0.0018673411600856017, "loss": 0.2488, "step": 17630 }, { "epoch": 0.0312629933713305, "grad_norm": 1.5546875, "learning_rate": 0.0018673100562238746, "loss": 0.3018, "step": 17632 }, { "epoch": 0.03126653953664031, "grad_norm": 0.84765625, "learning_rate": 0.0018672789490063793, "loss": 0.3209, "step": 17634 }, { "epoch": 0.031270085701950126, "grad_norm": 0.52734375, "learning_rate": 0.0018672478384332513, "loss": 0.2762, "step": 17636 }, { "epoch": 0.03127363186725994, "grad_norm": 0.490234375, "learning_rate": 0.0018672167245046272, "loss": 0.2803, "step": 17638 }, { "epoch": 0.031277178032569755, "grad_norm": 0.388671875, "learning_rate": 0.0018671856072206426, "loss": 0.179, "step": 17640 }, { "epoch": 0.03128072419787957, "grad_norm": 0.48828125, "learning_rate": 0.0018671544865814338, "loss": 0.2225, "step": 17642 }, { "epoch": 0.031284270363189384, "grad_norm": 2.796875, "learning_rate": 0.0018671233625871368, "loss": 0.2085, "step": 17644 }, { "epoch": 0.0312878165284992, "grad_norm": 5.25, "learning_rate": 0.0018670922352378883, "loss": 0.5095, "step": 17646 }, { "epoch": 0.03129136269380901, "grad_norm": 0.48828125, "learning_rate": 0.0018670611045338236, "loss": 0.2, "step": 17648 }, { "epoch": 0.03129490885911883, "grad_norm": 0.3125, "learning_rate": 0.0018670299704750789, "loss": 0.322, "step": 17650 }, { "epoch": 0.03129845502442865, "grad_norm": 0.5390625, "learning_rate": 0.0018669988330617911, "loss": 0.2108, "step": 17652 }, { "epoch": 0.031302001189738464, "grad_norm": 0.81640625, "learning_rate": 0.001866967692294096, "loss": 0.2359, "step": 17654 }, { "epoch": 0.03130554735504828, "grad_norm": 2.109375, "learning_rate": 0.0018669365481721292, "loss": 0.2707, "step": 17656 }, { "epoch": 0.03130909352035809, "grad_norm": 0.244140625, "learning_rate": 0.0018669054006960279, "loss": 0.1762, "step": 17658 }, { "epoch": 0.03131263968566791, "grad_norm": 0.71484375, "learning_rate": 0.0018668742498659276, "loss": 0.2128, "step": 17660 }, { "epoch": 0.03131618585097772, "grad_norm": 0.75390625, "learning_rate": 0.0018668430956819647, "loss": 0.2031, "step": 17662 }, { "epoch": 0.03131973201628754, "grad_norm": 0.65234375, "learning_rate": 0.001866811938144276, "loss": 0.2487, "step": 17664 }, { "epoch": 0.03132327818159735, "grad_norm": 1.46875, "learning_rate": 0.001866780777252997, "loss": 0.2409, "step": 17666 }, { "epoch": 0.031326824346907166, "grad_norm": 0.423828125, "learning_rate": 0.0018667496130082642, "loss": 0.2776, "step": 17668 }, { "epoch": 0.03133037051221698, "grad_norm": 0.34375, "learning_rate": 0.001866718445410214, "loss": 0.1541, "step": 17670 }, { "epoch": 0.031333916677526795, "grad_norm": 0.8515625, "learning_rate": 0.0018666872744589826, "loss": 0.2181, "step": 17672 }, { "epoch": 0.031337462842836616, "grad_norm": 0.51171875, "learning_rate": 0.0018666561001547068, "loss": 0.3044, "step": 17674 }, { "epoch": 0.03134100900814643, "grad_norm": 0.439453125, "learning_rate": 0.001866624922497522, "loss": 0.1773, "step": 17676 }, { "epoch": 0.031344555173456246, "grad_norm": 0.50390625, "learning_rate": 0.0018665937414875655, "loss": 0.2183, "step": 17678 }, { "epoch": 0.03134810133876606, "grad_norm": 0.48046875, "learning_rate": 0.001866562557124973, "loss": 0.2307, "step": 17680 }, { "epoch": 0.031351647504075875, "grad_norm": 0.9296875, "learning_rate": 0.0018665313694098814, "loss": 0.2377, "step": 17682 }, { "epoch": 0.03135519366938569, "grad_norm": 0.9296875, "learning_rate": 0.0018665001783424267, "loss": 0.2033, "step": 17684 }, { "epoch": 0.031358739834695504, "grad_norm": 1.0234375, "learning_rate": 0.0018664689839227454, "loss": 0.1996, "step": 17686 }, { "epoch": 0.03136228600000532, "grad_norm": 0.3671875, "learning_rate": 0.0018664377861509743, "loss": 0.1965, "step": 17688 }, { "epoch": 0.03136583216531513, "grad_norm": 0.94921875, "learning_rate": 0.0018664065850272492, "loss": 0.1761, "step": 17690 }, { "epoch": 0.03136937833062495, "grad_norm": 0.42578125, "learning_rate": 0.0018663753805517069, "loss": 0.5204, "step": 17692 }, { "epoch": 0.03137292449593476, "grad_norm": 1.5, "learning_rate": 0.001866344172724484, "loss": 0.397, "step": 17694 }, { "epoch": 0.031376470661244577, "grad_norm": 0.36328125, "learning_rate": 0.0018663129615457167, "loss": 0.1731, "step": 17696 }, { "epoch": 0.0313800168265544, "grad_norm": 0.6640625, "learning_rate": 0.0018662817470155415, "loss": 0.3383, "step": 17698 }, { "epoch": 0.03138356299186421, "grad_norm": 1.0, "learning_rate": 0.0018662505291340951, "loss": 0.3073, "step": 17700 }, { "epoch": 0.03138710915717403, "grad_norm": 0.40234375, "learning_rate": 0.0018662193079015143, "loss": 0.2507, "step": 17702 }, { "epoch": 0.03139065532248384, "grad_norm": 0.54296875, "learning_rate": 0.0018661880833179352, "loss": 0.1776, "step": 17704 }, { "epoch": 0.031394201487793656, "grad_norm": 1.9296875, "learning_rate": 0.0018661568553834944, "loss": 0.3084, "step": 17706 }, { "epoch": 0.03139774765310347, "grad_norm": 0.703125, "learning_rate": 0.0018661256240983285, "loss": 0.2354, "step": 17708 }, { "epoch": 0.031401293818413285, "grad_norm": 0.75390625, "learning_rate": 0.0018660943894625744, "loss": 0.1604, "step": 17710 }, { "epoch": 0.0314048399837231, "grad_norm": 1.4296875, "learning_rate": 0.0018660631514763682, "loss": 0.2698, "step": 17712 }, { "epoch": 0.031408386149032914, "grad_norm": 1.328125, "learning_rate": 0.001866031910139847, "loss": 0.2641, "step": 17714 }, { "epoch": 0.03141193231434273, "grad_norm": 0.88671875, "learning_rate": 0.001866000665453147, "loss": 0.1925, "step": 17716 }, { "epoch": 0.031415478479652544, "grad_norm": 0.82421875, "learning_rate": 0.0018659694174164047, "loss": 0.1866, "step": 17718 }, { "epoch": 0.031419024644962365, "grad_norm": 0.435546875, "learning_rate": 0.0018659381660297576, "loss": 0.296, "step": 17720 }, { "epoch": 0.03142257081027218, "grad_norm": 1.7109375, "learning_rate": 0.0018659069112933416, "loss": 0.2878, "step": 17722 }, { "epoch": 0.031426116975581994, "grad_norm": 0.62890625, "learning_rate": 0.0018658756532072937, "loss": 0.1713, "step": 17724 }, { "epoch": 0.03142966314089181, "grad_norm": 1.5703125, "learning_rate": 0.0018658443917717506, "loss": 0.2247, "step": 17726 }, { "epoch": 0.03143320930620162, "grad_norm": 0.412109375, "learning_rate": 0.0018658131269868492, "loss": 0.3071, "step": 17728 }, { "epoch": 0.03143675547151144, "grad_norm": 0.376953125, "learning_rate": 0.0018657818588527259, "loss": 0.2512, "step": 17730 }, { "epoch": 0.03144030163682125, "grad_norm": 0.67578125, "learning_rate": 0.0018657505873695174, "loss": 0.1854, "step": 17732 }, { "epoch": 0.03144384780213107, "grad_norm": 0.71875, "learning_rate": 0.0018657193125373608, "loss": 0.2104, "step": 17734 }, { "epoch": 0.03144739396744088, "grad_norm": 1.203125, "learning_rate": 0.0018656880343563924, "loss": 0.3567, "step": 17736 }, { "epoch": 0.031450940132750696, "grad_norm": 2.765625, "learning_rate": 0.0018656567528267498, "loss": 0.4753, "step": 17738 }, { "epoch": 0.03145448629806051, "grad_norm": 0.462890625, "learning_rate": 0.0018656254679485689, "loss": 0.2237, "step": 17740 }, { "epoch": 0.03145803246337033, "grad_norm": 0.470703125, "learning_rate": 0.001865594179721987, "loss": 0.2699, "step": 17742 }, { "epoch": 0.03146157862868015, "grad_norm": 0.5, "learning_rate": 0.0018655628881471412, "loss": 0.1918, "step": 17744 }, { "epoch": 0.03146512479398996, "grad_norm": 1.9140625, "learning_rate": 0.0018655315932241678, "loss": 0.2191, "step": 17746 }, { "epoch": 0.031468670959299776, "grad_norm": 0.35546875, "learning_rate": 0.0018655002949532035, "loss": 0.2002, "step": 17748 }, { "epoch": 0.03147221712460959, "grad_norm": 18.375, "learning_rate": 0.001865468993334386, "loss": 0.4105, "step": 17750 }, { "epoch": 0.031475763289919405, "grad_norm": 2.640625, "learning_rate": 0.0018654376883678516, "loss": 0.3898, "step": 17752 }, { "epoch": 0.03147930945522922, "grad_norm": 1.8984375, "learning_rate": 0.0018654063800537378, "loss": 0.2361, "step": 17754 }, { "epoch": 0.031482855620539034, "grad_norm": 0.56640625, "learning_rate": 0.0018653750683921804, "loss": 0.2057, "step": 17756 }, { "epoch": 0.03148640178584885, "grad_norm": 0.498046875, "learning_rate": 0.0018653437533833177, "loss": 0.2252, "step": 17758 }, { "epoch": 0.03148994795115866, "grad_norm": 0.62890625, "learning_rate": 0.0018653124350272857, "loss": 0.1853, "step": 17760 }, { "epoch": 0.03149349411646848, "grad_norm": 0.9375, "learning_rate": 0.0018652811133242216, "loss": 0.2732, "step": 17762 }, { "epoch": 0.03149704028177829, "grad_norm": 1.0546875, "learning_rate": 0.0018652497882742625, "loss": 0.1769, "step": 17764 }, { "epoch": 0.031500586447088114, "grad_norm": 0.51953125, "learning_rate": 0.0018652184598775452, "loss": 0.2036, "step": 17766 }, { "epoch": 0.03150413261239793, "grad_norm": 1.1796875, "learning_rate": 0.001865187128134207, "loss": 0.2111, "step": 17768 }, { "epoch": 0.03150767877770774, "grad_norm": 0.34375, "learning_rate": 0.0018651557930443846, "loss": 0.2134, "step": 17770 }, { "epoch": 0.03151122494301756, "grad_norm": 0.4609375, "learning_rate": 0.0018651244546082153, "loss": 0.1905, "step": 17772 }, { "epoch": 0.03151477110832737, "grad_norm": 0.63671875, "learning_rate": 0.001865093112825836, "loss": 0.2593, "step": 17774 }, { "epoch": 0.031518317273637186, "grad_norm": 1.3203125, "learning_rate": 0.0018650617676973843, "loss": 0.2784, "step": 17776 }, { "epoch": 0.031521863438947, "grad_norm": 7.53125, "learning_rate": 0.0018650304192229963, "loss": 0.1985, "step": 17778 }, { "epoch": 0.031525409604256815, "grad_norm": 1.25, "learning_rate": 0.00186499906740281, "loss": 0.2204, "step": 17780 }, { "epoch": 0.03152895576956663, "grad_norm": 0.94921875, "learning_rate": 0.0018649677122369616, "loss": 0.1602, "step": 17782 }, { "epoch": 0.031532501934876445, "grad_norm": 0.408203125, "learning_rate": 0.0018649363537255892, "loss": 0.2226, "step": 17784 }, { "epoch": 0.03153604810018626, "grad_norm": 1.078125, "learning_rate": 0.0018649049918688294, "loss": 0.2038, "step": 17786 }, { "epoch": 0.03153959426549608, "grad_norm": 0.55078125, "learning_rate": 0.0018648736266668197, "loss": 0.1976, "step": 17788 }, { "epoch": 0.031543140430805895, "grad_norm": 0.8046875, "learning_rate": 0.0018648422581196966, "loss": 0.2489, "step": 17790 }, { "epoch": 0.03154668659611571, "grad_norm": 0.361328125, "learning_rate": 0.001864810886227598, "loss": 0.1839, "step": 17792 }, { "epoch": 0.031550232761425524, "grad_norm": 0.310546875, "learning_rate": 0.001864779510990661, "loss": 0.1903, "step": 17794 }, { "epoch": 0.03155377892673534, "grad_norm": 0.24609375, "learning_rate": 0.0018647481324090226, "loss": 0.2307, "step": 17796 }, { "epoch": 0.03155732509204515, "grad_norm": 0.58984375, "learning_rate": 0.00186471675048282, "loss": 0.2603, "step": 17798 }, { "epoch": 0.03156087125735497, "grad_norm": 0.3359375, "learning_rate": 0.0018646853652121904, "loss": 0.2546, "step": 17800 }, { "epoch": 0.03156441742266478, "grad_norm": 0.86328125, "learning_rate": 0.0018646539765972711, "loss": 0.4379, "step": 17802 }, { "epoch": 0.0315679635879746, "grad_norm": 1.3828125, "learning_rate": 0.0018646225846381999, "loss": 0.2227, "step": 17804 }, { "epoch": 0.03157150975328441, "grad_norm": 0.70703125, "learning_rate": 0.0018645911893351134, "loss": 0.3412, "step": 17806 }, { "epoch": 0.031575055918594226, "grad_norm": 2.59375, "learning_rate": 0.0018645597906881493, "loss": 0.336, "step": 17808 }, { "epoch": 0.03157860208390405, "grad_norm": 0.9765625, "learning_rate": 0.0018645283886974447, "loss": 0.3767, "step": 17810 }, { "epoch": 0.03158214824921386, "grad_norm": 0.515625, "learning_rate": 0.0018644969833631368, "loss": 0.1747, "step": 17812 }, { "epoch": 0.03158569441452368, "grad_norm": 0.5703125, "learning_rate": 0.0018644655746853636, "loss": 0.1672, "step": 17814 }, { "epoch": 0.03158924057983349, "grad_norm": 0.98828125, "learning_rate": 0.0018644341626642617, "loss": 0.1931, "step": 17816 }, { "epoch": 0.031592786745143306, "grad_norm": 0.53515625, "learning_rate": 0.001864402747299969, "loss": 0.2153, "step": 17818 }, { "epoch": 0.03159633291045312, "grad_norm": 0.421875, "learning_rate": 0.0018643713285926228, "loss": 0.1417, "step": 17820 }, { "epoch": 0.031599879075762935, "grad_norm": 0.58984375, "learning_rate": 0.00186433990654236, "loss": 0.3583, "step": 17822 }, { "epoch": 0.03160342524107275, "grad_norm": 0.482421875, "learning_rate": 0.0018643084811493188, "loss": 0.2388, "step": 17824 }, { "epoch": 0.031606971406382564, "grad_norm": 1.625, "learning_rate": 0.001864277052413636, "loss": 0.3, "step": 17826 }, { "epoch": 0.03161051757169238, "grad_norm": 1.6875, "learning_rate": 0.0018642456203354495, "loss": 0.2398, "step": 17828 }, { "epoch": 0.03161406373700219, "grad_norm": 0.63671875, "learning_rate": 0.0018642141849148967, "loss": 0.2565, "step": 17830 }, { "epoch": 0.03161760990231201, "grad_norm": 0.240234375, "learning_rate": 0.0018641827461521145, "loss": 0.206, "step": 17832 }, { "epoch": 0.03162115606762183, "grad_norm": 0.341796875, "learning_rate": 0.0018641513040472412, "loss": 0.2475, "step": 17834 }, { "epoch": 0.031624702232931644, "grad_norm": 0.2294921875, "learning_rate": 0.001864119858600414, "loss": 0.2042, "step": 17836 }, { "epoch": 0.03162824839824146, "grad_norm": 0.408203125, "learning_rate": 0.0018640884098117703, "loss": 0.1757, "step": 17838 }, { "epoch": 0.03163179456355127, "grad_norm": 0.5625, "learning_rate": 0.0018640569576814476, "loss": 0.2236, "step": 17840 }, { "epoch": 0.03163534072886109, "grad_norm": 0.8359375, "learning_rate": 0.0018640255022095839, "loss": 0.534, "step": 17842 }, { "epoch": 0.0316388868941709, "grad_norm": 0.2236328125, "learning_rate": 0.0018639940433963164, "loss": 0.2065, "step": 17844 }, { "epoch": 0.03164243305948072, "grad_norm": 0.419921875, "learning_rate": 0.0018639625812417824, "loss": 0.239, "step": 17846 }, { "epoch": 0.03164597922479053, "grad_norm": 0.416015625, "learning_rate": 0.0018639311157461202, "loss": 0.1978, "step": 17848 }, { "epoch": 0.031649525390100346, "grad_norm": 0.41796875, "learning_rate": 0.001863899646909467, "loss": 0.2182, "step": 17850 }, { "epoch": 0.03165307155541016, "grad_norm": 0.458984375, "learning_rate": 0.00186386817473196, "loss": 0.214, "step": 17852 }, { "epoch": 0.031656617720719975, "grad_norm": 0.302734375, "learning_rate": 0.001863836699213738, "loss": 0.228, "step": 17854 }, { "epoch": 0.031660163886029796, "grad_norm": 0.408203125, "learning_rate": 0.0018638052203549375, "loss": 0.1656, "step": 17856 }, { "epoch": 0.03166371005133961, "grad_norm": 0.41015625, "learning_rate": 0.0018637737381556967, "loss": 0.2155, "step": 17858 }, { "epoch": 0.031667256216649425, "grad_norm": 0.37109375, "learning_rate": 0.0018637422526161533, "loss": 0.2515, "step": 17860 }, { "epoch": 0.03167080238195924, "grad_norm": 2.171875, "learning_rate": 0.001863710763736445, "loss": 0.3484, "step": 17862 }, { "epoch": 0.031674348547269054, "grad_norm": 0.306640625, "learning_rate": 0.0018636792715167095, "loss": 0.2331, "step": 17864 }, { "epoch": 0.03167789471257887, "grad_norm": 0.341796875, "learning_rate": 0.001863647775957084, "loss": 0.3115, "step": 17866 }, { "epoch": 0.031681440877888684, "grad_norm": 0.3125, "learning_rate": 0.0018636162770577069, "loss": 0.1633, "step": 17868 }, { "epoch": 0.0316849870431985, "grad_norm": 0.61328125, "learning_rate": 0.001863584774818716, "loss": 0.2658, "step": 17870 }, { "epoch": 0.03168853320850831, "grad_norm": 0.44921875, "learning_rate": 0.0018635532692402488, "loss": 0.218, "step": 17872 }, { "epoch": 0.03169207937381813, "grad_norm": 2.140625, "learning_rate": 0.001863521760322443, "loss": 0.2217, "step": 17874 }, { "epoch": 0.03169562553912794, "grad_norm": 0.6015625, "learning_rate": 0.0018634902480654364, "loss": 0.2117, "step": 17876 }, { "epoch": 0.03169917170443776, "grad_norm": 0.87890625, "learning_rate": 0.0018634587324693672, "loss": 0.1975, "step": 17878 }, { "epoch": 0.03170271786974758, "grad_norm": 0.546875, "learning_rate": 0.0018634272135343733, "loss": 0.3673, "step": 17880 }, { "epoch": 0.03170626403505739, "grad_norm": 0.53125, "learning_rate": 0.0018633956912605916, "loss": 0.4124, "step": 17882 }, { "epoch": 0.03170981020036721, "grad_norm": 1.6796875, "learning_rate": 0.0018633641656481607, "loss": 0.23, "step": 17884 }, { "epoch": 0.03171335636567702, "grad_norm": 0.75390625, "learning_rate": 0.0018633326366972185, "loss": 0.2893, "step": 17886 }, { "epoch": 0.031716902530986836, "grad_norm": 0.58984375, "learning_rate": 0.0018633011044079026, "loss": 0.1991, "step": 17888 }, { "epoch": 0.03172044869629665, "grad_norm": 0.322265625, "learning_rate": 0.0018632695687803512, "loss": 0.2246, "step": 17890 }, { "epoch": 0.031723994861606465, "grad_norm": 1.109375, "learning_rate": 0.001863238029814702, "loss": 0.2162, "step": 17892 }, { "epoch": 0.03172754102691628, "grad_norm": 2.71875, "learning_rate": 0.001863206487511093, "loss": 0.3298, "step": 17894 }, { "epoch": 0.031731087192226094, "grad_norm": 0.6875, "learning_rate": 0.0018631749418696621, "loss": 0.2567, "step": 17896 }, { "epoch": 0.03173463335753591, "grad_norm": 0.49609375, "learning_rate": 0.0018631433928905476, "loss": 0.2203, "step": 17898 }, { "epoch": 0.03173817952284572, "grad_norm": 0.55078125, "learning_rate": 0.0018631118405738867, "loss": 0.2427, "step": 17900 }, { "epoch": 0.031741725688155545, "grad_norm": 0.33203125, "learning_rate": 0.0018630802849198182, "loss": 0.1782, "step": 17902 }, { "epoch": 0.03174527185346536, "grad_norm": 0.5859375, "learning_rate": 0.0018630487259284798, "loss": 0.3068, "step": 17904 }, { "epoch": 0.031748818018775174, "grad_norm": 0.361328125, "learning_rate": 0.0018630171636000094, "loss": 0.3974, "step": 17906 }, { "epoch": 0.03175236418408499, "grad_norm": 0.400390625, "learning_rate": 0.0018629855979345451, "loss": 0.23, "step": 17908 }, { "epoch": 0.0317559103493948, "grad_norm": 0.328125, "learning_rate": 0.0018629540289322249, "loss": 0.2461, "step": 17910 }, { "epoch": 0.03175945651470462, "grad_norm": 0.984375, "learning_rate": 0.0018629224565931872, "loss": 0.2578, "step": 17912 }, { "epoch": 0.03176300268001443, "grad_norm": 0.6796875, "learning_rate": 0.0018628908809175697, "loss": 0.1649, "step": 17914 }, { "epoch": 0.03176654884532425, "grad_norm": 1.4453125, "learning_rate": 0.0018628593019055107, "loss": 0.2234, "step": 17916 }, { "epoch": 0.03177009501063406, "grad_norm": 0.51171875, "learning_rate": 0.001862827719557148, "loss": 0.2182, "step": 17918 }, { "epoch": 0.031773641175943876, "grad_norm": 0.32421875, "learning_rate": 0.0018627961338726204, "loss": 0.1624, "step": 17920 }, { "epoch": 0.03177718734125369, "grad_norm": 0.73046875, "learning_rate": 0.0018627645448520654, "loss": 0.2471, "step": 17922 }, { "epoch": 0.03178073350656351, "grad_norm": 0.455078125, "learning_rate": 0.0018627329524956212, "loss": 0.2871, "step": 17924 }, { "epoch": 0.031784279671873326, "grad_norm": 0.400390625, "learning_rate": 0.0018627013568034262, "loss": 0.197, "step": 17926 }, { "epoch": 0.03178782583718314, "grad_norm": 0.470703125, "learning_rate": 0.0018626697577756185, "loss": 0.2584, "step": 17928 }, { "epoch": 0.031791372002492956, "grad_norm": 0.86328125, "learning_rate": 0.0018626381554123362, "loss": 0.3106, "step": 17930 }, { "epoch": 0.03179491816780277, "grad_norm": 0.58984375, "learning_rate": 0.0018626065497137176, "loss": 0.2439, "step": 17932 }, { "epoch": 0.031798464333112585, "grad_norm": 0.5859375, "learning_rate": 0.001862574940679901, "loss": 0.2669, "step": 17934 }, { "epoch": 0.0318020104984224, "grad_norm": 0.5625, "learning_rate": 0.0018625433283110248, "loss": 0.1524, "step": 17936 }, { "epoch": 0.031805556663732214, "grad_norm": 0.73828125, "learning_rate": 0.0018625117126072266, "loss": 0.2604, "step": 17938 }, { "epoch": 0.03180910282904203, "grad_norm": 1.875, "learning_rate": 0.0018624800935686454, "loss": 0.283, "step": 17940 }, { "epoch": 0.03181264899435184, "grad_norm": 0.419921875, "learning_rate": 0.0018624484711954191, "loss": 0.2197, "step": 17942 }, { "epoch": 0.03181619515966166, "grad_norm": 0.62109375, "learning_rate": 0.001862416845487686, "loss": 0.2309, "step": 17944 }, { "epoch": 0.03181974132497148, "grad_norm": 0.859375, "learning_rate": 0.0018623852164455844, "loss": 0.2321, "step": 17946 }, { "epoch": 0.03182328749028129, "grad_norm": 0.828125, "learning_rate": 0.001862353584069253, "loss": 0.2239, "step": 17948 }, { "epoch": 0.03182683365559111, "grad_norm": 0.71484375, "learning_rate": 0.0018623219483588295, "loss": 0.1956, "step": 17950 }, { "epoch": 0.03183037982090092, "grad_norm": 1.078125, "learning_rate": 0.0018622903093144527, "loss": 0.2365, "step": 17952 }, { "epoch": 0.03183392598621074, "grad_norm": 0.36328125, "learning_rate": 0.001862258666936261, "loss": 0.1912, "step": 17954 }, { "epoch": 0.03183747215152055, "grad_norm": 0.31640625, "learning_rate": 0.0018622270212243928, "loss": 0.2004, "step": 17956 }, { "epoch": 0.031841018316830366, "grad_norm": 0.349609375, "learning_rate": 0.001862195372178986, "loss": 0.2406, "step": 17958 }, { "epoch": 0.03184456448214018, "grad_norm": 0.26171875, "learning_rate": 0.0018621637198001798, "loss": 0.2245, "step": 17960 }, { "epoch": 0.031848110647449995, "grad_norm": 0.4921875, "learning_rate": 0.0018621320640881116, "loss": 0.4642, "step": 17962 }, { "epoch": 0.03185165681275981, "grad_norm": 0.34765625, "learning_rate": 0.001862100405042921, "loss": 0.1687, "step": 17964 }, { "epoch": 0.031855202978069624, "grad_norm": 0.9609375, "learning_rate": 0.0018620687426647458, "loss": 0.2321, "step": 17966 }, { "epoch": 0.03185874914337944, "grad_norm": 0.2451171875, "learning_rate": 0.0018620370769537244, "loss": 0.2305, "step": 17968 }, { "epoch": 0.03186229530868926, "grad_norm": 0.431640625, "learning_rate": 0.0018620054079099956, "loss": 0.2585, "step": 17970 }, { "epoch": 0.031865841473999075, "grad_norm": 0.79296875, "learning_rate": 0.0018619737355336978, "loss": 0.1928, "step": 17972 }, { "epoch": 0.03186938763930889, "grad_norm": 0.384765625, "learning_rate": 0.0018619420598249695, "loss": 0.2071, "step": 17974 }, { "epoch": 0.031872933804618704, "grad_norm": 0.6171875, "learning_rate": 0.0018619103807839491, "loss": 0.2396, "step": 17976 }, { "epoch": 0.03187647996992852, "grad_norm": 0.375, "learning_rate": 0.0018618786984107751, "loss": 0.1904, "step": 17978 }, { "epoch": 0.03188002613523833, "grad_norm": 0.28515625, "learning_rate": 0.0018618470127055868, "loss": 0.2272, "step": 17980 }, { "epoch": 0.03188357230054815, "grad_norm": 0.59375, "learning_rate": 0.0018618153236685218, "loss": 0.1658, "step": 17982 }, { "epoch": 0.03188711846585796, "grad_norm": 0.5625, "learning_rate": 0.0018617836312997192, "loss": 0.1885, "step": 17984 }, { "epoch": 0.03189066463116778, "grad_norm": 0.6484375, "learning_rate": 0.0018617519355993172, "loss": 0.2103, "step": 17986 }, { "epoch": 0.03189421079647759, "grad_norm": 0.234375, "learning_rate": 0.001861720236567455, "loss": 0.2018, "step": 17988 }, { "epoch": 0.031897756961787406, "grad_norm": 0.4140625, "learning_rate": 0.001861688534204271, "loss": 0.1625, "step": 17990 }, { "epoch": 0.03190130312709723, "grad_norm": 2.90625, "learning_rate": 0.0018616568285099036, "loss": 0.2244, "step": 17992 }, { "epoch": 0.03190484929240704, "grad_norm": 0.35546875, "learning_rate": 0.001861625119484492, "loss": 0.2451, "step": 17994 }, { "epoch": 0.03190839545771686, "grad_norm": 0.392578125, "learning_rate": 0.0018615934071281742, "loss": 0.2537, "step": 17996 }, { "epoch": 0.03191194162302667, "grad_norm": 0.35546875, "learning_rate": 0.0018615616914410891, "loss": 0.21, "step": 17998 }, { "epoch": 0.031915487788336486, "grad_norm": 0.7734375, "learning_rate": 0.0018615299724233756, "loss": 0.4146, "step": 18000 }, { "epoch": 0.0319190339536463, "grad_norm": 0.30859375, "learning_rate": 0.0018614982500751724, "loss": 0.2046, "step": 18002 }, { "epoch": 0.031922580118956115, "grad_norm": 1.71875, "learning_rate": 0.0018614665243966183, "loss": 0.5474, "step": 18004 }, { "epoch": 0.03192612628426593, "grad_norm": 0.380859375, "learning_rate": 0.0018614347953878519, "loss": 0.1986, "step": 18006 }, { "epoch": 0.031929672449575744, "grad_norm": 0.337890625, "learning_rate": 0.0018614030630490122, "loss": 0.1813, "step": 18008 }, { "epoch": 0.03193321861488556, "grad_norm": 0.7890625, "learning_rate": 0.0018613713273802374, "loss": 0.2413, "step": 18010 }, { "epoch": 0.03193676478019537, "grad_norm": 0.2158203125, "learning_rate": 0.0018613395883816668, "loss": 0.1492, "step": 18012 }, { "epoch": 0.031940310945505195, "grad_norm": 0.46875, "learning_rate": 0.001861307846053439, "loss": 0.1784, "step": 18014 }, { "epoch": 0.03194385711081501, "grad_norm": 0.337890625, "learning_rate": 0.0018612761003956933, "loss": 0.2061, "step": 18016 }, { "epoch": 0.031947403276124824, "grad_norm": 0.9765625, "learning_rate": 0.0018612443514085678, "loss": 0.2748, "step": 18018 }, { "epoch": 0.03195094944143464, "grad_norm": 0.7890625, "learning_rate": 0.0018612125990922017, "loss": 0.2316, "step": 18020 }, { "epoch": 0.03195449560674445, "grad_norm": 0.70703125, "learning_rate": 0.001861180843446734, "loss": 0.2314, "step": 18022 }, { "epoch": 0.03195804177205427, "grad_norm": 1.3515625, "learning_rate": 0.0018611490844723032, "loss": 0.2928, "step": 18024 }, { "epoch": 0.03196158793736408, "grad_norm": 0.59765625, "learning_rate": 0.0018611173221690484, "loss": 0.2028, "step": 18026 }, { "epoch": 0.031965134102673896, "grad_norm": 0.6484375, "learning_rate": 0.0018610855565371087, "loss": 0.2538, "step": 18028 }, { "epoch": 0.03196868026798371, "grad_norm": 0.26171875, "learning_rate": 0.0018610537875766232, "loss": 0.1566, "step": 18030 }, { "epoch": 0.031972226433293525, "grad_norm": 0.5546875, "learning_rate": 0.0018610220152877302, "loss": 0.2096, "step": 18032 }, { "epoch": 0.03197577259860334, "grad_norm": 0.298828125, "learning_rate": 0.0018609902396705688, "loss": 0.1914, "step": 18034 }, { "epoch": 0.031979318763913155, "grad_norm": 0.78515625, "learning_rate": 0.0018609584607252783, "loss": 0.2491, "step": 18036 }, { "epoch": 0.031982864929222976, "grad_norm": 0.30078125, "learning_rate": 0.0018609266784519977, "loss": 0.2483, "step": 18038 }, { "epoch": 0.03198641109453279, "grad_norm": 0.55078125, "learning_rate": 0.0018608948928508654, "loss": 0.2743, "step": 18040 }, { "epoch": 0.031989957259842605, "grad_norm": 1.078125, "learning_rate": 0.0018608631039220209, "loss": 0.2122, "step": 18042 }, { "epoch": 0.03199350342515242, "grad_norm": 0.2578125, "learning_rate": 0.0018608313116656033, "loss": 0.2182, "step": 18044 }, { "epoch": 0.031997049590462234, "grad_norm": 1.2109375, "learning_rate": 0.0018607995160817516, "loss": 0.239, "step": 18046 }, { "epoch": 0.03200059575577205, "grad_norm": 0.6484375, "learning_rate": 0.0018607677171706045, "loss": 0.4483, "step": 18048 }, { "epoch": 0.03200414192108186, "grad_norm": 0.8359375, "learning_rate": 0.0018607359149323012, "loss": 0.2102, "step": 18050 }, { "epoch": 0.03200768808639168, "grad_norm": 0.4921875, "learning_rate": 0.001860704109366981, "loss": 0.2599, "step": 18052 }, { "epoch": 0.03201123425170149, "grad_norm": 0.41796875, "learning_rate": 0.0018606723004747831, "loss": 0.2488, "step": 18054 }, { "epoch": 0.03201478041701131, "grad_norm": 1.015625, "learning_rate": 0.0018606404882558466, "loss": 0.2863, "step": 18056 }, { "epoch": 0.03201832658232112, "grad_norm": 1.7890625, "learning_rate": 0.00186060867271031, "loss": 0.2543, "step": 18058 }, { "epoch": 0.03202187274763094, "grad_norm": 0.455078125, "learning_rate": 0.001860576853838313, "loss": 0.2351, "step": 18060 }, { "epoch": 0.03202541891294076, "grad_norm": 1.2734375, "learning_rate": 0.0018605450316399948, "loss": 0.2415, "step": 18062 }, { "epoch": 0.03202896507825057, "grad_norm": 0.302734375, "learning_rate": 0.001860513206115494, "loss": 0.253, "step": 18064 }, { "epoch": 0.03203251124356039, "grad_norm": 0.44921875, "learning_rate": 0.0018604813772649504, "loss": 0.2276, "step": 18066 }, { "epoch": 0.0320360574088702, "grad_norm": 0.2578125, "learning_rate": 0.0018604495450885033, "loss": 0.2334, "step": 18068 }, { "epoch": 0.032039603574180016, "grad_norm": 0.2421875, "learning_rate": 0.0018604177095862913, "loss": 0.1755, "step": 18070 }, { "epoch": 0.03204314973948983, "grad_norm": 1.1484375, "learning_rate": 0.001860385870758454, "loss": 0.5021, "step": 18072 }, { "epoch": 0.032046695904799645, "grad_norm": 0.41015625, "learning_rate": 0.0018603540286051307, "loss": 0.1723, "step": 18074 }, { "epoch": 0.03205024207010946, "grad_norm": 0.32421875, "learning_rate": 0.0018603221831264604, "loss": 0.1575, "step": 18076 }, { "epoch": 0.032053788235419274, "grad_norm": 0.58984375, "learning_rate": 0.0018602903343225825, "loss": 0.2388, "step": 18078 }, { "epoch": 0.03205733440072909, "grad_norm": 0.359375, "learning_rate": 0.0018602584821936362, "loss": 0.2637, "step": 18080 }, { "epoch": 0.03206088056603891, "grad_norm": 2.453125, "learning_rate": 0.0018602266267397612, "loss": 0.3091, "step": 18082 }, { "epoch": 0.032064426731348725, "grad_norm": 0.396484375, "learning_rate": 0.0018601947679610963, "loss": 0.2268, "step": 18084 }, { "epoch": 0.03206797289665854, "grad_norm": 0.353515625, "learning_rate": 0.0018601629058577812, "loss": 0.2248, "step": 18086 }, { "epoch": 0.032071519061968354, "grad_norm": 1.8125, "learning_rate": 0.0018601310404299553, "loss": 0.2685, "step": 18088 }, { "epoch": 0.03207506522727817, "grad_norm": 0.34765625, "learning_rate": 0.0018600991716777571, "loss": 0.1575, "step": 18090 }, { "epoch": 0.03207861139258798, "grad_norm": 0.609375, "learning_rate": 0.001860067299601327, "loss": 0.287, "step": 18092 }, { "epoch": 0.0320821575578978, "grad_norm": 1.4609375, "learning_rate": 0.0018600354242008044, "loss": 0.2244, "step": 18094 }, { "epoch": 0.03208570372320761, "grad_norm": 0.3828125, "learning_rate": 0.0018600035454763278, "loss": 0.1829, "step": 18096 }, { "epoch": 0.03208924988851743, "grad_norm": 1.1640625, "learning_rate": 0.0018599716634280377, "loss": 0.328, "step": 18098 }, { "epoch": 0.03209279605382724, "grad_norm": 2.390625, "learning_rate": 0.0018599397780560726, "loss": 0.3586, "step": 18100 }, { "epoch": 0.032096342219137056, "grad_norm": 0.2431640625, "learning_rate": 0.0018599078893605722, "loss": 0.132, "step": 18102 }, { "epoch": 0.03209988838444687, "grad_norm": 2.546875, "learning_rate": 0.0018598759973416762, "loss": 0.3367, "step": 18104 }, { "epoch": 0.03210343454975669, "grad_norm": 0.474609375, "learning_rate": 0.0018598441019995243, "loss": 0.2589, "step": 18106 }, { "epoch": 0.032106980715066506, "grad_norm": 0.2451171875, "learning_rate": 0.0018598122033342554, "loss": 0.4878, "step": 18108 }, { "epoch": 0.03211052688037632, "grad_norm": 0.435546875, "learning_rate": 0.0018597803013460092, "loss": 0.205, "step": 18110 }, { "epoch": 0.032114073045686135, "grad_norm": 0.330078125, "learning_rate": 0.0018597483960349258, "loss": 0.2288, "step": 18112 }, { "epoch": 0.03211761921099595, "grad_norm": 0.58203125, "learning_rate": 0.0018597164874011437, "loss": 0.1929, "step": 18114 }, { "epoch": 0.032121165376305764, "grad_norm": 0.322265625, "learning_rate": 0.001859684575444803, "loss": 0.22, "step": 18116 }, { "epoch": 0.03212471154161558, "grad_norm": 3.140625, "learning_rate": 0.0018596526601660432, "loss": 0.2775, "step": 18118 }, { "epoch": 0.032128257706925394, "grad_norm": 0.98046875, "learning_rate": 0.0018596207415650043, "loss": 0.2005, "step": 18120 }, { "epoch": 0.03213180387223521, "grad_norm": 0.453125, "learning_rate": 0.0018595888196418255, "loss": 0.3944, "step": 18122 }, { "epoch": 0.03213535003754502, "grad_norm": 3.71875, "learning_rate": 0.0018595568943966458, "loss": 0.2905, "step": 18124 }, { "epoch": 0.03213889620285484, "grad_norm": 3.28125, "learning_rate": 0.0018595249658296062, "loss": 0.6198, "step": 18126 }, { "epoch": 0.03214244236816466, "grad_norm": 0.28515625, "learning_rate": 0.0018594930339408454, "loss": 0.1551, "step": 18128 }, { "epoch": 0.03214598853347447, "grad_norm": 0.59765625, "learning_rate": 0.001859461098730503, "loss": 0.2012, "step": 18130 }, { "epoch": 0.03214953469878429, "grad_norm": 0.265625, "learning_rate": 0.001859429160198719, "loss": 0.3668, "step": 18132 }, { "epoch": 0.0321530808640941, "grad_norm": 1.046875, "learning_rate": 0.0018593972183456329, "loss": 0.373, "step": 18134 }, { "epoch": 0.03215662702940392, "grad_norm": 0.5078125, "learning_rate": 0.0018593652731713846, "loss": 0.2481, "step": 18136 }, { "epoch": 0.03216017319471373, "grad_norm": 0.38671875, "learning_rate": 0.0018593333246761138, "loss": 0.3061, "step": 18138 }, { "epoch": 0.032163719360023546, "grad_norm": 0.73046875, "learning_rate": 0.00185930137285996, "loss": 0.1828, "step": 18140 }, { "epoch": 0.03216726552533336, "grad_norm": 0.66796875, "learning_rate": 0.001859269417723063, "loss": 0.1925, "step": 18142 }, { "epoch": 0.032170811690643175, "grad_norm": 2.5, "learning_rate": 0.0018592374592655626, "loss": 0.3239, "step": 18144 }, { "epoch": 0.03217435785595299, "grad_norm": 0.54296875, "learning_rate": 0.0018592054974875985, "loss": 0.187, "step": 18146 }, { "epoch": 0.032177904021262804, "grad_norm": 0.58203125, "learning_rate": 0.0018591735323893106, "loss": 0.2462, "step": 18148 }, { "epoch": 0.032181450186572626, "grad_norm": 0.7890625, "learning_rate": 0.0018591415639708385, "loss": 0.1938, "step": 18150 }, { "epoch": 0.03218499635188244, "grad_norm": 0.41015625, "learning_rate": 0.0018591095922323227, "loss": 0.1855, "step": 18152 }, { "epoch": 0.032188542517192255, "grad_norm": 0.52734375, "learning_rate": 0.001859077617173902, "loss": 0.2045, "step": 18154 }, { "epoch": 0.03219208868250207, "grad_norm": 0.87890625, "learning_rate": 0.001859045638795717, "loss": 0.1768, "step": 18156 }, { "epoch": 0.032195634847811884, "grad_norm": 0.484375, "learning_rate": 0.001859013657097907, "loss": 0.2209, "step": 18158 }, { "epoch": 0.0321991810131217, "grad_norm": 0.64453125, "learning_rate": 0.0018589816720806123, "loss": 0.1719, "step": 18160 }, { "epoch": 0.03220272717843151, "grad_norm": 2.125, "learning_rate": 0.0018589496837439728, "loss": 0.2348, "step": 18162 }, { "epoch": 0.03220627334374133, "grad_norm": 7.96875, "learning_rate": 0.001858917692088128, "loss": 0.2563, "step": 18164 }, { "epoch": 0.03220981950905114, "grad_norm": 0.66796875, "learning_rate": 0.001858885697113218, "loss": 0.2144, "step": 18166 }, { "epoch": 0.03221336567436096, "grad_norm": 0.63671875, "learning_rate": 0.001858853698819383, "loss": 0.1818, "step": 18168 }, { "epoch": 0.03221691183967077, "grad_norm": 0.73828125, "learning_rate": 0.0018588216972067625, "loss": 0.2706, "step": 18170 }, { "epoch": 0.032220458004980586, "grad_norm": 0.6953125, "learning_rate": 0.0018587896922754967, "loss": 0.1309, "step": 18172 }, { "epoch": 0.03222400417029041, "grad_norm": 0.341796875, "learning_rate": 0.0018587576840257255, "loss": 0.1866, "step": 18174 }, { "epoch": 0.03222755033560022, "grad_norm": 0.33984375, "learning_rate": 0.0018587256724575895, "loss": 0.2312, "step": 18176 }, { "epoch": 0.032231096500910036, "grad_norm": 0.68359375, "learning_rate": 0.0018586936575712275, "loss": 0.2164, "step": 18178 }, { "epoch": 0.03223464266621985, "grad_norm": 0.80078125, "learning_rate": 0.0018586616393667804, "loss": 0.22, "step": 18180 }, { "epoch": 0.032238188831529666, "grad_norm": 0.3203125, "learning_rate": 0.0018586296178443877, "loss": 0.1834, "step": 18182 }, { "epoch": 0.03224173499683948, "grad_norm": 0.55859375, "learning_rate": 0.0018585975930041898, "loss": 0.2287, "step": 18184 }, { "epoch": 0.032245281162149295, "grad_norm": 0.482421875, "learning_rate": 0.0018585655648463268, "loss": 0.261, "step": 18186 }, { "epoch": 0.03224882732745911, "grad_norm": 0.50390625, "learning_rate": 0.0018585335333709389, "loss": 0.2358, "step": 18188 }, { "epoch": 0.032252373492768924, "grad_norm": 0.451171875, "learning_rate": 0.0018585014985781653, "loss": 0.1816, "step": 18190 }, { "epoch": 0.03225591965807874, "grad_norm": 0.65234375, "learning_rate": 0.0018584694604681473, "loss": 0.1735, "step": 18192 }, { "epoch": 0.03225946582338855, "grad_norm": 0.28125, "learning_rate": 0.0018584374190410242, "loss": 0.159, "step": 18194 }, { "epoch": 0.032263011988698374, "grad_norm": 0.8515625, "learning_rate": 0.0018584053742969364, "loss": 0.235, "step": 18196 }, { "epoch": 0.03226655815400819, "grad_norm": 1.1484375, "learning_rate": 0.0018583733262360243, "loss": 0.1609, "step": 18198 }, { "epoch": 0.032270104319318, "grad_norm": 0.609375, "learning_rate": 0.0018583412748584274, "loss": 0.1851, "step": 18200 }, { "epoch": 0.03227365048462782, "grad_norm": 0.2578125, "learning_rate": 0.0018583092201642864, "loss": 0.23, "step": 18202 }, { "epoch": 0.03227719664993763, "grad_norm": 1.265625, "learning_rate": 0.0018582771621537414, "loss": 0.1818, "step": 18204 }, { "epoch": 0.03228074281524745, "grad_norm": 0.5546875, "learning_rate": 0.0018582451008269326, "loss": 0.2168, "step": 18206 }, { "epoch": 0.03228428898055726, "grad_norm": 0.45703125, "learning_rate": 0.0018582130361839999, "loss": 0.2498, "step": 18208 }, { "epoch": 0.032287835145867076, "grad_norm": 0.60546875, "learning_rate": 0.0018581809682250842, "loss": 0.2346, "step": 18210 }, { "epoch": 0.03229138131117689, "grad_norm": 0.244140625, "learning_rate": 0.001858148896950325, "loss": 0.2292, "step": 18212 }, { "epoch": 0.032294927476486705, "grad_norm": 1.0625, "learning_rate": 0.0018581168223598633, "loss": 0.2996, "step": 18214 }, { "epoch": 0.03229847364179652, "grad_norm": 0.3203125, "learning_rate": 0.0018580847444538385, "loss": 0.1784, "step": 18216 }, { "epoch": 0.03230201980710634, "grad_norm": 0.294921875, "learning_rate": 0.0018580526632323915, "loss": 0.2367, "step": 18218 }, { "epoch": 0.032305565972416156, "grad_norm": 0.482421875, "learning_rate": 0.0018580205786956627, "loss": 0.4424, "step": 18220 }, { "epoch": 0.03230911213772597, "grad_norm": 12.9375, "learning_rate": 0.0018579884908437921, "loss": 0.3309, "step": 18222 }, { "epoch": 0.032312658303035785, "grad_norm": 0.275390625, "learning_rate": 0.00185795639967692, "loss": 0.2179, "step": 18224 }, { "epoch": 0.0323162044683456, "grad_norm": 0.34375, "learning_rate": 0.0018579243051951872, "loss": 0.2933, "step": 18226 }, { "epoch": 0.032319750633655414, "grad_norm": 0.294921875, "learning_rate": 0.0018578922073987335, "loss": 0.2038, "step": 18228 }, { "epoch": 0.03232329679896523, "grad_norm": 0.1826171875, "learning_rate": 0.0018578601062876996, "loss": 0.163, "step": 18230 }, { "epoch": 0.03232684296427504, "grad_norm": 0.7890625, "learning_rate": 0.0018578280018622256, "loss": 0.2317, "step": 18232 }, { "epoch": 0.03233038912958486, "grad_norm": 0.26953125, "learning_rate": 0.0018577958941224525, "loss": 0.1973, "step": 18234 }, { "epoch": 0.03233393529489467, "grad_norm": 0.353515625, "learning_rate": 0.0018577637830685199, "loss": 0.1978, "step": 18236 }, { "epoch": 0.03233748146020449, "grad_norm": 1.1875, "learning_rate": 0.001857731668700569, "loss": 0.4513, "step": 18238 }, { "epoch": 0.0323410276255143, "grad_norm": 2.140625, "learning_rate": 0.0018576995510187397, "loss": 0.3938, "step": 18240 }, { "epoch": 0.03234457379082412, "grad_norm": 0.486328125, "learning_rate": 0.001857667430023173, "loss": 0.1946, "step": 18242 }, { "epoch": 0.03234811995613394, "grad_norm": 0.36328125, "learning_rate": 0.0018576353057140089, "loss": 0.2113, "step": 18244 }, { "epoch": 0.03235166612144375, "grad_norm": 2.90625, "learning_rate": 0.001857603178091388, "loss": 0.2205, "step": 18246 }, { "epoch": 0.03235521228675357, "grad_norm": 0.61328125, "learning_rate": 0.001857571047155451, "loss": 0.2187, "step": 18248 }, { "epoch": 0.03235875845206338, "grad_norm": 0.375, "learning_rate": 0.001857538912906338, "loss": 0.1649, "step": 18250 }, { "epoch": 0.032362304617373196, "grad_norm": 0.69921875, "learning_rate": 0.0018575067753441904, "loss": 0.2236, "step": 18252 }, { "epoch": 0.03236585078268301, "grad_norm": 0.96875, "learning_rate": 0.0018574746344691476, "loss": 0.2026, "step": 18254 }, { "epoch": 0.032369396947992825, "grad_norm": 0.8046875, "learning_rate": 0.0018574424902813508, "loss": 0.7105, "step": 18256 }, { "epoch": 0.03237294311330264, "grad_norm": 2.125, "learning_rate": 0.0018574103427809409, "loss": 0.3104, "step": 18258 }, { "epoch": 0.032376489278612454, "grad_norm": 0.609375, "learning_rate": 0.0018573781919680576, "loss": 0.1761, "step": 18260 }, { "epoch": 0.03238003544392227, "grad_norm": 0.83984375, "learning_rate": 0.0018573460378428423, "loss": 0.3704, "step": 18262 }, { "epoch": 0.03238358160923209, "grad_norm": 0.60546875, "learning_rate": 0.0018573138804054354, "loss": 0.2325, "step": 18264 }, { "epoch": 0.032387127774541905, "grad_norm": 0.287109375, "learning_rate": 0.0018572817196559775, "loss": 0.1435, "step": 18266 }, { "epoch": 0.03239067393985172, "grad_norm": 0.38671875, "learning_rate": 0.0018572495555946092, "loss": 0.2591, "step": 18268 }, { "epoch": 0.032394220105161534, "grad_norm": 0.6484375, "learning_rate": 0.0018572173882214714, "loss": 0.2314, "step": 18270 }, { "epoch": 0.03239776627047135, "grad_norm": 1.2109375, "learning_rate": 0.0018571852175367043, "loss": 0.2122, "step": 18272 }, { "epoch": 0.03240131243578116, "grad_norm": 0.78125, "learning_rate": 0.001857153043540449, "loss": 0.2188, "step": 18274 }, { "epoch": 0.03240485860109098, "grad_norm": 0.353515625, "learning_rate": 0.0018571208662328457, "loss": 0.2736, "step": 18276 }, { "epoch": 0.03240840476640079, "grad_norm": 0.46484375, "learning_rate": 0.001857088685614036, "loss": 0.1802, "step": 18278 }, { "epoch": 0.032411950931710606, "grad_norm": 1.0390625, "learning_rate": 0.0018570565016841603, "loss": 0.2233, "step": 18280 }, { "epoch": 0.03241549709702042, "grad_norm": 0.29296875, "learning_rate": 0.0018570243144433588, "loss": 0.1998, "step": 18282 }, { "epoch": 0.032419043262330235, "grad_norm": 0.3046875, "learning_rate": 0.001856992123891773, "loss": 0.2229, "step": 18284 }, { "epoch": 0.03242258942764006, "grad_norm": 0.44140625, "learning_rate": 0.0018569599300295432, "loss": 0.1703, "step": 18286 }, { "epoch": 0.03242613559294987, "grad_norm": 0.56640625, "learning_rate": 0.0018569277328568101, "loss": 0.2164, "step": 18288 }, { "epoch": 0.032429681758259686, "grad_norm": 0.6328125, "learning_rate": 0.0018568955323737153, "loss": 0.2132, "step": 18290 }, { "epoch": 0.0324332279235695, "grad_norm": 0.453125, "learning_rate": 0.0018568633285803988, "loss": 0.2348, "step": 18292 }, { "epoch": 0.032436774088879315, "grad_norm": 0.251953125, "learning_rate": 0.0018568311214770018, "loss": 0.2406, "step": 18294 }, { "epoch": 0.03244032025418913, "grad_norm": 0.353515625, "learning_rate": 0.001856798911063665, "loss": 0.1664, "step": 18296 }, { "epoch": 0.032443866419498944, "grad_norm": 1.1171875, "learning_rate": 0.0018567666973405294, "loss": 0.2333, "step": 18298 }, { "epoch": 0.03244741258480876, "grad_norm": 0.8984375, "learning_rate": 0.001856734480307736, "loss": 0.2664, "step": 18300 }, { "epoch": 0.03245095875011857, "grad_norm": 0.357421875, "learning_rate": 0.0018567022599654253, "loss": 0.2371, "step": 18302 }, { "epoch": 0.03245450491542839, "grad_norm": 0.97265625, "learning_rate": 0.0018566700363137388, "loss": 0.2988, "step": 18304 }, { "epoch": 0.0324580510807382, "grad_norm": 0.453125, "learning_rate": 0.0018566378093528167, "loss": 0.2135, "step": 18306 }, { "epoch": 0.03246159724604802, "grad_norm": 0.435546875, "learning_rate": 0.0018566055790828005, "loss": 0.4595, "step": 18308 }, { "epoch": 0.03246514341135784, "grad_norm": 1.46875, "learning_rate": 0.0018565733455038309, "loss": 0.2645, "step": 18310 }, { "epoch": 0.03246868957666765, "grad_norm": 1.109375, "learning_rate": 0.0018565411086160493, "loss": 0.2987, "step": 18312 }, { "epoch": 0.03247223574197747, "grad_norm": 0.419921875, "learning_rate": 0.0018565088684195958, "loss": 0.1973, "step": 18314 }, { "epoch": 0.03247578190728728, "grad_norm": 0.875, "learning_rate": 0.0018564766249146124, "loss": 0.2218, "step": 18316 }, { "epoch": 0.0324793280725971, "grad_norm": 2.984375, "learning_rate": 0.0018564443781012392, "loss": 0.1939, "step": 18318 }, { "epoch": 0.03248287423790691, "grad_norm": 0.29296875, "learning_rate": 0.0018564121279796182, "loss": 0.1703, "step": 18320 }, { "epoch": 0.032486420403216726, "grad_norm": 0.5859375, "learning_rate": 0.0018563798745498896, "loss": 0.2094, "step": 18322 }, { "epoch": 0.03248996656852654, "grad_norm": 1.265625, "learning_rate": 0.001856347617812195, "loss": 0.3317, "step": 18324 }, { "epoch": 0.032493512733836355, "grad_norm": 1.9453125, "learning_rate": 0.001856315357766675, "loss": 0.3003, "step": 18326 }, { "epoch": 0.03249705889914617, "grad_norm": 0.609375, "learning_rate": 0.0018562830944134712, "loss": 0.2795, "step": 18328 }, { "epoch": 0.032500605064455984, "grad_norm": 0.78125, "learning_rate": 0.0018562508277527243, "loss": 0.1912, "step": 18330 }, { "epoch": 0.032504151229765806, "grad_norm": 6.28125, "learning_rate": 0.0018562185577845757, "loss": 0.3172, "step": 18332 }, { "epoch": 0.03250769739507562, "grad_norm": 1.640625, "learning_rate": 0.0018561862845091661, "loss": 0.2308, "step": 18334 }, { "epoch": 0.032511243560385435, "grad_norm": 1.0703125, "learning_rate": 0.0018561540079266373, "loss": 0.2428, "step": 18336 }, { "epoch": 0.03251478972569525, "grad_norm": 0.828125, "learning_rate": 0.0018561217280371298, "loss": 0.6595, "step": 18338 }, { "epoch": 0.032518335891005064, "grad_norm": 0.2578125, "learning_rate": 0.0018560894448407853, "loss": 0.4818, "step": 18340 }, { "epoch": 0.03252188205631488, "grad_norm": 1.1796875, "learning_rate": 0.0018560571583377445, "loss": 0.2607, "step": 18342 }, { "epoch": 0.03252542822162469, "grad_norm": 4.25, "learning_rate": 0.001856024868528149, "loss": 0.4292, "step": 18344 }, { "epoch": 0.03252897438693451, "grad_norm": 0.62890625, "learning_rate": 0.0018559925754121401, "loss": 0.2462, "step": 18346 }, { "epoch": 0.03253252055224432, "grad_norm": 0.296875, "learning_rate": 0.0018559602789898584, "loss": 0.1767, "step": 18348 }, { "epoch": 0.03253606671755414, "grad_norm": 0.283203125, "learning_rate": 0.0018559279792614459, "loss": 0.2148, "step": 18350 }, { "epoch": 0.03253961288286395, "grad_norm": 0.609375, "learning_rate": 0.001855895676227043, "loss": 0.2486, "step": 18352 }, { "epoch": 0.03254315904817377, "grad_norm": 0.703125, "learning_rate": 0.0018558633698867921, "loss": 0.2073, "step": 18354 }, { "epoch": 0.03254670521348359, "grad_norm": 0.828125, "learning_rate": 0.0018558310602408334, "loss": 0.4217, "step": 18356 }, { "epoch": 0.0325502513787934, "grad_norm": 1.6171875, "learning_rate": 0.0018557987472893093, "loss": 0.2194, "step": 18358 }, { "epoch": 0.032553797544103216, "grad_norm": 0.482421875, "learning_rate": 0.0018557664310323598, "loss": 0.2198, "step": 18360 }, { "epoch": 0.03255734370941303, "grad_norm": 0.66796875, "learning_rate": 0.0018557341114701273, "loss": 0.2548, "step": 18362 }, { "epoch": 0.032560889874722845, "grad_norm": 0.90625, "learning_rate": 0.0018557017886027527, "loss": 0.3193, "step": 18364 }, { "epoch": 0.03256443604003266, "grad_norm": 1.0703125, "learning_rate": 0.0018556694624303777, "loss": 0.2035, "step": 18366 }, { "epoch": 0.032567982205342474, "grad_norm": 1.859375, "learning_rate": 0.001855637132953143, "loss": 0.2947, "step": 18368 }, { "epoch": 0.03257152837065229, "grad_norm": 0.412109375, "learning_rate": 0.0018556048001711907, "loss": 0.2424, "step": 18370 }, { "epoch": 0.032575074535962104, "grad_norm": 0.224609375, "learning_rate": 0.0018555724640846618, "loss": 0.3273, "step": 18372 }, { "epoch": 0.03257862070127192, "grad_norm": 0.35546875, "learning_rate": 0.0018555401246936978, "loss": 0.224, "step": 18374 }, { "epoch": 0.03258216686658173, "grad_norm": 0.28125, "learning_rate": 0.0018555077819984403, "loss": 0.1635, "step": 18376 }, { "epoch": 0.032585713031891554, "grad_norm": 0.38671875, "learning_rate": 0.0018554754359990302, "loss": 0.1918, "step": 18378 }, { "epoch": 0.03258925919720137, "grad_norm": 0.275390625, "learning_rate": 0.0018554430866956097, "loss": 0.2205, "step": 18380 }, { "epoch": 0.03259280536251118, "grad_norm": 0.205078125, "learning_rate": 0.0018554107340883201, "loss": 0.222, "step": 18382 }, { "epoch": 0.032596351527821, "grad_norm": 2.625, "learning_rate": 0.0018553783781773026, "loss": 0.2785, "step": 18384 }, { "epoch": 0.03259989769313081, "grad_norm": 0.51171875, "learning_rate": 0.0018553460189626987, "loss": 0.2235, "step": 18386 }, { "epoch": 0.03260344385844063, "grad_norm": 0.35546875, "learning_rate": 0.0018553136564446503, "loss": 0.2468, "step": 18388 }, { "epoch": 0.03260699002375044, "grad_norm": 1.4140625, "learning_rate": 0.0018552812906232985, "loss": 0.3162, "step": 18390 }, { "epoch": 0.032610536189060256, "grad_norm": 1.125, "learning_rate": 0.0018552489214987851, "loss": 0.2348, "step": 18392 }, { "epoch": 0.03261408235437007, "grad_norm": 0.244140625, "learning_rate": 0.0018552165490712518, "loss": 0.1933, "step": 18394 }, { "epoch": 0.032617628519679885, "grad_norm": 1.3046875, "learning_rate": 0.00185518417334084, "loss": 0.1257, "step": 18396 }, { "epoch": 0.0326211746849897, "grad_norm": 0.59375, "learning_rate": 0.001855151794307691, "loss": 0.1953, "step": 18398 }, { "epoch": 0.03262472085029952, "grad_norm": 0.275390625, "learning_rate": 0.0018551194119719467, "loss": 0.2064, "step": 18400 }, { "epoch": 0.032628267015609336, "grad_norm": 0.6484375, "learning_rate": 0.0018550870263337493, "loss": 0.2442, "step": 18402 }, { "epoch": 0.03263181318091915, "grad_norm": 0.380859375, "learning_rate": 0.0018550546373932392, "loss": 0.261, "step": 18404 }, { "epoch": 0.032635359346228965, "grad_norm": 0.66015625, "learning_rate": 0.001855022245150559, "loss": 0.2579, "step": 18406 }, { "epoch": 0.03263890551153878, "grad_norm": 0.6484375, "learning_rate": 0.0018549898496058497, "loss": 0.1617, "step": 18408 }, { "epoch": 0.032642451676848594, "grad_norm": 0.9921875, "learning_rate": 0.0018549574507592537, "loss": 0.3228, "step": 18410 }, { "epoch": 0.03264599784215841, "grad_norm": 0.2353515625, "learning_rate": 0.0018549250486109123, "loss": 0.2052, "step": 18412 }, { "epoch": 0.03264954400746822, "grad_norm": 0.4921875, "learning_rate": 0.0018548926431609673, "loss": 0.2159, "step": 18414 }, { "epoch": 0.03265309017277804, "grad_norm": 0.37109375, "learning_rate": 0.0018548602344095604, "loss": 0.1717, "step": 18416 }, { "epoch": 0.03265663633808785, "grad_norm": 0.76953125, "learning_rate": 0.0018548278223568333, "loss": 0.169, "step": 18418 }, { "epoch": 0.03266018250339767, "grad_norm": 0.5390625, "learning_rate": 0.0018547954070029277, "loss": 0.1941, "step": 18420 }, { "epoch": 0.03266372866870749, "grad_norm": 0.5078125, "learning_rate": 0.0018547629883479853, "loss": 0.2066, "step": 18422 }, { "epoch": 0.0326672748340173, "grad_norm": 2.0, "learning_rate": 0.0018547305663921478, "loss": 0.3647, "step": 18424 }, { "epoch": 0.03267082099932712, "grad_norm": 2.671875, "learning_rate": 0.001854698141135558, "loss": 0.359, "step": 18426 }, { "epoch": 0.03267436716463693, "grad_norm": 0.5, "learning_rate": 0.0018546657125783563, "loss": 0.1781, "step": 18428 }, { "epoch": 0.032677913329946746, "grad_norm": 0.40234375, "learning_rate": 0.0018546332807206853, "loss": 0.198, "step": 18430 }, { "epoch": 0.03268145949525656, "grad_norm": 0.546875, "learning_rate": 0.0018546008455626866, "loss": 0.1844, "step": 18432 }, { "epoch": 0.032685005660566376, "grad_norm": 0.435546875, "learning_rate": 0.001854568407104502, "loss": 0.285, "step": 18434 }, { "epoch": 0.03268855182587619, "grad_norm": 1.2109375, "learning_rate": 0.0018545359653462737, "loss": 0.3232, "step": 18436 }, { "epoch": 0.032692097991186005, "grad_norm": 1.65625, "learning_rate": 0.0018545035202881435, "loss": 0.354, "step": 18438 }, { "epoch": 0.03269564415649582, "grad_norm": 0.90234375, "learning_rate": 0.0018544710719302529, "loss": 0.2312, "step": 18440 }, { "epoch": 0.032699190321805634, "grad_norm": 0.326171875, "learning_rate": 0.0018544386202727441, "loss": 0.2175, "step": 18442 }, { "epoch": 0.03270273648711545, "grad_norm": 0.34765625, "learning_rate": 0.0018544061653157592, "loss": 0.1953, "step": 18444 }, { "epoch": 0.03270628265242527, "grad_norm": 0.51171875, "learning_rate": 0.0018543737070594397, "loss": 0.2682, "step": 18446 }, { "epoch": 0.032709828817735084, "grad_norm": 2.421875, "learning_rate": 0.001854341245503928, "loss": 0.221, "step": 18448 }, { "epoch": 0.0327133749830449, "grad_norm": 0.5390625, "learning_rate": 0.0018543087806493657, "loss": 0.2892, "step": 18450 }, { "epoch": 0.03271692114835471, "grad_norm": 1.8203125, "learning_rate": 0.001854276312495895, "loss": 0.5521, "step": 18452 }, { "epoch": 0.03272046731366453, "grad_norm": 0.56640625, "learning_rate": 0.0018542438410436579, "loss": 0.4132, "step": 18454 }, { "epoch": 0.03272401347897434, "grad_norm": 1.296875, "learning_rate": 0.0018542113662927961, "loss": 0.1982, "step": 18456 }, { "epoch": 0.03272755964428416, "grad_norm": 0.8046875, "learning_rate": 0.0018541788882434523, "loss": 0.2705, "step": 18458 }, { "epoch": 0.03273110580959397, "grad_norm": 0.1591796875, "learning_rate": 0.001854146406895768, "loss": 0.1707, "step": 18460 }, { "epoch": 0.032734651974903786, "grad_norm": 2.015625, "learning_rate": 0.0018541139222498853, "loss": 0.3587, "step": 18462 }, { "epoch": 0.0327381981402136, "grad_norm": 0.2373046875, "learning_rate": 0.0018540814343059465, "loss": 0.2082, "step": 18464 }, { "epoch": 0.032741744305523415, "grad_norm": 0.376953125, "learning_rate": 0.0018540489430640932, "loss": 0.1517, "step": 18466 }, { "epoch": 0.03274529047083324, "grad_norm": 0.53515625, "learning_rate": 0.0018540164485244682, "loss": 0.2393, "step": 18468 }, { "epoch": 0.03274883663614305, "grad_norm": 0.96484375, "learning_rate": 0.001853983950687213, "loss": 0.1924, "step": 18470 }, { "epoch": 0.032752382801452866, "grad_norm": 0.453125, "learning_rate": 0.00185395144955247, "loss": 0.1957, "step": 18472 }, { "epoch": 0.03275592896676268, "grad_norm": 0.40625, "learning_rate": 0.0018539189451203815, "loss": 0.2412, "step": 18474 }, { "epoch": 0.032759475132072495, "grad_norm": 0.63671875, "learning_rate": 0.0018538864373910892, "loss": 0.353, "step": 18476 }, { "epoch": 0.03276302129738231, "grad_norm": 0.462890625, "learning_rate": 0.0018538539263647355, "loss": 0.3638, "step": 18478 }, { "epoch": 0.032766567462692124, "grad_norm": 1.0625, "learning_rate": 0.0018538214120414629, "loss": 0.2229, "step": 18480 }, { "epoch": 0.03277011362800194, "grad_norm": 0.828125, "learning_rate": 0.0018537888944214131, "loss": 0.2285, "step": 18482 }, { "epoch": 0.03277365979331175, "grad_norm": 0.65234375, "learning_rate": 0.0018537563735047287, "loss": 0.2865, "step": 18484 }, { "epoch": 0.03277720595862157, "grad_norm": 0.443359375, "learning_rate": 0.0018537238492915516, "loss": 0.1738, "step": 18486 }, { "epoch": 0.03278075212393138, "grad_norm": 0.6484375, "learning_rate": 0.0018536913217820242, "loss": 0.223, "step": 18488 }, { "epoch": 0.032784298289241204, "grad_norm": 0.279296875, "learning_rate": 0.0018536587909762888, "loss": 0.2491, "step": 18490 }, { "epoch": 0.03278784445455102, "grad_norm": 0.337890625, "learning_rate": 0.0018536262568744878, "loss": 0.3263, "step": 18492 }, { "epoch": 0.03279139061986083, "grad_norm": 0.5078125, "learning_rate": 0.001853593719476763, "loss": 0.2398, "step": 18494 }, { "epoch": 0.03279493678517065, "grad_norm": 1.3984375, "learning_rate": 0.0018535611787832572, "loss": 0.2213, "step": 18496 }, { "epoch": 0.03279848295048046, "grad_norm": 0.4140625, "learning_rate": 0.0018535286347941126, "loss": 0.2515, "step": 18498 }, { "epoch": 0.03280202911579028, "grad_norm": 0.4765625, "learning_rate": 0.001853496087509471, "loss": 0.3427, "step": 18500 }, { "epoch": 0.03280557528110009, "grad_norm": 0.28515625, "learning_rate": 0.0018534635369294756, "loss": 0.2382, "step": 18502 }, { "epoch": 0.032809121446409906, "grad_norm": 0.43359375, "learning_rate": 0.001853430983054268, "loss": 0.1691, "step": 18504 }, { "epoch": 0.03281266761171972, "grad_norm": 0.2734375, "learning_rate": 0.0018533984258839915, "loss": 0.1911, "step": 18506 }, { "epoch": 0.032816213777029535, "grad_norm": 1.1953125, "learning_rate": 0.0018533658654187877, "loss": 0.2575, "step": 18508 }, { "epoch": 0.03281975994233935, "grad_norm": 0.1611328125, "learning_rate": 0.0018533333016587993, "loss": 0.1656, "step": 18510 }, { "epoch": 0.032823306107649164, "grad_norm": 0.65625, "learning_rate": 0.001853300734604168, "loss": 0.2313, "step": 18512 }, { "epoch": 0.032826852272958985, "grad_norm": 0.484375, "learning_rate": 0.0018532681642550375, "loss": 0.25, "step": 18514 }, { "epoch": 0.0328303984382688, "grad_norm": 1.296875, "learning_rate": 0.0018532355906115494, "loss": 0.3014, "step": 18516 }, { "epoch": 0.032833944603578614, "grad_norm": 0.294921875, "learning_rate": 0.0018532030136738464, "loss": 0.4298, "step": 18518 }, { "epoch": 0.03283749076888843, "grad_norm": 0.99609375, "learning_rate": 0.0018531704334420708, "loss": 0.2297, "step": 18520 }, { "epoch": 0.032841036934198244, "grad_norm": 0.26171875, "learning_rate": 0.001853137849916365, "loss": 0.2198, "step": 18522 }, { "epoch": 0.03284458309950806, "grad_norm": 1.203125, "learning_rate": 0.0018531052630968723, "loss": 0.2834, "step": 18524 }, { "epoch": 0.03284812926481787, "grad_norm": 0.69921875, "learning_rate": 0.0018530726729837344, "loss": 0.1838, "step": 18526 }, { "epoch": 0.03285167543012769, "grad_norm": 0.73046875, "learning_rate": 0.001853040079577094, "loss": 0.25, "step": 18528 }, { "epoch": 0.0328552215954375, "grad_norm": 0.474609375, "learning_rate": 0.0018530074828770938, "loss": 0.1778, "step": 18530 }, { "epoch": 0.032858767760747316, "grad_norm": 0.251953125, "learning_rate": 0.001852974882883876, "loss": 0.2248, "step": 18532 }, { "epoch": 0.03286231392605713, "grad_norm": 0.41015625, "learning_rate": 0.0018529422795975836, "loss": 0.1937, "step": 18534 }, { "epoch": 0.03286586009136695, "grad_norm": 0.5625, "learning_rate": 0.0018529096730183592, "loss": 0.5235, "step": 18536 }, { "epoch": 0.03286940625667677, "grad_norm": 0.470703125, "learning_rate": 0.001852877063146345, "loss": 0.2376, "step": 18538 }, { "epoch": 0.03287295242198658, "grad_norm": 0.41796875, "learning_rate": 0.001852844449981684, "loss": 0.2307, "step": 18540 }, { "epoch": 0.032876498587296396, "grad_norm": 0.2265625, "learning_rate": 0.0018528118335245187, "loss": 0.2056, "step": 18542 }, { "epoch": 0.03288004475260621, "grad_norm": 0.333984375, "learning_rate": 0.0018527792137749918, "loss": 0.2216, "step": 18544 }, { "epoch": 0.032883590917916025, "grad_norm": 1.09375, "learning_rate": 0.0018527465907332457, "loss": 0.3162, "step": 18546 }, { "epoch": 0.03288713708322584, "grad_norm": 0.357421875, "learning_rate": 0.0018527139643994234, "loss": 0.2156, "step": 18548 }, { "epoch": 0.032890683248535654, "grad_norm": 0.435546875, "learning_rate": 0.0018526813347736672, "loss": 0.1578, "step": 18550 }, { "epoch": 0.03289422941384547, "grad_norm": 1.390625, "learning_rate": 0.0018526487018561205, "loss": 0.4636, "step": 18552 }, { "epoch": 0.03289777557915528, "grad_norm": 1.5625, "learning_rate": 0.0018526160656469253, "loss": 0.2631, "step": 18554 }, { "epoch": 0.0329013217444651, "grad_norm": 0.298828125, "learning_rate": 0.0018525834261462243, "loss": 0.3989, "step": 18556 }, { "epoch": 0.03290486790977492, "grad_norm": 0.40625, "learning_rate": 0.0018525507833541607, "loss": 0.2258, "step": 18558 }, { "epoch": 0.032908414075084734, "grad_norm": 1.171875, "learning_rate": 0.0018525181372708772, "loss": 0.1785, "step": 18560 }, { "epoch": 0.03291196024039455, "grad_norm": 0.29296875, "learning_rate": 0.0018524854878965169, "loss": 0.1763, "step": 18562 }, { "epoch": 0.03291550640570436, "grad_norm": 0.6328125, "learning_rate": 0.0018524528352312215, "loss": 0.3331, "step": 18564 }, { "epoch": 0.03291905257101418, "grad_norm": 0.7890625, "learning_rate": 0.0018524201792751347, "loss": 0.1974, "step": 18566 }, { "epoch": 0.03292259873632399, "grad_norm": 0.23828125, "learning_rate": 0.0018523875200283993, "loss": 0.1745, "step": 18568 }, { "epoch": 0.03292614490163381, "grad_norm": 0.6875, "learning_rate": 0.0018523548574911578, "loss": 0.2699, "step": 18570 }, { "epoch": 0.03292969106694362, "grad_norm": 0.48046875, "learning_rate": 0.0018523221916635533, "loss": 0.2172, "step": 18572 }, { "epoch": 0.032933237232253436, "grad_norm": 2.46875, "learning_rate": 0.0018522895225457283, "loss": 0.2807, "step": 18574 }, { "epoch": 0.03293678339756325, "grad_norm": 0.46484375, "learning_rate": 0.0018522568501378258, "loss": 0.2051, "step": 18576 }, { "epoch": 0.032940329562873065, "grad_norm": 0.2353515625, "learning_rate": 0.0018522241744399893, "loss": 0.2094, "step": 18578 }, { "epoch": 0.03294387572818288, "grad_norm": 1.3828125, "learning_rate": 0.0018521914954523606, "loss": 0.2467, "step": 18580 }, { "epoch": 0.0329474218934927, "grad_norm": 0.39453125, "learning_rate": 0.0018521588131750835, "loss": 0.2121, "step": 18582 }, { "epoch": 0.032950968058802516, "grad_norm": 0.455078125, "learning_rate": 0.0018521261276083008, "loss": 0.228, "step": 18584 }, { "epoch": 0.03295451422411233, "grad_norm": 0.306640625, "learning_rate": 0.0018520934387521552, "loss": 0.2836, "step": 18586 }, { "epoch": 0.032958060389422145, "grad_norm": 0.283203125, "learning_rate": 0.0018520607466067896, "loss": 0.1817, "step": 18588 }, { "epoch": 0.03296160655473196, "grad_norm": 0.5234375, "learning_rate": 0.0018520280511723475, "loss": 0.193, "step": 18590 }, { "epoch": 0.032965152720041774, "grad_norm": 1.4921875, "learning_rate": 0.0018519953524489712, "loss": 0.2192, "step": 18592 }, { "epoch": 0.03296869888535159, "grad_norm": 0.94140625, "learning_rate": 0.0018519626504368044, "loss": 0.2242, "step": 18594 }, { "epoch": 0.0329722450506614, "grad_norm": 0.578125, "learning_rate": 0.0018519299451359894, "loss": 0.1593, "step": 18596 }, { "epoch": 0.03297579121597122, "grad_norm": 0.546875, "learning_rate": 0.0018518972365466698, "loss": 0.2202, "step": 18598 }, { "epoch": 0.03297933738128103, "grad_norm": 0.318359375, "learning_rate": 0.0018518645246689883, "loss": 0.3765, "step": 18600 }, { "epoch": 0.03298288354659085, "grad_norm": 1.140625, "learning_rate": 0.0018518318095030882, "loss": 0.2371, "step": 18602 }, { "epoch": 0.03298642971190067, "grad_norm": 0.33984375, "learning_rate": 0.0018517990910491126, "loss": 0.1665, "step": 18604 }, { "epoch": 0.03298997587721048, "grad_norm": 0.51171875, "learning_rate": 0.0018517663693072043, "loss": 0.2465, "step": 18606 }, { "epoch": 0.0329935220425203, "grad_norm": 0.375, "learning_rate": 0.0018517336442775065, "loss": 0.214, "step": 18608 }, { "epoch": 0.03299706820783011, "grad_norm": 0.330078125, "learning_rate": 0.0018517009159601623, "loss": 0.217, "step": 18610 }, { "epoch": 0.033000614373139926, "grad_norm": 1.046875, "learning_rate": 0.0018516681843553152, "loss": 0.2101, "step": 18612 }, { "epoch": 0.03300416053844974, "grad_norm": 0.34375, "learning_rate": 0.001851635449463108, "loss": 0.1815, "step": 18614 }, { "epoch": 0.033007706703759555, "grad_norm": 0.54296875, "learning_rate": 0.0018516027112836838, "loss": 0.1736, "step": 18616 }, { "epoch": 0.03301125286906937, "grad_norm": 0.36328125, "learning_rate": 0.0018515699698171862, "loss": 0.19, "step": 18618 }, { "epoch": 0.033014799034379184, "grad_norm": 0.29296875, "learning_rate": 0.0018515372250637579, "loss": 0.2253, "step": 18620 }, { "epoch": 0.033018345199689, "grad_norm": 1.265625, "learning_rate": 0.0018515044770235425, "loss": 0.215, "step": 18622 }, { "epoch": 0.033021891364998814, "grad_norm": 1.03125, "learning_rate": 0.0018514717256966828, "loss": 0.4838, "step": 18624 }, { "epoch": 0.033025437530308635, "grad_norm": 0.83203125, "learning_rate": 0.0018514389710833227, "loss": 0.1887, "step": 18626 }, { "epoch": 0.03302898369561845, "grad_norm": 0.578125, "learning_rate": 0.0018514062131836048, "loss": 0.2011, "step": 18628 }, { "epoch": 0.033032529860928264, "grad_norm": 0.5859375, "learning_rate": 0.0018513734519976723, "loss": 0.1552, "step": 18630 }, { "epoch": 0.03303607602623808, "grad_norm": 0.58203125, "learning_rate": 0.0018513406875256694, "loss": 0.2178, "step": 18632 }, { "epoch": 0.03303962219154789, "grad_norm": 0.734375, "learning_rate": 0.0018513079197677383, "loss": 0.1976, "step": 18634 }, { "epoch": 0.03304316835685771, "grad_norm": 0.76953125, "learning_rate": 0.001851275148724023, "loss": 0.2241, "step": 18636 }, { "epoch": 0.03304671452216752, "grad_norm": 1.25, "learning_rate": 0.0018512423743946664, "loss": 0.2003, "step": 18638 }, { "epoch": 0.03305026068747734, "grad_norm": 1.0703125, "learning_rate": 0.0018512095967798121, "loss": 0.2308, "step": 18640 }, { "epoch": 0.03305380685278715, "grad_norm": 0.765625, "learning_rate": 0.0018511768158796032, "loss": 0.2545, "step": 18642 }, { "epoch": 0.033057353018096966, "grad_norm": 0.41796875, "learning_rate": 0.0018511440316941834, "loss": 0.2928, "step": 18644 }, { "epoch": 0.03306089918340678, "grad_norm": 1.5859375, "learning_rate": 0.0018511112442236959, "loss": 0.2709, "step": 18646 }, { "epoch": 0.033064445348716595, "grad_norm": 0.326171875, "learning_rate": 0.0018510784534682841, "loss": 0.1298, "step": 18648 }, { "epoch": 0.03306799151402642, "grad_norm": 0.349609375, "learning_rate": 0.0018510456594280915, "loss": 0.2339, "step": 18650 }, { "epoch": 0.03307153767933623, "grad_norm": 0.416015625, "learning_rate": 0.0018510128621032616, "loss": 0.151, "step": 18652 }, { "epoch": 0.033075083844646046, "grad_norm": 0.61328125, "learning_rate": 0.0018509800614939374, "loss": 0.2144, "step": 18654 }, { "epoch": 0.03307863000995586, "grad_norm": 0.271484375, "learning_rate": 0.0018509472576002627, "loss": 0.1915, "step": 18656 }, { "epoch": 0.033082176175265675, "grad_norm": 0.76953125, "learning_rate": 0.0018509144504223808, "loss": 0.1852, "step": 18658 }, { "epoch": 0.03308572234057549, "grad_norm": 0.3125, "learning_rate": 0.0018508816399604353, "loss": 0.1801, "step": 18660 }, { "epoch": 0.033089268505885304, "grad_norm": 0.60546875, "learning_rate": 0.0018508488262145703, "loss": 0.2263, "step": 18662 }, { "epoch": 0.03309281467119512, "grad_norm": 0.59765625, "learning_rate": 0.0018508160091849277, "loss": 0.2726, "step": 18664 }, { "epoch": 0.03309636083650493, "grad_norm": 0.58203125, "learning_rate": 0.0018507831888716526, "loss": 0.2189, "step": 18666 }, { "epoch": 0.03309990700181475, "grad_norm": 0.6171875, "learning_rate": 0.0018507503652748878, "loss": 0.2005, "step": 18668 }, { "epoch": 0.03310345316712456, "grad_norm": 0.6484375, "learning_rate": 0.0018507175383947767, "loss": 0.211, "step": 18670 }, { "epoch": 0.033106999332434384, "grad_norm": 0.45703125, "learning_rate": 0.0018506847082314637, "loss": 0.201, "step": 18672 }, { "epoch": 0.0331105454977442, "grad_norm": 0.275390625, "learning_rate": 0.0018506518747850916, "loss": 0.1901, "step": 18674 }, { "epoch": 0.03311409166305401, "grad_norm": 0.38671875, "learning_rate": 0.0018506190380558041, "loss": 0.1831, "step": 18676 }, { "epoch": 0.03311763782836383, "grad_norm": 0.4296875, "learning_rate": 0.001850586198043745, "loss": 0.2073, "step": 18678 }, { "epoch": 0.03312118399367364, "grad_norm": 1.703125, "learning_rate": 0.0018505533547490578, "loss": 0.2433, "step": 18680 }, { "epoch": 0.033124730158983456, "grad_norm": 0.22265625, "learning_rate": 0.0018505205081718865, "loss": 0.2346, "step": 18682 }, { "epoch": 0.03312827632429327, "grad_norm": 0.45703125, "learning_rate": 0.0018504876583123743, "loss": 0.2053, "step": 18684 }, { "epoch": 0.033131822489603086, "grad_norm": 0.390625, "learning_rate": 0.0018504548051706648, "loss": 0.2083, "step": 18686 }, { "epoch": 0.0331353686549129, "grad_norm": 0.470703125, "learning_rate": 0.001850421948746902, "loss": 0.1528, "step": 18688 }, { "epoch": 0.033138914820222715, "grad_norm": 0.341796875, "learning_rate": 0.0018503890890412295, "loss": 0.1885, "step": 18690 }, { "epoch": 0.03314246098553253, "grad_norm": 0.412109375, "learning_rate": 0.0018503562260537914, "loss": 0.227, "step": 18692 }, { "epoch": 0.03314600715084235, "grad_norm": 0.2470703125, "learning_rate": 0.0018503233597847307, "loss": 0.2024, "step": 18694 }, { "epoch": 0.033149553316152165, "grad_norm": 0.373046875, "learning_rate": 0.0018502904902341912, "loss": 0.1938, "step": 18696 }, { "epoch": 0.03315309948146198, "grad_norm": 1.2265625, "learning_rate": 0.001850257617402317, "loss": 0.2967, "step": 18698 }, { "epoch": 0.033156645646771794, "grad_norm": 1.59375, "learning_rate": 0.0018502247412892521, "loss": 0.2787, "step": 18700 }, { "epoch": 0.03316019181208161, "grad_norm": 0.79296875, "learning_rate": 0.0018501918618951398, "loss": 0.2648, "step": 18702 }, { "epoch": 0.03316373797739142, "grad_norm": 1.0078125, "learning_rate": 0.001850158979220124, "loss": 0.189, "step": 18704 }, { "epoch": 0.03316728414270124, "grad_norm": 0.44921875, "learning_rate": 0.0018501260932643488, "loss": 0.1882, "step": 18706 }, { "epoch": 0.03317083030801105, "grad_norm": 0.3515625, "learning_rate": 0.0018500932040279576, "loss": 0.1915, "step": 18708 }, { "epoch": 0.03317437647332087, "grad_norm": 0.5078125, "learning_rate": 0.0018500603115110949, "loss": 0.1916, "step": 18710 }, { "epoch": 0.03317792263863068, "grad_norm": 0.412109375, "learning_rate": 0.0018500274157139032, "loss": 0.2237, "step": 18712 }, { "epoch": 0.033181468803940496, "grad_norm": 0.48046875, "learning_rate": 0.001849994516636528, "loss": 0.2083, "step": 18714 }, { "epoch": 0.03318501496925031, "grad_norm": 0.578125, "learning_rate": 0.0018499616142791117, "loss": 0.2221, "step": 18716 }, { "epoch": 0.03318856113456013, "grad_norm": 0.86328125, "learning_rate": 0.0018499287086417998, "loss": 0.3681, "step": 18718 }, { "epoch": 0.03319210729986995, "grad_norm": 0.369140625, "learning_rate": 0.0018498957997247348, "loss": 0.2953, "step": 18720 }, { "epoch": 0.03319565346517976, "grad_norm": 0.423828125, "learning_rate": 0.001849862887528061, "loss": 0.2633, "step": 18722 }, { "epoch": 0.033199199630489576, "grad_norm": 0.3828125, "learning_rate": 0.001849829972051923, "loss": 0.2133, "step": 18724 }, { "epoch": 0.03320274579579939, "grad_norm": 0.3203125, "learning_rate": 0.001849797053296464, "loss": 0.235, "step": 18726 }, { "epoch": 0.033206291961109205, "grad_norm": 0.2236328125, "learning_rate": 0.0018497641312618283, "loss": 0.2189, "step": 18728 }, { "epoch": 0.03320983812641902, "grad_norm": 0.279296875, "learning_rate": 0.0018497312059481596, "loss": 0.1839, "step": 18730 }, { "epoch": 0.033213384291728834, "grad_norm": 0.7265625, "learning_rate": 0.0018496982773556023, "loss": 0.2953, "step": 18732 }, { "epoch": 0.03321693045703865, "grad_norm": 0.4609375, "learning_rate": 0.0018496653454843004, "loss": 0.1469, "step": 18734 }, { "epoch": 0.03322047662234846, "grad_norm": 3.1875, "learning_rate": 0.0018496324103343972, "loss": 0.3843, "step": 18736 }, { "epoch": 0.03322402278765828, "grad_norm": 0.392578125, "learning_rate": 0.0018495994719060378, "loss": 0.2196, "step": 18738 }, { "epoch": 0.0332275689529681, "grad_norm": 1.7578125, "learning_rate": 0.0018495665301993653, "loss": 0.4872, "step": 18740 }, { "epoch": 0.033231115118277914, "grad_norm": 0.57421875, "learning_rate": 0.0018495335852145246, "loss": 0.2595, "step": 18742 }, { "epoch": 0.03323466128358773, "grad_norm": 0.306640625, "learning_rate": 0.0018495006369516592, "loss": 0.2142, "step": 18744 }, { "epoch": 0.03323820744889754, "grad_norm": 3.171875, "learning_rate": 0.0018494676854109134, "loss": 0.3017, "step": 18746 }, { "epoch": 0.03324175361420736, "grad_norm": 0.37109375, "learning_rate": 0.001849434730592431, "loss": 0.1897, "step": 18748 }, { "epoch": 0.03324529977951717, "grad_norm": 0.5625, "learning_rate": 0.0018494017724963567, "loss": 0.2462, "step": 18750 }, { "epoch": 0.03324884594482699, "grad_norm": 0.546875, "learning_rate": 0.0018493688111228346, "loss": 0.2316, "step": 18752 }, { "epoch": 0.0332523921101368, "grad_norm": 0.380859375, "learning_rate": 0.001849335846472008, "loss": 0.1849, "step": 18754 }, { "epoch": 0.033255938275446616, "grad_norm": 0.62109375, "learning_rate": 0.001849302878544022, "loss": 0.2454, "step": 18756 }, { "epoch": 0.03325948444075643, "grad_norm": 0.84375, "learning_rate": 0.0018492699073390205, "loss": 0.2996, "step": 18758 }, { "epoch": 0.033263030606066245, "grad_norm": 0.416015625, "learning_rate": 0.0018492369328571476, "loss": 0.25, "step": 18760 }, { "epoch": 0.033266576771376066, "grad_norm": 2.78125, "learning_rate": 0.0018492039550985476, "loss": 0.2499, "step": 18762 }, { "epoch": 0.03327012293668588, "grad_norm": 0.734375, "learning_rate": 0.0018491709740633647, "loss": 0.2176, "step": 18764 }, { "epoch": 0.033273669101995695, "grad_norm": 1.046875, "learning_rate": 0.001849137989751743, "loss": 0.2178, "step": 18766 }, { "epoch": 0.03327721526730551, "grad_norm": 1.484375, "learning_rate": 0.001849105002163827, "loss": 0.179, "step": 18768 }, { "epoch": 0.033280761432615324, "grad_norm": 0.9765625, "learning_rate": 0.0018490720112997608, "loss": 0.2716, "step": 18770 }, { "epoch": 0.03328430759792514, "grad_norm": 0.375, "learning_rate": 0.0018490390171596886, "loss": 0.2062, "step": 18772 }, { "epoch": 0.033287853763234954, "grad_norm": 0.6171875, "learning_rate": 0.001849006019743755, "loss": 0.3005, "step": 18774 }, { "epoch": 0.03329139992854477, "grad_norm": 0.44921875, "learning_rate": 0.0018489730190521044, "loss": 0.2306, "step": 18776 }, { "epoch": 0.03329494609385458, "grad_norm": 0.57421875, "learning_rate": 0.0018489400150848805, "loss": 0.1936, "step": 18778 }, { "epoch": 0.0332984922591644, "grad_norm": 0.60546875, "learning_rate": 0.001848907007842228, "loss": 0.2347, "step": 18780 }, { "epoch": 0.03330203842447421, "grad_norm": 0.29296875, "learning_rate": 0.0018488739973242915, "loss": 0.1813, "step": 18782 }, { "epoch": 0.033305584589784026, "grad_norm": 1.015625, "learning_rate": 0.001848840983531215, "loss": 0.2902, "step": 18784 }, { "epoch": 0.03330913075509385, "grad_norm": 0.353515625, "learning_rate": 0.001848807966463143, "loss": 0.2607, "step": 18786 }, { "epoch": 0.03331267692040366, "grad_norm": 0.74609375, "learning_rate": 0.00184877494612022, "loss": 0.4356, "step": 18788 }, { "epoch": 0.03331622308571348, "grad_norm": 0.6015625, "learning_rate": 0.0018487419225025903, "loss": 0.1875, "step": 18790 }, { "epoch": 0.03331976925102329, "grad_norm": 0.4296875, "learning_rate": 0.0018487088956103982, "loss": 0.1902, "step": 18792 }, { "epoch": 0.033323315416333106, "grad_norm": 0.3203125, "learning_rate": 0.0018486758654437886, "loss": 0.2309, "step": 18794 }, { "epoch": 0.03332686158164292, "grad_norm": 0.462890625, "learning_rate": 0.0018486428320029053, "loss": 0.2175, "step": 18796 }, { "epoch": 0.033330407746952735, "grad_norm": 0.85546875, "learning_rate": 0.0018486097952878932, "loss": 0.2416, "step": 18798 }, { "epoch": 0.03333395391226255, "grad_norm": 0.62109375, "learning_rate": 0.0018485767552988968, "loss": 0.2062, "step": 18800 }, { "epoch": 0.033337500077572364, "grad_norm": 0.4453125, "learning_rate": 0.0018485437120360603, "loss": 0.2121, "step": 18802 }, { "epoch": 0.03334104624288218, "grad_norm": 0.5703125, "learning_rate": 0.0018485106654995288, "loss": 0.2597, "step": 18804 }, { "epoch": 0.03334459240819199, "grad_norm": 0.25390625, "learning_rate": 0.0018484776156894463, "loss": 0.3383, "step": 18806 }, { "epoch": 0.033348138573501815, "grad_norm": 0.72265625, "learning_rate": 0.0018484445626059573, "loss": 0.2085, "step": 18808 }, { "epoch": 0.03335168473881163, "grad_norm": 0.60546875, "learning_rate": 0.0018484115062492067, "loss": 0.2383, "step": 18810 }, { "epoch": 0.033355230904121444, "grad_norm": 0.7265625, "learning_rate": 0.0018483784466193386, "loss": 0.1899, "step": 18812 }, { "epoch": 0.03335877706943126, "grad_norm": 0.2177734375, "learning_rate": 0.0018483453837164984, "loss": 0.2314, "step": 18814 }, { "epoch": 0.03336232323474107, "grad_norm": 0.306640625, "learning_rate": 0.0018483123175408295, "loss": 0.1807, "step": 18816 }, { "epoch": 0.03336586940005089, "grad_norm": 1.3046875, "learning_rate": 0.001848279248092478, "loss": 0.1743, "step": 18818 }, { "epoch": 0.0333694155653607, "grad_norm": 1.015625, "learning_rate": 0.001848246175371587, "loss": 0.2333, "step": 18820 }, { "epoch": 0.03337296173067052, "grad_norm": 0.392578125, "learning_rate": 0.0018482130993783025, "loss": 0.188, "step": 18822 }, { "epoch": 0.03337650789598033, "grad_norm": 0.296875, "learning_rate": 0.001848180020112768, "loss": 0.2444, "step": 18824 }, { "epoch": 0.033380054061290146, "grad_norm": 4.96875, "learning_rate": 0.001848146937575129, "loss": 0.4252, "step": 18826 }, { "epoch": 0.03338360022659996, "grad_norm": 2.515625, "learning_rate": 0.0018481138517655296, "loss": 0.1596, "step": 18828 }, { "epoch": 0.03338714639190978, "grad_norm": 0.404296875, "learning_rate": 0.0018480807626841149, "loss": 0.2782, "step": 18830 }, { "epoch": 0.033390692557219596, "grad_norm": 0.318359375, "learning_rate": 0.0018480476703310296, "loss": 0.3535, "step": 18832 }, { "epoch": 0.03339423872252941, "grad_norm": 0.51171875, "learning_rate": 0.0018480145747064181, "loss": 0.1631, "step": 18834 }, { "epoch": 0.033397784887839226, "grad_norm": 0.33984375, "learning_rate": 0.0018479814758104256, "loss": 0.1894, "step": 18836 }, { "epoch": 0.03340133105314904, "grad_norm": 0.51171875, "learning_rate": 0.0018479483736431963, "loss": 0.2116, "step": 18838 }, { "epoch": 0.033404877218458855, "grad_norm": 1.1328125, "learning_rate": 0.0018479152682048757, "loss": 0.2601, "step": 18840 }, { "epoch": 0.03340842338376867, "grad_norm": 0.6171875, "learning_rate": 0.0018478821594956076, "loss": 0.2387, "step": 18842 }, { "epoch": 0.033411969549078484, "grad_norm": 0.50390625, "learning_rate": 0.0018478490475155379, "loss": 0.1727, "step": 18844 }, { "epoch": 0.0334155157143883, "grad_norm": 0.458984375, "learning_rate": 0.0018478159322648105, "loss": 0.1703, "step": 18846 }, { "epoch": 0.03341906187969811, "grad_norm": 19.25, "learning_rate": 0.0018477828137435706, "loss": 0.2858, "step": 18848 }, { "epoch": 0.03342260804500793, "grad_norm": 0.44921875, "learning_rate": 0.0018477496919519633, "loss": 0.1944, "step": 18850 }, { "epoch": 0.03342615421031774, "grad_norm": 0.53125, "learning_rate": 0.0018477165668901328, "loss": 0.2487, "step": 18852 }, { "epoch": 0.03342970037562756, "grad_norm": 0.6484375, "learning_rate": 0.0018476834385582246, "loss": 0.2345, "step": 18854 }, { "epoch": 0.03343324654093738, "grad_norm": 1.3515625, "learning_rate": 0.0018476503069563834, "loss": 0.3625, "step": 18856 }, { "epoch": 0.03343679270624719, "grad_norm": 0.67578125, "learning_rate": 0.001847617172084754, "loss": 0.1696, "step": 18858 }, { "epoch": 0.03344033887155701, "grad_norm": 0.89453125, "learning_rate": 0.0018475840339434813, "loss": 0.221, "step": 18860 }, { "epoch": 0.03344388503686682, "grad_norm": 1.25, "learning_rate": 0.0018475508925327104, "loss": 0.2301, "step": 18862 }, { "epoch": 0.033447431202176636, "grad_norm": 0.8671875, "learning_rate": 0.0018475177478525862, "loss": 0.2339, "step": 18864 }, { "epoch": 0.03345097736748645, "grad_norm": 1.0234375, "learning_rate": 0.0018474845999032533, "loss": 0.2813, "step": 18866 }, { "epoch": 0.033454523532796265, "grad_norm": 1.34375, "learning_rate": 0.001847451448684857, "loss": 0.2237, "step": 18868 }, { "epoch": 0.03345806969810608, "grad_norm": 0.92578125, "learning_rate": 0.0018474182941975424, "loss": 0.3002, "step": 18870 }, { "epoch": 0.033461615863415894, "grad_norm": 0.52734375, "learning_rate": 0.0018473851364414543, "loss": 0.192, "step": 18872 }, { "epoch": 0.03346516202872571, "grad_norm": 0.34765625, "learning_rate": 0.0018473519754167375, "loss": 0.2454, "step": 18874 }, { "epoch": 0.03346870819403553, "grad_norm": 0.69140625, "learning_rate": 0.0018473188111235374, "loss": 0.195, "step": 18876 }, { "epoch": 0.033472254359345345, "grad_norm": 0.28125, "learning_rate": 0.0018472856435619994, "loss": 0.1908, "step": 18878 }, { "epoch": 0.03347580052465516, "grad_norm": 0.5078125, "learning_rate": 0.0018472524727322676, "loss": 0.2444, "step": 18880 }, { "epoch": 0.033479346689964974, "grad_norm": 0.478515625, "learning_rate": 0.0018472192986344876, "loss": 0.2898, "step": 18882 }, { "epoch": 0.03348289285527479, "grad_norm": 0.2890625, "learning_rate": 0.0018471861212688045, "loss": 0.2057, "step": 18884 }, { "epoch": 0.0334864390205846, "grad_norm": 0.25, "learning_rate": 0.0018471529406353631, "loss": 0.246, "step": 18886 }, { "epoch": 0.03348998518589442, "grad_norm": 0.470703125, "learning_rate": 0.0018471197567343088, "loss": 0.2827, "step": 18888 }, { "epoch": 0.03349353135120423, "grad_norm": 0.392578125, "learning_rate": 0.0018470865695657868, "loss": 0.2161, "step": 18890 }, { "epoch": 0.03349707751651405, "grad_norm": 0.2431640625, "learning_rate": 0.001847053379129942, "loss": 0.2115, "step": 18892 }, { "epoch": 0.03350062368182386, "grad_norm": 0.357421875, "learning_rate": 0.0018470201854269197, "loss": 0.1322, "step": 18894 }, { "epoch": 0.033504169847133676, "grad_norm": 1.0859375, "learning_rate": 0.0018469869884568649, "loss": 0.1865, "step": 18896 }, { "epoch": 0.0335077160124435, "grad_norm": 0.341796875, "learning_rate": 0.0018469537882199233, "loss": 0.225, "step": 18898 }, { "epoch": 0.03351126217775331, "grad_norm": 0.8125, "learning_rate": 0.0018469205847162393, "loss": 0.435, "step": 18900 }, { "epoch": 0.03351480834306313, "grad_norm": 0.453125, "learning_rate": 0.0018468873779459588, "loss": 0.3007, "step": 18902 }, { "epoch": 0.03351835450837294, "grad_norm": 0.63671875, "learning_rate": 0.0018468541679092267, "loss": 0.3188, "step": 18904 }, { "epoch": 0.033521900673682756, "grad_norm": 0.3515625, "learning_rate": 0.001846820954606188, "loss": 0.2082, "step": 18906 }, { "epoch": 0.03352544683899257, "grad_norm": 1.515625, "learning_rate": 0.0018467877380369886, "loss": 0.2822, "step": 18908 }, { "epoch": 0.033528993004302385, "grad_norm": 0.53515625, "learning_rate": 0.001846754518201773, "loss": 0.2131, "step": 18910 }, { "epoch": 0.0335325391696122, "grad_norm": 1.328125, "learning_rate": 0.0018467212951006873, "loss": 0.3754, "step": 18912 }, { "epoch": 0.033536085334922014, "grad_norm": 0.515625, "learning_rate": 0.0018466880687338764, "loss": 0.2043, "step": 18914 }, { "epoch": 0.03353963150023183, "grad_norm": 0.3046875, "learning_rate": 0.0018466548391014857, "loss": 0.1718, "step": 18916 }, { "epoch": 0.03354317766554164, "grad_norm": 0.443359375, "learning_rate": 0.00184662160620366, "loss": 0.2072, "step": 18918 }, { "epoch": 0.03354672383085146, "grad_norm": 0.53125, "learning_rate": 0.0018465883700405456, "loss": 0.2001, "step": 18920 }, { "epoch": 0.03355026999616128, "grad_norm": 0.64453125, "learning_rate": 0.001846555130612287, "loss": 0.2529, "step": 18922 }, { "epoch": 0.033553816161471094, "grad_norm": 0.421875, "learning_rate": 0.00184652188791903, "loss": 0.2507, "step": 18924 }, { "epoch": 0.03355736232678091, "grad_norm": 0.390625, "learning_rate": 0.0018464886419609198, "loss": 0.2161, "step": 18926 }, { "epoch": 0.03356090849209072, "grad_norm": 0.30078125, "learning_rate": 0.0018464553927381023, "loss": 0.2447, "step": 18928 }, { "epoch": 0.03356445465740054, "grad_norm": 0.462890625, "learning_rate": 0.0018464221402507222, "loss": 0.1866, "step": 18930 }, { "epoch": 0.03356800082271035, "grad_norm": 0.4453125, "learning_rate": 0.0018463888844989249, "loss": 0.2096, "step": 18932 }, { "epoch": 0.033571546988020166, "grad_norm": 0.390625, "learning_rate": 0.0018463556254828565, "loss": 0.2082, "step": 18934 }, { "epoch": 0.03357509315332998, "grad_norm": 0.6953125, "learning_rate": 0.0018463223632026625, "loss": 0.2534, "step": 18936 }, { "epoch": 0.033578639318639796, "grad_norm": 2.140625, "learning_rate": 0.0018462890976584877, "loss": 0.2559, "step": 18938 }, { "epoch": 0.03358218548394961, "grad_norm": 0.38671875, "learning_rate": 0.0018462558288504774, "loss": 0.2209, "step": 18940 }, { "epoch": 0.033585731649259425, "grad_norm": 1.21875, "learning_rate": 0.001846222556778778, "loss": 0.2767, "step": 18942 }, { "epoch": 0.033589277814569246, "grad_norm": 1.21875, "learning_rate": 0.0018461892814435345, "loss": 0.252, "step": 18944 }, { "epoch": 0.03359282397987906, "grad_norm": 0.8828125, "learning_rate": 0.001846156002844893, "loss": 0.1962, "step": 18946 }, { "epoch": 0.033596370145188875, "grad_norm": 0.59765625, "learning_rate": 0.001846122720982998, "loss": 0.2091, "step": 18948 }, { "epoch": 0.03359991631049869, "grad_norm": 0.416015625, "learning_rate": 0.0018460894358579957, "loss": 0.1944, "step": 18950 }, { "epoch": 0.033603462475808504, "grad_norm": 0.3671875, "learning_rate": 0.0018460561474700316, "loss": 0.2193, "step": 18952 }, { "epoch": 0.03360700864111832, "grad_norm": 0.376953125, "learning_rate": 0.0018460228558192515, "loss": 0.2845, "step": 18954 }, { "epoch": 0.03361055480642813, "grad_norm": 0.6640625, "learning_rate": 0.0018459895609058005, "loss": 0.1916, "step": 18956 }, { "epoch": 0.03361410097173795, "grad_norm": 0.24609375, "learning_rate": 0.0018459562627298248, "loss": 0.2705, "step": 18958 }, { "epoch": 0.03361764713704776, "grad_norm": 0.287109375, "learning_rate": 0.0018459229612914694, "loss": 0.1657, "step": 18960 }, { "epoch": 0.03362119330235758, "grad_norm": 0.490234375, "learning_rate": 0.0018458896565908803, "loss": 0.193, "step": 18962 }, { "epoch": 0.03362473946766739, "grad_norm": 0.66015625, "learning_rate": 0.0018458563486282034, "loss": 0.2113, "step": 18964 }, { "epoch": 0.03362828563297721, "grad_norm": 0.87890625, "learning_rate": 0.0018458230374035838, "loss": 0.1834, "step": 18966 }, { "epoch": 0.03363183179828703, "grad_norm": 0.314453125, "learning_rate": 0.0018457897229171673, "loss": 0.1338, "step": 18968 }, { "epoch": 0.03363537796359684, "grad_norm": 1.859375, "learning_rate": 0.0018457564051691001, "loss": 0.4528, "step": 18970 }, { "epoch": 0.03363892412890666, "grad_norm": 0.36328125, "learning_rate": 0.0018457230841595273, "loss": 0.3895, "step": 18972 }, { "epoch": 0.03364247029421647, "grad_norm": 2.625, "learning_rate": 0.0018456897598885952, "loss": 0.2551, "step": 18974 }, { "epoch": 0.033646016459526286, "grad_norm": 0.59765625, "learning_rate": 0.0018456564323564488, "loss": 0.2106, "step": 18976 }, { "epoch": 0.0336495626248361, "grad_norm": 0.3515625, "learning_rate": 0.0018456231015632348, "loss": 0.1826, "step": 18978 }, { "epoch": 0.033653108790145915, "grad_norm": 0.55078125, "learning_rate": 0.001845589767509098, "loss": 0.2581, "step": 18980 }, { "epoch": 0.03365665495545573, "grad_norm": 0.298828125, "learning_rate": 0.001845556430194185, "loss": 0.2521, "step": 18982 }, { "epoch": 0.033660201120765544, "grad_norm": 6.65625, "learning_rate": 0.001845523089618641, "loss": 0.3095, "step": 18984 }, { "epoch": 0.03366374728607536, "grad_norm": 6.46875, "learning_rate": 0.0018454897457826124, "loss": 0.3289, "step": 18986 }, { "epoch": 0.03366729345138517, "grad_norm": 0.390625, "learning_rate": 0.0018454563986862445, "loss": 0.2168, "step": 18988 }, { "epoch": 0.033670839616694995, "grad_norm": 0.240234375, "learning_rate": 0.0018454230483296833, "loss": 0.2024, "step": 18990 }, { "epoch": 0.03367438578200481, "grad_norm": 0.302734375, "learning_rate": 0.0018453896947130746, "loss": 0.1981, "step": 18992 }, { "epoch": 0.033677931947314624, "grad_norm": 1.375, "learning_rate": 0.0018453563378365645, "loss": 0.2182, "step": 18994 }, { "epoch": 0.03368147811262444, "grad_norm": 0.78515625, "learning_rate": 0.0018453229777002987, "loss": 0.1984, "step": 18996 }, { "epoch": 0.03368502427793425, "grad_norm": 0.1982421875, "learning_rate": 0.0018452896143044228, "loss": 0.1803, "step": 18998 }, { "epoch": 0.03368857044324407, "grad_norm": 0.55859375, "learning_rate": 0.0018452562476490835, "loss": 0.1515, "step": 19000 }, { "epoch": 0.03369211660855388, "grad_norm": 0.306640625, "learning_rate": 0.001845222877734426, "loss": 0.2303, "step": 19002 }, { "epoch": 0.0336956627738637, "grad_norm": 0.609375, "learning_rate": 0.0018451895045605963, "loss": 0.1867, "step": 19004 }, { "epoch": 0.03369920893917351, "grad_norm": 0.4609375, "learning_rate": 0.0018451561281277412, "loss": 0.2536, "step": 19006 }, { "epoch": 0.033702755104483326, "grad_norm": 0.369140625, "learning_rate": 0.0018451227484360057, "loss": 0.1953, "step": 19008 }, { "epoch": 0.03370630126979314, "grad_norm": 0.640625, "learning_rate": 0.001845089365485536, "loss": 0.2006, "step": 19010 }, { "epoch": 0.03370984743510296, "grad_norm": 0.32421875, "learning_rate": 0.0018450559792764784, "loss": 0.2292, "step": 19012 }, { "epoch": 0.033713393600412776, "grad_norm": 0.53515625, "learning_rate": 0.0018450225898089785, "loss": 0.2359, "step": 19014 }, { "epoch": 0.03371693976572259, "grad_norm": 0.287109375, "learning_rate": 0.0018449891970831827, "loss": 0.2856, "step": 19016 }, { "epoch": 0.033720485931032405, "grad_norm": 0.42578125, "learning_rate": 0.0018449558010992372, "loss": 0.2128, "step": 19018 }, { "epoch": 0.03372403209634222, "grad_norm": 0.484375, "learning_rate": 0.0018449224018572872, "loss": 0.301, "step": 19020 }, { "epoch": 0.033727578261652034, "grad_norm": 0.625, "learning_rate": 0.0018448889993574796, "loss": 0.2252, "step": 19022 }, { "epoch": 0.03373112442696185, "grad_norm": 0.65625, "learning_rate": 0.0018448555935999603, "loss": 0.3472, "step": 19024 }, { "epoch": 0.033734670592271664, "grad_norm": 2.5625, "learning_rate": 0.0018448221845848752, "loss": 0.3541, "step": 19026 }, { "epoch": 0.03373821675758148, "grad_norm": 0.33984375, "learning_rate": 0.0018447887723123705, "loss": 0.1741, "step": 19028 }, { "epoch": 0.03374176292289129, "grad_norm": 0.74609375, "learning_rate": 0.0018447553567825922, "loss": 0.4304, "step": 19030 }, { "epoch": 0.03374530908820111, "grad_norm": 1.2578125, "learning_rate": 0.0018447219379956867, "loss": 0.2288, "step": 19032 }, { "epoch": 0.03374885525351093, "grad_norm": 0.796875, "learning_rate": 0.0018446885159517999, "loss": 0.264, "step": 19034 }, { "epoch": 0.03375240141882074, "grad_norm": 0.44140625, "learning_rate": 0.0018446550906510782, "loss": 0.1917, "step": 19036 }, { "epoch": 0.03375594758413056, "grad_norm": 0.43359375, "learning_rate": 0.0018446216620936675, "loss": 0.2097, "step": 19038 }, { "epoch": 0.03375949374944037, "grad_norm": 1.4609375, "learning_rate": 0.0018445882302797147, "loss": 0.4741, "step": 19040 }, { "epoch": 0.03376303991475019, "grad_norm": 0.296875, "learning_rate": 0.0018445547952093651, "loss": 0.2618, "step": 19042 }, { "epoch": 0.03376658608006, "grad_norm": 1.2890625, "learning_rate": 0.0018445213568827653, "loss": 0.2557, "step": 19044 }, { "epoch": 0.033770132245369816, "grad_norm": 0.515625, "learning_rate": 0.0018444879153000616, "loss": 0.2402, "step": 19046 }, { "epoch": 0.03377367841067963, "grad_norm": 0.232421875, "learning_rate": 0.0018444544704614002, "loss": 0.1612, "step": 19048 }, { "epoch": 0.033777224575989445, "grad_norm": 0.44140625, "learning_rate": 0.0018444210223669275, "loss": 0.1946, "step": 19050 }, { "epoch": 0.03378077074129926, "grad_norm": 0.89453125, "learning_rate": 0.0018443875710167897, "loss": 0.2281, "step": 19052 }, { "epoch": 0.033784316906609074, "grad_norm": 0.91796875, "learning_rate": 0.0018443541164111326, "loss": 0.1907, "step": 19054 }, { "epoch": 0.03378786307191889, "grad_norm": 0.427734375, "learning_rate": 0.0018443206585501033, "loss": 0.2029, "step": 19056 }, { "epoch": 0.03379140923722871, "grad_norm": 0.89453125, "learning_rate": 0.001844287197433848, "loss": 0.223, "step": 19058 }, { "epoch": 0.033794955402538525, "grad_norm": 0.453125, "learning_rate": 0.0018442537330625127, "loss": 0.226, "step": 19060 }, { "epoch": 0.03379850156784834, "grad_norm": 0.36328125, "learning_rate": 0.0018442202654362437, "loss": 0.227, "step": 19062 }, { "epoch": 0.033802047733158154, "grad_norm": 0.97265625, "learning_rate": 0.0018441867945551877, "loss": 0.2429, "step": 19064 }, { "epoch": 0.03380559389846797, "grad_norm": 0.5, "learning_rate": 0.0018441533204194907, "loss": 0.2129, "step": 19066 }, { "epoch": 0.03380914006377778, "grad_norm": 0.4375, "learning_rate": 0.0018441198430292995, "loss": 0.1582, "step": 19068 }, { "epoch": 0.0338126862290876, "grad_norm": 0.435546875, "learning_rate": 0.0018440863623847604, "loss": 0.2068, "step": 19070 }, { "epoch": 0.03381623239439741, "grad_norm": 1.28125, "learning_rate": 0.00184405287848602, "loss": 0.221, "step": 19072 }, { "epoch": 0.03381977855970723, "grad_norm": 0.61328125, "learning_rate": 0.001844019391333224, "loss": 0.2143, "step": 19074 }, { "epoch": 0.03382332472501704, "grad_norm": 0.46484375, "learning_rate": 0.0018439859009265196, "loss": 0.3348, "step": 19076 }, { "epoch": 0.033826870890326856, "grad_norm": 0.6328125, "learning_rate": 0.001843952407266053, "loss": 0.1941, "step": 19078 }, { "epoch": 0.03383041705563668, "grad_norm": 0.1669921875, "learning_rate": 0.0018439189103519708, "loss": 0.2115, "step": 19080 }, { "epoch": 0.03383396322094649, "grad_norm": 0.51953125, "learning_rate": 0.0018438854101844194, "loss": 0.2639, "step": 19082 }, { "epoch": 0.033837509386256306, "grad_norm": 0.703125, "learning_rate": 0.0018438519067635454, "loss": 0.2026, "step": 19084 }, { "epoch": 0.03384105555156612, "grad_norm": 0.41015625, "learning_rate": 0.0018438184000894948, "loss": 0.2551, "step": 19086 }, { "epoch": 0.033844601716875936, "grad_norm": 0.69921875, "learning_rate": 0.001843784890162415, "loss": 0.2381, "step": 19088 }, { "epoch": 0.03384814788218575, "grad_norm": 0.85546875, "learning_rate": 0.001843751376982452, "loss": 0.2712, "step": 19090 }, { "epoch": 0.033851694047495565, "grad_norm": 3.984375, "learning_rate": 0.0018437178605497525, "loss": 0.3249, "step": 19092 }, { "epoch": 0.03385524021280538, "grad_norm": 2.578125, "learning_rate": 0.0018436843408644631, "loss": 0.2692, "step": 19094 }, { "epoch": 0.033858786378115194, "grad_norm": 0.4453125, "learning_rate": 0.0018436508179267303, "loss": 0.2199, "step": 19096 }, { "epoch": 0.03386233254342501, "grad_norm": 0.81640625, "learning_rate": 0.0018436172917367009, "loss": 0.2365, "step": 19098 }, { "epoch": 0.03386587870873482, "grad_norm": 3.09375, "learning_rate": 0.0018435837622945213, "loss": 0.3604, "step": 19100 }, { "epoch": 0.033869424874044644, "grad_norm": 0.3984375, "learning_rate": 0.0018435502296003383, "loss": 0.2479, "step": 19102 }, { "epoch": 0.03387297103935446, "grad_norm": 0.703125, "learning_rate": 0.0018435166936542986, "loss": 0.1854, "step": 19104 }, { "epoch": 0.03387651720466427, "grad_norm": 0.1845703125, "learning_rate": 0.0018434831544565486, "loss": 0.1999, "step": 19106 }, { "epoch": 0.03388006336997409, "grad_norm": 0.5859375, "learning_rate": 0.0018434496120072353, "loss": 0.1936, "step": 19108 }, { "epoch": 0.0338836095352839, "grad_norm": 1.2109375, "learning_rate": 0.0018434160663065053, "loss": 0.3086, "step": 19110 }, { "epoch": 0.03388715570059372, "grad_norm": 1.1171875, "learning_rate": 0.001843382517354505, "loss": 0.3781, "step": 19112 }, { "epoch": 0.03389070186590353, "grad_norm": 3.4375, "learning_rate": 0.0018433489651513814, "loss": 0.2386, "step": 19114 }, { "epoch": 0.033894248031213346, "grad_norm": 0.2275390625, "learning_rate": 0.0018433154096972814, "loss": 0.2564, "step": 19116 }, { "epoch": 0.03389779419652316, "grad_norm": 3.125, "learning_rate": 0.0018432818509923515, "loss": 0.1917, "step": 19118 }, { "epoch": 0.033901340361832975, "grad_norm": 0.33203125, "learning_rate": 0.0018432482890367384, "loss": 0.2098, "step": 19120 }, { "epoch": 0.03390488652714279, "grad_norm": 0.3046875, "learning_rate": 0.0018432147238305891, "loss": 0.1995, "step": 19122 }, { "epoch": 0.033908432692452604, "grad_norm": 1.609375, "learning_rate": 0.0018431811553740508, "loss": 0.2775, "step": 19124 }, { "epoch": 0.033911978857762426, "grad_norm": 0.478515625, "learning_rate": 0.0018431475836672689, "loss": 0.2088, "step": 19126 }, { "epoch": 0.03391552502307224, "grad_norm": 1.265625, "learning_rate": 0.0018431140087103919, "loss": 0.228, "step": 19128 }, { "epoch": 0.033919071188382055, "grad_norm": 1.640625, "learning_rate": 0.0018430804305035653, "loss": 0.2621, "step": 19130 }, { "epoch": 0.03392261735369187, "grad_norm": 0.470703125, "learning_rate": 0.001843046849046937, "loss": 0.2344, "step": 19132 }, { "epoch": 0.033926163519001684, "grad_norm": 0.267578125, "learning_rate": 0.0018430132643406534, "loss": 0.169, "step": 19134 }, { "epoch": 0.0339297096843115, "grad_norm": 2.65625, "learning_rate": 0.001842979676384861, "loss": 0.3076, "step": 19136 }, { "epoch": 0.03393325584962131, "grad_norm": 1.609375, "learning_rate": 0.0018429460851797073, "loss": 0.225, "step": 19138 }, { "epoch": 0.03393680201493113, "grad_norm": 0.71875, "learning_rate": 0.001842912490725339, "loss": 0.2014, "step": 19140 }, { "epoch": 0.03394034818024094, "grad_norm": 0.318359375, "learning_rate": 0.0018428788930219028, "loss": 0.2189, "step": 19142 }, { "epoch": 0.03394389434555076, "grad_norm": 1.1015625, "learning_rate": 0.0018428452920695457, "loss": 0.2543, "step": 19144 }, { "epoch": 0.03394744051086057, "grad_norm": 0.984375, "learning_rate": 0.0018428116878684154, "loss": 0.2473, "step": 19146 }, { "epoch": 0.03395098667617039, "grad_norm": 1.3828125, "learning_rate": 0.0018427780804186578, "loss": 0.3891, "step": 19148 }, { "epoch": 0.03395453284148021, "grad_norm": 0.87890625, "learning_rate": 0.0018427444697204202, "loss": 0.1533, "step": 19150 }, { "epoch": 0.03395807900679002, "grad_norm": 0.62109375, "learning_rate": 0.00184271085577385, "loss": 0.2407, "step": 19152 }, { "epoch": 0.03396162517209984, "grad_norm": 0.46484375, "learning_rate": 0.001842677238579094, "loss": 0.219, "step": 19154 }, { "epoch": 0.03396517133740965, "grad_norm": 0.302734375, "learning_rate": 0.001842643618136299, "loss": 0.2856, "step": 19156 }, { "epoch": 0.033968717502719466, "grad_norm": 2.5, "learning_rate": 0.0018426099944456123, "loss": 0.3575, "step": 19158 }, { "epoch": 0.03397226366802928, "grad_norm": 0.47265625, "learning_rate": 0.001842576367507181, "loss": 0.2355, "step": 19160 }, { "epoch": 0.033975809833339095, "grad_norm": 0.6796875, "learning_rate": 0.0018425427373211518, "loss": 0.1953, "step": 19162 }, { "epoch": 0.03397935599864891, "grad_norm": 1.7421875, "learning_rate": 0.001842509103887672, "loss": 0.3155, "step": 19164 }, { "epoch": 0.033982902163958724, "grad_norm": 0.34375, "learning_rate": 0.0018424754672068885, "loss": 0.2594, "step": 19166 }, { "epoch": 0.03398644832926854, "grad_norm": 0.57421875, "learning_rate": 0.0018424418272789487, "loss": 0.2617, "step": 19168 }, { "epoch": 0.03398999449457836, "grad_norm": 0.84375, "learning_rate": 0.0018424081841039996, "loss": 0.2766, "step": 19170 }, { "epoch": 0.033993540659888175, "grad_norm": 0.95703125, "learning_rate": 0.0018423745376821886, "loss": 0.3368, "step": 19172 }, { "epoch": 0.03399708682519799, "grad_norm": 0.5546875, "learning_rate": 0.0018423408880136622, "loss": 0.1981, "step": 19174 }, { "epoch": 0.034000632990507804, "grad_norm": 1.09375, "learning_rate": 0.0018423072350985683, "loss": 0.2789, "step": 19176 }, { "epoch": 0.03400417915581762, "grad_norm": 0.65625, "learning_rate": 0.0018422735789370534, "loss": 0.1432, "step": 19178 }, { "epoch": 0.03400772532112743, "grad_norm": 0.6171875, "learning_rate": 0.0018422399195292655, "loss": 0.2191, "step": 19180 }, { "epoch": 0.03401127148643725, "grad_norm": 0.6875, "learning_rate": 0.001842206256875351, "loss": 0.2134, "step": 19182 }, { "epoch": 0.03401481765174706, "grad_norm": 0.291015625, "learning_rate": 0.0018421725909754574, "loss": 0.2076, "step": 19184 }, { "epoch": 0.034018363817056876, "grad_norm": 0.32421875, "learning_rate": 0.0018421389218297324, "loss": 0.1866, "step": 19186 }, { "epoch": 0.03402190998236669, "grad_norm": 0.76953125, "learning_rate": 0.0018421052494383223, "loss": 0.2017, "step": 19188 }, { "epoch": 0.034025456147676506, "grad_norm": 0.43359375, "learning_rate": 0.0018420715738013754, "loss": 0.2194, "step": 19190 }, { "epoch": 0.03402900231298632, "grad_norm": 1.578125, "learning_rate": 0.0018420378949190379, "loss": 0.2218, "step": 19192 }, { "epoch": 0.03403254847829614, "grad_norm": 0.32421875, "learning_rate": 0.0018420042127914582, "loss": 0.2075, "step": 19194 }, { "epoch": 0.034036094643605956, "grad_norm": 1.0859375, "learning_rate": 0.001841970527418783, "loss": 0.2269, "step": 19196 }, { "epoch": 0.03403964080891577, "grad_norm": 0.9765625, "learning_rate": 0.0018419368388011596, "loss": 0.1838, "step": 19198 }, { "epoch": 0.034043186974225585, "grad_norm": 0.6328125, "learning_rate": 0.0018419031469387355, "loss": 0.2427, "step": 19200 }, { "epoch": 0.0340467331395354, "grad_norm": 0.5625, "learning_rate": 0.0018418694518316577, "loss": 0.2295, "step": 19202 }, { "epoch": 0.034050279304845214, "grad_norm": 1.1328125, "learning_rate": 0.0018418357534800743, "loss": 0.4088, "step": 19204 }, { "epoch": 0.03405382547015503, "grad_norm": 0.26171875, "learning_rate": 0.0018418020518841318, "loss": 0.1708, "step": 19206 }, { "epoch": 0.03405737163546484, "grad_norm": 0.609375, "learning_rate": 0.0018417683470439781, "loss": 0.213, "step": 19208 }, { "epoch": 0.03406091780077466, "grad_norm": 0.4921875, "learning_rate": 0.0018417346389597606, "loss": 0.2157, "step": 19210 }, { "epoch": 0.03406446396608447, "grad_norm": 0.357421875, "learning_rate": 0.0018417009276316268, "loss": 0.2962, "step": 19212 }, { "epoch": 0.03406801013139429, "grad_norm": 1.1875, "learning_rate": 0.001841667213059724, "loss": 0.2374, "step": 19214 }, { "epoch": 0.03407155629670411, "grad_norm": 1.078125, "learning_rate": 0.0018416334952441992, "loss": 0.4036, "step": 19216 }, { "epoch": 0.03407510246201392, "grad_norm": 0.392578125, "learning_rate": 0.0018415997741852007, "loss": 0.1918, "step": 19218 }, { "epoch": 0.03407864862732374, "grad_norm": 0.490234375, "learning_rate": 0.0018415660498828752, "loss": 0.1809, "step": 19220 }, { "epoch": 0.03408219479263355, "grad_norm": 0.4140625, "learning_rate": 0.001841532322337371, "loss": 0.238, "step": 19222 }, { "epoch": 0.03408574095794337, "grad_norm": 0.8046875, "learning_rate": 0.0018414985915488347, "loss": 0.2962, "step": 19224 }, { "epoch": 0.03408928712325318, "grad_norm": 0.2421875, "learning_rate": 0.0018414648575174144, "loss": 0.1616, "step": 19226 }, { "epoch": 0.034092833288562996, "grad_norm": 0.5, "learning_rate": 0.0018414311202432576, "loss": 0.2683, "step": 19228 }, { "epoch": 0.03409637945387281, "grad_norm": 0.294921875, "learning_rate": 0.001841397379726512, "loss": 0.2012, "step": 19230 }, { "epoch": 0.034099925619182625, "grad_norm": 0.33203125, "learning_rate": 0.0018413636359673245, "loss": 0.2255, "step": 19232 }, { "epoch": 0.03410347178449244, "grad_norm": 0.67578125, "learning_rate": 0.0018413298889658435, "loss": 0.1975, "step": 19234 }, { "epoch": 0.034107017949802254, "grad_norm": 0.2734375, "learning_rate": 0.0018412961387222159, "loss": 0.2286, "step": 19236 }, { "epoch": 0.034110564115112076, "grad_norm": 2.1875, "learning_rate": 0.0018412623852365896, "loss": 0.4569, "step": 19238 }, { "epoch": 0.03411411028042189, "grad_norm": 1.0625, "learning_rate": 0.0018412286285091123, "loss": 0.2914, "step": 19240 }, { "epoch": 0.034117656445731705, "grad_norm": 2.203125, "learning_rate": 0.0018411948685399312, "loss": 0.1773, "step": 19242 }, { "epoch": 0.03412120261104152, "grad_norm": 0.65625, "learning_rate": 0.0018411611053291946, "loss": 0.2066, "step": 19244 }, { "epoch": 0.034124748776351334, "grad_norm": 0.5703125, "learning_rate": 0.0018411273388770498, "loss": 0.3644, "step": 19246 }, { "epoch": 0.03412829494166115, "grad_norm": 0.341796875, "learning_rate": 0.0018410935691836443, "loss": 0.1911, "step": 19248 }, { "epoch": 0.03413184110697096, "grad_norm": 0.75390625, "learning_rate": 0.0018410597962491265, "loss": 0.2419, "step": 19250 }, { "epoch": 0.03413538727228078, "grad_norm": 0.50390625, "learning_rate": 0.0018410260200736432, "loss": 0.2272, "step": 19252 }, { "epoch": 0.03413893343759059, "grad_norm": 0.57421875, "learning_rate": 0.0018409922406573426, "loss": 0.1901, "step": 19254 }, { "epoch": 0.03414247960290041, "grad_norm": 0.1689453125, "learning_rate": 0.0018409584580003724, "loss": 0.1967, "step": 19256 }, { "epoch": 0.03414602576821022, "grad_norm": 3.609375, "learning_rate": 0.0018409246721028806, "loss": 0.1886, "step": 19258 }, { "epoch": 0.034149571933520036, "grad_norm": 1.2265625, "learning_rate": 0.0018408908829650142, "loss": 0.2766, "step": 19260 }, { "epoch": 0.03415311809882986, "grad_norm": 0.375, "learning_rate": 0.0018408570905869214, "loss": 0.1666, "step": 19262 }, { "epoch": 0.03415666426413967, "grad_norm": 0.92578125, "learning_rate": 0.0018408232949687505, "loss": 0.5652, "step": 19264 }, { "epoch": 0.034160210429449486, "grad_norm": 0.369140625, "learning_rate": 0.0018407894961106487, "loss": 0.2625, "step": 19266 }, { "epoch": 0.0341637565947593, "grad_norm": 0.40625, "learning_rate": 0.0018407556940127636, "loss": 0.2398, "step": 19268 }, { "epoch": 0.034167302760069115, "grad_norm": 0.50390625, "learning_rate": 0.0018407218886752434, "loss": 0.4504, "step": 19270 }, { "epoch": 0.03417084892537893, "grad_norm": 0.56640625, "learning_rate": 0.0018406880800982365, "loss": 0.2331, "step": 19272 }, { "epoch": 0.034174395090688744, "grad_norm": 0.7578125, "learning_rate": 0.0018406542682818897, "loss": 0.2426, "step": 19274 }, { "epoch": 0.03417794125599856, "grad_norm": 0.43359375, "learning_rate": 0.0018406204532263515, "loss": 0.2185, "step": 19276 }, { "epoch": 0.034181487421308374, "grad_norm": 0.91796875, "learning_rate": 0.0018405866349317694, "loss": 0.212, "step": 19278 }, { "epoch": 0.03418503358661819, "grad_norm": 1.328125, "learning_rate": 0.001840552813398292, "loss": 0.2749, "step": 19280 }, { "epoch": 0.034188579751928, "grad_norm": 1.125, "learning_rate": 0.0018405189886260662, "loss": 0.4763, "step": 19282 }, { "epoch": 0.034192125917237824, "grad_norm": 0.93359375, "learning_rate": 0.001840485160615241, "loss": 0.1994, "step": 19284 }, { "epoch": 0.03419567208254764, "grad_norm": 0.373046875, "learning_rate": 0.0018404513293659632, "loss": 0.2514, "step": 19286 }, { "epoch": 0.03419921824785745, "grad_norm": 0.5625, "learning_rate": 0.001840417494878382, "loss": 0.2071, "step": 19288 }, { "epoch": 0.03420276441316727, "grad_norm": 0.353515625, "learning_rate": 0.0018403836571526447, "loss": 0.1594, "step": 19290 }, { "epoch": 0.03420631057847708, "grad_norm": 0.48828125, "learning_rate": 0.001840349816188899, "loss": 0.1771, "step": 19292 }, { "epoch": 0.0342098567437869, "grad_norm": 0.67578125, "learning_rate": 0.0018403159719872935, "loss": 0.2512, "step": 19294 }, { "epoch": 0.03421340290909671, "grad_norm": 0.578125, "learning_rate": 0.001840282124547976, "loss": 0.3169, "step": 19296 }, { "epoch": 0.034216949074406526, "grad_norm": 0.8828125, "learning_rate": 0.0018402482738710945, "loss": 0.2098, "step": 19298 }, { "epoch": 0.03422049523971634, "grad_norm": 0.38671875, "learning_rate": 0.001840214419956797, "loss": 0.255, "step": 19300 }, { "epoch": 0.034224041405026155, "grad_norm": 0.376953125, "learning_rate": 0.0018401805628052315, "loss": 0.1598, "step": 19302 }, { "epoch": 0.03422758757033597, "grad_norm": 0.373046875, "learning_rate": 0.0018401467024165461, "loss": 0.1736, "step": 19304 }, { "epoch": 0.034231133735645784, "grad_norm": 0.5234375, "learning_rate": 0.001840112838790889, "loss": 0.185, "step": 19306 }, { "epoch": 0.034234679900955606, "grad_norm": 1.34375, "learning_rate": 0.0018400789719284086, "loss": 0.2465, "step": 19308 }, { "epoch": 0.03423822606626542, "grad_norm": 3.328125, "learning_rate": 0.0018400451018292523, "loss": 0.197, "step": 19310 }, { "epoch": 0.034241772231575235, "grad_norm": 0.578125, "learning_rate": 0.0018400112284935688, "loss": 0.2497, "step": 19312 }, { "epoch": 0.03424531839688505, "grad_norm": 0.419921875, "learning_rate": 0.0018399773519215054, "loss": 0.2632, "step": 19314 }, { "epoch": 0.034248864562194864, "grad_norm": 0.45703125, "learning_rate": 0.0018399434721132117, "loss": 0.2231, "step": 19316 }, { "epoch": 0.03425241072750468, "grad_norm": 0.83984375, "learning_rate": 0.001839909589068835, "loss": 0.5745, "step": 19318 }, { "epoch": 0.03425595689281449, "grad_norm": 2.40625, "learning_rate": 0.001839875702788523, "loss": 0.2247, "step": 19320 }, { "epoch": 0.03425950305812431, "grad_norm": 0.431640625, "learning_rate": 0.0018398418132724251, "loss": 0.1917, "step": 19322 }, { "epoch": 0.03426304922343412, "grad_norm": 0.671875, "learning_rate": 0.001839807920520688, "loss": 0.3159, "step": 19324 }, { "epoch": 0.03426659538874394, "grad_norm": 0.423828125, "learning_rate": 0.0018397740245334616, "loss": 0.1965, "step": 19326 }, { "epoch": 0.03427014155405375, "grad_norm": 0.54296875, "learning_rate": 0.001839740125310893, "loss": 0.2511, "step": 19328 }, { "epoch": 0.03427368771936357, "grad_norm": 1.2109375, "learning_rate": 0.001839706222853131, "loss": 0.3093, "step": 19330 }, { "epoch": 0.03427723388467339, "grad_norm": 0.69140625, "learning_rate": 0.0018396723171603233, "loss": 0.2201, "step": 19332 }, { "epoch": 0.0342807800499832, "grad_norm": 1.3828125, "learning_rate": 0.0018396384082326187, "loss": 0.3054, "step": 19334 }, { "epoch": 0.034284326215293016, "grad_norm": 0.251953125, "learning_rate": 0.0018396044960701655, "loss": 0.2889, "step": 19336 }, { "epoch": 0.03428787238060283, "grad_norm": 0.6484375, "learning_rate": 0.0018395705806731118, "loss": 0.2257, "step": 19338 }, { "epoch": 0.034291418545912646, "grad_norm": 0.5234375, "learning_rate": 0.001839536662041606, "loss": 0.1897, "step": 19340 }, { "epoch": 0.03429496471122246, "grad_norm": 0.56640625, "learning_rate": 0.001839502740175796, "loss": 0.2285, "step": 19342 }, { "epoch": 0.034298510876532275, "grad_norm": 0.5390625, "learning_rate": 0.001839468815075831, "loss": 0.2467, "step": 19344 }, { "epoch": 0.03430205704184209, "grad_norm": 0.404296875, "learning_rate": 0.0018394348867418588, "loss": 0.204, "step": 19346 }, { "epoch": 0.034305603207151904, "grad_norm": 0.310546875, "learning_rate": 0.001839400955174028, "loss": 0.2198, "step": 19348 }, { "epoch": 0.03430914937246172, "grad_norm": 0.64453125, "learning_rate": 0.0018393670203724869, "loss": 0.243, "step": 19350 }, { "epoch": 0.03431269553777154, "grad_norm": 0.2451171875, "learning_rate": 0.0018393330823373835, "loss": 0.1733, "step": 19352 }, { "epoch": 0.034316241703081354, "grad_norm": 1.7109375, "learning_rate": 0.001839299141068867, "loss": 0.2794, "step": 19354 }, { "epoch": 0.03431978786839117, "grad_norm": 1.0859375, "learning_rate": 0.0018392651965670856, "loss": 0.2212, "step": 19356 }, { "epoch": 0.03432333403370098, "grad_norm": 2.703125, "learning_rate": 0.0018392312488321877, "loss": 0.3003, "step": 19358 }, { "epoch": 0.0343268801990108, "grad_norm": 0.703125, "learning_rate": 0.0018391972978643214, "loss": 0.2195, "step": 19360 }, { "epoch": 0.03433042636432061, "grad_norm": 0.3046875, "learning_rate": 0.001839163343663636, "loss": 0.2255, "step": 19362 }, { "epoch": 0.03433397252963043, "grad_norm": 0.83203125, "learning_rate": 0.0018391293862302792, "loss": 0.2213, "step": 19364 }, { "epoch": 0.03433751869494024, "grad_norm": 0.447265625, "learning_rate": 0.0018390954255643997, "loss": 0.194, "step": 19366 }, { "epoch": 0.034341064860250056, "grad_norm": 0.453125, "learning_rate": 0.0018390614616661458, "loss": 0.2491, "step": 19368 }, { "epoch": 0.03434461102555987, "grad_norm": 0.357421875, "learning_rate": 0.001839027494535667, "loss": 0.2536, "step": 19370 }, { "epoch": 0.034348157190869685, "grad_norm": 0.6875, "learning_rate": 0.0018389935241731109, "loss": 0.222, "step": 19372 }, { "epoch": 0.0343517033561795, "grad_norm": 0.52734375, "learning_rate": 0.0018389595505786266, "loss": 0.1888, "step": 19374 }, { "epoch": 0.03435524952148932, "grad_norm": 0.375, "learning_rate": 0.0018389255737523622, "loss": 0.2139, "step": 19376 }, { "epoch": 0.034358795686799136, "grad_norm": 0.45703125, "learning_rate": 0.0018388915936944667, "loss": 0.2227, "step": 19378 }, { "epoch": 0.03436234185210895, "grad_norm": 1.625, "learning_rate": 0.0018388576104050884, "loss": 0.2889, "step": 19380 }, { "epoch": 0.034365888017418765, "grad_norm": 0.33984375, "learning_rate": 0.0018388236238843763, "loss": 0.2093, "step": 19382 }, { "epoch": 0.03436943418272858, "grad_norm": 2.234375, "learning_rate": 0.0018387896341324787, "loss": 0.3643, "step": 19384 }, { "epoch": 0.034372980348038394, "grad_norm": 0.2119140625, "learning_rate": 0.0018387556411495448, "loss": 0.1925, "step": 19386 }, { "epoch": 0.03437652651334821, "grad_norm": 0.4296875, "learning_rate": 0.0018387216449357222, "loss": 0.183, "step": 19388 }, { "epoch": 0.03438007267865802, "grad_norm": 0.455078125, "learning_rate": 0.0018386876454911606, "loss": 0.2109, "step": 19390 }, { "epoch": 0.03438361884396784, "grad_norm": 0.92578125, "learning_rate": 0.001838653642816008, "loss": 0.3275, "step": 19392 }, { "epoch": 0.03438716500927765, "grad_norm": 0.625, "learning_rate": 0.001838619636910414, "loss": 0.2296, "step": 19394 }, { "epoch": 0.03439071117458747, "grad_norm": 0.51953125, "learning_rate": 0.0018385856277745264, "loss": 0.147, "step": 19396 }, { "epoch": 0.03439425733989729, "grad_norm": 0.255859375, "learning_rate": 0.001838551615408494, "loss": 0.1728, "step": 19398 }, { "epoch": 0.0343978035052071, "grad_norm": 1.4609375, "learning_rate": 0.0018385175998124664, "loss": 0.2031, "step": 19400 }, { "epoch": 0.03440134967051692, "grad_norm": 0.44140625, "learning_rate": 0.0018384835809865914, "loss": 0.1995, "step": 19402 }, { "epoch": 0.03440489583582673, "grad_norm": 0.80859375, "learning_rate": 0.0018384495589310185, "loss": 0.2261, "step": 19404 }, { "epoch": 0.03440844200113655, "grad_norm": 0.494140625, "learning_rate": 0.0018384155336458958, "loss": 0.2119, "step": 19406 }, { "epoch": 0.03441198816644636, "grad_norm": 0.61328125, "learning_rate": 0.0018383815051313728, "loss": 0.2988, "step": 19408 }, { "epoch": 0.034415534331756176, "grad_norm": 0.275390625, "learning_rate": 0.0018383474733875977, "loss": 0.176, "step": 19410 }, { "epoch": 0.03441908049706599, "grad_norm": 0.1640625, "learning_rate": 0.0018383134384147197, "loss": 0.2196, "step": 19412 }, { "epoch": 0.034422626662375805, "grad_norm": 0.248046875, "learning_rate": 0.001838279400212888, "loss": 0.2307, "step": 19414 }, { "epoch": 0.03442617282768562, "grad_norm": 2.9375, "learning_rate": 0.0018382453587822505, "loss": 0.3128, "step": 19416 }, { "epoch": 0.034429718992995434, "grad_norm": 0.56640625, "learning_rate": 0.0018382113141229568, "loss": 0.1993, "step": 19418 }, { "epoch": 0.034433265158305255, "grad_norm": 1.03125, "learning_rate": 0.0018381772662351557, "loss": 0.3101, "step": 19420 }, { "epoch": 0.03443681132361507, "grad_norm": 4.15625, "learning_rate": 0.0018381432151189964, "loss": 0.471, "step": 19422 }, { "epoch": 0.034440357488924885, "grad_norm": 0.369140625, "learning_rate": 0.0018381091607746268, "loss": 0.1733, "step": 19424 }, { "epoch": 0.0344439036542347, "grad_norm": 0.298828125, "learning_rate": 0.0018380751032021968, "loss": 0.2283, "step": 19426 }, { "epoch": 0.034447449819544514, "grad_norm": 0.5546875, "learning_rate": 0.001838041042401855, "loss": 0.2391, "step": 19428 }, { "epoch": 0.03445099598485433, "grad_norm": 0.859375, "learning_rate": 0.0018380069783737502, "loss": 0.4435, "step": 19430 }, { "epoch": 0.03445454215016414, "grad_norm": 1.421875, "learning_rate": 0.0018379729111180315, "loss": 0.3458, "step": 19432 }, { "epoch": 0.03445808831547396, "grad_norm": 0.474609375, "learning_rate": 0.0018379388406348485, "loss": 0.1678, "step": 19434 }, { "epoch": 0.03446163448078377, "grad_norm": 1.6953125, "learning_rate": 0.0018379047669243491, "loss": 0.2759, "step": 19436 }, { "epoch": 0.034465180646093586, "grad_norm": 1.0078125, "learning_rate": 0.0018378706899866834, "loss": 0.1661, "step": 19438 }, { "epoch": 0.0344687268114034, "grad_norm": 0.3984375, "learning_rate": 0.0018378366098219993, "loss": 0.2248, "step": 19440 }, { "epoch": 0.034472272976713215, "grad_norm": 0.6328125, "learning_rate": 0.0018378025264304465, "loss": 0.2803, "step": 19442 }, { "epoch": 0.03447581914202304, "grad_norm": 0.76171875, "learning_rate": 0.0018377684398121744, "loss": 0.2816, "step": 19444 }, { "epoch": 0.03447936530733285, "grad_norm": 0.365234375, "learning_rate": 0.0018377343499673315, "loss": 0.1521, "step": 19446 }, { "epoch": 0.034482911472642666, "grad_norm": 0.609375, "learning_rate": 0.0018377002568960672, "loss": 0.3504, "step": 19448 }, { "epoch": 0.03448645763795248, "grad_norm": 0.2119140625, "learning_rate": 0.0018376661605985303, "loss": 0.1611, "step": 19450 }, { "epoch": 0.034490003803262295, "grad_norm": 1.015625, "learning_rate": 0.0018376320610748703, "loss": 0.214, "step": 19452 }, { "epoch": 0.03449354996857211, "grad_norm": 0.2578125, "learning_rate": 0.001837597958325236, "loss": 0.1684, "step": 19454 }, { "epoch": 0.034497096133881924, "grad_norm": 0.470703125, "learning_rate": 0.0018375638523497767, "loss": 0.2457, "step": 19456 }, { "epoch": 0.03450064229919174, "grad_norm": 0.78125, "learning_rate": 0.0018375297431486414, "loss": 0.1863, "step": 19458 }, { "epoch": 0.03450418846450155, "grad_norm": 0.353515625, "learning_rate": 0.0018374956307219796, "loss": 0.2677, "step": 19460 }, { "epoch": 0.03450773462981137, "grad_norm": 0.314453125, "learning_rate": 0.0018374615150699406, "loss": 0.2287, "step": 19462 }, { "epoch": 0.03451128079512118, "grad_norm": 1.375, "learning_rate": 0.0018374273961926728, "loss": 0.203, "step": 19464 }, { "epoch": 0.034514826960431004, "grad_norm": 0.337890625, "learning_rate": 0.0018373932740903258, "loss": 0.1679, "step": 19466 }, { "epoch": 0.03451837312574082, "grad_norm": 0.263671875, "learning_rate": 0.0018373591487630496, "loss": 0.1568, "step": 19468 }, { "epoch": 0.03452191929105063, "grad_norm": 0.62890625, "learning_rate": 0.0018373250202109923, "loss": 0.1573, "step": 19470 }, { "epoch": 0.03452546545636045, "grad_norm": 0.3203125, "learning_rate": 0.0018372908884343039, "loss": 0.211, "step": 19472 }, { "epoch": 0.03452901162167026, "grad_norm": 0.2578125, "learning_rate": 0.0018372567534331332, "loss": 0.2539, "step": 19474 }, { "epoch": 0.03453255778698008, "grad_norm": 0.64453125, "learning_rate": 0.00183722261520763, "loss": 0.1686, "step": 19476 }, { "epoch": 0.03453610395228989, "grad_norm": 0.5625, "learning_rate": 0.001837188473757943, "loss": 0.1906, "step": 19478 }, { "epoch": 0.034539650117599706, "grad_norm": 0.69921875, "learning_rate": 0.0018371543290842218, "loss": 0.1984, "step": 19480 }, { "epoch": 0.03454319628290952, "grad_norm": 0.515625, "learning_rate": 0.001837120181186616, "loss": 0.2214, "step": 19482 }, { "epoch": 0.034546742448219335, "grad_norm": 0.443359375, "learning_rate": 0.0018370860300652746, "loss": 0.1957, "step": 19484 }, { "epoch": 0.03455028861352915, "grad_norm": 0.314453125, "learning_rate": 0.001837051875720347, "loss": 0.2128, "step": 19486 }, { "epoch": 0.03455383477883897, "grad_norm": 1.234375, "learning_rate": 0.0018370177181519828, "loss": 0.2483, "step": 19488 }, { "epoch": 0.034557380944148786, "grad_norm": 0.48046875, "learning_rate": 0.001836983557360331, "loss": 0.2041, "step": 19490 }, { "epoch": 0.0345609271094586, "grad_norm": 0.80859375, "learning_rate": 0.0018369493933455413, "loss": 0.1802, "step": 19492 }, { "epoch": 0.034564473274768415, "grad_norm": 0.423828125, "learning_rate": 0.0018369152261077629, "loss": 0.1584, "step": 19494 }, { "epoch": 0.03456801944007823, "grad_norm": 0.59765625, "learning_rate": 0.0018368810556471456, "loss": 0.2182, "step": 19496 }, { "epoch": 0.034571565605388044, "grad_norm": 1.640625, "learning_rate": 0.0018368468819638384, "loss": 0.254, "step": 19498 }, { "epoch": 0.03457511177069786, "grad_norm": 0.302734375, "learning_rate": 0.001836812705057991, "loss": 0.2093, "step": 19500 }, { "epoch": 0.03457865793600767, "grad_norm": 1.5625, "learning_rate": 0.001836778524929753, "loss": 0.2199, "step": 19502 }, { "epoch": 0.03458220410131749, "grad_norm": 0.515625, "learning_rate": 0.0018367443415792735, "loss": 0.2821, "step": 19504 }, { "epoch": 0.0345857502666273, "grad_norm": 0.31640625, "learning_rate": 0.001836710155006702, "loss": 0.1945, "step": 19506 }, { "epoch": 0.03458929643193712, "grad_norm": 0.294921875, "learning_rate": 0.0018366759652121887, "loss": 0.1777, "step": 19508 }, { "epoch": 0.03459284259724693, "grad_norm": 0.462890625, "learning_rate": 0.0018366417721958823, "loss": 0.2308, "step": 19510 }, { "epoch": 0.03459638876255675, "grad_norm": 0.4453125, "learning_rate": 0.001836607575957933, "loss": 0.2335, "step": 19512 }, { "epoch": 0.03459993492786657, "grad_norm": 1.0546875, "learning_rate": 0.0018365733764984897, "loss": 0.3872, "step": 19514 }, { "epoch": 0.03460348109317638, "grad_norm": 0.6015625, "learning_rate": 0.0018365391738177027, "loss": 0.209, "step": 19516 }, { "epoch": 0.034607027258486196, "grad_norm": 0.2734375, "learning_rate": 0.0018365049679157205, "loss": 0.2127, "step": 19518 }, { "epoch": 0.03461057342379601, "grad_norm": 0.474609375, "learning_rate": 0.0018364707587926938, "loss": 0.4515, "step": 19520 }, { "epoch": 0.034614119589105825, "grad_norm": 1.0703125, "learning_rate": 0.0018364365464487718, "loss": 0.2133, "step": 19522 }, { "epoch": 0.03461766575441564, "grad_norm": 7.46875, "learning_rate": 0.0018364023308841042, "loss": 0.3535, "step": 19524 }, { "epoch": 0.034621211919725454, "grad_norm": 0.345703125, "learning_rate": 0.0018363681120988407, "loss": 0.2294, "step": 19526 }, { "epoch": 0.03462475808503527, "grad_norm": 2.171875, "learning_rate": 0.0018363338900931306, "loss": 0.3578, "step": 19528 }, { "epoch": 0.034628304250345084, "grad_norm": 0.53515625, "learning_rate": 0.0018362996648671238, "loss": 0.2449, "step": 19530 }, { "epoch": 0.0346318504156549, "grad_norm": 0.52734375, "learning_rate": 0.0018362654364209696, "loss": 0.1809, "step": 19532 }, { "epoch": 0.03463539658096472, "grad_norm": 0.490234375, "learning_rate": 0.0018362312047548184, "loss": 0.1666, "step": 19534 }, { "epoch": 0.034638942746274534, "grad_norm": 0.58203125, "learning_rate": 0.0018361969698688194, "loss": 0.1788, "step": 19536 }, { "epoch": 0.03464248891158435, "grad_norm": 0.302734375, "learning_rate": 0.0018361627317631227, "loss": 0.1806, "step": 19538 }, { "epoch": 0.03464603507689416, "grad_norm": 0.267578125, "learning_rate": 0.0018361284904378778, "loss": 0.1997, "step": 19540 }, { "epoch": 0.03464958124220398, "grad_norm": 0.75390625, "learning_rate": 0.0018360942458932343, "loss": 0.1637, "step": 19542 }, { "epoch": 0.03465312740751379, "grad_norm": 1.1875, "learning_rate": 0.0018360599981293423, "loss": 0.2992, "step": 19544 }, { "epoch": 0.03465667357282361, "grad_norm": 0.353515625, "learning_rate": 0.0018360257471463515, "loss": 0.1982, "step": 19546 }, { "epoch": 0.03466021973813342, "grad_norm": 0.427734375, "learning_rate": 0.0018359914929444118, "loss": 0.2741, "step": 19548 }, { "epoch": 0.034663765903443236, "grad_norm": 0.984375, "learning_rate": 0.0018359572355236722, "loss": 0.3033, "step": 19550 }, { "epoch": 0.03466731206875305, "grad_norm": 0.357421875, "learning_rate": 0.0018359229748842836, "loss": 0.1823, "step": 19552 }, { "epoch": 0.034670858234062865, "grad_norm": 0.6796875, "learning_rate": 0.001835888711026395, "loss": 0.3342, "step": 19554 }, { "epoch": 0.03467440439937269, "grad_norm": 0.33984375, "learning_rate": 0.001835854443950157, "loss": 0.1951, "step": 19556 }, { "epoch": 0.0346779505646825, "grad_norm": 0.75, "learning_rate": 0.0018358201736557189, "loss": 0.1605, "step": 19558 }, { "epoch": 0.034681496729992316, "grad_norm": 0.87109375, "learning_rate": 0.001835785900143231, "loss": 0.1565, "step": 19560 }, { "epoch": 0.03468504289530213, "grad_norm": 1.3515625, "learning_rate": 0.0018357516234128429, "loss": 0.2595, "step": 19562 }, { "epoch": 0.034688589060611945, "grad_norm": 0.44140625, "learning_rate": 0.0018357173434647043, "loss": 0.2521, "step": 19564 }, { "epoch": 0.03469213522592176, "grad_norm": 1.5078125, "learning_rate": 0.0018356830602989658, "loss": 0.1983, "step": 19566 }, { "epoch": 0.034695681391231574, "grad_norm": 0.77734375, "learning_rate": 0.0018356487739157768, "loss": 0.2048, "step": 19568 }, { "epoch": 0.03469922755654139, "grad_norm": 3.40625, "learning_rate": 0.0018356144843152873, "loss": 0.2228, "step": 19570 }, { "epoch": 0.0347027737218512, "grad_norm": 0.72265625, "learning_rate": 0.0018355801914976472, "loss": 0.2046, "step": 19572 }, { "epoch": 0.03470631988716102, "grad_norm": 0.486328125, "learning_rate": 0.0018355458954630068, "loss": 0.2267, "step": 19574 }, { "epoch": 0.03470986605247083, "grad_norm": 0.59375, "learning_rate": 0.0018355115962115159, "loss": 0.2311, "step": 19576 }, { "epoch": 0.03471341221778065, "grad_norm": 1.2265625, "learning_rate": 0.0018354772937433247, "loss": 0.2378, "step": 19578 }, { "epoch": 0.03471695838309047, "grad_norm": 0.4375, "learning_rate": 0.0018354429880585825, "loss": 0.2148, "step": 19580 }, { "epoch": 0.03472050454840028, "grad_norm": 0.265625, "learning_rate": 0.0018354086791574405, "loss": 0.2161, "step": 19582 }, { "epoch": 0.0347240507137101, "grad_norm": 0.373046875, "learning_rate": 0.0018353743670400475, "loss": 0.227, "step": 19584 }, { "epoch": 0.03472759687901991, "grad_norm": 1.3671875, "learning_rate": 0.0018353400517065546, "loss": 0.3436, "step": 19586 }, { "epoch": 0.034731143044329726, "grad_norm": 0.412109375, "learning_rate": 0.0018353057331571112, "loss": 0.2, "step": 19588 }, { "epoch": 0.03473468920963954, "grad_norm": 0.392578125, "learning_rate": 0.0018352714113918681, "loss": 0.209, "step": 19590 }, { "epoch": 0.034738235374949356, "grad_norm": 0.53515625, "learning_rate": 0.0018352370864109747, "loss": 0.1817, "step": 19592 }, { "epoch": 0.03474178154025917, "grad_norm": 0.4765625, "learning_rate": 0.0018352027582145812, "loss": 0.1742, "step": 19594 }, { "epoch": 0.034745327705568985, "grad_norm": 0.71484375, "learning_rate": 0.0018351684268028377, "loss": 0.339, "step": 19596 }, { "epoch": 0.0347488738708788, "grad_norm": 0.57421875, "learning_rate": 0.0018351340921758947, "loss": 0.2354, "step": 19598 }, { "epoch": 0.034752420036188614, "grad_norm": 0.427734375, "learning_rate": 0.0018350997543339026, "loss": 0.2815, "step": 19600 }, { "epoch": 0.034755966201498435, "grad_norm": 2.453125, "learning_rate": 0.0018350654132770108, "loss": 0.3986, "step": 19602 }, { "epoch": 0.03475951236680825, "grad_norm": 0.5234375, "learning_rate": 0.0018350310690053698, "loss": 0.1667, "step": 19604 }, { "epoch": 0.034763058532118064, "grad_norm": 0.369140625, "learning_rate": 0.0018349967215191302, "loss": 0.351, "step": 19606 }, { "epoch": 0.03476660469742788, "grad_norm": 0.412109375, "learning_rate": 0.0018349623708184416, "loss": 0.1907, "step": 19608 }, { "epoch": 0.03477015086273769, "grad_norm": 0.44921875, "learning_rate": 0.0018349280169034545, "loss": 0.1646, "step": 19610 }, { "epoch": 0.03477369702804751, "grad_norm": 0.39453125, "learning_rate": 0.0018348936597743194, "loss": 0.2475, "step": 19612 }, { "epoch": 0.03477724319335732, "grad_norm": 0.82421875, "learning_rate": 0.001834859299431186, "loss": 0.1876, "step": 19614 }, { "epoch": 0.03478078935866714, "grad_norm": 0.4140625, "learning_rate": 0.0018348249358742053, "loss": 0.237, "step": 19616 }, { "epoch": 0.03478433552397695, "grad_norm": 1.7421875, "learning_rate": 0.0018347905691035267, "loss": 0.2735, "step": 19618 }, { "epoch": 0.034787881689286766, "grad_norm": 0.56640625, "learning_rate": 0.0018347561991193012, "loss": 0.2037, "step": 19620 }, { "epoch": 0.03479142785459658, "grad_norm": 0.7734375, "learning_rate": 0.0018347218259216786, "loss": 0.2271, "step": 19622 }, { "epoch": 0.0347949740199064, "grad_norm": 0.9375, "learning_rate": 0.0018346874495108099, "loss": 0.258, "step": 19624 }, { "epoch": 0.03479852018521622, "grad_norm": 1.3671875, "learning_rate": 0.001834653069886845, "loss": 0.248, "step": 19626 }, { "epoch": 0.03480206635052603, "grad_norm": 0.69921875, "learning_rate": 0.0018346186870499342, "loss": 0.1886, "step": 19628 }, { "epoch": 0.034805612515835846, "grad_norm": 1.53125, "learning_rate": 0.0018345843010002278, "loss": 0.3499, "step": 19630 }, { "epoch": 0.03480915868114566, "grad_norm": 0.71875, "learning_rate": 0.0018345499117378766, "loss": 0.2092, "step": 19632 }, { "epoch": 0.034812704846455475, "grad_norm": 0.59765625, "learning_rate": 0.0018345155192630307, "loss": 0.1688, "step": 19634 }, { "epoch": 0.03481625101176529, "grad_norm": 0.66796875, "learning_rate": 0.0018344811235758408, "loss": 0.2282, "step": 19636 }, { "epoch": 0.034819797177075104, "grad_norm": 1.1875, "learning_rate": 0.0018344467246764569, "loss": 0.2666, "step": 19638 }, { "epoch": 0.03482334334238492, "grad_norm": 0.66015625, "learning_rate": 0.0018344123225650295, "loss": 0.261, "step": 19640 }, { "epoch": 0.03482688950769473, "grad_norm": 0.494140625, "learning_rate": 0.0018343779172417091, "loss": 0.1862, "step": 19642 }, { "epoch": 0.03483043567300455, "grad_norm": 2.1875, "learning_rate": 0.0018343435087066467, "loss": 0.2536, "step": 19644 }, { "epoch": 0.03483398183831436, "grad_norm": 0.28125, "learning_rate": 0.001834309096959992, "loss": 0.1949, "step": 19646 }, { "epoch": 0.034837528003624184, "grad_norm": 0.5703125, "learning_rate": 0.0018342746820018963, "loss": 0.1547, "step": 19648 }, { "epoch": 0.034841074168934, "grad_norm": 0.28515625, "learning_rate": 0.0018342402638325092, "loss": 0.196, "step": 19650 }, { "epoch": 0.03484462033424381, "grad_norm": 1.03125, "learning_rate": 0.001834205842451982, "loss": 0.145, "step": 19652 }, { "epoch": 0.03484816649955363, "grad_norm": 0.32421875, "learning_rate": 0.0018341714178604647, "loss": 0.1616, "step": 19654 }, { "epoch": 0.03485171266486344, "grad_norm": 0.263671875, "learning_rate": 0.0018341369900581079, "loss": 0.2392, "step": 19656 }, { "epoch": 0.03485525883017326, "grad_norm": 0.88671875, "learning_rate": 0.001834102559045063, "loss": 0.3104, "step": 19658 }, { "epoch": 0.03485880499548307, "grad_norm": 0.71484375, "learning_rate": 0.0018340681248214792, "loss": 0.198, "step": 19660 }, { "epoch": 0.034862351160792886, "grad_norm": 0.40625, "learning_rate": 0.0018340336873875083, "loss": 0.1792, "step": 19662 }, { "epoch": 0.0348658973261027, "grad_norm": 0.578125, "learning_rate": 0.0018339992467433003, "loss": 0.292, "step": 19664 }, { "epoch": 0.034869443491412515, "grad_norm": 0.66015625, "learning_rate": 0.001833964802889006, "loss": 0.1743, "step": 19666 }, { "epoch": 0.03487298965672233, "grad_norm": 0.181640625, "learning_rate": 0.0018339303558247758, "loss": 0.1609, "step": 19668 }, { "epoch": 0.03487653582203215, "grad_norm": 1.5625, "learning_rate": 0.0018338959055507606, "loss": 0.2623, "step": 19670 }, { "epoch": 0.034880081987341965, "grad_norm": 0.216796875, "learning_rate": 0.0018338614520671112, "loss": 0.1844, "step": 19672 }, { "epoch": 0.03488362815265178, "grad_norm": 0.56640625, "learning_rate": 0.0018338269953739777, "loss": 0.255, "step": 19674 }, { "epoch": 0.034887174317961595, "grad_norm": 1.1640625, "learning_rate": 0.0018337925354715114, "loss": 0.2012, "step": 19676 }, { "epoch": 0.03489072048327141, "grad_norm": 1.25, "learning_rate": 0.0018337580723598631, "loss": 0.1693, "step": 19678 }, { "epoch": 0.034894266648581224, "grad_norm": 4.0625, "learning_rate": 0.0018337236060391826, "loss": 0.5154, "step": 19680 }, { "epoch": 0.03489781281389104, "grad_norm": 0.408203125, "learning_rate": 0.0018336891365096216, "loss": 0.2416, "step": 19682 }, { "epoch": 0.03490135897920085, "grad_norm": 0.357421875, "learning_rate": 0.0018336546637713305, "loss": 0.2266, "step": 19684 }, { "epoch": 0.03490490514451067, "grad_norm": 0.36328125, "learning_rate": 0.00183362018782446, "loss": 0.2055, "step": 19686 }, { "epoch": 0.03490845130982048, "grad_norm": 1.3828125, "learning_rate": 0.001833585708669161, "loss": 0.256, "step": 19688 }, { "epoch": 0.034911997475130296, "grad_norm": 0.1875, "learning_rate": 0.0018335512263055842, "loss": 0.1809, "step": 19690 }, { "epoch": 0.03491554364044012, "grad_norm": 0.6015625, "learning_rate": 0.0018335167407338804, "loss": 0.1392, "step": 19692 }, { "epoch": 0.03491908980574993, "grad_norm": 1.1171875, "learning_rate": 0.0018334822519542006, "loss": 0.2004, "step": 19694 }, { "epoch": 0.03492263597105975, "grad_norm": 1.6484375, "learning_rate": 0.0018334477599666952, "loss": 0.2829, "step": 19696 }, { "epoch": 0.03492618213636956, "grad_norm": 0.419921875, "learning_rate": 0.0018334132647715156, "loss": 0.1818, "step": 19698 }, { "epoch": 0.034929728301679376, "grad_norm": 2.515625, "learning_rate": 0.0018333787663688123, "loss": 0.3024, "step": 19700 }, { "epoch": 0.03493327446698919, "grad_norm": 1.2421875, "learning_rate": 0.0018333442647587362, "loss": 0.2631, "step": 19702 }, { "epoch": 0.034936820632299005, "grad_norm": 0.9140625, "learning_rate": 0.001833309759941438, "loss": 0.1393, "step": 19704 }, { "epoch": 0.03494036679760882, "grad_norm": 0.98046875, "learning_rate": 0.0018332752519170693, "loss": 0.2472, "step": 19706 }, { "epoch": 0.034943912962918634, "grad_norm": 2.5, "learning_rate": 0.0018332407406857806, "loss": 0.2105, "step": 19708 }, { "epoch": 0.03494745912822845, "grad_norm": 1.140625, "learning_rate": 0.0018332062262477228, "loss": 0.1693, "step": 19710 }, { "epoch": 0.03495100529353826, "grad_norm": 1.15625, "learning_rate": 0.0018331717086030467, "loss": 0.1999, "step": 19712 }, { "epoch": 0.03495455145884808, "grad_norm": 1.8828125, "learning_rate": 0.0018331371877519033, "loss": 0.1967, "step": 19714 }, { "epoch": 0.0349580976241579, "grad_norm": 0.5390625, "learning_rate": 0.0018331026636944439, "loss": 0.2034, "step": 19716 }, { "epoch": 0.034961643789467714, "grad_norm": 1.140625, "learning_rate": 0.0018330681364308195, "loss": 0.1513, "step": 19718 }, { "epoch": 0.03496518995477753, "grad_norm": 0.205078125, "learning_rate": 0.0018330336059611805, "loss": 0.1445, "step": 19720 }, { "epoch": 0.03496873612008734, "grad_norm": 0.80078125, "learning_rate": 0.0018329990722856786, "loss": 0.2039, "step": 19722 }, { "epoch": 0.03497228228539716, "grad_norm": 1.234375, "learning_rate": 0.001832964535404464, "loss": 0.3179, "step": 19724 }, { "epoch": 0.03497582845070697, "grad_norm": 0.361328125, "learning_rate": 0.001832929995317689, "loss": 0.292, "step": 19726 }, { "epoch": 0.03497937461601679, "grad_norm": 0.3984375, "learning_rate": 0.0018328954520255034, "loss": 0.1945, "step": 19728 }, { "epoch": 0.0349829207813266, "grad_norm": 0.51953125, "learning_rate": 0.0018328609055280592, "loss": 0.2892, "step": 19730 }, { "epoch": 0.034986466946636416, "grad_norm": 0.400390625, "learning_rate": 0.0018328263558255067, "loss": 0.1896, "step": 19732 }, { "epoch": 0.03499001311194623, "grad_norm": 0.1904296875, "learning_rate": 0.0018327918029179976, "loss": 0.1612, "step": 19734 }, { "epoch": 0.034993559277256045, "grad_norm": 0.470703125, "learning_rate": 0.0018327572468056828, "loss": 0.2013, "step": 19736 }, { "epoch": 0.034997105442565866, "grad_norm": 0.32421875, "learning_rate": 0.0018327226874887135, "loss": 0.1796, "step": 19738 }, { "epoch": 0.03500065160787568, "grad_norm": 0.302734375, "learning_rate": 0.0018326881249672408, "loss": 0.2448, "step": 19740 }, { "epoch": 0.035004197773185496, "grad_norm": 0.5859375, "learning_rate": 0.0018326535592414156, "loss": 0.1752, "step": 19742 }, { "epoch": 0.03500774393849531, "grad_norm": 0.4140625, "learning_rate": 0.0018326189903113894, "loss": 0.216, "step": 19744 }, { "epoch": 0.035011290103805125, "grad_norm": 8.375, "learning_rate": 0.0018325844181773135, "loss": 0.2106, "step": 19746 }, { "epoch": 0.03501483626911494, "grad_norm": 0.64453125, "learning_rate": 0.0018325498428393388, "loss": 0.2093, "step": 19748 }, { "epoch": 0.035018382434424754, "grad_norm": 0.59765625, "learning_rate": 0.0018325152642976166, "loss": 0.2355, "step": 19750 }, { "epoch": 0.03502192859973457, "grad_norm": 6.6875, "learning_rate": 0.001832480682552298, "loss": 0.3573, "step": 19752 }, { "epoch": 0.03502547476504438, "grad_norm": 0.466796875, "learning_rate": 0.0018324460976035344, "loss": 0.3623, "step": 19754 }, { "epoch": 0.0350290209303542, "grad_norm": 0.400390625, "learning_rate": 0.001832411509451477, "loss": 0.1807, "step": 19756 }, { "epoch": 0.03503256709566401, "grad_norm": 0.75, "learning_rate": 0.0018323769180962773, "loss": 0.22, "step": 19758 }, { "epoch": 0.035036113260973833, "grad_norm": 2.140625, "learning_rate": 0.0018323423235380863, "loss": 0.4485, "step": 19760 }, { "epoch": 0.03503965942628365, "grad_norm": 1.7421875, "learning_rate": 0.0018323077257770555, "loss": 0.3063, "step": 19762 }, { "epoch": 0.03504320559159346, "grad_norm": 0.86328125, "learning_rate": 0.0018322731248133356, "loss": 0.1914, "step": 19764 }, { "epoch": 0.03504675175690328, "grad_norm": 1.0078125, "learning_rate": 0.0018322385206470788, "loss": 0.2252, "step": 19766 }, { "epoch": 0.03505029792221309, "grad_norm": 0.515625, "learning_rate": 0.001832203913278436, "loss": 0.1631, "step": 19768 }, { "epoch": 0.035053844087522906, "grad_norm": 1.328125, "learning_rate": 0.0018321693027075587, "loss": 0.2854, "step": 19770 }, { "epoch": 0.03505739025283272, "grad_norm": 0.40625, "learning_rate": 0.001832134688934598, "loss": 0.191, "step": 19772 }, { "epoch": 0.035060936418142535, "grad_norm": 0.83984375, "learning_rate": 0.0018321000719597055, "loss": 0.2428, "step": 19774 }, { "epoch": 0.03506448258345235, "grad_norm": 0.921875, "learning_rate": 0.0018320654517830323, "loss": 0.197, "step": 19776 }, { "epoch": 0.035068028748762164, "grad_norm": 1.0234375, "learning_rate": 0.0018320308284047303, "loss": 0.1741, "step": 19778 }, { "epoch": 0.03507157491407198, "grad_norm": 0.73828125, "learning_rate": 0.0018319962018249504, "loss": 0.2697, "step": 19780 }, { "epoch": 0.035075121079381794, "grad_norm": 4.09375, "learning_rate": 0.0018319615720438448, "loss": 0.2906, "step": 19782 }, { "epoch": 0.035078667244691615, "grad_norm": 0.443359375, "learning_rate": 0.0018319269390615642, "loss": 0.2275, "step": 19784 }, { "epoch": 0.03508221341000143, "grad_norm": 0.443359375, "learning_rate": 0.0018318923028782603, "loss": 0.189, "step": 19786 }, { "epoch": 0.035085759575311244, "grad_norm": 0.6484375, "learning_rate": 0.0018318576634940844, "loss": 0.2052, "step": 19788 }, { "epoch": 0.03508930574062106, "grad_norm": 0.375, "learning_rate": 0.0018318230209091887, "loss": 0.4515, "step": 19790 }, { "epoch": 0.03509285190593087, "grad_norm": 1.8125, "learning_rate": 0.0018317883751237237, "loss": 0.2496, "step": 19792 }, { "epoch": 0.03509639807124069, "grad_norm": 0.359375, "learning_rate": 0.0018317537261378417, "loss": 0.1834, "step": 19794 }, { "epoch": 0.0350999442365505, "grad_norm": 0.578125, "learning_rate": 0.0018317190739516938, "loss": 0.4138, "step": 19796 }, { "epoch": 0.03510349040186032, "grad_norm": 0.3203125, "learning_rate": 0.0018316844185654318, "loss": 0.2385, "step": 19798 }, { "epoch": 0.03510703656717013, "grad_norm": 0.7265625, "learning_rate": 0.001831649759979207, "loss": 0.2178, "step": 19800 }, { "epoch": 0.035110582732479946, "grad_norm": 0.25, "learning_rate": 0.0018316150981931715, "loss": 0.1591, "step": 19802 }, { "epoch": 0.03511412889778976, "grad_norm": 0.458984375, "learning_rate": 0.0018315804332074762, "loss": 0.2301, "step": 19804 }, { "epoch": 0.03511767506309958, "grad_norm": 0.67578125, "learning_rate": 0.0018315457650222731, "loss": 0.2197, "step": 19806 }, { "epoch": 0.0351212212284094, "grad_norm": 0.671875, "learning_rate": 0.001831511093637714, "loss": 0.2655, "step": 19808 }, { "epoch": 0.03512476739371921, "grad_norm": 1.40625, "learning_rate": 0.00183147641905395, "loss": 0.231, "step": 19810 }, { "epoch": 0.035128313559029026, "grad_norm": 0.83984375, "learning_rate": 0.001831441741271133, "loss": 0.2396, "step": 19812 }, { "epoch": 0.03513185972433884, "grad_norm": 0.52734375, "learning_rate": 0.001831407060289415, "loss": 0.1695, "step": 19814 }, { "epoch": 0.035135405889648655, "grad_norm": 3.0625, "learning_rate": 0.001831372376108947, "loss": 0.2298, "step": 19816 }, { "epoch": 0.03513895205495847, "grad_norm": 0.431640625, "learning_rate": 0.0018313376887298817, "loss": 0.342, "step": 19818 }, { "epoch": 0.035142498220268284, "grad_norm": 0.5703125, "learning_rate": 0.0018313029981523695, "loss": 0.2779, "step": 19820 }, { "epoch": 0.0351460443855781, "grad_norm": 0.40625, "learning_rate": 0.001831268304376563, "loss": 0.1999, "step": 19822 }, { "epoch": 0.03514959055088791, "grad_norm": 0.60546875, "learning_rate": 0.0018312336074026137, "loss": 0.2246, "step": 19824 }, { "epoch": 0.03515313671619773, "grad_norm": 0.63671875, "learning_rate": 0.0018311989072306734, "loss": 0.1251, "step": 19826 }, { "epoch": 0.03515668288150755, "grad_norm": 0.38671875, "learning_rate": 0.0018311642038608938, "loss": 0.2344, "step": 19828 }, { "epoch": 0.035160229046817364, "grad_norm": 0.328125, "learning_rate": 0.0018311294972934266, "loss": 0.1566, "step": 19830 }, { "epoch": 0.03516377521212718, "grad_norm": 1.53125, "learning_rate": 0.0018310947875284239, "loss": 0.3336, "step": 19832 }, { "epoch": 0.03516732137743699, "grad_norm": 1.0859375, "learning_rate": 0.0018310600745660367, "loss": 0.307, "step": 19834 }, { "epoch": 0.03517086754274681, "grad_norm": 0.5078125, "learning_rate": 0.001831025358406418, "loss": 0.2103, "step": 19836 }, { "epoch": 0.03517441370805662, "grad_norm": 4.9375, "learning_rate": 0.0018309906390497187, "loss": 0.2092, "step": 19838 }, { "epoch": 0.035177959873366436, "grad_norm": 0.453125, "learning_rate": 0.0018309559164960913, "loss": 0.2123, "step": 19840 }, { "epoch": 0.03518150603867625, "grad_norm": 0.474609375, "learning_rate": 0.001830921190745687, "loss": 0.3171, "step": 19842 }, { "epoch": 0.035185052203986066, "grad_norm": 0.310546875, "learning_rate": 0.0018308864617986582, "loss": 0.2297, "step": 19844 }, { "epoch": 0.03518859836929588, "grad_norm": 0.76171875, "learning_rate": 0.001830851729655156, "loss": 0.2246, "step": 19846 }, { "epoch": 0.035192144534605695, "grad_norm": 1.828125, "learning_rate": 0.0018308169943153335, "loss": 0.286, "step": 19848 }, { "epoch": 0.03519569069991551, "grad_norm": 0.73828125, "learning_rate": 0.0018307822557793417, "loss": 0.1627, "step": 19850 }, { "epoch": 0.03519923686522533, "grad_norm": 0.62890625, "learning_rate": 0.0018307475140473329, "loss": 0.2125, "step": 19852 }, { "epoch": 0.035202783030535145, "grad_norm": 1.28125, "learning_rate": 0.0018307127691194588, "loss": 0.191, "step": 19854 }, { "epoch": 0.03520632919584496, "grad_norm": 0.57421875, "learning_rate": 0.0018306780209958718, "loss": 0.2788, "step": 19856 }, { "epoch": 0.035209875361154774, "grad_norm": 0.70703125, "learning_rate": 0.0018306432696767233, "loss": 0.1968, "step": 19858 }, { "epoch": 0.03521342152646459, "grad_norm": 0.494140625, "learning_rate": 0.0018306085151621656, "loss": 0.1506, "step": 19860 }, { "epoch": 0.0352169676917744, "grad_norm": 1.8203125, "learning_rate": 0.0018305737574523505, "loss": 0.2261, "step": 19862 }, { "epoch": 0.03522051385708422, "grad_norm": 0.478515625, "learning_rate": 0.0018305389965474303, "loss": 0.1966, "step": 19864 }, { "epoch": 0.03522406002239403, "grad_norm": 3.4375, "learning_rate": 0.001830504232447557, "loss": 0.227, "step": 19866 }, { "epoch": 0.03522760618770385, "grad_norm": 2.484375, "learning_rate": 0.0018304694651528823, "loss": 0.3059, "step": 19868 }, { "epoch": 0.03523115235301366, "grad_norm": 0.5625, "learning_rate": 0.0018304346946635586, "loss": 0.1407, "step": 19870 }, { "epoch": 0.035234698518323476, "grad_norm": 0.3046875, "learning_rate": 0.0018303999209797377, "loss": 0.2046, "step": 19872 }, { "epoch": 0.0352382446836333, "grad_norm": 0.59765625, "learning_rate": 0.0018303651441015719, "loss": 0.1782, "step": 19874 }, { "epoch": 0.03524179084894311, "grad_norm": 0.46875, "learning_rate": 0.0018303303640292133, "loss": 0.2057, "step": 19876 }, { "epoch": 0.03524533701425293, "grad_norm": 1.5703125, "learning_rate": 0.0018302955807628137, "loss": 0.2064, "step": 19878 }, { "epoch": 0.03524888317956274, "grad_norm": 1.3203125, "learning_rate": 0.0018302607943025259, "loss": 0.2405, "step": 19880 }, { "epoch": 0.035252429344872556, "grad_norm": 2.140625, "learning_rate": 0.0018302260046485009, "loss": 0.2072, "step": 19882 }, { "epoch": 0.03525597551018237, "grad_norm": 2.546875, "learning_rate": 0.0018301912118008918, "loss": 0.389, "step": 19884 }, { "epoch": 0.035259521675492185, "grad_norm": 0.34765625, "learning_rate": 0.0018301564157598507, "loss": 0.2596, "step": 19886 }, { "epoch": 0.035263067840802, "grad_norm": 1.3671875, "learning_rate": 0.0018301216165255293, "loss": 0.1424, "step": 19888 }, { "epoch": 0.035266614006111814, "grad_norm": 0.9375, "learning_rate": 0.0018300868140980803, "loss": 0.2065, "step": 19890 }, { "epoch": 0.03527016017142163, "grad_norm": 0.8203125, "learning_rate": 0.001830052008477655, "loss": 0.2469, "step": 19892 }, { "epoch": 0.03527370633673144, "grad_norm": 1.015625, "learning_rate": 0.0018300171996644072, "loss": 0.2107, "step": 19894 }, { "epoch": 0.035277252502041265, "grad_norm": 0.4453125, "learning_rate": 0.0018299823876584875, "loss": 0.1964, "step": 19896 }, { "epoch": 0.03528079866735108, "grad_norm": 0.310546875, "learning_rate": 0.0018299475724600492, "loss": 0.1779, "step": 19898 }, { "epoch": 0.035284344832660894, "grad_norm": 0.9296875, "learning_rate": 0.0018299127540692445, "loss": 0.2099, "step": 19900 }, { "epoch": 0.03528789099797071, "grad_norm": 0.83984375, "learning_rate": 0.0018298779324862249, "loss": 0.2107, "step": 19902 }, { "epoch": 0.03529143716328052, "grad_norm": 0.6328125, "learning_rate": 0.0018298431077111432, "loss": 0.2127, "step": 19904 }, { "epoch": 0.03529498332859034, "grad_norm": 0.65234375, "learning_rate": 0.001829808279744152, "loss": 0.3013, "step": 19906 }, { "epoch": 0.03529852949390015, "grad_norm": 0.92578125, "learning_rate": 0.0018297734485854032, "loss": 0.2168, "step": 19908 }, { "epoch": 0.03530207565920997, "grad_norm": 0.421875, "learning_rate": 0.0018297386142350493, "loss": 0.2195, "step": 19910 }, { "epoch": 0.03530562182451978, "grad_norm": 0.28515625, "learning_rate": 0.0018297037766932425, "loss": 0.2853, "step": 19912 }, { "epoch": 0.035309167989829596, "grad_norm": 0.40234375, "learning_rate": 0.0018296689359601351, "loss": 0.2064, "step": 19914 }, { "epoch": 0.03531271415513941, "grad_norm": 0.46875, "learning_rate": 0.00182963409203588, "loss": 0.2045, "step": 19916 }, { "epoch": 0.035316260320449225, "grad_norm": 0.97265625, "learning_rate": 0.0018295992449206286, "loss": 0.2106, "step": 19918 }, { "epoch": 0.035319806485759046, "grad_norm": 0.38671875, "learning_rate": 0.0018295643946145343, "loss": 0.1845, "step": 19920 }, { "epoch": 0.03532335265106886, "grad_norm": 0.43359375, "learning_rate": 0.001829529541117749, "loss": 0.1987, "step": 19922 }, { "epoch": 0.035326898816378675, "grad_norm": 0.83984375, "learning_rate": 0.0018294946844304254, "loss": 0.2617, "step": 19924 }, { "epoch": 0.03533044498168849, "grad_norm": 2.25, "learning_rate": 0.0018294598245527157, "loss": 0.3284, "step": 19926 }, { "epoch": 0.035333991146998305, "grad_norm": 0.3125, "learning_rate": 0.0018294249614847727, "loss": 0.2725, "step": 19928 }, { "epoch": 0.03533753731230812, "grad_norm": 2.890625, "learning_rate": 0.0018293900952267483, "loss": 0.1963, "step": 19930 }, { "epoch": 0.035341083477617934, "grad_norm": 0.56640625, "learning_rate": 0.0018293552257787954, "loss": 0.2567, "step": 19932 }, { "epoch": 0.03534462964292775, "grad_norm": 1.015625, "learning_rate": 0.0018293203531410663, "loss": 0.274, "step": 19934 }, { "epoch": 0.03534817580823756, "grad_norm": 3.40625, "learning_rate": 0.0018292854773137137, "loss": 0.2324, "step": 19936 }, { "epoch": 0.03535172197354738, "grad_norm": 2.484375, "learning_rate": 0.00182925059829689, "loss": 0.4741, "step": 19938 }, { "epoch": 0.03535526813885719, "grad_norm": 0.6875, "learning_rate": 0.0018292157160907478, "loss": 0.2537, "step": 19940 }, { "epoch": 0.03535881430416701, "grad_norm": 1.3046875, "learning_rate": 0.0018291808306954396, "loss": 0.2346, "step": 19942 }, { "epoch": 0.03536236046947683, "grad_norm": 0.47265625, "learning_rate": 0.001829145942111118, "loss": 0.1706, "step": 19944 }, { "epoch": 0.03536590663478664, "grad_norm": 0.6640625, "learning_rate": 0.0018291110503379359, "loss": 0.208, "step": 19946 }, { "epoch": 0.03536945280009646, "grad_norm": 0.66796875, "learning_rate": 0.0018290761553760454, "loss": 0.2336, "step": 19948 }, { "epoch": 0.03537299896540627, "grad_norm": 0.49609375, "learning_rate": 0.0018290412572255993, "loss": 0.2171, "step": 19950 }, { "epoch": 0.035376545130716086, "grad_norm": 0.83203125, "learning_rate": 0.00182900635588675, "loss": 0.192, "step": 19952 }, { "epoch": 0.0353800912960259, "grad_norm": 1.3125, "learning_rate": 0.0018289714513596502, "loss": 0.3025, "step": 19954 }, { "epoch": 0.035383637461335715, "grad_norm": 0.50390625, "learning_rate": 0.0018289365436444532, "loss": 0.3591, "step": 19956 }, { "epoch": 0.03538718362664553, "grad_norm": 0.466796875, "learning_rate": 0.0018289016327413111, "loss": 0.28, "step": 19958 }, { "epoch": 0.035390729791955344, "grad_norm": 0.5078125, "learning_rate": 0.0018288667186503764, "loss": 0.1875, "step": 19960 }, { "epoch": 0.03539427595726516, "grad_norm": 0.451171875, "learning_rate": 0.0018288318013718024, "loss": 0.1637, "step": 19962 }, { "epoch": 0.03539782212257498, "grad_norm": 4.0, "learning_rate": 0.001828796880905741, "loss": 0.4094, "step": 19964 }, { "epoch": 0.035401368287884795, "grad_norm": 0.59765625, "learning_rate": 0.0018287619572523457, "loss": 0.2104, "step": 19966 }, { "epoch": 0.03540491445319461, "grad_norm": 0.44140625, "learning_rate": 0.0018287270304117688, "loss": 0.2464, "step": 19968 }, { "epoch": 0.035408460618504424, "grad_norm": 0.43359375, "learning_rate": 0.0018286921003841633, "loss": 0.2715, "step": 19970 }, { "epoch": 0.03541200678381424, "grad_norm": 0.6328125, "learning_rate": 0.0018286571671696818, "loss": 0.2143, "step": 19972 }, { "epoch": 0.03541555294912405, "grad_norm": 4.0625, "learning_rate": 0.0018286222307684773, "loss": 0.2641, "step": 19974 }, { "epoch": 0.03541909911443387, "grad_norm": 0.3515625, "learning_rate": 0.0018285872911807017, "loss": 0.2092, "step": 19976 }, { "epoch": 0.03542264527974368, "grad_norm": 0.34375, "learning_rate": 0.0018285523484065092, "loss": 0.2434, "step": 19978 }, { "epoch": 0.0354261914450535, "grad_norm": 0.87890625, "learning_rate": 0.0018285174024460513, "loss": 0.1773, "step": 19980 }, { "epoch": 0.03542973761036331, "grad_norm": 1.453125, "learning_rate": 0.001828482453299482, "loss": 0.5136, "step": 19982 }, { "epoch": 0.035433283775673126, "grad_norm": 0.90625, "learning_rate": 0.0018284475009669537, "loss": 0.1881, "step": 19984 }, { "epoch": 0.03543682994098294, "grad_norm": 0.44140625, "learning_rate": 0.0018284125454486185, "loss": 0.3386, "step": 19986 }, { "epoch": 0.03544037610629276, "grad_norm": 0.54296875, "learning_rate": 0.0018283775867446305, "loss": 0.1808, "step": 19988 }, { "epoch": 0.035443922271602576, "grad_norm": 0.515625, "learning_rate": 0.0018283426248551417, "loss": 0.1709, "step": 19990 }, { "epoch": 0.03544746843691239, "grad_norm": 0.78515625, "learning_rate": 0.0018283076597803054, "loss": 0.2594, "step": 19992 }, { "epoch": 0.035451014602222206, "grad_norm": 0.384765625, "learning_rate": 0.0018282726915202748, "loss": 0.1844, "step": 19994 }, { "epoch": 0.03545456076753202, "grad_norm": 0.69921875, "learning_rate": 0.001828237720075202, "loss": 0.2046, "step": 19996 }, { "epoch": 0.035458106932841835, "grad_norm": 0.625, "learning_rate": 0.0018282027454452408, "loss": 0.1854, "step": 19998 }, { "epoch": 0.03546165309815165, "grad_norm": 0.53515625, "learning_rate": 0.0018281677676305434, "loss": 0.1838, "step": 20000 }, { "epoch": 0.035465199263461464, "grad_norm": 0.24609375, "learning_rate": 0.0018281327866312635, "loss": 0.2353, "step": 20002 }, { "epoch": 0.03546874542877128, "grad_norm": 0.341796875, "learning_rate": 0.0018280978024475538, "loss": 0.1868, "step": 20004 }, { "epoch": 0.03547229159408109, "grad_norm": 0.447265625, "learning_rate": 0.001828062815079567, "loss": 0.2402, "step": 20006 }, { "epoch": 0.03547583775939091, "grad_norm": 0.5546875, "learning_rate": 0.0018280278245274562, "loss": 0.1438, "step": 20008 }, { "epoch": 0.03547938392470073, "grad_norm": 1.3984375, "learning_rate": 0.0018279928307913748, "loss": 0.2108, "step": 20010 }, { "epoch": 0.035482930090010543, "grad_norm": 1.21875, "learning_rate": 0.0018279578338714758, "loss": 0.2302, "step": 20012 }, { "epoch": 0.03548647625532036, "grad_norm": 0.376953125, "learning_rate": 0.0018279228337679118, "loss": 0.2388, "step": 20014 }, { "epoch": 0.03549002242063017, "grad_norm": 1.0625, "learning_rate": 0.0018278878304808363, "loss": 0.2008, "step": 20016 }, { "epoch": 0.03549356858593999, "grad_norm": 1.0703125, "learning_rate": 0.001827852824010402, "loss": 0.184, "step": 20018 }, { "epoch": 0.0354971147512498, "grad_norm": 0.443359375, "learning_rate": 0.0018278178143567626, "loss": 0.2505, "step": 20020 }, { "epoch": 0.035500660916559616, "grad_norm": 0.78515625, "learning_rate": 0.0018277828015200706, "loss": 0.2271, "step": 20022 }, { "epoch": 0.03550420708186943, "grad_norm": 0.353515625, "learning_rate": 0.0018277477855004793, "loss": 0.1955, "step": 20024 }, { "epoch": 0.035507753247179245, "grad_norm": 0.375, "learning_rate": 0.0018277127662981421, "loss": 0.1941, "step": 20026 }, { "epoch": 0.03551129941248906, "grad_norm": 0.4296875, "learning_rate": 0.001827677743913212, "loss": 0.1778, "step": 20028 }, { "epoch": 0.035514845577798874, "grad_norm": 1.0390625, "learning_rate": 0.001827642718345842, "loss": 0.2736, "step": 20030 }, { "epoch": 0.035518391743108696, "grad_norm": 0.40625, "learning_rate": 0.0018276076895961855, "loss": 0.2164, "step": 20032 }, { "epoch": 0.03552193790841851, "grad_norm": 2.828125, "learning_rate": 0.0018275726576643956, "loss": 0.237, "step": 20034 }, { "epoch": 0.035525484073728325, "grad_norm": 0.5390625, "learning_rate": 0.0018275376225506256, "loss": 0.2322, "step": 20036 }, { "epoch": 0.03552903023903814, "grad_norm": 1.234375, "learning_rate": 0.0018275025842550284, "loss": 0.2477, "step": 20038 }, { "epoch": 0.035532576404347954, "grad_norm": 0.447265625, "learning_rate": 0.0018274675427777576, "loss": 0.1645, "step": 20040 }, { "epoch": 0.03553612256965777, "grad_norm": 0.5546875, "learning_rate": 0.0018274324981189664, "loss": 0.2012, "step": 20042 }, { "epoch": 0.03553966873496758, "grad_norm": 0.259765625, "learning_rate": 0.0018273974502788079, "loss": 0.2016, "step": 20044 }, { "epoch": 0.0355432149002774, "grad_norm": 0.7421875, "learning_rate": 0.0018273623992574358, "loss": 0.2397, "step": 20046 }, { "epoch": 0.03554676106558721, "grad_norm": 1.0, "learning_rate": 0.0018273273450550025, "loss": 0.2838, "step": 20048 }, { "epoch": 0.03555030723089703, "grad_norm": 0.5703125, "learning_rate": 0.0018272922876716622, "loss": 0.3161, "step": 20050 }, { "epoch": 0.03555385339620684, "grad_norm": 2.40625, "learning_rate": 0.0018272572271075679, "loss": 0.302, "step": 20052 }, { "epoch": 0.035557399561516656, "grad_norm": 2.5625, "learning_rate": 0.001827222163362873, "loss": 0.3404, "step": 20054 }, { "epoch": 0.03556094572682648, "grad_norm": 1.1015625, "learning_rate": 0.0018271870964377305, "loss": 0.2398, "step": 20056 }, { "epoch": 0.03556449189213629, "grad_norm": 0.470703125, "learning_rate": 0.0018271520263322943, "loss": 0.1816, "step": 20058 }, { "epoch": 0.03556803805744611, "grad_norm": 2.984375, "learning_rate": 0.0018271169530467172, "loss": 0.2055, "step": 20060 }, { "epoch": 0.03557158422275592, "grad_norm": 0.5546875, "learning_rate": 0.0018270818765811535, "loss": 0.1773, "step": 20062 }, { "epoch": 0.035575130388065736, "grad_norm": 1.40625, "learning_rate": 0.0018270467969357554, "loss": 0.3101, "step": 20064 }, { "epoch": 0.03557867655337555, "grad_norm": 0.546875, "learning_rate": 0.0018270117141106772, "loss": 0.2072, "step": 20066 }, { "epoch": 0.035582222718685365, "grad_norm": 2.265625, "learning_rate": 0.0018269766281060723, "loss": 0.2914, "step": 20068 }, { "epoch": 0.03558576888399518, "grad_norm": 0.671875, "learning_rate": 0.0018269415389220935, "loss": 0.2349, "step": 20070 }, { "epoch": 0.035589315049304994, "grad_norm": 4.3125, "learning_rate": 0.0018269064465588948, "loss": 0.5016, "step": 20072 }, { "epoch": 0.03559286121461481, "grad_norm": 0.5390625, "learning_rate": 0.0018268713510166295, "loss": 0.1555, "step": 20074 }, { "epoch": 0.03559640737992462, "grad_norm": 2.078125, "learning_rate": 0.0018268362522954511, "loss": 0.3287, "step": 20076 }, { "epoch": 0.035599953545234445, "grad_norm": 0.515625, "learning_rate": 0.0018268011503955134, "loss": 0.1804, "step": 20078 }, { "epoch": 0.03560349971054426, "grad_norm": 0.5859375, "learning_rate": 0.0018267660453169694, "loss": 0.1981, "step": 20080 }, { "epoch": 0.035607045875854074, "grad_norm": 0.7265625, "learning_rate": 0.0018267309370599727, "loss": 0.2257, "step": 20082 }, { "epoch": 0.03561059204116389, "grad_norm": 0.79296875, "learning_rate": 0.0018266958256246773, "loss": 0.1887, "step": 20084 }, { "epoch": 0.0356141382064737, "grad_norm": 0.6484375, "learning_rate": 0.0018266607110112366, "loss": 0.1587, "step": 20086 }, { "epoch": 0.03561768437178352, "grad_norm": 0.796875, "learning_rate": 0.0018266255932198037, "loss": 0.2306, "step": 20088 }, { "epoch": 0.03562123053709333, "grad_norm": 0.7421875, "learning_rate": 0.0018265904722505328, "loss": 0.1625, "step": 20090 }, { "epoch": 0.035624776702403146, "grad_norm": 0.6484375, "learning_rate": 0.0018265553481035773, "loss": 0.2199, "step": 20092 }, { "epoch": 0.03562832286771296, "grad_norm": 0.482421875, "learning_rate": 0.0018265202207790904, "loss": 0.1625, "step": 20094 }, { "epoch": 0.035631869033022776, "grad_norm": 11.6875, "learning_rate": 0.001826485090277226, "loss": 0.4761, "step": 20096 }, { "epoch": 0.03563541519833259, "grad_norm": 1.015625, "learning_rate": 0.0018264499565981383, "loss": 0.2255, "step": 20098 }, { "epoch": 0.03563896136364241, "grad_norm": 0.62890625, "learning_rate": 0.0018264148197419804, "loss": 0.2157, "step": 20100 }, { "epoch": 0.035642507528952226, "grad_norm": 1.1171875, "learning_rate": 0.0018263796797089056, "loss": 0.1731, "step": 20102 }, { "epoch": 0.03564605369426204, "grad_norm": 0.5234375, "learning_rate": 0.0018263445364990682, "loss": 0.2737, "step": 20104 }, { "epoch": 0.035649599859571855, "grad_norm": 2.828125, "learning_rate": 0.0018263093901126218, "loss": 0.494, "step": 20106 }, { "epoch": 0.03565314602488167, "grad_norm": 1.390625, "learning_rate": 0.0018262742405497198, "loss": 0.1488, "step": 20108 }, { "epoch": 0.035656692190191484, "grad_norm": 1.2890625, "learning_rate": 0.0018262390878105164, "loss": 0.2315, "step": 20110 }, { "epoch": 0.0356602383555013, "grad_norm": 1.9609375, "learning_rate": 0.0018262039318951652, "loss": 0.4986, "step": 20112 }, { "epoch": 0.03566378452081111, "grad_norm": 0.71484375, "learning_rate": 0.0018261687728038194, "loss": 0.2758, "step": 20114 }, { "epoch": 0.03566733068612093, "grad_norm": 0.5234375, "learning_rate": 0.0018261336105366336, "loss": 0.1304, "step": 20116 }, { "epoch": 0.03567087685143074, "grad_norm": 1.015625, "learning_rate": 0.001826098445093761, "loss": 0.2241, "step": 20118 }, { "epoch": 0.03567442301674056, "grad_norm": 0.6171875, "learning_rate": 0.0018260632764753556, "loss": 0.2542, "step": 20120 }, { "epoch": 0.03567796918205037, "grad_norm": 1.234375, "learning_rate": 0.0018260281046815709, "loss": 0.2207, "step": 20122 }, { "epoch": 0.03568151534736019, "grad_norm": 0.80859375, "learning_rate": 0.0018259929297125613, "loss": 0.1875, "step": 20124 }, { "epoch": 0.03568506151267001, "grad_norm": 2.953125, "learning_rate": 0.0018259577515684804, "loss": 0.2535, "step": 20126 }, { "epoch": 0.03568860767797982, "grad_norm": 5.9375, "learning_rate": 0.0018259225702494818, "loss": 0.2641, "step": 20128 }, { "epoch": 0.03569215384328964, "grad_norm": 0.68359375, "learning_rate": 0.0018258873857557195, "loss": 0.2122, "step": 20130 }, { "epoch": 0.03569570000859945, "grad_norm": 0.8515625, "learning_rate": 0.0018258521980873475, "loss": 0.2815, "step": 20132 }, { "epoch": 0.035699246173909266, "grad_norm": 0.65234375, "learning_rate": 0.0018258170072445196, "loss": 0.1924, "step": 20134 }, { "epoch": 0.03570279233921908, "grad_norm": 0.6015625, "learning_rate": 0.0018257818132273898, "loss": 0.3822, "step": 20136 }, { "epoch": 0.035706338504528895, "grad_norm": 1.75, "learning_rate": 0.0018257466160361117, "loss": 0.3125, "step": 20138 }, { "epoch": 0.03570988466983871, "grad_norm": 0.5234375, "learning_rate": 0.0018257114156708398, "loss": 0.2066, "step": 20140 }, { "epoch": 0.035713430835148524, "grad_norm": 1.0390625, "learning_rate": 0.0018256762121317276, "loss": 0.197, "step": 20142 }, { "epoch": 0.03571697700045834, "grad_norm": 0.369140625, "learning_rate": 0.001825641005418929, "loss": 0.4264, "step": 20144 }, { "epoch": 0.03572052316576816, "grad_norm": 2.359375, "learning_rate": 0.0018256057955325985, "loss": 0.2233, "step": 20146 }, { "epoch": 0.035724069331077975, "grad_norm": 1.171875, "learning_rate": 0.0018255705824728892, "loss": 0.2092, "step": 20148 }, { "epoch": 0.03572761549638779, "grad_norm": 0.96484375, "learning_rate": 0.001825535366239956, "loss": 0.1493, "step": 20150 }, { "epoch": 0.035731161661697604, "grad_norm": 0.376953125, "learning_rate": 0.0018255001468339524, "loss": 0.2141, "step": 20152 }, { "epoch": 0.03573470782700742, "grad_norm": 1.234375, "learning_rate": 0.0018254649242550327, "loss": 0.3326, "step": 20154 }, { "epoch": 0.03573825399231723, "grad_norm": 0.427734375, "learning_rate": 0.001825429698503351, "loss": 0.2103, "step": 20156 }, { "epoch": 0.03574180015762705, "grad_norm": 0.5390625, "learning_rate": 0.001825394469579061, "loss": 0.1697, "step": 20158 }, { "epoch": 0.03574534632293686, "grad_norm": 0.734375, "learning_rate": 0.001825359237482317, "loss": 0.3109, "step": 20160 }, { "epoch": 0.03574889248824668, "grad_norm": 0.9375, "learning_rate": 0.0018253240022132727, "loss": 0.2036, "step": 20162 }, { "epoch": 0.03575243865355649, "grad_norm": 8.1875, "learning_rate": 0.001825288763772083, "loss": 0.237, "step": 20164 }, { "epoch": 0.035755984818866306, "grad_norm": 2.59375, "learning_rate": 0.0018252535221589012, "loss": 0.2523, "step": 20166 }, { "epoch": 0.03575953098417613, "grad_norm": 1.109375, "learning_rate": 0.0018252182773738819, "loss": 0.1467, "step": 20168 }, { "epoch": 0.03576307714948594, "grad_norm": 0.66796875, "learning_rate": 0.001825183029417179, "loss": 0.4145, "step": 20170 }, { "epoch": 0.035766623314795756, "grad_norm": 2.515625, "learning_rate": 0.001825147778288947, "loss": 0.3125, "step": 20172 }, { "epoch": 0.03577016948010557, "grad_norm": 0.80859375, "learning_rate": 0.0018251125239893398, "loss": 0.2468, "step": 20174 }, { "epoch": 0.035773715645415385, "grad_norm": 0.306640625, "learning_rate": 0.001825077266518511, "loss": 0.1606, "step": 20176 }, { "epoch": 0.0357772618107252, "grad_norm": 0.73828125, "learning_rate": 0.0018250420058766163, "loss": 0.2095, "step": 20178 }, { "epoch": 0.035780807976035015, "grad_norm": 0.75390625, "learning_rate": 0.0018250067420638087, "loss": 0.2235, "step": 20180 }, { "epoch": 0.03578435414134483, "grad_norm": 0.5859375, "learning_rate": 0.0018249714750802426, "loss": 0.2626, "step": 20182 }, { "epoch": 0.035787900306654644, "grad_norm": 0.91015625, "learning_rate": 0.0018249362049260723, "loss": 0.186, "step": 20184 }, { "epoch": 0.03579144647196446, "grad_norm": 0.296875, "learning_rate": 0.0018249009316014524, "loss": 0.1928, "step": 20186 }, { "epoch": 0.03579499263727427, "grad_norm": 0.60546875, "learning_rate": 0.001824865655106537, "loss": 0.2129, "step": 20188 }, { "epoch": 0.03579853880258409, "grad_norm": 0.8046875, "learning_rate": 0.0018248303754414801, "loss": 0.1755, "step": 20190 }, { "epoch": 0.03580208496789391, "grad_norm": 0.296875, "learning_rate": 0.0018247950926064361, "loss": 0.2808, "step": 20192 }, { "epoch": 0.03580563113320372, "grad_norm": 0.53125, "learning_rate": 0.0018247598066015594, "loss": 0.2269, "step": 20194 }, { "epoch": 0.03580917729851354, "grad_norm": 2.375, "learning_rate": 0.0018247245174270042, "loss": 0.5249, "step": 20196 }, { "epoch": 0.03581272346382335, "grad_norm": 1.3203125, "learning_rate": 0.001824689225082925, "loss": 0.2022, "step": 20198 }, { "epoch": 0.03581626962913317, "grad_norm": 0.9140625, "learning_rate": 0.0018246539295694762, "loss": 0.5507, "step": 20200 }, { "epoch": 0.03581981579444298, "grad_norm": 0.80078125, "learning_rate": 0.0018246186308868123, "loss": 0.262, "step": 20202 }, { "epoch": 0.035823361959752796, "grad_norm": 0.384765625, "learning_rate": 0.0018245833290350868, "loss": 0.1989, "step": 20204 }, { "epoch": 0.03582690812506261, "grad_norm": 0.259765625, "learning_rate": 0.001824548024014455, "loss": 0.1892, "step": 20206 }, { "epoch": 0.035830454290372425, "grad_norm": 0.275390625, "learning_rate": 0.0018245127158250712, "loss": 0.2029, "step": 20208 }, { "epoch": 0.03583400045568224, "grad_norm": 1.484375, "learning_rate": 0.0018244774044670893, "loss": 0.3161, "step": 20210 }, { "epoch": 0.035837546620992054, "grad_norm": 0.71484375, "learning_rate": 0.0018244420899406642, "loss": 0.2452, "step": 20212 }, { "epoch": 0.035841092786301876, "grad_norm": 0.75390625, "learning_rate": 0.0018244067722459501, "loss": 0.1898, "step": 20214 }, { "epoch": 0.03584463895161169, "grad_norm": 0.318359375, "learning_rate": 0.0018243714513831019, "loss": 0.1896, "step": 20216 }, { "epoch": 0.035848185116921505, "grad_norm": 1.703125, "learning_rate": 0.0018243361273522734, "loss": 0.1791, "step": 20218 }, { "epoch": 0.03585173128223132, "grad_norm": 0.9140625, "learning_rate": 0.0018243008001536194, "loss": 0.1824, "step": 20220 }, { "epoch": 0.035855277447541134, "grad_norm": 0.69140625, "learning_rate": 0.0018242654697872944, "loss": 0.2069, "step": 20222 }, { "epoch": 0.03585882361285095, "grad_norm": 1.6875, "learning_rate": 0.0018242301362534532, "loss": 0.251, "step": 20224 }, { "epoch": 0.03586236977816076, "grad_norm": 0.255859375, "learning_rate": 0.00182419479955225, "loss": 0.1677, "step": 20226 }, { "epoch": 0.03586591594347058, "grad_norm": 0.294921875, "learning_rate": 0.0018241594596838393, "loss": 0.1602, "step": 20228 }, { "epoch": 0.03586946210878039, "grad_norm": 1.2734375, "learning_rate": 0.001824124116648376, "loss": 0.1487, "step": 20230 }, { "epoch": 0.03587300827409021, "grad_norm": 1.1484375, "learning_rate": 0.0018240887704460142, "loss": 0.2511, "step": 20232 }, { "epoch": 0.03587655443940002, "grad_norm": 0.458984375, "learning_rate": 0.0018240534210769088, "loss": 0.2312, "step": 20234 }, { "epoch": 0.03588010060470984, "grad_norm": 0.52734375, "learning_rate": 0.0018240180685412143, "loss": 0.1966, "step": 20236 }, { "epoch": 0.03588364677001966, "grad_norm": 0.337890625, "learning_rate": 0.0018239827128390853, "loss": 0.2074, "step": 20238 }, { "epoch": 0.03588719293532947, "grad_norm": 2.453125, "learning_rate": 0.0018239473539706766, "loss": 0.2629, "step": 20240 }, { "epoch": 0.035890739100639286, "grad_norm": 0.51171875, "learning_rate": 0.0018239119919361425, "loss": 0.2351, "step": 20242 }, { "epoch": 0.0358942852659491, "grad_norm": 0.68359375, "learning_rate": 0.0018238766267356379, "loss": 0.2104, "step": 20244 }, { "epoch": 0.035897831431258916, "grad_norm": 0.828125, "learning_rate": 0.0018238412583693174, "loss": 0.1781, "step": 20246 }, { "epoch": 0.03590137759656873, "grad_norm": 0.361328125, "learning_rate": 0.0018238058868373357, "loss": 0.2221, "step": 20248 }, { "epoch": 0.035904923761878545, "grad_norm": 0.97265625, "learning_rate": 0.0018237705121398476, "loss": 0.2552, "step": 20250 }, { "epoch": 0.03590846992718836, "grad_norm": 0.7265625, "learning_rate": 0.0018237351342770074, "loss": 0.2013, "step": 20252 }, { "epoch": 0.035912016092498174, "grad_norm": 1.7265625, "learning_rate": 0.0018236997532489706, "loss": 0.2035, "step": 20254 }, { "epoch": 0.03591556225780799, "grad_norm": 0.640625, "learning_rate": 0.0018236643690558911, "loss": 0.1907, "step": 20256 }, { "epoch": 0.0359191084231178, "grad_norm": 0.95703125, "learning_rate": 0.0018236289816979242, "loss": 0.2263, "step": 20258 }, { "epoch": 0.035922654588427624, "grad_norm": 0.30859375, "learning_rate": 0.001823593591175224, "loss": 0.1817, "step": 20260 }, { "epoch": 0.03592620075373744, "grad_norm": 4.34375, "learning_rate": 0.0018235581974879464, "loss": 0.3063, "step": 20262 }, { "epoch": 0.035929746919047253, "grad_norm": 0.58984375, "learning_rate": 0.001823522800636245, "loss": 0.1946, "step": 20264 }, { "epoch": 0.03593329308435707, "grad_norm": 0.4921875, "learning_rate": 0.0018234874006202758, "loss": 0.1798, "step": 20266 }, { "epoch": 0.03593683924966688, "grad_norm": 0.5234375, "learning_rate": 0.0018234519974401923, "loss": 0.1849, "step": 20268 }, { "epoch": 0.0359403854149767, "grad_norm": 0.32421875, "learning_rate": 0.0018234165910961501, "loss": 0.1966, "step": 20270 }, { "epoch": 0.03594393158028651, "grad_norm": 0.953125, "learning_rate": 0.001823381181588304, "loss": 0.2543, "step": 20272 }, { "epoch": 0.035947477745596326, "grad_norm": 0.353515625, "learning_rate": 0.0018233457689168092, "loss": 0.2512, "step": 20274 }, { "epoch": 0.03595102391090614, "grad_norm": 0.22265625, "learning_rate": 0.0018233103530818197, "loss": 0.2635, "step": 20276 }, { "epoch": 0.035954570076215955, "grad_norm": 1.90625, "learning_rate": 0.001823274934083491, "loss": 0.1846, "step": 20278 }, { "epoch": 0.03595811624152577, "grad_norm": 0.6171875, "learning_rate": 0.001823239511921978, "loss": 0.2407, "step": 20280 }, { "epoch": 0.03596166240683559, "grad_norm": 0.515625, "learning_rate": 0.0018232040865974353, "loss": 0.2686, "step": 20282 }, { "epoch": 0.035965208572145406, "grad_norm": 0.75390625, "learning_rate": 0.001823168658110018, "loss": 0.2196, "step": 20284 }, { "epoch": 0.03596875473745522, "grad_norm": 0.8671875, "learning_rate": 0.0018231332264598812, "loss": 0.25, "step": 20286 }, { "epoch": 0.035972300902765035, "grad_norm": 0.8125, "learning_rate": 0.0018230977916471795, "loss": 0.2642, "step": 20288 }, { "epoch": 0.03597584706807485, "grad_norm": 0.93359375, "learning_rate": 0.0018230623536720683, "loss": 0.4423, "step": 20290 }, { "epoch": 0.035979393233384664, "grad_norm": 0.294921875, "learning_rate": 0.0018230269125347023, "loss": 0.1808, "step": 20292 }, { "epoch": 0.03598293939869448, "grad_norm": 2.109375, "learning_rate": 0.0018229914682352365, "loss": 0.3624, "step": 20294 }, { "epoch": 0.03598648556400429, "grad_norm": 0.3203125, "learning_rate": 0.0018229560207738264, "loss": 0.1932, "step": 20296 }, { "epoch": 0.03599003172931411, "grad_norm": 0.490234375, "learning_rate": 0.0018229205701506262, "loss": 0.4172, "step": 20298 }, { "epoch": 0.03599357789462392, "grad_norm": 0.48046875, "learning_rate": 0.0018228851163657911, "loss": 0.1616, "step": 20300 }, { "epoch": 0.03599712405993374, "grad_norm": 0.2255859375, "learning_rate": 0.0018228496594194768, "loss": 0.3895, "step": 20302 }, { "epoch": 0.03600067022524356, "grad_norm": 0.2734375, "learning_rate": 0.001822814199311838, "loss": 0.195, "step": 20304 }, { "epoch": 0.03600421639055337, "grad_norm": 1.875, "learning_rate": 0.0018227787360430294, "loss": 0.3931, "step": 20306 }, { "epoch": 0.03600776255586319, "grad_norm": 0.80078125, "learning_rate": 0.001822743269613207, "loss": 0.2296, "step": 20308 }, { "epoch": 0.036011308721173, "grad_norm": 0.5, "learning_rate": 0.0018227078000225248, "loss": 0.2144, "step": 20310 }, { "epoch": 0.03601485488648282, "grad_norm": 0.31640625, "learning_rate": 0.0018226723272711388, "loss": 0.2374, "step": 20312 }, { "epoch": 0.03601840105179263, "grad_norm": 1.5703125, "learning_rate": 0.0018226368513592037, "loss": 0.1881, "step": 20314 }, { "epoch": 0.036021947217102446, "grad_norm": 0.859375, "learning_rate": 0.0018226013722868748, "loss": 0.2212, "step": 20316 }, { "epoch": 0.03602549338241226, "grad_norm": 0.50390625, "learning_rate": 0.0018225658900543073, "loss": 0.5096, "step": 20318 }, { "epoch": 0.036029039547722075, "grad_norm": 0.6328125, "learning_rate": 0.0018225304046616561, "loss": 0.2779, "step": 20320 }, { "epoch": 0.03603258571303189, "grad_norm": 0.54296875, "learning_rate": 0.0018224949161090769, "loss": 0.2251, "step": 20322 }, { "epoch": 0.036036131878341704, "grad_norm": 0.416015625, "learning_rate": 0.0018224594243967246, "loss": 0.1936, "step": 20324 }, { "epoch": 0.03603967804365152, "grad_norm": 0.9453125, "learning_rate": 0.0018224239295247541, "loss": 0.2438, "step": 20326 }, { "epoch": 0.03604322420896134, "grad_norm": 0.58203125, "learning_rate": 0.0018223884314933212, "loss": 0.2021, "step": 20328 }, { "epoch": 0.036046770374271155, "grad_norm": 0.58203125, "learning_rate": 0.001822352930302581, "loss": 0.1562, "step": 20330 }, { "epoch": 0.03605031653958097, "grad_norm": 0.47265625, "learning_rate": 0.0018223174259526886, "loss": 0.338, "step": 20332 }, { "epoch": 0.036053862704890784, "grad_norm": 0.5078125, "learning_rate": 0.0018222819184437993, "loss": 0.1898, "step": 20334 }, { "epoch": 0.0360574088702006, "grad_norm": 1.5546875, "learning_rate": 0.0018222464077760684, "loss": 0.2265, "step": 20336 }, { "epoch": 0.03606095503551041, "grad_norm": 0.6484375, "learning_rate": 0.0018222108939496514, "loss": 0.252, "step": 20338 }, { "epoch": 0.03606450120082023, "grad_norm": 4.1875, "learning_rate": 0.001822175376964703, "loss": 0.194, "step": 20340 }, { "epoch": 0.03606804736613004, "grad_norm": 0.53515625, "learning_rate": 0.0018221398568213797, "loss": 0.2426, "step": 20342 }, { "epoch": 0.036071593531439856, "grad_norm": 0.2294921875, "learning_rate": 0.0018221043335198356, "loss": 0.2203, "step": 20344 }, { "epoch": 0.03607513969674967, "grad_norm": 0.318359375, "learning_rate": 0.0018220688070602268, "loss": 0.222, "step": 20346 }, { "epoch": 0.036078685862059486, "grad_norm": 1.28125, "learning_rate": 0.0018220332774427084, "loss": 0.5303, "step": 20348 }, { "epoch": 0.03608223202736931, "grad_norm": 0.546875, "learning_rate": 0.001821997744667436, "loss": 0.1951, "step": 20350 }, { "epoch": 0.03608577819267912, "grad_norm": 1.015625, "learning_rate": 0.001821962208734565, "loss": 0.2364, "step": 20352 }, { "epoch": 0.036089324357988936, "grad_norm": 1.0625, "learning_rate": 0.0018219266696442502, "loss": 0.2128, "step": 20354 }, { "epoch": 0.03609287052329875, "grad_norm": 0.4375, "learning_rate": 0.0018218911273966474, "loss": 0.1906, "step": 20356 }, { "epoch": 0.036096416688608565, "grad_norm": 0.69921875, "learning_rate": 0.0018218555819919126, "loss": 0.2462, "step": 20358 }, { "epoch": 0.03609996285391838, "grad_norm": 0.3671875, "learning_rate": 0.0018218200334302007, "loss": 0.1718, "step": 20360 }, { "epoch": 0.036103509019228194, "grad_norm": 0.279296875, "learning_rate": 0.0018217844817116672, "loss": 0.2305, "step": 20362 }, { "epoch": 0.03610705518453801, "grad_norm": 0.7578125, "learning_rate": 0.0018217489268364676, "loss": 0.1791, "step": 20364 }, { "epoch": 0.03611060134984782, "grad_norm": 0.255859375, "learning_rate": 0.0018217133688047573, "loss": 0.2277, "step": 20366 }, { "epoch": 0.03611414751515764, "grad_norm": 1.1171875, "learning_rate": 0.0018216778076166921, "loss": 0.2123, "step": 20368 }, { "epoch": 0.03611769368046745, "grad_norm": 0.5078125, "learning_rate": 0.0018216422432724274, "loss": 0.2252, "step": 20370 }, { "epoch": 0.036121239845777274, "grad_norm": 0.373046875, "learning_rate": 0.0018216066757721185, "loss": 0.1478, "step": 20372 }, { "epoch": 0.03612478601108709, "grad_norm": 0.734375, "learning_rate": 0.0018215711051159213, "loss": 0.2125, "step": 20374 }, { "epoch": 0.0361283321763969, "grad_norm": 0.6875, "learning_rate": 0.0018215355313039917, "loss": 0.2409, "step": 20376 }, { "epoch": 0.03613187834170672, "grad_norm": 0.4296875, "learning_rate": 0.001821499954336484, "loss": 0.2482, "step": 20378 }, { "epoch": 0.03613542450701653, "grad_norm": 0.68359375, "learning_rate": 0.001821464374213555, "loss": 0.2355, "step": 20380 }, { "epoch": 0.03613897067232635, "grad_norm": 0.375, "learning_rate": 0.00182142879093536, "loss": 0.2166, "step": 20382 }, { "epoch": 0.03614251683763616, "grad_norm": 0.3828125, "learning_rate": 0.0018213932045020543, "loss": 0.2441, "step": 20384 }, { "epoch": 0.036146063002945976, "grad_norm": 0.42578125, "learning_rate": 0.0018213576149137937, "loss": 0.1813, "step": 20386 }, { "epoch": 0.03614960916825579, "grad_norm": 0.4609375, "learning_rate": 0.0018213220221707342, "loss": 0.1864, "step": 20388 }, { "epoch": 0.036153155333565605, "grad_norm": 0.55078125, "learning_rate": 0.0018212864262730308, "loss": 0.1749, "step": 20390 }, { "epoch": 0.03615670149887542, "grad_norm": 0.24609375, "learning_rate": 0.00182125082722084, "loss": 0.3499, "step": 20392 }, { "epoch": 0.036160247664185234, "grad_norm": 0.7421875, "learning_rate": 0.0018212152250143166, "loss": 0.2385, "step": 20394 }, { "epoch": 0.036163793829495056, "grad_norm": 1.1328125, "learning_rate": 0.0018211796196536173, "loss": 0.2278, "step": 20396 }, { "epoch": 0.03616733999480487, "grad_norm": 0.359375, "learning_rate": 0.001821144011138897, "loss": 0.187, "step": 20398 }, { "epoch": 0.036170886160114685, "grad_norm": 0.412109375, "learning_rate": 0.0018211083994703113, "loss": 0.1811, "step": 20400 }, { "epoch": 0.0361744323254245, "grad_norm": 0.86328125, "learning_rate": 0.0018210727846480167, "loss": 0.1686, "step": 20402 }, { "epoch": 0.036177978490734314, "grad_norm": 0.91796875, "learning_rate": 0.0018210371666721684, "loss": 0.1866, "step": 20404 }, { "epoch": 0.03618152465604413, "grad_norm": 0.451171875, "learning_rate": 0.0018210015455429225, "loss": 0.1801, "step": 20406 }, { "epoch": 0.03618507082135394, "grad_norm": 0.251953125, "learning_rate": 0.0018209659212604346, "loss": 0.2136, "step": 20408 }, { "epoch": 0.03618861698666376, "grad_norm": 0.5078125, "learning_rate": 0.0018209302938248604, "loss": 0.1821, "step": 20410 }, { "epoch": 0.03619216315197357, "grad_norm": 1.65625, "learning_rate": 0.0018208946632363558, "loss": 0.3578, "step": 20412 }, { "epoch": 0.03619570931728339, "grad_norm": 0.39453125, "learning_rate": 0.0018208590294950771, "loss": 0.1913, "step": 20414 }, { "epoch": 0.0361992554825932, "grad_norm": 0.61328125, "learning_rate": 0.0018208233926011797, "loss": 0.2514, "step": 20416 }, { "epoch": 0.03620280164790302, "grad_norm": 0.2490234375, "learning_rate": 0.0018207877525548192, "loss": 0.2053, "step": 20418 }, { "epoch": 0.03620634781321284, "grad_norm": 0.388671875, "learning_rate": 0.0018207521093561519, "loss": 0.158, "step": 20420 }, { "epoch": 0.03620989397852265, "grad_norm": 1.125, "learning_rate": 0.0018207164630053335, "loss": 0.3697, "step": 20422 }, { "epoch": 0.036213440143832466, "grad_norm": 0.283203125, "learning_rate": 0.0018206808135025202, "loss": 0.1715, "step": 20424 }, { "epoch": 0.03621698630914228, "grad_norm": 0.6171875, "learning_rate": 0.0018206451608478672, "loss": 0.1934, "step": 20426 }, { "epoch": 0.036220532474452095, "grad_norm": 1.03125, "learning_rate": 0.0018206095050415307, "loss": 0.1837, "step": 20428 }, { "epoch": 0.03622407863976191, "grad_norm": 0.2109375, "learning_rate": 0.0018205738460836673, "loss": 0.1285, "step": 20430 }, { "epoch": 0.036227624805071725, "grad_norm": 0.265625, "learning_rate": 0.0018205381839744323, "loss": 0.2553, "step": 20432 }, { "epoch": 0.03623117097038154, "grad_norm": 2.421875, "learning_rate": 0.0018205025187139818, "loss": 0.5537, "step": 20434 }, { "epoch": 0.036234717135691354, "grad_norm": 0.20703125, "learning_rate": 0.0018204668503024718, "loss": 0.1745, "step": 20436 }, { "epoch": 0.03623826330100117, "grad_norm": 0.82421875, "learning_rate": 0.0018204311787400587, "loss": 0.1451, "step": 20438 }, { "epoch": 0.03624180946631099, "grad_norm": 0.39453125, "learning_rate": 0.0018203955040268975, "loss": 0.2201, "step": 20440 }, { "epoch": 0.036245355631620804, "grad_norm": 0.40625, "learning_rate": 0.001820359826163145, "loss": 0.1715, "step": 20442 }, { "epoch": 0.03624890179693062, "grad_norm": 0.369140625, "learning_rate": 0.0018203241451489572, "loss": 0.1692, "step": 20444 }, { "epoch": 0.03625244796224043, "grad_norm": 0.7890625, "learning_rate": 0.00182028846098449, "loss": 0.1986, "step": 20446 }, { "epoch": 0.03625599412755025, "grad_norm": 0.83203125, "learning_rate": 0.0018202527736698993, "loss": 0.2816, "step": 20448 }, { "epoch": 0.03625954029286006, "grad_norm": 0.5078125, "learning_rate": 0.0018202170832053413, "loss": 0.1991, "step": 20450 }, { "epoch": 0.03626308645816988, "grad_norm": 0.54296875, "learning_rate": 0.0018201813895909723, "loss": 0.2758, "step": 20452 }, { "epoch": 0.03626663262347969, "grad_norm": 0.2294921875, "learning_rate": 0.0018201456928269482, "loss": 0.1686, "step": 20454 }, { "epoch": 0.036270178788789506, "grad_norm": 1.25, "learning_rate": 0.001820109992913425, "loss": 0.3832, "step": 20456 }, { "epoch": 0.03627372495409932, "grad_norm": 0.349609375, "learning_rate": 0.0018200742898505592, "loss": 0.2363, "step": 20458 }, { "epoch": 0.036277271119409135, "grad_norm": 0.431640625, "learning_rate": 0.0018200385836385066, "loss": 0.172, "step": 20460 }, { "epoch": 0.03628081728471895, "grad_norm": 0.251953125, "learning_rate": 0.0018200028742774235, "loss": 0.1854, "step": 20462 }, { "epoch": 0.03628436345002877, "grad_norm": 0.294921875, "learning_rate": 0.001819967161767466, "loss": 0.4978, "step": 20464 }, { "epoch": 0.036287909615338586, "grad_norm": 0.326171875, "learning_rate": 0.0018199314461087906, "loss": 0.1836, "step": 20466 }, { "epoch": 0.0362914557806484, "grad_norm": 0.423828125, "learning_rate": 0.0018198957273015532, "loss": 0.2348, "step": 20468 }, { "epoch": 0.036295001945958215, "grad_norm": 0.326171875, "learning_rate": 0.0018198600053459097, "loss": 0.2287, "step": 20470 }, { "epoch": 0.03629854811126803, "grad_norm": 0.73828125, "learning_rate": 0.0018198242802420167, "loss": 0.2544, "step": 20472 }, { "epoch": 0.036302094276577844, "grad_norm": 0.46875, "learning_rate": 0.0018197885519900306, "loss": 0.2776, "step": 20474 }, { "epoch": 0.03630564044188766, "grad_norm": 0.349609375, "learning_rate": 0.0018197528205901078, "loss": 0.1897, "step": 20476 }, { "epoch": 0.03630918660719747, "grad_norm": 0.431640625, "learning_rate": 0.0018197170860424037, "loss": 0.2406, "step": 20478 }, { "epoch": 0.03631273277250729, "grad_norm": 0.2138671875, "learning_rate": 0.0018196813483470752, "loss": 0.1711, "step": 20480 }, { "epoch": 0.0363162789378171, "grad_norm": 0.3828125, "learning_rate": 0.0018196456075042788, "loss": 0.3583, "step": 20482 }, { "epoch": 0.03631982510312692, "grad_norm": 0.447265625, "learning_rate": 0.0018196098635141706, "loss": 0.2374, "step": 20484 }, { "epoch": 0.03632337126843674, "grad_norm": 0.37109375, "learning_rate": 0.0018195741163769064, "loss": 0.2568, "step": 20486 }, { "epoch": 0.03632691743374655, "grad_norm": 0.7734375, "learning_rate": 0.0018195383660926435, "loss": 0.2657, "step": 20488 }, { "epoch": 0.03633046359905637, "grad_norm": 0.4453125, "learning_rate": 0.0018195026126615374, "loss": 0.2017, "step": 20490 }, { "epoch": 0.03633400976436618, "grad_norm": 0.625, "learning_rate": 0.0018194668560837447, "loss": 0.1823, "step": 20492 }, { "epoch": 0.036337555929675996, "grad_norm": 0.419921875, "learning_rate": 0.0018194310963594222, "loss": 0.3033, "step": 20494 }, { "epoch": 0.03634110209498581, "grad_norm": 16.25, "learning_rate": 0.001819395333488726, "loss": 0.3811, "step": 20496 }, { "epoch": 0.036344648260295626, "grad_norm": 0.6640625, "learning_rate": 0.0018193595674718124, "loss": 0.2094, "step": 20498 }, { "epoch": 0.03634819442560544, "grad_norm": 0.458984375, "learning_rate": 0.0018193237983088377, "loss": 0.2555, "step": 20500 }, { "epoch": 0.036351740590915255, "grad_norm": 0.423828125, "learning_rate": 0.0018192880259999588, "loss": 0.2965, "step": 20502 }, { "epoch": 0.03635528675622507, "grad_norm": 2.390625, "learning_rate": 0.001819252250545332, "loss": 0.3489, "step": 20504 }, { "epoch": 0.036358832921534884, "grad_norm": 1.015625, "learning_rate": 0.0018192164719451134, "loss": 0.2912, "step": 20506 }, { "epoch": 0.036362379086844705, "grad_norm": 0.40625, "learning_rate": 0.0018191806901994595, "loss": 0.1569, "step": 20508 }, { "epoch": 0.03636592525215452, "grad_norm": 0.8203125, "learning_rate": 0.0018191449053085273, "loss": 0.2023, "step": 20510 }, { "epoch": 0.036369471417464334, "grad_norm": 0.251953125, "learning_rate": 0.001819109117272473, "loss": 0.1676, "step": 20512 }, { "epoch": 0.03637301758277415, "grad_norm": 0.5078125, "learning_rate": 0.0018190733260914531, "loss": 0.1503, "step": 20514 }, { "epoch": 0.036376563748083963, "grad_norm": 2.46875, "learning_rate": 0.0018190375317656243, "loss": 0.315, "step": 20516 }, { "epoch": 0.03638010991339378, "grad_norm": 0.33984375, "learning_rate": 0.001819001734295143, "loss": 0.1813, "step": 20518 }, { "epoch": 0.03638365607870359, "grad_norm": 0.283203125, "learning_rate": 0.0018189659336801654, "loss": 0.1341, "step": 20520 }, { "epoch": 0.03638720224401341, "grad_norm": 0.6796875, "learning_rate": 0.0018189301299208487, "loss": 0.2152, "step": 20522 }, { "epoch": 0.03639074840932322, "grad_norm": 0.5234375, "learning_rate": 0.0018188943230173493, "loss": 0.1736, "step": 20524 }, { "epoch": 0.036394294574633036, "grad_norm": 0.578125, "learning_rate": 0.0018188585129698233, "loss": 0.2245, "step": 20526 }, { "epoch": 0.03639784073994285, "grad_norm": 0.498046875, "learning_rate": 0.0018188226997784282, "loss": 0.2311, "step": 20528 }, { "epoch": 0.036401386905252665, "grad_norm": 0.361328125, "learning_rate": 0.0018187868834433202, "loss": 0.1871, "step": 20530 }, { "epoch": 0.03640493307056249, "grad_norm": 0.341796875, "learning_rate": 0.0018187510639646556, "loss": 0.2337, "step": 20532 }, { "epoch": 0.0364084792358723, "grad_norm": 0.41796875, "learning_rate": 0.0018187152413425914, "loss": 0.2826, "step": 20534 }, { "epoch": 0.036412025401182116, "grad_norm": 1.3203125, "learning_rate": 0.001818679415577284, "loss": 0.3556, "step": 20536 }, { "epoch": 0.03641557156649193, "grad_norm": 24.875, "learning_rate": 0.0018186435866688908, "loss": 0.2328, "step": 20538 }, { "epoch": 0.036419117731801745, "grad_norm": 0.6484375, "learning_rate": 0.0018186077546175677, "loss": 0.1957, "step": 20540 }, { "epoch": 0.03642266389711156, "grad_norm": 14.4375, "learning_rate": 0.0018185719194234715, "loss": 0.3737, "step": 20542 }, { "epoch": 0.036426210062421374, "grad_norm": 0.80078125, "learning_rate": 0.0018185360810867594, "loss": 0.2355, "step": 20544 }, { "epoch": 0.03642975622773119, "grad_norm": 1.0234375, "learning_rate": 0.001818500239607588, "loss": 0.1879, "step": 20546 }, { "epoch": 0.036433302393041, "grad_norm": 3.609375, "learning_rate": 0.0018184643949861138, "loss": 0.3967, "step": 20548 }, { "epoch": 0.03643684855835082, "grad_norm": 0.67578125, "learning_rate": 0.0018184285472224932, "loss": 0.2691, "step": 20550 }, { "epoch": 0.03644039472366063, "grad_norm": 0.73828125, "learning_rate": 0.001818392696316884, "loss": 0.1964, "step": 20552 }, { "epoch": 0.036443940888970454, "grad_norm": 0.470703125, "learning_rate": 0.0018183568422694423, "loss": 0.279, "step": 20554 }, { "epoch": 0.03644748705428027, "grad_norm": 0.408203125, "learning_rate": 0.001818320985080325, "loss": 0.1756, "step": 20556 }, { "epoch": 0.03645103321959008, "grad_norm": 0.51953125, "learning_rate": 0.0018182851247496889, "loss": 0.5166, "step": 20558 }, { "epoch": 0.0364545793848999, "grad_norm": 0.81640625, "learning_rate": 0.0018182492612776912, "loss": 0.3984, "step": 20560 }, { "epoch": 0.03645812555020971, "grad_norm": 0.55859375, "learning_rate": 0.001818213394664488, "loss": 0.1721, "step": 20562 }, { "epoch": 0.03646167171551953, "grad_norm": 5.46875, "learning_rate": 0.0018181775249102368, "loss": 0.3314, "step": 20564 }, { "epoch": 0.03646521788082934, "grad_norm": 0.2451171875, "learning_rate": 0.0018181416520150944, "loss": 0.2224, "step": 20566 }, { "epoch": 0.036468764046139156, "grad_norm": 0.6953125, "learning_rate": 0.0018181057759792173, "loss": 0.179, "step": 20568 }, { "epoch": 0.03647231021144897, "grad_norm": 0.78515625, "learning_rate": 0.001818069896802763, "loss": 0.3013, "step": 20570 }, { "epoch": 0.036475856376758785, "grad_norm": 0.36328125, "learning_rate": 0.0018180340144858876, "loss": 0.2424, "step": 20572 }, { "epoch": 0.0364794025420686, "grad_norm": 0.4140625, "learning_rate": 0.0018179981290287488, "loss": 0.1755, "step": 20574 }, { "epoch": 0.03648294870737842, "grad_norm": 1.4765625, "learning_rate": 0.0018179622404315033, "loss": 0.2172, "step": 20576 }, { "epoch": 0.036486494872688235, "grad_norm": 0.59765625, "learning_rate": 0.001817926348694308, "loss": 0.203, "step": 20578 }, { "epoch": 0.03649004103799805, "grad_norm": 4.3125, "learning_rate": 0.0018178904538173198, "loss": 0.4282, "step": 20580 }, { "epoch": 0.036493587203307865, "grad_norm": 0.5546875, "learning_rate": 0.0018178545558006957, "loss": 0.4245, "step": 20582 }, { "epoch": 0.03649713336861768, "grad_norm": 0.83984375, "learning_rate": 0.0018178186546445928, "loss": 0.2519, "step": 20584 }, { "epoch": 0.036500679533927494, "grad_norm": 0.7890625, "learning_rate": 0.0018177827503491682, "loss": 0.2118, "step": 20586 }, { "epoch": 0.03650422569923731, "grad_norm": 0.67578125, "learning_rate": 0.0018177468429145785, "loss": 0.36, "step": 20588 }, { "epoch": 0.03650777186454712, "grad_norm": 0.90234375, "learning_rate": 0.001817710932340981, "loss": 0.1937, "step": 20590 }, { "epoch": 0.03651131802985694, "grad_norm": 0.251953125, "learning_rate": 0.0018176750186285331, "loss": 0.2424, "step": 20592 }, { "epoch": 0.03651486419516675, "grad_norm": 0.345703125, "learning_rate": 0.0018176391017773912, "loss": 0.169, "step": 20594 }, { "epoch": 0.036518410360476566, "grad_norm": 0.81640625, "learning_rate": 0.001817603181787713, "loss": 0.2774, "step": 20596 }, { "epoch": 0.03652195652578638, "grad_norm": 0.26171875, "learning_rate": 0.0018175672586596553, "loss": 0.2406, "step": 20598 }, { "epoch": 0.0365255026910962, "grad_norm": 0.71875, "learning_rate": 0.0018175313323933752, "loss": 0.1676, "step": 20600 }, { "epoch": 0.03652904885640602, "grad_norm": 0.2236328125, "learning_rate": 0.0018174954029890299, "loss": 0.1873, "step": 20602 }, { "epoch": 0.03653259502171583, "grad_norm": 1.4453125, "learning_rate": 0.0018174594704467764, "loss": 0.1998, "step": 20604 }, { "epoch": 0.036536141187025646, "grad_norm": 1.3046875, "learning_rate": 0.0018174235347667717, "loss": 0.3718, "step": 20606 }, { "epoch": 0.03653968735233546, "grad_norm": 0.91015625, "learning_rate": 0.0018173875959491734, "loss": 0.2011, "step": 20608 }, { "epoch": 0.036543233517645275, "grad_norm": 0.421875, "learning_rate": 0.0018173516539941386, "loss": 0.2226, "step": 20610 }, { "epoch": 0.03654677968295509, "grad_norm": 0.349609375, "learning_rate": 0.0018173157089018243, "loss": 0.147, "step": 20612 }, { "epoch": 0.036550325848264904, "grad_norm": 0.271484375, "learning_rate": 0.0018172797606723875, "loss": 0.2609, "step": 20614 }, { "epoch": 0.03655387201357472, "grad_norm": 0.4140625, "learning_rate": 0.001817243809305986, "loss": 0.2488, "step": 20616 }, { "epoch": 0.03655741817888453, "grad_norm": 0.28125, "learning_rate": 0.0018172078548027767, "loss": 0.2065, "step": 20618 }, { "epoch": 0.03656096434419435, "grad_norm": 0.37109375, "learning_rate": 0.0018171718971629165, "loss": 0.2163, "step": 20620 }, { "epoch": 0.03656451050950417, "grad_norm": 0.58203125, "learning_rate": 0.0018171359363865632, "loss": 0.3091, "step": 20622 }, { "epoch": 0.036568056674813984, "grad_norm": 0.5078125, "learning_rate": 0.001817099972473874, "loss": 0.3216, "step": 20624 }, { "epoch": 0.0365716028401238, "grad_norm": 0.375, "learning_rate": 0.0018170640054250057, "loss": 0.2159, "step": 20626 }, { "epoch": 0.03657514900543361, "grad_norm": 0.359375, "learning_rate": 0.0018170280352401162, "loss": 0.1962, "step": 20628 }, { "epoch": 0.03657869517074343, "grad_norm": 0.69921875, "learning_rate": 0.0018169920619193626, "loss": 0.1887, "step": 20630 }, { "epoch": 0.03658224133605324, "grad_norm": 0.490234375, "learning_rate": 0.0018169560854629024, "loss": 0.2226, "step": 20632 }, { "epoch": 0.03658578750136306, "grad_norm": 0.494140625, "learning_rate": 0.0018169201058708922, "loss": 0.1607, "step": 20634 }, { "epoch": 0.03658933366667287, "grad_norm": 0.52734375, "learning_rate": 0.0018168841231434904, "loss": 0.2607, "step": 20636 }, { "epoch": 0.036592879831982686, "grad_norm": 0.2578125, "learning_rate": 0.0018168481372808534, "loss": 0.2244, "step": 20638 }, { "epoch": 0.0365964259972925, "grad_norm": 0.921875, "learning_rate": 0.0018168121482831394, "loss": 0.2954, "step": 20640 }, { "epoch": 0.036599972162602315, "grad_norm": 0.296875, "learning_rate": 0.0018167761561505055, "loss": 0.2097, "step": 20642 }, { "epoch": 0.036603518327912136, "grad_norm": 0.453125, "learning_rate": 0.001816740160883109, "loss": 0.175, "step": 20644 }, { "epoch": 0.03660706449322195, "grad_norm": 0.45703125, "learning_rate": 0.0018167041624811068, "loss": 0.2198, "step": 20646 }, { "epoch": 0.036610610658531766, "grad_norm": 1.0390625, "learning_rate": 0.0018166681609446576, "loss": 0.521, "step": 20648 }, { "epoch": 0.03661415682384158, "grad_norm": 0.265625, "learning_rate": 0.0018166321562739182, "loss": 0.1773, "step": 20650 }, { "epoch": 0.036617702989151395, "grad_norm": 0.54296875, "learning_rate": 0.0018165961484690454, "loss": 0.1919, "step": 20652 }, { "epoch": 0.03662124915446121, "grad_norm": 0.7109375, "learning_rate": 0.0018165601375301976, "loss": 0.1674, "step": 20654 }, { "epoch": 0.036624795319771024, "grad_norm": 0.39453125, "learning_rate": 0.001816524123457532, "loss": 0.2623, "step": 20656 }, { "epoch": 0.03662834148508084, "grad_norm": 2.484375, "learning_rate": 0.0018164881062512062, "loss": 0.2362, "step": 20658 }, { "epoch": 0.03663188765039065, "grad_norm": 1.4140625, "learning_rate": 0.0018164520859113775, "loss": 0.3246, "step": 20660 }, { "epoch": 0.03663543381570047, "grad_norm": 0.375, "learning_rate": 0.0018164160624382039, "loss": 0.1974, "step": 20662 }, { "epoch": 0.03663897998101028, "grad_norm": 0.6484375, "learning_rate": 0.001816380035831842, "loss": 0.19, "step": 20664 }, { "epoch": 0.0366425261463201, "grad_norm": 0.83984375, "learning_rate": 0.00181634400609245, "loss": 0.3763, "step": 20666 }, { "epoch": 0.03664607231162992, "grad_norm": 0.67578125, "learning_rate": 0.0018163079732201857, "loss": 0.1669, "step": 20668 }, { "epoch": 0.03664961847693973, "grad_norm": 0.2490234375, "learning_rate": 0.0018162719372152066, "loss": 0.1944, "step": 20670 }, { "epoch": 0.03665316464224955, "grad_norm": 0.34375, "learning_rate": 0.0018162358980776696, "loss": 0.1894, "step": 20672 }, { "epoch": 0.03665671080755936, "grad_norm": 1.8203125, "learning_rate": 0.001816199855807733, "loss": 0.1825, "step": 20674 }, { "epoch": 0.036660256972869176, "grad_norm": 0.345703125, "learning_rate": 0.0018161638104055545, "loss": 0.3974, "step": 20676 }, { "epoch": 0.03666380313817899, "grad_norm": 0.546875, "learning_rate": 0.0018161277618712912, "loss": 0.2757, "step": 20678 }, { "epoch": 0.036667349303488805, "grad_norm": 0.515625, "learning_rate": 0.0018160917102051008, "loss": 0.1951, "step": 20680 }, { "epoch": 0.03667089546879862, "grad_norm": 0.4140625, "learning_rate": 0.0018160556554071418, "loss": 0.2306, "step": 20682 }, { "epoch": 0.036674441634108434, "grad_norm": 1.15625, "learning_rate": 0.0018160195974775712, "loss": 0.283, "step": 20684 }, { "epoch": 0.03667798779941825, "grad_norm": 0.396484375, "learning_rate": 0.0018159835364165466, "loss": 0.1744, "step": 20686 }, { "epoch": 0.036681533964728064, "grad_norm": 0.92578125, "learning_rate": 0.0018159474722242257, "loss": 0.2245, "step": 20688 }, { "epoch": 0.036685080130037885, "grad_norm": 0.490234375, "learning_rate": 0.0018159114049007667, "loss": 0.2463, "step": 20690 }, { "epoch": 0.0366886262953477, "grad_norm": 0.69921875, "learning_rate": 0.001815875334446327, "loss": 0.2343, "step": 20692 }, { "epoch": 0.036692172460657514, "grad_norm": 0.55078125, "learning_rate": 0.0018158392608610646, "loss": 0.2155, "step": 20694 }, { "epoch": 0.03669571862596733, "grad_norm": 0.91796875, "learning_rate": 0.001815803184145137, "loss": 0.2953, "step": 20696 }, { "epoch": 0.03669926479127714, "grad_norm": 1.390625, "learning_rate": 0.001815767104298702, "loss": 0.3375, "step": 20698 }, { "epoch": 0.03670281095658696, "grad_norm": 2.109375, "learning_rate": 0.0018157310213219174, "loss": 0.2837, "step": 20700 }, { "epoch": 0.03670635712189677, "grad_norm": 0.302734375, "learning_rate": 0.0018156949352149412, "loss": 0.1909, "step": 20702 }, { "epoch": 0.03670990328720659, "grad_norm": 1.921875, "learning_rate": 0.0018156588459779309, "loss": 0.2606, "step": 20704 }, { "epoch": 0.0367134494525164, "grad_norm": 0.357421875, "learning_rate": 0.0018156227536110447, "loss": 0.2155, "step": 20706 }, { "epoch": 0.036716995617826216, "grad_norm": 0.349609375, "learning_rate": 0.00181558665811444, "loss": 0.1708, "step": 20708 }, { "epoch": 0.03672054178313603, "grad_norm": 0.271484375, "learning_rate": 0.0018155505594882753, "loss": 0.3175, "step": 20710 }, { "epoch": 0.03672408794844585, "grad_norm": 1.0078125, "learning_rate": 0.001815514457732708, "loss": 0.2446, "step": 20712 }, { "epoch": 0.03672763411375567, "grad_norm": 0.494140625, "learning_rate": 0.0018154783528478958, "loss": 0.1739, "step": 20714 }, { "epoch": 0.03673118027906548, "grad_norm": 0.484375, "learning_rate": 0.0018154422448339968, "loss": 0.1448, "step": 20716 }, { "epoch": 0.036734726444375296, "grad_norm": 0.4375, "learning_rate": 0.0018154061336911693, "loss": 0.4107, "step": 20718 }, { "epoch": 0.03673827260968511, "grad_norm": 0.328125, "learning_rate": 0.0018153700194195707, "loss": 0.1966, "step": 20720 }, { "epoch": 0.036741818774994925, "grad_norm": 0.3515625, "learning_rate": 0.0018153339020193597, "loss": 0.1899, "step": 20722 }, { "epoch": 0.03674536494030474, "grad_norm": 0.7109375, "learning_rate": 0.0018152977814906933, "loss": 0.2027, "step": 20724 }, { "epoch": 0.036748911105614554, "grad_norm": 1.3671875, "learning_rate": 0.00181526165783373, "loss": 0.3423, "step": 20726 }, { "epoch": 0.03675245727092437, "grad_norm": 0.64453125, "learning_rate": 0.0018152255310486276, "loss": 0.2286, "step": 20728 }, { "epoch": 0.03675600343623418, "grad_norm": 1.09375, "learning_rate": 0.001815189401135544, "loss": 0.2784, "step": 20730 }, { "epoch": 0.036759549601544, "grad_norm": 0.26953125, "learning_rate": 0.001815153268094638, "loss": 0.1614, "step": 20732 }, { "epoch": 0.03676309576685381, "grad_norm": 0.6484375, "learning_rate": 0.0018151171319260664, "loss": 0.2384, "step": 20734 }, { "epoch": 0.036766641932163634, "grad_norm": 0.490234375, "learning_rate": 0.0018150809926299884, "loss": 0.2256, "step": 20736 }, { "epoch": 0.03677018809747345, "grad_norm": 0.73828125, "learning_rate": 0.0018150448502065608, "loss": 0.2084, "step": 20738 }, { "epoch": 0.03677373426278326, "grad_norm": 0.546875, "learning_rate": 0.0018150087046559427, "loss": 0.2435, "step": 20740 }, { "epoch": 0.03677728042809308, "grad_norm": 1.0546875, "learning_rate": 0.001814972555978292, "loss": 0.2042, "step": 20742 }, { "epoch": 0.03678082659340289, "grad_norm": 0.30859375, "learning_rate": 0.0018149364041737668, "loss": 0.2081, "step": 20744 }, { "epoch": 0.036784372758712706, "grad_norm": 0.64453125, "learning_rate": 0.0018149002492425246, "loss": 0.2705, "step": 20746 }, { "epoch": 0.03678791892402252, "grad_norm": 0.7265625, "learning_rate": 0.001814864091184724, "loss": 0.2427, "step": 20748 }, { "epoch": 0.036791465089332336, "grad_norm": 0.6640625, "learning_rate": 0.0018148279300005234, "loss": 0.2168, "step": 20750 }, { "epoch": 0.03679501125464215, "grad_norm": 0.44921875, "learning_rate": 0.0018147917656900808, "loss": 0.2555, "step": 20752 }, { "epoch": 0.036798557419951965, "grad_norm": 1.234375, "learning_rate": 0.0018147555982535538, "loss": 0.3553, "step": 20754 }, { "epoch": 0.03680210358526178, "grad_norm": 0.5078125, "learning_rate": 0.0018147194276911013, "loss": 0.19, "step": 20756 }, { "epoch": 0.0368056497505716, "grad_norm": 0.5546875, "learning_rate": 0.001814683254002881, "loss": 0.2831, "step": 20758 }, { "epoch": 0.036809195915881415, "grad_norm": 3.609375, "learning_rate": 0.0018146470771890513, "loss": 0.4019, "step": 20760 }, { "epoch": 0.03681274208119123, "grad_norm": 0.30078125, "learning_rate": 0.0018146108972497703, "loss": 0.1951, "step": 20762 }, { "epoch": 0.036816288246501044, "grad_norm": 0.44921875, "learning_rate": 0.0018145747141851964, "loss": 0.2427, "step": 20764 }, { "epoch": 0.03681983441181086, "grad_norm": 0.71484375, "learning_rate": 0.001814538527995488, "loss": 0.1913, "step": 20766 }, { "epoch": 0.036823380577120673, "grad_norm": 0.91015625, "learning_rate": 0.0018145023386808028, "loss": 0.1737, "step": 20768 }, { "epoch": 0.03682692674243049, "grad_norm": 0.7109375, "learning_rate": 0.0018144661462413, "loss": 0.2179, "step": 20770 }, { "epoch": 0.0368304729077403, "grad_norm": 1.0859375, "learning_rate": 0.0018144299506771366, "loss": 0.2875, "step": 20772 }, { "epoch": 0.03683401907305012, "grad_norm": 0.578125, "learning_rate": 0.0018143937519884718, "loss": 0.2428, "step": 20774 }, { "epoch": 0.03683756523835993, "grad_norm": 0.375, "learning_rate": 0.0018143575501754637, "loss": 0.2427, "step": 20776 }, { "epoch": 0.036841111403669746, "grad_norm": 1.15625, "learning_rate": 0.0018143213452382705, "loss": 0.161, "step": 20778 }, { "epoch": 0.03684465756897957, "grad_norm": 0.48828125, "learning_rate": 0.001814285137177051, "loss": 0.2137, "step": 20780 }, { "epoch": 0.03684820373428938, "grad_norm": 3.71875, "learning_rate": 0.0018142489259919633, "loss": 0.3106, "step": 20782 }, { "epoch": 0.0368517498995992, "grad_norm": 0.53125, "learning_rate": 0.0018142127116831653, "loss": 0.1988, "step": 20784 }, { "epoch": 0.03685529606490901, "grad_norm": 0.43359375, "learning_rate": 0.0018141764942508159, "loss": 0.2045, "step": 20786 }, { "epoch": 0.036858842230218826, "grad_norm": 0.5078125, "learning_rate": 0.0018141402736950732, "loss": 0.1707, "step": 20788 }, { "epoch": 0.03686238839552864, "grad_norm": 0.65625, "learning_rate": 0.001814104050016096, "loss": 0.5549, "step": 20790 }, { "epoch": 0.036865934560838455, "grad_norm": 0.61328125, "learning_rate": 0.0018140678232140424, "loss": 0.3329, "step": 20792 }, { "epoch": 0.03686948072614827, "grad_norm": 0.73828125, "learning_rate": 0.0018140315932890709, "loss": 0.2136, "step": 20794 }, { "epoch": 0.036873026891458084, "grad_norm": 0.73046875, "learning_rate": 0.0018139953602413401, "loss": 0.1922, "step": 20796 }, { "epoch": 0.0368765730567679, "grad_norm": 0.66796875, "learning_rate": 0.001813959124071008, "loss": 0.1666, "step": 20798 }, { "epoch": 0.03688011922207771, "grad_norm": 0.51953125, "learning_rate": 0.001813922884778234, "loss": 0.2369, "step": 20800 }, { "epoch": 0.03688366538738753, "grad_norm": 0.47265625, "learning_rate": 0.0018138866423631753, "loss": 0.2323, "step": 20802 }, { "epoch": 0.03688721155269735, "grad_norm": 0.35546875, "learning_rate": 0.0018138503968259917, "loss": 0.2249, "step": 20804 }, { "epoch": 0.036890757718007164, "grad_norm": 0.8984375, "learning_rate": 0.0018138141481668407, "loss": 0.1648, "step": 20806 }, { "epoch": 0.03689430388331698, "grad_norm": 0.54296875, "learning_rate": 0.0018137778963858812, "loss": 0.1919, "step": 20808 }, { "epoch": 0.03689785004862679, "grad_norm": 1.078125, "learning_rate": 0.001813741641483272, "loss": 0.272, "step": 20810 }, { "epoch": 0.03690139621393661, "grad_norm": 0.333984375, "learning_rate": 0.0018137053834591716, "loss": 0.1518, "step": 20812 }, { "epoch": 0.03690494237924642, "grad_norm": 0.50390625, "learning_rate": 0.0018136691223137383, "loss": 0.2655, "step": 20814 }, { "epoch": 0.03690848854455624, "grad_norm": 0.65234375, "learning_rate": 0.0018136328580471307, "loss": 0.2803, "step": 20816 }, { "epoch": 0.03691203470986605, "grad_norm": 0.279296875, "learning_rate": 0.0018135965906595076, "loss": 0.1776, "step": 20818 }, { "epoch": 0.036915580875175866, "grad_norm": 0.1630859375, "learning_rate": 0.0018135603201510273, "loss": 0.2845, "step": 20820 }, { "epoch": 0.03691912704048568, "grad_norm": 1.03125, "learning_rate": 0.001813524046521849, "loss": 0.1963, "step": 20822 }, { "epoch": 0.036922673205795495, "grad_norm": 0.42578125, "learning_rate": 0.0018134877697721307, "loss": 0.2117, "step": 20824 }, { "epoch": 0.036926219371105316, "grad_norm": 0.4609375, "learning_rate": 0.0018134514899020315, "loss": 0.1698, "step": 20826 }, { "epoch": 0.03692976553641513, "grad_norm": 0.248046875, "learning_rate": 0.00181341520691171, "loss": 0.2005, "step": 20828 }, { "epoch": 0.036933311701724945, "grad_norm": 0.267578125, "learning_rate": 0.0018133789208013248, "loss": 0.2165, "step": 20830 }, { "epoch": 0.03693685786703476, "grad_norm": 0.8046875, "learning_rate": 0.001813342631571034, "loss": 0.2663, "step": 20832 }, { "epoch": 0.036940404032344575, "grad_norm": 0.1875, "learning_rate": 0.0018133063392209976, "loss": 0.1681, "step": 20834 }, { "epoch": 0.03694395019765439, "grad_norm": 1.6015625, "learning_rate": 0.0018132700437513735, "loss": 0.3001, "step": 20836 }, { "epoch": 0.036947496362964204, "grad_norm": 3.234375, "learning_rate": 0.0018132337451623201, "loss": 0.3373, "step": 20838 }, { "epoch": 0.03695104252827402, "grad_norm": 0.37109375, "learning_rate": 0.0018131974434539972, "loss": 0.1783, "step": 20840 }, { "epoch": 0.03695458869358383, "grad_norm": 1.2734375, "learning_rate": 0.0018131611386265625, "loss": 0.233, "step": 20842 }, { "epoch": 0.03695813485889365, "grad_norm": 0.51171875, "learning_rate": 0.0018131248306801754, "loss": 0.2175, "step": 20844 }, { "epoch": 0.03696168102420346, "grad_norm": 0.6875, "learning_rate": 0.0018130885196149945, "loss": 0.2348, "step": 20846 }, { "epoch": 0.03696522718951328, "grad_norm": 1.5703125, "learning_rate": 0.0018130522054311785, "loss": 0.2253, "step": 20848 }, { "epoch": 0.0369687733548231, "grad_norm": 0.5390625, "learning_rate": 0.0018130158881288865, "loss": 0.2017, "step": 20850 }, { "epoch": 0.03697231952013291, "grad_norm": 0.68359375, "learning_rate": 0.001812979567708277, "loss": 0.1608, "step": 20852 }, { "epoch": 0.03697586568544273, "grad_norm": 0.359375, "learning_rate": 0.0018129432441695093, "loss": 0.1483, "step": 20854 }, { "epoch": 0.03697941185075254, "grad_norm": 0.671875, "learning_rate": 0.0018129069175127418, "loss": 0.2507, "step": 20856 }, { "epoch": 0.036982958016062356, "grad_norm": 0.50390625, "learning_rate": 0.0018128705877381336, "loss": 0.1923, "step": 20858 }, { "epoch": 0.03698650418137217, "grad_norm": 1.1015625, "learning_rate": 0.0018128342548458434, "loss": 0.2784, "step": 20860 }, { "epoch": 0.036990050346681985, "grad_norm": 0.4453125, "learning_rate": 0.0018127979188360304, "loss": 0.1615, "step": 20862 }, { "epoch": 0.0369935965119918, "grad_norm": 0.484375, "learning_rate": 0.0018127615797088532, "loss": 0.1922, "step": 20864 }, { "epoch": 0.036997142677301614, "grad_norm": 0.271484375, "learning_rate": 0.001812725237464471, "loss": 0.2392, "step": 20866 }, { "epoch": 0.03700068884261143, "grad_norm": 1.2421875, "learning_rate": 0.0018126888921030427, "loss": 0.2242, "step": 20868 }, { "epoch": 0.03700423500792124, "grad_norm": 0.9609375, "learning_rate": 0.001812652543624727, "loss": 0.3085, "step": 20870 }, { "epoch": 0.037007781173231065, "grad_norm": 0.64453125, "learning_rate": 0.001812616192029683, "loss": 0.2082, "step": 20872 }, { "epoch": 0.03701132733854088, "grad_norm": 0.263671875, "learning_rate": 0.0018125798373180698, "loss": 0.1908, "step": 20874 }, { "epoch": 0.037014873503850694, "grad_norm": 1.15625, "learning_rate": 0.001812543479490046, "loss": 0.2896, "step": 20876 }, { "epoch": 0.03701841966916051, "grad_norm": 0.7890625, "learning_rate": 0.0018125071185457712, "loss": 0.1997, "step": 20878 }, { "epoch": 0.03702196583447032, "grad_norm": 0.5703125, "learning_rate": 0.0018124707544854041, "loss": 0.1764, "step": 20880 }, { "epoch": 0.03702551199978014, "grad_norm": 0.8125, "learning_rate": 0.0018124343873091038, "loss": 0.187, "step": 20882 }, { "epoch": 0.03702905816508995, "grad_norm": 0.64453125, "learning_rate": 0.0018123980170170293, "loss": 0.256, "step": 20884 }, { "epoch": 0.03703260433039977, "grad_norm": 0.890625, "learning_rate": 0.0018123616436093398, "loss": 0.4213, "step": 20886 }, { "epoch": 0.03703615049570958, "grad_norm": 4.9375, "learning_rate": 0.0018123252670861944, "loss": 0.3213, "step": 20888 }, { "epoch": 0.037039696661019396, "grad_norm": 0.396484375, "learning_rate": 0.0018122888874477518, "loss": 0.219, "step": 20890 }, { "epoch": 0.03704324282632921, "grad_norm": 0.41796875, "learning_rate": 0.001812252504694171, "loss": 0.2303, "step": 20892 }, { "epoch": 0.03704678899163903, "grad_norm": 0.53515625, "learning_rate": 0.001812216118825612, "loss": 0.1699, "step": 20894 }, { "epoch": 0.037050335156948846, "grad_norm": 0.75390625, "learning_rate": 0.0018121797298422334, "loss": 0.2213, "step": 20896 }, { "epoch": 0.03705388132225866, "grad_norm": 0.54296875, "learning_rate": 0.0018121433377441942, "loss": 0.2062, "step": 20898 }, { "epoch": 0.037057427487568476, "grad_norm": 0.3203125, "learning_rate": 0.0018121069425316536, "loss": 0.2148, "step": 20900 }, { "epoch": 0.03706097365287829, "grad_norm": 0.703125, "learning_rate": 0.001812070544204771, "loss": 0.1873, "step": 20902 }, { "epoch": 0.037064519818188105, "grad_norm": 4.15625, "learning_rate": 0.0018120341427637053, "loss": 0.2448, "step": 20904 }, { "epoch": 0.03706806598349792, "grad_norm": 0.2470703125, "learning_rate": 0.001811997738208616, "loss": 0.2328, "step": 20906 }, { "epoch": 0.037071612148807734, "grad_norm": 0.30859375, "learning_rate": 0.0018119613305396622, "loss": 0.1098, "step": 20908 }, { "epoch": 0.03707515831411755, "grad_norm": 0.63671875, "learning_rate": 0.0018119249197570031, "loss": 0.178, "step": 20910 }, { "epoch": 0.03707870447942736, "grad_norm": 0.384765625, "learning_rate": 0.001811888505860798, "loss": 0.2052, "step": 20912 }, { "epoch": 0.03708225064473718, "grad_norm": 0.79296875, "learning_rate": 0.0018118520888512058, "loss": 0.1887, "step": 20914 }, { "epoch": 0.037085796810047, "grad_norm": 1.0546875, "learning_rate": 0.0018118156687283863, "loss": 0.244, "step": 20916 }, { "epoch": 0.037089342975356814, "grad_norm": 0.4765625, "learning_rate": 0.0018117792454924984, "loss": 0.2284, "step": 20918 }, { "epoch": 0.03709288914066663, "grad_norm": 0.53125, "learning_rate": 0.0018117428191437017, "loss": 0.2419, "step": 20920 }, { "epoch": 0.03709643530597644, "grad_norm": 0.55859375, "learning_rate": 0.0018117063896821552, "loss": 0.2104, "step": 20922 }, { "epoch": 0.03709998147128626, "grad_norm": 0.5078125, "learning_rate": 0.0018116699571080184, "loss": 0.3777, "step": 20924 }, { "epoch": 0.03710352763659607, "grad_norm": 0.734375, "learning_rate": 0.0018116335214214505, "loss": 0.315, "step": 20926 }, { "epoch": 0.037107073801905886, "grad_norm": 0.7265625, "learning_rate": 0.001811597082622611, "loss": 0.2939, "step": 20928 }, { "epoch": 0.0371106199672157, "grad_norm": 1.65625, "learning_rate": 0.0018115606407116593, "loss": 0.4039, "step": 20930 }, { "epoch": 0.037114166132525515, "grad_norm": 0.578125, "learning_rate": 0.0018115241956887546, "loss": 0.171, "step": 20932 }, { "epoch": 0.03711771229783533, "grad_norm": 2.828125, "learning_rate": 0.0018114877475540563, "loss": 0.2688, "step": 20934 }, { "epoch": 0.037121258463145144, "grad_norm": 0.765625, "learning_rate": 0.0018114512963077242, "loss": 0.1959, "step": 20936 }, { "epoch": 0.03712480462845496, "grad_norm": 0.53515625, "learning_rate": 0.001811414841949917, "loss": 0.1852, "step": 20938 }, { "epoch": 0.03712835079376478, "grad_norm": 1.3984375, "learning_rate": 0.0018113783844807946, "loss": 0.292, "step": 20940 }, { "epoch": 0.037131896959074595, "grad_norm": 0.7890625, "learning_rate": 0.0018113419239005166, "loss": 0.4275, "step": 20942 }, { "epoch": 0.03713544312438441, "grad_norm": 2.09375, "learning_rate": 0.001811305460209242, "loss": 0.2571, "step": 20944 }, { "epoch": 0.037138989289694224, "grad_norm": 1.078125, "learning_rate": 0.0018112689934071304, "loss": 0.2541, "step": 20946 }, { "epoch": 0.03714253545500404, "grad_norm": 0.4140625, "learning_rate": 0.001811232523494342, "loss": 0.209, "step": 20948 }, { "epoch": 0.03714608162031385, "grad_norm": 0.455078125, "learning_rate": 0.0018111960504710353, "loss": 0.2007, "step": 20950 }, { "epoch": 0.03714962778562367, "grad_norm": 0.89453125, "learning_rate": 0.0018111595743373697, "loss": 0.2365, "step": 20952 }, { "epoch": 0.03715317395093348, "grad_norm": 1.9921875, "learning_rate": 0.0018111230950935055, "loss": 0.4104, "step": 20954 }, { "epoch": 0.0371567201162433, "grad_norm": 1.4140625, "learning_rate": 0.0018110866127396023, "loss": 0.2188, "step": 20956 }, { "epoch": 0.03716026628155311, "grad_norm": 0.392578125, "learning_rate": 0.001811050127275819, "loss": 0.1942, "step": 20958 }, { "epoch": 0.037163812446862926, "grad_norm": 0.48046875, "learning_rate": 0.0018110136387023154, "loss": 0.2925, "step": 20960 }, { "epoch": 0.03716735861217275, "grad_norm": 0.99609375, "learning_rate": 0.0018109771470192512, "loss": 0.259, "step": 20962 }, { "epoch": 0.03717090477748256, "grad_norm": 0.84765625, "learning_rate": 0.001810940652226786, "loss": 0.2276, "step": 20964 }, { "epoch": 0.03717445094279238, "grad_norm": 0.9296875, "learning_rate": 0.0018109041543250795, "loss": 0.2024, "step": 20966 }, { "epoch": 0.03717799710810219, "grad_norm": 0.7890625, "learning_rate": 0.0018108676533142912, "loss": 0.2229, "step": 20968 }, { "epoch": 0.037181543273412006, "grad_norm": 1.2890625, "learning_rate": 0.0018108311491945805, "loss": 0.3529, "step": 20970 }, { "epoch": 0.03718508943872182, "grad_norm": 0.29296875, "learning_rate": 0.0018107946419661071, "loss": 0.1708, "step": 20972 }, { "epoch": 0.037188635604031635, "grad_norm": 0.6171875, "learning_rate": 0.0018107581316290312, "loss": 0.1987, "step": 20974 }, { "epoch": 0.03719218176934145, "grad_norm": 0.87890625, "learning_rate": 0.0018107216181835117, "loss": 0.1912, "step": 20976 }, { "epoch": 0.037195727934651264, "grad_norm": 0.46484375, "learning_rate": 0.001810685101629709, "loss": 0.1652, "step": 20978 }, { "epoch": 0.03719927409996108, "grad_norm": 0.93359375, "learning_rate": 0.0018106485819677825, "loss": 0.2541, "step": 20980 }, { "epoch": 0.03720282026527089, "grad_norm": 0.703125, "learning_rate": 0.0018106120591978917, "loss": 0.2177, "step": 20982 }, { "epoch": 0.037206366430580715, "grad_norm": 0.59765625, "learning_rate": 0.0018105755333201966, "loss": 0.2082, "step": 20984 }, { "epoch": 0.03720991259589053, "grad_norm": 2.15625, "learning_rate": 0.0018105390043348566, "loss": 0.4442, "step": 20986 }, { "epoch": 0.037213458761200344, "grad_norm": 0.80859375, "learning_rate": 0.0018105024722420322, "loss": 0.2664, "step": 20988 }, { "epoch": 0.03721700492651016, "grad_norm": 0.375, "learning_rate": 0.0018104659370418825, "loss": 0.3408, "step": 20990 }, { "epoch": 0.03722055109181997, "grad_norm": 0.59765625, "learning_rate": 0.0018104293987345674, "loss": 0.2209, "step": 20992 }, { "epoch": 0.03722409725712979, "grad_norm": 0.8203125, "learning_rate": 0.0018103928573202468, "loss": 0.1995, "step": 20994 }, { "epoch": 0.0372276434224396, "grad_norm": 0.330078125, "learning_rate": 0.0018103563127990809, "loss": 0.2242, "step": 20996 }, { "epoch": 0.037231189587749416, "grad_norm": 0.7109375, "learning_rate": 0.0018103197651712284, "loss": 0.1957, "step": 20998 }, { "epoch": 0.03723473575305923, "grad_norm": 0.83203125, "learning_rate": 0.0018102832144368502, "loss": 0.2153, "step": 21000 }, { "epoch": 0.037238281918369046, "grad_norm": 0.4921875, "learning_rate": 0.0018102466605961058, "loss": 0.1745, "step": 21002 }, { "epoch": 0.03724182808367886, "grad_norm": 1.0859375, "learning_rate": 0.0018102101036491553, "loss": 0.2931, "step": 21004 }, { "epoch": 0.037245374248988675, "grad_norm": 0.5859375, "learning_rate": 0.0018101735435961579, "loss": 0.1673, "step": 21006 }, { "epoch": 0.037248920414298496, "grad_norm": 1.3046875, "learning_rate": 0.0018101369804372743, "loss": 0.3635, "step": 21008 }, { "epoch": 0.03725246657960831, "grad_norm": 0.37109375, "learning_rate": 0.0018101004141726639, "loss": 0.3353, "step": 21010 }, { "epoch": 0.037256012744918125, "grad_norm": 0.3828125, "learning_rate": 0.0018100638448024864, "loss": 0.2376, "step": 21012 }, { "epoch": 0.03725955891022794, "grad_norm": 0.6875, "learning_rate": 0.0018100272723269026, "loss": 0.2049, "step": 21014 }, { "epoch": 0.037263105075537754, "grad_norm": 0.61328125, "learning_rate": 0.001809990696746072, "loss": 0.2344, "step": 21016 }, { "epoch": 0.03726665124084757, "grad_norm": 0.58984375, "learning_rate": 0.001809954118060154, "loss": 0.2745, "step": 21018 }, { "epoch": 0.03727019740615738, "grad_norm": 1.21875, "learning_rate": 0.0018099175362693094, "loss": 0.2316, "step": 21020 }, { "epoch": 0.0372737435714672, "grad_norm": 0.3125, "learning_rate": 0.0018098809513736977, "loss": 0.3419, "step": 21022 }, { "epoch": 0.03727728973677701, "grad_norm": 0.458984375, "learning_rate": 0.0018098443633734794, "loss": 0.2413, "step": 21024 }, { "epoch": 0.03728083590208683, "grad_norm": 0.490234375, "learning_rate": 0.001809807772268814, "loss": 0.3037, "step": 21026 }, { "epoch": 0.03728438206739664, "grad_norm": 0.76953125, "learning_rate": 0.0018097711780598615, "loss": 0.2663, "step": 21028 }, { "epoch": 0.03728792823270646, "grad_norm": 0.47265625, "learning_rate": 0.0018097345807467824, "loss": 0.2586, "step": 21030 }, { "epoch": 0.03729147439801628, "grad_norm": 0.703125, "learning_rate": 0.0018096979803297365, "loss": 0.2057, "step": 21032 }, { "epoch": 0.03729502056332609, "grad_norm": 0.80859375, "learning_rate": 0.001809661376808884, "loss": 0.2135, "step": 21034 }, { "epoch": 0.03729856672863591, "grad_norm": 1.0078125, "learning_rate": 0.0018096247701843845, "loss": 0.1939, "step": 21036 }, { "epoch": 0.03730211289394572, "grad_norm": 0.875, "learning_rate": 0.0018095881604563986, "loss": 0.2012, "step": 21038 }, { "epoch": 0.037305659059255536, "grad_norm": 0.515625, "learning_rate": 0.0018095515476250863, "loss": 0.2241, "step": 21040 }, { "epoch": 0.03730920522456535, "grad_norm": 0.271484375, "learning_rate": 0.0018095149316906078, "loss": 0.1795, "step": 21042 }, { "epoch": 0.037312751389875165, "grad_norm": 0.3359375, "learning_rate": 0.0018094783126531232, "loss": 0.4878, "step": 21044 }, { "epoch": 0.03731629755518498, "grad_norm": 0.51171875, "learning_rate": 0.0018094416905127919, "loss": 0.2097, "step": 21046 }, { "epoch": 0.037319843720494794, "grad_norm": 0.69921875, "learning_rate": 0.0018094050652697753, "loss": 0.233, "step": 21048 }, { "epoch": 0.03732338988580461, "grad_norm": 1.0234375, "learning_rate": 0.0018093684369242331, "loss": 0.2799, "step": 21050 }, { "epoch": 0.03732693605111443, "grad_norm": 0.482421875, "learning_rate": 0.0018093318054763254, "loss": 0.231, "step": 21052 }, { "epoch": 0.037330482216424245, "grad_norm": 0.6328125, "learning_rate": 0.001809295170926212, "loss": 0.2407, "step": 21054 }, { "epoch": 0.03733402838173406, "grad_norm": 0.52734375, "learning_rate": 0.0018092585332740538, "loss": 0.2253, "step": 21056 }, { "epoch": 0.037337574547043874, "grad_norm": 0.625, "learning_rate": 0.0018092218925200108, "loss": 0.1789, "step": 21058 }, { "epoch": 0.03734112071235369, "grad_norm": 0.296875, "learning_rate": 0.0018091852486642433, "loss": 0.2441, "step": 21060 }, { "epoch": 0.0373446668776635, "grad_norm": 0.60546875, "learning_rate": 0.0018091486017069113, "loss": 0.2528, "step": 21062 }, { "epoch": 0.03734821304297332, "grad_norm": 0.390625, "learning_rate": 0.0018091119516481752, "loss": 0.1958, "step": 21064 }, { "epoch": 0.03735175920828313, "grad_norm": 0.81640625, "learning_rate": 0.0018090752984881954, "loss": 0.2339, "step": 21066 }, { "epoch": 0.03735530537359295, "grad_norm": 0.265625, "learning_rate": 0.0018090386422271324, "loss": 0.1701, "step": 21068 }, { "epoch": 0.03735885153890276, "grad_norm": 0.373046875, "learning_rate": 0.0018090019828651456, "loss": 0.1947, "step": 21070 }, { "epoch": 0.037362397704212576, "grad_norm": 0.58203125, "learning_rate": 0.0018089653204023963, "loss": 0.1968, "step": 21072 }, { "epoch": 0.03736594386952239, "grad_norm": 1.03125, "learning_rate": 0.0018089286548390448, "loss": 0.3609, "step": 21074 }, { "epoch": 0.03736949003483221, "grad_norm": 0.455078125, "learning_rate": 0.0018088919861752506, "loss": 0.2133, "step": 21076 }, { "epoch": 0.037373036200142026, "grad_norm": 1.671875, "learning_rate": 0.001808855314411175, "loss": 0.2689, "step": 21078 }, { "epoch": 0.03737658236545184, "grad_norm": 0.66796875, "learning_rate": 0.0018088186395469779, "loss": 0.1503, "step": 21080 }, { "epoch": 0.037380128530761655, "grad_norm": 0.75390625, "learning_rate": 0.0018087819615828196, "loss": 0.2604, "step": 21082 }, { "epoch": 0.03738367469607147, "grad_norm": 0.6875, "learning_rate": 0.001808745280518861, "loss": 0.289, "step": 21084 }, { "epoch": 0.037387220861381285, "grad_norm": 0.4765625, "learning_rate": 0.0018087085963552623, "loss": 0.3335, "step": 21086 }, { "epoch": 0.0373907670266911, "grad_norm": 0.396484375, "learning_rate": 0.0018086719090921837, "loss": 0.3295, "step": 21088 }, { "epoch": 0.037394313192000914, "grad_norm": 0.51171875, "learning_rate": 0.0018086352187297859, "loss": 0.2438, "step": 21090 }, { "epoch": 0.03739785935731073, "grad_norm": 0.95703125, "learning_rate": 0.0018085985252682293, "loss": 0.2031, "step": 21092 }, { "epoch": 0.03740140552262054, "grad_norm": 1.5390625, "learning_rate": 0.001808561828707674, "loss": 0.2183, "step": 21094 }, { "epoch": 0.03740495168793036, "grad_norm": 0.546875, "learning_rate": 0.0018085251290482812, "loss": 0.2279, "step": 21096 }, { "epoch": 0.03740849785324018, "grad_norm": 0.9453125, "learning_rate": 0.001808488426290211, "loss": 0.2037, "step": 21098 }, { "epoch": 0.03741204401854999, "grad_norm": 0.4609375, "learning_rate": 0.001808451720433624, "loss": 0.1834, "step": 21100 }, { "epoch": 0.03741559018385981, "grad_norm": 0.7890625, "learning_rate": 0.0018084150114786807, "loss": 0.2205, "step": 21102 }, { "epoch": 0.03741913634916962, "grad_norm": 1.7734375, "learning_rate": 0.0018083782994255412, "loss": 0.3892, "step": 21104 }, { "epoch": 0.03742268251447944, "grad_norm": 0.5546875, "learning_rate": 0.001808341584274367, "loss": 0.242, "step": 21106 }, { "epoch": 0.03742622867978925, "grad_norm": 0.44921875, "learning_rate": 0.001808304866025318, "loss": 0.2081, "step": 21108 }, { "epoch": 0.037429774845099066, "grad_norm": 0.470703125, "learning_rate": 0.001808268144678555, "loss": 0.1906, "step": 21110 }, { "epoch": 0.03743332101040888, "grad_norm": 0.71484375, "learning_rate": 0.0018082314202342385, "loss": 0.2031, "step": 21112 }, { "epoch": 0.037436867175718695, "grad_norm": 0.5703125, "learning_rate": 0.001808194692692529, "loss": 0.322, "step": 21114 }, { "epoch": 0.03744041334102851, "grad_norm": 2.0, "learning_rate": 0.0018081579620535875, "loss": 0.4059, "step": 21116 }, { "epoch": 0.037443959506338324, "grad_norm": 0.458984375, "learning_rate": 0.0018081212283175745, "loss": 0.1982, "step": 21118 }, { "epoch": 0.037447505671648146, "grad_norm": 0.5078125, "learning_rate": 0.0018080844914846505, "loss": 0.1356, "step": 21120 }, { "epoch": 0.03745105183695796, "grad_norm": 0.51171875, "learning_rate": 0.0018080477515549761, "loss": 0.2173, "step": 21122 }, { "epoch": 0.037454598002267775, "grad_norm": 0.28515625, "learning_rate": 0.0018080110085287122, "loss": 0.1596, "step": 21124 }, { "epoch": 0.03745814416757759, "grad_norm": 0.392578125, "learning_rate": 0.0018079742624060194, "loss": 0.1601, "step": 21126 }, { "epoch": 0.037461690332887404, "grad_norm": 1.6328125, "learning_rate": 0.001807937513187058, "loss": 0.2733, "step": 21128 }, { "epoch": 0.03746523649819722, "grad_norm": 0.609375, "learning_rate": 0.0018079007608719899, "loss": 0.2029, "step": 21130 }, { "epoch": 0.03746878266350703, "grad_norm": 0.453125, "learning_rate": 0.0018078640054609745, "loss": 0.2069, "step": 21132 }, { "epoch": 0.03747232882881685, "grad_norm": 1.0390625, "learning_rate": 0.0018078272469541736, "loss": 0.237, "step": 21134 }, { "epoch": 0.03747587499412666, "grad_norm": 1.53125, "learning_rate": 0.0018077904853517472, "loss": 0.1499, "step": 21136 }, { "epoch": 0.03747942115943648, "grad_norm": 0.6328125, "learning_rate": 0.001807753720653856, "loss": 0.1936, "step": 21138 }, { "epoch": 0.03748296732474629, "grad_norm": 19.125, "learning_rate": 0.0018077169528606617, "loss": 0.3343, "step": 21140 }, { "epoch": 0.037486513490056106, "grad_norm": 0.390625, "learning_rate": 0.0018076801819723242, "loss": 0.2345, "step": 21142 }, { "epoch": 0.03749005965536593, "grad_norm": 0.6796875, "learning_rate": 0.001807643407989005, "loss": 0.1732, "step": 21144 }, { "epoch": 0.03749360582067574, "grad_norm": 2.3125, "learning_rate": 0.0018076066309108644, "loss": 0.2229, "step": 21146 }, { "epoch": 0.037497151985985556, "grad_norm": 1.9921875, "learning_rate": 0.0018075698507380633, "loss": 0.4106, "step": 21148 }, { "epoch": 0.03750069815129537, "grad_norm": 0.99609375, "learning_rate": 0.0018075330674707628, "loss": 0.2897, "step": 21150 }, { "epoch": 0.037504244316605186, "grad_norm": 0.3203125, "learning_rate": 0.0018074962811091236, "loss": 0.1652, "step": 21152 }, { "epoch": 0.037507790481915, "grad_norm": 0.48046875, "learning_rate": 0.001807459491653307, "loss": 0.1881, "step": 21154 }, { "epoch": 0.037511336647224815, "grad_norm": 0.466796875, "learning_rate": 0.001807422699103473, "loss": 0.2221, "step": 21156 }, { "epoch": 0.03751488281253463, "grad_norm": 1.6640625, "learning_rate": 0.001807385903459783, "loss": 0.2691, "step": 21158 }, { "epoch": 0.037518428977844444, "grad_norm": 0.52734375, "learning_rate": 0.0018073491047223983, "loss": 0.2291, "step": 21160 }, { "epoch": 0.03752197514315426, "grad_norm": 0.314453125, "learning_rate": 0.0018073123028914792, "loss": 0.1859, "step": 21162 }, { "epoch": 0.03752552130846407, "grad_norm": 2.78125, "learning_rate": 0.0018072754979671873, "loss": 0.3122, "step": 21164 }, { "epoch": 0.037529067473773894, "grad_norm": 0.3125, "learning_rate": 0.001807238689949683, "loss": 0.2175, "step": 21166 }, { "epoch": 0.03753261363908371, "grad_norm": 0.85546875, "learning_rate": 0.0018072018788391274, "loss": 0.2292, "step": 21168 }, { "epoch": 0.037536159804393524, "grad_norm": 0.55859375, "learning_rate": 0.0018071650646356817, "loss": 0.2266, "step": 21170 }, { "epoch": 0.03753970596970334, "grad_norm": 0.59375, "learning_rate": 0.0018071282473395068, "loss": 0.1971, "step": 21172 }, { "epoch": 0.03754325213501315, "grad_norm": 0.2216796875, "learning_rate": 0.0018070914269507636, "loss": 0.1768, "step": 21174 }, { "epoch": 0.03754679830032297, "grad_norm": 0.466796875, "learning_rate": 0.0018070546034696135, "loss": 0.2223, "step": 21176 }, { "epoch": 0.03755034446563278, "grad_norm": 1.2890625, "learning_rate": 0.0018070177768962168, "loss": 0.219, "step": 21178 }, { "epoch": 0.037553890630942596, "grad_norm": 0.296875, "learning_rate": 0.0018069809472307352, "loss": 0.1485, "step": 21180 }, { "epoch": 0.03755743679625241, "grad_norm": 0.62109375, "learning_rate": 0.00180694411447333, "loss": 0.2118, "step": 21182 }, { "epoch": 0.037560982961562225, "grad_norm": 1.28125, "learning_rate": 0.0018069072786241615, "loss": 0.3292, "step": 21184 }, { "epoch": 0.03756452912687204, "grad_norm": 0.384765625, "learning_rate": 0.001806870439683391, "loss": 0.2352, "step": 21186 }, { "epoch": 0.03756807529218186, "grad_norm": 1.1015625, "learning_rate": 0.0018068335976511802, "loss": 0.2458, "step": 21188 }, { "epoch": 0.037571621457491676, "grad_norm": 0.41015625, "learning_rate": 0.0018067967525276893, "loss": 0.2156, "step": 21190 }, { "epoch": 0.03757516762280149, "grad_norm": 0.8203125, "learning_rate": 0.0018067599043130806, "loss": 0.2623, "step": 21192 }, { "epoch": 0.037578713788111305, "grad_norm": 0.7109375, "learning_rate": 0.0018067230530075145, "loss": 0.2313, "step": 21194 }, { "epoch": 0.03758225995342112, "grad_norm": 0.78515625, "learning_rate": 0.001806686198611152, "loss": 0.1753, "step": 21196 }, { "epoch": 0.037585806118730934, "grad_norm": 0.27734375, "learning_rate": 0.0018066493411241548, "loss": 0.1862, "step": 21198 }, { "epoch": 0.03758935228404075, "grad_norm": 0.63671875, "learning_rate": 0.0018066124805466836, "loss": 0.2945, "step": 21200 }, { "epoch": 0.03759289844935056, "grad_norm": 2.0, "learning_rate": 0.0018065756168789001, "loss": 0.2767, "step": 21202 }, { "epoch": 0.03759644461466038, "grad_norm": 1.078125, "learning_rate": 0.001806538750120965, "loss": 0.3304, "step": 21204 }, { "epoch": 0.03759999077997019, "grad_norm": 2.328125, "learning_rate": 0.0018065018802730399, "loss": 0.2129, "step": 21206 }, { "epoch": 0.03760353694528001, "grad_norm": 1.5625, "learning_rate": 0.0018064650073352862, "loss": 0.2207, "step": 21208 }, { "epoch": 0.03760708311058982, "grad_norm": 0.396484375, "learning_rate": 0.0018064281313078649, "loss": 0.1767, "step": 21210 }, { "epoch": 0.03761062927589964, "grad_norm": 0.34765625, "learning_rate": 0.0018063912521909366, "loss": 0.2346, "step": 21212 }, { "epoch": 0.03761417544120946, "grad_norm": 3.15625, "learning_rate": 0.001806354369984664, "loss": 0.2902, "step": 21214 }, { "epoch": 0.03761772160651927, "grad_norm": 1.0703125, "learning_rate": 0.0018063174846892076, "loss": 0.3171, "step": 21216 }, { "epoch": 0.03762126777182909, "grad_norm": 0.5078125, "learning_rate": 0.0018062805963047286, "loss": 0.2067, "step": 21218 }, { "epoch": 0.0376248139371389, "grad_norm": 0.5078125, "learning_rate": 0.001806243704831389, "loss": 0.2354, "step": 21220 }, { "epoch": 0.037628360102448716, "grad_norm": 0.84765625, "learning_rate": 0.0018062068102693489, "loss": 0.1851, "step": 21222 }, { "epoch": 0.03763190626775853, "grad_norm": 0.408203125, "learning_rate": 0.0018061699126187712, "loss": 0.2565, "step": 21224 }, { "epoch": 0.037635452433068345, "grad_norm": 3.6875, "learning_rate": 0.001806133011879816, "loss": 0.2314, "step": 21226 }, { "epoch": 0.03763899859837816, "grad_norm": 0.2578125, "learning_rate": 0.0018060961080526455, "loss": 0.1508, "step": 21228 }, { "epoch": 0.037642544763687974, "grad_norm": 0.63671875, "learning_rate": 0.0018060592011374204, "loss": 0.2648, "step": 21230 }, { "epoch": 0.03764609092899779, "grad_norm": 0.478515625, "learning_rate": 0.0018060222911343029, "loss": 0.1983, "step": 21232 }, { "epoch": 0.03764963709430761, "grad_norm": 1.0546875, "learning_rate": 0.0018059853780434537, "loss": 0.3237, "step": 21234 }, { "epoch": 0.037653183259617425, "grad_norm": 0.68359375, "learning_rate": 0.0018059484618650346, "loss": 0.2297, "step": 21236 }, { "epoch": 0.03765672942492724, "grad_norm": 0.55859375, "learning_rate": 0.001805911542599207, "loss": 0.2027, "step": 21238 }, { "epoch": 0.037660275590237054, "grad_norm": 0.57421875, "learning_rate": 0.0018058746202461323, "loss": 0.1726, "step": 21240 }, { "epoch": 0.03766382175554687, "grad_norm": 0.83984375, "learning_rate": 0.0018058376948059723, "loss": 0.2698, "step": 21242 }, { "epoch": 0.03766736792085668, "grad_norm": 0.3203125, "learning_rate": 0.001805800766278888, "loss": 0.1898, "step": 21244 }, { "epoch": 0.0376709140861665, "grad_norm": 0.400390625, "learning_rate": 0.0018057638346650408, "loss": 0.1719, "step": 21246 }, { "epoch": 0.03767446025147631, "grad_norm": 0.6328125, "learning_rate": 0.001805726899964593, "loss": 0.2179, "step": 21248 }, { "epoch": 0.037678006416786126, "grad_norm": 0.86328125, "learning_rate": 0.0018056899621777056, "loss": 0.2322, "step": 21250 }, { "epoch": 0.03768155258209594, "grad_norm": 0.6875, "learning_rate": 0.0018056530213045403, "loss": 0.176, "step": 21252 }, { "epoch": 0.037685098747405756, "grad_norm": 1.96875, "learning_rate": 0.0018056160773452584, "loss": 0.4929, "step": 21254 }, { "epoch": 0.03768864491271558, "grad_norm": 2.296875, "learning_rate": 0.0018055791303000217, "loss": 0.2637, "step": 21256 }, { "epoch": 0.03769219107802539, "grad_norm": 0.578125, "learning_rate": 0.0018055421801689917, "loss": 0.2497, "step": 21258 }, { "epoch": 0.037695737243335206, "grad_norm": 0.353515625, "learning_rate": 0.0018055052269523302, "loss": 0.4515, "step": 21260 }, { "epoch": 0.03769928340864502, "grad_norm": 0.94140625, "learning_rate": 0.0018054682706501984, "loss": 0.2287, "step": 21262 }, { "epoch": 0.037702829573954835, "grad_norm": 2.1875, "learning_rate": 0.0018054313112627583, "loss": 0.2604, "step": 21264 }, { "epoch": 0.03770637573926465, "grad_norm": 0.3828125, "learning_rate": 0.0018053943487901713, "loss": 0.2021, "step": 21266 }, { "epoch": 0.037709921904574464, "grad_norm": 0.314453125, "learning_rate": 0.0018053573832325996, "loss": 0.278, "step": 21268 }, { "epoch": 0.03771346806988428, "grad_norm": 0.466796875, "learning_rate": 0.0018053204145902038, "loss": 0.2024, "step": 21270 }, { "epoch": 0.03771701423519409, "grad_norm": 0.474609375, "learning_rate": 0.0018052834428631465, "loss": 0.1597, "step": 21272 }, { "epoch": 0.03772056040050391, "grad_norm": 0.79296875, "learning_rate": 0.001805246468051589, "loss": 0.2239, "step": 21274 }, { "epoch": 0.03772410656581372, "grad_norm": 0.6484375, "learning_rate": 0.0018052094901556933, "loss": 0.2408, "step": 21276 }, { "epoch": 0.03772765273112354, "grad_norm": 0.322265625, "learning_rate": 0.0018051725091756208, "loss": 0.2451, "step": 21278 }, { "epoch": 0.03773119889643336, "grad_norm": 0.2451171875, "learning_rate": 0.0018051355251115335, "loss": 0.1978, "step": 21280 }, { "epoch": 0.03773474506174317, "grad_norm": 0.35546875, "learning_rate": 0.0018050985379635927, "loss": 0.2186, "step": 21282 }, { "epoch": 0.03773829122705299, "grad_norm": 0.306640625, "learning_rate": 0.0018050615477319606, "loss": 0.2343, "step": 21284 }, { "epoch": 0.0377418373923628, "grad_norm": 0.55078125, "learning_rate": 0.0018050245544167988, "loss": 0.2135, "step": 21286 }, { "epoch": 0.03774538355767262, "grad_norm": 0.51171875, "learning_rate": 0.0018049875580182695, "loss": 0.1666, "step": 21288 }, { "epoch": 0.03774892972298243, "grad_norm": 0.69921875, "learning_rate": 0.0018049505585365337, "loss": 0.2834, "step": 21290 }, { "epoch": 0.037752475888292246, "grad_norm": 0.4609375, "learning_rate": 0.0018049135559717539, "loss": 0.1998, "step": 21292 }, { "epoch": 0.03775602205360206, "grad_norm": 0.3515625, "learning_rate": 0.0018048765503240914, "loss": 0.2019, "step": 21294 }, { "epoch": 0.037759568218911875, "grad_norm": 0.6328125, "learning_rate": 0.0018048395415937084, "loss": 0.2537, "step": 21296 }, { "epoch": 0.03776311438422169, "grad_norm": 1.40625, "learning_rate": 0.0018048025297807667, "loss": 0.2449, "step": 21298 }, { "epoch": 0.037766660549531504, "grad_norm": 1.484375, "learning_rate": 0.0018047655148854282, "loss": 0.2965, "step": 21300 }, { "epoch": 0.037770206714841326, "grad_norm": 0.7265625, "learning_rate": 0.0018047284969078545, "loss": 0.297, "step": 21302 }, { "epoch": 0.03777375288015114, "grad_norm": 0.57421875, "learning_rate": 0.001804691475848208, "loss": 0.2388, "step": 21304 }, { "epoch": 0.037777299045460955, "grad_norm": 1.3359375, "learning_rate": 0.00180465445170665, "loss": 0.4461, "step": 21306 }, { "epoch": 0.03778084521077077, "grad_norm": 0.498046875, "learning_rate": 0.0018046174244833431, "loss": 0.1706, "step": 21308 }, { "epoch": 0.037784391376080584, "grad_norm": 3.90625, "learning_rate": 0.0018045803941784486, "loss": 0.29, "step": 21310 }, { "epoch": 0.0377879375413904, "grad_norm": 0.4375, "learning_rate": 0.001804543360792129, "loss": 0.3236, "step": 21312 }, { "epoch": 0.03779148370670021, "grad_norm": 0.474609375, "learning_rate": 0.001804506324324546, "loss": 0.2502, "step": 21314 }, { "epoch": 0.03779502987201003, "grad_norm": 0.408203125, "learning_rate": 0.0018044692847758613, "loss": 0.1669, "step": 21316 }, { "epoch": 0.03779857603731984, "grad_norm": 0.9453125, "learning_rate": 0.0018044322421462374, "loss": 0.2279, "step": 21318 }, { "epoch": 0.03780212220262966, "grad_norm": 0.828125, "learning_rate": 0.0018043951964358356, "loss": 0.2066, "step": 21320 }, { "epoch": 0.03780566836793947, "grad_norm": 0.82421875, "learning_rate": 0.001804358147644819, "loss": 0.2373, "step": 21322 }, { "epoch": 0.03780921453324929, "grad_norm": 0.78125, "learning_rate": 0.0018043210957733482, "loss": 0.2588, "step": 21324 }, { "epoch": 0.03781276069855911, "grad_norm": 2.59375, "learning_rate": 0.0018042840408215867, "loss": 0.1868, "step": 21326 }, { "epoch": 0.03781630686386892, "grad_norm": 0.91796875, "learning_rate": 0.001804246982789696, "loss": 0.2225, "step": 21328 }, { "epoch": 0.037819853029178736, "grad_norm": 1.0703125, "learning_rate": 0.0018042099216778375, "loss": 0.1665, "step": 21330 }, { "epoch": 0.03782339919448855, "grad_norm": 0.52734375, "learning_rate": 0.0018041728574861742, "loss": 0.2137, "step": 21332 }, { "epoch": 0.037826945359798365, "grad_norm": 0.61328125, "learning_rate": 0.0018041357902148678, "loss": 0.239, "step": 21334 }, { "epoch": 0.03783049152510818, "grad_norm": 0.369140625, "learning_rate": 0.0018040987198640803, "loss": 0.1946, "step": 21336 }, { "epoch": 0.037834037690417995, "grad_norm": 2.609375, "learning_rate": 0.001804061646433974, "loss": 0.2831, "step": 21338 }, { "epoch": 0.03783758385572781, "grad_norm": 0.296875, "learning_rate": 0.0018040245699247114, "loss": 0.1894, "step": 21340 }, { "epoch": 0.037841130021037624, "grad_norm": 1.1484375, "learning_rate": 0.0018039874903364541, "loss": 0.2194, "step": 21342 }, { "epoch": 0.03784467618634744, "grad_norm": 0.197265625, "learning_rate": 0.0018039504076693645, "loss": 0.1626, "step": 21344 }, { "epoch": 0.03784822235165725, "grad_norm": 0.41015625, "learning_rate": 0.0018039133219236046, "loss": 0.2114, "step": 21346 }, { "epoch": 0.037851768516967074, "grad_norm": 0.365234375, "learning_rate": 0.0018038762330993365, "loss": 0.1793, "step": 21348 }, { "epoch": 0.03785531468227689, "grad_norm": 1.0546875, "learning_rate": 0.001803839141196723, "loss": 0.2527, "step": 21350 }, { "epoch": 0.0378588608475867, "grad_norm": 0.6953125, "learning_rate": 0.0018038020462159258, "loss": 0.185, "step": 21352 }, { "epoch": 0.03786240701289652, "grad_norm": 1.7890625, "learning_rate": 0.001803764948157107, "loss": 0.2937, "step": 21354 }, { "epoch": 0.03786595317820633, "grad_norm": 0.435546875, "learning_rate": 0.0018037278470204295, "loss": 0.2109, "step": 21356 }, { "epoch": 0.03786949934351615, "grad_norm": 0.56640625, "learning_rate": 0.0018036907428060549, "loss": 0.2817, "step": 21358 }, { "epoch": 0.03787304550882596, "grad_norm": 0.287109375, "learning_rate": 0.001803653635514146, "loss": 0.235, "step": 21360 }, { "epoch": 0.037876591674135776, "grad_norm": 0.5546875, "learning_rate": 0.001803616525144865, "loss": 0.3035, "step": 21362 }, { "epoch": 0.03788013783944559, "grad_norm": 8.25, "learning_rate": 0.0018035794116983736, "loss": 0.1584, "step": 21364 }, { "epoch": 0.037883684004755405, "grad_norm": 0.51953125, "learning_rate": 0.0018035422951748347, "loss": 0.1782, "step": 21366 }, { "epoch": 0.03788723017006522, "grad_norm": 0.85546875, "learning_rate": 0.0018035051755744102, "loss": 0.1833, "step": 21368 }, { "epoch": 0.03789077633537504, "grad_norm": 0.34375, "learning_rate": 0.001803468052897263, "loss": 0.1709, "step": 21370 }, { "epoch": 0.037894322500684856, "grad_norm": 0.54296875, "learning_rate": 0.0018034309271435553, "loss": 0.1852, "step": 21372 }, { "epoch": 0.03789786866599467, "grad_norm": 1.3828125, "learning_rate": 0.001803393798313449, "loss": 0.3274, "step": 21374 }, { "epoch": 0.037901414831304485, "grad_norm": 0.3515625, "learning_rate": 0.001803356666407107, "loss": 0.1854, "step": 21376 }, { "epoch": 0.0379049609966143, "grad_norm": 0.458984375, "learning_rate": 0.0018033195314246912, "loss": 0.1703, "step": 21378 }, { "epoch": 0.037908507161924114, "grad_norm": 1.3359375, "learning_rate": 0.0018032823933663645, "loss": 0.2617, "step": 21380 }, { "epoch": 0.03791205332723393, "grad_norm": 3.25, "learning_rate": 0.0018032452522322893, "loss": 0.2223, "step": 21382 }, { "epoch": 0.03791559949254374, "grad_norm": 0.46875, "learning_rate": 0.0018032081080226277, "loss": 0.1999, "step": 21384 }, { "epoch": 0.03791914565785356, "grad_norm": 0.78125, "learning_rate": 0.0018031709607375422, "loss": 0.2009, "step": 21386 }, { "epoch": 0.03792269182316337, "grad_norm": 2.859375, "learning_rate": 0.0018031338103771956, "loss": 0.2983, "step": 21388 }, { "epoch": 0.03792623798847319, "grad_norm": 0.67578125, "learning_rate": 0.00180309665694175, "loss": 0.2067, "step": 21390 }, { "epoch": 0.03792978415378301, "grad_norm": 0.34375, "learning_rate": 0.001803059500431368, "loss": 0.179, "step": 21392 }, { "epoch": 0.03793333031909282, "grad_norm": 2.0625, "learning_rate": 0.0018030223408462124, "loss": 0.1839, "step": 21394 }, { "epoch": 0.03793687648440264, "grad_norm": 0.42578125, "learning_rate": 0.001802985178186445, "loss": 0.1773, "step": 21396 }, { "epoch": 0.03794042264971245, "grad_norm": 5.09375, "learning_rate": 0.0018029480124522293, "loss": 0.2803, "step": 21398 }, { "epoch": 0.037943968815022266, "grad_norm": 1.8359375, "learning_rate": 0.001802910843643727, "loss": 0.2233, "step": 21400 }, { "epoch": 0.03794751498033208, "grad_norm": 7.40625, "learning_rate": 0.001802873671761101, "loss": 0.2092, "step": 21402 }, { "epoch": 0.037951061145641896, "grad_norm": 0.38671875, "learning_rate": 0.0018028364968045137, "loss": 0.1647, "step": 21404 }, { "epoch": 0.03795460731095171, "grad_norm": 3.375, "learning_rate": 0.0018027993187741283, "loss": 0.2416, "step": 21406 }, { "epoch": 0.037958153476261525, "grad_norm": 1.296875, "learning_rate": 0.0018027621376701063, "loss": 0.2574, "step": 21408 }, { "epoch": 0.03796169964157134, "grad_norm": 0.75390625, "learning_rate": 0.0018027249534926112, "loss": 0.2454, "step": 21410 }, { "epoch": 0.037965245806881154, "grad_norm": 1.03125, "learning_rate": 0.0018026877662418051, "loss": 0.2003, "step": 21412 }, { "epoch": 0.03796879197219097, "grad_norm": 0.63671875, "learning_rate": 0.0018026505759178512, "loss": 0.1924, "step": 21414 }, { "epoch": 0.03797233813750079, "grad_norm": 0.97265625, "learning_rate": 0.001802613382520912, "loss": 0.2377, "step": 21416 }, { "epoch": 0.037975884302810604, "grad_norm": 1.6015625, "learning_rate": 0.0018025761860511497, "loss": 0.1858, "step": 21418 }, { "epoch": 0.03797943046812042, "grad_norm": 0.70703125, "learning_rate": 0.0018025389865087274, "loss": 0.2268, "step": 21420 }, { "epoch": 0.037982976633430234, "grad_norm": 0.34375, "learning_rate": 0.0018025017838938073, "loss": 0.2002, "step": 21422 }, { "epoch": 0.03798652279874005, "grad_norm": 0.3515625, "learning_rate": 0.0018024645782065527, "loss": 0.225, "step": 21424 }, { "epoch": 0.03799006896404986, "grad_norm": 0.7421875, "learning_rate": 0.0018024273694471262, "loss": 0.2857, "step": 21426 }, { "epoch": 0.03799361512935968, "grad_norm": 1.75, "learning_rate": 0.0018023901576156903, "loss": 0.3385, "step": 21428 }, { "epoch": 0.03799716129466949, "grad_norm": 0.365234375, "learning_rate": 0.0018023529427124078, "loss": 0.2366, "step": 21430 }, { "epoch": 0.038000707459979306, "grad_norm": 1.0859375, "learning_rate": 0.0018023157247374416, "loss": 0.211, "step": 21432 }, { "epoch": 0.03800425362528912, "grad_norm": 0.76953125, "learning_rate": 0.0018022785036909544, "loss": 0.1803, "step": 21434 }, { "epoch": 0.038007799790598935, "grad_norm": 1.3515625, "learning_rate": 0.0018022412795731088, "loss": 0.3451, "step": 21436 }, { "epoch": 0.03801134595590876, "grad_norm": 0.73046875, "learning_rate": 0.001802204052384068, "loss": 0.2073, "step": 21438 }, { "epoch": 0.03801489212121857, "grad_norm": 0.6875, "learning_rate": 0.0018021668221239941, "loss": 0.1702, "step": 21440 }, { "epoch": 0.038018438286528386, "grad_norm": 0.8203125, "learning_rate": 0.001802129588793051, "loss": 0.2185, "step": 21442 }, { "epoch": 0.0380219844518382, "grad_norm": 0.56640625, "learning_rate": 0.0018020923523914005, "loss": 0.2276, "step": 21444 }, { "epoch": 0.038025530617148015, "grad_norm": 1.09375, "learning_rate": 0.001802055112919206, "loss": 0.343, "step": 21446 }, { "epoch": 0.03802907678245783, "grad_norm": 0.55078125, "learning_rate": 0.0018020178703766303, "loss": 0.2146, "step": 21448 }, { "epoch": 0.038032622947767644, "grad_norm": 0.6484375, "learning_rate": 0.001801980624763836, "loss": 0.2428, "step": 21450 }, { "epoch": 0.03803616911307746, "grad_norm": 0.66015625, "learning_rate": 0.0018019433760809863, "loss": 0.2192, "step": 21452 }, { "epoch": 0.03803971527838727, "grad_norm": 0.76171875, "learning_rate": 0.0018019061243282441, "loss": 0.2024, "step": 21454 }, { "epoch": 0.03804326144369709, "grad_norm": 0.63671875, "learning_rate": 0.0018018688695057724, "loss": 0.2785, "step": 21456 }, { "epoch": 0.0380468076090069, "grad_norm": 0.6796875, "learning_rate": 0.0018018316116137337, "loss": 0.2465, "step": 21458 }, { "epoch": 0.038050353774316724, "grad_norm": 0.89453125, "learning_rate": 0.001801794350652291, "loss": 0.207, "step": 21460 }, { "epoch": 0.03805389993962654, "grad_norm": 1.0234375, "learning_rate": 0.0018017570866216079, "loss": 0.3178, "step": 21462 }, { "epoch": 0.03805744610493635, "grad_norm": 0.361328125, "learning_rate": 0.0018017198195218465, "loss": 0.1847, "step": 21464 }, { "epoch": 0.03806099227024617, "grad_norm": 0.322265625, "learning_rate": 0.0018016825493531708, "loss": 0.1926, "step": 21466 }, { "epoch": 0.03806453843555598, "grad_norm": 0.65234375, "learning_rate": 0.0018016452761157426, "loss": 0.2002, "step": 21468 }, { "epoch": 0.0380680846008658, "grad_norm": 0.9453125, "learning_rate": 0.0018016079998097259, "loss": 0.2189, "step": 21470 }, { "epoch": 0.03807163076617561, "grad_norm": 2.09375, "learning_rate": 0.0018015707204352834, "loss": 0.4627, "step": 21472 }, { "epoch": 0.038075176931485426, "grad_norm": 0.33984375, "learning_rate": 0.0018015334379925779, "loss": 0.253, "step": 21474 }, { "epoch": 0.03807872309679524, "grad_norm": 1.203125, "learning_rate": 0.0018014961524817725, "loss": 0.2035, "step": 21476 }, { "epoch": 0.038082269262105055, "grad_norm": 0.45703125, "learning_rate": 0.0018014588639030306, "loss": 0.2383, "step": 21478 }, { "epoch": 0.03808581542741487, "grad_norm": 1.3671875, "learning_rate": 0.001801421572256515, "loss": 0.2245, "step": 21480 }, { "epoch": 0.038089361592724684, "grad_norm": 0.7890625, "learning_rate": 0.001801384277542389, "loss": 0.4605, "step": 21482 }, { "epoch": 0.038092907758034505, "grad_norm": 0.56640625, "learning_rate": 0.0018013469797608158, "loss": 0.2007, "step": 21484 }, { "epoch": 0.03809645392334432, "grad_norm": 0.55859375, "learning_rate": 0.0018013096789119578, "loss": 0.1807, "step": 21486 }, { "epoch": 0.038100000088654135, "grad_norm": 0.8359375, "learning_rate": 0.001801272374995979, "loss": 0.2476, "step": 21488 }, { "epoch": 0.03810354625396395, "grad_norm": 0.828125, "learning_rate": 0.001801235068013042, "loss": 0.2311, "step": 21490 }, { "epoch": 0.038107092419273764, "grad_norm": 0.326171875, "learning_rate": 0.00180119775796331, "loss": 0.1984, "step": 21492 }, { "epoch": 0.03811063858458358, "grad_norm": 0.44140625, "learning_rate": 0.0018011604448469466, "loss": 0.214, "step": 21494 }, { "epoch": 0.03811418474989339, "grad_norm": 0.55078125, "learning_rate": 0.0018011231286641147, "loss": 0.2387, "step": 21496 }, { "epoch": 0.03811773091520321, "grad_norm": 0.45703125, "learning_rate": 0.0018010858094149773, "loss": 0.1921, "step": 21498 }, { "epoch": 0.03812127708051302, "grad_norm": 0.50390625, "learning_rate": 0.0018010484870996978, "loss": 0.2413, "step": 21500 }, { "epoch": 0.038124823245822836, "grad_norm": 0.2099609375, "learning_rate": 0.0018010111617184398, "loss": 0.1738, "step": 21502 }, { "epoch": 0.03812836941113265, "grad_norm": 2.03125, "learning_rate": 0.0018009738332713657, "loss": 0.5018, "step": 21504 }, { "epoch": 0.03813191557644247, "grad_norm": 0.412109375, "learning_rate": 0.0018009365017586394, "loss": 0.1976, "step": 21506 }, { "epoch": 0.03813546174175229, "grad_norm": 5.71875, "learning_rate": 0.0018008991671804241, "loss": 0.3669, "step": 21508 }, { "epoch": 0.0381390079070621, "grad_norm": 0.369140625, "learning_rate": 0.0018008618295368827, "loss": 0.2173, "step": 21510 }, { "epoch": 0.038142554072371916, "grad_norm": 0.7578125, "learning_rate": 0.001800824488828179, "loss": 0.2089, "step": 21512 }, { "epoch": 0.03814610023768173, "grad_norm": 3.90625, "learning_rate": 0.0018007871450544761, "loss": 0.3266, "step": 21514 }, { "epoch": 0.038149646402991545, "grad_norm": 0.314453125, "learning_rate": 0.0018007497982159373, "loss": 0.1582, "step": 21516 }, { "epoch": 0.03815319256830136, "grad_norm": 0.28515625, "learning_rate": 0.001800712448312726, "loss": 0.1981, "step": 21518 }, { "epoch": 0.038156738733611174, "grad_norm": 0.37890625, "learning_rate": 0.0018006750953450052, "loss": 0.2257, "step": 21520 }, { "epoch": 0.03816028489892099, "grad_norm": 0.5, "learning_rate": 0.0018006377393129385, "loss": 0.2191, "step": 21522 }, { "epoch": 0.0381638310642308, "grad_norm": 0.35546875, "learning_rate": 0.0018006003802166898, "loss": 0.2321, "step": 21524 }, { "epoch": 0.03816737722954062, "grad_norm": 0.41015625, "learning_rate": 0.0018005630180564216, "loss": 0.191, "step": 21526 }, { "epoch": 0.03817092339485044, "grad_norm": 0.29296875, "learning_rate": 0.0018005256528322974, "loss": 0.1865, "step": 21528 }, { "epoch": 0.038174469560160254, "grad_norm": 1.09375, "learning_rate": 0.0018004882845444813, "loss": 0.201, "step": 21530 }, { "epoch": 0.03817801572547007, "grad_norm": 2.40625, "learning_rate": 0.0018004509131931364, "loss": 0.165, "step": 21532 }, { "epoch": 0.03818156189077988, "grad_norm": 0.318359375, "learning_rate": 0.0018004135387784262, "loss": 0.1965, "step": 21534 }, { "epoch": 0.0381851080560897, "grad_norm": 0.408203125, "learning_rate": 0.0018003761613005138, "loss": 0.1487, "step": 21536 }, { "epoch": 0.03818865422139951, "grad_norm": 0.62890625, "learning_rate": 0.0018003387807595628, "loss": 0.195, "step": 21538 }, { "epoch": 0.03819220038670933, "grad_norm": 1.53125, "learning_rate": 0.001800301397155737, "loss": 0.2531, "step": 21540 }, { "epoch": 0.03819574655201914, "grad_norm": 1.1953125, "learning_rate": 0.0018002640104891996, "loss": 0.2566, "step": 21542 }, { "epoch": 0.038199292717328956, "grad_norm": 0.287109375, "learning_rate": 0.001800226620760114, "loss": 0.1852, "step": 21544 }, { "epoch": 0.03820283888263877, "grad_norm": 2.953125, "learning_rate": 0.001800189227968644, "loss": 0.2772, "step": 21546 }, { "epoch": 0.038206385047948585, "grad_norm": 0.62109375, "learning_rate": 0.001800151832114953, "loss": 0.2051, "step": 21548 }, { "epoch": 0.0382099312132584, "grad_norm": 0.93359375, "learning_rate": 0.0018001144331992048, "loss": 0.1843, "step": 21550 }, { "epoch": 0.03821347737856822, "grad_norm": 0.369140625, "learning_rate": 0.0018000770312215626, "loss": 0.2179, "step": 21552 }, { "epoch": 0.038217023543878036, "grad_norm": 0.30859375, "learning_rate": 0.00180003962618219, "loss": 0.1822, "step": 21554 }, { "epoch": 0.03822056970918785, "grad_norm": 0.91015625, "learning_rate": 0.001800002218081251, "loss": 0.3259, "step": 21556 }, { "epoch": 0.038224115874497665, "grad_norm": 0.26953125, "learning_rate": 0.0017999648069189085, "loss": 0.1598, "step": 21558 }, { "epoch": 0.03822766203980748, "grad_norm": 0.92578125, "learning_rate": 0.001799927392695327, "loss": 0.2842, "step": 21560 }, { "epoch": 0.038231208205117294, "grad_norm": 0.89453125, "learning_rate": 0.0017998899754106692, "loss": 0.3447, "step": 21562 }, { "epoch": 0.03823475437042711, "grad_norm": 0.9921875, "learning_rate": 0.0017998525550650996, "loss": 0.2216, "step": 21564 }, { "epoch": 0.03823830053573692, "grad_norm": 1.4609375, "learning_rate": 0.001799815131658781, "loss": 0.2649, "step": 21566 }, { "epoch": 0.03824184670104674, "grad_norm": 0.68359375, "learning_rate": 0.0017997777051918778, "loss": 0.2075, "step": 21568 }, { "epoch": 0.03824539286635655, "grad_norm": 0.91015625, "learning_rate": 0.0017997402756645534, "loss": 0.2362, "step": 21570 }, { "epoch": 0.03824893903166637, "grad_norm": 0.5078125, "learning_rate": 0.0017997028430769715, "loss": 0.1895, "step": 21572 }, { "epoch": 0.03825248519697619, "grad_norm": 0.921875, "learning_rate": 0.0017996654074292958, "loss": 0.2503, "step": 21574 }, { "epoch": 0.038256031362286, "grad_norm": 0.275390625, "learning_rate": 0.00179962796872169, "loss": 0.2608, "step": 21576 }, { "epoch": 0.03825957752759582, "grad_norm": 0.98046875, "learning_rate": 0.0017995905269543179, "loss": 0.2141, "step": 21578 }, { "epoch": 0.03826312369290563, "grad_norm": 1.0703125, "learning_rate": 0.0017995530821273429, "loss": 0.2324, "step": 21580 }, { "epoch": 0.038266669858215446, "grad_norm": 0.400390625, "learning_rate": 0.0017995156342409293, "loss": 0.2247, "step": 21582 }, { "epoch": 0.03827021602352526, "grad_norm": 0.32421875, "learning_rate": 0.0017994781832952407, "loss": 0.1777, "step": 21584 }, { "epoch": 0.038273762188835075, "grad_norm": 1.8828125, "learning_rate": 0.0017994407292904408, "loss": 0.201, "step": 21586 }, { "epoch": 0.03827730835414489, "grad_norm": 0.89453125, "learning_rate": 0.0017994032722266932, "loss": 0.2083, "step": 21588 }, { "epoch": 0.038280854519454705, "grad_norm": 0.41796875, "learning_rate": 0.0017993658121041624, "loss": 0.2172, "step": 21590 }, { "epoch": 0.03828440068476452, "grad_norm": 0.5625, "learning_rate": 0.0017993283489230114, "loss": 0.2217, "step": 21592 }, { "epoch": 0.038287946850074334, "grad_norm": 0.498046875, "learning_rate": 0.0017992908826834047, "loss": 0.1841, "step": 21594 }, { "epoch": 0.038291493015384155, "grad_norm": 0.8359375, "learning_rate": 0.0017992534133855057, "loss": 0.466, "step": 21596 }, { "epoch": 0.03829503918069397, "grad_norm": 0.275390625, "learning_rate": 0.0017992159410294784, "loss": 0.2008, "step": 21598 }, { "epoch": 0.038298585346003784, "grad_norm": 0.37890625, "learning_rate": 0.0017991784656154867, "loss": 0.1767, "step": 21600 }, { "epoch": 0.0383021315113136, "grad_norm": 0.384765625, "learning_rate": 0.0017991409871436948, "loss": 0.1752, "step": 21602 }, { "epoch": 0.03830567767662341, "grad_norm": 0.318359375, "learning_rate": 0.0017991035056142662, "loss": 0.3381, "step": 21604 }, { "epoch": 0.03830922384193323, "grad_norm": 0.52734375, "learning_rate": 0.0017990660210273647, "loss": 0.195, "step": 21606 }, { "epoch": 0.03831277000724304, "grad_norm": 0.6015625, "learning_rate": 0.001799028533383155, "loss": 0.1741, "step": 21608 }, { "epoch": 0.03831631617255286, "grad_norm": 0.412109375, "learning_rate": 0.0017989910426818, "loss": 0.2444, "step": 21610 }, { "epoch": 0.03831986233786267, "grad_norm": 0.609375, "learning_rate": 0.0017989535489234645, "loss": 0.212, "step": 21612 }, { "epoch": 0.038323408503172486, "grad_norm": 2.90625, "learning_rate": 0.001798916052108312, "loss": 0.3668, "step": 21614 }, { "epoch": 0.0383269546684823, "grad_norm": 0.478515625, "learning_rate": 0.001798878552236507, "loss": 0.2052, "step": 21616 }, { "epoch": 0.038330500833792115, "grad_norm": 0.26171875, "learning_rate": 0.0017988410493082127, "loss": 0.1844, "step": 21618 }, { "epoch": 0.03833404699910194, "grad_norm": 1.453125, "learning_rate": 0.001798803543323594, "loss": 0.2678, "step": 21620 }, { "epoch": 0.03833759316441175, "grad_norm": 0.462890625, "learning_rate": 0.0017987660342828145, "loss": 0.1717, "step": 21622 }, { "epoch": 0.038341139329721566, "grad_norm": 1.125, "learning_rate": 0.0017987285221860382, "loss": 0.2296, "step": 21624 }, { "epoch": 0.03834468549503138, "grad_norm": 0.7109375, "learning_rate": 0.0017986910070334288, "loss": 0.4399, "step": 21626 }, { "epoch": 0.038348231660341195, "grad_norm": 0.40234375, "learning_rate": 0.001798653488825151, "loss": 0.1886, "step": 21628 }, { "epoch": 0.03835177782565101, "grad_norm": 0.80859375, "learning_rate": 0.001798615967561369, "loss": 0.1967, "step": 21630 }, { "epoch": 0.038355323990960824, "grad_norm": 0.7734375, "learning_rate": 0.0017985784432422462, "loss": 0.2455, "step": 21632 }, { "epoch": 0.03835887015627064, "grad_norm": 0.45703125, "learning_rate": 0.0017985409158679474, "loss": 0.1426, "step": 21634 }, { "epoch": 0.03836241632158045, "grad_norm": 1.078125, "learning_rate": 0.0017985033854386362, "loss": 0.2842, "step": 21636 }, { "epoch": 0.03836596248689027, "grad_norm": 0.40234375, "learning_rate": 0.0017984658519544768, "loss": 0.2208, "step": 21638 }, { "epoch": 0.03836950865220008, "grad_norm": 0.345703125, "learning_rate": 0.0017984283154156336, "loss": 0.2409, "step": 21640 }, { "epoch": 0.038373054817509904, "grad_norm": 0.90234375, "learning_rate": 0.0017983907758222705, "loss": 0.1885, "step": 21642 }, { "epoch": 0.03837660098281972, "grad_norm": 0.69140625, "learning_rate": 0.0017983532331745523, "loss": 0.2739, "step": 21644 }, { "epoch": 0.03838014714812953, "grad_norm": 0.486328125, "learning_rate": 0.0017983156874726424, "loss": 0.2155, "step": 21646 }, { "epoch": 0.03838369331343935, "grad_norm": 1.71875, "learning_rate": 0.0017982781387167052, "loss": 0.2438, "step": 21648 }, { "epoch": 0.03838723947874916, "grad_norm": 0.380859375, "learning_rate": 0.0017982405869069052, "loss": 0.1899, "step": 21650 }, { "epoch": 0.038390785644058976, "grad_norm": 0.384765625, "learning_rate": 0.0017982030320434065, "loss": 0.1642, "step": 21652 }, { "epoch": 0.03839433180936879, "grad_norm": 0.48828125, "learning_rate": 0.0017981654741263732, "loss": 0.3173, "step": 21654 }, { "epoch": 0.038397877974678606, "grad_norm": 0.65625, "learning_rate": 0.0017981279131559695, "loss": 0.1585, "step": 21656 }, { "epoch": 0.03840142413998842, "grad_norm": 0.50390625, "learning_rate": 0.0017980903491323602, "loss": 0.1421, "step": 21658 }, { "epoch": 0.038404970305298235, "grad_norm": 0.484375, "learning_rate": 0.0017980527820557087, "loss": 0.2471, "step": 21660 }, { "epoch": 0.03840851647060805, "grad_norm": 0.451171875, "learning_rate": 0.0017980152119261802, "loss": 0.2446, "step": 21662 }, { "epoch": 0.03841206263591787, "grad_norm": 0.34375, "learning_rate": 0.0017979776387439385, "loss": 0.2102, "step": 21664 }, { "epoch": 0.038415608801227685, "grad_norm": 1.0703125, "learning_rate": 0.001797940062509148, "loss": 0.2099, "step": 21666 }, { "epoch": 0.0384191549665375, "grad_norm": 1.15625, "learning_rate": 0.0017979024832219732, "loss": 0.1909, "step": 21668 }, { "epoch": 0.038422701131847314, "grad_norm": 0.3984375, "learning_rate": 0.0017978649008825783, "loss": 0.2471, "step": 21670 }, { "epoch": 0.03842624729715713, "grad_norm": 0.58203125, "learning_rate": 0.0017978273154911277, "loss": 0.2312, "step": 21672 }, { "epoch": 0.038429793462466943, "grad_norm": 0.451171875, "learning_rate": 0.0017977897270477856, "loss": 0.2769, "step": 21674 }, { "epoch": 0.03843333962777676, "grad_norm": 1.7578125, "learning_rate": 0.0017977521355527167, "loss": 0.2314, "step": 21676 }, { "epoch": 0.03843688579308657, "grad_norm": 1.3828125, "learning_rate": 0.0017977145410060854, "loss": 0.2219, "step": 21678 }, { "epoch": 0.03844043195839639, "grad_norm": 0.5078125, "learning_rate": 0.0017976769434080557, "loss": 0.1992, "step": 21680 }, { "epoch": 0.0384439781237062, "grad_norm": 1.1328125, "learning_rate": 0.0017976393427587927, "loss": 0.2051, "step": 21682 }, { "epoch": 0.038447524289016016, "grad_norm": 0.279296875, "learning_rate": 0.0017976017390584598, "loss": 0.2115, "step": 21684 }, { "epoch": 0.03845107045432583, "grad_norm": 0.314453125, "learning_rate": 0.0017975641323072227, "loss": 0.1678, "step": 21686 }, { "epoch": 0.03845461661963565, "grad_norm": 0.80078125, "learning_rate": 0.0017975265225052453, "loss": 0.2602, "step": 21688 }, { "epoch": 0.03845816278494547, "grad_norm": 1.1640625, "learning_rate": 0.0017974889096526916, "loss": 0.2192, "step": 21690 }, { "epoch": 0.03846170895025528, "grad_norm": 0.2734375, "learning_rate": 0.0017974512937497266, "loss": 0.1584, "step": 21692 }, { "epoch": 0.038465255115565096, "grad_norm": 0.34375, "learning_rate": 0.0017974136747965152, "loss": 0.1772, "step": 21694 }, { "epoch": 0.03846880128087491, "grad_norm": 0.294921875, "learning_rate": 0.0017973760527932212, "loss": 0.2411, "step": 21696 }, { "epoch": 0.038472347446184725, "grad_norm": 0.40234375, "learning_rate": 0.0017973384277400095, "loss": 0.1927, "step": 21698 }, { "epoch": 0.03847589361149454, "grad_norm": 0.37109375, "learning_rate": 0.0017973007996370445, "loss": 0.2077, "step": 21700 }, { "epoch": 0.038479439776804354, "grad_norm": 0.408203125, "learning_rate": 0.0017972631684844908, "loss": 0.1795, "step": 21702 }, { "epoch": 0.03848298594211417, "grad_norm": 0.2490234375, "learning_rate": 0.0017972255342825131, "loss": 0.2127, "step": 21704 }, { "epoch": 0.03848653210742398, "grad_norm": 0.482421875, "learning_rate": 0.001797187897031276, "loss": 0.223, "step": 21706 }, { "epoch": 0.0384900782727338, "grad_norm": 0.455078125, "learning_rate": 0.0017971502567309442, "loss": 0.1951, "step": 21708 }, { "epoch": 0.03849362443804362, "grad_norm": 0.8515625, "learning_rate": 0.0017971126133816818, "loss": 0.2039, "step": 21710 }, { "epoch": 0.038497170603353434, "grad_norm": 0.396484375, "learning_rate": 0.0017970749669836538, "loss": 0.5035, "step": 21712 }, { "epoch": 0.03850071676866325, "grad_norm": 0.5859375, "learning_rate": 0.0017970373175370247, "loss": 0.2439, "step": 21714 }, { "epoch": 0.03850426293397306, "grad_norm": 0.92578125, "learning_rate": 0.0017969996650419595, "loss": 0.2206, "step": 21716 }, { "epoch": 0.03850780909928288, "grad_norm": 0.2421875, "learning_rate": 0.0017969620094986228, "loss": 0.1699, "step": 21718 }, { "epoch": 0.03851135526459269, "grad_norm": 0.42578125, "learning_rate": 0.0017969243509071786, "loss": 0.183, "step": 21720 }, { "epoch": 0.03851490142990251, "grad_norm": 0.5234375, "learning_rate": 0.0017968866892677925, "loss": 0.2239, "step": 21722 }, { "epoch": 0.03851844759521232, "grad_norm": 0.52734375, "learning_rate": 0.0017968490245806287, "loss": 0.22, "step": 21724 }, { "epoch": 0.038521993760522136, "grad_norm": 0.8671875, "learning_rate": 0.0017968113568458519, "loss": 0.164, "step": 21726 }, { "epoch": 0.03852553992583195, "grad_norm": 2.4375, "learning_rate": 0.001796773686063627, "loss": 0.2165, "step": 21728 }, { "epoch": 0.038529086091141765, "grad_norm": 0.37109375, "learning_rate": 0.001796736012234119, "loss": 0.1875, "step": 21730 }, { "epoch": 0.03853263225645158, "grad_norm": 0.83203125, "learning_rate": 0.0017966983353574922, "loss": 0.2295, "step": 21732 }, { "epoch": 0.0385361784217614, "grad_norm": 0.52734375, "learning_rate": 0.0017966606554339116, "loss": 0.2098, "step": 21734 }, { "epoch": 0.038539724587071215, "grad_norm": 0.87890625, "learning_rate": 0.0017966229724635419, "loss": 0.2323, "step": 21736 }, { "epoch": 0.03854327075238103, "grad_norm": 0.298828125, "learning_rate": 0.001796585286446548, "loss": 0.1817, "step": 21738 }, { "epoch": 0.038546816917690845, "grad_norm": 2.8125, "learning_rate": 0.001796547597383095, "loss": 0.2743, "step": 21740 }, { "epoch": 0.03855036308300066, "grad_norm": 0.216796875, "learning_rate": 0.001796509905273347, "loss": 0.1524, "step": 21742 }, { "epoch": 0.038553909248310474, "grad_norm": 0.384765625, "learning_rate": 0.0017964722101174696, "loss": 0.2215, "step": 21744 }, { "epoch": 0.03855745541362029, "grad_norm": 0.82421875, "learning_rate": 0.0017964345119156268, "loss": 0.2316, "step": 21746 }, { "epoch": 0.0385610015789301, "grad_norm": 0.458984375, "learning_rate": 0.0017963968106679841, "loss": 0.1612, "step": 21748 }, { "epoch": 0.03856454774423992, "grad_norm": 0.703125, "learning_rate": 0.0017963591063747068, "loss": 0.2325, "step": 21750 }, { "epoch": 0.03856809390954973, "grad_norm": 0.412109375, "learning_rate": 0.0017963213990359588, "loss": 0.23, "step": 21752 }, { "epoch": 0.038571640074859546, "grad_norm": 0.267578125, "learning_rate": 0.0017962836886519055, "loss": 0.2387, "step": 21754 }, { "epoch": 0.03857518624016937, "grad_norm": 0.6875, "learning_rate": 0.0017962459752227117, "loss": 0.2164, "step": 21756 }, { "epoch": 0.03857873240547918, "grad_norm": 0.302734375, "learning_rate": 0.0017962082587485429, "loss": 0.2466, "step": 21758 }, { "epoch": 0.038582278570789, "grad_norm": 0.36328125, "learning_rate": 0.0017961705392295633, "loss": 0.2395, "step": 21760 }, { "epoch": 0.03858582473609881, "grad_norm": 0.51171875, "learning_rate": 0.0017961328166659384, "loss": 0.182, "step": 21762 }, { "epoch": 0.038589370901408626, "grad_norm": 0.55859375, "learning_rate": 0.0017960950910578324, "loss": 0.2665, "step": 21764 }, { "epoch": 0.03859291706671844, "grad_norm": 1.09375, "learning_rate": 0.001796057362405411, "loss": 0.2522, "step": 21766 }, { "epoch": 0.038596463232028255, "grad_norm": 1.0078125, "learning_rate": 0.001796019630708839, "loss": 0.2188, "step": 21768 }, { "epoch": 0.03860000939733807, "grad_norm": 0.37109375, "learning_rate": 0.0017959818959682817, "loss": 0.1675, "step": 21770 }, { "epoch": 0.038603555562647884, "grad_norm": 0.45703125, "learning_rate": 0.0017959441581839037, "loss": 0.1646, "step": 21772 }, { "epoch": 0.0386071017279577, "grad_norm": 1.1875, "learning_rate": 0.00179590641735587, "loss": 0.1603, "step": 21774 }, { "epoch": 0.03861064789326751, "grad_norm": 0.59765625, "learning_rate": 0.0017958686734843461, "loss": 0.1961, "step": 21776 }, { "epoch": 0.038614194058577335, "grad_norm": 0.64453125, "learning_rate": 0.001795830926569497, "loss": 0.2326, "step": 21778 }, { "epoch": 0.03861774022388715, "grad_norm": 0.453125, "learning_rate": 0.0017957931766114875, "loss": 0.1864, "step": 21780 }, { "epoch": 0.038621286389196964, "grad_norm": 1.3125, "learning_rate": 0.0017957554236104824, "loss": 0.3128, "step": 21782 }, { "epoch": 0.03862483255450678, "grad_norm": 0.62109375, "learning_rate": 0.0017957176675666474, "loss": 0.2496, "step": 21784 }, { "epoch": 0.03862837871981659, "grad_norm": 1.6328125, "learning_rate": 0.0017956799084801476, "loss": 0.2231, "step": 21786 }, { "epoch": 0.03863192488512641, "grad_norm": 0.9140625, "learning_rate": 0.001795642146351148, "loss": 0.2446, "step": 21788 }, { "epoch": 0.03863547105043622, "grad_norm": 0.546875, "learning_rate": 0.0017956043811798137, "loss": 0.1515, "step": 21790 }, { "epoch": 0.03863901721574604, "grad_norm": 0.64453125, "learning_rate": 0.0017955666129663098, "loss": 0.291, "step": 21792 }, { "epoch": 0.03864256338105585, "grad_norm": 1.1640625, "learning_rate": 0.0017955288417108018, "loss": 0.1853, "step": 21794 }, { "epoch": 0.038646109546365666, "grad_norm": 0.46875, "learning_rate": 0.0017954910674134543, "loss": 0.2279, "step": 21796 }, { "epoch": 0.03864965571167548, "grad_norm": 0.36328125, "learning_rate": 0.001795453290074433, "loss": 0.1541, "step": 21798 }, { "epoch": 0.038653201876985295, "grad_norm": 2.921875, "learning_rate": 0.001795415509693903, "loss": 0.2079, "step": 21800 }, { "epoch": 0.038656748042295117, "grad_norm": 1.5, "learning_rate": 0.0017953777262720296, "loss": 0.2954, "step": 21802 }, { "epoch": 0.03866029420760493, "grad_norm": 2.640625, "learning_rate": 0.001795339939808978, "loss": 0.4074, "step": 21804 }, { "epoch": 0.038663840372914746, "grad_norm": 0.38671875, "learning_rate": 0.0017953021503049132, "loss": 0.1383, "step": 21806 }, { "epoch": 0.03866738653822456, "grad_norm": 7.09375, "learning_rate": 0.001795264357760001, "loss": 0.2181, "step": 21808 }, { "epoch": 0.038670932703534375, "grad_norm": 0.306640625, "learning_rate": 0.001795226562174406, "loss": 0.1974, "step": 21810 }, { "epoch": 0.03867447886884419, "grad_norm": 0.296875, "learning_rate": 0.0017951887635482937, "loss": 0.2331, "step": 21812 }, { "epoch": 0.038678025034154004, "grad_norm": 1.8359375, "learning_rate": 0.00179515096188183, "loss": 0.3978, "step": 21814 }, { "epoch": 0.03868157119946382, "grad_norm": 0.87890625, "learning_rate": 0.0017951131571751794, "loss": 0.2498, "step": 21816 }, { "epoch": 0.03868511736477363, "grad_norm": 1.6875, "learning_rate": 0.0017950753494285082, "loss": 0.3201, "step": 21818 }, { "epoch": 0.03868866353008345, "grad_norm": 0.62890625, "learning_rate": 0.0017950375386419806, "loss": 0.1894, "step": 21820 }, { "epoch": 0.03869220969539326, "grad_norm": 0.90234375, "learning_rate": 0.0017949997248157628, "loss": 0.1885, "step": 21822 }, { "epoch": 0.038695755860703084, "grad_norm": 0.37109375, "learning_rate": 0.0017949619079500197, "loss": 0.1972, "step": 21824 }, { "epoch": 0.0386993020260129, "grad_norm": 0.53125, "learning_rate": 0.001794924088044917, "loss": 0.163, "step": 21826 }, { "epoch": 0.03870284819132271, "grad_norm": 0.6328125, "learning_rate": 0.0017948862651006204, "loss": 0.1982, "step": 21828 }, { "epoch": 0.03870639435663253, "grad_norm": 0.474609375, "learning_rate": 0.0017948484391172943, "loss": 0.2272, "step": 21830 }, { "epoch": 0.03870994052194234, "grad_norm": 0.333984375, "learning_rate": 0.001794810610095105, "loss": 0.3554, "step": 21832 }, { "epoch": 0.038713486687252156, "grad_norm": 0.6796875, "learning_rate": 0.0017947727780342178, "loss": 0.2373, "step": 21834 }, { "epoch": 0.03871703285256197, "grad_norm": 0.478515625, "learning_rate": 0.0017947349429347978, "loss": 0.1816, "step": 21836 }, { "epoch": 0.038720579017871785, "grad_norm": 0.1875, "learning_rate": 0.0017946971047970112, "loss": 0.1593, "step": 21838 }, { "epoch": 0.0387241251831816, "grad_norm": 0.69921875, "learning_rate": 0.0017946592636210225, "loss": 0.212, "step": 21840 }, { "epoch": 0.038727671348491415, "grad_norm": 0.94921875, "learning_rate": 0.0017946214194069976, "loss": 0.3428, "step": 21842 }, { "epoch": 0.03873121751380123, "grad_norm": 0.40625, "learning_rate": 0.0017945835721551024, "loss": 0.1411, "step": 21844 }, { "epoch": 0.03873476367911105, "grad_norm": 0.2470703125, "learning_rate": 0.0017945457218655019, "loss": 0.2246, "step": 21846 }, { "epoch": 0.038738309844420865, "grad_norm": 1.796875, "learning_rate": 0.001794507868538362, "loss": 0.5908, "step": 21848 }, { "epoch": 0.03874185600973068, "grad_norm": 0.40625, "learning_rate": 0.001794470012173848, "loss": 0.2306, "step": 21850 }, { "epoch": 0.038745402175040494, "grad_norm": 0.62109375, "learning_rate": 0.0017944321527721258, "loss": 0.2245, "step": 21852 }, { "epoch": 0.03874894834035031, "grad_norm": 0.46875, "learning_rate": 0.0017943942903333606, "loss": 0.1925, "step": 21854 }, { "epoch": 0.03875249450566012, "grad_norm": 0.228515625, "learning_rate": 0.001794356424857718, "loss": 0.2012, "step": 21856 }, { "epoch": 0.03875604067096994, "grad_norm": 0.52734375, "learning_rate": 0.001794318556345364, "loss": 0.1919, "step": 21858 }, { "epoch": 0.03875958683627975, "grad_norm": 0.58203125, "learning_rate": 0.0017942806847964638, "loss": 0.2967, "step": 21860 }, { "epoch": 0.03876313300158957, "grad_norm": 0.4609375, "learning_rate": 0.001794242810211183, "loss": 0.2165, "step": 21862 }, { "epoch": 0.03876667916689938, "grad_norm": 0.30078125, "learning_rate": 0.0017942049325896877, "loss": 0.2089, "step": 21864 }, { "epoch": 0.038770225332209196, "grad_norm": 0.255859375, "learning_rate": 0.0017941670519321432, "loss": 0.3428, "step": 21866 }, { "epoch": 0.03877377149751901, "grad_norm": 0.69921875, "learning_rate": 0.0017941291682387148, "loss": 0.2266, "step": 21868 }, { "epoch": 0.03877731766282883, "grad_norm": 0.53515625, "learning_rate": 0.0017940912815095694, "loss": 0.2177, "step": 21870 }, { "epoch": 0.03878086382813865, "grad_norm": 0.5703125, "learning_rate": 0.0017940533917448712, "loss": 0.2007, "step": 21872 }, { "epoch": 0.03878440999344846, "grad_norm": 0.67578125, "learning_rate": 0.001794015498944787, "loss": 0.1605, "step": 21874 }, { "epoch": 0.038787956158758276, "grad_norm": 1.0, "learning_rate": 0.001793977603109482, "loss": 0.2885, "step": 21876 }, { "epoch": 0.03879150232406809, "grad_norm": 1.390625, "learning_rate": 0.0017939397042391221, "loss": 0.278, "step": 21878 }, { "epoch": 0.038795048489377905, "grad_norm": 1.796875, "learning_rate": 0.0017939018023338728, "loss": 0.3323, "step": 21880 }, { "epoch": 0.03879859465468772, "grad_norm": 3.71875, "learning_rate": 0.0017938638973939007, "loss": 0.2582, "step": 21882 }, { "epoch": 0.038802140819997534, "grad_norm": 0.6171875, "learning_rate": 0.0017938259894193702, "loss": 0.3284, "step": 21884 }, { "epoch": 0.03880568698530735, "grad_norm": 0.61328125, "learning_rate": 0.0017937880784104486, "loss": 0.1652, "step": 21886 }, { "epoch": 0.03880923315061716, "grad_norm": 1.03125, "learning_rate": 0.0017937501643673004, "loss": 0.2368, "step": 21888 }, { "epoch": 0.03881277931592698, "grad_norm": 0.51171875, "learning_rate": 0.001793712247290092, "loss": 0.2055, "step": 21890 }, { "epoch": 0.0388163254812368, "grad_norm": 0.4375, "learning_rate": 0.0017936743271789895, "loss": 0.2097, "step": 21892 }, { "epoch": 0.038819871646546614, "grad_norm": 0.27734375, "learning_rate": 0.0017936364040341583, "loss": 0.2082, "step": 21894 }, { "epoch": 0.03882341781185643, "grad_norm": 0.53515625, "learning_rate": 0.001793598477855764, "loss": 0.3441, "step": 21896 }, { "epoch": 0.03882696397716624, "grad_norm": 2.625, "learning_rate": 0.0017935605486439734, "loss": 0.3034, "step": 21898 }, { "epoch": 0.03883051014247606, "grad_norm": 2.140625, "learning_rate": 0.0017935226163989515, "loss": 0.2335, "step": 21900 }, { "epoch": 0.03883405630778587, "grad_norm": 0.62109375, "learning_rate": 0.0017934846811208647, "loss": 0.1856, "step": 21902 }, { "epoch": 0.038837602473095686, "grad_norm": 0.6328125, "learning_rate": 0.0017934467428098787, "loss": 0.2339, "step": 21904 }, { "epoch": 0.0388411486384055, "grad_norm": 0.3515625, "learning_rate": 0.0017934088014661593, "loss": 0.2676, "step": 21906 }, { "epoch": 0.038844694803715316, "grad_norm": 0.89453125, "learning_rate": 0.001793370857089873, "loss": 0.2663, "step": 21908 }, { "epoch": 0.03884824096902513, "grad_norm": 0.259765625, "learning_rate": 0.001793332909681185, "loss": 0.1632, "step": 21910 }, { "epoch": 0.038851787134334945, "grad_norm": 4.15625, "learning_rate": 0.0017932949592402614, "loss": 0.3467, "step": 21912 }, { "epoch": 0.038855333299644766, "grad_norm": 0.74609375, "learning_rate": 0.0017932570057672683, "loss": 0.2038, "step": 21914 }, { "epoch": 0.03885887946495458, "grad_norm": 1.03125, "learning_rate": 0.0017932190492623722, "loss": 0.2834, "step": 21916 }, { "epoch": 0.038862425630264395, "grad_norm": 0.91796875, "learning_rate": 0.0017931810897257386, "loss": 0.287, "step": 21918 }, { "epoch": 0.03886597179557421, "grad_norm": 0.2236328125, "learning_rate": 0.0017931431271575333, "loss": 0.2205, "step": 21920 }, { "epoch": 0.038869517960884024, "grad_norm": 0.78125, "learning_rate": 0.0017931051615579226, "loss": 0.2059, "step": 21922 }, { "epoch": 0.03887306412619384, "grad_norm": 0.421875, "learning_rate": 0.0017930671929270727, "loss": 0.1422, "step": 21924 }, { "epoch": 0.038876610291503653, "grad_norm": 0.62890625, "learning_rate": 0.0017930292212651492, "loss": 0.1882, "step": 21926 }, { "epoch": 0.03888015645681347, "grad_norm": 0.259765625, "learning_rate": 0.001792991246572319, "loss": 0.1638, "step": 21928 }, { "epoch": 0.03888370262212328, "grad_norm": 0.66796875, "learning_rate": 0.001792953268848747, "loss": 0.2229, "step": 21930 }, { "epoch": 0.0388872487874331, "grad_norm": 0.447265625, "learning_rate": 0.0017929152880946004, "loss": 0.205, "step": 21932 }, { "epoch": 0.03889079495274291, "grad_norm": 0.29296875, "learning_rate": 0.0017928773043100446, "loss": 0.2087, "step": 21934 }, { "epoch": 0.038894341118052726, "grad_norm": 1.796875, "learning_rate": 0.001792839317495246, "loss": 0.2559, "step": 21936 }, { "epoch": 0.03889788728336255, "grad_norm": 0.4296875, "learning_rate": 0.0017928013276503705, "loss": 0.1928, "step": 21938 }, { "epoch": 0.03890143344867236, "grad_norm": 0.33203125, "learning_rate": 0.0017927633347755846, "loss": 0.2183, "step": 21940 }, { "epoch": 0.03890497961398218, "grad_norm": 1.1484375, "learning_rate": 0.0017927253388710543, "loss": 0.2914, "step": 21942 }, { "epoch": 0.03890852577929199, "grad_norm": 6.03125, "learning_rate": 0.0017926873399369458, "loss": 0.4897, "step": 21944 }, { "epoch": 0.038912071944601806, "grad_norm": 0.5625, "learning_rate": 0.001792649337973425, "loss": 0.2279, "step": 21946 }, { "epoch": 0.03891561810991162, "grad_norm": 0.6953125, "learning_rate": 0.0017926113329806586, "loss": 0.1931, "step": 21948 }, { "epoch": 0.038919164275221435, "grad_norm": 0.291015625, "learning_rate": 0.0017925733249588122, "loss": 0.2048, "step": 21950 }, { "epoch": 0.03892271044053125, "grad_norm": 0.23828125, "learning_rate": 0.0017925353139080524, "loss": 0.2217, "step": 21952 }, { "epoch": 0.038926256605841064, "grad_norm": 0.53125, "learning_rate": 0.001792497299828546, "loss": 0.2628, "step": 21954 }, { "epoch": 0.03892980277115088, "grad_norm": 3.640625, "learning_rate": 0.001792459282720458, "loss": 0.4594, "step": 21956 }, { "epoch": 0.03893334893646069, "grad_norm": 1.171875, "learning_rate": 0.0017924212625839557, "loss": 0.207, "step": 21958 }, { "epoch": 0.038936895101770515, "grad_norm": 1.2578125, "learning_rate": 0.0017923832394192048, "loss": 0.4309, "step": 21960 }, { "epoch": 0.03894044126708033, "grad_norm": 4.5625, "learning_rate": 0.001792345213226372, "loss": 0.3272, "step": 21962 }, { "epoch": 0.038943987432390144, "grad_norm": 0.322265625, "learning_rate": 0.0017923071840056232, "loss": 0.1853, "step": 21964 }, { "epoch": 0.03894753359769996, "grad_norm": 0.5390625, "learning_rate": 0.001792269151757125, "loss": 0.2015, "step": 21966 }, { "epoch": 0.03895107976300977, "grad_norm": 0.408203125, "learning_rate": 0.0017922311164810434, "loss": 0.1751, "step": 21968 }, { "epoch": 0.03895462592831959, "grad_norm": 0.7734375, "learning_rate": 0.0017921930781775453, "loss": 0.1886, "step": 21970 }, { "epoch": 0.0389581720936294, "grad_norm": 0.8125, "learning_rate": 0.0017921550368467968, "loss": 0.2208, "step": 21972 }, { "epoch": 0.03896171825893922, "grad_norm": 0.546875, "learning_rate": 0.001792116992488964, "loss": 0.1982, "step": 21974 }, { "epoch": 0.03896526442424903, "grad_norm": 0.890625, "learning_rate": 0.0017920789451042138, "loss": 0.1581, "step": 21976 }, { "epoch": 0.038968810589558846, "grad_norm": 0.62109375, "learning_rate": 0.001792040894692712, "loss": 0.2182, "step": 21978 }, { "epoch": 0.03897235675486866, "grad_norm": 0.6796875, "learning_rate": 0.0017920028412546254, "loss": 0.1943, "step": 21980 }, { "epoch": 0.03897590292017848, "grad_norm": 0.40234375, "learning_rate": 0.00179196478479012, "loss": 0.2012, "step": 21982 }, { "epoch": 0.038979449085488296, "grad_norm": 0.77734375, "learning_rate": 0.0017919267252993632, "loss": 0.1723, "step": 21984 }, { "epoch": 0.03898299525079811, "grad_norm": 0.2294921875, "learning_rate": 0.0017918886627825204, "loss": 0.3973, "step": 21986 }, { "epoch": 0.038986541416107925, "grad_norm": 0.59765625, "learning_rate": 0.0017918505972397587, "loss": 0.1851, "step": 21988 }, { "epoch": 0.03899008758141774, "grad_norm": 0.60546875, "learning_rate": 0.0017918125286712444, "loss": 0.1303, "step": 21990 }, { "epoch": 0.038993633746727555, "grad_norm": 0.484375, "learning_rate": 0.0017917744570771438, "loss": 0.1856, "step": 21992 }, { "epoch": 0.03899717991203737, "grad_norm": 3.3125, "learning_rate": 0.0017917363824576239, "loss": 0.5767, "step": 21994 }, { "epoch": 0.039000726077347184, "grad_norm": 0.99609375, "learning_rate": 0.0017916983048128503, "loss": 0.2643, "step": 21996 }, { "epoch": 0.039004272242657, "grad_norm": 0.365234375, "learning_rate": 0.0017916602241429902, "loss": 0.1855, "step": 21998 }, { "epoch": 0.03900781840796681, "grad_norm": 0.50390625, "learning_rate": 0.0017916221404482104, "loss": 0.2681, "step": 22000 }, { "epoch": 0.03901136457327663, "grad_norm": 0.35546875, "learning_rate": 0.001791584053728677, "loss": 0.2332, "step": 22002 }, { "epoch": 0.03901491073858644, "grad_norm": 1.34375, "learning_rate": 0.0017915459639845563, "loss": 0.1947, "step": 22004 }, { "epoch": 0.03901845690389626, "grad_norm": 0.427734375, "learning_rate": 0.0017915078712160155, "loss": 0.2327, "step": 22006 }, { "epoch": 0.03902200306920608, "grad_norm": 1.2109375, "learning_rate": 0.0017914697754232213, "loss": 0.2249, "step": 22008 }, { "epoch": 0.03902554923451589, "grad_norm": 0.37109375, "learning_rate": 0.0017914316766063393, "loss": 0.1922, "step": 22010 }, { "epoch": 0.03902909539982571, "grad_norm": 0.53515625, "learning_rate": 0.0017913935747655373, "loss": 0.198, "step": 22012 }, { "epoch": 0.03903264156513552, "grad_norm": 0.81640625, "learning_rate": 0.0017913554699009813, "loss": 0.202, "step": 22014 }, { "epoch": 0.039036187730445336, "grad_norm": 0.90234375, "learning_rate": 0.001791317362012838, "loss": 0.1873, "step": 22016 }, { "epoch": 0.03903973389575515, "grad_norm": 0.3125, "learning_rate": 0.0017912792511012741, "loss": 0.1882, "step": 22018 }, { "epoch": 0.039043280061064965, "grad_norm": 0.3046875, "learning_rate": 0.0017912411371664564, "loss": 0.1989, "step": 22020 }, { "epoch": 0.03904682622637478, "grad_norm": 1.15625, "learning_rate": 0.001791203020208551, "loss": 0.4068, "step": 22022 }, { "epoch": 0.039050372391684594, "grad_norm": 2.203125, "learning_rate": 0.0017911649002277257, "loss": 0.2333, "step": 22024 }, { "epoch": 0.03905391855699441, "grad_norm": 1.0859375, "learning_rate": 0.0017911267772241465, "loss": 0.2052, "step": 22026 }, { "epoch": 0.03905746472230423, "grad_norm": 0.361328125, "learning_rate": 0.00179108865119798, "loss": 0.2758, "step": 22028 }, { "epoch": 0.039061010887614045, "grad_norm": 0.466796875, "learning_rate": 0.0017910505221493934, "loss": 0.3595, "step": 22030 }, { "epoch": 0.03906455705292386, "grad_norm": 1.4765625, "learning_rate": 0.001791012390078553, "loss": 0.1364, "step": 22032 }, { "epoch": 0.039068103218233674, "grad_norm": 0.54296875, "learning_rate": 0.001790974254985626, "loss": 0.2297, "step": 22034 }, { "epoch": 0.03907164938354349, "grad_norm": 0.35546875, "learning_rate": 0.0017909361168707788, "loss": 0.2233, "step": 22036 }, { "epoch": 0.0390751955488533, "grad_norm": 1.03125, "learning_rate": 0.0017908979757341786, "loss": 0.2059, "step": 22038 }, { "epoch": 0.03907874171416312, "grad_norm": 7.03125, "learning_rate": 0.0017908598315759916, "loss": 0.3606, "step": 22040 }, { "epoch": 0.03908228787947293, "grad_norm": 0.3828125, "learning_rate": 0.0017908216843963854, "loss": 0.2313, "step": 22042 }, { "epoch": 0.03908583404478275, "grad_norm": 0.296875, "learning_rate": 0.0017907835341955261, "loss": 0.2071, "step": 22044 }, { "epoch": 0.03908938021009256, "grad_norm": 0.51171875, "learning_rate": 0.0017907453809735813, "loss": 0.2179, "step": 22046 }, { "epoch": 0.039092926375402376, "grad_norm": 0.263671875, "learning_rate": 0.001790707224730717, "loss": 0.2232, "step": 22048 }, { "epoch": 0.0390964725407122, "grad_norm": 1.8046875, "learning_rate": 0.0017906690654671006, "loss": 0.3491, "step": 22050 }, { "epoch": 0.03910001870602201, "grad_norm": 0.271484375, "learning_rate": 0.001790630903182899, "loss": 0.1273, "step": 22052 }, { "epoch": 0.039103564871331827, "grad_norm": 1.15625, "learning_rate": 0.001790592737878279, "loss": 0.2397, "step": 22054 }, { "epoch": 0.03910711103664164, "grad_norm": 0.388671875, "learning_rate": 0.0017905545695534074, "loss": 0.1902, "step": 22056 }, { "epoch": 0.039110657201951456, "grad_norm": 2.046875, "learning_rate": 0.0017905163982084513, "loss": 0.3476, "step": 22058 }, { "epoch": 0.03911420336726127, "grad_norm": 0.64453125, "learning_rate": 0.0017904782238435774, "loss": 0.2215, "step": 22060 }, { "epoch": 0.039117749532571085, "grad_norm": 0.3203125, "learning_rate": 0.0017904400464589531, "loss": 0.2047, "step": 22062 }, { "epoch": 0.0391212956978809, "grad_norm": 0.64453125, "learning_rate": 0.0017904018660547451, "loss": 0.1973, "step": 22064 }, { "epoch": 0.039124841863190714, "grad_norm": 1.2578125, "learning_rate": 0.0017903636826311206, "loss": 0.2044, "step": 22066 }, { "epoch": 0.03912838802850053, "grad_norm": 0.55078125, "learning_rate": 0.0017903254961882458, "loss": 0.1693, "step": 22068 }, { "epoch": 0.03913193419381034, "grad_norm": 0.287109375, "learning_rate": 0.0017902873067262887, "loss": 0.2062, "step": 22070 }, { "epoch": 0.03913548035912016, "grad_norm": 0.345703125, "learning_rate": 0.0017902491142454157, "loss": 0.1765, "step": 22072 }, { "epoch": 0.03913902652442998, "grad_norm": 0.25390625, "learning_rate": 0.0017902109187457938, "loss": 0.2058, "step": 22074 }, { "epoch": 0.039142572689739794, "grad_norm": 0.39453125, "learning_rate": 0.0017901727202275903, "loss": 0.1492, "step": 22076 }, { "epoch": 0.03914611885504961, "grad_norm": 0.66796875, "learning_rate": 0.0017901345186909726, "loss": 0.2426, "step": 22078 }, { "epoch": 0.03914966502035942, "grad_norm": 0.2119140625, "learning_rate": 0.001790096314136107, "loss": 0.1855, "step": 22080 }, { "epoch": 0.03915321118566924, "grad_norm": 2.578125, "learning_rate": 0.0017900581065631615, "loss": 0.3733, "step": 22082 }, { "epoch": 0.03915675735097905, "grad_norm": 3.484375, "learning_rate": 0.0017900198959723019, "loss": 0.3413, "step": 22084 }, { "epoch": 0.039160303516288866, "grad_norm": 0.470703125, "learning_rate": 0.0017899816823636967, "loss": 0.1786, "step": 22086 }, { "epoch": 0.03916384968159868, "grad_norm": 0.28515625, "learning_rate": 0.0017899434657375124, "loss": 0.1873, "step": 22088 }, { "epoch": 0.039167395846908495, "grad_norm": 1.7890625, "learning_rate": 0.0017899052460939157, "loss": 0.203, "step": 22090 }, { "epoch": 0.03917094201221831, "grad_norm": 0.44140625, "learning_rate": 0.0017898670234330746, "loss": 0.2406, "step": 22092 }, { "epoch": 0.039174488177528125, "grad_norm": 0.88671875, "learning_rate": 0.0017898287977551558, "loss": 0.2262, "step": 22094 }, { "epoch": 0.039178034342837946, "grad_norm": 0.79296875, "learning_rate": 0.0017897905690603265, "loss": 0.1909, "step": 22096 }, { "epoch": 0.03918158050814776, "grad_norm": 0.58203125, "learning_rate": 0.001789752337348754, "loss": 0.1912, "step": 22098 }, { "epoch": 0.039185126673457575, "grad_norm": 0.478515625, "learning_rate": 0.0017897141026206053, "loss": 0.1712, "step": 22100 }, { "epoch": 0.03918867283876739, "grad_norm": 0.349609375, "learning_rate": 0.0017896758648760479, "loss": 0.2418, "step": 22102 }, { "epoch": 0.039192219004077204, "grad_norm": 0.76171875, "learning_rate": 0.0017896376241152488, "loss": 0.1972, "step": 22104 }, { "epoch": 0.03919576516938702, "grad_norm": 0.87890625, "learning_rate": 0.0017895993803383752, "loss": 0.2691, "step": 22106 }, { "epoch": 0.03919931133469683, "grad_norm": 1.34375, "learning_rate": 0.0017895611335455946, "loss": 0.1351, "step": 22108 }, { "epoch": 0.03920285750000665, "grad_norm": 0.443359375, "learning_rate": 0.001789522883737074, "loss": 0.1612, "step": 22110 }, { "epoch": 0.03920640366531646, "grad_norm": 0.458984375, "learning_rate": 0.0017894846309129808, "loss": 0.1786, "step": 22112 }, { "epoch": 0.03920994983062628, "grad_norm": 0.38671875, "learning_rate": 0.0017894463750734826, "loss": 0.1613, "step": 22114 }, { "epoch": 0.03921349599593609, "grad_norm": 0.69921875, "learning_rate": 0.0017894081162187464, "loss": 0.189, "step": 22116 }, { "epoch": 0.03921704216124591, "grad_norm": 0.412109375, "learning_rate": 0.0017893698543489393, "loss": 0.2212, "step": 22118 }, { "epoch": 0.03922058832655573, "grad_norm": 1.9453125, "learning_rate": 0.0017893315894642293, "loss": 0.2416, "step": 22120 }, { "epoch": 0.03922413449186554, "grad_norm": 0.470703125, "learning_rate": 0.0017892933215647828, "loss": 0.2088, "step": 22122 }, { "epoch": 0.03922768065717536, "grad_norm": 0.416015625, "learning_rate": 0.0017892550506507683, "loss": 0.1838, "step": 22124 }, { "epoch": 0.03923122682248517, "grad_norm": 0.255859375, "learning_rate": 0.0017892167767223522, "loss": 0.1605, "step": 22126 }, { "epoch": 0.039234772987794986, "grad_norm": 0.416015625, "learning_rate": 0.0017891784997797022, "loss": 0.3285, "step": 22128 }, { "epoch": 0.0392383191531048, "grad_norm": 1.0859375, "learning_rate": 0.001789140219822986, "loss": 0.2135, "step": 22130 }, { "epoch": 0.039241865318414615, "grad_norm": 0.44140625, "learning_rate": 0.0017891019368523706, "loss": 0.2212, "step": 22132 }, { "epoch": 0.03924541148372443, "grad_norm": 0.59375, "learning_rate": 0.0017890636508680238, "loss": 0.2315, "step": 22134 }, { "epoch": 0.039248957649034244, "grad_norm": 0.38671875, "learning_rate": 0.0017890253618701127, "loss": 0.1954, "step": 22136 }, { "epoch": 0.03925250381434406, "grad_norm": 1.6875, "learning_rate": 0.0017889870698588049, "loss": 0.2498, "step": 22138 }, { "epoch": 0.03925604997965387, "grad_norm": 0.89453125, "learning_rate": 0.001788948774834268, "loss": 0.1916, "step": 22140 }, { "epoch": 0.039259596144963695, "grad_norm": 0.29296875, "learning_rate": 0.001788910476796669, "loss": 0.3171, "step": 22142 }, { "epoch": 0.03926314231027351, "grad_norm": 4.5, "learning_rate": 0.001788872175746176, "loss": 0.2606, "step": 22144 }, { "epoch": 0.039266688475583324, "grad_norm": 0.81640625, "learning_rate": 0.0017888338716829564, "loss": 0.3094, "step": 22146 }, { "epoch": 0.03927023464089314, "grad_norm": 0.408203125, "learning_rate": 0.0017887955646071771, "loss": 0.2256, "step": 22148 }, { "epoch": 0.03927378080620295, "grad_norm": 0.27734375, "learning_rate": 0.0017887572545190065, "loss": 0.2259, "step": 22150 }, { "epoch": 0.03927732697151277, "grad_norm": 1.5703125, "learning_rate": 0.0017887189414186118, "loss": 0.1866, "step": 22152 }, { "epoch": 0.03928087313682258, "grad_norm": 1.265625, "learning_rate": 0.0017886806253061603, "loss": 0.2834, "step": 22154 }, { "epoch": 0.039284419302132396, "grad_norm": 0.353515625, "learning_rate": 0.0017886423061818197, "loss": 0.1738, "step": 22156 }, { "epoch": 0.03928796546744221, "grad_norm": 0.41015625, "learning_rate": 0.001788603984045758, "loss": 0.1712, "step": 22158 }, { "epoch": 0.039291511632752026, "grad_norm": 1.0859375, "learning_rate": 0.0017885656588981422, "loss": 0.1711, "step": 22160 }, { "epoch": 0.03929505779806184, "grad_norm": 1.0859375, "learning_rate": 0.0017885273307391401, "loss": 0.1854, "step": 22162 }, { "epoch": 0.03929860396337166, "grad_norm": 1.3828125, "learning_rate": 0.0017884889995689198, "loss": 0.2072, "step": 22164 }, { "epoch": 0.039302150128681476, "grad_norm": 0.6953125, "learning_rate": 0.001788450665387648, "loss": 0.1947, "step": 22166 }, { "epoch": 0.03930569629399129, "grad_norm": 0.75390625, "learning_rate": 0.0017884123281954932, "loss": 0.2326, "step": 22168 }, { "epoch": 0.039309242459301105, "grad_norm": 0.69140625, "learning_rate": 0.0017883739879926228, "loss": 0.151, "step": 22170 }, { "epoch": 0.03931278862461092, "grad_norm": 1.390625, "learning_rate": 0.0017883356447792043, "loss": 0.2206, "step": 22172 }, { "epoch": 0.039316334789920734, "grad_norm": 0.39453125, "learning_rate": 0.0017882972985554055, "loss": 0.1695, "step": 22174 }, { "epoch": 0.03931988095523055, "grad_norm": 0.337890625, "learning_rate": 0.0017882589493213945, "loss": 0.2672, "step": 22176 }, { "epoch": 0.039323427120540363, "grad_norm": 0.74609375, "learning_rate": 0.0017882205970773382, "loss": 0.3668, "step": 22178 }, { "epoch": 0.03932697328585018, "grad_norm": 0.318359375, "learning_rate": 0.001788182241823405, "loss": 0.2381, "step": 22180 }, { "epoch": 0.03933051945115999, "grad_norm": 0.400390625, "learning_rate": 0.0017881438835597623, "loss": 0.3068, "step": 22182 }, { "epoch": 0.03933406561646981, "grad_norm": 1.03125, "learning_rate": 0.0017881055222865778, "loss": 0.238, "step": 22184 }, { "epoch": 0.03933761178177963, "grad_norm": 0.7265625, "learning_rate": 0.0017880671580040198, "loss": 0.1642, "step": 22186 }, { "epoch": 0.03934115794708944, "grad_norm": 1.203125, "learning_rate": 0.0017880287907122555, "loss": 0.4286, "step": 22188 }, { "epoch": 0.03934470411239926, "grad_norm": 1.15625, "learning_rate": 0.001787990420411453, "loss": 0.2153, "step": 22190 }, { "epoch": 0.03934825027770907, "grad_norm": 1.4921875, "learning_rate": 0.0017879520471017804, "loss": 0.4067, "step": 22192 }, { "epoch": 0.03935179644301889, "grad_norm": 0.28125, "learning_rate": 0.0017879136707834044, "loss": 0.1787, "step": 22194 }, { "epoch": 0.0393553426083287, "grad_norm": 0.75, "learning_rate": 0.001787875291456494, "loss": 0.2395, "step": 22196 }, { "epoch": 0.039358888773638516, "grad_norm": 0.494140625, "learning_rate": 0.0017878369091212168, "loss": 0.1984, "step": 22198 }, { "epoch": 0.03936243493894833, "grad_norm": 0.39453125, "learning_rate": 0.00178779852377774, "loss": 0.1949, "step": 22200 }, { "epoch": 0.039365981104258145, "grad_norm": 0.30078125, "learning_rate": 0.0017877601354262325, "loss": 0.1921, "step": 22202 }, { "epoch": 0.03936952726956796, "grad_norm": 0.66796875, "learning_rate": 0.0017877217440668617, "loss": 0.2013, "step": 22204 }, { "epoch": 0.039373073434877774, "grad_norm": 0.46875, "learning_rate": 0.0017876833496997951, "loss": 0.2293, "step": 22206 }, { "epoch": 0.03937661960018759, "grad_norm": 0.5390625, "learning_rate": 0.001787644952325201, "loss": 0.2591, "step": 22208 }, { "epoch": 0.03938016576549741, "grad_norm": 0.326171875, "learning_rate": 0.0017876065519432474, "loss": 0.1907, "step": 22210 }, { "epoch": 0.039383711930807225, "grad_norm": 0.8515625, "learning_rate": 0.0017875681485541022, "loss": 0.2286, "step": 22212 }, { "epoch": 0.03938725809611704, "grad_norm": 0.31640625, "learning_rate": 0.0017875297421579334, "loss": 0.236, "step": 22214 }, { "epoch": 0.039390804261426854, "grad_norm": 0.498046875, "learning_rate": 0.0017874913327549087, "loss": 0.18, "step": 22216 }, { "epoch": 0.03939435042673667, "grad_norm": 0.22265625, "learning_rate": 0.0017874529203451965, "loss": 0.3153, "step": 22218 }, { "epoch": 0.03939789659204648, "grad_norm": 0.80078125, "learning_rate": 0.001787414504928964, "loss": 0.2818, "step": 22220 }, { "epoch": 0.0394014427573563, "grad_norm": 0.2041015625, "learning_rate": 0.0017873760865063802, "loss": 0.1262, "step": 22222 }, { "epoch": 0.03940498892266611, "grad_norm": 0.890625, "learning_rate": 0.0017873376650776127, "loss": 0.2494, "step": 22224 }, { "epoch": 0.03940853508797593, "grad_norm": 0.484375, "learning_rate": 0.0017872992406428292, "loss": 0.2258, "step": 22226 }, { "epoch": 0.03941208125328574, "grad_norm": 0.27734375, "learning_rate": 0.0017872608132021984, "loss": 0.1758, "step": 22228 }, { "epoch": 0.039415627418595556, "grad_norm": 0.55859375, "learning_rate": 0.001787222382755888, "loss": 0.2236, "step": 22230 }, { "epoch": 0.03941917358390538, "grad_norm": 0.388671875, "learning_rate": 0.0017871839493040656, "loss": 0.2376, "step": 22232 }, { "epoch": 0.03942271974921519, "grad_norm": 0.5625, "learning_rate": 0.0017871455128469003, "loss": 0.2132, "step": 22234 }, { "epoch": 0.039426265914525006, "grad_norm": 0.625, "learning_rate": 0.0017871070733845593, "loss": 0.2119, "step": 22236 }, { "epoch": 0.03942981207983482, "grad_norm": 1.7578125, "learning_rate": 0.0017870686309172114, "loss": 0.2734, "step": 22238 }, { "epoch": 0.039433358245144635, "grad_norm": 0.478515625, "learning_rate": 0.0017870301854450241, "loss": 0.1884, "step": 22240 }, { "epoch": 0.03943690441045445, "grad_norm": 0.265625, "learning_rate": 0.0017869917369681663, "loss": 0.1574, "step": 22242 }, { "epoch": 0.039440450575764265, "grad_norm": 0.240234375, "learning_rate": 0.0017869532854868054, "loss": 0.3469, "step": 22244 }, { "epoch": 0.03944399674107408, "grad_norm": 0.2412109375, "learning_rate": 0.0017869148310011096, "loss": 0.2359, "step": 22246 }, { "epoch": 0.039447542906383894, "grad_norm": 0.447265625, "learning_rate": 0.001786876373511248, "loss": 0.1972, "step": 22248 }, { "epoch": 0.03945108907169371, "grad_norm": 0.95703125, "learning_rate": 0.0017868379130173875, "loss": 0.306, "step": 22250 }, { "epoch": 0.03945463523700352, "grad_norm": 0.466796875, "learning_rate": 0.0017867994495196972, "loss": 0.2169, "step": 22252 }, { "epoch": 0.039458181402313344, "grad_norm": 0.69140625, "learning_rate": 0.0017867609830183452, "loss": 0.272, "step": 22254 }, { "epoch": 0.03946172756762316, "grad_norm": 0.44921875, "learning_rate": 0.001786722513513499, "loss": 0.2198, "step": 22256 }, { "epoch": 0.03946527373293297, "grad_norm": 0.90234375, "learning_rate": 0.001786684041005328, "loss": 0.2116, "step": 22258 }, { "epoch": 0.03946881989824279, "grad_norm": 0.27734375, "learning_rate": 0.0017866455654939996, "loss": 0.1665, "step": 22260 }, { "epoch": 0.0394723660635526, "grad_norm": 0.439453125, "learning_rate": 0.0017866070869796825, "loss": 0.2036, "step": 22262 }, { "epoch": 0.03947591222886242, "grad_norm": 0.412109375, "learning_rate": 0.0017865686054625448, "loss": 0.1828, "step": 22264 }, { "epoch": 0.03947945839417223, "grad_norm": 0.49609375, "learning_rate": 0.0017865301209427547, "loss": 0.1723, "step": 22266 }, { "epoch": 0.039483004559482046, "grad_norm": 0.29296875, "learning_rate": 0.0017864916334204806, "loss": 0.1818, "step": 22268 }, { "epoch": 0.03948655072479186, "grad_norm": 0.474609375, "learning_rate": 0.001786453142895891, "loss": 0.2001, "step": 22270 }, { "epoch": 0.039490096890101675, "grad_norm": 1.015625, "learning_rate": 0.0017864146493691542, "loss": 0.2373, "step": 22272 }, { "epoch": 0.03949364305541149, "grad_norm": 2.625, "learning_rate": 0.0017863761528404383, "loss": 0.223, "step": 22274 }, { "epoch": 0.039497189220721304, "grad_norm": 0.34765625, "learning_rate": 0.0017863376533099118, "loss": 0.1885, "step": 22276 }, { "epoch": 0.039500735386031126, "grad_norm": 0.251953125, "learning_rate": 0.0017862991507777432, "loss": 0.1824, "step": 22278 }, { "epoch": 0.03950428155134094, "grad_norm": 0.416015625, "learning_rate": 0.0017862606452441006, "loss": 0.229, "step": 22280 }, { "epoch": 0.039507827716650755, "grad_norm": 0.376953125, "learning_rate": 0.0017862221367091527, "loss": 0.2069, "step": 22282 }, { "epoch": 0.03951137388196057, "grad_norm": 0.27734375, "learning_rate": 0.0017861836251730674, "loss": 0.2617, "step": 22284 }, { "epoch": 0.039514920047270384, "grad_norm": 0.330078125, "learning_rate": 0.001786145110636014, "loss": 0.1854, "step": 22286 }, { "epoch": 0.0395184662125802, "grad_norm": 0.765625, "learning_rate": 0.0017861065930981602, "loss": 0.2667, "step": 22288 }, { "epoch": 0.03952201237789001, "grad_norm": 0.353515625, "learning_rate": 0.0017860680725596749, "loss": 0.2359, "step": 22290 }, { "epoch": 0.03952555854319983, "grad_norm": 0.51953125, "learning_rate": 0.0017860295490207262, "loss": 0.2201, "step": 22292 }, { "epoch": 0.03952910470850964, "grad_norm": 0.53515625, "learning_rate": 0.0017859910224814828, "loss": 0.1633, "step": 22294 }, { "epoch": 0.03953265087381946, "grad_norm": 0.21875, "learning_rate": 0.0017859524929421128, "loss": 0.1826, "step": 22296 }, { "epoch": 0.03953619703912927, "grad_norm": 0.337890625, "learning_rate": 0.0017859139604027856, "loss": 0.2732, "step": 22298 }, { "epoch": 0.03953974320443909, "grad_norm": 0.28125, "learning_rate": 0.001785875424863669, "loss": 0.2554, "step": 22300 }, { "epoch": 0.03954328936974891, "grad_norm": 0.5703125, "learning_rate": 0.0017858368863249315, "loss": 0.1859, "step": 22302 }, { "epoch": 0.03954683553505872, "grad_norm": 0.66796875, "learning_rate": 0.001785798344786742, "loss": 0.2124, "step": 22304 }, { "epoch": 0.039550381700368537, "grad_norm": 0.515625, "learning_rate": 0.0017857598002492686, "loss": 0.235, "step": 22306 }, { "epoch": 0.03955392786567835, "grad_norm": 1.9375, "learning_rate": 0.0017857212527126807, "loss": 0.3964, "step": 22308 }, { "epoch": 0.039557474030988166, "grad_norm": 1.0546875, "learning_rate": 0.0017856827021771459, "loss": 0.1741, "step": 22310 }, { "epoch": 0.03956102019629798, "grad_norm": 1.6015625, "learning_rate": 0.0017856441486428334, "loss": 0.4074, "step": 22312 }, { "epoch": 0.039564566361607795, "grad_norm": 0.515625, "learning_rate": 0.0017856055921099119, "loss": 0.2311, "step": 22314 }, { "epoch": 0.03956811252691761, "grad_norm": 0.427734375, "learning_rate": 0.0017855670325785495, "loss": 0.1507, "step": 22316 }, { "epoch": 0.039571658692227424, "grad_norm": 0.703125, "learning_rate": 0.0017855284700489151, "loss": 0.2447, "step": 22318 }, { "epoch": 0.03957520485753724, "grad_norm": 0.2890625, "learning_rate": 0.0017854899045211777, "loss": 0.1981, "step": 22320 }, { "epoch": 0.03957875102284706, "grad_norm": 0.6484375, "learning_rate": 0.001785451335995505, "loss": 0.2905, "step": 22322 }, { "epoch": 0.039582297188156874, "grad_norm": 0.2333984375, "learning_rate": 0.0017854127644720671, "loss": 0.1745, "step": 22324 }, { "epoch": 0.03958584335346669, "grad_norm": 0.62109375, "learning_rate": 0.0017853741899510314, "loss": 0.2087, "step": 22326 }, { "epoch": 0.039589389518776504, "grad_norm": 0.671875, "learning_rate": 0.0017853356124325672, "loss": 0.1757, "step": 22328 }, { "epoch": 0.03959293568408632, "grad_norm": 1.0546875, "learning_rate": 0.001785297031916843, "loss": 0.226, "step": 22330 }, { "epoch": 0.03959648184939613, "grad_norm": 0.353515625, "learning_rate": 0.001785258448404028, "loss": 0.2233, "step": 22332 }, { "epoch": 0.03960002801470595, "grad_norm": 0.55078125, "learning_rate": 0.0017852198618942905, "loss": 0.2188, "step": 22334 }, { "epoch": 0.03960357418001576, "grad_norm": 1.8828125, "learning_rate": 0.0017851812723877992, "loss": 0.3799, "step": 22336 }, { "epoch": 0.039607120345325576, "grad_norm": 0.44921875, "learning_rate": 0.001785142679884723, "loss": 0.1979, "step": 22338 }, { "epoch": 0.03961066651063539, "grad_norm": 2.359375, "learning_rate": 0.0017851040843852306, "loss": 0.2276, "step": 22340 }, { "epoch": 0.039614212675945205, "grad_norm": 0.302734375, "learning_rate": 0.0017850654858894911, "loss": 0.1964, "step": 22342 }, { "epoch": 0.03961775884125502, "grad_norm": 0.6953125, "learning_rate": 0.0017850268843976733, "loss": 0.2449, "step": 22344 }, { "epoch": 0.03962130500656484, "grad_norm": 0.80078125, "learning_rate": 0.0017849882799099454, "loss": 0.2109, "step": 22346 }, { "epoch": 0.039624851171874656, "grad_norm": 0.4921875, "learning_rate": 0.0017849496724264768, "loss": 0.2298, "step": 22348 }, { "epoch": 0.03962839733718447, "grad_norm": 0.263671875, "learning_rate": 0.0017849110619474362, "loss": 0.2042, "step": 22350 }, { "epoch": 0.039631943502494285, "grad_norm": 0.337890625, "learning_rate": 0.0017848724484729927, "loss": 0.1722, "step": 22352 }, { "epoch": 0.0396354896678041, "grad_norm": 0.6171875, "learning_rate": 0.0017848338320033148, "loss": 0.3229, "step": 22354 }, { "epoch": 0.039639035833113914, "grad_norm": 0.7109375, "learning_rate": 0.0017847952125385712, "loss": 0.4101, "step": 22356 }, { "epoch": 0.03964258199842373, "grad_norm": 0.50390625, "learning_rate": 0.0017847565900789312, "loss": 0.1905, "step": 22358 }, { "epoch": 0.03964612816373354, "grad_norm": 0.25390625, "learning_rate": 0.001784717964624564, "loss": 0.2077, "step": 22360 }, { "epoch": 0.03964967432904336, "grad_norm": 1.953125, "learning_rate": 0.0017846793361756378, "loss": 0.3593, "step": 22362 }, { "epoch": 0.03965322049435317, "grad_norm": 3.25, "learning_rate": 0.001784640704732322, "loss": 0.3158, "step": 22364 }, { "epoch": 0.03965676665966299, "grad_norm": 0.9609375, "learning_rate": 0.0017846020702947856, "loss": 0.2534, "step": 22366 }, { "epoch": 0.03966031282497281, "grad_norm": 0.4765625, "learning_rate": 0.0017845634328631972, "loss": 0.1518, "step": 22368 }, { "epoch": 0.03966385899028262, "grad_norm": 1.71875, "learning_rate": 0.0017845247924377261, "loss": 0.2004, "step": 22370 }, { "epoch": 0.03966740515559244, "grad_norm": 0.84765625, "learning_rate": 0.001784486149018541, "loss": 0.2589, "step": 22372 }, { "epoch": 0.03967095132090225, "grad_norm": 0.67578125, "learning_rate": 0.0017844475026058113, "loss": 0.1847, "step": 22374 }, { "epoch": 0.03967449748621207, "grad_norm": 0.3046875, "learning_rate": 0.001784408853199706, "loss": 0.1835, "step": 22376 }, { "epoch": 0.03967804365152188, "grad_norm": 0.33984375, "learning_rate": 0.0017843702008003934, "loss": 0.1979, "step": 22378 }, { "epoch": 0.039681589816831696, "grad_norm": 0.84765625, "learning_rate": 0.0017843315454080433, "loss": 0.4589, "step": 22380 }, { "epoch": 0.03968513598214151, "grad_norm": 0.333984375, "learning_rate": 0.0017842928870228246, "loss": 0.1963, "step": 22382 }, { "epoch": 0.039688682147451325, "grad_norm": 0.2890625, "learning_rate": 0.0017842542256449063, "loss": 0.3256, "step": 22384 }, { "epoch": 0.03969222831276114, "grad_norm": 0.75390625, "learning_rate": 0.0017842155612744573, "loss": 0.2031, "step": 22386 }, { "epoch": 0.039695774478070954, "grad_norm": 0.62109375, "learning_rate": 0.001784176893911647, "loss": 0.1842, "step": 22388 }, { "epoch": 0.039699320643380775, "grad_norm": 0.8515625, "learning_rate": 0.0017841382235566443, "loss": 0.1823, "step": 22390 }, { "epoch": 0.03970286680869059, "grad_norm": 0.419921875, "learning_rate": 0.0017840995502096185, "loss": 0.19, "step": 22392 }, { "epoch": 0.039706412974000405, "grad_norm": 0.80078125, "learning_rate": 0.0017840608738707386, "loss": 0.2092, "step": 22394 }, { "epoch": 0.03970995913931022, "grad_norm": 0.55078125, "learning_rate": 0.001784022194540174, "loss": 0.2094, "step": 22396 }, { "epoch": 0.039713505304620034, "grad_norm": 1.0859375, "learning_rate": 0.001783983512218093, "loss": 0.3682, "step": 22398 }, { "epoch": 0.03971705146992985, "grad_norm": 1.1640625, "learning_rate": 0.0017839448269046662, "loss": 0.2658, "step": 22400 }, { "epoch": 0.03972059763523966, "grad_norm": 1.25, "learning_rate": 0.0017839061386000616, "loss": 0.2217, "step": 22402 }, { "epoch": 0.03972414380054948, "grad_norm": 1.296875, "learning_rate": 0.0017838674473044489, "loss": 0.185, "step": 22404 }, { "epoch": 0.03972768996585929, "grad_norm": 0.546875, "learning_rate": 0.0017838287530179972, "loss": 0.1948, "step": 22406 }, { "epoch": 0.039731236131169106, "grad_norm": 1.671875, "learning_rate": 0.0017837900557408758, "loss": 0.2185, "step": 22408 }, { "epoch": 0.03973478229647892, "grad_norm": 0.400390625, "learning_rate": 0.0017837513554732538, "loss": 0.1688, "step": 22410 }, { "epoch": 0.039738328461788736, "grad_norm": 0.640625, "learning_rate": 0.0017837126522153007, "loss": 0.2303, "step": 22412 }, { "epoch": 0.03974187462709856, "grad_norm": 0.4140625, "learning_rate": 0.0017836739459671856, "loss": 0.209, "step": 22414 }, { "epoch": 0.03974542079240837, "grad_norm": 1.6015625, "learning_rate": 0.0017836352367290775, "loss": 0.3187, "step": 22416 }, { "epoch": 0.039748966957718186, "grad_norm": 0.466796875, "learning_rate": 0.0017835965245011462, "loss": 0.2451, "step": 22418 }, { "epoch": 0.039752513123028, "grad_norm": 0.78515625, "learning_rate": 0.0017835578092835608, "loss": 0.2171, "step": 22420 }, { "epoch": 0.039756059288337815, "grad_norm": 0.400390625, "learning_rate": 0.0017835190910764907, "loss": 0.1928, "step": 22422 }, { "epoch": 0.03975960545364763, "grad_norm": 0.7578125, "learning_rate": 0.0017834803698801049, "loss": 0.2319, "step": 22424 }, { "epoch": 0.039763151618957444, "grad_norm": 0.57421875, "learning_rate": 0.001783441645694573, "loss": 0.1809, "step": 22426 }, { "epoch": 0.03976669778426726, "grad_norm": 0.369140625, "learning_rate": 0.0017834029185200644, "loss": 0.1798, "step": 22428 }, { "epoch": 0.039770243949577073, "grad_norm": 0.224609375, "learning_rate": 0.0017833641883567481, "loss": 0.2321, "step": 22430 }, { "epoch": 0.03977379011488689, "grad_norm": 0.494140625, "learning_rate": 0.0017833254552047943, "loss": 0.2095, "step": 22432 }, { "epoch": 0.0397773362801967, "grad_norm": 0.4296875, "learning_rate": 0.0017832867190643717, "loss": 0.2292, "step": 22434 }, { "epoch": 0.039780882445506524, "grad_norm": 0.578125, "learning_rate": 0.00178324797993565, "loss": 0.3047, "step": 22436 }, { "epoch": 0.03978442861081634, "grad_norm": 0.33203125, "learning_rate": 0.001783209237818798, "loss": 0.2169, "step": 22438 }, { "epoch": 0.03978797477612615, "grad_norm": 0.703125, "learning_rate": 0.0017831704927139863, "loss": 0.2251, "step": 22440 }, { "epoch": 0.03979152094143597, "grad_norm": 0.2890625, "learning_rate": 0.0017831317446213833, "loss": 0.2115, "step": 22442 }, { "epoch": 0.03979506710674578, "grad_norm": 0.58984375, "learning_rate": 0.001783092993541159, "loss": 0.1597, "step": 22444 }, { "epoch": 0.0397986132720556, "grad_norm": 0.361328125, "learning_rate": 0.0017830542394734828, "loss": 0.4002, "step": 22446 }, { "epoch": 0.03980215943736541, "grad_norm": 0.64453125, "learning_rate": 0.001783015482418524, "loss": 0.2015, "step": 22448 }, { "epoch": 0.039805705602675226, "grad_norm": 0.392578125, "learning_rate": 0.0017829767223764522, "loss": 0.2061, "step": 22450 }, { "epoch": 0.03980925176798504, "grad_norm": 0.859375, "learning_rate": 0.0017829379593474368, "loss": 0.263, "step": 22452 }, { "epoch": 0.039812797933294855, "grad_norm": 0.318359375, "learning_rate": 0.0017828991933316477, "loss": 0.2022, "step": 22454 }, { "epoch": 0.03981634409860467, "grad_norm": 0.3671875, "learning_rate": 0.0017828604243292543, "loss": 0.3088, "step": 22456 }, { "epoch": 0.03981989026391449, "grad_norm": 0.515625, "learning_rate": 0.0017828216523404258, "loss": 0.1977, "step": 22458 }, { "epoch": 0.039823436429224306, "grad_norm": 0.498046875, "learning_rate": 0.0017827828773653322, "loss": 0.2256, "step": 22460 }, { "epoch": 0.03982698259453412, "grad_norm": 0.66796875, "learning_rate": 0.0017827440994041429, "loss": 0.2404, "step": 22462 }, { "epoch": 0.039830528759843935, "grad_norm": 1.765625, "learning_rate": 0.0017827053184570271, "loss": 0.2647, "step": 22464 }, { "epoch": 0.03983407492515375, "grad_norm": 1.2578125, "learning_rate": 0.0017826665345241554, "loss": 0.222, "step": 22466 }, { "epoch": 0.039837621090463564, "grad_norm": 0.37890625, "learning_rate": 0.0017826277476056967, "loss": 0.1669, "step": 22468 }, { "epoch": 0.03984116725577338, "grad_norm": 0.2890625, "learning_rate": 0.0017825889577018203, "loss": 0.2117, "step": 22470 }, { "epoch": 0.03984471342108319, "grad_norm": 0.875, "learning_rate": 0.0017825501648126966, "loss": 0.3853, "step": 22472 }, { "epoch": 0.03984825958639301, "grad_norm": 0.8515625, "learning_rate": 0.001782511368938495, "loss": 0.2538, "step": 22474 }, { "epoch": 0.03985180575170282, "grad_norm": 0.515625, "learning_rate": 0.0017824725700793852, "loss": 0.192, "step": 22476 }, { "epoch": 0.03985535191701264, "grad_norm": 0.373046875, "learning_rate": 0.0017824337682355363, "loss": 0.2097, "step": 22478 }, { "epoch": 0.03985889808232245, "grad_norm": 1.1640625, "learning_rate": 0.0017823949634071191, "loss": 0.2595, "step": 22480 }, { "epoch": 0.03986244424763227, "grad_norm": 0.54296875, "learning_rate": 0.0017823561555943023, "loss": 0.1397, "step": 22482 }, { "epoch": 0.03986599041294209, "grad_norm": 0.1630859375, "learning_rate": 0.0017823173447972563, "loss": 0.2044, "step": 22484 }, { "epoch": 0.0398695365782519, "grad_norm": 0.427734375, "learning_rate": 0.0017822785310161505, "loss": 0.1797, "step": 22486 }, { "epoch": 0.039873082743561716, "grad_norm": 0.53125, "learning_rate": 0.0017822397142511547, "loss": 0.2188, "step": 22488 }, { "epoch": 0.03987662890887153, "grad_norm": 0.283203125, "learning_rate": 0.0017822008945024388, "loss": 0.207, "step": 22490 }, { "epoch": 0.039880175074181345, "grad_norm": 1.65625, "learning_rate": 0.0017821620717701724, "loss": 0.2045, "step": 22492 }, { "epoch": 0.03988372123949116, "grad_norm": 0.466796875, "learning_rate": 0.001782123246054525, "loss": 0.2525, "step": 22494 }, { "epoch": 0.039887267404800975, "grad_norm": 0.365234375, "learning_rate": 0.0017820844173556674, "loss": 0.1855, "step": 22496 }, { "epoch": 0.03989081357011079, "grad_norm": 0.3046875, "learning_rate": 0.001782045585673768, "loss": 0.2436, "step": 22498 }, { "epoch": 0.039894359735420604, "grad_norm": 0.42578125, "learning_rate": 0.0017820067510089979, "loss": 0.2402, "step": 22500 }, { "epoch": 0.03989790590073042, "grad_norm": 2.046875, "learning_rate": 0.0017819679133615266, "loss": 0.3063, "step": 22502 }, { "epoch": 0.03990145206604024, "grad_norm": 0.484375, "learning_rate": 0.001781929072731523, "loss": 0.2296, "step": 22504 }, { "epoch": 0.039904998231350054, "grad_norm": 0.59375, "learning_rate": 0.0017818902291191585, "loss": 0.1958, "step": 22506 }, { "epoch": 0.03990854439665987, "grad_norm": 13.75, "learning_rate": 0.001781851382524602, "loss": 0.3618, "step": 22508 }, { "epoch": 0.03991209056196968, "grad_norm": 0.283203125, "learning_rate": 0.0017818125329480236, "loss": 0.2303, "step": 22510 }, { "epoch": 0.0399156367272795, "grad_norm": 0.283203125, "learning_rate": 0.0017817736803895931, "loss": 0.1836, "step": 22512 }, { "epoch": 0.03991918289258931, "grad_norm": 0.439453125, "learning_rate": 0.0017817348248494807, "loss": 0.2261, "step": 22514 }, { "epoch": 0.03992272905789913, "grad_norm": 0.85546875, "learning_rate": 0.001781695966327856, "loss": 0.1709, "step": 22516 }, { "epoch": 0.03992627522320894, "grad_norm": 0.30078125, "learning_rate": 0.0017816571048248893, "loss": 0.1793, "step": 22518 }, { "epoch": 0.039929821388518756, "grad_norm": 0.310546875, "learning_rate": 0.0017816182403407503, "loss": 0.1631, "step": 22520 }, { "epoch": 0.03993336755382857, "grad_norm": 0.55078125, "learning_rate": 0.001781579372875609, "loss": 0.1742, "step": 22522 }, { "epoch": 0.039936913719138385, "grad_norm": 0.80078125, "learning_rate": 0.0017815405024296355, "loss": 0.2342, "step": 22524 }, { "epoch": 0.03994045988444821, "grad_norm": 0.466796875, "learning_rate": 0.0017815016290029999, "loss": 0.2644, "step": 22526 }, { "epoch": 0.03994400604975802, "grad_norm": 0.37109375, "learning_rate": 0.001781462752595872, "loss": 0.1502, "step": 22528 }, { "epoch": 0.039947552215067836, "grad_norm": 1.1953125, "learning_rate": 0.0017814238732084219, "loss": 0.2626, "step": 22530 }, { "epoch": 0.03995109838037765, "grad_norm": 0.38671875, "learning_rate": 0.0017813849908408193, "loss": 0.2062, "step": 22532 }, { "epoch": 0.039954644545687465, "grad_norm": 0.8828125, "learning_rate": 0.0017813461054932344, "loss": 0.4728, "step": 22534 }, { "epoch": 0.03995819071099728, "grad_norm": 0.37890625, "learning_rate": 0.001781307217165838, "loss": 0.1716, "step": 22536 }, { "epoch": 0.039961736876307094, "grad_norm": 0.369140625, "learning_rate": 0.001781268325858799, "loss": 0.2533, "step": 22538 }, { "epoch": 0.03996528304161691, "grad_norm": 0.62109375, "learning_rate": 0.0017812294315722884, "loss": 0.4162, "step": 22540 }, { "epoch": 0.03996882920692672, "grad_norm": 1.09375, "learning_rate": 0.0017811905343064758, "loss": 0.2461, "step": 22542 }, { "epoch": 0.03997237537223654, "grad_norm": 0.91796875, "learning_rate": 0.0017811516340615317, "loss": 0.1988, "step": 22544 }, { "epoch": 0.03997592153754635, "grad_norm": 0.61328125, "learning_rate": 0.0017811127308376257, "loss": 0.228, "step": 22546 }, { "epoch": 0.03997946770285617, "grad_norm": 0.3046875, "learning_rate": 0.0017810738246349285, "loss": 0.1766, "step": 22548 }, { "epoch": 0.03998301386816599, "grad_norm": 1.1640625, "learning_rate": 0.00178103491545361, "loss": 0.3774, "step": 22550 }, { "epoch": 0.0399865600334758, "grad_norm": 1.0625, "learning_rate": 0.0017809960032938406, "loss": 0.3016, "step": 22552 }, { "epoch": 0.03999010619878562, "grad_norm": 0.287109375, "learning_rate": 0.0017809570881557896, "loss": 0.1963, "step": 22554 }, { "epoch": 0.03999365236409543, "grad_norm": 4.0625, "learning_rate": 0.0017809181700396285, "loss": 0.2796, "step": 22556 }, { "epoch": 0.039997198529405247, "grad_norm": 0.80078125, "learning_rate": 0.0017808792489455263, "loss": 0.2096, "step": 22558 }, { "epoch": 0.04000074469471506, "grad_norm": 1.078125, "learning_rate": 0.0017808403248736541, "loss": 0.1856, "step": 22560 }, { "epoch": 0.040004290860024876, "grad_norm": 0.23828125, "learning_rate": 0.0017808013978241818, "loss": 0.1762, "step": 22562 }, { "epoch": 0.04000783702533469, "grad_norm": 1.1171875, "learning_rate": 0.0017807624677972796, "loss": 0.2175, "step": 22564 }, { "epoch": 0.040011383190644505, "grad_norm": 0.365234375, "learning_rate": 0.0017807235347931177, "loss": 0.1747, "step": 22566 }, { "epoch": 0.04001492935595432, "grad_norm": 0.67578125, "learning_rate": 0.0017806845988118665, "loss": 0.1759, "step": 22568 }, { "epoch": 0.040018475521264134, "grad_norm": 1.140625, "learning_rate": 0.0017806456598536964, "loss": 0.3186, "step": 22570 }, { "epoch": 0.040022021686573955, "grad_norm": 0.357421875, "learning_rate": 0.0017806067179187777, "loss": 0.1907, "step": 22572 }, { "epoch": 0.04002556785188377, "grad_norm": 0.66015625, "learning_rate": 0.00178056777300728, "loss": 0.2771, "step": 22574 }, { "epoch": 0.040029114017193584, "grad_norm": 0.208984375, "learning_rate": 0.0017805288251193747, "loss": 0.1618, "step": 22576 }, { "epoch": 0.0400326601825034, "grad_norm": 1.671875, "learning_rate": 0.0017804898742552313, "loss": 0.2619, "step": 22578 }, { "epoch": 0.040036206347813214, "grad_norm": 0.421875, "learning_rate": 0.0017804509204150205, "loss": 0.2328, "step": 22580 }, { "epoch": 0.04003975251312303, "grad_norm": 0.45703125, "learning_rate": 0.0017804119635989127, "loss": 0.1591, "step": 22582 }, { "epoch": 0.04004329867843284, "grad_norm": 1.5703125, "learning_rate": 0.0017803730038070782, "loss": 0.2499, "step": 22584 }, { "epoch": 0.04004684484374266, "grad_norm": 0.83203125, "learning_rate": 0.0017803340410396874, "loss": 0.2408, "step": 22586 }, { "epoch": 0.04005039100905247, "grad_norm": 0.5859375, "learning_rate": 0.001780295075296911, "loss": 0.1496, "step": 22588 }, { "epoch": 0.040053937174362286, "grad_norm": 0.439453125, "learning_rate": 0.0017802561065789187, "loss": 0.2447, "step": 22590 }, { "epoch": 0.0400574833396721, "grad_norm": 0.173828125, "learning_rate": 0.0017802171348858813, "loss": 0.1626, "step": 22592 }, { "epoch": 0.04006102950498192, "grad_norm": 0.5390625, "learning_rate": 0.0017801781602179694, "loss": 0.2996, "step": 22594 }, { "epoch": 0.04006457567029174, "grad_norm": 0.3515625, "learning_rate": 0.0017801391825753535, "loss": 0.2186, "step": 22596 }, { "epoch": 0.04006812183560155, "grad_norm": 0.19140625, "learning_rate": 0.0017801002019582036, "loss": 0.1809, "step": 22598 }, { "epoch": 0.040071668000911366, "grad_norm": 0.84765625, "learning_rate": 0.0017800612183666905, "loss": 0.3143, "step": 22600 }, { "epoch": 0.04007521416622118, "grad_norm": 0.55859375, "learning_rate": 0.0017800222318009847, "loss": 0.2331, "step": 22602 }, { "epoch": 0.040078760331530995, "grad_norm": 0.65234375, "learning_rate": 0.0017799832422612566, "loss": 0.2865, "step": 22604 }, { "epoch": 0.04008230649684081, "grad_norm": 0.3515625, "learning_rate": 0.001779944249747677, "loss": 0.4221, "step": 22606 }, { "epoch": 0.040085852662150624, "grad_norm": 1.21875, "learning_rate": 0.0017799052542604161, "loss": 0.2567, "step": 22608 }, { "epoch": 0.04008939882746044, "grad_norm": 0.33203125, "learning_rate": 0.0017798662557996444, "loss": 0.1969, "step": 22610 }, { "epoch": 0.04009294499277025, "grad_norm": 2.25, "learning_rate": 0.0017798272543655326, "loss": 0.2831, "step": 22612 }, { "epoch": 0.04009649115808007, "grad_norm": 0.52734375, "learning_rate": 0.0017797882499582516, "loss": 0.141, "step": 22614 }, { "epoch": 0.04010003732338988, "grad_norm": 0.376953125, "learning_rate": 0.0017797492425779716, "loss": 0.1769, "step": 22616 }, { "epoch": 0.040103583488699704, "grad_norm": 0.44921875, "learning_rate": 0.0017797102322248631, "loss": 0.2547, "step": 22618 }, { "epoch": 0.04010712965400952, "grad_norm": 0.3515625, "learning_rate": 0.0017796712188990966, "loss": 0.2034, "step": 22620 }, { "epoch": 0.04011067581931933, "grad_norm": 1.8125, "learning_rate": 0.0017796322026008436, "loss": 0.361, "step": 22622 }, { "epoch": 0.04011422198462915, "grad_norm": 0.486328125, "learning_rate": 0.0017795931833302736, "loss": 0.1582, "step": 22624 }, { "epoch": 0.04011776814993896, "grad_norm": 0.453125, "learning_rate": 0.001779554161087558, "loss": 0.1978, "step": 22626 }, { "epoch": 0.04012131431524878, "grad_norm": 0.5234375, "learning_rate": 0.0017795151358728674, "loss": 0.243, "step": 22628 }, { "epoch": 0.04012486048055859, "grad_norm": 0.9453125, "learning_rate": 0.0017794761076863719, "loss": 0.2052, "step": 22630 }, { "epoch": 0.040128406645868406, "grad_norm": 2.21875, "learning_rate": 0.0017794370765282428, "loss": 0.2412, "step": 22632 }, { "epoch": 0.04013195281117822, "grad_norm": 0.54296875, "learning_rate": 0.0017793980423986507, "loss": 0.224, "step": 22634 }, { "epoch": 0.040135498976488035, "grad_norm": 0.361328125, "learning_rate": 0.001779359005297766, "loss": 0.1344, "step": 22636 }, { "epoch": 0.04013904514179785, "grad_norm": 0.6875, "learning_rate": 0.00177931996522576, "loss": 0.2234, "step": 22638 }, { "epoch": 0.04014259130710767, "grad_norm": 0.34375, "learning_rate": 0.0017792809221828026, "loss": 0.1831, "step": 22640 }, { "epoch": 0.040146137472417485, "grad_norm": 0.765625, "learning_rate": 0.0017792418761690655, "loss": 0.2317, "step": 22642 }, { "epoch": 0.0401496836377273, "grad_norm": 0.8671875, "learning_rate": 0.0017792028271847184, "loss": 0.1726, "step": 22644 }, { "epoch": 0.040153229803037115, "grad_norm": 0.96484375, "learning_rate": 0.0017791637752299333, "loss": 0.2046, "step": 22646 }, { "epoch": 0.04015677596834693, "grad_norm": 0.453125, "learning_rate": 0.0017791247203048797, "loss": 0.3859, "step": 22648 }, { "epoch": 0.040160322133656744, "grad_norm": 0.34765625, "learning_rate": 0.0017790856624097296, "loss": 0.1831, "step": 22650 }, { "epoch": 0.04016386829896656, "grad_norm": 1.046875, "learning_rate": 0.0017790466015446531, "loss": 0.2486, "step": 22652 }, { "epoch": 0.04016741446427637, "grad_norm": 0.45703125, "learning_rate": 0.0017790075377098209, "loss": 0.1742, "step": 22654 }, { "epoch": 0.04017096062958619, "grad_norm": 1.1875, "learning_rate": 0.0017789684709054046, "loss": 0.2696, "step": 22656 }, { "epoch": 0.040174506794896, "grad_norm": 0.42578125, "learning_rate": 0.0017789294011315744, "loss": 0.1789, "step": 22658 }, { "epoch": 0.040178052960205816, "grad_norm": 0.58984375, "learning_rate": 0.0017788903283885015, "loss": 0.2149, "step": 22660 }, { "epoch": 0.04018159912551564, "grad_norm": 0.44921875, "learning_rate": 0.0017788512526763563, "loss": 0.2549, "step": 22662 }, { "epoch": 0.04018514529082545, "grad_norm": 0.2021484375, "learning_rate": 0.0017788121739953104, "loss": 0.2346, "step": 22664 }, { "epoch": 0.04018869145613527, "grad_norm": 0.328125, "learning_rate": 0.001778773092345534, "loss": 0.1653, "step": 22666 }, { "epoch": 0.04019223762144508, "grad_norm": 0.34765625, "learning_rate": 0.0017787340077271986, "loss": 0.2001, "step": 22668 }, { "epoch": 0.040195783786754896, "grad_norm": 0.3203125, "learning_rate": 0.001778694920140475, "loss": 0.1679, "step": 22670 }, { "epoch": 0.04019932995206471, "grad_norm": 0.37109375, "learning_rate": 0.001778655829585534, "loss": 0.1692, "step": 22672 }, { "epoch": 0.040202876117374525, "grad_norm": 0.6875, "learning_rate": 0.0017786167360625464, "loss": 0.1622, "step": 22674 }, { "epoch": 0.04020642228268434, "grad_norm": 0.396484375, "learning_rate": 0.0017785776395716835, "loss": 0.1816, "step": 22676 }, { "epoch": 0.040209968447994154, "grad_norm": 0.640625, "learning_rate": 0.0017785385401131162, "loss": 0.1979, "step": 22678 }, { "epoch": 0.04021351461330397, "grad_norm": 0.8046875, "learning_rate": 0.0017784994376870155, "loss": 0.2207, "step": 22680 }, { "epoch": 0.040217060778613783, "grad_norm": 0.310546875, "learning_rate": 0.0017784603322935526, "loss": 0.1433, "step": 22682 }, { "epoch": 0.0402206069439236, "grad_norm": 0.74609375, "learning_rate": 0.001778421223932898, "loss": 0.1815, "step": 22684 }, { "epoch": 0.04022415310923342, "grad_norm": 0.9453125, "learning_rate": 0.001778382112605223, "loss": 0.2913, "step": 22686 }, { "epoch": 0.040227699274543234, "grad_norm": 0.279296875, "learning_rate": 0.001778342998310699, "loss": 0.1979, "step": 22688 }, { "epoch": 0.04023124543985305, "grad_norm": 0.65234375, "learning_rate": 0.0017783038810494963, "loss": 0.3506, "step": 22690 }, { "epoch": 0.04023479160516286, "grad_norm": 0.53125, "learning_rate": 0.0017782647608217865, "loss": 0.1917, "step": 22692 }, { "epoch": 0.04023833777047268, "grad_norm": 0.71875, "learning_rate": 0.001778225637627741, "loss": 0.263, "step": 22694 }, { "epoch": 0.04024188393578249, "grad_norm": 0.70703125, "learning_rate": 0.0017781865114675304, "loss": 0.3052, "step": 22696 }, { "epoch": 0.04024543010109231, "grad_norm": 0.44140625, "learning_rate": 0.0017781473823413256, "loss": 0.1704, "step": 22698 }, { "epoch": 0.04024897626640212, "grad_norm": 0.6015625, "learning_rate": 0.0017781082502492985, "loss": 0.1745, "step": 22700 }, { "epoch": 0.040252522431711936, "grad_norm": 0.5546875, "learning_rate": 0.0017780691151916194, "loss": 0.2955, "step": 22702 }, { "epoch": 0.04025606859702175, "grad_norm": 0.796875, "learning_rate": 0.0017780299771684605, "loss": 0.2641, "step": 22704 }, { "epoch": 0.040259614762331565, "grad_norm": 0.5546875, "learning_rate": 0.0017779908361799917, "loss": 0.2149, "step": 22706 }, { "epoch": 0.04026316092764139, "grad_norm": 0.435546875, "learning_rate": 0.0017779516922263852, "loss": 0.1936, "step": 22708 }, { "epoch": 0.0402667070929512, "grad_norm": 0.267578125, "learning_rate": 0.0017779125453078113, "loss": 0.1879, "step": 22710 }, { "epoch": 0.040270253258261016, "grad_norm": 0.25, "learning_rate": 0.0017778733954244422, "loss": 0.2303, "step": 22712 }, { "epoch": 0.04027379942357083, "grad_norm": 0.23828125, "learning_rate": 0.0017778342425764483, "loss": 0.2025, "step": 22714 }, { "epoch": 0.040277345588880645, "grad_norm": 1.5078125, "learning_rate": 0.0017777950867640016, "loss": 0.2637, "step": 22716 }, { "epoch": 0.04028089175419046, "grad_norm": 1.0234375, "learning_rate": 0.0017777559279872723, "loss": 0.194, "step": 22718 }, { "epoch": 0.040284437919500274, "grad_norm": 0.5390625, "learning_rate": 0.0017777167662464326, "loss": 0.2075, "step": 22720 }, { "epoch": 0.04028798408481009, "grad_norm": 0.6328125, "learning_rate": 0.0017776776015416534, "loss": 0.2202, "step": 22722 }, { "epoch": 0.0402915302501199, "grad_norm": 0.462890625, "learning_rate": 0.001777638433873106, "loss": 0.2157, "step": 22724 }, { "epoch": 0.04029507641542972, "grad_norm": 0.26953125, "learning_rate": 0.0017775992632409617, "loss": 0.1907, "step": 22726 }, { "epoch": 0.04029862258073953, "grad_norm": 0.2177734375, "learning_rate": 0.0017775600896453919, "loss": 0.1939, "step": 22728 }, { "epoch": 0.040302168746049354, "grad_norm": 0.2890625, "learning_rate": 0.0017775209130865676, "loss": 0.2506, "step": 22730 }, { "epoch": 0.04030571491135917, "grad_norm": 0.212890625, "learning_rate": 0.0017774817335646604, "loss": 0.1979, "step": 22732 }, { "epoch": 0.04030926107666898, "grad_norm": 0.341796875, "learning_rate": 0.0017774425510798423, "loss": 0.3306, "step": 22734 }, { "epoch": 0.0403128072419788, "grad_norm": 0.373046875, "learning_rate": 0.0017774033656322832, "loss": 0.178, "step": 22736 }, { "epoch": 0.04031635340728861, "grad_norm": 0.5625, "learning_rate": 0.0017773641772221554, "loss": 0.2687, "step": 22738 }, { "epoch": 0.040319899572598426, "grad_norm": 0.83203125, "learning_rate": 0.0017773249858496303, "loss": 0.2223, "step": 22740 }, { "epoch": 0.04032344573790824, "grad_norm": 0.4375, "learning_rate": 0.001777285791514879, "loss": 0.2178, "step": 22742 }, { "epoch": 0.040326991903218055, "grad_norm": 0.57421875, "learning_rate": 0.0017772465942180735, "loss": 0.2289, "step": 22744 }, { "epoch": 0.04033053806852787, "grad_norm": 0.478515625, "learning_rate": 0.0017772073939593845, "loss": 0.2133, "step": 22746 }, { "epoch": 0.040334084233837685, "grad_norm": 0.51953125, "learning_rate": 0.0017771681907389836, "loss": 0.2125, "step": 22748 }, { "epoch": 0.0403376303991475, "grad_norm": 0.388671875, "learning_rate": 0.0017771289845570424, "loss": 0.2092, "step": 22750 }, { "epoch": 0.040341176564457314, "grad_norm": 1.4609375, "learning_rate": 0.0017770897754137325, "loss": 0.1956, "step": 22752 }, { "epoch": 0.040344722729767135, "grad_norm": 0.3515625, "learning_rate": 0.001777050563309225, "loss": 0.2131, "step": 22754 }, { "epoch": 0.04034826889507695, "grad_norm": 0.466796875, "learning_rate": 0.001777011348243692, "loss": 0.2089, "step": 22756 }, { "epoch": 0.040351815060386764, "grad_norm": 0.62109375, "learning_rate": 0.0017769721302173043, "loss": 0.2507, "step": 22758 }, { "epoch": 0.04035536122569658, "grad_norm": 0.96484375, "learning_rate": 0.0017769329092302338, "loss": 0.2087, "step": 22760 }, { "epoch": 0.04035890739100639, "grad_norm": 0.75, "learning_rate": 0.001776893685282652, "loss": 0.2321, "step": 22762 }, { "epoch": 0.04036245355631621, "grad_norm": 0.546875, "learning_rate": 0.0017768544583747303, "loss": 0.1702, "step": 22764 }, { "epoch": 0.04036599972162602, "grad_norm": 0.208984375, "learning_rate": 0.0017768152285066406, "loss": 0.155, "step": 22766 }, { "epoch": 0.04036954588693584, "grad_norm": 0.55078125, "learning_rate": 0.0017767759956785541, "loss": 0.2236, "step": 22768 }, { "epoch": 0.04037309205224565, "grad_norm": 0.341796875, "learning_rate": 0.0017767367598906423, "loss": 0.1925, "step": 22770 }, { "epoch": 0.040376638217555466, "grad_norm": 0.48046875, "learning_rate": 0.001776697521143077, "loss": 0.3113, "step": 22772 }, { "epoch": 0.04038018438286528, "grad_norm": 2.15625, "learning_rate": 0.0017766582794360305, "loss": 0.2442, "step": 22774 }, { "epoch": 0.0403837305481751, "grad_norm": 1.203125, "learning_rate": 0.001776619034769673, "loss": 0.2394, "step": 22776 }, { "epoch": 0.04038727671348492, "grad_norm": 0.326171875, "learning_rate": 0.001776579787144177, "loss": 0.1634, "step": 22778 }, { "epoch": 0.04039082287879473, "grad_norm": 0.57421875, "learning_rate": 0.0017765405365597145, "loss": 0.2787, "step": 22780 }, { "epoch": 0.040394369044104546, "grad_norm": 0.443359375, "learning_rate": 0.0017765012830164562, "loss": 0.2401, "step": 22782 }, { "epoch": 0.04039791520941436, "grad_norm": 0.419921875, "learning_rate": 0.001776462026514574, "loss": 0.2863, "step": 22784 }, { "epoch": 0.040401461374724175, "grad_norm": 1.375, "learning_rate": 0.0017764227670542406, "loss": 0.4066, "step": 22786 }, { "epoch": 0.04040500754003399, "grad_norm": 0.224609375, "learning_rate": 0.0017763835046356266, "loss": 0.1735, "step": 22788 }, { "epoch": 0.040408553705343804, "grad_norm": 0.40625, "learning_rate": 0.0017763442392589037, "loss": 0.2091, "step": 22790 }, { "epoch": 0.04041209987065362, "grad_norm": 2.015625, "learning_rate": 0.0017763049709242445, "loss": 0.4002, "step": 22792 }, { "epoch": 0.04041564603596343, "grad_norm": 0.41015625, "learning_rate": 0.0017762656996318197, "loss": 0.1968, "step": 22794 }, { "epoch": 0.04041919220127325, "grad_norm": 0.365234375, "learning_rate": 0.001776226425381802, "loss": 0.1887, "step": 22796 }, { "epoch": 0.04042273836658307, "grad_norm": 0.24609375, "learning_rate": 0.0017761871481743621, "loss": 0.1856, "step": 22798 }, { "epoch": 0.040426284531892884, "grad_norm": 0.515625, "learning_rate": 0.001776147868009673, "loss": 0.2419, "step": 22800 }, { "epoch": 0.0404298306972027, "grad_norm": 0.6015625, "learning_rate": 0.0017761085848879055, "loss": 0.1933, "step": 22802 }, { "epoch": 0.04043337686251251, "grad_norm": 0.376953125, "learning_rate": 0.001776069298809232, "loss": 0.3345, "step": 22804 }, { "epoch": 0.04043692302782233, "grad_norm": 0.51953125, "learning_rate": 0.001776030009773824, "loss": 0.2815, "step": 22806 }, { "epoch": 0.04044046919313214, "grad_norm": 0.83984375, "learning_rate": 0.0017759907177818536, "loss": 0.2798, "step": 22808 }, { "epoch": 0.040444015358441956, "grad_norm": 2.609375, "learning_rate": 0.0017759514228334922, "loss": 0.2292, "step": 22810 }, { "epoch": 0.04044756152375177, "grad_norm": 0.25390625, "learning_rate": 0.001775912124928912, "loss": 0.1818, "step": 22812 }, { "epoch": 0.040451107689061586, "grad_norm": 3.890625, "learning_rate": 0.001775872824068285, "loss": 0.2926, "step": 22814 }, { "epoch": 0.0404546538543714, "grad_norm": 0.341796875, "learning_rate": 0.0017758335202517823, "loss": 0.2112, "step": 22816 }, { "epoch": 0.040458200019681215, "grad_norm": 0.3671875, "learning_rate": 0.0017757942134795767, "loss": 0.1841, "step": 22818 }, { "epoch": 0.04046174618499103, "grad_norm": 0.6484375, "learning_rate": 0.00177575490375184, "loss": 0.1668, "step": 22820 }, { "epoch": 0.04046529235030085, "grad_norm": 0.640625, "learning_rate": 0.0017757155910687436, "loss": 0.236, "step": 22822 }, { "epoch": 0.040468838515610665, "grad_norm": 0.341796875, "learning_rate": 0.0017756762754304598, "loss": 0.1939, "step": 22824 }, { "epoch": 0.04047238468092048, "grad_norm": 0.8125, "learning_rate": 0.0017756369568371604, "loss": 0.2601, "step": 22826 }, { "epoch": 0.040475930846230294, "grad_norm": 0.7421875, "learning_rate": 0.0017755976352890174, "loss": 0.2292, "step": 22828 }, { "epoch": 0.04047947701154011, "grad_norm": 0.5078125, "learning_rate": 0.0017755583107862028, "loss": 0.2038, "step": 22830 }, { "epoch": 0.040483023176849924, "grad_norm": 0.953125, "learning_rate": 0.0017755189833288888, "loss": 0.2102, "step": 22832 }, { "epoch": 0.04048656934215974, "grad_norm": 0.34375, "learning_rate": 0.0017754796529172467, "loss": 0.2589, "step": 22834 }, { "epoch": 0.04049011550746955, "grad_norm": 0.296875, "learning_rate": 0.0017754403195514494, "loss": 0.1979, "step": 22836 }, { "epoch": 0.04049366167277937, "grad_norm": 0.1982421875, "learning_rate": 0.0017754009832316685, "loss": 0.1727, "step": 22838 }, { "epoch": 0.04049720783808918, "grad_norm": 0.58203125, "learning_rate": 0.0017753616439580755, "loss": 0.2159, "step": 22840 }, { "epoch": 0.040500754003398996, "grad_norm": 0.361328125, "learning_rate": 0.0017753223017308434, "loss": 0.1862, "step": 22842 }, { "epoch": 0.04050430016870882, "grad_norm": 0.2265625, "learning_rate": 0.0017752829565501433, "loss": 0.1981, "step": 22844 }, { "epoch": 0.04050784633401863, "grad_norm": 1.28125, "learning_rate": 0.0017752436084161485, "loss": 0.2509, "step": 22846 }, { "epoch": 0.04051139249932845, "grad_norm": 0.302734375, "learning_rate": 0.00177520425732903, "loss": 0.2223, "step": 22848 }, { "epoch": 0.04051493866463826, "grad_norm": 0.78515625, "learning_rate": 0.0017751649032889602, "loss": 0.2327, "step": 22850 }, { "epoch": 0.040518484829948076, "grad_norm": 0.52734375, "learning_rate": 0.0017751255462961117, "loss": 0.2726, "step": 22852 }, { "epoch": 0.04052203099525789, "grad_norm": 0.33203125, "learning_rate": 0.001775086186350656, "loss": 0.1874, "step": 22854 }, { "epoch": 0.040525577160567705, "grad_norm": 0.267578125, "learning_rate": 0.0017750468234527654, "loss": 0.1793, "step": 22856 }, { "epoch": 0.04052912332587752, "grad_norm": 0.80859375, "learning_rate": 0.001775007457602612, "loss": 0.2426, "step": 22858 }, { "epoch": 0.040532669491187334, "grad_norm": 3.21875, "learning_rate": 0.0017749680888003681, "loss": 0.3081, "step": 22860 }, { "epoch": 0.04053621565649715, "grad_norm": 0.5703125, "learning_rate": 0.001774928717046206, "loss": 0.179, "step": 22862 }, { "epoch": 0.04053976182180696, "grad_norm": 0.55078125, "learning_rate": 0.0017748893423402978, "loss": 0.182, "step": 22864 }, { "epoch": 0.040543307987116785, "grad_norm": 0.625, "learning_rate": 0.0017748499646828156, "loss": 0.2264, "step": 22866 }, { "epoch": 0.0405468541524266, "grad_norm": 0.8984375, "learning_rate": 0.0017748105840739317, "loss": 0.2861, "step": 22868 }, { "epoch": 0.040550400317736414, "grad_norm": 0.23828125, "learning_rate": 0.0017747712005138179, "loss": 0.2218, "step": 22870 }, { "epoch": 0.04055394648304623, "grad_norm": 0.71484375, "learning_rate": 0.001774731814002647, "loss": 0.1753, "step": 22872 }, { "epoch": 0.04055749264835604, "grad_norm": 0.3046875, "learning_rate": 0.0017746924245405913, "loss": 0.199, "step": 22874 }, { "epoch": 0.04056103881366586, "grad_norm": 0.416015625, "learning_rate": 0.0017746530321278225, "loss": 0.1552, "step": 22876 }, { "epoch": 0.04056458497897567, "grad_norm": 0.91796875, "learning_rate": 0.0017746136367645137, "loss": 0.2423, "step": 22878 }, { "epoch": 0.04056813114428549, "grad_norm": 0.494140625, "learning_rate": 0.0017745742384508362, "loss": 0.1484, "step": 22880 }, { "epoch": 0.0405716773095953, "grad_norm": 0.875, "learning_rate": 0.0017745348371869631, "loss": 0.2091, "step": 22882 }, { "epoch": 0.040575223474905116, "grad_norm": 2.0625, "learning_rate": 0.0017744954329730665, "loss": 0.3813, "step": 22884 }, { "epoch": 0.04057876964021493, "grad_norm": 0.3828125, "learning_rate": 0.0017744560258093187, "loss": 0.2242, "step": 22886 }, { "epoch": 0.040582315805524745, "grad_norm": 0.640625, "learning_rate": 0.0017744166156958916, "loss": 0.1522, "step": 22888 }, { "epoch": 0.040585861970834566, "grad_norm": 0.40625, "learning_rate": 0.0017743772026329583, "loss": 0.1856, "step": 22890 }, { "epoch": 0.04058940813614438, "grad_norm": 0.3125, "learning_rate": 0.0017743377866206909, "loss": 0.4, "step": 22892 }, { "epoch": 0.040592954301454195, "grad_norm": 3.421875, "learning_rate": 0.0017742983676592617, "loss": 0.2224, "step": 22894 }, { "epoch": 0.04059650046676401, "grad_norm": 1.40625, "learning_rate": 0.0017742589457488426, "loss": 0.229, "step": 22896 }, { "epoch": 0.040600046632073825, "grad_norm": 0.296875, "learning_rate": 0.0017742195208896073, "loss": 0.1585, "step": 22898 }, { "epoch": 0.04060359279738364, "grad_norm": 0.40234375, "learning_rate": 0.001774180093081727, "loss": 0.2891, "step": 22900 }, { "epoch": 0.040607138962693454, "grad_norm": 0.546875, "learning_rate": 0.0017741406623253749, "loss": 0.2203, "step": 22902 }, { "epoch": 0.04061068512800327, "grad_norm": 0.3359375, "learning_rate": 0.001774101228620723, "loss": 0.1809, "step": 22904 }, { "epoch": 0.04061423129331308, "grad_norm": 1.0546875, "learning_rate": 0.0017740617919679437, "loss": 0.2655, "step": 22906 }, { "epoch": 0.0406177774586229, "grad_norm": 0.5078125, "learning_rate": 0.00177402235236721, "loss": 0.3109, "step": 22908 }, { "epoch": 0.04062132362393271, "grad_norm": 2.140625, "learning_rate": 0.0017739829098186934, "loss": 0.2354, "step": 22910 }, { "epoch": 0.04062486978924253, "grad_norm": 0.28515625, "learning_rate": 0.0017739434643225676, "loss": 0.1658, "step": 22912 }, { "epoch": 0.04062841595455235, "grad_norm": 0.67578125, "learning_rate": 0.0017739040158790045, "loss": 0.1382, "step": 22914 }, { "epoch": 0.04063196211986216, "grad_norm": 0.5546875, "learning_rate": 0.0017738645644881765, "loss": 0.223, "step": 22916 }, { "epoch": 0.04063550828517198, "grad_norm": 1.53125, "learning_rate": 0.0017738251101502565, "loss": 0.262, "step": 22918 }, { "epoch": 0.04063905445048179, "grad_norm": 0.92578125, "learning_rate": 0.001773785652865417, "loss": 0.2021, "step": 22920 }, { "epoch": 0.040642600615791606, "grad_norm": 0.58203125, "learning_rate": 0.0017737461926338302, "loss": 0.1961, "step": 22922 }, { "epoch": 0.04064614678110142, "grad_norm": 0.75390625, "learning_rate": 0.001773706729455669, "loss": 0.1981, "step": 22924 }, { "epoch": 0.040649692946411235, "grad_norm": 0.298828125, "learning_rate": 0.0017736672633311057, "loss": 0.2089, "step": 22926 }, { "epoch": 0.04065323911172105, "grad_norm": 0.4140625, "learning_rate": 0.0017736277942603134, "loss": 0.1582, "step": 22928 }, { "epoch": 0.040656785277030864, "grad_norm": 0.48828125, "learning_rate": 0.0017735883222434643, "loss": 0.1838, "step": 22930 }, { "epoch": 0.04066033144234068, "grad_norm": 1.25, "learning_rate": 0.001773548847280731, "loss": 0.1752, "step": 22932 }, { "epoch": 0.0406638776076505, "grad_norm": 1.859375, "learning_rate": 0.0017735093693722866, "loss": 0.533, "step": 22934 }, { "epoch": 0.040667423772960315, "grad_norm": 0.55859375, "learning_rate": 0.0017734698885183032, "loss": 0.2504, "step": 22936 }, { "epoch": 0.04067096993827013, "grad_norm": 1.453125, "learning_rate": 0.0017734304047189538, "loss": 0.3188, "step": 22938 }, { "epoch": 0.040674516103579944, "grad_norm": 0.271484375, "learning_rate": 0.0017733909179744108, "loss": 0.2338, "step": 22940 }, { "epoch": 0.04067806226888976, "grad_norm": 0.796875, "learning_rate": 0.0017733514282848473, "loss": 0.2289, "step": 22942 }, { "epoch": 0.04068160843419957, "grad_norm": 2.046875, "learning_rate": 0.0017733119356504357, "loss": 0.2713, "step": 22944 }, { "epoch": 0.04068515459950939, "grad_norm": 0.5234375, "learning_rate": 0.001773272440071349, "loss": 0.2236, "step": 22946 }, { "epoch": 0.0406887007648192, "grad_norm": 0.578125, "learning_rate": 0.0017732329415477595, "loss": 0.1627, "step": 22948 }, { "epoch": 0.04069224693012902, "grad_norm": 0.294921875, "learning_rate": 0.00177319344007984, "loss": 0.2277, "step": 22950 }, { "epoch": 0.04069579309543883, "grad_norm": 0.609375, "learning_rate": 0.0017731539356677635, "loss": 0.2055, "step": 22952 }, { "epoch": 0.040699339260748646, "grad_norm": 0.5390625, "learning_rate": 0.0017731144283117028, "loss": 0.1935, "step": 22954 }, { "epoch": 0.04070288542605846, "grad_norm": 0.390625, "learning_rate": 0.0017730749180118307, "loss": 0.2848, "step": 22956 }, { "epoch": 0.04070643159136828, "grad_norm": 0.396484375, "learning_rate": 0.0017730354047683198, "loss": 0.2068, "step": 22958 }, { "epoch": 0.0407099777566781, "grad_norm": 0.6484375, "learning_rate": 0.0017729958885813431, "loss": 0.198, "step": 22960 }, { "epoch": 0.04071352392198791, "grad_norm": 0.54296875, "learning_rate": 0.001772956369451073, "loss": 0.211, "step": 22962 }, { "epoch": 0.040717070087297726, "grad_norm": 0.546875, "learning_rate": 0.001772916847377683, "loss": 0.2404, "step": 22964 }, { "epoch": 0.04072061625260754, "grad_norm": 0.53515625, "learning_rate": 0.0017728773223613455, "loss": 0.22, "step": 22966 }, { "epoch": 0.040724162417917355, "grad_norm": 0.6328125, "learning_rate": 0.0017728377944022334, "loss": 0.1714, "step": 22968 }, { "epoch": 0.04072770858322717, "grad_norm": 0.283203125, "learning_rate": 0.0017727982635005195, "loss": 0.1405, "step": 22970 }, { "epoch": 0.040731254748536984, "grad_norm": 0.3984375, "learning_rate": 0.0017727587296563767, "loss": 0.1805, "step": 22972 }, { "epoch": 0.0407348009138468, "grad_norm": 0.484375, "learning_rate": 0.001772719192869978, "loss": 0.2062, "step": 22974 }, { "epoch": 0.04073834707915661, "grad_norm": 0.7734375, "learning_rate": 0.0017726796531414967, "loss": 0.2096, "step": 22976 }, { "epoch": 0.04074189324446643, "grad_norm": 0.419921875, "learning_rate": 0.0017726401104711052, "loss": 0.1723, "step": 22978 }, { "epoch": 0.04074543940977625, "grad_norm": 0.74609375, "learning_rate": 0.0017726005648589767, "loss": 0.1873, "step": 22980 }, { "epoch": 0.040748985575086064, "grad_norm": 0.447265625, "learning_rate": 0.0017725610163052837, "loss": 0.3235, "step": 22982 }, { "epoch": 0.04075253174039588, "grad_norm": 1.3984375, "learning_rate": 0.0017725214648101997, "loss": 0.2683, "step": 22984 }, { "epoch": 0.04075607790570569, "grad_norm": 0.60546875, "learning_rate": 0.0017724819103738974, "loss": 0.1775, "step": 22986 }, { "epoch": 0.04075962407101551, "grad_norm": 1.0546875, "learning_rate": 0.0017724423529965497, "loss": 0.2562, "step": 22988 }, { "epoch": 0.04076317023632532, "grad_norm": 0.333984375, "learning_rate": 0.0017724027926783298, "loss": 0.3659, "step": 22990 }, { "epoch": 0.040766716401635136, "grad_norm": 0.462890625, "learning_rate": 0.0017723632294194107, "loss": 0.1962, "step": 22992 }, { "epoch": 0.04077026256694495, "grad_norm": 0.62890625, "learning_rate": 0.0017723236632199656, "loss": 0.2046, "step": 22994 }, { "epoch": 0.040773808732254765, "grad_norm": 0.5234375, "learning_rate": 0.0017722840940801669, "loss": 0.2446, "step": 22996 }, { "epoch": 0.04077735489756458, "grad_norm": 0.52734375, "learning_rate": 0.0017722445220001886, "loss": 0.1942, "step": 22998 }, { "epoch": 0.040780901062874395, "grad_norm": 0.404296875, "learning_rate": 0.001772204946980203, "loss": 0.1216, "step": 23000 }, { "epoch": 0.040784447228184216, "grad_norm": 0.34765625, "learning_rate": 0.0017721653690203833, "loss": 0.2388, "step": 23002 }, { "epoch": 0.04078799339349403, "grad_norm": 5.375, "learning_rate": 0.001772125788120903, "loss": 0.1738, "step": 23004 }, { "epoch": 0.040791539558803845, "grad_norm": 1.7265625, "learning_rate": 0.0017720862042819349, "loss": 0.2508, "step": 23006 }, { "epoch": 0.04079508572411366, "grad_norm": 1.046875, "learning_rate": 0.0017720466175036515, "loss": 0.3182, "step": 23008 }, { "epoch": 0.040798631889423474, "grad_norm": 0.546875, "learning_rate": 0.0017720070277862272, "loss": 0.2032, "step": 23010 }, { "epoch": 0.04080217805473329, "grad_norm": 0.50390625, "learning_rate": 0.0017719674351298342, "loss": 0.1755, "step": 23012 }, { "epoch": 0.0408057242200431, "grad_norm": 0.6796875, "learning_rate": 0.0017719278395346464, "loss": 0.2211, "step": 23014 }, { "epoch": 0.04080927038535292, "grad_norm": 0.1943359375, "learning_rate": 0.0017718882410008361, "loss": 0.1721, "step": 23016 }, { "epoch": 0.04081281655066273, "grad_norm": 0.73046875, "learning_rate": 0.0017718486395285771, "loss": 0.2704, "step": 23018 }, { "epoch": 0.04081636271597255, "grad_norm": 1.7890625, "learning_rate": 0.0017718090351180424, "loss": 0.2036, "step": 23020 }, { "epoch": 0.04081990888128236, "grad_norm": 0.279296875, "learning_rate": 0.0017717694277694053, "loss": 0.1772, "step": 23022 }, { "epoch": 0.040823455046592176, "grad_norm": 0.40625, "learning_rate": 0.001771729817482839, "loss": 0.2523, "step": 23024 }, { "epoch": 0.040827001211902, "grad_norm": 0.55859375, "learning_rate": 0.0017716902042585162, "loss": 0.1698, "step": 23026 }, { "epoch": 0.04083054737721181, "grad_norm": 4.78125, "learning_rate": 0.0017716505880966107, "loss": 0.2229, "step": 23028 }, { "epoch": 0.04083409354252163, "grad_norm": 0.97265625, "learning_rate": 0.0017716109689972959, "loss": 0.2736, "step": 23030 }, { "epoch": 0.04083763970783144, "grad_norm": 0.279296875, "learning_rate": 0.0017715713469607449, "loss": 0.2427, "step": 23032 }, { "epoch": 0.040841185873141256, "grad_norm": 0.458984375, "learning_rate": 0.0017715317219871307, "loss": 0.2546, "step": 23034 }, { "epoch": 0.04084473203845107, "grad_norm": 0.515625, "learning_rate": 0.001771492094076627, "loss": 0.2277, "step": 23036 }, { "epoch": 0.040848278203760885, "grad_norm": 0.30859375, "learning_rate": 0.0017714524632294068, "loss": 0.2006, "step": 23038 }, { "epoch": 0.0408518243690707, "grad_norm": 0.435546875, "learning_rate": 0.0017714128294456437, "loss": 0.2222, "step": 23040 }, { "epoch": 0.040855370534380514, "grad_norm": 0.4453125, "learning_rate": 0.0017713731927255108, "loss": 0.2791, "step": 23042 }, { "epoch": 0.04085891669969033, "grad_norm": 1.1484375, "learning_rate": 0.0017713335530691818, "loss": 0.2175, "step": 23044 }, { "epoch": 0.04086246286500014, "grad_norm": 3.109375, "learning_rate": 0.0017712939104768293, "loss": 0.2304, "step": 23046 }, { "epoch": 0.040866009030309965, "grad_norm": 0.61328125, "learning_rate": 0.0017712542649486274, "loss": 0.3433, "step": 23048 }, { "epoch": 0.04086955519561978, "grad_norm": 0.73046875, "learning_rate": 0.0017712146164847494, "loss": 0.2304, "step": 23050 }, { "epoch": 0.040873101360929594, "grad_norm": 1.2578125, "learning_rate": 0.001771174965085368, "loss": 0.2263, "step": 23052 }, { "epoch": 0.04087664752623941, "grad_norm": 1.0703125, "learning_rate": 0.001771135310750658, "loss": 0.2265, "step": 23054 }, { "epoch": 0.04088019369154922, "grad_norm": 0.82421875, "learning_rate": 0.0017710956534807917, "loss": 0.2856, "step": 23056 }, { "epoch": 0.04088373985685904, "grad_norm": 1.046875, "learning_rate": 0.0017710559932759425, "loss": 0.2033, "step": 23058 }, { "epoch": 0.04088728602216885, "grad_norm": 1.53125, "learning_rate": 0.0017710163301362842, "loss": 0.3443, "step": 23060 }, { "epoch": 0.040890832187478666, "grad_norm": 0.55859375, "learning_rate": 0.0017709766640619906, "loss": 0.2194, "step": 23062 }, { "epoch": 0.04089437835278848, "grad_norm": 1.1484375, "learning_rate": 0.0017709369950532346, "loss": 0.2083, "step": 23064 }, { "epoch": 0.040897924518098296, "grad_norm": 0.56640625, "learning_rate": 0.00177089732311019, "loss": 0.2354, "step": 23066 }, { "epoch": 0.04090147068340811, "grad_norm": 1.234375, "learning_rate": 0.00177085764823303, "loss": 0.2038, "step": 23068 }, { "epoch": 0.04090501684871793, "grad_norm": 2.765625, "learning_rate": 0.0017708179704219286, "loss": 0.3019, "step": 23070 }, { "epoch": 0.040908563014027746, "grad_norm": 1.2421875, "learning_rate": 0.001770778289677059, "loss": 0.2663, "step": 23072 }, { "epoch": 0.04091210917933756, "grad_norm": 0.28125, "learning_rate": 0.0017707386059985948, "loss": 0.2171, "step": 23074 }, { "epoch": 0.040915655344647375, "grad_norm": 0.53125, "learning_rate": 0.0017706989193867094, "loss": 0.1798, "step": 23076 }, { "epoch": 0.04091920150995719, "grad_norm": 0.44140625, "learning_rate": 0.0017706592298415767, "loss": 0.2424, "step": 23078 }, { "epoch": 0.040922747675267004, "grad_norm": 0.37109375, "learning_rate": 0.00177061953736337, "loss": 0.289, "step": 23080 }, { "epoch": 0.04092629384057682, "grad_norm": 0.328125, "learning_rate": 0.0017705798419522629, "loss": 0.1244, "step": 23082 }, { "epoch": 0.040929840005886634, "grad_norm": 1.21875, "learning_rate": 0.0017705401436084293, "loss": 0.2376, "step": 23084 }, { "epoch": 0.04093338617119645, "grad_norm": 0.65625, "learning_rate": 0.0017705004423320425, "loss": 0.1959, "step": 23086 }, { "epoch": 0.04093693233650626, "grad_norm": 0.609375, "learning_rate": 0.0017704607381232764, "loss": 0.239, "step": 23088 }, { "epoch": 0.04094047850181608, "grad_norm": 0.7109375, "learning_rate": 0.0017704210309823043, "loss": 0.254, "step": 23090 }, { "epoch": 0.04094402466712589, "grad_norm": 1.4453125, "learning_rate": 0.0017703813209093001, "loss": 0.2401, "step": 23092 }, { "epoch": 0.04094757083243571, "grad_norm": 0.3046875, "learning_rate": 0.0017703416079044372, "loss": 0.175, "step": 23094 }, { "epoch": 0.04095111699774553, "grad_norm": 0.64453125, "learning_rate": 0.0017703018919678895, "loss": 0.1773, "step": 23096 }, { "epoch": 0.04095466316305534, "grad_norm": 0.2734375, "learning_rate": 0.001770262173099831, "loss": 0.2244, "step": 23098 }, { "epoch": 0.04095820932836516, "grad_norm": 0.326171875, "learning_rate": 0.001770222451300435, "loss": 0.1892, "step": 23100 }, { "epoch": 0.04096175549367497, "grad_norm": 0.5078125, "learning_rate": 0.0017701827265698751, "loss": 0.1675, "step": 23102 }, { "epoch": 0.040965301658984786, "grad_norm": 0.5, "learning_rate": 0.0017701429989083256, "loss": 0.2149, "step": 23104 }, { "epoch": 0.0409688478242946, "grad_norm": 0.43359375, "learning_rate": 0.0017701032683159594, "loss": 0.2345, "step": 23106 }, { "epoch": 0.040972393989604415, "grad_norm": 0.251953125, "learning_rate": 0.0017700635347929511, "loss": 0.1745, "step": 23108 }, { "epoch": 0.04097594015491423, "grad_norm": 0.384765625, "learning_rate": 0.001770023798339474, "loss": 0.2185, "step": 23110 }, { "epoch": 0.040979486320224044, "grad_norm": 0.52734375, "learning_rate": 0.0017699840589557021, "loss": 0.1661, "step": 23112 }, { "epoch": 0.04098303248553386, "grad_norm": 0.5390625, "learning_rate": 0.0017699443166418088, "loss": 0.2182, "step": 23114 }, { "epoch": 0.04098657865084368, "grad_norm": 0.578125, "learning_rate": 0.0017699045713979687, "loss": 0.2326, "step": 23116 }, { "epoch": 0.040990124816153495, "grad_norm": 0.388671875, "learning_rate": 0.001769864823224355, "loss": 0.1849, "step": 23118 }, { "epoch": 0.04099367098146331, "grad_norm": 0.66796875, "learning_rate": 0.0017698250721211414, "loss": 0.2014, "step": 23120 }, { "epoch": 0.040997217146773124, "grad_norm": 2.703125, "learning_rate": 0.0017697853180885022, "loss": 0.4575, "step": 23122 }, { "epoch": 0.04100076331208294, "grad_norm": 0.49609375, "learning_rate": 0.001769745561126611, "loss": 0.1951, "step": 23124 }, { "epoch": 0.04100430947739275, "grad_norm": 0.396484375, "learning_rate": 0.0017697058012356415, "loss": 0.1968, "step": 23126 }, { "epoch": 0.04100785564270257, "grad_norm": 1.125, "learning_rate": 0.0017696660384157684, "loss": 0.2112, "step": 23128 }, { "epoch": 0.04101140180801238, "grad_norm": 0.6796875, "learning_rate": 0.0017696262726671645, "loss": 0.1848, "step": 23130 }, { "epoch": 0.0410149479733222, "grad_norm": 0.279296875, "learning_rate": 0.0017695865039900048, "loss": 0.1366, "step": 23132 }, { "epoch": 0.04101849413863201, "grad_norm": 0.2578125, "learning_rate": 0.001769546732384462, "loss": 0.2428, "step": 23134 }, { "epoch": 0.041022040303941826, "grad_norm": 0.57421875, "learning_rate": 0.0017695069578507112, "loss": 0.2204, "step": 23136 }, { "epoch": 0.04102558646925165, "grad_norm": 0.796875, "learning_rate": 0.001769467180388926, "loss": 0.1482, "step": 23138 }, { "epoch": 0.04102913263456146, "grad_norm": 0.333984375, "learning_rate": 0.0017694273999992799, "loss": 0.2068, "step": 23140 }, { "epoch": 0.041032678799871276, "grad_norm": 0.36328125, "learning_rate": 0.0017693876166819471, "loss": 0.2585, "step": 23142 }, { "epoch": 0.04103622496518109, "grad_norm": 1.109375, "learning_rate": 0.001769347830437102, "loss": 0.229, "step": 23144 }, { "epoch": 0.041039771130490905, "grad_norm": 0.84765625, "learning_rate": 0.0017693080412649184, "loss": 0.2148, "step": 23146 }, { "epoch": 0.04104331729580072, "grad_norm": 0.8515625, "learning_rate": 0.0017692682491655695, "loss": 0.1616, "step": 23148 }, { "epoch": 0.041046863461110535, "grad_norm": 0.3203125, "learning_rate": 0.0017692284541392306, "loss": 0.2977, "step": 23150 }, { "epoch": 0.04105040962642035, "grad_norm": 0.50390625, "learning_rate": 0.001769188656186075, "loss": 0.1748, "step": 23152 }, { "epoch": 0.041053955791730164, "grad_norm": 1.40625, "learning_rate": 0.001769148855306277, "loss": 0.2493, "step": 23154 }, { "epoch": 0.04105750195703998, "grad_norm": 0.1416015625, "learning_rate": 0.0017691090515000107, "loss": 0.1821, "step": 23156 }, { "epoch": 0.04106104812234979, "grad_norm": 0.421875, "learning_rate": 0.00176906924476745, "loss": 0.178, "step": 23158 }, { "epoch": 0.04106459428765961, "grad_norm": 5.46875, "learning_rate": 0.001769029435108769, "loss": 0.2724, "step": 23160 }, { "epoch": 0.04106814045296943, "grad_norm": 0.2734375, "learning_rate": 0.001768989622524142, "loss": 0.1936, "step": 23162 }, { "epoch": 0.04107168661827924, "grad_norm": 0.421875, "learning_rate": 0.001768949807013743, "loss": 0.1615, "step": 23164 }, { "epoch": 0.04107523278358906, "grad_norm": 0.458984375, "learning_rate": 0.0017689099885777459, "loss": 0.2878, "step": 23166 }, { "epoch": 0.04107877894889887, "grad_norm": 0.71484375, "learning_rate": 0.001768870167216325, "loss": 0.2435, "step": 23168 }, { "epoch": 0.04108232511420869, "grad_norm": 0.4609375, "learning_rate": 0.0017688303429296548, "loss": 0.2074, "step": 23170 }, { "epoch": 0.0410858712795185, "grad_norm": 0.333984375, "learning_rate": 0.0017687905157179093, "loss": 0.1941, "step": 23172 }, { "epoch": 0.041089417444828316, "grad_norm": 0.484375, "learning_rate": 0.0017687506855812623, "loss": 0.223, "step": 23174 }, { "epoch": 0.04109296361013813, "grad_norm": 0.35546875, "learning_rate": 0.0017687108525198882, "loss": 0.1845, "step": 23176 }, { "epoch": 0.041096509775447945, "grad_norm": 0.4140625, "learning_rate": 0.0017686710165339611, "loss": 0.167, "step": 23178 }, { "epoch": 0.04110005594075776, "grad_norm": 2.734375, "learning_rate": 0.001768631177623656, "loss": 0.2387, "step": 23180 }, { "epoch": 0.041103602106067574, "grad_norm": 0.294921875, "learning_rate": 0.0017685913357891462, "loss": 0.1821, "step": 23182 }, { "epoch": 0.041107148271377396, "grad_norm": 0.30078125, "learning_rate": 0.0017685514910306059, "loss": 0.2343, "step": 23184 }, { "epoch": 0.04111069443668721, "grad_norm": 0.244140625, "learning_rate": 0.0017685116433482102, "loss": 0.1413, "step": 23186 }, { "epoch": 0.041114240601997025, "grad_norm": 0.55859375, "learning_rate": 0.0017684717927421328, "loss": 0.2076, "step": 23188 }, { "epoch": 0.04111778676730684, "grad_norm": 0.4609375, "learning_rate": 0.0017684319392125478, "loss": 0.1732, "step": 23190 }, { "epoch": 0.041121332932616654, "grad_norm": 1.6875, "learning_rate": 0.00176839208275963, "loss": 0.2775, "step": 23192 }, { "epoch": 0.04112487909792647, "grad_norm": 0.6328125, "learning_rate": 0.0017683522233835534, "loss": 0.1971, "step": 23194 }, { "epoch": 0.04112842526323628, "grad_norm": 1.0703125, "learning_rate": 0.0017683123610844923, "loss": 0.2487, "step": 23196 }, { "epoch": 0.0411319714285461, "grad_norm": 3.234375, "learning_rate": 0.0017682724958626216, "loss": 0.3749, "step": 23198 }, { "epoch": 0.04113551759385591, "grad_norm": 0.26953125, "learning_rate": 0.0017682326277181147, "loss": 0.1859, "step": 23200 }, { "epoch": 0.04113906375916573, "grad_norm": 0.75, "learning_rate": 0.0017681927566511467, "loss": 0.2172, "step": 23202 }, { "epoch": 0.04114260992447554, "grad_norm": 1.15625, "learning_rate": 0.0017681528826618915, "loss": 0.1834, "step": 23204 }, { "epoch": 0.04114615608978536, "grad_norm": 0.30859375, "learning_rate": 0.001768113005750524, "loss": 0.1862, "step": 23206 }, { "epoch": 0.04114970225509518, "grad_norm": 0.427734375, "learning_rate": 0.001768073125917218, "loss": 0.2102, "step": 23208 }, { "epoch": 0.04115324842040499, "grad_norm": 1.7421875, "learning_rate": 0.0017680332431621484, "loss": 0.2361, "step": 23210 }, { "epoch": 0.04115679458571481, "grad_norm": 0.3359375, "learning_rate": 0.0017679933574854894, "loss": 0.1733, "step": 23212 }, { "epoch": 0.04116034075102462, "grad_norm": 0.79296875, "learning_rate": 0.0017679534688874155, "loss": 0.2199, "step": 23214 }, { "epoch": 0.041163886916334436, "grad_norm": 0.8828125, "learning_rate": 0.001767913577368101, "loss": 0.2479, "step": 23216 }, { "epoch": 0.04116743308164425, "grad_norm": 0.5546875, "learning_rate": 0.0017678736829277203, "loss": 0.2076, "step": 23218 }, { "epoch": 0.041170979246954065, "grad_norm": 0.48828125, "learning_rate": 0.0017678337855664486, "loss": 0.2024, "step": 23220 }, { "epoch": 0.04117452541226388, "grad_norm": 0.57421875, "learning_rate": 0.0017677938852844594, "loss": 0.182, "step": 23222 }, { "epoch": 0.041178071577573694, "grad_norm": 0.271484375, "learning_rate": 0.0017677539820819279, "loss": 0.1936, "step": 23224 }, { "epoch": 0.04118161774288351, "grad_norm": 0.439453125, "learning_rate": 0.0017677140759590282, "loss": 0.2759, "step": 23226 }, { "epoch": 0.04118516390819332, "grad_norm": 0.51171875, "learning_rate": 0.0017676741669159352, "loss": 0.316, "step": 23228 }, { "epoch": 0.041188710073503144, "grad_norm": 1.484375, "learning_rate": 0.0017676342549528229, "loss": 0.409, "step": 23230 }, { "epoch": 0.04119225623881296, "grad_norm": 0.349609375, "learning_rate": 0.001767594340069866, "loss": 0.2155, "step": 23232 }, { "epoch": 0.041195802404122774, "grad_norm": 0.486328125, "learning_rate": 0.0017675544222672398, "loss": 0.2108, "step": 23234 }, { "epoch": 0.04119934856943259, "grad_norm": 0.47265625, "learning_rate": 0.001767514501545118, "loss": 0.1795, "step": 23236 }, { "epoch": 0.0412028947347424, "grad_norm": 0.451171875, "learning_rate": 0.0017674745779036759, "loss": 0.2196, "step": 23238 }, { "epoch": 0.04120644090005222, "grad_norm": 0.68359375, "learning_rate": 0.0017674346513430871, "loss": 0.1772, "step": 23240 }, { "epoch": 0.04120998706536203, "grad_norm": 0.88671875, "learning_rate": 0.0017673947218635273, "loss": 0.2201, "step": 23242 }, { "epoch": 0.041213533230671846, "grad_norm": 0.984375, "learning_rate": 0.0017673547894651704, "loss": 0.2178, "step": 23244 }, { "epoch": 0.04121707939598166, "grad_norm": 0.86328125, "learning_rate": 0.0017673148541481913, "loss": 0.1856, "step": 23246 }, { "epoch": 0.041220625561291475, "grad_norm": 0.330078125, "learning_rate": 0.001767274915912765, "loss": 0.1576, "step": 23248 }, { "epoch": 0.04122417172660129, "grad_norm": 0.421875, "learning_rate": 0.0017672349747590655, "loss": 0.172, "step": 23250 }, { "epoch": 0.04122771789191111, "grad_norm": 0.7578125, "learning_rate": 0.0017671950306872679, "loss": 0.1967, "step": 23252 }, { "epoch": 0.041231264057220926, "grad_norm": 0.330078125, "learning_rate": 0.0017671550836975468, "loss": 0.193, "step": 23254 }, { "epoch": 0.04123481022253074, "grad_norm": 0.419921875, "learning_rate": 0.0017671151337900768, "loss": 0.2208, "step": 23256 }, { "epoch": 0.041238356387840555, "grad_norm": 0.232421875, "learning_rate": 0.0017670751809650328, "loss": 0.2057, "step": 23258 }, { "epoch": 0.04124190255315037, "grad_norm": 1.5234375, "learning_rate": 0.0017670352252225894, "loss": 0.2621, "step": 23260 }, { "epoch": 0.041245448718460184, "grad_norm": 0.271484375, "learning_rate": 0.0017669952665629212, "loss": 0.202, "step": 23262 }, { "epoch": 0.04124899488377, "grad_norm": 0.52734375, "learning_rate": 0.0017669553049862036, "loss": 0.1683, "step": 23264 }, { "epoch": 0.04125254104907981, "grad_norm": 0.2890625, "learning_rate": 0.0017669153404926104, "loss": 0.2327, "step": 23266 }, { "epoch": 0.04125608721438963, "grad_norm": 0.61328125, "learning_rate": 0.0017668753730823172, "loss": 0.1284, "step": 23268 }, { "epoch": 0.04125963337969944, "grad_norm": 0.25390625, "learning_rate": 0.0017668354027554985, "loss": 0.1926, "step": 23270 }, { "epoch": 0.04126317954500926, "grad_norm": 0.734375, "learning_rate": 0.001766795429512329, "loss": 0.155, "step": 23272 }, { "epoch": 0.04126672571031908, "grad_norm": 1.1015625, "learning_rate": 0.0017667554533529835, "loss": 0.449, "step": 23274 }, { "epoch": 0.04127027187562889, "grad_norm": 0.7578125, "learning_rate": 0.0017667154742776372, "loss": 0.226, "step": 23276 }, { "epoch": 0.04127381804093871, "grad_norm": 2.421875, "learning_rate": 0.0017666754922864645, "loss": 0.5045, "step": 23278 }, { "epoch": 0.04127736420624852, "grad_norm": 0.484375, "learning_rate": 0.0017666355073796406, "loss": 0.2501, "step": 23280 }, { "epoch": 0.04128091037155834, "grad_norm": 0.333984375, "learning_rate": 0.00176659551955734, "loss": 0.1933, "step": 23282 }, { "epoch": 0.04128445653686815, "grad_norm": 0.443359375, "learning_rate": 0.001766555528819738, "loss": 0.2044, "step": 23284 }, { "epoch": 0.041288002702177966, "grad_norm": 0.2177734375, "learning_rate": 0.001766515535167009, "loss": 0.2059, "step": 23286 }, { "epoch": 0.04129154886748778, "grad_norm": 0.67578125, "learning_rate": 0.0017664755385993285, "loss": 0.1661, "step": 23288 }, { "epoch": 0.041295095032797595, "grad_norm": 0.80078125, "learning_rate": 0.001766435539116871, "loss": 0.2243, "step": 23290 }, { "epoch": 0.04129864119810741, "grad_norm": 0.2890625, "learning_rate": 0.0017663955367198119, "loss": 0.1847, "step": 23292 }, { "epoch": 0.041302187363417224, "grad_norm": 1.6328125, "learning_rate": 0.0017663555314083254, "loss": 0.6411, "step": 23294 }, { "epoch": 0.04130573352872704, "grad_norm": 0.28515625, "learning_rate": 0.001766315523182587, "loss": 0.1896, "step": 23296 }, { "epoch": 0.04130927969403686, "grad_norm": 0.640625, "learning_rate": 0.0017662755120427713, "loss": 0.1905, "step": 23298 }, { "epoch": 0.041312825859346675, "grad_norm": 0.236328125, "learning_rate": 0.001766235497989054, "loss": 0.2125, "step": 23300 }, { "epoch": 0.04131637202465649, "grad_norm": 1.015625, "learning_rate": 0.0017661954810216092, "loss": 0.2027, "step": 23302 }, { "epoch": 0.041319918189966304, "grad_norm": 1.3359375, "learning_rate": 0.0017661554611406126, "loss": 0.1939, "step": 23304 }, { "epoch": 0.04132346435527612, "grad_norm": 0.51953125, "learning_rate": 0.001766115438346239, "loss": 0.2046, "step": 23306 }, { "epoch": 0.04132701052058593, "grad_norm": 0.314453125, "learning_rate": 0.001766075412638663, "loss": 0.1783, "step": 23308 }, { "epoch": 0.04133055668589575, "grad_norm": 0.443359375, "learning_rate": 0.0017660353840180606, "loss": 0.2686, "step": 23310 }, { "epoch": 0.04133410285120556, "grad_norm": 0.48046875, "learning_rate": 0.0017659953524846062, "loss": 0.1926, "step": 23312 }, { "epoch": 0.041337649016515376, "grad_norm": 0.78515625, "learning_rate": 0.0017659553180384747, "loss": 0.2661, "step": 23314 }, { "epoch": 0.04134119518182519, "grad_norm": 0.54296875, "learning_rate": 0.0017659152806798416, "loss": 0.2497, "step": 23316 }, { "epoch": 0.041344741347135006, "grad_norm": 0.4765625, "learning_rate": 0.001765875240408882, "loss": 0.1757, "step": 23318 }, { "epoch": 0.04134828751244483, "grad_norm": 0.453125, "learning_rate": 0.0017658351972257708, "loss": 0.1618, "step": 23320 }, { "epoch": 0.04135183367775464, "grad_norm": 0.26953125, "learning_rate": 0.0017657951511306832, "loss": 0.1876, "step": 23322 }, { "epoch": 0.041355379843064456, "grad_norm": 0.84765625, "learning_rate": 0.0017657551021237942, "loss": 0.3632, "step": 23324 }, { "epoch": 0.04135892600837427, "grad_norm": 0.83203125, "learning_rate": 0.001765715050205279, "loss": 0.2406, "step": 23326 }, { "epoch": 0.041362472173684085, "grad_norm": 0.341796875, "learning_rate": 0.0017656749953753133, "loss": 0.2043, "step": 23328 }, { "epoch": 0.0413660183389939, "grad_norm": 0.30859375, "learning_rate": 0.0017656349376340716, "loss": 0.1712, "step": 23330 }, { "epoch": 0.041369564504303714, "grad_norm": 0.34375, "learning_rate": 0.0017655948769817293, "loss": 0.1547, "step": 23332 }, { "epoch": 0.04137311066961353, "grad_norm": 0.498046875, "learning_rate": 0.0017655548134184618, "loss": 0.1571, "step": 23334 }, { "epoch": 0.041376656834923344, "grad_norm": 0.7109375, "learning_rate": 0.0017655147469444442, "loss": 0.265, "step": 23336 }, { "epoch": 0.04138020300023316, "grad_norm": 0.6484375, "learning_rate": 0.0017654746775598513, "loss": 0.2537, "step": 23338 }, { "epoch": 0.04138374916554297, "grad_norm": 1.5234375, "learning_rate": 0.0017654346052648588, "loss": 0.2825, "step": 23340 }, { "epoch": 0.041387295330852794, "grad_norm": 0.87890625, "learning_rate": 0.0017653945300596422, "loss": 0.2425, "step": 23342 }, { "epoch": 0.04139084149616261, "grad_norm": 0.60546875, "learning_rate": 0.0017653544519443762, "loss": 0.201, "step": 23344 }, { "epoch": 0.04139438766147242, "grad_norm": 1.140625, "learning_rate": 0.0017653143709192363, "loss": 0.2244, "step": 23346 }, { "epoch": 0.04139793382678224, "grad_norm": 0.37890625, "learning_rate": 0.0017652742869843977, "loss": 0.2067, "step": 23348 }, { "epoch": 0.04140147999209205, "grad_norm": 0.349609375, "learning_rate": 0.0017652342001400361, "loss": 0.2372, "step": 23350 }, { "epoch": 0.04140502615740187, "grad_norm": 0.37890625, "learning_rate": 0.0017651941103863265, "loss": 0.4055, "step": 23352 }, { "epoch": 0.04140857232271168, "grad_norm": 0.8203125, "learning_rate": 0.001765154017723444, "loss": 0.2737, "step": 23354 }, { "epoch": 0.041412118488021496, "grad_norm": 0.322265625, "learning_rate": 0.0017651139221515643, "loss": 0.1615, "step": 23356 }, { "epoch": 0.04141566465333131, "grad_norm": 0.248046875, "learning_rate": 0.0017650738236708625, "loss": 0.1665, "step": 23358 }, { "epoch": 0.041419210818641125, "grad_norm": 0.294921875, "learning_rate": 0.0017650337222815144, "loss": 0.217, "step": 23360 }, { "epoch": 0.04142275698395094, "grad_norm": 0.88671875, "learning_rate": 0.0017649936179836949, "loss": 0.176, "step": 23362 }, { "epoch": 0.041426303149260754, "grad_norm": 0.3046875, "learning_rate": 0.0017649535107775798, "loss": 0.2306, "step": 23364 }, { "epoch": 0.041429849314570576, "grad_norm": 0.66796875, "learning_rate": 0.0017649134006633442, "loss": 0.2281, "step": 23366 }, { "epoch": 0.04143339547988039, "grad_norm": 0.259765625, "learning_rate": 0.0017648732876411634, "loss": 0.2211, "step": 23368 }, { "epoch": 0.041436941645190205, "grad_norm": 1.3125, "learning_rate": 0.0017648331717112133, "loss": 0.1993, "step": 23370 }, { "epoch": 0.04144048781050002, "grad_norm": 0.53515625, "learning_rate": 0.001764793052873669, "loss": 0.1779, "step": 23372 }, { "epoch": 0.041444033975809834, "grad_norm": 0.63671875, "learning_rate": 0.0017647529311287063, "loss": 0.2384, "step": 23374 }, { "epoch": 0.04144758014111965, "grad_norm": 1.21875, "learning_rate": 0.0017647128064764998, "loss": 0.2, "step": 23376 }, { "epoch": 0.04145112630642946, "grad_norm": 0.427734375, "learning_rate": 0.0017646726789172262, "loss": 0.191, "step": 23378 }, { "epoch": 0.04145467247173928, "grad_norm": 0.30859375, "learning_rate": 0.00176463254845106, "loss": 0.3467, "step": 23380 }, { "epoch": 0.04145821863704909, "grad_norm": 0.333984375, "learning_rate": 0.001764592415078177, "loss": 0.4592, "step": 23382 }, { "epoch": 0.04146176480235891, "grad_norm": 0.337890625, "learning_rate": 0.0017645522787987534, "loss": 0.2334, "step": 23384 }, { "epoch": 0.04146531096766872, "grad_norm": 0.333984375, "learning_rate": 0.0017645121396129637, "loss": 0.1927, "step": 23386 }, { "epoch": 0.04146885713297854, "grad_norm": 0.439453125, "learning_rate": 0.0017644719975209839, "loss": 0.2189, "step": 23388 }, { "epoch": 0.04147240329828836, "grad_norm": 0.2265625, "learning_rate": 0.0017644318525229898, "loss": 0.1522, "step": 23390 }, { "epoch": 0.04147594946359817, "grad_norm": 0.65625, "learning_rate": 0.0017643917046191566, "loss": 0.2035, "step": 23392 }, { "epoch": 0.041479495628907986, "grad_norm": 1.8203125, "learning_rate": 0.00176435155380966, "loss": 0.2391, "step": 23394 }, { "epoch": 0.0414830417942178, "grad_norm": 0.50390625, "learning_rate": 0.0017643114000946759, "loss": 0.2065, "step": 23396 }, { "epoch": 0.041486587959527615, "grad_norm": 0.46875, "learning_rate": 0.001764271243474379, "loss": 0.1847, "step": 23398 }, { "epoch": 0.04149013412483743, "grad_norm": 0.625, "learning_rate": 0.0017642310839489459, "loss": 0.2559, "step": 23400 }, { "epoch": 0.041493680290147245, "grad_norm": 0.2890625, "learning_rate": 0.001764190921518552, "loss": 0.2399, "step": 23402 }, { "epoch": 0.04149722645545706, "grad_norm": 0.2060546875, "learning_rate": 0.0017641507561833726, "loss": 0.2248, "step": 23404 }, { "epoch": 0.041500772620766874, "grad_norm": 0.52734375, "learning_rate": 0.0017641105879435837, "loss": 0.1774, "step": 23406 }, { "epoch": 0.04150431878607669, "grad_norm": 0.30078125, "learning_rate": 0.0017640704167993606, "loss": 0.1816, "step": 23408 }, { "epoch": 0.04150786495138651, "grad_norm": 4.0, "learning_rate": 0.0017640302427508795, "loss": 0.1873, "step": 23410 }, { "epoch": 0.041511411116696324, "grad_norm": 0.7578125, "learning_rate": 0.0017639900657983154, "loss": 0.2587, "step": 23412 }, { "epoch": 0.04151495728200614, "grad_norm": 0.443359375, "learning_rate": 0.001763949885941845, "loss": 0.3925, "step": 23414 }, { "epoch": 0.04151850344731595, "grad_norm": 0.423828125, "learning_rate": 0.001763909703181643, "loss": 0.2428, "step": 23416 }, { "epoch": 0.04152204961262577, "grad_norm": 0.62109375, "learning_rate": 0.0017638695175178861, "loss": 0.134, "step": 23418 }, { "epoch": 0.04152559577793558, "grad_norm": 0.65625, "learning_rate": 0.0017638293289507492, "loss": 0.2133, "step": 23420 }, { "epoch": 0.0415291419432454, "grad_norm": 0.76953125, "learning_rate": 0.0017637891374804085, "loss": 0.1615, "step": 23422 }, { "epoch": 0.04153268810855521, "grad_norm": 0.30859375, "learning_rate": 0.0017637489431070396, "loss": 0.2235, "step": 23424 }, { "epoch": 0.041536234273865026, "grad_norm": 0.263671875, "learning_rate": 0.0017637087458308187, "loss": 0.1985, "step": 23426 }, { "epoch": 0.04153978043917484, "grad_norm": 0.5234375, "learning_rate": 0.0017636685456519211, "loss": 0.1823, "step": 23428 }, { "epoch": 0.041543326604484655, "grad_norm": 0.291015625, "learning_rate": 0.0017636283425705227, "loss": 0.1806, "step": 23430 }, { "epoch": 0.04154687276979447, "grad_norm": 0.30078125, "learning_rate": 0.0017635881365867993, "loss": 0.1623, "step": 23432 }, { "epoch": 0.04155041893510429, "grad_norm": 0.345703125, "learning_rate": 0.001763547927700927, "loss": 0.1854, "step": 23434 }, { "epoch": 0.041553965100414106, "grad_norm": 1.96875, "learning_rate": 0.0017635077159130815, "loss": 0.4208, "step": 23436 }, { "epoch": 0.04155751126572392, "grad_norm": 0.416015625, "learning_rate": 0.0017634675012234387, "loss": 0.2213, "step": 23438 }, { "epoch": 0.041561057431033735, "grad_norm": 0.390625, "learning_rate": 0.0017634272836321743, "loss": 0.2013, "step": 23440 }, { "epoch": 0.04156460359634355, "grad_norm": 1.015625, "learning_rate": 0.0017633870631394645, "loss": 0.2571, "step": 23442 }, { "epoch": 0.041568149761653364, "grad_norm": 0.28125, "learning_rate": 0.001763346839745485, "loss": 0.3577, "step": 23444 }, { "epoch": 0.04157169592696318, "grad_norm": 0.66796875, "learning_rate": 0.0017633066134504119, "loss": 0.2191, "step": 23446 }, { "epoch": 0.04157524209227299, "grad_norm": 0.6484375, "learning_rate": 0.0017632663842544205, "loss": 0.188, "step": 23448 }, { "epoch": 0.04157878825758281, "grad_norm": 1.1484375, "learning_rate": 0.0017632261521576876, "loss": 0.2824, "step": 23450 }, { "epoch": 0.04158233442289262, "grad_norm": 1.96875, "learning_rate": 0.0017631859171603887, "loss": 0.19, "step": 23452 }, { "epoch": 0.04158588058820244, "grad_norm": 0.5859375, "learning_rate": 0.0017631456792626997, "loss": 0.1951, "step": 23454 }, { "epoch": 0.04158942675351226, "grad_norm": 0.494140625, "learning_rate": 0.0017631054384647964, "loss": 0.2401, "step": 23456 }, { "epoch": 0.04159297291882207, "grad_norm": 0.412109375, "learning_rate": 0.0017630651947668557, "loss": 0.2227, "step": 23458 }, { "epoch": 0.04159651908413189, "grad_norm": 0.8828125, "learning_rate": 0.0017630249481690526, "loss": 0.1963, "step": 23460 }, { "epoch": 0.0416000652494417, "grad_norm": 0.51953125, "learning_rate": 0.0017629846986715637, "loss": 0.2463, "step": 23462 }, { "epoch": 0.041603611414751517, "grad_norm": 0.341796875, "learning_rate": 0.0017629444462745648, "loss": 0.2246, "step": 23464 }, { "epoch": 0.04160715758006133, "grad_norm": 0.5703125, "learning_rate": 0.001762904190978232, "loss": 0.1829, "step": 23466 }, { "epoch": 0.041610703745371146, "grad_norm": 0.40234375, "learning_rate": 0.0017628639327827412, "loss": 0.2002, "step": 23468 }, { "epoch": 0.04161424991068096, "grad_norm": 0.2451171875, "learning_rate": 0.0017628236716882685, "loss": 0.1941, "step": 23470 }, { "epoch": 0.041617796075990775, "grad_norm": 0.84375, "learning_rate": 0.0017627834076949902, "loss": 0.1834, "step": 23472 }, { "epoch": 0.04162134224130059, "grad_norm": 1.3046875, "learning_rate": 0.0017627431408030825, "loss": 0.2683, "step": 23474 }, { "epoch": 0.041624888406610404, "grad_norm": 0.3671875, "learning_rate": 0.0017627028710127208, "loss": 0.2192, "step": 23476 }, { "epoch": 0.041628434571920225, "grad_norm": 0.349609375, "learning_rate": 0.0017626625983240822, "loss": 0.1581, "step": 23478 }, { "epoch": 0.04163198073723004, "grad_norm": 0.59375, "learning_rate": 0.001762622322737342, "loss": 0.1884, "step": 23480 }, { "epoch": 0.041635526902539854, "grad_norm": 0.291015625, "learning_rate": 0.0017625820442526768, "loss": 0.1589, "step": 23482 }, { "epoch": 0.04163907306784967, "grad_norm": 1.0, "learning_rate": 0.0017625417628702625, "loss": 0.2005, "step": 23484 }, { "epoch": 0.041642619233159484, "grad_norm": 0.33203125, "learning_rate": 0.0017625014785902756, "loss": 0.2145, "step": 23486 }, { "epoch": 0.0416461653984693, "grad_norm": 0.41796875, "learning_rate": 0.001762461191412892, "loss": 0.2566, "step": 23488 }, { "epoch": 0.04164971156377911, "grad_norm": 0.58203125, "learning_rate": 0.0017624209013382874, "loss": 0.2069, "step": 23490 }, { "epoch": 0.04165325772908893, "grad_norm": 0.50390625, "learning_rate": 0.0017623806083666392, "loss": 0.2237, "step": 23492 }, { "epoch": 0.04165680389439874, "grad_norm": 0.6640625, "learning_rate": 0.0017623403124981225, "loss": 0.2271, "step": 23494 }, { "epoch": 0.041660350059708556, "grad_norm": 0.50390625, "learning_rate": 0.0017623000137329144, "loss": 0.2027, "step": 23496 }, { "epoch": 0.04166389622501837, "grad_norm": 0.271484375, "learning_rate": 0.0017622597120711906, "loss": 0.264, "step": 23498 }, { "epoch": 0.041667442390328185, "grad_norm": 0.52734375, "learning_rate": 0.0017622194075131274, "loss": 0.2279, "step": 23500 }, { "epoch": 0.04167098855563801, "grad_norm": 0.330078125, "learning_rate": 0.0017621791000589015, "loss": 0.2074, "step": 23502 }, { "epoch": 0.04167453472094782, "grad_norm": 0.443359375, "learning_rate": 0.0017621387897086885, "loss": 0.2275, "step": 23504 }, { "epoch": 0.041678080886257636, "grad_norm": 0.2470703125, "learning_rate": 0.0017620984764626652, "loss": 0.2362, "step": 23506 }, { "epoch": 0.04168162705156745, "grad_norm": 0.40625, "learning_rate": 0.0017620581603210077, "loss": 0.1797, "step": 23508 }, { "epoch": 0.041685173216877265, "grad_norm": 1.8046875, "learning_rate": 0.0017620178412838926, "loss": 0.3199, "step": 23510 }, { "epoch": 0.04168871938218708, "grad_norm": 1.703125, "learning_rate": 0.0017619775193514957, "loss": 0.3936, "step": 23512 }, { "epoch": 0.041692265547496894, "grad_norm": 0.8515625, "learning_rate": 0.0017619371945239942, "loss": 0.2107, "step": 23514 }, { "epoch": 0.04169581171280671, "grad_norm": 0.5703125, "learning_rate": 0.0017618968668015632, "loss": 0.2593, "step": 23516 }, { "epoch": 0.04169935787811652, "grad_norm": 2.09375, "learning_rate": 0.0017618565361843801, "loss": 0.193, "step": 23518 }, { "epoch": 0.04170290404342634, "grad_norm": 0.255859375, "learning_rate": 0.001761816202672621, "loss": 0.2088, "step": 23520 }, { "epoch": 0.04170645020873615, "grad_norm": 2.109375, "learning_rate": 0.0017617758662664621, "loss": 0.304, "step": 23522 }, { "epoch": 0.041709996374045974, "grad_norm": 0.73046875, "learning_rate": 0.00176173552696608, "loss": 0.149, "step": 23524 }, { "epoch": 0.04171354253935579, "grad_norm": 0.318359375, "learning_rate": 0.0017616951847716513, "loss": 0.2706, "step": 23526 }, { "epoch": 0.0417170887046656, "grad_norm": 0.5078125, "learning_rate": 0.0017616548396833522, "loss": 0.1907, "step": 23528 }, { "epoch": 0.04172063486997542, "grad_norm": 2.03125, "learning_rate": 0.001761614491701359, "loss": 0.2211, "step": 23530 }, { "epoch": 0.04172418103528523, "grad_norm": 0.51953125, "learning_rate": 0.0017615741408258483, "loss": 0.1828, "step": 23532 }, { "epoch": 0.04172772720059505, "grad_norm": 0.6953125, "learning_rate": 0.0017615337870569966, "loss": 0.2412, "step": 23534 }, { "epoch": 0.04173127336590486, "grad_norm": 0.60546875, "learning_rate": 0.0017614934303949808, "loss": 0.2206, "step": 23536 }, { "epoch": 0.041734819531214676, "grad_norm": 0.40234375, "learning_rate": 0.0017614530708399763, "loss": 0.2399, "step": 23538 }, { "epoch": 0.04173836569652449, "grad_norm": 0.36328125, "learning_rate": 0.0017614127083921606, "loss": 0.1973, "step": 23540 }, { "epoch": 0.041741911861834305, "grad_norm": 0.46484375, "learning_rate": 0.00176137234305171, "loss": 0.178, "step": 23542 }, { "epoch": 0.04174545802714412, "grad_norm": 0.75390625, "learning_rate": 0.0017613319748188009, "loss": 0.1737, "step": 23544 }, { "epoch": 0.04174900419245394, "grad_norm": 0.73046875, "learning_rate": 0.0017612916036936098, "loss": 0.2034, "step": 23546 }, { "epoch": 0.041752550357763756, "grad_norm": 0.251953125, "learning_rate": 0.0017612512296763135, "loss": 0.2177, "step": 23548 }, { "epoch": 0.04175609652307357, "grad_norm": 0.2890625, "learning_rate": 0.0017612108527670883, "loss": 0.2208, "step": 23550 }, { "epoch": 0.041759642688383385, "grad_norm": 0.625, "learning_rate": 0.0017611704729661108, "loss": 0.2261, "step": 23552 }, { "epoch": 0.0417631888536932, "grad_norm": 0.279296875, "learning_rate": 0.0017611300902735578, "loss": 0.1859, "step": 23554 }, { "epoch": 0.041766735019003014, "grad_norm": 0.57421875, "learning_rate": 0.001761089704689606, "loss": 0.2082, "step": 23556 }, { "epoch": 0.04177028118431283, "grad_norm": 0.5703125, "learning_rate": 0.0017610493162144316, "loss": 0.4987, "step": 23558 }, { "epoch": 0.04177382734962264, "grad_norm": 0.45703125, "learning_rate": 0.0017610089248482116, "loss": 0.216, "step": 23560 }, { "epoch": 0.04177737351493246, "grad_norm": 0.54296875, "learning_rate": 0.0017609685305911226, "loss": 0.1714, "step": 23562 }, { "epoch": 0.04178091968024227, "grad_norm": 0.48828125, "learning_rate": 0.001760928133443341, "loss": 0.1958, "step": 23564 }, { "epoch": 0.041784465845552086, "grad_norm": 0.443359375, "learning_rate": 0.0017608877334050438, "loss": 0.3131, "step": 23566 }, { "epoch": 0.0417880120108619, "grad_norm": 0.283203125, "learning_rate": 0.0017608473304764074, "loss": 0.2163, "step": 23568 }, { "epoch": 0.04179155817617172, "grad_norm": 0.44140625, "learning_rate": 0.0017608069246576087, "loss": 0.2302, "step": 23570 }, { "epoch": 0.04179510434148154, "grad_norm": 1.953125, "learning_rate": 0.0017607665159488244, "loss": 0.2463, "step": 23572 }, { "epoch": 0.04179865050679135, "grad_norm": 0.703125, "learning_rate": 0.001760726104350231, "loss": 0.2713, "step": 23574 }, { "epoch": 0.041802196672101166, "grad_norm": 0.53125, "learning_rate": 0.0017606856898620055, "loss": 0.1806, "step": 23576 }, { "epoch": 0.04180574283741098, "grad_norm": 1.21875, "learning_rate": 0.0017606452724843247, "loss": 0.1866, "step": 23578 }, { "epoch": 0.041809289002720795, "grad_norm": 1.2890625, "learning_rate": 0.0017606048522173653, "loss": 0.2577, "step": 23580 }, { "epoch": 0.04181283516803061, "grad_norm": 0.625, "learning_rate": 0.001760564429061304, "loss": 0.2348, "step": 23582 }, { "epoch": 0.041816381333340424, "grad_norm": 0.31640625, "learning_rate": 0.0017605240030163173, "loss": 0.1864, "step": 23584 }, { "epoch": 0.04181992749865024, "grad_norm": 0.2734375, "learning_rate": 0.0017604835740825824, "loss": 0.3181, "step": 23586 }, { "epoch": 0.041823473663960054, "grad_norm": 0.341796875, "learning_rate": 0.001760443142260276, "loss": 0.2285, "step": 23588 }, { "epoch": 0.04182701982926987, "grad_norm": 12.25, "learning_rate": 0.0017604027075495748, "loss": 0.2903, "step": 23590 }, { "epoch": 0.04183056599457969, "grad_norm": 0.21875, "learning_rate": 0.001760362269950656, "loss": 0.1899, "step": 23592 }, { "epoch": 0.041834112159889504, "grad_norm": 0.53125, "learning_rate": 0.0017603218294636963, "loss": 0.2321, "step": 23594 }, { "epoch": 0.04183765832519932, "grad_norm": 0.5390625, "learning_rate": 0.0017602813860888723, "loss": 0.1917, "step": 23596 }, { "epoch": 0.04184120449050913, "grad_norm": 0.5078125, "learning_rate": 0.001760240939826361, "loss": 0.249, "step": 23598 }, { "epoch": 0.04184475065581895, "grad_norm": 1.8515625, "learning_rate": 0.0017602004906763394, "loss": 0.2614, "step": 23600 }, { "epoch": 0.04184829682112876, "grad_norm": 0.341796875, "learning_rate": 0.0017601600386389844, "loss": 0.2456, "step": 23602 }, { "epoch": 0.04185184298643858, "grad_norm": 0.2275390625, "learning_rate": 0.0017601195837144727, "loss": 0.2004, "step": 23604 }, { "epoch": 0.04185538915174839, "grad_norm": 2.609375, "learning_rate": 0.0017600791259029813, "loss": 0.3607, "step": 23606 }, { "epoch": 0.041858935317058206, "grad_norm": 0.37109375, "learning_rate": 0.0017600386652046876, "loss": 0.2111, "step": 23608 }, { "epoch": 0.04186248148236802, "grad_norm": 0.47265625, "learning_rate": 0.0017599982016197682, "loss": 0.1758, "step": 23610 }, { "epoch": 0.041866027647677835, "grad_norm": 0.32421875, "learning_rate": 0.0017599577351483995, "loss": 0.183, "step": 23612 }, { "epoch": 0.04186957381298766, "grad_norm": 0.296875, "learning_rate": 0.0017599172657907596, "loss": 0.2225, "step": 23614 }, { "epoch": 0.04187311997829747, "grad_norm": 0.7734375, "learning_rate": 0.0017598767935470246, "loss": 0.1616, "step": 23616 }, { "epoch": 0.041876666143607286, "grad_norm": 0.416015625, "learning_rate": 0.0017598363184173718, "loss": 0.1702, "step": 23618 }, { "epoch": 0.0418802123089171, "grad_norm": 0.328125, "learning_rate": 0.0017597958404019782, "loss": 0.1603, "step": 23620 }, { "epoch": 0.041883758474226915, "grad_norm": 0.296875, "learning_rate": 0.001759755359501021, "loss": 0.3313, "step": 23622 }, { "epoch": 0.04188730463953673, "grad_norm": 1.7421875, "learning_rate": 0.001759714875714677, "loss": 0.1736, "step": 23624 }, { "epoch": 0.041890850804846544, "grad_norm": 0.298828125, "learning_rate": 0.0017596743890431236, "loss": 0.3156, "step": 23626 }, { "epoch": 0.04189439697015636, "grad_norm": 0.67578125, "learning_rate": 0.0017596338994865374, "loss": 0.1944, "step": 23628 }, { "epoch": 0.04189794313546617, "grad_norm": 1.078125, "learning_rate": 0.0017595934070450954, "loss": 0.1899, "step": 23630 }, { "epoch": 0.04190148930077599, "grad_norm": 2.96875, "learning_rate": 0.0017595529117189754, "loss": 0.2606, "step": 23632 }, { "epoch": 0.0419050354660858, "grad_norm": 0.482421875, "learning_rate": 0.0017595124135083538, "loss": 0.1687, "step": 23634 }, { "epoch": 0.04190858163139562, "grad_norm": 0.38671875, "learning_rate": 0.001759471912413408, "loss": 0.1878, "step": 23636 }, { "epoch": 0.04191212779670544, "grad_norm": 0.298828125, "learning_rate": 0.0017594314084343153, "loss": 0.2149, "step": 23638 }, { "epoch": 0.04191567396201525, "grad_norm": 2.328125, "learning_rate": 0.0017593909015712525, "loss": 0.3939, "step": 23640 }, { "epoch": 0.04191922012732507, "grad_norm": 0.5703125, "learning_rate": 0.0017593503918243972, "loss": 0.1832, "step": 23642 }, { "epoch": 0.04192276629263488, "grad_norm": 0.578125, "learning_rate": 0.001759309879193926, "loss": 0.2584, "step": 23644 }, { "epoch": 0.041926312457944696, "grad_norm": 0.421875, "learning_rate": 0.0017592693636800164, "loss": 0.1827, "step": 23646 }, { "epoch": 0.04192985862325451, "grad_norm": 0.345703125, "learning_rate": 0.0017592288452828455, "loss": 0.3018, "step": 23648 }, { "epoch": 0.041933404788564325, "grad_norm": 0.890625, "learning_rate": 0.0017591883240025907, "loss": 0.2611, "step": 23650 }, { "epoch": 0.04193695095387414, "grad_norm": 0.61328125, "learning_rate": 0.0017591477998394292, "loss": 0.2099, "step": 23652 }, { "epoch": 0.041940497119183955, "grad_norm": 0.359375, "learning_rate": 0.0017591072727935377, "loss": 0.194, "step": 23654 }, { "epoch": 0.04194404328449377, "grad_norm": 0.400390625, "learning_rate": 0.0017590667428650942, "loss": 0.2605, "step": 23656 }, { "epoch": 0.041947589449803584, "grad_norm": 0.78515625, "learning_rate": 0.0017590262100542753, "loss": 0.264, "step": 23658 }, { "epoch": 0.041951135615113405, "grad_norm": 2.234375, "learning_rate": 0.001758985674361259, "loss": 0.5202, "step": 23660 }, { "epoch": 0.04195468178042322, "grad_norm": 0.87890625, "learning_rate": 0.001758945135786222, "loss": 0.2748, "step": 23662 }, { "epoch": 0.041958227945733034, "grad_norm": 0.9609375, "learning_rate": 0.0017589045943293413, "loss": 0.2368, "step": 23664 }, { "epoch": 0.04196177411104285, "grad_norm": 0.69921875, "learning_rate": 0.0017588640499907949, "loss": 0.1768, "step": 23666 }, { "epoch": 0.04196532027635266, "grad_norm": 0.42578125, "learning_rate": 0.00175882350277076, "loss": 0.2313, "step": 23668 }, { "epoch": 0.04196886644166248, "grad_norm": 0.2734375, "learning_rate": 0.0017587829526694138, "loss": 0.1453, "step": 23670 }, { "epoch": 0.04197241260697229, "grad_norm": 1.1796875, "learning_rate": 0.0017587423996869335, "loss": 0.3998, "step": 23672 }, { "epoch": 0.04197595877228211, "grad_norm": 0.224609375, "learning_rate": 0.0017587018438234966, "loss": 0.1915, "step": 23674 }, { "epoch": 0.04197950493759192, "grad_norm": 0.625, "learning_rate": 0.0017586612850792804, "loss": 0.2312, "step": 23676 }, { "epoch": 0.041983051102901736, "grad_norm": 1.28125, "learning_rate": 0.0017586207234544626, "loss": 0.2724, "step": 23678 }, { "epoch": 0.04198659726821155, "grad_norm": 0.251953125, "learning_rate": 0.0017585801589492201, "loss": 0.1967, "step": 23680 }, { "epoch": 0.04199014343352137, "grad_norm": 0.314453125, "learning_rate": 0.0017585395915637307, "loss": 0.2319, "step": 23682 }, { "epoch": 0.04199368959883119, "grad_norm": 0.291015625, "learning_rate": 0.0017584990212981715, "loss": 0.17, "step": 23684 }, { "epoch": 0.041997235764141, "grad_norm": 1.0625, "learning_rate": 0.0017584584481527199, "loss": 0.2614, "step": 23686 }, { "epoch": 0.042000781929450816, "grad_norm": 0.41015625, "learning_rate": 0.001758417872127554, "loss": 0.2044, "step": 23688 }, { "epoch": 0.04200432809476063, "grad_norm": 0.216796875, "learning_rate": 0.0017583772932228503, "loss": 0.2275, "step": 23690 }, { "epoch": 0.042007874260070445, "grad_norm": 2.6875, "learning_rate": 0.001758336711438787, "loss": 0.4451, "step": 23692 }, { "epoch": 0.04201142042538026, "grad_norm": 0.73828125, "learning_rate": 0.0017582961267755414, "loss": 0.3382, "step": 23694 }, { "epoch": 0.042014966590690074, "grad_norm": 0.45703125, "learning_rate": 0.0017582555392332906, "loss": 0.3099, "step": 23696 }, { "epoch": 0.04201851275599989, "grad_norm": 1.875, "learning_rate": 0.0017582149488122127, "loss": 0.2192, "step": 23698 }, { "epoch": 0.0420220589213097, "grad_norm": 0.8671875, "learning_rate": 0.001758174355512485, "loss": 0.2031, "step": 23700 }, { "epoch": 0.04202560508661952, "grad_norm": 0.609375, "learning_rate": 0.001758133759334285, "loss": 0.2054, "step": 23702 }, { "epoch": 0.04202915125192933, "grad_norm": 0.31640625, "learning_rate": 0.00175809316027779, "loss": 0.2175, "step": 23704 }, { "epoch": 0.042032697417239154, "grad_norm": 1.453125, "learning_rate": 0.0017580525583431779, "loss": 0.3159, "step": 23706 }, { "epoch": 0.04203624358254897, "grad_norm": 0.376953125, "learning_rate": 0.0017580119535306263, "loss": 0.1843, "step": 23708 }, { "epoch": 0.04203978974785878, "grad_norm": 0.388671875, "learning_rate": 0.0017579713458403124, "loss": 0.1867, "step": 23710 }, { "epoch": 0.0420433359131686, "grad_norm": 0.3203125, "learning_rate": 0.0017579307352724141, "loss": 0.2282, "step": 23712 }, { "epoch": 0.04204688207847841, "grad_norm": 0.310546875, "learning_rate": 0.0017578901218271087, "loss": 0.1697, "step": 23714 }, { "epoch": 0.042050428243788227, "grad_norm": 0.271484375, "learning_rate": 0.0017578495055045747, "loss": 0.2352, "step": 23716 }, { "epoch": 0.04205397440909804, "grad_norm": 0.3203125, "learning_rate": 0.0017578088863049886, "loss": 0.1852, "step": 23718 }, { "epoch": 0.042057520574407856, "grad_norm": 0.59765625, "learning_rate": 0.0017577682642285284, "loss": 0.2491, "step": 23720 }, { "epoch": 0.04206106673971767, "grad_norm": 0.296875, "learning_rate": 0.0017577276392753722, "loss": 0.219, "step": 23722 }, { "epoch": 0.042064612905027485, "grad_norm": 0.455078125, "learning_rate": 0.0017576870114456972, "loss": 0.1778, "step": 23724 }, { "epoch": 0.0420681590703373, "grad_norm": 1.5703125, "learning_rate": 0.0017576463807396814, "loss": 0.3497, "step": 23726 }, { "epoch": 0.04207170523564712, "grad_norm": 0.734375, "learning_rate": 0.0017576057471575023, "loss": 0.3019, "step": 23728 }, { "epoch": 0.042075251400956935, "grad_norm": 0.40625, "learning_rate": 0.0017575651106993373, "loss": 0.2189, "step": 23730 }, { "epoch": 0.04207879756626675, "grad_norm": 1.3125, "learning_rate": 0.001757524471365365, "loss": 0.2363, "step": 23732 }, { "epoch": 0.042082343731576564, "grad_norm": 0.359375, "learning_rate": 0.0017574838291557625, "loss": 0.2031, "step": 23734 }, { "epoch": 0.04208588989688638, "grad_norm": 1.21875, "learning_rate": 0.0017574431840707075, "loss": 0.1993, "step": 23736 }, { "epoch": 0.042089436062196194, "grad_norm": 0.3671875, "learning_rate": 0.0017574025361103778, "loss": 0.2137, "step": 23738 }, { "epoch": 0.04209298222750601, "grad_norm": 0.271484375, "learning_rate": 0.0017573618852749513, "loss": 0.2322, "step": 23740 }, { "epoch": 0.04209652839281582, "grad_norm": 0.5703125, "learning_rate": 0.001757321231564606, "loss": 0.1567, "step": 23742 }, { "epoch": 0.04210007455812564, "grad_norm": 0.74609375, "learning_rate": 0.0017572805749795192, "loss": 0.2025, "step": 23744 }, { "epoch": 0.04210362072343545, "grad_norm": 0.515625, "learning_rate": 0.001757239915519869, "loss": 0.1775, "step": 23746 }, { "epoch": 0.042107166888745266, "grad_norm": 0.625, "learning_rate": 0.0017571992531858333, "loss": 0.2102, "step": 23748 }, { "epoch": 0.04211071305405509, "grad_norm": 0.7578125, "learning_rate": 0.0017571585879775898, "loss": 0.2448, "step": 23750 }, { "epoch": 0.0421142592193649, "grad_norm": 0.62109375, "learning_rate": 0.001757117919895316, "loss": 0.1823, "step": 23752 }, { "epoch": 0.04211780538467472, "grad_norm": 0.2890625, "learning_rate": 0.0017570772489391906, "loss": 0.1865, "step": 23754 }, { "epoch": 0.04212135154998453, "grad_norm": 0.216796875, "learning_rate": 0.001757036575109391, "loss": 0.1847, "step": 23756 }, { "epoch": 0.042124897715294346, "grad_norm": 0.7734375, "learning_rate": 0.0017569958984060947, "loss": 0.1851, "step": 23758 }, { "epoch": 0.04212844388060416, "grad_norm": 0.416015625, "learning_rate": 0.0017569552188294799, "loss": 0.2543, "step": 23760 }, { "epoch": 0.042131990045913975, "grad_norm": 0.87890625, "learning_rate": 0.001756914536379725, "loss": 0.2409, "step": 23762 }, { "epoch": 0.04213553621122379, "grad_norm": 1.046875, "learning_rate": 0.0017568738510570074, "loss": 0.1804, "step": 23764 }, { "epoch": 0.042139082376533604, "grad_norm": 2.578125, "learning_rate": 0.001756833162861505, "loss": 0.3695, "step": 23766 }, { "epoch": 0.04214262854184342, "grad_norm": 0.94140625, "learning_rate": 0.0017567924717933959, "loss": 0.2674, "step": 23768 }, { "epoch": 0.04214617470715323, "grad_norm": 0.3125, "learning_rate": 0.001756751777852858, "loss": 0.24, "step": 23770 }, { "epoch": 0.04214972087246305, "grad_norm": 0.3359375, "learning_rate": 0.0017567110810400694, "loss": 0.2174, "step": 23772 }, { "epoch": 0.04215326703777287, "grad_norm": 0.5390625, "learning_rate": 0.0017566703813552082, "loss": 0.2044, "step": 23774 }, { "epoch": 0.042156813203082684, "grad_norm": 0.181640625, "learning_rate": 0.001756629678798452, "loss": 0.1134, "step": 23776 }, { "epoch": 0.0421603593683925, "grad_norm": 0.82421875, "learning_rate": 0.0017565889733699787, "loss": 0.259, "step": 23778 }, { "epoch": 0.04216390553370231, "grad_norm": 0.48828125, "learning_rate": 0.0017565482650699672, "loss": 0.2334, "step": 23780 }, { "epoch": 0.04216745169901213, "grad_norm": 0.65234375, "learning_rate": 0.0017565075538985946, "loss": 0.2257, "step": 23782 }, { "epoch": 0.04217099786432194, "grad_norm": 0.5, "learning_rate": 0.0017564668398560392, "loss": 0.1982, "step": 23784 }, { "epoch": 0.04217454402963176, "grad_norm": 0.90234375, "learning_rate": 0.001756426122942479, "loss": 0.1686, "step": 23786 }, { "epoch": 0.04217809019494157, "grad_norm": 0.341796875, "learning_rate": 0.001756385403158093, "loss": 0.1941, "step": 23788 }, { "epoch": 0.042181636360251386, "grad_norm": 0.416015625, "learning_rate": 0.0017563446805030582, "loss": 0.229, "step": 23790 }, { "epoch": 0.0421851825255612, "grad_norm": 0.578125, "learning_rate": 0.0017563039549775526, "loss": 0.1884, "step": 23792 }, { "epoch": 0.042188728690871015, "grad_norm": 0.2216796875, "learning_rate": 0.001756263226581755, "loss": 0.1616, "step": 23794 }, { "epoch": 0.042192274856180836, "grad_norm": 0.416015625, "learning_rate": 0.0017562224953158432, "loss": 0.1863, "step": 23796 }, { "epoch": 0.04219582102149065, "grad_norm": 0.65625, "learning_rate": 0.0017561817611799957, "loss": 0.2226, "step": 23798 }, { "epoch": 0.042199367186800466, "grad_norm": 0.453125, "learning_rate": 0.00175614102417439, "loss": 0.2248, "step": 23800 }, { "epoch": 0.04220291335211028, "grad_norm": 0.416015625, "learning_rate": 0.0017561002842992046, "loss": 0.1973, "step": 23802 }, { "epoch": 0.042206459517420095, "grad_norm": 0.349609375, "learning_rate": 0.001756059541554618, "loss": 0.2179, "step": 23804 }, { "epoch": 0.04221000568272991, "grad_norm": 0.283203125, "learning_rate": 0.0017560187959408077, "loss": 0.2762, "step": 23806 }, { "epoch": 0.042213551848039724, "grad_norm": 0.435546875, "learning_rate": 0.0017559780474579523, "loss": 0.169, "step": 23808 }, { "epoch": 0.04221709801334954, "grad_norm": 1.0390625, "learning_rate": 0.00175593729610623, "loss": 0.206, "step": 23810 }, { "epoch": 0.04222064417865935, "grad_norm": 0.412109375, "learning_rate": 0.001755896541885819, "loss": 0.2448, "step": 23812 }, { "epoch": 0.04222419034396917, "grad_norm": 0.640625, "learning_rate": 0.0017558557847968973, "loss": 0.2077, "step": 23814 }, { "epoch": 0.04222773650927898, "grad_norm": 0.9609375, "learning_rate": 0.0017558150248396436, "loss": 0.2575, "step": 23816 }, { "epoch": 0.0422312826745888, "grad_norm": 0.1787109375, "learning_rate": 0.001755774262014236, "loss": 0.1484, "step": 23818 }, { "epoch": 0.04223482883989862, "grad_norm": 0.408203125, "learning_rate": 0.0017557334963208525, "loss": 0.2298, "step": 23820 }, { "epoch": 0.04223837500520843, "grad_norm": 0.578125, "learning_rate": 0.0017556927277596714, "loss": 0.2882, "step": 23822 }, { "epoch": 0.04224192117051825, "grad_norm": 2.890625, "learning_rate": 0.0017556519563308717, "loss": 0.292, "step": 23824 }, { "epoch": 0.04224546733582806, "grad_norm": 0.259765625, "learning_rate": 0.001755611182034631, "loss": 0.1942, "step": 23826 }, { "epoch": 0.042249013501137876, "grad_norm": 0.7421875, "learning_rate": 0.0017555704048711276, "loss": 0.2217, "step": 23828 }, { "epoch": 0.04225255966644769, "grad_norm": 0.224609375, "learning_rate": 0.0017555296248405405, "loss": 0.1618, "step": 23830 }, { "epoch": 0.042256105831757505, "grad_norm": 0.33984375, "learning_rate": 0.0017554888419430471, "loss": 0.2221, "step": 23832 }, { "epoch": 0.04225965199706732, "grad_norm": 0.50390625, "learning_rate": 0.0017554480561788265, "loss": 0.1594, "step": 23834 }, { "epoch": 0.042263198162377134, "grad_norm": 0.515625, "learning_rate": 0.0017554072675480565, "loss": 0.2104, "step": 23836 }, { "epoch": 0.04226674432768695, "grad_norm": 0.412109375, "learning_rate": 0.0017553664760509165, "loss": 0.2439, "step": 23838 }, { "epoch": 0.042270290492996763, "grad_norm": 0.70703125, "learning_rate": 0.0017553256816875838, "loss": 0.1477, "step": 23840 }, { "epoch": 0.042273836658306585, "grad_norm": 0.7421875, "learning_rate": 0.0017552848844582369, "loss": 0.2293, "step": 23842 }, { "epoch": 0.0422773828236164, "grad_norm": 0.455078125, "learning_rate": 0.001755244084363055, "loss": 0.2671, "step": 23844 }, { "epoch": 0.042280928988926214, "grad_norm": 1.0, "learning_rate": 0.001755203281402216, "loss": 0.1841, "step": 23846 }, { "epoch": 0.04228447515423603, "grad_norm": 0.89453125, "learning_rate": 0.0017551624755758985, "loss": 0.2281, "step": 23848 }, { "epoch": 0.04228802131954584, "grad_norm": 0.2490234375, "learning_rate": 0.0017551216668842805, "loss": 0.1636, "step": 23850 }, { "epoch": 0.04229156748485566, "grad_norm": 0.421875, "learning_rate": 0.0017550808553275414, "loss": 0.3511, "step": 23852 }, { "epoch": 0.04229511365016547, "grad_norm": 0.63671875, "learning_rate": 0.001755040040905859, "loss": 0.1945, "step": 23854 }, { "epoch": 0.04229865981547529, "grad_norm": 2.71875, "learning_rate": 0.001754999223619412, "loss": 0.3168, "step": 23856 }, { "epoch": 0.0423022059807851, "grad_norm": 1.03125, "learning_rate": 0.0017549584034683788, "loss": 0.2384, "step": 23858 }, { "epoch": 0.042305752146094916, "grad_norm": 1.984375, "learning_rate": 0.0017549175804529378, "loss": 0.2456, "step": 23860 }, { "epoch": 0.04230929831140473, "grad_norm": 0.6015625, "learning_rate": 0.001754876754573268, "loss": 0.1457, "step": 23862 }, { "epoch": 0.04231284447671455, "grad_norm": 0.953125, "learning_rate": 0.0017548359258295475, "loss": 0.308, "step": 23864 }, { "epoch": 0.04231639064202437, "grad_norm": 0.35546875, "learning_rate": 0.001754795094221955, "loss": 0.2502, "step": 23866 }, { "epoch": 0.04231993680733418, "grad_norm": 0.51171875, "learning_rate": 0.0017547542597506692, "loss": 0.1906, "step": 23868 }, { "epoch": 0.042323482972643996, "grad_norm": 0.58984375, "learning_rate": 0.0017547134224158685, "loss": 0.2356, "step": 23870 }, { "epoch": 0.04232702913795381, "grad_norm": 1.1171875, "learning_rate": 0.0017546725822177318, "loss": 0.2045, "step": 23872 }, { "epoch": 0.042330575303263625, "grad_norm": 0.6796875, "learning_rate": 0.0017546317391564372, "loss": 0.1401, "step": 23874 }, { "epoch": 0.04233412146857344, "grad_norm": 0.498046875, "learning_rate": 0.0017545908932321641, "loss": 0.2598, "step": 23876 }, { "epoch": 0.042337667633883254, "grad_norm": 0.59375, "learning_rate": 0.0017545500444450903, "loss": 0.2051, "step": 23878 }, { "epoch": 0.04234121379919307, "grad_norm": 0.2890625, "learning_rate": 0.001754509192795395, "loss": 0.184, "step": 23880 }, { "epoch": 0.04234475996450288, "grad_norm": 1.1484375, "learning_rate": 0.0017544683382832566, "loss": 0.3238, "step": 23882 }, { "epoch": 0.0423483061298127, "grad_norm": 0.357421875, "learning_rate": 0.0017544274809088533, "loss": 0.3192, "step": 23884 }, { "epoch": 0.04235185229512252, "grad_norm": 2.109375, "learning_rate": 0.001754386620672365, "loss": 0.4365, "step": 23886 }, { "epoch": 0.042355398460432334, "grad_norm": 0.69921875, "learning_rate": 0.00175434575757397, "loss": 0.2085, "step": 23888 }, { "epoch": 0.04235894462574215, "grad_norm": 0.451171875, "learning_rate": 0.001754304891613846, "loss": 0.244, "step": 23890 }, { "epoch": 0.04236249079105196, "grad_norm": 0.1650390625, "learning_rate": 0.0017542640227921726, "loss": 0.3557, "step": 23892 }, { "epoch": 0.04236603695636178, "grad_norm": 0.69921875, "learning_rate": 0.0017542231511091287, "loss": 0.2205, "step": 23894 }, { "epoch": 0.04236958312167159, "grad_norm": 1.2890625, "learning_rate": 0.0017541822765648927, "loss": 0.2919, "step": 23896 }, { "epoch": 0.042373129286981406, "grad_norm": 0.458984375, "learning_rate": 0.0017541413991596433, "loss": 0.2988, "step": 23898 }, { "epoch": 0.04237667545229122, "grad_norm": 0.89453125, "learning_rate": 0.0017541005188935594, "loss": 0.2382, "step": 23900 }, { "epoch": 0.042380221617601035, "grad_norm": 0.33203125, "learning_rate": 0.00175405963576682, "loss": 0.1706, "step": 23902 }, { "epoch": 0.04238376778291085, "grad_norm": 0.2451171875, "learning_rate": 0.0017540187497796034, "loss": 0.2317, "step": 23904 }, { "epoch": 0.042387313948220665, "grad_norm": 1.140625, "learning_rate": 0.0017539778609320892, "loss": 0.2345, "step": 23906 }, { "epoch": 0.04239086011353048, "grad_norm": 0.474609375, "learning_rate": 0.001753936969224455, "loss": 0.2592, "step": 23908 }, { "epoch": 0.0423944062788403, "grad_norm": 0.345703125, "learning_rate": 0.0017538960746568807, "loss": 0.1874, "step": 23910 }, { "epoch": 0.042397952444150115, "grad_norm": 0.359375, "learning_rate": 0.0017538551772295448, "loss": 0.2347, "step": 23912 }, { "epoch": 0.04240149860945993, "grad_norm": 0.263671875, "learning_rate": 0.001753814276942626, "loss": 0.1409, "step": 23914 }, { "epoch": 0.042405044774769744, "grad_norm": 0.5078125, "learning_rate": 0.0017537733737963035, "loss": 0.198, "step": 23916 }, { "epoch": 0.04240859094007956, "grad_norm": 0.5390625, "learning_rate": 0.001753732467790756, "loss": 0.1597, "step": 23918 }, { "epoch": 0.04241213710538937, "grad_norm": 0.2001953125, "learning_rate": 0.0017536915589261622, "loss": 0.1795, "step": 23920 }, { "epoch": 0.04241568327069919, "grad_norm": 0.19140625, "learning_rate": 0.0017536506472027014, "loss": 0.1707, "step": 23922 }, { "epoch": 0.042419229436009, "grad_norm": 1.53125, "learning_rate": 0.0017536097326205525, "loss": 0.1775, "step": 23924 }, { "epoch": 0.04242277560131882, "grad_norm": 0.271484375, "learning_rate": 0.0017535688151798943, "loss": 0.2343, "step": 23926 }, { "epoch": 0.04242632176662863, "grad_norm": 0.4140625, "learning_rate": 0.0017535278948809054, "loss": 0.2046, "step": 23928 }, { "epoch": 0.042429867931938446, "grad_norm": 0.546875, "learning_rate": 0.001753486971723765, "loss": 0.1644, "step": 23930 }, { "epoch": 0.04243341409724827, "grad_norm": 0.93359375, "learning_rate": 0.0017534460457086527, "loss": 0.1567, "step": 23932 }, { "epoch": 0.04243696026255808, "grad_norm": 0.404296875, "learning_rate": 0.001753405116835747, "loss": 0.2068, "step": 23934 }, { "epoch": 0.0424405064278679, "grad_norm": 0.1708984375, "learning_rate": 0.0017533641851052264, "loss": 0.2209, "step": 23936 }, { "epoch": 0.04244405259317771, "grad_norm": 0.392578125, "learning_rate": 0.001753323250517271, "loss": 0.1865, "step": 23938 }, { "epoch": 0.042447598758487526, "grad_norm": 3.03125, "learning_rate": 0.0017532823130720586, "loss": 0.2062, "step": 23940 }, { "epoch": 0.04245114492379734, "grad_norm": 0.2099609375, "learning_rate": 0.0017532413727697688, "loss": 0.207, "step": 23942 }, { "epoch": 0.042454691089107155, "grad_norm": 0.466796875, "learning_rate": 0.001753200429610581, "loss": 0.2045, "step": 23944 }, { "epoch": 0.04245823725441697, "grad_norm": 0.400390625, "learning_rate": 0.001753159483594674, "loss": 0.2228, "step": 23946 }, { "epoch": 0.042461783419726784, "grad_norm": 0.68359375, "learning_rate": 0.0017531185347222265, "loss": 0.3209, "step": 23948 }, { "epoch": 0.0424653295850366, "grad_norm": 0.64453125, "learning_rate": 0.0017530775829934183, "loss": 0.2182, "step": 23950 }, { "epoch": 0.04246887575034641, "grad_norm": 0.181640625, "learning_rate": 0.0017530366284084279, "loss": 0.1772, "step": 23952 }, { "epoch": 0.042472421915656235, "grad_norm": 0.1904296875, "learning_rate": 0.0017529956709674344, "loss": 0.1603, "step": 23954 }, { "epoch": 0.04247596808096605, "grad_norm": 0.69140625, "learning_rate": 0.0017529547106706171, "loss": 0.2544, "step": 23956 }, { "epoch": 0.042479514246275864, "grad_norm": 0.55078125, "learning_rate": 0.0017529137475181553, "loss": 0.1482, "step": 23958 }, { "epoch": 0.04248306041158568, "grad_norm": 0.4140625, "learning_rate": 0.0017528727815102284, "loss": 0.1899, "step": 23960 }, { "epoch": 0.04248660657689549, "grad_norm": 1.6875, "learning_rate": 0.0017528318126470148, "loss": 0.2124, "step": 23962 }, { "epoch": 0.04249015274220531, "grad_norm": 0.953125, "learning_rate": 0.0017527908409286942, "loss": 0.2147, "step": 23964 }, { "epoch": 0.04249369890751512, "grad_norm": 0.2021484375, "learning_rate": 0.0017527498663554458, "loss": 0.1988, "step": 23966 }, { "epoch": 0.042497245072824937, "grad_norm": 1.609375, "learning_rate": 0.0017527088889274483, "loss": 0.1801, "step": 23968 }, { "epoch": 0.04250079123813475, "grad_norm": 0.5078125, "learning_rate": 0.0017526679086448816, "loss": 0.13, "step": 23970 }, { "epoch": 0.042504337403444566, "grad_norm": 0.384765625, "learning_rate": 0.0017526269255079244, "loss": 0.198, "step": 23972 }, { "epoch": 0.04250788356875438, "grad_norm": 0.474609375, "learning_rate": 0.0017525859395167561, "loss": 0.1921, "step": 23974 }, { "epoch": 0.042511429734064195, "grad_norm": 0.7890625, "learning_rate": 0.0017525449506715558, "loss": 0.3088, "step": 23976 }, { "epoch": 0.042514975899374016, "grad_norm": 0.1806640625, "learning_rate": 0.001752503958972503, "loss": 0.1595, "step": 23978 }, { "epoch": 0.04251852206468383, "grad_norm": 1.1328125, "learning_rate": 0.0017524629644197773, "loss": 0.1966, "step": 23980 }, { "epoch": 0.042522068229993645, "grad_norm": 0.5703125, "learning_rate": 0.0017524219670135572, "loss": 0.2517, "step": 23982 }, { "epoch": 0.04252561439530346, "grad_norm": 0.2734375, "learning_rate": 0.0017523809667540225, "loss": 0.1734, "step": 23984 }, { "epoch": 0.042529160560613274, "grad_norm": 1.78125, "learning_rate": 0.0017523399636413526, "loss": 0.2952, "step": 23986 }, { "epoch": 0.04253270672592309, "grad_norm": 0.71875, "learning_rate": 0.0017522989576757263, "loss": 0.2196, "step": 23988 }, { "epoch": 0.042536252891232904, "grad_norm": 0.328125, "learning_rate": 0.0017522579488573233, "loss": 0.1897, "step": 23990 }, { "epoch": 0.04253979905654272, "grad_norm": 1.0, "learning_rate": 0.0017522169371863231, "loss": 0.2234, "step": 23992 }, { "epoch": 0.04254334522185253, "grad_norm": 1.03125, "learning_rate": 0.0017521759226629048, "loss": 0.2371, "step": 23994 }, { "epoch": 0.04254689138716235, "grad_norm": 0.462890625, "learning_rate": 0.001752134905287248, "loss": 0.2498, "step": 23996 }, { "epoch": 0.04255043755247216, "grad_norm": 0.451171875, "learning_rate": 0.0017520938850595317, "loss": 0.2481, "step": 23998 }, { "epoch": 0.04255398371778198, "grad_norm": 0.96875, "learning_rate": 0.0017520528619799354, "loss": 0.3016, "step": 24000 }, { "epoch": 0.0425575298830918, "grad_norm": 0.23046875, "learning_rate": 0.0017520118360486389, "loss": 0.2567, "step": 24002 }, { "epoch": 0.04256107604840161, "grad_norm": 2.96875, "learning_rate": 0.001751970807265821, "loss": 0.27, "step": 24004 }, { "epoch": 0.04256462221371143, "grad_norm": 0.44921875, "learning_rate": 0.0017519297756316621, "loss": 0.1887, "step": 24006 }, { "epoch": 0.04256816837902124, "grad_norm": 0.765625, "learning_rate": 0.0017518887411463408, "loss": 0.2201, "step": 24008 }, { "epoch": 0.042571714544331056, "grad_norm": 0.392578125, "learning_rate": 0.0017518477038100365, "loss": 0.1935, "step": 24010 }, { "epoch": 0.04257526070964087, "grad_norm": 0.416015625, "learning_rate": 0.001751806663622929, "loss": 0.1803, "step": 24012 }, { "epoch": 0.042578806874950685, "grad_norm": 0.2177734375, "learning_rate": 0.0017517656205851982, "loss": 0.1566, "step": 24014 }, { "epoch": 0.0425823530402605, "grad_norm": 0.44921875, "learning_rate": 0.001751724574697023, "loss": 0.1373, "step": 24016 }, { "epoch": 0.042585899205570314, "grad_norm": 0.53125, "learning_rate": 0.001751683525958583, "loss": 0.1869, "step": 24018 }, { "epoch": 0.04258944537088013, "grad_norm": 0.3828125, "learning_rate": 0.0017516424743700576, "loss": 0.2613, "step": 24020 }, { "epoch": 0.04259299153618995, "grad_norm": 0.375, "learning_rate": 0.0017516014199316268, "loss": 0.2245, "step": 24022 }, { "epoch": 0.042596537701499765, "grad_norm": 0.2490234375, "learning_rate": 0.00175156036264347, "loss": 0.1988, "step": 24024 }, { "epoch": 0.04260008386680958, "grad_norm": 0.45703125, "learning_rate": 0.0017515193025057665, "loss": 0.2416, "step": 24026 }, { "epoch": 0.042603630032119394, "grad_norm": 0.2099609375, "learning_rate": 0.001751478239518696, "loss": 0.2254, "step": 24028 }, { "epoch": 0.04260717619742921, "grad_norm": 0.21875, "learning_rate": 0.001751437173682438, "loss": 0.1807, "step": 24030 }, { "epoch": 0.04261072236273902, "grad_norm": 1.40625, "learning_rate": 0.0017513961049971723, "loss": 0.2672, "step": 24032 }, { "epoch": 0.04261426852804884, "grad_norm": 1.796875, "learning_rate": 0.0017513550334630785, "loss": 0.2345, "step": 24034 }, { "epoch": 0.04261781469335865, "grad_norm": 0.46484375, "learning_rate": 0.001751313959080336, "loss": 0.1862, "step": 24036 }, { "epoch": 0.04262136085866847, "grad_norm": 0.640625, "learning_rate": 0.0017512728818491247, "loss": 0.2259, "step": 24038 }, { "epoch": 0.04262490702397828, "grad_norm": 1.9921875, "learning_rate": 0.001751231801769624, "loss": 0.5851, "step": 24040 }, { "epoch": 0.042628453189288096, "grad_norm": 0.408203125, "learning_rate": 0.0017511907188420136, "loss": 0.2609, "step": 24042 }, { "epoch": 0.04263199935459791, "grad_norm": 0.435546875, "learning_rate": 0.0017511496330664736, "loss": 0.1681, "step": 24044 }, { "epoch": 0.04263554551990773, "grad_norm": 0.443359375, "learning_rate": 0.0017511085444431832, "loss": 0.1393, "step": 24046 }, { "epoch": 0.042639091685217546, "grad_norm": 0.671875, "learning_rate": 0.001751067452972322, "loss": 0.2269, "step": 24048 }, { "epoch": 0.04264263785052736, "grad_norm": 0.9453125, "learning_rate": 0.00175102635865407, "loss": 0.2427, "step": 24050 }, { "epoch": 0.042646184015837175, "grad_norm": 0.392578125, "learning_rate": 0.001750985261488607, "loss": 0.2772, "step": 24052 }, { "epoch": 0.04264973018114699, "grad_norm": 1.3671875, "learning_rate": 0.0017509441614761126, "loss": 0.2939, "step": 24054 }, { "epoch": 0.042653276346456805, "grad_norm": 0.68359375, "learning_rate": 0.0017509030586167664, "loss": 0.2081, "step": 24056 }, { "epoch": 0.04265682251176662, "grad_norm": 1.6953125, "learning_rate": 0.0017508619529107486, "loss": 0.198, "step": 24058 }, { "epoch": 0.042660368677076434, "grad_norm": 0.59375, "learning_rate": 0.0017508208443582384, "loss": 0.1808, "step": 24060 }, { "epoch": 0.04266391484238625, "grad_norm": 1.7734375, "learning_rate": 0.0017507797329594159, "loss": 0.3574, "step": 24062 }, { "epoch": 0.04266746100769606, "grad_norm": 0.447265625, "learning_rate": 0.001750738618714461, "loss": 0.1958, "step": 24064 }, { "epoch": 0.04267100717300588, "grad_norm": 0.28515625, "learning_rate": 0.0017506975016235533, "loss": 0.1873, "step": 24066 }, { "epoch": 0.0426745533383157, "grad_norm": 0.7421875, "learning_rate": 0.0017506563816868728, "loss": 0.1911, "step": 24068 }, { "epoch": 0.04267809950362551, "grad_norm": 0.81640625, "learning_rate": 0.0017506152589045992, "loss": 0.1759, "step": 24070 }, { "epoch": 0.04268164566893533, "grad_norm": 0.8671875, "learning_rate": 0.0017505741332769123, "loss": 0.3919, "step": 24072 }, { "epoch": 0.04268519183424514, "grad_norm": 0.734375, "learning_rate": 0.001750533004803992, "loss": 0.197, "step": 24074 }, { "epoch": 0.04268873799955496, "grad_norm": 0.5625, "learning_rate": 0.0017504918734860182, "loss": 0.3264, "step": 24076 }, { "epoch": 0.04269228416486477, "grad_norm": 0.4140625, "learning_rate": 0.001750450739323171, "loss": 0.1925, "step": 24078 }, { "epoch": 0.042695830330174586, "grad_norm": 1.1171875, "learning_rate": 0.00175040960231563, "loss": 0.2104, "step": 24080 }, { "epoch": 0.0426993764954844, "grad_norm": 0.265625, "learning_rate": 0.0017503684624635752, "loss": 0.1488, "step": 24082 }, { "epoch": 0.042702922660794215, "grad_norm": 0.5546875, "learning_rate": 0.0017503273197671863, "loss": 0.2518, "step": 24084 }, { "epoch": 0.04270646882610403, "grad_norm": 0.33984375, "learning_rate": 0.0017502861742266437, "loss": 0.1225, "step": 24086 }, { "epoch": 0.042710014991413844, "grad_norm": 0.6484375, "learning_rate": 0.0017502450258421271, "loss": 0.242, "step": 24088 }, { "epoch": 0.04271356115672366, "grad_norm": 0.97265625, "learning_rate": 0.0017502038746138166, "loss": 0.1617, "step": 24090 }, { "epoch": 0.04271710732203348, "grad_norm": 0.31640625, "learning_rate": 0.0017501627205418918, "loss": 0.1525, "step": 24092 }, { "epoch": 0.042720653487343295, "grad_norm": 2.75, "learning_rate": 0.001750121563626533, "loss": 0.2369, "step": 24094 }, { "epoch": 0.04272419965265311, "grad_norm": 0.5078125, "learning_rate": 0.00175008040386792, "loss": 0.2657, "step": 24096 }, { "epoch": 0.042727745817962924, "grad_norm": 0.390625, "learning_rate": 0.001750039241266233, "loss": 0.1413, "step": 24098 }, { "epoch": 0.04273129198327274, "grad_norm": 0.2021484375, "learning_rate": 0.0017499980758216521, "loss": 0.1775, "step": 24100 }, { "epoch": 0.04273483814858255, "grad_norm": 0.63671875, "learning_rate": 0.0017499569075343571, "loss": 0.178, "step": 24102 }, { "epoch": 0.04273838431389237, "grad_norm": 0.404296875, "learning_rate": 0.001749915736404528, "loss": 0.2415, "step": 24104 }, { "epoch": 0.04274193047920218, "grad_norm": 0.3515625, "learning_rate": 0.0017498745624323448, "loss": 0.2049, "step": 24106 }, { "epoch": 0.042745476644512, "grad_norm": 0.283203125, "learning_rate": 0.0017498333856179882, "loss": 0.1907, "step": 24108 }, { "epoch": 0.04274902280982181, "grad_norm": 0.435546875, "learning_rate": 0.0017497922059616377, "loss": 0.1859, "step": 24110 }, { "epoch": 0.042752568975131626, "grad_norm": 0.279296875, "learning_rate": 0.0017497510234634737, "loss": 0.2112, "step": 24112 }, { "epoch": 0.04275611514044145, "grad_norm": 1.25, "learning_rate": 0.0017497098381236758, "loss": 0.2508, "step": 24114 }, { "epoch": 0.04275966130575126, "grad_norm": 0.8671875, "learning_rate": 0.0017496686499424245, "loss": 0.2283, "step": 24116 }, { "epoch": 0.04276320747106108, "grad_norm": 1.421875, "learning_rate": 0.0017496274589199003, "loss": 0.3208, "step": 24118 }, { "epoch": 0.04276675363637089, "grad_norm": 0.75390625, "learning_rate": 0.0017495862650562823, "loss": 0.1682, "step": 24120 }, { "epoch": 0.042770299801680706, "grad_norm": 0.609375, "learning_rate": 0.0017495450683517517, "loss": 0.1606, "step": 24122 }, { "epoch": 0.04277384596699052, "grad_norm": 0.58984375, "learning_rate": 0.0017495038688064882, "loss": 0.2723, "step": 24124 }, { "epoch": 0.042777392132300335, "grad_norm": 0.90625, "learning_rate": 0.0017494626664206721, "loss": 0.2757, "step": 24126 }, { "epoch": 0.04278093829761015, "grad_norm": 0.330078125, "learning_rate": 0.0017494214611944833, "loss": 0.2325, "step": 24128 }, { "epoch": 0.042784484462919964, "grad_norm": 0.24609375, "learning_rate": 0.0017493802531281027, "loss": 0.1646, "step": 24130 }, { "epoch": 0.04278803062822978, "grad_norm": 1.734375, "learning_rate": 0.00174933904222171, "loss": 0.1959, "step": 24132 }, { "epoch": 0.04279157679353959, "grad_norm": 0.2001953125, "learning_rate": 0.0017492978284754852, "loss": 0.1478, "step": 24134 }, { "epoch": 0.042795122958849414, "grad_norm": 0.43359375, "learning_rate": 0.0017492566118896089, "loss": 0.2366, "step": 24136 }, { "epoch": 0.04279866912415923, "grad_norm": 2.15625, "learning_rate": 0.0017492153924642618, "loss": 0.2153, "step": 24138 }, { "epoch": 0.042802215289469044, "grad_norm": 2.34375, "learning_rate": 0.001749174170199623, "loss": 0.3991, "step": 24140 }, { "epoch": 0.04280576145477886, "grad_norm": 0.25390625, "learning_rate": 0.001749132945095874, "loss": 0.2526, "step": 24142 }, { "epoch": 0.04280930762008867, "grad_norm": 1.28125, "learning_rate": 0.0017490917171531942, "loss": 0.2236, "step": 24144 }, { "epoch": 0.04281285378539849, "grad_norm": 0.328125, "learning_rate": 0.0017490504863717645, "loss": 0.1934, "step": 24146 }, { "epoch": 0.0428163999507083, "grad_norm": 0.515625, "learning_rate": 0.0017490092527517648, "loss": 0.179, "step": 24148 }, { "epoch": 0.042819946116018116, "grad_norm": 0.8046875, "learning_rate": 0.0017489680162933756, "loss": 0.2783, "step": 24150 }, { "epoch": 0.04282349228132793, "grad_norm": 0.478515625, "learning_rate": 0.0017489267769967773, "loss": 0.1923, "step": 24152 }, { "epoch": 0.042827038446637745, "grad_norm": 1.109375, "learning_rate": 0.0017488855348621504, "loss": 0.1705, "step": 24154 }, { "epoch": 0.04283058461194756, "grad_norm": 0.62109375, "learning_rate": 0.001748844289889675, "loss": 0.1778, "step": 24156 }, { "epoch": 0.042834130777257375, "grad_norm": 0.51171875, "learning_rate": 0.0017488030420795316, "loss": 0.2089, "step": 24158 }, { "epoch": 0.042837676942567196, "grad_norm": 0.6875, "learning_rate": 0.0017487617914319004, "loss": 0.2046, "step": 24160 }, { "epoch": 0.04284122310787701, "grad_norm": 0.439453125, "learning_rate": 0.0017487205379469622, "loss": 0.2638, "step": 24162 }, { "epoch": 0.042844769273186825, "grad_norm": 0.66796875, "learning_rate": 0.0017486792816248972, "loss": 0.167, "step": 24164 }, { "epoch": 0.04284831543849664, "grad_norm": 1.484375, "learning_rate": 0.0017486380224658855, "loss": 0.2415, "step": 24166 }, { "epoch": 0.042851861603806454, "grad_norm": 0.61328125, "learning_rate": 0.0017485967604701084, "loss": 0.2292, "step": 24168 }, { "epoch": 0.04285540776911627, "grad_norm": 0.7109375, "learning_rate": 0.0017485554956377452, "loss": 0.2215, "step": 24170 }, { "epoch": 0.04285895393442608, "grad_norm": 0.57421875, "learning_rate": 0.001748514227968977, "loss": 0.2161, "step": 24172 }, { "epoch": 0.0428625000997359, "grad_norm": 0.79296875, "learning_rate": 0.0017484729574639849, "loss": 0.2107, "step": 24174 }, { "epoch": 0.04286604626504571, "grad_norm": 0.61328125, "learning_rate": 0.0017484316841229481, "loss": 0.1965, "step": 24176 }, { "epoch": 0.04286959243035553, "grad_norm": 0.94921875, "learning_rate": 0.001748390407946048, "loss": 0.2579, "step": 24178 }, { "epoch": 0.04287313859566534, "grad_norm": 0.431640625, "learning_rate": 0.0017483491289334652, "loss": 0.1948, "step": 24180 }, { "epoch": 0.04287668476097516, "grad_norm": 0.474609375, "learning_rate": 0.0017483078470853796, "loss": 0.158, "step": 24182 }, { "epoch": 0.04288023092628498, "grad_norm": 0.287109375, "learning_rate": 0.0017482665624019723, "loss": 0.2029, "step": 24184 }, { "epoch": 0.04288377709159479, "grad_norm": 0.208984375, "learning_rate": 0.0017482252748834233, "loss": 0.1857, "step": 24186 }, { "epoch": 0.04288732325690461, "grad_norm": 0.224609375, "learning_rate": 0.0017481839845299137, "loss": 0.2128, "step": 24188 }, { "epoch": 0.04289086942221442, "grad_norm": 2.40625, "learning_rate": 0.0017481426913416235, "loss": 0.2198, "step": 24190 }, { "epoch": 0.042894415587524236, "grad_norm": 0.37890625, "learning_rate": 0.001748101395318734, "loss": 0.1862, "step": 24192 }, { "epoch": 0.04289796175283405, "grad_norm": 1.7578125, "learning_rate": 0.0017480600964614255, "loss": 0.2643, "step": 24194 }, { "epoch": 0.042901507918143865, "grad_norm": 0.77734375, "learning_rate": 0.0017480187947698784, "loss": 0.3297, "step": 24196 }, { "epoch": 0.04290505408345368, "grad_norm": 1.421875, "learning_rate": 0.0017479774902442735, "loss": 0.2786, "step": 24198 }, { "epoch": 0.042908600248763494, "grad_norm": 0.51171875, "learning_rate": 0.0017479361828847916, "loss": 0.1989, "step": 24200 }, { "epoch": 0.04291214641407331, "grad_norm": 0.419921875, "learning_rate": 0.0017478948726916133, "loss": 0.1993, "step": 24202 }, { "epoch": 0.04291569257938313, "grad_norm": 0.498046875, "learning_rate": 0.0017478535596649187, "loss": 0.2735, "step": 24204 }, { "epoch": 0.042919238744692945, "grad_norm": 0.220703125, "learning_rate": 0.0017478122438048893, "loss": 0.1735, "step": 24206 }, { "epoch": 0.04292278491000276, "grad_norm": 2.625, "learning_rate": 0.0017477709251117056, "loss": 0.2374, "step": 24208 }, { "epoch": 0.042926331075312574, "grad_norm": 0.50390625, "learning_rate": 0.0017477296035855476, "loss": 0.1774, "step": 24210 }, { "epoch": 0.04292987724062239, "grad_norm": 0.306640625, "learning_rate": 0.0017476882792265972, "loss": 0.2171, "step": 24212 }, { "epoch": 0.0429334234059322, "grad_norm": 1.34375, "learning_rate": 0.001747646952035034, "loss": 0.3039, "step": 24214 }, { "epoch": 0.04293696957124202, "grad_norm": 0.224609375, "learning_rate": 0.0017476056220110394, "loss": 0.2463, "step": 24216 }, { "epoch": 0.04294051573655183, "grad_norm": 1.0, "learning_rate": 0.0017475642891547941, "loss": 0.3457, "step": 24218 }, { "epoch": 0.042944061901861647, "grad_norm": 2.171875, "learning_rate": 0.0017475229534664786, "loss": 0.246, "step": 24220 }, { "epoch": 0.04294760806717146, "grad_norm": 0.462890625, "learning_rate": 0.001747481614946274, "loss": 0.2216, "step": 24222 }, { "epoch": 0.042951154232481276, "grad_norm": 0.71484375, "learning_rate": 0.0017474402735943606, "loss": 0.1745, "step": 24224 }, { "epoch": 0.04295470039779109, "grad_norm": 1.0625, "learning_rate": 0.0017473989294109196, "loss": 0.2014, "step": 24226 }, { "epoch": 0.04295824656310091, "grad_norm": 0.5625, "learning_rate": 0.0017473575823961321, "loss": 0.1977, "step": 24228 }, { "epoch": 0.042961792728410726, "grad_norm": 2.40625, "learning_rate": 0.0017473162325501782, "loss": 0.2359, "step": 24230 }, { "epoch": 0.04296533889372054, "grad_norm": 1.125, "learning_rate": 0.0017472748798732394, "loss": 0.1812, "step": 24232 }, { "epoch": 0.042968885059030355, "grad_norm": 0.58984375, "learning_rate": 0.0017472335243654958, "loss": 0.1912, "step": 24234 }, { "epoch": 0.04297243122434017, "grad_norm": 0.46484375, "learning_rate": 0.001747192166027129, "loss": 0.3422, "step": 24236 }, { "epoch": 0.042975977389649984, "grad_norm": 0.421875, "learning_rate": 0.0017471508048583194, "loss": 0.1902, "step": 24238 }, { "epoch": 0.0429795235549598, "grad_norm": 1.59375, "learning_rate": 0.0017471094408592486, "loss": 0.1939, "step": 24240 }, { "epoch": 0.042983069720269614, "grad_norm": 0.32421875, "learning_rate": 0.0017470680740300966, "loss": 0.1786, "step": 24242 }, { "epoch": 0.04298661588557943, "grad_norm": 1.0859375, "learning_rate": 0.0017470267043710446, "loss": 0.1565, "step": 24244 }, { "epoch": 0.04299016205088924, "grad_norm": 2.0, "learning_rate": 0.001746985331882274, "loss": 0.2989, "step": 24246 }, { "epoch": 0.04299370821619906, "grad_norm": 0.62890625, "learning_rate": 0.0017469439565639651, "loss": 0.1724, "step": 24248 }, { "epoch": 0.04299725438150888, "grad_norm": 0.578125, "learning_rate": 0.0017469025784162993, "loss": 0.1557, "step": 24250 }, { "epoch": 0.04300080054681869, "grad_norm": 0.375, "learning_rate": 0.001746861197439457, "loss": 0.1974, "step": 24252 }, { "epoch": 0.04300434671212851, "grad_norm": 0.5859375, "learning_rate": 0.0017468198136336203, "loss": 0.2223, "step": 24254 }, { "epoch": 0.04300789287743832, "grad_norm": 1.0859375, "learning_rate": 0.0017467784269989688, "loss": 0.1743, "step": 24256 }, { "epoch": 0.04301143904274814, "grad_norm": 0.482421875, "learning_rate": 0.0017467370375356846, "loss": 0.2179, "step": 24258 }, { "epoch": 0.04301498520805795, "grad_norm": 1.3828125, "learning_rate": 0.001746695645243948, "loss": 0.2148, "step": 24260 }, { "epoch": 0.043018531373367766, "grad_norm": 0.84765625, "learning_rate": 0.0017466542501239403, "loss": 0.2186, "step": 24262 }, { "epoch": 0.04302207753867758, "grad_norm": 0.71875, "learning_rate": 0.0017466128521758423, "loss": 0.1757, "step": 24264 }, { "epoch": 0.043025623703987395, "grad_norm": 0.36328125, "learning_rate": 0.0017465714513998356, "loss": 0.1866, "step": 24266 }, { "epoch": 0.04302916986929721, "grad_norm": 0.51171875, "learning_rate": 0.0017465300477961009, "loss": 0.2024, "step": 24268 }, { "epoch": 0.043032716034607024, "grad_norm": 0.40625, "learning_rate": 0.0017464886413648191, "loss": 0.1483, "step": 24270 }, { "epoch": 0.043036262199916846, "grad_norm": 0.70703125, "learning_rate": 0.0017464472321061719, "loss": 0.225, "step": 24272 }, { "epoch": 0.04303980836522666, "grad_norm": 0.578125, "learning_rate": 0.00174640582002034, "loss": 0.2247, "step": 24274 }, { "epoch": 0.043043354530536475, "grad_norm": 0.3046875, "learning_rate": 0.0017463644051075042, "loss": 0.157, "step": 24276 }, { "epoch": 0.04304690069584629, "grad_norm": 0.322265625, "learning_rate": 0.0017463229873678461, "loss": 0.2338, "step": 24278 }, { "epoch": 0.043050446861156104, "grad_norm": 0.78125, "learning_rate": 0.0017462815668015466, "loss": 0.1954, "step": 24280 }, { "epoch": 0.04305399302646592, "grad_norm": 1.78125, "learning_rate": 0.001746240143408787, "loss": 0.2601, "step": 24282 }, { "epoch": 0.04305753919177573, "grad_norm": 0.73828125, "learning_rate": 0.0017461987171897484, "loss": 0.2385, "step": 24284 }, { "epoch": 0.04306108535708555, "grad_norm": 0.3203125, "learning_rate": 0.001746157288144612, "loss": 0.2056, "step": 24286 }, { "epoch": 0.04306463152239536, "grad_norm": 2.8125, "learning_rate": 0.001746115856273559, "loss": 0.2709, "step": 24288 }, { "epoch": 0.04306817768770518, "grad_norm": 0.86328125, "learning_rate": 0.0017460744215767703, "loss": 0.1859, "step": 24290 }, { "epoch": 0.04307172385301499, "grad_norm": 0.333984375, "learning_rate": 0.0017460329840544276, "loss": 0.1771, "step": 24292 }, { "epoch": 0.043075270018324806, "grad_norm": 0.5078125, "learning_rate": 0.001745991543706712, "loss": 0.1575, "step": 24294 }, { "epoch": 0.04307881618363463, "grad_norm": 0.890625, "learning_rate": 0.0017459501005338044, "loss": 0.4684, "step": 24296 }, { "epoch": 0.04308236234894444, "grad_norm": 1.375, "learning_rate": 0.0017459086545358862, "loss": 0.2327, "step": 24298 }, { "epoch": 0.043085908514254256, "grad_norm": 0.37109375, "learning_rate": 0.001745867205713139, "loss": 0.2052, "step": 24300 }, { "epoch": 0.04308945467956407, "grad_norm": 0.41796875, "learning_rate": 0.0017458257540657433, "loss": 0.1432, "step": 24302 }, { "epoch": 0.043093000844873885, "grad_norm": 0.349609375, "learning_rate": 0.0017457842995938814, "loss": 0.2114, "step": 24304 }, { "epoch": 0.0430965470101837, "grad_norm": 0.60546875, "learning_rate": 0.001745742842297734, "loss": 0.3504, "step": 24306 }, { "epoch": 0.043100093175493515, "grad_norm": 0.59765625, "learning_rate": 0.0017457013821774823, "loss": 0.2117, "step": 24308 }, { "epoch": 0.04310363934080333, "grad_norm": 0.369140625, "learning_rate": 0.001745659919233308, "loss": 0.2333, "step": 24310 }, { "epoch": 0.043107185506113144, "grad_norm": 0.455078125, "learning_rate": 0.0017456184534653922, "loss": 0.2491, "step": 24312 }, { "epoch": 0.04311073167142296, "grad_norm": 0.70703125, "learning_rate": 0.0017455769848739163, "loss": 0.2023, "step": 24314 }, { "epoch": 0.04311427783673277, "grad_norm": 1.046875, "learning_rate": 0.0017455355134590618, "loss": 0.4318, "step": 24316 }, { "epoch": 0.043117824002042594, "grad_norm": 0.408203125, "learning_rate": 0.00174549403922101, "loss": 0.1757, "step": 24318 }, { "epoch": 0.04312137016735241, "grad_norm": 0.5546875, "learning_rate": 0.0017454525621599418, "loss": 0.1738, "step": 24320 }, { "epoch": 0.04312491633266222, "grad_norm": 1.46875, "learning_rate": 0.0017454110822760392, "loss": 0.2655, "step": 24322 }, { "epoch": 0.04312846249797204, "grad_norm": 0.70703125, "learning_rate": 0.0017453695995694833, "loss": 0.2151, "step": 24324 }, { "epoch": 0.04313200866328185, "grad_norm": 0.419921875, "learning_rate": 0.0017453281140404563, "loss": 0.162, "step": 24326 }, { "epoch": 0.04313555482859167, "grad_norm": 0.3203125, "learning_rate": 0.0017452866256891383, "loss": 0.2355, "step": 24328 }, { "epoch": 0.04313910099390148, "grad_norm": 0.376953125, "learning_rate": 0.0017452451345157114, "loss": 0.4167, "step": 24330 }, { "epoch": 0.043142647159211296, "grad_norm": 0.6484375, "learning_rate": 0.0017452036405203575, "loss": 0.1681, "step": 24332 }, { "epoch": 0.04314619332452111, "grad_norm": 0.380859375, "learning_rate": 0.0017451621437032574, "loss": 0.1656, "step": 24334 }, { "epoch": 0.043149739489830925, "grad_norm": 0.31640625, "learning_rate": 0.001745120644064593, "loss": 0.1696, "step": 24336 }, { "epoch": 0.04315328565514074, "grad_norm": 0.453125, "learning_rate": 0.0017450791416045454, "loss": 0.2043, "step": 24338 }, { "epoch": 0.04315683182045056, "grad_norm": 0.39453125, "learning_rate": 0.0017450376363232966, "loss": 0.2185, "step": 24340 }, { "epoch": 0.043160377985760376, "grad_norm": 0.56640625, "learning_rate": 0.0017449961282210277, "loss": 0.196, "step": 24342 }, { "epoch": 0.04316392415107019, "grad_norm": 0.94921875, "learning_rate": 0.0017449546172979206, "loss": 0.3017, "step": 24344 }, { "epoch": 0.043167470316380005, "grad_norm": 1.421875, "learning_rate": 0.0017449131035541564, "loss": 0.2033, "step": 24346 }, { "epoch": 0.04317101648168982, "grad_norm": 0.376953125, "learning_rate": 0.0017448715869899172, "loss": 0.2427, "step": 24348 }, { "epoch": 0.043174562646999634, "grad_norm": 0.3515625, "learning_rate": 0.001744830067605384, "loss": 0.1931, "step": 24350 }, { "epoch": 0.04317810881230945, "grad_norm": 0.5, "learning_rate": 0.0017447885454007386, "loss": 0.2006, "step": 24352 }, { "epoch": 0.04318165497761926, "grad_norm": 0.48046875, "learning_rate": 0.0017447470203761628, "loss": 0.2021, "step": 24354 }, { "epoch": 0.04318520114292908, "grad_norm": 0.8359375, "learning_rate": 0.001744705492531838, "loss": 0.2128, "step": 24356 }, { "epoch": 0.04318874730823889, "grad_norm": 2.078125, "learning_rate": 0.0017446639618679456, "loss": 0.2395, "step": 24358 }, { "epoch": 0.04319229347354871, "grad_norm": 1.1796875, "learning_rate": 0.001744622428384668, "loss": 0.2409, "step": 24360 }, { "epoch": 0.04319583963885852, "grad_norm": 1.2890625, "learning_rate": 0.001744580892082186, "loss": 0.2344, "step": 24362 }, { "epoch": 0.04319938580416834, "grad_norm": 0.337890625, "learning_rate": 0.0017445393529606818, "loss": 0.2098, "step": 24364 }, { "epoch": 0.04320293196947816, "grad_norm": 0.7890625, "learning_rate": 0.0017444978110203364, "loss": 0.1994, "step": 24366 }, { "epoch": 0.04320647813478797, "grad_norm": 0.58203125, "learning_rate": 0.0017444562662613323, "loss": 0.1759, "step": 24368 }, { "epoch": 0.04321002430009779, "grad_norm": 0.5, "learning_rate": 0.0017444147186838508, "loss": 0.2018, "step": 24370 }, { "epoch": 0.0432135704654076, "grad_norm": 0.51953125, "learning_rate": 0.0017443731682880738, "loss": 0.153, "step": 24372 }, { "epoch": 0.043217116630717416, "grad_norm": 1.7890625, "learning_rate": 0.0017443316150741828, "loss": 0.2891, "step": 24374 }, { "epoch": 0.04322066279602723, "grad_norm": 0.58984375, "learning_rate": 0.0017442900590423594, "loss": 0.1841, "step": 24376 }, { "epoch": 0.043224208961337045, "grad_norm": 0.25390625, "learning_rate": 0.0017442485001927857, "loss": 0.1902, "step": 24378 }, { "epoch": 0.04322775512664686, "grad_norm": 1.140625, "learning_rate": 0.0017442069385256434, "loss": 0.3534, "step": 24380 }, { "epoch": 0.043231301291956674, "grad_norm": 0.240234375, "learning_rate": 0.0017441653740411136, "loss": 0.2274, "step": 24382 }, { "epoch": 0.04323484745726649, "grad_norm": 0.65625, "learning_rate": 0.001744123806739379, "loss": 0.2303, "step": 24384 }, { "epoch": 0.04323839362257631, "grad_norm": 0.357421875, "learning_rate": 0.0017440822366206212, "loss": 0.1955, "step": 24386 }, { "epoch": 0.043241939787886124, "grad_norm": 0.318359375, "learning_rate": 0.0017440406636850217, "loss": 0.1802, "step": 24388 }, { "epoch": 0.04324548595319594, "grad_norm": 0.392578125, "learning_rate": 0.0017439990879327624, "loss": 0.2531, "step": 24390 }, { "epoch": 0.043249032118505754, "grad_norm": 0.52734375, "learning_rate": 0.0017439575093640249, "loss": 0.2286, "step": 24392 }, { "epoch": 0.04325257828381557, "grad_norm": 0.73828125, "learning_rate": 0.0017439159279789916, "loss": 0.2392, "step": 24394 }, { "epoch": 0.04325612444912538, "grad_norm": 1.34375, "learning_rate": 0.0017438743437778442, "loss": 0.2198, "step": 24396 }, { "epoch": 0.0432596706144352, "grad_norm": 0.5625, "learning_rate": 0.0017438327567607641, "loss": 0.2168, "step": 24398 }, { "epoch": 0.04326321677974501, "grad_norm": 0.93359375, "learning_rate": 0.0017437911669279337, "loss": 0.3038, "step": 24400 }, { "epoch": 0.043266762945054826, "grad_norm": 0.419921875, "learning_rate": 0.0017437495742795347, "loss": 0.5243, "step": 24402 }, { "epoch": 0.04327030911036464, "grad_norm": 0.369140625, "learning_rate": 0.0017437079788157489, "loss": 0.1738, "step": 24404 }, { "epoch": 0.043273855275674455, "grad_norm": 0.26171875, "learning_rate": 0.0017436663805367584, "loss": 0.1757, "step": 24406 }, { "epoch": 0.04327740144098428, "grad_norm": 0.640625, "learning_rate": 0.0017436247794427452, "loss": 0.2239, "step": 24408 }, { "epoch": 0.04328094760629409, "grad_norm": 0.40234375, "learning_rate": 0.0017435831755338907, "loss": 0.1496, "step": 24410 }, { "epoch": 0.043284493771603906, "grad_norm": 0.6953125, "learning_rate": 0.0017435415688103772, "loss": 0.1979, "step": 24412 }, { "epoch": 0.04328803993691372, "grad_norm": 0.474609375, "learning_rate": 0.001743499959272387, "loss": 0.2165, "step": 24414 }, { "epoch": 0.043291586102223535, "grad_norm": 0.3125, "learning_rate": 0.0017434583469201016, "loss": 0.1757, "step": 24416 }, { "epoch": 0.04329513226753335, "grad_norm": 0.63671875, "learning_rate": 0.0017434167317537032, "loss": 0.1548, "step": 24418 }, { "epoch": 0.043298678432843164, "grad_norm": 1.3046875, "learning_rate": 0.0017433751137733737, "loss": 0.2083, "step": 24420 }, { "epoch": 0.04330222459815298, "grad_norm": 1.0703125, "learning_rate": 0.0017433334929792955, "loss": 0.3403, "step": 24422 }, { "epoch": 0.04330577076346279, "grad_norm": 0.271484375, "learning_rate": 0.0017432918693716497, "loss": 0.1731, "step": 24424 }, { "epoch": 0.04330931692877261, "grad_norm": 0.2177734375, "learning_rate": 0.0017432502429506195, "loss": 0.2183, "step": 24426 }, { "epoch": 0.04331286309408242, "grad_norm": 0.4765625, "learning_rate": 0.001743208613716386, "loss": 0.3217, "step": 24428 }, { "epoch": 0.04331640925939224, "grad_norm": 0.310546875, "learning_rate": 0.0017431669816691318, "loss": 0.1976, "step": 24430 }, { "epoch": 0.04331995542470206, "grad_norm": 0.26171875, "learning_rate": 0.0017431253468090388, "loss": 0.2333, "step": 24432 }, { "epoch": 0.04332350159001187, "grad_norm": 0.828125, "learning_rate": 0.0017430837091362893, "loss": 0.2347, "step": 24434 }, { "epoch": 0.04332704775532169, "grad_norm": 2.40625, "learning_rate": 0.0017430420686510649, "loss": 0.2624, "step": 24436 }, { "epoch": 0.0433305939206315, "grad_norm": 0.515625, "learning_rate": 0.0017430004253535484, "loss": 0.1599, "step": 24438 }, { "epoch": 0.04333414008594132, "grad_norm": 0.34765625, "learning_rate": 0.001742958779243921, "loss": 0.206, "step": 24440 }, { "epoch": 0.04333768625125113, "grad_norm": 3.203125, "learning_rate": 0.0017429171303223656, "loss": 0.3181, "step": 24442 }, { "epoch": 0.043341232416560946, "grad_norm": 0.439453125, "learning_rate": 0.001742875478589064, "loss": 0.3261, "step": 24444 }, { "epoch": 0.04334477858187076, "grad_norm": 0.45703125, "learning_rate": 0.0017428338240441988, "loss": 0.1977, "step": 24446 }, { "epoch": 0.043348324747180575, "grad_norm": 0.421875, "learning_rate": 0.0017427921666879518, "loss": 0.1796, "step": 24448 }, { "epoch": 0.04335187091249039, "grad_norm": 3.296875, "learning_rate": 0.0017427505065205052, "loss": 0.4206, "step": 24450 }, { "epoch": 0.043355417077800204, "grad_norm": 1.03125, "learning_rate": 0.0017427088435420408, "loss": 0.2164, "step": 24452 }, { "epoch": 0.043358963243110026, "grad_norm": 0.1875, "learning_rate": 0.0017426671777527418, "loss": 0.1914, "step": 24454 }, { "epoch": 0.04336250940841984, "grad_norm": 2.140625, "learning_rate": 0.0017426255091527896, "loss": 0.2748, "step": 24456 }, { "epoch": 0.043366055573729655, "grad_norm": 0.55078125, "learning_rate": 0.001742583837742367, "loss": 0.2643, "step": 24458 }, { "epoch": 0.04336960173903947, "grad_norm": 1.234375, "learning_rate": 0.0017425421635216558, "loss": 0.2253, "step": 24460 }, { "epoch": 0.043373147904349284, "grad_norm": 0.232421875, "learning_rate": 0.0017425004864908382, "loss": 0.2106, "step": 24462 }, { "epoch": 0.0433766940696591, "grad_norm": 0.890625, "learning_rate": 0.001742458806650097, "loss": 0.2723, "step": 24464 }, { "epoch": 0.04338024023496891, "grad_norm": 0.60546875, "learning_rate": 0.001742417123999614, "loss": 0.1871, "step": 24466 }, { "epoch": 0.04338378640027873, "grad_norm": 1.265625, "learning_rate": 0.0017423754385395716, "loss": 0.2935, "step": 24468 }, { "epoch": 0.04338733256558854, "grad_norm": 0.828125, "learning_rate": 0.0017423337502701522, "loss": 0.2, "step": 24470 }, { "epoch": 0.043390878730898357, "grad_norm": 1.0625, "learning_rate": 0.0017422920591915383, "loss": 0.2161, "step": 24472 }, { "epoch": 0.04339442489620817, "grad_norm": 1.5234375, "learning_rate": 0.001742250365303912, "loss": 0.2065, "step": 24474 }, { "epoch": 0.04339797106151799, "grad_norm": 1.1328125, "learning_rate": 0.0017422086686074553, "loss": 0.1756, "step": 24476 }, { "epoch": 0.04340151722682781, "grad_norm": 1.0859375, "learning_rate": 0.0017421669691023512, "loss": 0.1563, "step": 24478 }, { "epoch": 0.04340506339213762, "grad_norm": 0.431640625, "learning_rate": 0.0017421252667887816, "loss": 0.2812, "step": 24480 }, { "epoch": 0.043408609557447436, "grad_norm": 0.65625, "learning_rate": 0.0017420835616669293, "loss": 0.1827, "step": 24482 }, { "epoch": 0.04341215572275725, "grad_norm": 0.203125, "learning_rate": 0.0017420418537369762, "loss": 0.1739, "step": 24484 }, { "epoch": 0.043415701888067065, "grad_norm": 1.71875, "learning_rate": 0.0017420001429991052, "loss": 0.3764, "step": 24486 }, { "epoch": 0.04341924805337688, "grad_norm": 0.515625, "learning_rate": 0.0017419584294534984, "loss": 0.2501, "step": 24488 }, { "epoch": 0.043422794218686694, "grad_norm": 0.453125, "learning_rate": 0.0017419167131003384, "loss": 0.1938, "step": 24490 }, { "epoch": 0.04342634038399651, "grad_norm": 0.7265625, "learning_rate": 0.0017418749939398077, "loss": 0.2063, "step": 24492 }, { "epoch": 0.043429886549306324, "grad_norm": 0.609375, "learning_rate": 0.0017418332719720882, "loss": 0.2003, "step": 24494 }, { "epoch": 0.04343343271461614, "grad_norm": 1.109375, "learning_rate": 0.001741791547197363, "loss": 0.2152, "step": 24496 }, { "epoch": 0.04343697887992595, "grad_norm": 0.248046875, "learning_rate": 0.0017417498196158144, "loss": 0.3106, "step": 24498 }, { "epoch": 0.043440525045235774, "grad_norm": 0.9921875, "learning_rate": 0.001741708089227625, "loss": 0.2202, "step": 24500 }, { "epoch": 0.04344407121054559, "grad_norm": 1.3046875, "learning_rate": 0.001741666356032977, "loss": 0.165, "step": 24502 }, { "epoch": 0.0434476173758554, "grad_norm": 0.431640625, "learning_rate": 0.0017416246200320533, "loss": 0.1848, "step": 24504 }, { "epoch": 0.04345116354116522, "grad_norm": 0.365234375, "learning_rate": 0.0017415828812250359, "loss": 0.2009, "step": 24506 }, { "epoch": 0.04345470970647503, "grad_norm": 0.365234375, "learning_rate": 0.0017415411396121078, "loss": 0.206, "step": 24508 }, { "epoch": 0.04345825587178485, "grad_norm": 0.392578125, "learning_rate": 0.0017414993951934512, "loss": 0.1575, "step": 24510 }, { "epoch": 0.04346180203709466, "grad_norm": 0.419921875, "learning_rate": 0.0017414576479692493, "loss": 0.1594, "step": 24512 }, { "epoch": 0.043465348202404476, "grad_norm": 0.83203125, "learning_rate": 0.001741415897939684, "loss": 0.2186, "step": 24514 }, { "epoch": 0.04346889436771429, "grad_norm": 0.953125, "learning_rate": 0.001741374145104938, "loss": 0.2389, "step": 24516 }, { "epoch": 0.043472440533024105, "grad_norm": 0.2080078125, "learning_rate": 0.0017413323894651942, "loss": 0.168, "step": 24518 }, { "epoch": 0.04347598669833392, "grad_norm": 0.375, "learning_rate": 0.0017412906310206352, "loss": 0.1699, "step": 24520 }, { "epoch": 0.04347953286364374, "grad_norm": 0.71875, "learning_rate": 0.0017412488697714432, "loss": 0.1966, "step": 24522 }, { "epoch": 0.043483079028953556, "grad_norm": 0.75, "learning_rate": 0.0017412071057178013, "loss": 0.1513, "step": 24524 }, { "epoch": 0.04348662519426337, "grad_norm": 0.76171875, "learning_rate": 0.001741165338859892, "loss": 0.1901, "step": 24526 }, { "epoch": 0.043490171359573185, "grad_norm": 0.3359375, "learning_rate": 0.001741123569197898, "loss": 0.1782, "step": 24528 }, { "epoch": 0.043493717524883, "grad_norm": 0.9375, "learning_rate": 0.0017410817967320016, "loss": 0.1966, "step": 24530 }, { "epoch": 0.043497263690192814, "grad_norm": 0.65625, "learning_rate": 0.001741040021462386, "loss": 0.1827, "step": 24532 }, { "epoch": 0.04350080985550263, "grad_norm": 1.0390625, "learning_rate": 0.001740998243389234, "loss": 0.1767, "step": 24534 }, { "epoch": 0.04350435602081244, "grad_norm": 0.51953125, "learning_rate": 0.0017409564625127274, "loss": 0.1974, "step": 24536 }, { "epoch": 0.04350790218612226, "grad_norm": 1.3515625, "learning_rate": 0.00174091467883305, "loss": 0.2534, "step": 24538 }, { "epoch": 0.04351144835143207, "grad_norm": 1.921875, "learning_rate": 0.001740872892350384, "loss": 0.2744, "step": 24540 }, { "epoch": 0.04351499451674189, "grad_norm": 0.578125, "learning_rate": 0.0017408311030649123, "loss": 0.1968, "step": 24542 }, { "epoch": 0.04351854068205171, "grad_norm": 0.41015625, "learning_rate": 0.0017407893109768174, "loss": 0.2805, "step": 24544 }, { "epoch": 0.04352208684736152, "grad_norm": 0.390625, "learning_rate": 0.0017407475160862824, "loss": 0.1895, "step": 24546 }, { "epoch": 0.04352563301267134, "grad_norm": 0.2412109375, "learning_rate": 0.0017407057183934898, "loss": 0.1834, "step": 24548 }, { "epoch": 0.04352917917798115, "grad_norm": 0.34375, "learning_rate": 0.0017406639178986227, "loss": 0.2249, "step": 24550 }, { "epoch": 0.043532725343290966, "grad_norm": 0.5078125, "learning_rate": 0.0017406221146018639, "loss": 0.1804, "step": 24552 }, { "epoch": 0.04353627150860078, "grad_norm": 0.435546875, "learning_rate": 0.0017405803085033958, "loss": 0.1507, "step": 24554 }, { "epoch": 0.043539817673910595, "grad_norm": 0.255859375, "learning_rate": 0.0017405384996034014, "loss": 0.2211, "step": 24556 }, { "epoch": 0.04354336383922041, "grad_norm": 0.2021484375, "learning_rate": 0.001740496687902064, "loss": 0.1858, "step": 24558 }, { "epoch": 0.043546910004530225, "grad_norm": 0.4921875, "learning_rate": 0.0017404548733995662, "loss": 0.2412, "step": 24560 }, { "epoch": 0.04355045616984004, "grad_norm": 1.4296875, "learning_rate": 0.0017404130560960908, "loss": 0.178, "step": 24562 }, { "epoch": 0.043554002335149854, "grad_norm": 0.48828125, "learning_rate": 0.0017403712359918202, "loss": 0.1823, "step": 24564 }, { "epoch": 0.04355754850045967, "grad_norm": 0.1962890625, "learning_rate": 0.0017403294130869382, "loss": 0.2138, "step": 24566 }, { "epoch": 0.04356109466576949, "grad_norm": 0.470703125, "learning_rate": 0.0017402875873816274, "loss": 0.2772, "step": 24568 }, { "epoch": 0.043564640831079304, "grad_norm": 0.6328125, "learning_rate": 0.0017402457588760703, "loss": 0.2471, "step": 24570 }, { "epoch": 0.04356818699638912, "grad_norm": 3.890625, "learning_rate": 0.0017402039275704504, "loss": 0.2246, "step": 24572 }, { "epoch": 0.04357173316169893, "grad_norm": 0.50390625, "learning_rate": 0.0017401620934649501, "loss": 0.4466, "step": 24574 }, { "epoch": 0.04357527932700875, "grad_norm": 0.640625, "learning_rate": 0.0017401202565597528, "loss": 0.165, "step": 24576 }, { "epoch": 0.04357882549231856, "grad_norm": 0.578125, "learning_rate": 0.0017400784168550417, "loss": 0.2183, "step": 24578 }, { "epoch": 0.04358237165762838, "grad_norm": 0.298828125, "learning_rate": 0.0017400365743509988, "loss": 0.1525, "step": 24580 }, { "epoch": 0.04358591782293819, "grad_norm": 4.125, "learning_rate": 0.0017399947290478077, "loss": 0.4064, "step": 24582 }, { "epoch": 0.043589463988248006, "grad_norm": 0.1953125, "learning_rate": 0.001739952880945652, "loss": 0.1757, "step": 24584 }, { "epoch": 0.04359301015355782, "grad_norm": 0.291015625, "learning_rate": 0.001739911030044714, "loss": 0.2448, "step": 24586 }, { "epoch": 0.043596556318867635, "grad_norm": 1.09375, "learning_rate": 0.0017398691763451765, "loss": 0.2006, "step": 24588 }, { "epoch": 0.04360010248417746, "grad_norm": 1.125, "learning_rate": 0.001739827319847223, "loss": 0.2919, "step": 24590 }, { "epoch": 0.04360364864948727, "grad_norm": 0.53125, "learning_rate": 0.0017397854605510363, "loss": 0.1869, "step": 24592 }, { "epoch": 0.043607194814797086, "grad_norm": 0.62109375, "learning_rate": 0.0017397435984568, "loss": 0.2285, "step": 24594 }, { "epoch": 0.0436107409801069, "grad_norm": 0.17578125, "learning_rate": 0.0017397017335646968, "loss": 0.1463, "step": 24596 }, { "epoch": 0.043614287145416715, "grad_norm": 0.337890625, "learning_rate": 0.0017396598658749097, "loss": 0.1731, "step": 24598 }, { "epoch": 0.04361783331072653, "grad_norm": 0.28515625, "learning_rate": 0.0017396179953876217, "loss": 0.1394, "step": 24600 }, { "epoch": 0.043621379476036344, "grad_norm": 0.5234375, "learning_rate": 0.0017395761221030161, "loss": 0.211, "step": 24602 }, { "epoch": 0.04362492564134616, "grad_norm": 0.498046875, "learning_rate": 0.0017395342460212763, "loss": 0.2733, "step": 24604 }, { "epoch": 0.04362847180665597, "grad_norm": 0.578125, "learning_rate": 0.0017394923671425852, "loss": 0.2176, "step": 24606 }, { "epoch": 0.04363201797196579, "grad_norm": 0.53515625, "learning_rate": 0.0017394504854671257, "loss": 0.1652, "step": 24608 }, { "epoch": 0.0436355641372756, "grad_norm": 0.6875, "learning_rate": 0.0017394086009950816, "loss": 0.3754, "step": 24610 }, { "epoch": 0.043639110302585424, "grad_norm": 0.443359375, "learning_rate": 0.0017393667137266353, "loss": 0.3114, "step": 24612 }, { "epoch": 0.04364265646789524, "grad_norm": 0.76171875, "learning_rate": 0.0017393248236619707, "loss": 0.2052, "step": 24614 }, { "epoch": 0.04364620263320505, "grad_norm": 0.345703125, "learning_rate": 0.0017392829308012707, "loss": 0.2849, "step": 24616 }, { "epoch": 0.04364974879851487, "grad_norm": 0.5390625, "learning_rate": 0.0017392410351447184, "loss": 0.3229, "step": 24618 }, { "epoch": 0.04365329496382468, "grad_norm": 0.58203125, "learning_rate": 0.001739199136692497, "loss": 0.2477, "step": 24620 }, { "epoch": 0.0436568411291345, "grad_norm": 0.42578125, "learning_rate": 0.00173915723544479, "loss": 0.2171, "step": 24622 }, { "epoch": 0.04366038729444431, "grad_norm": 0.345703125, "learning_rate": 0.0017391153314017804, "loss": 0.2439, "step": 24624 }, { "epoch": 0.043663933459754126, "grad_norm": 2.609375, "learning_rate": 0.0017390734245636516, "loss": 0.2733, "step": 24626 }, { "epoch": 0.04366747962506394, "grad_norm": 0.380859375, "learning_rate": 0.001739031514930587, "loss": 0.3275, "step": 24628 }, { "epoch": 0.043671025790373755, "grad_norm": 0.890625, "learning_rate": 0.0017389896025027697, "loss": 0.1998, "step": 24630 }, { "epoch": 0.04367457195568357, "grad_norm": 0.369140625, "learning_rate": 0.001738947687280383, "loss": 0.1971, "step": 24632 }, { "epoch": 0.043678118120993384, "grad_norm": 0.8671875, "learning_rate": 0.00173890576926361, "loss": 0.1863, "step": 24634 }, { "epoch": 0.043681664286303205, "grad_norm": 0.515625, "learning_rate": 0.001738863848452635, "loss": 0.1731, "step": 24636 }, { "epoch": 0.04368521045161302, "grad_norm": 0.77734375, "learning_rate": 0.0017388219248476399, "loss": 0.3525, "step": 24638 }, { "epoch": 0.043688756616922834, "grad_norm": 0.640625, "learning_rate": 0.001738779998448809, "loss": 0.197, "step": 24640 }, { "epoch": 0.04369230278223265, "grad_norm": 0.2255859375, "learning_rate": 0.0017387380692563256, "loss": 0.191, "step": 24642 }, { "epoch": 0.043695848947542464, "grad_norm": 0.29296875, "learning_rate": 0.0017386961372703727, "loss": 0.1856, "step": 24644 }, { "epoch": 0.04369939511285228, "grad_norm": 2.609375, "learning_rate": 0.001738654202491134, "loss": 0.263, "step": 24646 }, { "epoch": 0.04370294127816209, "grad_norm": 0.33203125, "learning_rate": 0.0017386122649187927, "loss": 0.1944, "step": 24648 }, { "epoch": 0.04370648744347191, "grad_norm": 0.45703125, "learning_rate": 0.0017385703245535325, "loss": 0.1627, "step": 24650 }, { "epoch": 0.04371003360878172, "grad_norm": 2.28125, "learning_rate": 0.0017385283813955363, "loss": 0.2837, "step": 24652 }, { "epoch": 0.043713579774091536, "grad_norm": 0.52734375, "learning_rate": 0.0017384864354449883, "loss": 0.1947, "step": 24654 }, { "epoch": 0.04371712593940135, "grad_norm": 0.515625, "learning_rate": 0.0017384444867020712, "loss": 0.1553, "step": 24656 }, { "epoch": 0.04372067210471117, "grad_norm": 0.490234375, "learning_rate": 0.0017384025351669687, "loss": 0.3871, "step": 24658 }, { "epoch": 0.04372421827002099, "grad_norm": 0.45703125, "learning_rate": 0.0017383605808398647, "loss": 0.1821, "step": 24660 }, { "epoch": 0.0437277644353308, "grad_norm": 1.109375, "learning_rate": 0.001738318623720942, "loss": 0.2207, "step": 24662 }, { "epoch": 0.043731310600640616, "grad_norm": 0.462890625, "learning_rate": 0.0017382766638103845, "loss": 0.2804, "step": 24664 }, { "epoch": 0.04373485676595043, "grad_norm": 0.5390625, "learning_rate": 0.0017382347011083755, "loss": 0.2181, "step": 24666 }, { "epoch": 0.043738402931260245, "grad_norm": 1.03125, "learning_rate": 0.0017381927356150987, "loss": 0.3116, "step": 24668 }, { "epoch": 0.04374194909657006, "grad_norm": 0.408203125, "learning_rate": 0.001738150767330738, "loss": 0.1624, "step": 24670 }, { "epoch": 0.043745495261879874, "grad_norm": 0.41015625, "learning_rate": 0.001738108796255476, "loss": 0.1911, "step": 24672 }, { "epoch": 0.04374904142718969, "grad_norm": 0.373046875, "learning_rate": 0.0017380668223894971, "loss": 0.1779, "step": 24674 }, { "epoch": 0.0437525875924995, "grad_norm": 0.50390625, "learning_rate": 0.0017380248457329844, "loss": 0.3178, "step": 24676 }, { "epoch": 0.04375613375780932, "grad_norm": 1.9921875, "learning_rate": 0.0017379828662861217, "loss": 0.2463, "step": 24678 }, { "epoch": 0.04375967992311914, "grad_norm": 0.71484375, "learning_rate": 0.0017379408840490927, "loss": 0.1763, "step": 24680 }, { "epoch": 0.043763226088428954, "grad_norm": 1.0859375, "learning_rate": 0.0017378988990220806, "loss": 0.4367, "step": 24682 }, { "epoch": 0.04376677225373877, "grad_norm": 0.77734375, "learning_rate": 0.0017378569112052693, "loss": 0.2131, "step": 24684 }, { "epoch": 0.04377031841904858, "grad_norm": 0.41015625, "learning_rate": 0.0017378149205988422, "loss": 0.2621, "step": 24686 }, { "epoch": 0.0437738645843584, "grad_norm": 0.59765625, "learning_rate": 0.0017377729272029837, "loss": 0.1956, "step": 24688 }, { "epoch": 0.04377741074966821, "grad_norm": 0.78515625, "learning_rate": 0.0017377309310178761, "loss": 0.1683, "step": 24690 }, { "epoch": 0.04378095691497803, "grad_norm": 0.404296875, "learning_rate": 0.0017376889320437044, "loss": 0.2212, "step": 24692 }, { "epoch": 0.04378450308028784, "grad_norm": 0.81640625, "learning_rate": 0.0017376469302806514, "loss": 0.1979, "step": 24694 }, { "epoch": 0.043788049245597656, "grad_norm": 0.54296875, "learning_rate": 0.0017376049257289013, "loss": 0.372, "step": 24696 }, { "epoch": 0.04379159541090747, "grad_norm": 1.03125, "learning_rate": 0.0017375629183886377, "loss": 0.4359, "step": 24698 }, { "epoch": 0.043795141576217285, "grad_norm": 0.84765625, "learning_rate": 0.0017375209082600438, "loss": 0.2125, "step": 24700 }, { "epoch": 0.0437986877415271, "grad_norm": 0.8359375, "learning_rate": 0.0017374788953433043, "loss": 0.2274, "step": 24702 }, { "epoch": 0.04380223390683692, "grad_norm": 0.2734375, "learning_rate": 0.0017374368796386022, "loss": 0.1753, "step": 24704 }, { "epoch": 0.043805780072146736, "grad_norm": 0.38671875, "learning_rate": 0.0017373948611461214, "loss": 0.22, "step": 24706 }, { "epoch": 0.04380932623745655, "grad_norm": 0.96875, "learning_rate": 0.0017373528398660458, "loss": 0.2047, "step": 24708 }, { "epoch": 0.043812872402766365, "grad_norm": 1.171875, "learning_rate": 0.001737310815798559, "loss": 0.2188, "step": 24710 }, { "epoch": 0.04381641856807618, "grad_norm": 0.5390625, "learning_rate": 0.001737268788943845, "loss": 0.2563, "step": 24712 }, { "epoch": 0.043819964733385994, "grad_norm": 0.45703125, "learning_rate": 0.0017372267593020875, "loss": 0.446, "step": 24714 }, { "epoch": 0.04382351089869581, "grad_norm": 0.447265625, "learning_rate": 0.0017371847268734702, "loss": 0.2102, "step": 24716 }, { "epoch": 0.04382705706400562, "grad_norm": 0.337890625, "learning_rate": 0.001737142691658177, "loss": 0.224, "step": 24718 }, { "epoch": 0.04383060322931544, "grad_norm": 0.68359375, "learning_rate": 0.0017371006536563917, "loss": 0.1864, "step": 24720 }, { "epoch": 0.04383414939462525, "grad_norm": 1.34375, "learning_rate": 0.0017370586128682983, "loss": 0.2079, "step": 24722 }, { "epoch": 0.043837695559935067, "grad_norm": 1.0703125, "learning_rate": 0.0017370165692940807, "loss": 0.1818, "step": 24724 }, { "epoch": 0.04384124172524489, "grad_norm": 0.2392578125, "learning_rate": 0.0017369745229339224, "loss": 0.1769, "step": 24726 }, { "epoch": 0.0438447878905547, "grad_norm": 0.41015625, "learning_rate": 0.0017369324737880078, "loss": 0.1988, "step": 24728 }, { "epoch": 0.04384833405586452, "grad_norm": 0.52734375, "learning_rate": 0.0017368904218565205, "loss": 0.1925, "step": 24730 }, { "epoch": 0.04385188022117433, "grad_norm": 0.2392578125, "learning_rate": 0.0017368483671396445, "loss": 0.2958, "step": 24732 }, { "epoch": 0.043855426386484146, "grad_norm": 0.66015625, "learning_rate": 0.0017368063096375634, "loss": 0.1985, "step": 24734 }, { "epoch": 0.04385897255179396, "grad_norm": 0.3671875, "learning_rate": 0.0017367642493504615, "loss": 0.2097, "step": 24736 }, { "epoch": 0.043862518717103775, "grad_norm": 1.734375, "learning_rate": 0.0017367221862785228, "loss": 0.3518, "step": 24738 }, { "epoch": 0.04386606488241359, "grad_norm": 0.3203125, "learning_rate": 0.0017366801204219309, "loss": 0.1801, "step": 24740 }, { "epoch": 0.043869611047723404, "grad_norm": 0.52734375, "learning_rate": 0.0017366380517808702, "loss": 0.2722, "step": 24742 }, { "epoch": 0.04387315721303322, "grad_norm": 0.3515625, "learning_rate": 0.0017365959803555243, "loss": 0.1475, "step": 24744 }, { "epoch": 0.043876703378343034, "grad_norm": 0.482421875, "learning_rate": 0.0017365539061460773, "loss": 0.2005, "step": 24746 }, { "epoch": 0.043880249543652855, "grad_norm": 0.40625, "learning_rate": 0.0017365118291527133, "loss": 0.2047, "step": 24748 }, { "epoch": 0.04388379570896267, "grad_norm": 0.71484375, "learning_rate": 0.0017364697493756165, "loss": 0.1838, "step": 24750 }, { "epoch": 0.043887341874272484, "grad_norm": 0.9765625, "learning_rate": 0.0017364276668149702, "loss": 0.3485, "step": 24752 }, { "epoch": 0.0438908880395823, "grad_norm": 0.6171875, "learning_rate": 0.00173638558147096, "loss": 0.2053, "step": 24754 }, { "epoch": 0.04389443420489211, "grad_norm": 0.1875, "learning_rate": 0.001736343493343768, "loss": 0.2038, "step": 24756 }, { "epoch": 0.04389798037020193, "grad_norm": 1.4375, "learning_rate": 0.0017363014024335795, "loss": 0.2865, "step": 24758 }, { "epoch": 0.04390152653551174, "grad_norm": 0.337890625, "learning_rate": 0.001736259308740578, "loss": 0.2944, "step": 24760 }, { "epoch": 0.04390507270082156, "grad_norm": 1.8359375, "learning_rate": 0.0017362172122649484, "loss": 0.1514, "step": 24762 }, { "epoch": 0.04390861886613137, "grad_norm": 1.109375, "learning_rate": 0.001736175113006874, "loss": 0.1965, "step": 24764 }, { "epoch": 0.043912165031441186, "grad_norm": 5.46875, "learning_rate": 0.0017361330109665393, "loss": 0.1965, "step": 24766 }, { "epoch": 0.043915711196751, "grad_norm": 0.341796875, "learning_rate": 0.0017360909061441279, "loss": 0.2025, "step": 24768 }, { "epoch": 0.043919257362060815, "grad_norm": 0.77734375, "learning_rate": 0.0017360487985398248, "loss": 0.214, "step": 24770 }, { "epoch": 0.04392280352737064, "grad_norm": 0.46875, "learning_rate": 0.0017360066881538135, "loss": 0.1446, "step": 24772 }, { "epoch": 0.04392634969268045, "grad_norm": 0.703125, "learning_rate": 0.0017359645749862784, "loss": 0.201, "step": 24774 }, { "epoch": 0.043929895857990266, "grad_norm": 1.0078125, "learning_rate": 0.0017359224590374036, "loss": 0.2279, "step": 24776 }, { "epoch": 0.04393344202330008, "grad_norm": 0.298828125, "learning_rate": 0.0017358803403073736, "loss": 0.2437, "step": 24778 }, { "epoch": 0.043936988188609895, "grad_norm": 0.251953125, "learning_rate": 0.0017358382187963725, "loss": 0.207, "step": 24780 }, { "epoch": 0.04394053435391971, "grad_norm": 0.361328125, "learning_rate": 0.0017357960945045836, "loss": 0.1772, "step": 24782 }, { "epoch": 0.043944080519229524, "grad_norm": 0.7890625, "learning_rate": 0.0017357539674321928, "loss": 0.1945, "step": 24784 }, { "epoch": 0.04394762668453934, "grad_norm": 0.41796875, "learning_rate": 0.0017357118375793828, "loss": 0.2121, "step": 24786 }, { "epoch": 0.04395117284984915, "grad_norm": 0.2578125, "learning_rate": 0.0017356697049463385, "loss": 0.1861, "step": 24788 }, { "epoch": 0.04395471901515897, "grad_norm": 0.298828125, "learning_rate": 0.0017356275695332444, "loss": 0.2258, "step": 24790 }, { "epoch": 0.04395826518046878, "grad_norm": 0.74609375, "learning_rate": 0.0017355854313402849, "loss": 0.2314, "step": 24792 }, { "epoch": 0.043961811345778604, "grad_norm": 1.2734375, "learning_rate": 0.0017355432903676433, "loss": 0.243, "step": 24794 }, { "epoch": 0.04396535751108842, "grad_norm": 0.609375, "learning_rate": 0.0017355011466155049, "loss": 0.2536, "step": 24796 }, { "epoch": 0.04396890367639823, "grad_norm": 0.482421875, "learning_rate": 0.0017354590000840532, "loss": 0.1508, "step": 24798 }, { "epoch": 0.04397244984170805, "grad_norm": 0.310546875, "learning_rate": 0.0017354168507734734, "loss": 0.1553, "step": 24800 }, { "epoch": 0.04397599600701786, "grad_norm": 2.390625, "learning_rate": 0.0017353746986839488, "loss": 0.1828, "step": 24802 }, { "epoch": 0.043979542172327676, "grad_norm": 0.515625, "learning_rate": 0.001735332543815665, "loss": 0.2022, "step": 24804 }, { "epoch": 0.04398308833763749, "grad_norm": 0.59765625, "learning_rate": 0.0017352903861688055, "loss": 0.1923, "step": 24806 }, { "epoch": 0.043986634502947305, "grad_norm": 0.3046875, "learning_rate": 0.001735248225743555, "loss": 0.2063, "step": 24808 }, { "epoch": 0.04399018066825712, "grad_norm": 0.248046875, "learning_rate": 0.0017352060625400972, "loss": 0.1802, "step": 24810 }, { "epoch": 0.043993726833566935, "grad_norm": 1.4921875, "learning_rate": 0.0017351638965586173, "loss": 0.2478, "step": 24812 }, { "epoch": 0.04399727299887675, "grad_norm": 0.6875, "learning_rate": 0.0017351217277992998, "loss": 0.1826, "step": 24814 }, { "epoch": 0.04400081916418657, "grad_norm": 0.59765625, "learning_rate": 0.0017350795562623286, "loss": 0.2102, "step": 24816 }, { "epoch": 0.044004365329496385, "grad_norm": 0.50390625, "learning_rate": 0.0017350373819478883, "loss": 0.231, "step": 24818 }, { "epoch": 0.0440079114948062, "grad_norm": 0.447265625, "learning_rate": 0.0017349952048561633, "loss": 0.2083, "step": 24820 }, { "epoch": 0.044011457660116014, "grad_norm": 0.423828125, "learning_rate": 0.001734953024987338, "loss": 0.2702, "step": 24822 }, { "epoch": 0.04401500382542583, "grad_norm": 0.78515625, "learning_rate": 0.0017349108423415974, "loss": 0.2296, "step": 24824 }, { "epoch": 0.04401854999073564, "grad_norm": 0.2734375, "learning_rate": 0.0017348686569191253, "loss": 0.1744, "step": 24826 }, { "epoch": 0.04402209615604546, "grad_norm": 0.6875, "learning_rate": 0.0017348264687201062, "loss": 0.3019, "step": 24828 }, { "epoch": 0.04402564232135527, "grad_norm": 1.796875, "learning_rate": 0.0017347842777447253, "loss": 0.2328, "step": 24830 }, { "epoch": 0.04402918848666509, "grad_norm": 0.69921875, "learning_rate": 0.0017347420839931666, "loss": 0.28, "step": 24832 }, { "epoch": 0.0440327346519749, "grad_norm": 1.1171875, "learning_rate": 0.0017346998874656148, "loss": 0.212, "step": 24834 }, { "epoch": 0.044036280817284716, "grad_norm": 3.234375, "learning_rate": 0.0017346576881622543, "loss": 0.2375, "step": 24836 }, { "epoch": 0.04403982698259453, "grad_norm": 0.2333984375, "learning_rate": 0.0017346154860832697, "loss": 0.1886, "step": 24838 }, { "epoch": 0.04404337314790435, "grad_norm": 0.76953125, "learning_rate": 0.0017345732812288454, "loss": 0.5268, "step": 24840 }, { "epoch": 0.04404691931321417, "grad_norm": 0.470703125, "learning_rate": 0.0017345310735991664, "loss": 0.1705, "step": 24842 }, { "epoch": 0.04405046547852398, "grad_norm": 0.890625, "learning_rate": 0.0017344888631944172, "loss": 0.2968, "step": 24844 }, { "epoch": 0.044054011643833796, "grad_norm": 0.5703125, "learning_rate": 0.001734446650014782, "loss": 0.1989, "step": 24846 }, { "epoch": 0.04405755780914361, "grad_norm": 0.734375, "learning_rate": 0.001734404434060446, "loss": 0.2312, "step": 24848 }, { "epoch": 0.044061103974453425, "grad_norm": 0.76171875, "learning_rate": 0.001734362215331593, "loss": 0.1963, "step": 24850 }, { "epoch": 0.04406465013976324, "grad_norm": 0.66796875, "learning_rate": 0.0017343199938284085, "loss": 0.2594, "step": 24852 }, { "epoch": 0.044068196305073054, "grad_norm": 0.330078125, "learning_rate": 0.0017342777695510767, "loss": 0.2067, "step": 24854 }, { "epoch": 0.04407174247038287, "grad_norm": 0.75, "learning_rate": 0.0017342355424997825, "loss": 0.17, "step": 24856 }, { "epoch": 0.04407528863569268, "grad_norm": 0.390625, "learning_rate": 0.0017341933126747103, "loss": 0.1956, "step": 24858 }, { "epoch": 0.0440788348010025, "grad_norm": 0.306640625, "learning_rate": 0.0017341510800760447, "loss": 0.2202, "step": 24860 }, { "epoch": 0.04408238096631232, "grad_norm": 0.28125, "learning_rate": 0.001734108844703971, "loss": 0.1989, "step": 24862 }, { "epoch": 0.044085927131622134, "grad_norm": 0.341796875, "learning_rate": 0.0017340666065586736, "loss": 0.1902, "step": 24864 }, { "epoch": 0.04408947329693195, "grad_norm": 2.453125, "learning_rate": 0.001734024365640337, "loss": 0.2089, "step": 24866 }, { "epoch": 0.04409301946224176, "grad_norm": 0.328125, "learning_rate": 0.0017339821219491463, "loss": 0.1732, "step": 24868 }, { "epoch": 0.04409656562755158, "grad_norm": 2.015625, "learning_rate": 0.001733939875485286, "loss": 0.242, "step": 24870 }, { "epoch": 0.04410011179286139, "grad_norm": 0.46875, "learning_rate": 0.0017338976262489406, "loss": 0.1847, "step": 24872 }, { "epoch": 0.04410365795817121, "grad_norm": 0.53125, "learning_rate": 0.0017338553742402953, "loss": 0.2285, "step": 24874 }, { "epoch": 0.04410720412348102, "grad_norm": 0.2451171875, "learning_rate": 0.001733813119459535, "loss": 0.168, "step": 24876 }, { "epoch": 0.044110750288790836, "grad_norm": 0.7421875, "learning_rate": 0.0017337708619068444, "loss": 0.1757, "step": 24878 }, { "epoch": 0.04411429645410065, "grad_norm": 0.30859375, "learning_rate": 0.0017337286015824077, "loss": 0.1575, "step": 24880 }, { "epoch": 0.044117842619410465, "grad_norm": 0.59765625, "learning_rate": 0.0017336863384864105, "loss": 0.2938, "step": 24882 }, { "epoch": 0.044121388784720286, "grad_norm": 1.09375, "learning_rate": 0.0017336440726190374, "loss": 0.2164, "step": 24884 }, { "epoch": 0.0441249349500301, "grad_norm": 0.8046875, "learning_rate": 0.0017336018039804733, "loss": 0.2116, "step": 24886 }, { "epoch": 0.044128481115339915, "grad_norm": 0.6328125, "learning_rate": 0.0017335595325709025, "loss": 0.183, "step": 24888 }, { "epoch": 0.04413202728064973, "grad_norm": 1.5546875, "learning_rate": 0.0017335172583905106, "loss": 0.1581, "step": 24890 }, { "epoch": 0.044135573445959544, "grad_norm": 1.0390625, "learning_rate": 0.0017334749814394822, "loss": 0.1948, "step": 24892 }, { "epoch": 0.04413911961126936, "grad_norm": 1.0078125, "learning_rate": 0.0017334327017180025, "loss": 0.3382, "step": 24894 }, { "epoch": 0.044142665776579174, "grad_norm": 4.8125, "learning_rate": 0.0017333904192262557, "loss": 0.2989, "step": 24896 }, { "epoch": 0.04414621194188899, "grad_norm": 0.255859375, "learning_rate": 0.0017333481339644272, "loss": 0.178, "step": 24898 }, { "epoch": 0.0441497581071988, "grad_norm": 0.396484375, "learning_rate": 0.0017333058459327018, "loss": 0.184, "step": 24900 }, { "epoch": 0.04415330427250862, "grad_norm": 0.392578125, "learning_rate": 0.0017332635551312646, "loss": 0.2037, "step": 24902 }, { "epoch": 0.04415685043781843, "grad_norm": 0.36328125, "learning_rate": 0.0017332212615603002, "loss": 0.2542, "step": 24904 }, { "epoch": 0.044160396603128246, "grad_norm": 0.32421875, "learning_rate": 0.0017331789652199941, "loss": 0.1737, "step": 24906 }, { "epoch": 0.04416394276843807, "grad_norm": 1.765625, "learning_rate": 0.001733136666110531, "loss": 0.2466, "step": 24908 }, { "epoch": 0.04416748893374788, "grad_norm": 0.6328125, "learning_rate": 0.001733094364232096, "loss": 0.2467, "step": 24910 }, { "epoch": 0.0441710350990577, "grad_norm": 0.84765625, "learning_rate": 0.0017330520595848736, "loss": 0.2927, "step": 24912 }, { "epoch": 0.04417458126436751, "grad_norm": 0.78125, "learning_rate": 0.0017330097521690497, "loss": 0.2484, "step": 24914 }, { "epoch": 0.044178127429677326, "grad_norm": 0.73046875, "learning_rate": 0.0017329674419848085, "loss": 0.2698, "step": 24916 }, { "epoch": 0.04418167359498714, "grad_norm": 0.5546875, "learning_rate": 0.0017329251290323353, "loss": 0.2154, "step": 24918 }, { "epoch": 0.044185219760296955, "grad_norm": 0.357421875, "learning_rate": 0.0017328828133118153, "loss": 0.3464, "step": 24920 }, { "epoch": 0.04418876592560677, "grad_norm": 0.51953125, "learning_rate": 0.0017328404948234338, "loss": 0.1822, "step": 24922 }, { "epoch": 0.044192312090916584, "grad_norm": 0.6015625, "learning_rate": 0.0017327981735673756, "loss": 0.2624, "step": 24924 }, { "epoch": 0.0441958582562264, "grad_norm": 2.609375, "learning_rate": 0.0017327558495438254, "loss": 0.1726, "step": 24926 }, { "epoch": 0.04419940442153621, "grad_norm": 0.2431640625, "learning_rate": 0.0017327135227529684, "loss": 0.253, "step": 24928 }, { "epoch": 0.044202950586846035, "grad_norm": 0.59375, "learning_rate": 0.0017326711931949905, "loss": 0.1939, "step": 24930 }, { "epoch": 0.04420649675215585, "grad_norm": 0.498046875, "learning_rate": 0.0017326288608700761, "loss": 0.2277, "step": 24932 }, { "epoch": 0.044210042917465664, "grad_norm": 0.609375, "learning_rate": 0.0017325865257784108, "loss": 0.1929, "step": 24934 }, { "epoch": 0.04421358908277548, "grad_norm": 0.55859375, "learning_rate": 0.0017325441879201793, "loss": 0.1885, "step": 24936 }, { "epoch": 0.04421713524808529, "grad_norm": 0.314453125, "learning_rate": 0.0017325018472955668, "loss": 0.2081, "step": 24938 }, { "epoch": 0.04422068141339511, "grad_norm": 0.3828125, "learning_rate": 0.0017324595039047588, "loss": 0.153, "step": 24940 }, { "epoch": 0.04422422757870492, "grad_norm": 0.51171875, "learning_rate": 0.0017324171577479403, "loss": 0.2743, "step": 24942 }, { "epoch": 0.04422777374401474, "grad_norm": 0.390625, "learning_rate": 0.0017323748088252965, "loss": 0.1951, "step": 24944 }, { "epoch": 0.04423131990932455, "grad_norm": 0.52734375, "learning_rate": 0.0017323324571370125, "loss": 0.2211, "step": 24946 }, { "epoch": 0.044234866074634366, "grad_norm": 0.4609375, "learning_rate": 0.0017322901026832736, "loss": 0.2157, "step": 24948 }, { "epoch": 0.04423841223994418, "grad_norm": 0.20703125, "learning_rate": 0.0017322477454642653, "loss": 0.1773, "step": 24950 }, { "epoch": 0.044241958405254, "grad_norm": 0.27734375, "learning_rate": 0.0017322053854801724, "loss": 0.1846, "step": 24952 }, { "epoch": 0.044245504570563816, "grad_norm": 0.609375, "learning_rate": 0.0017321630227311803, "loss": 0.2066, "step": 24954 }, { "epoch": 0.04424905073587363, "grad_norm": 0.333984375, "learning_rate": 0.001732120657217475, "loss": 0.2046, "step": 24956 }, { "epoch": 0.044252596901183446, "grad_norm": 0.62109375, "learning_rate": 0.0017320782889392403, "loss": 0.2003, "step": 24958 }, { "epoch": 0.04425614306649326, "grad_norm": 0.9609375, "learning_rate": 0.0017320359178966628, "loss": 0.4127, "step": 24960 }, { "epoch": 0.044259689231803075, "grad_norm": 0.380859375, "learning_rate": 0.0017319935440899272, "loss": 0.1933, "step": 24962 }, { "epoch": 0.04426323539711289, "grad_norm": 0.6015625, "learning_rate": 0.0017319511675192188, "loss": 0.1809, "step": 24964 }, { "epoch": 0.044266781562422704, "grad_norm": 0.5390625, "learning_rate": 0.001731908788184723, "loss": 0.1682, "step": 24966 }, { "epoch": 0.04427032772773252, "grad_norm": 0.462890625, "learning_rate": 0.0017318664060866256, "loss": 0.2287, "step": 24968 }, { "epoch": 0.04427387389304233, "grad_norm": 2.5, "learning_rate": 0.0017318240212251116, "loss": 0.2996, "step": 24970 }, { "epoch": 0.04427742005835215, "grad_norm": 0.357421875, "learning_rate": 0.001731781633600366, "loss": 0.1233, "step": 24972 }, { "epoch": 0.04428096622366196, "grad_norm": 1.25, "learning_rate": 0.0017317392432125746, "loss": 0.2858, "step": 24974 }, { "epoch": 0.04428451238897178, "grad_norm": 0.2421875, "learning_rate": 0.0017316968500619228, "loss": 0.1561, "step": 24976 }, { "epoch": 0.0442880585542816, "grad_norm": 0.41015625, "learning_rate": 0.0017316544541485957, "loss": 0.23, "step": 24978 }, { "epoch": 0.04429160471959141, "grad_norm": 1.6953125, "learning_rate": 0.0017316120554727791, "loss": 0.2432, "step": 24980 }, { "epoch": 0.04429515088490123, "grad_norm": 1.546875, "learning_rate": 0.0017315696540346583, "loss": 0.2294, "step": 24982 }, { "epoch": 0.04429869705021104, "grad_norm": 1.1796875, "learning_rate": 0.0017315272498344186, "loss": 0.1983, "step": 24984 }, { "epoch": 0.044302243215520856, "grad_norm": 0.416015625, "learning_rate": 0.0017314848428722457, "loss": 0.2116, "step": 24986 }, { "epoch": 0.04430578938083067, "grad_norm": 0.412109375, "learning_rate": 0.0017314424331483249, "loss": 0.1912, "step": 24988 }, { "epoch": 0.044309335546140485, "grad_norm": 0.2431640625, "learning_rate": 0.0017314000206628414, "loss": 0.2261, "step": 24990 }, { "epoch": 0.0443128817114503, "grad_norm": 1.3046875, "learning_rate": 0.0017313576054159812, "loss": 0.3155, "step": 24992 }, { "epoch": 0.044316427876760114, "grad_norm": 0.4140625, "learning_rate": 0.0017313151874079294, "loss": 0.3077, "step": 24994 }, { "epoch": 0.04431997404206993, "grad_norm": 0.2578125, "learning_rate": 0.0017312727666388716, "loss": 0.2133, "step": 24996 }, { "epoch": 0.04432352020737975, "grad_norm": 0.466796875, "learning_rate": 0.001731230343108994, "loss": 0.2026, "step": 24998 }, { "epoch": 0.044327066372689565, "grad_norm": 0.6015625, "learning_rate": 0.0017311879168184813, "loss": 0.1502, "step": 25000 }, { "epoch": 0.04433061253799938, "grad_norm": 0.63671875, "learning_rate": 0.0017311454877675188, "loss": 0.203, "step": 25002 }, { "epoch": 0.044334158703309194, "grad_norm": 1.5390625, "learning_rate": 0.0017311030559562929, "loss": 0.4451, "step": 25004 }, { "epoch": 0.04433770486861901, "grad_norm": 0.74609375, "learning_rate": 0.0017310606213849886, "loss": 0.181, "step": 25006 }, { "epoch": 0.04434125103392882, "grad_norm": 0.65234375, "learning_rate": 0.001731018184053792, "loss": 0.2011, "step": 25008 }, { "epoch": 0.04434479719923864, "grad_norm": 0.310546875, "learning_rate": 0.0017309757439628883, "loss": 0.18, "step": 25010 }, { "epoch": 0.04434834336454845, "grad_norm": 1.6796875, "learning_rate": 0.0017309333011124633, "loss": 0.4138, "step": 25012 }, { "epoch": 0.04435188952985827, "grad_norm": 0.54296875, "learning_rate": 0.0017308908555027022, "loss": 0.2245, "step": 25014 }, { "epoch": 0.04435543569516808, "grad_norm": 0.59765625, "learning_rate": 0.0017308484071337914, "loss": 0.2158, "step": 25016 }, { "epoch": 0.044358981860477896, "grad_norm": 0.5625, "learning_rate": 0.001730805956005916, "loss": 0.2051, "step": 25018 }, { "epoch": 0.04436252802578772, "grad_norm": 0.61328125, "learning_rate": 0.0017307635021192617, "loss": 0.2866, "step": 25020 }, { "epoch": 0.04436607419109753, "grad_norm": 0.77734375, "learning_rate": 0.001730721045474014, "loss": 0.2846, "step": 25022 }, { "epoch": 0.04436962035640735, "grad_norm": 0.625, "learning_rate": 0.0017306785860703594, "loss": 0.1775, "step": 25024 }, { "epoch": 0.04437316652171716, "grad_norm": 0.3359375, "learning_rate": 0.0017306361239084822, "loss": 0.1851, "step": 25026 }, { "epoch": 0.044376712687026976, "grad_norm": 1.2265625, "learning_rate": 0.0017305936589885693, "loss": 0.2471, "step": 25028 }, { "epoch": 0.04438025885233679, "grad_norm": 0.36328125, "learning_rate": 0.0017305511913108063, "loss": 0.2088, "step": 25030 }, { "epoch": 0.044383805017646605, "grad_norm": 0.57421875, "learning_rate": 0.0017305087208753782, "loss": 0.1501, "step": 25032 }, { "epoch": 0.04438735118295642, "grad_norm": 0.65234375, "learning_rate": 0.0017304662476824715, "loss": 0.4587, "step": 25034 }, { "epoch": 0.044390897348266234, "grad_norm": 0.8671875, "learning_rate": 0.0017304237717322717, "loss": 0.2408, "step": 25036 }, { "epoch": 0.04439444351357605, "grad_norm": 1.515625, "learning_rate": 0.0017303812930249643, "loss": 0.2906, "step": 25038 }, { "epoch": 0.04439798967888586, "grad_norm": 0.73828125, "learning_rate": 0.0017303388115607354, "loss": 0.4104, "step": 25040 }, { "epoch": 0.04440153584419568, "grad_norm": 0.478515625, "learning_rate": 0.0017302963273397706, "loss": 0.1979, "step": 25042 }, { "epoch": 0.0444050820095055, "grad_norm": 0.498046875, "learning_rate": 0.001730253840362256, "loss": 0.2338, "step": 25044 }, { "epoch": 0.044408628174815314, "grad_norm": 0.423828125, "learning_rate": 0.0017302113506283772, "loss": 0.2185, "step": 25046 }, { "epoch": 0.04441217434012513, "grad_norm": 0.546875, "learning_rate": 0.0017301688581383198, "loss": 0.2663, "step": 25048 }, { "epoch": 0.04441572050543494, "grad_norm": 0.83203125, "learning_rate": 0.0017301263628922699, "loss": 0.2094, "step": 25050 }, { "epoch": 0.04441926667074476, "grad_norm": 0.2333984375, "learning_rate": 0.001730083864890413, "loss": 0.1794, "step": 25052 }, { "epoch": 0.04442281283605457, "grad_norm": 0.9453125, "learning_rate": 0.0017300413641329357, "loss": 0.244, "step": 25054 }, { "epoch": 0.044426359001364386, "grad_norm": 0.6953125, "learning_rate": 0.0017299988606200233, "loss": 0.2548, "step": 25056 }, { "epoch": 0.0444299051666742, "grad_norm": 9.25, "learning_rate": 0.0017299563543518622, "loss": 0.3135, "step": 25058 }, { "epoch": 0.044433451331984015, "grad_norm": 1.03125, "learning_rate": 0.0017299138453286376, "loss": 0.2566, "step": 25060 }, { "epoch": 0.04443699749729383, "grad_norm": 0.302734375, "learning_rate": 0.0017298713335505357, "loss": 0.2064, "step": 25062 }, { "epoch": 0.044440543662603645, "grad_norm": 1.0234375, "learning_rate": 0.0017298288190177424, "loss": 0.2721, "step": 25064 }, { "epoch": 0.044444089827913466, "grad_norm": 0.287109375, "learning_rate": 0.0017297863017304439, "loss": 0.1798, "step": 25066 }, { "epoch": 0.04444763599322328, "grad_norm": 0.404296875, "learning_rate": 0.0017297437816888258, "loss": 0.2137, "step": 25068 }, { "epoch": 0.044451182158533095, "grad_norm": 0.71875, "learning_rate": 0.0017297012588930745, "loss": 0.3298, "step": 25070 }, { "epoch": 0.04445472832384291, "grad_norm": 0.330078125, "learning_rate": 0.0017296587333433752, "loss": 0.1751, "step": 25072 }, { "epoch": 0.044458274489152724, "grad_norm": 0.318359375, "learning_rate": 0.0017296162050399147, "loss": 0.1794, "step": 25074 }, { "epoch": 0.04446182065446254, "grad_norm": 0.7265625, "learning_rate": 0.0017295736739828783, "loss": 0.1732, "step": 25076 }, { "epoch": 0.04446536681977235, "grad_norm": 0.87109375, "learning_rate": 0.0017295311401724527, "loss": 0.205, "step": 25078 }, { "epoch": 0.04446891298508217, "grad_norm": 0.490234375, "learning_rate": 0.0017294886036088232, "loss": 0.1551, "step": 25080 }, { "epoch": 0.04447245915039198, "grad_norm": 0.35546875, "learning_rate": 0.0017294460642921766, "loss": 0.2524, "step": 25082 }, { "epoch": 0.0444760053157018, "grad_norm": 1.0859375, "learning_rate": 0.0017294035222226983, "loss": 0.1488, "step": 25084 }, { "epoch": 0.04447955148101161, "grad_norm": 0.76171875, "learning_rate": 0.0017293609774005746, "loss": 0.2564, "step": 25086 }, { "epoch": 0.04448309764632143, "grad_norm": 0.50390625, "learning_rate": 0.0017293184298259917, "loss": 0.2283, "step": 25088 }, { "epoch": 0.04448664381163125, "grad_norm": 0.5390625, "learning_rate": 0.0017292758794991354, "loss": 0.1896, "step": 25090 }, { "epoch": 0.04449018997694106, "grad_norm": 0.6640625, "learning_rate": 0.001729233326420192, "loss": 0.1801, "step": 25092 }, { "epoch": 0.04449373614225088, "grad_norm": 0.41015625, "learning_rate": 0.0017291907705893478, "loss": 0.2594, "step": 25094 }, { "epoch": 0.04449728230756069, "grad_norm": 1.234375, "learning_rate": 0.0017291482120067878, "loss": 0.3557, "step": 25096 }, { "epoch": 0.044500828472870506, "grad_norm": 0.5390625, "learning_rate": 0.0017291056506726998, "loss": 0.1286, "step": 25098 }, { "epoch": 0.04450437463818032, "grad_norm": 0.251953125, "learning_rate": 0.001729063086587269, "loss": 0.1593, "step": 25100 }, { "epoch": 0.044507920803490135, "grad_norm": 0.609375, "learning_rate": 0.0017290205197506815, "loss": 0.1626, "step": 25102 }, { "epoch": 0.04451146696879995, "grad_norm": 0.515625, "learning_rate": 0.0017289779501631238, "loss": 0.2343, "step": 25104 }, { "epoch": 0.044515013134109764, "grad_norm": 0.73046875, "learning_rate": 0.0017289353778247815, "loss": 0.2718, "step": 25106 }, { "epoch": 0.04451855929941958, "grad_norm": 1.9453125, "learning_rate": 0.0017288928027358412, "loss": 0.2221, "step": 25108 }, { "epoch": 0.04452210546472939, "grad_norm": 0.341796875, "learning_rate": 0.0017288502248964895, "loss": 0.1897, "step": 25110 }, { "epoch": 0.044525651630039215, "grad_norm": 0.91015625, "learning_rate": 0.001728807644306912, "loss": 0.2313, "step": 25112 }, { "epoch": 0.04452919779534903, "grad_norm": 0.71484375, "learning_rate": 0.0017287650609672952, "loss": 0.2257, "step": 25114 }, { "epoch": 0.044532743960658844, "grad_norm": 0.3984375, "learning_rate": 0.001728722474877825, "loss": 0.2012, "step": 25116 }, { "epoch": 0.04453629012596866, "grad_norm": 1.28125, "learning_rate": 0.001728679886038688, "loss": 0.1532, "step": 25118 }, { "epoch": 0.04453983629127847, "grad_norm": 1.203125, "learning_rate": 0.0017286372944500708, "loss": 0.2037, "step": 25120 }, { "epoch": 0.04454338245658829, "grad_norm": 2.515625, "learning_rate": 0.0017285947001121587, "loss": 0.2852, "step": 25122 }, { "epoch": 0.0445469286218981, "grad_norm": 1.3203125, "learning_rate": 0.0017285521030251387, "loss": 0.2982, "step": 25124 }, { "epoch": 0.04455047478720792, "grad_norm": 0.294921875, "learning_rate": 0.0017285095031891967, "loss": 0.1718, "step": 25126 }, { "epoch": 0.04455402095251773, "grad_norm": 0.7421875, "learning_rate": 0.0017284669006045195, "loss": 0.2023, "step": 25128 }, { "epoch": 0.044557567117827546, "grad_norm": 0.67578125, "learning_rate": 0.001728424295271293, "loss": 0.2458, "step": 25130 }, { "epoch": 0.04456111328313736, "grad_norm": 0.60546875, "learning_rate": 0.001728381687189704, "loss": 0.1828, "step": 25132 }, { "epoch": 0.04456465944844718, "grad_norm": 0.42578125, "learning_rate": 0.001728339076359938, "loss": 0.1949, "step": 25134 }, { "epoch": 0.044568205613756996, "grad_norm": 0.37890625, "learning_rate": 0.001728296462782182, "loss": 0.2538, "step": 25136 }, { "epoch": 0.04457175177906681, "grad_norm": 0.5703125, "learning_rate": 0.0017282538464566224, "loss": 0.1863, "step": 25138 }, { "epoch": 0.044575297944376625, "grad_norm": 1.109375, "learning_rate": 0.0017282112273834453, "loss": 0.291, "step": 25140 }, { "epoch": 0.04457884410968644, "grad_norm": 0.94140625, "learning_rate": 0.0017281686055628373, "loss": 0.2358, "step": 25142 }, { "epoch": 0.044582390274996254, "grad_norm": 0.48046875, "learning_rate": 0.0017281259809949845, "loss": 0.1753, "step": 25144 }, { "epoch": 0.04458593644030607, "grad_norm": 0.39453125, "learning_rate": 0.0017280833536800738, "loss": 0.3093, "step": 25146 }, { "epoch": 0.044589482605615884, "grad_norm": 0.515625, "learning_rate": 0.001728040723618291, "loss": 0.2193, "step": 25148 }, { "epoch": 0.0445930287709257, "grad_norm": 0.53515625, "learning_rate": 0.001727998090809823, "loss": 0.2426, "step": 25150 }, { "epoch": 0.04459657493623551, "grad_norm": 1.09375, "learning_rate": 0.0017279554552548564, "loss": 0.245, "step": 25152 }, { "epoch": 0.04460012110154533, "grad_norm": 0.462890625, "learning_rate": 0.0017279128169535771, "loss": 0.1978, "step": 25154 }, { "epoch": 0.04460366726685515, "grad_norm": 1.1171875, "learning_rate": 0.0017278701759061718, "loss": 0.148, "step": 25156 }, { "epoch": 0.04460721343216496, "grad_norm": 0.9765625, "learning_rate": 0.0017278275321128275, "loss": 0.3347, "step": 25158 }, { "epoch": 0.04461075959747478, "grad_norm": 0.470703125, "learning_rate": 0.00172778488557373, "loss": 0.2355, "step": 25160 }, { "epoch": 0.04461430576278459, "grad_norm": 1.0, "learning_rate": 0.0017277422362890658, "loss": 0.2109, "step": 25162 }, { "epoch": 0.04461785192809441, "grad_norm": 0.73046875, "learning_rate": 0.001727699584259022, "loss": 0.2239, "step": 25164 }, { "epoch": 0.04462139809340422, "grad_norm": 2.265625, "learning_rate": 0.001727656929483785, "loss": 0.3557, "step": 25166 }, { "epoch": 0.044624944258714036, "grad_norm": 0.46484375, "learning_rate": 0.001727614271963541, "loss": 0.2334, "step": 25168 }, { "epoch": 0.04462849042402385, "grad_norm": 0.423828125, "learning_rate": 0.0017275716116984766, "loss": 0.1854, "step": 25170 }, { "epoch": 0.044632036589333665, "grad_norm": 0.37109375, "learning_rate": 0.0017275289486887787, "loss": 0.169, "step": 25172 }, { "epoch": 0.04463558275464348, "grad_norm": 0.78515625, "learning_rate": 0.0017274862829346335, "loss": 0.2193, "step": 25174 }, { "epoch": 0.044639128919953294, "grad_norm": 0.3046875, "learning_rate": 0.001727443614436228, "loss": 0.1657, "step": 25176 }, { "epoch": 0.04464267508526311, "grad_norm": 0.357421875, "learning_rate": 0.0017274009431937484, "loss": 0.2131, "step": 25178 }, { "epoch": 0.04464622125057293, "grad_norm": 1.171875, "learning_rate": 0.0017273582692073817, "loss": 0.2232, "step": 25180 }, { "epoch": 0.044649767415882745, "grad_norm": 0.23828125, "learning_rate": 0.001727315592477314, "loss": 0.2346, "step": 25182 }, { "epoch": 0.04465331358119256, "grad_norm": 0.640625, "learning_rate": 0.0017272729130037325, "loss": 0.23, "step": 25184 }, { "epoch": 0.044656859746502374, "grad_norm": 0.3515625, "learning_rate": 0.0017272302307868236, "loss": 0.1772, "step": 25186 }, { "epoch": 0.04466040591181219, "grad_norm": 1.1796875, "learning_rate": 0.0017271875458267742, "loss": 0.1607, "step": 25188 }, { "epoch": 0.044663952077122, "grad_norm": 1.28125, "learning_rate": 0.0017271448581237707, "loss": 0.1981, "step": 25190 }, { "epoch": 0.04466749824243182, "grad_norm": 0.294921875, "learning_rate": 0.0017271021676779998, "loss": 0.1789, "step": 25192 }, { "epoch": 0.04467104440774163, "grad_norm": 2.375, "learning_rate": 0.0017270594744896485, "loss": 0.1912, "step": 25194 }, { "epoch": 0.04467459057305145, "grad_norm": 0.95703125, "learning_rate": 0.0017270167785589027, "loss": 0.2657, "step": 25196 }, { "epoch": 0.04467813673836126, "grad_norm": 0.296875, "learning_rate": 0.0017269740798859503, "loss": 0.2955, "step": 25198 }, { "epoch": 0.044681682903671076, "grad_norm": 0.40625, "learning_rate": 0.0017269313784709776, "loss": 0.2702, "step": 25200 }, { "epoch": 0.0446852290689809, "grad_norm": 0.330078125, "learning_rate": 0.0017268886743141707, "loss": 0.2156, "step": 25202 }, { "epoch": 0.04468877523429071, "grad_norm": 0.376953125, "learning_rate": 0.001726845967415717, "loss": 0.1633, "step": 25204 }, { "epoch": 0.044692321399600526, "grad_norm": 0.7578125, "learning_rate": 0.0017268032577758036, "loss": 0.2501, "step": 25206 }, { "epoch": 0.04469586756491034, "grad_norm": 0.6171875, "learning_rate": 0.0017267605453946163, "loss": 0.2585, "step": 25208 }, { "epoch": 0.044699413730220156, "grad_norm": 1.5546875, "learning_rate": 0.0017267178302723429, "loss": 0.2442, "step": 25210 }, { "epoch": 0.04470295989552997, "grad_norm": 0.5546875, "learning_rate": 0.001726675112409169, "loss": 0.2002, "step": 25212 }, { "epoch": 0.044706506060839785, "grad_norm": 0.8828125, "learning_rate": 0.001726632391805283, "loss": 0.1982, "step": 25214 }, { "epoch": 0.0447100522261496, "grad_norm": 0.74609375, "learning_rate": 0.0017265896684608704, "loss": 0.2583, "step": 25216 }, { "epoch": 0.044713598391459414, "grad_norm": 2.671875, "learning_rate": 0.0017265469423761186, "loss": 0.4628, "step": 25218 }, { "epoch": 0.04471714455676923, "grad_norm": 0.361328125, "learning_rate": 0.0017265042135512147, "loss": 0.1803, "step": 25220 }, { "epoch": 0.04472069072207904, "grad_norm": 0.61328125, "learning_rate": 0.001726461481986345, "loss": 0.3379, "step": 25222 }, { "epoch": 0.044724236887388864, "grad_norm": 0.455078125, "learning_rate": 0.001726418747681697, "loss": 0.2032, "step": 25224 }, { "epoch": 0.04472778305269868, "grad_norm": 0.474609375, "learning_rate": 0.001726376010637457, "loss": 0.1911, "step": 25226 }, { "epoch": 0.04473132921800849, "grad_norm": 0.447265625, "learning_rate": 0.0017263332708538125, "loss": 0.2561, "step": 25228 }, { "epoch": 0.04473487538331831, "grad_norm": 0.8046875, "learning_rate": 0.0017262905283309496, "loss": 0.2067, "step": 25230 }, { "epoch": 0.04473842154862812, "grad_norm": 0.376953125, "learning_rate": 0.001726247783069056, "loss": 0.2352, "step": 25232 }, { "epoch": 0.04474196771393794, "grad_norm": 1.5703125, "learning_rate": 0.0017262050350683182, "loss": 0.2136, "step": 25234 }, { "epoch": 0.04474551387924775, "grad_norm": 0.625, "learning_rate": 0.0017261622843289235, "loss": 0.1698, "step": 25236 }, { "epoch": 0.044749060044557566, "grad_norm": 0.51953125, "learning_rate": 0.0017261195308510586, "loss": 0.1719, "step": 25238 }, { "epoch": 0.04475260620986738, "grad_norm": 0.6328125, "learning_rate": 0.0017260767746349107, "loss": 0.2408, "step": 25240 }, { "epoch": 0.044756152375177195, "grad_norm": 0.361328125, "learning_rate": 0.0017260340156806666, "loss": 0.3159, "step": 25242 }, { "epoch": 0.04475969854048701, "grad_norm": 0.5703125, "learning_rate": 0.0017259912539885135, "loss": 0.2247, "step": 25244 }, { "epoch": 0.044763244705796824, "grad_norm": 0.26953125, "learning_rate": 0.001725948489558638, "loss": 0.1638, "step": 25246 }, { "epoch": 0.044766790871106646, "grad_norm": 0.5859375, "learning_rate": 0.0017259057223912274, "loss": 0.1935, "step": 25248 }, { "epoch": 0.04477033703641646, "grad_norm": 0.431640625, "learning_rate": 0.001725862952486469, "loss": 0.1959, "step": 25250 }, { "epoch": 0.044773883201726275, "grad_norm": 0.390625, "learning_rate": 0.0017258201798445493, "loss": 0.2547, "step": 25252 }, { "epoch": 0.04477742936703609, "grad_norm": 0.74609375, "learning_rate": 0.0017257774044656556, "loss": 0.1704, "step": 25254 }, { "epoch": 0.044780975532345904, "grad_norm": 0.2314453125, "learning_rate": 0.0017257346263499752, "loss": 0.2106, "step": 25256 }, { "epoch": 0.04478452169765572, "grad_norm": 0.6796875, "learning_rate": 0.0017256918454976952, "loss": 0.2778, "step": 25258 }, { "epoch": 0.04478806786296553, "grad_norm": 0.83203125, "learning_rate": 0.001725649061909002, "loss": 0.2158, "step": 25260 }, { "epoch": 0.04479161402827535, "grad_norm": 0.2109375, "learning_rate": 0.0017256062755840837, "loss": 0.2246, "step": 25262 }, { "epoch": 0.04479516019358516, "grad_norm": 0.455078125, "learning_rate": 0.0017255634865231267, "loss": 0.1648, "step": 25264 }, { "epoch": 0.04479870635889498, "grad_norm": 2.15625, "learning_rate": 0.0017255206947263185, "loss": 0.3511, "step": 25266 }, { "epoch": 0.04480225252420479, "grad_norm": 1.2734375, "learning_rate": 0.0017254779001938461, "loss": 0.2226, "step": 25268 }, { "epoch": 0.04480579868951461, "grad_norm": 0.76953125, "learning_rate": 0.0017254351029258967, "loss": 0.1895, "step": 25270 }, { "epoch": 0.04480934485482443, "grad_norm": 1.9765625, "learning_rate": 0.0017253923029226573, "loss": 0.3192, "step": 25272 }, { "epoch": 0.04481289102013424, "grad_norm": 0.78125, "learning_rate": 0.0017253495001843156, "loss": 0.2938, "step": 25274 }, { "epoch": 0.04481643718544406, "grad_norm": 0.5390625, "learning_rate": 0.001725306694711058, "loss": 0.1547, "step": 25276 }, { "epoch": 0.04481998335075387, "grad_norm": 0.765625, "learning_rate": 0.0017252638865030724, "loss": 0.2048, "step": 25278 }, { "epoch": 0.044823529516063686, "grad_norm": 0.36328125, "learning_rate": 0.001725221075560546, "loss": 0.1902, "step": 25280 }, { "epoch": 0.0448270756813735, "grad_norm": 0.55859375, "learning_rate": 0.001725178261883665, "loss": 0.1282, "step": 25282 }, { "epoch": 0.044830621846683315, "grad_norm": 1.03125, "learning_rate": 0.0017251354454726182, "loss": 0.2068, "step": 25284 }, { "epoch": 0.04483416801199313, "grad_norm": 0.61328125, "learning_rate": 0.001725092626327592, "loss": 0.2783, "step": 25286 }, { "epoch": 0.044837714177302944, "grad_norm": 0.56640625, "learning_rate": 0.0017250498044487732, "loss": 0.3639, "step": 25288 }, { "epoch": 0.04484126034261276, "grad_norm": 0.75, "learning_rate": 0.00172500697983635, "loss": 0.211, "step": 25290 }, { "epoch": 0.04484480650792258, "grad_norm": 0.4765625, "learning_rate": 0.0017249641524905092, "loss": 0.1993, "step": 25292 }, { "epoch": 0.044848352673232394, "grad_norm": 0.77734375, "learning_rate": 0.0017249213224114384, "loss": 0.2579, "step": 25294 }, { "epoch": 0.04485189883854221, "grad_norm": 0.310546875, "learning_rate": 0.0017248784895993246, "loss": 0.1885, "step": 25296 }, { "epoch": 0.044855445003852024, "grad_norm": 0.20703125, "learning_rate": 0.0017248356540543555, "loss": 0.3486, "step": 25298 }, { "epoch": 0.04485899116916184, "grad_norm": 4.3125, "learning_rate": 0.001724792815776718, "loss": 0.1858, "step": 25300 }, { "epoch": 0.04486253733447165, "grad_norm": 0.365234375, "learning_rate": 0.0017247499747665995, "loss": 0.1691, "step": 25302 }, { "epoch": 0.04486608349978147, "grad_norm": 0.5078125, "learning_rate": 0.0017247071310241878, "loss": 0.2438, "step": 25304 }, { "epoch": 0.04486962966509128, "grad_norm": 0.57421875, "learning_rate": 0.0017246642845496697, "loss": 0.2318, "step": 25306 }, { "epoch": 0.044873175830401096, "grad_norm": 0.392578125, "learning_rate": 0.0017246214353432332, "loss": 0.2326, "step": 25308 }, { "epoch": 0.04487672199571091, "grad_norm": 0.37890625, "learning_rate": 0.0017245785834050652, "loss": 0.3474, "step": 25310 }, { "epoch": 0.044880268161020725, "grad_norm": 0.53125, "learning_rate": 0.0017245357287353532, "loss": 0.2132, "step": 25312 }, { "epoch": 0.04488381432633054, "grad_norm": 0.31640625, "learning_rate": 0.0017244928713342849, "loss": 0.3875, "step": 25314 }, { "epoch": 0.04488736049164036, "grad_norm": 0.416015625, "learning_rate": 0.0017244500112020473, "loss": 0.2385, "step": 25316 }, { "epoch": 0.044890906656950176, "grad_norm": 1.7421875, "learning_rate": 0.001724407148338828, "loss": 0.1813, "step": 25318 }, { "epoch": 0.04489445282225999, "grad_norm": 0.3515625, "learning_rate": 0.0017243642827448147, "loss": 0.1964, "step": 25320 }, { "epoch": 0.044897998987569805, "grad_norm": 0.375, "learning_rate": 0.0017243214144201946, "loss": 0.1915, "step": 25322 }, { "epoch": 0.04490154515287962, "grad_norm": 0.404296875, "learning_rate": 0.0017242785433651555, "loss": 0.1782, "step": 25324 }, { "epoch": 0.044905091318189434, "grad_norm": 0.259765625, "learning_rate": 0.0017242356695798843, "loss": 0.1672, "step": 25326 }, { "epoch": 0.04490863748349925, "grad_norm": 0.5078125, "learning_rate": 0.0017241927930645694, "loss": 0.1473, "step": 25328 }, { "epoch": 0.04491218364880906, "grad_norm": 0.365234375, "learning_rate": 0.0017241499138193974, "loss": 0.1786, "step": 25330 }, { "epoch": 0.04491572981411888, "grad_norm": 1.203125, "learning_rate": 0.0017241070318445566, "loss": 0.1742, "step": 25332 }, { "epoch": 0.04491927597942869, "grad_norm": 0.9921875, "learning_rate": 0.0017240641471402336, "loss": 0.2284, "step": 25334 }, { "epoch": 0.04492282214473851, "grad_norm": 0.36328125, "learning_rate": 0.001724021259706617, "loss": 0.1995, "step": 25336 }, { "epoch": 0.04492636831004833, "grad_norm": 1.0546875, "learning_rate": 0.0017239783695438935, "loss": 0.2411, "step": 25338 }, { "epoch": 0.04492991447535814, "grad_norm": 0.384765625, "learning_rate": 0.0017239354766522515, "loss": 0.1829, "step": 25340 }, { "epoch": 0.04493346064066796, "grad_norm": 0.34765625, "learning_rate": 0.0017238925810318779, "loss": 0.1963, "step": 25342 }, { "epoch": 0.04493700680597777, "grad_norm": 1.5859375, "learning_rate": 0.0017238496826829605, "loss": 0.3664, "step": 25344 }, { "epoch": 0.04494055297128759, "grad_norm": 2.015625, "learning_rate": 0.0017238067816056871, "loss": 0.3184, "step": 25346 }, { "epoch": 0.0449440991365974, "grad_norm": 0.326171875, "learning_rate": 0.0017237638778002452, "loss": 0.2, "step": 25348 }, { "epoch": 0.044947645301907216, "grad_norm": 0.8046875, "learning_rate": 0.0017237209712668224, "loss": 0.3216, "step": 25350 }, { "epoch": 0.04495119146721703, "grad_norm": 0.2412109375, "learning_rate": 0.0017236780620056064, "loss": 0.2209, "step": 25352 }, { "epoch": 0.044954737632526845, "grad_norm": 0.490234375, "learning_rate": 0.0017236351500167846, "loss": 0.1877, "step": 25354 }, { "epoch": 0.04495828379783666, "grad_norm": 1.6484375, "learning_rate": 0.0017235922353005452, "loss": 0.3096, "step": 25356 }, { "epoch": 0.044961829963146474, "grad_norm": 0.7265625, "learning_rate": 0.0017235493178570753, "loss": 0.2528, "step": 25358 }, { "epoch": 0.044965376128456296, "grad_norm": 0.25, "learning_rate": 0.0017235063976865628, "loss": 0.2663, "step": 25360 }, { "epoch": 0.04496892229376611, "grad_norm": 0.5546875, "learning_rate": 0.0017234634747891962, "loss": 0.3093, "step": 25362 }, { "epoch": 0.044972468459075925, "grad_norm": 0.43359375, "learning_rate": 0.0017234205491651615, "loss": 0.1628, "step": 25364 }, { "epoch": 0.04497601462438574, "grad_norm": 0.31640625, "learning_rate": 0.0017233776208146483, "loss": 0.1997, "step": 25366 }, { "epoch": 0.044979560789695554, "grad_norm": 0.28515625, "learning_rate": 0.001723334689737843, "loss": 0.1321, "step": 25368 }, { "epoch": 0.04498310695500537, "grad_norm": 0.609375, "learning_rate": 0.001723291755934934, "loss": 0.2833, "step": 25370 }, { "epoch": 0.04498665312031518, "grad_norm": 1.8203125, "learning_rate": 0.0017232488194061089, "loss": 0.2644, "step": 25372 }, { "epoch": 0.044990199285625, "grad_norm": 0.3125, "learning_rate": 0.0017232058801515558, "loss": 0.2767, "step": 25374 }, { "epoch": 0.04499374545093481, "grad_norm": 0.466796875, "learning_rate": 0.0017231629381714618, "loss": 0.1822, "step": 25376 }, { "epoch": 0.04499729161624463, "grad_norm": 0.275390625, "learning_rate": 0.0017231199934660151, "loss": 0.2472, "step": 25378 }, { "epoch": 0.04500083778155444, "grad_norm": 0.455078125, "learning_rate": 0.0017230770460354035, "loss": 0.2009, "step": 25380 }, { "epoch": 0.045004383946864256, "grad_norm": 0.765625, "learning_rate": 0.001723034095879815, "loss": 0.2155, "step": 25382 }, { "epoch": 0.04500793011217408, "grad_norm": 0.80078125, "learning_rate": 0.001722991142999437, "loss": 0.2195, "step": 25384 }, { "epoch": 0.04501147627748389, "grad_norm": 0.8515625, "learning_rate": 0.001722948187394458, "loss": 0.1754, "step": 25386 }, { "epoch": 0.045015022442793706, "grad_norm": 0.51953125, "learning_rate": 0.0017229052290650651, "loss": 0.3058, "step": 25388 }, { "epoch": 0.04501856860810352, "grad_norm": 0.283203125, "learning_rate": 0.0017228622680114467, "loss": 0.2018, "step": 25390 }, { "epoch": 0.045022114773413335, "grad_norm": 0.515625, "learning_rate": 0.0017228193042337907, "loss": 0.2135, "step": 25392 }, { "epoch": 0.04502566093872315, "grad_norm": 0.6484375, "learning_rate": 0.0017227763377322847, "loss": 0.2468, "step": 25394 }, { "epoch": 0.045029207104032964, "grad_norm": 0.40625, "learning_rate": 0.0017227333685071167, "loss": 0.2182, "step": 25396 }, { "epoch": 0.04503275326934278, "grad_norm": 1.1640625, "learning_rate": 0.0017226903965584749, "loss": 0.2119, "step": 25398 }, { "epoch": 0.045036299434652594, "grad_norm": 0.228515625, "learning_rate": 0.0017226474218865466, "loss": 0.2165, "step": 25400 }, { "epoch": 0.04503984559996241, "grad_norm": 0.66015625, "learning_rate": 0.0017226044444915206, "loss": 0.2019, "step": 25402 }, { "epoch": 0.04504339176527222, "grad_norm": 0.546875, "learning_rate": 0.0017225614643735843, "loss": 0.1597, "step": 25404 }, { "epoch": 0.045046937930582044, "grad_norm": 1.109375, "learning_rate": 0.0017225184815329257, "loss": 0.1677, "step": 25406 }, { "epoch": 0.04505048409589186, "grad_norm": 0.5546875, "learning_rate": 0.0017224754959697327, "loss": 0.279, "step": 25408 }, { "epoch": 0.04505403026120167, "grad_norm": 2.015625, "learning_rate": 0.0017224325076841936, "loss": 0.4587, "step": 25410 }, { "epoch": 0.04505757642651149, "grad_norm": 1.1328125, "learning_rate": 0.0017223895166764962, "loss": 0.3143, "step": 25412 }, { "epoch": 0.0450611225918213, "grad_norm": 0.68359375, "learning_rate": 0.0017223465229468287, "loss": 0.173, "step": 25414 }, { "epoch": 0.04506466875713112, "grad_norm": 0.640625, "learning_rate": 0.0017223035264953789, "loss": 0.2514, "step": 25416 }, { "epoch": 0.04506821492244093, "grad_norm": 0.484375, "learning_rate": 0.001722260527322335, "loss": 0.1549, "step": 25418 }, { "epoch": 0.045071761087750746, "grad_norm": 0.2265625, "learning_rate": 0.001722217525427885, "loss": 0.1817, "step": 25420 }, { "epoch": 0.04507530725306056, "grad_norm": 0.84765625, "learning_rate": 0.001722174520812217, "loss": 0.2271, "step": 25422 }, { "epoch": 0.045078853418370375, "grad_norm": 0.8671875, "learning_rate": 0.001722131513475519, "loss": 0.2019, "step": 25424 }, { "epoch": 0.04508239958368019, "grad_norm": 1.609375, "learning_rate": 0.0017220885034179793, "loss": 0.2505, "step": 25426 }, { "epoch": 0.04508594574899001, "grad_norm": 0.375, "learning_rate": 0.0017220454906397855, "loss": 0.2105, "step": 25428 }, { "epoch": 0.045089491914299826, "grad_norm": 0.375, "learning_rate": 0.0017220024751411264, "loss": 0.1876, "step": 25430 }, { "epoch": 0.04509303807960964, "grad_norm": 0.515625, "learning_rate": 0.0017219594569221894, "loss": 0.1667, "step": 25432 }, { "epoch": 0.045096584244919455, "grad_norm": 0.79296875, "learning_rate": 0.0017219164359831636, "loss": 0.1579, "step": 25434 }, { "epoch": 0.04510013041022927, "grad_norm": 0.671875, "learning_rate": 0.001721873412324236, "loss": 0.2341, "step": 25436 }, { "epoch": 0.045103676575539084, "grad_norm": 0.32421875, "learning_rate": 0.0017218303859455957, "loss": 0.2081, "step": 25438 }, { "epoch": 0.0451072227408489, "grad_norm": 0.6875, "learning_rate": 0.0017217873568474303, "loss": 0.2764, "step": 25440 }, { "epoch": 0.04511076890615871, "grad_norm": 0.25390625, "learning_rate": 0.0017217443250299282, "loss": 0.1304, "step": 25442 }, { "epoch": 0.04511431507146853, "grad_norm": 0.263671875, "learning_rate": 0.0017217012904932774, "loss": 0.158, "step": 25444 }, { "epoch": 0.04511786123677834, "grad_norm": 0.546875, "learning_rate": 0.0017216582532376666, "loss": 0.1919, "step": 25446 }, { "epoch": 0.04512140740208816, "grad_norm": 0.67578125, "learning_rate": 0.0017216152132632835, "loss": 0.2029, "step": 25448 }, { "epoch": 0.04512495356739797, "grad_norm": 0.5859375, "learning_rate": 0.0017215721705703165, "loss": 0.1775, "step": 25450 }, { "epoch": 0.04512849973270779, "grad_norm": 1.75, "learning_rate": 0.001721529125158954, "loss": 0.2798, "step": 25452 }, { "epoch": 0.04513204589801761, "grad_norm": 0.3515625, "learning_rate": 0.0017214860770293841, "loss": 0.5013, "step": 25454 }, { "epoch": 0.04513559206332742, "grad_norm": 0.75390625, "learning_rate": 0.0017214430261817952, "loss": 0.1969, "step": 25456 }, { "epoch": 0.045139138228637236, "grad_norm": 0.9765625, "learning_rate": 0.0017213999726163752, "loss": 0.1888, "step": 25458 }, { "epoch": 0.04514268439394705, "grad_norm": 0.353515625, "learning_rate": 0.001721356916333313, "loss": 0.1652, "step": 25460 }, { "epoch": 0.045146230559256866, "grad_norm": 0.458984375, "learning_rate": 0.0017213138573327965, "loss": 0.1664, "step": 25462 }, { "epoch": 0.04514977672456668, "grad_norm": 0.4140625, "learning_rate": 0.001721270795615014, "loss": 0.2046, "step": 25464 }, { "epoch": 0.045153322889876495, "grad_norm": 0.2890625, "learning_rate": 0.0017212277311801538, "loss": 0.1738, "step": 25466 }, { "epoch": 0.04515686905518631, "grad_norm": 0.265625, "learning_rate": 0.0017211846640284045, "loss": 0.2052, "step": 25468 }, { "epoch": 0.045160415220496124, "grad_norm": 0.4453125, "learning_rate": 0.0017211415941599543, "loss": 0.2385, "step": 25470 }, { "epoch": 0.04516396138580594, "grad_norm": 0.380859375, "learning_rate": 0.0017210985215749919, "loss": 0.1935, "step": 25472 }, { "epoch": 0.04516750755111576, "grad_norm": 0.302734375, "learning_rate": 0.0017210554462737046, "loss": 0.2098, "step": 25474 }, { "epoch": 0.045171053716425574, "grad_norm": 1.734375, "learning_rate": 0.001721012368256282, "loss": 0.3009, "step": 25476 }, { "epoch": 0.04517459988173539, "grad_norm": 0.408203125, "learning_rate": 0.001720969287522912, "loss": 0.1868, "step": 25478 }, { "epoch": 0.0451781460470452, "grad_norm": 0.34765625, "learning_rate": 0.0017209262040737833, "loss": 0.2503, "step": 25480 }, { "epoch": 0.04518169221235502, "grad_norm": 0.80859375, "learning_rate": 0.0017208831179090838, "loss": 0.2286, "step": 25482 }, { "epoch": 0.04518523837766483, "grad_norm": 0.283203125, "learning_rate": 0.001720840029029002, "loss": 0.2539, "step": 25484 }, { "epoch": 0.04518878454297465, "grad_norm": 0.7578125, "learning_rate": 0.0017207969374337265, "loss": 0.1986, "step": 25486 }, { "epoch": 0.04519233070828446, "grad_norm": 0.8046875, "learning_rate": 0.0017207538431234463, "loss": 0.3149, "step": 25488 }, { "epoch": 0.045195876873594276, "grad_norm": 1.21875, "learning_rate": 0.001720710746098349, "loss": 0.2551, "step": 25490 }, { "epoch": 0.04519942303890409, "grad_norm": 0.486328125, "learning_rate": 0.0017206676463586235, "loss": 0.1997, "step": 25492 }, { "epoch": 0.045202969204213905, "grad_norm": 1.515625, "learning_rate": 0.0017206245439044582, "loss": 0.284, "step": 25494 }, { "epoch": 0.04520651536952373, "grad_norm": 0.267578125, "learning_rate": 0.0017205814387360417, "loss": 0.215, "step": 25496 }, { "epoch": 0.04521006153483354, "grad_norm": 2.25, "learning_rate": 0.0017205383308535624, "loss": 0.2639, "step": 25498 }, { "epoch": 0.045213607700143356, "grad_norm": 0.31640625, "learning_rate": 0.001720495220257209, "loss": 0.2499, "step": 25500 }, { "epoch": 0.04521715386545317, "grad_norm": 2.1875, "learning_rate": 0.00172045210694717, "loss": 0.2524, "step": 25502 }, { "epoch": 0.045220700030762985, "grad_norm": 0.63671875, "learning_rate": 0.0017204089909236338, "loss": 0.1922, "step": 25504 }, { "epoch": 0.0452242461960728, "grad_norm": 0.3671875, "learning_rate": 0.001720365872186789, "loss": 0.2061, "step": 25506 }, { "epoch": 0.045227792361382614, "grad_norm": 0.34375, "learning_rate": 0.0017203227507368243, "loss": 0.2378, "step": 25508 }, { "epoch": 0.04523133852669243, "grad_norm": 0.455078125, "learning_rate": 0.001720279626573928, "loss": 0.1828, "step": 25510 }, { "epoch": 0.04523488469200224, "grad_norm": 0.64453125, "learning_rate": 0.0017202364996982892, "loss": 0.2122, "step": 25512 }, { "epoch": 0.04523843085731206, "grad_norm": 0.494140625, "learning_rate": 0.001720193370110096, "loss": 0.2354, "step": 25514 }, { "epoch": 0.04524197702262187, "grad_norm": 0.3359375, "learning_rate": 0.0017201502378095374, "loss": 0.1997, "step": 25516 }, { "epoch": 0.04524552318793169, "grad_norm": 0.1787109375, "learning_rate": 0.0017201071027968021, "loss": 0.1815, "step": 25518 }, { "epoch": 0.04524906935324151, "grad_norm": 0.2890625, "learning_rate": 0.0017200639650720782, "loss": 0.2294, "step": 25520 }, { "epoch": 0.04525261551855132, "grad_norm": 0.44140625, "learning_rate": 0.0017200208246355546, "loss": 0.2029, "step": 25522 }, { "epoch": 0.04525616168386114, "grad_norm": 0.25390625, "learning_rate": 0.0017199776814874205, "loss": 0.1412, "step": 25524 }, { "epoch": 0.04525970784917095, "grad_norm": 0.609375, "learning_rate": 0.0017199345356278637, "loss": 0.1894, "step": 25526 }, { "epoch": 0.04526325401448077, "grad_norm": 3.34375, "learning_rate": 0.0017198913870570735, "loss": 0.2968, "step": 25528 }, { "epoch": 0.04526680017979058, "grad_norm": 0.365234375, "learning_rate": 0.0017198482357752385, "loss": 0.4749, "step": 25530 }, { "epoch": 0.045270346345100396, "grad_norm": 0.38671875, "learning_rate": 0.0017198050817825473, "loss": 0.1996, "step": 25532 }, { "epoch": 0.04527389251041021, "grad_norm": 1.234375, "learning_rate": 0.0017197619250791888, "loss": 0.1999, "step": 25534 }, { "epoch": 0.045277438675720025, "grad_norm": 0.6875, "learning_rate": 0.0017197187656653515, "loss": 0.1493, "step": 25536 }, { "epoch": 0.04528098484102984, "grad_norm": 0.40234375, "learning_rate": 0.0017196756035412243, "loss": 0.1705, "step": 25538 }, { "epoch": 0.045284531006339654, "grad_norm": 0.6796875, "learning_rate": 0.001719632438706996, "loss": 0.2438, "step": 25540 }, { "epoch": 0.045288077171649475, "grad_norm": 0.328125, "learning_rate": 0.0017195892711628557, "loss": 0.1797, "step": 25542 }, { "epoch": 0.04529162333695929, "grad_norm": 0.4140625, "learning_rate": 0.0017195461009089912, "loss": 0.2461, "step": 25544 }, { "epoch": 0.045295169502269104, "grad_norm": 0.4609375, "learning_rate": 0.0017195029279455925, "loss": 0.2392, "step": 25546 }, { "epoch": 0.04529871566757892, "grad_norm": 0.228515625, "learning_rate": 0.0017194597522728475, "loss": 0.1433, "step": 25548 }, { "epoch": 0.045302261832888734, "grad_norm": 0.353515625, "learning_rate": 0.0017194165738909455, "loss": 0.1984, "step": 25550 }, { "epoch": 0.04530580799819855, "grad_norm": 0.283203125, "learning_rate": 0.001719373392800075, "loss": 0.2304, "step": 25552 }, { "epoch": 0.04530935416350836, "grad_norm": 0.2578125, "learning_rate": 0.0017193302090004253, "loss": 0.1826, "step": 25554 }, { "epoch": 0.04531290032881818, "grad_norm": 0.31640625, "learning_rate": 0.0017192870224921849, "loss": 0.2292, "step": 25556 }, { "epoch": 0.04531644649412799, "grad_norm": 0.390625, "learning_rate": 0.0017192438332755428, "loss": 0.3133, "step": 25558 }, { "epoch": 0.045319992659437806, "grad_norm": 0.33203125, "learning_rate": 0.001719200641350688, "loss": 0.2361, "step": 25560 }, { "epoch": 0.04532353882474762, "grad_norm": 0.6953125, "learning_rate": 0.001719157446717809, "loss": 0.3206, "step": 25562 }, { "epoch": 0.04532708499005744, "grad_norm": 1.0703125, "learning_rate": 0.001719114249377095, "loss": 0.1806, "step": 25564 }, { "epoch": 0.04533063115536726, "grad_norm": 0.38671875, "learning_rate": 0.001719071049328735, "loss": 0.1894, "step": 25566 }, { "epoch": 0.04533417732067707, "grad_norm": 0.51953125, "learning_rate": 0.001719027846572918, "loss": 0.307, "step": 25568 }, { "epoch": 0.045337723485986886, "grad_norm": 0.7734375, "learning_rate": 0.001718984641109833, "loss": 0.1914, "step": 25570 }, { "epoch": 0.0453412696512967, "grad_norm": 0.609375, "learning_rate": 0.001718941432939668, "loss": 0.4102, "step": 25572 }, { "epoch": 0.045344815816606515, "grad_norm": 0.2109375, "learning_rate": 0.001718898222062613, "loss": 0.1255, "step": 25574 }, { "epoch": 0.04534836198191633, "grad_norm": 0.3203125, "learning_rate": 0.0017188550084788568, "loss": 0.1721, "step": 25576 }, { "epoch": 0.045351908147226144, "grad_norm": 0.84375, "learning_rate": 0.0017188117921885884, "loss": 0.2043, "step": 25578 }, { "epoch": 0.04535545431253596, "grad_norm": 0.416015625, "learning_rate": 0.0017187685731919963, "loss": 0.1694, "step": 25580 }, { "epoch": 0.04535900047784577, "grad_norm": 0.2041015625, "learning_rate": 0.0017187253514892701, "loss": 0.238, "step": 25582 }, { "epoch": 0.04536254664315559, "grad_norm": 0.318359375, "learning_rate": 0.0017186821270805985, "loss": 0.1665, "step": 25584 }, { "epoch": 0.0453660928084654, "grad_norm": 0.259765625, "learning_rate": 0.0017186388999661708, "loss": 0.1894, "step": 25586 }, { "epoch": 0.045369638973775224, "grad_norm": 0.30859375, "learning_rate": 0.0017185956701461758, "loss": 0.2025, "step": 25588 }, { "epoch": 0.04537318513908504, "grad_norm": 1.390625, "learning_rate": 0.0017185524376208028, "loss": 0.2124, "step": 25590 }, { "epoch": 0.04537673130439485, "grad_norm": 0.765625, "learning_rate": 0.0017185092023902404, "loss": 0.2002, "step": 25592 }, { "epoch": 0.04538027746970467, "grad_norm": 0.71875, "learning_rate": 0.0017184659644546782, "loss": 0.1997, "step": 25594 }, { "epoch": 0.04538382363501448, "grad_norm": 0.46875, "learning_rate": 0.001718422723814305, "loss": 0.2049, "step": 25596 }, { "epoch": 0.0453873698003243, "grad_norm": 0.29296875, "learning_rate": 0.0017183794804693105, "loss": 0.1884, "step": 25598 }, { "epoch": 0.04539091596563411, "grad_norm": 0.3515625, "learning_rate": 0.0017183362344198828, "loss": 0.2467, "step": 25600 }, { "epoch": 0.045394462130943926, "grad_norm": 0.6484375, "learning_rate": 0.0017182929856662122, "loss": 0.2209, "step": 25602 }, { "epoch": 0.04539800829625374, "grad_norm": 0.291015625, "learning_rate": 0.0017182497342084866, "loss": 0.1821, "step": 25604 }, { "epoch": 0.045401554461563555, "grad_norm": 0.65625, "learning_rate": 0.001718206480046896, "loss": 0.2772, "step": 25606 }, { "epoch": 0.04540510062687337, "grad_norm": 0.390625, "learning_rate": 0.0017181632231816294, "loss": 0.2159, "step": 25608 }, { "epoch": 0.04540864679218319, "grad_norm": 0.52734375, "learning_rate": 0.0017181199636128758, "loss": 0.2839, "step": 25610 }, { "epoch": 0.045412192957493006, "grad_norm": 0.412109375, "learning_rate": 0.0017180767013408246, "loss": 0.1547, "step": 25612 }, { "epoch": 0.04541573912280282, "grad_norm": 0.55859375, "learning_rate": 0.001718033436365665, "loss": 0.1818, "step": 25614 }, { "epoch": 0.045419285288112635, "grad_norm": 1.5078125, "learning_rate": 0.001717990168687586, "loss": 0.3927, "step": 25616 }, { "epoch": 0.04542283145342245, "grad_norm": 0.234375, "learning_rate": 0.0017179468983067774, "loss": 0.1788, "step": 25618 }, { "epoch": 0.045426377618732264, "grad_norm": 0.26953125, "learning_rate": 0.0017179036252234275, "loss": 0.1303, "step": 25620 }, { "epoch": 0.04542992378404208, "grad_norm": 0.5625, "learning_rate": 0.0017178603494377262, "loss": 0.2304, "step": 25622 }, { "epoch": 0.04543346994935189, "grad_norm": 0.5078125, "learning_rate": 0.001717817070949863, "loss": 0.2297, "step": 25624 }, { "epoch": 0.04543701611466171, "grad_norm": 0.412109375, "learning_rate": 0.0017177737897600264, "loss": 0.2096, "step": 25626 }, { "epoch": 0.04544056227997152, "grad_norm": 0.77734375, "learning_rate": 0.001717730505868406, "loss": 0.1753, "step": 25628 }, { "epoch": 0.045444108445281337, "grad_norm": 0.5234375, "learning_rate": 0.0017176872192751916, "loss": 0.3097, "step": 25630 }, { "epoch": 0.04544765461059116, "grad_norm": 0.66796875, "learning_rate": 0.0017176439299805718, "loss": 0.1519, "step": 25632 }, { "epoch": 0.04545120077590097, "grad_norm": 0.50390625, "learning_rate": 0.0017176006379847363, "loss": 0.2172, "step": 25634 }, { "epoch": 0.04545474694121079, "grad_norm": 0.375, "learning_rate": 0.0017175573432878745, "loss": 0.1886, "step": 25636 }, { "epoch": 0.0454582931065206, "grad_norm": 0.5078125, "learning_rate": 0.0017175140458901752, "loss": 0.1767, "step": 25638 }, { "epoch": 0.045461839271830416, "grad_norm": 0.279296875, "learning_rate": 0.0017174707457918287, "loss": 0.1944, "step": 25640 }, { "epoch": 0.04546538543714023, "grad_norm": 1.0, "learning_rate": 0.0017174274429930232, "loss": 0.1938, "step": 25642 }, { "epoch": 0.045468931602450045, "grad_norm": 0.55078125, "learning_rate": 0.0017173841374939494, "loss": 0.1942, "step": 25644 }, { "epoch": 0.04547247776775986, "grad_norm": 0.61328125, "learning_rate": 0.0017173408292947953, "loss": 0.2176, "step": 25646 }, { "epoch": 0.045476023933069674, "grad_norm": 0.435546875, "learning_rate": 0.0017172975183957515, "loss": 0.2022, "step": 25648 }, { "epoch": 0.04547957009837949, "grad_norm": 0.494140625, "learning_rate": 0.0017172542047970067, "loss": 0.1694, "step": 25650 }, { "epoch": 0.045483116263689304, "grad_norm": 7.84375, "learning_rate": 0.0017172108884987507, "loss": 0.1949, "step": 25652 }, { "epoch": 0.04548666242899912, "grad_norm": 1.109375, "learning_rate": 0.0017171675695011727, "loss": 0.2864, "step": 25654 }, { "epoch": 0.04549020859430894, "grad_norm": 1.125, "learning_rate": 0.0017171242478044625, "loss": 0.2368, "step": 25656 }, { "epoch": 0.045493754759618754, "grad_norm": 0.58984375, "learning_rate": 0.001717080923408809, "loss": 0.2447, "step": 25658 }, { "epoch": 0.04549730092492857, "grad_norm": 1.1328125, "learning_rate": 0.0017170375963144018, "loss": 0.2481, "step": 25660 }, { "epoch": 0.04550084709023838, "grad_norm": 0.349609375, "learning_rate": 0.0017169942665214311, "loss": 0.1743, "step": 25662 }, { "epoch": 0.0455043932555482, "grad_norm": 1.0859375, "learning_rate": 0.0017169509340300856, "loss": 0.1963, "step": 25664 }, { "epoch": 0.04550793942085801, "grad_norm": 0.416015625, "learning_rate": 0.001716907598840555, "loss": 0.1734, "step": 25666 }, { "epoch": 0.04551148558616783, "grad_norm": 2.3125, "learning_rate": 0.001716864260953029, "loss": 0.1648, "step": 25668 }, { "epoch": 0.04551503175147764, "grad_norm": 1.296875, "learning_rate": 0.001716820920367697, "loss": 0.2811, "step": 25670 }, { "epoch": 0.045518577916787456, "grad_norm": 1.046875, "learning_rate": 0.0017167775770847488, "loss": 0.2303, "step": 25672 }, { "epoch": 0.04552212408209727, "grad_norm": 0.41015625, "learning_rate": 0.0017167342311043732, "loss": 0.1969, "step": 25674 }, { "epoch": 0.045525670247407085, "grad_norm": 1.2578125, "learning_rate": 0.0017166908824267609, "loss": 0.2926, "step": 25676 }, { "epoch": 0.04552921641271691, "grad_norm": 0.75390625, "learning_rate": 0.0017166475310521005, "loss": 0.1331, "step": 25678 }, { "epoch": 0.04553276257802672, "grad_norm": 0.466796875, "learning_rate": 0.0017166041769805821, "loss": 0.2177, "step": 25680 }, { "epoch": 0.045536308743336536, "grad_norm": 0.8828125, "learning_rate": 0.0017165608202123952, "loss": 0.2342, "step": 25682 }, { "epoch": 0.04553985490864635, "grad_norm": 0.27734375, "learning_rate": 0.0017165174607477298, "loss": 0.2007, "step": 25684 }, { "epoch": 0.045543401073956165, "grad_norm": 2.078125, "learning_rate": 0.0017164740985867745, "loss": 0.3573, "step": 25686 }, { "epoch": 0.04554694723926598, "grad_norm": 0.6796875, "learning_rate": 0.0017164307337297201, "loss": 0.1687, "step": 25688 }, { "epoch": 0.045550493404575794, "grad_norm": 1.7890625, "learning_rate": 0.0017163873661767556, "loss": 0.2206, "step": 25690 }, { "epoch": 0.04555403956988561, "grad_norm": 0.91796875, "learning_rate": 0.0017163439959280706, "loss": 0.1627, "step": 25692 }, { "epoch": 0.04555758573519542, "grad_norm": 0.5078125, "learning_rate": 0.001716300622983855, "loss": 0.2677, "step": 25694 }, { "epoch": 0.04556113190050524, "grad_norm": 1.0625, "learning_rate": 0.0017162572473442988, "loss": 0.2084, "step": 25696 }, { "epoch": 0.04556467806581505, "grad_norm": 0.5703125, "learning_rate": 0.001716213869009591, "loss": 0.2047, "step": 25698 }, { "epoch": 0.045568224231124874, "grad_norm": 0.875, "learning_rate": 0.0017161704879799218, "loss": 0.1909, "step": 25700 }, { "epoch": 0.04557177039643469, "grad_norm": 0.80078125, "learning_rate": 0.001716127104255481, "loss": 0.2332, "step": 25702 }, { "epoch": 0.0455753165617445, "grad_norm": 0.498046875, "learning_rate": 0.0017160837178364576, "loss": 0.1941, "step": 25704 }, { "epoch": 0.04557886272705432, "grad_norm": 0.5546875, "learning_rate": 0.0017160403287230423, "loss": 0.187, "step": 25706 }, { "epoch": 0.04558240889236413, "grad_norm": 0.515625, "learning_rate": 0.0017159969369154244, "loss": 0.1707, "step": 25708 }, { "epoch": 0.045585955057673946, "grad_norm": 0.5390625, "learning_rate": 0.0017159535424137937, "loss": 0.4781, "step": 25710 }, { "epoch": 0.04558950122298376, "grad_norm": 0.267578125, "learning_rate": 0.00171591014521834, "loss": 0.1972, "step": 25712 }, { "epoch": 0.045593047388293576, "grad_norm": 0.373046875, "learning_rate": 0.001715866745329253, "loss": 0.2256, "step": 25714 }, { "epoch": 0.04559659355360339, "grad_norm": 0.9765625, "learning_rate": 0.0017158233427467227, "loss": 0.2193, "step": 25716 }, { "epoch": 0.045600139718913205, "grad_norm": 0.421875, "learning_rate": 0.0017157799374709392, "loss": 0.1821, "step": 25718 }, { "epoch": 0.04560368588422302, "grad_norm": 0.419921875, "learning_rate": 0.0017157365295020913, "loss": 0.2137, "step": 25720 }, { "epoch": 0.045607232049532834, "grad_norm": 0.37890625, "learning_rate": 0.0017156931188403695, "loss": 0.1978, "step": 25722 }, { "epoch": 0.045610778214842655, "grad_norm": 0.34375, "learning_rate": 0.0017156497054859636, "loss": 0.1728, "step": 25724 }, { "epoch": 0.04561432438015247, "grad_norm": 0.984375, "learning_rate": 0.001715606289439064, "loss": 0.3566, "step": 25726 }, { "epoch": 0.045617870545462284, "grad_norm": 0.90234375, "learning_rate": 0.0017155628706998598, "loss": 0.1888, "step": 25728 }, { "epoch": 0.0456214167107721, "grad_norm": 0.5, "learning_rate": 0.0017155194492685414, "loss": 0.1704, "step": 25730 }, { "epoch": 0.04562496287608191, "grad_norm": 0.6484375, "learning_rate": 0.001715476025145298, "loss": 0.2041, "step": 25732 }, { "epoch": 0.04562850904139173, "grad_norm": 0.5, "learning_rate": 0.0017154325983303202, "loss": 0.1611, "step": 25734 }, { "epoch": 0.04563205520670154, "grad_norm": 2.390625, "learning_rate": 0.0017153891688237977, "loss": 0.2167, "step": 25736 }, { "epoch": 0.04563560137201136, "grad_norm": 10.6875, "learning_rate": 0.0017153457366259207, "loss": 0.3996, "step": 25738 }, { "epoch": 0.04563914753732117, "grad_norm": 0.359375, "learning_rate": 0.0017153023017368785, "loss": 0.2025, "step": 25740 }, { "epoch": 0.045642693702630986, "grad_norm": 1.7265625, "learning_rate": 0.0017152588641568616, "loss": 0.2032, "step": 25742 }, { "epoch": 0.0456462398679408, "grad_norm": 0.337890625, "learning_rate": 0.0017152154238860597, "loss": 0.2278, "step": 25744 }, { "epoch": 0.04564978603325062, "grad_norm": 1.171875, "learning_rate": 0.001715171980924663, "loss": 0.2413, "step": 25746 }, { "epoch": 0.04565333219856044, "grad_norm": 1.1640625, "learning_rate": 0.0017151285352728616, "loss": 0.3813, "step": 25748 }, { "epoch": 0.04565687836387025, "grad_norm": 0.2490234375, "learning_rate": 0.001715085086930845, "loss": 0.1673, "step": 25750 }, { "epoch": 0.045660424529180066, "grad_norm": 0.640625, "learning_rate": 0.0017150416358988033, "loss": 0.2437, "step": 25752 }, { "epoch": 0.04566397069448988, "grad_norm": 0.484375, "learning_rate": 0.0017149981821769273, "loss": 0.1634, "step": 25754 }, { "epoch": 0.045667516859799695, "grad_norm": 1.6484375, "learning_rate": 0.0017149547257654062, "loss": 0.2959, "step": 25756 }, { "epoch": 0.04567106302510951, "grad_norm": 0.6015625, "learning_rate": 0.0017149112666644305, "loss": 0.169, "step": 25758 }, { "epoch": 0.045674609190419324, "grad_norm": 0.287109375, "learning_rate": 0.00171486780487419, "loss": 0.1664, "step": 25760 }, { "epoch": 0.04567815535572914, "grad_norm": 0.455078125, "learning_rate": 0.0017148243403948746, "loss": 0.4258, "step": 25762 }, { "epoch": 0.04568170152103895, "grad_norm": 0.35546875, "learning_rate": 0.001714780873226675, "loss": 0.2398, "step": 25764 }, { "epoch": 0.04568524768634877, "grad_norm": 0.7734375, "learning_rate": 0.001714737403369781, "loss": 0.2238, "step": 25766 }, { "epoch": 0.04568879385165859, "grad_norm": 0.62109375, "learning_rate": 0.0017146939308243824, "loss": 0.1732, "step": 25768 }, { "epoch": 0.045692340016968404, "grad_norm": 1.1640625, "learning_rate": 0.0017146504555906696, "loss": 0.1884, "step": 25770 }, { "epoch": 0.04569588618227822, "grad_norm": 0.255859375, "learning_rate": 0.0017146069776688329, "loss": 0.2043, "step": 25772 }, { "epoch": 0.04569943234758803, "grad_norm": 0.1943359375, "learning_rate": 0.0017145634970590624, "loss": 0.2506, "step": 25774 }, { "epoch": 0.04570297851289785, "grad_norm": 0.6796875, "learning_rate": 0.001714520013761548, "loss": 0.1863, "step": 25776 }, { "epoch": 0.04570652467820766, "grad_norm": 0.443359375, "learning_rate": 0.0017144765277764802, "loss": 0.2049, "step": 25778 }, { "epoch": 0.04571007084351748, "grad_norm": 1.296875, "learning_rate": 0.0017144330391040486, "loss": 0.1928, "step": 25780 }, { "epoch": 0.04571361700882729, "grad_norm": 0.51171875, "learning_rate": 0.001714389547744444, "loss": 0.2556, "step": 25782 }, { "epoch": 0.045717163174137106, "grad_norm": 1.0625, "learning_rate": 0.0017143460536978565, "loss": 0.1861, "step": 25784 }, { "epoch": 0.04572070933944692, "grad_norm": 1.1640625, "learning_rate": 0.0017143025569644759, "loss": 0.2131, "step": 25786 }, { "epoch": 0.045724255504756735, "grad_norm": 0.412109375, "learning_rate": 0.0017142590575444931, "loss": 0.1781, "step": 25788 }, { "epoch": 0.04572780167006655, "grad_norm": 0.4921875, "learning_rate": 0.001714215555438098, "loss": 0.2433, "step": 25790 }, { "epoch": 0.04573134783537637, "grad_norm": 1.0078125, "learning_rate": 0.0017141720506454806, "loss": 0.6866, "step": 25792 }, { "epoch": 0.045734894000686185, "grad_norm": 0.57421875, "learning_rate": 0.0017141285431668317, "loss": 0.1471, "step": 25794 }, { "epoch": 0.045738440165996, "grad_norm": 0.314453125, "learning_rate": 0.0017140850330023413, "loss": 0.2181, "step": 25796 }, { "epoch": 0.045741986331305814, "grad_norm": 0.36328125, "learning_rate": 0.001714041520152199, "loss": 0.2128, "step": 25798 }, { "epoch": 0.04574553249661563, "grad_norm": 0.2392578125, "learning_rate": 0.0017139980046165964, "loss": 0.1923, "step": 25800 }, { "epoch": 0.045749078661925444, "grad_norm": 0.39453125, "learning_rate": 0.0017139544863957231, "loss": 0.2169, "step": 25802 }, { "epoch": 0.04575262482723526, "grad_norm": 0.255859375, "learning_rate": 0.0017139109654897694, "loss": 0.1872, "step": 25804 }, { "epoch": 0.04575617099254507, "grad_norm": 0.435546875, "learning_rate": 0.0017138674418989256, "loss": 0.243, "step": 25806 }, { "epoch": 0.04575971715785489, "grad_norm": 4.53125, "learning_rate": 0.0017138239156233824, "loss": 0.5334, "step": 25808 }, { "epoch": 0.0457632633231647, "grad_norm": 5.46875, "learning_rate": 0.0017137803866633299, "loss": 0.3189, "step": 25810 }, { "epoch": 0.045766809488474516, "grad_norm": 0.236328125, "learning_rate": 0.0017137368550189584, "loss": 0.1711, "step": 25812 }, { "epoch": 0.04577035565378434, "grad_norm": 0.359375, "learning_rate": 0.0017136933206904586, "loss": 0.1747, "step": 25814 }, { "epoch": 0.04577390181909415, "grad_norm": 0.271484375, "learning_rate": 0.0017136497836780207, "loss": 0.1784, "step": 25816 }, { "epoch": 0.04577744798440397, "grad_norm": 3.140625, "learning_rate": 0.0017136062439818346, "loss": 0.3431, "step": 25818 }, { "epoch": 0.04578099414971378, "grad_norm": 0.81640625, "learning_rate": 0.001713562701602092, "loss": 0.1427, "step": 25820 }, { "epoch": 0.045784540315023596, "grad_norm": 1.046875, "learning_rate": 0.0017135191565389818, "loss": 0.4024, "step": 25822 }, { "epoch": 0.04578808648033341, "grad_norm": 0.69921875, "learning_rate": 0.0017134756087926956, "loss": 0.1606, "step": 25824 }, { "epoch": 0.045791632645643225, "grad_norm": 0.3515625, "learning_rate": 0.0017134320583634234, "loss": 0.1576, "step": 25826 }, { "epoch": 0.04579517881095304, "grad_norm": 2.03125, "learning_rate": 0.0017133885052513559, "loss": 0.2849, "step": 25828 }, { "epoch": 0.045798724976262854, "grad_norm": 1.1875, "learning_rate": 0.001713344949456683, "loss": 0.2268, "step": 25830 }, { "epoch": 0.04580227114157267, "grad_norm": 0.99609375, "learning_rate": 0.001713301390979596, "loss": 0.1704, "step": 25832 }, { "epoch": 0.04580581730688248, "grad_norm": 0.498046875, "learning_rate": 0.0017132578298202845, "loss": 0.1936, "step": 25834 }, { "epoch": 0.045809363472192305, "grad_norm": 0.578125, "learning_rate": 0.0017132142659789396, "loss": 0.2522, "step": 25836 }, { "epoch": 0.04581290963750212, "grad_norm": 0.40234375, "learning_rate": 0.0017131706994557518, "loss": 0.1717, "step": 25838 }, { "epoch": 0.045816455802811934, "grad_norm": 0.5390625, "learning_rate": 0.0017131271302509115, "loss": 0.2075, "step": 25840 }, { "epoch": 0.04582000196812175, "grad_norm": 0.98046875, "learning_rate": 0.001713083558364609, "loss": 0.2542, "step": 25842 }, { "epoch": 0.04582354813343156, "grad_norm": 0.5546875, "learning_rate": 0.0017130399837970356, "loss": 0.2296, "step": 25844 }, { "epoch": 0.04582709429874138, "grad_norm": 0.5859375, "learning_rate": 0.001712996406548381, "loss": 0.2175, "step": 25846 }, { "epoch": 0.04583064046405119, "grad_norm": 1.171875, "learning_rate": 0.0017129528266188365, "loss": 0.3051, "step": 25848 }, { "epoch": 0.04583418662936101, "grad_norm": 0.259765625, "learning_rate": 0.0017129092440085923, "loss": 0.2195, "step": 25850 }, { "epoch": 0.04583773279467082, "grad_norm": 0.69921875, "learning_rate": 0.0017128656587178388, "loss": 0.2553, "step": 25852 }, { "epoch": 0.045841278959980636, "grad_norm": 0.85546875, "learning_rate": 0.0017128220707467673, "loss": 0.1638, "step": 25854 }, { "epoch": 0.04584482512529045, "grad_norm": 0.51953125, "learning_rate": 0.001712778480095568, "loss": 0.2151, "step": 25856 }, { "epoch": 0.045848371290600265, "grad_norm": 2.4375, "learning_rate": 0.0017127348867644315, "loss": 0.2617, "step": 25858 }, { "epoch": 0.045851917455910086, "grad_norm": 0.41015625, "learning_rate": 0.0017126912907535483, "loss": 0.2414, "step": 25860 }, { "epoch": 0.0458554636212199, "grad_norm": 0.43359375, "learning_rate": 0.0017126476920631096, "loss": 0.182, "step": 25862 }, { "epoch": 0.045859009786529716, "grad_norm": 0.6953125, "learning_rate": 0.0017126040906933057, "loss": 0.3261, "step": 25864 }, { "epoch": 0.04586255595183953, "grad_norm": 0.515625, "learning_rate": 0.0017125604866443272, "loss": 0.2803, "step": 25866 }, { "epoch": 0.045866102117149345, "grad_norm": 0.44921875, "learning_rate": 0.001712516879916365, "loss": 0.2051, "step": 25868 }, { "epoch": 0.04586964828245916, "grad_norm": 0.443359375, "learning_rate": 0.0017124732705096097, "loss": 0.1855, "step": 25870 }, { "epoch": 0.045873194447768974, "grad_norm": 0.390625, "learning_rate": 0.001712429658424252, "loss": 0.1979, "step": 25872 }, { "epoch": 0.04587674061307879, "grad_norm": 0.609375, "learning_rate": 0.0017123860436604832, "loss": 0.2321, "step": 25874 }, { "epoch": 0.0458802867783886, "grad_norm": 0.1669921875, "learning_rate": 0.0017123424262184932, "loss": 0.1906, "step": 25876 }, { "epoch": 0.04588383294369842, "grad_norm": 0.5, "learning_rate": 0.0017122988060984732, "loss": 0.192, "step": 25878 }, { "epoch": 0.04588737910900823, "grad_norm": 0.3828125, "learning_rate": 0.0017122551833006136, "loss": 0.1685, "step": 25880 }, { "epoch": 0.04589092527431805, "grad_norm": 0.51953125, "learning_rate": 0.0017122115578251061, "loss": 0.2093, "step": 25882 }, { "epoch": 0.04589447143962787, "grad_norm": 0.43359375, "learning_rate": 0.0017121679296721404, "loss": 0.1906, "step": 25884 }, { "epoch": 0.04589801760493768, "grad_norm": 0.32421875, "learning_rate": 0.0017121242988419078, "loss": 0.1523, "step": 25886 }, { "epoch": 0.0459015637702475, "grad_norm": 0.453125, "learning_rate": 0.001712080665334599, "loss": 0.2256, "step": 25888 }, { "epoch": 0.04590510993555731, "grad_norm": 2.0, "learning_rate": 0.0017120370291504052, "loss": 0.3778, "step": 25890 }, { "epoch": 0.045908656100867126, "grad_norm": 0.65234375, "learning_rate": 0.0017119933902895168, "loss": 0.1916, "step": 25892 }, { "epoch": 0.04591220226617694, "grad_norm": 0.5703125, "learning_rate": 0.0017119497487521247, "loss": 0.294, "step": 25894 }, { "epoch": 0.045915748431486755, "grad_norm": 0.7734375, "learning_rate": 0.0017119061045384198, "loss": 0.2407, "step": 25896 }, { "epoch": 0.04591929459679657, "grad_norm": 0.37890625, "learning_rate": 0.0017118624576485935, "loss": 0.1824, "step": 25898 }, { "epoch": 0.045922840762106384, "grad_norm": 0.9140625, "learning_rate": 0.0017118188080828356, "loss": 0.246, "step": 25900 }, { "epoch": 0.0459263869274162, "grad_norm": 0.5234375, "learning_rate": 0.0017117751558413381, "loss": 0.2198, "step": 25902 }, { "epoch": 0.04592993309272602, "grad_norm": 0.8046875, "learning_rate": 0.0017117315009242914, "loss": 0.1699, "step": 25904 }, { "epoch": 0.045933479258035835, "grad_norm": 0.31640625, "learning_rate": 0.0017116878433318862, "loss": 0.2413, "step": 25906 }, { "epoch": 0.04593702542334565, "grad_norm": 1.0234375, "learning_rate": 0.0017116441830643137, "loss": 0.3057, "step": 25908 }, { "epoch": 0.045940571588655464, "grad_norm": 0.34375, "learning_rate": 0.001711600520121765, "loss": 0.2181, "step": 25910 }, { "epoch": 0.04594411775396528, "grad_norm": 0.4921875, "learning_rate": 0.001711556854504431, "loss": 0.159, "step": 25912 }, { "epoch": 0.04594766391927509, "grad_norm": 0.447265625, "learning_rate": 0.0017115131862125022, "loss": 0.2072, "step": 25914 }, { "epoch": 0.04595121008458491, "grad_norm": 0.79296875, "learning_rate": 0.0017114695152461701, "loss": 0.3051, "step": 25916 }, { "epoch": 0.04595475624989472, "grad_norm": 0.859375, "learning_rate": 0.0017114258416056256, "loss": 0.2815, "step": 25918 }, { "epoch": 0.04595830241520454, "grad_norm": 0.46484375, "learning_rate": 0.0017113821652910593, "loss": 0.1734, "step": 25920 }, { "epoch": 0.04596184858051435, "grad_norm": 0.53515625, "learning_rate": 0.0017113384863026628, "loss": 0.1549, "step": 25922 }, { "epoch": 0.045965394745824166, "grad_norm": 0.5546875, "learning_rate": 0.0017112948046406268, "loss": 0.2272, "step": 25924 }, { "epoch": 0.04596894091113398, "grad_norm": 0.76171875, "learning_rate": 0.0017112511203051426, "loss": 0.2298, "step": 25926 }, { "epoch": 0.0459724870764438, "grad_norm": 1.6796875, "learning_rate": 0.001711207433296401, "loss": 0.2439, "step": 25928 }, { "epoch": 0.04597603324175362, "grad_norm": 0.376953125, "learning_rate": 0.001711163743614593, "loss": 0.2151, "step": 25930 }, { "epoch": 0.04597957940706343, "grad_norm": 0.3125, "learning_rate": 0.00171112005125991, "loss": 0.1682, "step": 25932 }, { "epoch": 0.045983125572373246, "grad_norm": 0.5234375, "learning_rate": 0.0017110763562325426, "loss": 0.1884, "step": 25934 }, { "epoch": 0.04598667173768306, "grad_norm": 0.5078125, "learning_rate": 0.0017110326585326823, "loss": 0.2387, "step": 25936 }, { "epoch": 0.045990217902992875, "grad_norm": 1.9921875, "learning_rate": 0.00171098895816052, "loss": 0.1565, "step": 25938 }, { "epoch": 0.04599376406830269, "grad_norm": 0.416015625, "learning_rate": 0.001710945255116247, "loss": 0.1911, "step": 25940 }, { "epoch": 0.045997310233612504, "grad_norm": 0.482421875, "learning_rate": 0.0017109015494000542, "loss": 0.2056, "step": 25942 }, { "epoch": 0.04600085639892232, "grad_norm": 0.25390625, "learning_rate": 0.0017108578410121329, "loss": 0.1775, "step": 25944 }, { "epoch": 0.04600440256423213, "grad_norm": 1.3125, "learning_rate": 0.0017108141299526744, "loss": 0.2414, "step": 25946 }, { "epoch": 0.04600794872954195, "grad_norm": 0.5703125, "learning_rate": 0.0017107704162218696, "loss": 0.2084, "step": 25948 }, { "epoch": 0.04601149489485177, "grad_norm": 0.384765625, "learning_rate": 0.0017107266998199098, "loss": 0.2119, "step": 25950 }, { "epoch": 0.046015041060161584, "grad_norm": 0.58984375, "learning_rate": 0.001710682980746986, "loss": 0.2097, "step": 25952 }, { "epoch": 0.0460185872254714, "grad_norm": 0.96484375, "learning_rate": 0.0017106392590032899, "loss": 0.2913, "step": 25954 }, { "epoch": 0.04602213339078121, "grad_norm": 0.625, "learning_rate": 0.001710595534589012, "loss": 0.3591, "step": 25956 }, { "epoch": 0.04602567955609103, "grad_norm": 0.62109375, "learning_rate": 0.001710551807504344, "loss": 0.2215, "step": 25958 }, { "epoch": 0.04602922572140084, "grad_norm": 1.109375, "learning_rate": 0.0017105080777494773, "loss": 0.1744, "step": 25960 }, { "epoch": 0.046032771886710656, "grad_norm": 1.046875, "learning_rate": 0.0017104643453246028, "loss": 0.2341, "step": 25962 }, { "epoch": 0.04603631805202047, "grad_norm": 0.56640625, "learning_rate": 0.0017104206102299118, "loss": 0.2023, "step": 25964 }, { "epoch": 0.046039864217330286, "grad_norm": 1.234375, "learning_rate": 0.0017103768724655954, "loss": 0.2139, "step": 25966 }, { "epoch": 0.0460434103826401, "grad_norm": 0.76953125, "learning_rate": 0.0017103331320318456, "loss": 0.208, "step": 25968 }, { "epoch": 0.046046956547949915, "grad_norm": 2.171875, "learning_rate": 0.001710289388928853, "loss": 0.4158, "step": 25970 }, { "epoch": 0.046050502713259736, "grad_norm": 0.390625, "learning_rate": 0.001710245643156809, "loss": 0.2111, "step": 25972 }, { "epoch": 0.04605404887856955, "grad_norm": 0.2041015625, "learning_rate": 0.001710201894715905, "loss": 0.1771, "step": 25974 }, { "epoch": 0.046057595043879365, "grad_norm": 0.6953125, "learning_rate": 0.0017101581436063326, "loss": 0.2738, "step": 25976 }, { "epoch": 0.04606114120918918, "grad_norm": 0.55859375, "learning_rate": 0.0017101143898282827, "loss": 0.262, "step": 25978 }, { "epoch": 0.046064687374498994, "grad_norm": 0.431640625, "learning_rate": 0.0017100706333819469, "loss": 0.2294, "step": 25980 }, { "epoch": 0.04606823353980881, "grad_norm": 0.40234375, "learning_rate": 0.0017100268742675164, "loss": 0.1774, "step": 25982 }, { "epoch": 0.04607177970511862, "grad_norm": 0.80859375, "learning_rate": 0.001709983112485183, "loss": 0.1972, "step": 25984 }, { "epoch": 0.04607532587042844, "grad_norm": 0.734375, "learning_rate": 0.0017099393480351373, "loss": 0.281, "step": 25986 }, { "epoch": 0.04607887203573825, "grad_norm": 0.359375, "learning_rate": 0.0017098955809175715, "loss": 0.2037, "step": 25988 }, { "epoch": 0.04608241820104807, "grad_norm": 0.455078125, "learning_rate": 0.0017098518111326767, "loss": 0.2018, "step": 25990 }, { "epoch": 0.04608596436635788, "grad_norm": 1.640625, "learning_rate": 0.0017098080386806446, "loss": 0.3368, "step": 25992 }, { "epoch": 0.046089510531667696, "grad_norm": 0.59765625, "learning_rate": 0.0017097642635616657, "loss": 0.1961, "step": 25994 }, { "epoch": 0.04609305669697752, "grad_norm": 0.94140625, "learning_rate": 0.0017097204857759328, "loss": 0.2178, "step": 25996 }, { "epoch": 0.04609660286228733, "grad_norm": 0.62890625, "learning_rate": 0.001709676705323636, "loss": 0.1715, "step": 25998 }, { "epoch": 0.04610014902759715, "grad_norm": 0.41015625, "learning_rate": 0.0017096329222049678, "loss": 0.1827, "step": 26000 }, { "epoch": 0.04610369519290696, "grad_norm": 0.34375, "learning_rate": 0.0017095891364201192, "loss": 0.157, "step": 26002 }, { "epoch": 0.046107241358216776, "grad_norm": 0.44921875, "learning_rate": 0.001709545347969282, "loss": 0.3541, "step": 26004 }, { "epoch": 0.04611078752352659, "grad_norm": 0.96875, "learning_rate": 0.0017095015568526473, "loss": 0.2669, "step": 26006 }, { "epoch": 0.046114333688836405, "grad_norm": 0.4375, "learning_rate": 0.0017094577630704072, "loss": 0.2086, "step": 26008 }, { "epoch": 0.04611787985414622, "grad_norm": 0.67578125, "learning_rate": 0.0017094139666227524, "loss": 0.153, "step": 26010 }, { "epoch": 0.046121426019456034, "grad_norm": 0.8203125, "learning_rate": 0.0017093701675098753, "loss": 0.1675, "step": 26012 }, { "epoch": 0.04612497218476585, "grad_norm": 0.39453125, "learning_rate": 0.0017093263657319666, "loss": 0.2848, "step": 26014 }, { "epoch": 0.04612851835007566, "grad_norm": 1.0, "learning_rate": 0.0017092825612892186, "loss": 0.2153, "step": 26016 }, { "epoch": 0.046132064515385485, "grad_norm": 0.59765625, "learning_rate": 0.0017092387541818225, "loss": 0.1468, "step": 26018 }, { "epoch": 0.0461356106806953, "grad_norm": 0.283203125, "learning_rate": 0.0017091949444099702, "loss": 0.2116, "step": 26020 }, { "epoch": 0.046139156846005114, "grad_norm": 0.2890625, "learning_rate": 0.0017091511319738525, "loss": 0.1973, "step": 26022 }, { "epoch": 0.04614270301131493, "grad_norm": 0.345703125, "learning_rate": 0.0017091073168736624, "loss": 0.2215, "step": 26024 }, { "epoch": 0.04614624917662474, "grad_norm": 1.1484375, "learning_rate": 0.00170906349910959, "loss": 0.1505, "step": 26026 }, { "epoch": 0.04614979534193456, "grad_norm": 1.4765625, "learning_rate": 0.0017090196786818282, "loss": 0.2872, "step": 26028 }, { "epoch": 0.04615334150724437, "grad_norm": 2.5625, "learning_rate": 0.0017089758555905677, "loss": 0.4636, "step": 26030 }, { "epoch": 0.04615688767255419, "grad_norm": 1.3359375, "learning_rate": 0.0017089320298360007, "loss": 0.4287, "step": 26032 }, { "epoch": 0.046160433837864, "grad_norm": 0.7890625, "learning_rate": 0.0017088882014183186, "loss": 0.1669, "step": 26034 }, { "epoch": 0.046163980003173816, "grad_norm": 0.455078125, "learning_rate": 0.0017088443703377131, "loss": 0.215, "step": 26036 }, { "epoch": 0.04616752616848363, "grad_norm": 1.0625, "learning_rate": 0.0017088005365943767, "loss": 0.2821, "step": 26038 }, { "epoch": 0.04617107233379345, "grad_norm": 0.74609375, "learning_rate": 0.0017087567001884995, "loss": 0.1614, "step": 26040 }, { "epoch": 0.046174618499103266, "grad_norm": 0.337890625, "learning_rate": 0.0017087128611202745, "loss": 0.2507, "step": 26042 }, { "epoch": 0.04617816466441308, "grad_norm": 0.451171875, "learning_rate": 0.0017086690193898928, "loss": 0.2067, "step": 26044 }, { "epoch": 0.046181710829722895, "grad_norm": 0.91015625, "learning_rate": 0.0017086251749975467, "loss": 0.2486, "step": 26046 }, { "epoch": 0.04618525699503271, "grad_norm": 0.75390625, "learning_rate": 0.0017085813279434274, "loss": 0.2421, "step": 26048 }, { "epoch": 0.046188803160342524, "grad_norm": 0.7890625, "learning_rate": 0.001708537478227727, "loss": 0.2234, "step": 26050 }, { "epoch": 0.04619234932565234, "grad_norm": 0.52734375, "learning_rate": 0.0017084936258506369, "loss": 0.2215, "step": 26052 }, { "epoch": 0.046195895490962154, "grad_norm": 0.333984375, "learning_rate": 0.0017084497708123494, "loss": 0.2059, "step": 26054 }, { "epoch": 0.04619944165627197, "grad_norm": 0.400390625, "learning_rate": 0.001708405913113056, "loss": 0.1825, "step": 26056 }, { "epoch": 0.04620298782158178, "grad_norm": 0.326171875, "learning_rate": 0.0017083620527529484, "loss": 0.2069, "step": 26058 }, { "epoch": 0.0462065339868916, "grad_norm": 0.7265625, "learning_rate": 0.0017083181897322188, "loss": 0.1857, "step": 26060 }, { "epoch": 0.04621008015220141, "grad_norm": 0.546875, "learning_rate": 0.0017082743240510586, "loss": 0.2015, "step": 26062 }, { "epoch": 0.04621362631751123, "grad_norm": 0.83203125, "learning_rate": 0.00170823045570966, "loss": 0.176, "step": 26064 }, { "epoch": 0.04621717248282105, "grad_norm": 0.265625, "learning_rate": 0.0017081865847082143, "loss": 0.1459, "step": 26066 }, { "epoch": 0.04622071864813086, "grad_norm": 0.51953125, "learning_rate": 0.001708142711046914, "loss": 0.2153, "step": 26068 }, { "epoch": 0.04622426481344068, "grad_norm": 0.283203125, "learning_rate": 0.001708098834725951, "loss": 0.1432, "step": 26070 }, { "epoch": 0.04622781097875049, "grad_norm": 0.291015625, "learning_rate": 0.0017080549557455167, "loss": 0.178, "step": 26072 }, { "epoch": 0.046231357144060306, "grad_norm": 0.6953125, "learning_rate": 0.001708011074105803, "loss": 0.312, "step": 26074 }, { "epoch": 0.04623490330937012, "grad_norm": 1.8046875, "learning_rate": 0.0017079671898070023, "loss": 0.2437, "step": 26076 }, { "epoch": 0.046238449474679935, "grad_norm": 2.140625, "learning_rate": 0.0017079233028493063, "loss": 0.3157, "step": 26078 }, { "epoch": 0.04624199563998975, "grad_norm": 1.2578125, "learning_rate": 0.0017078794132329067, "loss": 0.3413, "step": 26080 }, { "epoch": 0.046245541805299564, "grad_norm": 0.76171875, "learning_rate": 0.0017078355209579957, "loss": 0.1758, "step": 26082 }, { "epoch": 0.04624908797060938, "grad_norm": 0.41015625, "learning_rate": 0.0017077916260247652, "loss": 0.2042, "step": 26084 }, { "epoch": 0.0462526341359192, "grad_norm": 0.75390625, "learning_rate": 0.0017077477284334073, "loss": 0.1902, "step": 26086 }, { "epoch": 0.046256180301229015, "grad_norm": 0.375, "learning_rate": 0.0017077038281841138, "loss": 0.3049, "step": 26088 }, { "epoch": 0.04625972646653883, "grad_norm": 0.44140625, "learning_rate": 0.0017076599252770766, "loss": 0.1475, "step": 26090 }, { "epoch": 0.046263272631848644, "grad_norm": 0.498046875, "learning_rate": 0.0017076160197124878, "loss": 0.1764, "step": 26092 }, { "epoch": 0.04626681879715846, "grad_norm": 0.90234375, "learning_rate": 0.00170757211149054, "loss": 0.207, "step": 26094 }, { "epoch": 0.04627036496246827, "grad_norm": 0.55078125, "learning_rate": 0.0017075282006114242, "loss": 0.164, "step": 26096 }, { "epoch": 0.04627391112777809, "grad_norm": 5.0625, "learning_rate": 0.0017074842870753333, "loss": 0.4012, "step": 26098 }, { "epoch": 0.0462774572930879, "grad_norm": 0.310546875, "learning_rate": 0.0017074403708824586, "loss": 0.192, "step": 26100 }, { "epoch": 0.04628100345839772, "grad_norm": 0.298828125, "learning_rate": 0.0017073964520329925, "loss": 0.206, "step": 26102 }, { "epoch": 0.04628454962370753, "grad_norm": 0.640625, "learning_rate": 0.0017073525305271275, "loss": 0.2016, "step": 26104 }, { "epoch": 0.046288095789017346, "grad_norm": 0.88671875, "learning_rate": 0.0017073086063650557, "loss": 0.1796, "step": 26106 }, { "epoch": 0.04629164195432717, "grad_norm": 0.439453125, "learning_rate": 0.001707264679546968, "loss": 0.216, "step": 26108 }, { "epoch": 0.04629518811963698, "grad_norm": 0.78515625, "learning_rate": 0.0017072207500730577, "loss": 0.1942, "step": 26110 }, { "epoch": 0.046298734284946796, "grad_norm": 0.263671875, "learning_rate": 0.0017071768179435167, "loss": 0.1946, "step": 26112 }, { "epoch": 0.04630228045025661, "grad_norm": 0.357421875, "learning_rate": 0.0017071328831585366, "loss": 0.205, "step": 26114 }, { "epoch": 0.046305826615566426, "grad_norm": 1.0078125, "learning_rate": 0.0017070889457183104, "loss": 0.3614, "step": 26116 }, { "epoch": 0.04630937278087624, "grad_norm": 0.828125, "learning_rate": 0.0017070450056230293, "loss": 0.2697, "step": 26118 }, { "epoch": 0.046312918946186055, "grad_norm": 0.322265625, "learning_rate": 0.0017070010628728862, "loss": 0.2137, "step": 26120 }, { "epoch": 0.04631646511149587, "grad_norm": 0.41015625, "learning_rate": 0.001706957117468073, "loss": 0.2054, "step": 26122 }, { "epoch": 0.046320011276805684, "grad_norm": 1.6015625, "learning_rate": 0.0017069131694087824, "loss": 0.3104, "step": 26124 }, { "epoch": 0.0463235574421155, "grad_norm": 0.640625, "learning_rate": 0.0017068692186952054, "loss": 0.1769, "step": 26126 }, { "epoch": 0.04632710360742531, "grad_norm": 0.79296875, "learning_rate": 0.0017068252653275353, "loss": 0.1966, "step": 26128 }, { "epoch": 0.04633064977273513, "grad_norm": 1.875, "learning_rate": 0.0017067813093059642, "loss": 0.2292, "step": 26130 }, { "epoch": 0.04633419593804495, "grad_norm": 0.232421875, "learning_rate": 0.0017067373506306836, "loss": 0.1376, "step": 26132 }, { "epoch": 0.04633774210335476, "grad_norm": 0.35546875, "learning_rate": 0.0017066933893018865, "loss": 0.3009, "step": 26134 }, { "epoch": 0.04634128826866458, "grad_norm": 0.328125, "learning_rate": 0.0017066494253197648, "loss": 0.1524, "step": 26136 }, { "epoch": 0.04634483443397439, "grad_norm": 0.494140625, "learning_rate": 0.0017066054586845113, "loss": 0.2016, "step": 26138 }, { "epoch": 0.04634838059928421, "grad_norm": 1.15625, "learning_rate": 0.0017065614893963174, "loss": 0.2322, "step": 26140 }, { "epoch": 0.04635192676459402, "grad_norm": 0.369140625, "learning_rate": 0.001706517517455376, "loss": 0.2868, "step": 26142 }, { "epoch": 0.046355472929903836, "grad_norm": 0.37109375, "learning_rate": 0.0017064735428618794, "loss": 0.1756, "step": 26144 }, { "epoch": 0.04635901909521365, "grad_norm": 0.859375, "learning_rate": 0.0017064295656160197, "loss": 0.2021, "step": 26146 }, { "epoch": 0.046362565260523465, "grad_norm": 0.61328125, "learning_rate": 0.0017063855857179897, "loss": 0.1954, "step": 26148 }, { "epoch": 0.04636611142583328, "grad_norm": 0.251953125, "learning_rate": 0.001706341603167981, "loss": 0.1248, "step": 26150 }, { "epoch": 0.046369657591143094, "grad_norm": 0.423828125, "learning_rate": 0.0017062976179661868, "loss": 0.1505, "step": 26152 }, { "epoch": 0.046373203756452916, "grad_norm": 0.6953125, "learning_rate": 0.0017062536301127981, "loss": 0.1791, "step": 26154 }, { "epoch": 0.04637674992176273, "grad_norm": 0.275390625, "learning_rate": 0.0017062096396080087, "loss": 0.1923, "step": 26156 }, { "epoch": 0.046380296087072545, "grad_norm": 0.72265625, "learning_rate": 0.0017061656464520106, "loss": 0.185, "step": 26158 }, { "epoch": 0.04638384225238236, "grad_norm": 1.2265625, "learning_rate": 0.0017061216506449961, "loss": 0.2128, "step": 26160 }, { "epoch": 0.046387388417692174, "grad_norm": 2.234375, "learning_rate": 0.0017060776521871572, "loss": 0.2501, "step": 26162 }, { "epoch": 0.04639093458300199, "grad_norm": 0.41015625, "learning_rate": 0.0017060336510786869, "loss": 0.2348, "step": 26164 }, { "epoch": 0.0463944807483118, "grad_norm": 0.263671875, "learning_rate": 0.0017059896473197778, "loss": 0.1794, "step": 26166 }, { "epoch": 0.04639802691362162, "grad_norm": 0.578125, "learning_rate": 0.0017059456409106216, "loss": 0.2392, "step": 26168 }, { "epoch": 0.04640157307893143, "grad_norm": 0.298828125, "learning_rate": 0.001705901631851411, "loss": 0.6879, "step": 26170 }, { "epoch": 0.04640511924424125, "grad_norm": 1.8203125, "learning_rate": 0.001705857620142339, "loss": 0.3341, "step": 26172 }, { "epoch": 0.04640866540955106, "grad_norm": 0.5546875, "learning_rate": 0.0017058136057835974, "loss": 0.1771, "step": 26174 }, { "epoch": 0.04641221157486088, "grad_norm": 0.404296875, "learning_rate": 0.001705769588775379, "loss": 0.2007, "step": 26176 }, { "epoch": 0.0464157577401707, "grad_norm": 0.52734375, "learning_rate": 0.0017057255691178763, "loss": 0.1908, "step": 26178 }, { "epoch": 0.04641930390548051, "grad_norm": 0.55078125, "learning_rate": 0.001705681546811282, "loss": 0.2116, "step": 26180 }, { "epoch": 0.04642285007079033, "grad_norm": 1.1953125, "learning_rate": 0.0017056375218557883, "loss": 0.1989, "step": 26182 }, { "epoch": 0.04642639623610014, "grad_norm": 0.357421875, "learning_rate": 0.0017055934942515878, "loss": 0.2417, "step": 26184 }, { "epoch": 0.046429942401409956, "grad_norm": 0.41796875, "learning_rate": 0.0017055494639988732, "loss": 0.2349, "step": 26186 }, { "epoch": 0.04643348856671977, "grad_norm": 0.6171875, "learning_rate": 0.001705505431097837, "loss": 0.2326, "step": 26188 }, { "epoch": 0.046437034732029585, "grad_norm": 0.2451171875, "learning_rate": 0.0017054613955486716, "loss": 0.2044, "step": 26190 }, { "epoch": 0.0464405808973394, "grad_norm": 2.40625, "learning_rate": 0.00170541735735157, "loss": 0.3135, "step": 26192 }, { "epoch": 0.046444127062649214, "grad_norm": 0.4453125, "learning_rate": 0.0017053733165067245, "loss": 0.2392, "step": 26194 }, { "epoch": 0.04644767322795903, "grad_norm": 0.3359375, "learning_rate": 0.0017053292730143277, "loss": 0.1449, "step": 26196 }, { "epoch": 0.04645121939326884, "grad_norm": 0.3203125, "learning_rate": 0.0017052852268745723, "loss": 0.1934, "step": 26198 }, { "epoch": 0.046454765558578665, "grad_norm": 0.64453125, "learning_rate": 0.0017052411780876509, "loss": 0.2074, "step": 26200 }, { "epoch": 0.04645831172388848, "grad_norm": 0.46875, "learning_rate": 0.0017051971266537562, "loss": 0.1877, "step": 26202 }, { "epoch": 0.046461857889198294, "grad_norm": 1.296875, "learning_rate": 0.0017051530725730808, "loss": 0.2244, "step": 26204 }, { "epoch": 0.04646540405450811, "grad_norm": 1.40625, "learning_rate": 0.0017051090158458173, "loss": 0.1829, "step": 26206 }, { "epoch": 0.04646895021981792, "grad_norm": 0.275390625, "learning_rate": 0.0017050649564721583, "loss": 0.1775, "step": 26208 }, { "epoch": 0.04647249638512774, "grad_norm": 2.4375, "learning_rate": 0.0017050208944522972, "loss": 0.4318, "step": 26210 }, { "epoch": 0.04647604255043755, "grad_norm": 0.443359375, "learning_rate": 0.0017049768297864257, "loss": 0.202, "step": 26212 }, { "epoch": 0.046479588715747366, "grad_norm": 0.416015625, "learning_rate": 0.0017049327624747371, "loss": 0.2132, "step": 26214 }, { "epoch": 0.04648313488105718, "grad_norm": 0.431640625, "learning_rate": 0.001704888692517424, "loss": 0.1796, "step": 26216 }, { "epoch": 0.046486681046366995, "grad_norm": 0.625, "learning_rate": 0.0017048446199146787, "loss": 0.1897, "step": 26218 }, { "epoch": 0.04649022721167681, "grad_norm": 0.40625, "learning_rate": 0.0017048005446666948, "loss": 0.2152, "step": 26220 }, { "epoch": 0.04649377337698663, "grad_norm": 0.72265625, "learning_rate": 0.0017047564667736644, "loss": 0.183, "step": 26222 }, { "epoch": 0.046497319542296446, "grad_norm": 0.384765625, "learning_rate": 0.0017047123862357807, "loss": 0.1673, "step": 26224 }, { "epoch": 0.04650086570760626, "grad_norm": 0.263671875, "learning_rate": 0.0017046683030532363, "loss": 0.2806, "step": 26226 }, { "epoch": 0.046504411872916075, "grad_norm": 1.0859375, "learning_rate": 0.0017046242172262238, "loss": 0.274, "step": 26228 }, { "epoch": 0.04650795803822589, "grad_norm": 0.6328125, "learning_rate": 0.0017045801287549362, "loss": 0.2151, "step": 26230 }, { "epoch": 0.046511504203535704, "grad_norm": 0.388671875, "learning_rate": 0.0017045360376395664, "loss": 0.2637, "step": 26232 }, { "epoch": 0.04651505036884552, "grad_norm": 1.1875, "learning_rate": 0.0017044919438803073, "loss": 0.1963, "step": 26234 }, { "epoch": 0.04651859653415533, "grad_norm": 0.470703125, "learning_rate": 0.0017044478474773513, "loss": 0.2368, "step": 26236 }, { "epoch": 0.04652214269946515, "grad_norm": 0.60546875, "learning_rate": 0.0017044037484308916, "loss": 0.2424, "step": 26238 }, { "epoch": 0.04652568886477496, "grad_norm": 1.515625, "learning_rate": 0.0017043596467411209, "loss": 0.2824, "step": 26240 }, { "epoch": 0.04652923503008478, "grad_norm": 0.33984375, "learning_rate": 0.0017043155424082323, "loss": 0.1975, "step": 26242 }, { "epoch": 0.0465327811953946, "grad_norm": 0.25390625, "learning_rate": 0.0017042714354324185, "loss": 0.1724, "step": 26244 }, { "epoch": 0.04653632736070441, "grad_norm": 0.369140625, "learning_rate": 0.0017042273258138722, "loss": 0.2137, "step": 26246 }, { "epoch": 0.04653987352601423, "grad_norm": 0.34765625, "learning_rate": 0.0017041832135527867, "loss": 0.2472, "step": 26248 }, { "epoch": 0.04654341969132404, "grad_norm": 1.78125, "learning_rate": 0.001704139098649355, "loss": 0.4322, "step": 26250 }, { "epoch": 0.04654696585663386, "grad_norm": 0.75, "learning_rate": 0.0017040949811037698, "loss": 0.1899, "step": 26252 }, { "epoch": 0.04655051202194367, "grad_norm": 0.427734375, "learning_rate": 0.0017040508609162239, "loss": 0.2124, "step": 26254 }, { "epoch": 0.046554058187253486, "grad_norm": 0.5546875, "learning_rate": 0.0017040067380869105, "loss": 0.1779, "step": 26256 }, { "epoch": 0.0465576043525633, "grad_norm": 1.59375, "learning_rate": 0.0017039626126160224, "loss": 0.2418, "step": 26258 }, { "epoch": 0.046561150517873115, "grad_norm": 0.72265625, "learning_rate": 0.0017039184845037527, "loss": 0.3143, "step": 26260 }, { "epoch": 0.04656469668318293, "grad_norm": 0.8359375, "learning_rate": 0.0017038743537502942, "loss": 0.4441, "step": 26262 }, { "epoch": 0.046568242848492744, "grad_norm": 0.3046875, "learning_rate": 0.0017038302203558403, "loss": 0.1865, "step": 26264 }, { "epoch": 0.04657178901380256, "grad_norm": 0.28515625, "learning_rate": 0.0017037860843205834, "loss": 0.2082, "step": 26266 }, { "epoch": 0.04657533517911238, "grad_norm": 0.38671875, "learning_rate": 0.001703741945644717, "loss": 0.2036, "step": 26268 }, { "epoch": 0.046578881344422195, "grad_norm": 0.70703125, "learning_rate": 0.0017036978043284343, "loss": 0.2463, "step": 26270 }, { "epoch": 0.04658242750973201, "grad_norm": 0.462890625, "learning_rate": 0.0017036536603719278, "loss": 0.3107, "step": 26272 }, { "epoch": 0.046585973675041824, "grad_norm": 0.376953125, "learning_rate": 0.0017036095137753912, "loss": 0.1705, "step": 26274 }, { "epoch": 0.04658951984035164, "grad_norm": 0.267578125, "learning_rate": 0.0017035653645390166, "loss": 0.19, "step": 26276 }, { "epoch": 0.04659306600566145, "grad_norm": 0.79296875, "learning_rate": 0.0017035212126629983, "loss": 0.2222, "step": 26278 }, { "epoch": 0.04659661217097127, "grad_norm": 4.71875, "learning_rate": 0.0017034770581475284, "loss": 0.3348, "step": 26280 }, { "epoch": 0.04660015833628108, "grad_norm": 4.59375, "learning_rate": 0.0017034329009928005, "loss": 0.2646, "step": 26282 }, { "epoch": 0.0466037045015909, "grad_norm": 1.9453125, "learning_rate": 0.0017033887411990075, "loss": 0.359, "step": 26284 }, { "epoch": 0.04660725066690071, "grad_norm": 0.96484375, "learning_rate": 0.0017033445787663427, "loss": 0.2766, "step": 26286 }, { "epoch": 0.046610796832210526, "grad_norm": 0.361328125, "learning_rate": 0.0017033004136949995, "loss": 0.2161, "step": 26288 }, { "epoch": 0.04661434299752035, "grad_norm": 0.6328125, "learning_rate": 0.0017032562459851704, "loss": 0.1906, "step": 26290 }, { "epoch": 0.04661788916283016, "grad_norm": 0.30859375, "learning_rate": 0.0017032120756370489, "loss": 0.1883, "step": 26292 }, { "epoch": 0.046621435328139976, "grad_norm": 0.52734375, "learning_rate": 0.001703167902650828, "loss": 0.1549, "step": 26294 }, { "epoch": 0.04662498149344979, "grad_norm": 0.73828125, "learning_rate": 0.0017031237270267012, "loss": 0.2314, "step": 26296 }, { "epoch": 0.046628527658759605, "grad_norm": 0.388671875, "learning_rate": 0.0017030795487648616, "loss": 0.2373, "step": 26298 }, { "epoch": 0.04663207382406942, "grad_norm": 0.8046875, "learning_rate": 0.0017030353678655027, "loss": 0.1619, "step": 26300 }, { "epoch": 0.046635619989379234, "grad_norm": 1.328125, "learning_rate": 0.0017029911843288168, "loss": 0.3279, "step": 26302 }, { "epoch": 0.04663916615468905, "grad_norm": 1.625, "learning_rate": 0.001702946998154998, "loss": 0.2421, "step": 26304 }, { "epoch": 0.046642712319998864, "grad_norm": 0.30078125, "learning_rate": 0.0017029028093442393, "loss": 0.1816, "step": 26306 }, { "epoch": 0.04664625848530868, "grad_norm": 1.28125, "learning_rate": 0.001702858617896734, "loss": 0.2181, "step": 26308 }, { "epoch": 0.04664980465061849, "grad_norm": 0.68359375, "learning_rate": 0.0017028144238126748, "loss": 0.1898, "step": 26310 }, { "epoch": 0.046653350815928314, "grad_norm": 1.4140625, "learning_rate": 0.001702770227092256, "loss": 0.2021, "step": 26312 }, { "epoch": 0.04665689698123813, "grad_norm": 0.359375, "learning_rate": 0.0017027260277356702, "loss": 0.1304, "step": 26314 }, { "epoch": 0.04666044314654794, "grad_norm": 0.64453125, "learning_rate": 0.001702681825743111, "loss": 0.2546, "step": 26316 }, { "epoch": 0.04666398931185776, "grad_norm": 1.6015625, "learning_rate": 0.001702637621114771, "loss": 0.1731, "step": 26318 }, { "epoch": 0.04666753547716757, "grad_norm": 0.515625, "learning_rate": 0.001702593413850845, "loss": 0.2259, "step": 26320 }, { "epoch": 0.04667108164247739, "grad_norm": 0.59375, "learning_rate": 0.0017025492039515248, "loss": 0.1596, "step": 26322 }, { "epoch": 0.0466746278077872, "grad_norm": 0.25, "learning_rate": 0.0017025049914170043, "loss": 0.2008, "step": 26324 }, { "epoch": 0.046678173973097016, "grad_norm": 0.43359375, "learning_rate": 0.0017024607762474774, "loss": 0.2482, "step": 26326 }, { "epoch": 0.04668172013840683, "grad_norm": 0.78125, "learning_rate": 0.0017024165584431366, "loss": 0.2442, "step": 26328 }, { "epoch": 0.046685266303716645, "grad_norm": 0.5234375, "learning_rate": 0.001702372338004176, "loss": 0.2235, "step": 26330 }, { "epoch": 0.04668881246902646, "grad_norm": 0.43359375, "learning_rate": 0.0017023281149307885, "loss": 0.1694, "step": 26332 }, { "epoch": 0.046692358634336274, "grad_norm": 0.51171875, "learning_rate": 0.001702283889223168, "loss": 0.397, "step": 26334 }, { "epoch": 0.046695904799646096, "grad_norm": 0.6953125, "learning_rate": 0.0017022396608815074, "loss": 0.1493, "step": 26336 }, { "epoch": 0.04669945096495591, "grad_norm": 0.333984375, "learning_rate": 0.0017021954299060002, "loss": 0.1927, "step": 26338 }, { "epoch": 0.046702997130265725, "grad_norm": 0.279296875, "learning_rate": 0.0017021511962968402, "loss": 0.2215, "step": 26340 }, { "epoch": 0.04670654329557554, "grad_norm": 0.27734375, "learning_rate": 0.0017021069600542205, "loss": 0.2289, "step": 26342 }, { "epoch": 0.046710089460885354, "grad_norm": 0.6953125, "learning_rate": 0.0017020627211783348, "loss": 0.22, "step": 26344 }, { "epoch": 0.04671363562619517, "grad_norm": 0.3984375, "learning_rate": 0.0017020184796693765, "loss": 0.1477, "step": 26346 }, { "epoch": 0.04671718179150498, "grad_norm": 0.48828125, "learning_rate": 0.001701974235527539, "loss": 0.1967, "step": 26348 }, { "epoch": 0.0467207279568148, "grad_norm": 0.37890625, "learning_rate": 0.0017019299887530155, "loss": 0.1668, "step": 26350 }, { "epoch": 0.04672427412212461, "grad_norm": 0.38671875, "learning_rate": 0.0017018857393460009, "loss": 0.2344, "step": 26352 }, { "epoch": 0.04672782028743443, "grad_norm": 1.0390625, "learning_rate": 0.0017018414873066865, "loss": 0.2144, "step": 26354 }, { "epoch": 0.04673136645274424, "grad_norm": 0.68359375, "learning_rate": 0.001701797232635268, "loss": 0.2501, "step": 26356 }, { "epoch": 0.04673491261805406, "grad_norm": 0.380859375, "learning_rate": 0.0017017529753319371, "loss": 0.2134, "step": 26358 }, { "epoch": 0.04673845878336388, "grad_norm": 0.93359375, "learning_rate": 0.0017017087153968888, "loss": 0.289, "step": 26360 }, { "epoch": 0.04674200494867369, "grad_norm": 0.359375, "learning_rate": 0.001701664452830316, "loss": 0.1221, "step": 26362 }, { "epoch": 0.046745551113983506, "grad_norm": 0.75390625, "learning_rate": 0.0017016201876324123, "loss": 0.2445, "step": 26364 }, { "epoch": 0.04674909727929332, "grad_norm": 0.51953125, "learning_rate": 0.0017015759198033714, "loss": 0.27, "step": 26366 }, { "epoch": 0.046752643444603136, "grad_norm": 0.298828125, "learning_rate": 0.0017015316493433866, "loss": 0.177, "step": 26368 }, { "epoch": 0.04675618960991295, "grad_norm": 0.3671875, "learning_rate": 0.0017014873762526523, "loss": 0.1573, "step": 26370 }, { "epoch": 0.046759735775222765, "grad_norm": 0.384765625, "learning_rate": 0.001701443100531361, "loss": 0.3564, "step": 26372 }, { "epoch": 0.04676328194053258, "grad_norm": 0.498046875, "learning_rate": 0.0017013988221797076, "loss": 0.1993, "step": 26374 }, { "epoch": 0.046766828105842394, "grad_norm": 0.375, "learning_rate": 0.0017013545411978847, "loss": 0.1907, "step": 26376 }, { "epoch": 0.04677037427115221, "grad_norm": 0.2041015625, "learning_rate": 0.0017013102575860863, "loss": 0.2344, "step": 26378 }, { "epoch": 0.04677392043646203, "grad_norm": 0.71875, "learning_rate": 0.0017012659713445063, "loss": 0.2775, "step": 26380 }, { "epoch": 0.046777466601771844, "grad_norm": 0.78515625, "learning_rate": 0.0017012216824733382, "loss": 0.2069, "step": 26382 }, { "epoch": 0.04678101276708166, "grad_norm": 0.55859375, "learning_rate": 0.0017011773909727755, "loss": 0.1913, "step": 26384 }, { "epoch": 0.04678455893239147, "grad_norm": 0.62890625, "learning_rate": 0.0017011330968430122, "loss": 0.2029, "step": 26386 }, { "epoch": 0.04678810509770129, "grad_norm": 0.318359375, "learning_rate": 0.001701088800084242, "loss": 0.2483, "step": 26388 }, { "epoch": 0.0467916512630111, "grad_norm": 0.921875, "learning_rate": 0.0017010445006966583, "loss": 0.2236, "step": 26390 }, { "epoch": 0.04679519742832092, "grad_norm": 0.765625, "learning_rate": 0.0017010001986804554, "loss": 0.2329, "step": 26392 }, { "epoch": 0.04679874359363073, "grad_norm": 0.1669921875, "learning_rate": 0.0017009558940358264, "loss": 0.1477, "step": 26394 }, { "epoch": 0.046802289758940546, "grad_norm": 1.09375, "learning_rate": 0.0017009115867629657, "loss": 0.2482, "step": 26396 }, { "epoch": 0.04680583592425036, "grad_norm": 1.0546875, "learning_rate": 0.0017008672768620665, "loss": 0.1572, "step": 26398 }, { "epoch": 0.046809382089560175, "grad_norm": 0.26171875, "learning_rate": 0.0017008229643333229, "loss": 0.2865, "step": 26400 }, { "epoch": 0.04681292825486999, "grad_norm": 0.3515625, "learning_rate": 0.0017007786491769288, "loss": 0.2051, "step": 26402 }, { "epoch": 0.04681647442017981, "grad_norm": 0.515625, "learning_rate": 0.0017007343313930779, "loss": 0.1688, "step": 26404 }, { "epoch": 0.046820020585489626, "grad_norm": 0.8671875, "learning_rate": 0.0017006900109819637, "loss": 0.1314, "step": 26406 }, { "epoch": 0.04682356675079944, "grad_norm": 0.30078125, "learning_rate": 0.0017006456879437807, "loss": 0.2086, "step": 26408 }, { "epoch": 0.046827112916109255, "grad_norm": 3.71875, "learning_rate": 0.001700601362278722, "loss": 0.2599, "step": 26410 }, { "epoch": 0.04683065908141907, "grad_norm": 0.99609375, "learning_rate": 0.001700557033986982, "loss": 0.2239, "step": 26412 }, { "epoch": 0.046834205246728884, "grad_norm": 1.8125, "learning_rate": 0.0017005127030687544, "loss": 0.3041, "step": 26414 }, { "epoch": 0.0468377514120387, "grad_norm": 0.5234375, "learning_rate": 0.001700468369524233, "loss": 0.2533, "step": 26416 }, { "epoch": 0.04684129757734851, "grad_norm": 0.89453125, "learning_rate": 0.0017004240333536118, "loss": 0.3332, "step": 26418 }, { "epoch": 0.04684484374265833, "grad_norm": 0.578125, "learning_rate": 0.0017003796945570847, "loss": 0.1965, "step": 26420 }, { "epoch": 0.04684838990796814, "grad_norm": 0.69921875, "learning_rate": 0.0017003353531348455, "loss": 0.2099, "step": 26422 }, { "epoch": 0.04685193607327796, "grad_norm": 0.8359375, "learning_rate": 0.001700291009087088, "loss": 0.2719, "step": 26424 }, { "epoch": 0.04685548223858778, "grad_norm": 3.046875, "learning_rate": 0.0017002466624140066, "loss": 0.2137, "step": 26426 }, { "epoch": 0.04685902840389759, "grad_norm": 0.466796875, "learning_rate": 0.0017002023131157946, "loss": 0.1751, "step": 26428 }, { "epoch": 0.04686257456920741, "grad_norm": 2.796875, "learning_rate": 0.0017001579611926468, "loss": 0.2077, "step": 26430 }, { "epoch": 0.04686612073451722, "grad_norm": 0.427734375, "learning_rate": 0.0017001136066447562, "loss": 0.2164, "step": 26432 }, { "epoch": 0.04686966689982704, "grad_norm": 0.302734375, "learning_rate": 0.0017000692494723176, "loss": 0.2455, "step": 26434 }, { "epoch": 0.04687321306513685, "grad_norm": 0.2373046875, "learning_rate": 0.0017000248896755245, "loss": 0.1795, "step": 26436 }, { "epoch": 0.046876759230446666, "grad_norm": 0.7265625, "learning_rate": 0.0016999805272545712, "loss": 0.213, "step": 26438 }, { "epoch": 0.04688030539575648, "grad_norm": 2.875, "learning_rate": 0.0016999361622096517, "loss": 0.3079, "step": 26440 }, { "epoch": 0.046883851561066295, "grad_norm": 1.421875, "learning_rate": 0.0016998917945409595, "loss": 0.2155, "step": 26442 }, { "epoch": 0.04688739772637611, "grad_norm": 0.271484375, "learning_rate": 0.0016998474242486891, "loss": 0.2075, "step": 26444 }, { "epoch": 0.046890943891685924, "grad_norm": 0.671875, "learning_rate": 0.001699803051333035, "loss": 0.2399, "step": 26446 }, { "epoch": 0.046894490056995745, "grad_norm": 1.453125, "learning_rate": 0.0016997586757941904, "loss": 0.3139, "step": 26448 }, { "epoch": 0.04689803622230556, "grad_norm": 0.353515625, "learning_rate": 0.00169971429763235, "loss": 0.2062, "step": 26450 }, { "epoch": 0.046901582387615375, "grad_norm": 0.66015625, "learning_rate": 0.001699669916847707, "loss": 0.2301, "step": 26452 }, { "epoch": 0.04690512855292519, "grad_norm": 3.171875, "learning_rate": 0.0016996255334404565, "loss": 0.3623, "step": 26454 }, { "epoch": 0.046908674718235004, "grad_norm": 0.443359375, "learning_rate": 0.0016995811474107923, "loss": 0.3399, "step": 26456 }, { "epoch": 0.04691222088354482, "grad_norm": 0.5625, "learning_rate": 0.0016995367587589083, "loss": 0.1929, "step": 26458 }, { "epoch": 0.04691576704885463, "grad_norm": 0.208984375, "learning_rate": 0.001699492367484999, "loss": 0.2058, "step": 26460 }, { "epoch": 0.04691931321416445, "grad_norm": 0.28515625, "learning_rate": 0.0016994479735892578, "loss": 0.188, "step": 26462 }, { "epoch": 0.04692285937947426, "grad_norm": 0.478515625, "learning_rate": 0.0016994035770718798, "loss": 0.2374, "step": 26464 }, { "epoch": 0.046926405544784076, "grad_norm": 1.375, "learning_rate": 0.0016993591779330588, "loss": 0.1652, "step": 26466 }, { "epoch": 0.04692995171009389, "grad_norm": 0.396484375, "learning_rate": 0.0016993147761729889, "loss": 0.2014, "step": 26468 }, { "epoch": 0.046933497875403705, "grad_norm": 0.55859375, "learning_rate": 0.0016992703717918641, "loss": 0.1951, "step": 26470 }, { "epoch": 0.04693704404071353, "grad_norm": 0.291015625, "learning_rate": 0.0016992259647898788, "loss": 0.3431, "step": 26472 }, { "epoch": 0.04694059020602334, "grad_norm": 0.48828125, "learning_rate": 0.0016991815551672272, "loss": 0.2235, "step": 26474 }, { "epoch": 0.046944136371333156, "grad_norm": 0.6953125, "learning_rate": 0.0016991371429241036, "loss": 0.2247, "step": 26476 }, { "epoch": 0.04694768253664297, "grad_norm": 0.91796875, "learning_rate": 0.001699092728060702, "loss": 0.225, "step": 26478 }, { "epoch": 0.046951228701952785, "grad_norm": 0.6015625, "learning_rate": 0.001699048310577217, "loss": 0.2535, "step": 26480 }, { "epoch": 0.0469547748672626, "grad_norm": 0.33984375, "learning_rate": 0.0016990038904738426, "loss": 0.1822, "step": 26482 }, { "epoch": 0.046958321032572414, "grad_norm": 3.15625, "learning_rate": 0.001698959467750773, "loss": 0.3388, "step": 26484 }, { "epoch": 0.04696186719788223, "grad_norm": 0.2333984375, "learning_rate": 0.0016989150424082027, "loss": 0.3176, "step": 26486 }, { "epoch": 0.04696541336319204, "grad_norm": 0.283203125, "learning_rate": 0.001698870614446326, "loss": 0.2681, "step": 26488 }, { "epoch": 0.04696895952850186, "grad_norm": 0.51953125, "learning_rate": 0.0016988261838653367, "loss": 0.1832, "step": 26490 }, { "epoch": 0.04697250569381167, "grad_norm": 0.431640625, "learning_rate": 0.0016987817506654301, "loss": 0.186, "step": 26492 }, { "epoch": 0.046976051859121494, "grad_norm": 0.74609375, "learning_rate": 0.0016987373148467995, "loss": 0.2559, "step": 26494 }, { "epoch": 0.04697959802443131, "grad_norm": 0.408203125, "learning_rate": 0.0016986928764096399, "loss": 0.2334, "step": 26496 }, { "epoch": 0.04698314418974112, "grad_norm": 0.3515625, "learning_rate": 0.0016986484353541454, "loss": 0.1615, "step": 26498 }, { "epoch": 0.04698669035505094, "grad_norm": 1.7421875, "learning_rate": 0.0016986039916805102, "loss": 0.2688, "step": 26500 }, { "epoch": 0.04699023652036075, "grad_norm": 0.255859375, "learning_rate": 0.0016985595453889288, "loss": 0.1914, "step": 26502 }, { "epoch": 0.04699378268567057, "grad_norm": 0.310546875, "learning_rate": 0.001698515096479596, "loss": 0.2446, "step": 26504 }, { "epoch": 0.04699732885098038, "grad_norm": 0.279296875, "learning_rate": 0.0016984706449527055, "loss": 0.2026, "step": 26506 }, { "epoch": 0.047000875016290196, "grad_norm": 0.33984375, "learning_rate": 0.0016984261908084522, "loss": 0.1927, "step": 26508 }, { "epoch": 0.04700442118160001, "grad_norm": 0.3046875, "learning_rate": 0.0016983817340470305, "loss": 0.2186, "step": 26510 }, { "epoch": 0.047007967346909825, "grad_norm": 0.5546875, "learning_rate": 0.0016983372746686345, "loss": 0.4702, "step": 26512 }, { "epoch": 0.04701151351221964, "grad_norm": 2.046875, "learning_rate": 0.001698292812673459, "loss": 0.385, "step": 26514 }, { "epoch": 0.047015059677529454, "grad_norm": 0.96484375, "learning_rate": 0.001698248348061698, "loss": 0.1972, "step": 26516 }, { "epoch": 0.047018605842839276, "grad_norm": 0.478515625, "learning_rate": 0.001698203880833546, "loss": 0.2332, "step": 26518 }, { "epoch": 0.04702215200814909, "grad_norm": 0.5390625, "learning_rate": 0.001698159410989198, "loss": 0.2654, "step": 26520 }, { "epoch": 0.047025698173458905, "grad_norm": 1.7578125, "learning_rate": 0.001698114938528848, "loss": 0.223, "step": 26522 }, { "epoch": 0.04702924433876872, "grad_norm": 0.369140625, "learning_rate": 0.001698070463452691, "loss": 0.2001, "step": 26524 }, { "epoch": 0.047032790504078534, "grad_norm": 0.291015625, "learning_rate": 0.0016980259857609212, "loss": 0.3718, "step": 26526 }, { "epoch": 0.04703633666938835, "grad_norm": 0.54296875, "learning_rate": 0.001697981505453733, "loss": 0.2237, "step": 26528 }, { "epoch": 0.04703988283469816, "grad_norm": 0.3125, "learning_rate": 0.0016979370225313208, "loss": 0.2062, "step": 26530 }, { "epoch": 0.04704342900000798, "grad_norm": 0.5, "learning_rate": 0.0016978925369938798, "loss": 0.2603, "step": 26532 }, { "epoch": 0.04704697516531779, "grad_norm": 1.28125, "learning_rate": 0.001697848048841604, "loss": 0.2309, "step": 26534 }, { "epoch": 0.04705052133062761, "grad_norm": 0.78515625, "learning_rate": 0.001697803558074688, "loss": 0.1878, "step": 26536 }, { "epoch": 0.04705406749593742, "grad_norm": 0.3125, "learning_rate": 0.0016977590646933267, "loss": 0.2087, "step": 26538 }, { "epoch": 0.04705761366124724, "grad_norm": 0.9375, "learning_rate": 0.0016977145686977143, "loss": 0.2314, "step": 26540 }, { "epoch": 0.04706115982655706, "grad_norm": 0.27734375, "learning_rate": 0.0016976700700880457, "loss": 0.1449, "step": 26542 }, { "epoch": 0.04706470599186687, "grad_norm": 0.4765625, "learning_rate": 0.001697625568864515, "loss": 0.1747, "step": 26544 }, { "epoch": 0.047068252157176686, "grad_norm": 0.50390625, "learning_rate": 0.0016975810650273176, "loss": 0.2192, "step": 26546 }, { "epoch": 0.0470717983224865, "grad_norm": 0.2451171875, "learning_rate": 0.0016975365585766475, "loss": 0.4713, "step": 26548 }, { "epoch": 0.047075344487796315, "grad_norm": 0.369140625, "learning_rate": 0.0016974920495126998, "loss": 0.1806, "step": 26550 }, { "epoch": 0.04707889065310613, "grad_norm": 2.84375, "learning_rate": 0.001697447537835669, "loss": 0.182, "step": 26552 }, { "epoch": 0.047082436818415944, "grad_norm": 5.1875, "learning_rate": 0.0016974030235457494, "loss": 0.2765, "step": 26554 }, { "epoch": 0.04708598298372576, "grad_norm": 0.80859375, "learning_rate": 0.001697358506643136, "loss": 0.2096, "step": 26556 }, { "epoch": 0.047089529149035574, "grad_norm": 0.265625, "learning_rate": 0.0016973139871280236, "loss": 0.1767, "step": 26558 }, { "epoch": 0.04709307531434539, "grad_norm": 0.68359375, "learning_rate": 0.0016972694650006068, "loss": 0.1866, "step": 26560 }, { "epoch": 0.04709662147965521, "grad_norm": 0.796875, "learning_rate": 0.0016972249402610801, "loss": 0.2523, "step": 26562 }, { "epoch": 0.047100167644965024, "grad_norm": 0.326171875, "learning_rate": 0.0016971804129096387, "loss": 0.2213, "step": 26564 }, { "epoch": 0.04710371381027484, "grad_norm": 0.310546875, "learning_rate": 0.0016971358829464772, "loss": 0.134, "step": 26566 }, { "epoch": 0.04710725997558465, "grad_norm": 1.7421875, "learning_rate": 0.0016970913503717897, "loss": 0.155, "step": 26568 }, { "epoch": 0.04711080614089447, "grad_norm": 4.9375, "learning_rate": 0.0016970468151857718, "loss": 0.3679, "step": 26570 }, { "epoch": 0.04711435230620428, "grad_norm": 0.73046875, "learning_rate": 0.001697002277388618, "loss": 0.2125, "step": 26572 }, { "epoch": 0.0471178984715141, "grad_norm": 0.369140625, "learning_rate": 0.0016969577369805229, "loss": 0.1798, "step": 26574 }, { "epoch": 0.04712144463682391, "grad_norm": 0.59375, "learning_rate": 0.0016969131939616811, "loss": 0.186, "step": 26576 }, { "epoch": 0.047124990802133726, "grad_norm": 0.50390625, "learning_rate": 0.001696868648332288, "loss": 0.2249, "step": 26578 }, { "epoch": 0.04712853696744354, "grad_norm": 1.2109375, "learning_rate": 0.0016968241000925384, "loss": 0.1821, "step": 26580 }, { "epoch": 0.047132083132753355, "grad_norm": 0.78515625, "learning_rate": 0.0016967795492426265, "loss": 0.1938, "step": 26582 }, { "epoch": 0.04713562929806317, "grad_norm": 0.64453125, "learning_rate": 0.0016967349957827476, "loss": 0.1888, "step": 26584 }, { "epoch": 0.04713917546337299, "grad_norm": 0.25390625, "learning_rate": 0.0016966904397130965, "loss": 0.1776, "step": 26586 }, { "epoch": 0.047142721628682806, "grad_norm": 0.392578125, "learning_rate": 0.0016966458810338681, "loss": 0.221, "step": 26588 }, { "epoch": 0.04714626779399262, "grad_norm": 0.34375, "learning_rate": 0.001696601319745257, "loss": 0.1848, "step": 26590 }, { "epoch": 0.047149813959302435, "grad_norm": 0.87109375, "learning_rate": 0.0016965567558474583, "loss": 0.1882, "step": 26592 }, { "epoch": 0.04715336012461225, "grad_norm": 0.80078125, "learning_rate": 0.001696512189340667, "loss": 0.2284, "step": 26594 }, { "epoch": 0.047156906289922064, "grad_norm": 0.671875, "learning_rate": 0.0016964676202250778, "loss": 0.2095, "step": 26596 }, { "epoch": 0.04716045245523188, "grad_norm": 0.1865234375, "learning_rate": 0.0016964230485008859, "loss": 0.145, "step": 26598 }, { "epoch": 0.04716399862054169, "grad_norm": 0.671875, "learning_rate": 0.0016963784741682855, "loss": 0.2673, "step": 26600 }, { "epoch": 0.04716754478585151, "grad_norm": 0.5546875, "learning_rate": 0.0016963338972274726, "loss": 0.2122, "step": 26602 }, { "epoch": 0.04717109095116132, "grad_norm": 16.375, "learning_rate": 0.0016962893176786413, "loss": 0.4163, "step": 26604 }, { "epoch": 0.04717463711647114, "grad_norm": 0.83203125, "learning_rate": 0.001696244735521987, "loss": 0.274, "step": 26606 }, { "epoch": 0.04717818328178096, "grad_norm": 0.39453125, "learning_rate": 0.0016962001507577046, "loss": 0.1843, "step": 26608 }, { "epoch": 0.04718172944709077, "grad_norm": 1.078125, "learning_rate": 0.0016961555633859893, "loss": 0.2262, "step": 26610 }, { "epoch": 0.04718527561240059, "grad_norm": 0.490234375, "learning_rate": 0.0016961109734070356, "loss": 0.1802, "step": 26612 }, { "epoch": 0.0471888217777104, "grad_norm": 0.828125, "learning_rate": 0.0016960663808210385, "loss": 0.1749, "step": 26614 }, { "epoch": 0.047192367943020216, "grad_norm": 0.515625, "learning_rate": 0.0016960217856281936, "loss": 0.1896, "step": 26616 }, { "epoch": 0.04719591410833003, "grad_norm": 0.326171875, "learning_rate": 0.0016959771878286957, "loss": 0.1714, "step": 26618 }, { "epoch": 0.047199460273639846, "grad_norm": 2.046875, "learning_rate": 0.0016959325874227396, "loss": 0.4339, "step": 26620 }, { "epoch": 0.04720300643894966, "grad_norm": 0.6328125, "learning_rate": 0.0016958879844105204, "loss": 0.2082, "step": 26622 }, { "epoch": 0.047206552604259475, "grad_norm": 0.78125, "learning_rate": 0.0016958433787922333, "loss": 0.2411, "step": 26624 }, { "epoch": 0.04721009876956929, "grad_norm": 0.7265625, "learning_rate": 0.0016957987705680736, "loss": 0.264, "step": 26626 }, { "epoch": 0.047213644934879104, "grad_norm": 0.5546875, "learning_rate": 0.0016957541597382361, "loss": 0.1873, "step": 26628 }, { "epoch": 0.047217191100188925, "grad_norm": 0.37109375, "learning_rate": 0.001695709546302916, "loss": 0.2267, "step": 26630 }, { "epoch": 0.04722073726549874, "grad_norm": 0.609375, "learning_rate": 0.001695664930262308, "loss": 0.2653, "step": 26632 }, { "epoch": 0.047224283430808554, "grad_norm": 0.4296875, "learning_rate": 0.001695620311616608, "loss": 0.2177, "step": 26634 }, { "epoch": 0.04722782959611837, "grad_norm": 1.4140625, "learning_rate": 0.0016955756903660108, "loss": 0.3076, "step": 26636 }, { "epoch": 0.04723137576142818, "grad_norm": 0.3671875, "learning_rate": 0.0016955310665107113, "loss": 0.24, "step": 26638 }, { "epoch": 0.047234921926738, "grad_norm": 0.87890625, "learning_rate": 0.001695486440050905, "loss": 0.1752, "step": 26640 }, { "epoch": 0.04723846809204781, "grad_norm": 0.451171875, "learning_rate": 0.0016954418109867866, "loss": 0.2261, "step": 26642 }, { "epoch": 0.04724201425735763, "grad_norm": 0.671875, "learning_rate": 0.001695397179318552, "loss": 0.1952, "step": 26644 }, { "epoch": 0.04724556042266744, "grad_norm": 1.5390625, "learning_rate": 0.0016953525450463956, "loss": 0.3127, "step": 26646 }, { "epoch": 0.047249106587977256, "grad_norm": 0.2578125, "learning_rate": 0.0016953079081705132, "loss": 0.1597, "step": 26648 }, { "epoch": 0.04725265275328707, "grad_norm": 0.40625, "learning_rate": 0.0016952632686910996, "loss": 0.233, "step": 26650 }, { "epoch": 0.047256198918596885, "grad_norm": 0.48828125, "learning_rate": 0.0016952186266083506, "loss": 0.1523, "step": 26652 }, { "epoch": 0.04725974508390671, "grad_norm": 1.421875, "learning_rate": 0.0016951739819224608, "loss": 0.2706, "step": 26654 }, { "epoch": 0.04726329124921652, "grad_norm": 0.416015625, "learning_rate": 0.0016951293346336258, "loss": 0.2314, "step": 26656 }, { "epoch": 0.047266837414526336, "grad_norm": 0.388671875, "learning_rate": 0.0016950846847420408, "loss": 0.1628, "step": 26658 }, { "epoch": 0.04727038357983615, "grad_norm": 0.5703125, "learning_rate": 0.0016950400322479008, "loss": 0.2043, "step": 26660 }, { "epoch": 0.047273929745145965, "grad_norm": 0.72265625, "learning_rate": 0.0016949953771514016, "loss": 0.2141, "step": 26662 }, { "epoch": 0.04727747591045578, "grad_norm": 0.796875, "learning_rate": 0.0016949507194527382, "loss": 0.2153, "step": 26664 }, { "epoch": 0.047281022075765594, "grad_norm": 0.25, "learning_rate": 0.0016949060591521059, "loss": 0.3093, "step": 26666 }, { "epoch": 0.04728456824107541, "grad_norm": 0.55859375, "learning_rate": 0.0016948613962497001, "loss": 0.1671, "step": 26668 }, { "epoch": 0.04728811440638522, "grad_norm": 0.89453125, "learning_rate": 0.0016948167307457161, "loss": 0.1981, "step": 26670 }, { "epoch": 0.04729166057169504, "grad_norm": 0.7890625, "learning_rate": 0.001694772062640349, "loss": 0.1879, "step": 26672 }, { "epoch": 0.04729520673700485, "grad_norm": 0.40625, "learning_rate": 0.0016947273919337947, "loss": 0.1701, "step": 26674 }, { "epoch": 0.047298752902314674, "grad_norm": 2.25, "learning_rate": 0.0016946827186262484, "loss": 0.2584, "step": 26676 }, { "epoch": 0.04730229906762449, "grad_norm": 0.35546875, "learning_rate": 0.0016946380427179046, "loss": 0.1787, "step": 26678 }, { "epoch": 0.0473058452329343, "grad_norm": 0.5078125, "learning_rate": 0.0016945933642089602, "loss": 0.3514, "step": 26680 }, { "epoch": 0.04730939139824412, "grad_norm": 0.41015625, "learning_rate": 0.0016945486830996096, "loss": 0.2255, "step": 26682 }, { "epoch": 0.04731293756355393, "grad_norm": 0.66796875, "learning_rate": 0.0016945039993900481, "loss": 0.1752, "step": 26684 }, { "epoch": 0.04731648372886375, "grad_norm": 0.60546875, "learning_rate": 0.0016944593130804714, "loss": 0.2008, "step": 26686 }, { "epoch": 0.04732002989417356, "grad_norm": 0.66015625, "learning_rate": 0.0016944146241710755, "loss": 0.2074, "step": 26688 }, { "epoch": 0.047323576059483376, "grad_norm": 0.4765625, "learning_rate": 0.001694369932662055, "loss": 0.2019, "step": 26690 }, { "epoch": 0.04732712222479319, "grad_norm": 3.046875, "learning_rate": 0.0016943252385536057, "loss": 0.2159, "step": 26692 }, { "epoch": 0.047330668390103005, "grad_norm": 0.3203125, "learning_rate": 0.0016942805418459228, "loss": 0.2373, "step": 26694 }, { "epoch": 0.04733421455541282, "grad_norm": 1.796875, "learning_rate": 0.0016942358425392024, "loss": 0.259, "step": 26696 }, { "epoch": 0.04733776072072264, "grad_norm": 0.609375, "learning_rate": 0.0016941911406336391, "loss": 0.202, "step": 26698 }, { "epoch": 0.047341306886032455, "grad_norm": 0.765625, "learning_rate": 0.0016941464361294291, "loss": 0.1538, "step": 26700 }, { "epoch": 0.04734485305134227, "grad_norm": 0.494140625, "learning_rate": 0.0016941017290267681, "loss": 0.2765, "step": 26702 }, { "epoch": 0.047348399216652085, "grad_norm": 0.306640625, "learning_rate": 0.0016940570193258507, "loss": 0.2019, "step": 26704 }, { "epoch": 0.0473519453819619, "grad_norm": 1.203125, "learning_rate": 0.0016940123070268733, "loss": 0.2224, "step": 26706 }, { "epoch": 0.047355491547271714, "grad_norm": 0.27734375, "learning_rate": 0.001693967592130031, "loss": 0.1978, "step": 26708 }, { "epoch": 0.04735903771258153, "grad_norm": 0.9140625, "learning_rate": 0.0016939228746355197, "loss": 0.2975, "step": 26710 }, { "epoch": 0.04736258387789134, "grad_norm": 0.388671875, "learning_rate": 0.0016938781545435342, "loss": 0.2641, "step": 26712 }, { "epoch": 0.04736613004320116, "grad_norm": 0.5, "learning_rate": 0.001693833431854271, "loss": 0.1644, "step": 26714 }, { "epoch": 0.04736967620851097, "grad_norm": 0.671875, "learning_rate": 0.0016937887065679254, "loss": 0.3008, "step": 26716 }, { "epoch": 0.047373222373820786, "grad_norm": 0.470703125, "learning_rate": 0.0016937439786846927, "loss": 0.202, "step": 26718 }, { "epoch": 0.0473767685391306, "grad_norm": 0.439453125, "learning_rate": 0.001693699248204769, "loss": 0.1363, "step": 26720 }, { "epoch": 0.04738031470444042, "grad_norm": 0.55859375, "learning_rate": 0.0016936545151283494, "loss": 0.1663, "step": 26722 }, { "epoch": 0.04738386086975024, "grad_norm": 3.796875, "learning_rate": 0.0016936097794556296, "loss": 0.4902, "step": 26724 }, { "epoch": 0.04738740703506005, "grad_norm": 0.255859375, "learning_rate": 0.0016935650411868057, "loss": 0.2973, "step": 26726 }, { "epoch": 0.047390953200369866, "grad_norm": 1.3046875, "learning_rate": 0.001693520300322073, "loss": 0.2227, "step": 26728 }, { "epoch": 0.04739449936567968, "grad_norm": 0.57421875, "learning_rate": 0.0016934755568616276, "loss": 0.2098, "step": 26730 }, { "epoch": 0.047398045530989495, "grad_norm": 0.380859375, "learning_rate": 0.0016934308108056643, "loss": 0.1398, "step": 26732 }, { "epoch": 0.04740159169629931, "grad_norm": 0.35546875, "learning_rate": 0.0016933860621543798, "loss": 0.202, "step": 26734 }, { "epoch": 0.047405137861609124, "grad_norm": 0.78125, "learning_rate": 0.001693341310907969, "loss": 0.2498, "step": 26736 }, { "epoch": 0.04740868402691894, "grad_norm": 0.640625, "learning_rate": 0.001693296557066628, "loss": 0.1964, "step": 26738 }, { "epoch": 0.04741223019222875, "grad_norm": 0.55859375, "learning_rate": 0.0016932518006305525, "loss": 0.2096, "step": 26740 }, { "epoch": 0.04741577635753857, "grad_norm": 3.828125, "learning_rate": 0.0016932070415999385, "loss": 0.5608, "step": 26742 }, { "epoch": 0.04741932252284839, "grad_norm": 0.35546875, "learning_rate": 0.0016931622799749814, "loss": 0.2021, "step": 26744 }, { "epoch": 0.047422868688158204, "grad_norm": 0.703125, "learning_rate": 0.0016931175157558767, "loss": 0.3564, "step": 26746 }, { "epoch": 0.04742641485346802, "grad_norm": 0.287109375, "learning_rate": 0.001693072748942821, "loss": 0.2096, "step": 26748 }, { "epoch": 0.04742996101877783, "grad_norm": 0.244140625, "learning_rate": 0.0016930279795360093, "loss": 0.1717, "step": 26750 }, { "epoch": 0.04743350718408765, "grad_norm": 1.1640625, "learning_rate": 0.0016929832075356377, "loss": 0.2168, "step": 26752 }, { "epoch": 0.04743705334939746, "grad_norm": 0.2236328125, "learning_rate": 0.001692938432941902, "loss": 0.1648, "step": 26754 }, { "epoch": 0.04744059951470728, "grad_norm": 0.439453125, "learning_rate": 0.001692893655754998, "loss": 0.2113, "step": 26756 }, { "epoch": 0.04744414568001709, "grad_norm": 0.37109375, "learning_rate": 0.0016928488759751217, "loss": 0.199, "step": 26758 }, { "epoch": 0.047447691845326906, "grad_norm": 0.349609375, "learning_rate": 0.0016928040936024685, "loss": 0.2144, "step": 26760 }, { "epoch": 0.04745123801063672, "grad_norm": 0.328125, "learning_rate": 0.0016927593086372348, "loss": 0.2307, "step": 26762 }, { "epoch": 0.047454784175946535, "grad_norm": 0.921875, "learning_rate": 0.001692714521079616, "loss": 0.2631, "step": 26764 }, { "epoch": 0.047458330341256356, "grad_norm": 0.294921875, "learning_rate": 0.0016926697309298084, "loss": 0.1919, "step": 26766 }, { "epoch": 0.04746187650656617, "grad_norm": 0.482421875, "learning_rate": 0.0016926249381880077, "loss": 0.1862, "step": 26768 }, { "epoch": 0.047465422671875986, "grad_norm": 0.490234375, "learning_rate": 0.0016925801428544097, "loss": 0.2482, "step": 26770 }, { "epoch": 0.0474689688371858, "grad_norm": 0.546875, "learning_rate": 0.0016925353449292098, "loss": 0.182, "step": 26772 }, { "epoch": 0.047472515002495615, "grad_norm": 1.46875, "learning_rate": 0.0016924905444126051, "loss": 0.2364, "step": 26774 }, { "epoch": 0.04747606116780543, "grad_norm": 0.84765625, "learning_rate": 0.0016924457413047907, "loss": 0.2101, "step": 26776 }, { "epoch": 0.047479607333115244, "grad_norm": 0.58203125, "learning_rate": 0.0016924009356059629, "loss": 0.1731, "step": 26778 }, { "epoch": 0.04748315349842506, "grad_norm": 0.46875, "learning_rate": 0.0016923561273163173, "loss": 0.2054, "step": 26780 }, { "epoch": 0.04748669966373487, "grad_norm": 0.26171875, "learning_rate": 0.0016923113164360505, "loss": 0.1965, "step": 26782 }, { "epoch": 0.04749024582904469, "grad_norm": 0.66796875, "learning_rate": 0.0016922665029653577, "loss": 0.2095, "step": 26784 }, { "epoch": 0.0474937919943545, "grad_norm": 1.359375, "learning_rate": 0.0016922216869044353, "loss": 0.2737, "step": 26786 }, { "epoch": 0.04749733815966432, "grad_norm": 2.734375, "learning_rate": 0.0016921768682534792, "loss": 0.3536, "step": 26788 }, { "epoch": 0.04750088432497414, "grad_norm": 0.474609375, "learning_rate": 0.0016921320470126852, "loss": 0.2142, "step": 26790 }, { "epoch": 0.04750443049028395, "grad_norm": 1.046875, "learning_rate": 0.0016920872231822503, "loss": 0.2686, "step": 26792 }, { "epoch": 0.04750797665559377, "grad_norm": 0.55859375, "learning_rate": 0.0016920423967623692, "loss": 0.3635, "step": 26794 }, { "epoch": 0.04751152282090358, "grad_norm": 0.72265625, "learning_rate": 0.001691997567753239, "loss": 0.2676, "step": 26796 }, { "epoch": 0.047515068986213396, "grad_norm": 0.46875, "learning_rate": 0.001691952736155055, "loss": 0.1452, "step": 26798 }, { "epoch": 0.04751861515152321, "grad_norm": 0.298828125, "learning_rate": 0.0016919079019680134, "loss": 0.4631, "step": 26800 }, { "epoch": 0.047522161316833025, "grad_norm": 0.361328125, "learning_rate": 0.0016918630651923108, "loss": 0.2792, "step": 26802 }, { "epoch": 0.04752570748214284, "grad_norm": 0.271484375, "learning_rate": 0.001691818225828143, "loss": 0.1678, "step": 26804 }, { "epoch": 0.047529253647452654, "grad_norm": 0.3828125, "learning_rate": 0.0016917733838757057, "loss": 0.1863, "step": 26806 }, { "epoch": 0.04753279981276247, "grad_norm": 0.86328125, "learning_rate": 0.0016917285393351958, "loss": 0.2667, "step": 26808 }, { "epoch": 0.047536345978072284, "grad_norm": 1.2421875, "learning_rate": 0.0016916836922068088, "loss": 0.1615, "step": 26810 }, { "epoch": 0.047539892143382105, "grad_norm": 0.396484375, "learning_rate": 0.0016916388424907409, "loss": 0.1935, "step": 26812 }, { "epoch": 0.04754343830869192, "grad_norm": 0.33203125, "learning_rate": 0.0016915939901871885, "loss": 0.1828, "step": 26814 }, { "epoch": 0.047546984474001734, "grad_norm": 0.55078125, "learning_rate": 0.0016915491352963476, "loss": 0.1822, "step": 26816 }, { "epoch": 0.04755053063931155, "grad_norm": 1.078125, "learning_rate": 0.001691504277818414, "loss": 0.2499, "step": 26818 }, { "epoch": 0.04755407680462136, "grad_norm": 0.267578125, "learning_rate": 0.001691459417753585, "loss": 0.1889, "step": 26820 }, { "epoch": 0.04755762296993118, "grad_norm": 0.91015625, "learning_rate": 0.0016914145551020555, "loss": 0.1931, "step": 26822 }, { "epoch": 0.04756116913524099, "grad_norm": 0.400390625, "learning_rate": 0.0016913696898640225, "loss": 0.2128, "step": 26824 }, { "epoch": 0.04756471530055081, "grad_norm": 0.34375, "learning_rate": 0.0016913248220396821, "loss": 0.1635, "step": 26826 }, { "epoch": 0.04756826146586062, "grad_norm": 0.375, "learning_rate": 0.00169127995162923, "loss": 0.203, "step": 26828 }, { "epoch": 0.047571807631170436, "grad_norm": 0.7109375, "learning_rate": 0.0016912350786328634, "loss": 0.2795, "step": 26830 }, { "epoch": 0.04757535379648025, "grad_norm": 1.3828125, "learning_rate": 0.0016911902030507778, "loss": 0.2004, "step": 26832 }, { "epoch": 0.04757889996179007, "grad_norm": 0.462890625, "learning_rate": 0.0016911453248831694, "loss": 0.2872, "step": 26834 }, { "epoch": 0.04758244612709989, "grad_norm": 0.609375, "learning_rate": 0.0016911004441302348, "loss": 0.2363, "step": 26836 }, { "epoch": 0.0475859922924097, "grad_norm": 0.2353515625, "learning_rate": 0.0016910555607921702, "loss": 0.1554, "step": 26838 }, { "epoch": 0.047589538457719516, "grad_norm": 0.328125, "learning_rate": 0.0016910106748691722, "loss": 0.1638, "step": 26840 }, { "epoch": 0.04759308462302933, "grad_norm": 2.28125, "learning_rate": 0.0016909657863614367, "loss": 0.265, "step": 26842 }, { "epoch": 0.047596630788339145, "grad_norm": 0.92578125, "learning_rate": 0.00169092089526916, "loss": 0.2391, "step": 26844 }, { "epoch": 0.04760017695364896, "grad_norm": 0.8203125, "learning_rate": 0.0016908760015925386, "loss": 0.1643, "step": 26846 }, { "epoch": 0.047603723118958774, "grad_norm": 0.416015625, "learning_rate": 0.001690831105331769, "loss": 0.2094, "step": 26848 }, { "epoch": 0.04760726928426859, "grad_norm": 0.8046875, "learning_rate": 0.001690786206487047, "loss": 0.2242, "step": 26850 }, { "epoch": 0.0476108154495784, "grad_norm": 0.58984375, "learning_rate": 0.0016907413050585695, "loss": 0.1878, "step": 26852 }, { "epoch": 0.04761436161488822, "grad_norm": 0.74609375, "learning_rate": 0.0016906964010465324, "loss": 0.2738, "step": 26854 }, { "epoch": 0.04761790778019803, "grad_norm": 0.671875, "learning_rate": 0.0016906514944511326, "loss": 0.2113, "step": 26856 }, { "epoch": 0.047621453945507854, "grad_norm": 0.80078125, "learning_rate": 0.0016906065852725662, "loss": 0.2342, "step": 26858 }, { "epoch": 0.04762500011081767, "grad_norm": 0.90625, "learning_rate": 0.0016905616735110299, "loss": 0.1734, "step": 26860 }, { "epoch": 0.04762854627612748, "grad_norm": 0.201171875, "learning_rate": 0.0016905167591667197, "loss": 0.1547, "step": 26862 }, { "epoch": 0.0476320924414373, "grad_norm": 0.9609375, "learning_rate": 0.0016904718422398322, "loss": 0.1941, "step": 26864 }, { "epoch": 0.04763563860674711, "grad_norm": 0.4140625, "learning_rate": 0.0016904269227305635, "loss": 0.2407, "step": 26866 }, { "epoch": 0.047639184772056926, "grad_norm": 0.484375, "learning_rate": 0.0016903820006391108, "loss": 0.2149, "step": 26868 }, { "epoch": 0.04764273093736674, "grad_norm": 0.796875, "learning_rate": 0.0016903370759656703, "loss": 0.256, "step": 26870 }, { "epoch": 0.047646277102676556, "grad_norm": 1.25, "learning_rate": 0.001690292148710438, "loss": 0.2674, "step": 26872 }, { "epoch": 0.04764982326798637, "grad_norm": 0.50390625, "learning_rate": 0.0016902472188736108, "loss": 0.2204, "step": 26874 }, { "epoch": 0.047653369433296185, "grad_norm": 1.2109375, "learning_rate": 0.001690202286455385, "loss": 0.5059, "step": 26876 }, { "epoch": 0.047656915598606, "grad_norm": 0.91015625, "learning_rate": 0.0016901573514559575, "loss": 0.1789, "step": 26878 }, { "epoch": 0.04766046176391582, "grad_norm": 10.9375, "learning_rate": 0.0016901124138755247, "loss": 0.3592, "step": 26880 }, { "epoch": 0.047664007929225635, "grad_norm": 0.62890625, "learning_rate": 0.0016900674737142824, "loss": 0.2536, "step": 26882 }, { "epoch": 0.04766755409453545, "grad_norm": 2.0, "learning_rate": 0.0016900225309724281, "loss": 0.1749, "step": 26884 }, { "epoch": 0.047671100259845264, "grad_norm": 0.255859375, "learning_rate": 0.0016899775856501582, "loss": 0.2355, "step": 26886 }, { "epoch": 0.04767464642515508, "grad_norm": 0.61328125, "learning_rate": 0.0016899326377476684, "loss": 0.1652, "step": 26888 }, { "epoch": 0.04767819259046489, "grad_norm": 0.76171875, "learning_rate": 0.0016898876872651562, "loss": 0.1886, "step": 26890 }, { "epoch": 0.04768173875577471, "grad_norm": 3.046875, "learning_rate": 0.0016898427342028181, "loss": 0.2788, "step": 26892 }, { "epoch": 0.04768528492108452, "grad_norm": 0.279296875, "learning_rate": 0.0016897977785608504, "loss": 0.1484, "step": 26894 }, { "epoch": 0.04768883108639434, "grad_norm": 0.427734375, "learning_rate": 0.00168975282033945, "loss": 0.1474, "step": 26896 }, { "epoch": 0.04769237725170415, "grad_norm": 1.0390625, "learning_rate": 0.001689707859538813, "loss": 0.3669, "step": 26898 }, { "epoch": 0.047695923417013966, "grad_norm": 0.85546875, "learning_rate": 0.0016896628961591367, "loss": 0.2442, "step": 26900 }, { "epoch": 0.04769946958232379, "grad_norm": 0.8515625, "learning_rate": 0.001689617930200617, "loss": 0.548, "step": 26902 }, { "epoch": 0.0477030157476336, "grad_norm": 0.423828125, "learning_rate": 0.0016895729616634512, "loss": 0.2445, "step": 26904 }, { "epoch": 0.04770656191294342, "grad_norm": 2.40625, "learning_rate": 0.0016895279905478357, "loss": 0.2967, "step": 26906 }, { "epoch": 0.04771010807825323, "grad_norm": 0.6015625, "learning_rate": 0.0016894830168539673, "loss": 0.1548, "step": 26908 }, { "epoch": 0.047713654243563046, "grad_norm": 0.6875, "learning_rate": 0.0016894380405820424, "loss": 0.1795, "step": 26910 }, { "epoch": 0.04771720040887286, "grad_norm": 1.234375, "learning_rate": 0.0016893930617322582, "loss": 0.1617, "step": 26912 }, { "epoch": 0.047720746574182675, "grad_norm": 0.47265625, "learning_rate": 0.001689348080304811, "loss": 0.256, "step": 26914 }, { "epoch": 0.04772429273949249, "grad_norm": 1.921875, "learning_rate": 0.0016893030962998974, "loss": 0.2681, "step": 26916 }, { "epoch": 0.047727838904802304, "grad_norm": 0.62890625, "learning_rate": 0.0016892581097177144, "loss": 0.1833, "step": 26918 }, { "epoch": 0.04773138507011212, "grad_norm": 1.1015625, "learning_rate": 0.0016892131205584588, "loss": 0.3758, "step": 26920 }, { "epoch": 0.04773493123542193, "grad_norm": 0.2294921875, "learning_rate": 0.0016891681288223274, "loss": 0.2344, "step": 26922 }, { "epoch": 0.04773847740073175, "grad_norm": 0.279296875, "learning_rate": 0.0016891231345095166, "loss": 0.2185, "step": 26924 }, { "epoch": 0.04774202356604157, "grad_norm": 0.453125, "learning_rate": 0.0016890781376202235, "loss": 0.2265, "step": 26926 }, { "epoch": 0.047745569731351384, "grad_norm": 0.5859375, "learning_rate": 0.0016890331381546448, "loss": 0.2437, "step": 26928 }, { "epoch": 0.0477491158966612, "grad_norm": 3.15625, "learning_rate": 0.0016889881361129772, "loss": 0.2029, "step": 26930 }, { "epoch": 0.04775266206197101, "grad_norm": 0.98046875, "learning_rate": 0.0016889431314954181, "loss": 0.2015, "step": 26932 }, { "epoch": 0.04775620822728083, "grad_norm": 0.453125, "learning_rate": 0.0016888981243021633, "loss": 0.1718, "step": 26934 }, { "epoch": 0.04775975439259064, "grad_norm": 1.4140625, "learning_rate": 0.0016888531145334102, "loss": 0.241, "step": 26936 }, { "epoch": 0.04776330055790046, "grad_norm": 0.71875, "learning_rate": 0.001688808102189356, "loss": 0.2415, "step": 26938 }, { "epoch": 0.04776684672321027, "grad_norm": 1.3125, "learning_rate": 0.001688763087270197, "loss": 0.2881, "step": 26940 }, { "epoch": 0.047770392888520086, "grad_norm": 1.046875, "learning_rate": 0.0016887180697761303, "loss": 0.2255, "step": 26942 }, { "epoch": 0.0477739390538299, "grad_norm": 1.328125, "learning_rate": 0.0016886730497073525, "loss": 0.2826, "step": 26944 }, { "epoch": 0.047777485219139715, "grad_norm": 0.66796875, "learning_rate": 0.001688628027064061, "loss": 0.2586, "step": 26946 }, { "epoch": 0.047781031384449536, "grad_norm": 0.439453125, "learning_rate": 0.0016885830018464523, "loss": 0.2123, "step": 26948 }, { "epoch": 0.04778457754975935, "grad_norm": 2.4375, "learning_rate": 0.0016885379740547233, "loss": 0.4881, "step": 26950 }, { "epoch": 0.047788123715069165, "grad_norm": 0.73046875, "learning_rate": 0.0016884929436890713, "loss": 0.2228, "step": 26952 }, { "epoch": 0.04779166988037898, "grad_norm": 1.078125, "learning_rate": 0.0016884479107496931, "loss": 0.2062, "step": 26954 }, { "epoch": 0.047795216045688795, "grad_norm": 0.75, "learning_rate": 0.0016884028752367853, "loss": 0.3562, "step": 26956 }, { "epoch": 0.04779876221099861, "grad_norm": 0.65234375, "learning_rate": 0.0016883578371505448, "loss": 0.1781, "step": 26958 }, { "epoch": 0.047802308376308424, "grad_norm": 0.37109375, "learning_rate": 0.0016883127964911694, "loss": 0.2409, "step": 26960 }, { "epoch": 0.04780585454161824, "grad_norm": 1.2109375, "learning_rate": 0.0016882677532588555, "loss": 0.2075, "step": 26962 }, { "epoch": 0.04780940070692805, "grad_norm": 1.4921875, "learning_rate": 0.0016882227074538, "loss": 0.314, "step": 26964 }, { "epoch": 0.04781294687223787, "grad_norm": 0.61328125, "learning_rate": 0.0016881776590762, "loss": 0.1763, "step": 26966 }, { "epoch": 0.04781649303754768, "grad_norm": 0.59765625, "learning_rate": 0.001688132608126253, "loss": 0.1953, "step": 26968 }, { "epoch": 0.0478200392028575, "grad_norm": 1.09375, "learning_rate": 0.0016880875546041551, "loss": 0.1731, "step": 26970 }, { "epoch": 0.04782358536816732, "grad_norm": 0.91796875, "learning_rate": 0.0016880424985101043, "loss": 0.2987, "step": 26972 }, { "epoch": 0.04782713153347713, "grad_norm": 0.640625, "learning_rate": 0.0016879974398442965, "loss": 0.2192, "step": 26974 }, { "epoch": 0.04783067769878695, "grad_norm": 0.404296875, "learning_rate": 0.0016879523786069303, "loss": 0.2007, "step": 26976 }, { "epoch": 0.04783422386409676, "grad_norm": 0.66796875, "learning_rate": 0.0016879073147982014, "loss": 0.2356, "step": 26978 }, { "epoch": 0.047837770029406576, "grad_norm": 0.2734375, "learning_rate": 0.0016878622484183075, "loss": 0.1508, "step": 26980 }, { "epoch": 0.04784131619471639, "grad_norm": 0.68359375, "learning_rate": 0.0016878171794674453, "loss": 0.1392, "step": 26982 }, { "epoch": 0.047844862360026205, "grad_norm": 0.369140625, "learning_rate": 0.0016877721079458128, "loss": 0.1602, "step": 26984 }, { "epoch": 0.04784840852533602, "grad_norm": 1.875, "learning_rate": 0.001687727033853606, "loss": 0.4699, "step": 26986 }, { "epoch": 0.047851954690645834, "grad_norm": 1.828125, "learning_rate": 0.0016876819571910227, "loss": 0.2239, "step": 26988 }, { "epoch": 0.04785550085595565, "grad_norm": 0.328125, "learning_rate": 0.00168763687795826, "loss": 0.2329, "step": 26990 }, { "epoch": 0.04785904702126546, "grad_norm": 0.34375, "learning_rate": 0.001687591796155515, "loss": 0.1856, "step": 26992 }, { "epoch": 0.047862593186575285, "grad_norm": 0.26171875, "learning_rate": 0.0016875467117829846, "loss": 0.1636, "step": 26994 }, { "epoch": 0.0478661393518851, "grad_norm": 0.71875, "learning_rate": 0.0016875016248408662, "loss": 0.3355, "step": 26996 }, { "epoch": 0.047869685517194914, "grad_norm": 2.671875, "learning_rate": 0.0016874565353293572, "loss": 0.332, "step": 26998 }, { "epoch": 0.04787323168250473, "grad_norm": 0.46484375, "learning_rate": 0.0016874114432486543, "loss": 0.2015, "step": 27000 }, { "epoch": 0.04787677784781454, "grad_norm": 0.5546875, "learning_rate": 0.0016873663485989549, "loss": 0.1933, "step": 27002 }, { "epoch": 0.04788032401312436, "grad_norm": 0.7421875, "learning_rate": 0.0016873212513804563, "loss": 0.2273, "step": 27004 }, { "epoch": 0.04788387017843417, "grad_norm": 0.294921875, "learning_rate": 0.0016872761515933558, "loss": 0.3624, "step": 27006 }, { "epoch": 0.04788741634374399, "grad_norm": 1.2109375, "learning_rate": 0.0016872310492378507, "loss": 0.2118, "step": 27008 }, { "epoch": 0.0478909625090538, "grad_norm": 1.6015625, "learning_rate": 0.001687185944314138, "loss": 0.1949, "step": 27010 }, { "epoch": 0.047894508674363616, "grad_norm": 0.55859375, "learning_rate": 0.0016871408368224151, "loss": 0.3822, "step": 27012 }, { "epoch": 0.04789805483967343, "grad_norm": 1.0078125, "learning_rate": 0.0016870957267628788, "loss": 0.2411, "step": 27014 }, { "epoch": 0.04790160100498325, "grad_norm": 0.423828125, "learning_rate": 0.0016870506141357273, "loss": 0.1851, "step": 27016 }, { "epoch": 0.047905147170293066, "grad_norm": 0.82421875, "learning_rate": 0.0016870054989411572, "loss": 0.1851, "step": 27018 }, { "epoch": 0.04790869333560288, "grad_norm": 0.40625, "learning_rate": 0.0016869603811793662, "loss": 0.1984, "step": 27020 }, { "epoch": 0.047912239500912696, "grad_norm": 0.765625, "learning_rate": 0.0016869152608505512, "loss": 0.1396, "step": 27022 }, { "epoch": 0.04791578566622251, "grad_norm": 0.61328125, "learning_rate": 0.0016868701379549097, "loss": 0.1765, "step": 27024 }, { "epoch": 0.047919331831532325, "grad_norm": 0.482421875, "learning_rate": 0.0016868250124926392, "loss": 0.2057, "step": 27026 }, { "epoch": 0.04792287799684214, "grad_norm": 0.478515625, "learning_rate": 0.0016867798844639372, "loss": 0.2135, "step": 27028 }, { "epoch": 0.047926424162151954, "grad_norm": 2.984375, "learning_rate": 0.0016867347538690005, "loss": 0.2056, "step": 27030 }, { "epoch": 0.04792997032746177, "grad_norm": 0.439453125, "learning_rate": 0.0016866896207080272, "loss": 0.196, "step": 27032 }, { "epoch": 0.04793351649277158, "grad_norm": 0.70703125, "learning_rate": 0.001686644484981214, "loss": 0.2462, "step": 27034 }, { "epoch": 0.0479370626580814, "grad_norm": 0.447265625, "learning_rate": 0.0016865993466887586, "loss": 0.2099, "step": 27036 }, { "epoch": 0.04794060882339122, "grad_norm": 0.484375, "learning_rate": 0.0016865542058308585, "loss": 0.3989, "step": 27038 }, { "epoch": 0.047944154988701033, "grad_norm": 0.69921875, "learning_rate": 0.0016865090624077108, "loss": 0.1884, "step": 27040 }, { "epoch": 0.04794770115401085, "grad_norm": 0.439453125, "learning_rate": 0.0016864639164195134, "loss": 0.1829, "step": 27042 }, { "epoch": 0.04795124731932066, "grad_norm": 1.25, "learning_rate": 0.0016864187678664634, "loss": 0.2385, "step": 27044 }, { "epoch": 0.04795479348463048, "grad_norm": 0.333984375, "learning_rate": 0.0016863736167487585, "loss": 0.1863, "step": 27046 }, { "epoch": 0.04795833964994029, "grad_norm": 0.5078125, "learning_rate": 0.0016863284630665959, "loss": 0.1735, "step": 27048 }, { "epoch": 0.047961885815250106, "grad_norm": 1.015625, "learning_rate": 0.0016862833068201728, "loss": 0.1854, "step": 27050 }, { "epoch": 0.04796543198055992, "grad_norm": 0.5859375, "learning_rate": 0.0016862381480096872, "loss": 0.1997, "step": 27052 }, { "epoch": 0.047968978145869735, "grad_norm": 0.28515625, "learning_rate": 0.001686192986635337, "loss": 0.182, "step": 27054 }, { "epoch": 0.04797252431117955, "grad_norm": 0.263671875, "learning_rate": 0.001686147822697319, "loss": 0.1874, "step": 27056 }, { "epoch": 0.047976070476489364, "grad_norm": 0.326171875, "learning_rate": 0.0016861026561958306, "loss": 0.2006, "step": 27058 }, { "epoch": 0.04797961664179918, "grad_norm": 0.25, "learning_rate": 0.0016860574871310696, "loss": 0.2099, "step": 27060 }, { "epoch": 0.047983162807109, "grad_norm": 0.5, "learning_rate": 0.001686012315503234, "loss": 0.1624, "step": 27062 }, { "epoch": 0.047986708972418815, "grad_norm": 0.412109375, "learning_rate": 0.0016859671413125205, "loss": 0.2451, "step": 27064 }, { "epoch": 0.04799025513772863, "grad_norm": 0.369140625, "learning_rate": 0.0016859219645591272, "loss": 0.2396, "step": 27066 }, { "epoch": 0.047993801303038444, "grad_norm": 0.271484375, "learning_rate": 0.0016858767852432517, "loss": 0.203, "step": 27068 }, { "epoch": 0.04799734746834826, "grad_norm": 1.9765625, "learning_rate": 0.0016858316033650917, "loss": 0.2157, "step": 27070 }, { "epoch": 0.04800089363365807, "grad_norm": 0.69140625, "learning_rate": 0.001685786418924844, "loss": 0.1947, "step": 27072 }, { "epoch": 0.04800443979896789, "grad_norm": 0.76953125, "learning_rate": 0.001685741231922707, "loss": 0.2488, "step": 27074 }, { "epoch": 0.0480079859642777, "grad_norm": 2.109375, "learning_rate": 0.001685696042358878, "loss": 0.3698, "step": 27076 }, { "epoch": 0.04801153212958752, "grad_norm": 0.44921875, "learning_rate": 0.0016856508502335551, "loss": 0.1618, "step": 27078 }, { "epoch": 0.04801507829489733, "grad_norm": 0.53515625, "learning_rate": 0.001685605655546935, "loss": 0.2105, "step": 27080 }, { "epoch": 0.048018624460207146, "grad_norm": 1.578125, "learning_rate": 0.0016855604582992165, "loss": 0.2491, "step": 27082 }, { "epoch": 0.04802217062551697, "grad_norm": 1.0625, "learning_rate": 0.0016855152584905962, "loss": 0.4457, "step": 27084 }, { "epoch": 0.04802571679082678, "grad_norm": 0.64453125, "learning_rate": 0.0016854700561212727, "loss": 0.278, "step": 27086 }, { "epoch": 0.0480292629561366, "grad_norm": 0.357421875, "learning_rate": 0.0016854248511914429, "loss": 0.2004, "step": 27088 }, { "epoch": 0.04803280912144641, "grad_norm": 1.421875, "learning_rate": 0.0016853796437013052, "loss": 0.26, "step": 27090 }, { "epoch": 0.048036355286756226, "grad_norm": 1.796875, "learning_rate": 0.0016853344336510568, "loss": 0.3279, "step": 27092 }, { "epoch": 0.04803990145206604, "grad_norm": 0.41015625, "learning_rate": 0.0016852892210408954, "loss": 0.2015, "step": 27094 }, { "epoch": 0.048043447617375855, "grad_norm": 0.6015625, "learning_rate": 0.0016852440058710192, "loss": 0.1767, "step": 27096 }, { "epoch": 0.04804699378268567, "grad_norm": 0.408203125, "learning_rate": 0.0016851987881416255, "loss": 0.2141, "step": 27098 }, { "epoch": 0.048050539947995484, "grad_norm": 0.2255859375, "learning_rate": 0.001685153567852912, "loss": 0.2427, "step": 27100 }, { "epoch": 0.0480540861133053, "grad_norm": 0.53515625, "learning_rate": 0.0016851083450050773, "loss": 0.2414, "step": 27102 }, { "epoch": 0.04805763227861511, "grad_norm": 0.271484375, "learning_rate": 0.001685063119598318, "loss": 0.2359, "step": 27104 }, { "epoch": 0.048061178443924935, "grad_norm": 0.65234375, "learning_rate": 0.001685017891632833, "loss": 0.176, "step": 27106 }, { "epoch": 0.04806472460923475, "grad_norm": 0.609375, "learning_rate": 0.001684972661108819, "loss": 0.1763, "step": 27108 }, { "epoch": 0.048068270774544564, "grad_norm": 0.40625, "learning_rate": 0.0016849274280264748, "loss": 0.2518, "step": 27110 }, { "epoch": 0.04807181693985438, "grad_norm": 1.375, "learning_rate": 0.0016848821923859975, "loss": 0.2934, "step": 27112 }, { "epoch": 0.04807536310516419, "grad_norm": 0.4140625, "learning_rate": 0.0016848369541875854, "loss": 0.189, "step": 27114 }, { "epoch": 0.04807890927047401, "grad_norm": 0.419921875, "learning_rate": 0.001684791713431436, "loss": 0.2169, "step": 27116 }, { "epoch": 0.04808245543578382, "grad_norm": 0.2314453125, "learning_rate": 0.0016847464701177475, "loss": 0.3293, "step": 27118 }, { "epoch": 0.048086001601093636, "grad_norm": 0.51953125, "learning_rate": 0.0016847012242467176, "loss": 0.1872, "step": 27120 }, { "epoch": 0.04808954776640345, "grad_norm": 0.494140625, "learning_rate": 0.0016846559758185442, "loss": 0.1716, "step": 27122 }, { "epoch": 0.048093093931713266, "grad_norm": 0.32421875, "learning_rate": 0.001684610724833425, "loss": 0.1923, "step": 27124 }, { "epoch": 0.04809664009702308, "grad_norm": 0.3515625, "learning_rate": 0.0016845654712915581, "loss": 0.1978, "step": 27126 }, { "epoch": 0.048100186262332895, "grad_norm": 0.609375, "learning_rate": 0.0016845202151931418, "loss": 0.1957, "step": 27128 }, { "epoch": 0.048103732427642716, "grad_norm": 1.875, "learning_rate": 0.001684474956538373, "loss": 0.2163, "step": 27130 }, { "epoch": 0.04810727859295253, "grad_norm": 0.50390625, "learning_rate": 0.0016844296953274507, "loss": 0.17, "step": 27132 }, { "epoch": 0.048110824758262345, "grad_norm": 0.416015625, "learning_rate": 0.0016843844315605721, "loss": 0.2131, "step": 27134 }, { "epoch": 0.04811437092357216, "grad_norm": 1.171875, "learning_rate": 0.001684339165237936, "loss": 0.2205, "step": 27136 }, { "epoch": 0.048117917088881974, "grad_norm": 0.255859375, "learning_rate": 0.001684293896359739, "loss": 0.2053, "step": 27138 }, { "epoch": 0.04812146325419179, "grad_norm": 0.18359375, "learning_rate": 0.0016842486249261802, "loss": 0.1666, "step": 27140 }, { "epoch": 0.0481250094195016, "grad_norm": 0.259765625, "learning_rate": 0.0016842033509374575, "loss": 0.1818, "step": 27142 }, { "epoch": 0.04812855558481142, "grad_norm": 3.890625, "learning_rate": 0.0016841580743937685, "loss": 0.3159, "step": 27144 }, { "epoch": 0.04813210175012123, "grad_norm": 0.5546875, "learning_rate": 0.0016841127952953114, "loss": 0.3616, "step": 27146 }, { "epoch": 0.04813564791543105, "grad_norm": 0.51171875, "learning_rate": 0.0016840675136422843, "loss": 0.1813, "step": 27148 }, { "epoch": 0.04813919408074086, "grad_norm": 0.515625, "learning_rate": 0.001684022229434885, "loss": 0.1584, "step": 27150 }, { "epoch": 0.04814274024605068, "grad_norm": 0.265625, "learning_rate": 0.001683976942673312, "loss": 0.1954, "step": 27152 }, { "epoch": 0.0481462864113605, "grad_norm": 0.4140625, "learning_rate": 0.001683931653357763, "loss": 0.1794, "step": 27154 }, { "epoch": 0.04814983257667031, "grad_norm": 0.14453125, "learning_rate": 0.001683886361488436, "loss": 0.1702, "step": 27156 }, { "epoch": 0.04815337874198013, "grad_norm": 0.36328125, "learning_rate": 0.0016838410670655293, "loss": 0.2468, "step": 27158 }, { "epoch": 0.04815692490728994, "grad_norm": 0.53515625, "learning_rate": 0.0016837957700892407, "loss": 0.1892, "step": 27160 }, { "epoch": 0.048160471072599756, "grad_norm": 0.25390625, "learning_rate": 0.0016837504705597688, "loss": 0.1769, "step": 27162 }, { "epoch": 0.04816401723790957, "grad_norm": 0.53125, "learning_rate": 0.0016837051684773113, "loss": 0.2333, "step": 27164 }, { "epoch": 0.048167563403219385, "grad_norm": 0.224609375, "learning_rate": 0.0016836598638420666, "loss": 0.1821, "step": 27166 }, { "epoch": 0.0481711095685292, "grad_norm": 0.66015625, "learning_rate": 0.0016836145566542324, "loss": 0.177, "step": 27168 }, { "epoch": 0.048174655733839014, "grad_norm": 0.37890625, "learning_rate": 0.0016835692469140075, "loss": 0.2508, "step": 27170 }, { "epoch": 0.04817820189914883, "grad_norm": 0.69921875, "learning_rate": 0.0016835239346215892, "loss": 0.1475, "step": 27172 }, { "epoch": 0.04818174806445865, "grad_norm": 0.2431640625, "learning_rate": 0.0016834786197771764, "loss": 0.14, "step": 27174 }, { "epoch": 0.048185294229768465, "grad_norm": 0.6875, "learning_rate": 0.001683433302380967, "loss": 0.2758, "step": 27176 }, { "epoch": 0.04818884039507828, "grad_norm": 0.84375, "learning_rate": 0.0016833879824331595, "loss": 0.3031, "step": 27178 }, { "epoch": 0.048192386560388094, "grad_norm": 1.09375, "learning_rate": 0.0016833426599339519, "loss": 0.2239, "step": 27180 }, { "epoch": 0.04819593272569791, "grad_norm": 0.244140625, "learning_rate": 0.0016832973348835417, "loss": 0.2178, "step": 27182 }, { "epoch": 0.04819947889100772, "grad_norm": 0.5234375, "learning_rate": 0.0016832520072821283, "loss": 0.1862, "step": 27184 }, { "epoch": 0.04820302505631754, "grad_norm": 0.494140625, "learning_rate": 0.0016832066771299092, "loss": 0.2372, "step": 27186 }, { "epoch": 0.04820657122162735, "grad_norm": 0.55078125, "learning_rate": 0.0016831613444270833, "loss": 0.169, "step": 27188 }, { "epoch": 0.04821011738693717, "grad_norm": 0.5703125, "learning_rate": 0.0016831160091738478, "loss": 0.1817, "step": 27190 }, { "epoch": 0.04821366355224698, "grad_norm": 0.609375, "learning_rate": 0.0016830706713704018, "loss": 0.1706, "step": 27192 }, { "epoch": 0.048217209717556796, "grad_norm": 0.55859375, "learning_rate": 0.0016830253310169433, "loss": 0.1905, "step": 27194 }, { "epoch": 0.04822075588286661, "grad_norm": 0.40234375, "learning_rate": 0.001682979988113671, "loss": 0.1929, "step": 27196 }, { "epoch": 0.04822430204817643, "grad_norm": 1.5625, "learning_rate": 0.0016829346426607825, "loss": 0.2519, "step": 27198 }, { "epoch": 0.048227848213486246, "grad_norm": 1.625, "learning_rate": 0.0016828892946584768, "loss": 0.2396, "step": 27200 }, { "epoch": 0.04823139437879606, "grad_norm": 0.341796875, "learning_rate": 0.0016828439441069515, "loss": 0.2009, "step": 27202 }, { "epoch": 0.048234940544105875, "grad_norm": 0.1796875, "learning_rate": 0.0016827985910064056, "loss": 0.188, "step": 27204 }, { "epoch": 0.04823848670941569, "grad_norm": 2.515625, "learning_rate": 0.0016827532353570374, "loss": 0.3611, "step": 27206 }, { "epoch": 0.048242032874725504, "grad_norm": 0.474609375, "learning_rate": 0.0016827078771590447, "loss": 0.3856, "step": 27208 }, { "epoch": 0.04824557904003532, "grad_norm": 0.490234375, "learning_rate": 0.0016826625164126266, "loss": 0.3042, "step": 27210 }, { "epoch": 0.048249125205345134, "grad_norm": 1.1796875, "learning_rate": 0.0016826171531179805, "loss": 0.1793, "step": 27212 }, { "epoch": 0.04825267137065495, "grad_norm": 0.66015625, "learning_rate": 0.0016825717872753058, "loss": 0.1771, "step": 27214 }, { "epoch": 0.04825621753596476, "grad_norm": 0.384765625, "learning_rate": 0.0016825264188848007, "loss": 0.2396, "step": 27216 }, { "epoch": 0.04825976370127458, "grad_norm": 0.515625, "learning_rate": 0.001682481047946663, "loss": 0.2228, "step": 27218 }, { "epoch": 0.0482633098665844, "grad_norm": 0.41796875, "learning_rate": 0.0016824356744610917, "loss": 0.2015, "step": 27220 }, { "epoch": 0.04826685603189421, "grad_norm": 0.333984375, "learning_rate": 0.0016823902984282853, "loss": 0.2322, "step": 27222 }, { "epoch": 0.04827040219720403, "grad_norm": 0.82421875, "learning_rate": 0.0016823449198484417, "loss": 0.1882, "step": 27224 }, { "epoch": 0.04827394836251384, "grad_norm": 0.41015625, "learning_rate": 0.0016822995387217599, "loss": 0.2796, "step": 27226 }, { "epoch": 0.04827749452782366, "grad_norm": 0.9453125, "learning_rate": 0.001682254155048438, "loss": 0.1583, "step": 27228 }, { "epoch": 0.04828104069313347, "grad_norm": 0.4921875, "learning_rate": 0.0016822087688286747, "loss": 0.1893, "step": 27230 }, { "epoch": 0.048284586858443286, "grad_norm": 0.9921875, "learning_rate": 0.0016821633800626686, "loss": 0.2557, "step": 27232 }, { "epoch": 0.0482881330237531, "grad_norm": 0.3359375, "learning_rate": 0.0016821179887506177, "loss": 0.1586, "step": 27234 }, { "epoch": 0.048291679189062915, "grad_norm": 1.828125, "learning_rate": 0.0016820725948927212, "loss": 0.2888, "step": 27236 }, { "epoch": 0.04829522535437273, "grad_norm": 0.4921875, "learning_rate": 0.001682027198489177, "loss": 0.247, "step": 27238 }, { "epoch": 0.048298771519682544, "grad_norm": 0.94140625, "learning_rate": 0.0016819817995401838, "loss": 0.2127, "step": 27240 }, { "epoch": 0.048302317684992366, "grad_norm": 1.0546875, "learning_rate": 0.0016819363980459406, "loss": 0.2308, "step": 27242 }, { "epoch": 0.04830586385030218, "grad_norm": 0.2109375, "learning_rate": 0.0016818909940066456, "loss": 0.2207, "step": 27244 }, { "epoch": 0.048309410015611995, "grad_norm": 0.33984375, "learning_rate": 0.0016818455874224973, "loss": 0.2246, "step": 27246 }, { "epoch": 0.04831295618092181, "grad_norm": 0.4921875, "learning_rate": 0.0016818001782936945, "loss": 0.2301, "step": 27248 }, { "epoch": 0.048316502346231624, "grad_norm": 0.76171875, "learning_rate": 0.0016817547666204355, "loss": 0.258, "step": 27250 }, { "epoch": 0.04832004851154144, "grad_norm": 1.484375, "learning_rate": 0.001681709352402919, "loss": 0.232, "step": 27252 }, { "epoch": 0.04832359467685125, "grad_norm": 0.298828125, "learning_rate": 0.0016816639356413438, "loss": 0.2077, "step": 27254 }, { "epoch": 0.04832714084216107, "grad_norm": 0.5, "learning_rate": 0.0016816185163359084, "loss": 0.2552, "step": 27256 }, { "epoch": 0.04833068700747088, "grad_norm": 0.3515625, "learning_rate": 0.0016815730944868116, "loss": 0.1645, "step": 27258 }, { "epoch": 0.0483342331727807, "grad_norm": 0.5703125, "learning_rate": 0.0016815276700942517, "loss": 0.261, "step": 27260 }, { "epoch": 0.04833777933809051, "grad_norm": 0.330078125, "learning_rate": 0.0016814822431584276, "loss": 0.1779, "step": 27262 }, { "epoch": 0.048341325503400326, "grad_norm": 1.046875, "learning_rate": 0.001681436813679538, "loss": 0.1821, "step": 27264 }, { "epoch": 0.04834487166871015, "grad_norm": 0.427734375, "learning_rate": 0.0016813913816577817, "loss": 0.1995, "step": 27266 }, { "epoch": 0.04834841783401996, "grad_norm": 0.435546875, "learning_rate": 0.0016813459470933568, "loss": 0.2083, "step": 27268 }, { "epoch": 0.048351963999329776, "grad_norm": 1.140625, "learning_rate": 0.0016813005099864627, "loss": 0.3625, "step": 27270 }, { "epoch": 0.04835551016463959, "grad_norm": 0.412109375, "learning_rate": 0.0016812550703372978, "loss": 0.1887, "step": 27272 }, { "epoch": 0.048359056329949406, "grad_norm": 0.96484375, "learning_rate": 0.0016812096281460607, "loss": 0.1821, "step": 27274 }, { "epoch": 0.04836260249525922, "grad_norm": 0.37890625, "learning_rate": 0.0016811641834129506, "loss": 0.1943, "step": 27276 }, { "epoch": 0.048366148660569035, "grad_norm": 1.125, "learning_rate": 0.0016811187361381655, "loss": 0.272, "step": 27278 }, { "epoch": 0.04836969482587885, "grad_norm": 1.6796875, "learning_rate": 0.0016810732863219047, "loss": 0.3667, "step": 27280 }, { "epoch": 0.048373240991188664, "grad_norm": 0.404296875, "learning_rate": 0.0016810278339643673, "loss": 0.203, "step": 27282 }, { "epoch": 0.04837678715649848, "grad_norm": 0.78125, "learning_rate": 0.0016809823790657511, "loss": 0.5751, "step": 27284 }, { "epoch": 0.04838033332180829, "grad_norm": 0.37890625, "learning_rate": 0.0016809369216262558, "loss": 0.2276, "step": 27286 }, { "epoch": 0.048383879487118114, "grad_norm": 1.34375, "learning_rate": 0.0016808914616460798, "loss": 0.2584, "step": 27288 }, { "epoch": 0.04838742565242793, "grad_norm": 0.353515625, "learning_rate": 0.001680845999125422, "loss": 0.3475, "step": 27290 }, { "epoch": 0.048390971817737743, "grad_norm": 0.203125, "learning_rate": 0.0016808005340644809, "loss": 0.1675, "step": 27292 }, { "epoch": 0.04839451798304756, "grad_norm": 0.455078125, "learning_rate": 0.0016807550664634562, "loss": 0.187, "step": 27294 }, { "epoch": 0.04839806414835737, "grad_norm": 0.390625, "learning_rate": 0.0016807095963225457, "loss": 0.1954, "step": 27296 }, { "epoch": 0.04840161031366719, "grad_norm": 10.5, "learning_rate": 0.001680664123641949, "loss": 0.2485, "step": 27298 }, { "epoch": 0.048405156478977, "grad_norm": 0.890625, "learning_rate": 0.0016806186484218646, "loss": 0.2187, "step": 27300 }, { "epoch": 0.048408702644286816, "grad_norm": 0.31640625, "learning_rate": 0.0016805731706624914, "loss": 0.1903, "step": 27302 }, { "epoch": 0.04841224880959663, "grad_norm": 0.298828125, "learning_rate": 0.0016805276903640285, "loss": 0.2631, "step": 27304 }, { "epoch": 0.048415794974906445, "grad_norm": 0.494140625, "learning_rate": 0.0016804822075266745, "loss": 0.178, "step": 27306 }, { "epoch": 0.04841934114021626, "grad_norm": 0.50390625, "learning_rate": 0.001680436722150629, "loss": 0.2145, "step": 27308 }, { "epoch": 0.04842288730552608, "grad_norm": 1.390625, "learning_rate": 0.0016803912342360898, "loss": 0.2795, "step": 27310 }, { "epoch": 0.048426433470835896, "grad_norm": 2.34375, "learning_rate": 0.001680345743783257, "loss": 0.453, "step": 27312 }, { "epoch": 0.04842997963614571, "grad_norm": 0.6875, "learning_rate": 0.0016803002507923288, "loss": 0.213, "step": 27314 }, { "epoch": 0.048433525801455525, "grad_norm": 0.357421875, "learning_rate": 0.0016802547552635042, "loss": 0.3996, "step": 27316 }, { "epoch": 0.04843707196676534, "grad_norm": 2.09375, "learning_rate": 0.0016802092571969827, "loss": 0.2752, "step": 27318 }, { "epoch": 0.048440618132075154, "grad_norm": 0.7578125, "learning_rate": 0.001680163756592963, "loss": 0.2484, "step": 27320 }, { "epoch": 0.04844416429738497, "grad_norm": 0.875, "learning_rate": 0.0016801182534516438, "loss": 0.1838, "step": 27322 }, { "epoch": 0.04844771046269478, "grad_norm": 0.40234375, "learning_rate": 0.0016800727477732243, "loss": 0.4011, "step": 27324 }, { "epoch": 0.0484512566280046, "grad_norm": 0.28515625, "learning_rate": 0.0016800272395579034, "loss": 0.2467, "step": 27326 }, { "epoch": 0.04845480279331441, "grad_norm": 0.357421875, "learning_rate": 0.0016799817288058808, "loss": 0.2006, "step": 27328 }, { "epoch": 0.04845834895862423, "grad_norm": 4.90625, "learning_rate": 0.0016799362155173543, "loss": 0.3769, "step": 27330 }, { "epoch": 0.04846189512393404, "grad_norm": 0.408203125, "learning_rate": 0.001679890699692524, "loss": 0.1401, "step": 27332 }, { "epoch": 0.04846544128924386, "grad_norm": 14.875, "learning_rate": 0.0016798451813315888, "loss": 0.3457, "step": 27334 }, { "epoch": 0.04846898745455368, "grad_norm": 0.70703125, "learning_rate": 0.0016797996604347477, "loss": 0.2404, "step": 27336 }, { "epoch": 0.04847253361986349, "grad_norm": 0.7734375, "learning_rate": 0.0016797541370021992, "loss": 0.1658, "step": 27338 }, { "epoch": 0.04847607978517331, "grad_norm": 3.375, "learning_rate": 0.001679708611034143, "loss": 0.3007, "step": 27340 }, { "epoch": 0.04847962595048312, "grad_norm": 0.3671875, "learning_rate": 0.001679663082530778, "loss": 0.1615, "step": 27342 }, { "epoch": 0.048483172115792936, "grad_norm": 0.375, "learning_rate": 0.0016796175514923039, "loss": 0.2092, "step": 27344 }, { "epoch": 0.04848671828110275, "grad_norm": 0.77734375, "learning_rate": 0.0016795720179189186, "loss": 0.1919, "step": 27346 }, { "epoch": 0.048490264446412565, "grad_norm": 0.32421875, "learning_rate": 0.0016795264818108226, "loss": 0.1425, "step": 27348 }, { "epoch": 0.04849381061172238, "grad_norm": 0.3515625, "learning_rate": 0.001679480943168214, "loss": 0.5534, "step": 27350 }, { "epoch": 0.048497356777032194, "grad_norm": 0.88671875, "learning_rate": 0.0016794354019912924, "loss": 0.2148, "step": 27352 }, { "epoch": 0.04850090294234201, "grad_norm": 3.078125, "learning_rate": 0.001679389858280257, "loss": 0.2641, "step": 27354 }, { "epoch": 0.04850444910765183, "grad_norm": 0.65625, "learning_rate": 0.001679344312035307, "loss": 0.182, "step": 27356 }, { "epoch": 0.048507995272961645, "grad_norm": 0.5078125, "learning_rate": 0.0016792987632566411, "loss": 0.2607, "step": 27358 }, { "epoch": 0.04851154143827146, "grad_norm": 0.408203125, "learning_rate": 0.0016792532119444595, "loss": 0.1941, "step": 27360 }, { "epoch": 0.048515087603581274, "grad_norm": 0.2470703125, "learning_rate": 0.0016792076580989606, "loss": 0.1526, "step": 27362 }, { "epoch": 0.04851863376889109, "grad_norm": 0.7109375, "learning_rate": 0.0016791621017203438, "loss": 0.2434, "step": 27364 }, { "epoch": 0.0485221799342009, "grad_norm": 0.447265625, "learning_rate": 0.0016791165428088081, "loss": 0.2025, "step": 27366 }, { "epoch": 0.04852572609951072, "grad_norm": 0.37890625, "learning_rate": 0.0016790709813645536, "loss": 0.2147, "step": 27368 }, { "epoch": 0.04852927226482053, "grad_norm": 0.53515625, "learning_rate": 0.0016790254173877786, "loss": 0.2113, "step": 27370 }, { "epoch": 0.048532818430130346, "grad_norm": 0.51171875, "learning_rate": 0.001678979850878683, "loss": 0.225, "step": 27372 }, { "epoch": 0.04853636459544016, "grad_norm": 0.376953125, "learning_rate": 0.0016789342818374655, "loss": 0.2878, "step": 27374 }, { "epoch": 0.048539910760749976, "grad_norm": 0.5078125, "learning_rate": 0.0016788887102643258, "loss": 0.1826, "step": 27376 }, { "epoch": 0.0485434569260598, "grad_norm": 0.490234375, "learning_rate": 0.0016788431361594634, "loss": 0.2061, "step": 27378 }, { "epoch": 0.04854700309136961, "grad_norm": 0.470703125, "learning_rate": 0.001678797559523077, "loss": 0.1844, "step": 27380 }, { "epoch": 0.048550549256679426, "grad_norm": 1.0703125, "learning_rate": 0.0016787519803553666, "loss": 0.2227, "step": 27382 }, { "epoch": 0.04855409542198924, "grad_norm": 4.8125, "learning_rate": 0.0016787063986565313, "loss": 0.194, "step": 27384 }, { "epoch": 0.048557641587299055, "grad_norm": 0.375, "learning_rate": 0.0016786608144267702, "loss": 0.2409, "step": 27386 }, { "epoch": 0.04856118775260887, "grad_norm": 0.37890625, "learning_rate": 0.0016786152276662828, "loss": 0.2078, "step": 27388 }, { "epoch": 0.048564733917918684, "grad_norm": 0.59765625, "learning_rate": 0.0016785696383752684, "loss": 0.2104, "step": 27390 }, { "epoch": 0.0485682800832285, "grad_norm": 0.23046875, "learning_rate": 0.0016785240465539268, "loss": 0.1736, "step": 27392 }, { "epoch": 0.04857182624853831, "grad_norm": 0.26953125, "learning_rate": 0.0016784784522024569, "loss": 0.1716, "step": 27394 }, { "epoch": 0.04857537241384813, "grad_norm": 0.953125, "learning_rate": 0.001678432855321058, "loss": 0.1756, "step": 27396 }, { "epoch": 0.04857891857915794, "grad_norm": 2.5625, "learning_rate": 0.00167838725590993, "loss": 0.2079, "step": 27398 }, { "epoch": 0.04858246474446776, "grad_norm": 0.298828125, "learning_rate": 0.001678341653969272, "loss": 0.1601, "step": 27400 }, { "epoch": 0.04858601090977758, "grad_norm": 0.96484375, "learning_rate": 0.001678296049499284, "loss": 0.1651, "step": 27402 }, { "epoch": 0.04858955707508739, "grad_norm": 0.546875, "learning_rate": 0.0016782504425001642, "loss": 0.207, "step": 27404 }, { "epoch": 0.04859310324039721, "grad_norm": 0.76953125, "learning_rate": 0.0016782048329721136, "loss": 0.1949, "step": 27406 }, { "epoch": 0.04859664940570702, "grad_norm": 0.337890625, "learning_rate": 0.0016781592209153303, "loss": 0.4266, "step": 27408 }, { "epoch": 0.04860019557101684, "grad_norm": 2.109375, "learning_rate": 0.001678113606330015, "loss": 0.2862, "step": 27410 }, { "epoch": 0.04860374173632665, "grad_norm": 2.921875, "learning_rate": 0.0016780679892163663, "loss": 0.2805, "step": 27412 }, { "epoch": 0.048607287901636466, "grad_norm": 1.4375, "learning_rate": 0.001678022369574584, "loss": 0.2626, "step": 27414 }, { "epoch": 0.04861083406694628, "grad_norm": 0.4453125, "learning_rate": 0.0016779767474048675, "loss": 0.2185, "step": 27416 }, { "epoch": 0.048614380232256095, "grad_norm": 0.75, "learning_rate": 0.0016779311227074166, "loss": 0.2095, "step": 27418 }, { "epoch": 0.04861792639756591, "grad_norm": 0.5390625, "learning_rate": 0.0016778854954824307, "loss": 0.2161, "step": 27420 }, { "epoch": 0.048621472562875724, "grad_norm": 0.59375, "learning_rate": 0.0016778398657301095, "loss": 0.2607, "step": 27422 }, { "epoch": 0.048625018728185546, "grad_norm": 0.51171875, "learning_rate": 0.001677794233450652, "loss": 0.1964, "step": 27424 }, { "epoch": 0.04862856489349536, "grad_norm": 0.376953125, "learning_rate": 0.0016777485986442583, "loss": 0.1694, "step": 27426 }, { "epoch": 0.048632111058805175, "grad_norm": 0.26171875, "learning_rate": 0.0016777029613111276, "loss": 0.1694, "step": 27428 }, { "epoch": 0.04863565722411499, "grad_norm": 0.3984375, "learning_rate": 0.00167765732145146, "loss": 0.1756, "step": 27430 }, { "epoch": 0.048639203389424804, "grad_norm": 1.6640625, "learning_rate": 0.0016776116790654546, "loss": 0.2613, "step": 27432 }, { "epoch": 0.04864274955473462, "grad_norm": 0.27734375, "learning_rate": 0.0016775660341533114, "loss": 0.1737, "step": 27434 }, { "epoch": 0.04864629572004443, "grad_norm": 0.275390625, "learning_rate": 0.00167752038671523, "loss": 0.201, "step": 27436 }, { "epoch": 0.04864984188535425, "grad_norm": 0.349609375, "learning_rate": 0.0016774747367514096, "loss": 0.1553, "step": 27438 }, { "epoch": 0.04865338805066406, "grad_norm": 1.15625, "learning_rate": 0.0016774290842620503, "loss": 0.304, "step": 27440 }, { "epoch": 0.04865693421597388, "grad_norm": 0.5859375, "learning_rate": 0.0016773834292473518, "loss": 0.2129, "step": 27442 }, { "epoch": 0.04866048038128369, "grad_norm": 5.84375, "learning_rate": 0.0016773377717075129, "loss": 0.2513, "step": 27444 }, { "epoch": 0.04866402654659351, "grad_norm": 0.302734375, "learning_rate": 0.0016772921116427345, "loss": 0.3658, "step": 27446 }, { "epoch": 0.04866757271190333, "grad_norm": 0.244140625, "learning_rate": 0.0016772464490532157, "loss": 0.1777, "step": 27448 }, { "epoch": 0.04867111887721314, "grad_norm": 0.59765625, "learning_rate": 0.001677200783939156, "loss": 0.216, "step": 27450 }, { "epoch": 0.048674665042522956, "grad_norm": 0.59765625, "learning_rate": 0.0016771551163007555, "loss": 0.1837, "step": 27452 }, { "epoch": 0.04867821120783277, "grad_norm": 1.2109375, "learning_rate": 0.0016771094461382138, "loss": 0.262, "step": 27454 }, { "epoch": 0.048681757373142585, "grad_norm": 0.314453125, "learning_rate": 0.0016770637734517307, "loss": 0.1647, "step": 27456 }, { "epoch": 0.0486853035384524, "grad_norm": 1.0859375, "learning_rate": 0.0016770180982415056, "loss": 0.199, "step": 27458 }, { "epoch": 0.048688849703762214, "grad_norm": 0.63671875, "learning_rate": 0.0016769724205077388, "loss": 0.2328, "step": 27460 }, { "epoch": 0.04869239586907203, "grad_norm": 0.400390625, "learning_rate": 0.0016769267402506295, "loss": 0.2053, "step": 27462 }, { "epoch": 0.048695942034381844, "grad_norm": 0.7578125, "learning_rate": 0.0016768810574703777, "loss": 0.2549, "step": 27464 }, { "epoch": 0.04869948819969166, "grad_norm": 0.80078125, "learning_rate": 0.0016768353721671834, "loss": 0.2234, "step": 27466 }, { "epoch": 0.04870303436500147, "grad_norm": 0.5234375, "learning_rate": 0.0016767896843412464, "loss": 0.146, "step": 27468 }, { "epoch": 0.048706580530311294, "grad_norm": 0.953125, "learning_rate": 0.001676743993992766, "loss": 0.1832, "step": 27470 }, { "epoch": 0.04871012669562111, "grad_norm": 0.44921875, "learning_rate": 0.001676698301121943, "loss": 0.1689, "step": 27472 }, { "epoch": 0.04871367286093092, "grad_norm": 0.375, "learning_rate": 0.001676652605728976, "loss": 0.1697, "step": 27474 }, { "epoch": 0.04871721902624074, "grad_norm": 1.171875, "learning_rate": 0.0016766069078140656, "loss": 0.2778, "step": 27476 }, { "epoch": 0.04872076519155055, "grad_norm": 0.474609375, "learning_rate": 0.0016765612073774115, "loss": 0.1741, "step": 27478 }, { "epoch": 0.04872431135686037, "grad_norm": 0.47265625, "learning_rate": 0.001676515504419214, "loss": 0.362, "step": 27480 }, { "epoch": 0.04872785752217018, "grad_norm": 2.296875, "learning_rate": 0.0016764697989396719, "loss": 0.3004, "step": 27482 }, { "epoch": 0.048731403687479996, "grad_norm": 0.423828125, "learning_rate": 0.0016764240909389863, "loss": 0.2278, "step": 27484 }, { "epoch": 0.04873494985278981, "grad_norm": 0.62109375, "learning_rate": 0.0016763783804173563, "loss": 0.1944, "step": 27486 }, { "epoch": 0.048738496018099625, "grad_norm": 0.9296875, "learning_rate": 0.001676332667374982, "loss": 0.2052, "step": 27488 }, { "epoch": 0.04874204218340944, "grad_norm": 0.59375, "learning_rate": 0.0016762869518120635, "loss": 0.2697, "step": 27490 }, { "epoch": 0.04874558834871926, "grad_norm": 1.0390625, "learning_rate": 0.0016762412337288008, "loss": 0.1912, "step": 27492 }, { "epoch": 0.048749134514029076, "grad_norm": 1.7109375, "learning_rate": 0.0016761955131253936, "loss": 0.217, "step": 27494 }, { "epoch": 0.04875268067933889, "grad_norm": 0.6640625, "learning_rate": 0.0016761497900020417, "loss": 0.2027, "step": 27496 }, { "epoch": 0.048756226844648705, "grad_norm": 0.279296875, "learning_rate": 0.0016761040643589453, "loss": 0.1727, "step": 27498 }, { "epoch": 0.04875977300995852, "grad_norm": 0.3828125, "learning_rate": 0.0016760583361963045, "loss": 0.18, "step": 27500 }, { "epoch": 0.048763319175268334, "grad_norm": 0.59375, "learning_rate": 0.001676012605514319, "loss": 0.2111, "step": 27502 }, { "epoch": 0.04876686534057815, "grad_norm": 0.416015625, "learning_rate": 0.0016759668723131892, "loss": 0.1468, "step": 27504 }, { "epoch": 0.04877041150588796, "grad_norm": 0.380859375, "learning_rate": 0.0016759211365931148, "loss": 0.2135, "step": 27506 }, { "epoch": 0.04877395767119778, "grad_norm": 0.8046875, "learning_rate": 0.0016758753983542956, "loss": 0.1693, "step": 27508 }, { "epoch": 0.04877750383650759, "grad_norm": 1.1171875, "learning_rate": 0.0016758296575969322, "loss": 0.1903, "step": 27510 }, { "epoch": 0.04878105000181741, "grad_norm": 0.263671875, "learning_rate": 0.0016757839143212243, "loss": 0.1966, "step": 27512 }, { "epoch": 0.04878459616712723, "grad_norm": 0.478515625, "learning_rate": 0.0016757381685273721, "loss": 0.2254, "step": 27514 }, { "epoch": 0.04878814233243704, "grad_norm": 0.271484375, "learning_rate": 0.0016756924202155753, "loss": 0.1927, "step": 27516 }, { "epoch": 0.04879168849774686, "grad_norm": 0.66796875, "learning_rate": 0.0016756466693860345, "loss": 0.3193, "step": 27518 }, { "epoch": 0.04879523466305667, "grad_norm": 1.515625, "learning_rate": 0.0016756009160389495, "loss": 0.3272, "step": 27520 }, { "epoch": 0.048798780828366486, "grad_norm": 3.5625, "learning_rate": 0.0016755551601745206, "loss": 0.5131, "step": 27522 }, { "epoch": 0.0488023269936763, "grad_norm": 0.76953125, "learning_rate": 0.0016755094017929476, "loss": 0.2043, "step": 27524 }, { "epoch": 0.048805873158986116, "grad_norm": 1.03125, "learning_rate": 0.0016754636408944305, "loss": 0.2843, "step": 27526 }, { "epoch": 0.04880941932429593, "grad_norm": 0.421875, "learning_rate": 0.00167541787747917, "loss": 0.2117, "step": 27528 }, { "epoch": 0.048812965489605745, "grad_norm": 0.81640625, "learning_rate": 0.0016753721115473656, "loss": 0.2117, "step": 27530 }, { "epoch": 0.04881651165491556, "grad_norm": 0.79296875, "learning_rate": 0.0016753263430992183, "loss": 0.2181, "step": 27532 }, { "epoch": 0.048820057820225374, "grad_norm": 0.328125, "learning_rate": 0.0016752805721349277, "loss": 0.1528, "step": 27534 }, { "epoch": 0.04882360398553519, "grad_norm": 0.421875, "learning_rate": 0.0016752347986546939, "loss": 0.2262, "step": 27536 }, { "epoch": 0.04882715015084501, "grad_norm": 0.216796875, "learning_rate": 0.0016751890226587172, "loss": 0.2413, "step": 27538 }, { "epoch": 0.048830696316154824, "grad_norm": 0.703125, "learning_rate": 0.001675143244147198, "loss": 0.1929, "step": 27540 }, { "epoch": 0.04883424248146464, "grad_norm": 1.1875, "learning_rate": 0.0016750974631203359, "loss": 0.1618, "step": 27542 }, { "epoch": 0.048837788646774453, "grad_norm": 0.28125, "learning_rate": 0.0016750516795783319, "loss": 0.2506, "step": 27544 }, { "epoch": 0.04884133481208427, "grad_norm": 0.5703125, "learning_rate": 0.0016750058935213858, "loss": 0.2427, "step": 27546 }, { "epoch": 0.04884488097739408, "grad_norm": 0.54296875, "learning_rate": 0.0016749601049496979, "loss": 0.1424, "step": 27548 }, { "epoch": 0.0488484271427039, "grad_norm": 0.474609375, "learning_rate": 0.0016749143138634686, "loss": 0.2198, "step": 27550 }, { "epoch": 0.04885197330801371, "grad_norm": 0.27734375, "learning_rate": 0.001674868520262898, "loss": 0.2631, "step": 27552 }, { "epoch": 0.048855519473323526, "grad_norm": 0.466796875, "learning_rate": 0.0016748227241481861, "loss": 0.2152, "step": 27554 }, { "epoch": 0.04885906563863334, "grad_norm": 1.8671875, "learning_rate": 0.001674776925519534, "loss": 0.2257, "step": 27556 }, { "epoch": 0.048862611803943155, "grad_norm": 0.46875, "learning_rate": 0.0016747311243771412, "loss": 0.2459, "step": 27558 }, { "epoch": 0.04886615796925298, "grad_norm": 0.51953125, "learning_rate": 0.0016746853207212085, "loss": 0.341, "step": 27560 }, { "epoch": 0.04886970413456279, "grad_norm": 0.7578125, "learning_rate": 0.001674639514551936, "loss": 0.2167, "step": 27562 }, { "epoch": 0.048873250299872606, "grad_norm": 0.451171875, "learning_rate": 0.0016745937058695237, "loss": 0.2854, "step": 27564 }, { "epoch": 0.04887679646518242, "grad_norm": 0.55078125, "learning_rate": 0.0016745478946741728, "loss": 0.3759, "step": 27566 }, { "epoch": 0.048880342630492235, "grad_norm": 0.859375, "learning_rate": 0.0016745020809660826, "loss": 0.3439, "step": 27568 }, { "epoch": 0.04888388879580205, "grad_norm": 0.298828125, "learning_rate": 0.0016744562647454547, "loss": 0.177, "step": 27570 }, { "epoch": 0.048887434961111864, "grad_norm": 0.67578125, "learning_rate": 0.0016744104460124883, "loss": 0.1629, "step": 27572 }, { "epoch": 0.04889098112642168, "grad_norm": 0.376953125, "learning_rate": 0.0016743646247673843, "loss": 0.2112, "step": 27574 }, { "epoch": 0.04889452729173149, "grad_norm": 0.89453125, "learning_rate": 0.0016743188010103429, "loss": 0.1197, "step": 27576 }, { "epoch": 0.04889807345704131, "grad_norm": 0.90234375, "learning_rate": 0.0016742729747415651, "loss": 0.2486, "step": 27578 }, { "epoch": 0.04890161962235112, "grad_norm": 0.4375, "learning_rate": 0.0016742271459612506, "loss": 0.2216, "step": 27580 }, { "epoch": 0.048905165787660944, "grad_norm": 0.6328125, "learning_rate": 0.0016741813146696001, "loss": 0.2427, "step": 27582 }, { "epoch": 0.04890871195297076, "grad_norm": 0.38671875, "learning_rate": 0.0016741354808668142, "loss": 0.1873, "step": 27584 }, { "epoch": 0.04891225811828057, "grad_norm": 1.8203125, "learning_rate": 0.0016740896445530933, "loss": 0.196, "step": 27586 }, { "epoch": 0.04891580428359039, "grad_norm": 1.125, "learning_rate": 0.0016740438057286375, "loss": 0.2936, "step": 27588 }, { "epoch": 0.0489193504489002, "grad_norm": 0.6328125, "learning_rate": 0.0016739979643936478, "loss": 0.327, "step": 27590 }, { "epoch": 0.04892289661421002, "grad_norm": 0.58984375, "learning_rate": 0.0016739521205483242, "loss": 0.1819, "step": 27592 }, { "epoch": 0.04892644277951983, "grad_norm": 0.5625, "learning_rate": 0.0016739062741928674, "loss": 0.2103, "step": 27594 }, { "epoch": 0.048929988944829646, "grad_norm": 2.1875, "learning_rate": 0.0016738604253274778, "loss": 0.2365, "step": 27596 }, { "epoch": 0.04893353511013946, "grad_norm": 0.52734375, "learning_rate": 0.0016738145739523564, "loss": 0.1964, "step": 27598 }, { "epoch": 0.048937081275449275, "grad_norm": 0.359375, "learning_rate": 0.0016737687200677033, "loss": 0.1642, "step": 27600 }, { "epoch": 0.04894062744075909, "grad_norm": 0.478515625, "learning_rate": 0.001673722863673719, "loss": 0.2654, "step": 27602 }, { "epoch": 0.048944173606068904, "grad_norm": 0.2265625, "learning_rate": 0.001673677004770604, "loss": 0.2291, "step": 27604 }, { "epoch": 0.048947719771378725, "grad_norm": 4.28125, "learning_rate": 0.001673631143358559, "loss": 0.3125, "step": 27606 }, { "epoch": 0.04895126593668854, "grad_norm": 0.75, "learning_rate": 0.0016735852794377852, "loss": 0.216, "step": 27608 }, { "epoch": 0.048954812101998355, "grad_norm": 0.90625, "learning_rate": 0.0016735394130084822, "loss": 0.2221, "step": 27610 }, { "epoch": 0.04895835826730817, "grad_norm": 4.46875, "learning_rate": 0.0016734935440708507, "loss": 0.2369, "step": 27612 }, { "epoch": 0.048961904432617984, "grad_norm": 0.69140625, "learning_rate": 0.0016734476726250917, "loss": 0.1766, "step": 27614 }, { "epoch": 0.0489654505979278, "grad_norm": 0.70703125, "learning_rate": 0.0016734017986714056, "loss": 0.2263, "step": 27616 }, { "epoch": 0.04896899676323761, "grad_norm": 0.369140625, "learning_rate": 0.0016733559222099938, "loss": 0.1922, "step": 27618 }, { "epoch": 0.04897254292854743, "grad_norm": 0.94921875, "learning_rate": 0.0016733100432410554, "loss": 0.159, "step": 27620 }, { "epoch": 0.04897608909385724, "grad_norm": 0.6875, "learning_rate": 0.0016732641617647921, "loss": 0.2057, "step": 27622 }, { "epoch": 0.048979635259167056, "grad_norm": 0.478515625, "learning_rate": 0.0016732182777814042, "loss": 0.1901, "step": 27624 }, { "epoch": 0.04898318142447687, "grad_norm": 0.375, "learning_rate": 0.0016731723912910927, "loss": 0.1933, "step": 27626 }, { "epoch": 0.04898672758978669, "grad_norm": 0.54296875, "learning_rate": 0.0016731265022940586, "loss": 0.253, "step": 27628 }, { "epoch": 0.04899027375509651, "grad_norm": 0.9609375, "learning_rate": 0.0016730806107905013, "loss": 0.2822, "step": 27630 }, { "epoch": 0.04899381992040632, "grad_norm": 0.7890625, "learning_rate": 0.0016730347167806226, "loss": 0.2566, "step": 27632 }, { "epoch": 0.048997366085716136, "grad_norm": 0.478515625, "learning_rate": 0.001672988820264623, "loss": 0.1671, "step": 27634 }, { "epoch": 0.04900091225102595, "grad_norm": 0.18359375, "learning_rate": 0.001672942921242703, "loss": 0.1419, "step": 27636 }, { "epoch": 0.049004458416335765, "grad_norm": 0.515625, "learning_rate": 0.0016728970197150636, "loss": 0.3397, "step": 27638 }, { "epoch": 0.04900800458164558, "grad_norm": 0.48046875, "learning_rate": 0.001672851115681905, "loss": 0.1907, "step": 27640 }, { "epoch": 0.049011550746955394, "grad_norm": 0.7734375, "learning_rate": 0.0016728052091434288, "loss": 0.3122, "step": 27642 }, { "epoch": 0.04901509691226521, "grad_norm": 0.259765625, "learning_rate": 0.0016727593000998354, "loss": 0.148, "step": 27644 }, { "epoch": 0.04901864307757502, "grad_norm": 0.73046875, "learning_rate": 0.0016727133885513252, "loss": 0.1999, "step": 27646 }, { "epoch": 0.04902218924288484, "grad_norm": 0.87890625, "learning_rate": 0.0016726674744980993, "loss": 0.2652, "step": 27648 }, { "epoch": 0.04902573540819466, "grad_norm": 1.03125, "learning_rate": 0.001672621557940359, "loss": 0.2193, "step": 27650 }, { "epoch": 0.049029281573504474, "grad_norm": 0.400390625, "learning_rate": 0.0016725756388783044, "loss": 0.2048, "step": 27652 }, { "epoch": 0.04903282773881429, "grad_norm": 0.240234375, "learning_rate": 0.0016725297173121364, "loss": 0.1569, "step": 27654 }, { "epoch": 0.0490363739041241, "grad_norm": 0.74609375, "learning_rate": 0.001672483793242056, "loss": 0.2729, "step": 27656 }, { "epoch": 0.04903992006943392, "grad_norm": 0.24609375, "learning_rate": 0.001672437866668264, "loss": 0.1925, "step": 27658 }, { "epoch": 0.04904346623474373, "grad_norm": 2.75, "learning_rate": 0.0016723919375909613, "loss": 0.271, "step": 27660 }, { "epoch": 0.04904701240005355, "grad_norm": 0.58984375, "learning_rate": 0.0016723460060103488, "loss": 0.2411, "step": 27662 }, { "epoch": 0.04905055856536336, "grad_norm": 0.39453125, "learning_rate": 0.0016723000719266272, "loss": 0.2002, "step": 27664 }, { "epoch": 0.049054104730673176, "grad_norm": 0.33984375, "learning_rate": 0.0016722541353399976, "loss": 0.2032, "step": 27666 }, { "epoch": 0.04905765089598299, "grad_norm": 0.373046875, "learning_rate": 0.001672208196250661, "loss": 0.1659, "step": 27668 }, { "epoch": 0.049061197061292805, "grad_norm": 0.69921875, "learning_rate": 0.0016721622546588177, "loss": 0.4634, "step": 27670 }, { "epoch": 0.04906474322660262, "grad_norm": 0.57421875, "learning_rate": 0.0016721163105646697, "loss": 0.2322, "step": 27672 }, { "epoch": 0.04906828939191244, "grad_norm": 0.765625, "learning_rate": 0.0016720703639684167, "loss": 0.1721, "step": 27674 }, { "epoch": 0.049071835557222256, "grad_norm": 0.95703125, "learning_rate": 0.0016720244148702606, "loss": 0.2633, "step": 27676 }, { "epoch": 0.04907538172253207, "grad_norm": 0.4375, "learning_rate": 0.0016719784632704018, "loss": 0.1912, "step": 27678 }, { "epoch": 0.049078927887841885, "grad_norm": 0.78125, "learning_rate": 0.0016719325091690417, "loss": 0.1853, "step": 27680 }, { "epoch": 0.0490824740531517, "grad_norm": 0.6953125, "learning_rate": 0.0016718865525663807, "loss": 0.1913, "step": 27682 }, { "epoch": 0.049086020218461514, "grad_norm": 0.65625, "learning_rate": 0.0016718405934626202, "loss": 0.1892, "step": 27684 }, { "epoch": 0.04908956638377133, "grad_norm": 0.34375, "learning_rate": 0.0016717946318579608, "loss": 0.2402, "step": 27686 }, { "epoch": 0.04909311254908114, "grad_norm": 0.6875, "learning_rate": 0.0016717486677526043, "loss": 0.1605, "step": 27688 }, { "epoch": 0.04909665871439096, "grad_norm": 0.376953125, "learning_rate": 0.0016717027011467509, "loss": 0.1727, "step": 27690 }, { "epoch": 0.04910020487970077, "grad_norm": 0.1875, "learning_rate": 0.0016716567320406023, "loss": 0.1964, "step": 27692 }, { "epoch": 0.04910375104501059, "grad_norm": 0.59375, "learning_rate": 0.0016716107604343592, "loss": 0.1557, "step": 27694 }, { "epoch": 0.04910729721032041, "grad_norm": 0.703125, "learning_rate": 0.0016715647863282223, "loss": 0.1917, "step": 27696 }, { "epoch": 0.04911084337563022, "grad_norm": 0.9140625, "learning_rate": 0.001671518809722393, "loss": 0.1907, "step": 27698 }, { "epoch": 0.04911438954094004, "grad_norm": 0.54296875, "learning_rate": 0.001671472830617073, "loss": 0.2001, "step": 27700 }, { "epoch": 0.04911793570624985, "grad_norm": 0.3125, "learning_rate": 0.0016714268490124625, "loss": 0.2129, "step": 27702 }, { "epoch": 0.049121481871559666, "grad_norm": 0.435546875, "learning_rate": 0.001671380864908763, "loss": 0.2355, "step": 27704 }, { "epoch": 0.04912502803686948, "grad_norm": 1.734375, "learning_rate": 0.001671334878306175, "loss": 0.2582, "step": 27706 }, { "epoch": 0.049128574202179295, "grad_norm": 0.453125, "learning_rate": 0.0016712888892049006, "loss": 0.1878, "step": 27708 }, { "epoch": 0.04913212036748911, "grad_norm": 0.470703125, "learning_rate": 0.0016712428976051398, "loss": 0.202, "step": 27710 }, { "epoch": 0.049135666532798924, "grad_norm": 1.890625, "learning_rate": 0.001671196903507095, "loss": 0.3378, "step": 27712 }, { "epoch": 0.04913921269810874, "grad_norm": 0.65234375, "learning_rate": 0.0016711509069109666, "loss": 0.2083, "step": 27714 }, { "epoch": 0.049142758863418554, "grad_norm": 0.291015625, "learning_rate": 0.0016711049078169558, "loss": 0.2795, "step": 27716 }, { "epoch": 0.049146305028728375, "grad_norm": 0.79296875, "learning_rate": 0.001671058906225264, "loss": 0.2019, "step": 27718 }, { "epoch": 0.04914985119403819, "grad_norm": 0.9609375, "learning_rate": 0.0016710129021360923, "loss": 0.2016, "step": 27720 }, { "epoch": 0.049153397359348004, "grad_norm": 0.7734375, "learning_rate": 0.0016709668955496417, "loss": 0.1704, "step": 27722 }, { "epoch": 0.04915694352465782, "grad_norm": 0.23046875, "learning_rate": 0.0016709208864661138, "loss": 0.1586, "step": 27724 }, { "epoch": 0.04916048968996763, "grad_norm": 0.65625, "learning_rate": 0.0016708748748857092, "loss": 0.2848, "step": 27726 }, { "epoch": 0.04916403585527745, "grad_norm": 0.3984375, "learning_rate": 0.0016708288608086297, "loss": 0.3441, "step": 27728 }, { "epoch": 0.04916758202058726, "grad_norm": 0.78515625, "learning_rate": 0.0016707828442350766, "loss": 0.201, "step": 27730 }, { "epoch": 0.04917112818589708, "grad_norm": 0.427734375, "learning_rate": 0.0016707368251652503, "loss": 0.2629, "step": 27732 }, { "epoch": 0.04917467435120689, "grad_norm": 0.58203125, "learning_rate": 0.0016706908035993531, "loss": 0.1957, "step": 27734 }, { "epoch": 0.049178220516516706, "grad_norm": 0.35546875, "learning_rate": 0.0016706447795375857, "loss": 0.1834, "step": 27736 }, { "epoch": 0.04918176668182652, "grad_norm": 0.439453125, "learning_rate": 0.0016705987529801497, "loss": 0.1378, "step": 27738 }, { "epoch": 0.049185312847136335, "grad_norm": 0.984375, "learning_rate": 0.0016705527239272457, "loss": 0.2028, "step": 27740 }, { "epoch": 0.04918885901244616, "grad_norm": 0.32421875, "learning_rate": 0.0016705066923790757, "loss": 0.1986, "step": 27742 }, { "epoch": 0.04919240517775597, "grad_norm": 0.4921875, "learning_rate": 0.001670460658335841, "loss": 0.1796, "step": 27744 }, { "epoch": 0.049195951343065786, "grad_norm": 0.40234375, "learning_rate": 0.0016704146217977427, "loss": 0.2651, "step": 27746 }, { "epoch": 0.0491994975083756, "grad_norm": 2.375, "learning_rate": 0.0016703685827649823, "loss": 0.3195, "step": 27748 }, { "epoch": 0.049203043673685415, "grad_norm": 1.421875, "learning_rate": 0.0016703225412377607, "loss": 0.2236, "step": 27750 }, { "epoch": 0.04920658983899523, "grad_norm": 0.6328125, "learning_rate": 0.00167027649721628, "loss": 0.2245, "step": 27752 }, { "epoch": 0.049210136004305044, "grad_norm": 0.63671875, "learning_rate": 0.0016702304507007406, "loss": 0.2257, "step": 27754 }, { "epoch": 0.04921368216961486, "grad_norm": 0.318359375, "learning_rate": 0.0016701844016913448, "loss": 0.2242, "step": 27756 }, { "epoch": 0.04921722833492467, "grad_norm": 0.3671875, "learning_rate": 0.0016701383501882936, "loss": 0.2182, "step": 27758 }, { "epoch": 0.04922077450023449, "grad_norm": 0.482421875, "learning_rate": 0.0016700922961917885, "loss": 0.2102, "step": 27760 }, { "epoch": 0.0492243206655443, "grad_norm": 0.91796875, "learning_rate": 0.0016700462397020307, "loss": 0.2379, "step": 27762 }, { "epoch": 0.049227866830854124, "grad_norm": 0.26171875, "learning_rate": 0.0016700001807192215, "loss": 0.1694, "step": 27764 }, { "epoch": 0.04923141299616394, "grad_norm": 0.390625, "learning_rate": 0.0016699541192435631, "loss": 0.2549, "step": 27766 }, { "epoch": 0.04923495916147375, "grad_norm": 0.314453125, "learning_rate": 0.0016699080552752563, "loss": 0.1718, "step": 27768 }, { "epoch": 0.04923850532678357, "grad_norm": 0.6015625, "learning_rate": 0.001669861988814503, "loss": 0.2045, "step": 27770 }, { "epoch": 0.04924205149209338, "grad_norm": 0.59765625, "learning_rate": 0.0016698159198615036, "loss": 0.2026, "step": 27772 }, { "epoch": 0.049245597657403196, "grad_norm": 0.92578125, "learning_rate": 0.0016697698484164612, "loss": 0.244, "step": 27774 }, { "epoch": 0.04924914382271301, "grad_norm": 0.90625, "learning_rate": 0.0016697237744795759, "loss": 0.1724, "step": 27776 }, { "epoch": 0.049252689988022826, "grad_norm": 0.76171875, "learning_rate": 0.00166967769805105, "loss": 0.2235, "step": 27778 }, { "epoch": 0.04925623615333264, "grad_norm": 0.6171875, "learning_rate": 0.0016696316191310845, "loss": 0.2293, "step": 27780 }, { "epoch": 0.049259782318642455, "grad_norm": 0.337890625, "learning_rate": 0.0016695855377198813, "loss": 0.1815, "step": 27782 }, { "epoch": 0.04926332848395227, "grad_norm": 7.9375, "learning_rate": 0.0016695394538176422, "loss": 0.2171, "step": 27784 }, { "epoch": 0.04926687464926209, "grad_norm": 0.376953125, "learning_rate": 0.0016694933674245677, "loss": 0.1785, "step": 27786 }, { "epoch": 0.049270420814571905, "grad_norm": 0.482421875, "learning_rate": 0.0016694472785408604, "loss": 0.1918, "step": 27788 }, { "epoch": 0.04927396697988172, "grad_norm": 0.5859375, "learning_rate": 0.0016694011871667214, "loss": 0.2075, "step": 27790 }, { "epoch": 0.049277513145191534, "grad_norm": 0.302734375, "learning_rate": 0.0016693550933023524, "loss": 0.1964, "step": 27792 }, { "epoch": 0.04928105931050135, "grad_norm": 2.046875, "learning_rate": 0.001669308996947955, "loss": 0.3022, "step": 27794 }, { "epoch": 0.04928460547581116, "grad_norm": 0.369140625, "learning_rate": 0.0016692628981037306, "loss": 0.2381, "step": 27796 }, { "epoch": 0.04928815164112098, "grad_norm": 0.44921875, "learning_rate": 0.001669216796769881, "loss": 0.2227, "step": 27798 }, { "epoch": 0.04929169780643079, "grad_norm": 0.609375, "learning_rate": 0.0016691706929466076, "loss": 0.2152, "step": 27800 }, { "epoch": 0.04929524397174061, "grad_norm": 0.5546875, "learning_rate": 0.0016691245866341123, "loss": 0.192, "step": 27802 }, { "epoch": 0.04929879013705042, "grad_norm": 0.67578125, "learning_rate": 0.001669078477832597, "loss": 0.1694, "step": 27804 }, { "epoch": 0.049302336302360236, "grad_norm": 0.5, "learning_rate": 0.0016690323665422627, "loss": 0.1698, "step": 27806 }, { "epoch": 0.04930588246767005, "grad_norm": 0.390625, "learning_rate": 0.0016689862527633113, "loss": 0.1649, "step": 27808 }, { "epoch": 0.04930942863297987, "grad_norm": 0.4140625, "learning_rate": 0.0016689401364959444, "loss": 0.2896, "step": 27810 }, { "epoch": 0.04931297479828969, "grad_norm": 1.40625, "learning_rate": 0.0016688940177403643, "loss": 0.4941, "step": 27812 }, { "epoch": 0.0493165209635995, "grad_norm": 1.0078125, "learning_rate": 0.0016688478964967718, "loss": 0.3078, "step": 27814 }, { "epoch": 0.049320067128909316, "grad_norm": 0.625, "learning_rate": 0.0016688017727653692, "loss": 0.1915, "step": 27816 }, { "epoch": 0.04932361329421913, "grad_norm": 0.83984375, "learning_rate": 0.0016687556465463577, "loss": 0.1865, "step": 27818 }, { "epoch": 0.049327159459528945, "grad_norm": 0.232421875, "learning_rate": 0.00166870951783994, "loss": 0.1926, "step": 27820 }, { "epoch": 0.04933070562483876, "grad_norm": 0.6015625, "learning_rate": 0.001668663386646317, "loss": 0.2602, "step": 27822 }, { "epoch": 0.049334251790148574, "grad_norm": 0.455078125, "learning_rate": 0.0016686172529656903, "loss": 0.2284, "step": 27824 }, { "epoch": 0.04933779795545839, "grad_norm": 1.0078125, "learning_rate": 0.0016685711167982625, "loss": 0.3065, "step": 27826 }, { "epoch": 0.0493413441207682, "grad_norm": 0.3984375, "learning_rate": 0.0016685249781442347, "loss": 0.1869, "step": 27828 }, { "epoch": 0.04934489028607802, "grad_norm": 2.03125, "learning_rate": 0.0016684788370038089, "loss": 0.2437, "step": 27830 }, { "epoch": 0.04934843645138784, "grad_norm": 0.8984375, "learning_rate": 0.001668432693377187, "loss": 0.1359, "step": 27832 }, { "epoch": 0.049351982616697654, "grad_norm": 0.6484375, "learning_rate": 0.0016683865472645703, "loss": 0.1849, "step": 27834 }, { "epoch": 0.04935552878200747, "grad_norm": 0.337890625, "learning_rate": 0.0016683403986661614, "loss": 0.1791, "step": 27836 }, { "epoch": 0.04935907494731728, "grad_norm": 1.0078125, "learning_rate": 0.0016682942475821617, "loss": 0.2013, "step": 27838 }, { "epoch": 0.0493626211126271, "grad_norm": 1.234375, "learning_rate": 0.0016682480940127728, "loss": 0.1919, "step": 27840 }, { "epoch": 0.04936616727793691, "grad_norm": 0.35546875, "learning_rate": 0.001668201937958197, "loss": 0.1576, "step": 27842 }, { "epoch": 0.04936971344324673, "grad_norm": 0.6640625, "learning_rate": 0.001668155779418636, "loss": 0.2228, "step": 27844 }, { "epoch": 0.04937325960855654, "grad_norm": 1.0859375, "learning_rate": 0.0016681096183942914, "loss": 0.2717, "step": 27846 }, { "epoch": 0.049376805773866356, "grad_norm": 0.22265625, "learning_rate": 0.0016680634548853657, "loss": 0.1955, "step": 27848 }, { "epoch": 0.04938035193917617, "grad_norm": 0.31640625, "learning_rate": 0.0016680172888920602, "loss": 0.2688, "step": 27850 }, { "epoch": 0.049383898104485985, "grad_norm": 0.353515625, "learning_rate": 0.001667971120414577, "loss": 0.1823, "step": 27852 }, { "epoch": 0.049387444269795806, "grad_norm": 2.484375, "learning_rate": 0.0016679249494531182, "loss": 0.1953, "step": 27854 }, { "epoch": 0.04939099043510562, "grad_norm": 0.93359375, "learning_rate": 0.0016678787760078854, "loss": 0.2108, "step": 27856 }, { "epoch": 0.049394536600415435, "grad_norm": 0.57421875, "learning_rate": 0.0016678326000790806, "loss": 0.1638, "step": 27858 }, { "epoch": 0.04939808276572525, "grad_norm": 0.3046875, "learning_rate": 0.0016677864216669061, "loss": 0.1907, "step": 27860 }, { "epoch": 0.049401628931035065, "grad_norm": 0.80078125, "learning_rate": 0.0016677402407715633, "loss": 0.1934, "step": 27862 }, { "epoch": 0.04940517509634488, "grad_norm": 0.61328125, "learning_rate": 0.0016676940573932548, "loss": 0.271, "step": 27864 }, { "epoch": 0.049408721261654694, "grad_norm": 0.828125, "learning_rate": 0.0016676478715321821, "loss": 0.2908, "step": 27866 }, { "epoch": 0.04941226742696451, "grad_norm": 0.3203125, "learning_rate": 0.0016676016831885475, "loss": 0.1757, "step": 27868 }, { "epoch": 0.04941581359227432, "grad_norm": 0.5390625, "learning_rate": 0.0016675554923625527, "loss": 0.2011, "step": 27870 }, { "epoch": 0.04941935975758414, "grad_norm": 5.5625, "learning_rate": 0.0016675092990543997, "loss": 0.2947, "step": 27872 }, { "epoch": 0.04942290592289395, "grad_norm": 0.81640625, "learning_rate": 0.0016674631032642909, "loss": 0.1589, "step": 27874 }, { "epoch": 0.049426452088203766, "grad_norm": 0.56640625, "learning_rate": 0.0016674169049924281, "loss": 0.2102, "step": 27876 }, { "epoch": 0.04942999825351359, "grad_norm": 0.67578125, "learning_rate": 0.001667370704239013, "loss": 0.229, "step": 27878 }, { "epoch": 0.0494335444188234, "grad_norm": 0.53515625, "learning_rate": 0.0016673245010042485, "loss": 0.2138, "step": 27880 }, { "epoch": 0.04943709058413322, "grad_norm": 0.59765625, "learning_rate": 0.0016672782952883358, "loss": 0.2541, "step": 27882 }, { "epoch": 0.04944063674944303, "grad_norm": 1.953125, "learning_rate": 0.0016672320870914775, "loss": 0.2812, "step": 27884 }, { "epoch": 0.049444182914752846, "grad_norm": 0.73046875, "learning_rate": 0.0016671858764138753, "loss": 0.2342, "step": 27886 }, { "epoch": 0.04944772908006266, "grad_norm": 1.5, "learning_rate": 0.001667139663255732, "loss": 0.1575, "step": 27888 }, { "epoch": 0.049451275245372475, "grad_norm": 1.140625, "learning_rate": 0.001667093447617249, "loss": 0.2015, "step": 27890 }, { "epoch": 0.04945482141068229, "grad_norm": 0.7734375, "learning_rate": 0.0016670472294986287, "loss": 0.2173, "step": 27892 }, { "epoch": 0.049458367575992104, "grad_norm": 0.388671875, "learning_rate": 0.0016670010089000732, "loss": 0.2426, "step": 27894 }, { "epoch": 0.04946191374130192, "grad_norm": 0.408203125, "learning_rate": 0.0016669547858217846, "loss": 0.153, "step": 27896 }, { "epoch": 0.04946545990661173, "grad_norm": 0.5546875, "learning_rate": 0.0016669085602639654, "loss": 0.2145, "step": 27898 }, { "epoch": 0.049469006071921555, "grad_norm": 0.443359375, "learning_rate": 0.001666862332226817, "loss": 0.217, "step": 27900 }, { "epoch": 0.04947255223723137, "grad_norm": 0.341796875, "learning_rate": 0.0016668161017105423, "loss": 0.1746, "step": 27902 }, { "epoch": 0.049476098402541184, "grad_norm": 0.671875, "learning_rate": 0.001666769868715343, "loss": 0.1477, "step": 27904 }, { "epoch": 0.049479644567851, "grad_norm": 0.6328125, "learning_rate": 0.0016667236332414217, "loss": 0.1846, "step": 27906 }, { "epoch": 0.04948319073316081, "grad_norm": 0.96875, "learning_rate": 0.0016666773952889805, "loss": 0.22, "step": 27908 }, { "epoch": 0.04948673689847063, "grad_norm": 0.6796875, "learning_rate": 0.0016666311548582213, "loss": 0.1665, "step": 27910 }, { "epoch": 0.04949028306378044, "grad_norm": 0.23828125, "learning_rate": 0.0016665849119493464, "loss": 0.1986, "step": 27912 }, { "epoch": 0.04949382922909026, "grad_norm": 0.46875, "learning_rate": 0.0016665386665625588, "loss": 0.213, "step": 27914 }, { "epoch": 0.04949737539440007, "grad_norm": 0.345703125, "learning_rate": 0.0016664924186980596, "loss": 0.2079, "step": 27916 }, { "epoch": 0.049500921559709886, "grad_norm": 0.376953125, "learning_rate": 0.0016664461683560522, "loss": 0.166, "step": 27918 }, { "epoch": 0.0495044677250197, "grad_norm": 0.451171875, "learning_rate": 0.0016663999155367376, "loss": 0.2034, "step": 27920 }, { "epoch": 0.04950801389032952, "grad_norm": 1.8046875, "learning_rate": 0.0016663536602403192, "loss": 0.2747, "step": 27922 }, { "epoch": 0.049511560055639336, "grad_norm": 0.67578125, "learning_rate": 0.0016663074024669989, "loss": 0.2248, "step": 27924 }, { "epoch": 0.04951510622094915, "grad_norm": 1.890625, "learning_rate": 0.001666261142216979, "loss": 0.3508, "step": 27926 }, { "epoch": 0.049518652386258966, "grad_norm": 0.55078125, "learning_rate": 0.0016662148794904614, "loss": 0.2417, "step": 27928 }, { "epoch": 0.04952219855156878, "grad_norm": 0.39453125, "learning_rate": 0.0016661686142876494, "loss": 0.1728, "step": 27930 }, { "epoch": 0.049525744716878595, "grad_norm": 9.625, "learning_rate": 0.001666122346608744, "loss": 0.2835, "step": 27932 }, { "epoch": 0.04952929088218841, "grad_norm": 1.265625, "learning_rate": 0.001666076076453949, "loss": 0.2367, "step": 27934 }, { "epoch": 0.049532837047498224, "grad_norm": 0.89453125, "learning_rate": 0.0016660298038234658, "loss": 0.4485, "step": 27936 }, { "epoch": 0.04953638321280804, "grad_norm": 0.91015625, "learning_rate": 0.0016659835287174969, "loss": 0.4221, "step": 27938 }, { "epoch": 0.04953992937811785, "grad_norm": 3.296875, "learning_rate": 0.0016659372511362448, "loss": 0.224, "step": 27940 }, { "epoch": 0.04954347554342767, "grad_norm": 0.5546875, "learning_rate": 0.001665890971079912, "loss": 0.2089, "step": 27942 }, { "epoch": 0.04954702170873748, "grad_norm": 0.50390625, "learning_rate": 0.0016658446885487008, "loss": 0.2687, "step": 27944 }, { "epoch": 0.049550567874047304, "grad_norm": 0.462890625, "learning_rate": 0.0016657984035428135, "loss": 0.1956, "step": 27946 }, { "epoch": 0.04955411403935712, "grad_norm": 2.921875, "learning_rate": 0.001665752116062453, "loss": 0.5581, "step": 27948 }, { "epoch": 0.04955766020466693, "grad_norm": 0.71875, "learning_rate": 0.0016657058261078212, "loss": 0.5145, "step": 27950 }, { "epoch": 0.04956120636997675, "grad_norm": 1.4609375, "learning_rate": 0.0016656595336791205, "loss": 0.2503, "step": 27952 }, { "epoch": 0.04956475253528656, "grad_norm": 0.4765625, "learning_rate": 0.0016656132387765537, "loss": 0.164, "step": 27954 }, { "epoch": 0.049568298700596376, "grad_norm": 0.73828125, "learning_rate": 0.001665566941400323, "loss": 0.2543, "step": 27956 }, { "epoch": 0.04957184486590619, "grad_norm": 0.61328125, "learning_rate": 0.0016655206415506312, "loss": 0.1681, "step": 27958 }, { "epoch": 0.049575391031216005, "grad_norm": 0.79296875, "learning_rate": 0.0016654743392276808, "loss": 0.2143, "step": 27960 }, { "epoch": 0.04957893719652582, "grad_norm": 1.0078125, "learning_rate": 0.0016654280344316737, "loss": 0.22, "step": 27962 }, { "epoch": 0.049582483361835634, "grad_norm": 0.5234375, "learning_rate": 0.0016653817271628133, "loss": 0.1959, "step": 27964 }, { "epoch": 0.04958602952714545, "grad_norm": 4.125, "learning_rate": 0.0016653354174213009, "loss": 0.2416, "step": 27966 }, { "epoch": 0.04958957569245527, "grad_norm": 0.90234375, "learning_rate": 0.0016652891052073404, "loss": 0.2009, "step": 27968 }, { "epoch": 0.049593121857765085, "grad_norm": 0.2578125, "learning_rate": 0.0016652427905211335, "loss": 0.1838, "step": 27970 }, { "epoch": 0.0495966680230749, "grad_norm": 0.6484375, "learning_rate": 0.001665196473362883, "loss": 0.2672, "step": 27972 }, { "epoch": 0.049600214188384714, "grad_norm": 0.41796875, "learning_rate": 0.0016651501537327915, "loss": 0.2306, "step": 27974 }, { "epoch": 0.04960376035369453, "grad_norm": 0.6484375, "learning_rate": 0.0016651038316310616, "loss": 0.2079, "step": 27976 }, { "epoch": 0.04960730651900434, "grad_norm": 2.0, "learning_rate": 0.0016650575070578952, "loss": 0.2249, "step": 27978 }, { "epoch": 0.04961085268431416, "grad_norm": 0.234375, "learning_rate": 0.0016650111800134962, "loss": 0.1921, "step": 27980 }, { "epoch": 0.04961439884962397, "grad_norm": 0.7734375, "learning_rate": 0.0016649648504980662, "loss": 0.2214, "step": 27982 }, { "epoch": 0.04961794501493379, "grad_norm": 0.447265625, "learning_rate": 0.0016649185185118082, "loss": 0.2072, "step": 27984 }, { "epoch": 0.0496214911802436, "grad_norm": 2.9375, "learning_rate": 0.0016648721840549245, "loss": 0.1715, "step": 27986 }, { "epoch": 0.049625037345553416, "grad_norm": 1.453125, "learning_rate": 0.001664825847127618, "loss": 0.1837, "step": 27988 }, { "epoch": 0.04962858351086324, "grad_norm": 1.1484375, "learning_rate": 0.0016647795077300918, "loss": 0.3115, "step": 27990 }, { "epoch": 0.04963212967617305, "grad_norm": 0.64453125, "learning_rate": 0.0016647331658625477, "loss": 0.1602, "step": 27992 }, { "epoch": 0.04963567584148287, "grad_norm": 0.5546875, "learning_rate": 0.0016646868215251889, "loss": 0.2095, "step": 27994 }, { "epoch": 0.04963922200679268, "grad_norm": 0.8359375, "learning_rate": 0.001664640474718218, "loss": 0.2153, "step": 27996 }, { "epoch": 0.049642768172102496, "grad_norm": 2.03125, "learning_rate": 0.0016645941254418376, "loss": 0.3239, "step": 27998 }, { "epoch": 0.04964631433741231, "grad_norm": 0.83203125, "learning_rate": 0.0016645477736962502, "loss": 0.1795, "step": 28000 }, { "epoch": 0.049649860502722125, "grad_norm": 5.375, "learning_rate": 0.001664501419481659, "loss": 0.2481, "step": 28002 }, { "epoch": 0.04965340666803194, "grad_norm": 2.171875, "learning_rate": 0.0016644550627982666, "loss": 0.4749, "step": 28004 }, { "epoch": 0.049656952833341754, "grad_norm": 0.283203125, "learning_rate": 0.0016644087036462754, "loss": 0.2166, "step": 28006 }, { "epoch": 0.04966049899865157, "grad_norm": 2.0625, "learning_rate": 0.0016643623420258885, "loss": 0.2171, "step": 28008 }, { "epoch": 0.04966404516396138, "grad_norm": 2.453125, "learning_rate": 0.0016643159779373084, "loss": 0.232, "step": 28010 }, { "epoch": 0.0496675913292712, "grad_norm": 1.234375, "learning_rate": 0.001664269611380738, "loss": 0.1775, "step": 28012 }, { "epoch": 0.04967113749458102, "grad_norm": 0.6796875, "learning_rate": 0.0016642232423563802, "loss": 0.2393, "step": 28014 }, { "epoch": 0.049674683659890834, "grad_norm": 0.53515625, "learning_rate": 0.0016641768708644377, "loss": 0.1786, "step": 28016 }, { "epoch": 0.04967822982520065, "grad_norm": 0.435546875, "learning_rate": 0.0016641304969051134, "loss": 0.3051, "step": 28018 }, { "epoch": 0.04968177599051046, "grad_norm": 1.0625, "learning_rate": 0.0016640841204786099, "loss": 0.2987, "step": 28020 }, { "epoch": 0.04968532215582028, "grad_norm": 3.140625, "learning_rate": 0.0016640377415851298, "loss": 0.3208, "step": 28022 }, { "epoch": 0.04968886832113009, "grad_norm": 0.625, "learning_rate": 0.0016639913602248767, "loss": 0.2423, "step": 28024 }, { "epoch": 0.049692414486439906, "grad_norm": 0.57421875, "learning_rate": 0.0016639449763980528, "loss": 0.2448, "step": 28026 }, { "epoch": 0.04969596065174972, "grad_norm": 0.275390625, "learning_rate": 0.001663898590104861, "loss": 0.2189, "step": 28028 }, { "epoch": 0.049699506817059536, "grad_norm": 0.72265625, "learning_rate": 0.0016638522013455047, "loss": 0.1954, "step": 28030 }, { "epoch": 0.04970305298236935, "grad_norm": 1.0078125, "learning_rate": 0.001663805810120186, "loss": 0.2716, "step": 28032 }, { "epoch": 0.049706599147679165, "grad_norm": 0.50390625, "learning_rate": 0.0016637594164291083, "loss": 0.2058, "step": 28034 }, { "epoch": 0.049710145312988986, "grad_norm": 2.859375, "learning_rate": 0.0016637130202724745, "loss": 0.1881, "step": 28036 }, { "epoch": 0.0497136914782988, "grad_norm": 0.92578125, "learning_rate": 0.0016636666216504877, "loss": 0.2078, "step": 28038 }, { "epoch": 0.049717237643608615, "grad_norm": 1.0390625, "learning_rate": 0.00166362022056335, "loss": 0.1782, "step": 28040 }, { "epoch": 0.04972078380891843, "grad_norm": 1.0703125, "learning_rate": 0.0016635738170112649, "loss": 0.3353, "step": 28042 }, { "epoch": 0.049724329974228244, "grad_norm": 0.67578125, "learning_rate": 0.0016635274109944352, "loss": 0.2159, "step": 28044 }, { "epoch": 0.04972787613953806, "grad_norm": 0.24609375, "learning_rate": 0.0016634810025130643, "loss": 0.1879, "step": 28046 }, { "epoch": 0.04973142230484787, "grad_norm": 0.337890625, "learning_rate": 0.0016634345915673547, "loss": 0.2126, "step": 28048 }, { "epoch": 0.04973496847015769, "grad_norm": 2.4375, "learning_rate": 0.0016633881781575094, "loss": 0.2666, "step": 28050 }, { "epoch": 0.0497385146354675, "grad_norm": 1.8203125, "learning_rate": 0.0016633417622837312, "loss": 0.2933, "step": 28052 }, { "epoch": 0.04974206080077732, "grad_norm": 0.416015625, "learning_rate": 0.0016632953439462237, "loss": 0.2003, "step": 28054 }, { "epoch": 0.04974560696608713, "grad_norm": 0.87109375, "learning_rate": 0.0016632489231451896, "loss": 0.2298, "step": 28056 }, { "epoch": 0.04974915313139695, "grad_norm": 1.2578125, "learning_rate": 0.0016632024998808317, "loss": 0.2036, "step": 28058 }, { "epoch": 0.04975269929670677, "grad_norm": 0.30859375, "learning_rate": 0.0016631560741533532, "loss": 0.1691, "step": 28060 }, { "epoch": 0.04975624546201658, "grad_norm": 1.9375, "learning_rate": 0.0016631096459629572, "loss": 0.3316, "step": 28062 }, { "epoch": 0.0497597916273264, "grad_norm": 0.6484375, "learning_rate": 0.0016630632153098468, "loss": 0.2299, "step": 28064 }, { "epoch": 0.04976333779263621, "grad_norm": 4.09375, "learning_rate": 0.001663016782194225, "loss": 0.2308, "step": 28066 }, { "epoch": 0.049766883957946026, "grad_norm": 0.423828125, "learning_rate": 0.0016629703466162949, "loss": 0.1887, "step": 28068 }, { "epoch": 0.04977043012325584, "grad_norm": 0.408203125, "learning_rate": 0.0016629239085762591, "loss": 0.1918, "step": 28070 }, { "epoch": 0.049773976288565655, "grad_norm": 0.60546875, "learning_rate": 0.0016628774680743216, "loss": 0.2079, "step": 28072 }, { "epoch": 0.04977752245387547, "grad_norm": 1.296875, "learning_rate": 0.0016628310251106846, "loss": 0.1874, "step": 28074 }, { "epoch": 0.049781068619185284, "grad_norm": 0.310546875, "learning_rate": 0.0016627845796855518, "loss": 0.2035, "step": 28076 }, { "epoch": 0.0497846147844951, "grad_norm": 1.328125, "learning_rate": 0.001662738131799126, "loss": 0.2102, "step": 28078 }, { "epoch": 0.04978816094980491, "grad_norm": 1.0234375, "learning_rate": 0.0016626916814516108, "loss": 0.2039, "step": 28080 }, { "epoch": 0.049791707115114735, "grad_norm": 1.53125, "learning_rate": 0.0016626452286432089, "loss": 0.2385, "step": 28082 }, { "epoch": 0.04979525328042455, "grad_norm": 1.4921875, "learning_rate": 0.0016625987733741234, "loss": 0.2306, "step": 28084 }, { "epoch": 0.049798799445734364, "grad_norm": 0.40234375, "learning_rate": 0.001662552315644558, "loss": 0.1806, "step": 28086 }, { "epoch": 0.04980234561104418, "grad_norm": 0.74609375, "learning_rate": 0.0016625058554547152, "loss": 0.2605, "step": 28088 }, { "epoch": 0.04980589177635399, "grad_norm": 0.984375, "learning_rate": 0.001662459392804799, "loss": 0.1683, "step": 28090 }, { "epoch": 0.04980943794166381, "grad_norm": 0.38671875, "learning_rate": 0.0016624129276950118, "loss": 0.1945, "step": 28092 }, { "epoch": 0.04981298410697362, "grad_norm": 1.7109375, "learning_rate": 0.0016623664601255572, "loss": 0.2158, "step": 28094 }, { "epoch": 0.04981653027228344, "grad_norm": 0.515625, "learning_rate": 0.0016623199900966381, "loss": 0.2156, "step": 28096 }, { "epoch": 0.04982007643759325, "grad_norm": 0.83984375, "learning_rate": 0.0016622735176084582, "loss": 0.175, "step": 28098 }, { "epoch": 0.049823622602903066, "grad_norm": 1.515625, "learning_rate": 0.0016622270426612205, "loss": 0.2303, "step": 28100 }, { "epoch": 0.04982716876821288, "grad_norm": 0.71875, "learning_rate": 0.0016621805652551288, "loss": 0.2442, "step": 28102 }, { "epoch": 0.0498307149335227, "grad_norm": 0.62109375, "learning_rate": 0.0016621340853903853, "loss": 0.249, "step": 28104 }, { "epoch": 0.049834261098832516, "grad_norm": 0.59765625, "learning_rate": 0.0016620876030671939, "loss": 0.2068, "step": 28106 }, { "epoch": 0.04983780726414233, "grad_norm": 0.4140625, "learning_rate": 0.0016620411182857579, "loss": 0.2047, "step": 28108 }, { "epoch": 0.049841353429452145, "grad_norm": 0.90234375, "learning_rate": 0.0016619946310462805, "loss": 0.237, "step": 28110 }, { "epoch": 0.04984489959476196, "grad_norm": 0.59375, "learning_rate": 0.001661948141348965, "loss": 0.1792, "step": 28112 }, { "epoch": 0.049848445760071775, "grad_norm": 0.8828125, "learning_rate": 0.001661901649194015, "loss": 0.2811, "step": 28114 }, { "epoch": 0.04985199192538159, "grad_norm": 1.5859375, "learning_rate": 0.0016618551545816334, "loss": 0.2571, "step": 28116 }, { "epoch": 0.049855538090691404, "grad_norm": 1.78125, "learning_rate": 0.0016618086575120234, "loss": 0.2839, "step": 28118 }, { "epoch": 0.04985908425600122, "grad_norm": 0.66015625, "learning_rate": 0.0016617621579853893, "loss": 0.1873, "step": 28120 }, { "epoch": 0.04986263042131103, "grad_norm": 0.984375, "learning_rate": 0.0016617156560019334, "loss": 0.3298, "step": 28122 }, { "epoch": 0.04986617658662085, "grad_norm": 2.34375, "learning_rate": 0.0016616691515618594, "loss": 0.1956, "step": 28124 }, { "epoch": 0.04986972275193067, "grad_norm": 0.228515625, "learning_rate": 0.001661622644665371, "loss": 0.2449, "step": 28126 }, { "epoch": 0.04987326891724048, "grad_norm": 0.423828125, "learning_rate": 0.0016615761353126716, "loss": 0.1842, "step": 28128 }, { "epoch": 0.0498768150825503, "grad_norm": 0.40625, "learning_rate": 0.0016615296235039639, "loss": 0.2034, "step": 28130 }, { "epoch": 0.04988036124786011, "grad_norm": 0.66796875, "learning_rate": 0.001661483109239452, "loss": 0.1687, "step": 28132 }, { "epoch": 0.04988390741316993, "grad_norm": 0.39453125, "learning_rate": 0.0016614365925193392, "loss": 0.1741, "step": 28134 }, { "epoch": 0.04988745357847974, "grad_norm": 5.09375, "learning_rate": 0.001661390073343829, "loss": 0.3037, "step": 28136 }, { "epoch": 0.049890999743789556, "grad_norm": 0.392578125, "learning_rate": 0.0016613435517131244, "loss": 0.1983, "step": 28138 }, { "epoch": 0.04989454590909937, "grad_norm": 0.56640625, "learning_rate": 0.0016612970276274296, "loss": 0.2035, "step": 28140 }, { "epoch": 0.049898092074409185, "grad_norm": 1.421875, "learning_rate": 0.0016612505010869473, "loss": 0.2186, "step": 28142 }, { "epoch": 0.049901638239719, "grad_norm": 0.6875, "learning_rate": 0.0016612039720918812, "loss": 0.2084, "step": 28144 }, { "epoch": 0.049905184405028814, "grad_norm": 0.251953125, "learning_rate": 0.001661157440642435, "loss": 0.1901, "step": 28146 }, { "epoch": 0.04990873057033863, "grad_norm": 4.53125, "learning_rate": 0.0016611109067388124, "loss": 0.391, "step": 28148 }, { "epoch": 0.04991227673564845, "grad_norm": 0.40625, "learning_rate": 0.0016610643703812162, "loss": 0.1475, "step": 28150 }, { "epoch": 0.049915822900958265, "grad_norm": 0.470703125, "learning_rate": 0.001661017831569851, "loss": 0.228, "step": 28152 }, { "epoch": 0.04991936906626808, "grad_norm": 0.353515625, "learning_rate": 0.001660971290304919, "loss": 0.2312, "step": 28154 }, { "epoch": 0.049922915231577894, "grad_norm": 0.3671875, "learning_rate": 0.0016609247465866248, "loss": 0.1747, "step": 28156 }, { "epoch": 0.04992646139688771, "grad_norm": 0.3203125, "learning_rate": 0.0016608782004151714, "loss": 0.1633, "step": 28158 }, { "epoch": 0.04993000756219752, "grad_norm": 0.7890625, "learning_rate": 0.0016608316517907628, "loss": 0.2399, "step": 28160 }, { "epoch": 0.04993355372750734, "grad_norm": 1.78125, "learning_rate": 0.001660785100713602, "loss": 0.386, "step": 28162 }, { "epoch": 0.04993709989281715, "grad_norm": 3.421875, "learning_rate": 0.0016607385471838932, "loss": 0.3376, "step": 28164 }, { "epoch": 0.04994064605812697, "grad_norm": 0.4296875, "learning_rate": 0.0016606919912018397, "loss": 0.197, "step": 28166 }, { "epoch": 0.04994419222343678, "grad_norm": 0.474609375, "learning_rate": 0.0016606454327676447, "loss": 0.2007, "step": 28168 }, { "epoch": 0.049947738388746596, "grad_norm": 0.458984375, "learning_rate": 0.0016605988718815126, "loss": 0.2016, "step": 28170 }, { "epoch": 0.04995128455405642, "grad_norm": 0.455078125, "learning_rate": 0.0016605523085436466, "loss": 0.2106, "step": 28172 }, { "epoch": 0.04995483071936623, "grad_norm": 2.390625, "learning_rate": 0.0016605057427542505, "loss": 0.3039, "step": 28174 }, { "epoch": 0.049958376884676046, "grad_norm": 0.48046875, "learning_rate": 0.0016604591745135277, "loss": 0.1948, "step": 28176 }, { "epoch": 0.04996192304998586, "grad_norm": 0.423828125, "learning_rate": 0.0016604126038216823, "loss": 0.1596, "step": 28178 }, { "epoch": 0.049965469215295676, "grad_norm": 0.72265625, "learning_rate": 0.0016603660306789174, "loss": 0.2016, "step": 28180 }, { "epoch": 0.04996901538060549, "grad_norm": 0.353515625, "learning_rate": 0.001660319455085437, "loss": 0.1762, "step": 28182 }, { "epoch": 0.049972561545915305, "grad_norm": 3.984375, "learning_rate": 0.0016602728770414449, "loss": 0.284, "step": 28184 }, { "epoch": 0.04997610771122512, "grad_norm": 0.83984375, "learning_rate": 0.0016602262965471448, "loss": 0.2623, "step": 28186 }, { "epoch": 0.049979653876534934, "grad_norm": 0.73828125, "learning_rate": 0.0016601797136027402, "loss": 0.1973, "step": 28188 }, { "epoch": 0.04998320004184475, "grad_norm": 0.53515625, "learning_rate": 0.0016601331282084348, "loss": 0.1799, "step": 28190 }, { "epoch": 0.04998674620715456, "grad_norm": 0.96484375, "learning_rate": 0.0016600865403644325, "loss": 0.2871, "step": 28192 }, { "epoch": 0.049990292372464384, "grad_norm": 0.275390625, "learning_rate": 0.0016600399500709372, "loss": 0.1651, "step": 28194 }, { "epoch": 0.0499938385377742, "grad_norm": 0.6640625, "learning_rate": 0.0016599933573281521, "loss": 0.2835, "step": 28196 }, { "epoch": 0.049997384703084014, "grad_norm": 0.6015625, "learning_rate": 0.0016599467621362814, "loss": 0.1804, "step": 28198 }, { "epoch": 0.05000093086839383, "grad_norm": 0.3515625, "learning_rate": 0.001659900164495529, "loss": 0.2538, "step": 28200 }, { "epoch": 0.05000447703370364, "grad_norm": 0.42578125, "learning_rate": 0.0016598535644060987, "loss": 0.2363, "step": 28202 }, { "epoch": 0.05000802319901346, "grad_norm": 0.345703125, "learning_rate": 0.0016598069618681938, "loss": 0.2018, "step": 28204 }, { "epoch": 0.05001156936432327, "grad_norm": 2.265625, "learning_rate": 0.0016597603568820182, "loss": 0.2003, "step": 28206 }, { "epoch": 0.050015115529633086, "grad_norm": 0.6640625, "learning_rate": 0.0016597137494477764, "loss": 0.189, "step": 28208 }, { "epoch": 0.0500186616949429, "grad_norm": 1.1171875, "learning_rate": 0.0016596671395656718, "loss": 0.276, "step": 28210 }, { "epoch": 0.050022207860252715, "grad_norm": 0.58203125, "learning_rate": 0.0016596205272359082, "loss": 0.2019, "step": 28212 }, { "epoch": 0.05002575402556253, "grad_norm": 0.84765625, "learning_rate": 0.001659573912458689, "loss": 0.2806, "step": 28214 }, { "epoch": 0.050029300190872344, "grad_norm": 0.34375, "learning_rate": 0.0016595272952342189, "loss": 0.1727, "step": 28216 }, { "epoch": 0.050032846356182166, "grad_norm": 0.55078125, "learning_rate": 0.0016594806755627013, "loss": 0.178, "step": 28218 }, { "epoch": 0.05003639252149198, "grad_norm": 0.93359375, "learning_rate": 0.0016594340534443403, "loss": 0.2368, "step": 28220 }, { "epoch": 0.050039938686801795, "grad_norm": 0.310546875, "learning_rate": 0.0016593874288793398, "loss": 0.1954, "step": 28222 }, { "epoch": 0.05004348485211161, "grad_norm": 0.59375, "learning_rate": 0.0016593408018679035, "loss": 0.1724, "step": 28224 }, { "epoch": 0.050047031017421424, "grad_norm": 4.8125, "learning_rate": 0.0016592941724102356, "loss": 0.2349, "step": 28226 }, { "epoch": 0.05005057718273124, "grad_norm": 0.62109375, "learning_rate": 0.0016592475405065397, "loss": 0.196, "step": 28228 }, { "epoch": 0.05005412334804105, "grad_norm": 0.31640625, "learning_rate": 0.0016592009061570199, "loss": 0.1943, "step": 28230 }, { "epoch": 0.05005766951335087, "grad_norm": 0.5078125, "learning_rate": 0.0016591542693618802, "loss": 0.1971, "step": 28232 }, { "epoch": 0.05006121567866068, "grad_norm": 0.388671875, "learning_rate": 0.0016591076301213246, "loss": 0.2035, "step": 28234 }, { "epoch": 0.0500647618439705, "grad_norm": 1.1484375, "learning_rate": 0.0016590609884355569, "loss": 0.2031, "step": 28236 }, { "epoch": 0.05006830800928031, "grad_norm": 0.45703125, "learning_rate": 0.0016590143443047812, "loss": 0.1875, "step": 28238 }, { "epoch": 0.05007185417459013, "grad_norm": 0.8671875, "learning_rate": 0.0016589676977292011, "loss": 0.2142, "step": 28240 }, { "epoch": 0.05007540033989995, "grad_norm": 0.408203125, "learning_rate": 0.001658921048709022, "loss": 0.1999, "step": 28242 }, { "epoch": 0.05007894650520976, "grad_norm": 0.486328125, "learning_rate": 0.001658874397244446, "loss": 0.5444, "step": 28244 }, { "epoch": 0.05008249267051958, "grad_norm": 0.765625, "learning_rate": 0.0016588277433356783, "loss": 0.1623, "step": 28246 }, { "epoch": 0.05008603883582939, "grad_norm": 0.435546875, "learning_rate": 0.0016587810869829226, "loss": 0.2599, "step": 28248 }, { "epoch": 0.050089585001139206, "grad_norm": 0.65234375, "learning_rate": 0.001658734428186383, "loss": 0.1847, "step": 28250 }, { "epoch": 0.05009313116644902, "grad_norm": 0.2333984375, "learning_rate": 0.0016586877669462635, "loss": 0.2075, "step": 28252 }, { "epoch": 0.050096677331758835, "grad_norm": 0.216796875, "learning_rate": 0.001658641103262768, "loss": 0.1435, "step": 28254 }, { "epoch": 0.05010022349706865, "grad_norm": 0.64453125, "learning_rate": 0.0016585944371361012, "loss": 0.3204, "step": 28256 }, { "epoch": 0.050103769662378464, "grad_norm": 0.62109375, "learning_rate": 0.001658547768566467, "loss": 0.4185, "step": 28258 }, { "epoch": 0.05010731582768828, "grad_norm": 0.357421875, "learning_rate": 0.0016585010975540691, "loss": 0.1776, "step": 28260 }, { "epoch": 0.0501108619929981, "grad_norm": 0.56640625, "learning_rate": 0.0016584544240991117, "loss": 0.2571, "step": 28262 }, { "epoch": 0.050114408158307915, "grad_norm": 0.62109375, "learning_rate": 0.0016584077482017989, "loss": 0.2108, "step": 28264 }, { "epoch": 0.05011795432361773, "grad_norm": 3.015625, "learning_rate": 0.0016583610698623354, "loss": 0.3322, "step": 28266 }, { "epoch": 0.050121500488927544, "grad_norm": 0.2138671875, "learning_rate": 0.0016583143890809245, "loss": 0.2296, "step": 28268 }, { "epoch": 0.05012504665423736, "grad_norm": 0.2490234375, "learning_rate": 0.0016582677058577708, "loss": 0.1322, "step": 28270 }, { "epoch": 0.05012859281954717, "grad_norm": 1.1875, "learning_rate": 0.0016582210201930789, "loss": 0.2008, "step": 28272 }, { "epoch": 0.05013213898485699, "grad_norm": 0.45703125, "learning_rate": 0.0016581743320870522, "loss": 0.1876, "step": 28274 }, { "epoch": 0.0501356851501668, "grad_norm": 0.984375, "learning_rate": 0.001658127641539895, "loss": 0.238, "step": 28276 }, { "epoch": 0.050139231315476616, "grad_norm": 0.66796875, "learning_rate": 0.0016580809485518123, "loss": 0.1863, "step": 28278 }, { "epoch": 0.05014277748078643, "grad_norm": 0.67578125, "learning_rate": 0.0016580342531230068, "loss": 0.2175, "step": 28280 }, { "epoch": 0.050146323646096246, "grad_norm": 1.0625, "learning_rate": 0.0016579875552536843, "loss": 0.1796, "step": 28282 }, { "epoch": 0.05014986981140606, "grad_norm": 1.8203125, "learning_rate": 0.001657940854944048, "loss": 0.3023, "step": 28284 }, { "epoch": 0.05015341597671588, "grad_norm": 0.3671875, "learning_rate": 0.0016578941521943028, "loss": 0.2615, "step": 28286 }, { "epoch": 0.050156962142025696, "grad_norm": 2.15625, "learning_rate": 0.0016578474470046525, "loss": 0.2042, "step": 28288 }, { "epoch": 0.05016050830733551, "grad_norm": 0.62890625, "learning_rate": 0.0016578007393753015, "loss": 0.178, "step": 28290 }, { "epoch": 0.050164054472645325, "grad_norm": 1.0234375, "learning_rate": 0.001657754029306454, "loss": 0.4545, "step": 28292 }, { "epoch": 0.05016760063795514, "grad_norm": 0.921875, "learning_rate": 0.0016577073167983146, "loss": 0.2084, "step": 28294 }, { "epoch": 0.050171146803264954, "grad_norm": 2.140625, "learning_rate": 0.001657660601851087, "loss": 0.2231, "step": 28296 }, { "epoch": 0.05017469296857477, "grad_norm": 6.46875, "learning_rate": 0.001657613884464976, "loss": 0.2285, "step": 28298 }, { "epoch": 0.05017823913388458, "grad_norm": 1.375, "learning_rate": 0.001657567164640186, "loss": 0.2802, "step": 28300 }, { "epoch": 0.0501817852991944, "grad_norm": 0.7578125, "learning_rate": 0.0016575204423769205, "loss": 0.2527, "step": 28302 }, { "epoch": 0.05018533146450421, "grad_norm": 1.0234375, "learning_rate": 0.0016574737176753849, "loss": 0.2037, "step": 28304 }, { "epoch": 0.05018887762981403, "grad_norm": 0.3046875, "learning_rate": 0.001657426990535783, "loss": 0.1911, "step": 28306 }, { "epoch": 0.05019242379512385, "grad_norm": 0.59765625, "learning_rate": 0.0016573802609583196, "loss": 0.2783, "step": 28308 }, { "epoch": 0.05019596996043366, "grad_norm": 0.423828125, "learning_rate": 0.001657333528943198, "loss": 0.2279, "step": 28310 }, { "epoch": 0.05019951612574348, "grad_norm": 1.2109375, "learning_rate": 0.0016572867944906241, "loss": 0.2399, "step": 28312 }, { "epoch": 0.05020306229105329, "grad_norm": 2.921875, "learning_rate": 0.0016572400576008004, "loss": 0.3888, "step": 28314 }, { "epoch": 0.05020660845636311, "grad_norm": 0.23828125, "learning_rate": 0.0016571933182739332, "loss": 0.1937, "step": 28316 }, { "epoch": 0.05021015462167292, "grad_norm": 0.42578125, "learning_rate": 0.0016571465765102258, "loss": 0.2018, "step": 28318 }, { "epoch": 0.050213700786982736, "grad_norm": 1.4296875, "learning_rate": 0.0016570998323098831, "loss": 0.2899, "step": 28320 }, { "epoch": 0.05021724695229255, "grad_norm": 0.671875, "learning_rate": 0.0016570530856731091, "loss": 0.2695, "step": 28322 }, { "epoch": 0.050220793117602365, "grad_norm": 1.8515625, "learning_rate": 0.0016570063366001086, "loss": 0.198, "step": 28324 }, { "epoch": 0.05022433928291218, "grad_norm": 0.4296875, "learning_rate": 0.0016569595850910862, "loss": 0.2423, "step": 28326 }, { "epoch": 0.050227885448221994, "grad_norm": 0.58203125, "learning_rate": 0.0016569128311462455, "loss": 0.2095, "step": 28328 }, { "epoch": 0.050231431613531816, "grad_norm": 3.15625, "learning_rate": 0.0016568660747657918, "loss": 0.3738, "step": 28330 }, { "epoch": 0.05023497777884163, "grad_norm": 0.271484375, "learning_rate": 0.0016568193159499295, "loss": 0.1645, "step": 28332 }, { "epoch": 0.050238523944151445, "grad_norm": 0.61328125, "learning_rate": 0.001656772554698863, "loss": 0.1884, "step": 28334 }, { "epoch": 0.05024207010946126, "grad_norm": 1.8515625, "learning_rate": 0.0016567257910127967, "loss": 0.2278, "step": 28336 }, { "epoch": 0.050245616274771074, "grad_norm": 0.66015625, "learning_rate": 0.001656679024891935, "loss": 0.1277, "step": 28338 }, { "epoch": 0.05024916244008089, "grad_norm": 0.85546875, "learning_rate": 0.0016566322563364828, "loss": 0.1521, "step": 28340 }, { "epoch": 0.0502527086053907, "grad_norm": 0.671875, "learning_rate": 0.0016565854853466443, "loss": 0.2182, "step": 28342 }, { "epoch": 0.05025625477070052, "grad_norm": 0.6640625, "learning_rate": 0.0016565387119226243, "loss": 0.3202, "step": 28344 }, { "epoch": 0.05025980093601033, "grad_norm": 1.25, "learning_rate": 0.001656491936064627, "loss": 0.4765, "step": 28346 }, { "epoch": 0.05026334710132015, "grad_norm": 1.4921875, "learning_rate": 0.0016564451577728574, "loss": 0.2112, "step": 28348 }, { "epoch": 0.05026689326662996, "grad_norm": 2.1875, "learning_rate": 0.0016563983770475198, "loss": 0.1927, "step": 28350 }, { "epoch": 0.050270439431939776, "grad_norm": 0.478515625, "learning_rate": 0.0016563515938888191, "loss": 0.2089, "step": 28352 }, { "epoch": 0.0502739855972496, "grad_norm": 1.078125, "learning_rate": 0.0016563048082969597, "loss": 0.2637, "step": 28354 }, { "epoch": 0.05027753176255941, "grad_norm": 1.2734375, "learning_rate": 0.0016562580202721462, "loss": 0.2384, "step": 28356 }, { "epoch": 0.050281077927869226, "grad_norm": 0.51171875, "learning_rate": 0.001656211229814583, "loss": 0.2029, "step": 28358 }, { "epoch": 0.05028462409317904, "grad_norm": 0.515625, "learning_rate": 0.001656164436924475, "loss": 0.1587, "step": 28360 }, { "epoch": 0.050288170258488855, "grad_norm": 0.58984375, "learning_rate": 0.0016561176416020269, "loss": 0.1497, "step": 28362 }, { "epoch": 0.05029171642379867, "grad_norm": 1.0078125, "learning_rate": 0.0016560708438474435, "loss": 0.2094, "step": 28364 }, { "epoch": 0.050295262589108485, "grad_norm": 0.44140625, "learning_rate": 0.001656024043660929, "loss": 0.1797, "step": 28366 }, { "epoch": 0.0502988087544183, "grad_norm": 2.546875, "learning_rate": 0.0016559772410426883, "loss": 0.2939, "step": 28368 }, { "epoch": 0.050302354919728114, "grad_norm": 2.71875, "learning_rate": 0.0016559304359929261, "loss": 0.3236, "step": 28370 }, { "epoch": 0.05030590108503793, "grad_norm": 0.2470703125, "learning_rate": 0.0016558836285118473, "loss": 0.313, "step": 28372 }, { "epoch": 0.05030944725034774, "grad_norm": 1.0546875, "learning_rate": 0.0016558368185996559, "loss": 0.2158, "step": 28374 }, { "epoch": 0.050312993415657564, "grad_norm": 0.921875, "learning_rate": 0.0016557900062565576, "loss": 0.2823, "step": 28376 }, { "epoch": 0.05031653958096738, "grad_norm": 0.447265625, "learning_rate": 0.0016557431914827566, "loss": 0.1624, "step": 28378 }, { "epoch": 0.05032008574627719, "grad_norm": 0.330078125, "learning_rate": 0.0016556963742784577, "loss": 0.1649, "step": 28380 }, { "epoch": 0.05032363191158701, "grad_norm": 0.59375, "learning_rate": 0.0016556495546438653, "loss": 0.2618, "step": 28382 }, { "epoch": 0.05032717807689682, "grad_norm": 1.921875, "learning_rate": 0.001655602732579185, "loss": 0.3033, "step": 28384 }, { "epoch": 0.05033072424220664, "grad_norm": 0.275390625, "learning_rate": 0.0016555559080846207, "loss": 0.1851, "step": 28386 }, { "epoch": 0.05033427040751645, "grad_norm": 0.49609375, "learning_rate": 0.0016555090811603778, "loss": 0.2547, "step": 28388 }, { "epoch": 0.050337816572826266, "grad_norm": 1.21875, "learning_rate": 0.0016554622518066607, "loss": 0.2015, "step": 28390 }, { "epoch": 0.05034136273813608, "grad_norm": 0.28125, "learning_rate": 0.0016554154200236744, "loss": 0.1935, "step": 28392 }, { "epoch": 0.050344908903445895, "grad_norm": 0.486328125, "learning_rate": 0.0016553685858116235, "loss": 0.2092, "step": 28394 }, { "epoch": 0.05034845506875571, "grad_norm": 0.86328125, "learning_rate": 0.0016553217491707133, "loss": 0.2217, "step": 28396 }, { "epoch": 0.05035200123406553, "grad_norm": 0.4296875, "learning_rate": 0.0016552749101011483, "loss": 0.2027, "step": 28398 }, { "epoch": 0.050355547399375346, "grad_norm": 0.439453125, "learning_rate": 0.0016552280686031333, "loss": 0.214, "step": 28400 }, { "epoch": 0.05035909356468516, "grad_norm": 0.68359375, "learning_rate": 0.0016551812246768734, "loss": 0.1738, "step": 28402 }, { "epoch": 0.050362639729994975, "grad_norm": 0.671875, "learning_rate": 0.0016551343783225729, "loss": 0.1951, "step": 28404 }, { "epoch": 0.05036618589530479, "grad_norm": 0.984375, "learning_rate": 0.0016550875295404377, "loss": 0.2178, "step": 28406 }, { "epoch": 0.050369732060614604, "grad_norm": 0.9765625, "learning_rate": 0.0016550406783306716, "loss": 0.3385, "step": 28408 }, { "epoch": 0.05037327822592442, "grad_norm": 0.54296875, "learning_rate": 0.0016549938246934802, "loss": 0.2505, "step": 28410 }, { "epoch": 0.05037682439123423, "grad_norm": 1.4375, "learning_rate": 0.001654946968629068, "loss": 0.2925, "step": 28412 }, { "epoch": 0.05038037055654405, "grad_norm": 0.421875, "learning_rate": 0.0016549001101376405, "loss": 0.2381, "step": 28414 }, { "epoch": 0.05038391672185386, "grad_norm": 0.388671875, "learning_rate": 0.0016548532492194018, "loss": 0.3066, "step": 28416 }, { "epoch": 0.05038746288716368, "grad_norm": 0.349609375, "learning_rate": 0.0016548063858745576, "loss": 0.1539, "step": 28418 }, { "epoch": 0.05039100905247349, "grad_norm": 1.2578125, "learning_rate": 0.0016547595201033122, "loss": 0.2224, "step": 28420 }, { "epoch": 0.05039455521778331, "grad_norm": 0.44921875, "learning_rate": 0.001654712651905871, "loss": 0.1616, "step": 28422 }, { "epoch": 0.05039810138309313, "grad_norm": 0.38671875, "learning_rate": 0.001654665781282439, "loss": 0.2092, "step": 28424 }, { "epoch": 0.05040164754840294, "grad_norm": 1.1015625, "learning_rate": 0.0016546189082332211, "loss": 0.2487, "step": 28426 }, { "epoch": 0.050405193713712756, "grad_norm": 0.703125, "learning_rate": 0.001654572032758422, "loss": 0.3997, "step": 28428 }, { "epoch": 0.05040873987902257, "grad_norm": 0.8125, "learning_rate": 0.0016545251548582471, "loss": 0.1872, "step": 28430 }, { "epoch": 0.050412286044332386, "grad_norm": 0.427734375, "learning_rate": 0.0016544782745329014, "loss": 0.2251, "step": 28432 }, { "epoch": 0.0504158322096422, "grad_norm": 0.375, "learning_rate": 0.00165443139178259, "loss": 0.1772, "step": 28434 }, { "epoch": 0.050419378374952015, "grad_norm": 0.91796875, "learning_rate": 0.001654384506607517, "loss": 0.2359, "step": 28436 }, { "epoch": 0.05042292454026183, "grad_norm": 1.40625, "learning_rate": 0.001654337619007889, "loss": 0.211, "step": 28438 }, { "epoch": 0.050426470705571644, "grad_norm": 0.57421875, "learning_rate": 0.0016542907289839098, "loss": 0.3137, "step": 28440 }, { "epoch": 0.05043001687088146, "grad_norm": 0.54296875, "learning_rate": 0.0016542438365357852, "loss": 0.2254, "step": 28442 }, { "epoch": 0.05043356303619128, "grad_norm": 0.5546875, "learning_rate": 0.0016541969416637198, "loss": 0.231, "step": 28444 }, { "epoch": 0.050437109201501094, "grad_norm": 0.7890625, "learning_rate": 0.001654150044367919, "loss": 0.346, "step": 28446 }, { "epoch": 0.05044065536681091, "grad_norm": 0.42578125, "learning_rate": 0.0016541031446485877, "loss": 0.2195, "step": 28448 }, { "epoch": 0.050444201532120723, "grad_norm": 0.33203125, "learning_rate": 0.001654056242505931, "loss": 0.224, "step": 28450 }, { "epoch": 0.05044774769743054, "grad_norm": 2.078125, "learning_rate": 0.0016540093379401543, "loss": 0.189, "step": 28452 }, { "epoch": 0.05045129386274035, "grad_norm": 1.5234375, "learning_rate": 0.0016539624309514626, "loss": 0.323, "step": 28454 }, { "epoch": 0.05045484002805017, "grad_norm": 0.80078125, "learning_rate": 0.001653915521540061, "loss": 0.2229, "step": 28456 }, { "epoch": 0.05045838619335998, "grad_norm": 0.408203125, "learning_rate": 0.0016538686097061547, "loss": 0.2165, "step": 28458 }, { "epoch": 0.050461932358669796, "grad_norm": 0.6640625, "learning_rate": 0.0016538216954499485, "loss": 0.2192, "step": 28460 }, { "epoch": 0.05046547852397961, "grad_norm": 0.55078125, "learning_rate": 0.0016537747787716483, "loss": 0.255, "step": 28462 }, { "epoch": 0.050469024689289425, "grad_norm": 0.80859375, "learning_rate": 0.0016537278596714584, "loss": 0.223, "step": 28464 }, { "epoch": 0.05047257085459925, "grad_norm": 0.5859375, "learning_rate": 0.001653680938149585, "loss": 0.1777, "step": 28466 }, { "epoch": 0.05047611701990906, "grad_norm": 0.375, "learning_rate": 0.0016536340142062325, "loss": 0.1761, "step": 28468 }, { "epoch": 0.050479663185218876, "grad_norm": 0.79296875, "learning_rate": 0.0016535870878416066, "loss": 0.2346, "step": 28470 }, { "epoch": 0.05048320935052869, "grad_norm": 3.796875, "learning_rate": 0.001653540159055912, "loss": 0.2976, "step": 28472 }, { "epoch": 0.050486755515838505, "grad_norm": 0.71484375, "learning_rate": 0.0016534932278493549, "loss": 0.2349, "step": 28474 }, { "epoch": 0.05049030168114832, "grad_norm": 0.8046875, "learning_rate": 0.0016534462942221393, "loss": 0.1838, "step": 28476 }, { "epoch": 0.050493847846458134, "grad_norm": 0.625, "learning_rate": 0.0016533993581744715, "loss": 0.5133, "step": 28478 }, { "epoch": 0.05049739401176795, "grad_norm": 1.6953125, "learning_rate": 0.001653352419706556, "loss": 0.2398, "step": 28480 }, { "epoch": 0.05050094017707776, "grad_norm": 0.447265625, "learning_rate": 0.0016533054788185986, "loss": 0.2115, "step": 28482 }, { "epoch": 0.05050448634238758, "grad_norm": 0.7421875, "learning_rate": 0.0016532585355108047, "loss": 0.2186, "step": 28484 }, { "epoch": 0.05050803250769739, "grad_norm": 0.46484375, "learning_rate": 0.001653211589783379, "loss": 0.1774, "step": 28486 }, { "epoch": 0.05051157867300721, "grad_norm": 0.47265625, "learning_rate": 0.0016531646416365275, "loss": 0.1943, "step": 28488 }, { "epoch": 0.05051512483831703, "grad_norm": 10.125, "learning_rate": 0.001653117691070455, "loss": 0.3016, "step": 28490 }, { "epoch": 0.05051867100362684, "grad_norm": 0.462890625, "learning_rate": 0.0016530707380853667, "loss": 0.26, "step": 28492 }, { "epoch": 0.05052221716893666, "grad_norm": 0.392578125, "learning_rate": 0.0016530237826814685, "loss": 0.1705, "step": 28494 }, { "epoch": 0.05052576333424647, "grad_norm": 0.6796875, "learning_rate": 0.0016529768248589656, "loss": 0.4452, "step": 28496 }, { "epoch": 0.05052930949955629, "grad_norm": 1.9140625, "learning_rate": 0.0016529298646180634, "loss": 0.1802, "step": 28498 }, { "epoch": 0.0505328556648661, "grad_norm": 0.30859375, "learning_rate": 0.0016528829019589665, "loss": 0.1679, "step": 28500 }, { "epoch": 0.050536401830175916, "grad_norm": 0.2578125, "learning_rate": 0.0016528359368818816, "loss": 0.3359, "step": 28502 }, { "epoch": 0.05053994799548573, "grad_norm": 0.62109375, "learning_rate": 0.001652788969387013, "loss": 0.2422, "step": 28504 }, { "epoch": 0.050543494160795545, "grad_norm": 0.197265625, "learning_rate": 0.0016527419994745668, "loss": 0.1634, "step": 28506 }, { "epoch": 0.05054704032610536, "grad_norm": 0.369140625, "learning_rate": 0.0016526950271447483, "loss": 0.1769, "step": 28508 }, { "epoch": 0.050550586491415174, "grad_norm": 0.21484375, "learning_rate": 0.0016526480523977622, "loss": 0.2307, "step": 28510 }, { "epoch": 0.050554132656724995, "grad_norm": 0.921875, "learning_rate": 0.0016526010752338151, "loss": 0.2101, "step": 28512 }, { "epoch": 0.05055767882203481, "grad_norm": 1.8046875, "learning_rate": 0.0016525540956531118, "loss": 0.1986, "step": 28514 }, { "epoch": 0.050561224987344625, "grad_norm": 0.390625, "learning_rate": 0.001652507113655858, "loss": 0.2451, "step": 28516 }, { "epoch": 0.05056477115265444, "grad_norm": 1.4375, "learning_rate": 0.0016524601292422587, "loss": 0.2841, "step": 28518 }, { "epoch": 0.050568317317964254, "grad_norm": 0.3828125, "learning_rate": 0.0016524131424125199, "loss": 0.4561, "step": 28520 }, { "epoch": 0.05057186348327407, "grad_norm": 0.58984375, "learning_rate": 0.0016523661531668468, "loss": 0.462, "step": 28522 }, { "epoch": 0.05057540964858388, "grad_norm": 0.8828125, "learning_rate": 0.0016523191615054448, "loss": 0.1718, "step": 28524 }, { "epoch": 0.0505789558138937, "grad_norm": 0.6640625, "learning_rate": 0.00165227216742852, "loss": 0.2612, "step": 28526 }, { "epoch": 0.05058250197920351, "grad_norm": 0.408203125, "learning_rate": 0.0016522251709362775, "loss": 0.1762, "step": 28528 }, { "epoch": 0.050586048144513326, "grad_norm": 0.359375, "learning_rate": 0.0016521781720289224, "loss": 0.2132, "step": 28530 }, { "epoch": 0.05058959430982314, "grad_norm": 0.470703125, "learning_rate": 0.0016521311707066613, "loss": 0.2066, "step": 28532 }, { "epoch": 0.05059314047513296, "grad_norm": 0.37109375, "learning_rate": 0.001652084166969699, "loss": 0.2045, "step": 28534 }, { "epoch": 0.05059668664044278, "grad_norm": 0.84765625, "learning_rate": 0.001652037160818241, "loss": 0.1783, "step": 28536 }, { "epoch": 0.05060023280575259, "grad_norm": 1.4609375, "learning_rate": 0.0016519901522524933, "loss": 0.2025, "step": 28538 }, { "epoch": 0.050603778971062406, "grad_norm": 0.6484375, "learning_rate": 0.001651943141272661, "loss": 0.2168, "step": 28540 }, { "epoch": 0.05060732513637222, "grad_norm": 1.6171875, "learning_rate": 0.0016518961278789506, "loss": 0.4143, "step": 28542 }, { "epoch": 0.050610871301682035, "grad_norm": 0.431640625, "learning_rate": 0.0016518491120715666, "loss": 0.1851, "step": 28544 }, { "epoch": 0.05061441746699185, "grad_norm": 0.96484375, "learning_rate": 0.0016518020938507154, "loss": 0.2218, "step": 28546 }, { "epoch": 0.050617963632301664, "grad_norm": 0.53125, "learning_rate": 0.0016517550732166025, "loss": 0.185, "step": 28548 }, { "epoch": 0.05062150979761148, "grad_norm": 0.52734375, "learning_rate": 0.001651708050169433, "loss": 0.2293, "step": 28550 }, { "epoch": 0.05062505596292129, "grad_norm": 0.2451171875, "learning_rate": 0.0016516610247094131, "loss": 0.1684, "step": 28552 }, { "epoch": 0.05062860212823111, "grad_norm": 0.8046875, "learning_rate": 0.0016516139968367485, "loss": 0.1954, "step": 28554 }, { "epoch": 0.05063214829354092, "grad_norm": 0.86328125, "learning_rate": 0.0016515669665516446, "loss": 0.2238, "step": 28556 }, { "epoch": 0.050635694458850744, "grad_norm": 4.0, "learning_rate": 0.0016515199338543072, "loss": 0.3887, "step": 28558 }, { "epoch": 0.05063924062416056, "grad_norm": 0.53125, "learning_rate": 0.001651472898744942, "loss": 0.2151, "step": 28560 }, { "epoch": 0.05064278678947037, "grad_norm": 0.2890625, "learning_rate": 0.0016514258612237544, "loss": 0.2271, "step": 28562 }, { "epoch": 0.05064633295478019, "grad_norm": 7.59375, "learning_rate": 0.0016513788212909508, "loss": 0.2133, "step": 28564 }, { "epoch": 0.05064987912009, "grad_norm": 0.375, "learning_rate": 0.0016513317789467363, "loss": 0.206, "step": 28566 }, { "epoch": 0.05065342528539982, "grad_norm": 0.314453125, "learning_rate": 0.0016512847341913167, "loss": 0.2106, "step": 28568 }, { "epoch": 0.05065697145070963, "grad_norm": 0.3828125, "learning_rate": 0.0016512376870248982, "loss": 0.2003, "step": 28570 }, { "epoch": 0.050660517616019446, "grad_norm": 2.296875, "learning_rate": 0.001651190637447686, "loss": 0.2541, "step": 28572 }, { "epoch": 0.05066406378132926, "grad_norm": 2.0, "learning_rate": 0.0016511435854598864, "loss": 0.342, "step": 28574 }, { "epoch": 0.050667609946639075, "grad_norm": 0.3828125, "learning_rate": 0.0016510965310617047, "loss": 0.1684, "step": 28576 }, { "epoch": 0.05067115611194889, "grad_norm": 0.65234375, "learning_rate": 0.0016510494742533468, "loss": 0.1694, "step": 28578 }, { "epoch": 0.05067470227725871, "grad_norm": 0.96484375, "learning_rate": 0.0016510024150350187, "loss": 0.1838, "step": 28580 }, { "epoch": 0.050678248442568526, "grad_norm": 0.76953125, "learning_rate": 0.001650955353406926, "loss": 0.1979, "step": 28582 }, { "epoch": 0.05068179460787834, "grad_norm": 0.79296875, "learning_rate": 0.001650908289369275, "loss": 0.1963, "step": 28584 }, { "epoch": 0.050685340773188155, "grad_norm": 0.59765625, "learning_rate": 0.001650861222922271, "loss": 0.1702, "step": 28586 }, { "epoch": 0.05068888693849797, "grad_norm": 0.6953125, "learning_rate": 0.0016508141540661197, "loss": 0.2374, "step": 28588 }, { "epoch": 0.050692433103807784, "grad_norm": 0.859375, "learning_rate": 0.0016507670828010276, "loss": 0.2139, "step": 28590 }, { "epoch": 0.0506959792691176, "grad_norm": 0.8671875, "learning_rate": 0.0016507200091272, "loss": 0.1982, "step": 28592 }, { "epoch": 0.05069952543442741, "grad_norm": 0.490234375, "learning_rate": 0.001650672933044843, "loss": 0.1916, "step": 28594 }, { "epoch": 0.05070307159973723, "grad_norm": 1.421875, "learning_rate": 0.0016506258545541622, "loss": 0.1966, "step": 28596 }, { "epoch": 0.05070661776504704, "grad_norm": 0.7890625, "learning_rate": 0.0016505787736553641, "loss": 0.2751, "step": 28598 }, { "epoch": 0.05071016393035686, "grad_norm": 0.625, "learning_rate": 0.0016505316903486543, "loss": 0.1882, "step": 28600 }, { "epoch": 0.05071371009566668, "grad_norm": 2.546875, "learning_rate": 0.0016504846046342383, "loss": 0.4404, "step": 28602 }, { "epoch": 0.05071725626097649, "grad_norm": 0.7265625, "learning_rate": 0.0016504375165123228, "loss": 0.2123, "step": 28604 }, { "epoch": 0.05072080242628631, "grad_norm": 1.3125, "learning_rate": 0.001650390425983113, "loss": 0.1901, "step": 28606 }, { "epoch": 0.05072434859159612, "grad_norm": 0.5390625, "learning_rate": 0.0016503433330468152, "loss": 0.241, "step": 28608 }, { "epoch": 0.050727894756905936, "grad_norm": 0.97265625, "learning_rate": 0.0016502962377036357, "loss": 0.2277, "step": 28610 }, { "epoch": 0.05073144092221575, "grad_norm": 0.5546875, "learning_rate": 0.0016502491399537798, "loss": 0.313, "step": 28612 }, { "epoch": 0.050734987087525565, "grad_norm": 0.7734375, "learning_rate": 0.0016502020397974539, "loss": 0.5285, "step": 28614 }, { "epoch": 0.05073853325283538, "grad_norm": 0.341796875, "learning_rate": 0.0016501549372348642, "loss": 0.1737, "step": 28616 }, { "epoch": 0.050742079418145195, "grad_norm": 0.484375, "learning_rate": 0.0016501078322662158, "loss": 0.1719, "step": 28618 }, { "epoch": 0.05074562558345501, "grad_norm": 1.25, "learning_rate": 0.0016500607248917155, "loss": 0.2139, "step": 28620 }, { "epoch": 0.050749171748764824, "grad_norm": 0.58203125, "learning_rate": 0.001650013615111569, "loss": 0.1895, "step": 28622 }, { "epoch": 0.05075271791407464, "grad_norm": 0.8359375, "learning_rate": 0.0016499665029259824, "loss": 0.3267, "step": 28624 }, { "epoch": 0.05075626407938446, "grad_norm": 0.42578125, "learning_rate": 0.001649919388335162, "loss": 0.1946, "step": 28626 }, { "epoch": 0.050759810244694274, "grad_norm": 0.314453125, "learning_rate": 0.0016498722713393138, "loss": 0.1703, "step": 28628 }, { "epoch": 0.05076335641000409, "grad_norm": 0.369140625, "learning_rate": 0.0016498251519386433, "loss": 0.1619, "step": 28630 }, { "epoch": 0.0507669025753139, "grad_norm": 1.2265625, "learning_rate": 0.001649778030133357, "loss": 0.3634, "step": 28632 }, { "epoch": 0.05077044874062372, "grad_norm": 1.0625, "learning_rate": 0.001649730905923661, "loss": 0.2644, "step": 28634 }, { "epoch": 0.05077399490593353, "grad_norm": 0.7890625, "learning_rate": 0.0016496837793097613, "loss": 0.1897, "step": 28636 }, { "epoch": 0.05077754107124335, "grad_norm": 1.3203125, "learning_rate": 0.001649636650291864, "loss": 0.1791, "step": 28638 }, { "epoch": 0.05078108723655316, "grad_norm": 0.322265625, "learning_rate": 0.0016495895188701755, "loss": 0.1733, "step": 28640 }, { "epoch": 0.050784633401862976, "grad_norm": 0.396484375, "learning_rate": 0.0016495423850449017, "loss": 0.1832, "step": 28642 }, { "epoch": 0.05078817956717279, "grad_norm": 0.349609375, "learning_rate": 0.0016494952488162484, "loss": 0.1928, "step": 28644 }, { "epoch": 0.050791725732482605, "grad_norm": 0.72265625, "learning_rate": 0.0016494481101844225, "loss": 0.1984, "step": 28646 }, { "epoch": 0.05079527189779243, "grad_norm": 0.78125, "learning_rate": 0.0016494009691496296, "loss": 0.2059, "step": 28648 }, { "epoch": 0.05079881806310224, "grad_norm": 0.6640625, "learning_rate": 0.0016493538257120754, "loss": 0.2088, "step": 28650 }, { "epoch": 0.050802364228412056, "grad_norm": 2.0625, "learning_rate": 0.0016493066798719674, "loss": 0.4083, "step": 28652 }, { "epoch": 0.05080591039372187, "grad_norm": 0.625, "learning_rate": 0.001649259531629511, "loss": 0.2082, "step": 28654 }, { "epoch": 0.050809456559031685, "grad_norm": 0.6015625, "learning_rate": 0.0016492123809849122, "loss": 0.3202, "step": 28656 }, { "epoch": 0.0508130027243415, "grad_norm": 2.09375, "learning_rate": 0.0016491652279383771, "loss": 0.2159, "step": 28658 }, { "epoch": 0.050816548889651314, "grad_norm": 0.267578125, "learning_rate": 0.0016491180724901129, "loss": 0.2132, "step": 28660 }, { "epoch": 0.05082009505496113, "grad_norm": 0.45703125, "learning_rate": 0.0016490709146403252, "loss": 0.1729, "step": 28662 }, { "epoch": 0.05082364122027094, "grad_norm": 0.435546875, "learning_rate": 0.0016490237543892198, "loss": 0.2795, "step": 28664 }, { "epoch": 0.05082718738558076, "grad_norm": 3.390625, "learning_rate": 0.0016489765917370037, "loss": 0.3065, "step": 28666 }, { "epoch": 0.05083073355089057, "grad_norm": 0.4921875, "learning_rate": 0.0016489294266838828, "loss": 0.2114, "step": 28668 }, { "epoch": 0.050834279716200394, "grad_norm": 0.267578125, "learning_rate": 0.0016488822592300636, "loss": 0.2118, "step": 28670 }, { "epoch": 0.05083782588151021, "grad_norm": 0.337890625, "learning_rate": 0.001648835089375752, "loss": 0.2019, "step": 28672 }, { "epoch": 0.05084137204682002, "grad_norm": 0.52734375, "learning_rate": 0.001648787917121155, "loss": 0.2566, "step": 28674 }, { "epoch": 0.05084491821212984, "grad_norm": 0.259765625, "learning_rate": 0.001648740742466478, "loss": 0.1995, "step": 28676 }, { "epoch": 0.05084846437743965, "grad_norm": 0.68359375, "learning_rate": 0.0016486935654119275, "loss": 0.184, "step": 28678 }, { "epoch": 0.050852010542749466, "grad_norm": 1.046875, "learning_rate": 0.0016486463859577106, "loss": 0.268, "step": 28680 }, { "epoch": 0.05085555670805928, "grad_norm": 0.287109375, "learning_rate": 0.0016485992041040329, "loss": 0.3128, "step": 28682 }, { "epoch": 0.050859102873369096, "grad_norm": 0.3125, "learning_rate": 0.001648552019851101, "loss": 0.3102, "step": 28684 }, { "epoch": 0.05086264903867891, "grad_norm": 0.26171875, "learning_rate": 0.001648504833199121, "loss": 0.1639, "step": 28686 }, { "epoch": 0.050866195203988725, "grad_norm": 0.263671875, "learning_rate": 0.0016484576441482997, "loss": 0.2285, "step": 28688 }, { "epoch": 0.05086974136929854, "grad_norm": 1.7421875, "learning_rate": 0.0016484104526988427, "loss": 0.2878, "step": 28690 }, { "epoch": 0.050873287534608354, "grad_norm": 0.72265625, "learning_rate": 0.0016483632588509575, "loss": 0.2177, "step": 28692 }, { "epoch": 0.050876833699918175, "grad_norm": 0.474609375, "learning_rate": 0.0016483160626048498, "loss": 0.1657, "step": 28694 }, { "epoch": 0.05088037986522799, "grad_norm": 0.419921875, "learning_rate": 0.0016482688639607264, "loss": 0.1881, "step": 28696 }, { "epoch": 0.050883926030537804, "grad_norm": 1.609375, "learning_rate": 0.0016482216629187933, "loss": 0.2206, "step": 28698 }, { "epoch": 0.05088747219584762, "grad_norm": 0.271484375, "learning_rate": 0.0016481744594792568, "loss": 0.1681, "step": 28700 }, { "epoch": 0.050891018361157433, "grad_norm": 1.8828125, "learning_rate": 0.0016481272536423238, "loss": 0.3243, "step": 28702 }, { "epoch": 0.05089456452646725, "grad_norm": 0.443359375, "learning_rate": 0.0016480800454082008, "loss": 0.1967, "step": 28704 }, { "epoch": 0.05089811069177706, "grad_norm": 0.345703125, "learning_rate": 0.0016480328347770938, "loss": 0.1687, "step": 28706 }, { "epoch": 0.05090165685708688, "grad_norm": 0.6875, "learning_rate": 0.0016479856217492094, "loss": 0.3906, "step": 28708 }, { "epoch": 0.05090520302239669, "grad_norm": 0.365234375, "learning_rate": 0.0016479384063247545, "loss": 0.1591, "step": 28710 }, { "epoch": 0.050908749187706506, "grad_norm": 0.404296875, "learning_rate": 0.0016478911885039352, "loss": 0.2264, "step": 28712 }, { "epoch": 0.05091229535301632, "grad_norm": 0.75, "learning_rate": 0.001647843968286958, "loss": 0.1519, "step": 28714 }, { "epoch": 0.05091584151832614, "grad_norm": 0.330078125, "learning_rate": 0.0016477967456740292, "loss": 0.4789, "step": 28716 }, { "epoch": 0.05091938768363596, "grad_norm": 0.42578125, "learning_rate": 0.0016477495206653563, "loss": 0.2105, "step": 28718 }, { "epoch": 0.05092293384894577, "grad_norm": 0.322265625, "learning_rate": 0.0016477022932611448, "loss": 0.1669, "step": 28720 }, { "epoch": 0.050926480014255586, "grad_norm": 1.1015625, "learning_rate": 0.001647655063461602, "loss": 0.2072, "step": 28722 }, { "epoch": 0.0509300261795654, "grad_norm": 2.328125, "learning_rate": 0.0016476078312669333, "loss": 0.1838, "step": 28724 }, { "epoch": 0.050933572344875215, "grad_norm": 0.484375, "learning_rate": 0.0016475605966773468, "loss": 0.2668, "step": 28726 }, { "epoch": 0.05093711851018503, "grad_norm": 0.88671875, "learning_rate": 0.0016475133596930479, "loss": 0.1725, "step": 28728 }, { "epoch": 0.050940664675494844, "grad_norm": 0.37890625, "learning_rate": 0.0016474661203142436, "loss": 0.1998, "step": 28730 }, { "epoch": 0.05094421084080466, "grad_norm": 1.9296875, "learning_rate": 0.0016474188785411405, "loss": 0.204, "step": 28732 }, { "epoch": 0.05094775700611447, "grad_norm": 1.796875, "learning_rate": 0.0016473716343739454, "loss": 0.2857, "step": 28734 }, { "epoch": 0.05095130317142429, "grad_norm": 0.427734375, "learning_rate": 0.0016473243878128649, "loss": 0.247, "step": 28736 }, { "epoch": 0.05095484933673411, "grad_norm": 0.392578125, "learning_rate": 0.0016472771388581049, "loss": 0.325, "step": 28738 }, { "epoch": 0.050958395502043924, "grad_norm": 0.41796875, "learning_rate": 0.0016472298875098727, "loss": 0.2842, "step": 28740 }, { "epoch": 0.05096194166735374, "grad_norm": 0.87890625, "learning_rate": 0.0016471826337683753, "loss": 0.1386, "step": 28742 }, { "epoch": 0.05096548783266355, "grad_norm": 0.5859375, "learning_rate": 0.0016471353776338182, "loss": 0.1798, "step": 28744 }, { "epoch": 0.05096903399797337, "grad_norm": 0.65234375, "learning_rate": 0.0016470881191064095, "loss": 0.2383, "step": 28746 }, { "epoch": 0.05097258016328318, "grad_norm": 0.46875, "learning_rate": 0.0016470408581863547, "loss": 0.2776, "step": 28748 }, { "epoch": 0.050976126328593, "grad_norm": 0.57421875, "learning_rate": 0.0016469935948738613, "loss": 0.2373, "step": 28750 }, { "epoch": 0.05097967249390281, "grad_norm": 0.41015625, "learning_rate": 0.0016469463291691351, "loss": 0.1622, "step": 28752 }, { "epoch": 0.050983218659212626, "grad_norm": 0.404296875, "learning_rate": 0.001646899061072384, "loss": 0.2334, "step": 28754 }, { "epoch": 0.05098676482452244, "grad_norm": 0.4765625, "learning_rate": 0.0016468517905838136, "loss": 0.1622, "step": 28756 }, { "epoch": 0.050990310989832255, "grad_norm": 0.5625, "learning_rate": 0.0016468045177036315, "loss": 0.1986, "step": 28758 }, { "epoch": 0.05099385715514207, "grad_norm": 0.5, "learning_rate": 0.0016467572424320436, "loss": 0.1887, "step": 28760 }, { "epoch": 0.05099740332045189, "grad_norm": 0.69921875, "learning_rate": 0.0016467099647692574, "loss": 0.182, "step": 28762 }, { "epoch": 0.051000949485761705, "grad_norm": 0.40625, "learning_rate": 0.0016466626847154793, "loss": 0.1693, "step": 28764 }, { "epoch": 0.05100449565107152, "grad_norm": 0.3828125, "learning_rate": 0.0016466154022709164, "loss": 0.1587, "step": 28766 }, { "epoch": 0.051008041816381335, "grad_norm": 1.296875, "learning_rate": 0.001646568117435775, "loss": 0.205, "step": 28768 }, { "epoch": 0.05101158798169115, "grad_norm": 0.408203125, "learning_rate": 0.0016465208302102621, "loss": 0.2307, "step": 28770 }, { "epoch": 0.051015134147000964, "grad_norm": 0.5078125, "learning_rate": 0.0016464735405945846, "loss": 0.1716, "step": 28772 }, { "epoch": 0.05101868031231078, "grad_norm": 0.546875, "learning_rate": 0.0016464262485889492, "loss": 0.1816, "step": 28774 }, { "epoch": 0.05102222647762059, "grad_norm": 1.203125, "learning_rate": 0.001646378954193563, "loss": 0.2674, "step": 28776 }, { "epoch": 0.05102577264293041, "grad_norm": 0.494140625, "learning_rate": 0.0016463316574086325, "loss": 0.1873, "step": 28778 }, { "epoch": 0.05102931880824022, "grad_norm": 1.5234375, "learning_rate": 0.0016462843582343647, "loss": 0.4065, "step": 28780 }, { "epoch": 0.051032864973550036, "grad_norm": 0.78515625, "learning_rate": 0.0016462370566709664, "loss": 0.196, "step": 28782 }, { "epoch": 0.05103641113885986, "grad_norm": 0.7578125, "learning_rate": 0.0016461897527186445, "loss": 0.23, "step": 28784 }, { "epoch": 0.05103995730416967, "grad_norm": 4.21875, "learning_rate": 0.0016461424463776058, "loss": 0.3921, "step": 28786 }, { "epoch": 0.05104350346947949, "grad_norm": 0.53125, "learning_rate": 0.0016460951376480574, "loss": 0.1672, "step": 28788 }, { "epoch": 0.0510470496347893, "grad_norm": 0.5625, "learning_rate": 0.0016460478265302061, "loss": 0.1965, "step": 28790 }, { "epoch": 0.051050595800099116, "grad_norm": 0.56640625, "learning_rate": 0.0016460005130242588, "loss": 0.19, "step": 28792 }, { "epoch": 0.05105414196540893, "grad_norm": 0.439453125, "learning_rate": 0.0016459531971304221, "loss": 0.2144, "step": 28794 }, { "epoch": 0.051057688130718745, "grad_norm": 1.2578125, "learning_rate": 0.0016459058788489036, "loss": 0.1698, "step": 28796 }, { "epoch": 0.05106123429602856, "grad_norm": 0.88671875, "learning_rate": 0.0016458585581799099, "loss": 0.1734, "step": 28798 }, { "epoch": 0.051064780461338374, "grad_norm": 0.78125, "learning_rate": 0.001645811235123648, "loss": 0.2268, "step": 28800 }, { "epoch": 0.05106832662664819, "grad_norm": 0.68359375, "learning_rate": 0.0016457639096803245, "loss": 0.1987, "step": 28802 }, { "epoch": 0.051071872791958, "grad_norm": 2.390625, "learning_rate": 0.001645716581850147, "loss": 0.3474, "step": 28804 }, { "epoch": 0.051075418957267825, "grad_norm": 0.26953125, "learning_rate": 0.0016456692516333216, "loss": 0.1969, "step": 28806 }, { "epoch": 0.05107896512257764, "grad_norm": 0.478515625, "learning_rate": 0.0016456219190300564, "loss": 0.2385, "step": 28808 }, { "epoch": 0.051082511287887454, "grad_norm": 0.2734375, "learning_rate": 0.0016455745840405576, "loss": 0.2877, "step": 28810 }, { "epoch": 0.05108605745319727, "grad_norm": 0.478515625, "learning_rate": 0.001645527246665033, "loss": 0.1883, "step": 28812 }, { "epoch": 0.05108960361850708, "grad_norm": 0.392578125, "learning_rate": 0.001645479906903688, "loss": 0.2601, "step": 28814 }, { "epoch": 0.0510931497838169, "grad_norm": 0.79296875, "learning_rate": 0.0016454325647567317, "loss": 0.1992, "step": 28816 }, { "epoch": 0.05109669594912671, "grad_norm": 0.408203125, "learning_rate": 0.0016453852202243698, "loss": 0.208, "step": 28818 }, { "epoch": 0.05110024211443653, "grad_norm": 0.625, "learning_rate": 0.00164533787330681, "loss": 0.2227, "step": 28820 }, { "epoch": 0.05110378827974634, "grad_norm": 0.5703125, "learning_rate": 0.0016452905240042588, "loss": 0.1654, "step": 28822 }, { "epoch": 0.051107334445056156, "grad_norm": 0.71875, "learning_rate": 0.0016452431723169242, "loss": 0.3266, "step": 28824 }, { "epoch": 0.05111088061036597, "grad_norm": 0.73828125, "learning_rate": 0.001645195818245012, "loss": 0.2287, "step": 28826 }, { "epoch": 0.051114426775675785, "grad_norm": 0.5390625, "learning_rate": 0.00164514846178873, "loss": 0.1434, "step": 28828 }, { "epoch": 0.051117972940985607, "grad_norm": 0.92578125, "learning_rate": 0.0016451011029482855, "loss": 0.2887, "step": 28830 }, { "epoch": 0.05112151910629542, "grad_norm": 0.58984375, "learning_rate": 0.0016450537417238855, "loss": 0.1941, "step": 28832 }, { "epoch": 0.051125065271605236, "grad_norm": 0.54296875, "learning_rate": 0.001645006378115737, "loss": 0.1971, "step": 28834 }, { "epoch": 0.05112861143691505, "grad_norm": 0.91015625, "learning_rate": 0.0016449590121240469, "loss": 0.2037, "step": 28836 }, { "epoch": 0.051132157602224865, "grad_norm": 0.578125, "learning_rate": 0.0016449116437490228, "loss": 0.2103, "step": 28838 }, { "epoch": 0.05113570376753468, "grad_norm": 0.318359375, "learning_rate": 0.0016448642729908719, "loss": 0.301, "step": 28840 }, { "epoch": 0.051139249932844494, "grad_norm": 1.640625, "learning_rate": 0.001644816899849801, "loss": 0.2176, "step": 28842 }, { "epoch": 0.05114279609815431, "grad_norm": 1.6875, "learning_rate": 0.0016447695243260176, "loss": 0.2285, "step": 28844 }, { "epoch": 0.05114634226346412, "grad_norm": 0.6015625, "learning_rate": 0.0016447221464197285, "loss": 0.2002, "step": 28846 }, { "epoch": 0.05114988842877394, "grad_norm": 0.337890625, "learning_rate": 0.0016446747661311412, "loss": 0.1468, "step": 28848 }, { "epoch": 0.05115343459408375, "grad_norm": 0.734375, "learning_rate": 0.0016446273834604629, "loss": 0.1857, "step": 28850 }, { "epoch": 0.051156980759393574, "grad_norm": 0.306640625, "learning_rate": 0.0016445799984079008, "loss": 0.1961, "step": 28852 }, { "epoch": 0.05116052692470339, "grad_norm": 0.287109375, "learning_rate": 0.0016445326109736618, "loss": 0.1867, "step": 28854 }, { "epoch": 0.0511640730900132, "grad_norm": 0.447265625, "learning_rate": 0.001644485221157954, "loss": 0.1803, "step": 28856 }, { "epoch": 0.05116761925532302, "grad_norm": 0.49609375, "learning_rate": 0.0016444378289609837, "loss": 0.2511, "step": 28858 }, { "epoch": 0.05117116542063283, "grad_norm": 0.73828125, "learning_rate": 0.001644390434382959, "loss": 0.2498, "step": 28860 }, { "epoch": 0.051174711585942646, "grad_norm": 0.83984375, "learning_rate": 0.0016443430374240863, "loss": 0.2328, "step": 28862 }, { "epoch": 0.05117825775125246, "grad_norm": 0.78515625, "learning_rate": 0.001644295638084574, "loss": 0.2114, "step": 28864 }, { "epoch": 0.051181803916562275, "grad_norm": 0.26171875, "learning_rate": 0.0016442482363646278, "loss": 0.1612, "step": 28866 }, { "epoch": 0.05118535008187209, "grad_norm": 0.28515625, "learning_rate": 0.0016442008322644565, "loss": 0.2012, "step": 28868 }, { "epoch": 0.051188896247181905, "grad_norm": 0.302734375, "learning_rate": 0.0016441534257842668, "loss": 0.2724, "step": 28870 }, { "epoch": 0.05119244241249172, "grad_norm": 0.306640625, "learning_rate": 0.0016441060169242662, "loss": 0.1829, "step": 28872 }, { "epoch": 0.05119598857780154, "grad_norm": 0.31640625, "learning_rate": 0.001644058605684662, "loss": 0.2505, "step": 28874 }, { "epoch": 0.051199534743111355, "grad_norm": 0.271484375, "learning_rate": 0.0016440111920656612, "loss": 0.1631, "step": 28876 }, { "epoch": 0.05120308090842117, "grad_norm": 0.439453125, "learning_rate": 0.0016439637760674716, "loss": 0.1606, "step": 28878 }, { "epoch": 0.051206627073730984, "grad_norm": 0.310546875, "learning_rate": 0.0016439163576903004, "loss": 0.1597, "step": 28880 }, { "epoch": 0.0512101732390408, "grad_norm": 0.9453125, "learning_rate": 0.001643868936934355, "loss": 0.217, "step": 28882 }, { "epoch": 0.05121371940435061, "grad_norm": 0.7109375, "learning_rate": 0.0016438215137998427, "loss": 0.2671, "step": 28884 }, { "epoch": 0.05121726556966043, "grad_norm": 0.57421875, "learning_rate": 0.0016437740882869712, "loss": 0.198, "step": 28886 }, { "epoch": 0.05122081173497024, "grad_norm": 0.30859375, "learning_rate": 0.0016437266603959474, "loss": 0.1945, "step": 28888 }, { "epoch": 0.05122435790028006, "grad_norm": 0.2216796875, "learning_rate": 0.0016436792301269794, "loss": 0.2231, "step": 28890 }, { "epoch": 0.05122790406558987, "grad_norm": 0.359375, "learning_rate": 0.0016436317974802739, "loss": 0.2575, "step": 28892 }, { "epoch": 0.051231450230899686, "grad_norm": 0.8046875, "learning_rate": 0.0016435843624560387, "loss": 0.2005, "step": 28894 }, { "epoch": 0.0512349963962095, "grad_norm": 0.423828125, "learning_rate": 0.0016435369250544815, "loss": 0.1654, "step": 28896 }, { "epoch": 0.05123854256151932, "grad_norm": 0.45703125, "learning_rate": 0.0016434894852758095, "loss": 0.1506, "step": 28898 }, { "epoch": 0.05124208872682914, "grad_norm": 0.515625, "learning_rate": 0.0016434420431202302, "loss": 0.1927, "step": 28900 }, { "epoch": 0.05124563489213895, "grad_norm": 6.09375, "learning_rate": 0.0016433945985879508, "loss": 0.3085, "step": 28902 }, { "epoch": 0.051249181057448766, "grad_norm": 0.50390625, "learning_rate": 0.0016433471516791793, "loss": 0.2624, "step": 28904 }, { "epoch": 0.05125272722275858, "grad_norm": 1.34375, "learning_rate": 0.0016432997023941229, "loss": 0.2234, "step": 28906 }, { "epoch": 0.051256273388068395, "grad_norm": 0.310546875, "learning_rate": 0.0016432522507329893, "loss": 0.1908, "step": 28908 }, { "epoch": 0.05125981955337821, "grad_norm": 0.296875, "learning_rate": 0.0016432047966959858, "loss": 0.1326, "step": 28910 }, { "epoch": 0.051263365718688024, "grad_norm": 0.5859375, "learning_rate": 0.0016431573402833199, "loss": 0.2154, "step": 28912 }, { "epoch": 0.05126691188399784, "grad_norm": 0.6015625, "learning_rate": 0.0016431098814951996, "loss": 0.1923, "step": 28914 }, { "epoch": 0.05127045804930765, "grad_norm": 0.66796875, "learning_rate": 0.0016430624203318319, "loss": 0.1887, "step": 28916 }, { "epoch": 0.05127400421461747, "grad_norm": 0.5390625, "learning_rate": 0.0016430149567934248, "loss": 0.208, "step": 28918 }, { "epoch": 0.05127755037992729, "grad_norm": 0.361328125, "learning_rate": 0.0016429674908801856, "loss": 0.167, "step": 28920 }, { "epoch": 0.051281096545237104, "grad_norm": 0.625, "learning_rate": 0.001642920022592322, "loss": 0.1769, "step": 28922 }, { "epoch": 0.05128464271054692, "grad_norm": 0.78515625, "learning_rate": 0.0016428725519300417, "loss": 0.1957, "step": 28924 }, { "epoch": 0.05128818887585673, "grad_norm": 0.322265625, "learning_rate": 0.001642825078893552, "loss": 0.2115, "step": 28926 }, { "epoch": 0.05129173504116655, "grad_norm": 0.42578125, "learning_rate": 0.0016427776034830608, "loss": 0.2172, "step": 28928 }, { "epoch": 0.05129528120647636, "grad_norm": 0.88671875, "learning_rate": 0.0016427301256987758, "loss": 0.1258, "step": 28930 }, { "epoch": 0.051298827371786176, "grad_norm": 0.71875, "learning_rate": 0.0016426826455409043, "loss": 0.2494, "step": 28932 }, { "epoch": 0.05130237353709599, "grad_norm": 0.68359375, "learning_rate": 0.001642635163009654, "loss": 0.1731, "step": 28934 }, { "epoch": 0.051305919702405806, "grad_norm": 1.5, "learning_rate": 0.001642587678105233, "loss": 0.2589, "step": 28936 }, { "epoch": 0.05130946586771562, "grad_norm": 0.5234375, "learning_rate": 0.0016425401908278488, "loss": 0.2342, "step": 28938 }, { "epoch": 0.051313012033025435, "grad_norm": 0.30078125, "learning_rate": 0.0016424927011777085, "loss": 0.1873, "step": 28940 }, { "epoch": 0.05131655819833525, "grad_norm": 0.318359375, "learning_rate": 0.0016424452091550203, "loss": 0.1679, "step": 28942 }, { "epoch": 0.05132010436364507, "grad_norm": 0.490234375, "learning_rate": 0.0016423977147599922, "loss": 0.1611, "step": 28944 }, { "epoch": 0.051323650528954885, "grad_norm": 0.25, "learning_rate": 0.0016423502179928314, "loss": 0.2194, "step": 28946 }, { "epoch": 0.0513271966942647, "grad_norm": 0.8359375, "learning_rate": 0.0016423027188537456, "loss": 0.1665, "step": 28948 }, { "epoch": 0.051330742859574514, "grad_norm": 0.31640625, "learning_rate": 0.0016422552173429431, "loss": 0.2284, "step": 28950 }, { "epoch": 0.05133428902488433, "grad_norm": 1.375, "learning_rate": 0.0016422077134606308, "loss": 0.2934, "step": 28952 }, { "epoch": 0.051337835190194143, "grad_norm": 1.640625, "learning_rate": 0.0016421602072070169, "loss": 0.3084, "step": 28954 }, { "epoch": 0.05134138135550396, "grad_norm": 2.265625, "learning_rate": 0.0016421126985823097, "loss": 0.3871, "step": 28956 }, { "epoch": 0.05134492752081377, "grad_norm": 1.8125, "learning_rate": 0.0016420651875867158, "loss": 0.2129, "step": 28958 }, { "epoch": 0.05134847368612359, "grad_norm": 0.6875, "learning_rate": 0.001642017674220444, "loss": 0.1386, "step": 28960 }, { "epoch": 0.0513520198514334, "grad_norm": 0.546875, "learning_rate": 0.0016419701584837016, "loss": 0.1912, "step": 28962 }, { "epoch": 0.051355566016743216, "grad_norm": 0.625, "learning_rate": 0.0016419226403766966, "loss": 0.3302, "step": 28964 }, { "epoch": 0.05135911218205304, "grad_norm": 0.98046875, "learning_rate": 0.0016418751198996372, "loss": 0.1567, "step": 28966 }, { "epoch": 0.05136265834736285, "grad_norm": 1.0546875, "learning_rate": 0.0016418275970527302, "loss": 0.1539, "step": 28968 }, { "epoch": 0.05136620451267267, "grad_norm": 0.60546875, "learning_rate": 0.0016417800718361842, "loss": 0.2275, "step": 28970 }, { "epoch": 0.05136975067798248, "grad_norm": 0.68359375, "learning_rate": 0.0016417325442502069, "loss": 0.2154, "step": 28972 }, { "epoch": 0.051373296843292296, "grad_norm": 0.51953125, "learning_rate": 0.001641685014295006, "loss": 0.2856, "step": 28974 }, { "epoch": 0.05137684300860211, "grad_norm": 0.49609375, "learning_rate": 0.0016416374819707897, "loss": 0.2403, "step": 28976 }, { "epoch": 0.051380389173911925, "grad_norm": 0.5078125, "learning_rate": 0.0016415899472777655, "loss": 0.181, "step": 28978 }, { "epoch": 0.05138393533922174, "grad_norm": 0.494140625, "learning_rate": 0.0016415424102161416, "loss": 0.1782, "step": 28980 }, { "epoch": 0.051387481504531554, "grad_norm": 0.5703125, "learning_rate": 0.0016414948707861258, "loss": 0.1594, "step": 28982 }, { "epoch": 0.05139102766984137, "grad_norm": 0.3203125, "learning_rate": 0.001641447328987926, "loss": 0.2049, "step": 28984 }, { "epoch": 0.05139457383515118, "grad_norm": 0.255859375, "learning_rate": 0.0016413997848217497, "loss": 0.1624, "step": 28986 }, { "epoch": 0.051398120000461005, "grad_norm": 1.78125, "learning_rate": 0.0016413522382878057, "loss": 0.2854, "step": 28988 }, { "epoch": 0.05140166616577082, "grad_norm": 2.265625, "learning_rate": 0.001641304689386301, "loss": 0.2871, "step": 28990 }, { "epoch": 0.051405212331080634, "grad_norm": 0.578125, "learning_rate": 0.0016412571381174447, "loss": 0.1582, "step": 28992 }, { "epoch": 0.05140875849639045, "grad_norm": 0.498046875, "learning_rate": 0.001641209584481443, "loss": 0.1664, "step": 28994 }, { "epoch": 0.05141230466170026, "grad_norm": 0.8828125, "learning_rate": 0.001641162028478506, "loss": 0.2886, "step": 28996 }, { "epoch": 0.05141585082701008, "grad_norm": 0.294921875, "learning_rate": 0.00164111447010884, "loss": 0.2259, "step": 28998 }, { "epoch": 0.05141939699231989, "grad_norm": 0.8515625, "learning_rate": 0.0016410669093726543, "loss": 0.1939, "step": 29000 }, { "epoch": 0.05142294315762971, "grad_norm": 1.1640625, "learning_rate": 0.0016410193462701556, "loss": 0.1788, "step": 29002 }, { "epoch": 0.05142648932293952, "grad_norm": 0.2333984375, "learning_rate": 0.0016409717808015527, "loss": 0.289, "step": 29004 }, { "epoch": 0.051430035488249336, "grad_norm": 0.66015625, "learning_rate": 0.0016409242129670535, "loss": 0.1965, "step": 29006 }, { "epoch": 0.05143358165355915, "grad_norm": 0.81640625, "learning_rate": 0.001640876642766866, "loss": 0.3609, "step": 29008 }, { "epoch": 0.051437127818868965, "grad_norm": 0.79296875, "learning_rate": 0.0016408290702011983, "loss": 0.1386, "step": 29010 }, { "epoch": 0.051440673984178786, "grad_norm": 0.416015625, "learning_rate": 0.0016407814952702584, "loss": 0.2418, "step": 29012 }, { "epoch": 0.0514442201494886, "grad_norm": 2.046875, "learning_rate": 0.0016407339179742542, "loss": 0.1988, "step": 29014 }, { "epoch": 0.051447766314798415, "grad_norm": 0.240234375, "learning_rate": 0.0016406863383133942, "loss": 0.2546, "step": 29016 }, { "epoch": 0.05145131248010823, "grad_norm": 0.72265625, "learning_rate": 0.0016406387562878857, "loss": 0.2214, "step": 29018 }, { "epoch": 0.051454858645418045, "grad_norm": 0.9765625, "learning_rate": 0.0016405911718979378, "loss": 0.1649, "step": 29020 }, { "epoch": 0.05145840481072786, "grad_norm": 0.349609375, "learning_rate": 0.001640543585143758, "loss": 0.1634, "step": 29022 }, { "epoch": 0.051461950976037674, "grad_norm": 0.365234375, "learning_rate": 0.0016404959960255546, "loss": 0.1586, "step": 29024 }, { "epoch": 0.05146549714134749, "grad_norm": 1.8984375, "learning_rate": 0.0016404484045435357, "loss": 0.4484, "step": 29026 }, { "epoch": 0.0514690433066573, "grad_norm": 0.65625, "learning_rate": 0.0016404008106979094, "loss": 0.1628, "step": 29028 }, { "epoch": 0.05147258947196712, "grad_norm": 0.279296875, "learning_rate": 0.0016403532144888837, "loss": 0.3513, "step": 29030 }, { "epoch": 0.05147613563727693, "grad_norm": 0.306640625, "learning_rate": 0.001640305615916667, "loss": 0.1816, "step": 29032 }, { "epoch": 0.05147968180258675, "grad_norm": 0.291015625, "learning_rate": 0.0016402580149814674, "loss": 0.2215, "step": 29034 }, { "epoch": 0.05148322796789657, "grad_norm": 0.279296875, "learning_rate": 0.001640210411683493, "loss": 0.1874, "step": 29036 }, { "epoch": 0.05148677413320638, "grad_norm": 2.859375, "learning_rate": 0.001640162806022952, "loss": 0.4283, "step": 29038 }, { "epoch": 0.0514903202985162, "grad_norm": 5.1875, "learning_rate": 0.001640115198000053, "loss": 0.2846, "step": 29040 }, { "epoch": 0.05149386646382601, "grad_norm": 0.515625, "learning_rate": 0.0016400675876150034, "loss": 0.2094, "step": 29042 }, { "epoch": 0.051497412629135826, "grad_norm": 0.7890625, "learning_rate": 0.0016400199748680122, "loss": 0.1953, "step": 29044 }, { "epoch": 0.05150095879444564, "grad_norm": 0.39453125, "learning_rate": 0.001639972359759287, "loss": 0.1638, "step": 29046 }, { "epoch": 0.051504504959755455, "grad_norm": 1.0625, "learning_rate": 0.0016399247422890367, "loss": 0.2819, "step": 29048 }, { "epoch": 0.05150805112506527, "grad_norm": 2.203125, "learning_rate": 0.0016398771224574692, "loss": 0.2604, "step": 29050 }, { "epoch": 0.051511597290375084, "grad_norm": 2.859375, "learning_rate": 0.0016398295002647922, "loss": 0.1812, "step": 29052 }, { "epoch": 0.0515151434556849, "grad_norm": 0.5859375, "learning_rate": 0.001639781875711215, "loss": 0.1874, "step": 29054 }, { "epoch": 0.05151868962099472, "grad_norm": 0.392578125, "learning_rate": 0.0016397342487969454, "loss": 0.2109, "step": 29056 }, { "epoch": 0.051522235786304535, "grad_norm": 0.3515625, "learning_rate": 0.001639686619522192, "loss": 0.1957, "step": 29058 }, { "epoch": 0.05152578195161435, "grad_norm": 0.39453125, "learning_rate": 0.0016396389878871622, "loss": 0.176, "step": 29060 }, { "epoch": 0.051529328116924164, "grad_norm": 0.91796875, "learning_rate": 0.0016395913538920655, "loss": 0.1947, "step": 29062 }, { "epoch": 0.05153287428223398, "grad_norm": 0.43359375, "learning_rate": 0.0016395437175371092, "loss": 0.2163, "step": 29064 }, { "epoch": 0.05153642044754379, "grad_norm": 0.431640625, "learning_rate": 0.0016394960788225024, "loss": 0.2346, "step": 29066 }, { "epoch": 0.05153996661285361, "grad_norm": 0.34765625, "learning_rate": 0.0016394484377484529, "loss": 0.1645, "step": 29068 }, { "epoch": 0.05154351277816342, "grad_norm": 0.5625, "learning_rate": 0.0016394007943151695, "loss": 0.2055, "step": 29070 }, { "epoch": 0.05154705894347324, "grad_norm": 0.390625, "learning_rate": 0.0016393531485228599, "loss": 0.1977, "step": 29072 }, { "epoch": 0.05155060510878305, "grad_norm": 0.455078125, "learning_rate": 0.0016393055003717334, "loss": 0.2311, "step": 29074 }, { "epoch": 0.051554151274092866, "grad_norm": 1.5546875, "learning_rate": 0.0016392578498619977, "loss": 0.2774, "step": 29076 }, { "epoch": 0.05155769743940268, "grad_norm": 0.2578125, "learning_rate": 0.0016392101969938612, "loss": 0.1874, "step": 29078 }, { "epoch": 0.0515612436047125, "grad_norm": 0.421875, "learning_rate": 0.0016391625417675332, "loss": 0.18, "step": 29080 }, { "epoch": 0.051564789770022317, "grad_norm": 0.392578125, "learning_rate": 0.0016391148841832207, "loss": 0.2366, "step": 29082 }, { "epoch": 0.05156833593533213, "grad_norm": 0.45703125, "learning_rate": 0.0016390672242411334, "loss": 0.1684, "step": 29084 }, { "epoch": 0.051571882100641946, "grad_norm": 0.416015625, "learning_rate": 0.0016390195619414786, "loss": 0.2153, "step": 29086 }, { "epoch": 0.05157542826595176, "grad_norm": 1.03125, "learning_rate": 0.0016389718972844656, "loss": 0.1968, "step": 29088 }, { "epoch": 0.051578974431261575, "grad_norm": 1.9609375, "learning_rate": 0.001638924230270303, "loss": 0.2094, "step": 29090 }, { "epoch": 0.05158252059657139, "grad_norm": 0.55859375, "learning_rate": 0.0016388765608991982, "loss": 0.2178, "step": 29092 }, { "epoch": 0.051586066761881204, "grad_norm": 2.0625, "learning_rate": 0.0016388288891713608, "loss": 0.2435, "step": 29094 }, { "epoch": 0.05158961292719102, "grad_norm": 0.2333984375, "learning_rate": 0.0016387812150869987, "loss": 0.1626, "step": 29096 }, { "epoch": 0.05159315909250083, "grad_norm": 0.5, "learning_rate": 0.0016387335386463205, "loss": 0.164, "step": 29098 }, { "epoch": 0.05159670525781065, "grad_norm": 0.333984375, "learning_rate": 0.0016386858598495346, "loss": 0.2015, "step": 29100 }, { "epoch": 0.05160025142312047, "grad_norm": 0.3359375, "learning_rate": 0.0016386381786968498, "loss": 0.318, "step": 29102 }, { "epoch": 0.051603797588430284, "grad_norm": 1.9765625, "learning_rate": 0.0016385904951884747, "loss": 0.2776, "step": 29104 }, { "epoch": 0.0516073437537401, "grad_norm": 0.2255859375, "learning_rate": 0.0016385428093246175, "loss": 0.1563, "step": 29106 }, { "epoch": 0.05161088991904991, "grad_norm": 1.0546875, "learning_rate": 0.0016384951211054863, "loss": 0.1647, "step": 29108 }, { "epoch": 0.05161443608435973, "grad_norm": 0.8125, "learning_rate": 0.0016384474305312909, "loss": 0.1562, "step": 29110 }, { "epoch": 0.05161798224966954, "grad_norm": 3.09375, "learning_rate": 0.001638399737602239, "loss": 0.1979, "step": 29112 }, { "epoch": 0.051621528414979356, "grad_norm": 0.359375, "learning_rate": 0.0016383520423185396, "loss": 0.1997, "step": 29114 }, { "epoch": 0.05162507458028917, "grad_norm": 0.318359375, "learning_rate": 0.001638304344680401, "loss": 0.1806, "step": 29116 }, { "epoch": 0.051628620745598985, "grad_norm": 0.30078125, "learning_rate": 0.0016382566446880315, "loss": 0.1856, "step": 29118 }, { "epoch": 0.0516321669109088, "grad_norm": 0.392578125, "learning_rate": 0.0016382089423416402, "loss": 0.183, "step": 29120 }, { "epoch": 0.051635713076218615, "grad_norm": 0.85546875, "learning_rate": 0.0016381612376414358, "loss": 0.1946, "step": 29122 }, { "epoch": 0.051639259241528436, "grad_norm": 0.29296875, "learning_rate": 0.0016381135305876266, "loss": 0.1728, "step": 29124 }, { "epoch": 0.05164280540683825, "grad_norm": 0.91796875, "learning_rate": 0.0016380658211804218, "loss": 0.2502, "step": 29126 }, { "epoch": 0.051646351572148065, "grad_norm": 0.77734375, "learning_rate": 0.0016380181094200292, "loss": 0.2109, "step": 29128 }, { "epoch": 0.05164989773745788, "grad_norm": 0.291015625, "learning_rate": 0.0016379703953066582, "loss": 0.2181, "step": 29130 }, { "epoch": 0.051653443902767694, "grad_norm": 0.251953125, "learning_rate": 0.001637922678840517, "loss": 0.1973, "step": 29132 }, { "epoch": 0.05165699006807751, "grad_norm": 0.69140625, "learning_rate": 0.0016378749600218145, "loss": 0.2957, "step": 29134 }, { "epoch": 0.05166053623338732, "grad_norm": 0.349609375, "learning_rate": 0.0016378272388507594, "loss": 0.213, "step": 29136 }, { "epoch": 0.05166408239869714, "grad_norm": 0.2353515625, "learning_rate": 0.0016377795153275602, "loss": 0.2658, "step": 29138 }, { "epoch": 0.05166762856400695, "grad_norm": 0.169921875, "learning_rate": 0.0016377317894524259, "loss": 0.1811, "step": 29140 }, { "epoch": 0.05167117472931677, "grad_norm": 0.484375, "learning_rate": 0.0016376840612255653, "loss": 0.1603, "step": 29142 }, { "epoch": 0.05167472089462658, "grad_norm": 0.4296875, "learning_rate": 0.0016376363306471868, "loss": 0.1678, "step": 29144 }, { "epoch": 0.051678267059936396, "grad_norm": 0.8828125, "learning_rate": 0.001637588597717499, "loss": 0.1716, "step": 29146 }, { "epoch": 0.05168181322524622, "grad_norm": 0.74609375, "learning_rate": 0.0016375408624367115, "loss": 0.1571, "step": 29148 }, { "epoch": 0.05168535939055603, "grad_norm": 3.3125, "learning_rate": 0.001637493124805032, "loss": 0.3265, "step": 29150 }, { "epoch": 0.05168890555586585, "grad_norm": 0.51171875, "learning_rate": 0.00163744538482267, "loss": 0.2879, "step": 29152 }, { "epoch": 0.05169245172117566, "grad_norm": 0.51171875, "learning_rate": 0.001637397642489834, "loss": 0.322, "step": 29154 }, { "epoch": 0.051695997886485476, "grad_norm": 0.306640625, "learning_rate": 0.0016373498978067332, "loss": 0.172, "step": 29156 }, { "epoch": 0.05169954405179529, "grad_norm": 0.486328125, "learning_rate": 0.0016373021507735757, "loss": 0.185, "step": 29158 }, { "epoch": 0.051703090217105105, "grad_norm": 0.7109375, "learning_rate": 0.0016372544013905708, "loss": 0.1881, "step": 29160 }, { "epoch": 0.05170663638241492, "grad_norm": 0.23828125, "learning_rate": 0.0016372066496579273, "loss": 0.1878, "step": 29162 }, { "epoch": 0.051710182547724734, "grad_norm": 2.203125, "learning_rate": 0.0016371588955758538, "loss": 0.2924, "step": 29164 }, { "epoch": 0.05171372871303455, "grad_norm": 1.0859375, "learning_rate": 0.0016371111391445593, "loss": 0.399, "step": 29166 }, { "epoch": 0.05171727487834436, "grad_norm": 0.5234375, "learning_rate": 0.0016370633803642527, "loss": 0.1622, "step": 29168 }, { "epoch": 0.051720821043654185, "grad_norm": 0.7734375, "learning_rate": 0.0016370156192351429, "loss": 0.224, "step": 29170 }, { "epoch": 0.051724367208964, "grad_norm": 1.3203125, "learning_rate": 0.001636967855757439, "loss": 0.2693, "step": 29172 }, { "epoch": 0.051727913374273814, "grad_norm": 0.49609375, "learning_rate": 0.001636920089931349, "loss": 0.1806, "step": 29174 }, { "epoch": 0.05173145953958363, "grad_norm": 0.384765625, "learning_rate": 0.001636872321757083, "loss": 0.2247, "step": 29176 }, { "epoch": 0.05173500570489344, "grad_norm": 4.625, "learning_rate": 0.0016368245512348488, "loss": 0.2579, "step": 29178 }, { "epoch": 0.05173855187020326, "grad_norm": 0.267578125, "learning_rate": 0.0016367767783648565, "loss": 0.2064, "step": 29180 }, { "epoch": 0.05174209803551307, "grad_norm": 0.3515625, "learning_rate": 0.0016367290031473135, "loss": 0.2169, "step": 29182 }, { "epoch": 0.051745644200822886, "grad_norm": 0.96875, "learning_rate": 0.0016366812255824304, "loss": 0.1918, "step": 29184 }, { "epoch": 0.0517491903661327, "grad_norm": 1.9609375, "learning_rate": 0.0016366334456704146, "loss": 0.1457, "step": 29186 }, { "epoch": 0.051752736531442516, "grad_norm": 0.4375, "learning_rate": 0.0016365856634114764, "loss": 0.1656, "step": 29188 }, { "epoch": 0.05175628269675233, "grad_norm": 6.65625, "learning_rate": 0.0016365378788058239, "loss": 0.1749, "step": 29190 }, { "epoch": 0.05175982886206215, "grad_norm": 0.45703125, "learning_rate": 0.0016364900918536668, "loss": 0.2483, "step": 29192 }, { "epoch": 0.051763375027371966, "grad_norm": 0.486328125, "learning_rate": 0.0016364423025552134, "loss": 0.2089, "step": 29194 }, { "epoch": 0.05176692119268178, "grad_norm": 0.8046875, "learning_rate": 0.001636394510910673, "loss": 0.2163, "step": 29196 }, { "epoch": 0.051770467357991595, "grad_norm": 0.54296875, "learning_rate": 0.0016363467169202546, "loss": 0.2075, "step": 29198 }, { "epoch": 0.05177401352330141, "grad_norm": 0.58984375, "learning_rate": 0.001636298920584167, "loss": 0.1855, "step": 29200 }, { "epoch": 0.051777559688611224, "grad_norm": 0.44140625, "learning_rate": 0.0016362511219026195, "loss": 0.2332, "step": 29202 }, { "epoch": 0.05178110585392104, "grad_norm": 2.328125, "learning_rate": 0.001636203320875821, "loss": 0.3503, "step": 29204 }, { "epoch": 0.051784652019230853, "grad_norm": 0.4609375, "learning_rate": 0.001636155517503981, "loss": 0.2012, "step": 29206 }, { "epoch": 0.05178819818454067, "grad_norm": 0.65234375, "learning_rate": 0.001636107711787308, "loss": 0.1714, "step": 29208 }, { "epoch": 0.05179174434985048, "grad_norm": 0.34375, "learning_rate": 0.0016360599037260114, "loss": 0.204, "step": 29210 }, { "epoch": 0.0517952905151603, "grad_norm": 0.875, "learning_rate": 0.0016360120933202997, "loss": 0.2471, "step": 29212 }, { "epoch": 0.05179883668047011, "grad_norm": 0.54296875, "learning_rate": 0.0016359642805703829, "loss": 0.2223, "step": 29214 }, { "epoch": 0.05180238284577993, "grad_norm": 3.515625, "learning_rate": 0.0016359164654764697, "loss": 0.2981, "step": 29216 }, { "epoch": 0.05180592901108975, "grad_norm": 2.09375, "learning_rate": 0.001635868648038769, "loss": 0.3408, "step": 29218 }, { "epoch": 0.05180947517639956, "grad_norm": 0.57421875, "learning_rate": 0.00163582082825749, "loss": 0.1672, "step": 29220 }, { "epoch": 0.05181302134170938, "grad_norm": 5.71875, "learning_rate": 0.0016357730061328422, "loss": 0.2878, "step": 29222 }, { "epoch": 0.05181656750701919, "grad_norm": 0.333984375, "learning_rate": 0.0016357251816650343, "loss": 0.2449, "step": 29224 }, { "epoch": 0.051820113672329006, "grad_norm": 2.3125, "learning_rate": 0.0016356773548542758, "loss": 0.3094, "step": 29226 }, { "epoch": 0.05182365983763882, "grad_norm": 0.57421875, "learning_rate": 0.0016356295257007757, "loss": 0.244, "step": 29228 }, { "epoch": 0.051827206002948635, "grad_norm": 2.328125, "learning_rate": 0.0016355816942047433, "loss": 0.1978, "step": 29230 }, { "epoch": 0.05183075216825845, "grad_norm": 0.2578125, "learning_rate": 0.0016355338603663875, "loss": 0.1815, "step": 29232 }, { "epoch": 0.051834298333568264, "grad_norm": 0.48828125, "learning_rate": 0.0016354860241859174, "loss": 0.2086, "step": 29234 }, { "epoch": 0.05183784449887808, "grad_norm": 0.55859375, "learning_rate": 0.0016354381856635428, "loss": 0.1613, "step": 29236 }, { "epoch": 0.0518413906641879, "grad_norm": 1.21875, "learning_rate": 0.0016353903447994724, "loss": 0.1759, "step": 29238 }, { "epoch": 0.051844936829497715, "grad_norm": 0.87890625, "learning_rate": 0.0016353425015939158, "loss": 0.1763, "step": 29240 }, { "epoch": 0.05184848299480753, "grad_norm": 2.296875, "learning_rate": 0.001635294656047082, "loss": 0.2838, "step": 29242 }, { "epoch": 0.051852029160117344, "grad_norm": 2.703125, "learning_rate": 0.0016352468081591803, "loss": 0.1977, "step": 29244 }, { "epoch": 0.05185557532542716, "grad_norm": 0.46875, "learning_rate": 0.0016351989579304201, "loss": 0.1569, "step": 29246 }, { "epoch": 0.05185912149073697, "grad_norm": 2.03125, "learning_rate": 0.0016351511053610101, "loss": 0.3726, "step": 29248 }, { "epoch": 0.05186266765604679, "grad_norm": 0.7109375, "learning_rate": 0.0016351032504511606, "loss": 0.1895, "step": 29250 }, { "epoch": 0.0518662138213566, "grad_norm": 0.291015625, "learning_rate": 0.00163505539320108, "loss": 0.1581, "step": 29252 }, { "epoch": 0.05186975998666642, "grad_norm": 0.40234375, "learning_rate": 0.001635007533610978, "loss": 0.1826, "step": 29254 }, { "epoch": 0.05187330615197623, "grad_norm": 0.55078125, "learning_rate": 0.0016349596716810637, "loss": 0.2037, "step": 29256 }, { "epoch": 0.051876852317286046, "grad_norm": 1.4375, "learning_rate": 0.0016349118074115469, "loss": 0.2556, "step": 29258 }, { "epoch": 0.05188039848259587, "grad_norm": 0.546875, "learning_rate": 0.001634863940802636, "loss": 0.3034, "step": 29260 }, { "epoch": 0.05188394464790568, "grad_norm": 1.3984375, "learning_rate": 0.0016348160718545413, "loss": 0.1233, "step": 29262 }, { "epoch": 0.051887490813215496, "grad_norm": 0.365234375, "learning_rate": 0.0016347682005674715, "loss": 0.2243, "step": 29264 }, { "epoch": 0.05189103697852531, "grad_norm": 0.5546875, "learning_rate": 0.0016347203269416364, "loss": 0.1984, "step": 29266 }, { "epoch": 0.051894583143835125, "grad_norm": 0.4296875, "learning_rate": 0.0016346724509772451, "loss": 0.1251, "step": 29268 }, { "epoch": 0.05189812930914494, "grad_norm": 0.39453125, "learning_rate": 0.0016346245726745075, "loss": 0.1835, "step": 29270 }, { "epoch": 0.051901675474454755, "grad_norm": 0.306640625, "learning_rate": 0.0016345766920336322, "loss": 0.2736, "step": 29272 }, { "epoch": 0.05190522163976457, "grad_norm": 0.546875, "learning_rate": 0.0016345288090548289, "loss": 0.2261, "step": 29274 }, { "epoch": 0.051908767805074384, "grad_norm": 2.40625, "learning_rate": 0.001634480923738307, "loss": 0.2026, "step": 29276 }, { "epoch": 0.0519123139703842, "grad_norm": 1.15625, "learning_rate": 0.0016344330360842762, "loss": 0.218, "step": 29278 }, { "epoch": 0.05191586013569401, "grad_norm": 0.3828125, "learning_rate": 0.0016343851460929458, "loss": 0.1586, "step": 29280 }, { "epoch": 0.05191940630100383, "grad_norm": 0.408203125, "learning_rate": 0.001634337253764525, "loss": 0.2128, "step": 29282 }, { "epoch": 0.05192295246631365, "grad_norm": 0.4375, "learning_rate": 0.0016342893590992234, "loss": 0.1637, "step": 29284 }, { "epoch": 0.05192649863162346, "grad_norm": 1.03125, "learning_rate": 0.0016342414620972507, "loss": 0.4581, "step": 29286 }, { "epoch": 0.05193004479693328, "grad_norm": 0.353515625, "learning_rate": 0.001634193562758816, "loss": 0.2068, "step": 29288 }, { "epoch": 0.05193359096224309, "grad_norm": 0.46484375, "learning_rate": 0.001634145661084129, "loss": 0.2024, "step": 29290 }, { "epoch": 0.05193713712755291, "grad_norm": 1.4375, "learning_rate": 0.0016340977570733996, "loss": 0.2584, "step": 29292 }, { "epoch": 0.05194068329286272, "grad_norm": 0.5859375, "learning_rate": 0.001634049850726836, "loss": 0.2244, "step": 29294 }, { "epoch": 0.051944229458172536, "grad_norm": 0.4140625, "learning_rate": 0.001634001942044649, "loss": 0.1889, "step": 29296 }, { "epoch": 0.05194777562348235, "grad_norm": 0.52734375, "learning_rate": 0.0016339540310270479, "loss": 0.2325, "step": 29298 }, { "epoch": 0.051951321788792165, "grad_norm": 0.5703125, "learning_rate": 0.0016339061176742417, "loss": 0.3686, "step": 29300 }, { "epoch": 0.05195486795410198, "grad_norm": 0.59765625, "learning_rate": 0.0016338582019864403, "loss": 0.193, "step": 29302 }, { "epoch": 0.051958414119411794, "grad_norm": 0.26953125, "learning_rate": 0.0016338102839638532, "loss": 0.1572, "step": 29304 }, { "epoch": 0.051961960284721616, "grad_norm": 0.98046875, "learning_rate": 0.00163376236360669, "loss": 0.1793, "step": 29306 }, { "epoch": 0.05196550645003143, "grad_norm": 0.6015625, "learning_rate": 0.0016337144409151606, "loss": 0.1965, "step": 29308 }, { "epoch": 0.051969052615341245, "grad_norm": 0.296875, "learning_rate": 0.001633666515889474, "loss": 0.1683, "step": 29310 }, { "epoch": 0.05197259878065106, "grad_norm": 1.25, "learning_rate": 0.00163361858852984, "loss": 0.277, "step": 29312 }, { "epoch": 0.051976144945960874, "grad_norm": 0.640625, "learning_rate": 0.001633570658836468, "loss": 0.3299, "step": 29314 }, { "epoch": 0.05197969111127069, "grad_norm": 0.365234375, "learning_rate": 0.0016335227268095686, "loss": 0.1567, "step": 29316 }, { "epoch": 0.0519832372765805, "grad_norm": 2.171875, "learning_rate": 0.0016334747924493503, "loss": 0.1947, "step": 29318 }, { "epoch": 0.05198678344189032, "grad_norm": 0.57421875, "learning_rate": 0.001633426855756023, "loss": 0.232, "step": 29320 }, { "epoch": 0.05199032960720013, "grad_norm": 0.369140625, "learning_rate": 0.0016333789167297965, "loss": 0.184, "step": 29322 }, { "epoch": 0.05199387577250995, "grad_norm": 0.474609375, "learning_rate": 0.001633330975370881, "loss": 0.2409, "step": 29324 }, { "epoch": 0.05199742193781976, "grad_norm": 0.421875, "learning_rate": 0.001633283031679485, "loss": 0.1686, "step": 29326 }, { "epoch": 0.05200096810312958, "grad_norm": 0.64453125, "learning_rate": 0.0016332350856558188, "loss": 0.2088, "step": 29328 }, { "epoch": 0.0520045142684394, "grad_norm": 0.484375, "learning_rate": 0.0016331871373000924, "loss": 0.2295, "step": 29330 }, { "epoch": 0.05200806043374921, "grad_norm": 0.86328125, "learning_rate": 0.001633139186612515, "loss": 0.2255, "step": 29332 }, { "epoch": 0.052011606599059027, "grad_norm": 0.35546875, "learning_rate": 0.0016330912335932963, "loss": 0.1924, "step": 29334 }, { "epoch": 0.05201515276436884, "grad_norm": 0.427734375, "learning_rate": 0.0016330432782426464, "loss": 0.1573, "step": 29336 }, { "epoch": 0.052018698929678656, "grad_norm": 0.27734375, "learning_rate": 0.001632995320560775, "loss": 0.1698, "step": 29338 }, { "epoch": 0.05202224509498847, "grad_norm": 0.2216796875, "learning_rate": 0.0016329473605478915, "loss": 0.2038, "step": 29340 }, { "epoch": 0.052025791260298285, "grad_norm": 1.109375, "learning_rate": 0.0016328993982042054, "loss": 0.2349, "step": 29342 }, { "epoch": 0.0520293374256081, "grad_norm": 0.61328125, "learning_rate": 0.0016328514335299275, "loss": 0.1905, "step": 29344 }, { "epoch": 0.052032883590917914, "grad_norm": 1.328125, "learning_rate": 0.0016328034665252667, "loss": 0.2348, "step": 29346 }, { "epoch": 0.05203642975622773, "grad_norm": 0.65234375, "learning_rate": 0.001632755497190433, "loss": 0.2071, "step": 29348 }, { "epoch": 0.05203997592153754, "grad_norm": 0.3984375, "learning_rate": 0.0016327075255256362, "loss": 0.1488, "step": 29350 }, { "epoch": 0.052043522086847364, "grad_norm": 0.63671875, "learning_rate": 0.0016326595515310865, "loss": 0.1989, "step": 29352 }, { "epoch": 0.05204706825215718, "grad_norm": 3.84375, "learning_rate": 0.0016326115752069927, "loss": 0.3219, "step": 29354 }, { "epoch": 0.052050614417466994, "grad_norm": 0.2392578125, "learning_rate": 0.0016325635965535657, "loss": 0.1545, "step": 29356 }, { "epoch": 0.05205416058277681, "grad_norm": 3.609375, "learning_rate": 0.0016325156155710147, "loss": 0.3998, "step": 29358 }, { "epoch": 0.05205770674808662, "grad_norm": 0.69921875, "learning_rate": 0.0016324676322595497, "loss": 0.1625, "step": 29360 }, { "epoch": 0.05206125291339644, "grad_norm": 0.35546875, "learning_rate": 0.0016324196466193806, "loss": 0.1878, "step": 29362 }, { "epoch": 0.05206479907870625, "grad_norm": 0.7734375, "learning_rate": 0.0016323716586507174, "loss": 0.2318, "step": 29364 }, { "epoch": 0.052068345244016066, "grad_norm": 0.6484375, "learning_rate": 0.0016323236683537697, "loss": 0.1865, "step": 29366 }, { "epoch": 0.05207189140932588, "grad_norm": 0.431640625, "learning_rate": 0.0016322756757287474, "loss": 0.2153, "step": 29368 }, { "epoch": 0.052075437574635695, "grad_norm": 0.2353515625, "learning_rate": 0.0016322276807758606, "loss": 0.1726, "step": 29370 }, { "epoch": 0.05207898373994551, "grad_norm": 0.7265625, "learning_rate": 0.0016321796834953188, "loss": 0.17, "step": 29372 }, { "epoch": 0.05208252990525533, "grad_norm": 3.15625, "learning_rate": 0.0016321316838873326, "loss": 0.1804, "step": 29374 }, { "epoch": 0.052086076070565146, "grad_norm": 3.890625, "learning_rate": 0.0016320836819521112, "loss": 0.2338, "step": 29376 }, { "epoch": 0.05208962223587496, "grad_norm": 0.263671875, "learning_rate": 0.0016320356776898647, "loss": 0.2036, "step": 29378 }, { "epoch": 0.052093168401184775, "grad_norm": 0.3515625, "learning_rate": 0.0016319876711008037, "loss": 0.1791, "step": 29380 }, { "epoch": 0.05209671456649459, "grad_norm": 2.75, "learning_rate": 0.0016319396621851372, "loss": 0.2519, "step": 29382 }, { "epoch": 0.052100260731804404, "grad_norm": 0.419921875, "learning_rate": 0.0016318916509430758, "loss": 0.2777, "step": 29384 }, { "epoch": 0.05210380689711422, "grad_norm": 0.69140625, "learning_rate": 0.0016318436373748294, "loss": 0.1787, "step": 29386 }, { "epoch": 0.05210735306242403, "grad_norm": 0.39453125, "learning_rate": 0.0016317956214806078, "loss": 0.1795, "step": 29388 }, { "epoch": 0.05211089922773385, "grad_norm": 1.8828125, "learning_rate": 0.001631747603260621, "loss": 0.302, "step": 29390 }, { "epoch": 0.05211444539304366, "grad_norm": 0.3828125, "learning_rate": 0.001631699582715079, "loss": 0.1661, "step": 29392 }, { "epoch": 0.05211799155835348, "grad_norm": 0.96875, "learning_rate": 0.0016316515598441919, "loss": 0.1707, "step": 29394 }, { "epoch": 0.0521215377236633, "grad_norm": 0.44921875, "learning_rate": 0.0016316035346481696, "loss": 0.1799, "step": 29396 }, { "epoch": 0.05212508388897311, "grad_norm": 0.416015625, "learning_rate": 0.0016315555071272224, "loss": 0.5051, "step": 29398 }, { "epoch": 0.05212863005428293, "grad_norm": 1.2421875, "learning_rate": 0.00163150747728156, "loss": 0.2236, "step": 29400 }, { "epoch": 0.05213217621959274, "grad_norm": 0.53515625, "learning_rate": 0.0016314594451113929, "loss": 0.2237, "step": 29402 }, { "epoch": 0.05213572238490256, "grad_norm": 0.390625, "learning_rate": 0.0016314114106169306, "loss": 0.1617, "step": 29404 }, { "epoch": 0.05213926855021237, "grad_norm": 1.0859375, "learning_rate": 0.0016313633737983838, "loss": 0.1486, "step": 29406 }, { "epoch": 0.052142814715522186, "grad_norm": 2.203125, "learning_rate": 0.001631315334655962, "loss": 0.2649, "step": 29408 }, { "epoch": 0.052146360880832, "grad_norm": 0.640625, "learning_rate": 0.0016312672931898756, "loss": 0.1933, "step": 29410 }, { "epoch": 0.052149907046141815, "grad_norm": 0.54296875, "learning_rate": 0.0016312192494003345, "loss": 0.1847, "step": 29412 }, { "epoch": 0.05215345321145163, "grad_norm": 5.71875, "learning_rate": 0.0016311712032875494, "loss": 0.3264, "step": 29414 }, { "epoch": 0.052156999376761444, "grad_norm": 5.84375, "learning_rate": 0.0016311231548517297, "loss": 0.262, "step": 29416 }, { "epoch": 0.05216054554207126, "grad_norm": 0.58984375, "learning_rate": 0.0016310751040930858, "loss": 0.2145, "step": 29418 }, { "epoch": 0.05216409170738108, "grad_norm": 0.44140625, "learning_rate": 0.0016310270510118282, "loss": 0.1632, "step": 29420 }, { "epoch": 0.052167637872690895, "grad_norm": 0.45703125, "learning_rate": 0.0016309789956081664, "loss": 0.1752, "step": 29422 }, { "epoch": 0.05217118403800071, "grad_norm": 0.41796875, "learning_rate": 0.001630930937882311, "loss": 0.2039, "step": 29424 }, { "epoch": 0.052174730203310524, "grad_norm": 0.62109375, "learning_rate": 0.001630882877834472, "loss": 0.1499, "step": 29426 }, { "epoch": 0.05217827636862034, "grad_norm": 0.71875, "learning_rate": 0.0016308348154648595, "loss": 0.2638, "step": 29428 }, { "epoch": 0.05218182253393015, "grad_norm": 1.015625, "learning_rate": 0.0016307867507736843, "loss": 0.1717, "step": 29430 }, { "epoch": 0.05218536869923997, "grad_norm": 0.9140625, "learning_rate": 0.001630738683761156, "loss": 0.1901, "step": 29432 }, { "epoch": 0.05218891486454978, "grad_norm": 0.84375, "learning_rate": 0.001630690614427485, "loss": 0.3374, "step": 29434 }, { "epoch": 0.052192461029859596, "grad_norm": 0.55859375, "learning_rate": 0.0016306425427728814, "loss": 0.2287, "step": 29436 }, { "epoch": 0.05219600719516941, "grad_norm": 0.80078125, "learning_rate": 0.0016305944687975556, "loss": 0.1366, "step": 29438 }, { "epoch": 0.052199553360479226, "grad_norm": 0.287109375, "learning_rate": 0.0016305463925017178, "loss": 0.2093, "step": 29440 }, { "epoch": 0.05220309952578905, "grad_norm": 0.50390625, "learning_rate": 0.0016304983138855785, "loss": 0.1907, "step": 29442 }, { "epoch": 0.05220664569109886, "grad_norm": 0.703125, "learning_rate": 0.0016304502329493474, "loss": 0.1577, "step": 29444 }, { "epoch": 0.052210191856408676, "grad_norm": 0.66015625, "learning_rate": 0.0016304021496932352, "loss": 0.1942, "step": 29446 }, { "epoch": 0.05221373802171849, "grad_norm": 6.53125, "learning_rate": 0.0016303540641174519, "loss": 0.1805, "step": 29448 }, { "epoch": 0.052217284187028305, "grad_norm": 1.1015625, "learning_rate": 0.0016303059762222084, "loss": 0.2016, "step": 29450 }, { "epoch": 0.05222083035233812, "grad_norm": 0.462890625, "learning_rate": 0.0016302578860077144, "loss": 0.1799, "step": 29452 }, { "epoch": 0.052224376517647934, "grad_norm": 0.61328125, "learning_rate": 0.0016302097934741803, "loss": 0.2282, "step": 29454 }, { "epoch": 0.05222792268295775, "grad_norm": 0.578125, "learning_rate": 0.0016301616986218168, "loss": 0.1945, "step": 29456 }, { "epoch": 0.052231468848267563, "grad_norm": 0.69140625, "learning_rate": 0.0016301136014508337, "loss": 0.1745, "step": 29458 }, { "epoch": 0.05223501501357738, "grad_norm": 0.259765625, "learning_rate": 0.0016300655019614418, "loss": 0.2347, "step": 29460 }, { "epoch": 0.05223856117888719, "grad_norm": 0.3046875, "learning_rate": 0.0016300174001538516, "loss": 0.1843, "step": 29462 }, { "epoch": 0.052242107344197014, "grad_norm": 0.85546875, "learning_rate": 0.0016299692960282726, "loss": 0.2718, "step": 29464 }, { "epoch": 0.05224565350950683, "grad_norm": 0.349609375, "learning_rate": 0.001629921189584916, "loss": 0.211, "step": 29466 }, { "epoch": 0.05224919967481664, "grad_norm": 0.51953125, "learning_rate": 0.001629873080823992, "loss": 0.1883, "step": 29468 }, { "epoch": 0.05225274584012646, "grad_norm": 2.1875, "learning_rate": 0.0016298249697457108, "loss": 0.2801, "step": 29470 }, { "epoch": 0.05225629200543627, "grad_norm": 0.7578125, "learning_rate": 0.001629776856350283, "loss": 0.1534, "step": 29472 }, { "epoch": 0.05225983817074609, "grad_norm": 0.34375, "learning_rate": 0.0016297287406379186, "loss": 0.1646, "step": 29474 }, { "epoch": 0.0522633843360559, "grad_norm": 0.1865234375, "learning_rate": 0.001629680622608829, "loss": 0.1829, "step": 29476 }, { "epoch": 0.052266930501365716, "grad_norm": 0.3828125, "learning_rate": 0.0016296325022632234, "loss": 0.2562, "step": 29478 }, { "epoch": 0.05227047666667553, "grad_norm": 0.376953125, "learning_rate": 0.0016295843796013136, "loss": 0.1795, "step": 29480 }, { "epoch": 0.052274022831985345, "grad_norm": 1.0625, "learning_rate": 0.0016295362546233088, "loss": 0.1924, "step": 29482 }, { "epoch": 0.05227756899729516, "grad_norm": 0.65625, "learning_rate": 0.0016294881273294201, "loss": 0.1864, "step": 29484 }, { "epoch": 0.052281115162604974, "grad_norm": 0.34765625, "learning_rate": 0.0016294399977198582, "loss": 0.2138, "step": 29486 }, { "epoch": 0.052284661327914796, "grad_norm": 0.64453125, "learning_rate": 0.0016293918657948332, "loss": 0.1827, "step": 29488 }, { "epoch": 0.05228820749322461, "grad_norm": 2.515625, "learning_rate": 0.0016293437315545553, "loss": 0.2159, "step": 29490 }, { "epoch": 0.052291753658534425, "grad_norm": 0.625, "learning_rate": 0.0016292955949992359, "loss": 0.1871, "step": 29492 }, { "epoch": 0.05229529982384424, "grad_norm": 0.5, "learning_rate": 0.0016292474561290845, "loss": 0.1805, "step": 29494 }, { "epoch": 0.052298845989154054, "grad_norm": 0.76171875, "learning_rate": 0.0016291993149443128, "loss": 0.1579, "step": 29496 }, { "epoch": 0.05230239215446387, "grad_norm": 0.87890625, "learning_rate": 0.0016291511714451304, "loss": 0.1807, "step": 29498 }, { "epoch": 0.05230593831977368, "grad_norm": 0.49609375, "learning_rate": 0.0016291030256317478, "loss": 0.2025, "step": 29500 }, { "epoch": 0.0523094844850835, "grad_norm": 0.44140625, "learning_rate": 0.0016290548775043762, "loss": 0.1863, "step": 29502 }, { "epoch": 0.05231303065039331, "grad_norm": 1.5, "learning_rate": 0.001629006727063226, "loss": 0.2028, "step": 29504 }, { "epoch": 0.05231657681570313, "grad_norm": 0.75390625, "learning_rate": 0.0016289585743085074, "loss": 0.2303, "step": 29506 }, { "epoch": 0.05232012298101294, "grad_norm": 0.39453125, "learning_rate": 0.0016289104192404313, "loss": 0.2042, "step": 29508 }, { "epoch": 0.05232366914632276, "grad_norm": 0.40234375, "learning_rate": 0.0016288622618592082, "loss": 0.1922, "step": 29510 }, { "epoch": 0.05232721531163258, "grad_norm": 0.74609375, "learning_rate": 0.0016288141021650491, "loss": 0.1618, "step": 29512 }, { "epoch": 0.05233076147694239, "grad_norm": 0.5, "learning_rate": 0.0016287659401581643, "loss": 0.1618, "step": 29514 }, { "epoch": 0.052334307642252206, "grad_norm": 0.357421875, "learning_rate": 0.001628717775838764, "loss": 0.2084, "step": 29516 }, { "epoch": 0.05233785380756202, "grad_norm": 0.6328125, "learning_rate": 0.0016286696092070595, "loss": 0.1842, "step": 29518 }, { "epoch": 0.052341399972871835, "grad_norm": 0.5703125, "learning_rate": 0.0016286214402632608, "loss": 0.1768, "step": 29520 }, { "epoch": 0.05234494613818165, "grad_norm": 1.3984375, "learning_rate": 0.0016285732690075794, "loss": 0.2792, "step": 29522 }, { "epoch": 0.052348492303491465, "grad_norm": 0.412109375, "learning_rate": 0.0016285250954402258, "loss": 0.2141, "step": 29524 }, { "epoch": 0.05235203846880128, "grad_norm": 1.2890625, "learning_rate": 0.0016284769195614099, "loss": 0.2979, "step": 29526 }, { "epoch": 0.052355584634111094, "grad_norm": 1.6875, "learning_rate": 0.0016284287413713433, "loss": 0.2029, "step": 29528 }, { "epoch": 0.05235913079942091, "grad_norm": 0.84375, "learning_rate": 0.0016283805608702361, "loss": 0.146, "step": 29530 }, { "epoch": 0.05236267696473073, "grad_norm": 0.53515625, "learning_rate": 0.0016283323780582994, "loss": 0.2192, "step": 29532 }, { "epoch": 0.052366223130040544, "grad_norm": 0.48046875, "learning_rate": 0.001628284192935744, "loss": 0.1646, "step": 29534 }, { "epoch": 0.05236976929535036, "grad_norm": 0.8203125, "learning_rate": 0.00162823600550278, "loss": 0.2177, "step": 29536 }, { "epoch": 0.05237331546066017, "grad_norm": 0.77734375, "learning_rate": 0.0016281878157596186, "loss": 0.2215, "step": 29538 }, { "epoch": 0.05237686162596999, "grad_norm": 0.58984375, "learning_rate": 0.0016281396237064706, "loss": 0.2279, "step": 29540 }, { "epoch": 0.0523804077912798, "grad_norm": 0.7265625, "learning_rate": 0.0016280914293435467, "loss": 0.2202, "step": 29542 }, { "epoch": 0.05238395395658962, "grad_norm": 0.455078125, "learning_rate": 0.0016280432326710576, "loss": 0.1756, "step": 29544 }, { "epoch": 0.05238750012189943, "grad_norm": 0.69140625, "learning_rate": 0.0016279950336892139, "loss": 0.2074, "step": 29546 }, { "epoch": 0.052391046287209246, "grad_norm": 0.408203125, "learning_rate": 0.0016279468323982269, "loss": 0.1944, "step": 29548 }, { "epoch": 0.05239459245251906, "grad_norm": 0.5859375, "learning_rate": 0.001627898628798307, "loss": 0.2113, "step": 29550 }, { "epoch": 0.052398138617828875, "grad_norm": 0.376953125, "learning_rate": 0.0016278504228896653, "loss": 0.2352, "step": 29552 }, { "epoch": 0.05240168478313869, "grad_norm": 0.2353515625, "learning_rate": 0.0016278022146725122, "loss": 0.1654, "step": 29554 }, { "epoch": 0.05240523094844851, "grad_norm": 0.283203125, "learning_rate": 0.001627754004147059, "loss": 0.1673, "step": 29556 }, { "epoch": 0.052408777113758326, "grad_norm": 0.287109375, "learning_rate": 0.0016277057913135166, "loss": 0.2707, "step": 29558 }, { "epoch": 0.05241232327906814, "grad_norm": 0.3125, "learning_rate": 0.001627657576172095, "loss": 0.2114, "step": 29560 }, { "epoch": 0.052415869444377955, "grad_norm": 0.341796875, "learning_rate": 0.001627609358723006, "loss": 0.3996, "step": 29562 }, { "epoch": 0.05241941560968777, "grad_norm": 0.4765625, "learning_rate": 0.0016275611389664601, "loss": 0.2247, "step": 29564 }, { "epoch": 0.052422961774997584, "grad_norm": 0.6953125, "learning_rate": 0.0016275129169026682, "loss": 0.1819, "step": 29566 }, { "epoch": 0.0524265079403074, "grad_norm": 2.078125, "learning_rate": 0.0016274646925318412, "loss": 0.2822, "step": 29568 }, { "epoch": 0.05243005410561721, "grad_norm": 1.1328125, "learning_rate": 0.00162741646585419, "loss": 0.1856, "step": 29570 }, { "epoch": 0.05243360027092703, "grad_norm": 0.78515625, "learning_rate": 0.001627368236869926, "loss": 0.2253, "step": 29572 }, { "epoch": 0.05243714643623684, "grad_norm": 0.6015625, "learning_rate": 0.0016273200055792587, "loss": 0.1541, "step": 29574 }, { "epoch": 0.05244069260154666, "grad_norm": 0.341796875, "learning_rate": 0.0016272717719824005, "loss": 0.1829, "step": 29576 }, { "epoch": 0.05244423876685648, "grad_norm": 0.67578125, "learning_rate": 0.0016272235360795622, "loss": 0.1794, "step": 29578 }, { "epoch": 0.05244778493216629, "grad_norm": 0.53125, "learning_rate": 0.001627175297870954, "loss": 0.164, "step": 29580 }, { "epoch": 0.05245133109747611, "grad_norm": 0.4765625, "learning_rate": 0.0016271270573567874, "loss": 0.2013, "step": 29582 }, { "epoch": 0.05245487726278592, "grad_norm": 0.5078125, "learning_rate": 0.0016270788145372733, "loss": 0.2372, "step": 29584 }, { "epoch": 0.052458423428095736, "grad_norm": 0.345703125, "learning_rate": 0.0016270305694126225, "loss": 0.1612, "step": 29586 }, { "epoch": 0.05246196959340555, "grad_norm": 0.7109375, "learning_rate": 0.0016269823219830463, "loss": 0.1487, "step": 29588 }, { "epoch": 0.052465515758715366, "grad_norm": 0.5078125, "learning_rate": 0.0016269340722487554, "loss": 0.2297, "step": 29590 }, { "epoch": 0.05246906192402518, "grad_norm": 0.87890625, "learning_rate": 0.0016268858202099607, "loss": 0.1796, "step": 29592 }, { "epoch": 0.052472608089334995, "grad_norm": 1.1015625, "learning_rate": 0.0016268375658668739, "loss": 0.211, "step": 29594 }, { "epoch": 0.05247615425464481, "grad_norm": 4.25, "learning_rate": 0.0016267893092197052, "loss": 0.2365, "step": 29596 }, { "epoch": 0.052479700419954624, "grad_norm": 0.412109375, "learning_rate": 0.0016267410502686663, "loss": 0.2132, "step": 29598 }, { "epoch": 0.052483246585264445, "grad_norm": 0.46484375, "learning_rate": 0.0016266927890139682, "loss": 0.2015, "step": 29600 }, { "epoch": 0.05248679275057426, "grad_norm": 2.359375, "learning_rate": 0.0016266445254558212, "loss": 0.2166, "step": 29602 }, { "epoch": 0.052490338915884074, "grad_norm": 0.859375, "learning_rate": 0.0016265962595944374, "loss": 0.1964, "step": 29604 }, { "epoch": 0.05249388508119389, "grad_norm": 0.41015625, "learning_rate": 0.0016265479914300276, "loss": 0.2009, "step": 29606 }, { "epoch": 0.052497431246503704, "grad_norm": 0.4609375, "learning_rate": 0.0016264997209628025, "loss": 0.1939, "step": 29608 }, { "epoch": 0.05250097741181352, "grad_norm": 1.2890625, "learning_rate": 0.0016264514481929735, "loss": 0.2974, "step": 29610 }, { "epoch": 0.05250452357712333, "grad_norm": 0.3046875, "learning_rate": 0.0016264031731207517, "loss": 0.2368, "step": 29612 }, { "epoch": 0.05250806974243315, "grad_norm": 0.2138671875, "learning_rate": 0.0016263548957463482, "loss": 0.2116, "step": 29614 }, { "epoch": 0.05251161590774296, "grad_norm": 0.2353515625, "learning_rate": 0.0016263066160699738, "loss": 0.1626, "step": 29616 }, { "epoch": 0.052515162073052776, "grad_norm": 0.93359375, "learning_rate": 0.0016262583340918401, "loss": 0.1594, "step": 29618 }, { "epoch": 0.05251870823836259, "grad_norm": 0.298828125, "learning_rate": 0.0016262100498121586, "loss": 0.3052, "step": 29620 }, { "epoch": 0.052522254403672405, "grad_norm": 0.96875, "learning_rate": 0.0016261617632311396, "loss": 0.3892, "step": 29622 }, { "epoch": 0.05252580056898223, "grad_norm": 0.345703125, "learning_rate": 0.0016261134743489948, "loss": 0.1799, "step": 29624 }, { "epoch": 0.05252934673429204, "grad_norm": 0.68359375, "learning_rate": 0.0016260651831659352, "loss": 0.1742, "step": 29626 }, { "epoch": 0.052532892899601856, "grad_norm": 0.9453125, "learning_rate": 0.0016260168896821718, "loss": 0.1977, "step": 29628 }, { "epoch": 0.05253643906491167, "grad_norm": 0.34375, "learning_rate": 0.0016259685938979165, "loss": 0.1951, "step": 29630 }, { "epoch": 0.052539985230221485, "grad_norm": 0.5078125, "learning_rate": 0.0016259202958133802, "loss": 0.1511, "step": 29632 }, { "epoch": 0.0525435313955313, "grad_norm": 0.427734375, "learning_rate": 0.0016258719954287736, "loss": 0.1983, "step": 29634 }, { "epoch": 0.052547077560841114, "grad_norm": 0.265625, "learning_rate": 0.0016258236927443083, "loss": 0.4247, "step": 29636 }, { "epoch": 0.05255062372615093, "grad_norm": 0.70703125, "learning_rate": 0.001625775387760196, "loss": 0.3383, "step": 29638 }, { "epoch": 0.05255416989146074, "grad_norm": 0.30078125, "learning_rate": 0.0016257270804766473, "loss": 0.1874, "step": 29640 }, { "epoch": 0.05255771605677056, "grad_norm": 1.1875, "learning_rate": 0.0016256787708938739, "loss": 0.2622, "step": 29642 }, { "epoch": 0.05256126222208037, "grad_norm": 0.470703125, "learning_rate": 0.0016256304590120867, "loss": 0.1846, "step": 29644 }, { "epoch": 0.052564808387390194, "grad_norm": 0.318359375, "learning_rate": 0.001625582144831497, "loss": 0.2108, "step": 29646 }, { "epoch": 0.05256835455270001, "grad_norm": 0.275390625, "learning_rate": 0.0016255338283523168, "loss": 0.1857, "step": 29648 }, { "epoch": 0.05257190071800982, "grad_norm": 0.59375, "learning_rate": 0.0016254855095747564, "loss": 0.2121, "step": 29650 }, { "epoch": 0.05257544688331964, "grad_norm": 0.37109375, "learning_rate": 0.0016254371884990282, "loss": 0.2481, "step": 29652 }, { "epoch": 0.05257899304862945, "grad_norm": 1.0703125, "learning_rate": 0.0016253888651253426, "loss": 0.2126, "step": 29654 }, { "epoch": 0.05258253921393927, "grad_norm": 0.2734375, "learning_rate": 0.0016253405394539115, "loss": 0.2171, "step": 29656 }, { "epoch": 0.05258608537924908, "grad_norm": 0.6640625, "learning_rate": 0.0016252922114849456, "loss": 0.152, "step": 29658 }, { "epoch": 0.052589631544558896, "grad_norm": 0.30078125, "learning_rate": 0.0016252438812186572, "loss": 0.139, "step": 29660 }, { "epoch": 0.05259317770986871, "grad_norm": 0.61328125, "learning_rate": 0.0016251955486552567, "loss": 0.1927, "step": 29662 }, { "epoch": 0.052596723875178525, "grad_norm": 0.609375, "learning_rate": 0.0016251472137949565, "loss": 0.2182, "step": 29664 }, { "epoch": 0.05260027004048834, "grad_norm": 0.2294921875, "learning_rate": 0.001625098876637967, "loss": 0.2097, "step": 29666 }, { "epoch": 0.05260381620579816, "grad_norm": 0.306640625, "learning_rate": 0.0016250505371845, "loss": 0.2595, "step": 29668 }, { "epoch": 0.052607362371107975, "grad_norm": 0.46484375, "learning_rate": 0.001625002195434767, "loss": 0.2114, "step": 29670 }, { "epoch": 0.05261090853641779, "grad_norm": 0.451171875, "learning_rate": 0.0016249538513889797, "loss": 0.2018, "step": 29672 }, { "epoch": 0.052614454701727605, "grad_norm": 0.69140625, "learning_rate": 0.0016249055050473486, "loss": 0.1507, "step": 29674 }, { "epoch": 0.05261800086703742, "grad_norm": 2.15625, "learning_rate": 0.0016248571564100862, "loss": 0.282, "step": 29676 }, { "epoch": 0.052621547032347234, "grad_norm": 0.2412109375, "learning_rate": 0.0016248088054774032, "loss": 0.1664, "step": 29678 }, { "epoch": 0.05262509319765705, "grad_norm": 0.439453125, "learning_rate": 0.0016247604522495115, "loss": 0.1773, "step": 29680 }, { "epoch": 0.05262863936296686, "grad_norm": 0.59375, "learning_rate": 0.0016247120967266225, "loss": 0.1947, "step": 29682 }, { "epoch": 0.05263218552827668, "grad_norm": 0.59765625, "learning_rate": 0.0016246637389089474, "loss": 0.2936, "step": 29684 }, { "epoch": 0.05263573169358649, "grad_norm": 1.171875, "learning_rate": 0.0016246153787966978, "loss": 0.2047, "step": 29686 }, { "epoch": 0.052639277858896306, "grad_norm": 0.640625, "learning_rate": 0.0016245670163900854, "loss": 0.2071, "step": 29688 }, { "epoch": 0.05264282402420612, "grad_norm": 0.498046875, "learning_rate": 0.0016245186516893214, "loss": 0.2114, "step": 29690 }, { "epoch": 0.05264637018951594, "grad_norm": 0.625, "learning_rate": 0.0016244702846946177, "loss": 0.1754, "step": 29692 }, { "epoch": 0.05264991635482576, "grad_norm": 0.58984375, "learning_rate": 0.0016244219154061853, "loss": 0.2319, "step": 29694 }, { "epoch": 0.05265346252013557, "grad_norm": 0.5703125, "learning_rate": 0.0016243735438242364, "loss": 0.3021, "step": 29696 }, { "epoch": 0.052657008685445386, "grad_norm": 0.984375, "learning_rate": 0.0016243251699489822, "loss": 0.2135, "step": 29698 }, { "epoch": 0.0526605548507552, "grad_norm": 2.46875, "learning_rate": 0.0016242767937806341, "loss": 0.3793, "step": 29700 }, { "epoch": 0.052664101016065015, "grad_norm": 0.3203125, "learning_rate": 0.001624228415319404, "loss": 0.1934, "step": 29702 }, { "epoch": 0.05266764718137483, "grad_norm": 0.498046875, "learning_rate": 0.0016241800345655033, "loss": 0.1993, "step": 29704 }, { "epoch": 0.052671193346684644, "grad_norm": 1.890625, "learning_rate": 0.0016241316515191436, "loss": 0.2571, "step": 29706 }, { "epoch": 0.05267473951199446, "grad_norm": 0.80859375, "learning_rate": 0.0016240832661805363, "loss": 0.1905, "step": 29708 }, { "epoch": 0.052678285677304273, "grad_norm": 0.25, "learning_rate": 0.0016240348785498933, "loss": 0.1586, "step": 29710 }, { "epoch": 0.05268183184261409, "grad_norm": 0.236328125, "learning_rate": 0.0016239864886274263, "loss": 0.203, "step": 29712 }, { "epoch": 0.05268537800792391, "grad_norm": 0.3515625, "learning_rate": 0.0016239380964133466, "loss": 0.222, "step": 29714 }, { "epoch": 0.052688924173233724, "grad_norm": 1.3671875, "learning_rate": 0.0016238897019078661, "loss": 0.2967, "step": 29716 }, { "epoch": 0.05269247033854354, "grad_norm": 0.216796875, "learning_rate": 0.0016238413051111963, "loss": 0.1596, "step": 29718 }, { "epoch": 0.05269601650385335, "grad_norm": 0.439453125, "learning_rate": 0.001623792906023549, "loss": 0.1662, "step": 29720 }, { "epoch": 0.05269956266916317, "grad_norm": 0.486328125, "learning_rate": 0.0016237445046451357, "loss": 0.2142, "step": 29722 }, { "epoch": 0.05270310883447298, "grad_norm": 0.51171875, "learning_rate": 0.0016236961009761682, "loss": 0.2055, "step": 29724 }, { "epoch": 0.0527066549997828, "grad_norm": 0.330078125, "learning_rate": 0.001623647695016858, "loss": 0.1847, "step": 29726 }, { "epoch": 0.05271020116509261, "grad_norm": 0.91796875, "learning_rate": 0.0016235992867674172, "loss": 0.2133, "step": 29728 }, { "epoch": 0.052713747330402426, "grad_norm": 0.8984375, "learning_rate": 0.001623550876228057, "loss": 0.2431, "step": 29730 }, { "epoch": 0.05271729349571224, "grad_norm": 0.8671875, "learning_rate": 0.0016235024633989897, "loss": 0.1802, "step": 29732 }, { "epoch": 0.052720839661022055, "grad_norm": 2.59375, "learning_rate": 0.0016234540482804268, "loss": 0.2533, "step": 29734 }, { "epoch": 0.05272438582633188, "grad_norm": 2.71875, "learning_rate": 0.0016234056308725795, "loss": 0.3452, "step": 29736 }, { "epoch": 0.05272793199164169, "grad_norm": 0.6171875, "learning_rate": 0.0016233572111756603, "loss": 0.1522, "step": 29738 }, { "epoch": 0.052731478156951506, "grad_norm": 0.41796875, "learning_rate": 0.0016233087891898805, "loss": 0.2215, "step": 29740 }, { "epoch": 0.05273502432226132, "grad_norm": 0.609375, "learning_rate": 0.0016232603649154523, "loss": 0.3195, "step": 29742 }, { "epoch": 0.052738570487571135, "grad_norm": 0.361328125, "learning_rate": 0.0016232119383525869, "loss": 0.2127, "step": 29744 }, { "epoch": 0.05274211665288095, "grad_norm": 0.58203125, "learning_rate": 0.0016231635095014963, "loss": 0.2001, "step": 29746 }, { "epoch": 0.052745662818190764, "grad_norm": 2.34375, "learning_rate": 0.0016231150783623926, "loss": 0.1826, "step": 29748 }, { "epoch": 0.05274920898350058, "grad_norm": 0.46875, "learning_rate": 0.0016230666449354877, "loss": 0.2034, "step": 29750 }, { "epoch": 0.05275275514881039, "grad_norm": 0.423828125, "learning_rate": 0.0016230182092209927, "loss": 0.185, "step": 29752 }, { "epoch": 0.05275630131412021, "grad_norm": 0.25, "learning_rate": 0.00162296977121912, "loss": 0.1915, "step": 29754 }, { "epoch": 0.05275984747943002, "grad_norm": 0.9609375, "learning_rate": 0.0016229213309300811, "loss": 0.1818, "step": 29756 }, { "epoch": 0.05276339364473984, "grad_norm": 1.015625, "learning_rate": 0.0016228728883540885, "loss": 0.2412, "step": 29758 }, { "epoch": 0.05276693981004966, "grad_norm": 0.3828125, "learning_rate": 0.0016228244434913534, "loss": 0.205, "step": 29760 }, { "epoch": 0.05277048597535947, "grad_norm": 0.263671875, "learning_rate": 0.0016227759963420877, "loss": 0.1706, "step": 29762 }, { "epoch": 0.05277403214066929, "grad_norm": 1.3359375, "learning_rate": 0.0016227275469065034, "loss": 0.226, "step": 29764 }, { "epoch": 0.0527775783059791, "grad_norm": 0.31640625, "learning_rate": 0.0016226790951848129, "loss": 0.1616, "step": 29766 }, { "epoch": 0.052781124471288916, "grad_norm": 0.6484375, "learning_rate": 0.0016226306411772273, "loss": 0.2207, "step": 29768 }, { "epoch": 0.05278467063659873, "grad_norm": 0.640625, "learning_rate": 0.001622582184883959, "loss": 0.2462, "step": 29770 }, { "epoch": 0.052788216801908545, "grad_norm": 0.6640625, "learning_rate": 0.0016225337263052195, "loss": 0.1971, "step": 29772 }, { "epoch": 0.05279176296721836, "grad_norm": 0.2490234375, "learning_rate": 0.0016224852654412214, "loss": 0.2246, "step": 29774 }, { "epoch": 0.052795309132528175, "grad_norm": 0.5703125, "learning_rate": 0.0016224368022921762, "loss": 0.2175, "step": 29776 }, { "epoch": 0.05279885529783799, "grad_norm": 0.66015625, "learning_rate": 0.0016223883368582956, "loss": 0.1955, "step": 29778 }, { "epoch": 0.052802401463147804, "grad_norm": 0.41796875, "learning_rate": 0.0016223398691397922, "loss": 0.2095, "step": 29780 }, { "epoch": 0.052805947628457625, "grad_norm": 0.365234375, "learning_rate": 0.0016222913991368773, "loss": 0.18, "step": 29782 }, { "epoch": 0.05280949379376744, "grad_norm": 0.71875, "learning_rate": 0.0016222429268497633, "loss": 0.1555, "step": 29784 }, { "epoch": 0.052813039959077254, "grad_norm": 0.84765625, "learning_rate": 0.0016221944522786622, "loss": 0.2272, "step": 29786 }, { "epoch": 0.05281658612438707, "grad_norm": 0.5390625, "learning_rate": 0.0016221459754237858, "loss": 0.232, "step": 29788 }, { "epoch": 0.05282013228969688, "grad_norm": 0.55859375, "learning_rate": 0.0016220974962853462, "loss": 0.1653, "step": 29790 }, { "epoch": 0.0528236784550067, "grad_norm": 0.62109375, "learning_rate": 0.0016220490148635555, "loss": 0.1785, "step": 29792 }, { "epoch": 0.05282722462031651, "grad_norm": 1.7734375, "learning_rate": 0.0016220005311586257, "loss": 0.2984, "step": 29794 }, { "epoch": 0.05283077078562633, "grad_norm": 0.380859375, "learning_rate": 0.0016219520451707685, "loss": 0.2497, "step": 29796 }, { "epoch": 0.05283431695093614, "grad_norm": 0.474609375, "learning_rate": 0.0016219035569001966, "loss": 0.2375, "step": 29798 }, { "epoch": 0.052837863116245956, "grad_norm": 2.828125, "learning_rate": 0.0016218550663471213, "loss": 0.3587, "step": 29800 }, { "epoch": 0.05284140928155577, "grad_norm": 0.23828125, "learning_rate": 0.0016218065735117554, "loss": 0.1905, "step": 29802 }, { "epoch": 0.05284495544686559, "grad_norm": 0.53125, "learning_rate": 0.0016217580783943104, "loss": 0.2421, "step": 29804 }, { "epoch": 0.05284850161217541, "grad_norm": 2.0, "learning_rate": 0.0016217095809949987, "loss": 0.2129, "step": 29806 }, { "epoch": 0.05285204777748522, "grad_norm": 0.9609375, "learning_rate": 0.0016216610813140327, "loss": 0.2363, "step": 29808 }, { "epoch": 0.052855593942795036, "grad_norm": 0.3359375, "learning_rate": 0.0016216125793516237, "loss": 0.163, "step": 29810 }, { "epoch": 0.05285914010810485, "grad_norm": 2.0, "learning_rate": 0.0016215640751079845, "loss": 0.2343, "step": 29812 }, { "epoch": 0.052862686273414665, "grad_norm": 0.48828125, "learning_rate": 0.001621515568583327, "loss": 0.1859, "step": 29814 }, { "epoch": 0.05286623243872448, "grad_norm": 0.453125, "learning_rate": 0.0016214670597778633, "loss": 0.2553, "step": 29816 }, { "epoch": 0.052869778604034294, "grad_norm": 0.58203125, "learning_rate": 0.0016214185486918054, "loss": 0.2598, "step": 29818 }, { "epoch": 0.05287332476934411, "grad_norm": 0.52734375, "learning_rate": 0.0016213700353253657, "loss": 0.2664, "step": 29820 }, { "epoch": 0.05287687093465392, "grad_norm": 0.373046875, "learning_rate": 0.0016213215196787564, "loss": 0.2222, "step": 29822 }, { "epoch": 0.05288041709996374, "grad_norm": 1.265625, "learning_rate": 0.0016212730017521897, "loss": 0.2209, "step": 29824 }, { "epoch": 0.05288396326527355, "grad_norm": 0.44921875, "learning_rate": 0.0016212244815458777, "loss": 0.14, "step": 29826 }, { "epoch": 0.052887509430583374, "grad_norm": 0.265625, "learning_rate": 0.0016211759590600327, "loss": 0.1926, "step": 29828 }, { "epoch": 0.05289105559589319, "grad_norm": 1.703125, "learning_rate": 0.0016211274342948661, "loss": 0.2439, "step": 29830 }, { "epoch": 0.052894601761203, "grad_norm": 0.9765625, "learning_rate": 0.0016210789072505917, "loss": 0.1515, "step": 29832 }, { "epoch": 0.05289814792651282, "grad_norm": 0.5546875, "learning_rate": 0.0016210303779274201, "loss": 0.1706, "step": 29834 }, { "epoch": 0.05290169409182263, "grad_norm": 0.361328125, "learning_rate": 0.0016209818463255646, "loss": 0.2086, "step": 29836 }, { "epoch": 0.052905240257132446, "grad_norm": 2.171875, "learning_rate": 0.001620933312445237, "loss": 0.2092, "step": 29838 }, { "epoch": 0.05290878642244226, "grad_norm": 0.98828125, "learning_rate": 0.0016208847762866497, "loss": 0.2348, "step": 29840 }, { "epoch": 0.052912332587752076, "grad_norm": 0.2470703125, "learning_rate": 0.0016208362378500151, "loss": 0.1826, "step": 29842 }, { "epoch": 0.05291587875306189, "grad_norm": 0.482421875, "learning_rate": 0.0016207876971355455, "loss": 0.23, "step": 29844 }, { "epoch": 0.052919424918371705, "grad_norm": 0.361328125, "learning_rate": 0.0016207391541434527, "loss": 0.1895, "step": 29846 }, { "epoch": 0.05292297108368152, "grad_norm": 0.296875, "learning_rate": 0.0016206906088739492, "loss": 0.2238, "step": 29848 }, { "epoch": 0.05292651724899134, "grad_norm": 0.271484375, "learning_rate": 0.0016206420613272476, "loss": 0.1558, "step": 29850 }, { "epoch": 0.052930063414301155, "grad_norm": 0.41796875, "learning_rate": 0.0016205935115035605, "loss": 0.2035, "step": 29852 }, { "epoch": 0.05293360957961097, "grad_norm": 0.3984375, "learning_rate": 0.0016205449594030993, "loss": 0.2166, "step": 29854 }, { "epoch": 0.052937155744920784, "grad_norm": 0.353515625, "learning_rate": 0.0016204964050260768, "loss": 0.3389, "step": 29856 }, { "epoch": 0.0529407019102306, "grad_norm": 1.40625, "learning_rate": 0.0016204478483727053, "loss": 0.1471, "step": 29858 }, { "epoch": 0.052944248075540414, "grad_norm": 0.984375, "learning_rate": 0.0016203992894431973, "loss": 0.2698, "step": 29860 }, { "epoch": 0.05294779424085023, "grad_norm": 0.42578125, "learning_rate": 0.0016203507282377654, "loss": 0.1759, "step": 29862 }, { "epoch": 0.05295134040616004, "grad_norm": 0.54296875, "learning_rate": 0.0016203021647566214, "loss": 0.2376, "step": 29864 }, { "epoch": 0.05295488657146986, "grad_norm": 0.23828125, "learning_rate": 0.001620253598999978, "loss": 0.1783, "step": 29866 }, { "epoch": 0.05295843273677967, "grad_norm": 1.9140625, "learning_rate": 0.0016202050309680476, "loss": 0.2444, "step": 29868 }, { "epoch": 0.052961978902089486, "grad_norm": 1.1328125, "learning_rate": 0.0016201564606610427, "loss": 0.272, "step": 29870 }, { "epoch": 0.05296552506739931, "grad_norm": 0.90234375, "learning_rate": 0.0016201078880791753, "loss": 0.1937, "step": 29872 }, { "epoch": 0.05296907123270912, "grad_norm": 0.55078125, "learning_rate": 0.0016200593132226582, "loss": 0.1605, "step": 29874 }, { "epoch": 0.05297261739801894, "grad_norm": 31.125, "learning_rate": 0.0016200107360917037, "loss": 0.216, "step": 29876 }, { "epoch": 0.05297616356332875, "grad_norm": 1.2265625, "learning_rate": 0.0016199621566865246, "loss": 0.4193, "step": 29878 }, { "epoch": 0.052979709728638566, "grad_norm": 0.423828125, "learning_rate": 0.0016199135750073325, "loss": 0.2499, "step": 29880 }, { "epoch": 0.05298325589394838, "grad_norm": 0.71484375, "learning_rate": 0.0016198649910543407, "loss": 0.1839, "step": 29882 }, { "epoch": 0.052986802059258195, "grad_norm": 0.455078125, "learning_rate": 0.0016198164048277616, "loss": 0.1915, "step": 29884 }, { "epoch": 0.05299034822456801, "grad_norm": 0.80078125, "learning_rate": 0.001619767816327807, "loss": 0.2381, "step": 29886 }, { "epoch": 0.052993894389877824, "grad_norm": 6.5, "learning_rate": 0.0016197192255546907, "loss": 0.2103, "step": 29888 }, { "epoch": 0.05299744055518764, "grad_norm": 0.291015625, "learning_rate": 0.001619670632508624, "loss": 0.1878, "step": 29890 }, { "epoch": 0.05300098672049745, "grad_norm": 0.5546875, "learning_rate": 0.0016196220371898196, "loss": 0.2761, "step": 29892 }, { "epoch": 0.05300453288580727, "grad_norm": 0.29296875, "learning_rate": 0.0016195734395984902, "loss": 0.1884, "step": 29894 }, { "epoch": 0.05300807905111709, "grad_norm": 0.58984375, "learning_rate": 0.0016195248397348487, "loss": 0.2003, "step": 29896 }, { "epoch": 0.053011625216426904, "grad_norm": 0.6796875, "learning_rate": 0.0016194762375991072, "loss": 0.2415, "step": 29898 }, { "epoch": 0.05301517138173672, "grad_norm": 0.44140625, "learning_rate": 0.0016194276331914783, "loss": 0.2148, "step": 29900 }, { "epoch": 0.05301871754704653, "grad_norm": 0.578125, "learning_rate": 0.0016193790265121748, "loss": 0.2437, "step": 29902 }, { "epoch": 0.05302226371235635, "grad_norm": 0.515625, "learning_rate": 0.0016193304175614088, "loss": 0.2223, "step": 29904 }, { "epoch": 0.05302580987766616, "grad_norm": 0.58203125, "learning_rate": 0.0016192818063393937, "loss": 0.4571, "step": 29906 }, { "epoch": 0.05302935604297598, "grad_norm": 0.2421875, "learning_rate": 0.0016192331928463412, "loss": 0.1659, "step": 29908 }, { "epoch": 0.05303290220828579, "grad_norm": 0.58984375, "learning_rate": 0.0016191845770824648, "loss": 0.1907, "step": 29910 }, { "epoch": 0.053036448373595606, "grad_norm": 0.470703125, "learning_rate": 0.001619135959047976, "loss": 0.2333, "step": 29912 }, { "epoch": 0.05303999453890542, "grad_norm": 0.234375, "learning_rate": 0.0016190873387430887, "loss": 0.2752, "step": 29914 }, { "epoch": 0.053043540704215235, "grad_norm": 0.46484375, "learning_rate": 0.0016190387161680144, "loss": 0.1899, "step": 29916 }, { "epoch": 0.053047086869525056, "grad_norm": 1.0625, "learning_rate": 0.0016189900913229666, "loss": 0.1774, "step": 29918 }, { "epoch": 0.05305063303483487, "grad_norm": 0.376953125, "learning_rate": 0.0016189414642081574, "loss": 0.1977, "step": 29920 }, { "epoch": 0.053054179200144685, "grad_norm": 0.498046875, "learning_rate": 0.0016188928348238, "loss": 0.1784, "step": 29922 }, { "epoch": 0.0530577253654545, "grad_norm": 2.015625, "learning_rate": 0.001618844203170106, "loss": 0.2859, "step": 29924 }, { "epoch": 0.053061271530764315, "grad_norm": 0.5234375, "learning_rate": 0.0016187955692472896, "loss": 0.2218, "step": 29926 }, { "epoch": 0.05306481769607413, "grad_norm": 0.2890625, "learning_rate": 0.0016187469330555624, "loss": 0.1677, "step": 29928 }, { "epoch": 0.053068363861383944, "grad_norm": 0.326171875, "learning_rate": 0.0016186982945951376, "loss": 0.2135, "step": 29930 }, { "epoch": 0.05307191002669376, "grad_norm": 0.326171875, "learning_rate": 0.0016186496538662277, "loss": 0.2232, "step": 29932 }, { "epoch": 0.05307545619200357, "grad_norm": 1.8984375, "learning_rate": 0.0016186010108690454, "loss": 0.4095, "step": 29934 }, { "epoch": 0.05307900235731339, "grad_norm": 0.443359375, "learning_rate": 0.0016185523656038034, "loss": 0.2274, "step": 29936 }, { "epoch": 0.0530825485226232, "grad_norm": 2.078125, "learning_rate": 0.001618503718070715, "loss": 0.2847, "step": 29938 }, { "epoch": 0.05308609468793302, "grad_norm": 0.267578125, "learning_rate": 0.001618455068269992, "loss": 0.1717, "step": 29940 }, { "epoch": 0.05308964085324284, "grad_norm": 0.451171875, "learning_rate": 0.001618406416201848, "loss": 0.1974, "step": 29942 }, { "epoch": 0.05309318701855265, "grad_norm": 0.375, "learning_rate": 0.0016183577618664954, "loss": 0.1414, "step": 29944 }, { "epoch": 0.05309673318386247, "grad_norm": 0.50390625, "learning_rate": 0.001618309105264147, "loss": 0.2029, "step": 29946 }, { "epoch": 0.05310027934917228, "grad_norm": 0.400390625, "learning_rate": 0.0016182604463950156, "loss": 0.2113, "step": 29948 }, { "epoch": 0.053103825514482096, "grad_norm": 0.466796875, "learning_rate": 0.0016182117852593142, "loss": 0.1709, "step": 29950 }, { "epoch": 0.05310737167979191, "grad_norm": 0.3828125, "learning_rate": 0.0016181631218572551, "loss": 0.1845, "step": 29952 }, { "epoch": 0.053110917845101725, "grad_norm": 0.6328125, "learning_rate": 0.001618114456189052, "loss": 0.2254, "step": 29954 }, { "epoch": 0.05311446401041154, "grad_norm": 1.21875, "learning_rate": 0.001618065788254917, "loss": 0.2466, "step": 29956 }, { "epoch": 0.053118010175721354, "grad_norm": 0.283203125, "learning_rate": 0.0016180171180550634, "loss": 0.1719, "step": 29958 }, { "epoch": 0.05312155634103117, "grad_norm": 0.474609375, "learning_rate": 0.0016179684455897034, "loss": 0.2249, "step": 29960 }, { "epoch": 0.05312510250634098, "grad_norm": 1.46875, "learning_rate": 0.0016179197708590504, "loss": 0.2285, "step": 29962 }, { "epoch": 0.053128648671650805, "grad_norm": 0.8828125, "learning_rate": 0.0016178710938633174, "loss": 0.19, "step": 29964 }, { "epoch": 0.05313219483696062, "grad_norm": 0.384765625, "learning_rate": 0.001617822414602717, "loss": 0.1839, "step": 29966 }, { "epoch": 0.053135741002270434, "grad_norm": 0.6328125, "learning_rate": 0.001617773733077462, "loss": 0.1799, "step": 29968 }, { "epoch": 0.05313928716758025, "grad_norm": 1.6328125, "learning_rate": 0.0016177250492877655, "loss": 0.3135, "step": 29970 }, { "epoch": 0.05314283333289006, "grad_norm": 0.40625, "learning_rate": 0.0016176763632338403, "loss": 0.1667, "step": 29972 }, { "epoch": 0.05314637949819988, "grad_norm": 0.384765625, "learning_rate": 0.0016176276749158994, "loss": 0.1796, "step": 29974 }, { "epoch": 0.05314992566350969, "grad_norm": 0.50390625, "learning_rate": 0.001617578984334156, "loss": 0.1526, "step": 29976 }, { "epoch": 0.05315347182881951, "grad_norm": 0.408203125, "learning_rate": 0.0016175302914888223, "loss": 0.2037, "step": 29978 }, { "epoch": 0.05315701799412932, "grad_norm": 13.625, "learning_rate": 0.0016174815963801122, "loss": 0.5022, "step": 29980 }, { "epoch": 0.053160564159439136, "grad_norm": 0.375, "learning_rate": 0.0016174328990082379, "loss": 0.1799, "step": 29982 }, { "epoch": 0.05316411032474895, "grad_norm": 0.59765625, "learning_rate": 0.0016173841993734129, "loss": 0.2383, "step": 29984 }, { "epoch": 0.05316765649005877, "grad_norm": 2.765625, "learning_rate": 0.0016173354974758495, "loss": 0.5299, "step": 29986 }, { "epoch": 0.05317120265536859, "grad_norm": 1.5078125, "learning_rate": 0.0016172867933157615, "loss": 0.2048, "step": 29988 }, { "epoch": 0.0531747488206784, "grad_norm": 0.384765625, "learning_rate": 0.0016172380868933614, "loss": 0.2437, "step": 29990 }, { "epoch": 0.053178294985988216, "grad_norm": 3.96875, "learning_rate": 0.0016171893782088624, "loss": 0.2554, "step": 29992 }, { "epoch": 0.05318184115129803, "grad_norm": 0.5, "learning_rate": 0.0016171406672624775, "loss": 0.1529, "step": 29994 }, { "epoch": 0.053185387316607845, "grad_norm": 0.53515625, "learning_rate": 0.0016170919540544196, "loss": 0.2231, "step": 29996 }, { "epoch": 0.05318893348191766, "grad_norm": 0.56640625, "learning_rate": 0.001617043238584902, "loss": 0.2097, "step": 29998 }, { "epoch": 0.053192479647227474, "grad_norm": 0.54296875, "learning_rate": 0.0016169945208541375, "loss": 0.1765, "step": 30000 }, { "epoch": 0.05319602581253729, "grad_norm": 0.267578125, "learning_rate": 0.0016169458008623394, "loss": 0.1791, "step": 30002 }, { "epoch": 0.0531995719778471, "grad_norm": 0.412109375, "learning_rate": 0.0016168970786097205, "loss": 0.519, "step": 30004 }, { "epoch": 0.05320311814315692, "grad_norm": 0.5234375, "learning_rate": 0.001616848354096494, "loss": 0.1665, "step": 30006 }, { "epoch": 0.05320666430846674, "grad_norm": 1.453125, "learning_rate": 0.0016167996273228733, "loss": 0.2041, "step": 30008 }, { "epoch": 0.053210210473776554, "grad_norm": 0.3671875, "learning_rate": 0.0016167508982890709, "loss": 0.1714, "step": 30010 }, { "epoch": 0.05321375663908637, "grad_norm": 0.6015625, "learning_rate": 0.0016167021669953006, "loss": 0.1902, "step": 30012 }, { "epoch": 0.05321730280439618, "grad_norm": 1.015625, "learning_rate": 0.0016166534334417747, "loss": 0.1505, "step": 30014 }, { "epoch": 0.053220848969706, "grad_norm": 0.392578125, "learning_rate": 0.0016166046976287072, "loss": 0.1893, "step": 30016 }, { "epoch": 0.05322439513501581, "grad_norm": 0.89453125, "learning_rate": 0.0016165559595563105, "loss": 0.1841, "step": 30018 }, { "epoch": 0.053227941300325626, "grad_norm": 0.64453125, "learning_rate": 0.0016165072192247986, "loss": 0.1735, "step": 30020 }, { "epoch": 0.05323148746563544, "grad_norm": 2.234375, "learning_rate": 0.0016164584766343833, "loss": 0.2483, "step": 30022 }, { "epoch": 0.053235033630945255, "grad_norm": 0.8984375, "learning_rate": 0.0016164097317852796, "loss": 0.2257, "step": 30024 }, { "epoch": 0.05323857979625507, "grad_norm": 1.8359375, "learning_rate": 0.001616360984677699, "loss": 0.199, "step": 30026 }, { "epoch": 0.053242125961564885, "grad_norm": 0.5078125, "learning_rate": 0.0016163122353118555, "loss": 0.2051, "step": 30028 }, { "epoch": 0.0532456721268747, "grad_norm": 0.75, "learning_rate": 0.0016162634836879622, "loss": 0.2237, "step": 30030 }, { "epoch": 0.05324921829218452, "grad_norm": 0.59765625, "learning_rate": 0.0016162147298062323, "loss": 0.221, "step": 30032 }, { "epoch": 0.053252764457494335, "grad_norm": 0.22265625, "learning_rate": 0.0016161659736668795, "loss": 0.3134, "step": 30034 }, { "epoch": 0.05325631062280415, "grad_norm": 2.59375, "learning_rate": 0.001616117215270116, "loss": 0.241, "step": 30036 }, { "epoch": 0.053259856788113964, "grad_norm": 0.671875, "learning_rate": 0.0016160684546161557, "loss": 0.1838, "step": 30038 }, { "epoch": 0.05326340295342378, "grad_norm": 0.62109375, "learning_rate": 0.001616019691705212, "loss": 0.2569, "step": 30040 }, { "epoch": 0.05326694911873359, "grad_norm": 0.640625, "learning_rate": 0.0016159709265374978, "loss": 0.1877, "step": 30042 }, { "epoch": 0.05327049528404341, "grad_norm": 0.67578125, "learning_rate": 0.0016159221591132262, "loss": 0.284, "step": 30044 }, { "epoch": 0.05327404144935322, "grad_norm": 0.8828125, "learning_rate": 0.001615873389432611, "loss": 0.1867, "step": 30046 }, { "epoch": 0.05327758761466304, "grad_norm": 1.265625, "learning_rate": 0.0016158246174958653, "loss": 0.2916, "step": 30048 }, { "epoch": 0.05328113377997285, "grad_norm": 1.6015625, "learning_rate": 0.001615775843303202, "loss": 0.1687, "step": 30050 }, { "epoch": 0.053284679945282666, "grad_norm": 1.0234375, "learning_rate": 0.0016157270668548352, "loss": 0.135, "step": 30052 }, { "epoch": 0.05328822611059249, "grad_norm": 1.875, "learning_rate": 0.0016156782881509774, "loss": 0.2789, "step": 30054 }, { "epoch": 0.0532917722759023, "grad_norm": 0.84765625, "learning_rate": 0.0016156295071918425, "loss": 0.1831, "step": 30056 }, { "epoch": 0.05329531844121212, "grad_norm": 1.2578125, "learning_rate": 0.0016155807239776438, "loss": 0.2967, "step": 30058 }, { "epoch": 0.05329886460652193, "grad_norm": 0.421875, "learning_rate": 0.0016155319385085943, "loss": 0.2206, "step": 30060 }, { "epoch": 0.053302410771831746, "grad_norm": 1.984375, "learning_rate": 0.0016154831507849078, "loss": 0.1807, "step": 30062 }, { "epoch": 0.05330595693714156, "grad_norm": 1.3515625, "learning_rate": 0.0016154343608067972, "loss": 0.2563, "step": 30064 }, { "epoch": 0.053309503102451375, "grad_norm": 0.953125, "learning_rate": 0.0016153855685744761, "loss": 0.1797, "step": 30066 }, { "epoch": 0.05331304926776119, "grad_norm": 1.6484375, "learning_rate": 0.001615336774088158, "loss": 0.3054, "step": 30068 }, { "epoch": 0.053316595433071004, "grad_norm": 0.216796875, "learning_rate": 0.0016152879773480562, "loss": 0.1894, "step": 30070 }, { "epoch": 0.05332014159838082, "grad_norm": 0.4296875, "learning_rate": 0.0016152391783543839, "loss": 0.2619, "step": 30072 }, { "epoch": 0.05332368776369063, "grad_norm": 0.6015625, "learning_rate": 0.0016151903771073547, "loss": 0.1696, "step": 30074 }, { "epoch": 0.053327233929000455, "grad_norm": 1.2421875, "learning_rate": 0.0016151415736071824, "loss": 0.2048, "step": 30076 }, { "epoch": 0.05333078009431027, "grad_norm": 0.412109375, "learning_rate": 0.0016150927678540798, "loss": 0.1362, "step": 30078 }, { "epoch": 0.053334326259620084, "grad_norm": 0.79296875, "learning_rate": 0.0016150439598482606, "loss": 0.2515, "step": 30080 }, { "epoch": 0.0533378724249299, "grad_norm": 0.28515625, "learning_rate": 0.0016149951495899387, "loss": 0.2648, "step": 30082 }, { "epoch": 0.05334141859023971, "grad_norm": 0.255859375, "learning_rate": 0.0016149463370793266, "loss": 0.2562, "step": 30084 }, { "epoch": 0.05334496475554953, "grad_norm": 0.6640625, "learning_rate": 0.0016148975223166388, "loss": 0.2949, "step": 30086 }, { "epoch": 0.05334851092085934, "grad_norm": 0.91796875, "learning_rate": 0.0016148487053020881, "loss": 0.2062, "step": 30088 }, { "epoch": 0.053352057086169156, "grad_norm": 1.4375, "learning_rate": 0.0016147998860358884, "loss": 0.4868, "step": 30090 }, { "epoch": 0.05335560325147897, "grad_norm": 0.353515625, "learning_rate": 0.001614751064518253, "loss": 0.2011, "step": 30092 }, { "epoch": 0.053359149416788786, "grad_norm": 0.205078125, "learning_rate": 0.001614702240749395, "loss": 0.1211, "step": 30094 }, { "epoch": 0.0533626955820986, "grad_norm": 0.8828125, "learning_rate": 0.001614653414729529, "loss": 0.293, "step": 30096 }, { "epoch": 0.053366241747408415, "grad_norm": 1.0234375, "learning_rate": 0.0016146045864588675, "loss": 0.195, "step": 30098 }, { "epoch": 0.053369787912718236, "grad_norm": 0.318359375, "learning_rate": 0.0016145557559376245, "loss": 0.1289, "step": 30100 }, { "epoch": 0.05337333407802805, "grad_norm": 2.109375, "learning_rate": 0.0016145069231660138, "loss": 0.2366, "step": 30102 }, { "epoch": 0.053376880243337865, "grad_norm": 0.76171875, "learning_rate": 0.0016144580881442484, "loss": 0.1691, "step": 30104 }, { "epoch": 0.05338042640864768, "grad_norm": 0.283203125, "learning_rate": 0.0016144092508725424, "loss": 0.1592, "step": 30106 }, { "epoch": 0.053383972573957494, "grad_norm": 0.515625, "learning_rate": 0.0016143604113511088, "loss": 0.2334, "step": 30108 }, { "epoch": 0.05338751873926731, "grad_norm": 0.28125, "learning_rate": 0.0016143115695801617, "loss": 0.1757, "step": 30110 }, { "epoch": 0.053391064904577124, "grad_norm": 0.578125, "learning_rate": 0.0016142627255599144, "loss": 0.1853, "step": 30112 }, { "epoch": 0.05339461106988694, "grad_norm": 0.357421875, "learning_rate": 0.0016142138792905809, "loss": 0.1895, "step": 30114 }, { "epoch": 0.05339815723519675, "grad_norm": 1.2578125, "learning_rate": 0.0016141650307723745, "loss": 0.1871, "step": 30116 }, { "epoch": 0.05340170340050657, "grad_norm": 0.42578125, "learning_rate": 0.001614116180005509, "loss": 0.2007, "step": 30118 }, { "epoch": 0.05340524956581638, "grad_norm": 1.203125, "learning_rate": 0.0016140673269901978, "loss": 0.205, "step": 30120 }, { "epoch": 0.0534087957311262, "grad_norm": 0.36328125, "learning_rate": 0.0016140184717266547, "loss": 0.3975, "step": 30122 }, { "epoch": 0.05341234189643602, "grad_norm": 0.58203125, "learning_rate": 0.0016139696142150938, "loss": 0.163, "step": 30124 }, { "epoch": 0.05341588806174583, "grad_norm": 0.443359375, "learning_rate": 0.0016139207544557277, "loss": 0.18, "step": 30126 }, { "epoch": 0.05341943422705565, "grad_norm": 1.3515625, "learning_rate": 0.0016138718924487711, "loss": 0.1896, "step": 30128 }, { "epoch": 0.05342298039236546, "grad_norm": 0.8203125, "learning_rate": 0.0016138230281944376, "loss": 0.2709, "step": 30130 }, { "epoch": 0.053426526557675276, "grad_norm": 0.69140625, "learning_rate": 0.0016137741616929402, "loss": 0.2834, "step": 30132 }, { "epoch": 0.05343007272298509, "grad_norm": 0.9921875, "learning_rate": 0.0016137252929444932, "loss": 0.1855, "step": 30134 }, { "epoch": 0.053433618888294905, "grad_norm": 0.25, "learning_rate": 0.0016136764219493102, "loss": 0.1867, "step": 30136 }, { "epoch": 0.05343716505360472, "grad_norm": 0.41796875, "learning_rate": 0.0016136275487076048, "loss": 0.2201, "step": 30138 }, { "epoch": 0.053440711218914534, "grad_norm": 1.953125, "learning_rate": 0.001613578673219591, "loss": 0.287, "step": 30140 }, { "epoch": 0.05344425738422435, "grad_norm": 2.046875, "learning_rate": 0.0016135297954854822, "loss": 0.1829, "step": 30142 }, { "epoch": 0.05344780354953417, "grad_norm": 0.6796875, "learning_rate": 0.0016134809155054926, "loss": 0.1335, "step": 30144 }, { "epoch": 0.053451349714843985, "grad_norm": 2.28125, "learning_rate": 0.0016134320332798356, "loss": 0.2341, "step": 30146 }, { "epoch": 0.0534548958801538, "grad_norm": 1.8671875, "learning_rate": 0.0016133831488087255, "loss": 0.1898, "step": 30148 }, { "epoch": 0.053458442045463614, "grad_norm": 0.6875, "learning_rate": 0.001613334262092375, "loss": 0.1827, "step": 30150 }, { "epoch": 0.05346198821077343, "grad_norm": 0.6328125, "learning_rate": 0.0016132853731309993, "loss": 0.1941, "step": 30152 }, { "epoch": 0.05346553437608324, "grad_norm": 0.63671875, "learning_rate": 0.0016132364819248111, "loss": 0.4495, "step": 30154 }, { "epoch": 0.05346908054139306, "grad_norm": 1.796875, "learning_rate": 0.0016131875884740248, "loss": 0.4899, "step": 30156 }, { "epoch": 0.05347262670670287, "grad_norm": 2.0625, "learning_rate": 0.0016131386927788539, "loss": 0.2207, "step": 30158 }, { "epoch": 0.05347617287201269, "grad_norm": 2.78125, "learning_rate": 0.0016130897948395126, "loss": 0.2931, "step": 30160 }, { "epoch": 0.0534797190373225, "grad_norm": 0.4375, "learning_rate": 0.0016130408946562148, "loss": 0.1933, "step": 30162 }, { "epoch": 0.053483265202632316, "grad_norm": 0.3828125, "learning_rate": 0.0016129919922291739, "loss": 0.152, "step": 30164 }, { "epoch": 0.05348681136794213, "grad_norm": 0.796875, "learning_rate": 0.001612943087558604, "loss": 0.2469, "step": 30166 }, { "epoch": 0.05349035753325195, "grad_norm": 0.51171875, "learning_rate": 0.0016128941806447192, "loss": 0.2912, "step": 30168 }, { "epoch": 0.053493903698561766, "grad_norm": 0.51953125, "learning_rate": 0.0016128452714877328, "loss": 0.1329, "step": 30170 }, { "epoch": 0.05349744986387158, "grad_norm": 0.490234375, "learning_rate": 0.0016127963600878592, "loss": 0.2066, "step": 30172 }, { "epoch": 0.053500996029181395, "grad_norm": 0.94140625, "learning_rate": 0.0016127474464453122, "loss": 0.1938, "step": 30174 }, { "epoch": 0.05350454219449121, "grad_norm": 0.58203125, "learning_rate": 0.0016126985305603056, "loss": 0.2503, "step": 30176 }, { "epoch": 0.053508088359801025, "grad_norm": 0.470703125, "learning_rate": 0.0016126496124330538, "loss": 0.1586, "step": 30178 }, { "epoch": 0.05351163452511084, "grad_norm": 0.890625, "learning_rate": 0.0016126006920637703, "loss": 0.1956, "step": 30180 }, { "epoch": 0.053515180690420654, "grad_norm": 0.66796875, "learning_rate": 0.001612551769452669, "loss": 0.2153, "step": 30182 }, { "epoch": 0.05351872685573047, "grad_norm": 0.30859375, "learning_rate": 0.0016125028445999638, "loss": 0.2065, "step": 30184 }, { "epoch": 0.05352227302104028, "grad_norm": 0.26953125, "learning_rate": 0.001612453917505869, "loss": 0.1392, "step": 30186 }, { "epoch": 0.0535258191863501, "grad_norm": 0.244140625, "learning_rate": 0.0016124049881705985, "loss": 0.1874, "step": 30188 }, { "epoch": 0.05352936535165992, "grad_norm": 1.28125, "learning_rate": 0.0016123560565943662, "loss": 0.3, "step": 30190 }, { "epoch": 0.05353291151696973, "grad_norm": 0.92578125, "learning_rate": 0.0016123071227773862, "loss": 0.2318, "step": 30192 }, { "epoch": 0.05353645768227955, "grad_norm": 1.7734375, "learning_rate": 0.0016122581867198723, "loss": 0.1831, "step": 30194 }, { "epoch": 0.05354000384758936, "grad_norm": 0.72265625, "learning_rate": 0.0016122092484220387, "loss": 0.2002, "step": 30196 }, { "epoch": 0.05354355001289918, "grad_norm": 1.5234375, "learning_rate": 0.0016121603078840994, "loss": 0.3693, "step": 30198 }, { "epoch": 0.05354709617820899, "grad_norm": 1.1171875, "learning_rate": 0.0016121113651062684, "loss": 0.236, "step": 30200 }, { "epoch": 0.053550642343518806, "grad_norm": 1.1640625, "learning_rate": 0.0016120624200887594, "loss": 0.1791, "step": 30202 }, { "epoch": 0.05355418850882862, "grad_norm": 0.33984375, "learning_rate": 0.0016120134728317874, "loss": 0.2037, "step": 30204 }, { "epoch": 0.053557734674138435, "grad_norm": 0.54296875, "learning_rate": 0.0016119645233355654, "loss": 0.2192, "step": 30206 }, { "epoch": 0.05356128083944825, "grad_norm": 0.455078125, "learning_rate": 0.0016119155716003085, "loss": 0.2055, "step": 30208 }, { "epoch": 0.053564827004758064, "grad_norm": 1.53125, "learning_rate": 0.0016118666176262297, "loss": 0.4839, "step": 30210 }, { "epoch": 0.053568373170067886, "grad_norm": 0.828125, "learning_rate": 0.001611817661413544, "loss": 0.1697, "step": 30212 }, { "epoch": 0.0535719193353777, "grad_norm": 1.0234375, "learning_rate": 0.0016117687029624651, "loss": 0.2439, "step": 30214 }, { "epoch": 0.053575465500687515, "grad_norm": 0.416015625, "learning_rate": 0.0016117197422732073, "loss": 0.1705, "step": 30216 }, { "epoch": 0.05357901166599733, "grad_norm": 0.177734375, "learning_rate": 0.0016116707793459842, "loss": 0.1517, "step": 30218 }, { "epoch": 0.053582557831307144, "grad_norm": 0.4140625, "learning_rate": 0.0016116218141810105, "loss": 0.2059, "step": 30220 }, { "epoch": 0.05358610399661696, "grad_norm": 0.625, "learning_rate": 0.0016115728467785, "loss": 0.2248, "step": 30222 }, { "epoch": 0.05358965016192677, "grad_norm": 0.435546875, "learning_rate": 0.0016115238771386676, "loss": 0.24, "step": 30224 }, { "epoch": 0.05359319632723659, "grad_norm": 1.0, "learning_rate": 0.0016114749052617264, "loss": 0.1943, "step": 30226 }, { "epoch": 0.0535967424925464, "grad_norm": 0.8203125, "learning_rate": 0.0016114259311478907, "loss": 0.2096, "step": 30228 }, { "epoch": 0.05360028865785622, "grad_norm": 1.828125, "learning_rate": 0.0016113769547973758, "loss": 0.2475, "step": 30230 }, { "epoch": 0.05360383482316603, "grad_norm": 0.71875, "learning_rate": 0.0016113279762103947, "loss": 0.1708, "step": 30232 }, { "epoch": 0.053607380988475846, "grad_norm": 1.8359375, "learning_rate": 0.0016112789953871623, "loss": 0.2306, "step": 30234 }, { "epoch": 0.05361092715378567, "grad_norm": 1.0390625, "learning_rate": 0.0016112300123278924, "loss": 0.2153, "step": 30236 }, { "epoch": 0.05361447331909548, "grad_norm": 0.400390625, "learning_rate": 0.0016111810270327996, "loss": 0.1798, "step": 30238 }, { "epoch": 0.053618019484405297, "grad_norm": 0.34375, "learning_rate": 0.0016111320395020977, "loss": 0.2075, "step": 30240 }, { "epoch": 0.05362156564971511, "grad_norm": 0.80859375, "learning_rate": 0.001611083049736001, "loss": 0.1727, "step": 30242 }, { "epoch": 0.053625111815024926, "grad_norm": 0.25, "learning_rate": 0.0016110340577347242, "loss": 0.1972, "step": 30244 }, { "epoch": 0.05362865798033474, "grad_norm": 1.9375, "learning_rate": 0.0016109850634984813, "loss": 0.3131, "step": 30246 }, { "epoch": 0.053632204145644555, "grad_norm": 0.77734375, "learning_rate": 0.0016109360670274863, "loss": 0.2086, "step": 30248 }, { "epoch": 0.05363575031095437, "grad_norm": 0.8984375, "learning_rate": 0.001610887068321954, "loss": 0.2136, "step": 30250 }, { "epoch": 0.053639296476264184, "grad_norm": 0.455078125, "learning_rate": 0.0016108380673820985, "loss": 0.3134, "step": 30252 }, { "epoch": 0.053642842641574, "grad_norm": 0.3203125, "learning_rate": 0.0016107890642081337, "loss": 0.2045, "step": 30254 }, { "epoch": 0.05364638880688381, "grad_norm": 2.578125, "learning_rate": 0.0016107400588002742, "loss": 0.3101, "step": 30256 }, { "epoch": 0.053649934972193634, "grad_norm": 0.71875, "learning_rate": 0.0016106910511587344, "loss": 0.277, "step": 30258 }, { "epoch": 0.05365348113750345, "grad_norm": 0.53125, "learning_rate": 0.0016106420412837288, "loss": 0.1468, "step": 30260 }, { "epoch": 0.053657027302813264, "grad_norm": 0.96875, "learning_rate": 0.0016105930291754715, "loss": 0.168, "step": 30262 }, { "epoch": 0.05366057346812308, "grad_norm": 0.3828125, "learning_rate": 0.0016105440148341769, "loss": 0.1657, "step": 30264 }, { "epoch": 0.05366411963343289, "grad_norm": 0.333984375, "learning_rate": 0.0016104949982600591, "loss": 0.2149, "step": 30266 }, { "epoch": 0.05366766579874271, "grad_norm": 0.625, "learning_rate": 0.0016104459794533327, "loss": 0.1907, "step": 30268 }, { "epoch": 0.05367121196405252, "grad_norm": 2.546875, "learning_rate": 0.0016103969584142124, "loss": 0.4859, "step": 30270 }, { "epoch": 0.053674758129362336, "grad_norm": 0.322265625, "learning_rate": 0.0016103479351429121, "loss": 0.2418, "step": 30272 }, { "epoch": 0.05367830429467215, "grad_norm": 3.453125, "learning_rate": 0.0016102989096396466, "loss": 0.2072, "step": 30274 }, { "epoch": 0.053681850459981965, "grad_norm": 0.275390625, "learning_rate": 0.0016102498819046298, "loss": 0.2053, "step": 30276 }, { "epoch": 0.05368539662529178, "grad_norm": 0.4765625, "learning_rate": 0.0016102008519380765, "loss": 0.1824, "step": 30278 }, { "epoch": 0.0536889427906016, "grad_norm": 1.046875, "learning_rate": 0.001610151819740201, "loss": 0.2191, "step": 30280 }, { "epoch": 0.053692488955911416, "grad_norm": 0.23828125, "learning_rate": 0.0016101027853112177, "loss": 0.153, "step": 30282 }, { "epoch": 0.05369603512122123, "grad_norm": 0.279296875, "learning_rate": 0.0016100537486513414, "loss": 0.2053, "step": 30284 }, { "epoch": 0.053699581286531045, "grad_norm": 0.3046875, "learning_rate": 0.001610004709760786, "loss": 0.1755, "step": 30286 }, { "epoch": 0.05370312745184086, "grad_norm": 0.73828125, "learning_rate": 0.0016099556686397663, "loss": 0.3132, "step": 30288 }, { "epoch": 0.053706673617150674, "grad_norm": 0.46875, "learning_rate": 0.001609906625288497, "loss": 0.1979, "step": 30290 }, { "epoch": 0.05371021978246049, "grad_norm": 1.953125, "learning_rate": 0.0016098575797071919, "loss": 0.1899, "step": 30292 }, { "epoch": 0.0537137659477703, "grad_norm": 0.3828125, "learning_rate": 0.0016098085318960662, "loss": 0.1634, "step": 30294 }, { "epoch": 0.05371731211308012, "grad_norm": 1.671875, "learning_rate": 0.0016097594818553337, "loss": 0.173, "step": 30296 }, { "epoch": 0.05372085827838993, "grad_norm": 0.39453125, "learning_rate": 0.0016097104295852097, "loss": 0.139, "step": 30298 }, { "epoch": 0.05372440444369975, "grad_norm": 0.5546875, "learning_rate": 0.0016096613750859084, "loss": 0.1768, "step": 30300 }, { "epoch": 0.05372795060900956, "grad_norm": 0.6015625, "learning_rate": 0.0016096123183576443, "loss": 0.2449, "step": 30302 }, { "epoch": 0.05373149677431938, "grad_norm": 0.4765625, "learning_rate": 0.0016095632594006315, "loss": 0.1766, "step": 30304 }, { "epoch": 0.0537350429396292, "grad_norm": 0.61328125, "learning_rate": 0.0016095141982150854, "loss": 0.1757, "step": 30306 }, { "epoch": 0.05373858910493901, "grad_norm": 0.27734375, "learning_rate": 0.0016094651348012201, "loss": 0.1242, "step": 30308 }, { "epoch": 0.05374213527024883, "grad_norm": 1.09375, "learning_rate": 0.0016094160691592502, "loss": 0.2557, "step": 30310 }, { "epoch": 0.05374568143555864, "grad_norm": 6.9375, "learning_rate": 0.0016093670012893903, "loss": 0.3109, "step": 30312 }, { "epoch": 0.053749227600868456, "grad_norm": 1.375, "learning_rate": 0.0016093179311918548, "loss": 0.3202, "step": 30314 }, { "epoch": 0.05375277376617827, "grad_norm": 0.50390625, "learning_rate": 0.001609268858866859, "loss": 0.1697, "step": 30316 }, { "epoch": 0.053756319931488085, "grad_norm": 0.546875, "learning_rate": 0.0016092197843146168, "loss": 0.1813, "step": 30318 }, { "epoch": 0.0537598660967979, "grad_norm": 0.205078125, "learning_rate": 0.0016091707075353429, "loss": 0.2039, "step": 30320 }, { "epoch": 0.053763412262107714, "grad_norm": 0.283203125, "learning_rate": 0.0016091216285292521, "loss": 0.2334, "step": 30322 }, { "epoch": 0.05376695842741753, "grad_norm": 0.44921875, "learning_rate": 0.0016090725472965592, "loss": 0.1955, "step": 30324 }, { "epoch": 0.05377050459272735, "grad_norm": 0.404296875, "learning_rate": 0.0016090234638374786, "loss": 0.1502, "step": 30326 }, { "epoch": 0.053774050758037165, "grad_norm": 0.4609375, "learning_rate": 0.001608974378152225, "loss": 0.4652, "step": 30328 }, { "epoch": 0.05377759692334698, "grad_norm": 0.45703125, "learning_rate": 0.0016089252902410133, "loss": 0.2246, "step": 30330 }, { "epoch": 0.053781143088656794, "grad_norm": 0.5234375, "learning_rate": 0.0016088762001040579, "loss": 0.1789, "step": 30332 }, { "epoch": 0.05378468925396661, "grad_norm": 0.640625, "learning_rate": 0.0016088271077415734, "loss": 0.2032, "step": 30334 }, { "epoch": 0.05378823541927642, "grad_norm": 1.671875, "learning_rate": 0.001608778013153775, "loss": 0.2143, "step": 30336 }, { "epoch": 0.05379178158458624, "grad_norm": 1.7890625, "learning_rate": 0.0016087289163408771, "loss": 0.2351, "step": 30338 }, { "epoch": 0.05379532774989605, "grad_norm": 0.1953125, "learning_rate": 0.0016086798173030941, "loss": 0.1614, "step": 30340 }, { "epoch": 0.053798873915205866, "grad_norm": 0.27734375, "learning_rate": 0.0016086307160406413, "loss": 0.1267, "step": 30342 }, { "epoch": 0.05380242008051568, "grad_norm": 0.51953125, "learning_rate": 0.0016085816125537332, "loss": 0.18, "step": 30344 }, { "epoch": 0.053805966245825496, "grad_norm": 0.45703125, "learning_rate": 0.0016085325068425844, "loss": 0.2146, "step": 30346 }, { "epoch": 0.05380951241113532, "grad_norm": 0.45703125, "learning_rate": 0.0016084833989074099, "loss": 0.1895, "step": 30348 }, { "epoch": 0.05381305857644513, "grad_norm": 0.8671875, "learning_rate": 0.0016084342887484242, "loss": 0.186, "step": 30350 }, { "epoch": 0.053816604741754946, "grad_norm": 2.84375, "learning_rate": 0.0016083851763658429, "loss": 0.3351, "step": 30352 }, { "epoch": 0.05382015090706476, "grad_norm": 1.421875, "learning_rate": 0.0016083360617598793, "loss": 0.4249, "step": 30354 }, { "epoch": 0.053823697072374575, "grad_norm": 0.369140625, "learning_rate": 0.0016082869449307493, "loss": 0.197, "step": 30356 }, { "epoch": 0.05382724323768439, "grad_norm": 0.419921875, "learning_rate": 0.0016082378258786675, "loss": 0.2066, "step": 30358 }, { "epoch": 0.053830789402994204, "grad_norm": 0.765625, "learning_rate": 0.001608188704603849, "loss": 0.3675, "step": 30360 }, { "epoch": 0.05383433556830402, "grad_norm": 0.251953125, "learning_rate": 0.0016081395811065077, "loss": 0.1596, "step": 30362 }, { "epoch": 0.053837881733613834, "grad_norm": 0.68359375, "learning_rate": 0.0016080904553868597, "loss": 0.1661, "step": 30364 }, { "epoch": 0.05384142789892365, "grad_norm": 0.27734375, "learning_rate": 0.001608041327445119, "loss": 0.1953, "step": 30366 }, { "epoch": 0.05384497406423346, "grad_norm": 0.26953125, "learning_rate": 0.0016079921972815003, "loss": 0.1929, "step": 30368 }, { "epoch": 0.05384852022954328, "grad_norm": 0.431640625, "learning_rate": 0.0016079430648962192, "loss": 0.2732, "step": 30370 }, { "epoch": 0.0538520663948531, "grad_norm": 0.306640625, "learning_rate": 0.00160789393028949, "loss": 0.1648, "step": 30372 }, { "epoch": 0.05385561256016291, "grad_norm": 1.6640625, "learning_rate": 0.0016078447934615274, "loss": 0.2648, "step": 30374 }, { "epoch": 0.05385915872547273, "grad_norm": 0.6484375, "learning_rate": 0.0016077956544125471, "loss": 0.2382, "step": 30376 }, { "epoch": 0.05386270489078254, "grad_norm": 0.435546875, "learning_rate": 0.0016077465131427635, "loss": 0.1946, "step": 30378 }, { "epoch": 0.05386625105609236, "grad_norm": 0.90234375, "learning_rate": 0.0016076973696523915, "loss": 0.3324, "step": 30380 }, { "epoch": 0.05386979722140217, "grad_norm": 0.298828125, "learning_rate": 0.0016076482239416462, "loss": 0.2954, "step": 30382 }, { "epoch": 0.053873343386711986, "grad_norm": 0.203125, "learning_rate": 0.0016075990760107425, "loss": 0.1574, "step": 30384 }, { "epoch": 0.0538768895520218, "grad_norm": 2.15625, "learning_rate": 0.0016075499258598953, "loss": 0.2176, "step": 30386 }, { "epoch": 0.053880435717331615, "grad_norm": 0.373046875, "learning_rate": 0.0016075007734893195, "loss": 0.2247, "step": 30388 }, { "epoch": 0.05388398188264143, "grad_norm": 0.71875, "learning_rate": 0.0016074516188992298, "loss": 0.1882, "step": 30390 }, { "epoch": 0.053887528047951244, "grad_norm": 1.203125, "learning_rate": 0.001607402462089842, "loss": 0.1884, "step": 30392 }, { "epoch": 0.053891074213261066, "grad_norm": 1.859375, "learning_rate": 0.00160735330306137, "loss": 0.3325, "step": 30394 }, { "epoch": 0.05389462037857088, "grad_norm": 0.255859375, "learning_rate": 0.0016073041418140299, "loss": 0.1664, "step": 30396 }, { "epoch": 0.053898166543880695, "grad_norm": 0.26171875, "learning_rate": 0.0016072549783480358, "loss": 0.1435, "step": 30398 }, { "epoch": 0.05390171270919051, "grad_norm": 0.451171875, "learning_rate": 0.001607205812663603, "loss": 0.2192, "step": 30400 }, { "epoch": 0.053905258874500324, "grad_norm": 0.453125, "learning_rate": 0.0016071566447609469, "loss": 0.2505, "step": 30402 }, { "epoch": 0.05390880503981014, "grad_norm": 0.43359375, "learning_rate": 0.0016071074746402824, "loss": 0.2431, "step": 30404 }, { "epoch": 0.05391235120511995, "grad_norm": 1.3671875, "learning_rate": 0.0016070583023018242, "loss": 0.2181, "step": 30406 }, { "epoch": 0.05391589737042977, "grad_norm": 0.369140625, "learning_rate": 0.0016070091277457872, "loss": 0.2545, "step": 30408 }, { "epoch": 0.05391944353573958, "grad_norm": 1.21875, "learning_rate": 0.001606959950972387, "loss": 0.2087, "step": 30410 }, { "epoch": 0.0539229897010494, "grad_norm": 0.58203125, "learning_rate": 0.0016069107719818383, "loss": 0.3003, "step": 30412 }, { "epoch": 0.05392653586635921, "grad_norm": 0.2275390625, "learning_rate": 0.0016068615907743567, "loss": 0.2239, "step": 30414 }, { "epoch": 0.05393008203166903, "grad_norm": 0.734375, "learning_rate": 0.0016068124073501564, "loss": 0.1933, "step": 30416 }, { "epoch": 0.05393362819697885, "grad_norm": 0.267578125, "learning_rate": 0.0016067632217094537, "loss": 0.2393, "step": 30418 }, { "epoch": 0.05393717436228866, "grad_norm": 0.6171875, "learning_rate": 0.0016067140338524625, "loss": 0.1425, "step": 30420 }, { "epoch": 0.053940720527598476, "grad_norm": 0.283203125, "learning_rate": 0.0016066648437793986, "loss": 0.1718, "step": 30422 }, { "epoch": 0.05394426669290829, "grad_norm": 0.54296875, "learning_rate": 0.001606615651490477, "loss": 0.2131, "step": 30424 }, { "epoch": 0.053947812858218105, "grad_norm": 0.447265625, "learning_rate": 0.001606566456985913, "loss": 0.1607, "step": 30426 }, { "epoch": 0.05395135902352792, "grad_norm": 0.359375, "learning_rate": 0.001606517260265921, "loss": 0.1724, "step": 30428 }, { "epoch": 0.053954905188837735, "grad_norm": 0.54296875, "learning_rate": 0.0016064680613307172, "loss": 0.2221, "step": 30430 }, { "epoch": 0.05395845135414755, "grad_norm": 0.3125, "learning_rate": 0.0016064188601805163, "loss": 0.1986, "step": 30432 }, { "epoch": 0.053961997519457364, "grad_norm": 0.7890625, "learning_rate": 0.0016063696568155332, "loss": 0.2529, "step": 30434 }, { "epoch": 0.05396554368476718, "grad_norm": 0.5625, "learning_rate": 0.0016063204512359835, "loss": 0.2338, "step": 30436 }, { "epoch": 0.05396908985007699, "grad_norm": 0.4453125, "learning_rate": 0.0016062712434420823, "loss": 0.2036, "step": 30438 }, { "epoch": 0.053972636015386814, "grad_norm": 1.015625, "learning_rate": 0.0016062220334340446, "loss": 0.2063, "step": 30440 }, { "epoch": 0.05397618218069663, "grad_norm": 0.2236328125, "learning_rate": 0.0016061728212120863, "loss": 0.2124, "step": 30442 }, { "epoch": 0.05397972834600644, "grad_norm": 0.251953125, "learning_rate": 0.0016061236067764215, "loss": 0.1872, "step": 30444 }, { "epoch": 0.05398327451131626, "grad_norm": 0.267578125, "learning_rate": 0.001606074390127266, "loss": 0.1405, "step": 30446 }, { "epoch": 0.05398682067662607, "grad_norm": 1.4296875, "learning_rate": 0.0016060251712648355, "loss": 0.3397, "step": 30448 }, { "epoch": 0.05399036684193589, "grad_norm": 0.51171875, "learning_rate": 0.0016059759501893447, "loss": 0.1771, "step": 30450 }, { "epoch": 0.0539939130072457, "grad_norm": 0.3125, "learning_rate": 0.001605926726901009, "loss": 0.1875, "step": 30452 }, { "epoch": 0.053997459172555516, "grad_norm": 0.95703125, "learning_rate": 0.0016058775014000436, "loss": 0.1682, "step": 30454 }, { "epoch": 0.05400100533786533, "grad_norm": 0.671875, "learning_rate": 0.001605828273686664, "loss": 0.1876, "step": 30456 }, { "epoch": 0.054004551503175145, "grad_norm": 0.8125, "learning_rate": 0.0016057790437610851, "loss": 0.2482, "step": 30458 }, { "epoch": 0.05400809766848496, "grad_norm": 0.85546875, "learning_rate": 0.0016057298116235227, "loss": 0.2488, "step": 30460 }, { "epoch": 0.05401164383379478, "grad_norm": 0.23828125, "learning_rate": 0.0016056805772741915, "loss": 0.136, "step": 30462 }, { "epoch": 0.054015189999104596, "grad_norm": 0.7421875, "learning_rate": 0.0016056313407133076, "loss": 0.2024, "step": 30464 }, { "epoch": 0.05401873616441441, "grad_norm": 0.671875, "learning_rate": 0.0016055821019410857, "loss": 0.1844, "step": 30466 }, { "epoch": 0.054022282329724225, "grad_norm": 0.82421875, "learning_rate": 0.0016055328609577413, "loss": 0.2291, "step": 30468 }, { "epoch": 0.05402582849503404, "grad_norm": 0.5078125, "learning_rate": 0.00160548361776349, "loss": 0.1829, "step": 30470 }, { "epoch": 0.054029374660343854, "grad_norm": 0.66015625, "learning_rate": 0.0016054343723585472, "loss": 0.2912, "step": 30472 }, { "epoch": 0.05403292082565367, "grad_norm": 0.208984375, "learning_rate": 0.0016053851247431275, "loss": 0.1754, "step": 30474 }, { "epoch": 0.05403646699096348, "grad_norm": 0.244140625, "learning_rate": 0.001605335874917447, "loss": 0.1896, "step": 30476 }, { "epoch": 0.0540400131562733, "grad_norm": 0.359375, "learning_rate": 0.001605286622881721, "loss": 0.2124, "step": 30478 }, { "epoch": 0.05404355932158311, "grad_norm": 0.36328125, "learning_rate": 0.001605237368636165, "loss": 0.2255, "step": 30480 }, { "epoch": 0.05404710548689293, "grad_norm": 0.41796875, "learning_rate": 0.001605188112180994, "loss": 0.2725, "step": 30482 }, { "epoch": 0.05405065165220275, "grad_norm": 0.39453125, "learning_rate": 0.0016051388535164236, "loss": 0.2462, "step": 30484 }, { "epoch": 0.05405419781751256, "grad_norm": 0.5390625, "learning_rate": 0.0016050895926426694, "loss": 0.2302, "step": 30486 }, { "epoch": 0.05405774398282238, "grad_norm": 0.734375, "learning_rate": 0.0016050403295599467, "loss": 0.2155, "step": 30488 }, { "epoch": 0.05406129014813219, "grad_norm": 1.7421875, "learning_rate": 0.0016049910642684708, "loss": 0.3307, "step": 30490 }, { "epoch": 0.054064836313442007, "grad_norm": 0.74609375, "learning_rate": 0.0016049417967684573, "loss": 0.448, "step": 30492 }, { "epoch": 0.05406838247875182, "grad_norm": 1.2265625, "learning_rate": 0.001604892527060122, "loss": 0.1616, "step": 30494 }, { "epoch": 0.054071928644061636, "grad_norm": 0.66796875, "learning_rate": 0.00160484325514368, "loss": 0.232, "step": 30496 }, { "epoch": 0.05407547480937145, "grad_norm": 0.255859375, "learning_rate": 0.0016047939810193467, "loss": 0.1466, "step": 30498 }, { "epoch": 0.054079020974681265, "grad_norm": 0.400390625, "learning_rate": 0.0016047447046873377, "loss": 0.1971, "step": 30500 }, { "epoch": 0.05408256713999108, "grad_norm": 1.6796875, "learning_rate": 0.0016046954261478687, "loss": 0.2503, "step": 30502 }, { "epoch": 0.054086113305300894, "grad_norm": 0.33203125, "learning_rate": 0.0016046461454011552, "loss": 0.1445, "step": 30504 }, { "epoch": 0.05408965947061071, "grad_norm": 0.419921875, "learning_rate": 0.0016045968624474123, "loss": 0.2752, "step": 30506 }, { "epoch": 0.05409320563592053, "grad_norm": 0.20703125, "learning_rate": 0.0016045475772868561, "loss": 0.1763, "step": 30508 }, { "epoch": 0.054096751801230344, "grad_norm": 0.2353515625, "learning_rate": 0.0016044982899197015, "loss": 0.2575, "step": 30510 }, { "epoch": 0.05410029796654016, "grad_norm": 0.388671875, "learning_rate": 0.001604449000346165, "loss": 0.2194, "step": 30512 }, { "epoch": 0.054103844131849974, "grad_norm": 0.453125, "learning_rate": 0.001604399708566461, "loss": 0.237, "step": 30514 }, { "epoch": 0.05410739029715979, "grad_norm": 0.234375, "learning_rate": 0.0016043504145808061, "loss": 0.1969, "step": 30516 }, { "epoch": 0.0541109364624696, "grad_norm": 0.30859375, "learning_rate": 0.0016043011183894153, "loss": 0.1605, "step": 30518 }, { "epoch": 0.05411448262777942, "grad_norm": 2.109375, "learning_rate": 0.001604251819992504, "loss": 0.2011, "step": 30520 }, { "epoch": 0.05411802879308923, "grad_norm": 0.318359375, "learning_rate": 0.0016042025193902885, "loss": 0.2311, "step": 30522 }, { "epoch": 0.054121574958399046, "grad_norm": 1.4765625, "learning_rate": 0.001604153216582984, "loss": 0.2702, "step": 30524 }, { "epoch": 0.05412512112370886, "grad_norm": 0.3671875, "learning_rate": 0.0016041039115708062, "loss": 0.1629, "step": 30526 }, { "epoch": 0.054128667289018675, "grad_norm": 0.490234375, "learning_rate": 0.0016040546043539707, "loss": 0.2284, "step": 30528 }, { "epoch": 0.0541322134543285, "grad_norm": 0.55859375, "learning_rate": 0.001604005294932693, "loss": 0.3466, "step": 30530 }, { "epoch": 0.05413575961963831, "grad_norm": 0.51953125, "learning_rate": 0.0016039559833071892, "loss": 0.2195, "step": 30532 }, { "epoch": 0.054139305784948126, "grad_norm": 0.373046875, "learning_rate": 0.0016039066694776742, "loss": 0.2143, "step": 30534 }, { "epoch": 0.05414285195025794, "grad_norm": 0.3515625, "learning_rate": 0.0016038573534443647, "loss": 0.2293, "step": 30536 }, { "epoch": 0.054146398115567755, "grad_norm": 0.9140625, "learning_rate": 0.0016038080352074754, "loss": 0.2483, "step": 30538 }, { "epoch": 0.05414994428087757, "grad_norm": 1.6015625, "learning_rate": 0.0016037587147672229, "loss": 0.3931, "step": 30540 }, { "epoch": 0.054153490446187384, "grad_norm": 0.240234375, "learning_rate": 0.0016037093921238215, "loss": 0.2129, "step": 30542 }, { "epoch": 0.0541570366114972, "grad_norm": 0.333984375, "learning_rate": 0.0016036600672774885, "loss": 0.2643, "step": 30544 }, { "epoch": 0.05416058277680701, "grad_norm": 1.078125, "learning_rate": 0.0016036107402284388, "loss": 0.2304, "step": 30546 }, { "epoch": 0.05416412894211683, "grad_norm": 0.61328125, "learning_rate": 0.0016035614109768882, "loss": 0.2067, "step": 30548 }, { "epoch": 0.05416767510742664, "grad_norm": 0.474609375, "learning_rate": 0.0016035120795230527, "loss": 0.1914, "step": 30550 }, { "epoch": 0.054171221272736464, "grad_norm": 0.55078125, "learning_rate": 0.0016034627458671475, "loss": 0.2372, "step": 30552 }, { "epoch": 0.05417476743804628, "grad_norm": 0.1435546875, "learning_rate": 0.001603413410009389, "loss": 0.1996, "step": 30554 }, { "epoch": 0.05417831360335609, "grad_norm": 0.8671875, "learning_rate": 0.0016033640719499925, "loss": 0.3408, "step": 30556 }, { "epoch": 0.05418185976866591, "grad_norm": 0.29296875, "learning_rate": 0.0016033147316891739, "loss": 0.1869, "step": 30558 }, { "epoch": 0.05418540593397572, "grad_norm": 1.2109375, "learning_rate": 0.001603265389227149, "loss": 0.1841, "step": 30560 }, { "epoch": 0.05418895209928554, "grad_norm": 0.52734375, "learning_rate": 0.0016032160445641338, "loss": 0.2004, "step": 30562 }, { "epoch": 0.05419249826459535, "grad_norm": 0.5234375, "learning_rate": 0.0016031666977003435, "loss": 0.2071, "step": 30564 }, { "epoch": 0.054196044429905166, "grad_norm": 0.54296875, "learning_rate": 0.0016031173486359947, "loss": 0.1733, "step": 30566 }, { "epoch": 0.05419959059521498, "grad_norm": 0.40234375, "learning_rate": 0.0016030679973713029, "loss": 0.1807, "step": 30568 }, { "epoch": 0.054203136760524795, "grad_norm": 1.234375, "learning_rate": 0.0016030186439064838, "loss": 0.3036, "step": 30570 }, { "epoch": 0.05420668292583461, "grad_norm": 0.63671875, "learning_rate": 0.0016029692882417535, "loss": 0.1561, "step": 30572 }, { "epoch": 0.054210229091144424, "grad_norm": 0.57421875, "learning_rate": 0.0016029199303773274, "loss": 0.234, "step": 30574 }, { "epoch": 0.054213775256454245, "grad_norm": 2.8125, "learning_rate": 0.0016028705703134219, "loss": 0.2713, "step": 30576 }, { "epoch": 0.05421732142176406, "grad_norm": 0.2001953125, "learning_rate": 0.0016028212080502527, "loss": 0.2623, "step": 30578 }, { "epoch": 0.054220867587073875, "grad_norm": 0.640625, "learning_rate": 0.0016027718435880352, "loss": 0.2357, "step": 30580 }, { "epoch": 0.05422441375238369, "grad_norm": 0.275390625, "learning_rate": 0.001602722476926986, "loss": 0.2578, "step": 30582 }, { "epoch": 0.054227959917693504, "grad_norm": 0.373046875, "learning_rate": 0.0016026731080673205, "loss": 0.1961, "step": 30584 }, { "epoch": 0.05423150608300332, "grad_norm": 0.1474609375, "learning_rate": 0.0016026237370092551, "loss": 0.1226, "step": 30586 }, { "epoch": 0.05423505224831313, "grad_norm": 0.322265625, "learning_rate": 0.0016025743637530052, "loss": 0.2679, "step": 30588 }, { "epoch": 0.05423859841362295, "grad_norm": 0.494140625, "learning_rate": 0.0016025249882987871, "loss": 0.1528, "step": 30590 }, { "epoch": 0.05424214457893276, "grad_norm": 0.75, "learning_rate": 0.0016024756106468164, "loss": 0.2176, "step": 30592 }, { "epoch": 0.054245690744242576, "grad_norm": 0.25390625, "learning_rate": 0.0016024262307973095, "loss": 0.1601, "step": 30594 }, { "epoch": 0.05424923690955239, "grad_norm": 0.408203125, "learning_rate": 0.0016023768487504822, "loss": 0.3534, "step": 30596 }, { "epoch": 0.05425278307486221, "grad_norm": 0.369140625, "learning_rate": 0.00160232746450655, "loss": 0.207, "step": 30598 }, { "epoch": 0.05425632924017203, "grad_norm": 0.6796875, "learning_rate": 0.0016022780780657296, "loss": 0.3981, "step": 30600 }, { "epoch": 0.05425987540548184, "grad_norm": 0.7734375, "learning_rate": 0.0016022286894282365, "loss": 0.1895, "step": 30602 }, { "epoch": 0.054263421570791656, "grad_norm": 0.5703125, "learning_rate": 0.0016021792985942867, "loss": 0.2045, "step": 30604 }, { "epoch": 0.05426696773610147, "grad_norm": 0.47265625, "learning_rate": 0.0016021299055640965, "loss": 0.3999, "step": 30606 }, { "epoch": 0.054270513901411285, "grad_norm": 3.140625, "learning_rate": 0.0016020805103378818, "loss": 0.2054, "step": 30608 }, { "epoch": 0.0542740600667211, "grad_norm": 0.455078125, "learning_rate": 0.0016020311129158586, "loss": 0.2272, "step": 30610 }, { "epoch": 0.054277606232030914, "grad_norm": 1.75, "learning_rate": 0.0016019817132982427, "loss": 0.2382, "step": 30612 }, { "epoch": 0.05428115239734073, "grad_norm": 2.1875, "learning_rate": 0.0016019323114852505, "loss": 0.2293, "step": 30614 }, { "epoch": 0.054284698562650543, "grad_norm": 0.1796875, "learning_rate": 0.001601882907477098, "loss": 0.2867, "step": 30616 }, { "epoch": 0.05428824472796036, "grad_norm": 0.546875, "learning_rate": 0.001601833501274001, "loss": 0.2205, "step": 30618 }, { "epoch": 0.05429179089327018, "grad_norm": 0.859375, "learning_rate": 0.001601784092876176, "loss": 0.1531, "step": 30620 }, { "epoch": 0.054295337058579994, "grad_norm": 0.5546875, "learning_rate": 0.001601734682283839, "loss": 0.2055, "step": 30622 }, { "epoch": 0.05429888322388981, "grad_norm": 0.232421875, "learning_rate": 0.0016016852694972054, "loss": 0.1628, "step": 30624 }, { "epoch": 0.05430242938919962, "grad_norm": 0.4140625, "learning_rate": 0.0016016358545164924, "loss": 0.2544, "step": 30626 }, { "epoch": 0.05430597555450944, "grad_norm": 1.5234375, "learning_rate": 0.0016015864373419154, "loss": 0.2836, "step": 30628 }, { "epoch": 0.05430952171981925, "grad_norm": 0.466796875, "learning_rate": 0.0016015370179736906, "loss": 0.1937, "step": 30630 }, { "epoch": 0.05431306788512907, "grad_norm": 0.498046875, "learning_rate": 0.0016014875964120339, "loss": 0.1982, "step": 30632 }, { "epoch": 0.05431661405043888, "grad_norm": 0.5625, "learning_rate": 0.0016014381726571624, "loss": 0.1966, "step": 30634 }, { "epoch": 0.054320160215748696, "grad_norm": 0.435546875, "learning_rate": 0.001601388746709291, "loss": 0.227, "step": 30636 }, { "epoch": 0.05432370638105851, "grad_norm": 0.314453125, "learning_rate": 0.001601339318568637, "loss": 0.1799, "step": 30638 }, { "epoch": 0.054327252546368325, "grad_norm": 0.21484375, "learning_rate": 0.0016012898882354154, "loss": 0.1448, "step": 30640 }, { "epoch": 0.05433079871167814, "grad_norm": 0.6328125, "learning_rate": 0.0016012404557098434, "loss": 0.2403, "step": 30642 }, { "epoch": 0.05433434487698796, "grad_norm": 0.33203125, "learning_rate": 0.001601191020992137, "loss": 0.1722, "step": 30644 }, { "epoch": 0.054337891042297776, "grad_norm": 1.2734375, "learning_rate": 0.0016011415840825117, "loss": 0.2315, "step": 30646 }, { "epoch": 0.05434143720760759, "grad_norm": 2.671875, "learning_rate": 0.0016010921449811843, "loss": 0.2808, "step": 30648 }, { "epoch": 0.054344983372917405, "grad_norm": 0.39453125, "learning_rate": 0.0016010427036883712, "loss": 0.2065, "step": 30650 }, { "epoch": 0.05434852953822722, "grad_norm": 0.890625, "learning_rate": 0.0016009932602042884, "loss": 0.3017, "step": 30652 }, { "epoch": 0.054352075703537034, "grad_norm": 2.734375, "learning_rate": 0.0016009438145291519, "loss": 0.1957, "step": 30654 }, { "epoch": 0.05435562186884685, "grad_norm": 0.240234375, "learning_rate": 0.0016008943666631784, "loss": 0.1674, "step": 30656 }, { "epoch": 0.05435916803415666, "grad_norm": 1.2421875, "learning_rate": 0.0016008449166065837, "loss": 0.2269, "step": 30658 }, { "epoch": 0.05436271419946648, "grad_norm": 0.66015625, "learning_rate": 0.0016007954643595841, "loss": 0.2626, "step": 30660 }, { "epoch": 0.05436626036477629, "grad_norm": 0.5390625, "learning_rate": 0.0016007460099223961, "loss": 0.1403, "step": 30662 }, { "epoch": 0.05436980653008611, "grad_norm": 0.94140625, "learning_rate": 0.001600696553295236, "loss": 0.4077, "step": 30664 }, { "epoch": 0.05437335269539593, "grad_norm": 0.8125, "learning_rate": 0.00160064709447832, "loss": 0.2254, "step": 30666 }, { "epoch": 0.05437689886070574, "grad_norm": 0.5234375, "learning_rate": 0.0016005976334718646, "loss": 0.206, "step": 30668 }, { "epoch": 0.05438044502601556, "grad_norm": 0.2490234375, "learning_rate": 0.0016005481702760856, "loss": 0.2006, "step": 30670 }, { "epoch": 0.05438399119132537, "grad_norm": 0.392578125, "learning_rate": 0.0016004987048911996, "loss": 0.2185, "step": 30672 }, { "epoch": 0.054387537356635186, "grad_norm": 0.259765625, "learning_rate": 0.0016004492373174234, "loss": 0.1875, "step": 30674 }, { "epoch": 0.054391083521945, "grad_norm": 0.75, "learning_rate": 0.0016003997675549729, "loss": 0.1695, "step": 30676 }, { "epoch": 0.054394629687254815, "grad_norm": 0.30859375, "learning_rate": 0.0016003502956040642, "loss": 0.1778, "step": 30678 }, { "epoch": 0.05439817585256463, "grad_norm": 0.83203125, "learning_rate": 0.0016003008214649141, "loss": 0.197, "step": 30680 }, { "epoch": 0.054401722017874445, "grad_norm": 0.2412109375, "learning_rate": 0.0016002513451377387, "loss": 0.1892, "step": 30682 }, { "epoch": 0.05440526818318426, "grad_norm": 2.59375, "learning_rate": 0.0016002018666227548, "loss": 0.2213, "step": 30684 }, { "epoch": 0.054408814348494074, "grad_norm": 0.30859375, "learning_rate": 0.001600152385920178, "loss": 0.2284, "step": 30686 }, { "epoch": 0.054412360513803895, "grad_norm": 0.96875, "learning_rate": 0.0016001029030302254, "loss": 0.2105, "step": 30688 }, { "epoch": 0.05441590667911371, "grad_norm": 0.482421875, "learning_rate": 0.0016000534179531132, "loss": 0.1829, "step": 30690 }, { "epoch": 0.054419452844423524, "grad_norm": 0.205078125, "learning_rate": 0.0016000039306890582, "loss": 0.1809, "step": 30692 }, { "epoch": 0.05442299900973334, "grad_norm": 0.6015625, "learning_rate": 0.0015999544412382757, "loss": 0.2531, "step": 30694 }, { "epoch": 0.05442654517504315, "grad_norm": 1.1875, "learning_rate": 0.0015999049496009836, "loss": 0.3955, "step": 30696 }, { "epoch": 0.05443009134035297, "grad_norm": 0.4765625, "learning_rate": 0.0015998554557773972, "loss": 0.2357, "step": 30698 }, { "epoch": 0.05443363750566278, "grad_norm": 3.21875, "learning_rate": 0.0015998059597677338, "loss": 0.3937, "step": 30700 }, { "epoch": 0.0544371836709726, "grad_norm": 0.6484375, "learning_rate": 0.0015997564615722092, "loss": 0.2372, "step": 30702 }, { "epoch": 0.05444072983628241, "grad_norm": 0.3515625, "learning_rate": 0.00159970696119104, "loss": 0.2309, "step": 30704 }, { "epoch": 0.054444276001592226, "grad_norm": 0.4375, "learning_rate": 0.0015996574586244429, "loss": 0.2106, "step": 30706 }, { "epoch": 0.05444782216690204, "grad_norm": 0.60546875, "learning_rate": 0.0015996079538726342, "loss": 0.2356, "step": 30708 }, { "epoch": 0.054451368332211855, "grad_norm": 0.291015625, "learning_rate": 0.0015995584469358304, "loss": 0.1994, "step": 30710 }, { "epoch": 0.05445491449752168, "grad_norm": 0.54296875, "learning_rate": 0.0015995089378142485, "loss": 0.181, "step": 30712 }, { "epoch": 0.05445846066283149, "grad_norm": 0.80078125, "learning_rate": 0.0015994594265081048, "loss": 0.3128, "step": 30714 }, { "epoch": 0.054462006828141306, "grad_norm": 0.38671875, "learning_rate": 0.0015994099130176153, "loss": 0.2318, "step": 30716 }, { "epoch": 0.05446555299345112, "grad_norm": 0.330078125, "learning_rate": 0.0015993603973429968, "loss": 0.1595, "step": 30718 }, { "epoch": 0.054469099158760935, "grad_norm": 0.6484375, "learning_rate": 0.0015993108794844666, "loss": 0.1893, "step": 30720 }, { "epoch": 0.05447264532407075, "grad_norm": 0.31640625, "learning_rate": 0.0015992613594422403, "loss": 0.2067, "step": 30722 }, { "epoch": 0.054476191489380564, "grad_norm": 0.279296875, "learning_rate": 0.0015992118372165347, "loss": 0.1463, "step": 30724 }, { "epoch": 0.05447973765469038, "grad_norm": 0.80859375, "learning_rate": 0.0015991623128075664, "loss": 0.1859, "step": 30726 }, { "epoch": 0.05448328382000019, "grad_norm": 1.8359375, "learning_rate": 0.0015991127862155525, "loss": 0.1553, "step": 30728 }, { "epoch": 0.05448682998531001, "grad_norm": 0.38671875, "learning_rate": 0.001599063257440709, "loss": 0.2474, "step": 30730 }, { "epoch": 0.05449037615061982, "grad_norm": 0.46875, "learning_rate": 0.0015990137264832525, "loss": 0.1908, "step": 30732 }, { "epoch": 0.054493922315929644, "grad_norm": 0.68359375, "learning_rate": 0.0015989641933434001, "loss": 0.2176, "step": 30734 }, { "epoch": 0.05449746848123946, "grad_norm": 0.63671875, "learning_rate": 0.0015989146580213678, "loss": 0.2691, "step": 30736 }, { "epoch": 0.05450101464654927, "grad_norm": 0.259765625, "learning_rate": 0.001598865120517373, "loss": 0.2022, "step": 30738 }, { "epoch": 0.05450456081185909, "grad_norm": 0.384765625, "learning_rate": 0.0015988155808316317, "loss": 0.1979, "step": 30740 }, { "epoch": 0.0545081069771689, "grad_norm": 0.5, "learning_rate": 0.001598766038964361, "loss": 0.2762, "step": 30742 }, { "epoch": 0.054511653142478717, "grad_norm": 0.466796875, "learning_rate": 0.001598716494915777, "loss": 0.13, "step": 30744 }, { "epoch": 0.05451519930778853, "grad_norm": 1.015625, "learning_rate": 0.0015986669486860972, "loss": 0.2554, "step": 30746 }, { "epoch": 0.054518745473098346, "grad_norm": 0.734375, "learning_rate": 0.0015986174002755376, "loss": 0.4373, "step": 30748 }, { "epoch": 0.05452229163840816, "grad_norm": 1.8125, "learning_rate": 0.0015985678496843155, "loss": 0.3827, "step": 30750 }, { "epoch": 0.054525837803717975, "grad_norm": 0.80859375, "learning_rate": 0.0015985182969126465, "loss": 0.1969, "step": 30752 }, { "epoch": 0.05452938396902779, "grad_norm": 0.5859375, "learning_rate": 0.0015984687419607485, "loss": 0.3428, "step": 30754 }, { "epoch": 0.05453293013433761, "grad_norm": 0.50390625, "learning_rate": 0.0015984191848288374, "loss": 0.2079, "step": 30756 }, { "epoch": 0.054536476299647425, "grad_norm": 0.2099609375, "learning_rate": 0.001598369625517131, "loss": 0.1919, "step": 30758 }, { "epoch": 0.05454002246495724, "grad_norm": 0.4296875, "learning_rate": 0.0015983200640258446, "loss": 0.2519, "step": 30760 }, { "epoch": 0.054543568630267054, "grad_norm": 1.6484375, "learning_rate": 0.0015982705003551962, "loss": 0.184, "step": 30762 }, { "epoch": 0.05454711479557687, "grad_norm": 0.255859375, "learning_rate": 0.0015982209345054017, "loss": 0.1823, "step": 30764 }, { "epoch": 0.054550660960886684, "grad_norm": 0.466796875, "learning_rate": 0.0015981713664766784, "loss": 0.1766, "step": 30766 }, { "epoch": 0.0545542071261965, "grad_norm": 1.875, "learning_rate": 0.0015981217962692431, "loss": 0.1635, "step": 30768 }, { "epoch": 0.05455775329150631, "grad_norm": 0.5234375, "learning_rate": 0.0015980722238833124, "loss": 0.2603, "step": 30770 }, { "epoch": 0.05456129945681613, "grad_norm": 0.4140625, "learning_rate": 0.0015980226493191028, "loss": 0.2398, "step": 30772 }, { "epoch": 0.05456484562212594, "grad_norm": 0.54296875, "learning_rate": 0.0015979730725768316, "loss": 0.1806, "step": 30774 }, { "epoch": 0.054568391787435756, "grad_norm": 0.73828125, "learning_rate": 0.0015979234936567153, "loss": 0.1864, "step": 30776 }, { "epoch": 0.05457193795274557, "grad_norm": 0.419921875, "learning_rate": 0.0015978739125589712, "loss": 0.2021, "step": 30778 }, { "epoch": 0.05457548411805539, "grad_norm": 2.453125, "learning_rate": 0.0015978243292838156, "loss": 0.2341, "step": 30780 }, { "epoch": 0.05457903028336521, "grad_norm": 0.9140625, "learning_rate": 0.0015977747438314653, "loss": 0.2685, "step": 30782 }, { "epoch": 0.05458257644867502, "grad_norm": 0.2021484375, "learning_rate": 0.0015977251562021375, "loss": 0.138, "step": 30784 }, { "epoch": 0.054586122613984836, "grad_norm": 0.451171875, "learning_rate": 0.0015976755663960492, "loss": 0.1853, "step": 30786 }, { "epoch": 0.05458966877929465, "grad_norm": 0.1630859375, "learning_rate": 0.001597625974413417, "loss": 0.1989, "step": 30788 }, { "epoch": 0.054593214944604465, "grad_norm": 0.515625, "learning_rate": 0.0015975763802544578, "loss": 0.2289, "step": 30790 }, { "epoch": 0.05459676110991428, "grad_norm": 0.44140625, "learning_rate": 0.0015975267839193885, "loss": 0.2058, "step": 30792 }, { "epoch": 0.054600307275224094, "grad_norm": 0.7578125, "learning_rate": 0.0015974771854084262, "loss": 0.2445, "step": 30794 }, { "epoch": 0.05460385344053391, "grad_norm": 0.55859375, "learning_rate": 0.0015974275847217877, "loss": 0.1863, "step": 30796 }, { "epoch": 0.05460739960584372, "grad_norm": 0.59765625, "learning_rate": 0.0015973779818596898, "loss": 0.1561, "step": 30798 }, { "epoch": 0.05461094577115354, "grad_norm": 1.328125, "learning_rate": 0.0015973283768223492, "loss": 0.1938, "step": 30800 }, { "epoch": 0.05461449193646336, "grad_norm": 0.35546875, "learning_rate": 0.0015972787696099835, "loss": 0.2547, "step": 30802 }, { "epoch": 0.054618038101773174, "grad_norm": 0.2734375, "learning_rate": 0.0015972291602228091, "loss": 0.2128, "step": 30804 }, { "epoch": 0.05462158426708299, "grad_norm": 0.251953125, "learning_rate": 0.0015971795486610437, "loss": 0.1922, "step": 30806 }, { "epoch": 0.0546251304323928, "grad_norm": 0.5234375, "learning_rate": 0.0015971299349249034, "loss": 0.1627, "step": 30808 }, { "epoch": 0.05462867659770262, "grad_norm": 0.63671875, "learning_rate": 0.0015970803190146054, "loss": 0.1893, "step": 30810 }, { "epoch": 0.05463222276301243, "grad_norm": 0.39453125, "learning_rate": 0.0015970307009303669, "loss": 0.2252, "step": 30812 }, { "epoch": 0.05463576892832225, "grad_norm": 0.44140625, "learning_rate": 0.0015969810806724049, "loss": 0.1882, "step": 30814 }, { "epoch": 0.05463931509363206, "grad_norm": 0.359375, "learning_rate": 0.0015969314582409363, "loss": 0.1928, "step": 30816 }, { "epoch": 0.054642861258941876, "grad_norm": 0.353515625, "learning_rate": 0.001596881833636178, "loss": 0.1994, "step": 30818 }, { "epoch": 0.05464640742425169, "grad_norm": 0.5390625, "learning_rate": 0.0015968322068583474, "loss": 0.2247, "step": 30820 }, { "epoch": 0.054649953589561505, "grad_norm": 0.388671875, "learning_rate": 0.0015967825779076613, "loss": 0.1842, "step": 30822 }, { "epoch": 0.054653499754871326, "grad_norm": 0.296875, "learning_rate": 0.0015967329467843365, "loss": 0.13, "step": 30824 }, { "epoch": 0.05465704592018114, "grad_norm": 0.66015625, "learning_rate": 0.0015966833134885906, "loss": 0.2091, "step": 30826 }, { "epoch": 0.054660592085490955, "grad_norm": 0.59375, "learning_rate": 0.0015966336780206404, "loss": 0.1823, "step": 30828 }, { "epoch": 0.05466413825080077, "grad_norm": 6.0, "learning_rate": 0.0015965840403807028, "loss": 0.3909, "step": 30830 }, { "epoch": 0.054667684416110585, "grad_norm": 0.462890625, "learning_rate": 0.001596534400568995, "loss": 0.138, "step": 30832 }, { "epoch": 0.0546712305814204, "grad_norm": 0.2490234375, "learning_rate": 0.0015964847585857348, "loss": 0.162, "step": 30834 }, { "epoch": 0.054674776746730214, "grad_norm": 0.73046875, "learning_rate": 0.0015964351144311377, "loss": 0.1973, "step": 30836 }, { "epoch": 0.05467832291204003, "grad_norm": 0.66015625, "learning_rate": 0.0015963854681054226, "loss": 0.1741, "step": 30838 }, { "epoch": 0.05468186907734984, "grad_norm": 0.51953125, "learning_rate": 0.0015963358196088055, "loss": 0.1636, "step": 30840 }, { "epoch": 0.05468541524265966, "grad_norm": 0.279296875, "learning_rate": 0.0015962861689415035, "loss": 0.1693, "step": 30842 }, { "epoch": 0.05468896140796947, "grad_norm": 0.2041015625, "learning_rate": 0.0015962365161037344, "loss": 0.201, "step": 30844 }, { "epoch": 0.054692507573279286, "grad_norm": 1.234375, "learning_rate": 0.001596186861095715, "loss": 0.1767, "step": 30846 }, { "epoch": 0.05469605373858911, "grad_norm": 0.63671875, "learning_rate": 0.0015961372039176626, "loss": 0.2443, "step": 30848 }, { "epoch": 0.05469959990389892, "grad_norm": 1.0625, "learning_rate": 0.001596087544569794, "loss": 0.2177, "step": 30850 }, { "epoch": 0.05470314606920874, "grad_norm": 2.140625, "learning_rate": 0.0015960378830523268, "loss": 0.2076, "step": 30852 }, { "epoch": 0.05470669223451855, "grad_norm": 5.0, "learning_rate": 0.0015959882193654783, "loss": 0.2235, "step": 30854 }, { "epoch": 0.054710238399828366, "grad_norm": 0.82421875, "learning_rate": 0.0015959385535094651, "loss": 0.2175, "step": 30856 }, { "epoch": 0.05471378456513818, "grad_norm": 1.1953125, "learning_rate": 0.0015958888854845047, "loss": 0.2108, "step": 30858 }, { "epoch": 0.054717330730447995, "grad_norm": 0.5390625, "learning_rate": 0.0015958392152908144, "loss": 0.2801, "step": 30860 }, { "epoch": 0.05472087689575781, "grad_norm": 0.58984375, "learning_rate": 0.0015957895429286116, "loss": 0.2091, "step": 30862 }, { "epoch": 0.054724423061067624, "grad_norm": 0.201171875, "learning_rate": 0.001595739868398113, "loss": 0.1528, "step": 30864 }, { "epoch": 0.05472796922637744, "grad_norm": 0.15625, "learning_rate": 0.0015956901916995363, "loss": 0.1844, "step": 30866 }, { "epoch": 0.054731515391687253, "grad_norm": 1.1796875, "learning_rate": 0.0015956405128330986, "loss": 0.2183, "step": 30868 }, { "epoch": 0.054735061556997075, "grad_norm": 1.5234375, "learning_rate": 0.0015955908317990173, "loss": 0.2666, "step": 30870 }, { "epoch": 0.05473860772230689, "grad_norm": 0.177734375, "learning_rate": 0.0015955411485975097, "loss": 0.2277, "step": 30872 }, { "epoch": 0.054742153887616704, "grad_norm": 0.2578125, "learning_rate": 0.0015954914632287927, "loss": 0.1711, "step": 30874 }, { "epoch": 0.05474570005292652, "grad_norm": 0.50390625, "learning_rate": 0.0015954417756930842, "loss": 0.3657, "step": 30876 }, { "epoch": 0.05474924621823633, "grad_norm": 0.380859375, "learning_rate": 0.0015953920859906007, "loss": 0.2191, "step": 30878 }, { "epoch": 0.05475279238354615, "grad_norm": 0.28515625, "learning_rate": 0.0015953423941215603, "loss": 0.1717, "step": 30880 }, { "epoch": 0.05475633854885596, "grad_norm": 0.2001953125, "learning_rate": 0.0015952927000861797, "loss": 0.173, "step": 30882 }, { "epoch": 0.05475988471416578, "grad_norm": 0.703125, "learning_rate": 0.0015952430038846767, "loss": 0.1923, "step": 30884 }, { "epoch": 0.05476343087947559, "grad_norm": 0.310546875, "learning_rate": 0.0015951933055172688, "loss": 0.1547, "step": 30886 }, { "epoch": 0.054766977044785406, "grad_norm": 0.60546875, "learning_rate": 0.0015951436049841726, "loss": 0.2774, "step": 30888 }, { "epoch": 0.05477052321009522, "grad_norm": 0.359375, "learning_rate": 0.001595093902285606, "loss": 0.1818, "step": 30890 }, { "epoch": 0.05477406937540504, "grad_norm": 0.58984375, "learning_rate": 0.0015950441974217863, "loss": 0.2363, "step": 30892 }, { "epoch": 0.05477761554071486, "grad_norm": 0.58203125, "learning_rate": 0.0015949944903929304, "loss": 0.1686, "step": 30894 }, { "epoch": 0.05478116170602467, "grad_norm": 0.21875, "learning_rate": 0.0015949447811992568, "loss": 0.1653, "step": 30896 }, { "epoch": 0.054784707871334486, "grad_norm": 0.341796875, "learning_rate": 0.0015948950698409818, "loss": 0.1901, "step": 30898 }, { "epoch": 0.0547882540366443, "grad_norm": 0.28125, "learning_rate": 0.0015948453563183235, "loss": 0.2011, "step": 30900 }, { "epoch": 0.054791800201954115, "grad_norm": 0.29296875, "learning_rate": 0.001594795640631499, "loss": 0.1974, "step": 30902 }, { "epoch": 0.05479534636726393, "grad_norm": 0.58203125, "learning_rate": 0.0015947459227807258, "loss": 0.2192, "step": 30904 }, { "epoch": 0.054798892532573744, "grad_norm": 0.2216796875, "learning_rate": 0.0015946962027662212, "loss": 0.1855, "step": 30906 }, { "epoch": 0.05480243869788356, "grad_norm": 0.28515625, "learning_rate": 0.0015946464805882026, "loss": 0.1567, "step": 30908 }, { "epoch": 0.05480598486319337, "grad_norm": 0.48046875, "learning_rate": 0.001594596756246888, "loss": 0.3898, "step": 30910 }, { "epoch": 0.05480953102850319, "grad_norm": 0.56640625, "learning_rate": 0.0015945470297424943, "loss": 0.2634, "step": 30912 }, { "epoch": 0.054813077193813, "grad_norm": 0.310546875, "learning_rate": 0.0015944973010752388, "loss": 0.181, "step": 30914 }, { "epoch": 0.054816623359122824, "grad_norm": 1.0859375, "learning_rate": 0.00159444757024534, "loss": 0.1752, "step": 30916 }, { "epoch": 0.05482016952443264, "grad_norm": 0.45703125, "learning_rate": 0.0015943978372530143, "loss": 0.2169, "step": 30918 }, { "epoch": 0.05482371568974245, "grad_norm": 0.5546875, "learning_rate": 0.0015943481020984796, "loss": 0.3005, "step": 30920 }, { "epoch": 0.05482726185505227, "grad_norm": 0.5703125, "learning_rate": 0.001594298364781954, "loss": 0.4046, "step": 30922 }, { "epoch": 0.05483080802036208, "grad_norm": 0.1962890625, "learning_rate": 0.0015942486253036538, "loss": 0.1589, "step": 30924 }, { "epoch": 0.054834354185671896, "grad_norm": 0.22265625, "learning_rate": 0.0015941988836637975, "loss": 0.1734, "step": 30926 }, { "epoch": 0.05483790035098171, "grad_norm": 1.5390625, "learning_rate": 0.0015941491398626026, "loss": 0.2812, "step": 30928 }, { "epoch": 0.054841446516291525, "grad_norm": 0.25390625, "learning_rate": 0.0015940993939002862, "loss": 0.3145, "step": 30930 }, { "epoch": 0.05484499268160134, "grad_norm": 0.65234375, "learning_rate": 0.0015940496457770659, "loss": 0.18, "step": 30932 }, { "epoch": 0.054848538846911155, "grad_norm": 0.3046875, "learning_rate": 0.0015939998954931597, "loss": 0.1777, "step": 30934 }, { "epoch": 0.05485208501222097, "grad_norm": 5.46875, "learning_rate": 0.0015939501430487846, "loss": 0.2138, "step": 30936 }, { "epoch": 0.05485563117753079, "grad_norm": 0.6484375, "learning_rate": 0.0015939003884441587, "loss": 0.2471, "step": 30938 }, { "epoch": 0.054859177342840605, "grad_norm": 0.90234375, "learning_rate": 0.0015938506316794992, "loss": 0.1951, "step": 30940 }, { "epoch": 0.05486272350815042, "grad_norm": 1.1171875, "learning_rate": 0.001593800872755024, "loss": 0.2983, "step": 30942 }, { "epoch": 0.054866269673460234, "grad_norm": 0.2109375, "learning_rate": 0.0015937511116709507, "loss": 0.1538, "step": 30944 }, { "epoch": 0.05486981583877005, "grad_norm": 0.62109375, "learning_rate": 0.0015937013484274966, "loss": 0.2228, "step": 30946 }, { "epoch": 0.05487336200407986, "grad_norm": 0.42578125, "learning_rate": 0.0015936515830248799, "loss": 0.2227, "step": 30948 }, { "epoch": 0.05487690816938968, "grad_norm": 0.40234375, "learning_rate": 0.001593601815463318, "loss": 0.1814, "step": 30950 }, { "epoch": 0.05488045433469949, "grad_norm": 0.6015625, "learning_rate": 0.001593552045743028, "loss": 0.1935, "step": 30952 }, { "epoch": 0.05488400050000931, "grad_norm": 0.421875, "learning_rate": 0.0015935022738642287, "loss": 0.168, "step": 30954 }, { "epoch": 0.05488754666531912, "grad_norm": 0.234375, "learning_rate": 0.0015934524998271367, "loss": 0.1774, "step": 30956 }, { "epoch": 0.054891092830628936, "grad_norm": 0.50390625, "learning_rate": 0.0015934027236319702, "loss": 0.2093, "step": 30958 }, { "epoch": 0.05489463899593876, "grad_norm": 1.953125, "learning_rate": 0.0015933529452789464, "loss": 0.189, "step": 30960 }, { "epoch": 0.05489818516124857, "grad_norm": 0.349609375, "learning_rate": 0.0015933031647682838, "loss": 0.2227, "step": 30962 }, { "epoch": 0.05490173132655839, "grad_norm": 0.85546875, "learning_rate": 0.0015932533821001996, "loss": 0.2278, "step": 30964 }, { "epoch": 0.0549052774918682, "grad_norm": 0.3046875, "learning_rate": 0.0015932035972749117, "loss": 0.1872, "step": 30966 }, { "epoch": 0.054908823657178016, "grad_norm": 0.7734375, "learning_rate": 0.0015931538102926376, "loss": 0.2553, "step": 30968 }, { "epoch": 0.05491236982248783, "grad_norm": 0.2138671875, "learning_rate": 0.0015931040211535953, "loss": 0.1753, "step": 30970 }, { "epoch": 0.054915915987797645, "grad_norm": 0.3203125, "learning_rate": 0.0015930542298580024, "loss": 0.1487, "step": 30972 }, { "epoch": 0.05491946215310746, "grad_norm": 1.1953125, "learning_rate": 0.0015930044364060767, "loss": 0.2449, "step": 30974 }, { "epoch": 0.054923008318417274, "grad_norm": 0.189453125, "learning_rate": 0.0015929546407980359, "loss": 0.2014, "step": 30976 }, { "epoch": 0.05492655448372709, "grad_norm": 0.6640625, "learning_rate": 0.001592904843034098, "loss": 0.2501, "step": 30978 }, { "epoch": 0.0549301006490369, "grad_norm": 0.5078125, "learning_rate": 0.0015928550431144805, "loss": 0.1558, "step": 30980 }, { "epoch": 0.05493364681434672, "grad_norm": 0.421875, "learning_rate": 0.0015928052410394014, "loss": 0.2331, "step": 30982 }, { "epoch": 0.05493719297965654, "grad_norm": 1.0625, "learning_rate": 0.0015927554368090784, "loss": 0.1704, "step": 30984 }, { "epoch": 0.054940739144966354, "grad_norm": 0.32421875, "learning_rate": 0.0015927056304237293, "loss": 0.1964, "step": 30986 }, { "epoch": 0.05494428531027617, "grad_norm": 0.55078125, "learning_rate": 0.001592655821883572, "loss": 0.2062, "step": 30988 }, { "epoch": 0.05494783147558598, "grad_norm": 0.53515625, "learning_rate": 0.0015926060111888245, "loss": 0.1996, "step": 30990 }, { "epoch": 0.0549513776408958, "grad_norm": 0.29296875, "learning_rate": 0.0015925561983397042, "loss": 0.1684, "step": 30992 }, { "epoch": 0.05495492380620561, "grad_norm": 0.451171875, "learning_rate": 0.0015925063833364293, "loss": 0.1903, "step": 30994 }, { "epoch": 0.054958469971515427, "grad_norm": 0.251953125, "learning_rate": 0.0015924565661792173, "loss": 0.1503, "step": 30996 }, { "epoch": 0.05496201613682524, "grad_norm": 0.234375, "learning_rate": 0.0015924067468682866, "loss": 0.1574, "step": 30998 }, { "epoch": 0.054965562302135056, "grad_norm": 0.2177734375, "learning_rate": 0.0015923569254038547, "loss": 0.3123, "step": 31000 }, { "epoch": 0.05496910846744487, "grad_norm": 0.44921875, "learning_rate": 0.0015923071017861397, "loss": 0.1684, "step": 31002 }, { "epoch": 0.054972654632754685, "grad_norm": 0.30859375, "learning_rate": 0.0015922572760153598, "loss": 0.2031, "step": 31004 }, { "epoch": 0.054976200798064506, "grad_norm": 0.75390625, "learning_rate": 0.0015922074480917317, "loss": 0.2126, "step": 31006 }, { "epoch": 0.05497974696337432, "grad_norm": 1.390625, "learning_rate": 0.0015921576180154748, "loss": 0.2323, "step": 31008 }, { "epoch": 0.054983293128684135, "grad_norm": 0.3125, "learning_rate": 0.001592107785786806, "loss": 0.172, "step": 31010 }, { "epoch": 0.05498683929399395, "grad_norm": 0.6875, "learning_rate": 0.0015920579514059438, "loss": 0.3915, "step": 31012 }, { "epoch": 0.054990385459303764, "grad_norm": 0.19921875, "learning_rate": 0.0015920081148731056, "loss": 0.158, "step": 31014 }, { "epoch": 0.05499393162461358, "grad_norm": 0.55859375, "learning_rate": 0.00159195827618851, "loss": 0.2471, "step": 31016 }, { "epoch": 0.054997477789923394, "grad_norm": 0.5625, "learning_rate": 0.0015919084353523745, "loss": 0.3113, "step": 31018 }, { "epoch": 0.05500102395523321, "grad_norm": 0.173828125, "learning_rate": 0.0015918585923649173, "loss": 0.1557, "step": 31020 }, { "epoch": 0.05500457012054302, "grad_norm": 0.35546875, "learning_rate": 0.0015918087472263564, "loss": 0.2761, "step": 31022 }, { "epoch": 0.05500811628585284, "grad_norm": 0.43359375, "learning_rate": 0.0015917588999369097, "loss": 0.201, "step": 31024 }, { "epoch": 0.05501166245116265, "grad_norm": 0.412109375, "learning_rate": 0.001591709050496795, "loss": 0.223, "step": 31026 }, { "epoch": 0.05501520861647247, "grad_norm": 0.400390625, "learning_rate": 0.001591659198906231, "loss": 0.2484, "step": 31028 }, { "epoch": 0.05501875478178229, "grad_norm": 1.5390625, "learning_rate": 0.0015916093451654348, "loss": 0.2643, "step": 31030 }, { "epoch": 0.0550223009470921, "grad_norm": 0.1962890625, "learning_rate": 0.0015915594892746251, "loss": 0.1781, "step": 31032 }, { "epoch": 0.05502584711240192, "grad_norm": 0.51171875, "learning_rate": 0.0015915096312340195, "loss": 0.2634, "step": 31034 }, { "epoch": 0.05502939327771173, "grad_norm": 0.49609375, "learning_rate": 0.0015914597710438366, "loss": 0.2137, "step": 31036 }, { "epoch": 0.055032939443021546, "grad_norm": 0.24609375, "learning_rate": 0.001591409908704294, "loss": 0.2319, "step": 31038 }, { "epoch": 0.05503648560833136, "grad_norm": 0.92578125, "learning_rate": 0.00159136004421561, "loss": 0.2546, "step": 31040 }, { "epoch": 0.055040031773641175, "grad_norm": 1.6171875, "learning_rate": 0.0015913101775780023, "loss": 0.1825, "step": 31042 }, { "epoch": 0.05504357793895099, "grad_norm": 0.58203125, "learning_rate": 0.0015912603087916894, "loss": 0.2534, "step": 31044 }, { "epoch": 0.055047124104260804, "grad_norm": 0.28125, "learning_rate": 0.0015912104378568894, "loss": 0.1701, "step": 31046 }, { "epoch": 0.05505067026957062, "grad_norm": 1.1328125, "learning_rate": 0.00159116056477382, "loss": 0.2973, "step": 31048 }, { "epoch": 0.05505421643488043, "grad_norm": 0.1923828125, "learning_rate": 0.0015911106895427, "loss": 0.1551, "step": 31050 }, { "epoch": 0.055057762600190255, "grad_norm": 0.51953125, "learning_rate": 0.001591060812163747, "loss": 0.18, "step": 31052 }, { "epoch": 0.05506130876550007, "grad_norm": 1.421875, "learning_rate": 0.001591010932637179, "loss": 0.3355, "step": 31054 }, { "epoch": 0.055064854930809884, "grad_norm": 0.73046875, "learning_rate": 0.0015909610509632147, "loss": 0.2099, "step": 31056 }, { "epoch": 0.0550684010961197, "grad_norm": 0.2158203125, "learning_rate": 0.0015909111671420716, "loss": 0.2108, "step": 31058 }, { "epoch": 0.05507194726142951, "grad_norm": 0.42578125, "learning_rate": 0.0015908612811739685, "loss": 0.1775, "step": 31060 }, { "epoch": 0.05507549342673933, "grad_norm": 2.03125, "learning_rate": 0.001590811393059123, "loss": 0.212, "step": 31062 }, { "epoch": 0.05507903959204914, "grad_norm": 0.2578125, "learning_rate": 0.0015907615027977537, "loss": 0.1701, "step": 31064 }, { "epoch": 0.05508258575735896, "grad_norm": 0.2294921875, "learning_rate": 0.0015907116103900786, "loss": 0.1974, "step": 31066 }, { "epoch": 0.05508613192266877, "grad_norm": 0.46484375, "learning_rate": 0.0015906617158363162, "loss": 0.159, "step": 31068 }, { "epoch": 0.055089678087978586, "grad_norm": 3.109375, "learning_rate": 0.0015906118191366843, "loss": 0.3078, "step": 31070 }, { "epoch": 0.0550932242532884, "grad_norm": 0.2080078125, "learning_rate": 0.0015905619202914011, "loss": 0.1915, "step": 31072 }, { "epoch": 0.05509677041859822, "grad_norm": 0.66796875, "learning_rate": 0.0015905120193006855, "loss": 0.2231, "step": 31074 }, { "epoch": 0.055100316583908036, "grad_norm": 2.265625, "learning_rate": 0.001590462116164755, "loss": 0.1687, "step": 31076 }, { "epoch": 0.05510386274921785, "grad_norm": 1.8515625, "learning_rate": 0.001590412210883828, "loss": 0.217, "step": 31078 }, { "epoch": 0.055107408914527665, "grad_norm": 0.455078125, "learning_rate": 0.001590362303458123, "loss": 0.3625, "step": 31080 }, { "epoch": 0.05511095507983748, "grad_norm": 0.2216796875, "learning_rate": 0.0015903123938878579, "loss": 0.3165, "step": 31082 }, { "epoch": 0.055114501245147295, "grad_norm": 1.109375, "learning_rate": 0.0015902624821732515, "loss": 0.2021, "step": 31084 }, { "epoch": 0.05511804741045711, "grad_norm": 0.373046875, "learning_rate": 0.0015902125683145214, "loss": 0.1967, "step": 31086 }, { "epoch": 0.055121593575766924, "grad_norm": 0.1796875, "learning_rate": 0.0015901626523118866, "loss": 0.2346, "step": 31088 }, { "epoch": 0.05512513974107674, "grad_norm": 0.318359375, "learning_rate": 0.001590112734165565, "loss": 0.2031, "step": 31090 }, { "epoch": 0.05512868590638655, "grad_norm": 0.2734375, "learning_rate": 0.0015900628138757747, "loss": 0.1546, "step": 31092 }, { "epoch": 0.05513223207169637, "grad_norm": 0.7265625, "learning_rate": 0.0015900128914427348, "loss": 0.1991, "step": 31094 }, { "epoch": 0.05513577823700619, "grad_norm": 0.4140625, "learning_rate": 0.0015899629668666628, "loss": 0.2277, "step": 31096 }, { "epoch": 0.055139324402316, "grad_norm": 0.4921875, "learning_rate": 0.0015899130401477778, "loss": 0.1446, "step": 31098 }, { "epoch": 0.05514287056762582, "grad_norm": 0.4140625, "learning_rate": 0.0015898631112862972, "loss": 0.1982, "step": 31100 }, { "epoch": 0.05514641673293563, "grad_norm": 0.19140625, "learning_rate": 0.00158981318028244, "loss": 0.1425, "step": 31102 }, { "epoch": 0.05514996289824545, "grad_norm": 0.283203125, "learning_rate": 0.001589763247136425, "loss": 0.279, "step": 31104 }, { "epoch": 0.05515350906355526, "grad_norm": 0.5, "learning_rate": 0.0015897133118484696, "loss": 0.2613, "step": 31106 }, { "epoch": 0.055157055228865076, "grad_norm": 0.53125, "learning_rate": 0.0015896633744187925, "loss": 0.2096, "step": 31108 }, { "epoch": 0.05516060139417489, "grad_norm": 0.171875, "learning_rate": 0.0015896134348476125, "loss": 0.2069, "step": 31110 }, { "epoch": 0.055164147559484705, "grad_norm": 0.455078125, "learning_rate": 0.0015895634931351475, "loss": 0.1668, "step": 31112 }, { "epoch": 0.05516769372479452, "grad_norm": 0.33984375, "learning_rate": 0.0015895135492816164, "loss": 0.2508, "step": 31114 }, { "epoch": 0.055171239890104334, "grad_norm": 0.91015625, "learning_rate": 0.0015894636032872374, "loss": 0.1813, "step": 31116 }, { "epoch": 0.05517478605541415, "grad_norm": 0.3671875, "learning_rate": 0.0015894136551522287, "loss": 0.2015, "step": 31118 }, { "epoch": 0.05517833222072397, "grad_norm": 0.5859375, "learning_rate": 0.001589363704876809, "loss": 0.1669, "step": 31120 }, { "epoch": 0.055181878386033785, "grad_norm": 0.177734375, "learning_rate": 0.0015893137524611966, "loss": 0.1776, "step": 31122 }, { "epoch": 0.0551854245513436, "grad_norm": 0.2294921875, "learning_rate": 0.0015892637979056103, "loss": 0.1621, "step": 31124 }, { "epoch": 0.055188970716653414, "grad_norm": 0.44140625, "learning_rate": 0.0015892138412102687, "loss": 0.1849, "step": 31126 }, { "epoch": 0.05519251688196323, "grad_norm": 0.37890625, "learning_rate": 0.0015891638823753891, "loss": 0.3485, "step": 31128 }, { "epoch": 0.05519606304727304, "grad_norm": 0.330078125, "learning_rate": 0.0015891139214011913, "loss": 0.2004, "step": 31130 }, { "epoch": 0.05519960921258286, "grad_norm": 1.28125, "learning_rate": 0.0015890639582878932, "loss": 0.2628, "step": 31132 }, { "epoch": 0.05520315537789267, "grad_norm": 0.36328125, "learning_rate": 0.0015890139930357136, "loss": 0.2067, "step": 31134 }, { "epoch": 0.05520670154320249, "grad_norm": 0.34765625, "learning_rate": 0.0015889640256448708, "loss": 0.1887, "step": 31136 }, { "epoch": 0.0552102477085123, "grad_norm": 0.859375, "learning_rate": 0.0015889140561155832, "loss": 0.1316, "step": 31138 }, { "epoch": 0.055213793873822116, "grad_norm": 0.41015625, "learning_rate": 0.0015888640844480696, "loss": 0.1865, "step": 31140 }, { "epoch": 0.05521734003913194, "grad_norm": 0.322265625, "learning_rate": 0.0015888141106425485, "loss": 0.1891, "step": 31142 }, { "epoch": 0.05522088620444175, "grad_norm": 1.125, "learning_rate": 0.0015887641346992384, "loss": 0.2402, "step": 31144 }, { "epoch": 0.05522443236975157, "grad_norm": 0.6640625, "learning_rate": 0.0015887141566183578, "loss": 0.2016, "step": 31146 }, { "epoch": 0.05522797853506138, "grad_norm": 0.2333984375, "learning_rate": 0.0015886641764001257, "loss": 0.1633, "step": 31148 }, { "epoch": 0.055231524700371196, "grad_norm": 0.26953125, "learning_rate": 0.0015886141940447598, "loss": 0.1726, "step": 31150 }, { "epoch": 0.05523507086568101, "grad_norm": 0.546875, "learning_rate": 0.0015885642095524797, "loss": 0.2194, "step": 31152 }, { "epoch": 0.055238617030990825, "grad_norm": 0.23828125, "learning_rate": 0.0015885142229235033, "loss": 0.2042, "step": 31154 }, { "epoch": 0.05524216319630064, "grad_norm": 0.423828125, "learning_rate": 0.0015884642341580494, "loss": 0.1592, "step": 31156 }, { "epoch": 0.055245709361610454, "grad_norm": 0.56640625, "learning_rate": 0.0015884142432563367, "loss": 0.213, "step": 31158 }, { "epoch": 0.05524925552692027, "grad_norm": 0.68359375, "learning_rate": 0.001588364250218584, "loss": 0.1687, "step": 31160 }, { "epoch": 0.05525280169223008, "grad_norm": 0.8046875, "learning_rate": 0.0015883142550450096, "loss": 0.1816, "step": 31162 }, { "epoch": 0.055256347857539904, "grad_norm": 0.404296875, "learning_rate": 0.0015882642577358323, "loss": 0.2615, "step": 31164 }, { "epoch": 0.05525989402284972, "grad_norm": 0.57421875, "learning_rate": 0.0015882142582912709, "loss": 0.2276, "step": 31166 }, { "epoch": 0.055263440188159534, "grad_norm": 0.60546875, "learning_rate": 0.001588164256711544, "loss": 0.1463, "step": 31168 }, { "epoch": 0.05526698635346935, "grad_norm": 1.484375, "learning_rate": 0.0015881142529968698, "loss": 0.3188, "step": 31170 }, { "epoch": 0.05527053251877916, "grad_norm": 0.28125, "learning_rate": 0.0015880642471474678, "loss": 0.1544, "step": 31172 }, { "epoch": 0.05527407868408898, "grad_norm": 0.84765625, "learning_rate": 0.001588014239163556, "loss": 0.1668, "step": 31174 }, { "epoch": 0.05527762484939879, "grad_norm": 0.4140625, "learning_rate": 0.0015879642290453533, "loss": 0.1791, "step": 31176 }, { "epoch": 0.055281171014708606, "grad_norm": 0.47265625, "learning_rate": 0.0015879142167930787, "loss": 0.1708, "step": 31178 }, { "epoch": 0.05528471718001842, "grad_norm": 1.4765625, "learning_rate": 0.0015878642024069508, "loss": 0.216, "step": 31180 }, { "epoch": 0.055288263345328235, "grad_norm": 0.2353515625, "learning_rate": 0.001587814185887188, "loss": 0.2039, "step": 31182 }, { "epoch": 0.05529180951063805, "grad_norm": 0.5625, "learning_rate": 0.0015877641672340096, "loss": 0.2204, "step": 31184 }, { "epoch": 0.055295355675947865, "grad_norm": 0.1748046875, "learning_rate": 0.0015877141464476336, "loss": 0.271, "step": 31186 }, { "epoch": 0.055298901841257686, "grad_norm": 0.7265625, "learning_rate": 0.0015876641235282798, "loss": 0.246, "step": 31188 }, { "epoch": 0.0553024480065675, "grad_norm": 0.251953125, "learning_rate": 0.0015876140984761662, "loss": 0.1494, "step": 31190 }, { "epoch": 0.055305994171877315, "grad_norm": 0.283203125, "learning_rate": 0.0015875640712915118, "loss": 0.202, "step": 31192 }, { "epoch": 0.05530954033718713, "grad_norm": 0.314453125, "learning_rate": 0.0015875140419745351, "loss": 0.1742, "step": 31194 }, { "epoch": 0.055313086502496944, "grad_norm": 0.703125, "learning_rate": 0.0015874640105254553, "loss": 0.3514, "step": 31196 }, { "epoch": 0.05531663266780676, "grad_norm": 1.7265625, "learning_rate": 0.0015874139769444912, "loss": 0.2531, "step": 31198 }, { "epoch": 0.05532017883311657, "grad_norm": 0.6171875, "learning_rate": 0.0015873639412318614, "loss": 0.3206, "step": 31200 }, { "epoch": 0.05532372499842639, "grad_norm": 0.31640625, "learning_rate": 0.0015873139033877849, "loss": 0.2695, "step": 31202 }, { "epoch": 0.0553272711637362, "grad_norm": 0.412109375, "learning_rate": 0.0015872638634124804, "loss": 0.1735, "step": 31204 }, { "epoch": 0.05533081732904602, "grad_norm": 0.494140625, "learning_rate": 0.0015872138213061666, "loss": 0.2341, "step": 31206 }, { "epoch": 0.05533436349435583, "grad_norm": 0.248046875, "learning_rate": 0.0015871637770690627, "loss": 0.1346, "step": 31208 }, { "epoch": 0.05533790965966565, "grad_norm": 0.55859375, "learning_rate": 0.0015871137307013877, "loss": 0.2033, "step": 31210 }, { "epoch": 0.05534145582497547, "grad_norm": 0.6796875, "learning_rate": 0.0015870636822033597, "loss": 0.1796, "step": 31212 }, { "epoch": 0.05534500199028528, "grad_norm": 0.439453125, "learning_rate": 0.0015870136315751984, "loss": 0.2062, "step": 31214 }, { "epoch": 0.0553485481555951, "grad_norm": 0.35546875, "learning_rate": 0.001586963578817122, "loss": 0.1966, "step": 31216 }, { "epoch": 0.05535209432090491, "grad_norm": 1.0546875, "learning_rate": 0.0015869135239293503, "loss": 0.4616, "step": 31218 }, { "epoch": 0.055355640486214726, "grad_norm": 0.70703125, "learning_rate": 0.0015868634669121016, "loss": 0.2027, "step": 31220 }, { "epoch": 0.05535918665152454, "grad_norm": 0.287109375, "learning_rate": 0.0015868134077655946, "loss": 0.1818, "step": 31222 }, { "epoch": 0.055362732816834355, "grad_norm": 0.2373046875, "learning_rate": 0.0015867633464900486, "loss": 0.2626, "step": 31224 }, { "epoch": 0.05536627898214417, "grad_norm": 0.28515625, "learning_rate": 0.0015867132830856825, "loss": 0.206, "step": 31226 }, { "epoch": 0.055369825147453984, "grad_norm": 0.66796875, "learning_rate": 0.0015866632175527153, "loss": 0.1819, "step": 31228 }, { "epoch": 0.0553733713127638, "grad_norm": 0.63671875, "learning_rate": 0.0015866131498913658, "loss": 0.2969, "step": 31230 }, { "epoch": 0.05537691747807362, "grad_norm": 0.5390625, "learning_rate": 0.001586563080101853, "loss": 0.2063, "step": 31232 }, { "epoch": 0.055380463643383435, "grad_norm": 0.341796875, "learning_rate": 0.001586513008184396, "loss": 0.2209, "step": 31234 }, { "epoch": 0.05538400980869325, "grad_norm": 0.51953125, "learning_rate": 0.0015864629341392135, "loss": 0.2592, "step": 31236 }, { "epoch": 0.055387555974003064, "grad_norm": 0.265625, "learning_rate": 0.0015864128579665248, "loss": 0.1471, "step": 31238 }, { "epoch": 0.05539110213931288, "grad_norm": 0.76171875, "learning_rate": 0.001586362779666549, "loss": 0.2042, "step": 31240 }, { "epoch": 0.05539464830462269, "grad_norm": 0.4375, "learning_rate": 0.0015863126992395047, "loss": 0.2702, "step": 31242 }, { "epoch": 0.05539819446993251, "grad_norm": 4.0, "learning_rate": 0.0015862626166856112, "loss": 0.2453, "step": 31244 }, { "epoch": 0.05540174063524232, "grad_norm": 0.408203125, "learning_rate": 0.0015862125320050876, "loss": 0.288, "step": 31246 }, { "epoch": 0.055405286800552137, "grad_norm": 0.435546875, "learning_rate": 0.001586162445198153, "loss": 0.2731, "step": 31248 }, { "epoch": 0.05540883296586195, "grad_norm": 1.7265625, "learning_rate": 0.001586112356265026, "loss": 0.3648, "step": 31250 }, { "epoch": 0.055412379131171766, "grad_norm": 0.5625, "learning_rate": 0.0015860622652059259, "loss": 0.2104, "step": 31252 }, { "epoch": 0.05541592529648158, "grad_norm": 1.6953125, "learning_rate": 0.0015860121720210716, "loss": 0.204, "step": 31254 }, { "epoch": 0.0554194714617914, "grad_norm": 0.67578125, "learning_rate": 0.0015859620767106823, "loss": 0.1823, "step": 31256 }, { "epoch": 0.055423017627101216, "grad_norm": 0.42578125, "learning_rate": 0.0015859119792749777, "loss": 0.1714, "step": 31258 }, { "epoch": 0.05542656379241103, "grad_norm": 1.03125, "learning_rate": 0.001585861879714176, "loss": 0.2757, "step": 31260 }, { "epoch": 0.055430109957720845, "grad_norm": 0.60546875, "learning_rate": 0.0015858117780284966, "loss": 0.1968, "step": 31262 }, { "epoch": 0.05543365612303066, "grad_norm": 0.61328125, "learning_rate": 0.0015857616742181587, "loss": 0.192, "step": 31264 }, { "epoch": 0.055437202288340474, "grad_norm": 1.359375, "learning_rate": 0.0015857115682833818, "loss": 0.2854, "step": 31266 }, { "epoch": 0.05544074845365029, "grad_norm": 0.28125, "learning_rate": 0.001585661460224384, "loss": 0.1752, "step": 31268 }, { "epoch": 0.055444294618960104, "grad_norm": 0.345703125, "learning_rate": 0.0015856113500413859, "loss": 0.2035, "step": 31270 }, { "epoch": 0.05544784078426992, "grad_norm": 0.55078125, "learning_rate": 0.0015855612377346049, "loss": 0.2004, "step": 31272 }, { "epoch": 0.05545138694957973, "grad_norm": 1.5546875, "learning_rate": 0.0015855111233042617, "loss": 0.1769, "step": 31274 }, { "epoch": 0.05545493311488955, "grad_norm": 1.03125, "learning_rate": 0.0015854610067505745, "loss": 0.2485, "step": 31276 }, { "epoch": 0.05545847928019937, "grad_norm": 0.283203125, "learning_rate": 0.0015854108880737632, "loss": 0.244, "step": 31278 }, { "epoch": 0.05546202544550918, "grad_norm": 0.17578125, "learning_rate": 0.0015853607672740464, "loss": 0.1734, "step": 31280 }, { "epoch": 0.055465571610819, "grad_norm": 0.75, "learning_rate": 0.0015853106443516436, "loss": 0.2192, "step": 31282 }, { "epoch": 0.05546911777612881, "grad_norm": 0.51171875, "learning_rate": 0.001585260519306774, "loss": 0.2941, "step": 31284 }, { "epoch": 0.05547266394143863, "grad_norm": 0.32421875, "learning_rate": 0.0015852103921396568, "loss": 0.1955, "step": 31286 }, { "epoch": 0.05547621010674844, "grad_norm": 0.40234375, "learning_rate": 0.001585160262850511, "loss": 0.2549, "step": 31288 }, { "epoch": 0.055479756272058256, "grad_norm": 0.7421875, "learning_rate": 0.001585110131439556, "loss": 0.1936, "step": 31290 }, { "epoch": 0.05548330243736807, "grad_norm": 0.6171875, "learning_rate": 0.0015850599979070114, "loss": 0.3102, "step": 31292 }, { "epoch": 0.055486848602677885, "grad_norm": 0.64453125, "learning_rate": 0.0015850098622530963, "loss": 0.1962, "step": 31294 }, { "epoch": 0.0554903947679877, "grad_norm": 0.9609375, "learning_rate": 0.001584959724478029, "loss": 0.1734, "step": 31296 }, { "epoch": 0.055493940933297514, "grad_norm": 0.25390625, "learning_rate": 0.0015849095845820301, "loss": 0.1955, "step": 31298 }, { "epoch": 0.05549748709860733, "grad_norm": 0.2734375, "learning_rate": 0.0015848594425653183, "loss": 0.1338, "step": 31300 }, { "epoch": 0.05550103326391715, "grad_norm": 0.447265625, "learning_rate": 0.0015848092984281127, "loss": 0.2134, "step": 31302 }, { "epoch": 0.055504579429226965, "grad_norm": 0.59765625, "learning_rate": 0.001584759152170633, "loss": 0.1921, "step": 31304 }, { "epoch": 0.05550812559453678, "grad_norm": 0.79296875, "learning_rate": 0.0015847090037930983, "loss": 0.1311, "step": 31306 }, { "epoch": 0.055511671759846594, "grad_norm": 1.8046875, "learning_rate": 0.0015846588532957282, "loss": 0.2029, "step": 31308 }, { "epoch": 0.05551521792515641, "grad_norm": 0.470703125, "learning_rate": 0.0015846087006787415, "loss": 0.1818, "step": 31310 }, { "epoch": 0.05551876409046622, "grad_norm": 0.400390625, "learning_rate": 0.001584558545942358, "loss": 0.1529, "step": 31312 }, { "epoch": 0.05552231025577604, "grad_norm": 0.478515625, "learning_rate": 0.001584508389086797, "loss": 0.1835, "step": 31314 }, { "epoch": 0.05552585642108585, "grad_norm": 0.3046875, "learning_rate": 0.0015844582301122773, "loss": 0.2339, "step": 31316 }, { "epoch": 0.05552940258639567, "grad_norm": 0.49609375, "learning_rate": 0.001584408069019019, "loss": 0.1589, "step": 31318 }, { "epoch": 0.05553294875170548, "grad_norm": 2.265625, "learning_rate": 0.0015843579058072413, "loss": 0.2302, "step": 31320 }, { "epoch": 0.055536494917015296, "grad_norm": 0.455078125, "learning_rate": 0.0015843077404771632, "loss": 0.1914, "step": 31322 }, { "epoch": 0.05554004108232512, "grad_norm": 1.25, "learning_rate": 0.0015842575730290046, "loss": 0.1813, "step": 31324 }, { "epoch": 0.05554358724763493, "grad_norm": 0.80078125, "learning_rate": 0.0015842074034629846, "loss": 0.2245, "step": 31326 }, { "epoch": 0.055547133412944746, "grad_norm": 0.412109375, "learning_rate": 0.0015841572317793226, "loss": 0.1937, "step": 31328 }, { "epoch": 0.05555067957825456, "grad_norm": 0.3359375, "learning_rate": 0.0015841070579782383, "loss": 0.2615, "step": 31330 }, { "epoch": 0.055554225743564375, "grad_norm": 0.45703125, "learning_rate": 0.0015840568820599508, "loss": 0.1818, "step": 31332 }, { "epoch": 0.05555777190887419, "grad_norm": 0.263671875, "learning_rate": 0.0015840067040246795, "loss": 0.2092, "step": 31334 }, { "epoch": 0.055561318074184005, "grad_norm": 0.388671875, "learning_rate": 0.0015839565238726441, "loss": 0.1339, "step": 31336 }, { "epoch": 0.05556486423949382, "grad_norm": 0.74609375, "learning_rate": 0.0015839063416040642, "loss": 0.2457, "step": 31338 }, { "epoch": 0.055568410404803634, "grad_norm": 0.353515625, "learning_rate": 0.001583856157219159, "loss": 0.2162, "step": 31340 }, { "epoch": 0.05557195657011345, "grad_norm": 0.353515625, "learning_rate": 0.001583805970718148, "loss": 0.1731, "step": 31342 }, { "epoch": 0.05557550273542326, "grad_norm": 0.80078125, "learning_rate": 0.0015837557821012506, "loss": 0.3627, "step": 31344 }, { "epoch": 0.055579048900733084, "grad_norm": 0.62890625, "learning_rate": 0.0015837055913686861, "loss": 0.1894, "step": 31346 }, { "epoch": 0.0555825950660429, "grad_norm": 0.5625, "learning_rate": 0.001583655398520675, "loss": 0.1778, "step": 31348 }, { "epoch": 0.05558614123135271, "grad_norm": 0.478515625, "learning_rate": 0.0015836052035574357, "loss": 0.2885, "step": 31350 }, { "epoch": 0.05558968739666253, "grad_norm": 0.259765625, "learning_rate": 0.0015835550064791882, "loss": 0.1742, "step": 31352 }, { "epoch": 0.05559323356197234, "grad_norm": 0.95703125, "learning_rate": 0.0015835048072861519, "loss": 0.2766, "step": 31354 }, { "epoch": 0.05559677972728216, "grad_norm": 0.353515625, "learning_rate": 0.0015834546059785466, "loss": 0.1784, "step": 31356 }, { "epoch": 0.05560032589259197, "grad_norm": 0.330078125, "learning_rate": 0.0015834044025565914, "loss": 0.3342, "step": 31358 }, { "epoch": 0.055603872057901786, "grad_norm": 0.64453125, "learning_rate": 0.0015833541970205066, "loss": 0.1974, "step": 31360 }, { "epoch": 0.0556074182232116, "grad_norm": 0.50390625, "learning_rate": 0.001583303989370511, "loss": 0.1654, "step": 31362 }, { "epoch": 0.055610964388521415, "grad_norm": 1.953125, "learning_rate": 0.0015832537796068248, "loss": 0.2265, "step": 31364 }, { "epoch": 0.05561451055383123, "grad_norm": 0.65625, "learning_rate": 0.001583203567729667, "loss": 0.1779, "step": 31366 }, { "epoch": 0.055618056719141044, "grad_norm": 0.48828125, "learning_rate": 0.0015831533537392574, "loss": 0.2579, "step": 31368 }, { "epoch": 0.055621602884450866, "grad_norm": 2.53125, "learning_rate": 0.001583103137635816, "loss": 0.3657, "step": 31370 }, { "epoch": 0.05562514904976068, "grad_norm": 0.8828125, "learning_rate": 0.0015830529194195615, "loss": 0.2351, "step": 31372 }, { "epoch": 0.055628695215070495, "grad_norm": 1.734375, "learning_rate": 0.0015830026990907144, "loss": 0.2034, "step": 31374 }, { "epoch": 0.05563224138038031, "grad_norm": 0.291015625, "learning_rate": 0.0015829524766494943, "loss": 0.1787, "step": 31376 }, { "epoch": 0.055635787545690124, "grad_norm": 0.921875, "learning_rate": 0.0015829022520961204, "loss": 0.2, "step": 31378 }, { "epoch": 0.05563933371099994, "grad_norm": 0.494140625, "learning_rate": 0.0015828520254308129, "loss": 0.1661, "step": 31380 }, { "epoch": 0.05564287987630975, "grad_norm": 1.546875, "learning_rate": 0.001582801796653791, "loss": 0.2007, "step": 31382 }, { "epoch": 0.05564642604161957, "grad_norm": 0.37109375, "learning_rate": 0.0015827515657652743, "loss": 0.2123, "step": 31384 }, { "epoch": 0.05564997220692938, "grad_norm": 0.439453125, "learning_rate": 0.0015827013327654824, "loss": 0.181, "step": 31386 }, { "epoch": 0.0556535183722392, "grad_norm": 0.62890625, "learning_rate": 0.0015826510976546357, "loss": 0.1792, "step": 31388 }, { "epoch": 0.05565706453754901, "grad_norm": 0.349609375, "learning_rate": 0.0015826008604329532, "loss": 0.1785, "step": 31390 }, { "epoch": 0.05566061070285883, "grad_norm": 0.875, "learning_rate": 0.0015825506211006551, "loss": 0.2025, "step": 31392 }, { "epoch": 0.05566415686816865, "grad_norm": 1.4921875, "learning_rate": 0.0015825003796579606, "loss": 0.1991, "step": 31394 }, { "epoch": 0.05566770303347846, "grad_norm": 1.4140625, "learning_rate": 0.00158245013610509, "loss": 0.2841, "step": 31396 }, { "epoch": 0.05567124919878828, "grad_norm": 0.85546875, "learning_rate": 0.0015823998904422622, "loss": 0.1833, "step": 31398 }, { "epoch": 0.05567479536409809, "grad_norm": 0.384765625, "learning_rate": 0.0015823496426696979, "loss": 0.2043, "step": 31400 }, { "epoch": 0.055678341529407906, "grad_norm": 0.59375, "learning_rate": 0.0015822993927876166, "loss": 0.1873, "step": 31402 }, { "epoch": 0.05568188769471772, "grad_norm": 2.03125, "learning_rate": 0.001582249140796238, "loss": 0.3987, "step": 31404 }, { "epoch": 0.055685433860027535, "grad_norm": 0.490234375, "learning_rate": 0.0015821988866957812, "loss": 0.1563, "step": 31406 }, { "epoch": 0.05568898002533735, "grad_norm": 0.2578125, "learning_rate": 0.0015821486304864669, "loss": 0.1952, "step": 31408 }, { "epoch": 0.055692526190647164, "grad_norm": 1.375, "learning_rate": 0.0015820983721685146, "loss": 0.1765, "step": 31410 }, { "epoch": 0.05569607235595698, "grad_norm": 1.453125, "learning_rate": 0.001582048111742144, "loss": 0.1445, "step": 31412 }, { "epoch": 0.0556996185212668, "grad_norm": 0.27734375, "learning_rate": 0.0015819978492075748, "loss": 0.1763, "step": 31414 }, { "epoch": 0.055703164686576614, "grad_norm": 1.3125, "learning_rate": 0.001581947584565027, "loss": 0.2401, "step": 31416 }, { "epoch": 0.05570671085188643, "grad_norm": 0.3984375, "learning_rate": 0.0015818973178147208, "loss": 0.229, "step": 31418 }, { "epoch": 0.055710257017196244, "grad_norm": 0.5234375, "learning_rate": 0.0015818470489568754, "loss": 0.2063, "step": 31420 }, { "epoch": 0.05571380318250606, "grad_norm": 0.3125, "learning_rate": 0.001581796777991711, "loss": 0.1924, "step": 31422 }, { "epoch": 0.05571734934781587, "grad_norm": 1.3359375, "learning_rate": 0.0015817465049194474, "loss": 0.2552, "step": 31424 }, { "epoch": 0.05572089551312569, "grad_norm": 0.51171875, "learning_rate": 0.001581696229740304, "loss": 0.1498, "step": 31426 }, { "epoch": 0.0557244416784355, "grad_norm": 0.515625, "learning_rate": 0.0015816459524545016, "loss": 0.2681, "step": 31428 }, { "epoch": 0.055727987843745316, "grad_norm": 0.8125, "learning_rate": 0.0015815956730622594, "loss": 0.4454, "step": 31430 }, { "epoch": 0.05573153400905513, "grad_norm": 2.359375, "learning_rate": 0.0015815453915637972, "loss": 0.3548, "step": 31432 }, { "epoch": 0.055735080174364945, "grad_norm": 1.1328125, "learning_rate": 0.0015814951079593355, "loss": 0.2253, "step": 31434 }, { "epoch": 0.05573862633967476, "grad_norm": 1.2890625, "learning_rate": 0.0015814448222490939, "loss": 0.2625, "step": 31436 }, { "epoch": 0.05574217250498458, "grad_norm": 0.30859375, "learning_rate": 0.0015813945344332922, "loss": 0.314, "step": 31438 }, { "epoch": 0.055745718670294396, "grad_norm": 0.62890625, "learning_rate": 0.0015813442445121505, "loss": 0.2143, "step": 31440 }, { "epoch": 0.05574926483560421, "grad_norm": 0.55078125, "learning_rate": 0.0015812939524858884, "loss": 0.1446, "step": 31442 }, { "epoch": 0.055752811000914025, "grad_norm": 0.421875, "learning_rate": 0.0015812436583547264, "loss": 0.2614, "step": 31444 }, { "epoch": 0.05575635716622384, "grad_norm": 0.423828125, "learning_rate": 0.0015811933621188842, "loss": 0.2264, "step": 31446 }, { "epoch": 0.055759903331533654, "grad_norm": 0.380859375, "learning_rate": 0.0015811430637785815, "loss": 0.1803, "step": 31448 }, { "epoch": 0.05576344949684347, "grad_norm": 0.60546875, "learning_rate": 0.0015810927633340386, "loss": 0.2521, "step": 31450 }, { "epoch": 0.05576699566215328, "grad_norm": 0.451171875, "learning_rate": 0.0015810424607854755, "loss": 0.2851, "step": 31452 }, { "epoch": 0.0557705418274631, "grad_norm": 0.81640625, "learning_rate": 0.001580992156133112, "loss": 0.197, "step": 31454 }, { "epoch": 0.05577408799277291, "grad_norm": 0.578125, "learning_rate": 0.0015809418493771684, "loss": 0.2149, "step": 31456 }, { "epoch": 0.05577763415808273, "grad_norm": 2.5, "learning_rate": 0.0015808915405178643, "loss": 0.3473, "step": 31458 }, { "epoch": 0.05578118032339255, "grad_norm": 0.326171875, "learning_rate": 0.00158084122955542, "loss": 0.2581, "step": 31460 }, { "epoch": 0.05578472648870236, "grad_norm": 0.388671875, "learning_rate": 0.0015807909164900556, "loss": 0.342, "step": 31462 }, { "epoch": 0.05578827265401218, "grad_norm": 0.38671875, "learning_rate": 0.0015807406013219904, "loss": 0.1603, "step": 31464 }, { "epoch": 0.05579181881932199, "grad_norm": 0.52734375, "learning_rate": 0.001580690284051446, "loss": 0.2562, "step": 31466 }, { "epoch": 0.05579536498463181, "grad_norm": 1.96875, "learning_rate": 0.001580639964678641, "loss": 0.4193, "step": 31468 }, { "epoch": 0.05579891114994162, "grad_norm": 0.486328125, "learning_rate": 0.001580589643203796, "loss": 0.217, "step": 31470 }, { "epoch": 0.055802457315251436, "grad_norm": 0.416015625, "learning_rate": 0.001580539319627131, "loss": 0.3901, "step": 31472 }, { "epoch": 0.05580600348056125, "grad_norm": 0.625, "learning_rate": 0.0015804889939488663, "loss": 0.2214, "step": 31474 }, { "epoch": 0.055809549645871065, "grad_norm": 0.443359375, "learning_rate": 0.0015804386661692218, "loss": 0.1907, "step": 31476 }, { "epoch": 0.05581309581118088, "grad_norm": 0.341796875, "learning_rate": 0.0015803883362884177, "loss": 0.2111, "step": 31478 }, { "epoch": 0.055816641976490694, "grad_norm": 1.5390625, "learning_rate": 0.0015803380043066739, "loss": 0.2539, "step": 31480 }, { "epoch": 0.055820188141800516, "grad_norm": 0.984375, "learning_rate": 0.0015802876702242109, "loss": 0.1664, "step": 31482 }, { "epoch": 0.05582373430711033, "grad_norm": 0.275390625, "learning_rate": 0.0015802373340412486, "loss": 0.2057, "step": 31484 }, { "epoch": 0.055827280472420145, "grad_norm": 0.35546875, "learning_rate": 0.0015801869957580068, "loss": 0.2017, "step": 31486 }, { "epoch": 0.05583082663772996, "grad_norm": 0.30078125, "learning_rate": 0.0015801366553747063, "loss": 0.1755, "step": 31488 }, { "epoch": 0.055834372803039774, "grad_norm": 0.38671875, "learning_rate": 0.0015800863128915667, "loss": 0.2311, "step": 31490 }, { "epoch": 0.05583791896834959, "grad_norm": 0.62109375, "learning_rate": 0.0015800359683088088, "loss": 0.1608, "step": 31492 }, { "epoch": 0.0558414651336594, "grad_norm": 0.404296875, "learning_rate": 0.0015799856216266524, "loss": 0.1902, "step": 31494 }, { "epoch": 0.05584501129896922, "grad_norm": 0.421875, "learning_rate": 0.0015799352728453175, "loss": 0.181, "step": 31496 }, { "epoch": 0.05584855746427903, "grad_norm": 0.41796875, "learning_rate": 0.0015798849219650247, "loss": 0.1978, "step": 31498 }, { "epoch": 0.055852103629588847, "grad_norm": 0.2412109375, "learning_rate": 0.001579834568985994, "loss": 0.1685, "step": 31500 }, { "epoch": 0.05585564979489866, "grad_norm": 0.298828125, "learning_rate": 0.001579784213908445, "loss": 0.1578, "step": 31502 }, { "epoch": 0.055859195960208476, "grad_norm": 0.34765625, "learning_rate": 0.0015797338567325994, "loss": 0.1546, "step": 31504 }, { "epoch": 0.0558627421255183, "grad_norm": 0.2490234375, "learning_rate": 0.001579683497458676, "loss": 0.1491, "step": 31506 }, { "epoch": 0.05586628829082811, "grad_norm": 0.486328125, "learning_rate": 0.0015796331360868959, "loss": 0.4946, "step": 31508 }, { "epoch": 0.055869834456137926, "grad_norm": 0.2578125, "learning_rate": 0.001579582772617479, "loss": 0.2212, "step": 31510 }, { "epoch": 0.05587338062144774, "grad_norm": 0.546875, "learning_rate": 0.0015795324070506456, "loss": 0.1957, "step": 31512 }, { "epoch": 0.055876926786757555, "grad_norm": 0.5546875, "learning_rate": 0.001579482039386616, "loss": 0.3397, "step": 31514 }, { "epoch": 0.05588047295206737, "grad_norm": 0.6640625, "learning_rate": 0.0015794316696256107, "loss": 0.1913, "step": 31516 }, { "epoch": 0.055884019117377184, "grad_norm": 0.486328125, "learning_rate": 0.0015793812977678494, "loss": 0.2293, "step": 31518 }, { "epoch": 0.055887565282687, "grad_norm": 0.353515625, "learning_rate": 0.0015793309238135527, "loss": 0.1675, "step": 31520 }, { "epoch": 0.055891111447996814, "grad_norm": 0.875, "learning_rate": 0.0015792805477629413, "loss": 0.1526, "step": 31522 }, { "epoch": 0.05589465761330663, "grad_norm": 0.375, "learning_rate": 0.0015792301696162352, "loss": 0.1256, "step": 31524 }, { "epoch": 0.05589820377861644, "grad_norm": 0.98828125, "learning_rate": 0.0015791797893736543, "loss": 0.1994, "step": 31526 }, { "epoch": 0.055901749943926264, "grad_norm": 0.390625, "learning_rate": 0.0015791294070354196, "loss": 0.1543, "step": 31528 }, { "epoch": 0.05590529610923608, "grad_norm": 0.35546875, "learning_rate": 0.0015790790226017514, "loss": 0.1686, "step": 31530 }, { "epoch": 0.05590884227454589, "grad_norm": 0.279296875, "learning_rate": 0.0015790286360728696, "loss": 0.1553, "step": 31532 }, { "epoch": 0.05591238843985571, "grad_norm": 0.22265625, "learning_rate": 0.0015789782474489949, "loss": 0.155, "step": 31534 }, { "epoch": 0.05591593460516552, "grad_norm": 0.78515625, "learning_rate": 0.0015789278567303476, "loss": 0.2033, "step": 31536 }, { "epoch": 0.05591948077047534, "grad_norm": 1.4140625, "learning_rate": 0.0015788774639171478, "loss": 0.1975, "step": 31538 }, { "epoch": 0.05592302693578515, "grad_norm": 2.453125, "learning_rate": 0.0015788270690096163, "loss": 0.3192, "step": 31540 }, { "epoch": 0.055926573101094966, "grad_norm": 0.3359375, "learning_rate": 0.0015787766720079732, "loss": 0.2119, "step": 31542 }, { "epoch": 0.05593011926640478, "grad_norm": 0.283203125, "learning_rate": 0.0015787262729124393, "loss": 0.1727, "step": 31544 }, { "epoch": 0.055933665431714595, "grad_norm": 0.6328125, "learning_rate": 0.0015786758717232343, "loss": 0.2437, "step": 31546 }, { "epoch": 0.05593721159702441, "grad_norm": 0.546875, "learning_rate": 0.0015786254684405797, "loss": 0.3225, "step": 31548 }, { "epoch": 0.05594075776233423, "grad_norm": 0.279296875, "learning_rate": 0.0015785750630646952, "loss": 0.3449, "step": 31550 }, { "epoch": 0.055944303927644046, "grad_norm": 0.296875, "learning_rate": 0.0015785246555958012, "loss": 0.1501, "step": 31552 }, { "epoch": 0.05594785009295386, "grad_norm": 0.37890625, "learning_rate": 0.001578474246034118, "loss": 0.2831, "step": 31554 }, { "epoch": 0.055951396258263675, "grad_norm": 1.3515625, "learning_rate": 0.0015784238343798667, "loss": 0.2639, "step": 31556 }, { "epoch": 0.05595494242357349, "grad_norm": 0.36328125, "learning_rate": 0.0015783734206332675, "loss": 0.1718, "step": 31558 }, { "epoch": 0.055958488588883304, "grad_norm": 0.353515625, "learning_rate": 0.001578323004794541, "loss": 0.2069, "step": 31560 }, { "epoch": 0.05596203475419312, "grad_norm": 0.58203125, "learning_rate": 0.001578272586863907, "loss": 0.2093, "step": 31562 }, { "epoch": 0.05596558091950293, "grad_norm": 0.48046875, "learning_rate": 0.0015782221668415868, "loss": 0.1728, "step": 31564 }, { "epoch": 0.05596912708481275, "grad_norm": 0.26953125, "learning_rate": 0.0015781717447278007, "loss": 0.1473, "step": 31566 }, { "epoch": 0.05597267325012256, "grad_norm": 0.466796875, "learning_rate": 0.0015781213205227692, "loss": 0.2306, "step": 31568 }, { "epoch": 0.05597621941543238, "grad_norm": 0.392578125, "learning_rate": 0.0015780708942267126, "loss": 0.1683, "step": 31570 }, { "epoch": 0.05597976558074219, "grad_norm": 0.23046875, "learning_rate": 0.0015780204658398516, "loss": 0.1924, "step": 31572 }, { "epoch": 0.05598331174605201, "grad_norm": 0.15625, "learning_rate": 0.001577970035362407, "loss": 0.1573, "step": 31574 }, { "epoch": 0.05598685791136183, "grad_norm": 0.259765625, "learning_rate": 0.0015779196027945986, "loss": 0.206, "step": 31576 }, { "epoch": 0.05599040407667164, "grad_norm": 0.345703125, "learning_rate": 0.0015778691681366478, "loss": 0.2891, "step": 31578 }, { "epoch": 0.055993950241981456, "grad_norm": 0.294921875, "learning_rate": 0.0015778187313887748, "loss": 0.128, "step": 31580 }, { "epoch": 0.05599749640729127, "grad_norm": 0.349609375, "learning_rate": 0.0015777682925512003, "loss": 0.1987, "step": 31582 }, { "epoch": 0.056001042572601085, "grad_norm": 0.89453125, "learning_rate": 0.0015777178516241447, "loss": 0.2538, "step": 31584 }, { "epoch": 0.0560045887379109, "grad_norm": 0.89453125, "learning_rate": 0.0015776674086078286, "loss": 0.2965, "step": 31586 }, { "epoch": 0.056008134903220715, "grad_norm": 0.259765625, "learning_rate": 0.001577616963502473, "loss": 0.2687, "step": 31588 }, { "epoch": 0.05601168106853053, "grad_norm": 0.376953125, "learning_rate": 0.0015775665163082983, "loss": 0.2303, "step": 31590 }, { "epoch": 0.056015227233840344, "grad_norm": 0.90234375, "learning_rate": 0.0015775160670255248, "loss": 0.3005, "step": 31592 }, { "epoch": 0.05601877339915016, "grad_norm": 0.4921875, "learning_rate": 0.0015774656156543733, "loss": 0.2338, "step": 31594 }, { "epoch": 0.05602231956445998, "grad_norm": 0.361328125, "learning_rate": 0.0015774151621950647, "loss": 0.2035, "step": 31596 }, { "epoch": 0.056025865729769794, "grad_norm": 0.65625, "learning_rate": 0.0015773647066478196, "loss": 0.2276, "step": 31598 }, { "epoch": 0.05602941189507961, "grad_norm": 0.27734375, "learning_rate": 0.0015773142490128583, "loss": 0.1807, "step": 31600 }, { "epoch": 0.05603295806038942, "grad_norm": 1.2109375, "learning_rate": 0.001577263789290402, "loss": 0.1833, "step": 31602 }, { "epoch": 0.05603650422569924, "grad_norm": 7.46875, "learning_rate": 0.001577213327480671, "loss": 0.1888, "step": 31604 }, { "epoch": 0.05604005039100905, "grad_norm": 0.703125, "learning_rate": 0.0015771628635838863, "loss": 0.3437, "step": 31606 }, { "epoch": 0.05604359655631887, "grad_norm": 0.6015625, "learning_rate": 0.0015771123976002681, "loss": 0.1537, "step": 31608 }, { "epoch": 0.05604714272162868, "grad_norm": 0.8203125, "learning_rate": 0.0015770619295300376, "loss": 0.2145, "step": 31610 }, { "epoch": 0.056050688886938496, "grad_norm": 2.59375, "learning_rate": 0.001577011459373415, "loss": 0.2339, "step": 31612 }, { "epoch": 0.05605423505224831, "grad_norm": 0.5546875, "learning_rate": 0.0015769609871306217, "loss": 0.1846, "step": 31614 }, { "epoch": 0.056057781217558125, "grad_norm": 0.318359375, "learning_rate": 0.001576910512801878, "loss": 0.1614, "step": 31616 }, { "epoch": 0.05606132738286795, "grad_norm": 0.2578125, "learning_rate": 0.0015768600363874048, "loss": 0.1297, "step": 31618 }, { "epoch": 0.05606487354817776, "grad_norm": 1.2890625, "learning_rate": 0.0015768095578874224, "loss": 0.2745, "step": 31620 }, { "epoch": 0.056068419713487576, "grad_norm": 0.375, "learning_rate": 0.0015767590773021526, "loss": 0.2105, "step": 31622 }, { "epoch": 0.05607196587879739, "grad_norm": 0.3046875, "learning_rate": 0.0015767085946318154, "loss": 0.1507, "step": 31624 }, { "epoch": 0.056075512044107205, "grad_norm": 1.6328125, "learning_rate": 0.0015766581098766315, "loss": 0.1731, "step": 31626 }, { "epoch": 0.05607905820941702, "grad_norm": 0.255859375, "learning_rate": 0.0015766076230368218, "loss": 0.1255, "step": 31628 }, { "epoch": 0.056082604374726834, "grad_norm": 0.2314453125, "learning_rate": 0.0015765571341126072, "loss": 0.1965, "step": 31630 }, { "epoch": 0.05608615054003665, "grad_norm": 0.443359375, "learning_rate": 0.001576506643104209, "loss": 0.1852, "step": 31632 }, { "epoch": 0.05608969670534646, "grad_norm": 0.345703125, "learning_rate": 0.001576456150011847, "loss": 0.2156, "step": 31634 }, { "epoch": 0.05609324287065628, "grad_norm": 0.421875, "learning_rate": 0.0015764056548357428, "loss": 0.1837, "step": 31636 }, { "epoch": 0.05609678903596609, "grad_norm": 0.23828125, "learning_rate": 0.001576355157576117, "loss": 0.1918, "step": 31638 }, { "epoch": 0.05610033520127591, "grad_norm": 3.15625, "learning_rate": 0.0015763046582331901, "loss": 0.3827, "step": 31640 }, { "epoch": 0.05610388136658573, "grad_norm": 0.310546875, "learning_rate": 0.0015762541568071838, "loss": 0.2107, "step": 31642 }, { "epoch": 0.05610742753189554, "grad_norm": 0.359375, "learning_rate": 0.0015762036532983182, "loss": 0.183, "step": 31644 }, { "epoch": 0.05611097369720536, "grad_norm": 0.9296875, "learning_rate": 0.0015761531477068147, "loss": 0.2281, "step": 31646 }, { "epoch": 0.05611451986251517, "grad_norm": 0.640625, "learning_rate": 0.0015761026400328935, "loss": 0.261, "step": 31648 }, { "epoch": 0.05611806602782499, "grad_norm": 0.34765625, "learning_rate": 0.0015760521302767762, "loss": 0.182, "step": 31650 }, { "epoch": 0.0561216121931348, "grad_norm": 0.423828125, "learning_rate": 0.0015760016184386831, "loss": 0.2255, "step": 31652 }, { "epoch": 0.056125158358444616, "grad_norm": 0.50390625, "learning_rate": 0.0015759511045188353, "loss": 0.1869, "step": 31654 }, { "epoch": 0.05612870452375443, "grad_norm": 0.328125, "learning_rate": 0.0015759005885174545, "loss": 0.2303, "step": 31656 }, { "epoch": 0.056132250689064245, "grad_norm": 1.359375, "learning_rate": 0.0015758500704347602, "loss": 0.2089, "step": 31658 }, { "epoch": 0.05613579685437406, "grad_norm": 0.8984375, "learning_rate": 0.0015757995502709747, "loss": 0.1791, "step": 31660 }, { "epoch": 0.056139343019683874, "grad_norm": 0.2080078125, "learning_rate": 0.0015757490280263182, "loss": 0.1313, "step": 31662 }, { "epoch": 0.056142889184993695, "grad_norm": 0.423828125, "learning_rate": 0.0015756985037010118, "loss": 0.1574, "step": 31664 }, { "epoch": 0.05614643535030351, "grad_norm": 0.462890625, "learning_rate": 0.0015756479772952765, "loss": 0.2021, "step": 31666 }, { "epoch": 0.056149981515613324, "grad_norm": 0.78125, "learning_rate": 0.0015755974488093332, "loss": 0.1713, "step": 31668 }, { "epoch": 0.05615352768092314, "grad_norm": 0.16015625, "learning_rate": 0.0015755469182434027, "loss": 0.2109, "step": 31670 }, { "epoch": 0.056157073846232954, "grad_norm": 1.421875, "learning_rate": 0.0015754963855977066, "loss": 0.2644, "step": 31672 }, { "epoch": 0.05616062001154277, "grad_norm": 0.93359375, "learning_rate": 0.001575445850872465, "loss": 0.172, "step": 31674 }, { "epoch": 0.05616416617685258, "grad_norm": 0.57421875, "learning_rate": 0.0015753953140678998, "loss": 0.1923, "step": 31676 }, { "epoch": 0.0561677123421624, "grad_norm": 0.251953125, "learning_rate": 0.0015753447751842313, "loss": 0.2004, "step": 31678 }, { "epoch": 0.05617125850747221, "grad_norm": 0.5234375, "learning_rate": 0.0015752942342216816, "loss": 0.166, "step": 31680 }, { "epoch": 0.056174804672782026, "grad_norm": 0.380859375, "learning_rate": 0.0015752436911804703, "loss": 0.1743, "step": 31682 }, { "epoch": 0.05617835083809184, "grad_norm": 0.296875, "learning_rate": 0.0015751931460608196, "loss": 0.2723, "step": 31684 }, { "epoch": 0.05618189700340166, "grad_norm": 0.37890625, "learning_rate": 0.0015751425988629498, "loss": 0.2252, "step": 31686 }, { "epoch": 0.05618544316871148, "grad_norm": 0.2109375, "learning_rate": 0.0015750920495870822, "loss": 0.2169, "step": 31688 }, { "epoch": 0.05618898933402129, "grad_norm": 0.416015625, "learning_rate": 0.001575041498233438, "loss": 0.1383, "step": 31690 }, { "epoch": 0.056192535499331106, "grad_norm": 0.5390625, "learning_rate": 0.0015749909448022385, "loss": 0.1735, "step": 31692 }, { "epoch": 0.05619608166464092, "grad_norm": 4.15625, "learning_rate": 0.0015749403892937042, "loss": 0.3588, "step": 31694 }, { "epoch": 0.056199627829950735, "grad_norm": 0.25390625, "learning_rate": 0.0015748898317080567, "loss": 0.2356, "step": 31696 }, { "epoch": 0.05620317399526055, "grad_norm": 0.2890625, "learning_rate": 0.001574839272045517, "loss": 0.1871, "step": 31698 }, { "epoch": 0.056206720160570364, "grad_norm": 0.76171875, "learning_rate": 0.0015747887103063059, "loss": 0.144, "step": 31700 }, { "epoch": 0.05621026632588018, "grad_norm": 0.296875, "learning_rate": 0.0015747381464906447, "loss": 0.2034, "step": 31702 }, { "epoch": 0.05621381249118999, "grad_norm": 0.55078125, "learning_rate": 0.001574687580598755, "loss": 0.259, "step": 31704 }, { "epoch": 0.05621735865649981, "grad_norm": 0.39453125, "learning_rate": 0.0015746370126308571, "loss": 0.196, "step": 31706 }, { "epoch": 0.05622090482180962, "grad_norm": 0.6875, "learning_rate": 0.001574586442587173, "loss": 0.1685, "step": 31708 }, { "epoch": 0.056224450987119444, "grad_norm": 0.322265625, "learning_rate": 0.0015745358704679232, "loss": 0.181, "step": 31710 }, { "epoch": 0.05622799715242926, "grad_norm": 0.365234375, "learning_rate": 0.0015744852962733295, "loss": 0.206, "step": 31712 }, { "epoch": 0.05623154331773907, "grad_norm": 0.89453125, "learning_rate": 0.0015744347200036123, "loss": 0.1821, "step": 31714 }, { "epoch": 0.05623508948304889, "grad_norm": 0.423828125, "learning_rate": 0.0015743841416589936, "loss": 0.2014, "step": 31716 }, { "epoch": 0.0562386356483587, "grad_norm": 0.322265625, "learning_rate": 0.0015743335612396939, "loss": 0.1839, "step": 31718 }, { "epoch": 0.05624218181366852, "grad_norm": 0.29296875, "learning_rate": 0.0015742829787459352, "loss": 0.258, "step": 31720 }, { "epoch": 0.05624572797897833, "grad_norm": 1.015625, "learning_rate": 0.0015742323941779375, "loss": 0.2354, "step": 31722 }, { "epoch": 0.056249274144288146, "grad_norm": 1.3828125, "learning_rate": 0.0015741818075359236, "loss": 0.2884, "step": 31724 }, { "epoch": 0.05625282030959796, "grad_norm": 1.125, "learning_rate": 0.0015741312188201132, "loss": 0.2646, "step": 31726 }, { "epoch": 0.056256366474907775, "grad_norm": 0.7734375, "learning_rate": 0.0015740806280307284, "loss": 0.2466, "step": 31728 }, { "epoch": 0.05625991264021759, "grad_norm": 0.49609375, "learning_rate": 0.0015740300351679906, "loss": 0.1922, "step": 31730 }, { "epoch": 0.05626345880552741, "grad_norm": 0.380859375, "learning_rate": 0.0015739794402321208, "loss": 0.2046, "step": 31732 }, { "epoch": 0.056267004970837226, "grad_norm": 3.671875, "learning_rate": 0.00157392884322334, "loss": 0.3482, "step": 31734 }, { "epoch": 0.05627055113614704, "grad_norm": 0.251953125, "learning_rate": 0.0015738782441418698, "loss": 0.2074, "step": 31736 }, { "epoch": 0.056274097301456855, "grad_norm": 0.2294921875, "learning_rate": 0.0015738276429879314, "loss": 0.1833, "step": 31738 }, { "epoch": 0.05627764346676667, "grad_norm": 0.5625, "learning_rate": 0.0015737770397617461, "loss": 0.193, "step": 31740 }, { "epoch": 0.056281189632076484, "grad_norm": 0.625, "learning_rate": 0.0015737264344635354, "loss": 0.1712, "step": 31742 }, { "epoch": 0.0562847357973863, "grad_norm": 0.375, "learning_rate": 0.00157367582709352, "loss": 0.1809, "step": 31744 }, { "epoch": 0.05628828196269611, "grad_norm": 0.369140625, "learning_rate": 0.001573625217651922, "loss": 0.1505, "step": 31746 }, { "epoch": 0.05629182812800593, "grad_norm": 0.6875, "learning_rate": 0.0015735746061389627, "loss": 0.2225, "step": 31748 }, { "epoch": 0.05629537429331574, "grad_norm": 0.2109375, "learning_rate": 0.001573523992554863, "loss": 0.2404, "step": 31750 }, { "epoch": 0.056298920458625556, "grad_norm": 0.37890625, "learning_rate": 0.0015734733768998442, "loss": 0.1931, "step": 31752 }, { "epoch": 0.05630246662393538, "grad_norm": 0.2255859375, "learning_rate": 0.001573422759174128, "loss": 0.1842, "step": 31754 }, { "epoch": 0.05630601278924519, "grad_norm": 0.263671875, "learning_rate": 0.0015733721393779356, "loss": 0.1661, "step": 31756 }, { "epoch": 0.05630955895455501, "grad_norm": 0.443359375, "learning_rate": 0.0015733215175114887, "loss": 0.164, "step": 31758 }, { "epoch": 0.05631310511986482, "grad_norm": 0.9453125, "learning_rate": 0.0015732708935750077, "loss": 0.2351, "step": 31760 }, { "epoch": 0.056316651285174636, "grad_norm": 0.39453125, "learning_rate": 0.0015732202675687154, "loss": 0.1819, "step": 31762 }, { "epoch": 0.05632019745048445, "grad_norm": 0.423828125, "learning_rate": 0.0015731696394928323, "loss": 0.1952, "step": 31764 }, { "epoch": 0.056323743615794265, "grad_norm": 1.171875, "learning_rate": 0.00157311900934758, "loss": 0.3291, "step": 31766 }, { "epoch": 0.05632728978110408, "grad_norm": 4.125, "learning_rate": 0.0015730683771331799, "loss": 0.2503, "step": 31768 }, { "epoch": 0.056330835946413894, "grad_norm": 0.68359375, "learning_rate": 0.0015730177428498538, "loss": 0.1594, "step": 31770 }, { "epoch": 0.05633438211172371, "grad_norm": 1.34375, "learning_rate": 0.001572967106497823, "loss": 0.2025, "step": 31772 }, { "epoch": 0.056337928277033524, "grad_norm": 0.515625, "learning_rate": 0.0015729164680773082, "loss": 0.1814, "step": 31774 }, { "epoch": 0.05634147444234334, "grad_norm": 0.68359375, "learning_rate": 0.0015728658275885319, "loss": 0.2987, "step": 31776 }, { "epoch": 0.05634502060765316, "grad_norm": 0.5546875, "learning_rate": 0.0015728151850317152, "loss": 0.1991, "step": 31778 }, { "epoch": 0.056348566772962974, "grad_norm": 0.5390625, "learning_rate": 0.0015727645404070793, "loss": 0.1644, "step": 31780 }, { "epoch": 0.05635211293827279, "grad_norm": 2.359375, "learning_rate": 0.001572713893714846, "loss": 0.2354, "step": 31782 }, { "epoch": 0.0563556591035826, "grad_norm": 0.318359375, "learning_rate": 0.0015726632449552362, "loss": 0.2181, "step": 31784 }, { "epoch": 0.05635920526889242, "grad_norm": 0.5234375, "learning_rate": 0.0015726125941284727, "loss": 0.2244, "step": 31786 }, { "epoch": 0.05636275143420223, "grad_norm": 0.56640625, "learning_rate": 0.0015725619412347755, "loss": 0.1776, "step": 31788 }, { "epoch": 0.05636629759951205, "grad_norm": 0.52734375, "learning_rate": 0.0015725112862743679, "loss": 0.2227, "step": 31790 }, { "epoch": 0.05636984376482186, "grad_norm": 1.5859375, "learning_rate": 0.0015724606292474696, "loss": 0.2322, "step": 31792 }, { "epoch": 0.056373389930131676, "grad_norm": 0.8125, "learning_rate": 0.0015724099701543033, "loss": 0.2024, "step": 31794 }, { "epoch": 0.05637693609544149, "grad_norm": 0.349609375, "learning_rate": 0.0015723593089950899, "loss": 0.1994, "step": 31796 }, { "epoch": 0.056380482260751305, "grad_norm": 2.21875, "learning_rate": 0.0015723086457700515, "loss": 0.2888, "step": 31798 }, { "epoch": 0.05638402842606113, "grad_norm": 0.96484375, "learning_rate": 0.0015722579804794093, "loss": 0.2439, "step": 31800 }, { "epoch": 0.05638757459137094, "grad_norm": 0.6015625, "learning_rate": 0.001572207313123385, "loss": 0.2162, "step": 31802 }, { "epoch": 0.056391120756680756, "grad_norm": 1.6484375, "learning_rate": 0.0015721566437022002, "loss": 0.1872, "step": 31804 }, { "epoch": 0.05639466692199057, "grad_norm": 0.4453125, "learning_rate": 0.0015721059722160766, "loss": 0.2099, "step": 31806 }, { "epoch": 0.056398213087300385, "grad_norm": 0.34375, "learning_rate": 0.0015720552986652357, "loss": 0.1453, "step": 31808 }, { "epoch": 0.0564017592526102, "grad_norm": 0.439453125, "learning_rate": 0.0015720046230498993, "loss": 0.3344, "step": 31810 }, { "epoch": 0.056405305417920014, "grad_norm": 0.8828125, "learning_rate": 0.0015719539453702884, "loss": 0.2228, "step": 31812 }, { "epoch": 0.05640885158322983, "grad_norm": 2.734375, "learning_rate": 0.0015719032656266254, "loss": 0.3393, "step": 31814 }, { "epoch": 0.05641239774853964, "grad_norm": 0.40625, "learning_rate": 0.0015718525838191316, "loss": 0.1845, "step": 31816 }, { "epoch": 0.05641594391384946, "grad_norm": 0.33203125, "learning_rate": 0.0015718018999480285, "loss": 0.1682, "step": 31818 }, { "epoch": 0.05641949007915927, "grad_norm": 0.54296875, "learning_rate": 0.0015717512140135384, "loss": 0.3934, "step": 31820 }, { "epoch": 0.056423036244469094, "grad_norm": 0.9296875, "learning_rate": 0.0015717005260158818, "loss": 0.2013, "step": 31822 }, { "epoch": 0.05642658240977891, "grad_norm": 5.9375, "learning_rate": 0.0015716498359552818, "loss": 0.3728, "step": 31824 }, { "epoch": 0.05643012857508872, "grad_norm": 0.5703125, "learning_rate": 0.0015715991438319588, "loss": 0.1906, "step": 31826 }, { "epoch": 0.05643367474039854, "grad_norm": 0.6875, "learning_rate": 0.0015715484496461357, "loss": 0.3512, "step": 31828 }, { "epoch": 0.05643722090570835, "grad_norm": 0.421875, "learning_rate": 0.001571497753398033, "loss": 0.2177, "step": 31830 }, { "epoch": 0.056440767071018166, "grad_norm": 0.37109375, "learning_rate": 0.0015714470550878734, "loss": 0.1614, "step": 31832 }, { "epoch": 0.05644431323632798, "grad_norm": 0.921875, "learning_rate": 0.0015713963547158783, "loss": 0.1625, "step": 31834 }, { "epoch": 0.056447859401637795, "grad_norm": 2.140625, "learning_rate": 0.0015713456522822688, "loss": 0.2384, "step": 31836 }, { "epoch": 0.05645140556694761, "grad_norm": 0.5390625, "learning_rate": 0.0015712949477872677, "loss": 0.206, "step": 31838 }, { "epoch": 0.056454951732257425, "grad_norm": 0.46875, "learning_rate": 0.0015712442412310961, "loss": 0.3, "step": 31840 }, { "epoch": 0.05645849789756724, "grad_norm": 0.30078125, "learning_rate": 0.001571193532613976, "loss": 0.1562, "step": 31842 }, { "epoch": 0.056462044062877054, "grad_norm": 0.83203125, "learning_rate": 0.001571142821936129, "loss": 0.1549, "step": 31844 }, { "epoch": 0.056465590228186875, "grad_norm": 0.37890625, "learning_rate": 0.001571092109197777, "loss": 0.1394, "step": 31846 }, { "epoch": 0.05646913639349669, "grad_norm": 0.71484375, "learning_rate": 0.0015710413943991418, "loss": 0.2299, "step": 31848 }, { "epoch": 0.056472682558806504, "grad_norm": 0.228515625, "learning_rate": 0.001570990677540445, "loss": 0.3401, "step": 31850 }, { "epoch": 0.05647622872411632, "grad_norm": 0.640625, "learning_rate": 0.0015709399586219088, "loss": 0.1858, "step": 31852 }, { "epoch": 0.05647977488942613, "grad_norm": 0.392578125, "learning_rate": 0.0015708892376437547, "loss": 0.1818, "step": 31854 }, { "epoch": 0.05648332105473595, "grad_norm": 3.609375, "learning_rate": 0.0015708385146062048, "loss": 0.2723, "step": 31856 }, { "epoch": 0.05648686722004576, "grad_norm": 0.66015625, "learning_rate": 0.0015707877895094803, "loss": 0.231, "step": 31858 }, { "epoch": 0.05649041338535558, "grad_norm": 0.48046875, "learning_rate": 0.0015707370623538038, "loss": 0.1811, "step": 31860 }, { "epoch": 0.05649395955066539, "grad_norm": 0.4453125, "learning_rate": 0.0015706863331393965, "loss": 0.2217, "step": 31862 }, { "epoch": 0.056497505715975206, "grad_norm": 0.54296875, "learning_rate": 0.0015706356018664806, "loss": 0.182, "step": 31864 }, { "epoch": 0.05650105188128502, "grad_norm": 0.400390625, "learning_rate": 0.0015705848685352783, "loss": 0.2094, "step": 31866 }, { "epoch": 0.05650459804659484, "grad_norm": 1.328125, "learning_rate": 0.0015705341331460109, "loss": 0.3011, "step": 31868 }, { "epoch": 0.05650814421190466, "grad_norm": 0.7109375, "learning_rate": 0.0015704833956989006, "loss": 0.1441, "step": 31870 }, { "epoch": 0.05651169037721447, "grad_norm": 0.466796875, "learning_rate": 0.0015704326561941694, "loss": 0.137, "step": 31872 }, { "epoch": 0.056515236542524286, "grad_norm": 0.53515625, "learning_rate": 0.0015703819146320385, "loss": 0.4438, "step": 31874 }, { "epoch": 0.0565187827078341, "grad_norm": 0.2490234375, "learning_rate": 0.0015703311710127308, "loss": 0.1593, "step": 31876 }, { "epoch": 0.056522328873143915, "grad_norm": 0.3125, "learning_rate": 0.0015702804253364675, "loss": 0.1592, "step": 31878 }, { "epoch": 0.05652587503845373, "grad_norm": 0.86328125, "learning_rate": 0.001570229677603471, "loss": 0.2546, "step": 31880 }, { "epoch": 0.056529421203763544, "grad_norm": 0.76953125, "learning_rate": 0.0015701789278139628, "loss": 0.1914, "step": 31882 }, { "epoch": 0.05653296736907336, "grad_norm": 0.439453125, "learning_rate": 0.0015701281759681652, "loss": 0.1811, "step": 31884 }, { "epoch": 0.05653651353438317, "grad_norm": 0.2216796875, "learning_rate": 0.0015700774220663003, "loss": 0.1303, "step": 31886 }, { "epoch": 0.05654005969969299, "grad_norm": 0.466796875, "learning_rate": 0.0015700266661085895, "loss": 0.2166, "step": 31888 }, { "epoch": 0.05654360586500281, "grad_norm": 0.578125, "learning_rate": 0.0015699759080952552, "loss": 0.1777, "step": 31890 }, { "epoch": 0.056547152030312624, "grad_norm": 1.2265625, "learning_rate": 0.0015699251480265192, "loss": 0.2194, "step": 31892 }, { "epoch": 0.05655069819562244, "grad_norm": 1.6796875, "learning_rate": 0.0015698743859026035, "loss": 0.2464, "step": 31894 }, { "epoch": 0.05655424436093225, "grad_norm": 2.140625, "learning_rate": 0.0015698236217237306, "loss": 0.2988, "step": 31896 }, { "epoch": 0.05655779052624207, "grad_norm": 0.416015625, "learning_rate": 0.0015697728554901213, "loss": 0.2077, "step": 31898 }, { "epoch": 0.05656133669155188, "grad_norm": 1.703125, "learning_rate": 0.001569722087201999, "loss": 0.3094, "step": 31900 }, { "epoch": 0.0565648828568617, "grad_norm": 0.8125, "learning_rate": 0.001569671316859585, "loss": 0.2131, "step": 31902 }, { "epoch": 0.05656842902217151, "grad_norm": 2.359375, "learning_rate": 0.0015696205444631016, "loss": 0.2686, "step": 31904 }, { "epoch": 0.056571975187481326, "grad_norm": 0.302734375, "learning_rate": 0.0015695697700127706, "loss": 0.1976, "step": 31906 }, { "epoch": 0.05657552135279114, "grad_norm": 0.3828125, "learning_rate": 0.001569518993508814, "loss": 0.1556, "step": 31908 }, { "epoch": 0.056579067518100955, "grad_norm": 0.201171875, "learning_rate": 0.0015694682149514543, "loss": 0.1451, "step": 31910 }, { "epoch": 0.05658261368341077, "grad_norm": 0.388671875, "learning_rate": 0.001569417434340913, "loss": 0.2178, "step": 31912 }, { "epoch": 0.05658615984872059, "grad_norm": 0.5859375, "learning_rate": 0.001569366651677413, "loss": 0.2007, "step": 31914 }, { "epoch": 0.056589706014030405, "grad_norm": 0.482421875, "learning_rate": 0.0015693158669611755, "loss": 0.2089, "step": 31916 }, { "epoch": 0.05659325217934022, "grad_norm": 0.66015625, "learning_rate": 0.0015692650801924235, "loss": 0.183, "step": 31918 }, { "epoch": 0.056596798344650034, "grad_norm": 0.41796875, "learning_rate": 0.001569214291371378, "loss": 0.193, "step": 31920 }, { "epoch": 0.05660034450995985, "grad_norm": 0.26171875, "learning_rate": 0.0015691635004982621, "loss": 0.1504, "step": 31922 }, { "epoch": 0.056603890675269664, "grad_norm": 0.53125, "learning_rate": 0.0015691127075732976, "loss": 0.1574, "step": 31924 }, { "epoch": 0.05660743684057948, "grad_norm": 0.36328125, "learning_rate": 0.0015690619125967068, "loss": 0.2717, "step": 31926 }, { "epoch": 0.05661098300588929, "grad_norm": 0.52734375, "learning_rate": 0.0015690111155687112, "loss": 0.1626, "step": 31928 }, { "epoch": 0.05661452917119911, "grad_norm": 0.85546875, "learning_rate": 0.0015689603164895337, "loss": 0.4407, "step": 31930 }, { "epoch": 0.05661807533650892, "grad_norm": 2.046875, "learning_rate": 0.001568909515359396, "loss": 0.1987, "step": 31932 }, { "epoch": 0.056621621501818736, "grad_norm": 0.30859375, "learning_rate": 0.0015688587121785208, "loss": 0.1756, "step": 31934 }, { "epoch": 0.05662516766712856, "grad_norm": 0.62890625, "learning_rate": 0.0015688079069471293, "loss": 0.1342, "step": 31936 }, { "epoch": 0.05662871383243837, "grad_norm": 1.0234375, "learning_rate": 0.001568757099665445, "loss": 0.2281, "step": 31938 }, { "epoch": 0.05663225999774819, "grad_norm": 0.349609375, "learning_rate": 0.001568706290333689, "loss": 0.1942, "step": 31940 }, { "epoch": 0.056635806163058, "grad_norm": 1.703125, "learning_rate": 0.001568655478952084, "loss": 0.1968, "step": 31942 }, { "epoch": 0.056639352328367816, "grad_norm": 4.46875, "learning_rate": 0.0015686046655208523, "loss": 0.4224, "step": 31944 }, { "epoch": 0.05664289849367763, "grad_norm": 0.2734375, "learning_rate": 0.001568553850040216, "loss": 0.1908, "step": 31946 }, { "epoch": 0.056646444658987445, "grad_norm": 0.83203125, "learning_rate": 0.0015685030325103973, "loss": 0.2404, "step": 31948 }, { "epoch": 0.05664999082429726, "grad_norm": 1.46875, "learning_rate": 0.0015684522129316186, "loss": 0.2682, "step": 31950 }, { "epoch": 0.056653536989607074, "grad_norm": 0.498046875, "learning_rate": 0.0015684013913041016, "loss": 0.1954, "step": 31952 }, { "epoch": 0.05665708315491689, "grad_norm": 0.2451171875, "learning_rate": 0.0015683505676280694, "loss": 0.24, "step": 31954 }, { "epoch": 0.0566606293202267, "grad_norm": 0.5625, "learning_rate": 0.0015682997419037437, "loss": 0.1908, "step": 31956 }, { "epoch": 0.056664175485536525, "grad_norm": 0.8828125, "learning_rate": 0.001568248914131347, "loss": 0.2108, "step": 31958 }, { "epoch": 0.05666772165084634, "grad_norm": 0.5546875, "learning_rate": 0.0015681980843111015, "loss": 0.2204, "step": 31960 }, { "epoch": 0.056671267816156154, "grad_norm": 0.294921875, "learning_rate": 0.0015681472524432297, "loss": 0.1546, "step": 31962 }, { "epoch": 0.05667481398146597, "grad_norm": 0.5390625, "learning_rate": 0.0015680964185279533, "loss": 0.2263, "step": 31964 }, { "epoch": 0.05667836014677578, "grad_norm": 0.271484375, "learning_rate": 0.0015680455825654955, "loss": 0.2358, "step": 31966 }, { "epoch": 0.0566819063120856, "grad_norm": 0.51953125, "learning_rate": 0.001567994744556078, "loss": 0.2097, "step": 31968 }, { "epoch": 0.05668545247739541, "grad_norm": 0.265625, "learning_rate": 0.0015679439044999236, "loss": 0.189, "step": 31970 }, { "epoch": 0.05668899864270523, "grad_norm": 0.94921875, "learning_rate": 0.0015678930623972541, "loss": 0.2378, "step": 31972 }, { "epoch": 0.05669254480801504, "grad_norm": 1.25, "learning_rate": 0.0015678422182482923, "loss": 0.1974, "step": 31974 }, { "epoch": 0.056696090973324856, "grad_norm": 0.390625, "learning_rate": 0.0015677913720532602, "loss": 0.1609, "step": 31976 }, { "epoch": 0.05669963713863467, "grad_norm": 1.8359375, "learning_rate": 0.0015677405238123806, "loss": 0.1428, "step": 31978 }, { "epoch": 0.056703183303944485, "grad_norm": 0.9453125, "learning_rate": 0.0015676896735258754, "loss": 0.1898, "step": 31980 }, { "epoch": 0.056706729469254306, "grad_norm": 0.494140625, "learning_rate": 0.0015676388211939673, "loss": 0.1567, "step": 31982 }, { "epoch": 0.05671027563456412, "grad_norm": 0.6328125, "learning_rate": 0.0015675879668168786, "loss": 0.152, "step": 31984 }, { "epoch": 0.056713821799873936, "grad_norm": 1.203125, "learning_rate": 0.001567537110394832, "loss": 0.2965, "step": 31986 }, { "epoch": 0.05671736796518375, "grad_norm": 1.515625, "learning_rate": 0.0015674862519280495, "loss": 0.1973, "step": 31988 }, { "epoch": 0.056720914130493565, "grad_norm": 0.37109375, "learning_rate": 0.0015674353914167537, "loss": 0.1426, "step": 31990 }, { "epoch": 0.05672446029580338, "grad_norm": 0.6640625, "learning_rate": 0.0015673845288611667, "loss": 0.1841, "step": 31992 }, { "epoch": 0.056728006461113194, "grad_norm": 0.2578125, "learning_rate": 0.0015673336642615116, "loss": 0.1729, "step": 31994 }, { "epoch": 0.05673155262642301, "grad_norm": 0.25390625, "learning_rate": 0.0015672827976180106, "loss": 0.1624, "step": 31996 }, { "epoch": 0.05673509879173282, "grad_norm": 0.435546875, "learning_rate": 0.001567231928930886, "loss": 0.2301, "step": 31998 }, { "epoch": 0.05673864495704264, "grad_norm": 1.4453125, "learning_rate": 0.0015671810582003605, "loss": 0.1902, "step": 32000 }, { "epoch": 0.05674219112235245, "grad_norm": 0.265625, "learning_rate": 0.001567130185426656, "loss": 0.1788, "step": 32002 }, { "epoch": 0.05674573728766227, "grad_norm": 0.56640625, "learning_rate": 0.0015670793106099955, "loss": 0.2429, "step": 32004 }, { "epoch": 0.05674928345297209, "grad_norm": 0.57421875, "learning_rate": 0.0015670284337506014, "loss": 0.1885, "step": 32006 }, { "epoch": 0.0567528296182819, "grad_norm": 0.73046875, "learning_rate": 0.0015669775548486965, "loss": 0.2028, "step": 32008 }, { "epoch": 0.05675637578359172, "grad_norm": 0.52734375, "learning_rate": 0.0015669266739045027, "loss": 0.2217, "step": 32010 }, { "epoch": 0.05675992194890153, "grad_norm": 0.349609375, "learning_rate": 0.0015668757909182431, "loss": 0.2279, "step": 32012 }, { "epoch": 0.056763468114211346, "grad_norm": 0.5078125, "learning_rate": 0.00156682490589014, "loss": 0.2151, "step": 32014 }, { "epoch": 0.05676701427952116, "grad_norm": 0.625, "learning_rate": 0.0015667740188204157, "loss": 0.1602, "step": 32016 }, { "epoch": 0.056770560444830975, "grad_norm": 0.328125, "learning_rate": 0.0015667231297092931, "loss": 0.2891, "step": 32018 }, { "epoch": 0.05677410661014079, "grad_norm": 0.79296875, "learning_rate": 0.0015666722385569948, "loss": 0.2027, "step": 32020 }, { "epoch": 0.056777652775450604, "grad_norm": 0.7890625, "learning_rate": 0.001566621345363743, "loss": 0.1645, "step": 32022 }, { "epoch": 0.05678119894076042, "grad_norm": 0.228515625, "learning_rate": 0.0015665704501297607, "loss": 0.1633, "step": 32024 }, { "epoch": 0.05678474510607024, "grad_norm": 0.423828125, "learning_rate": 0.0015665195528552695, "loss": 0.1464, "step": 32026 }, { "epoch": 0.056788291271380055, "grad_norm": 0.310546875, "learning_rate": 0.0015664686535404938, "loss": 0.2002, "step": 32028 }, { "epoch": 0.05679183743668987, "grad_norm": 0.9375, "learning_rate": 0.0015664177521856543, "loss": 0.2033, "step": 32030 }, { "epoch": 0.056795383601999684, "grad_norm": 1.46875, "learning_rate": 0.0015663668487909752, "loss": 0.2873, "step": 32032 }, { "epoch": 0.0567989297673095, "grad_norm": 0.71484375, "learning_rate": 0.001566315943356678, "loss": 0.2676, "step": 32034 }, { "epoch": 0.05680247593261931, "grad_norm": 1.15625, "learning_rate": 0.0015662650358829857, "loss": 0.1922, "step": 32036 }, { "epoch": 0.05680602209792913, "grad_norm": 0.404296875, "learning_rate": 0.001566214126370121, "loss": 0.2233, "step": 32038 }, { "epoch": 0.05680956826323894, "grad_norm": 0.5703125, "learning_rate": 0.001566163214818307, "loss": 0.1665, "step": 32040 }, { "epoch": 0.05681311442854876, "grad_norm": 0.3671875, "learning_rate": 0.001566112301227765, "loss": 0.2876, "step": 32042 }, { "epoch": 0.05681666059385857, "grad_norm": 0.36328125, "learning_rate": 0.0015660613855987195, "loss": 0.1457, "step": 32044 }, { "epoch": 0.056820206759168386, "grad_norm": 0.244140625, "learning_rate": 0.0015660104679313916, "loss": 0.188, "step": 32046 }, { "epoch": 0.0568237529244782, "grad_norm": 0.8828125, "learning_rate": 0.001565959548226005, "loss": 0.2686, "step": 32048 }, { "epoch": 0.05682729908978802, "grad_norm": 0.6484375, "learning_rate": 0.0015659086264827816, "loss": 0.18, "step": 32050 }, { "epoch": 0.05683084525509784, "grad_norm": 0.4296875, "learning_rate": 0.0015658577027019447, "loss": 0.2925, "step": 32052 }, { "epoch": 0.05683439142040765, "grad_norm": 0.396484375, "learning_rate": 0.0015658067768837165, "loss": 0.2012, "step": 32054 }, { "epoch": 0.056837937585717466, "grad_norm": 0.408203125, "learning_rate": 0.0015657558490283206, "loss": 0.1614, "step": 32056 }, { "epoch": 0.05684148375102728, "grad_norm": 0.8828125, "learning_rate": 0.0015657049191359787, "loss": 0.2312, "step": 32058 }, { "epoch": 0.056845029916337095, "grad_norm": 0.470703125, "learning_rate": 0.0015656539872069143, "loss": 0.4211, "step": 32060 }, { "epoch": 0.05684857608164691, "grad_norm": 1.9921875, "learning_rate": 0.0015656030532413497, "loss": 0.5052, "step": 32062 }, { "epoch": 0.056852122246956724, "grad_norm": 0.93359375, "learning_rate": 0.0015655521172395078, "loss": 0.2941, "step": 32064 }, { "epoch": 0.05685566841226654, "grad_norm": 0.6640625, "learning_rate": 0.0015655011792016112, "loss": 0.1685, "step": 32066 }, { "epoch": 0.05685921457757635, "grad_norm": 0.171875, "learning_rate": 0.0015654502391278832, "loss": 0.1551, "step": 32068 }, { "epoch": 0.05686276074288617, "grad_norm": 1.7890625, "learning_rate": 0.001565399297018546, "loss": 0.1756, "step": 32070 }, { "epoch": 0.05686630690819599, "grad_norm": 0.326171875, "learning_rate": 0.0015653483528738224, "loss": 0.1792, "step": 32072 }, { "epoch": 0.056869853073505804, "grad_norm": 1.1953125, "learning_rate": 0.001565297406693936, "loss": 0.2405, "step": 32074 }, { "epoch": 0.05687339923881562, "grad_norm": 0.291015625, "learning_rate": 0.0015652464584791081, "loss": 0.2502, "step": 32076 }, { "epoch": 0.05687694540412543, "grad_norm": 0.51171875, "learning_rate": 0.0015651955082295632, "loss": 0.1705, "step": 32078 }, { "epoch": 0.05688049156943525, "grad_norm": 0.423828125, "learning_rate": 0.0015651445559455232, "loss": 0.1881, "step": 32080 }, { "epoch": 0.05688403773474506, "grad_norm": 0.26171875, "learning_rate": 0.001565093601627211, "loss": 0.1477, "step": 32082 }, { "epoch": 0.056887583900054876, "grad_norm": 1.5390625, "learning_rate": 0.0015650426452748498, "loss": 0.233, "step": 32084 }, { "epoch": 0.05689113006536469, "grad_norm": 4.375, "learning_rate": 0.0015649916868886618, "loss": 0.2199, "step": 32086 }, { "epoch": 0.056894676230674505, "grad_norm": 0.283203125, "learning_rate": 0.0015649407264688706, "loss": 0.1826, "step": 32088 }, { "epoch": 0.05689822239598432, "grad_norm": 0.30859375, "learning_rate": 0.0015648897640156987, "loss": 0.2291, "step": 32090 }, { "epoch": 0.056901768561294135, "grad_norm": 0.240234375, "learning_rate": 0.001564838799529369, "loss": 0.1866, "step": 32092 }, { "epoch": 0.056905314726603956, "grad_norm": 0.484375, "learning_rate": 0.0015647878330101042, "loss": 0.2349, "step": 32094 }, { "epoch": 0.05690886089191377, "grad_norm": 0.59375, "learning_rate": 0.0015647368644581274, "loss": 0.1454, "step": 32096 }, { "epoch": 0.056912407057223585, "grad_norm": 0.31640625, "learning_rate": 0.0015646858938736618, "loss": 0.1743, "step": 32098 }, { "epoch": 0.0569159532225334, "grad_norm": 0.953125, "learning_rate": 0.0015646349212569298, "loss": 0.2142, "step": 32100 }, { "epoch": 0.056919499387843214, "grad_norm": 0.470703125, "learning_rate": 0.0015645839466081545, "loss": 0.1674, "step": 32102 }, { "epoch": 0.05692304555315303, "grad_norm": 0.62109375, "learning_rate": 0.001564532969927559, "loss": 0.2256, "step": 32104 }, { "epoch": 0.05692659171846284, "grad_norm": 0.90234375, "learning_rate": 0.001564481991215366, "loss": 0.2044, "step": 32106 }, { "epoch": 0.05693013788377266, "grad_norm": 0.6328125, "learning_rate": 0.0015644310104717988, "loss": 0.2192, "step": 32108 }, { "epoch": 0.05693368404908247, "grad_norm": 0.291015625, "learning_rate": 0.0015643800276970799, "loss": 0.2244, "step": 32110 }, { "epoch": 0.05693723021439229, "grad_norm": 0.271484375, "learning_rate": 0.0015643290428914327, "loss": 0.2352, "step": 32112 }, { "epoch": 0.0569407763797021, "grad_norm": 1.578125, "learning_rate": 0.00156427805605508, "loss": 0.1942, "step": 32114 }, { "epoch": 0.056944322545011916, "grad_norm": 0.65625, "learning_rate": 0.0015642270671882446, "loss": 0.1818, "step": 32116 }, { "epoch": 0.05694786871032174, "grad_norm": 0.466796875, "learning_rate": 0.0015641760762911496, "loss": 0.1862, "step": 32118 }, { "epoch": 0.05695141487563155, "grad_norm": 8.1875, "learning_rate": 0.0015641250833640184, "loss": 0.3622, "step": 32120 }, { "epoch": 0.05695496104094137, "grad_norm": 0.57421875, "learning_rate": 0.0015640740884070735, "loss": 0.2184, "step": 32122 }, { "epoch": 0.05695850720625118, "grad_norm": 0.52734375, "learning_rate": 0.001564023091420538, "loss": 0.189, "step": 32124 }, { "epoch": 0.056962053371560996, "grad_norm": 1.6328125, "learning_rate": 0.001563972092404635, "loss": 0.2492, "step": 32126 }, { "epoch": 0.05696559953687081, "grad_norm": 11.5, "learning_rate": 0.0015639210913595877, "loss": 0.3734, "step": 32128 }, { "epoch": 0.056969145702180625, "grad_norm": 0.875, "learning_rate": 0.001563870088285619, "loss": 0.1814, "step": 32130 }, { "epoch": 0.05697269186749044, "grad_norm": 0.52734375, "learning_rate": 0.001563819083182952, "loss": 0.1948, "step": 32132 }, { "epoch": 0.056976238032800254, "grad_norm": 0.82421875, "learning_rate": 0.0015637680760518101, "loss": 0.2031, "step": 32134 }, { "epoch": 0.05697978419811007, "grad_norm": 0.59375, "learning_rate": 0.0015637170668924153, "loss": 0.1938, "step": 32136 }, { "epoch": 0.05698333036341988, "grad_norm": 0.609375, "learning_rate": 0.0015636660557049918, "loss": 0.2081, "step": 32138 }, { "epoch": 0.056986876528729705, "grad_norm": 1.328125, "learning_rate": 0.0015636150424897621, "loss": 0.3748, "step": 32140 }, { "epoch": 0.05699042269403952, "grad_norm": 1.515625, "learning_rate": 0.0015635640272469496, "loss": 0.3033, "step": 32142 }, { "epoch": 0.056993968859349334, "grad_norm": 0.73046875, "learning_rate": 0.0015635130099767773, "loss": 0.2421, "step": 32144 }, { "epoch": 0.05699751502465915, "grad_norm": 0.205078125, "learning_rate": 0.0015634619906794687, "loss": 0.1306, "step": 32146 }, { "epoch": 0.05700106118996896, "grad_norm": 2.078125, "learning_rate": 0.0015634109693552466, "loss": 0.4345, "step": 32148 }, { "epoch": 0.05700460735527878, "grad_norm": 0.4296875, "learning_rate": 0.0015633599460043337, "loss": 0.1538, "step": 32150 }, { "epoch": 0.05700815352058859, "grad_norm": 0.9609375, "learning_rate": 0.0015633089206269534, "loss": 0.2521, "step": 32152 }, { "epoch": 0.05701169968589841, "grad_norm": 0.3515625, "learning_rate": 0.001563257893223329, "loss": 0.1935, "step": 32154 }, { "epoch": 0.05701524585120822, "grad_norm": 0.259765625, "learning_rate": 0.001563206863793684, "loss": 0.2224, "step": 32156 }, { "epoch": 0.057018792016518036, "grad_norm": 0.5078125, "learning_rate": 0.0015631558323382412, "loss": 0.2041, "step": 32158 }, { "epoch": 0.05702233818182785, "grad_norm": 0.80078125, "learning_rate": 0.001563104798857224, "loss": 0.1875, "step": 32160 }, { "epoch": 0.05702588434713767, "grad_norm": 0.2138671875, "learning_rate": 0.001563053763350855, "loss": 0.2855, "step": 32162 }, { "epoch": 0.057029430512447486, "grad_norm": 0.423828125, "learning_rate": 0.001563002725819358, "loss": 0.2074, "step": 32164 }, { "epoch": 0.0570329766777573, "grad_norm": 0.55078125, "learning_rate": 0.001562951686262956, "loss": 0.2322, "step": 32166 }, { "epoch": 0.057036522843067115, "grad_norm": 0.6328125, "learning_rate": 0.0015629006446818722, "loss": 0.1924, "step": 32168 }, { "epoch": 0.05704006900837693, "grad_norm": 2.796875, "learning_rate": 0.0015628496010763298, "loss": 0.2221, "step": 32170 }, { "epoch": 0.057043615173686744, "grad_norm": 0.7734375, "learning_rate": 0.0015627985554465523, "loss": 0.2148, "step": 32172 }, { "epoch": 0.05704716133899656, "grad_norm": 1.1640625, "learning_rate": 0.0015627475077927627, "loss": 0.2548, "step": 32174 }, { "epoch": 0.057050707504306374, "grad_norm": 1.890625, "learning_rate": 0.0015626964581151844, "loss": 0.157, "step": 32176 }, { "epoch": 0.05705425366961619, "grad_norm": 0.349609375, "learning_rate": 0.00156264540641404, "loss": 0.1892, "step": 32178 }, { "epoch": 0.057057799834926, "grad_norm": 0.86328125, "learning_rate": 0.001562594352689554, "loss": 0.1976, "step": 32180 }, { "epoch": 0.05706134600023582, "grad_norm": 0.9921875, "learning_rate": 0.0015625432969419487, "loss": 0.1798, "step": 32182 }, { "epoch": 0.05706489216554563, "grad_norm": 1.6953125, "learning_rate": 0.0015624922391714478, "loss": 0.2785, "step": 32184 }, { "epoch": 0.05706843833085545, "grad_norm": 0.73828125, "learning_rate": 0.0015624411793782747, "loss": 0.2852, "step": 32186 }, { "epoch": 0.05707198449616527, "grad_norm": 0.67578125, "learning_rate": 0.001562390117562652, "loss": 0.227, "step": 32188 }, { "epoch": 0.05707553066147508, "grad_norm": 0.609375, "learning_rate": 0.0015623390537248037, "loss": 0.1537, "step": 32190 }, { "epoch": 0.0570790768267849, "grad_norm": 1.4375, "learning_rate": 0.0015622879878649531, "loss": 0.2325, "step": 32192 }, { "epoch": 0.05708262299209471, "grad_norm": 0.341796875, "learning_rate": 0.001562236919983323, "loss": 0.1795, "step": 32194 }, { "epoch": 0.057086169157404526, "grad_norm": 0.62890625, "learning_rate": 0.0015621858500801376, "loss": 0.1908, "step": 32196 }, { "epoch": 0.05708971532271434, "grad_norm": 1.5078125, "learning_rate": 0.0015621347781556195, "loss": 0.253, "step": 32198 }, { "epoch": 0.057093261488024155, "grad_norm": 0.96875, "learning_rate": 0.0015620837042099925, "loss": 0.1902, "step": 32200 }, { "epoch": 0.05709680765333397, "grad_norm": 0.68359375, "learning_rate": 0.0015620326282434795, "loss": 0.232, "step": 32202 }, { "epoch": 0.057100353818643784, "grad_norm": 0.330078125, "learning_rate": 0.0015619815502563043, "loss": 0.1615, "step": 32204 }, { "epoch": 0.0571038999839536, "grad_norm": 0.3515625, "learning_rate": 0.0015619304702486903, "loss": 0.1635, "step": 32206 }, { "epoch": 0.05710744614926342, "grad_norm": 0.39453125, "learning_rate": 0.0015618793882208606, "loss": 0.1833, "step": 32208 }, { "epoch": 0.057110992314573235, "grad_norm": 7.15625, "learning_rate": 0.0015618283041730389, "loss": 0.4443, "step": 32210 }, { "epoch": 0.05711453847988305, "grad_norm": 0.62890625, "learning_rate": 0.001561777218105448, "loss": 0.1918, "step": 32212 }, { "epoch": 0.057118084645192864, "grad_norm": 0.44921875, "learning_rate": 0.0015617261300183123, "loss": 0.208, "step": 32214 }, { "epoch": 0.05712163081050268, "grad_norm": 0.41796875, "learning_rate": 0.0015616750399118544, "loss": 0.1831, "step": 32216 }, { "epoch": 0.05712517697581249, "grad_norm": 1.328125, "learning_rate": 0.001561623947786298, "loss": 0.1804, "step": 32218 }, { "epoch": 0.05712872314112231, "grad_norm": 0.2177734375, "learning_rate": 0.0015615728536418668, "loss": 0.1726, "step": 32220 }, { "epoch": 0.05713226930643212, "grad_norm": 0.349609375, "learning_rate": 0.001561521757478784, "loss": 0.1702, "step": 32222 }, { "epoch": 0.05713581547174194, "grad_norm": 0.2412109375, "learning_rate": 0.0015614706592972732, "loss": 0.1738, "step": 32224 }, { "epoch": 0.05713936163705175, "grad_norm": 0.2158203125, "learning_rate": 0.0015614195590975574, "loss": 0.1705, "step": 32226 }, { "epoch": 0.057142907802361566, "grad_norm": 0.37109375, "learning_rate": 0.0015613684568798612, "loss": 0.2368, "step": 32228 }, { "epoch": 0.05714645396767139, "grad_norm": 0.294921875, "learning_rate": 0.0015613173526444067, "loss": 0.2326, "step": 32230 }, { "epoch": 0.0571500001329812, "grad_norm": 0.37890625, "learning_rate": 0.0015612662463914185, "loss": 0.1535, "step": 32232 }, { "epoch": 0.057153546298291016, "grad_norm": 0.376953125, "learning_rate": 0.0015612151381211193, "loss": 0.2127, "step": 32234 }, { "epoch": 0.05715709246360083, "grad_norm": 1.21875, "learning_rate": 0.0015611640278337334, "loss": 0.2155, "step": 32236 }, { "epoch": 0.057160638628910646, "grad_norm": 0.2734375, "learning_rate": 0.0015611129155294835, "loss": 0.182, "step": 32238 }, { "epoch": 0.05716418479422046, "grad_norm": 0.296875, "learning_rate": 0.0015610618012085935, "loss": 0.1956, "step": 32240 }, { "epoch": 0.057167730959530275, "grad_norm": 0.36328125, "learning_rate": 0.0015610106848712875, "loss": 0.171, "step": 32242 }, { "epoch": 0.05717127712484009, "grad_norm": 0.25, "learning_rate": 0.0015609595665177882, "loss": 0.1812, "step": 32244 }, { "epoch": 0.057174823290149904, "grad_norm": 0.53515625, "learning_rate": 0.0015609084461483196, "loss": 0.193, "step": 32246 }, { "epoch": 0.05717836945545972, "grad_norm": 0.6328125, "learning_rate": 0.0015608573237631052, "loss": 0.2598, "step": 32248 }, { "epoch": 0.05718191562076953, "grad_norm": 0.4765625, "learning_rate": 0.0015608061993623687, "loss": 0.1934, "step": 32250 }, { "epoch": 0.05718546178607935, "grad_norm": 0.3046875, "learning_rate": 0.001560755072946333, "loss": 0.2219, "step": 32252 }, { "epoch": 0.05718900795138917, "grad_norm": 0.212890625, "learning_rate": 0.0015607039445152229, "loss": 0.1541, "step": 32254 }, { "epoch": 0.05719255411669898, "grad_norm": 0.78125, "learning_rate": 0.001560652814069261, "loss": 0.2698, "step": 32256 }, { "epoch": 0.0571961002820088, "grad_norm": 0.50390625, "learning_rate": 0.0015606016816086714, "loss": 0.1809, "step": 32258 }, { "epoch": 0.05719964644731861, "grad_norm": 1.984375, "learning_rate": 0.0015605505471336775, "loss": 0.1961, "step": 32260 }, { "epoch": 0.05720319261262843, "grad_norm": 0.453125, "learning_rate": 0.0015604994106445031, "loss": 0.2081, "step": 32262 }, { "epoch": 0.05720673877793824, "grad_norm": 0.63671875, "learning_rate": 0.001560448272141372, "loss": 0.2378, "step": 32264 }, { "epoch": 0.057210284943248056, "grad_norm": 0.671875, "learning_rate": 0.0015603971316245073, "loss": 0.2678, "step": 32266 }, { "epoch": 0.05721383110855787, "grad_norm": 0.51953125, "learning_rate": 0.0015603459890941329, "loss": 0.2462, "step": 32268 }, { "epoch": 0.057217377273867685, "grad_norm": 0.314453125, "learning_rate": 0.0015602948445504728, "loss": 0.155, "step": 32270 }, { "epoch": 0.0572209234391775, "grad_norm": 0.271484375, "learning_rate": 0.0015602436979937505, "loss": 0.1951, "step": 32272 }, { "epoch": 0.057224469604487314, "grad_norm": 0.453125, "learning_rate": 0.0015601925494241893, "loss": 0.222, "step": 32274 }, { "epoch": 0.057228015769797136, "grad_norm": 0.2041015625, "learning_rate": 0.0015601413988420132, "loss": 0.1967, "step": 32276 }, { "epoch": 0.05723156193510695, "grad_norm": 0.64453125, "learning_rate": 0.0015600902462474461, "loss": 0.1306, "step": 32278 }, { "epoch": 0.057235108100416765, "grad_norm": 1.484375, "learning_rate": 0.0015600390916407118, "loss": 0.2058, "step": 32280 }, { "epoch": 0.05723865426572658, "grad_norm": 0.6328125, "learning_rate": 0.0015599879350220334, "loss": 0.1973, "step": 32282 }, { "epoch": 0.057242200431036394, "grad_norm": 1.0, "learning_rate": 0.0015599367763916346, "loss": 0.1665, "step": 32284 }, { "epoch": 0.05724574659634621, "grad_norm": 0.412109375, "learning_rate": 0.00155988561574974, "loss": 0.1601, "step": 32286 }, { "epoch": 0.05724929276165602, "grad_norm": 0.48046875, "learning_rate": 0.0015598344530965724, "loss": 0.1869, "step": 32288 }, { "epoch": 0.05725283892696584, "grad_norm": 0.56640625, "learning_rate": 0.0015597832884323567, "loss": 0.1652, "step": 32290 }, { "epoch": 0.05725638509227565, "grad_norm": 1.375, "learning_rate": 0.0015597321217573153, "loss": 0.2419, "step": 32292 }, { "epoch": 0.05725993125758547, "grad_norm": 0.5234375, "learning_rate": 0.0015596809530716732, "loss": 0.2808, "step": 32294 }, { "epoch": 0.05726347742289528, "grad_norm": 0.7890625, "learning_rate": 0.001559629782375653, "loss": 0.1761, "step": 32296 }, { "epoch": 0.0572670235882051, "grad_norm": 0.5703125, "learning_rate": 0.0015595786096694794, "loss": 0.1904, "step": 32298 }, { "epoch": 0.05727056975351492, "grad_norm": 0.2197265625, "learning_rate": 0.001559527434953376, "loss": 0.1479, "step": 32300 }, { "epoch": 0.05727411591882473, "grad_norm": 0.2578125, "learning_rate": 0.0015594762582275667, "loss": 0.2044, "step": 32302 }, { "epoch": 0.05727766208413455, "grad_norm": 0.2119140625, "learning_rate": 0.001559425079492275, "loss": 0.1646, "step": 32304 }, { "epoch": 0.05728120824944436, "grad_norm": 0.796875, "learning_rate": 0.0015593738987477249, "loss": 0.1767, "step": 32306 }, { "epoch": 0.057284754414754176, "grad_norm": 0.6328125, "learning_rate": 0.00155932271599414, "loss": 0.2523, "step": 32308 }, { "epoch": 0.05728830058006399, "grad_norm": 0.4609375, "learning_rate": 0.0015592715312317447, "loss": 0.1737, "step": 32310 }, { "epoch": 0.057291846745373805, "grad_norm": 0.77734375, "learning_rate": 0.001559220344460762, "loss": 0.228, "step": 32312 }, { "epoch": 0.05729539291068362, "grad_norm": 1.59375, "learning_rate": 0.0015591691556814165, "loss": 0.2514, "step": 32314 }, { "epoch": 0.057298939075993434, "grad_norm": 0.455078125, "learning_rate": 0.001559117964893932, "loss": 0.1679, "step": 32316 }, { "epoch": 0.05730248524130325, "grad_norm": 2.1875, "learning_rate": 0.001559066772098532, "loss": 0.4687, "step": 32318 }, { "epoch": 0.05730603140661306, "grad_norm": 0.306640625, "learning_rate": 0.0015590155772954406, "loss": 0.1545, "step": 32320 }, { "epoch": 0.057309577571922884, "grad_norm": 0.79296875, "learning_rate": 0.0015589643804848816, "loss": 0.2366, "step": 32322 }, { "epoch": 0.0573131237372327, "grad_norm": 0.240234375, "learning_rate": 0.0015589131816670795, "loss": 0.1679, "step": 32324 }, { "epoch": 0.057316669902542514, "grad_norm": 2.1875, "learning_rate": 0.001558861980842257, "loss": 0.2072, "step": 32326 }, { "epoch": 0.05732021606785233, "grad_norm": 1.0546875, "learning_rate": 0.0015588107780106393, "loss": 0.1914, "step": 32328 }, { "epoch": 0.05732376223316214, "grad_norm": 0.453125, "learning_rate": 0.0015587595731724492, "loss": 0.1454, "step": 32330 }, { "epoch": 0.05732730839847196, "grad_norm": 0.365234375, "learning_rate": 0.001558708366327912, "loss": 0.1816, "step": 32332 }, { "epoch": 0.05733085456378177, "grad_norm": 2.25, "learning_rate": 0.0015586571574772504, "loss": 0.2226, "step": 32334 }, { "epoch": 0.057334400729091586, "grad_norm": 0.392578125, "learning_rate": 0.0015586059466206888, "loss": 0.164, "step": 32336 }, { "epoch": 0.0573379468944014, "grad_norm": 1.7890625, "learning_rate": 0.001558554733758451, "loss": 0.2791, "step": 32338 }, { "epoch": 0.057341493059711215, "grad_norm": 12.875, "learning_rate": 0.0015585035188907617, "loss": 0.2768, "step": 32340 }, { "epoch": 0.05734503922502103, "grad_norm": 2.53125, "learning_rate": 0.0015584523020178438, "loss": 0.289, "step": 32342 }, { "epoch": 0.05734858539033085, "grad_norm": 2.375, "learning_rate": 0.001558401083139922, "loss": 0.2455, "step": 32344 }, { "epoch": 0.057352131555640666, "grad_norm": 0.53125, "learning_rate": 0.0015583498622572202, "loss": 0.1701, "step": 32346 }, { "epoch": 0.05735567772095048, "grad_norm": 0.3671875, "learning_rate": 0.0015582986393699622, "loss": 0.2135, "step": 32348 }, { "epoch": 0.057359223886260295, "grad_norm": 1.921875, "learning_rate": 0.0015582474144783722, "loss": 0.1853, "step": 32350 }, { "epoch": 0.05736277005157011, "grad_norm": 0.328125, "learning_rate": 0.0015581961875826744, "loss": 0.1776, "step": 32352 }, { "epoch": 0.057366316216879924, "grad_norm": 0.419921875, "learning_rate": 0.0015581449586830928, "loss": 0.2334, "step": 32354 }, { "epoch": 0.05736986238218974, "grad_norm": 0.5078125, "learning_rate": 0.0015580937277798511, "loss": 0.22, "step": 32356 }, { "epoch": 0.05737340854749955, "grad_norm": 0.400390625, "learning_rate": 0.0015580424948731732, "loss": 0.1895, "step": 32358 }, { "epoch": 0.05737695471280937, "grad_norm": 2.046875, "learning_rate": 0.0015579912599632836, "loss": 0.2682, "step": 32360 }, { "epoch": 0.05738050087811918, "grad_norm": 0.41015625, "learning_rate": 0.0015579400230504065, "loss": 0.2234, "step": 32362 }, { "epoch": 0.057384047043429, "grad_norm": 0.51953125, "learning_rate": 0.0015578887841347655, "loss": 0.1737, "step": 32364 }, { "epoch": 0.05738759320873882, "grad_norm": 0.4296875, "learning_rate": 0.001557837543216585, "loss": 0.2105, "step": 32366 }, { "epoch": 0.05739113937404863, "grad_norm": 0.703125, "learning_rate": 0.001557786300296089, "loss": 0.1848, "step": 32368 }, { "epoch": 0.05739468553935845, "grad_norm": 0.34765625, "learning_rate": 0.0015577350553735018, "loss": 0.1639, "step": 32370 }, { "epoch": 0.05739823170466826, "grad_norm": 0.482421875, "learning_rate": 0.0015576838084490473, "loss": 0.1814, "step": 32372 }, { "epoch": 0.05740177786997808, "grad_norm": 0.349609375, "learning_rate": 0.0015576325595229497, "loss": 0.2088, "step": 32374 }, { "epoch": 0.05740532403528789, "grad_norm": 0.88671875, "learning_rate": 0.001557581308595433, "loss": 0.2096, "step": 32376 }, { "epoch": 0.057408870200597706, "grad_norm": 0.5390625, "learning_rate": 0.0015575300556667213, "loss": 0.2658, "step": 32378 }, { "epoch": 0.05741241636590752, "grad_norm": 0.380859375, "learning_rate": 0.0015574788007370393, "loss": 0.2267, "step": 32380 }, { "epoch": 0.057415962531217335, "grad_norm": 0.388671875, "learning_rate": 0.0015574275438066105, "loss": 0.1544, "step": 32382 }, { "epoch": 0.05741950869652715, "grad_norm": 0.95703125, "learning_rate": 0.0015573762848756592, "loss": 0.2101, "step": 32384 }, { "epoch": 0.057423054861836964, "grad_norm": 2.578125, "learning_rate": 0.0015573250239444096, "loss": 0.2404, "step": 32386 }, { "epoch": 0.05742660102714678, "grad_norm": 0.2431640625, "learning_rate": 0.0015572737610130866, "loss": 0.1938, "step": 32388 }, { "epoch": 0.0574301471924566, "grad_norm": 0.384765625, "learning_rate": 0.0015572224960819132, "loss": 0.2129, "step": 32390 }, { "epoch": 0.057433693357766415, "grad_norm": 0.55078125, "learning_rate": 0.0015571712291511147, "loss": 0.1626, "step": 32392 }, { "epoch": 0.05743723952307623, "grad_norm": 0.32421875, "learning_rate": 0.0015571199602209148, "loss": 0.1362, "step": 32394 }, { "epoch": 0.057440785688386044, "grad_norm": 0.31640625, "learning_rate": 0.0015570686892915373, "loss": 0.1614, "step": 32396 }, { "epoch": 0.05744433185369586, "grad_norm": 0.392578125, "learning_rate": 0.0015570174163632069, "loss": 0.17, "step": 32398 }, { "epoch": 0.05744787801900567, "grad_norm": 0.86328125, "learning_rate": 0.001556966141436148, "loss": 0.1699, "step": 32400 }, { "epoch": 0.05745142418431549, "grad_norm": 0.6328125, "learning_rate": 0.0015569148645105844, "loss": 0.2534, "step": 32402 }, { "epoch": 0.0574549703496253, "grad_norm": 0.28125, "learning_rate": 0.0015568635855867411, "loss": 0.1865, "step": 32404 }, { "epoch": 0.057458516514935117, "grad_norm": 0.498046875, "learning_rate": 0.0015568123046648412, "loss": 0.1932, "step": 32406 }, { "epoch": 0.05746206268024493, "grad_norm": 0.421875, "learning_rate": 0.0015567610217451099, "loss": 0.4279, "step": 32408 }, { "epoch": 0.057465608845554746, "grad_norm": 0.328125, "learning_rate": 0.0015567097368277712, "loss": 0.2004, "step": 32410 }, { "epoch": 0.05746915501086457, "grad_norm": 0.625, "learning_rate": 0.0015566584499130494, "loss": 0.2572, "step": 32412 }, { "epoch": 0.05747270117617438, "grad_norm": 0.703125, "learning_rate": 0.001556607161001169, "loss": 0.1913, "step": 32414 }, { "epoch": 0.057476247341484196, "grad_norm": 0.9140625, "learning_rate": 0.0015565558700923537, "loss": 0.3234, "step": 32416 }, { "epoch": 0.05747979350679401, "grad_norm": 2.0625, "learning_rate": 0.0015565045771868287, "loss": 0.3797, "step": 32418 }, { "epoch": 0.057483339672103825, "grad_norm": 0.3828125, "learning_rate": 0.0015564532822848174, "loss": 0.172, "step": 32420 }, { "epoch": 0.05748688583741364, "grad_norm": 0.71875, "learning_rate": 0.0015564019853865447, "loss": 0.1727, "step": 32422 }, { "epoch": 0.057490432002723454, "grad_norm": 0.421875, "learning_rate": 0.001556350686492235, "loss": 0.1625, "step": 32424 }, { "epoch": 0.05749397816803327, "grad_norm": 0.72265625, "learning_rate": 0.0015562993856021124, "loss": 0.1577, "step": 32426 }, { "epoch": 0.057497524333343084, "grad_norm": 0.640625, "learning_rate": 0.0015562480827164013, "loss": 0.1856, "step": 32428 }, { "epoch": 0.0575010704986529, "grad_norm": 0.796875, "learning_rate": 0.001556196777835326, "loss": 0.1747, "step": 32430 }, { "epoch": 0.05750461666396271, "grad_norm": 2.8125, "learning_rate": 0.0015561454709591112, "loss": 0.1919, "step": 32432 }, { "epoch": 0.057508162829272534, "grad_norm": 0.232421875, "learning_rate": 0.001556094162087981, "loss": 0.1671, "step": 32434 }, { "epoch": 0.05751170899458235, "grad_norm": 0.296875, "learning_rate": 0.00155604285122216, "loss": 0.1457, "step": 32436 }, { "epoch": 0.05751525515989216, "grad_norm": 1.8203125, "learning_rate": 0.001555991538361872, "loss": 0.4397, "step": 32438 }, { "epoch": 0.05751880132520198, "grad_norm": 2.1875, "learning_rate": 0.0015559402235073424, "loss": 0.1941, "step": 32440 }, { "epoch": 0.05752234749051179, "grad_norm": 0.47265625, "learning_rate": 0.001555888906658795, "loss": 0.2278, "step": 32442 }, { "epoch": 0.05752589365582161, "grad_norm": 0.5703125, "learning_rate": 0.001555837587816454, "loss": 0.1521, "step": 32444 }, { "epoch": 0.05752943982113142, "grad_norm": 0.50390625, "learning_rate": 0.0015557862669805444, "loss": 0.208, "step": 32446 }, { "epoch": 0.057532985986441236, "grad_norm": 0.25, "learning_rate": 0.0015557349441512906, "loss": 0.2124, "step": 32448 }, { "epoch": 0.05753653215175105, "grad_norm": 1.96875, "learning_rate": 0.0015556836193289167, "loss": 0.2536, "step": 32450 }, { "epoch": 0.057540078317060865, "grad_norm": 0.859375, "learning_rate": 0.0015556322925136476, "loss": 0.2857, "step": 32452 }, { "epoch": 0.05754362448237068, "grad_norm": 0.251953125, "learning_rate": 0.001555580963705707, "loss": 0.1598, "step": 32454 }, { "epoch": 0.057547170647680494, "grad_norm": 0.8828125, "learning_rate": 0.00155552963290532, "loss": 0.2007, "step": 32456 }, { "epoch": 0.057550716812990316, "grad_norm": 0.76171875, "learning_rate": 0.0015554783001127115, "loss": 0.2275, "step": 32458 }, { "epoch": 0.05755426297830013, "grad_norm": 0.38671875, "learning_rate": 0.001555426965328105, "loss": 0.2242, "step": 32460 }, { "epoch": 0.057557809143609945, "grad_norm": 1.4375, "learning_rate": 0.0015553756285517255, "loss": 0.2393, "step": 32462 }, { "epoch": 0.05756135530891976, "grad_norm": 0.76171875, "learning_rate": 0.0015553242897837975, "loss": 0.1729, "step": 32464 }, { "epoch": 0.057564901474229574, "grad_norm": 0.71484375, "learning_rate": 0.001555272949024546, "loss": 0.1808, "step": 32466 }, { "epoch": 0.05756844763953939, "grad_norm": 0.68359375, "learning_rate": 0.0015552216062741946, "loss": 0.1677, "step": 32468 }, { "epoch": 0.0575719938048492, "grad_norm": 1.8359375, "learning_rate": 0.0015551702615329687, "loss": 0.4286, "step": 32470 }, { "epoch": 0.05757553997015902, "grad_norm": 0.2021484375, "learning_rate": 0.001555118914801092, "loss": 0.1659, "step": 32472 }, { "epoch": 0.05757908613546883, "grad_norm": 0.5625, "learning_rate": 0.00155506756607879, "loss": 0.1743, "step": 32474 }, { "epoch": 0.05758263230077865, "grad_norm": 0.28125, "learning_rate": 0.0015550162153662866, "loss": 0.1767, "step": 32476 }, { "epoch": 0.05758617846608846, "grad_norm": 1.1015625, "learning_rate": 0.0015549648626638068, "loss": 0.3092, "step": 32478 }, { "epoch": 0.05758972463139828, "grad_norm": 0.90234375, "learning_rate": 0.0015549135079715743, "loss": 0.3511, "step": 32480 }, { "epoch": 0.0575932707967081, "grad_norm": 0.3515625, "learning_rate": 0.0015548621512898148, "loss": 0.1741, "step": 32482 }, { "epoch": 0.05759681696201791, "grad_norm": 0.52734375, "learning_rate": 0.0015548107926187528, "loss": 0.199, "step": 32484 }, { "epoch": 0.057600363127327726, "grad_norm": 0.73828125, "learning_rate": 0.001554759431958612, "loss": 0.24, "step": 32486 }, { "epoch": 0.05760390929263754, "grad_norm": 1.390625, "learning_rate": 0.0015547080693096179, "loss": 0.2288, "step": 32488 }, { "epoch": 0.057607455457947356, "grad_norm": 0.32421875, "learning_rate": 0.0015546567046719945, "loss": 0.1782, "step": 32490 }, { "epoch": 0.05761100162325717, "grad_norm": 0.66015625, "learning_rate": 0.0015546053380459674, "loss": 0.2162, "step": 32492 }, { "epoch": 0.057614547788566985, "grad_norm": 3.453125, "learning_rate": 0.0015545539694317599, "loss": 0.238, "step": 32494 }, { "epoch": 0.0576180939538768, "grad_norm": 1.0625, "learning_rate": 0.0015545025988295978, "loss": 0.2908, "step": 32496 }, { "epoch": 0.057621640119186614, "grad_norm": 2.734375, "learning_rate": 0.0015544512262397055, "loss": 0.2277, "step": 32498 }, { "epoch": 0.05762518628449643, "grad_norm": 0.419921875, "learning_rate": 0.0015543998516623072, "loss": 0.189, "step": 32500 }, { "epoch": 0.05762873244980625, "grad_norm": 0.498046875, "learning_rate": 0.0015543484750976282, "loss": 0.1353, "step": 32502 }, { "epoch": 0.057632278615116064, "grad_norm": 0.427734375, "learning_rate": 0.0015542970965458926, "loss": 0.2177, "step": 32504 }, { "epoch": 0.05763582478042588, "grad_norm": 1.6640625, "learning_rate": 0.0015542457160073254, "loss": 0.2283, "step": 32506 }, { "epoch": 0.05763937094573569, "grad_norm": 0.5234375, "learning_rate": 0.0015541943334821512, "loss": 0.1681, "step": 32508 }, { "epoch": 0.05764291711104551, "grad_norm": 0.494140625, "learning_rate": 0.001554142948970595, "loss": 0.1898, "step": 32510 }, { "epoch": 0.05764646327635532, "grad_norm": 1.2265625, "learning_rate": 0.0015540915624728815, "loss": 0.2549, "step": 32512 }, { "epoch": 0.05765000944166514, "grad_norm": 0.90625, "learning_rate": 0.0015540401739892347, "loss": 0.1459, "step": 32514 }, { "epoch": 0.05765355560697495, "grad_norm": 0.89453125, "learning_rate": 0.0015539887835198806, "loss": 0.1449, "step": 32516 }, { "epoch": 0.057657101772284766, "grad_norm": 2.625, "learning_rate": 0.0015539373910650428, "loss": 0.2313, "step": 32518 }, { "epoch": 0.05766064793759458, "grad_norm": 0.72265625, "learning_rate": 0.0015538859966249467, "loss": 0.2128, "step": 32520 }, { "epoch": 0.057664194102904395, "grad_norm": 0.7421875, "learning_rate": 0.001553834600199817, "loss": 0.1874, "step": 32522 }, { "epoch": 0.05766774026821421, "grad_norm": 0.291015625, "learning_rate": 0.0015537832017898786, "loss": 0.1988, "step": 32524 }, { "epoch": 0.05767128643352403, "grad_norm": 2.546875, "learning_rate": 0.0015537318013953555, "loss": 0.2306, "step": 32526 }, { "epoch": 0.057674832598833846, "grad_norm": 1.2421875, "learning_rate": 0.0015536803990164733, "loss": 0.1674, "step": 32528 }, { "epoch": 0.05767837876414366, "grad_norm": 5.25, "learning_rate": 0.0015536289946534562, "loss": 0.2624, "step": 32530 }, { "epoch": 0.057681924929453475, "grad_norm": 1.2421875, "learning_rate": 0.00155357758830653, "loss": 0.3909, "step": 32532 }, { "epoch": 0.05768547109476329, "grad_norm": 0.302734375, "learning_rate": 0.0015535261799759187, "loss": 0.1338, "step": 32534 }, { "epoch": 0.057689017260073104, "grad_norm": 1.6015625, "learning_rate": 0.001553474769661847, "loss": 0.2787, "step": 32536 }, { "epoch": 0.05769256342538292, "grad_norm": 0.9609375, "learning_rate": 0.0015534233573645405, "loss": 0.1968, "step": 32538 }, { "epoch": 0.05769610959069273, "grad_norm": 0.349609375, "learning_rate": 0.0015533719430842234, "loss": 0.1498, "step": 32540 }, { "epoch": 0.05769965575600255, "grad_norm": 0.640625, "learning_rate": 0.001553320526821121, "loss": 0.3432, "step": 32542 }, { "epoch": 0.05770320192131236, "grad_norm": 0.400390625, "learning_rate": 0.0015532691085754578, "loss": 0.2025, "step": 32544 }, { "epoch": 0.05770674808662218, "grad_norm": 0.267578125, "learning_rate": 0.0015532176883474584, "loss": 0.1761, "step": 32546 }, { "epoch": 0.057710294251932, "grad_norm": 1.0703125, "learning_rate": 0.0015531662661373484, "loss": 0.2267, "step": 32548 }, { "epoch": 0.05771384041724181, "grad_norm": 0.37109375, "learning_rate": 0.0015531148419453526, "loss": 0.2532, "step": 32550 }, { "epoch": 0.05771738658255163, "grad_norm": 0.4140625, "learning_rate": 0.0015530634157716956, "loss": 0.2195, "step": 32552 }, { "epoch": 0.05772093274786144, "grad_norm": 0.392578125, "learning_rate": 0.001553011987616602, "loss": 0.1793, "step": 32554 }, { "epoch": 0.05772447891317126, "grad_norm": 0.3515625, "learning_rate": 0.0015529605574802975, "loss": 0.2428, "step": 32556 }, { "epoch": 0.05772802507848107, "grad_norm": 0.236328125, "learning_rate": 0.0015529091253630065, "loss": 0.3177, "step": 32558 }, { "epoch": 0.057731571243790886, "grad_norm": 0.4375, "learning_rate": 0.001552857691264954, "loss": 0.1529, "step": 32560 }, { "epoch": 0.0577351174091007, "grad_norm": 0.427734375, "learning_rate": 0.0015528062551863654, "loss": 0.2156, "step": 32562 }, { "epoch": 0.057738663574410515, "grad_norm": 1.015625, "learning_rate": 0.001552754817127465, "loss": 0.1672, "step": 32564 }, { "epoch": 0.05774220973972033, "grad_norm": 0.359375, "learning_rate": 0.0015527033770884777, "loss": 0.1862, "step": 32566 }, { "epoch": 0.057745755905030144, "grad_norm": 0.310546875, "learning_rate": 0.0015526519350696293, "loss": 0.1316, "step": 32568 }, { "epoch": 0.057749302070339965, "grad_norm": 0.95703125, "learning_rate": 0.001552600491071144, "loss": 0.2426, "step": 32570 }, { "epoch": 0.05775284823564978, "grad_norm": 1.6875, "learning_rate": 0.0015525490450932471, "loss": 0.1743, "step": 32572 }, { "epoch": 0.057756394400959594, "grad_norm": 11.25, "learning_rate": 0.0015524975971361636, "loss": 0.2521, "step": 32574 }, { "epoch": 0.05775994056626941, "grad_norm": 0.40234375, "learning_rate": 0.0015524461472001185, "loss": 0.1604, "step": 32576 }, { "epoch": 0.057763486731579224, "grad_norm": 0.97265625, "learning_rate": 0.0015523946952853368, "loss": 0.2201, "step": 32578 }, { "epoch": 0.05776703289688904, "grad_norm": 0.50390625, "learning_rate": 0.0015523432413920432, "loss": 0.1897, "step": 32580 }, { "epoch": 0.05777057906219885, "grad_norm": 2.28125, "learning_rate": 0.001552291785520463, "loss": 0.309, "step": 32582 }, { "epoch": 0.05777412522750867, "grad_norm": 0.66015625, "learning_rate": 0.0015522403276708218, "loss": 0.1743, "step": 32584 }, { "epoch": 0.05777767139281848, "grad_norm": 1.34375, "learning_rate": 0.0015521888678433435, "loss": 0.3351, "step": 32586 }, { "epoch": 0.057781217558128296, "grad_norm": 0.546875, "learning_rate": 0.001552137406038254, "loss": 0.1991, "step": 32588 }, { "epoch": 0.05778476372343811, "grad_norm": 0.349609375, "learning_rate": 0.0015520859422557779, "loss": 0.1572, "step": 32590 }, { "epoch": 0.057788309888747925, "grad_norm": 0.474609375, "learning_rate": 0.0015520344764961408, "loss": 0.1692, "step": 32592 }, { "epoch": 0.05779185605405775, "grad_norm": 0.94140625, "learning_rate": 0.0015519830087595671, "loss": 0.1908, "step": 32594 }, { "epoch": 0.05779540221936756, "grad_norm": 0.373046875, "learning_rate": 0.0015519315390462828, "loss": 0.1788, "step": 32596 }, { "epoch": 0.057798948384677376, "grad_norm": 0.76953125, "learning_rate": 0.0015518800673565124, "loss": 0.1666, "step": 32598 }, { "epoch": 0.05780249454998719, "grad_norm": 0.73046875, "learning_rate": 0.0015518285936904806, "loss": 0.1553, "step": 32600 }, { "epoch": 0.057806040715297005, "grad_norm": 0.53515625, "learning_rate": 0.0015517771180484133, "loss": 0.1775, "step": 32602 }, { "epoch": 0.05780958688060682, "grad_norm": 0.84765625, "learning_rate": 0.001551725640430535, "loss": 0.1692, "step": 32604 }, { "epoch": 0.057813133045916634, "grad_norm": 1.0078125, "learning_rate": 0.0015516741608370716, "loss": 0.2226, "step": 32606 }, { "epoch": 0.05781667921122645, "grad_norm": 0.30859375, "learning_rate": 0.0015516226792682474, "loss": 0.2335, "step": 32608 }, { "epoch": 0.05782022537653626, "grad_norm": 0.3359375, "learning_rate": 0.0015515711957242884, "loss": 0.1768, "step": 32610 }, { "epoch": 0.05782377154184608, "grad_norm": 0.36328125, "learning_rate": 0.001551519710205419, "loss": 0.2005, "step": 32612 }, { "epoch": 0.05782731770715589, "grad_norm": 0.50390625, "learning_rate": 0.0015514682227118646, "loss": 0.1533, "step": 32614 }, { "epoch": 0.057830863872465714, "grad_norm": 0.55078125, "learning_rate": 0.0015514167332438505, "loss": 0.2213, "step": 32616 }, { "epoch": 0.05783441003777553, "grad_norm": 0.6953125, "learning_rate": 0.0015513652418016018, "loss": 0.1994, "step": 32618 }, { "epoch": 0.05783795620308534, "grad_norm": 0.28125, "learning_rate": 0.0015513137483853436, "loss": 0.1338, "step": 32620 }, { "epoch": 0.05784150236839516, "grad_norm": 0.53515625, "learning_rate": 0.0015512622529953012, "loss": 0.1632, "step": 32622 }, { "epoch": 0.05784504853370497, "grad_norm": 0.275390625, "learning_rate": 0.0015512107556317001, "loss": 0.1956, "step": 32624 }, { "epoch": 0.05784859469901479, "grad_norm": 0.27734375, "learning_rate": 0.0015511592562947654, "loss": 0.2028, "step": 32626 }, { "epoch": 0.0578521408643246, "grad_norm": 0.7265625, "learning_rate": 0.0015511077549847216, "loss": 0.1665, "step": 32628 }, { "epoch": 0.057855687029634416, "grad_norm": 1.0078125, "learning_rate": 0.0015510562517017951, "loss": 0.3109, "step": 32630 }, { "epoch": 0.05785923319494423, "grad_norm": 0.490234375, "learning_rate": 0.0015510047464462103, "loss": 0.1832, "step": 32632 }, { "epoch": 0.057862779360254045, "grad_norm": 0.6171875, "learning_rate": 0.0015509532392181924, "loss": 0.2036, "step": 32634 }, { "epoch": 0.05786632552556386, "grad_norm": 0.37890625, "learning_rate": 0.0015509017300179674, "loss": 0.2014, "step": 32636 }, { "epoch": 0.05786987169087368, "grad_norm": 2.265625, "learning_rate": 0.0015508502188457602, "loss": 0.1755, "step": 32638 }, { "epoch": 0.057873417856183496, "grad_norm": 1.109375, "learning_rate": 0.0015507987057017958, "loss": 0.1658, "step": 32640 }, { "epoch": 0.05787696402149331, "grad_norm": 0.5859375, "learning_rate": 0.0015507471905862997, "loss": 0.1419, "step": 32642 }, { "epoch": 0.057880510186803125, "grad_norm": 0.7890625, "learning_rate": 0.0015506956734994972, "loss": 0.175, "step": 32644 }, { "epoch": 0.05788405635211294, "grad_norm": 0.8359375, "learning_rate": 0.0015506441544416137, "loss": 0.2017, "step": 32646 }, { "epoch": 0.057887602517422754, "grad_norm": 0.36328125, "learning_rate": 0.0015505926334128744, "loss": 0.1334, "step": 32648 }, { "epoch": 0.05789114868273257, "grad_norm": 0.275390625, "learning_rate": 0.0015505411104135048, "loss": 0.2382, "step": 32650 }, { "epoch": 0.05789469484804238, "grad_norm": 0.6328125, "learning_rate": 0.00155048958544373, "loss": 0.2341, "step": 32652 }, { "epoch": 0.0578982410133522, "grad_norm": 0.85546875, "learning_rate": 0.0015504380585037755, "loss": 0.1722, "step": 32654 }, { "epoch": 0.05790178717866201, "grad_norm": 0.95703125, "learning_rate": 0.0015503865295938665, "loss": 0.2059, "step": 32656 }, { "epoch": 0.057905333343971827, "grad_norm": 0.7109375, "learning_rate": 0.0015503349987142283, "loss": 0.214, "step": 32658 }, { "epoch": 0.05790887950928164, "grad_norm": 2.5, "learning_rate": 0.0015502834658650867, "loss": 0.2269, "step": 32660 }, { "epoch": 0.05791242567459146, "grad_norm": 0.83203125, "learning_rate": 0.0015502319310466666, "loss": 0.1995, "step": 32662 }, { "epoch": 0.05791597183990128, "grad_norm": 0.2734375, "learning_rate": 0.0015501803942591932, "loss": 0.2188, "step": 32664 }, { "epoch": 0.05791951800521109, "grad_norm": 0.61328125, "learning_rate": 0.0015501288555028925, "loss": 0.2223, "step": 32666 }, { "epoch": 0.057923064170520906, "grad_norm": 0.484375, "learning_rate": 0.0015500773147779903, "loss": 0.1822, "step": 32668 }, { "epoch": 0.05792661033583072, "grad_norm": 0.8125, "learning_rate": 0.0015500257720847108, "loss": 0.1621, "step": 32670 }, { "epoch": 0.057930156501140535, "grad_norm": 0.72265625, "learning_rate": 0.0015499742274232798, "loss": 0.2553, "step": 32672 }, { "epoch": 0.05793370266645035, "grad_norm": 0.478515625, "learning_rate": 0.001549922680793923, "loss": 0.3143, "step": 32674 }, { "epoch": 0.057937248831760164, "grad_norm": 0.76171875, "learning_rate": 0.0015498711321968658, "loss": 0.2183, "step": 32676 }, { "epoch": 0.05794079499706998, "grad_norm": 0.6484375, "learning_rate": 0.0015498195816323337, "loss": 0.1813, "step": 32678 }, { "epoch": 0.057944341162379794, "grad_norm": 0.51171875, "learning_rate": 0.001549768029100552, "loss": 0.2066, "step": 32680 }, { "epoch": 0.05794788732768961, "grad_norm": 0.7890625, "learning_rate": 0.001549716474601746, "loss": 0.201, "step": 32682 }, { "epoch": 0.05795143349299943, "grad_norm": 0.150390625, "learning_rate": 0.0015496649181361418, "loss": 0.1413, "step": 32684 }, { "epoch": 0.057954979658309244, "grad_norm": 0.45703125, "learning_rate": 0.0015496133597039641, "loss": 0.234, "step": 32686 }, { "epoch": 0.05795852582361906, "grad_norm": 2.28125, "learning_rate": 0.0015495617993054388, "loss": 0.2642, "step": 32688 }, { "epoch": 0.05796207198892887, "grad_norm": 0.349609375, "learning_rate": 0.0015495102369407915, "loss": 0.3398, "step": 32690 }, { "epoch": 0.05796561815423869, "grad_norm": 0.2177734375, "learning_rate": 0.0015494586726102473, "loss": 0.1918, "step": 32692 }, { "epoch": 0.0579691643195485, "grad_norm": 0.75, "learning_rate": 0.0015494071063140318, "loss": 0.2178, "step": 32694 }, { "epoch": 0.05797271048485832, "grad_norm": 0.390625, "learning_rate": 0.001549355538052371, "loss": 0.1898, "step": 32696 }, { "epoch": 0.05797625665016813, "grad_norm": 0.38671875, "learning_rate": 0.00154930396782549, "loss": 0.2675, "step": 32698 }, { "epoch": 0.057979802815477946, "grad_norm": 0.34765625, "learning_rate": 0.0015492523956336145, "loss": 0.1765, "step": 32700 }, { "epoch": 0.05798334898078776, "grad_norm": 0.3671875, "learning_rate": 0.0015492008214769697, "loss": 0.1783, "step": 32702 }, { "epoch": 0.057986895146097575, "grad_norm": 0.7890625, "learning_rate": 0.0015491492453557818, "loss": 0.2519, "step": 32704 }, { "epoch": 0.0579904413114074, "grad_norm": 0.36328125, "learning_rate": 0.0015490976672702754, "loss": 0.1926, "step": 32706 }, { "epoch": 0.05799398747671721, "grad_norm": 0.66015625, "learning_rate": 0.0015490460872206774, "loss": 0.1503, "step": 32708 }, { "epoch": 0.057997533642027026, "grad_norm": 0.333984375, "learning_rate": 0.0015489945052072124, "loss": 0.2192, "step": 32710 }, { "epoch": 0.05800107980733684, "grad_norm": 0.91015625, "learning_rate": 0.001548942921230106, "loss": 0.2445, "step": 32712 }, { "epoch": 0.058004625972646655, "grad_norm": 0.62109375, "learning_rate": 0.0015488913352895842, "loss": 0.2324, "step": 32714 }, { "epoch": 0.05800817213795647, "grad_norm": 0.328125, "learning_rate": 0.0015488397473858726, "loss": 0.1257, "step": 32716 }, { "epoch": 0.058011718303266284, "grad_norm": 0.6484375, "learning_rate": 0.0015487881575191966, "loss": 0.2182, "step": 32718 }, { "epoch": 0.0580152644685761, "grad_norm": 0.3359375, "learning_rate": 0.001548736565689782, "loss": 0.1354, "step": 32720 }, { "epoch": 0.05801881063388591, "grad_norm": 0.310546875, "learning_rate": 0.0015486849718978537, "loss": 0.1892, "step": 32722 }, { "epoch": 0.05802235679919573, "grad_norm": 1.234375, "learning_rate": 0.0015486333761436384, "loss": 0.2164, "step": 32724 }, { "epoch": 0.05802590296450554, "grad_norm": 0.59375, "learning_rate": 0.0015485817784273612, "loss": 0.2139, "step": 32726 }, { "epoch": 0.05802944912981536, "grad_norm": 0.4296875, "learning_rate": 0.0015485301787492483, "loss": 0.1753, "step": 32728 }, { "epoch": 0.05803299529512518, "grad_norm": 0.59375, "learning_rate": 0.0015484785771095245, "loss": 0.1488, "step": 32730 }, { "epoch": 0.05803654146043499, "grad_norm": 0.490234375, "learning_rate": 0.001548426973508416, "loss": 0.2039, "step": 32732 }, { "epoch": 0.05804008762574481, "grad_norm": 0.29296875, "learning_rate": 0.0015483753679461482, "loss": 0.2055, "step": 32734 }, { "epoch": 0.05804363379105462, "grad_norm": 0.26171875, "learning_rate": 0.0015483237604229474, "loss": 0.1246, "step": 32736 }, { "epoch": 0.058047179956364436, "grad_norm": 0.734375, "learning_rate": 0.0015482721509390385, "loss": 0.2483, "step": 32738 }, { "epoch": 0.05805072612167425, "grad_norm": 0.5625, "learning_rate": 0.0015482205394946478, "loss": 0.1687, "step": 32740 }, { "epoch": 0.058054272286984065, "grad_norm": 0.91796875, "learning_rate": 0.0015481689260900008, "loss": 0.2611, "step": 32742 }, { "epoch": 0.05805781845229388, "grad_norm": 0.21875, "learning_rate": 0.0015481173107253232, "loss": 0.129, "step": 32744 }, { "epoch": 0.058061364617603695, "grad_norm": 0.62109375, "learning_rate": 0.001548065693400841, "loss": 0.1614, "step": 32746 }, { "epoch": 0.05806491078291351, "grad_norm": 0.7421875, "learning_rate": 0.0015480140741167795, "loss": 0.1468, "step": 32748 }, { "epoch": 0.058068456948223324, "grad_norm": 0.287109375, "learning_rate": 0.0015479624528733646, "loss": 0.1847, "step": 32750 }, { "epoch": 0.058072003113533145, "grad_norm": 1.1640625, "learning_rate": 0.0015479108296708225, "loss": 0.2115, "step": 32752 }, { "epoch": 0.05807554927884296, "grad_norm": 0.8828125, "learning_rate": 0.0015478592045093781, "loss": 0.1759, "step": 32754 }, { "epoch": 0.058079095444152774, "grad_norm": 0.490234375, "learning_rate": 0.0015478075773892582, "loss": 0.2047, "step": 32756 }, { "epoch": 0.05808264160946259, "grad_norm": 1.9140625, "learning_rate": 0.0015477559483106878, "loss": 0.2186, "step": 32758 }, { "epoch": 0.0580861877747724, "grad_norm": 1.15625, "learning_rate": 0.001547704317273893, "loss": 0.1579, "step": 32760 }, { "epoch": 0.05808973394008222, "grad_norm": 0.4296875, "learning_rate": 0.0015476526842790996, "loss": 0.1826, "step": 32762 }, { "epoch": 0.05809328010539203, "grad_norm": 0.322265625, "learning_rate": 0.0015476010493265333, "loss": 0.125, "step": 32764 }, { "epoch": 0.05809682627070185, "grad_norm": 0.6640625, "learning_rate": 0.0015475494124164203, "loss": 0.1696, "step": 32766 }, { "epoch": 0.05810037243601166, "grad_norm": 0.322265625, "learning_rate": 0.0015474977735489856, "loss": 0.1495, "step": 32768 }, { "epoch": 0.058103918601321476, "grad_norm": 0.3203125, "learning_rate": 0.001547446132724456, "loss": 0.1782, "step": 32770 }, { "epoch": 0.05810746476663129, "grad_norm": 0.62109375, "learning_rate": 0.0015473944899430567, "loss": 0.3256, "step": 32772 }, { "epoch": 0.05811101093194111, "grad_norm": 0.251953125, "learning_rate": 0.001547342845205014, "loss": 0.2066, "step": 32774 }, { "epoch": 0.05811455709725093, "grad_norm": 2.171875, "learning_rate": 0.0015472911985105534, "loss": 0.477, "step": 32776 }, { "epoch": 0.05811810326256074, "grad_norm": 0.6953125, "learning_rate": 0.001547239549859901, "loss": 0.1861, "step": 32778 }, { "epoch": 0.058121649427870556, "grad_norm": 0.375, "learning_rate": 0.0015471878992532825, "loss": 0.1913, "step": 32780 }, { "epoch": 0.05812519559318037, "grad_norm": 0.76953125, "learning_rate": 0.001547136246690924, "loss": 0.3667, "step": 32782 }, { "epoch": 0.058128741758490185, "grad_norm": 0.30078125, "learning_rate": 0.0015470845921730513, "loss": 0.2809, "step": 32784 }, { "epoch": 0.0581322879238, "grad_norm": 1.4453125, "learning_rate": 0.0015470329356998902, "loss": 0.2582, "step": 32786 }, { "epoch": 0.058135834089109814, "grad_norm": 0.44921875, "learning_rate": 0.0015469812772716666, "loss": 0.1488, "step": 32788 }, { "epoch": 0.05813938025441963, "grad_norm": 0.234375, "learning_rate": 0.0015469296168886067, "loss": 0.1734, "step": 32790 }, { "epoch": 0.05814292641972944, "grad_norm": 0.91015625, "learning_rate": 0.001546877954550936, "loss": 0.3811, "step": 32792 }, { "epoch": 0.05814647258503926, "grad_norm": 1.359375, "learning_rate": 0.001546826290258881, "loss": 0.3681, "step": 32794 }, { "epoch": 0.05815001875034907, "grad_norm": 2.1875, "learning_rate": 0.0015467746240126672, "loss": 0.316, "step": 32796 }, { "epoch": 0.058153564915658894, "grad_norm": 0.84375, "learning_rate": 0.0015467229558125209, "loss": 0.2074, "step": 32798 }, { "epoch": 0.05815711108096871, "grad_norm": 0.65234375, "learning_rate": 0.0015466712856586676, "loss": 0.1452, "step": 32800 }, { "epoch": 0.05816065724627852, "grad_norm": 1.1171875, "learning_rate": 0.0015466196135513338, "loss": 0.1868, "step": 32802 }, { "epoch": 0.05816420341158834, "grad_norm": 1.6484375, "learning_rate": 0.001546567939490745, "loss": 0.3062, "step": 32804 }, { "epoch": 0.05816774957689815, "grad_norm": 0.33203125, "learning_rate": 0.0015465162634771277, "loss": 0.1659, "step": 32806 }, { "epoch": 0.05817129574220797, "grad_norm": 0.82421875, "learning_rate": 0.0015464645855107076, "loss": 0.1711, "step": 32808 }, { "epoch": 0.05817484190751778, "grad_norm": 0.2734375, "learning_rate": 0.0015464129055917104, "loss": 0.197, "step": 32810 }, { "epoch": 0.058178388072827596, "grad_norm": 0.59375, "learning_rate": 0.0015463612237203628, "loss": 0.1863, "step": 32812 }, { "epoch": 0.05818193423813741, "grad_norm": 2.375, "learning_rate": 0.0015463095398968907, "loss": 0.239, "step": 32814 }, { "epoch": 0.058185480403447225, "grad_norm": 0.55078125, "learning_rate": 0.0015462578541215193, "loss": 0.1599, "step": 32816 }, { "epoch": 0.05818902656875704, "grad_norm": 0.251953125, "learning_rate": 0.0015462061663944757, "loss": 0.1489, "step": 32818 }, { "epoch": 0.05819257273406686, "grad_norm": 0.3046875, "learning_rate": 0.0015461544767159852, "loss": 0.1743, "step": 32820 }, { "epoch": 0.058196118899376675, "grad_norm": 0.330078125, "learning_rate": 0.0015461027850862746, "loss": 0.1592, "step": 32822 }, { "epoch": 0.05819966506468649, "grad_norm": 0.64453125, "learning_rate": 0.0015460510915055693, "loss": 0.2169, "step": 32824 }, { "epoch": 0.058203211229996304, "grad_norm": 0.357421875, "learning_rate": 0.0015459993959740955, "loss": 0.1771, "step": 32826 }, { "epoch": 0.05820675739530612, "grad_norm": 1.78125, "learning_rate": 0.0015459476984920797, "loss": 0.2825, "step": 32828 }, { "epoch": 0.058210303560615934, "grad_norm": 0.64453125, "learning_rate": 0.0015458959990597473, "loss": 0.1675, "step": 32830 }, { "epoch": 0.05821384972592575, "grad_norm": 0.546875, "learning_rate": 0.0015458442976773253, "loss": 0.2112, "step": 32832 }, { "epoch": 0.05821739589123556, "grad_norm": 0.8203125, "learning_rate": 0.0015457925943450387, "loss": 0.2091, "step": 32834 }, { "epoch": 0.05822094205654538, "grad_norm": 0.5390625, "learning_rate": 0.0015457408890631149, "loss": 0.1629, "step": 32836 }, { "epoch": 0.05822448822185519, "grad_norm": 2.125, "learning_rate": 0.001545689181831779, "loss": 0.2125, "step": 32838 }, { "epoch": 0.058228034387165006, "grad_norm": 0.265625, "learning_rate": 0.0015456374726512576, "loss": 0.1783, "step": 32840 }, { "epoch": 0.05823158055247483, "grad_norm": 0.224609375, "learning_rate": 0.0015455857615217768, "loss": 0.145, "step": 32842 }, { "epoch": 0.05823512671778464, "grad_norm": 0.75, "learning_rate": 0.0015455340484435626, "loss": 0.1641, "step": 32844 }, { "epoch": 0.05823867288309446, "grad_norm": 0.546875, "learning_rate": 0.0015454823334168414, "loss": 0.2008, "step": 32846 }, { "epoch": 0.05824221904840427, "grad_norm": 0.302734375, "learning_rate": 0.0015454306164418392, "loss": 0.2207, "step": 32848 }, { "epoch": 0.058245765213714086, "grad_norm": 0.76171875, "learning_rate": 0.001545378897518782, "loss": 0.2048, "step": 32850 }, { "epoch": 0.0582493113790239, "grad_norm": 0.1865234375, "learning_rate": 0.0015453271766478964, "loss": 0.1611, "step": 32852 }, { "epoch": 0.058252857544333715, "grad_norm": 1.4296875, "learning_rate": 0.0015452754538294083, "loss": 0.3008, "step": 32854 }, { "epoch": 0.05825640370964353, "grad_norm": 0.302734375, "learning_rate": 0.0015452237290635442, "loss": 0.1433, "step": 32856 }, { "epoch": 0.058259949874953344, "grad_norm": 0.71484375, "learning_rate": 0.0015451720023505302, "loss": 0.215, "step": 32858 }, { "epoch": 0.05826349604026316, "grad_norm": 2.96875, "learning_rate": 0.0015451202736905922, "loss": 0.2807, "step": 32860 }, { "epoch": 0.05826704220557297, "grad_norm": 1.109375, "learning_rate": 0.0015450685430839566, "loss": 0.3874, "step": 32862 }, { "epoch": 0.05827058837088279, "grad_norm": 1.0625, "learning_rate": 0.00154501681053085, "loss": 0.2067, "step": 32864 }, { "epoch": 0.05827413453619261, "grad_norm": 1.078125, "learning_rate": 0.001544965076031498, "loss": 0.3986, "step": 32866 }, { "epoch": 0.058277680701502424, "grad_norm": 0.58203125, "learning_rate": 0.0015449133395861276, "loss": 0.2626, "step": 32868 }, { "epoch": 0.05828122686681224, "grad_norm": 0.56640625, "learning_rate": 0.0015448616011949645, "loss": 0.2164, "step": 32870 }, { "epoch": 0.05828477303212205, "grad_norm": 0.64453125, "learning_rate": 0.0015448098608582355, "loss": 0.193, "step": 32872 }, { "epoch": 0.05828831919743187, "grad_norm": 0.2412109375, "learning_rate": 0.001544758118576166, "loss": 0.1891, "step": 32874 }, { "epoch": 0.05829186536274168, "grad_norm": 0.51171875, "learning_rate": 0.001544706374348983, "loss": 0.3071, "step": 32876 }, { "epoch": 0.0582954115280515, "grad_norm": 0.2392578125, "learning_rate": 0.0015446546281769132, "loss": 0.1614, "step": 32878 }, { "epoch": 0.05829895769336131, "grad_norm": 1.09375, "learning_rate": 0.0015446028800601816, "loss": 0.2111, "step": 32880 }, { "epoch": 0.058302503858671126, "grad_norm": 0.3828125, "learning_rate": 0.0015445511299990156, "loss": 0.23, "step": 32882 }, { "epoch": 0.05830605002398094, "grad_norm": 0.45703125, "learning_rate": 0.0015444993779936411, "loss": 0.1871, "step": 32884 }, { "epoch": 0.058309596189290755, "grad_norm": 2.5, "learning_rate": 0.0015444476240442844, "loss": 0.3988, "step": 32886 }, { "epoch": 0.058313142354600576, "grad_norm": 0.515625, "learning_rate": 0.0015443958681511723, "loss": 0.2062, "step": 32888 }, { "epoch": 0.05831668851991039, "grad_norm": 4.46875, "learning_rate": 0.0015443441103145303, "loss": 0.335, "step": 32890 }, { "epoch": 0.058320234685220206, "grad_norm": 0.482421875, "learning_rate": 0.0015442923505345858, "loss": 0.1858, "step": 32892 }, { "epoch": 0.05832378085053002, "grad_norm": 0.275390625, "learning_rate": 0.0015442405888115643, "loss": 0.1822, "step": 32894 }, { "epoch": 0.058327327015839835, "grad_norm": 0.9609375, "learning_rate": 0.0015441888251456926, "loss": 0.2514, "step": 32896 }, { "epoch": 0.05833087318114965, "grad_norm": 1.296875, "learning_rate": 0.0015441370595371971, "loss": 0.2094, "step": 32898 }, { "epoch": 0.058334419346459464, "grad_norm": 0.275390625, "learning_rate": 0.0015440852919863042, "loss": 0.2043, "step": 32900 }, { "epoch": 0.05833796551176928, "grad_norm": 0.5234375, "learning_rate": 0.0015440335224932399, "loss": 0.1949, "step": 32902 }, { "epoch": 0.05834151167707909, "grad_norm": 0.421875, "learning_rate": 0.001543981751058231, "loss": 0.1664, "step": 32904 }, { "epoch": 0.05834505784238891, "grad_norm": 0.51953125, "learning_rate": 0.0015439299776815038, "loss": 0.1594, "step": 32906 }, { "epoch": 0.05834860400769872, "grad_norm": 1.5078125, "learning_rate": 0.0015438782023632846, "loss": 0.3101, "step": 32908 }, { "epoch": 0.05835215017300854, "grad_norm": 0.427734375, "learning_rate": 0.0015438264251038001, "loss": 0.1774, "step": 32910 }, { "epoch": 0.05835569633831836, "grad_norm": 0.2314453125, "learning_rate": 0.0015437746459032765, "loss": 0.2407, "step": 32912 }, { "epoch": 0.05835924250362817, "grad_norm": 0.59375, "learning_rate": 0.0015437228647619406, "loss": 0.1494, "step": 32914 }, { "epoch": 0.05836278866893799, "grad_norm": 0.5, "learning_rate": 0.0015436710816800186, "loss": 0.2114, "step": 32916 }, { "epoch": 0.0583663348342478, "grad_norm": 0.435546875, "learning_rate": 0.001543619296657737, "loss": 0.2357, "step": 32918 }, { "epoch": 0.058369880999557616, "grad_norm": 0.435546875, "learning_rate": 0.0015435675096953222, "loss": 0.2111, "step": 32920 }, { "epoch": 0.05837342716486743, "grad_norm": 0.6484375, "learning_rate": 0.0015435157207930009, "loss": 0.2047, "step": 32922 }, { "epoch": 0.058376973330177245, "grad_norm": 0.609375, "learning_rate": 0.0015434639299509991, "loss": 0.1667, "step": 32924 }, { "epoch": 0.05838051949548706, "grad_norm": 1.5703125, "learning_rate": 0.0015434121371695443, "loss": 0.2829, "step": 32926 }, { "epoch": 0.058384065660796874, "grad_norm": 0.28515625, "learning_rate": 0.0015433603424488617, "loss": 0.1744, "step": 32928 }, { "epoch": 0.05838761182610669, "grad_norm": 0.3203125, "learning_rate": 0.001543308545789179, "loss": 0.192, "step": 32930 }, { "epoch": 0.058391157991416504, "grad_norm": 0.296875, "learning_rate": 0.0015432567471907218, "loss": 0.2123, "step": 32932 }, { "epoch": 0.058394704156726325, "grad_norm": 0.85546875, "learning_rate": 0.0015432049466537175, "loss": 0.2079, "step": 32934 }, { "epoch": 0.05839825032203614, "grad_norm": 0.365234375, "learning_rate": 0.001543153144178392, "loss": 0.2296, "step": 32936 }, { "epoch": 0.058401796487345954, "grad_norm": 1.3125, "learning_rate": 0.0015431013397649723, "loss": 0.1824, "step": 32938 }, { "epoch": 0.05840534265265577, "grad_norm": 0.373046875, "learning_rate": 0.001543049533413684, "loss": 0.1573, "step": 32940 }, { "epoch": 0.05840888881796558, "grad_norm": 1.171875, "learning_rate": 0.001542997725124755, "loss": 0.2017, "step": 32942 }, { "epoch": 0.0584124349832754, "grad_norm": 0.21875, "learning_rate": 0.0015429459148984114, "loss": 0.1683, "step": 32944 }, { "epoch": 0.05841598114858521, "grad_norm": 0.21484375, "learning_rate": 0.0015428941027348794, "loss": 0.1634, "step": 32946 }, { "epoch": 0.05841952731389503, "grad_norm": 0.38671875, "learning_rate": 0.0015428422886343855, "loss": 0.1652, "step": 32948 }, { "epoch": 0.05842307347920484, "grad_norm": 0.240234375, "learning_rate": 0.0015427904725971572, "loss": 0.2046, "step": 32950 }, { "epoch": 0.058426619644514656, "grad_norm": 0.2451171875, "learning_rate": 0.0015427386546234205, "loss": 0.148, "step": 32952 }, { "epoch": 0.05843016580982447, "grad_norm": 0.431640625, "learning_rate": 0.001542686834713402, "loss": 0.2354, "step": 32954 }, { "epoch": 0.05843371197513429, "grad_norm": 0.28515625, "learning_rate": 0.0015426350128673284, "loss": 0.4164, "step": 32956 }, { "epoch": 0.05843725814044411, "grad_norm": 0.5546875, "learning_rate": 0.0015425831890854263, "loss": 0.1922, "step": 32958 }, { "epoch": 0.05844080430575392, "grad_norm": 0.75390625, "learning_rate": 0.0015425313633679225, "loss": 0.1894, "step": 32960 }, { "epoch": 0.058444350471063736, "grad_norm": 0.51953125, "learning_rate": 0.0015424795357150435, "loss": 0.1992, "step": 32962 }, { "epoch": 0.05844789663637355, "grad_norm": 0.47265625, "learning_rate": 0.0015424277061270163, "loss": 0.1812, "step": 32964 }, { "epoch": 0.058451442801683365, "grad_norm": 0.2578125, "learning_rate": 0.0015423758746040672, "loss": 0.164, "step": 32966 }, { "epoch": 0.05845498896699318, "grad_norm": 0.2734375, "learning_rate": 0.0015423240411464227, "loss": 0.1477, "step": 32968 }, { "epoch": 0.058458535132302994, "grad_norm": 0.490234375, "learning_rate": 0.0015422722057543102, "loss": 0.1913, "step": 32970 }, { "epoch": 0.05846208129761281, "grad_norm": 0.74609375, "learning_rate": 0.0015422203684279556, "loss": 0.1596, "step": 32972 }, { "epoch": 0.05846562746292262, "grad_norm": 0.3828125, "learning_rate": 0.0015421685291675863, "loss": 0.186, "step": 32974 }, { "epoch": 0.05846917362823244, "grad_norm": 0.32421875, "learning_rate": 0.0015421166879734283, "loss": 0.2054, "step": 32976 }, { "epoch": 0.05847271979354226, "grad_norm": 0.396484375, "learning_rate": 0.0015420648448457091, "loss": 0.2201, "step": 32978 }, { "epoch": 0.058476265958852074, "grad_norm": 0.1982421875, "learning_rate": 0.0015420129997846547, "loss": 0.1334, "step": 32980 }, { "epoch": 0.05847981212416189, "grad_norm": 0.326171875, "learning_rate": 0.0015419611527904926, "loss": 0.2138, "step": 32982 }, { "epoch": 0.0584833582894717, "grad_norm": 1.28125, "learning_rate": 0.001541909303863449, "loss": 0.1858, "step": 32984 }, { "epoch": 0.05848690445478152, "grad_norm": 0.255859375, "learning_rate": 0.0015418574530037508, "loss": 0.3209, "step": 32986 }, { "epoch": 0.05849045062009133, "grad_norm": 0.3984375, "learning_rate": 0.0015418056002116246, "loss": 0.2256, "step": 32988 }, { "epoch": 0.058493996785401146, "grad_norm": 0.380859375, "learning_rate": 0.0015417537454872974, "loss": 0.2278, "step": 32990 }, { "epoch": 0.05849754295071096, "grad_norm": 0.44921875, "learning_rate": 0.001541701888830996, "loss": 0.1644, "step": 32992 }, { "epoch": 0.058501089116020775, "grad_norm": 0.56640625, "learning_rate": 0.0015416500302429471, "loss": 0.2221, "step": 32994 }, { "epoch": 0.05850463528133059, "grad_norm": 0.462890625, "learning_rate": 0.0015415981697233772, "loss": 0.1724, "step": 32996 }, { "epoch": 0.058508181446640405, "grad_norm": 0.55078125, "learning_rate": 0.0015415463072725136, "loss": 0.232, "step": 32998 }, { "epoch": 0.05851172761195022, "grad_norm": 0.62109375, "learning_rate": 0.0015414944428905832, "loss": 0.2728, "step": 33000 }, { "epoch": 0.05851527377726004, "grad_norm": 0.306640625, "learning_rate": 0.0015414425765778123, "loss": 0.1757, "step": 33002 }, { "epoch": 0.058518819942569855, "grad_norm": 0.6953125, "learning_rate": 0.0015413907083344281, "loss": 0.1818, "step": 33004 }, { "epoch": 0.05852236610787967, "grad_norm": 0.5703125, "learning_rate": 0.0015413388381606572, "loss": 0.1738, "step": 33006 }, { "epoch": 0.058525912273189484, "grad_norm": 0.287109375, "learning_rate": 0.0015412869660567268, "loss": 0.1707, "step": 33008 }, { "epoch": 0.0585294584384993, "grad_norm": 1.28125, "learning_rate": 0.0015412350920228635, "loss": 0.2252, "step": 33010 }, { "epoch": 0.05853300460380911, "grad_norm": 0.384765625, "learning_rate": 0.001541183216059294, "loss": 0.5729, "step": 33012 }, { "epoch": 0.05853655076911893, "grad_norm": 0.21484375, "learning_rate": 0.0015411313381662455, "loss": 0.1625, "step": 33014 }, { "epoch": 0.05854009693442874, "grad_norm": 0.458984375, "learning_rate": 0.001541079458343945, "loss": 0.137, "step": 33016 }, { "epoch": 0.05854364309973856, "grad_norm": 0.412109375, "learning_rate": 0.0015410275765926186, "loss": 0.1659, "step": 33018 }, { "epoch": 0.05854718926504837, "grad_norm": 0.443359375, "learning_rate": 0.0015409756929124943, "loss": 0.1782, "step": 33020 }, { "epoch": 0.058550735430358186, "grad_norm": 1.078125, "learning_rate": 0.001540923807303798, "loss": 0.257, "step": 33022 }, { "epoch": 0.05855428159566801, "grad_norm": 1.5, "learning_rate": 0.0015408719197667576, "loss": 0.3601, "step": 33024 }, { "epoch": 0.05855782776097782, "grad_norm": 0.3828125, "learning_rate": 0.0015408200303015994, "loss": 0.212, "step": 33026 }, { "epoch": 0.05856137392628764, "grad_norm": 0.41015625, "learning_rate": 0.0015407681389085501, "loss": 0.1554, "step": 33028 }, { "epoch": 0.05856492009159745, "grad_norm": 0.2109375, "learning_rate": 0.001540716245587837, "loss": 0.1979, "step": 33030 }, { "epoch": 0.058568466256907266, "grad_norm": 0.259765625, "learning_rate": 0.0015406643503396876, "loss": 0.3732, "step": 33032 }, { "epoch": 0.05857201242221708, "grad_norm": 0.28515625, "learning_rate": 0.0015406124531643277, "loss": 0.1809, "step": 33034 }, { "epoch": 0.058575558587526895, "grad_norm": 1.9765625, "learning_rate": 0.0015405605540619852, "loss": 0.2567, "step": 33036 }, { "epoch": 0.05857910475283671, "grad_norm": 1.125, "learning_rate": 0.0015405086530328866, "loss": 0.2221, "step": 33038 }, { "epoch": 0.058582650918146524, "grad_norm": 0.341796875, "learning_rate": 0.0015404567500772591, "loss": 0.1321, "step": 33040 }, { "epoch": 0.05858619708345634, "grad_norm": 0.94921875, "learning_rate": 0.0015404048451953297, "loss": 0.1509, "step": 33042 }, { "epoch": 0.05858974324876615, "grad_norm": 3.046875, "learning_rate": 0.001540352938387325, "loss": 0.1797, "step": 33044 }, { "epoch": 0.058593289414075975, "grad_norm": 0.921875, "learning_rate": 0.0015403010296534728, "loss": 0.3508, "step": 33046 }, { "epoch": 0.05859683557938579, "grad_norm": 0.171875, "learning_rate": 0.0015402491189939996, "loss": 0.1443, "step": 33048 }, { "epoch": 0.058600381744695604, "grad_norm": 1.8671875, "learning_rate": 0.0015401972064091319, "loss": 0.2212, "step": 33050 }, { "epoch": 0.05860392791000542, "grad_norm": 0.267578125, "learning_rate": 0.001540145291899098, "loss": 0.1768, "step": 33052 }, { "epoch": 0.05860747407531523, "grad_norm": 0.251953125, "learning_rate": 0.0015400933754641237, "loss": 0.2608, "step": 33054 }, { "epoch": 0.05861102024062505, "grad_norm": 0.330078125, "learning_rate": 0.001540041457104437, "loss": 0.1734, "step": 33056 }, { "epoch": 0.05861456640593486, "grad_norm": 0.3046875, "learning_rate": 0.0015399895368202643, "loss": 0.1636, "step": 33058 }, { "epoch": 0.05861811257124468, "grad_norm": 0.47265625, "learning_rate": 0.0015399376146118333, "loss": 0.1403, "step": 33060 }, { "epoch": 0.05862165873655449, "grad_norm": 0.5390625, "learning_rate": 0.0015398856904793702, "loss": 0.1597, "step": 33062 }, { "epoch": 0.058625204901864306, "grad_norm": 0.353515625, "learning_rate": 0.0015398337644231032, "loss": 0.1572, "step": 33064 }, { "epoch": 0.05862875106717412, "grad_norm": 0.515625, "learning_rate": 0.0015397818364432585, "loss": 0.2071, "step": 33066 }, { "epoch": 0.058632297232483935, "grad_norm": 0.875, "learning_rate": 0.0015397299065400636, "loss": 0.2024, "step": 33068 }, { "epoch": 0.058635843397793756, "grad_norm": 0.6953125, "learning_rate": 0.0015396779747137452, "loss": 0.1645, "step": 33070 }, { "epoch": 0.05863938956310357, "grad_norm": 0.953125, "learning_rate": 0.0015396260409645311, "loss": 0.227, "step": 33072 }, { "epoch": 0.058642935728413385, "grad_norm": 0.65234375, "learning_rate": 0.001539574105292648, "loss": 0.2297, "step": 33074 }, { "epoch": 0.0586464818937232, "grad_norm": 0.734375, "learning_rate": 0.0015395221676983229, "loss": 0.3506, "step": 33076 }, { "epoch": 0.058650028059033014, "grad_norm": 0.49609375, "learning_rate": 0.0015394702281817832, "loss": 0.2145, "step": 33078 }, { "epoch": 0.05865357422434283, "grad_norm": 0.416015625, "learning_rate": 0.0015394182867432562, "loss": 0.1517, "step": 33080 }, { "epoch": 0.058657120389652644, "grad_norm": 0.484375, "learning_rate": 0.0015393663433829687, "loss": 0.2045, "step": 33082 }, { "epoch": 0.05866066655496246, "grad_norm": 0.2080078125, "learning_rate": 0.001539314398101148, "loss": 0.2133, "step": 33084 }, { "epoch": 0.05866421272027227, "grad_norm": 0.447265625, "learning_rate": 0.0015392624508980216, "loss": 0.2679, "step": 33086 }, { "epoch": 0.05866775888558209, "grad_norm": 0.271484375, "learning_rate": 0.0015392105017738161, "loss": 0.1916, "step": 33088 }, { "epoch": 0.0586713050508919, "grad_norm": 0.60546875, "learning_rate": 0.001539158550728759, "loss": 0.1928, "step": 33090 }, { "epoch": 0.05867485121620172, "grad_norm": 0.43359375, "learning_rate": 0.0015391065977630775, "loss": 0.3062, "step": 33092 }, { "epoch": 0.05867839738151154, "grad_norm": 2.890625, "learning_rate": 0.001539054642876999, "loss": 0.2165, "step": 33094 }, { "epoch": 0.05868194354682135, "grad_norm": 0.2578125, "learning_rate": 0.0015390026860707502, "loss": 0.1672, "step": 33096 }, { "epoch": 0.05868548971213117, "grad_norm": 2.578125, "learning_rate": 0.0015389507273445586, "loss": 0.222, "step": 33098 }, { "epoch": 0.05868903587744098, "grad_norm": 1.625, "learning_rate": 0.001538898766698652, "loss": 0.2514, "step": 33100 }, { "epoch": 0.058692582042750796, "grad_norm": 0.388671875, "learning_rate": 0.001538846804133257, "loss": 0.1975, "step": 33102 }, { "epoch": 0.05869612820806061, "grad_norm": 0.359375, "learning_rate": 0.0015387948396486009, "loss": 0.2053, "step": 33104 }, { "epoch": 0.058699674373370425, "grad_norm": 0.1708984375, "learning_rate": 0.0015387428732449107, "loss": 0.1841, "step": 33106 }, { "epoch": 0.05870322053868024, "grad_norm": 0.2373046875, "learning_rate": 0.0015386909049224145, "loss": 0.1567, "step": 33108 }, { "epoch": 0.058706766703990054, "grad_norm": 4.0, "learning_rate": 0.0015386389346813388, "loss": 0.2079, "step": 33110 }, { "epoch": 0.05871031286929987, "grad_norm": 0.380859375, "learning_rate": 0.0015385869625219113, "loss": 0.1732, "step": 33112 }, { "epoch": 0.05871385903460969, "grad_norm": 0.3359375, "learning_rate": 0.0015385349884443592, "loss": 0.2141, "step": 33114 }, { "epoch": 0.058717405199919505, "grad_norm": 0.298828125, "learning_rate": 0.0015384830124489097, "loss": 0.1606, "step": 33116 }, { "epoch": 0.05872095136522932, "grad_norm": 0.341796875, "learning_rate": 0.0015384310345357903, "loss": 0.1862, "step": 33118 }, { "epoch": 0.058724497530539134, "grad_norm": 1.0546875, "learning_rate": 0.0015383790547052283, "loss": 0.2023, "step": 33120 }, { "epoch": 0.05872804369584895, "grad_norm": 0.59765625, "learning_rate": 0.0015383270729574512, "loss": 0.1785, "step": 33122 }, { "epoch": 0.05873158986115876, "grad_norm": 0.37890625, "learning_rate": 0.0015382750892926856, "loss": 0.1788, "step": 33124 }, { "epoch": 0.05873513602646858, "grad_norm": 0.37109375, "learning_rate": 0.0015382231037111597, "loss": 0.2021, "step": 33126 }, { "epoch": 0.05873868219177839, "grad_norm": 0.2578125, "learning_rate": 0.0015381711162131, "loss": 0.2191, "step": 33128 }, { "epoch": 0.05874222835708821, "grad_norm": 0.609375, "learning_rate": 0.0015381191267987348, "loss": 0.1894, "step": 33130 }, { "epoch": 0.05874577452239802, "grad_norm": 0.3046875, "learning_rate": 0.0015380671354682906, "loss": 0.1952, "step": 33132 }, { "epoch": 0.058749320687707836, "grad_norm": 0.828125, "learning_rate": 0.001538015142221996, "loss": 0.2016, "step": 33134 }, { "epoch": 0.05875286685301765, "grad_norm": 0.80859375, "learning_rate": 0.0015379631470600768, "loss": 0.3386, "step": 33136 }, { "epoch": 0.05875641301832747, "grad_norm": 0.232421875, "learning_rate": 0.0015379111499827615, "loss": 0.1745, "step": 33138 }, { "epoch": 0.058759959183637286, "grad_norm": 0.2333984375, "learning_rate": 0.0015378591509902773, "loss": 0.219, "step": 33140 }, { "epoch": 0.0587635053489471, "grad_norm": 0.376953125, "learning_rate": 0.0015378071500828515, "loss": 0.1798, "step": 33142 }, { "epoch": 0.058767051514256916, "grad_norm": 0.38671875, "learning_rate": 0.0015377551472607115, "loss": 0.2139, "step": 33144 }, { "epoch": 0.05877059767956673, "grad_norm": 0.21875, "learning_rate": 0.0015377031425240848, "loss": 0.1753, "step": 33146 }, { "epoch": 0.058774143844876545, "grad_norm": 0.408203125, "learning_rate": 0.0015376511358731987, "loss": 0.1457, "step": 33148 }, { "epoch": 0.05877769001018636, "grad_norm": 0.98046875, "learning_rate": 0.0015375991273082806, "loss": 0.2567, "step": 33150 }, { "epoch": 0.058781236175496174, "grad_norm": 5.34375, "learning_rate": 0.0015375471168295582, "loss": 0.4731, "step": 33152 }, { "epoch": 0.05878478234080599, "grad_norm": 0.423828125, "learning_rate": 0.001537495104437259, "loss": 0.2048, "step": 33154 }, { "epoch": 0.0587883285061158, "grad_norm": 0.54296875, "learning_rate": 0.0015374430901316104, "loss": 0.1737, "step": 33156 }, { "epoch": 0.05879187467142562, "grad_norm": 0.7421875, "learning_rate": 0.0015373910739128395, "loss": 0.2454, "step": 33158 }, { "epoch": 0.05879542083673544, "grad_norm": 1.1328125, "learning_rate": 0.0015373390557811744, "loss": 0.2127, "step": 33160 }, { "epoch": 0.05879896700204525, "grad_norm": 0.439453125, "learning_rate": 0.0015372870357368423, "loss": 0.2003, "step": 33162 }, { "epoch": 0.05880251316735507, "grad_norm": 0.65234375, "learning_rate": 0.0015372350137800705, "loss": 0.1808, "step": 33164 }, { "epoch": 0.05880605933266488, "grad_norm": 0.2021484375, "learning_rate": 0.0015371829899110867, "loss": 0.1441, "step": 33166 }, { "epoch": 0.0588096054979747, "grad_norm": 0.51953125, "learning_rate": 0.0015371309641301187, "loss": 0.3938, "step": 33168 }, { "epoch": 0.05881315166328451, "grad_norm": 0.54296875, "learning_rate": 0.0015370789364373939, "loss": 0.2284, "step": 33170 }, { "epoch": 0.058816697828594326, "grad_norm": 0.89453125, "learning_rate": 0.0015370269068331393, "loss": 0.1523, "step": 33172 }, { "epoch": 0.05882024399390414, "grad_norm": 0.640625, "learning_rate": 0.001536974875317583, "loss": 0.2535, "step": 33174 }, { "epoch": 0.058823790159213955, "grad_norm": 0.4609375, "learning_rate": 0.0015369228418909524, "loss": 0.2193, "step": 33176 }, { "epoch": 0.05882733632452377, "grad_norm": 0.328125, "learning_rate": 0.0015368708065534753, "loss": 0.2074, "step": 33178 }, { "epoch": 0.058830882489833584, "grad_norm": 1.1484375, "learning_rate": 0.001536818769305379, "loss": 0.2534, "step": 33180 }, { "epoch": 0.058834428655143406, "grad_norm": 0.390625, "learning_rate": 0.001536766730146891, "loss": 0.2513, "step": 33182 }, { "epoch": 0.05883797482045322, "grad_norm": 0.5, "learning_rate": 0.0015367146890782391, "loss": 0.1681, "step": 33184 }, { "epoch": 0.058841520985763035, "grad_norm": 0.322265625, "learning_rate": 0.0015366626460996506, "loss": 0.1588, "step": 33186 }, { "epoch": 0.05884506715107285, "grad_norm": 0.42578125, "learning_rate": 0.0015366106012113536, "loss": 0.2279, "step": 33188 }, { "epoch": 0.058848613316382664, "grad_norm": 4.375, "learning_rate": 0.0015365585544135753, "loss": 0.2279, "step": 33190 }, { "epoch": 0.05885215948169248, "grad_norm": 0.380859375, "learning_rate": 0.0015365065057065437, "loss": 0.2137, "step": 33192 }, { "epoch": 0.05885570564700229, "grad_norm": 0.1806640625, "learning_rate": 0.0015364544550904862, "loss": 0.1321, "step": 33194 }, { "epoch": 0.05885925181231211, "grad_norm": 0.29296875, "learning_rate": 0.0015364024025656302, "loss": 0.1958, "step": 33196 }, { "epoch": 0.05886279797762192, "grad_norm": 0.33984375, "learning_rate": 0.001536350348132204, "loss": 0.1442, "step": 33198 }, { "epoch": 0.05886634414293174, "grad_norm": 0.337890625, "learning_rate": 0.0015362982917904344, "loss": 0.2503, "step": 33200 }, { "epoch": 0.05886989030824155, "grad_norm": 0.6640625, "learning_rate": 0.0015362462335405495, "loss": 0.192, "step": 33202 }, { "epoch": 0.058873436473551366, "grad_norm": 0.703125, "learning_rate": 0.0015361941733827772, "loss": 0.2114, "step": 33204 }, { "epoch": 0.05887698263886119, "grad_norm": 0.37890625, "learning_rate": 0.0015361421113173446, "loss": 0.1664, "step": 33206 }, { "epoch": 0.058880528804171, "grad_norm": 0.671875, "learning_rate": 0.00153609004734448, "loss": 0.2578, "step": 33208 }, { "epoch": 0.05888407496948082, "grad_norm": 0.8984375, "learning_rate": 0.0015360379814644107, "loss": 0.4418, "step": 33210 }, { "epoch": 0.05888762113479063, "grad_norm": 0.236328125, "learning_rate": 0.0015359859136773648, "loss": 0.1694, "step": 33212 }, { "epoch": 0.058891167300100446, "grad_norm": 0.267578125, "learning_rate": 0.0015359338439835696, "loss": 0.2108, "step": 33214 }, { "epoch": 0.05889471346541026, "grad_norm": 0.578125, "learning_rate": 0.0015358817723832533, "loss": 0.149, "step": 33216 }, { "epoch": 0.058898259630720075, "grad_norm": 0.2041015625, "learning_rate": 0.001535829698876643, "loss": 0.1621, "step": 33218 }, { "epoch": 0.05890180579602989, "grad_norm": 2.5, "learning_rate": 0.001535777623463967, "loss": 0.5894, "step": 33220 }, { "epoch": 0.058905351961339704, "grad_norm": 0.47265625, "learning_rate": 0.0015357255461454524, "loss": 0.2307, "step": 33222 }, { "epoch": 0.05890889812664952, "grad_norm": 0.482421875, "learning_rate": 0.0015356734669213278, "loss": 0.1825, "step": 33224 }, { "epoch": 0.05891244429195933, "grad_norm": 0.796875, "learning_rate": 0.00153562138579182, "loss": 0.1274, "step": 33226 }, { "epoch": 0.058915990457269155, "grad_norm": 0.3203125, "learning_rate": 0.0015355693027571578, "loss": 0.212, "step": 33228 }, { "epoch": 0.05891953662257897, "grad_norm": 0.236328125, "learning_rate": 0.001535517217817568, "loss": 0.149, "step": 33230 }, { "epoch": 0.058923082787888784, "grad_norm": 0.34375, "learning_rate": 0.0015354651309732795, "loss": 0.1778, "step": 33232 }, { "epoch": 0.0589266289531986, "grad_norm": 0.37109375, "learning_rate": 0.0015354130422245192, "loss": 0.2064, "step": 33234 }, { "epoch": 0.05893017511850841, "grad_norm": 0.470703125, "learning_rate": 0.001535360951571515, "loss": 0.2078, "step": 33236 }, { "epoch": 0.05893372128381823, "grad_norm": 0.408203125, "learning_rate": 0.0015353088590144947, "loss": 0.2001, "step": 33238 }, { "epoch": 0.05893726744912804, "grad_norm": 0.208984375, "learning_rate": 0.0015352567645536869, "loss": 0.1969, "step": 33240 }, { "epoch": 0.058940813614437856, "grad_norm": 0.3203125, "learning_rate": 0.0015352046681893184, "loss": 0.1613, "step": 33242 }, { "epoch": 0.05894435977974767, "grad_norm": 0.2734375, "learning_rate": 0.0015351525699216176, "loss": 0.1828, "step": 33244 }, { "epoch": 0.058947905945057485, "grad_norm": 0.86328125, "learning_rate": 0.0015351004697508122, "loss": 0.2995, "step": 33246 }, { "epoch": 0.0589514521103673, "grad_norm": 0.48046875, "learning_rate": 0.0015350483676771299, "loss": 0.2031, "step": 33248 }, { "epoch": 0.05895499827567712, "grad_norm": 0.98828125, "learning_rate": 0.001534996263700799, "loss": 0.2361, "step": 33250 }, { "epoch": 0.058958544440986936, "grad_norm": 1.546875, "learning_rate": 0.0015349441578220472, "loss": 0.271, "step": 33252 }, { "epoch": 0.05896209060629675, "grad_norm": 0.6015625, "learning_rate": 0.0015348920500411023, "loss": 0.2313, "step": 33254 }, { "epoch": 0.058965636771606565, "grad_norm": 0.28515625, "learning_rate": 0.0015348399403581918, "loss": 0.1195, "step": 33256 }, { "epoch": 0.05896918293691638, "grad_norm": 0.8046875, "learning_rate": 0.0015347878287735445, "loss": 0.1331, "step": 33258 }, { "epoch": 0.058972729102226194, "grad_norm": 0.73046875, "learning_rate": 0.0015347357152873872, "loss": 0.1696, "step": 33260 }, { "epoch": 0.05897627526753601, "grad_norm": 1.953125, "learning_rate": 0.001534683599899949, "loss": 0.2244, "step": 33262 }, { "epoch": 0.05897982143284582, "grad_norm": 0.9375, "learning_rate": 0.001534631482611457, "loss": 0.1743, "step": 33264 }, { "epoch": 0.05898336759815564, "grad_norm": 1.0390625, "learning_rate": 0.001534579363422139, "loss": 0.2006, "step": 33266 }, { "epoch": 0.05898691376346545, "grad_norm": 0.7109375, "learning_rate": 0.0015345272423322237, "loss": 0.1934, "step": 33268 }, { "epoch": 0.05899045992877527, "grad_norm": 1.265625, "learning_rate": 0.0015344751193419387, "loss": 0.2028, "step": 33270 }, { "epoch": 0.05899400609408508, "grad_norm": 1.5234375, "learning_rate": 0.0015344229944515117, "loss": 0.4314, "step": 33272 }, { "epoch": 0.0589975522593949, "grad_norm": 0.4609375, "learning_rate": 0.001534370867661171, "loss": 0.1678, "step": 33274 }, { "epoch": 0.05900109842470472, "grad_norm": 0.263671875, "learning_rate": 0.0015343187389711445, "loss": 0.2136, "step": 33276 }, { "epoch": 0.05900464459001453, "grad_norm": 1.21875, "learning_rate": 0.0015342666083816598, "loss": 0.2308, "step": 33278 }, { "epoch": 0.05900819075532435, "grad_norm": 0.40625, "learning_rate": 0.0015342144758929452, "loss": 0.2164, "step": 33280 }, { "epoch": 0.05901173692063416, "grad_norm": 0.494140625, "learning_rate": 0.001534162341505229, "loss": 0.1469, "step": 33282 }, { "epoch": 0.059015283085943976, "grad_norm": 0.46484375, "learning_rate": 0.0015341102052187387, "loss": 0.2288, "step": 33284 }, { "epoch": 0.05901882925125379, "grad_norm": 0.427734375, "learning_rate": 0.001534058067033703, "loss": 0.157, "step": 33286 }, { "epoch": 0.059022375416563605, "grad_norm": 0.51953125, "learning_rate": 0.0015340059269503491, "loss": 0.2001, "step": 33288 }, { "epoch": 0.05902592158187342, "grad_norm": 4.1875, "learning_rate": 0.0015339537849689056, "loss": 0.2982, "step": 33290 }, { "epoch": 0.059029467747183234, "grad_norm": 0.953125, "learning_rate": 0.0015339016410895999, "loss": 0.2016, "step": 33292 }, { "epoch": 0.05903301391249305, "grad_norm": 0.6484375, "learning_rate": 0.0015338494953126608, "loss": 0.2555, "step": 33294 }, { "epoch": 0.05903656007780287, "grad_norm": 2.46875, "learning_rate": 0.0015337973476383156, "loss": 0.3006, "step": 33296 }, { "epoch": 0.059040106243112685, "grad_norm": 0.32421875, "learning_rate": 0.001533745198066793, "loss": 0.4011, "step": 33298 }, { "epoch": 0.0590436524084225, "grad_norm": 0.74609375, "learning_rate": 0.0015336930465983207, "loss": 0.2422, "step": 33300 }, { "epoch": 0.059047198573732314, "grad_norm": 0.73828125, "learning_rate": 0.0015336408932331274, "loss": 0.2428, "step": 33302 }, { "epoch": 0.05905074473904213, "grad_norm": 0.8046875, "learning_rate": 0.0015335887379714403, "loss": 0.1995, "step": 33304 }, { "epoch": 0.05905429090435194, "grad_norm": 0.98828125, "learning_rate": 0.0015335365808134883, "loss": 0.2213, "step": 33306 }, { "epoch": 0.05905783706966176, "grad_norm": 0.287109375, "learning_rate": 0.0015334844217594987, "loss": 0.1847, "step": 33308 }, { "epoch": 0.05906138323497157, "grad_norm": 1.0234375, "learning_rate": 0.0015334322608097004, "loss": 0.2486, "step": 33310 }, { "epoch": 0.05906492940028139, "grad_norm": 0.6171875, "learning_rate": 0.0015333800979643209, "loss": 0.1591, "step": 33312 }, { "epoch": 0.0590684755655912, "grad_norm": 0.8359375, "learning_rate": 0.0015333279332235888, "loss": 0.2214, "step": 33314 }, { "epoch": 0.059072021730901016, "grad_norm": 0.52734375, "learning_rate": 0.0015332757665877316, "loss": 0.1995, "step": 33316 }, { "epoch": 0.05907556789621084, "grad_norm": 0.296875, "learning_rate": 0.0015332235980569785, "loss": 0.2184, "step": 33318 }, { "epoch": 0.05907911406152065, "grad_norm": 1.5234375, "learning_rate": 0.0015331714276315563, "loss": 0.1999, "step": 33320 }, { "epoch": 0.059082660226830466, "grad_norm": 0.58203125, "learning_rate": 0.0015331192553116944, "loss": 0.232, "step": 33322 }, { "epoch": 0.05908620639214028, "grad_norm": 0.50390625, "learning_rate": 0.00153306708109762, "loss": 0.4622, "step": 33324 }, { "epoch": 0.059089752557450095, "grad_norm": 0.5234375, "learning_rate": 0.0015330149049895623, "loss": 0.2346, "step": 33326 }, { "epoch": 0.05909329872275991, "grad_norm": 0.298828125, "learning_rate": 0.0015329627269877488, "loss": 0.1224, "step": 33328 }, { "epoch": 0.059096844888069724, "grad_norm": 0.64453125, "learning_rate": 0.0015329105470924078, "loss": 0.1727, "step": 33330 }, { "epoch": 0.05910039105337954, "grad_norm": 0.33984375, "learning_rate": 0.0015328583653037675, "loss": 0.1426, "step": 33332 }, { "epoch": 0.059103937218689354, "grad_norm": 0.291015625, "learning_rate": 0.001532806181622056, "loss": 0.1845, "step": 33334 }, { "epoch": 0.05910748338399917, "grad_norm": 0.53515625, "learning_rate": 0.0015327539960475018, "loss": 0.231, "step": 33336 }, { "epoch": 0.05911102954930898, "grad_norm": 1.5859375, "learning_rate": 0.0015327018085803327, "loss": 0.2401, "step": 33338 }, { "epoch": 0.0591145757146188, "grad_norm": 0.6171875, "learning_rate": 0.001532649619220778, "loss": 0.1821, "step": 33340 }, { "epoch": 0.05911812187992862, "grad_norm": 7.96875, "learning_rate": 0.0015325974279690644, "loss": 0.1784, "step": 33342 }, { "epoch": 0.05912166804523843, "grad_norm": 0.326171875, "learning_rate": 0.0015325452348254214, "loss": 0.213, "step": 33344 }, { "epoch": 0.05912521421054825, "grad_norm": 0.322265625, "learning_rate": 0.0015324930397900766, "loss": 0.168, "step": 33346 }, { "epoch": 0.05912876037585806, "grad_norm": 0.59765625, "learning_rate": 0.0015324408428632585, "loss": 0.1221, "step": 33348 }, { "epoch": 0.05913230654116788, "grad_norm": 0.43359375, "learning_rate": 0.0015323886440451952, "loss": 0.1821, "step": 33350 }, { "epoch": 0.05913585270647769, "grad_norm": 0.2333984375, "learning_rate": 0.0015323364433361155, "loss": 0.1428, "step": 33352 }, { "epoch": 0.059139398871787506, "grad_norm": 0.89453125, "learning_rate": 0.001532284240736247, "loss": 0.3291, "step": 33354 }, { "epoch": 0.05914294503709732, "grad_norm": 0.2734375, "learning_rate": 0.0015322320362458183, "loss": 0.1918, "step": 33356 }, { "epoch": 0.059146491202407135, "grad_norm": 0.349609375, "learning_rate": 0.001532179829865058, "loss": 0.1663, "step": 33358 }, { "epoch": 0.05915003736771695, "grad_norm": 0.427734375, "learning_rate": 0.0015321276215941942, "loss": 0.2191, "step": 33360 }, { "epoch": 0.059153583533026764, "grad_norm": 0.3359375, "learning_rate": 0.001532075411433455, "loss": 0.1872, "step": 33362 }, { "epoch": 0.059157129698336586, "grad_norm": 1.0234375, "learning_rate": 0.0015320231993830688, "loss": 0.1627, "step": 33364 }, { "epoch": 0.0591606758636464, "grad_norm": 0.2392578125, "learning_rate": 0.0015319709854432644, "loss": 0.1638, "step": 33366 }, { "epoch": 0.059164222028956215, "grad_norm": 0.8671875, "learning_rate": 0.0015319187696142698, "loss": 0.125, "step": 33368 }, { "epoch": 0.05916776819426603, "grad_norm": 0.90625, "learning_rate": 0.0015318665518963132, "loss": 0.2462, "step": 33370 }, { "epoch": 0.059171314359575844, "grad_norm": 0.57421875, "learning_rate": 0.001531814332289623, "loss": 0.231, "step": 33372 }, { "epoch": 0.05917486052488566, "grad_norm": 0.451171875, "learning_rate": 0.001531762110794428, "loss": 0.1579, "step": 33374 }, { "epoch": 0.05917840669019547, "grad_norm": 0.671875, "learning_rate": 0.0015317098874109566, "loss": 0.1945, "step": 33376 }, { "epoch": 0.05918195285550529, "grad_norm": 0.30078125, "learning_rate": 0.0015316576621394362, "loss": 0.1435, "step": 33378 }, { "epoch": 0.0591854990208151, "grad_norm": 0.3671875, "learning_rate": 0.0015316054349800963, "loss": 0.1812, "step": 33380 }, { "epoch": 0.05918904518612492, "grad_norm": 0.5, "learning_rate": 0.0015315532059331652, "loss": 0.2136, "step": 33382 }, { "epoch": 0.05919259135143473, "grad_norm": 0.2041015625, "learning_rate": 0.0015315009749988708, "loss": 0.1933, "step": 33384 }, { "epoch": 0.05919613751674455, "grad_norm": 0.357421875, "learning_rate": 0.0015314487421774416, "loss": 0.2558, "step": 33386 }, { "epoch": 0.05919968368205437, "grad_norm": 0.369140625, "learning_rate": 0.0015313965074691064, "loss": 0.186, "step": 33388 }, { "epoch": 0.05920322984736418, "grad_norm": 0.83203125, "learning_rate": 0.0015313442708740934, "loss": 0.2182, "step": 33390 }, { "epoch": 0.059206776012673996, "grad_norm": 1.0078125, "learning_rate": 0.0015312920323926314, "loss": 0.1513, "step": 33392 }, { "epoch": 0.05921032217798381, "grad_norm": 0.443359375, "learning_rate": 0.0015312397920249484, "loss": 0.2523, "step": 33394 }, { "epoch": 0.059213868343293626, "grad_norm": 0.5078125, "learning_rate": 0.0015311875497712728, "loss": 0.2306, "step": 33396 }, { "epoch": 0.05921741450860344, "grad_norm": 0.5078125, "learning_rate": 0.0015311353056318333, "loss": 0.2207, "step": 33398 }, { "epoch": 0.059220960673913255, "grad_norm": 0.322265625, "learning_rate": 0.0015310830596068589, "loss": 0.1792, "step": 33400 }, { "epoch": 0.05922450683922307, "grad_norm": 0.341796875, "learning_rate": 0.0015310308116965774, "loss": 0.1667, "step": 33402 }, { "epoch": 0.059228053004532884, "grad_norm": 0.5078125, "learning_rate": 0.0015309785619012175, "loss": 0.5121, "step": 33404 }, { "epoch": 0.0592315991698427, "grad_norm": 0.396484375, "learning_rate": 0.0015309263102210075, "loss": 0.1846, "step": 33406 }, { "epoch": 0.05923514533515251, "grad_norm": 0.64453125, "learning_rate": 0.0015308740566561763, "loss": 0.2024, "step": 33408 }, { "epoch": 0.059238691500462334, "grad_norm": 0.58203125, "learning_rate": 0.001530821801206952, "loss": 0.228, "step": 33410 }, { "epoch": 0.05924223766577215, "grad_norm": 0.380859375, "learning_rate": 0.0015307695438735638, "loss": 0.1741, "step": 33412 }, { "epoch": 0.05924578383108196, "grad_norm": 1.2890625, "learning_rate": 0.0015307172846562396, "loss": 0.1821, "step": 33414 }, { "epoch": 0.05924932999639178, "grad_norm": 0.314453125, "learning_rate": 0.001530665023555208, "loss": 0.1917, "step": 33416 }, { "epoch": 0.05925287616170159, "grad_norm": 0.60546875, "learning_rate": 0.001530612760570698, "loss": 0.1541, "step": 33418 }, { "epoch": 0.05925642232701141, "grad_norm": 0.5390625, "learning_rate": 0.001530560495702938, "loss": 0.1883, "step": 33420 }, { "epoch": 0.05925996849232122, "grad_norm": 0.56640625, "learning_rate": 0.0015305082289521566, "loss": 0.1816, "step": 33422 }, { "epoch": 0.059263514657631036, "grad_norm": 0.369140625, "learning_rate": 0.0015304559603185818, "loss": 0.2245, "step": 33424 }, { "epoch": 0.05926706082294085, "grad_norm": 1.390625, "learning_rate": 0.001530403689802443, "loss": 0.2308, "step": 33426 }, { "epoch": 0.059270606988250665, "grad_norm": 0.80859375, "learning_rate": 0.0015303514174039683, "loss": 0.3468, "step": 33428 }, { "epoch": 0.05927415315356048, "grad_norm": 0.71875, "learning_rate": 0.0015302991431233868, "loss": 0.1712, "step": 33430 }, { "epoch": 0.0592776993188703, "grad_norm": 1.8671875, "learning_rate": 0.0015302468669609263, "loss": 0.2805, "step": 33432 }, { "epoch": 0.059281245484180116, "grad_norm": 1.1015625, "learning_rate": 0.0015301945889168165, "loss": 0.2135, "step": 33434 }, { "epoch": 0.05928479164948993, "grad_norm": 0.37109375, "learning_rate": 0.001530142308991285, "loss": 0.1621, "step": 33436 }, { "epoch": 0.059288337814799745, "grad_norm": 0.95703125, "learning_rate": 0.001530090027184561, "loss": 0.2674, "step": 33438 }, { "epoch": 0.05929188398010956, "grad_norm": 0.30078125, "learning_rate": 0.0015300377434968734, "loss": 0.1662, "step": 33440 }, { "epoch": 0.059295430145419374, "grad_norm": 0.43359375, "learning_rate": 0.0015299854579284501, "loss": 0.1861, "step": 33442 }, { "epoch": 0.05929897631072919, "grad_norm": 0.33984375, "learning_rate": 0.00152993317047952, "loss": 0.1812, "step": 33444 }, { "epoch": 0.059302522476039, "grad_norm": 0.33203125, "learning_rate": 0.0015298808811503124, "loss": 0.1583, "step": 33446 }, { "epoch": 0.05930606864134882, "grad_norm": 0.4296875, "learning_rate": 0.0015298285899410554, "loss": 0.235, "step": 33448 }, { "epoch": 0.05930961480665863, "grad_norm": 0.88671875, "learning_rate": 0.0015297762968519777, "loss": 0.2343, "step": 33450 }, { "epoch": 0.05931316097196845, "grad_norm": 0.150390625, "learning_rate": 0.0015297240018833083, "loss": 0.1557, "step": 33452 }, { "epoch": 0.05931670713727827, "grad_norm": 0.3359375, "learning_rate": 0.0015296717050352758, "loss": 0.222, "step": 33454 }, { "epoch": 0.05932025330258808, "grad_norm": 0.52734375, "learning_rate": 0.0015296194063081085, "loss": 0.1703, "step": 33456 }, { "epoch": 0.0593237994678979, "grad_norm": 0.79296875, "learning_rate": 0.0015295671057020358, "loss": 0.2106, "step": 33458 }, { "epoch": 0.05932734563320771, "grad_norm": 0.431640625, "learning_rate": 0.001529514803217286, "loss": 0.1945, "step": 33460 }, { "epoch": 0.05933089179851753, "grad_norm": 1.015625, "learning_rate": 0.0015294624988540877, "loss": 0.1778, "step": 33462 }, { "epoch": 0.05933443796382734, "grad_norm": 0.279296875, "learning_rate": 0.00152941019261267, "loss": 0.1273, "step": 33464 }, { "epoch": 0.059337984129137156, "grad_norm": 0.73828125, "learning_rate": 0.0015293578844932617, "loss": 0.1659, "step": 33466 }, { "epoch": 0.05934153029444697, "grad_norm": 0.78515625, "learning_rate": 0.0015293055744960912, "loss": 0.2101, "step": 33468 }, { "epoch": 0.059345076459756785, "grad_norm": 0.81640625, "learning_rate": 0.0015292532626213877, "loss": 0.2368, "step": 33470 }, { "epoch": 0.0593486226250666, "grad_norm": 1.34375, "learning_rate": 0.0015292009488693796, "loss": 0.2071, "step": 33472 }, { "epoch": 0.059352168790376414, "grad_norm": 0.8359375, "learning_rate": 0.0015291486332402963, "loss": 0.2293, "step": 33474 }, { "epoch": 0.05935571495568623, "grad_norm": 0.53515625, "learning_rate": 0.0015290963157343655, "loss": 0.2014, "step": 33476 }, { "epoch": 0.05935926112099605, "grad_norm": 0.6328125, "learning_rate": 0.0015290439963518171, "loss": 0.1763, "step": 33478 }, { "epoch": 0.059362807286305865, "grad_norm": 0.51171875, "learning_rate": 0.0015289916750928793, "loss": 0.1559, "step": 33480 }, { "epoch": 0.05936635345161568, "grad_norm": 0.71875, "learning_rate": 0.0015289393519577813, "loss": 0.2062, "step": 33482 }, { "epoch": 0.059369899616925494, "grad_norm": 0.42578125, "learning_rate": 0.0015288870269467515, "loss": 0.2502, "step": 33484 }, { "epoch": 0.05937344578223531, "grad_norm": 0.8359375, "learning_rate": 0.001528834700060019, "loss": 0.1516, "step": 33486 }, { "epoch": 0.05937699194754512, "grad_norm": 0.69140625, "learning_rate": 0.0015287823712978126, "loss": 0.206, "step": 33488 }, { "epoch": 0.05938053811285494, "grad_norm": 0.55078125, "learning_rate": 0.0015287300406603614, "loss": 0.1283, "step": 33490 }, { "epoch": 0.05938408427816475, "grad_norm": 0.296875, "learning_rate": 0.0015286777081478938, "loss": 0.1221, "step": 33492 }, { "epoch": 0.059387630443474566, "grad_norm": 0.7734375, "learning_rate": 0.001528625373760639, "loss": 0.1828, "step": 33494 }, { "epoch": 0.05939117660878438, "grad_norm": 1.4453125, "learning_rate": 0.001528573037498826, "loss": 0.2133, "step": 33496 }, { "epoch": 0.059394722774094195, "grad_norm": 0.2265625, "learning_rate": 0.0015285206993626832, "loss": 0.2185, "step": 33498 }, { "epoch": 0.05939826893940402, "grad_norm": 0.56640625, "learning_rate": 0.0015284683593524398, "loss": 0.2415, "step": 33500 }, { "epoch": 0.05940181510471383, "grad_norm": 0.2353515625, "learning_rate": 0.0015284160174683246, "loss": 0.2333, "step": 33502 }, { "epoch": 0.059405361270023646, "grad_norm": 1.0, "learning_rate": 0.0015283636737105667, "loss": 0.1883, "step": 33504 }, { "epoch": 0.05940890743533346, "grad_norm": 0.306640625, "learning_rate": 0.001528311328079395, "loss": 0.2563, "step": 33506 }, { "epoch": 0.059412453600643275, "grad_norm": 0.37109375, "learning_rate": 0.0015282589805750383, "loss": 0.1517, "step": 33508 }, { "epoch": 0.05941599976595309, "grad_norm": 0.7890625, "learning_rate": 0.0015282066311977254, "loss": 0.199, "step": 33510 }, { "epoch": 0.059419545931262904, "grad_norm": 0.63671875, "learning_rate": 0.0015281542799476858, "loss": 0.2212, "step": 33512 }, { "epoch": 0.05942309209657272, "grad_norm": 0.54296875, "learning_rate": 0.001528101926825148, "loss": 0.2061, "step": 33514 }, { "epoch": 0.05942663826188253, "grad_norm": 0.279296875, "learning_rate": 0.001528049571830341, "loss": 0.2251, "step": 33516 }, { "epoch": 0.05943018442719235, "grad_norm": 0.78125, "learning_rate": 0.0015279972149634935, "loss": 0.2116, "step": 33518 }, { "epoch": 0.05943373059250216, "grad_norm": 1.1171875, "learning_rate": 0.001527944856224835, "loss": 0.2418, "step": 33520 }, { "epoch": 0.059437276757811984, "grad_norm": 0.4140625, "learning_rate": 0.0015278924956145944, "loss": 0.2169, "step": 33522 }, { "epoch": 0.0594408229231218, "grad_norm": 0.423828125, "learning_rate": 0.0015278401331330002, "loss": 0.1614, "step": 33524 }, { "epoch": 0.05944436908843161, "grad_norm": 0.4140625, "learning_rate": 0.0015277877687802821, "loss": 0.2338, "step": 33526 }, { "epoch": 0.05944791525374143, "grad_norm": 0.65625, "learning_rate": 0.0015277354025566687, "loss": 0.2011, "step": 33528 }, { "epoch": 0.05945146141905124, "grad_norm": 0.4375, "learning_rate": 0.001527683034462389, "loss": 0.2025, "step": 33530 }, { "epoch": 0.05945500758436106, "grad_norm": 0.65234375, "learning_rate": 0.0015276306644976725, "loss": 0.1717, "step": 33532 }, { "epoch": 0.05945855374967087, "grad_norm": 0.8046875, "learning_rate": 0.0015275782926627477, "loss": 0.2071, "step": 33534 }, { "epoch": 0.059462099914980686, "grad_norm": 0.85546875, "learning_rate": 0.001527525918957844, "loss": 0.2889, "step": 33536 }, { "epoch": 0.0594656460802905, "grad_norm": 0.71875, "learning_rate": 0.0015274735433831897, "loss": 0.1874, "step": 33538 }, { "epoch": 0.059469192245600315, "grad_norm": 0.734375, "learning_rate": 0.0015274211659390149, "loss": 0.2549, "step": 33540 }, { "epoch": 0.05947273841091013, "grad_norm": 0.65234375, "learning_rate": 0.001527368786625548, "loss": 0.2318, "step": 33542 }, { "epoch": 0.059476284576219944, "grad_norm": 4.5, "learning_rate": 0.0015273164054430185, "loss": 0.4045, "step": 33544 }, { "epoch": 0.059479830741529766, "grad_norm": 0.9609375, "learning_rate": 0.001527264022391655, "loss": 0.2492, "step": 33546 }, { "epoch": 0.05948337690683958, "grad_norm": 2.265625, "learning_rate": 0.0015272116374716869, "loss": 0.3044, "step": 33548 }, { "epoch": 0.059486923072149395, "grad_norm": 0.3515625, "learning_rate": 0.0015271592506833434, "loss": 0.2788, "step": 33550 }, { "epoch": 0.05949046923745921, "grad_norm": 0.373046875, "learning_rate": 0.0015271068620268534, "loss": 0.17, "step": 33552 }, { "epoch": 0.059494015402769024, "grad_norm": 0.71875, "learning_rate": 0.001527054471502446, "loss": 0.1643, "step": 33554 }, { "epoch": 0.05949756156807884, "grad_norm": 0.373046875, "learning_rate": 0.0015270020791103502, "loss": 0.1782, "step": 33556 }, { "epoch": 0.05950110773338865, "grad_norm": 1.8984375, "learning_rate": 0.0015269496848507955, "loss": 0.3836, "step": 33558 }, { "epoch": 0.05950465389869847, "grad_norm": 0.185546875, "learning_rate": 0.0015268972887240112, "loss": 0.1612, "step": 33560 }, { "epoch": 0.05950820006400828, "grad_norm": 0.42578125, "learning_rate": 0.0015268448907302258, "loss": 0.1282, "step": 33562 }, { "epoch": 0.0595117462293181, "grad_norm": 0.2412109375, "learning_rate": 0.0015267924908696688, "loss": 0.2006, "step": 33564 }, { "epoch": 0.05951529239462791, "grad_norm": 0.236328125, "learning_rate": 0.0015267400891425694, "loss": 0.1791, "step": 33566 }, { "epoch": 0.05951883855993773, "grad_norm": 0.1962890625, "learning_rate": 0.0015266876855491568, "loss": 0.2274, "step": 33568 }, { "epoch": 0.05952238472524755, "grad_norm": 0.85546875, "learning_rate": 0.0015266352800896602, "loss": 0.207, "step": 33570 }, { "epoch": 0.05952593089055736, "grad_norm": 0.419921875, "learning_rate": 0.0015265828727643086, "loss": 0.1516, "step": 33572 }, { "epoch": 0.059529477055867176, "grad_norm": 1.125, "learning_rate": 0.001526530463573331, "loss": 0.2169, "step": 33574 }, { "epoch": 0.05953302322117699, "grad_norm": 5.4375, "learning_rate": 0.0015264780525169575, "loss": 0.2752, "step": 33576 }, { "epoch": 0.059536569386486805, "grad_norm": 1.3125, "learning_rate": 0.0015264256395954162, "loss": 0.2099, "step": 33578 }, { "epoch": 0.05954011555179662, "grad_norm": 0.322265625, "learning_rate": 0.001526373224808937, "loss": 0.14, "step": 33580 }, { "epoch": 0.059543661717106434, "grad_norm": 0.9921875, "learning_rate": 0.0015263208081577491, "loss": 0.1949, "step": 33582 }, { "epoch": 0.05954720788241625, "grad_norm": 0.40234375, "learning_rate": 0.0015262683896420815, "loss": 0.184, "step": 33584 }, { "epoch": 0.059550754047726064, "grad_norm": 1.140625, "learning_rate": 0.0015262159692621636, "loss": 0.1652, "step": 33586 }, { "epoch": 0.05955430021303588, "grad_norm": 0.2060546875, "learning_rate": 0.0015261635470182249, "loss": 0.1458, "step": 33588 }, { "epoch": 0.0595578463783457, "grad_norm": 0.169921875, "learning_rate": 0.001526111122910494, "loss": 0.3392, "step": 33590 }, { "epoch": 0.059561392543655514, "grad_norm": 0.2890625, "learning_rate": 0.0015260586969392004, "loss": 0.1825, "step": 33592 }, { "epoch": 0.05956493870896533, "grad_norm": 0.16015625, "learning_rate": 0.0015260062691045742, "loss": 0.4837, "step": 33594 }, { "epoch": 0.05956848487427514, "grad_norm": 1.8359375, "learning_rate": 0.0015259538394068435, "loss": 0.1416, "step": 33596 }, { "epoch": 0.05957203103958496, "grad_norm": 1.0703125, "learning_rate": 0.0015259014078462384, "loss": 0.2399, "step": 33598 }, { "epoch": 0.05957557720489477, "grad_norm": 1.0390625, "learning_rate": 0.0015258489744229878, "loss": 0.2929, "step": 33600 }, { "epoch": 0.05957912337020459, "grad_norm": 0.71484375, "learning_rate": 0.0015257965391373214, "loss": 0.251, "step": 33602 }, { "epoch": 0.0595826695355144, "grad_norm": 9.1875, "learning_rate": 0.0015257441019894678, "loss": 0.2291, "step": 33604 }, { "epoch": 0.059586215700824216, "grad_norm": 0.64453125, "learning_rate": 0.001525691662979657, "loss": 0.2211, "step": 33606 }, { "epoch": 0.05958976186613403, "grad_norm": 0.333984375, "learning_rate": 0.0015256392221081186, "loss": 0.1361, "step": 33608 }, { "epoch": 0.059593308031443845, "grad_norm": 0.37890625, "learning_rate": 0.001525586779375081, "loss": 0.1629, "step": 33610 }, { "epoch": 0.05959685419675366, "grad_norm": 1.140625, "learning_rate": 0.0015255343347807741, "loss": 0.2476, "step": 33612 }, { "epoch": 0.05960040036206348, "grad_norm": 1.53125, "learning_rate": 0.0015254818883254273, "loss": 0.3141, "step": 33614 }, { "epoch": 0.059603946527373296, "grad_norm": 1.328125, "learning_rate": 0.0015254294400092697, "loss": 0.2151, "step": 33616 }, { "epoch": 0.05960749269268311, "grad_norm": 0.33984375, "learning_rate": 0.001525376989832531, "loss": 0.2072, "step": 33618 }, { "epoch": 0.059611038857992925, "grad_norm": 1.1171875, "learning_rate": 0.0015253245377954401, "loss": 0.1455, "step": 33620 }, { "epoch": 0.05961458502330274, "grad_norm": 1.5625, "learning_rate": 0.0015252720838982272, "loss": 0.2009, "step": 33622 }, { "epoch": 0.059618131188612554, "grad_norm": 2.5, "learning_rate": 0.0015252196281411211, "loss": 0.1814, "step": 33624 }, { "epoch": 0.05962167735392237, "grad_norm": 1.1484375, "learning_rate": 0.0015251671705243514, "loss": 0.1753, "step": 33626 }, { "epoch": 0.05962522351923218, "grad_norm": 0.466796875, "learning_rate": 0.0015251147110481472, "loss": 0.19, "step": 33628 }, { "epoch": 0.059628769684542, "grad_norm": 0.87890625, "learning_rate": 0.0015250622497127388, "loss": 0.2177, "step": 33630 }, { "epoch": 0.05963231584985181, "grad_norm": 0.45703125, "learning_rate": 0.0015250097865183543, "loss": 0.1812, "step": 33632 }, { "epoch": 0.05963586201516163, "grad_norm": 1.328125, "learning_rate": 0.0015249573214652242, "loss": 0.2584, "step": 33634 }, { "epoch": 0.05963940818047145, "grad_norm": 0.390625, "learning_rate": 0.0015249048545535773, "loss": 0.212, "step": 33636 }, { "epoch": 0.05964295434578126, "grad_norm": 0.474609375, "learning_rate": 0.001524852385783644, "loss": 0.1502, "step": 33638 }, { "epoch": 0.05964650051109108, "grad_norm": 0.427734375, "learning_rate": 0.0015247999151556525, "loss": 0.1998, "step": 33640 }, { "epoch": 0.05965004667640089, "grad_norm": 1.3203125, "learning_rate": 0.0015247474426698334, "loss": 0.1832, "step": 33642 }, { "epoch": 0.059653592841710706, "grad_norm": 0.45703125, "learning_rate": 0.0015246949683264152, "loss": 0.1699, "step": 33644 }, { "epoch": 0.05965713900702052, "grad_norm": 0.48828125, "learning_rate": 0.0015246424921256283, "loss": 0.2387, "step": 33646 }, { "epoch": 0.059660685172330336, "grad_norm": 0.34765625, "learning_rate": 0.0015245900140677018, "loss": 0.127, "step": 33648 }, { "epoch": 0.05966423133764015, "grad_norm": 0.419921875, "learning_rate": 0.001524537534152865, "loss": 0.1548, "step": 33650 }, { "epoch": 0.059667777502949965, "grad_norm": 7.625, "learning_rate": 0.0015244850523813479, "loss": 0.2532, "step": 33652 }, { "epoch": 0.05967132366825978, "grad_norm": 0.55859375, "learning_rate": 0.0015244325687533795, "loss": 0.1848, "step": 33654 }, { "epoch": 0.059674869833569594, "grad_norm": 0.458984375, "learning_rate": 0.0015243800832691896, "loss": 0.2269, "step": 33656 }, { "epoch": 0.059678415998879415, "grad_norm": 0.26953125, "learning_rate": 0.001524327595929008, "loss": 0.2199, "step": 33658 }, { "epoch": 0.05968196216418923, "grad_norm": 0.671875, "learning_rate": 0.0015242751067330639, "loss": 0.1933, "step": 33660 }, { "epoch": 0.059685508329499044, "grad_norm": 0.421875, "learning_rate": 0.0015242226156815869, "loss": 0.1854, "step": 33662 }, { "epoch": 0.05968905449480886, "grad_norm": 0.94921875, "learning_rate": 0.0015241701227748064, "loss": 0.2249, "step": 33664 }, { "epoch": 0.05969260066011867, "grad_norm": 0.3203125, "learning_rate": 0.0015241176280129522, "loss": 0.3148, "step": 33666 }, { "epoch": 0.05969614682542849, "grad_norm": 0.455078125, "learning_rate": 0.001524065131396254, "loss": 0.2415, "step": 33668 }, { "epoch": 0.0596996929907383, "grad_norm": 0.65234375, "learning_rate": 0.001524012632924941, "loss": 0.1857, "step": 33670 }, { "epoch": 0.05970323915604812, "grad_norm": 0.2197265625, "learning_rate": 0.0015239601325992434, "loss": 0.2308, "step": 33672 }, { "epoch": 0.05970678532135793, "grad_norm": 1.203125, "learning_rate": 0.00152390763041939, "loss": 0.2336, "step": 33674 }, { "epoch": 0.059710331486667746, "grad_norm": 0.3515625, "learning_rate": 0.0015238551263856111, "loss": 0.2008, "step": 33676 }, { "epoch": 0.05971387765197756, "grad_norm": 0.69921875, "learning_rate": 0.001523802620498136, "loss": 0.1501, "step": 33678 }, { "epoch": 0.059717423817287375, "grad_norm": 1.515625, "learning_rate": 0.0015237501127571946, "loss": 0.1744, "step": 33680 }, { "epoch": 0.0597209699825972, "grad_norm": 0.5703125, "learning_rate": 0.0015236976031630166, "loss": 0.168, "step": 33682 }, { "epoch": 0.05972451614790701, "grad_norm": 0.478515625, "learning_rate": 0.0015236450917158308, "loss": 0.2884, "step": 33684 }, { "epoch": 0.059728062313216826, "grad_norm": 0.443359375, "learning_rate": 0.001523592578415868, "loss": 0.4404, "step": 33686 }, { "epoch": 0.05973160847852664, "grad_norm": 0.412109375, "learning_rate": 0.0015235400632633569, "loss": 0.2767, "step": 33688 }, { "epoch": 0.059735154643836455, "grad_norm": 0.478515625, "learning_rate": 0.0015234875462585276, "loss": 0.1229, "step": 33690 }, { "epoch": 0.05973870080914627, "grad_norm": 0.494140625, "learning_rate": 0.00152343502740161, "loss": 0.1463, "step": 33692 }, { "epoch": 0.059742246974456084, "grad_norm": 0.2578125, "learning_rate": 0.0015233825066928333, "loss": 0.1804, "step": 33694 }, { "epoch": 0.0597457931397659, "grad_norm": 0.345703125, "learning_rate": 0.0015233299841324278, "loss": 0.1556, "step": 33696 }, { "epoch": 0.05974933930507571, "grad_norm": 0.68359375, "learning_rate": 0.0015232774597206224, "loss": 0.2662, "step": 33698 }, { "epoch": 0.05975288547038553, "grad_norm": 0.37890625, "learning_rate": 0.0015232249334576477, "loss": 0.1838, "step": 33700 }, { "epoch": 0.05975643163569534, "grad_norm": 0.82421875, "learning_rate": 0.001523172405343733, "loss": 0.2632, "step": 33702 }, { "epoch": 0.059759977801005164, "grad_norm": 0.31640625, "learning_rate": 0.0015231198753791076, "loss": 0.1495, "step": 33704 }, { "epoch": 0.05976352396631498, "grad_norm": 0.7890625, "learning_rate": 0.0015230673435640016, "loss": 0.1744, "step": 33706 }, { "epoch": 0.05976707013162479, "grad_norm": 1.3515625, "learning_rate": 0.001523014809898645, "loss": 0.3071, "step": 33708 }, { "epoch": 0.05977061629693461, "grad_norm": 0.234375, "learning_rate": 0.001522962274383267, "loss": 0.1563, "step": 33710 }, { "epoch": 0.05977416246224442, "grad_norm": 1.0703125, "learning_rate": 0.0015229097370180985, "loss": 0.2597, "step": 33712 }, { "epoch": 0.05977770862755424, "grad_norm": 0.3671875, "learning_rate": 0.001522857197803368, "loss": 0.1859, "step": 33714 }, { "epoch": 0.05978125479286405, "grad_norm": 0.6953125, "learning_rate": 0.0015228046567393057, "loss": 0.2501, "step": 33716 }, { "epoch": 0.059784800958173866, "grad_norm": 1.2734375, "learning_rate": 0.0015227521138261415, "loss": 0.2428, "step": 33718 }, { "epoch": 0.05978834712348368, "grad_norm": 1.078125, "learning_rate": 0.001522699569064105, "loss": 0.2191, "step": 33720 }, { "epoch": 0.059791893288793495, "grad_norm": 0.59765625, "learning_rate": 0.001522647022453426, "loss": 0.2193, "step": 33722 }, { "epoch": 0.05979543945410331, "grad_norm": 0.353515625, "learning_rate": 0.0015225944739943348, "loss": 0.1806, "step": 33724 }, { "epoch": 0.059798985619413124, "grad_norm": 0.2255859375, "learning_rate": 0.0015225419236870605, "loss": 0.1832, "step": 33726 }, { "epoch": 0.059802531784722945, "grad_norm": 0.2177734375, "learning_rate": 0.0015224893715318339, "loss": 0.1792, "step": 33728 }, { "epoch": 0.05980607795003276, "grad_norm": 2.046875, "learning_rate": 0.0015224368175288834, "loss": 0.3896, "step": 33730 }, { "epoch": 0.059809624115342575, "grad_norm": 0.84765625, "learning_rate": 0.00152238426167844, "loss": 0.1639, "step": 33732 }, { "epoch": 0.05981317028065239, "grad_norm": 0.392578125, "learning_rate": 0.0015223317039807331, "loss": 0.1571, "step": 33734 }, { "epoch": 0.059816716445962204, "grad_norm": 0.51171875, "learning_rate": 0.001522279144435993, "loss": 0.2218, "step": 33736 }, { "epoch": 0.05982026261127202, "grad_norm": 1.515625, "learning_rate": 0.0015222265830444489, "loss": 0.246, "step": 33738 }, { "epoch": 0.05982380877658183, "grad_norm": 0.51171875, "learning_rate": 0.0015221740198063309, "loss": 0.161, "step": 33740 }, { "epoch": 0.05982735494189165, "grad_norm": 1.171875, "learning_rate": 0.0015221214547218689, "loss": 0.2855, "step": 33742 }, { "epoch": 0.05983090110720146, "grad_norm": 0.2021484375, "learning_rate": 0.0015220688877912933, "loss": 0.171, "step": 33744 }, { "epoch": 0.059834447272511276, "grad_norm": 0.4296875, "learning_rate": 0.0015220163190148332, "loss": 0.1967, "step": 33746 }, { "epoch": 0.05983799343782109, "grad_norm": 0.267578125, "learning_rate": 0.001521963748392719, "loss": 0.1789, "step": 33748 }, { "epoch": 0.05984153960313091, "grad_norm": 0.78515625, "learning_rate": 0.0015219111759251802, "loss": 0.1546, "step": 33750 }, { "epoch": 0.05984508576844073, "grad_norm": 0.625, "learning_rate": 0.0015218586016124472, "loss": 0.147, "step": 33752 }, { "epoch": 0.05984863193375054, "grad_norm": 0.349609375, "learning_rate": 0.0015218060254547497, "loss": 0.1367, "step": 33754 }, { "epoch": 0.059852178099060356, "grad_norm": 0.85546875, "learning_rate": 0.0015217534474523179, "loss": 0.1895, "step": 33756 }, { "epoch": 0.05985572426437017, "grad_norm": 1.046875, "learning_rate": 0.0015217008676053812, "loss": 0.236, "step": 33758 }, { "epoch": 0.059859270429679985, "grad_norm": 0.345703125, "learning_rate": 0.0015216482859141697, "loss": 0.1483, "step": 33760 }, { "epoch": 0.0598628165949898, "grad_norm": 0.4296875, "learning_rate": 0.001521595702378914, "loss": 0.229, "step": 33762 }, { "epoch": 0.059866362760299614, "grad_norm": 0.376953125, "learning_rate": 0.0015215431169998432, "loss": 0.2, "step": 33764 }, { "epoch": 0.05986990892560943, "grad_norm": 0.32421875, "learning_rate": 0.001521490529777188, "loss": 0.3237, "step": 33766 }, { "epoch": 0.05987345509091924, "grad_norm": 0.470703125, "learning_rate": 0.0015214379407111778, "loss": 0.2025, "step": 33768 }, { "epoch": 0.05987700125622906, "grad_norm": 0.478515625, "learning_rate": 0.001521385349802043, "loss": 0.201, "step": 33770 }, { "epoch": 0.05988054742153888, "grad_norm": 1.0859375, "learning_rate": 0.001521332757050013, "loss": 0.4623, "step": 33772 }, { "epoch": 0.059884093586848694, "grad_norm": 2.796875, "learning_rate": 0.001521280162455319, "loss": 0.3137, "step": 33774 }, { "epoch": 0.05988763975215851, "grad_norm": 0.369140625, "learning_rate": 0.0015212275660181898, "loss": 0.2116, "step": 33776 }, { "epoch": 0.05989118591746832, "grad_norm": 1.5390625, "learning_rate": 0.001521174967738856, "loss": 0.2527, "step": 33778 }, { "epoch": 0.05989473208277814, "grad_norm": 0.396484375, "learning_rate": 0.0015211223676175475, "loss": 0.1346, "step": 33780 }, { "epoch": 0.05989827824808795, "grad_norm": 1.6328125, "learning_rate": 0.0015210697656544946, "loss": 0.1888, "step": 33782 }, { "epoch": 0.05990182441339777, "grad_norm": 1.703125, "learning_rate": 0.0015210171618499268, "loss": 0.2323, "step": 33784 }, { "epoch": 0.05990537057870758, "grad_norm": 0.76953125, "learning_rate": 0.0015209645562040749, "loss": 0.1829, "step": 33786 }, { "epoch": 0.059908916744017396, "grad_norm": 0.81640625, "learning_rate": 0.001520911948717168, "loss": 0.1537, "step": 33788 }, { "epoch": 0.05991246290932721, "grad_norm": 0.77734375, "learning_rate": 0.0015208593393894372, "loss": 0.1669, "step": 33790 }, { "epoch": 0.059916009074637025, "grad_norm": 0.62890625, "learning_rate": 0.0015208067282211119, "loss": 0.2352, "step": 33792 }, { "epoch": 0.05991955523994684, "grad_norm": 0.828125, "learning_rate": 0.0015207541152124225, "loss": 0.2329, "step": 33794 }, { "epoch": 0.05992310140525666, "grad_norm": 1.1796875, "learning_rate": 0.001520701500363599, "loss": 0.2305, "step": 33796 }, { "epoch": 0.059926647570566476, "grad_norm": 0.291015625, "learning_rate": 0.0015206488836748717, "loss": 0.2023, "step": 33798 }, { "epoch": 0.05993019373587629, "grad_norm": 0.33984375, "learning_rate": 0.00152059626514647, "loss": 0.2245, "step": 33800 }, { "epoch": 0.059933739901186105, "grad_norm": 0.5234375, "learning_rate": 0.0015205436447786249, "loss": 0.2198, "step": 33802 }, { "epoch": 0.05993728606649592, "grad_norm": 0.51171875, "learning_rate": 0.001520491022571566, "loss": 0.2925, "step": 33804 }, { "epoch": 0.059940832231805734, "grad_norm": 0.6171875, "learning_rate": 0.0015204383985255238, "loss": 0.1998, "step": 33806 }, { "epoch": 0.05994437839711555, "grad_norm": 0.30859375, "learning_rate": 0.0015203857726407283, "loss": 0.1828, "step": 33808 }, { "epoch": 0.05994792456242536, "grad_norm": 0.546875, "learning_rate": 0.0015203331449174097, "loss": 0.1675, "step": 33810 }, { "epoch": 0.05995147072773518, "grad_norm": 0.6171875, "learning_rate": 0.0015202805153557977, "loss": 0.2022, "step": 33812 }, { "epoch": 0.05995501689304499, "grad_norm": 0.6796875, "learning_rate": 0.0015202278839561232, "loss": 0.2144, "step": 33814 }, { "epoch": 0.05995856305835481, "grad_norm": 0.56640625, "learning_rate": 0.0015201752507186157, "loss": 0.2096, "step": 33816 }, { "epoch": 0.05996210922366463, "grad_norm": 4.40625, "learning_rate": 0.0015201226156435063, "loss": 0.2596, "step": 33818 }, { "epoch": 0.05996565538897444, "grad_norm": 0.421875, "learning_rate": 0.001520069978731024, "loss": 0.2253, "step": 33820 }, { "epoch": 0.05996920155428426, "grad_norm": 0.486328125, "learning_rate": 0.0015200173399813998, "loss": 0.2063, "step": 33822 }, { "epoch": 0.05997274771959407, "grad_norm": 0.275390625, "learning_rate": 0.0015199646993948638, "loss": 0.1858, "step": 33824 }, { "epoch": 0.059976293884903886, "grad_norm": 0.291015625, "learning_rate": 0.0015199120569716461, "loss": 0.1811, "step": 33826 }, { "epoch": 0.0599798400502137, "grad_norm": 0.37109375, "learning_rate": 0.001519859412711977, "loss": 0.2077, "step": 33828 }, { "epoch": 0.059983386215523515, "grad_norm": 0.498046875, "learning_rate": 0.0015198067666160866, "loss": 0.1673, "step": 33830 }, { "epoch": 0.05998693238083333, "grad_norm": 0.66015625, "learning_rate": 0.0015197541186842056, "loss": 0.2353, "step": 33832 }, { "epoch": 0.059990478546143144, "grad_norm": 0.91015625, "learning_rate": 0.0015197014689165636, "loss": 0.2075, "step": 33834 }, { "epoch": 0.05999402471145296, "grad_norm": 0.248046875, "learning_rate": 0.0015196488173133912, "loss": 0.1912, "step": 33836 }, { "epoch": 0.059997570876762774, "grad_norm": 0.78515625, "learning_rate": 0.0015195961638749184, "loss": 0.2717, "step": 33838 }, { "epoch": 0.060001117042072595, "grad_norm": 0.6171875, "learning_rate": 0.0015195435086013762, "loss": 0.1822, "step": 33840 }, { "epoch": 0.06000466320738241, "grad_norm": 0.55078125, "learning_rate": 0.0015194908514929942, "loss": 0.1893, "step": 33842 }, { "epoch": 0.060008209372692224, "grad_norm": 1.4765625, "learning_rate": 0.0015194381925500028, "loss": 0.1865, "step": 33844 }, { "epoch": 0.06001175553800204, "grad_norm": 8.0, "learning_rate": 0.0015193855317726325, "loss": 0.3614, "step": 33846 }, { "epoch": 0.06001530170331185, "grad_norm": 0.875, "learning_rate": 0.0015193328691611135, "loss": 0.2128, "step": 33848 }, { "epoch": 0.06001884786862167, "grad_norm": 0.35546875, "learning_rate": 0.0015192802047156759, "loss": 0.2325, "step": 33850 }, { "epoch": 0.06002239403393148, "grad_norm": 0.66015625, "learning_rate": 0.0015192275384365505, "loss": 0.2296, "step": 33852 }, { "epoch": 0.0600259401992413, "grad_norm": 0.404296875, "learning_rate": 0.0015191748703239673, "loss": 0.1579, "step": 33854 }, { "epoch": 0.06002948636455111, "grad_norm": 0.453125, "learning_rate": 0.0015191222003781567, "loss": 0.2651, "step": 33856 }, { "epoch": 0.060033032529860926, "grad_norm": 0.76171875, "learning_rate": 0.001519069528599349, "loss": 0.1455, "step": 33858 }, { "epoch": 0.06003657869517074, "grad_norm": 0.5390625, "learning_rate": 0.0015190168549877748, "loss": 0.1585, "step": 33860 }, { "epoch": 0.060040124860480555, "grad_norm": 0.419921875, "learning_rate": 0.0015189641795436638, "loss": 0.1533, "step": 33862 }, { "epoch": 0.06004367102579038, "grad_norm": 3.0625, "learning_rate": 0.0015189115022672473, "loss": 0.3335, "step": 33864 }, { "epoch": 0.06004721719110019, "grad_norm": 0.359375, "learning_rate": 0.0015188588231587548, "loss": 0.1873, "step": 33866 }, { "epoch": 0.060050763356410006, "grad_norm": 0.28515625, "learning_rate": 0.0015188061422184173, "loss": 0.1881, "step": 33868 }, { "epoch": 0.06005430952171982, "grad_norm": 0.1796875, "learning_rate": 0.001518753459446465, "loss": 0.1841, "step": 33870 }, { "epoch": 0.060057855687029635, "grad_norm": 0.94140625, "learning_rate": 0.0015187007748431287, "loss": 0.1934, "step": 33872 }, { "epoch": 0.06006140185233945, "grad_norm": 1.25, "learning_rate": 0.0015186480884086379, "loss": 0.1845, "step": 33874 }, { "epoch": 0.060064948017649264, "grad_norm": 0.9453125, "learning_rate": 0.0015185954001432239, "loss": 0.1668, "step": 33876 }, { "epoch": 0.06006849418295908, "grad_norm": 0.171875, "learning_rate": 0.0015185427100471165, "loss": 0.1315, "step": 33878 }, { "epoch": 0.06007204034826889, "grad_norm": 0.578125, "learning_rate": 0.0015184900181205465, "loss": 0.195, "step": 33880 }, { "epoch": 0.06007558651357871, "grad_norm": 0.953125, "learning_rate": 0.001518437324363744, "loss": 0.1963, "step": 33882 }, { "epoch": 0.06007913267888852, "grad_norm": 0.80859375, "learning_rate": 0.00151838462877694, "loss": 0.4066, "step": 33884 }, { "epoch": 0.060082678844198344, "grad_norm": 0.330078125, "learning_rate": 0.0015183319313603644, "loss": 0.2819, "step": 33886 }, { "epoch": 0.06008622500950816, "grad_norm": 0.53515625, "learning_rate": 0.0015182792321142484, "loss": 0.3097, "step": 33888 }, { "epoch": 0.06008977117481797, "grad_norm": 4.21875, "learning_rate": 0.001518226531038822, "loss": 0.2158, "step": 33890 }, { "epoch": 0.06009331734012779, "grad_norm": 0.28515625, "learning_rate": 0.0015181738281343153, "loss": 0.1932, "step": 33892 }, { "epoch": 0.0600968635054376, "grad_norm": 0.298828125, "learning_rate": 0.0015181211234009593, "loss": 0.1992, "step": 33894 }, { "epoch": 0.060100409670747416, "grad_norm": 0.375, "learning_rate": 0.0015180684168389844, "loss": 0.1465, "step": 33896 }, { "epoch": 0.06010395583605723, "grad_norm": 0.2470703125, "learning_rate": 0.0015180157084486211, "loss": 0.2195, "step": 33898 }, { "epoch": 0.060107502001367046, "grad_norm": 0.439453125, "learning_rate": 0.0015179629982300998, "loss": 0.1595, "step": 33900 }, { "epoch": 0.06011104816667686, "grad_norm": 0.466796875, "learning_rate": 0.0015179102861836511, "loss": 0.2757, "step": 33902 }, { "epoch": 0.060114594331986675, "grad_norm": 0.337890625, "learning_rate": 0.0015178575723095058, "loss": 0.2595, "step": 33904 }, { "epoch": 0.06011814049729649, "grad_norm": 0.6953125, "learning_rate": 0.0015178048566078942, "loss": 0.1907, "step": 33906 }, { "epoch": 0.06012168666260631, "grad_norm": 0.9296875, "learning_rate": 0.0015177521390790467, "loss": 0.1921, "step": 33908 }, { "epoch": 0.060125232827916125, "grad_norm": 0.4140625, "learning_rate": 0.001517699419723194, "loss": 0.1847, "step": 33910 }, { "epoch": 0.06012877899322594, "grad_norm": 1.0078125, "learning_rate": 0.0015176466985405666, "loss": 0.1707, "step": 33912 }, { "epoch": 0.060132325158535754, "grad_norm": 1.3046875, "learning_rate": 0.0015175939755313953, "loss": 0.1607, "step": 33914 }, { "epoch": 0.06013587132384557, "grad_norm": 0.4453125, "learning_rate": 0.0015175412506959103, "loss": 0.2688, "step": 33916 }, { "epoch": 0.06013941748915538, "grad_norm": 0.671875, "learning_rate": 0.0015174885240343426, "loss": 0.2525, "step": 33918 }, { "epoch": 0.0601429636544652, "grad_norm": 0.43359375, "learning_rate": 0.0015174357955469225, "loss": 0.216, "step": 33920 }, { "epoch": 0.06014650981977501, "grad_norm": 0.484375, "learning_rate": 0.001517383065233881, "loss": 0.1534, "step": 33922 }, { "epoch": 0.06015005598508483, "grad_norm": 0.359375, "learning_rate": 0.001517330333095448, "loss": 0.2138, "step": 33924 }, { "epoch": 0.06015360215039464, "grad_norm": 0.45703125, "learning_rate": 0.0015172775991318548, "loss": 0.1554, "step": 33926 }, { "epoch": 0.060157148315704456, "grad_norm": 0.9765625, "learning_rate": 0.0015172248633433317, "loss": 0.2329, "step": 33928 }, { "epoch": 0.06016069448101427, "grad_norm": 0.6484375, "learning_rate": 0.0015171721257301093, "loss": 0.2151, "step": 33930 }, { "epoch": 0.06016424064632409, "grad_norm": 1.9921875, "learning_rate": 0.0015171193862924181, "loss": 0.2967, "step": 33932 }, { "epoch": 0.06016778681163391, "grad_norm": 1.796875, "learning_rate": 0.0015170666450304896, "loss": 0.2321, "step": 33934 }, { "epoch": 0.06017133297694372, "grad_norm": 0.3046875, "learning_rate": 0.0015170139019445538, "loss": 0.2119, "step": 33936 }, { "epoch": 0.060174879142253536, "grad_norm": 0.5078125, "learning_rate": 0.001516961157034841, "loss": 0.2121, "step": 33938 }, { "epoch": 0.06017842530756335, "grad_norm": 0.55859375, "learning_rate": 0.0015169084103015824, "loss": 0.1798, "step": 33940 }, { "epoch": 0.060181971472873165, "grad_norm": 0.27734375, "learning_rate": 0.0015168556617450087, "loss": 0.1507, "step": 33942 }, { "epoch": 0.06018551763818298, "grad_norm": 0.232421875, "learning_rate": 0.0015168029113653506, "loss": 0.1495, "step": 33944 }, { "epoch": 0.060189063803492794, "grad_norm": 12.25, "learning_rate": 0.0015167501591628386, "loss": 0.277, "step": 33946 }, { "epoch": 0.06019260996880261, "grad_norm": 1.3828125, "learning_rate": 0.0015166974051377031, "loss": 0.2228, "step": 33948 }, { "epoch": 0.06019615613411242, "grad_norm": 1.2265625, "learning_rate": 0.0015166446492901758, "loss": 0.2798, "step": 33950 }, { "epoch": 0.06019970229942224, "grad_norm": 0.55078125, "learning_rate": 0.0015165918916204863, "loss": 0.252, "step": 33952 }, { "epoch": 0.06020324846473206, "grad_norm": 0.283203125, "learning_rate": 0.0015165391321288661, "loss": 0.1662, "step": 33954 }, { "epoch": 0.060206794630041874, "grad_norm": 0.267578125, "learning_rate": 0.0015164863708155459, "loss": 0.1696, "step": 33956 }, { "epoch": 0.06021034079535169, "grad_norm": 0.31640625, "learning_rate": 0.001516433607680756, "loss": 0.2241, "step": 33958 }, { "epoch": 0.0602138869606615, "grad_norm": 1.140625, "learning_rate": 0.0015163808427247271, "loss": 0.1989, "step": 33960 }, { "epoch": 0.06021743312597132, "grad_norm": 0.62890625, "learning_rate": 0.0015163280759476906, "loss": 0.2271, "step": 33962 }, { "epoch": 0.06022097929128113, "grad_norm": 0.419921875, "learning_rate": 0.001516275307349877, "loss": 0.2162, "step": 33964 }, { "epoch": 0.06022452545659095, "grad_norm": 0.392578125, "learning_rate": 0.001516222536931517, "loss": 0.2406, "step": 33966 }, { "epoch": 0.06022807162190076, "grad_norm": 0.7578125, "learning_rate": 0.0015161697646928409, "loss": 0.2464, "step": 33968 }, { "epoch": 0.060231617787210576, "grad_norm": 0.40234375, "learning_rate": 0.0015161169906340805, "loss": 0.2167, "step": 33970 }, { "epoch": 0.06023516395252039, "grad_norm": 0.203125, "learning_rate": 0.0015160642147554661, "loss": 0.4023, "step": 33972 }, { "epoch": 0.060238710117830205, "grad_norm": 0.2431640625, "learning_rate": 0.0015160114370572284, "loss": 0.1892, "step": 33974 }, { "epoch": 0.060242256283140026, "grad_norm": 0.37890625, "learning_rate": 0.0015159586575395983, "loss": 0.1649, "step": 33976 }, { "epoch": 0.06024580244844984, "grad_norm": 0.37890625, "learning_rate": 0.0015159058762028066, "loss": 0.2155, "step": 33978 }, { "epoch": 0.060249348613759655, "grad_norm": 0.41796875, "learning_rate": 0.001515853093047084, "loss": 0.1429, "step": 33980 }, { "epoch": 0.06025289477906947, "grad_norm": 0.294921875, "learning_rate": 0.0015158003080726619, "loss": 0.1716, "step": 33982 }, { "epoch": 0.060256440944379284, "grad_norm": 0.90625, "learning_rate": 0.0015157475212797709, "loss": 0.1806, "step": 33984 }, { "epoch": 0.0602599871096891, "grad_norm": 0.81640625, "learning_rate": 0.0015156947326686412, "loss": 0.2279, "step": 33986 }, { "epoch": 0.060263533274998914, "grad_norm": 0.56640625, "learning_rate": 0.0015156419422395044, "loss": 0.2468, "step": 33988 }, { "epoch": 0.06026707944030873, "grad_norm": 0.3125, "learning_rate": 0.0015155891499925916, "loss": 0.2239, "step": 33990 }, { "epoch": 0.06027062560561854, "grad_norm": 1.125, "learning_rate": 0.0015155363559281325, "loss": 0.3549, "step": 33992 }, { "epoch": 0.06027417177092836, "grad_norm": 0.369140625, "learning_rate": 0.0015154835600463595, "loss": 0.255, "step": 33994 }, { "epoch": 0.06027771793623817, "grad_norm": 0.326171875, "learning_rate": 0.001515430762347502, "loss": 0.1716, "step": 33996 }, { "epoch": 0.060281264101547986, "grad_norm": 0.93359375, "learning_rate": 0.0015153779628317922, "loss": 0.394, "step": 33998 }, { "epoch": 0.06028481026685781, "grad_norm": 0.45703125, "learning_rate": 0.0015153251614994606, "loss": 0.1607, "step": 34000 }, { "epoch": 0.06028835643216762, "grad_norm": 1.4296875, "learning_rate": 0.0015152723583507376, "loss": 0.1951, "step": 34002 }, { "epoch": 0.06029190259747744, "grad_norm": 1.0546875, "learning_rate": 0.0015152195533858549, "loss": 0.1775, "step": 34004 }, { "epoch": 0.06029544876278725, "grad_norm": 1.3671875, "learning_rate": 0.0015151667466050426, "loss": 0.1756, "step": 34006 }, { "epoch": 0.060298994928097066, "grad_norm": 4.40625, "learning_rate": 0.0015151139380085324, "loss": 0.2326, "step": 34008 }, { "epoch": 0.06030254109340688, "grad_norm": 0.498046875, "learning_rate": 0.0015150611275965547, "loss": 0.2136, "step": 34010 }, { "epoch": 0.060306087258716695, "grad_norm": 0.490234375, "learning_rate": 0.001515008315369341, "loss": 0.2687, "step": 34012 }, { "epoch": 0.06030963342402651, "grad_norm": 0.279296875, "learning_rate": 0.0015149555013271216, "loss": 0.1845, "step": 34014 }, { "epoch": 0.060313179589336324, "grad_norm": 0.306640625, "learning_rate": 0.0015149026854701286, "loss": 0.2654, "step": 34016 }, { "epoch": 0.06031672575464614, "grad_norm": 0.5546875, "learning_rate": 0.0015148498677985919, "loss": 0.1894, "step": 34018 }, { "epoch": 0.06032027191995595, "grad_norm": 0.9453125, "learning_rate": 0.0015147970483127427, "loss": 0.2625, "step": 34020 }, { "epoch": 0.060323818085265775, "grad_norm": 0.279296875, "learning_rate": 0.0015147442270128121, "loss": 0.1874, "step": 34022 }, { "epoch": 0.06032736425057559, "grad_norm": 0.6640625, "learning_rate": 0.0015146914038990315, "loss": 0.1924, "step": 34024 }, { "epoch": 0.060330910415885404, "grad_norm": 0.23046875, "learning_rate": 0.0015146385789716313, "loss": 0.1551, "step": 34026 }, { "epoch": 0.06033445658119522, "grad_norm": 1.453125, "learning_rate": 0.0015145857522308429, "loss": 0.3098, "step": 34028 }, { "epoch": 0.06033800274650503, "grad_norm": 0.439453125, "learning_rate": 0.001514532923676897, "loss": 0.1806, "step": 34030 }, { "epoch": 0.06034154891181485, "grad_norm": 2.15625, "learning_rate": 0.0015144800933100254, "loss": 0.2005, "step": 34032 }, { "epoch": 0.06034509507712466, "grad_norm": 0.77734375, "learning_rate": 0.0015144272611304582, "loss": 0.1806, "step": 34034 }, { "epoch": 0.06034864124243448, "grad_norm": 0.75, "learning_rate": 0.001514374427138427, "loss": 0.1802, "step": 34036 }, { "epoch": 0.06035218740774429, "grad_norm": 0.65625, "learning_rate": 0.001514321591334163, "loss": 0.2183, "step": 34038 }, { "epoch": 0.060355733573054106, "grad_norm": 0.2431640625, "learning_rate": 0.0015142687537178967, "loss": 0.1689, "step": 34040 }, { "epoch": 0.06035927973836392, "grad_norm": 0.98046875, "learning_rate": 0.0015142159142898597, "loss": 0.1868, "step": 34042 }, { "epoch": 0.06036282590367374, "grad_norm": 0.50390625, "learning_rate": 0.0015141630730502826, "loss": 0.2014, "step": 34044 }, { "epoch": 0.060366372068983556, "grad_norm": 0.375, "learning_rate": 0.001514110229999397, "loss": 0.1807, "step": 34046 }, { "epoch": 0.06036991823429337, "grad_norm": 0.88671875, "learning_rate": 0.001514057385137434, "loss": 0.2364, "step": 34048 }, { "epoch": 0.060373464399603186, "grad_norm": 0.361328125, "learning_rate": 0.0015140045384646242, "loss": 0.1413, "step": 34050 }, { "epoch": 0.060377010564913, "grad_norm": 0.32421875, "learning_rate": 0.001513951689981199, "loss": 0.2001, "step": 34052 }, { "epoch": 0.060380556730222815, "grad_norm": 0.29296875, "learning_rate": 0.0015138988396873898, "loss": 0.1857, "step": 34054 }, { "epoch": 0.06038410289553263, "grad_norm": 0.404296875, "learning_rate": 0.0015138459875834274, "loss": 0.2107, "step": 34056 }, { "epoch": 0.060387649060842444, "grad_norm": 0.265625, "learning_rate": 0.001513793133669543, "loss": 0.1201, "step": 34058 }, { "epoch": 0.06039119522615226, "grad_norm": 2.96875, "learning_rate": 0.001513740277945968, "loss": 0.4326, "step": 34060 }, { "epoch": 0.06039474139146207, "grad_norm": 0.1669921875, "learning_rate": 0.001513687420412933, "loss": 0.17, "step": 34062 }, { "epoch": 0.06039828755677189, "grad_norm": 0.33984375, "learning_rate": 0.0015136345610706698, "loss": 0.1742, "step": 34064 }, { "epoch": 0.0604018337220817, "grad_norm": 0.2734375, "learning_rate": 0.0015135816999194093, "loss": 0.2435, "step": 34066 }, { "epoch": 0.060405379887391523, "grad_norm": 0.56640625, "learning_rate": 0.0015135288369593825, "loss": 0.1591, "step": 34068 }, { "epoch": 0.06040892605270134, "grad_norm": 0.306640625, "learning_rate": 0.001513475972190821, "loss": 0.1999, "step": 34070 }, { "epoch": 0.06041247221801115, "grad_norm": 0.37890625, "learning_rate": 0.0015134231056139556, "loss": 0.2328, "step": 34072 }, { "epoch": 0.06041601838332097, "grad_norm": 0.41796875, "learning_rate": 0.0015133702372290177, "loss": 0.189, "step": 34074 }, { "epoch": 0.06041956454863078, "grad_norm": 0.90234375, "learning_rate": 0.0015133173670362386, "loss": 0.1827, "step": 34076 }, { "epoch": 0.060423110713940596, "grad_norm": 0.92578125, "learning_rate": 0.0015132644950358492, "loss": 0.1789, "step": 34078 }, { "epoch": 0.06042665687925041, "grad_norm": 0.388671875, "learning_rate": 0.0015132116212280811, "loss": 0.2465, "step": 34080 }, { "epoch": 0.060430203044560225, "grad_norm": 0.25, "learning_rate": 0.0015131587456131653, "loss": 0.1803, "step": 34082 }, { "epoch": 0.06043374920987004, "grad_norm": 0.3046875, "learning_rate": 0.0015131058681913333, "loss": 0.209, "step": 34084 }, { "epoch": 0.060437295375179854, "grad_norm": 0.59375, "learning_rate": 0.001513052988962816, "loss": 0.2628, "step": 34086 }, { "epoch": 0.06044084154048967, "grad_norm": 2.3125, "learning_rate": 0.0015130001079278449, "loss": 0.2879, "step": 34088 }, { "epoch": 0.06044438770579949, "grad_norm": 0.6171875, "learning_rate": 0.0015129472250866513, "loss": 0.2624, "step": 34090 }, { "epoch": 0.060447933871109305, "grad_norm": 0.1953125, "learning_rate": 0.0015128943404394666, "loss": 0.1649, "step": 34092 }, { "epoch": 0.06045148003641912, "grad_norm": 0.83984375, "learning_rate": 0.0015128414539865216, "loss": 0.2521, "step": 34094 }, { "epoch": 0.060455026201728934, "grad_norm": 0.55859375, "learning_rate": 0.0015127885657280481, "loss": 0.1574, "step": 34096 }, { "epoch": 0.06045857236703875, "grad_norm": 1.3125, "learning_rate": 0.001512735675664277, "loss": 0.2852, "step": 34098 }, { "epoch": 0.06046211853234856, "grad_norm": 0.224609375, "learning_rate": 0.0015126827837954398, "loss": 0.2197, "step": 34100 }, { "epoch": 0.06046566469765838, "grad_norm": 0.2080078125, "learning_rate": 0.0015126298901217679, "loss": 0.1599, "step": 34102 }, { "epoch": 0.06046921086296819, "grad_norm": 0.345703125, "learning_rate": 0.0015125769946434924, "loss": 0.2079, "step": 34104 }, { "epoch": 0.06047275702827801, "grad_norm": 4.15625, "learning_rate": 0.001512524097360845, "loss": 0.258, "step": 34106 }, { "epoch": 0.06047630319358782, "grad_norm": 0.72265625, "learning_rate": 0.0015124711982740565, "loss": 0.1847, "step": 34108 }, { "epoch": 0.060479849358897636, "grad_norm": 0.474609375, "learning_rate": 0.001512418297383359, "loss": 0.2351, "step": 34110 }, { "epoch": 0.06048339552420746, "grad_norm": 0.32421875, "learning_rate": 0.0015123653946889832, "loss": 0.2111, "step": 34112 }, { "epoch": 0.06048694168951727, "grad_norm": 0.37890625, "learning_rate": 0.0015123124901911607, "loss": 0.1839, "step": 34114 }, { "epoch": 0.06049048785482709, "grad_norm": 0.546875, "learning_rate": 0.0015122595838901228, "loss": 0.1635, "step": 34116 }, { "epoch": 0.0604940340201369, "grad_norm": 0.65625, "learning_rate": 0.0015122066757861012, "loss": 0.1856, "step": 34118 }, { "epoch": 0.060497580185446716, "grad_norm": 1.2578125, "learning_rate": 0.0015121537658793268, "loss": 0.3814, "step": 34120 }, { "epoch": 0.06050112635075653, "grad_norm": 0.4296875, "learning_rate": 0.0015121008541700313, "loss": 0.3229, "step": 34122 }, { "epoch": 0.060504672516066345, "grad_norm": 0.458984375, "learning_rate": 0.001512047940658446, "loss": 0.1881, "step": 34124 }, { "epoch": 0.06050821868137616, "grad_norm": 0.29296875, "learning_rate": 0.0015119950253448026, "loss": 0.1305, "step": 34126 }, { "epoch": 0.060511764846685974, "grad_norm": 5.78125, "learning_rate": 0.0015119421082293318, "loss": 0.2737, "step": 34128 }, { "epoch": 0.06051531101199579, "grad_norm": 1.6171875, "learning_rate": 0.001511889189312266, "loss": 0.2569, "step": 34130 }, { "epoch": 0.0605188571773056, "grad_norm": 0.6171875, "learning_rate": 0.001511836268593836, "loss": 0.2814, "step": 34132 }, { "epoch": 0.06052240334261542, "grad_norm": 0.51953125, "learning_rate": 0.0015117833460742735, "loss": 0.2319, "step": 34134 }, { "epoch": 0.06052594950792524, "grad_norm": 0.5703125, "learning_rate": 0.0015117304217538096, "loss": 0.2404, "step": 34136 }, { "epoch": 0.060529495673235054, "grad_norm": 0.59375, "learning_rate": 0.0015116774956326759, "loss": 0.3036, "step": 34138 }, { "epoch": 0.06053304183854487, "grad_norm": 0.609375, "learning_rate": 0.001511624567711104, "loss": 0.1689, "step": 34140 }, { "epoch": 0.06053658800385468, "grad_norm": 1.0703125, "learning_rate": 0.0015115716379893254, "loss": 0.1982, "step": 34142 }, { "epoch": 0.0605401341691645, "grad_norm": 0.4375, "learning_rate": 0.0015115187064675714, "loss": 0.1422, "step": 34144 }, { "epoch": 0.06054368033447431, "grad_norm": 1.1875, "learning_rate": 0.0015114657731460739, "loss": 0.1621, "step": 34146 }, { "epoch": 0.060547226499784126, "grad_norm": 0.263671875, "learning_rate": 0.001511412838025064, "loss": 0.2336, "step": 34148 }, { "epoch": 0.06055077266509394, "grad_norm": 0.365234375, "learning_rate": 0.0015113599011047731, "loss": 0.1732, "step": 34150 }, { "epoch": 0.060554318830403756, "grad_norm": 0.279296875, "learning_rate": 0.0015113069623854335, "loss": 0.2518, "step": 34152 }, { "epoch": 0.06055786499571357, "grad_norm": 0.77734375, "learning_rate": 0.0015112540218672759, "loss": 0.1525, "step": 34154 }, { "epoch": 0.060561411161023385, "grad_norm": 0.4375, "learning_rate": 0.0015112010795505317, "loss": 0.1819, "step": 34156 }, { "epoch": 0.060564957326333206, "grad_norm": 0.3515625, "learning_rate": 0.0015111481354354332, "loss": 0.239, "step": 34158 }, { "epoch": 0.06056850349164302, "grad_norm": 0.486328125, "learning_rate": 0.0015110951895222116, "loss": 0.1717, "step": 34160 }, { "epoch": 0.060572049656952835, "grad_norm": 0.341796875, "learning_rate": 0.0015110422418110985, "loss": 0.175, "step": 34162 }, { "epoch": 0.06057559582226265, "grad_norm": 1.15625, "learning_rate": 0.001510989292302325, "loss": 0.3236, "step": 34164 }, { "epoch": 0.060579141987572464, "grad_norm": 0.94921875, "learning_rate": 0.0015109363409961235, "loss": 0.2762, "step": 34166 }, { "epoch": 0.06058268815288228, "grad_norm": 0.6015625, "learning_rate": 0.001510883387892725, "loss": 0.1782, "step": 34168 }, { "epoch": 0.06058623431819209, "grad_norm": 0.2890625, "learning_rate": 0.0015108304329923613, "loss": 0.1949, "step": 34170 }, { "epoch": 0.06058978048350191, "grad_norm": 0.265625, "learning_rate": 0.0015107774762952637, "loss": 0.2138, "step": 34172 }, { "epoch": 0.06059332664881172, "grad_norm": 0.55859375, "learning_rate": 0.001510724517801664, "loss": 0.1808, "step": 34174 }, { "epoch": 0.06059687281412154, "grad_norm": 0.7578125, "learning_rate": 0.0015106715575117942, "loss": 0.2233, "step": 34176 }, { "epoch": 0.06060041897943135, "grad_norm": 0.443359375, "learning_rate": 0.0015106185954258853, "loss": 0.2484, "step": 34178 }, { "epoch": 0.06060396514474117, "grad_norm": 0.5546875, "learning_rate": 0.0015105656315441691, "loss": 0.225, "step": 34180 }, { "epoch": 0.06060751131005099, "grad_norm": 1.3828125, "learning_rate": 0.0015105126658668774, "loss": 0.2889, "step": 34182 }, { "epoch": 0.0606110574753608, "grad_norm": 0.333984375, "learning_rate": 0.0015104596983942418, "loss": 0.3335, "step": 34184 }, { "epoch": 0.06061460364067062, "grad_norm": 1.3203125, "learning_rate": 0.001510406729126494, "loss": 0.2388, "step": 34186 }, { "epoch": 0.06061814980598043, "grad_norm": 0.265625, "learning_rate": 0.0015103537580638655, "loss": 0.2029, "step": 34188 }, { "epoch": 0.060621695971290246, "grad_norm": 0.69140625, "learning_rate": 0.001510300785206588, "loss": 0.1933, "step": 34190 }, { "epoch": 0.06062524213660006, "grad_norm": 0.259765625, "learning_rate": 0.001510247810554893, "loss": 0.1872, "step": 34192 }, { "epoch": 0.060628788301909875, "grad_norm": 0.96484375, "learning_rate": 0.0015101948341090126, "loss": 0.2337, "step": 34194 }, { "epoch": 0.06063233446721969, "grad_norm": 0.390625, "learning_rate": 0.0015101418558691783, "loss": 0.1762, "step": 34196 }, { "epoch": 0.060635880632529504, "grad_norm": 0.25390625, "learning_rate": 0.0015100888758356214, "loss": 0.1542, "step": 34198 }, { "epoch": 0.06063942679783932, "grad_norm": 0.3671875, "learning_rate": 0.0015100358940085743, "loss": 0.1525, "step": 34200 }, { "epoch": 0.06064297296314913, "grad_norm": 0.6015625, "learning_rate": 0.0015099829103882685, "loss": 0.2375, "step": 34202 }, { "epoch": 0.060646519128458955, "grad_norm": 0.6171875, "learning_rate": 0.0015099299249749353, "loss": 0.2025, "step": 34204 }, { "epoch": 0.06065006529376877, "grad_norm": 0.5390625, "learning_rate": 0.001509876937768807, "loss": 0.2203, "step": 34206 }, { "epoch": 0.060653611459078584, "grad_norm": 0.33984375, "learning_rate": 0.001509823948770115, "loss": 0.2714, "step": 34208 }, { "epoch": 0.0606571576243884, "grad_norm": 0.369140625, "learning_rate": 0.0015097709579790907, "loss": 0.1877, "step": 34210 }, { "epoch": 0.06066070378969821, "grad_norm": 0.6328125, "learning_rate": 0.0015097179653959668, "loss": 0.2006, "step": 34212 }, { "epoch": 0.06066424995500803, "grad_norm": 0.458984375, "learning_rate": 0.001509664971020974, "loss": 0.1893, "step": 34214 }, { "epoch": 0.06066779612031784, "grad_norm": 0.703125, "learning_rate": 0.001509611974854345, "loss": 0.1881, "step": 34216 }, { "epoch": 0.06067134228562766, "grad_norm": 0.212890625, "learning_rate": 0.0015095589768963108, "loss": 0.1801, "step": 34218 }, { "epoch": 0.06067488845093747, "grad_norm": 0.353515625, "learning_rate": 0.0015095059771471039, "loss": 0.2047, "step": 34220 }, { "epoch": 0.060678434616247286, "grad_norm": 0.30078125, "learning_rate": 0.0015094529756069556, "loss": 0.1918, "step": 34222 }, { "epoch": 0.0606819807815571, "grad_norm": 1.1015625, "learning_rate": 0.0015093999722760976, "loss": 0.2197, "step": 34224 }, { "epoch": 0.06068552694686692, "grad_norm": 0.423828125, "learning_rate": 0.0015093469671547623, "loss": 0.164, "step": 34226 }, { "epoch": 0.060689073112176736, "grad_norm": 0.53125, "learning_rate": 0.001509293960243181, "loss": 0.216, "step": 34228 }, { "epoch": 0.06069261927748655, "grad_norm": 0.83984375, "learning_rate": 0.0015092409515415856, "loss": 0.178, "step": 34230 }, { "epoch": 0.060696165442796365, "grad_norm": 0.39453125, "learning_rate": 0.0015091879410502078, "loss": 0.1749, "step": 34232 }, { "epoch": 0.06069971160810618, "grad_norm": 1.3203125, "learning_rate": 0.0015091349287692797, "loss": 0.1363, "step": 34234 }, { "epoch": 0.060703257773415994, "grad_norm": 0.58984375, "learning_rate": 0.0015090819146990335, "loss": 0.2424, "step": 34236 }, { "epoch": 0.06070680393872581, "grad_norm": 0.8046875, "learning_rate": 0.0015090288988397, "loss": 0.179, "step": 34238 }, { "epoch": 0.060710350104035624, "grad_norm": 0.482421875, "learning_rate": 0.0015089758811915122, "loss": 0.1567, "step": 34240 }, { "epoch": 0.06071389626934544, "grad_norm": 0.345703125, "learning_rate": 0.0015089228617547012, "loss": 0.1653, "step": 34242 }, { "epoch": 0.06071744243465525, "grad_norm": 2.140625, "learning_rate": 0.0015088698405294994, "loss": 0.345, "step": 34244 }, { "epoch": 0.06072098859996507, "grad_norm": 0.4609375, "learning_rate": 0.0015088168175161382, "loss": 0.1705, "step": 34246 }, { "epoch": 0.06072453476527489, "grad_norm": 0.546875, "learning_rate": 0.00150876379271485, "loss": 0.2321, "step": 34248 }, { "epoch": 0.0607280809305847, "grad_norm": 0.87109375, "learning_rate": 0.0015087107661258658, "loss": 0.2493, "step": 34250 }, { "epoch": 0.06073162709589452, "grad_norm": 0.365234375, "learning_rate": 0.0015086577377494187, "loss": 0.202, "step": 34252 }, { "epoch": 0.06073517326120433, "grad_norm": 0.55859375, "learning_rate": 0.0015086047075857395, "loss": 0.154, "step": 34254 }, { "epoch": 0.06073871942651415, "grad_norm": 0.6015625, "learning_rate": 0.001508551675635061, "loss": 0.2466, "step": 34256 }, { "epoch": 0.06074226559182396, "grad_norm": 0.2353515625, "learning_rate": 0.0015084986418976147, "loss": 0.1816, "step": 34258 }, { "epoch": 0.060745811757133776, "grad_norm": 0.55078125, "learning_rate": 0.001508445606373633, "loss": 0.1485, "step": 34260 }, { "epoch": 0.06074935792244359, "grad_norm": 0.275390625, "learning_rate": 0.0015083925690633471, "loss": 0.2092, "step": 34262 }, { "epoch": 0.060752904087753405, "grad_norm": 0.28515625, "learning_rate": 0.0015083395299669892, "loss": 0.1285, "step": 34264 }, { "epoch": 0.06075645025306322, "grad_norm": 0.35546875, "learning_rate": 0.0015082864890847915, "loss": 0.2556, "step": 34266 }, { "epoch": 0.060759996418373034, "grad_norm": 0.6875, "learning_rate": 0.001508233446416986, "loss": 0.2296, "step": 34268 }, { "epoch": 0.06076354258368285, "grad_norm": 0.6328125, "learning_rate": 0.0015081804019638045, "loss": 0.1855, "step": 34270 }, { "epoch": 0.06076708874899267, "grad_norm": 0.232421875, "learning_rate": 0.0015081273557254788, "loss": 0.1682, "step": 34272 }, { "epoch": 0.060770634914302485, "grad_norm": 0.326171875, "learning_rate": 0.0015080743077022414, "loss": 0.1619, "step": 34274 }, { "epoch": 0.0607741810796123, "grad_norm": 0.55078125, "learning_rate": 0.0015080212578943237, "loss": 0.1589, "step": 34276 }, { "epoch": 0.060777727244922114, "grad_norm": 0.38671875, "learning_rate": 0.0015079682063019583, "loss": 0.2142, "step": 34278 }, { "epoch": 0.06078127341023193, "grad_norm": 0.314453125, "learning_rate": 0.0015079151529253769, "loss": 0.2577, "step": 34280 }, { "epoch": 0.06078481957554174, "grad_norm": 0.28515625, "learning_rate": 0.0015078620977648117, "loss": 0.1375, "step": 34282 }, { "epoch": 0.06078836574085156, "grad_norm": 0.27734375, "learning_rate": 0.0015078090408204943, "loss": 0.2366, "step": 34284 }, { "epoch": 0.06079191190616137, "grad_norm": 0.640625, "learning_rate": 0.0015077559820926571, "loss": 0.2469, "step": 34286 }, { "epoch": 0.06079545807147119, "grad_norm": 1.7265625, "learning_rate": 0.001507702921581532, "loss": 0.3724, "step": 34288 }, { "epoch": 0.060799004236781, "grad_norm": 0.54296875, "learning_rate": 0.0015076498592873516, "loss": 0.2846, "step": 34290 }, { "epoch": 0.060802550402090816, "grad_norm": 0.51171875, "learning_rate": 0.0015075967952103472, "loss": 0.1489, "step": 34292 }, { "epoch": 0.06080609656740064, "grad_norm": 0.6171875, "learning_rate": 0.001507543729350751, "loss": 0.2349, "step": 34294 }, { "epoch": 0.06080964273271045, "grad_norm": 0.296875, "learning_rate": 0.0015074906617087956, "loss": 0.1639, "step": 34296 }, { "epoch": 0.060813188898020266, "grad_norm": 0.56640625, "learning_rate": 0.0015074375922847123, "loss": 0.1612, "step": 34298 }, { "epoch": 0.06081673506333008, "grad_norm": 1.890625, "learning_rate": 0.0015073845210787343, "loss": 0.2798, "step": 34300 }, { "epoch": 0.060820281228639896, "grad_norm": 0.271484375, "learning_rate": 0.0015073314480910927, "loss": 0.2723, "step": 34302 }, { "epoch": 0.06082382739394971, "grad_norm": 0.94921875, "learning_rate": 0.0015072783733220197, "loss": 0.2183, "step": 34304 }, { "epoch": 0.060827373559259525, "grad_norm": 0.494140625, "learning_rate": 0.0015072252967717479, "loss": 0.1875, "step": 34306 }, { "epoch": 0.06083091972456934, "grad_norm": 0.45703125, "learning_rate": 0.001507172218440509, "loss": 0.1897, "step": 34308 }, { "epoch": 0.060834465889879154, "grad_norm": 0.59765625, "learning_rate": 0.0015071191383285356, "loss": 0.2031, "step": 34310 }, { "epoch": 0.06083801205518897, "grad_norm": 0.2890625, "learning_rate": 0.0015070660564360596, "loss": 0.1898, "step": 34312 }, { "epoch": 0.06084155822049878, "grad_norm": 0.287109375, "learning_rate": 0.001507012972763313, "loss": 0.2115, "step": 34314 }, { "epoch": 0.060845104385808604, "grad_norm": 0.3046875, "learning_rate": 0.0015069598873105282, "loss": 0.2433, "step": 34316 }, { "epoch": 0.06084865055111842, "grad_norm": 0.82421875, "learning_rate": 0.001506906800077937, "loss": 0.1732, "step": 34318 }, { "epoch": 0.060852196716428233, "grad_norm": 0.51953125, "learning_rate": 0.001506853711065772, "loss": 0.2336, "step": 34320 }, { "epoch": 0.06085574288173805, "grad_norm": 0.46875, "learning_rate": 0.0015068006202742653, "loss": 0.185, "step": 34322 }, { "epoch": 0.06085928904704786, "grad_norm": 0.333984375, "learning_rate": 0.0015067475277036487, "loss": 0.231, "step": 34324 }, { "epoch": 0.06086283521235768, "grad_norm": 0.412109375, "learning_rate": 0.0015066944333541547, "loss": 0.1607, "step": 34326 }, { "epoch": 0.06086638137766749, "grad_norm": 0.21875, "learning_rate": 0.0015066413372260157, "loss": 0.1652, "step": 34328 }, { "epoch": 0.060869927542977306, "grad_norm": 1.046875, "learning_rate": 0.0015065882393194637, "loss": 0.2363, "step": 34330 }, { "epoch": 0.06087347370828712, "grad_norm": 0.40234375, "learning_rate": 0.0015065351396347307, "loss": 0.1671, "step": 34332 }, { "epoch": 0.060877019873596935, "grad_norm": 0.59765625, "learning_rate": 0.0015064820381720492, "loss": 0.2462, "step": 34334 }, { "epoch": 0.06088056603890675, "grad_norm": 0.5625, "learning_rate": 0.0015064289349316514, "loss": 0.2213, "step": 34336 }, { "epoch": 0.060884112204216564, "grad_norm": 0.294921875, "learning_rate": 0.0015063758299137696, "loss": 0.1974, "step": 34338 }, { "epoch": 0.060887658369526386, "grad_norm": 0.1826171875, "learning_rate": 0.0015063227231186358, "loss": 0.2979, "step": 34340 }, { "epoch": 0.0608912045348362, "grad_norm": 2.578125, "learning_rate": 0.0015062696145464827, "loss": 0.2572, "step": 34342 }, { "epoch": 0.060894750700146015, "grad_norm": 0.80078125, "learning_rate": 0.001506216504197542, "loss": 0.2297, "step": 34344 }, { "epoch": 0.06089829686545583, "grad_norm": 0.7265625, "learning_rate": 0.0015061633920720463, "loss": 0.2506, "step": 34346 }, { "epoch": 0.060901843030765644, "grad_norm": 0.451171875, "learning_rate": 0.0015061102781702281, "loss": 0.1948, "step": 34348 }, { "epoch": 0.06090538919607546, "grad_norm": 1.2578125, "learning_rate": 0.0015060571624923192, "loss": 0.2515, "step": 34350 }, { "epoch": 0.06090893536138527, "grad_norm": 0.466796875, "learning_rate": 0.0015060040450385523, "loss": 0.282, "step": 34352 }, { "epoch": 0.06091248152669509, "grad_norm": 0.408203125, "learning_rate": 0.0015059509258091594, "loss": 0.1922, "step": 34354 }, { "epoch": 0.0609160276920049, "grad_norm": 1.7890625, "learning_rate": 0.0015058978048043733, "loss": 0.2961, "step": 34356 }, { "epoch": 0.06091957385731472, "grad_norm": 1.1171875, "learning_rate": 0.0015058446820244255, "loss": 0.2125, "step": 34358 }, { "epoch": 0.06092312002262453, "grad_norm": 0.6328125, "learning_rate": 0.001505791557469549, "loss": 0.2456, "step": 34360 }, { "epoch": 0.06092666618793435, "grad_norm": 1.53125, "learning_rate": 0.0015057384311399759, "loss": 0.3092, "step": 34362 }, { "epoch": 0.06093021235324417, "grad_norm": 0.33203125, "learning_rate": 0.0015056853030359387, "loss": 0.272, "step": 34364 }, { "epoch": 0.06093375851855398, "grad_norm": 0.37890625, "learning_rate": 0.0015056321731576696, "loss": 0.183, "step": 34366 }, { "epoch": 0.0609373046838638, "grad_norm": 0.373046875, "learning_rate": 0.001505579041505401, "loss": 0.208, "step": 34368 }, { "epoch": 0.06094085084917361, "grad_norm": 0.69140625, "learning_rate": 0.0015055259080793651, "loss": 0.1941, "step": 34370 }, { "epoch": 0.060944397014483426, "grad_norm": 0.58203125, "learning_rate": 0.0015054727728797946, "loss": 0.1646, "step": 34372 }, { "epoch": 0.06094794317979324, "grad_norm": 0.306640625, "learning_rate": 0.001505419635906922, "loss": 0.2005, "step": 34374 }, { "epoch": 0.060951489345103055, "grad_norm": 0.2373046875, "learning_rate": 0.001505366497160979, "loss": 0.2277, "step": 34376 }, { "epoch": 0.06095503551041287, "grad_norm": 0.265625, "learning_rate": 0.0015053133566421983, "loss": 0.1794, "step": 34378 }, { "epoch": 0.060958581675722684, "grad_norm": 0.37109375, "learning_rate": 0.0015052602143508127, "loss": 0.2272, "step": 34380 }, { "epoch": 0.0609621278410325, "grad_norm": 0.28515625, "learning_rate": 0.0015052070702870541, "loss": 0.1788, "step": 34382 }, { "epoch": 0.06096567400634232, "grad_norm": 2.484375, "learning_rate": 0.0015051539244511553, "loss": 0.1994, "step": 34384 }, { "epoch": 0.060969220171652135, "grad_norm": 0.95703125, "learning_rate": 0.0015051007768433486, "loss": 0.1803, "step": 34386 }, { "epoch": 0.06097276633696195, "grad_norm": 0.8046875, "learning_rate": 0.0015050476274638665, "loss": 0.1937, "step": 34388 }, { "epoch": 0.060976312502271764, "grad_norm": 0.56640625, "learning_rate": 0.001504994476312941, "loss": 0.4065, "step": 34390 }, { "epoch": 0.06097985866758158, "grad_norm": 0.9609375, "learning_rate": 0.0015049413233908053, "loss": 0.2244, "step": 34392 }, { "epoch": 0.06098340483289139, "grad_norm": 0.33203125, "learning_rate": 0.0015048881686976912, "loss": 0.2023, "step": 34394 }, { "epoch": 0.06098695099820121, "grad_norm": 0.4375, "learning_rate": 0.001504835012233832, "loss": 0.2578, "step": 34396 }, { "epoch": 0.06099049716351102, "grad_norm": 0.67578125, "learning_rate": 0.0015047818539994588, "loss": 0.1824, "step": 34398 }, { "epoch": 0.060994043328820836, "grad_norm": 0.2470703125, "learning_rate": 0.0015047286939948055, "loss": 0.2634, "step": 34400 }, { "epoch": 0.06099758949413065, "grad_norm": 5.53125, "learning_rate": 0.0015046755322201033, "loss": 0.1652, "step": 34402 }, { "epoch": 0.061001135659440466, "grad_norm": 0.2890625, "learning_rate": 0.0015046223686755863, "loss": 0.1776, "step": 34404 }, { "epoch": 0.06100468182475028, "grad_norm": 0.3828125, "learning_rate": 0.0015045692033614855, "loss": 0.1928, "step": 34406 }, { "epoch": 0.0610082279900601, "grad_norm": 0.271484375, "learning_rate": 0.001504516036278034, "loss": 0.1548, "step": 34408 }, { "epoch": 0.061011774155369916, "grad_norm": 1.015625, "learning_rate": 0.0015044628674254644, "loss": 0.2158, "step": 34410 }, { "epoch": 0.06101532032067973, "grad_norm": 0.318359375, "learning_rate": 0.0015044096968040092, "loss": 0.1866, "step": 34412 }, { "epoch": 0.061018866485989545, "grad_norm": 0.431640625, "learning_rate": 0.001504356524413901, "loss": 0.1999, "step": 34414 }, { "epoch": 0.06102241265129936, "grad_norm": 0.306640625, "learning_rate": 0.0015043033502553723, "loss": 0.2185, "step": 34416 }, { "epoch": 0.061025958816609174, "grad_norm": 2.890625, "learning_rate": 0.0015042501743286552, "loss": 0.3017, "step": 34418 }, { "epoch": 0.06102950498191899, "grad_norm": 0.4609375, "learning_rate": 0.0015041969966339828, "loss": 0.257, "step": 34420 }, { "epoch": 0.0610330511472288, "grad_norm": 0.236328125, "learning_rate": 0.0015041438171715877, "loss": 0.1876, "step": 34422 }, { "epoch": 0.06103659731253862, "grad_norm": 0.396484375, "learning_rate": 0.001504090635941702, "loss": 0.1703, "step": 34424 }, { "epoch": 0.06104014347784843, "grad_norm": 0.9453125, "learning_rate": 0.0015040374529445588, "loss": 0.1794, "step": 34426 }, { "epoch": 0.06104368964315825, "grad_norm": 0.404296875, "learning_rate": 0.0015039842681803905, "loss": 0.1854, "step": 34428 }, { "epoch": 0.06104723580846807, "grad_norm": 0.3046875, "learning_rate": 0.00150393108164943, "loss": 0.1819, "step": 34430 }, { "epoch": 0.06105078197377788, "grad_norm": 0.416015625, "learning_rate": 0.001503877893351909, "loss": 0.1912, "step": 34432 }, { "epoch": 0.0610543281390877, "grad_norm": 0.51171875, "learning_rate": 0.001503824703288061, "loss": 0.1576, "step": 34434 }, { "epoch": 0.06105787430439751, "grad_norm": 0.30859375, "learning_rate": 0.0015037715114581181, "loss": 0.2359, "step": 34436 }, { "epoch": 0.06106142046970733, "grad_norm": 0.75, "learning_rate": 0.0015037183178623135, "loss": 0.2798, "step": 34438 }, { "epoch": 0.06106496663501714, "grad_norm": 0.6953125, "learning_rate": 0.001503665122500879, "loss": 0.2177, "step": 34440 }, { "epoch": 0.061068512800326956, "grad_norm": 0.5390625, "learning_rate": 0.0015036119253740482, "loss": 0.236, "step": 34442 }, { "epoch": 0.06107205896563677, "grad_norm": 0.80078125, "learning_rate": 0.001503558726482053, "loss": 0.491, "step": 34444 }, { "epoch": 0.061075605130946585, "grad_norm": 0.322265625, "learning_rate": 0.0015035055258251265, "loss": 0.2714, "step": 34446 }, { "epoch": 0.0610791512962564, "grad_norm": 0.69921875, "learning_rate": 0.001503452323403501, "loss": 0.1709, "step": 34448 }, { "epoch": 0.061082697461566214, "grad_norm": 0.34765625, "learning_rate": 0.0015033991192174097, "loss": 0.1825, "step": 34450 }, { "epoch": 0.061086243626876036, "grad_norm": 1.5390625, "learning_rate": 0.0015033459132670847, "loss": 0.1944, "step": 34452 }, { "epoch": 0.06108978979218585, "grad_norm": 0.8515625, "learning_rate": 0.0015032927055527593, "loss": 0.184, "step": 34454 }, { "epoch": 0.061093335957495665, "grad_norm": 0.78515625, "learning_rate": 0.0015032394960746655, "loss": 0.1712, "step": 34456 }, { "epoch": 0.06109688212280548, "grad_norm": 0.380859375, "learning_rate": 0.0015031862848330364, "loss": 0.1529, "step": 34458 }, { "epoch": 0.061100428288115294, "grad_norm": 0.2294921875, "learning_rate": 0.0015031330718281047, "loss": 0.1914, "step": 34460 }, { "epoch": 0.06110397445342511, "grad_norm": 0.60546875, "learning_rate": 0.0015030798570601033, "loss": 0.2384, "step": 34462 }, { "epoch": 0.06110752061873492, "grad_norm": 0.75, "learning_rate": 0.0015030266405292646, "loss": 0.1939, "step": 34464 }, { "epoch": 0.06111106678404474, "grad_norm": 0.34375, "learning_rate": 0.0015029734222358215, "loss": 0.2311, "step": 34466 }, { "epoch": 0.06111461294935455, "grad_norm": 0.828125, "learning_rate": 0.0015029202021800067, "loss": 0.1865, "step": 34468 }, { "epoch": 0.06111815911466437, "grad_norm": 0.609375, "learning_rate": 0.0015028669803620533, "loss": 0.2351, "step": 34470 }, { "epoch": 0.06112170527997418, "grad_norm": 0.265625, "learning_rate": 0.0015028137567821933, "loss": 0.1433, "step": 34472 }, { "epoch": 0.061125251445283996, "grad_norm": 8.5625, "learning_rate": 0.0015027605314406603, "loss": 0.2143, "step": 34474 }, { "epoch": 0.06112879761059382, "grad_norm": 0.341796875, "learning_rate": 0.0015027073043376862, "loss": 0.1764, "step": 34476 }, { "epoch": 0.06113234377590363, "grad_norm": 0.345703125, "learning_rate": 0.0015026540754735048, "loss": 0.1791, "step": 34478 }, { "epoch": 0.061135889941213446, "grad_norm": 0.494140625, "learning_rate": 0.001502600844848348, "loss": 0.1756, "step": 34480 }, { "epoch": 0.06113943610652326, "grad_norm": 0.2734375, "learning_rate": 0.0015025476124624492, "loss": 0.2309, "step": 34482 }, { "epoch": 0.061142982271833075, "grad_norm": 0.4921875, "learning_rate": 0.0015024943783160407, "loss": 0.2502, "step": 34484 }, { "epoch": 0.06114652843714289, "grad_norm": 1.2109375, "learning_rate": 0.0015024411424093558, "loss": 0.2166, "step": 34486 }, { "epoch": 0.061150074602452704, "grad_norm": 0.263671875, "learning_rate": 0.001502387904742627, "loss": 0.1709, "step": 34488 }, { "epoch": 0.06115362076776252, "grad_norm": 0.482421875, "learning_rate": 0.0015023346653160875, "loss": 0.1955, "step": 34490 }, { "epoch": 0.061157166933072334, "grad_norm": 2.984375, "learning_rate": 0.0015022814241299697, "loss": 0.2294, "step": 34492 }, { "epoch": 0.06116071309838215, "grad_norm": 0.427734375, "learning_rate": 0.0015022281811845064, "loss": 0.2518, "step": 34494 }, { "epoch": 0.06116425926369196, "grad_norm": 0.88671875, "learning_rate": 0.0015021749364799307, "loss": 0.2057, "step": 34496 }, { "epoch": 0.061167805429001784, "grad_norm": 0.64453125, "learning_rate": 0.0015021216900164755, "loss": 0.2221, "step": 34498 }, { "epoch": 0.0611713515943116, "grad_norm": 0.443359375, "learning_rate": 0.0015020684417943737, "loss": 0.1698, "step": 34500 }, { "epoch": 0.06117489775962141, "grad_norm": 0.3359375, "learning_rate": 0.001502015191813858, "loss": 0.1485, "step": 34502 }, { "epoch": 0.06117844392493123, "grad_norm": 0.1591796875, "learning_rate": 0.0015019619400751616, "loss": 0.2195, "step": 34504 }, { "epoch": 0.06118199009024104, "grad_norm": 0.69921875, "learning_rate": 0.0015019086865785173, "loss": 0.3035, "step": 34506 }, { "epoch": 0.06118553625555086, "grad_norm": 0.30078125, "learning_rate": 0.0015018554313241572, "loss": 0.3628, "step": 34508 }, { "epoch": 0.06118908242086067, "grad_norm": 1.0390625, "learning_rate": 0.0015018021743123154, "loss": 0.4544, "step": 34510 }, { "epoch": 0.061192628586170486, "grad_norm": 0.3046875, "learning_rate": 0.001501748915543224, "loss": 0.1726, "step": 34512 }, { "epoch": 0.0611961747514803, "grad_norm": 0.25390625, "learning_rate": 0.0015016956550171164, "loss": 0.2081, "step": 34514 }, { "epoch": 0.061199720916790115, "grad_norm": 1.2109375, "learning_rate": 0.0015016423927342252, "loss": 0.2451, "step": 34516 }, { "epoch": 0.06120326708209993, "grad_norm": 0.56640625, "learning_rate": 0.0015015891286947834, "loss": 0.2175, "step": 34518 }, { "epoch": 0.06120681324740975, "grad_norm": 0.51171875, "learning_rate": 0.0015015358628990246, "loss": 0.2116, "step": 34520 }, { "epoch": 0.061210359412719566, "grad_norm": 1.0625, "learning_rate": 0.0015014825953471806, "loss": 0.191, "step": 34522 }, { "epoch": 0.06121390557802938, "grad_norm": 0.458984375, "learning_rate": 0.0015014293260394852, "loss": 0.1777, "step": 34524 }, { "epoch": 0.061217451743339195, "grad_norm": 12.375, "learning_rate": 0.0015013760549761708, "loss": 0.2444, "step": 34526 }, { "epoch": 0.06122099790864901, "grad_norm": 1.3125, "learning_rate": 0.001501322782157471, "loss": 0.2308, "step": 34528 }, { "epoch": 0.061224544073958824, "grad_norm": 0.2490234375, "learning_rate": 0.0015012695075836183, "loss": 0.1894, "step": 34530 }, { "epoch": 0.06122809023926864, "grad_norm": 0.12353515625, "learning_rate": 0.0015012162312548459, "loss": 0.1279, "step": 34532 }, { "epoch": 0.06123163640457845, "grad_norm": 0.365234375, "learning_rate": 0.0015011629531713866, "loss": 0.2134, "step": 34534 }, { "epoch": 0.06123518256988827, "grad_norm": 0.25, "learning_rate": 0.0015011096733334739, "loss": 0.237, "step": 34536 }, { "epoch": 0.06123872873519808, "grad_norm": 2.921875, "learning_rate": 0.00150105639174134, "loss": 0.2756, "step": 34538 }, { "epoch": 0.0612422749005079, "grad_norm": 0.76171875, "learning_rate": 0.001501003108395219, "loss": 0.1535, "step": 34540 }, { "epoch": 0.06124582106581771, "grad_norm": 0.3359375, "learning_rate": 0.001500949823295343, "loss": 0.3248, "step": 34542 }, { "epoch": 0.06124936723112753, "grad_norm": 0.310546875, "learning_rate": 0.0015008965364419456, "loss": 0.1457, "step": 34544 }, { "epoch": 0.06125291339643735, "grad_norm": 1.0859375, "learning_rate": 0.0015008432478352592, "loss": 0.2861, "step": 34546 }, { "epoch": 0.06125645956174716, "grad_norm": 0.69921875, "learning_rate": 0.0015007899574755177, "loss": 0.227, "step": 34548 }, { "epoch": 0.061260005727056976, "grad_norm": 0.5390625, "learning_rate": 0.0015007366653629532, "loss": 0.1863, "step": 34550 }, { "epoch": 0.06126355189236679, "grad_norm": 0.79296875, "learning_rate": 0.0015006833714978, "loss": 0.2456, "step": 34552 }, { "epoch": 0.061267098057676606, "grad_norm": 0.3359375, "learning_rate": 0.00150063007588029, "loss": 0.2015, "step": 34554 }, { "epoch": 0.06127064422298642, "grad_norm": 0.373046875, "learning_rate": 0.0015005767785106568, "loss": 0.1789, "step": 34556 }, { "epoch": 0.061274190388296235, "grad_norm": 0.56640625, "learning_rate": 0.0015005234793891333, "loss": 0.1635, "step": 34558 }, { "epoch": 0.06127773655360605, "grad_norm": 0.263671875, "learning_rate": 0.0015004701785159534, "loss": 0.1848, "step": 34560 }, { "epoch": 0.061281282718915864, "grad_norm": 0.330078125, "learning_rate": 0.0015004168758913491, "loss": 0.2295, "step": 34562 }, { "epoch": 0.06128482888422568, "grad_norm": 0.435546875, "learning_rate": 0.001500363571515554, "loss": 0.181, "step": 34564 }, { "epoch": 0.0612883750495355, "grad_norm": 0.3671875, "learning_rate": 0.0015003102653888011, "loss": 0.1916, "step": 34566 }, { "epoch": 0.061291921214845314, "grad_norm": 0.75, "learning_rate": 0.0015002569575113239, "loss": 0.1759, "step": 34568 }, { "epoch": 0.06129546738015513, "grad_norm": 0.41015625, "learning_rate": 0.001500203647883355, "loss": 0.2176, "step": 34570 }, { "epoch": 0.06129901354546494, "grad_norm": 0.39453125, "learning_rate": 0.0015001503365051278, "loss": 0.21, "step": 34572 }, { "epoch": 0.06130255971077476, "grad_norm": 0.79296875, "learning_rate": 0.0015000970233768757, "loss": 0.4213, "step": 34574 }, { "epoch": 0.06130610587608457, "grad_norm": 0.58984375, "learning_rate": 0.0015000437084988314, "loss": 0.2624, "step": 34576 }, { "epoch": 0.06130965204139439, "grad_norm": 0.71875, "learning_rate": 0.0014999903918712283, "loss": 0.2069, "step": 34578 }, { "epoch": 0.0613131982067042, "grad_norm": 0.93359375, "learning_rate": 0.0014999370734942999, "loss": 0.2187, "step": 34580 }, { "epoch": 0.061316744372014016, "grad_norm": 0.51171875, "learning_rate": 0.0014998837533682788, "loss": 0.1739, "step": 34582 }, { "epoch": 0.06132029053732383, "grad_norm": 0.30078125, "learning_rate": 0.0014998304314933987, "loss": 0.2151, "step": 34584 }, { "epoch": 0.061323836702633645, "grad_norm": 1.515625, "learning_rate": 0.001499777107869892, "loss": 0.1902, "step": 34586 }, { "epoch": 0.06132738286794347, "grad_norm": 0.6484375, "learning_rate": 0.0014997237824979927, "loss": 0.2281, "step": 34588 }, { "epoch": 0.06133092903325328, "grad_norm": 0.515625, "learning_rate": 0.001499670455377934, "loss": 0.1539, "step": 34590 }, { "epoch": 0.061334475198563096, "grad_norm": 0.65234375, "learning_rate": 0.0014996171265099483, "loss": 0.2229, "step": 34592 }, { "epoch": 0.06133802136387291, "grad_norm": 0.380859375, "learning_rate": 0.00149956379589427, "loss": 0.199, "step": 34594 }, { "epoch": 0.061341567529182725, "grad_norm": 0.53125, "learning_rate": 0.0014995104635311314, "loss": 0.2446, "step": 34596 }, { "epoch": 0.06134511369449254, "grad_norm": 0.453125, "learning_rate": 0.0014994571294207665, "loss": 0.1404, "step": 34598 }, { "epoch": 0.061348659859802354, "grad_norm": 0.33203125, "learning_rate": 0.001499403793563408, "loss": 0.2608, "step": 34600 }, { "epoch": 0.06135220602511217, "grad_norm": 0.828125, "learning_rate": 0.001499350455959289, "loss": 0.2032, "step": 34602 }, { "epoch": 0.06135575219042198, "grad_norm": 0.6484375, "learning_rate": 0.0014992971166086432, "loss": 0.1631, "step": 34604 }, { "epoch": 0.0613592983557318, "grad_norm": 0.390625, "learning_rate": 0.001499243775511704, "loss": 0.2224, "step": 34606 }, { "epoch": 0.06136284452104161, "grad_norm": 0.44921875, "learning_rate": 0.0014991904326687043, "loss": 0.1413, "step": 34608 }, { "epoch": 0.06136639068635143, "grad_norm": 0.326171875, "learning_rate": 0.0014991370880798776, "loss": 0.1558, "step": 34610 }, { "epoch": 0.06136993685166125, "grad_norm": 1.4140625, "learning_rate": 0.0014990837417454567, "loss": 0.2008, "step": 34612 }, { "epoch": 0.06137348301697106, "grad_norm": 0.365234375, "learning_rate": 0.001499030393665676, "loss": 0.2262, "step": 34614 }, { "epoch": 0.06137702918228088, "grad_norm": 0.8515625, "learning_rate": 0.0014989770438407677, "loss": 0.2118, "step": 34616 }, { "epoch": 0.06138057534759069, "grad_norm": 0.40625, "learning_rate": 0.0014989236922709657, "loss": 0.2164, "step": 34618 }, { "epoch": 0.06138412151290051, "grad_norm": 0.37109375, "learning_rate": 0.001498870338956503, "loss": 0.1972, "step": 34620 }, { "epoch": 0.06138766767821032, "grad_norm": 1.4921875, "learning_rate": 0.0014988169838976134, "loss": 0.2374, "step": 34622 }, { "epoch": 0.061391213843520136, "grad_norm": 0.65234375, "learning_rate": 0.0014987636270945297, "loss": 0.1705, "step": 34624 }, { "epoch": 0.06139476000882995, "grad_norm": 0.47265625, "learning_rate": 0.0014987102685474858, "loss": 0.1743, "step": 34626 }, { "epoch": 0.061398306174139765, "grad_norm": 0.59765625, "learning_rate": 0.0014986569082567145, "loss": 0.2685, "step": 34628 }, { "epoch": 0.06140185233944958, "grad_norm": 1.203125, "learning_rate": 0.0014986035462224497, "loss": 0.2621, "step": 34630 }, { "epoch": 0.061405398504759394, "grad_norm": 0.6484375, "learning_rate": 0.0014985501824449244, "loss": 0.1588, "step": 34632 }, { "epoch": 0.061408944670069215, "grad_norm": 0.68359375, "learning_rate": 0.0014984968169243723, "loss": 0.4406, "step": 34634 }, { "epoch": 0.06141249083537903, "grad_norm": 0.462890625, "learning_rate": 0.0014984434496610264, "loss": 0.1562, "step": 34636 }, { "epoch": 0.061416037000688845, "grad_norm": 0.4296875, "learning_rate": 0.0014983900806551204, "loss": 0.2989, "step": 34638 }, { "epoch": 0.06141958316599866, "grad_norm": 1.6015625, "learning_rate": 0.0014983367099068875, "loss": 0.2141, "step": 34640 }, { "epoch": 0.061423129331308474, "grad_norm": 0.39453125, "learning_rate": 0.0014982833374165613, "loss": 0.2135, "step": 34642 }, { "epoch": 0.06142667549661829, "grad_norm": 0.34375, "learning_rate": 0.0014982299631843747, "loss": 0.1691, "step": 34644 }, { "epoch": 0.0614302216619281, "grad_norm": 0.3125, "learning_rate": 0.0014981765872105622, "loss": 0.1785, "step": 34646 }, { "epoch": 0.06143376782723792, "grad_norm": 0.435546875, "learning_rate": 0.0014981232094953563, "loss": 0.1763, "step": 34648 }, { "epoch": 0.06143731399254773, "grad_norm": 0.4140625, "learning_rate": 0.0014980698300389908, "loss": 0.1682, "step": 34650 }, { "epoch": 0.061440860157857546, "grad_norm": 0.416015625, "learning_rate": 0.001498016448841699, "loss": 0.2167, "step": 34652 }, { "epoch": 0.06144440632316736, "grad_norm": 0.81640625, "learning_rate": 0.0014979630659037145, "loss": 0.181, "step": 34654 }, { "epoch": 0.06144795248847718, "grad_norm": 1.71875, "learning_rate": 0.0014979096812252708, "loss": 0.217, "step": 34656 }, { "epoch": 0.061451498653787, "grad_norm": 2.28125, "learning_rate": 0.0014978562948066013, "loss": 0.2298, "step": 34658 }, { "epoch": 0.06145504481909681, "grad_norm": 0.357421875, "learning_rate": 0.0014978029066479394, "loss": 0.1449, "step": 34660 }, { "epoch": 0.061458590984406626, "grad_norm": 0.4453125, "learning_rate": 0.0014977495167495184, "loss": 0.1586, "step": 34662 }, { "epoch": 0.06146213714971644, "grad_norm": 0.2490234375, "learning_rate": 0.0014976961251115725, "loss": 0.1415, "step": 34664 }, { "epoch": 0.061465683315026255, "grad_norm": 0.56640625, "learning_rate": 0.0014976427317343346, "loss": 0.2266, "step": 34666 }, { "epoch": 0.06146922948033607, "grad_norm": 0.73828125, "learning_rate": 0.0014975893366180383, "loss": 0.2184, "step": 34668 }, { "epoch": 0.061472775645645884, "grad_norm": 0.353515625, "learning_rate": 0.0014975359397629171, "loss": 0.1466, "step": 34670 }, { "epoch": 0.0614763218109557, "grad_norm": 1.328125, "learning_rate": 0.0014974825411692048, "loss": 0.2423, "step": 34672 }, { "epoch": 0.06147986797626551, "grad_norm": 0.365234375, "learning_rate": 0.0014974291408371347, "loss": 0.1627, "step": 34674 }, { "epoch": 0.06148341414157533, "grad_norm": 0.875, "learning_rate": 0.0014973757387669404, "loss": 0.2008, "step": 34676 }, { "epoch": 0.06148696030688514, "grad_norm": 0.328125, "learning_rate": 0.0014973223349588553, "loss": 0.1922, "step": 34678 }, { "epoch": 0.061490506472194964, "grad_norm": 0.87890625, "learning_rate": 0.0014972689294131135, "loss": 0.1672, "step": 34680 }, { "epoch": 0.06149405263750478, "grad_norm": 2.125, "learning_rate": 0.0014972155221299477, "loss": 0.2697, "step": 34682 }, { "epoch": 0.06149759880281459, "grad_norm": 0.447265625, "learning_rate": 0.001497162113109592, "loss": 0.1629, "step": 34684 }, { "epoch": 0.06150114496812441, "grad_norm": 0.251953125, "learning_rate": 0.00149710870235228, "loss": 0.1873, "step": 34686 }, { "epoch": 0.06150469113343422, "grad_norm": 0.33203125, "learning_rate": 0.0014970552898582452, "loss": 0.3756, "step": 34688 }, { "epoch": 0.06150823729874404, "grad_norm": 0.396484375, "learning_rate": 0.0014970018756277216, "loss": 0.208, "step": 34690 }, { "epoch": 0.06151178346405385, "grad_norm": 0.14453125, "learning_rate": 0.001496948459660942, "loss": 0.1601, "step": 34692 }, { "epoch": 0.061515329629363666, "grad_norm": 0.74609375, "learning_rate": 0.0014968950419581401, "loss": 0.2215, "step": 34694 }, { "epoch": 0.06151887579467348, "grad_norm": 0.9609375, "learning_rate": 0.0014968416225195505, "loss": 0.3586, "step": 34696 }, { "epoch": 0.061522421959983295, "grad_norm": 0.228515625, "learning_rate": 0.0014967882013454058, "loss": 0.1772, "step": 34698 }, { "epoch": 0.06152596812529311, "grad_norm": 0.48828125, "learning_rate": 0.00149673477843594, "loss": 0.1518, "step": 34700 }, { "epoch": 0.06152951429060293, "grad_norm": 0.447265625, "learning_rate": 0.0014966813537913865, "loss": 0.2412, "step": 34702 }, { "epoch": 0.061533060455912746, "grad_norm": 1.421875, "learning_rate": 0.0014966279274119796, "loss": 0.3623, "step": 34704 }, { "epoch": 0.06153660662122256, "grad_norm": 0.50390625, "learning_rate": 0.0014965744992979525, "loss": 0.2091, "step": 34706 }, { "epoch": 0.061540152786532375, "grad_norm": 0.4296875, "learning_rate": 0.0014965210694495388, "loss": 0.1975, "step": 34708 }, { "epoch": 0.06154369895184219, "grad_norm": 0.287109375, "learning_rate": 0.0014964676378669722, "loss": 0.1665, "step": 34710 }, { "epoch": 0.061547245117152004, "grad_norm": 0.94140625, "learning_rate": 0.0014964142045504865, "loss": 0.3113, "step": 34712 }, { "epoch": 0.06155079128246182, "grad_norm": 0.330078125, "learning_rate": 0.0014963607695003152, "loss": 0.3338, "step": 34714 }, { "epoch": 0.06155433744777163, "grad_norm": 0.380859375, "learning_rate": 0.0014963073327166922, "loss": 0.1652, "step": 34716 }, { "epoch": 0.06155788361308145, "grad_norm": 0.4921875, "learning_rate": 0.0014962538941998512, "loss": 0.1986, "step": 34718 }, { "epoch": 0.06156142977839126, "grad_norm": 0.310546875, "learning_rate": 0.0014962004539500258, "loss": 0.1905, "step": 34720 }, { "epoch": 0.06156497594370108, "grad_norm": 0.458984375, "learning_rate": 0.0014961470119674498, "loss": 0.2426, "step": 34722 }, { "epoch": 0.0615685221090109, "grad_norm": 0.3125, "learning_rate": 0.0014960935682523566, "loss": 0.1844, "step": 34724 }, { "epoch": 0.06157206827432071, "grad_norm": 0.431640625, "learning_rate": 0.0014960401228049807, "loss": 0.149, "step": 34726 }, { "epoch": 0.06157561443963053, "grad_norm": 5.21875, "learning_rate": 0.001495986675625555, "loss": 0.5408, "step": 34728 }, { "epoch": 0.06157916060494034, "grad_norm": 0.54296875, "learning_rate": 0.0014959332267143138, "loss": 0.2147, "step": 34730 }, { "epoch": 0.061582706770250156, "grad_norm": 0.86328125, "learning_rate": 0.0014958797760714906, "loss": 0.225, "step": 34732 }, { "epoch": 0.06158625293555997, "grad_norm": 0.37109375, "learning_rate": 0.0014958263236973192, "loss": 0.1772, "step": 34734 }, { "epoch": 0.061589799100869785, "grad_norm": 0.494140625, "learning_rate": 0.0014957728695920332, "loss": 0.1997, "step": 34736 }, { "epoch": 0.0615933452661796, "grad_norm": 0.232421875, "learning_rate": 0.0014957194137558668, "loss": 0.1952, "step": 34738 }, { "epoch": 0.061596891431489414, "grad_norm": 0.8203125, "learning_rate": 0.0014956659561890535, "loss": 0.1859, "step": 34740 }, { "epoch": 0.06160043759679923, "grad_norm": 0.51953125, "learning_rate": 0.001495612496891827, "loss": 0.3235, "step": 34742 }, { "epoch": 0.061603983762109044, "grad_norm": 0.298828125, "learning_rate": 0.0014955590358644213, "loss": 0.213, "step": 34744 }, { "epoch": 0.06160752992741886, "grad_norm": 1.015625, "learning_rate": 0.00149550557310707, "loss": 0.2689, "step": 34746 }, { "epoch": 0.06161107609272868, "grad_norm": 0.384765625, "learning_rate": 0.0014954521086200075, "loss": 0.1958, "step": 34748 }, { "epoch": 0.061614622258038494, "grad_norm": 0.3984375, "learning_rate": 0.001495398642403467, "loss": 0.1578, "step": 34750 }, { "epoch": 0.06161816842334831, "grad_norm": 0.72265625, "learning_rate": 0.0014953451744576824, "loss": 0.1753, "step": 34752 }, { "epoch": 0.06162171458865812, "grad_norm": 0.2333984375, "learning_rate": 0.0014952917047828878, "loss": 0.164, "step": 34754 }, { "epoch": 0.06162526075396794, "grad_norm": 0.498046875, "learning_rate": 0.0014952382333793164, "loss": 0.1998, "step": 34756 }, { "epoch": 0.06162880691927775, "grad_norm": 0.3671875, "learning_rate": 0.001495184760247203, "loss": 0.1611, "step": 34758 }, { "epoch": 0.06163235308458757, "grad_norm": 0.26953125, "learning_rate": 0.0014951312853867809, "loss": 0.1548, "step": 34760 }, { "epoch": 0.06163589924989738, "grad_norm": 0.45703125, "learning_rate": 0.0014950778087982845, "loss": 0.1226, "step": 34762 }, { "epoch": 0.061639445415207196, "grad_norm": 1.0234375, "learning_rate": 0.0014950243304819467, "loss": 0.1984, "step": 34764 }, { "epoch": 0.06164299158051701, "grad_norm": 0.921875, "learning_rate": 0.001494970850438002, "loss": 0.2171, "step": 34766 }, { "epoch": 0.061646537745826825, "grad_norm": 1.1171875, "learning_rate": 0.0014949173686666843, "loss": 0.1799, "step": 34768 }, { "epoch": 0.06165008391113665, "grad_norm": 0.287109375, "learning_rate": 0.0014948638851682277, "loss": 0.192, "step": 34770 }, { "epoch": 0.06165363007644646, "grad_norm": 0.30078125, "learning_rate": 0.0014948103999428656, "loss": 0.1935, "step": 34772 }, { "epoch": 0.061657176241756276, "grad_norm": 0.53515625, "learning_rate": 0.001494756912990832, "loss": 0.1811, "step": 34774 }, { "epoch": 0.06166072240706609, "grad_norm": 0.431640625, "learning_rate": 0.001494703424312361, "loss": 0.1886, "step": 34776 }, { "epoch": 0.061664268572375905, "grad_norm": 0.609375, "learning_rate": 0.0014946499339076867, "loss": 0.1569, "step": 34778 }, { "epoch": 0.06166781473768572, "grad_norm": 0.47265625, "learning_rate": 0.0014945964417770426, "loss": 0.1745, "step": 34780 }, { "epoch": 0.061671360902995534, "grad_norm": 0.66015625, "learning_rate": 0.0014945429479206631, "loss": 0.183, "step": 34782 }, { "epoch": 0.06167490706830535, "grad_norm": 0.33984375, "learning_rate": 0.001494489452338782, "loss": 0.147, "step": 34784 }, { "epoch": 0.06167845323361516, "grad_norm": 4.09375, "learning_rate": 0.0014944359550316329, "loss": 0.2284, "step": 34786 }, { "epoch": 0.06168199939892498, "grad_norm": 0.58984375, "learning_rate": 0.0014943824559994498, "loss": 0.2031, "step": 34788 }, { "epoch": 0.06168554556423479, "grad_norm": 0.33984375, "learning_rate": 0.0014943289552424674, "loss": 0.1411, "step": 34790 }, { "epoch": 0.061689091729544614, "grad_norm": 0.267578125, "learning_rate": 0.0014942754527609192, "loss": 0.1774, "step": 34792 }, { "epoch": 0.06169263789485443, "grad_norm": 0.515625, "learning_rate": 0.0014942219485550392, "loss": 0.1769, "step": 34794 }, { "epoch": 0.06169618406016424, "grad_norm": 0.224609375, "learning_rate": 0.001494168442625061, "loss": 0.139, "step": 34796 }, { "epoch": 0.06169973022547406, "grad_norm": 0.67578125, "learning_rate": 0.0014941149349712193, "loss": 0.1892, "step": 34798 }, { "epoch": 0.06170327639078387, "grad_norm": 0.357421875, "learning_rate": 0.0014940614255937476, "loss": 0.3919, "step": 34800 }, { "epoch": 0.061706822556093686, "grad_norm": 0.376953125, "learning_rate": 0.0014940079144928804, "loss": 0.2716, "step": 34802 }, { "epoch": 0.0617103687214035, "grad_norm": 0.671875, "learning_rate": 0.0014939544016688516, "loss": 0.471, "step": 34804 }, { "epoch": 0.061713914886713316, "grad_norm": 0.5546875, "learning_rate": 0.0014939008871218947, "loss": 0.1717, "step": 34806 }, { "epoch": 0.06171746105202313, "grad_norm": 1.2421875, "learning_rate": 0.0014938473708522443, "loss": 0.1561, "step": 34808 }, { "epoch": 0.061721007217332945, "grad_norm": 3.609375, "learning_rate": 0.001493793852860134, "loss": 0.3465, "step": 34810 }, { "epoch": 0.06172455338264276, "grad_norm": 0.9296875, "learning_rate": 0.0014937403331457984, "loss": 0.1834, "step": 34812 }, { "epoch": 0.061728099547952574, "grad_norm": 0.404296875, "learning_rate": 0.0014936868117094713, "loss": 0.2331, "step": 34814 }, { "epoch": 0.061731645713262395, "grad_norm": 0.52734375, "learning_rate": 0.0014936332885513864, "loss": 0.2051, "step": 34816 }, { "epoch": 0.06173519187857221, "grad_norm": 0.8359375, "learning_rate": 0.0014935797636717787, "loss": 0.1999, "step": 34818 }, { "epoch": 0.061738738043882024, "grad_norm": 3.859375, "learning_rate": 0.0014935262370708814, "loss": 0.1448, "step": 34820 }, { "epoch": 0.06174228420919184, "grad_norm": 0.333984375, "learning_rate": 0.0014934727087489291, "loss": 0.2183, "step": 34822 }, { "epoch": 0.06174583037450165, "grad_norm": 0.423828125, "learning_rate": 0.0014934191787061555, "loss": 0.1729, "step": 34824 }, { "epoch": 0.06174937653981147, "grad_norm": 1.2265625, "learning_rate": 0.001493365646942795, "loss": 0.4009, "step": 34826 }, { "epoch": 0.06175292270512128, "grad_norm": 0.263671875, "learning_rate": 0.0014933121134590817, "loss": 0.1703, "step": 34828 }, { "epoch": 0.0617564688704311, "grad_norm": 0.435546875, "learning_rate": 0.00149325857825525, "loss": 0.1811, "step": 34830 }, { "epoch": 0.06176001503574091, "grad_norm": 1.0078125, "learning_rate": 0.001493205041331533, "loss": 0.2168, "step": 34832 }, { "epoch": 0.061763561201050726, "grad_norm": 3.03125, "learning_rate": 0.0014931515026881661, "loss": 0.4823, "step": 34834 }, { "epoch": 0.06176710736636054, "grad_norm": 0.7109375, "learning_rate": 0.0014930979623253827, "loss": 0.1723, "step": 34836 }, { "epoch": 0.06177065353167036, "grad_norm": 0.734375, "learning_rate": 0.0014930444202434171, "loss": 0.1929, "step": 34838 }, { "epoch": 0.06177419969698018, "grad_norm": 0.546875, "learning_rate": 0.0014929908764425035, "loss": 0.3628, "step": 34840 }, { "epoch": 0.06177774586228999, "grad_norm": 0.84765625, "learning_rate": 0.0014929373309228767, "loss": 0.2019, "step": 34842 }, { "epoch": 0.061781292027599806, "grad_norm": 0.38671875, "learning_rate": 0.0014928837836847696, "loss": 0.2733, "step": 34844 }, { "epoch": 0.06178483819290962, "grad_norm": 0.52734375, "learning_rate": 0.0014928302347284172, "loss": 0.1806, "step": 34846 }, { "epoch": 0.061788384358219435, "grad_norm": 0.419921875, "learning_rate": 0.0014927766840540535, "loss": 0.1568, "step": 34848 }, { "epoch": 0.06179193052352925, "grad_norm": 0.6015625, "learning_rate": 0.0014927231316619128, "loss": 0.2004, "step": 34850 }, { "epoch": 0.061795476688839064, "grad_norm": 4.9375, "learning_rate": 0.0014926695775522292, "loss": 0.3137, "step": 34852 }, { "epoch": 0.06179902285414888, "grad_norm": 0.2080078125, "learning_rate": 0.0014926160217252366, "loss": 0.2168, "step": 34854 }, { "epoch": 0.06180256901945869, "grad_norm": 0.70703125, "learning_rate": 0.0014925624641811703, "loss": 0.2396, "step": 34856 }, { "epoch": 0.06180611518476851, "grad_norm": 0.5703125, "learning_rate": 0.0014925089049202633, "loss": 0.1988, "step": 34858 }, { "epoch": 0.06180966135007833, "grad_norm": 1.6171875, "learning_rate": 0.0014924553439427505, "loss": 0.2242, "step": 34860 }, { "epoch": 0.061813207515388144, "grad_norm": 0.49609375, "learning_rate": 0.0014924017812488661, "loss": 0.1721, "step": 34862 }, { "epoch": 0.06181675368069796, "grad_norm": 0.41015625, "learning_rate": 0.0014923482168388443, "loss": 0.1703, "step": 34864 }, { "epoch": 0.06182029984600777, "grad_norm": 1.15625, "learning_rate": 0.001492294650712919, "loss": 0.1427, "step": 34866 }, { "epoch": 0.06182384601131759, "grad_norm": 0.376953125, "learning_rate": 0.001492241082871325, "loss": 0.3468, "step": 34868 }, { "epoch": 0.0618273921766274, "grad_norm": 0.91015625, "learning_rate": 0.001492187513314296, "loss": 0.2412, "step": 34870 }, { "epoch": 0.06183093834193722, "grad_norm": 0.361328125, "learning_rate": 0.0014921339420420673, "loss": 0.2019, "step": 34872 }, { "epoch": 0.06183448450724703, "grad_norm": 0.34375, "learning_rate": 0.0014920803690548717, "loss": 0.4797, "step": 34874 }, { "epoch": 0.061838030672556846, "grad_norm": 0.76953125, "learning_rate": 0.0014920267943529448, "loss": 0.2345, "step": 34876 }, { "epoch": 0.06184157683786666, "grad_norm": 0.80859375, "learning_rate": 0.0014919732179365203, "loss": 0.1779, "step": 34878 }, { "epoch": 0.061845123003176475, "grad_norm": 0.7109375, "learning_rate": 0.001491919639805833, "loss": 0.1951, "step": 34880 }, { "epoch": 0.06184866916848629, "grad_norm": 0.5078125, "learning_rate": 0.0014918660599611163, "loss": 0.2229, "step": 34882 }, { "epoch": 0.06185221533379611, "grad_norm": 1.578125, "learning_rate": 0.0014918124784026054, "loss": 0.1722, "step": 34884 }, { "epoch": 0.061855761499105925, "grad_norm": 0.390625, "learning_rate": 0.001491758895130534, "loss": 0.1015, "step": 34886 }, { "epoch": 0.06185930766441574, "grad_norm": 0.49609375, "learning_rate": 0.0014917053101451373, "loss": 0.3386, "step": 34888 }, { "epoch": 0.061862853829725555, "grad_norm": 0.46484375, "learning_rate": 0.0014916517234466484, "loss": 0.1611, "step": 34890 }, { "epoch": 0.06186639999503537, "grad_norm": 0.400390625, "learning_rate": 0.001491598135035303, "loss": 0.1656, "step": 34892 }, { "epoch": 0.061869946160345184, "grad_norm": 0.921875, "learning_rate": 0.0014915445449113346, "loss": 0.15, "step": 34894 }, { "epoch": 0.061873492325655, "grad_norm": 1.40625, "learning_rate": 0.0014914909530749777, "loss": 0.2563, "step": 34896 }, { "epoch": 0.06187703849096481, "grad_norm": 0.51171875, "learning_rate": 0.0014914373595264667, "loss": 0.3065, "step": 34898 }, { "epoch": 0.06188058465627463, "grad_norm": 0.80078125, "learning_rate": 0.001491383764266036, "loss": 0.1922, "step": 34900 }, { "epoch": 0.06188413082158444, "grad_norm": 0.27734375, "learning_rate": 0.0014913301672939206, "loss": 0.2454, "step": 34902 }, { "epoch": 0.061887676986894256, "grad_norm": 0.43359375, "learning_rate": 0.0014912765686103538, "loss": 0.2055, "step": 34904 }, { "epoch": 0.06189122315220408, "grad_norm": 0.412109375, "learning_rate": 0.0014912229682155706, "loss": 0.2258, "step": 34906 }, { "epoch": 0.06189476931751389, "grad_norm": 1.2890625, "learning_rate": 0.0014911693661098058, "loss": 0.368, "step": 34908 }, { "epoch": 0.06189831548282371, "grad_norm": 0.15234375, "learning_rate": 0.0014911157622932928, "loss": 0.1522, "step": 34910 }, { "epoch": 0.06190186164813352, "grad_norm": 0.294921875, "learning_rate": 0.0014910621567662673, "loss": 0.1261, "step": 34912 }, { "epoch": 0.061905407813443336, "grad_norm": 0.66796875, "learning_rate": 0.0014910085495289625, "loss": 0.272, "step": 34914 }, { "epoch": 0.06190895397875315, "grad_norm": 0.28125, "learning_rate": 0.0014909549405816137, "loss": 0.1547, "step": 34916 }, { "epoch": 0.061912500144062965, "grad_norm": 0.734375, "learning_rate": 0.0014909013299244556, "loss": 0.17, "step": 34918 }, { "epoch": 0.06191604630937278, "grad_norm": 0.5234375, "learning_rate": 0.0014908477175577216, "loss": 0.1807, "step": 34920 }, { "epoch": 0.061919592474682594, "grad_norm": 0.3515625, "learning_rate": 0.0014907941034816466, "loss": 0.1939, "step": 34922 }, { "epoch": 0.06192313863999241, "grad_norm": 0.57421875, "learning_rate": 0.0014907404876964652, "loss": 0.2025, "step": 34924 }, { "epoch": 0.06192668480530222, "grad_norm": 0.3828125, "learning_rate": 0.001490686870202412, "loss": 0.1695, "step": 34926 }, { "epoch": 0.061930230970612045, "grad_norm": 0.90625, "learning_rate": 0.0014906332509997213, "loss": 0.3066, "step": 34928 }, { "epoch": 0.06193377713592186, "grad_norm": 0.48828125, "learning_rate": 0.0014905796300886279, "loss": 0.2318, "step": 34930 }, { "epoch": 0.061937323301231674, "grad_norm": 0.4609375, "learning_rate": 0.0014905260074693659, "loss": 0.2111, "step": 34932 }, { "epoch": 0.06194086946654149, "grad_norm": 0.44921875, "learning_rate": 0.00149047238314217, "loss": 0.5131, "step": 34934 }, { "epoch": 0.0619444156318513, "grad_norm": 0.2216796875, "learning_rate": 0.001490418757107275, "loss": 0.1889, "step": 34936 }, { "epoch": 0.06194796179716112, "grad_norm": 0.39453125, "learning_rate": 0.0014903651293649148, "loss": 0.1632, "step": 34938 }, { "epoch": 0.06195150796247093, "grad_norm": 0.4375, "learning_rate": 0.0014903114999153242, "loss": 0.2092, "step": 34940 }, { "epoch": 0.06195505412778075, "grad_norm": 0.220703125, "learning_rate": 0.001490257868758738, "loss": 0.1987, "step": 34942 }, { "epoch": 0.06195860029309056, "grad_norm": 4.53125, "learning_rate": 0.0014902042358953904, "loss": 0.256, "step": 34944 }, { "epoch": 0.061962146458400376, "grad_norm": 0.875, "learning_rate": 0.0014901506013255165, "loss": 0.2012, "step": 34946 }, { "epoch": 0.06196569262371019, "grad_norm": 0.6328125, "learning_rate": 0.0014900969650493501, "loss": 0.1424, "step": 34948 }, { "epoch": 0.061969238789020005, "grad_norm": 0.5234375, "learning_rate": 0.0014900433270671263, "loss": 0.1572, "step": 34950 }, { "epoch": 0.061972784954329826, "grad_norm": 0.63671875, "learning_rate": 0.0014899896873790798, "loss": 0.3867, "step": 34952 }, { "epoch": 0.06197633111963964, "grad_norm": 0.94140625, "learning_rate": 0.001489936045985445, "loss": 0.181, "step": 34954 }, { "epoch": 0.061979877284949456, "grad_norm": 0.349609375, "learning_rate": 0.0014898824028864558, "loss": 0.2172, "step": 34956 }, { "epoch": 0.06198342345025927, "grad_norm": 0.77734375, "learning_rate": 0.0014898287580823479, "loss": 0.2898, "step": 34958 }, { "epoch": 0.061986969615569085, "grad_norm": 0.318359375, "learning_rate": 0.0014897751115733554, "loss": 0.1683, "step": 34960 }, { "epoch": 0.0619905157808789, "grad_norm": 0.37890625, "learning_rate": 0.0014897214633597126, "loss": 0.1707, "step": 34962 }, { "epoch": 0.061994061946188714, "grad_norm": 1.0234375, "learning_rate": 0.0014896678134416548, "loss": 0.2218, "step": 34964 }, { "epoch": 0.06199760811149853, "grad_norm": 0.345703125, "learning_rate": 0.0014896141618194166, "loss": 0.1509, "step": 34966 }, { "epoch": 0.06200115427680834, "grad_norm": 0.6953125, "learning_rate": 0.001489560508493232, "loss": 0.2277, "step": 34968 }, { "epoch": 0.06200470044211816, "grad_norm": 0.16015625, "learning_rate": 0.0014895068534633363, "loss": 0.1865, "step": 34970 }, { "epoch": 0.06200824660742797, "grad_norm": 0.98828125, "learning_rate": 0.0014894531967299639, "loss": 0.2356, "step": 34972 }, { "epoch": 0.062011792772737793, "grad_norm": 1.59375, "learning_rate": 0.0014893995382933489, "loss": 0.497, "step": 34974 }, { "epoch": 0.06201533893804761, "grad_norm": 0.341796875, "learning_rate": 0.0014893458781537268, "loss": 0.1718, "step": 34976 }, { "epoch": 0.06201888510335742, "grad_norm": 0.189453125, "learning_rate": 0.0014892922163113323, "loss": 0.1593, "step": 34978 }, { "epoch": 0.06202243126866724, "grad_norm": 0.84375, "learning_rate": 0.0014892385527663994, "loss": 0.1671, "step": 34980 }, { "epoch": 0.06202597743397705, "grad_norm": 0.21484375, "learning_rate": 0.0014891848875191634, "loss": 0.1733, "step": 34982 }, { "epoch": 0.062029523599286866, "grad_norm": 0.265625, "learning_rate": 0.0014891312205698587, "loss": 0.1736, "step": 34984 }, { "epoch": 0.06203306976459668, "grad_norm": 0.45703125, "learning_rate": 0.0014890775519187198, "loss": 0.1702, "step": 34986 }, { "epoch": 0.062036615929906495, "grad_norm": 0.337890625, "learning_rate": 0.001489023881565982, "loss": 0.1582, "step": 34988 }, { "epoch": 0.06204016209521631, "grad_norm": 0.1572265625, "learning_rate": 0.0014889702095118797, "loss": 0.1041, "step": 34990 }, { "epoch": 0.062043708260526124, "grad_norm": 0.890625, "learning_rate": 0.0014889165357566477, "loss": 0.2486, "step": 34992 }, { "epoch": 0.06204725442583594, "grad_norm": 1.0859375, "learning_rate": 0.0014888628603005206, "loss": 0.2016, "step": 34994 }, { "epoch": 0.06205080059114576, "grad_norm": 0.322265625, "learning_rate": 0.0014888091831437328, "loss": 0.1644, "step": 34996 }, { "epoch": 0.062054346756455575, "grad_norm": 0.8671875, "learning_rate": 0.0014887555042865201, "loss": 0.1793, "step": 34998 }, { "epoch": 0.06205789292176539, "grad_norm": 1.03125, "learning_rate": 0.0014887018237291164, "loss": 0.206, "step": 35000 }, { "epoch": 0.062061439087075204, "grad_norm": 0.328125, "learning_rate": 0.001488648141471757, "loss": 0.2532, "step": 35002 }, { "epoch": 0.06206498525238502, "grad_norm": 0.349609375, "learning_rate": 0.0014885944575146757, "loss": 0.1634, "step": 35004 }, { "epoch": 0.06206853141769483, "grad_norm": 0.302734375, "learning_rate": 0.0014885407718581082, "loss": 0.1815, "step": 35006 }, { "epoch": 0.06207207758300465, "grad_norm": 0.6484375, "learning_rate": 0.0014884870845022896, "loss": 0.2531, "step": 35008 }, { "epoch": 0.06207562374831446, "grad_norm": 0.37890625, "learning_rate": 0.0014884333954474538, "loss": 0.1742, "step": 35010 }, { "epoch": 0.06207916991362428, "grad_norm": 1.8125, "learning_rate": 0.0014883797046938358, "loss": 0.2339, "step": 35012 }, { "epoch": 0.06208271607893409, "grad_norm": 0.6171875, "learning_rate": 0.0014883260122416707, "loss": 0.2188, "step": 35014 }, { "epoch": 0.062086262244243906, "grad_norm": 0.326171875, "learning_rate": 0.0014882723180911933, "loss": 0.1286, "step": 35016 }, { "epoch": 0.06208980840955372, "grad_norm": 0.54296875, "learning_rate": 0.001488218622242638, "loss": 0.1976, "step": 35018 }, { "epoch": 0.06209335457486354, "grad_norm": 0.31640625, "learning_rate": 0.0014881649246962403, "loss": 0.2099, "step": 35020 }, { "epoch": 0.06209690074017336, "grad_norm": 0.298828125, "learning_rate": 0.0014881112254522344, "loss": 0.2442, "step": 35022 }, { "epoch": 0.06210044690548317, "grad_norm": 0.83984375, "learning_rate": 0.0014880575245108559, "loss": 0.2699, "step": 35024 }, { "epoch": 0.062103993070792986, "grad_norm": 1.265625, "learning_rate": 0.0014880038218723385, "loss": 0.2267, "step": 35026 }, { "epoch": 0.0621075392361028, "grad_norm": 0.55859375, "learning_rate": 0.0014879501175369184, "loss": 0.2197, "step": 35028 }, { "epoch": 0.062111085401412615, "grad_norm": 0.5546875, "learning_rate": 0.0014878964115048296, "loss": 0.2088, "step": 35030 }, { "epoch": 0.06211463156672243, "grad_norm": 0.2138671875, "learning_rate": 0.0014878427037763073, "loss": 0.1558, "step": 35032 }, { "epoch": 0.062118177732032244, "grad_norm": 0.361328125, "learning_rate": 0.0014877889943515862, "loss": 0.1718, "step": 35034 }, { "epoch": 0.06212172389734206, "grad_norm": 0.70703125, "learning_rate": 0.0014877352832309012, "loss": 0.2185, "step": 35036 }, { "epoch": 0.06212527006265187, "grad_norm": 0.76171875, "learning_rate": 0.0014876815704144872, "loss": 0.164, "step": 35038 }, { "epoch": 0.06212881622796169, "grad_norm": 0.47265625, "learning_rate": 0.0014876278559025795, "loss": 0.2216, "step": 35040 }, { "epoch": 0.06213236239327151, "grad_norm": 0.361328125, "learning_rate": 0.0014875741396954125, "loss": 0.1654, "step": 35042 }, { "epoch": 0.062135908558581324, "grad_norm": 0.306640625, "learning_rate": 0.0014875204217932214, "loss": 0.17, "step": 35044 }, { "epoch": 0.06213945472389114, "grad_norm": 0.6328125, "learning_rate": 0.0014874667021962413, "loss": 0.1626, "step": 35046 }, { "epoch": 0.06214300088920095, "grad_norm": 0.357421875, "learning_rate": 0.001487412980904707, "loss": 0.1793, "step": 35048 }, { "epoch": 0.06214654705451077, "grad_norm": 0.5390625, "learning_rate": 0.001487359257918853, "loss": 0.1741, "step": 35050 }, { "epoch": 0.06215009321982058, "grad_norm": 0.3203125, "learning_rate": 0.0014873055332389148, "loss": 0.2394, "step": 35052 }, { "epoch": 0.062153639385130396, "grad_norm": 0.2060546875, "learning_rate": 0.0014872518068651268, "loss": 0.212, "step": 35054 }, { "epoch": 0.06215718555044021, "grad_norm": 0.1923828125, "learning_rate": 0.0014871980787977248, "loss": 0.2122, "step": 35056 }, { "epoch": 0.062160731715750026, "grad_norm": 1.28125, "learning_rate": 0.001487144349036943, "loss": 0.4389, "step": 35058 }, { "epoch": 0.06216427788105984, "grad_norm": 0.33203125, "learning_rate": 0.0014870906175830168, "loss": 0.2401, "step": 35060 }, { "epoch": 0.062167824046369655, "grad_norm": 0.484375, "learning_rate": 0.001487036884436181, "loss": 0.3665, "step": 35062 }, { "epoch": 0.062171370211679476, "grad_norm": 0.36328125, "learning_rate": 0.001486983149596671, "loss": 0.3, "step": 35064 }, { "epoch": 0.06217491637698929, "grad_norm": 0.2470703125, "learning_rate": 0.0014869294130647212, "loss": 0.1655, "step": 35066 }, { "epoch": 0.062178462542299105, "grad_norm": 1.0390625, "learning_rate": 0.0014868756748405672, "loss": 0.2129, "step": 35068 }, { "epoch": 0.06218200870760892, "grad_norm": 0.275390625, "learning_rate": 0.0014868219349244434, "loss": 0.2032, "step": 35070 }, { "epoch": 0.062185554872918734, "grad_norm": 0.318359375, "learning_rate": 0.0014867681933165853, "loss": 0.2293, "step": 35072 }, { "epoch": 0.06218910103822855, "grad_norm": 0.384765625, "learning_rate": 0.0014867144500172278, "loss": 0.1492, "step": 35074 }, { "epoch": 0.06219264720353836, "grad_norm": 0.271484375, "learning_rate": 0.0014866607050266058, "loss": 0.1853, "step": 35076 }, { "epoch": 0.06219619336884818, "grad_norm": 2.203125, "learning_rate": 0.0014866069583449547, "loss": 0.171, "step": 35078 }, { "epoch": 0.06219973953415799, "grad_norm": 0.89453125, "learning_rate": 0.0014865532099725093, "loss": 0.1527, "step": 35080 }, { "epoch": 0.06220328569946781, "grad_norm": 0.60546875, "learning_rate": 0.0014864994599095042, "loss": 0.3475, "step": 35082 }, { "epoch": 0.06220683186477762, "grad_norm": 0.5, "learning_rate": 0.0014864457081561755, "loss": 0.2288, "step": 35084 }, { "epoch": 0.062210378030087436, "grad_norm": 0.33203125, "learning_rate": 0.0014863919547127577, "loss": 0.1631, "step": 35086 }, { "epoch": 0.06221392419539726, "grad_norm": 0.408203125, "learning_rate": 0.001486338199579486, "loss": 0.2385, "step": 35088 }, { "epoch": 0.06221747036070707, "grad_norm": 0.80859375, "learning_rate": 0.001486284442756595, "loss": 0.2086, "step": 35090 }, { "epoch": 0.06222101652601689, "grad_norm": 0.546875, "learning_rate": 0.0014862306842443206, "loss": 0.1674, "step": 35092 }, { "epoch": 0.0622245626913267, "grad_norm": 0.453125, "learning_rate": 0.0014861769240428972, "loss": 0.2327, "step": 35094 }, { "epoch": 0.062228108856636516, "grad_norm": 0.28515625, "learning_rate": 0.0014861231621525607, "loss": 0.1948, "step": 35096 }, { "epoch": 0.06223165502194633, "grad_norm": 0.90234375, "learning_rate": 0.0014860693985735455, "loss": 0.1888, "step": 35098 }, { "epoch": 0.062235201187256145, "grad_norm": 0.83203125, "learning_rate": 0.0014860156333060869, "loss": 0.158, "step": 35100 }, { "epoch": 0.06223874735256596, "grad_norm": 0.28125, "learning_rate": 0.0014859618663504205, "loss": 0.2115, "step": 35102 }, { "epoch": 0.062242293517875774, "grad_norm": 1.0390625, "learning_rate": 0.001485908097706781, "loss": 0.2001, "step": 35104 }, { "epoch": 0.06224583968318559, "grad_norm": 0.427734375, "learning_rate": 0.0014858543273754035, "loss": 0.1844, "step": 35106 }, { "epoch": 0.0622493858484954, "grad_norm": 0.34375, "learning_rate": 0.0014858005553565231, "loss": 0.2336, "step": 35108 }, { "epoch": 0.062252932013805225, "grad_norm": 0.29296875, "learning_rate": 0.0014857467816503755, "loss": 0.2058, "step": 35110 }, { "epoch": 0.06225647817911504, "grad_norm": 0.55078125, "learning_rate": 0.0014856930062571957, "loss": 0.1788, "step": 35112 }, { "epoch": 0.062260024344424854, "grad_norm": 0.23828125, "learning_rate": 0.0014856392291772182, "loss": 0.1794, "step": 35114 }, { "epoch": 0.06226357050973467, "grad_norm": 0.322265625, "learning_rate": 0.001485585450410679, "loss": 0.3101, "step": 35116 }, { "epoch": 0.06226711667504448, "grad_norm": 0.76171875, "learning_rate": 0.001485531669957813, "loss": 0.2033, "step": 35118 }, { "epoch": 0.0622706628403543, "grad_norm": 0.330078125, "learning_rate": 0.0014854778878188556, "loss": 0.2024, "step": 35120 }, { "epoch": 0.06227420900566411, "grad_norm": 0.5703125, "learning_rate": 0.0014854241039940416, "loss": 0.1736, "step": 35122 }, { "epoch": 0.06227775517097393, "grad_norm": 0.3359375, "learning_rate": 0.0014853703184836064, "loss": 0.1448, "step": 35124 }, { "epoch": 0.06228130133628374, "grad_norm": 1.296875, "learning_rate": 0.0014853165312877857, "loss": 0.2654, "step": 35126 }, { "epoch": 0.062284847501593556, "grad_norm": 0.388671875, "learning_rate": 0.0014852627424068138, "loss": 0.2421, "step": 35128 }, { "epoch": 0.06228839366690337, "grad_norm": 0.380859375, "learning_rate": 0.0014852089518409266, "loss": 0.1682, "step": 35130 }, { "epoch": 0.06229193983221319, "grad_norm": 0.6328125, "learning_rate": 0.001485155159590359, "loss": 0.2994, "step": 35132 }, { "epoch": 0.062295485997523006, "grad_norm": 0.31640625, "learning_rate": 0.001485101365655347, "loss": 0.2161, "step": 35134 }, { "epoch": 0.06229903216283282, "grad_norm": 0.59765625, "learning_rate": 0.0014850475700361248, "loss": 0.3094, "step": 35136 }, { "epoch": 0.062302578328142635, "grad_norm": 1.140625, "learning_rate": 0.0014849937727329286, "loss": 0.1759, "step": 35138 }, { "epoch": 0.06230612449345245, "grad_norm": 0.48046875, "learning_rate": 0.0014849399737459931, "loss": 0.1888, "step": 35140 }, { "epoch": 0.062309670658762265, "grad_norm": 0.2734375, "learning_rate": 0.001484886173075554, "loss": 0.2161, "step": 35142 }, { "epoch": 0.06231321682407208, "grad_norm": 0.9921875, "learning_rate": 0.001484832370721846, "loss": 0.2459, "step": 35144 }, { "epoch": 0.062316762989381894, "grad_norm": 0.29296875, "learning_rate": 0.0014847785666851048, "loss": 0.2351, "step": 35146 }, { "epoch": 0.06232030915469171, "grad_norm": 0.55859375, "learning_rate": 0.0014847247609655657, "loss": 0.1642, "step": 35148 }, { "epoch": 0.06232385532000152, "grad_norm": 0.232421875, "learning_rate": 0.0014846709535634639, "loss": 0.1752, "step": 35150 }, { "epoch": 0.06232740148531134, "grad_norm": 0.5703125, "learning_rate": 0.0014846171444790348, "loss": 0.361, "step": 35152 }, { "epoch": 0.06233094765062115, "grad_norm": 0.302734375, "learning_rate": 0.0014845633337125137, "loss": 0.2161, "step": 35154 }, { "epoch": 0.06233449381593097, "grad_norm": 0.30859375, "learning_rate": 0.0014845095212641362, "loss": 0.1494, "step": 35156 }, { "epoch": 0.06233803998124079, "grad_norm": 0.3125, "learning_rate": 0.0014844557071341375, "loss": 0.1965, "step": 35158 }, { "epoch": 0.0623415861465506, "grad_norm": 1.4140625, "learning_rate": 0.0014844018913227524, "loss": 0.2168, "step": 35160 }, { "epoch": 0.06234513231186042, "grad_norm": 0.283203125, "learning_rate": 0.001484348073830217, "loss": 0.1812, "step": 35162 }, { "epoch": 0.06234867847717023, "grad_norm": 0.400390625, "learning_rate": 0.001484294254656766, "loss": 0.1374, "step": 35164 }, { "epoch": 0.062352224642480046, "grad_norm": 0.75390625, "learning_rate": 0.0014842404338026357, "loss": 0.1904, "step": 35166 }, { "epoch": 0.06235577080778986, "grad_norm": 2.015625, "learning_rate": 0.0014841866112680607, "loss": 0.2592, "step": 35168 }, { "epoch": 0.062359316973099675, "grad_norm": 0.89453125, "learning_rate": 0.0014841327870532767, "loss": 0.1901, "step": 35170 }, { "epoch": 0.06236286313840949, "grad_norm": 0.451171875, "learning_rate": 0.0014840789611585189, "loss": 0.2195, "step": 35172 }, { "epoch": 0.062366409303719304, "grad_norm": 0.69140625, "learning_rate": 0.0014840251335840225, "loss": 0.2031, "step": 35174 }, { "epoch": 0.06236995546902912, "grad_norm": 0.255859375, "learning_rate": 0.0014839713043300236, "loss": 0.1687, "step": 35176 }, { "epoch": 0.06237350163433894, "grad_norm": 0.54296875, "learning_rate": 0.0014839174733967573, "loss": 0.2125, "step": 35178 }, { "epoch": 0.062377047799648755, "grad_norm": 0.984375, "learning_rate": 0.0014838636407844591, "loss": 0.2542, "step": 35180 }, { "epoch": 0.06238059396495857, "grad_norm": 0.76171875, "learning_rate": 0.001483809806493364, "loss": 0.2222, "step": 35182 }, { "epoch": 0.062384140130268384, "grad_norm": 0.42578125, "learning_rate": 0.0014837559705237079, "loss": 0.223, "step": 35184 }, { "epoch": 0.0623876862955782, "grad_norm": 0.21484375, "learning_rate": 0.001483702132875726, "loss": 0.1674, "step": 35186 }, { "epoch": 0.06239123246088801, "grad_norm": 0.98046875, "learning_rate": 0.001483648293549654, "loss": 0.2395, "step": 35188 }, { "epoch": 0.06239477862619783, "grad_norm": 0.380859375, "learning_rate": 0.0014835944525457272, "loss": 0.147, "step": 35190 }, { "epoch": 0.06239832479150764, "grad_norm": 1.8984375, "learning_rate": 0.001483540609864181, "loss": 0.2589, "step": 35192 }, { "epoch": 0.06240187095681746, "grad_norm": 0.357421875, "learning_rate": 0.0014834867655052508, "loss": 0.2185, "step": 35194 }, { "epoch": 0.06240541712212727, "grad_norm": 0.353515625, "learning_rate": 0.0014834329194691725, "loss": 0.1883, "step": 35196 }, { "epoch": 0.062408963287437086, "grad_norm": 0.87109375, "learning_rate": 0.0014833790717561815, "loss": 0.1837, "step": 35198 }, { "epoch": 0.06241250945274691, "grad_norm": 0.76953125, "learning_rate": 0.0014833252223665128, "loss": 0.2438, "step": 35200 }, { "epoch": 0.06241605561805672, "grad_norm": 0.1806640625, "learning_rate": 0.0014832713713004023, "loss": 0.2045, "step": 35202 }, { "epoch": 0.062419601783366536, "grad_norm": 0.35546875, "learning_rate": 0.0014832175185580856, "loss": 0.1637, "step": 35204 }, { "epoch": 0.06242314794867635, "grad_norm": 1.2109375, "learning_rate": 0.0014831636641397979, "loss": 0.2046, "step": 35206 }, { "epoch": 0.062426694113986166, "grad_norm": 0.466796875, "learning_rate": 0.001483109808045775, "loss": 0.1538, "step": 35208 }, { "epoch": 0.06243024027929598, "grad_norm": 0.375, "learning_rate": 0.0014830559502762523, "loss": 0.2567, "step": 35210 }, { "epoch": 0.062433786444605795, "grad_norm": 0.396484375, "learning_rate": 0.0014830020908314652, "loss": 0.1658, "step": 35212 }, { "epoch": 0.06243733260991561, "grad_norm": 0.20703125, "learning_rate": 0.0014829482297116497, "loss": 0.1711, "step": 35214 }, { "epoch": 0.062440878775225424, "grad_norm": 0.9609375, "learning_rate": 0.001482894366917041, "loss": 0.2109, "step": 35216 }, { "epoch": 0.06244442494053524, "grad_norm": 0.275390625, "learning_rate": 0.0014828405024478748, "loss": 0.2354, "step": 35218 }, { "epoch": 0.06244797110584505, "grad_norm": 1.4375, "learning_rate": 0.0014827866363043864, "loss": 0.252, "step": 35220 }, { "epoch": 0.06245151727115487, "grad_norm": 1.046875, "learning_rate": 0.0014827327684868118, "loss": 0.1767, "step": 35222 }, { "epoch": 0.06245506343646469, "grad_norm": 0.2734375, "learning_rate": 0.0014826788989953862, "loss": 0.1924, "step": 35224 }, { "epoch": 0.062458609601774503, "grad_norm": 0.5390625, "learning_rate": 0.0014826250278303457, "loss": 0.1857, "step": 35226 }, { "epoch": 0.06246215576708432, "grad_norm": 0.224609375, "learning_rate": 0.0014825711549919253, "loss": 0.1837, "step": 35228 }, { "epoch": 0.06246570193239413, "grad_norm": 1.3359375, "learning_rate": 0.001482517280480361, "loss": 0.2279, "step": 35230 }, { "epoch": 0.06246924809770395, "grad_norm": 3.390625, "learning_rate": 0.0014824634042958885, "loss": 0.3722, "step": 35232 }, { "epoch": 0.06247279426301376, "grad_norm": 0.294921875, "learning_rate": 0.0014824095264387431, "loss": 0.1633, "step": 35234 }, { "epoch": 0.062476340428323576, "grad_norm": 0.1806640625, "learning_rate": 0.0014823556469091607, "loss": 0.3073, "step": 35236 }, { "epoch": 0.06247988659363339, "grad_norm": 0.326171875, "learning_rate": 0.0014823017657073765, "loss": 0.1881, "step": 35238 }, { "epoch": 0.062483432758943205, "grad_norm": 0.546875, "learning_rate": 0.0014822478828336264, "loss": 0.1618, "step": 35240 }, { "epoch": 0.06248697892425302, "grad_norm": 0.2412109375, "learning_rate": 0.0014821939982881463, "loss": 0.1704, "step": 35242 }, { "epoch": 0.062490525089562834, "grad_norm": 0.439453125, "learning_rate": 0.0014821401120711716, "loss": 0.1884, "step": 35244 }, { "epoch": 0.062494071254872656, "grad_norm": 0.8125, "learning_rate": 0.0014820862241829381, "loss": 0.1874, "step": 35246 }, { "epoch": 0.06249761742018247, "grad_norm": 0.2021484375, "learning_rate": 0.0014820323346236813, "loss": 0.2118, "step": 35248 }, { "epoch": 0.06250116358549228, "grad_norm": 0.251953125, "learning_rate": 0.0014819784433936368, "loss": 0.16, "step": 35250 }, { "epoch": 0.06250470975080209, "grad_norm": 0.5390625, "learning_rate": 0.0014819245504930408, "loss": 0.1893, "step": 35252 }, { "epoch": 0.06250825591611191, "grad_norm": 0.23046875, "learning_rate": 0.0014818706559221284, "loss": 0.1972, "step": 35254 }, { "epoch": 0.06251180208142172, "grad_norm": 0.470703125, "learning_rate": 0.001481816759681136, "loss": 0.187, "step": 35256 }, { "epoch": 0.06251534824673155, "grad_norm": 1.0703125, "learning_rate": 0.0014817628617702985, "loss": 0.1698, "step": 35258 }, { "epoch": 0.06251889441204136, "grad_norm": 0.546875, "learning_rate": 0.0014817089621898519, "loss": 0.193, "step": 35260 }, { "epoch": 0.06252244057735118, "grad_norm": 1.2109375, "learning_rate": 0.0014816550609400322, "loss": 0.2731, "step": 35262 }, { "epoch": 0.062525986742661, "grad_norm": 0.8125, "learning_rate": 0.001481601158021075, "loss": 0.1805, "step": 35264 }, { "epoch": 0.06252953290797081, "grad_norm": 0.333984375, "learning_rate": 0.001481547253433216, "loss": 0.3314, "step": 35266 }, { "epoch": 0.06253307907328062, "grad_norm": 1.375, "learning_rate": 0.0014814933471766908, "loss": 0.4282, "step": 35268 }, { "epoch": 0.06253662523859044, "grad_norm": 0.61328125, "learning_rate": 0.0014814394392517354, "loss": 0.2284, "step": 35270 }, { "epoch": 0.06254017140390025, "grad_norm": 0.6875, "learning_rate": 0.0014813855296585856, "loss": 0.1755, "step": 35272 }, { "epoch": 0.06254371756921007, "grad_norm": 0.2490234375, "learning_rate": 0.001481331618397477, "loss": 0.1523, "step": 35274 }, { "epoch": 0.06254726373451988, "grad_norm": 0.6875, "learning_rate": 0.0014812777054686451, "loss": 0.2012, "step": 35276 }, { "epoch": 0.0625508098998297, "grad_norm": 0.478515625, "learning_rate": 0.0014812237908723262, "loss": 0.1701, "step": 35278 }, { "epoch": 0.06255435606513951, "grad_norm": 0.24609375, "learning_rate": 0.001481169874608756, "loss": 0.1398, "step": 35280 }, { "epoch": 0.06255790223044932, "grad_norm": 0.333984375, "learning_rate": 0.0014811159566781697, "loss": 0.1859, "step": 35282 }, { "epoch": 0.06256144839575914, "grad_norm": 0.3984375, "learning_rate": 0.0014810620370808041, "loss": 0.1604, "step": 35284 }, { "epoch": 0.06256499456106895, "grad_norm": 0.43359375, "learning_rate": 0.0014810081158168943, "loss": 0.1818, "step": 35286 }, { "epoch": 0.06256854072637877, "grad_norm": 0.23828125, "learning_rate": 0.001480954192886676, "loss": 0.1811, "step": 35288 }, { "epoch": 0.06257208689168858, "grad_norm": 0.40234375, "learning_rate": 0.0014809002682903858, "loss": 0.2088, "step": 35290 }, { "epoch": 0.0625756330569984, "grad_norm": 0.458984375, "learning_rate": 0.001480846342028259, "loss": 0.1384, "step": 35292 }, { "epoch": 0.06257917922230821, "grad_norm": 0.439453125, "learning_rate": 0.0014807924141005317, "loss": 0.2553, "step": 35294 }, { "epoch": 0.06258272538761803, "grad_norm": 0.384765625, "learning_rate": 0.0014807384845074393, "loss": 0.2424, "step": 35296 }, { "epoch": 0.06258627155292784, "grad_norm": 1.078125, "learning_rate": 0.0014806845532492178, "loss": 0.2108, "step": 35298 }, { "epoch": 0.06258981771823766, "grad_norm": 0.419921875, "learning_rate": 0.0014806306203261036, "loss": 0.1759, "step": 35300 }, { "epoch": 0.06259336388354748, "grad_norm": 0.7265625, "learning_rate": 0.0014805766857383317, "loss": 0.1393, "step": 35302 }, { "epoch": 0.0625969100488573, "grad_norm": 0.275390625, "learning_rate": 0.001480522749486139, "loss": 0.2655, "step": 35304 }, { "epoch": 0.06260045621416711, "grad_norm": 0.423828125, "learning_rate": 0.0014804688115697604, "loss": 0.1482, "step": 35306 }, { "epoch": 0.06260400237947693, "grad_norm": 0.365234375, "learning_rate": 0.001480414871989432, "loss": 0.1616, "step": 35308 }, { "epoch": 0.06260754854478674, "grad_norm": 0.83984375, "learning_rate": 0.0014803609307453907, "loss": 0.1948, "step": 35310 }, { "epoch": 0.06261109471009656, "grad_norm": 0.46484375, "learning_rate": 0.001480306987837871, "loss": 0.1892, "step": 35312 }, { "epoch": 0.06261464087540637, "grad_norm": 0.91015625, "learning_rate": 0.0014802530432671095, "loss": 0.2245, "step": 35314 }, { "epoch": 0.06261818704071619, "grad_norm": 0.1533203125, "learning_rate": 0.001480199097033342, "loss": 0.2041, "step": 35316 }, { "epoch": 0.062621733206026, "grad_norm": 0.205078125, "learning_rate": 0.0014801451491368049, "loss": 0.1221, "step": 35318 }, { "epoch": 0.06262527937133582, "grad_norm": 0.296875, "learning_rate": 0.0014800911995777335, "loss": 0.1729, "step": 35320 }, { "epoch": 0.06262882553664563, "grad_norm": 1.4296875, "learning_rate": 0.001480037248356364, "loss": 0.2403, "step": 35322 }, { "epoch": 0.06263237170195544, "grad_norm": 0.75, "learning_rate": 0.0014799832954729322, "loss": 0.1642, "step": 35324 }, { "epoch": 0.06263591786726526, "grad_norm": 0.62890625, "learning_rate": 0.0014799293409276744, "loss": 0.248, "step": 35326 }, { "epoch": 0.06263946403257507, "grad_norm": 3.25, "learning_rate": 0.0014798753847208262, "loss": 0.253, "step": 35328 }, { "epoch": 0.06264301019788489, "grad_norm": 0.318359375, "learning_rate": 0.001479821426852624, "loss": 0.4124, "step": 35330 }, { "epoch": 0.0626465563631947, "grad_norm": 0.578125, "learning_rate": 0.0014797674673233032, "loss": 0.1623, "step": 35332 }, { "epoch": 0.06265010252850452, "grad_norm": 2.625, "learning_rate": 0.0014797135061331002, "loss": 0.2028, "step": 35334 }, { "epoch": 0.06265364869381433, "grad_norm": 0.27734375, "learning_rate": 0.001479659543282251, "loss": 0.2467, "step": 35336 }, { "epoch": 0.06265719485912415, "grad_norm": 0.36328125, "learning_rate": 0.0014796055787709911, "loss": 0.2784, "step": 35338 }, { "epoch": 0.06266074102443396, "grad_norm": 0.71875, "learning_rate": 0.0014795516125995572, "loss": 0.1841, "step": 35340 }, { "epoch": 0.06266428718974378, "grad_norm": 0.2138671875, "learning_rate": 0.0014794976447681848, "loss": 0.1606, "step": 35342 }, { "epoch": 0.06266783335505359, "grad_norm": 1.0546875, "learning_rate": 0.0014794436752771105, "loss": 0.2936, "step": 35344 }, { "epoch": 0.0626713795203634, "grad_norm": 0.81640625, "learning_rate": 0.0014793897041265698, "loss": 0.144, "step": 35346 }, { "epoch": 0.06267492568567323, "grad_norm": 0.41796875, "learning_rate": 0.001479335731316799, "loss": 0.2096, "step": 35348 }, { "epoch": 0.06267847185098305, "grad_norm": 0.376953125, "learning_rate": 0.001479281756848034, "loss": 0.1729, "step": 35350 }, { "epoch": 0.06268201801629286, "grad_norm": 0.400390625, "learning_rate": 0.0014792277807205107, "loss": 0.1441, "step": 35352 }, { "epoch": 0.06268556418160268, "grad_norm": 0.8203125, "learning_rate": 0.0014791738029344655, "loss": 0.2053, "step": 35354 }, { "epoch": 0.06268911034691249, "grad_norm": 0.416015625, "learning_rate": 0.0014791198234901345, "loss": 0.1669, "step": 35356 }, { "epoch": 0.0626926565122223, "grad_norm": 2.171875, "learning_rate": 0.0014790658423877532, "loss": 0.2074, "step": 35358 }, { "epoch": 0.06269620267753212, "grad_norm": 0.60546875, "learning_rate": 0.0014790118596275585, "loss": 0.216, "step": 35360 }, { "epoch": 0.06269974884284193, "grad_norm": 0.98828125, "learning_rate": 0.0014789578752097856, "loss": 0.2371, "step": 35362 }, { "epoch": 0.06270329500815175, "grad_norm": 0.259765625, "learning_rate": 0.0014789038891346719, "loss": 0.2207, "step": 35364 }, { "epoch": 0.06270684117346156, "grad_norm": 0.89453125, "learning_rate": 0.001478849901402452, "loss": 0.1826, "step": 35366 }, { "epoch": 0.06271038733877138, "grad_norm": 0.3125, "learning_rate": 0.001478795912013363, "loss": 0.1501, "step": 35368 }, { "epoch": 0.06271393350408119, "grad_norm": 0.376953125, "learning_rate": 0.0014787419209676405, "loss": 0.2108, "step": 35370 }, { "epoch": 0.06271747966939101, "grad_norm": 1.375, "learning_rate": 0.001478687928265521, "loss": 0.1877, "step": 35372 }, { "epoch": 0.06272102583470082, "grad_norm": 0.578125, "learning_rate": 0.0014786339339072406, "loss": 0.3032, "step": 35374 }, { "epoch": 0.06272457200001064, "grad_norm": 0.31640625, "learning_rate": 0.001478579937893035, "loss": 0.1295, "step": 35376 }, { "epoch": 0.06272811816532045, "grad_norm": 0.306640625, "learning_rate": 0.0014785259402231407, "loss": 0.1997, "step": 35378 }, { "epoch": 0.06273166433063027, "grad_norm": 0.423828125, "learning_rate": 0.001478471940897794, "loss": 0.202, "step": 35380 }, { "epoch": 0.06273521049594008, "grad_norm": 0.443359375, "learning_rate": 0.0014784179399172305, "loss": 0.18, "step": 35382 }, { "epoch": 0.0627387566612499, "grad_norm": 2.640625, "learning_rate": 0.0014783639372816873, "loss": 0.3024, "step": 35384 }, { "epoch": 0.06274230282655971, "grad_norm": 0.51953125, "learning_rate": 0.0014783099329913998, "loss": 0.3659, "step": 35386 }, { "epoch": 0.06274584899186952, "grad_norm": 0.275390625, "learning_rate": 0.0014782559270466043, "loss": 0.1957, "step": 35388 }, { "epoch": 0.06274939515717934, "grad_norm": 0.40625, "learning_rate": 0.001478201919447537, "loss": 0.2311, "step": 35390 }, { "epoch": 0.06275294132248915, "grad_norm": 0.326171875, "learning_rate": 0.0014781479101944343, "loss": 0.1766, "step": 35392 }, { "epoch": 0.06275648748779898, "grad_norm": 0.5078125, "learning_rate": 0.0014780938992875322, "loss": 0.2785, "step": 35394 }, { "epoch": 0.0627600336531088, "grad_norm": 0.271484375, "learning_rate": 0.0014780398867270672, "loss": 0.1313, "step": 35396 }, { "epoch": 0.06276357981841861, "grad_norm": 0.2001953125, "learning_rate": 0.0014779858725132752, "loss": 0.1616, "step": 35398 }, { "epoch": 0.06276712598372843, "grad_norm": 1.015625, "learning_rate": 0.0014779318566463925, "loss": 0.1844, "step": 35400 }, { "epoch": 0.06277067214903824, "grad_norm": 0.4375, "learning_rate": 0.0014778778391266556, "loss": 0.1794, "step": 35402 }, { "epoch": 0.06277421831434805, "grad_norm": 0.34765625, "learning_rate": 0.0014778238199543005, "loss": 0.1423, "step": 35404 }, { "epoch": 0.06277776447965787, "grad_norm": 0.287109375, "learning_rate": 0.0014777697991295634, "loss": 0.1599, "step": 35406 }, { "epoch": 0.06278131064496768, "grad_norm": 0.302734375, "learning_rate": 0.0014777157766526809, "loss": 0.187, "step": 35408 }, { "epoch": 0.0627848568102775, "grad_norm": 0.53125, "learning_rate": 0.0014776617525238884, "loss": 0.2273, "step": 35410 }, { "epoch": 0.06278840297558731, "grad_norm": 0.59375, "learning_rate": 0.0014776077267434232, "loss": 0.1652, "step": 35412 }, { "epoch": 0.06279194914089713, "grad_norm": 1.328125, "learning_rate": 0.0014775536993115207, "loss": 0.2128, "step": 35414 }, { "epoch": 0.06279549530620694, "grad_norm": 1.09375, "learning_rate": 0.0014774996702284183, "loss": 0.3473, "step": 35416 }, { "epoch": 0.06279904147151676, "grad_norm": 0.271484375, "learning_rate": 0.0014774456394943511, "loss": 0.208, "step": 35418 }, { "epoch": 0.06280258763682657, "grad_norm": 1.7109375, "learning_rate": 0.0014773916071095562, "loss": 0.2341, "step": 35420 }, { "epoch": 0.06280613380213639, "grad_norm": 0.37890625, "learning_rate": 0.0014773375730742697, "loss": 0.1819, "step": 35422 }, { "epoch": 0.0628096799674462, "grad_norm": 0.5546875, "learning_rate": 0.0014772835373887277, "loss": 0.1234, "step": 35424 }, { "epoch": 0.06281322613275601, "grad_norm": 0.5, "learning_rate": 0.0014772295000531667, "loss": 0.3345, "step": 35426 }, { "epoch": 0.06281677229806583, "grad_norm": 0.546875, "learning_rate": 0.0014771754610678224, "loss": 0.1855, "step": 35428 }, { "epoch": 0.06282031846337564, "grad_norm": 0.5234375, "learning_rate": 0.0014771214204329325, "loss": 0.2362, "step": 35430 }, { "epoch": 0.06282386462868546, "grad_norm": 0.66796875, "learning_rate": 0.0014770673781487323, "loss": 0.17, "step": 35432 }, { "epoch": 0.06282741079399527, "grad_norm": 0.6015625, "learning_rate": 0.0014770133342154584, "loss": 0.2483, "step": 35434 }, { "epoch": 0.06283095695930509, "grad_norm": 0.43359375, "learning_rate": 0.0014769592886333472, "loss": 0.1891, "step": 35436 }, { "epoch": 0.06283450312461492, "grad_norm": 0.2060546875, "learning_rate": 0.0014769052414026352, "loss": 0.1506, "step": 35438 }, { "epoch": 0.06283804928992473, "grad_norm": 0.369140625, "learning_rate": 0.0014768511925235585, "loss": 0.3143, "step": 35440 }, { "epoch": 0.06284159545523454, "grad_norm": 0.6171875, "learning_rate": 0.0014767971419963532, "loss": 0.1952, "step": 35442 }, { "epoch": 0.06284514162054436, "grad_norm": 0.9296875, "learning_rate": 0.0014767430898212565, "loss": 0.2017, "step": 35444 }, { "epoch": 0.06284868778585417, "grad_norm": 0.2734375, "learning_rate": 0.0014766890359985043, "loss": 0.2086, "step": 35446 }, { "epoch": 0.06285223395116399, "grad_norm": 0.306640625, "learning_rate": 0.0014766349805283327, "loss": 0.2022, "step": 35448 }, { "epoch": 0.0628557801164738, "grad_norm": 0.31640625, "learning_rate": 0.001476580923410979, "loss": 0.1688, "step": 35450 }, { "epoch": 0.06285932628178362, "grad_norm": 1.5390625, "learning_rate": 0.0014765268646466788, "loss": 0.327, "step": 35452 }, { "epoch": 0.06286287244709343, "grad_norm": 0.76953125, "learning_rate": 0.0014764728042356689, "loss": 0.1645, "step": 35454 }, { "epoch": 0.06286641861240325, "grad_norm": 0.259765625, "learning_rate": 0.0014764187421781854, "loss": 0.1985, "step": 35456 }, { "epoch": 0.06286996477771306, "grad_norm": 0.4609375, "learning_rate": 0.0014763646784744656, "loss": 0.1972, "step": 35458 }, { "epoch": 0.06287351094302288, "grad_norm": 0.70703125, "learning_rate": 0.0014763106131247445, "loss": 0.1975, "step": 35460 }, { "epoch": 0.06287705710833269, "grad_norm": 0.341796875, "learning_rate": 0.00147625654612926, "loss": 0.1871, "step": 35462 }, { "epoch": 0.0628806032736425, "grad_norm": 0.388671875, "learning_rate": 0.0014762024774882478, "loss": 0.3703, "step": 35464 }, { "epoch": 0.06288414943895232, "grad_norm": 0.2275390625, "learning_rate": 0.0014761484072019445, "loss": 0.1722, "step": 35466 }, { "epoch": 0.06288769560426213, "grad_norm": 4.125, "learning_rate": 0.0014760943352705863, "loss": 0.2352, "step": 35468 }, { "epoch": 0.06289124176957195, "grad_norm": 0.439453125, "learning_rate": 0.00147604026169441, "loss": 0.1839, "step": 35470 }, { "epoch": 0.06289478793488176, "grad_norm": 0.369140625, "learning_rate": 0.0014759861864736522, "loss": 0.1825, "step": 35472 }, { "epoch": 0.06289833410019158, "grad_norm": 0.470703125, "learning_rate": 0.001475932109608549, "loss": 0.2739, "step": 35474 }, { "epoch": 0.06290188026550139, "grad_norm": 0.392578125, "learning_rate": 0.0014758780310993372, "loss": 0.212, "step": 35476 }, { "epoch": 0.0629054264308112, "grad_norm": 1.5390625, "learning_rate": 0.0014758239509462532, "loss": 0.1706, "step": 35478 }, { "epoch": 0.06290897259612102, "grad_norm": 7.71875, "learning_rate": 0.0014757698691495336, "loss": 0.4462, "step": 35480 }, { "epoch": 0.06291251876143084, "grad_norm": 0.40625, "learning_rate": 0.001475715785709415, "loss": 0.195, "step": 35482 }, { "epoch": 0.06291606492674066, "grad_norm": 1.03125, "learning_rate": 0.0014756617006261338, "loss": 0.2121, "step": 35484 }, { "epoch": 0.06291961109205048, "grad_norm": 0.4609375, "learning_rate": 0.0014756076138999264, "loss": 0.147, "step": 35486 }, { "epoch": 0.0629231572573603, "grad_norm": 0.61328125, "learning_rate": 0.0014755535255310296, "loss": 0.1994, "step": 35488 }, { "epoch": 0.06292670342267011, "grad_norm": 0.216796875, "learning_rate": 0.0014754994355196795, "loss": 0.1753, "step": 35490 }, { "epoch": 0.06293024958797992, "grad_norm": 0.376953125, "learning_rate": 0.0014754453438661132, "loss": 0.2035, "step": 35492 }, { "epoch": 0.06293379575328974, "grad_norm": 0.1962890625, "learning_rate": 0.0014753912505705668, "loss": 0.1538, "step": 35494 }, { "epoch": 0.06293734191859955, "grad_norm": 2.609375, "learning_rate": 0.0014753371556332775, "loss": 0.1581, "step": 35496 }, { "epoch": 0.06294088808390937, "grad_norm": 0.84765625, "learning_rate": 0.0014752830590544816, "loss": 0.1908, "step": 35498 }, { "epoch": 0.06294443424921918, "grad_norm": 0.8046875, "learning_rate": 0.0014752289608344152, "loss": 0.191, "step": 35500 }, { "epoch": 0.062947980414529, "grad_norm": 0.47265625, "learning_rate": 0.0014751748609733155, "loss": 0.1919, "step": 35502 }, { "epoch": 0.06295152657983881, "grad_norm": 0.416015625, "learning_rate": 0.0014751207594714188, "loss": 0.1463, "step": 35504 }, { "epoch": 0.06295507274514862, "grad_norm": 0.357421875, "learning_rate": 0.0014750666563289618, "loss": 0.1822, "step": 35506 }, { "epoch": 0.06295861891045844, "grad_norm": 0.326171875, "learning_rate": 0.0014750125515461813, "loss": 0.2285, "step": 35508 }, { "epoch": 0.06296216507576825, "grad_norm": 0.287109375, "learning_rate": 0.0014749584451233132, "loss": 0.14, "step": 35510 }, { "epoch": 0.06296571124107807, "grad_norm": 0.306640625, "learning_rate": 0.0014749043370605951, "loss": 0.1463, "step": 35512 }, { "epoch": 0.06296925740638788, "grad_norm": 0.38671875, "learning_rate": 0.001474850227358263, "loss": 0.2504, "step": 35514 }, { "epoch": 0.0629728035716977, "grad_norm": 0.515625, "learning_rate": 0.0014747961160165542, "loss": 0.2048, "step": 35516 }, { "epoch": 0.06297634973700751, "grad_norm": 0.55859375, "learning_rate": 0.0014747420030357043, "loss": 0.1696, "step": 35518 }, { "epoch": 0.06297989590231733, "grad_norm": 0.455078125, "learning_rate": 0.0014746878884159512, "loss": 0.1934, "step": 35520 }, { "epoch": 0.06298344206762714, "grad_norm": 0.41015625, "learning_rate": 0.0014746337721575303, "loss": 0.2101, "step": 35522 }, { "epoch": 0.06298698823293696, "grad_norm": 0.345703125, "learning_rate": 0.001474579654260679, "loss": 0.2518, "step": 35524 }, { "epoch": 0.06299053439824677, "grad_norm": 0.734375, "learning_rate": 0.0014745255347256342, "loss": 0.1748, "step": 35526 }, { "epoch": 0.06299408056355658, "grad_norm": 0.337890625, "learning_rate": 0.001474471413552632, "loss": 0.2214, "step": 35528 }, { "epoch": 0.06299762672886641, "grad_norm": 0.314453125, "learning_rate": 0.0014744172907419093, "loss": 0.2343, "step": 35530 }, { "epoch": 0.06300117289417623, "grad_norm": 1.9296875, "learning_rate": 0.0014743631662937032, "loss": 0.1894, "step": 35532 }, { "epoch": 0.06300471905948604, "grad_norm": 0.78125, "learning_rate": 0.0014743090402082497, "loss": 0.1943, "step": 35534 }, { "epoch": 0.06300826522479586, "grad_norm": 0.45703125, "learning_rate": 0.0014742549124857863, "loss": 0.1534, "step": 35536 }, { "epoch": 0.06301181139010567, "grad_norm": 0.30859375, "learning_rate": 0.001474200783126549, "loss": 0.2144, "step": 35538 }, { "epoch": 0.06301535755541549, "grad_norm": 0.25390625, "learning_rate": 0.001474146652130775, "loss": 0.2199, "step": 35540 }, { "epoch": 0.0630189037207253, "grad_norm": 0.435546875, "learning_rate": 0.0014740925194987005, "loss": 0.2032, "step": 35542 }, { "epoch": 0.06302244988603511, "grad_norm": 0.50390625, "learning_rate": 0.0014740383852305627, "loss": 0.1747, "step": 35544 }, { "epoch": 0.06302599605134493, "grad_norm": 1.09375, "learning_rate": 0.0014739842493265987, "loss": 0.2268, "step": 35546 }, { "epoch": 0.06302954221665474, "grad_norm": 1.3671875, "learning_rate": 0.0014739301117870445, "loss": 0.1654, "step": 35548 }, { "epoch": 0.06303308838196456, "grad_norm": 0.5234375, "learning_rate": 0.001473875972612137, "loss": 0.2037, "step": 35550 }, { "epoch": 0.06303663454727437, "grad_norm": 0.392578125, "learning_rate": 0.0014738218318021135, "loss": 0.162, "step": 35552 }, { "epoch": 0.06304018071258419, "grad_norm": 0.349609375, "learning_rate": 0.0014737676893572106, "loss": 0.1407, "step": 35554 }, { "epoch": 0.063043726877894, "grad_norm": 0.25390625, "learning_rate": 0.0014737135452776648, "loss": 0.1473, "step": 35556 }, { "epoch": 0.06304727304320382, "grad_norm": 0.5859375, "learning_rate": 0.0014736593995637128, "loss": 0.2906, "step": 35558 }, { "epoch": 0.06305081920851363, "grad_norm": 1.0546875, "learning_rate": 0.001473605252215592, "loss": 0.244, "step": 35560 }, { "epoch": 0.06305436537382345, "grad_norm": 0.5, "learning_rate": 0.0014735511032335385, "loss": 0.172, "step": 35562 }, { "epoch": 0.06305791153913326, "grad_norm": 0.50390625, "learning_rate": 0.0014734969526177895, "loss": 0.1657, "step": 35564 }, { "epoch": 0.06306145770444307, "grad_norm": 0.71484375, "learning_rate": 0.001473442800368582, "loss": 0.1564, "step": 35566 }, { "epoch": 0.06306500386975289, "grad_norm": 0.5078125, "learning_rate": 0.0014733886464861523, "loss": 0.1992, "step": 35568 }, { "epoch": 0.0630685500350627, "grad_norm": 0.89453125, "learning_rate": 0.0014733344909707378, "loss": 0.2306, "step": 35570 }, { "epoch": 0.06307209620037252, "grad_norm": 0.33984375, "learning_rate": 0.001473280333822575, "loss": 0.2205, "step": 35572 }, { "epoch": 0.06307564236568235, "grad_norm": 0.48046875, "learning_rate": 0.001473226175041901, "loss": 0.1988, "step": 35574 }, { "epoch": 0.06307918853099216, "grad_norm": 2.21875, "learning_rate": 0.0014731720146289524, "loss": 0.3054, "step": 35576 }, { "epoch": 0.06308273469630198, "grad_norm": 0.2890625, "learning_rate": 0.001473117852583966, "loss": 0.2137, "step": 35578 }, { "epoch": 0.06308628086161179, "grad_norm": 0.69921875, "learning_rate": 0.001473063688907179, "loss": 0.1883, "step": 35580 }, { "epoch": 0.0630898270269216, "grad_norm": 0.310546875, "learning_rate": 0.001473009523598828, "loss": 0.1501, "step": 35582 }, { "epoch": 0.06309337319223142, "grad_norm": 0.38671875, "learning_rate": 0.0014729553566591503, "loss": 0.2363, "step": 35584 }, { "epoch": 0.06309691935754123, "grad_norm": 0.50390625, "learning_rate": 0.0014729011880883823, "loss": 0.1963, "step": 35586 }, { "epoch": 0.06310046552285105, "grad_norm": 0.48828125, "learning_rate": 0.0014728470178867613, "loss": 0.2905, "step": 35588 }, { "epoch": 0.06310401168816086, "grad_norm": 0.2431640625, "learning_rate": 0.0014727928460545239, "loss": 0.1441, "step": 35590 }, { "epoch": 0.06310755785347068, "grad_norm": 0.2470703125, "learning_rate": 0.001472738672591907, "loss": 0.1985, "step": 35592 }, { "epoch": 0.06311110401878049, "grad_norm": 1.046875, "learning_rate": 0.0014726844974991477, "loss": 0.2147, "step": 35594 }, { "epoch": 0.0631146501840903, "grad_norm": 0.349609375, "learning_rate": 0.0014726303207764828, "loss": 0.1552, "step": 35596 }, { "epoch": 0.06311819634940012, "grad_norm": 0.2412109375, "learning_rate": 0.0014725761424241497, "loss": 0.3288, "step": 35598 }, { "epoch": 0.06312174251470994, "grad_norm": 0.2421875, "learning_rate": 0.0014725219624423848, "loss": 0.1788, "step": 35600 }, { "epoch": 0.06312528868001975, "grad_norm": 0.76171875, "learning_rate": 0.001472467780831425, "loss": 0.1723, "step": 35602 }, { "epoch": 0.06312883484532957, "grad_norm": 0.3828125, "learning_rate": 0.0014724135975915076, "loss": 0.216, "step": 35604 }, { "epoch": 0.06313238101063938, "grad_norm": 2.0625, "learning_rate": 0.0014723594127228697, "loss": 0.2661, "step": 35606 }, { "epoch": 0.0631359271759492, "grad_norm": 0.59375, "learning_rate": 0.0014723052262257477, "loss": 0.2325, "step": 35608 }, { "epoch": 0.06313947334125901, "grad_norm": 0.875, "learning_rate": 0.0014722510381003791, "loss": 0.1845, "step": 35610 }, { "epoch": 0.06314301950656882, "grad_norm": 1.5, "learning_rate": 0.0014721968483470006, "loss": 0.3052, "step": 35612 }, { "epoch": 0.06314656567187864, "grad_norm": 0.6015625, "learning_rate": 0.0014721426569658494, "loss": 0.2524, "step": 35614 }, { "epoch": 0.06315011183718845, "grad_norm": 0.404296875, "learning_rate": 0.001472088463957162, "loss": 0.2141, "step": 35616 }, { "epoch": 0.06315365800249827, "grad_norm": 1.015625, "learning_rate": 0.0014720342693211761, "loss": 0.2333, "step": 35618 }, { "epoch": 0.0631572041678081, "grad_norm": 1.8203125, "learning_rate": 0.0014719800730581285, "loss": 0.2465, "step": 35620 }, { "epoch": 0.06316075033311791, "grad_norm": 0.255859375, "learning_rate": 0.001471925875168256, "loss": 0.2575, "step": 35622 }, { "epoch": 0.06316429649842772, "grad_norm": 0.412109375, "learning_rate": 0.0014718716756517956, "loss": 0.1786, "step": 35624 }, { "epoch": 0.06316784266373754, "grad_norm": 0.765625, "learning_rate": 0.0014718174745089849, "loss": 0.2031, "step": 35626 }, { "epoch": 0.06317138882904735, "grad_norm": 0.38671875, "learning_rate": 0.0014717632717400604, "loss": 0.2137, "step": 35628 }, { "epoch": 0.06317493499435717, "grad_norm": 0.25390625, "learning_rate": 0.0014717090673452594, "loss": 0.2049, "step": 35630 }, { "epoch": 0.06317848115966698, "grad_norm": 0.27734375, "learning_rate": 0.0014716548613248183, "loss": 0.2984, "step": 35632 }, { "epoch": 0.0631820273249768, "grad_norm": 0.443359375, "learning_rate": 0.0014716006536789755, "loss": 0.2033, "step": 35634 }, { "epoch": 0.06318557349028661, "grad_norm": 0.46875, "learning_rate": 0.0014715464444079664, "loss": 0.2025, "step": 35636 }, { "epoch": 0.06318911965559643, "grad_norm": 0.65625, "learning_rate": 0.0014714922335120298, "loss": 0.2476, "step": 35638 }, { "epoch": 0.06319266582090624, "grad_norm": 0.83984375, "learning_rate": 0.0014714380209914014, "loss": 0.1887, "step": 35640 }, { "epoch": 0.06319621198621606, "grad_norm": 0.251953125, "learning_rate": 0.0014713838068463193, "loss": 0.2063, "step": 35642 }, { "epoch": 0.06319975815152587, "grad_norm": 0.26171875, "learning_rate": 0.0014713295910770195, "loss": 0.2036, "step": 35644 }, { "epoch": 0.06320330431683568, "grad_norm": 0.62890625, "learning_rate": 0.0014712753736837404, "loss": 0.2535, "step": 35646 }, { "epoch": 0.0632068504821455, "grad_norm": 0.9375, "learning_rate": 0.0014712211546667182, "loss": 0.1655, "step": 35648 }, { "epoch": 0.06321039664745531, "grad_norm": 0.328125, "learning_rate": 0.0014711669340261907, "loss": 0.1882, "step": 35650 }, { "epoch": 0.06321394281276513, "grad_norm": 0.54296875, "learning_rate": 0.001471112711762394, "loss": 0.1902, "step": 35652 }, { "epoch": 0.06321748897807494, "grad_norm": 0.5078125, "learning_rate": 0.0014710584878755665, "loss": 0.1648, "step": 35654 }, { "epoch": 0.06322103514338476, "grad_norm": 0.49609375, "learning_rate": 0.001471004262365944, "loss": 0.1814, "step": 35656 }, { "epoch": 0.06322458130869457, "grad_norm": 0.396484375, "learning_rate": 0.0014709500352337649, "loss": 0.1928, "step": 35658 }, { "epoch": 0.06322812747400439, "grad_norm": 0.609375, "learning_rate": 0.0014708958064792654, "loss": 0.1994, "step": 35660 }, { "epoch": 0.0632316736393142, "grad_norm": 0.341796875, "learning_rate": 0.0014708415761026836, "loss": 0.1923, "step": 35662 }, { "epoch": 0.06323521980462402, "grad_norm": 0.60546875, "learning_rate": 0.0014707873441042558, "loss": 0.1872, "step": 35664 }, { "epoch": 0.06323876596993384, "grad_norm": 0.76171875, "learning_rate": 0.0014707331104842198, "loss": 0.1842, "step": 35666 }, { "epoch": 0.06324231213524366, "grad_norm": 0.419921875, "learning_rate": 0.0014706788752428123, "loss": 0.1575, "step": 35668 }, { "epoch": 0.06324585830055347, "grad_norm": 0.73828125, "learning_rate": 0.0014706246383802705, "loss": 0.1956, "step": 35670 }, { "epoch": 0.06324940446586329, "grad_norm": 0.4609375, "learning_rate": 0.0014705703998968323, "loss": 0.1649, "step": 35672 }, { "epoch": 0.0632529506311731, "grad_norm": 0.51171875, "learning_rate": 0.0014705161597927342, "loss": 0.1724, "step": 35674 }, { "epoch": 0.06325649679648292, "grad_norm": 0.2060546875, "learning_rate": 0.0014704619180682134, "loss": 0.1817, "step": 35676 }, { "epoch": 0.06326004296179273, "grad_norm": 1.1171875, "learning_rate": 0.0014704076747235075, "loss": 0.2207, "step": 35678 }, { "epoch": 0.06326358912710255, "grad_norm": 0.51953125, "learning_rate": 0.001470353429758854, "loss": 0.4006, "step": 35680 }, { "epoch": 0.06326713529241236, "grad_norm": 0.55078125, "learning_rate": 0.0014702991831744895, "loss": 0.2716, "step": 35682 }, { "epoch": 0.06327068145772217, "grad_norm": 1.8125, "learning_rate": 0.0014702449349706516, "loss": 0.2228, "step": 35684 }, { "epoch": 0.06327422762303199, "grad_norm": 0.5, "learning_rate": 0.0014701906851475768, "loss": 0.1487, "step": 35686 }, { "epoch": 0.0632777737883418, "grad_norm": 0.40234375, "learning_rate": 0.0014701364337055035, "loss": 0.1834, "step": 35688 }, { "epoch": 0.06328131995365162, "grad_norm": 0.77734375, "learning_rate": 0.0014700821806446682, "loss": 0.2339, "step": 35690 }, { "epoch": 0.06328486611896143, "grad_norm": 3.390625, "learning_rate": 0.0014700279259653084, "loss": 0.3506, "step": 35692 }, { "epoch": 0.06328841228427125, "grad_norm": 0.6328125, "learning_rate": 0.0014699736696676615, "loss": 0.2072, "step": 35694 }, { "epoch": 0.06329195844958106, "grad_norm": 0.384765625, "learning_rate": 0.001469919411751965, "loss": 0.206, "step": 35696 }, { "epoch": 0.06329550461489088, "grad_norm": 0.734375, "learning_rate": 0.0014698651522184552, "loss": 0.2326, "step": 35698 }, { "epoch": 0.06329905078020069, "grad_norm": 0.2578125, "learning_rate": 0.0014698108910673707, "loss": 0.1406, "step": 35700 }, { "epoch": 0.0633025969455105, "grad_norm": 0.265625, "learning_rate": 0.001469756628298948, "loss": 0.1872, "step": 35702 }, { "epoch": 0.06330614311082032, "grad_norm": 0.546875, "learning_rate": 0.0014697023639134246, "loss": 0.1999, "step": 35704 }, { "epoch": 0.06330968927613013, "grad_norm": 1.1796875, "learning_rate": 0.0014696480979110374, "loss": 0.172, "step": 35706 }, { "epoch": 0.06331323544143995, "grad_norm": 0.255859375, "learning_rate": 0.0014695938302920247, "loss": 0.1735, "step": 35708 }, { "epoch": 0.06331678160674978, "grad_norm": 0.86328125, "learning_rate": 0.001469539561056623, "loss": 0.1948, "step": 35710 }, { "epoch": 0.06332032777205959, "grad_norm": 0.388671875, "learning_rate": 0.0014694852902050698, "loss": 0.196, "step": 35712 }, { "epoch": 0.06332387393736941, "grad_norm": 1.0234375, "learning_rate": 0.0014694310177376024, "loss": 0.3898, "step": 35714 }, { "epoch": 0.06332742010267922, "grad_norm": 0.40234375, "learning_rate": 0.0014693767436544586, "loss": 0.1977, "step": 35716 }, { "epoch": 0.06333096626798904, "grad_norm": 0.5625, "learning_rate": 0.0014693224679558756, "loss": 0.2351, "step": 35718 }, { "epoch": 0.06333451243329885, "grad_norm": 1.3515625, "learning_rate": 0.0014692681906420906, "loss": 0.1988, "step": 35720 }, { "epoch": 0.06333805859860867, "grad_norm": 0.52734375, "learning_rate": 0.0014692139117133411, "loss": 0.1916, "step": 35722 }, { "epoch": 0.06334160476391848, "grad_norm": 1.6953125, "learning_rate": 0.0014691596311698642, "loss": 0.3414, "step": 35724 }, { "epoch": 0.0633451509292283, "grad_norm": 0.59375, "learning_rate": 0.0014691053490118976, "loss": 0.1557, "step": 35726 }, { "epoch": 0.06334869709453811, "grad_norm": 0.42578125, "learning_rate": 0.0014690510652396788, "loss": 0.1974, "step": 35728 }, { "epoch": 0.06335224325984792, "grad_norm": 0.56640625, "learning_rate": 0.0014689967798534448, "loss": 0.2082, "step": 35730 }, { "epoch": 0.06335578942515774, "grad_norm": 0.455078125, "learning_rate": 0.001468942492853433, "loss": 0.2261, "step": 35732 }, { "epoch": 0.06335933559046755, "grad_norm": 0.79296875, "learning_rate": 0.0014688882042398813, "loss": 0.236, "step": 35734 }, { "epoch": 0.06336288175577737, "grad_norm": 0.275390625, "learning_rate": 0.0014688339140130268, "loss": 0.1602, "step": 35736 }, { "epoch": 0.06336642792108718, "grad_norm": 0.73828125, "learning_rate": 0.0014687796221731072, "loss": 0.2385, "step": 35738 }, { "epoch": 0.063369974086397, "grad_norm": 0.30859375, "learning_rate": 0.0014687253287203595, "loss": 0.2155, "step": 35740 }, { "epoch": 0.06337352025170681, "grad_norm": 0.6484375, "learning_rate": 0.0014686710336550214, "loss": 0.1875, "step": 35742 }, { "epoch": 0.06337706641701663, "grad_norm": 0.59765625, "learning_rate": 0.0014686167369773308, "loss": 0.1862, "step": 35744 }, { "epoch": 0.06338061258232644, "grad_norm": 0.302734375, "learning_rate": 0.0014685624386875243, "loss": 0.1595, "step": 35746 }, { "epoch": 0.06338415874763625, "grad_norm": 0.1669921875, "learning_rate": 0.0014685081387858397, "loss": 0.1804, "step": 35748 }, { "epoch": 0.06338770491294607, "grad_norm": 0.310546875, "learning_rate": 0.0014684538372725148, "loss": 0.1926, "step": 35750 }, { "epoch": 0.06339125107825588, "grad_norm": 0.4375, "learning_rate": 0.0014683995341477868, "loss": 0.1984, "step": 35752 }, { "epoch": 0.0633947972435657, "grad_norm": 0.9140625, "learning_rate": 0.001468345229411893, "loss": 0.1892, "step": 35754 }, { "epoch": 0.06339834340887553, "grad_norm": 0.32421875, "learning_rate": 0.0014682909230650711, "loss": 0.1904, "step": 35756 }, { "epoch": 0.06340188957418534, "grad_norm": 0.2451171875, "learning_rate": 0.001468236615107559, "loss": 0.152, "step": 35758 }, { "epoch": 0.06340543573949516, "grad_norm": 0.337890625, "learning_rate": 0.0014681823055395936, "loss": 0.169, "step": 35760 }, { "epoch": 0.06340898190480497, "grad_norm": 0.44921875, "learning_rate": 0.0014681279943614127, "loss": 0.2105, "step": 35762 }, { "epoch": 0.06341252807011478, "grad_norm": 0.259765625, "learning_rate": 0.0014680736815732539, "loss": 0.2279, "step": 35764 }, { "epoch": 0.0634160742354246, "grad_norm": 1.171875, "learning_rate": 0.0014680193671753542, "loss": 0.3781, "step": 35766 }, { "epoch": 0.06341962040073441, "grad_norm": 0.291015625, "learning_rate": 0.0014679650511679521, "loss": 0.2112, "step": 35768 }, { "epoch": 0.06342316656604423, "grad_norm": 0.9296875, "learning_rate": 0.0014679107335512842, "loss": 0.1608, "step": 35770 }, { "epoch": 0.06342671273135404, "grad_norm": 1.0703125, "learning_rate": 0.0014678564143255888, "loss": 0.331, "step": 35772 }, { "epoch": 0.06343025889666386, "grad_norm": 1.0078125, "learning_rate": 0.001467802093491103, "loss": 0.1572, "step": 35774 }, { "epoch": 0.06343380506197367, "grad_norm": 0.3203125, "learning_rate": 0.0014677477710480645, "loss": 0.2624, "step": 35776 }, { "epoch": 0.06343735122728349, "grad_norm": 0.27734375, "learning_rate": 0.0014676934469967109, "loss": 0.2667, "step": 35778 }, { "epoch": 0.0634408973925933, "grad_norm": 0.60546875, "learning_rate": 0.0014676391213372798, "loss": 0.3599, "step": 35780 }, { "epoch": 0.06344444355790312, "grad_norm": 1.2421875, "learning_rate": 0.0014675847940700084, "loss": 0.2904, "step": 35782 }, { "epoch": 0.06344798972321293, "grad_norm": 0.828125, "learning_rate": 0.0014675304651951347, "loss": 0.3502, "step": 35784 }, { "epoch": 0.06345153588852274, "grad_norm": 0.4140625, "learning_rate": 0.0014674761347128965, "loss": 0.1876, "step": 35786 }, { "epoch": 0.06345508205383256, "grad_norm": 0.55078125, "learning_rate": 0.0014674218026235308, "loss": 0.1856, "step": 35788 }, { "epoch": 0.06345862821914237, "grad_norm": 1.390625, "learning_rate": 0.001467367468927276, "loss": 0.2253, "step": 35790 }, { "epoch": 0.06346217438445219, "grad_norm": 1.3671875, "learning_rate": 0.0014673131336243688, "loss": 0.3229, "step": 35792 }, { "epoch": 0.063465720549762, "grad_norm": 1.2890625, "learning_rate": 0.0014672587967150476, "loss": 0.4037, "step": 35794 }, { "epoch": 0.06346926671507182, "grad_norm": 0.203125, "learning_rate": 0.0014672044581995499, "loss": 0.1905, "step": 35796 }, { "epoch": 0.06347281288038163, "grad_norm": 0.83203125, "learning_rate": 0.001467150118078113, "loss": 0.3098, "step": 35798 }, { "epoch": 0.06347635904569145, "grad_norm": 0.96875, "learning_rate": 0.0014670957763509747, "loss": 0.2511, "step": 35800 }, { "epoch": 0.06347990521100128, "grad_norm": 0.26953125, "learning_rate": 0.0014670414330183728, "loss": 0.1856, "step": 35802 }, { "epoch": 0.06348345137631109, "grad_norm": 0.87109375, "learning_rate": 0.0014669870880805447, "loss": 0.2496, "step": 35804 }, { "epoch": 0.0634869975416209, "grad_norm": 1.4765625, "learning_rate": 0.0014669327415377288, "loss": 0.2348, "step": 35806 }, { "epoch": 0.06349054370693072, "grad_norm": 0.306640625, "learning_rate": 0.0014668783933901614, "loss": 0.1643, "step": 35808 }, { "epoch": 0.06349408987224053, "grad_norm": 0.486328125, "learning_rate": 0.0014668240436380815, "loss": 0.2166, "step": 35810 }, { "epoch": 0.06349763603755035, "grad_norm": 0.365234375, "learning_rate": 0.0014667696922817262, "loss": 0.1747, "step": 35812 }, { "epoch": 0.06350118220286016, "grad_norm": 0.392578125, "learning_rate": 0.0014667153393213336, "loss": 0.1988, "step": 35814 }, { "epoch": 0.06350472836816998, "grad_norm": 0.82421875, "learning_rate": 0.001466660984757141, "loss": 0.1695, "step": 35816 }, { "epoch": 0.06350827453347979, "grad_norm": 4.53125, "learning_rate": 0.001466606628589386, "loss": 0.2611, "step": 35818 }, { "epoch": 0.0635118206987896, "grad_norm": 0.5234375, "learning_rate": 0.0014665522708183065, "loss": 0.2061, "step": 35820 }, { "epoch": 0.06351536686409942, "grad_norm": 2.765625, "learning_rate": 0.0014664979114441406, "loss": 0.3154, "step": 35822 }, { "epoch": 0.06351891302940924, "grad_norm": 0.76953125, "learning_rate": 0.0014664435504671254, "loss": 0.2702, "step": 35824 }, { "epoch": 0.06352245919471905, "grad_norm": 0.73046875, "learning_rate": 0.0014663891878874992, "loss": 0.1982, "step": 35826 }, { "epoch": 0.06352600536002886, "grad_norm": 1.390625, "learning_rate": 0.0014663348237054991, "loss": 0.2206, "step": 35828 }, { "epoch": 0.06352955152533868, "grad_norm": 0.318359375, "learning_rate": 0.001466280457921364, "loss": 0.1573, "step": 35830 }, { "epoch": 0.0635330976906485, "grad_norm": 0.578125, "learning_rate": 0.0014662260905353305, "loss": 0.1895, "step": 35832 }, { "epoch": 0.06353664385595831, "grad_norm": 0.458984375, "learning_rate": 0.0014661717215476368, "loss": 0.2424, "step": 35834 }, { "epoch": 0.06354019002126812, "grad_norm": 0.302734375, "learning_rate": 0.0014661173509585208, "loss": 0.1796, "step": 35836 }, { "epoch": 0.06354373618657794, "grad_norm": 0.734375, "learning_rate": 0.0014660629787682203, "loss": 0.2547, "step": 35838 }, { "epoch": 0.06354728235188775, "grad_norm": 0.80078125, "learning_rate": 0.0014660086049769728, "loss": 0.3369, "step": 35840 }, { "epoch": 0.06355082851719757, "grad_norm": 0.302734375, "learning_rate": 0.0014659542295850164, "loss": 0.195, "step": 35842 }, { "epoch": 0.06355437468250738, "grad_norm": 0.30859375, "learning_rate": 0.0014658998525925884, "loss": 0.2022, "step": 35844 }, { "epoch": 0.06355792084781721, "grad_norm": 0.46484375, "learning_rate": 0.001465845473999927, "loss": 0.1853, "step": 35846 }, { "epoch": 0.06356146701312702, "grad_norm": 0.76171875, "learning_rate": 0.0014657910938072705, "loss": 0.226, "step": 35848 }, { "epoch": 0.06356501317843684, "grad_norm": 0.5078125, "learning_rate": 0.001465736712014856, "loss": 0.1819, "step": 35850 }, { "epoch": 0.06356855934374665, "grad_norm": 0.365234375, "learning_rate": 0.0014656823286229217, "loss": 0.2088, "step": 35852 }, { "epoch": 0.06357210550905647, "grad_norm": 0.84765625, "learning_rate": 0.001465627943631705, "loss": 0.1773, "step": 35854 }, { "epoch": 0.06357565167436628, "grad_norm": 0.7421875, "learning_rate": 0.0014655735570414444, "loss": 0.2128, "step": 35856 }, { "epoch": 0.0635791978396761, "grad_norm": 1.0390625, "learning_rate": 0.001465519168852377, "loss": 0.2544, "step": 35858 }, { "epoch": 0.06358274400498591, "grad_norm": 0.443359375, "learning_rate": 0.0014654647790647412, "loss": 0.2245, "step": 35860 }, { "epoch": 0.06358629017029573, "grad_norm": 0.23828125, "learning_rate": 0.001465410387678775, "loss": 0.2134, "step": 35862 }, { "epoch": 0.06358983633560554, "grad_norm": 0.66796875, "learning_rate": 0.0014653559946947157, "loss": 0.2174, "step": 35864 }, { "epoch": 0.06359338250091535, "grad_norm": 0.66796875, "learning_rate": 0.0014653016001128015, "loss": 0.2381, "step": 35866 }, { "epoch": 0.06359692866622517, "grad_norm": 0.404296875, "learning_rate": 0.0014652472039332707, "loss": 0.1995, "step": 35868 }, { "epoch": 0.06360047483153498, "grad_norm": 0.43359375, "learning_rate": 0.0014651928061563603, "loss": 0.1919, "step": 35870 }, { "epoch": 0.0636040209968448, "grad_norm": 0.6875, "learning_rate": 0.001465138406782309, "loss": 0.389, "step": 35872 }, { "epoch": 0.06360756716215461, "grad_norm": 3.421875, "learning_rate": 0.0014650840058113544, "loss": 0.3533, "step": 35874 }, { "epoch": 0.06361111332746443, "grad_norm": 0.421875, "learning_rate": 0.0014650296032437345, "loss": 0.3744, "step": 35876 }, { "epoch": 0.06361465949277424, "grad_norm": 0.66015625, "learning_rate": 0.0014649751990796869, "loss": 0.1846, "step": 35878 }, { "epoch": 0.06361820565808406, "grad_norm": 1.296875, "learning_rate": 0.00146492079331945, "loss": 0.2693, "step": 35880 }, { "epoch": 0.06362175182339387, "grad_norm": 0.6328125, "learning_rate": 0.0014648663859632611, "loss": 0.1789, "step": 35882 }, { "epoch": 0.06362529798870369, "grad_norm": 0.30859375, "learning_rate": 0.001464811977011359, "loss": 0.2304, "step": 35884 }, { "epoch": 0.0636288441540135, "grad_norm": 0.859375, "learning_rate": 0.0014647575664639809, "loss": 0.2338, "step": 35886 }, { "epoch": 0.06363239031932331, "grad_norm": 0.1845703125, "learning_rate": 0.0014647031543213653, "loss": 0.1566, "step": 35888 }, { "epoch": 0.06363593648463313, "grad_norm": 0.45703125, "learning_rate": 0.0014646487405837502, "loss": 0.2675, "step": 35890 }, { "epoch": 0.06363948264994296, "grad_norm": 1.7109375, "learning_rate": 0.0014645943252513732, "loss": 0.232, "step": 35892 }, { "epoch": 0.06364302881525277, "grad_norm": 0.5859375, "learning_rate": 0.0014645399083244722, "loss": 0.1899, "step": 35894 }, { "epoch": 0.06364657498056259, "grad_norm": 0.68359375, "learning_rate": 0.0014644854898032853, "loss": 0.2973, "step": 35896 }, { "epoch": 0.0636501211458724, "grad_norm": 0.251953125, "learning_rate": 0.0014644310696880507, "loss": 0.1688, "step": 35898 }, { "epoch": 0.06365366731118222, "grad_norm": 0.9609375, "learning_rate": 0.0014643766479790065, "loss": 0.1542, "step": 35900 }, { "epoch": 0.06365721347649203, "grad_norm": 0.384765625, "learning_rate": 0.00146432222467639, "loss": 0.1585, "step": 35902 }, { "epoch": 0.06366075964180185, "grad_norm": 1.1171875, "learning_rate": 0.00146426779978044, "loss": 0.187, "step": 35904 }, { "epoch": 0.06366430580711166, "grad_norm": 5.90625, "learning_rate": 0.0014642133732913943, "loss": 0.2861, "step": 35906 }, { "epoch": 0.06366785197242147, "grad_norm": 0.84375, "learning_rate": 0.0014641589452094909, "loss": 0.2033, "step": 35908 }, { "epoch": 0.06367139813773129, "grad_norm": 0.55078125, "learning_rate": 0.001464104515534968, "loss": 0.4428, "step": 35910 }, { "epoch": 0.0636749443030411, "grad_norm": 0.39453125, "learning_rate": 0.0014640500842680632, "loss": 0.1882, "step": 35912 }, { "epoch": 0.06367849046835092, "grad_norm": 0.259765625, "learning_rate": 0.0014639956514090145, "loss": 0.3606, "step": 35914 }, { "epoch": 0.06368203663366073, "grad_norm": 0.474609375, "learning_rate": 0.0014639412169580605, "loss": 0.2765, "step": 35916 }, { "epoch": 0.06368558279897055, "grad_norm": 0.333984375, "learning_rate": 0.001463886780915439, "loss": 0.1718, "step": 35918 }, { "epoch": 0.06368912896428036, "grad_norm": 3.015625, "learning_rate": 0.0014638323432813885, "loss": 0.2021, "step": 35920 }, { "epoch": 0.06369267512959018, "grad_norm": 0.33203125, "learning_rate": 0.001463777904056146, "loss": 0.1819, "step": 35922 }, { "epoch": 0.06369622129489999, "grad_norm": 0.53125, "learning_rate": 0.0014637234632399508, "loss": 0.1789, "step": 35924 }, { "epoch": 0.0636997674602098, "grad_norm": 0.267578125, "learning_rate": 0.00146366902083304, "loss": 0.2096, "step": 35926 }, { "epoch": 0.06370331362551962, "grad_norm": 3.53125, "learning_rate": 0.0014636145768356526, "loss": 0.3308, "step": 35928 }, { "epoch": 0.06370685979082943, "grad_norm": 0.45703125, "learning_rate": 0.001463560131248026, "loss": 0.2012, "step": 35930 }, { "epoch": 0.06371040595613925, "grad_norm": 1.4296875, "learning_rate": 0.0014635056840703983, "loss": 0.3634, "step": 35932 }, { "epoch": 0.06371395212144906, "grad_norm": 0.279296875, "learning_rate": 0.0014634512353030084, "loss": 0.1549, "step": 35934 }, { "epoch": 0.06371749828675888, "grad_norm": 0.4453125, "learning_rate": 0.0014633967849460936, "loss": 0.1574, "step": 35936 }, { "epoch": 0.0637210444520687, "grad_norm": 0.28515625, "learning_rate": 0.0014633423329998923, "loss": 0.1861, "step": 35938 }, { "epoch": 0.06372459061737852, "grad_norm": 0.2578125, "learning_rate": 0.0014632878794646428, "loss": 0.2551, "step": 35940 }, { "epoch": 0.06372813678268834, "grad_norm": 0.322265625, "learning_rate": 0.001463233424340583, "loss": 0.2012, "step": 35942 }, { "epoch": 0.06373168294799815, "grad_norm": 0.8046875, "learning_rate": 0.0014631789676279516, "loss": 0.2697, "step": 35944 }, { "epoch": 0.06373522911330796, "grad_norm": 0.6796875, "learning_rate": 0.0014631245093269863, "loss": 0.2294, "step": 35946 }, { "epoch": 0.06373877527861778, "grad_norm": 0.271484375, "learning_rate": 0.001463070049437925, "loss": 0.1541, "step": 35948 }, { "epoch": 0.0637423214439276, "grad_norm": 0.474609375, "learning_rate": 0.0014630155879610063, "loss": 0.1846, "step": 35950 }, { "epoch": 0.06374586760923741, "grad_norm": 0.5859375, "learning_rate": 0.001462961124896468, "loss": 0.1733, "step": 35952 }, { "epoch": 0.06374941377454722, "grad_norm": 1.0390625, "learning_rate": 0.0014629066602445493, "loss": 0.2277, "step": 35954 }, { "epoch": 0.06375295993985704, "grad_norm": 0.2021484375, "learning_rate": 0.0014628521940054872, "loss": 0.3299, "step": 35956 }, { "epoch": 0.06375650610516685, "grad_norm": 0.34765625, "learning_rate": 0.0014627977261795204, "loss": 0.2162, "step": 35958 }, { "epoch": 0.06376005227047667, "grad_norm": 0.462890625, "learning_rate": 0.001462743256766887, "loss": 0.2071, "step": 35960 }, { "epoch": 0.06376359843578648, "grad_norm": 0.6796875, "learning_rate": 0.0014626887857678253, "loss": 0.318, "step": 35962 }, { "epoch": 0.0637671446010963, "grad_norm": 0.31640625, "learning_rate": 0.0014626343131825738, "loss": 0.1398, "step": 35964 }, { "epoch": 0.06377069076640611, "grad_norm": 0.703125, "learning_rate": 0.0014625798390113705, "loss": 0.1872, "step": 35966 }, { "epoch": 0.06377423693171592, "grad_norm": 2.4375, "learning_rate": 0.0014625253632544533, "loss": 0.542, "step": 35968 }, { "epoch": 0.06377778309702574, "grad_norm": 0.5390625, "learning_rate": 0.0014624708859120605, "loss": 0.2541, "step": 35970 }, { "epoch": 0.06378132926233555, "grad_norm": 0.271484375, "learning_rate": 0.0014624164069844312, "loss": 0.1689, "step": 35972 }, { "epoch": 0.06378487542764537, "grad_norm": 0.4765625, "learning_rate": 0.0014623619264718026, "loss": 0.1664, "step": 35974 }, { "epoch": 0.06378842159295518, "grad_norm": 0.29296875, "learning_rate": 0.0014623074443744135, "loss": 0.2016, "step": 35976 }, { "epoch": 0.063791967758265, "grad_norm": 0.162109375, "learning_rate": 0.0014622529606925024, "loss": 0.1547, "step": 35978 }, { "epoch": 0.06379551392357481, "grad_norm": 0.55078125, "learning_rate": 0.0014621984754263069, "loss": 0.1702, "step": 35980 }, { "epoch": 0.06379906008888464, "grad_norm": 0.609375, "learning_rate": 0.0014621439885760658, "loss": 0.2038, "step": 35982 }, { "epoch": 0.06380260625419445, "grad_norm": 0.376953125, "learning_rate": 0.0014620895001420174, "loss": 0.2626, "step": 35984 }, { "epoch": 0.06380615241950427, "grad_norm": 0.36328125, "learning_rate": 0.0014620350101243998, "loss": 0.1908, "step": 35986 }, { "epoch": 0.06380969858481408, "grad_norm": 0.62109375, "learning_rate": 0.0014619805185234515, "loss": 0.1927, "step": 35988 }, { "epoch": 0.0638132447501239, "grad_norm": 0.87890625, "learning_rate": 0.0014619260253394104, "loss": 0.1678, "step": 35990 }, { "epoch": 0.06381679091543371, "grad_norm": 0.283203125, "learning_rate": 0.001461871530572515, "loss": 0.1677, "step": 35992 }, { "epoch": 0.06382033708074353, "grad_norm": 0.345703125, "learning_rate": 0.001461817034223004, "loss": 0.1877, "step": 35994 }, { "epoch": 0.06382388324605334, "grad_norm": 0.451171875, "learning_rate": 0.0014617625362911152, "loss": 0.1669, "step": 35996 }, { "epoch": 0.06382742941136316, "grad_norm": 0.53515625, "learning_rate": 0.0014617080367770874, "loss": 0.2157, "step": 35998 }, { "epoch": 0.06383097557667297, "grad_norm": 0.2373046875, "learning_rate": 0.0014616535356811585, "loss": 0.2111, "step": 36000 }, { "epoch": 0.06383452174198279, "grad_norm": 0.55859375, "learning_rate": 0.0014615990330035674, "loss": 0.1653, "step": 36002 }, { "epoch": 0.0638380679072926, "grad_norm": 0.578125, "learning_rate": 0.0014615445287445522, "loss": 0.1851, "step": 36004 }, { "epoch": 0.06384161407260242, "grad_norm": 0.96484375, "learning_rate": 0.0014614900229043511, "loss": 0.1844, "step": 36006 }, { "epoch": 0.06384516023791223, "grad_norm": 0.423828125, "learning_rate": 0.0014614355154832024, "loss": 0.1638, "step": 36008 }, { "epoch": 0.06384870640322204, "grad_norm": 0.64453125, "learning_rate": 0.0014613810064813453, "loss": 0.1899, "step": 36010 }, { "epoch": 0.06385225256853186, "grad_norm": 0.9453125, "learning_rate": 0.001461326495899017, "loss": 0.2314, "step": 36012 }, { "epoch": 0.06385579873384167, "grad_norm": 0.875, "learning_rate": 0.0014612719837364565, "loss": 0.1954, "step": 36014 }, { "epoch": 0.06385934489915149, "grad_norm": 0.291015625, "learning_rate": 0.0014612174699939024, "loss": 0.164, "step": 36016 }, { "epoch": 0.0638628910644613, "grad_norm": 0.451171875, "learning_rate": 0.0014611629546715933, "loss": 0.2117, "step": 36018 }, { "epoch": 0.06386643722977112, "grad_norm": 0.326171875, "learning_rate": 0.0014611084377697669, "loss": 0.2418, "step": 36020 }, { "epoch": 0.06386998339508093, "grad_norm": 0.90234375, "learning_rate": 0.0014610539192886616, "loss": 0.226, "step": 36022 }, { "epoch": 0.06387352956039075, "grad_norm": 0.275390625, "learning_rate": 0.0014609993992285165, "loss": 0.1898, "step": 36024 }, { "epoch": 0.06387707572570056, "grad_norm": 0.3671875, "learning_rate": 0.0014609448775895696, "loss": 0.1493, "step": 36026 }, { "epoch": 0.06388062189101039, "grad_norm": 0.408203125, "learning_rate": 0.0014608903543720598, "loss": 0.1625, "step": 36028 }, { "epoch": 0.0638841680563202, "grad_norm": 0.1953125, "learning_rate": 0.0014608358295762247, "loss": 0.1805, "step": 36030 }, { "epoch": 0.06388771422163002, "grad_norm": 0.390625, "learning_rate": 0.0014607813032023037, "loss": 0.1854, "step": 36032 }, { "epoch": 0.06389126038693983, "grad_norm": 0.333984375, "learning_rate": 0.0014607267752505346, "loss": 0.1879, "step": 36034 }, { "epoch": 0.06389480655224965, "grad_norm": 1.140625, "learning_rate": 0.0014606722457211564, "loss": 0.2819, "step": 36036 }, { "epoch": 0.06389835271755946, "grad_norm": 0.375, "learning_rate": 0.0014606177146144074, "loss": 0.2314, "step": 36038 }, { "epoch": 0.06390189888286928, "grad_norm": 1.7734375, "learning_rate": 0.0014605631819305253, "loss": 0.1631, "step": 36040 }, { "epoch": 0.06390544504817909, "grad_norm": 0.55859375, "learning_rate": 0.00146050864766975, "loss": 0.2352, "step": 36042 }, { "epoch": 0.0639089912134889, "grad_norm": 0.96484375, "learning_rate": 0.001460454111832319, "loss": 0.2413, "step": 36044 }, { "epoch": 0.06391253737879872, "grad_norm": 0.5, "learning_rate": 0.0014603995744184712, "loss": 0.2004, "step": 36046 }, { "epoch": 0.06391608354410853, "grad_norm": 2.625, "learning_rate": 0.0014603450354284447, "loss": 0.2188, "step": 36048 }, { "epoch": 0.06391962970941835, "grad_norm": 0.2197265625, "learning_rate": 0.0014602904948624784, "loss": 0.1695, "step": 36050 }, { "epoch": 0.06392317587472816, "grad_norm": 0.490234375, "learning_rate": 0.001460235952720811, "loss": 0.199, "step": 36052 }, { "epoch": 0.06392672204003798, "grad_norm": 0.416015625, "learning_rate": 0.0014601814090036804, "loss": 0.1401, "step": 36054 }, { "epoch": 0.06393026820534779, "grad_norm": 0.322265625, "learning_rate": 0.001460126863711326, "loss": 0.3583, "step": 36056 }, { "epoch": 0.06393381437065761, "grad_norm": 0.4296875, "learning_rate": 0.0014600723168439855, "loss": 0.1938, "step": 36058 }, { "epoch": 0.06393736053596742, "grad_norm": 0.298828125, "learning_rate": 0.0014600177684018982, "loss": 0.1683, "step": 36060 }, { "epoch": 0.06394090670127724, "grad_norm": 0.69140625, "learning_rate": 0.001459963218385302, "loss": 0.3138, "step": 36062 }, { "epoch": 0.06394445286658705, "grad_norm": 0.69140625, "learning_rate": 0.0014599086667944359, "loss": 0.4388, "step": 36064 }, { "epoch": 0.06394799903189687, "grad_norm": 0.39453125, "learning_rate": 0.0014598541136295385, "loss": 0.1993, "step": 36066 }, { "epoch": 0.06395154519720668, "grad_norm": 0.71484375, "learning_rate": 0.001459799558890848, "loss": 0.2054, "step": 36068 }, { "epoch": 0.0639550913625165, "grad_norm": 0.265625, "learning_rate": 0.0014597450025786033, "loss": 0.2158, "step": 36070 }, { "epoch": 0.06395863752782631, "grad_norm": 0.22265625, "learning_rate": 0.0014596904446930428, "loss": 0.1789, "step": 36072 }, { "epoch": 0.06396218369313614, "grad_norm": 0.60546875, "learning_rate": 0.0014596358852344055, "loss": 0.1776, "step": 36074 }, { "epoch": 0.06396572985844595, "grad_norm": 0.486328125, "learning_rate": 0.0014595813242029296, "loss": 0.2003, "step": 36076 }, { "epoch": 0.06396927602375577, "grad_norm": 0.421875, "learning_rate": 0.0014595267615988541, "loss": 0.1757, "step": 36078 }, { "epoch": 0.06397282218906558, "grad_norm": 1.078125, "learning_rate": 0.001459472197422417, "loss": 0.2055, "step": 36080 }, { "epoch": 0.0639763683543754, "grad_norm": 1.859375, "learning_rate": 0.0014594176316738575, "loss": 0.2553, "step": 36082 }, { "epoch": 0.06397991451968521, "grad_norm": 0.380859375, "learning_rate": 0.0014593630643534139, "loss": 0.2045, "step": 36084 }, { "epoch": 0.06398346068499502, "grad_norm": 6.90625, "learning_rate": 0.0014593084954613248, "loss": 0.2533, "step": 36086 }, { "epoch": 0.06398700685030484, "grad_norm": 1.3203125, "learning_rate": 0.0014592539249978298, "loss": 0.2083, "step": 36088 }, { "epoch": 0.06399055301561465, "grad_norm": 0.478515625, "learning_rate": 0.0014591993529631663, "loss": 0.1815, "step": 36090 }, { "epoch": 0.06399409918092447, "grad_norm": 0.48046875, "learning_rate": 0.0014591447793575736, "loss": 0.2067, "step": 36092 }, { "epoch": 0.06399764534623428, "grad_norm": 1.4921875, "learning_rate": 0.00145909020418129, "loss": 0.2523, "step": 36094 }, { "epoch": 0.0640011915115441, "grad_norm": 0.345703125, "learning_rate": 0.0014590356274345546, "loss": 0.1845, "step": 36096 }, { "epoch": 0.06400473767685391, "grad_norm": 0.2890625, "learning_rate": 0.001458981049117606, "loss": 0.4264, "step": 36098 }, { "epoch": 0.06400828384216373, "grad_norm": 0.8046875, "learning_rate": 0.0014589264692306827, "loss": 0.1963, "step": 36100 }, { "epoch": 0.06401183000747354, "grad_norm": 0.48828125, "learning_rate": 0.0014588718877740238, "loss": 0.1778, "step": 36102 }, { "epoch": 0.06401537617278336, "grad_norm": 0.421875, "learning_rate": 0.0014588173047478672, "loss": 0.1896, "step": 36104 }, { "epoch": 0.06401892233809317, "grad_norm": 1.5625, "learning_rate": 0.0014587627201524528, "loss": 0.2993, "step": 36106 }, { "epoch": 0.06402246850340299, "grad_norm": 0.494140625, "learning_rate": 0.0014587081339880181, "loss": 0.1684, "step": 36108 }, { "epoch": 0.0640260146687128, "grad_norm": 0.65234375, "learning_rate": 0.0014586535462548025, "loss": 0.2066, "step": 36110 }, { "epoch": 0.06402956083402261, "grad_norm": 0.369140625, "learning_rate": 0.0014585989569530448, "loss": 0.1718, "step": 36112 }, { "epoch": 0.06403310699933243, "grad_norm": 0.8046875, "learning_rate": 0.0014585443660829836, "loss": 0.2311, "step": 36114 }, { "epoch": 0.06403665316464224, "grad_norm": 1.0546875, "learning_rate": 0.0014584897736448575, "loss": 0.2476, "step": 36116 }, { "epoch": 0.06404019932995206, "grad_norm": 0.55078125, "learning_rate": 0.0014584351796389052, "loss": 0.2361, "step": 36118 }, { "epoch": 0.06404374549526189, "grad_norm": 0.345703125, "learning_rate": 0.001458380584065366, "loss": 0.1962, "step": 36120 }, { "epoch": 0.0640472916605717, "grad_norm": 0.47265625, "learning_rate": 0.0014583259869244778, "loss": 0.1581, "step": 36122 }, { "epoch": 0.06405083782588152, "grad_norm": 0.5546875, "learning_rate": 0.00145827138821648, "loss": 0.2152, "step": 36124 }, { "epoch": 0.06405438399119133, "grad_norm": 0.361328125, "learning_rate": 0.0014582167879416115, "loss": 0.1623, "step": 36126 }, { "epoch": 0.06405793015650114, "grad_norm": 0.84375, "learning_rate": 0.0014581621861001106, "loss": 0.2034, "step": 36128 }, { "epoch": 0.06406147632181096, "grad_norm": 1.0234375, "learning_rate": 0.0014581075826922165, "loss": 0.189, "step": 36130 }, { "epoch": 0.06406502248712077, "grad_norm": 0.85546875, "learning_rate": 0.001458052977718168, "loss": 0.1858, "step": 36132 }, { "epoch": 0.06406856865243059, "grad_norm": 0.2216796875, "learning_rate": 0.001457998371178204, "loss": 0.1899, "step": 36134 }, { "epoch": 0.0640721148177404, "grad_norm": 0.376953125, "learning_rate": 0.0014579437630725623, "loss": 0.1974, "step": 36136 }, { "epoch": 0.06407566098305022, "grad_norm": 0.376953125, "learning_rate": 0.001457889153401483, "loss": 0.1831, "step": 36138 }, { "epoch": 0.06407920714836003, "grad_norm": 1.3828125, "learning_rate": 0.001457834542165204, "loss": 0.2341, "step": 36140 }, { "epoch": 0.06408275331366985, "grad_norm": 0.39453125, "learning_rate": 0.0014577799293639649, "loss": 0.1707, "step": 36142 }, { "epoch": 0.06408629947897966, "grad_norm": 0.31640625, "learning_rate": 0.001457725314998004, "loss": 0.2103, "step": 36144 }, { "epoch": 0.06408984564428948, "grad_norm": 0.41796875, "learning_rate": 0.0014576706990675609, "loss": 0.1736, "step": 36146 }, { "epoch": 0.06409339180959929, "grad_norm": 0.2373046875, "learning_rate": 0.0014576160815728731, "loss": 0.1757, "step": 36148 }, { "epoch": 0.0640969379749091, "grad_norm": 1.4609375, "learning_rate": 0.0014575614625141808, "loss": 0.201, "step": 36150 }, { "epoch": 0.06410048414021892, "grad_norm": 0.41796875, "learning_rate": 0.0014575068418917225, "loss": 0.155, "step": 36152 }, { "epoch": 0.06410403030552873, "grad_norm": 0.78515625, "learning_rate": 0.001457452219705737, "loss": 0.2339, "step": 36154 }, { "epoch": 0.06410757647083855, "grad_norm": 0.7109375, "learning_rate": 0.0014573975959564628, "loss": 0.2077, "step": 36156 }, { "epoch": 0.06411112263614836, "grad_norm": 0.4296875, "learning_rate": 0.0014573429706441392, "loss": 0.1272, "step": 36158 }, { "epoch": 0.06411466880145818, "grad_norm": 0.46484375, "learning_rate": 0.0014572883437690045, "loss": 0.2097, "step": 36160 }, { "epoch": 0.06411821496676799, "grad_norm": 0.306640625, "learning_rate": 0.0014572337153312988, "loss": 0.2857, "step": 36162 }, { "epoch": 0.06412176113207782, "grad_norm": 0.36328125, "learning_rate": 0.0014571790853312601, "loss": 0.2497, "step": 36164 }, { "epoch": 0.06412530729738763, "grad_norm": 0.3125, "learning_rate": 0.0014571244537691277, "loss": 0.1233, "step": 36166 }, { "epoch": 0.06412885346269745, "grad_norm": 0.51953125, "learning_rate": 0.0014570698206451402, "loss": 0.2454, "step": 36168 }, { "epoch": 0.06413239962800726, "grad_norm": 0.291015625, "learning_rate": 0.001457015185959537, "loss": 0.2182, "step": 36170 }, { "epoch": 0.06413594579331708, "grad_norm": 0.7734375, "learning_rate": 0.0014569605497125565, "loss": 0.2086, "step": 36172 }, { "epoch": 0.06413949195862689, "grad_norm": 0.40625, "learning_rate": 0.0014569059119044381, "loss": 0.2193, "step": 36174 }, { "epoch": 0.06414303812393671, "grad_norm": 0.365234375, "learning_rate": 0.0014568512725354203, "loss": 0.2536, "step": 36176 }, { "epoch": 0.06414658428924652, "grad_norm": 0.46484375, "learning_rate": 0.0014567966316057423, "loss": 0.2184, "step": 36178 }, { "epoch": 0.06415013045455634, "grad_norm": 0.314453125, "learning_rate": 0.0014567419891156432, "loss": 0.1838, "step": 36180 }, { "epoch": 0.06415367661986615, "grad_norm": 0.234375, "learning_rate": 0.0014566873450653618, "loss": 0.1881, "step": 36182 }, { "epoch": 0.06415722278517597, "grad_norm": 0.43359375, "learning_rate": 0.0014566326994551373, "loss": 0.2014, "step": 36184 }, { "epoch": 0.06416076895048578, "grad_norm": 0.55859375, "learning_rate": 0.0014565780522852085, "loss": 0.1902, "step": 36186 }, { "epoch": 0.0641643151157956, "grad_norm": 0.71875, "learning_rate": 0.0014565234035558143, "loss": 0.1709, "step": 36188 }, { "epoch": 0.06416786128110541, "grad_norm": 0.361328125, "learning_rate": 0.001456468753267194, "loss": 0.2233, "step": 36190 }, { "epoch": 0.06417140744641522, "grad_norm": 0.30078125, "learning_rate": 0.0014564141014195863, "loss": 0.1721, "step": 36192 }, { "epoch": 0.06417495361172504, "grad_norm": 0.251953125, "learning_rate": 0.0014563594480132302, "loss": 0.1611, "step": 36194 }, { "epoch": 0.06417849977703485, "grad_norm": 0.52734375, "learning_rate": 0.0014563047930483655, "loss": 0.4987, "step": 36196 }, { "epoch": 0.06418204594234467, "grad_norm": 0.98828125, "learning_rate": 0.0014562501365252298, "loss": 0.1836, "step": 36198 }, { "epoch": 0.06418559210765448, "grad_norm": 0.388671875, "learning_rate": 0.0014561954784440636, "loss": 0.1689, "step": 36200 }, { "epoch": 0.0641891382729643, "grad_norm": 1.4765625, "learning_rate": 0.0014561408188051047, "loss": 0.2725, "step": 36202 }, { "epoch": 0.06419268443827411, "grad_norm": 0.458984375, "learning_rate": 0.0014560861576085934, "loss": 0.1701, "step": 36204 }, { "epoch": 0.06419623060358393, "grad_norm": 0.98828125, "learning_rate": 0.0014560314948547674, "loss": 0.1656, "step": 36206 }, { "epoch": 0.06419977676889374, "grad_norm": 0.1923828125, "learning_rate": 0.0014559768305438669, "loss": 0.1546, "step": 36208 }, { "epoch": 0.06420332293420357, "grad_norm": 0.32421875, "learning_rate": 0.0014559221646761303, "loss": 0.1867, "step": 36210 }, { "epoch": 0.06420686909951338, "grad_norm": 0.337890625, "learning_rate": 0.0014558674972517968, "loss": 0.1567, "step": 36212 }, { "epoch": 0.0642104152648232, "grad_norm": 0.419921875, "learning_rate": 0.001455812828271106, "loss": 0.3284, "step": 36214 }, { "epoch": 0.06421396143013301, "grad_norm": 0.6875, "learning_rate": 0.0014557581577342962, "loss": 0.2036, "step": 36216 }, { "epoch": 0.06421750759544283, "grad_norm": 0.392578125, "learning_rate": 0.0014557034856416068, "loss": 0.1953, "step": 36218 }, { "epoch": 0.06422105376075264, "grad_norm": 0.55859375, "learning_rate": 0.0014556488119932772, "loss": 0.2172, "step": 36220 }, { "epoch": 0.06422459992606246, "grad_norm": 0.2333984375, "learning_rate": 0.0014555941367895464, "loss": 0.2309, "step": 36222 }, { "epoch": 0.06422814609137227, "grad_norm": 0.3046875, "learning_rate": 0.0014555394600306532, "loss": 0.1579, "step": 36224 }, { "epoch": 0.06423169225668209, "grad_norm": 0.55078125, "learning_rate": 0.001455484781716837, "loss": 0.1974, "step": 36226 }, { "epoch": 0.0642352384219919, "grad_norm": 0.439453125, "learning_rate": 0.0014554301018483368, "loss": 0.129, "step": 36228 }, { "epoch": 0.06423878458730171, "grad_norm": 0.578125, "learning_rate": 0.0014553754204253916, "loss": 0.2316, "step": 36230 }, { "epoch": 0.06424233075261153, "grad_norm": 0.19140625, "learning_rate": 0.0014553207374482411, "loss": 0.2083, "step": 36232 }, { "epoch": 0.06424587691792134, "grad_norm": 0.7890625, "learning_rate": 0.001455266052917124, "loss": 0.2225, "step": 36234 }, { "epoch": 0.06424942308323116, "grad_norm": 0.451171875, "learning_rate": 0.0014552113668322795, "loss": 0.1507, "step": 36236 }, { "epoch": 0.06425296924854097, "grad_norm": 0.29296875, "learning_rate": 0.0014551566791939465, "loss": 0.1725, "step": 36238 }, { "epoch": 0.06425651541385079, "grad_norm": 0.39453125, "learning_rate": 0.001455101990002365, "loss": 0.32, "step": 36240 }, { "epoch": 0.0642600615791606, "grad_norm": 0.91015625, "learning_rate": 0.0014550472992577732, "loss": 0.2742, "step": 36242 }, { "epoch": 0.06426360774447042, "grad_norm": 5.21875, "learning_rate": 0.0014549926069604112, "loss": 0.3501, "step": 36244 }, { "epoch": 0.06426715390978023, "grad_norm": 0.86328125, "learning_rate": 0.0014549379131105176, "loss": 0.1706, "step": 36246 }, { "epoch": 0.06427070007509005, "grad_norm": 7.4375, "learning_rate": 0.0014548832177083317, "loss": 0.1495, "step": 36248 }, { "epoch": 0.06427424624039986, "grad_norm": 0.357421875, "learning_rate": 0.0014548285207540929, "loss": 0.2269, "step": 36250 }, { "epoch": 0.06427779240570967, "grad_norm": 0.388671875, "learning_rate": 0.0014547738222480398, "loss": 0.1996, "step": 36252 }, { "epoch": 0.06428133857101949, "grad_norm": 0.498046875, "learning_rate": 0.0014547191221904125, "loss": 0.1657, "step": 36254 }, { "epoch": 0.06428488473632932, "grad_norm": 0.5703125, "learning_rate": 0.0014546644205814495, "loss": 0.2608, "step": 36256 }, { "epoch": 0.06428843090163913, "grad_norm": 0.443359375, "learning_rate": 0.0014546097174213907, "loss": 0.1732, "step": 36258 }, { "epoch": 0.06429197706694895, "grad_norm": 0.7109375, "learning_rate": 0.0014545550127104746, "loss": 0.1905, "step": 36260 }, { "epoch": 0.06429552323225876, "grad_norm": 0.240234375, "learning_rate": 0.0014545003064489407, "loss": 0.1904, "step": 36262 }, { "epoch": 0.06429906939756858, "grad_norm": 0.333984375, "learning_rate": 0.0014544455986370288, "loss": 0.1509, "step": 36264 }, { "epoch": 0.06430261556287839, "grad_norm": 0.306640625, "learning_rate": 0.0014543908892749777, "loss": 0.1942, "step": 36266 }, { "epoch": 0.0643061617281882, "grad_norm": 0.53125, "learning_rate": 0.0014543361783630265, "loss": 0.1972, "step": 36268 }, { "epoch": 0.06430970789349802, "grad_norm": 0.369140625, "learning_rate": 0.0014542814659014149, "loss": 0.1959, "step": 36270 }, { "epoch": 0.06431325405880783, "grad_norm": 1.4921875, "learning_rate": 0.0014542267518903819, "loss": 0.2879, "step": 36272 }, { "epoch": 0.06431680022411765, "grad_norm": 1.0625, "learning_rate": 0.0014541720363301666, "loss": 0.1796, "step": 36274 }, { "epoch": 0.06432034638942746, "grad_norm": 0.4453125, "learning_rate": 0.0014541173192210088, "loss": 0.1574, "step": 36276 }, { "epoch": 0.06432389255473728, "grad_norm": 0.294921875, "learning_rate": 0.0014540626005631477, "loss": 0.1249, "step": 36278 }, { "epoch": 0.06432743872004709, "grad_norm": 0.84375, "learning_rate": 0.0014540078803568222, "loss": 0.2021, "step": 36280 }, { "epoch": 0.0643309848853569, "grad_norm": 1.0625, "learning_rate": 0.001453953158602272, "loss": 0.1457, "step": 36282 }, { "epoch": 0.06433453105066672, "grad_norm": 0.3828125, "learning_rate": 0.001453898435299736, "loss": 0.2111, "step": 36284 }, { "epoch": 0.06433807721597654, "grad_norm": 0.236328125, "learning_rate": 0.001453843710449454, "loss": 0.188, "step": 36286 }, { "epoch": 0.06434162338128635, "grad_norm": 0.2451171875, "learning_rate": 0.0014537889840516652, "loss": 0.1982, "step": 36288 }, { "epoch": 0.06434516954659616, "grad_norm": 1.09375, "learning_rate": 0.0014537342561066087, "loss": 0.1983, "step": 36290 }, { "epoch": 0.06434871571190598, "grad_norm": 0.54296875, "learning_rate": 0.0014536795266145242, "loss": 0.2552, "step": 36292 }, { "epoch": 0.0643522618772158, "grad_norm": 0.49609375, "learning_rate": 0.0014536247955756509, "loss": 0.2157, "step": 36294 }, { "epoch": 0.06435580804252561, "grad_norm": 0.35546875, "learning_rate": 0.001453570062990228, "loss": 0.1916, "step": 36296 }, { "epoch": 0.06435935420783542, "grad_norm": 0.84375, "learning_rate": 0.0014535153288584952, "loss": 0.2325, "step": 36298 }, { "epoch": 0.06436290037314525, "grad_norm": 0.72265625, "learning_rate": 0.0014534605931806916, "loss": 0.1888, "step": 36300 }, { "epoch": 0.06436644653845507, "grad_norm": 0.2275390625, "learning_rate": 0.0014534058559570568, "loss": 0.1459, "step": 36302 }, { "epoch": 0.06436999270376488, "grad_norm": 0.412109375, "learning_rate": 0.00145335111718783, "loss": 0.3498, "step": 36304 }, { "epoch": 0.0643735388690747, "grad_norm": 0.7265625, "learning_rate": 0.0014532963768732505, "loss": 0.3083, "step": 36306 }, { "epoch": 0.06437708503438451, "grad_norm": 0.51953125, "learning_rate": 0.0014532416350135583, "loss": 0.1513, "step": 36308 }, { "epoch": 0.06438063119969432, "grad_norm": 0.80078125, "learning_rate": 0.001453186891608992, "loss": 0.1618, "step": 36310 }, { "epoch": 0.06438417736500414, "grad_norm": 0.251953125, "learning_rate": 0.0014531321466597913, "loss": 0.1669, "step": 36312 }, { "epoch": 0.06438772353031395, "grad_norm": 0.8984375, "learning_rate": 0.0014530774001661959, "loss": 0.4763, "step": 36314 }, { "epoch": 0.06439126969562377, "grad_norm": 0.2890625, "learning_rate": 0.001453022652128445, "loss": 0.1652, "step": 36316 }, { "epoch": 0.06439481586093358, "grad_norm": 0.478515625, "learning_rate": 0.0014529679025467783, "loss": 0.1722, "step": 36318 }, { "epoch": 0.0643983620262434, "grad_norm": 0.185546875, "learning_rate": 0.001452913151421435, "loss": 0.175, "step": 36320 }, { "epoch": 0.06440190819155321, "grad_norm": 1.40625, "learning_rate": 0.0014528583987526544, "loss": 0.4955, "step": 36322 }, { "epoch": 0.06440545435686303, "grad_norm": 0.283203125, "learning_rate": 0.001452803644540676, "loss": 0.21, "step": 36324 }, { "epoch": 0.06440900052217284, "grad_norm": 1.3203125, "learning_rate": 0.0014527488887857397, "loss": 0.1896, "step": 36326 }, { "epoch": 0.06441254668748266, "grad_norm": 3.234375, "learning_rate": 0.0014526941314880841, "loss": 0.3934, "step": 36328 }, { "epoch": 0.06441609285279247, "grad_norm": 0.41015625, "learning_rate": 0.00145263937264795, "loss": 0.1678, "step": 36330 }, { "epoch": 0.06441963901810228, "grad_norm": 0.341796875, "learning_rate": 0.0014525846122655755, "loss": 0.2592, "step": 36332 }, { "epoch": 0.0644231851834121, "grad_norm": 0.359375, "learning_rate": 0.001452529850341201, "loss": 0.1992, "step": 36334 }, { "epoch": 0.06442673134872191, "grad_norm": 0.67578125, "learning_rate": 0.0014524750868750657, "loss": 0.1976, "step": 36336 }, { "epoch": 0.06443027751403173, "grad_norm": 0.39453125, "learning_rate": 0.001452420321867409, "loss": 0.2053, "step": 36338 }, { "epoch": 0.06443382367934154, "grad_norm": 0.458984375, "learning_rate": 0.0014523655553184706, "loss": 0.2175, "step": 36340 }, { "epoch": 0.06443736984465136, "grad_norm": 0.234375, "learning_rate": 0.0014523107872284901, "loss": 0.1597, "step": 36342 }, { "epoch": 0.06444091600996117, "grad_norm": 2.109375, "learning_rate": 0.0014522560175977066, "loss": 0.2435, "step": 36344 }, { "epoch": 0.064444462175271, "grad_norm": 0.76171875, "learning_rate": 0.0014522012464263602, "loss": 0.2393, "step": 36346 }, { "epoch": 0.06444800834058081, "grad_norm": 0.48046875, "learning_rate": 0.0014521464737146897, "loss": 0.1559, "step": 36348 }, { "epoch": 0.06445155450589063, "grad_norm": 0.578125, "learning_rate": 0.0014520916994629352, "loss": 0.2315, "step": 36350 }, { "epoch": 0.06445510067120044, "grad_norm": 0.84375, "learning_rate": 0.0014520369236713365, "loss": 0.2131, "step": 36352 }, { "epoch": 0.06445864683651026, "grad_norm": 0.287109375, "learning_rate": 0.0014519821463401322, "loss": 0.168, "step": 36354 }, { "epoch": 0.06446219300182007, "grad_norm": 0.2578125, "learning_rate": 0.001451927367469563, "loss": 0.1601, "step": 36356 }, { "epoch": 0.06446573916712989, "grad_norm": 0.3125, "learning_rate": 0.0014518725870598676, "loss": 0.1772, "step": 36358 }, { "epoch": 0.0644692853324397, "grad_norm": 0.1630859375, "learning_rate": 0.001451817805111286, "loss": 0.1685, "step": 36360 }, { "epoch": 0.06447283149774952, "grad_norm": 0.8671875, "learning_rate": 0.0014517630216240576, "loss": 0.2324, "step": 36362 }, { "epoch": 0.06447637766305933, "grad_norm": 0.376953125, "learning_rate": 0.0014517082365984222, "loss": 0.1191, "step": 36364 }, { "epoch": 0.06447992382836915, "grad_norm": 0.67578125, "learning_rate": 0.0014516534500346192, "loss": 0.1405, "step": 36366 }, { "epoch": 0.06448346999367896, "grad_norm": 0.3203125, "learning_rate": 0.0014515986619328882, "loss": 0.1526, "step": 36368 }, { "epoch": 0.06448701615898877, "grad_norm": 0.30078125, "learning_rate": 0.0014515438722934688, "loss": 0.1906, "step": 36370 }, { "epoch": 0.06449056232429859, "grad_norm": 5.375, "learning_rate": 0.0014514890811166013, "loss": 0.2426, "step": 36372 }, { "epoch": 0.0644941084896084, "grad_norm": 0.2138671875, "learning_rate": 0.0014514342884025242, "loss": 0.1602, "step": 36374 }, { "epoch": 0.06449765465491822, "grad_norm": 0.515625, "learning_rate": 0.0014513794941514778, "loss": 0.2355, "step": 36376 }, { "epoch": 0.06450120082022803, "grad_norm": 0.341796875, "learning_rate": 0.0014513246983637017, "loss": 0.2262, "step": 36378 }, { "epoch": 0.06450474698553785, "grad_norm": 1.265625, "learning_rate": 0.0014512699010394352, "loss": 0.2276, "step": 36380 }, { "epoch": 0.06450829315084766, "grad_norm": 0.75390625, "learning_rate": 0.0014512151021789182, "loss": 0.1989, "step": 36382 }, { "epoch": 0.06451183931615748, "grad_norm": 0.54296875, "learning_rate": 0.0014511603017823905, "loss": 0.2097, "step": 36384 }, { "epoch": 0.06451538548146729, "grad_norm": 0.83984375, "learning_rate": 0.0014511054998500915, "loss": 0.2595, "step": 36386 }, { "epoch": 0.0645189316467771, "grad_norm": 0.59765625, "learning_rate": 0.001451050696382261, "loss": 0.1755, "step": 36388 }, { "epoch": 0.06452247781208692, "grad_norm": 1.453125, "learning_rate": 0.0014509958913791385, "loss": 0.2211, "step": 36390 }, { "epoch": 0.06452602397739675, "grad_norm": 0.44140625, "learning_rate": 0.0014509410848409642, "loss": 0.224, "step": 36392 }, { "epoch": 0.06452957014270656, "grad_norm": 0.44140625, "learning_rate": 0.0014508862767679776, "loss": 0.1818, "step": 36394 }, { "epoch": 0.06453311630801638, "grad_norm": 0.341796875, "learning_rate": 0.0014508314671604176, "loss": 0.2104, "step": 36396 }, { "epoch": 0.06453666247332619, "grad_norm": 4.53125, "learning_rate": 0.0014507766560185249, "loss": 0.1676, "step": 36398 }, { "epoch": 0.064540208638636, "grad_norm": 0.5625, "learning_rate": 0.0014507218433425391, "loss": 0.1774, "step": 36400 }, { "epoch": 0.06454375480394582, "grad_norm": 0.1796875, "learning_rate": 0.0014506670291326994, "loss": 0.1746, "step": 36402 }, { "epoch": 0.06454730096925564, "grad_norm": 0.296875, "learning_rate": 0.001450612213389246, "loss": 0.1791, "step": 36404 }, { "epoch": 0.06455084713456545, "grad_norm": 0.29296875, "learning_rate": 0.0014505573961124181, "loss": 0.1908, "step": 36406 }, { "epoch": 0.06455439329987527, "grad_norm": 0.4296875, "learning_rate": 0.0014505025773024558, "loss": 0.1711, "step": 36408 }, { "epoch": 0.06455793946518508, "grad_norm": 0.27734375, "learning_rate": 0.001450447756959599, "loss": 0.188, "step": 36410 }, { "epoch": 0.0645614856304949, "grad_norm": 0.80078125, "learning_rate": 0.0014503929350840874, "loss": 0.1908, "step": 36412 }, { "epoch": 0.06456503179580471, "grad_norm": 0.8125, "learning_rate": 0.0014503381116761607, "loss": 0.2488, "step": 36414 }, { "epoch": 0.06456857796111452, "grad_norm": 0.66015625, "learning_rate": 0.0014502832867360582, "loss": 0.2516, "step": 36416 }, { "epoch": 0.06457212412642434, "grad_norm": 0.46875, "learning_rate": 0.0014502284602640203, "loss": 0.2259, "step": 36418 }, { "epoch": 0.06457567029173415, "grad_norm": 0.279296875, "learning_rate": 0.0014501736322602868, "loss": 0.1394, "step": 36420 }, { "epoch": 0.06457921645704397, "grad_norm": 0.78125, "learning_rate": 0.0014501188027250967, "loss": 0.2057, "step": 36422 }, { "epoch": 0.06458276262235378, "grad_norm": 0.20703125, "learning_rate": 0.0014500639716586907, "loss": 0.2114, "step": 36424 }, { "epoch": 0.0645863087876636, "grad_norm": 0.259765625, "learning_rate": 0.0014500091390613082, "loss": 0.1846, "step": 36426 }, { "epoch": 0.06458985495297341, "grad_norm": 0.35546875, "learning_rate": 0.001449954304933189, "loss": 0.1537, "step": 36428 }, { "epoch": 0.06459340111828323, "grad_norm": 0.27734375, "learning_rate": 0.0014498994692745727, "loss": 0.1967, "step": 36430 }, { "epoch": 0.06459694728359304, "grad_norm": 0.59375, "learning_rate": 0.0014498446320856998, "loss": 0.2512, "step": 36432 }, { "epoch": 0.06460049344890285, "grad_norm": 0.2490234375, "learning_rate": 0.0014497897933668095, "loss": 0.1404, "step": 36434 }, { "epoch": 0.06460403961421268, "grad_norm": 0.984375, "learning_rate": 0.0014497349531181415, "loss": 0.196, "step": 36436 }, { "epoch": 0.0646075857795225, "grad_norm": 0.4140625, "learning_rate": 0.0014496801113399363, "loss": 0.2646, "step": 36438 }, { "epoch": 0.06461113194483231, "grad_norm": 0.283203125, "learning_rate": 0.0014496252680324332, "loss": 0.1575, "step": 36440 }, { "epoch": 0.06461467811014213, "grad_norm": 0.244140625, "learning_rate": 0.0014495704231958727, "loss": 0.1946, "step": 36442 }, { "epoch": 0.06461822427545194, "grad_norm": 2.046875, "learning_rate": 0.0014495155768304937, "loss": 0.2554, "step": 36444 }, { "epoch": 0.06462177044076176, "grad_norm": 0.41796875, "learning_rate": 0.001449460728936537, "loss": 0.2381, "step": 36446 }, { "epoch": 0.06462531660607157, "grad_norm": 1.5390625, "learning_rate": 0.001449405879514242, "loss": 0.2096, "step": 36448 }, { "epoch": 0.06462886277138138, "grad_norm": 0.396484375, "learning_rate": 0.0014493510285638488, "loss": 0.2699, "step": 36450 }, { "epoch": 0.0646324089366912, "grad_norm": 0.77734375, "learning_rate": 0.0014492961760855967, "loss": 0.1699, "step": 36452 }, { "epoch": 0.06463595510200101, "grad_norm": 0.67578125, "learning_rate": 0.0014492413220797262, "loss": 0.2214, "step": 36454 }, { "epoch": 0.06463950126731083, "grad_norm": 0.515625, "learning_rate": 0.001449186466546477, "loss": 0.1441, "step": 36456 }, { "epoch": 0.06464304743262064, "grad_norm": 0.7890625, "learning_rate": 0.0014491316094860892, "loss": 0.2654, "step": 36458 }, { "epoch": 0.06464659359793046, "grad_norm": 0.6484375, "learning_rate": 0.0014490767508988022, "loss": 0.3156, "step": 36460 }, { "epoch": 0.06465013976324027, "grad_norm": 0.3046875, "learning_rate": 0.0014490218907848567, "loss": 0.1824, "step": 36462 }, { "epoch": 0.06465368592855009, "grad_norm": 0.57421875, "learning_rate": 0.0014489670291444917, "loss": 0.2224, "step": 36464 }, { "epoch": 0.0646572320938599, "grad_norm": 0.54296875, "learning_rate": 0.0014489121659779479, "loss": 0.1638, "step": 36466 }, { "epoch": 0.06466077825916972, "grad_norm": 0.51171875, "learning_rate": 0.0014488573012854651, "loss": 0.1622, "step": 36468 }, { "epoch": 0.06466432442447953, "grad_norm": 0.2890625, "learning_rate": 0.0014488024350672829, "loss": 0.2092, "step": 36470 }, { "epoch": 0.06466787058978934, "grad_norm": 0.6171875, "learning_rate": 0.0014487475673236414, "loss": 0.1636, "step": 36472 }, { "epoch": 0.06467141675509916, "grad_norm": 2.296875, "learning_rate": 0.0014486926980547808, "loss": 0.2008, "step": 36474 }, { "epoch": 0.06467496292040897, "grad_norm": 0.392578125, "learning_rate": 0.0014486378272609405, "loss": 0.2171, "step": 36476 }, { "epoch": 0.06467850908571879, "grad_norm": 0.5625, "learning_rate": 0.0014485829549423612, "loss": 0.2024, "step": 36478 }, { "epoch": 0.0646820552510286, "grad_norm": 0.52734375, "learning_rate": 0.0014485280810992824, "loss": 0.2262, "step": 36480 }, { "epoch": 0.06468560141633843, "grad_norm": 0.2236328125, "learning_rate": 0.0014484732057319445, "loss": 0.169, "step": 36482 }, { "epoch": 0.06468914758164825, "grad_norm": 0.6328125, "learning_rate": 0.0014484183288405868, "loss": 0.2095, "step": 36484 }, { "epoch": 0.06469269374695806, "grad_norm": 0.62109375, "learning_rate": 0.0014483634504254502, "loss": 0.1546, "step": 36486 }, { "epoch": 0.06469623991226787, "grad_norm": 0.1826171875, "learning_rate": 0.001448308570486774, "loss": 0.1562, "step": 36488 }, { "epoch": 0.06469978607757769, "grad_norm": 0.33203125, "learning_rate": 0.0014482536890247984, "loss": 0.2207, "step": 36490 }, { "epoch": 0.0647033322428875, "grad_norm": 0.81640625, "learning_rate": 0.0014481988060397633, "loss": 0.1767, "step": 36492 }, { "epoch": 0.06470687840819732, "grad_norm": 10.8125, "learning_rate": 0.001448143921531909, "loss": 0.2334, "step": 36494 }, { "epoch": 0.06471042457350713, "grad_norm": 0.6171875, "learning_rate": 0.0014480890355014753, "loss": 0.2408, "step": 36496 }, { "epoch": 0.06471397073881695, "grad_norm": 0.203125, "learning_rate": 0.0014480341479487025, "loss": 0.2687, "step": 36498 }, { "epoch": 0.06471751690412676, "grad_norm": 0.447265625, "learning_rate": 0.0014479792588738304, "loss": 0.2146, "step": 36500 }, { "epoch": 0.06472106306943658, "grad_norm": 0.546875, "learning_rate": 0.0014479243682770993, "loss": 0.1985, "step": 36502 }, { "epoch": 0.06472460923474639, "grad_norm": 0.240234375, "learning_rate": 0.001447869476158749, "loss": 0.2189, "step": 36504 }, { "epoch": 0.0647281554000562, "grad_norm": 0.83203125, "learning_rate": 0.00144781458251902, "loss": 0.5075, "step": 36506 }, { "epoch": 0.06473170156536602, "grad_norm": 0.53125, "learning_rate": 0.0014477596873581518, "loss": 0.3733, "step": 36508 }, { "epoch": 0.06473524773067584, "grad_norm": 0.7578125, "learning_rate": 0.0014477047906763848, "loss": 0.1987, "step": 36510 }, { "epoch": 0.06473879389598565, "grad_norm": 0.466796875, "learning_rate": 0.0014476498924739587, "loss": 0.2054, "step": 36512 }, { "epoch": 0.06474234006129546, "grad_norm": 0.431640625, "learning_rate": 0.0014475949927511144, "loss": 0.1846, "step": 36514 }, { "epoch": 0.06474588622660528, "grad_norm": 1.046875, "learning_rate": 0.001447540091508091, "loss": 0.2396, "step": 36516 }, { "epoch": 0.0647494323919151, "grad_norm": 1.03125, "learning_rate": 0.0014474851887451292, "loss": 0.1649, "step": 36518 }, { "epoch": 0.06475297855722491, "grad_norm": 1.3125, "learning_rate": 0.0014474302844624692, "loss": 0.3179, "step": 36520 }, { "epoch": 0.06475652472253472, "grad_norm": 0.291015625, "learning_rate": 0.0014473753786603508, "loss": 0.1754, "step": 36522 }, { "epoch": 0.06476007088784454, "grad_norm": 0.90625, "learning_rate": 0.0014473204713390147, "loss": 0.1738, "step": 36524 }, { "epoch": 0.06476361705315435, "grad_norm": 0.330078125, "learning_rate": 0.0014472655624987003, "loss": 0.2214, "step": 36526 }, { "epoch": 0.06476716321846418, "grad_norm": 0.2109375, "learning_rate": 0.001447210652139648, "loss": 0.1667, "step": 36528 }, { "epoch": 0.064770709383774, "grad_norm": 0.263671875, "learning_rate": 0.0014471557402620978, "loss": 0.2205, "step": 36530 }, { "epoch": 0.06477425554908381, "grad_norm": 0.26953125, "learning_rate": 0.0014471008268662907, "loss": 0.2237, "step": 36532 }, { "epoch": 0.06477780171439362, "grad_norm": 0.42578125, "learning_rate": 0.0014470459119524655, "loss": 0.193, "step": 36534 }, { "epoch": 0.06478134787970344, "grad_norm": 0.32421875, "learning_rate": 0.0014469909955208632, "loss": 0.223, "step": 36536 }, { "epoch": 0.06478489404501325, "grad_norm": 0.34765625, "learning_rate": 0.0014469360775717242, "loss": 0.1715, "step": 36538 }, { "epoch": 0.06478844021032307, "grad_norm": 0.63671875, "learning_rate": 0.001446881158105288, "loss": 0.2115, "step": 36540 }, { "epoch": 0.06479198637563288, "grad_norm": 1.3359375, "learning_rate": 0.0014468262371217953, "loss": 0.1433, "step": 36542 }, { "epoch": 0.0647955325409427, "grad_norm": 0.30078125, "learning_rate": 0.0014467713146214863, "loss": 0.1917, "step": 36544 }, { "epoch": 0.06479907870625251, "grad_norm": 1.890625, "learning_rate": 0.0014467163906046005, "loss": 0.3532, "step": 36546 }, { "epoch": 0.06480262487156233, "grad_norm": 0.40625, "learning_rate": 0.001446661465071379, "loss": 0.2, "step": 36548 }, { "epoch": 0.06480617103687214, "grad_norm": 0.34375, "learning_rate": 0.0014466065380220614, "loss": 0.1925, "step": 36550 }, { "epoch": 0.06480971720218195, "grad_norm": 0.7578125, "learning_rate": 0.0014465516094568883, "loss": 0.1865, "step": 36552 }, { "epoch": 0.06481326336749177, "grad_norm": 0.404296875, "learning_rate": 0.0014464966793760995, "loss": 0.1973, "step": 36554 }, { "epoch": 0.06481680953280158, "grad_norm": 0.33984375, "learning_rate": 0.0014464417477799355, "loss": 0.2452, "step": 36556 }, { "epoch": 0.0648203556981114, "grad_norm": 0.5234375, "learning_rate": 0.0014463868146686368, "loss": 0.1677, "step": 36558 }, { "epoch": 0.06482390186342121, "grad_norm": 0.271484375, "learning_rate": 0.0014463318800424431, "loss": 0.1846, "step": 36560 }, { "epoch": 0.06482744802873103, "grad_norm": 0.400390625, "learning_rate": 0.0014462769439015952, "loss": 0.1833, "step": 36562 }, { "epoch": 0.06483099419404084, "grad_norm": 0.408203125, "learning_rate": 0.0014462220062463331, "loss": 0.2178, "step": 36564 }, { "epoch": 0.06483454035935066, "grad_norm": 0.51953125, "learning_rate": 0.001446167067076897, "loss": 0.248, "step": 36566 }, { "epoch": 0.06483808652466047, "grad_norm": 0.49609375, "learning_rate": 0.001446112126393527, "loss": 0.1366, "step": 36568 }, { "epoch": 0.06484163268997029, "grad_norm": 0.37890625, "learning_rate": 0.0014460571841964638, "loss": 0.2334, "step": 36570 }, { "epoch": 0.06484517885528011, "grad_norm": 0.337890625, "learning_rate": 0.0014460022404859474, "loss": 0.197, "step": 36572 }, { "epoch": 0.06484872502058993, "grad_norm": 0.2177734375, "learning_rate": 0.001445947295262218, "loss": 0.1325, "step": 36574 }, { "epoch": 0.06485227118589974, "grad_norm": 0.314453125, "learning_rate": 0.0014458923485255167, "loss": 0.2116, "step": 36576 }, { "epoch": 0.06485581735120956, "grad_norm": 1.421875, "learning_rate": 0.0014458374002760826, "loss": 0.2492, "step": 36578 }, { "epoch": 0.06485936351651937, "grad_norm": 0.79296875, "learning_rate": 0.0014457824505141569, "loss": 0.2067, "step": 36580 }, { "epoch": 0.06486290968182919, "grad_norm": 0.35546875, "learning_rate": 0.0014457274992399796, "loss": 0.205, "step": 36582 }, { "epoch": 0.064866455847139, "grad_norm": 0.72265625, "learning_rate": 0.001445672546453791, "loss": 0.4659, "step": 36584 }, { "epoch": 0.06487000201244882, "grad_norm": 1.65625, "learning_rate": 0.0014456175921558313, "loss": 0.2308, "step": 36586 }, { "epoch": 0.06487354817775863, "grad_norm": 1.46875, "learning_rate": 0.001445562636346341, "loss": 0.2502, "step": 36588 }, { "epoch": 0.06487709434306844, "grad_norm": 0.78125, "learning_rate": 0.0014455076790255608, "loss": 0.1896, "step": 36590 }, { "epoch": 0.06488064050837826, "grad_norm": 1.2890625, "learning_rate": 0.0014454527201937305, "loss": 0.2547, "step": 36592 }, { "epoch": 0.06488418667368807, "grad_norm": 7.84375, "learning_rate": 0.0014453977598510904, "loss": 0.2256, "step": 36594 }, { "epoch": 0.06488773283899789, "grad_norm": 0.734375, "learning_rate": 0.0014453427979978816, "loss": 0.2695, "step": 36596 }, { "epoch": 0.0648912790043077, "grad_norm": 4.5, "learning_rate": 0.0014452878346343434, "loss": 0.3166, "step": 36598 }, { "epoch": 0.06489482516961752, "grad_norm": 0.53125, "learning_rate": 0.0014452328697607173, "loss": 0.3663, "step": 36600 }, { "epoch": 0.06489837133492733, "grad_norm": 0.57421875, "learning_rate": 0.0014451779033772434, "loss": 0.1823, "step": 36602 }, { "epoch": 0.06490191750023715, "grad_norm": 0.43359375, "learning_rate": 0.0014451229354841613, "loss": 0.1404, "step": 36604 }, { "epoch": 0.06490546366554696, "grad_norm": 0.365234375, "learning_rate": 0.0014450679660817122, "loss": 0.16, "step": 36606 }, { "epoch": 0.06490900983085678, "grad_norm": 0.18359375, "learning_rate": 0.001445012995170136, "loss": 0.1443, "step": 36608 }, { "epoch": 0.06491255599616659, "grad_norm": 2.9375, "learning_rate": 0.0014449580227496738, "loss": 0.4874, "step": 36610 }, { "epoch": 0.0649161021614764, "grad_norm": 0.62890625, "learning_rate": 0.0014449030488205652, "loss": 0.1897, "step": 36612 }, { "epoch": 0.06491964832678622, "grad_norm": 0.671875, "learning_rate": 0.001444848073383051, "loss": 0.1926, "step": 36614 }, { "epoch": 0.06492319449209603, "grad_norm": 0.404296875, "learning_rate": 0.0014447930964373717, "loss": 0.2293, "step": 36616 }, { "epoch": 0.06492674065740586, "grad_norm": 0.486328125, "learning_rate": 0.0014447381179837678, "loss": 0.254, "step": 36618 }, { "epoch": 0.06493028682271568, "grad_norm": 0.77734375, "learning_rate": 0.00144468313802248, "loss": 0.1651, "step": 36620 }, { "epoch": 0.06493383298802549, "grad_norm": 0.67578125, "learning_rate": 0.001444628156553748, "loss": 0.2224, "step": 36622 }, { "epoch": 0.0649373791533353, "grad_norm": 0.1806640625, "learning_rate": 0.0014445731735778123, "loss": 0.1942, "step": 36624 }, { "epoch": 0.06494092531864512, "grad_norm": 0.443359375, "learning_rate": 0.001444518189094914, "loss": 0.2067, "step": 36626 }, { "epoch": 0.06494447148395494, "grad_norm": 0.451171875, "learning_rate": 0.0014444632031052932, "loss": 0.1854, "step": 36628 }, { "epoch": 0.06494801764926475, "grad_norm": 0.33203125, "learning_rate": 0.0014444082156091908, "loss": 0.1836, "step": 36630 }, { "epoch": 0.06495156381457456, "grad_norm": 0.400390625, "learning_rate": 0.0014443532266068461, "loss": 0.2847, "step": 36632 }, { "epoch": 0.06495510997988438, "grad_norm": 0.859375, "learning_rate": 0.001444298236098501, "loss": 0.3273, "step": 36634 }, { "epoch": 0.0649586561451942, "grad_norm": 0.384765625, "learning_rate": 0.0014442432440843955, "loss": 0.1722, "step": 36636 }, { "epoch": 0.06496220231050401, "grad_norm": 0.5, "learning_rate": 0.0014441882505647698, "loss": 0.2176, "step": 36638 }, { "epoch": 0.06496574847581382, "grad_norm": 0.5, "learning_rate": 0.0014441332555398644, "loss": 0.212, "step": 36640 }, { "epoch": 0.06496929464112364, "grad_norm": 0.4296875, "learning_rate": 0.0014440782590099204, "loss": 0.2109, "step": 36642 }, { "epoch": 0.06497284080643345, "grad_norm": 0.310546875, "learning_rate": 0.0014440232609751777, "loss": 0.2014, "step": 36644 }, { "epoch": 0.06497638697174327, "grad_norm": 0.890625, "learning_rate": 0.0014439682614358774, "loss": 0.2327, "step": 36646 }, { "epoch": 0.06497993313705308, "grad_norm": 0.46875, "learning_rate": 0.0014439132603922594, "loss": 0.2039, "step": 36648 }, { "epoch": 0.0649834793023629, "grad_norm": 0.337890625, "learning_rate": 0.0014438582578445648, "loss": 0.3073, "step": 36650 }, { "epoch": 0.06498702546767271, "grad_norm": 0.5546875, "learning_rate": 0.001443803253793034, "loss": 0.1985, "step": 36652 }, { "epoch": 0.06499057163298252, "grad_norm": 0.455078125, "learning_rate": 0.0014437482482379071, "loss": 0.2182, "step": 36654 }, { "epoch": 0.06499411779829234, "grad_norm": 0.470703125, "learning_rate": 0.0014436932411794254, "loss": 0.2987, "step": 36656 }, { "epoch": 0.06499766396360215, "grad_norm": 0.44921875, "learning_rate": 0.001443638232617829, "loss": 0.1842, "step": 36658 }, { "epoch": 0.06500121012891197, "grad_norm": 0.376953125, "learning_rate": 0.0014435832225533586, "loss": 0.1771, "step": 36660 }, { "epoch": 0.06500475629422178, "grad_norm": 0.431640625, "learning_rate": 0.0014435282109862546, "loss": 0.2434, "step": 36662 }, { "epoch": 0.06500830245953161, "grad_norm": 0.470703125, "learning_rate": 0.0014434731979167577, "loss": 0.1737, "step": 36664 }, { "epoch": 0.06501184862484143, "grad_norm": 0.333984375, "learning_rate": 0.0014434181833451088, "loss": 0.1519, "step": 36666 }, { "epoch": 0.06501539479015124, "grad_norm": 0.61328125, "learning_rate": 0.0014433631672715482, "loss": 0.1741, "step": 36668 }, { "epoch": 0.06501894095546105, "grad_norm": 0.38671875, "learning_rate": 0.0014433081496963165, "loss": 0.4655, "step": 36670 }, { "epoch": 0.06502248712077087, "grad_norm": 0.66015625, "learning_rate": 0.0014432531306196542, "loss": 0.1809, "step": 36672 }, { "epoch": 0.06502603328608068, "grad_norm": 0.443359375, "learning_rate": 0.0014431981100418024, "loss": 0.1371, "step": 36674 }, { "epoch": 0.0650295794513905, "grad_norm": 0.2421875, "learning_rate": 0.0014431430879630013, "loss": 0.3104, "step": 36676 }, { "epoch": 0.06503312561670031, "grad_norm": 0.203125, "learning_rate": 0.0014430880643834915, "loss": 0.1735, "step": 36678 }, { "epoch": 0.06503667178201013, "grad_norm": 0.703125, "learning_rate": 0.001443033039303514, "loss": 0.1618, "step": 36680 }, { "epoch": 0.06504021794731994, "grad_norm": 0.3515625, "learning_rate": 0.001442978012723309, "loss": 0.1513, "step": 36682 }, { "epoch": 0.06504376411262976, "grad_norm": 0.53125, "learning_rate": 0.0014429229846431176, "loss": 0.1749, "step": 36684 }, { "epoch": 0.06504731027793957, "grad_norm": 0.37109375, "learning_rate": 0.0014428679550631803, "loss": 0.2065, "step": 36686 }, { "epoch": 0.06505085644324939, "grad_norm": 1.3828125, "learning_rate": 0.0014428129239837375, "loss": 0.2029, "step": 36688 }, { "epoch": 0.0650544026085592, "grad_norm": 0.8125, "learning_rate": 0.0014427578914050303, "loss": 0.2002, "step": 36690 }, { "epoch": 0.06505794877386901, "grad_norm": 2.140625, "learning_rate": 0.0014427028573272991, "loss": 0.1988, "step": 36692 }, { "epoch": 0.06506149493917883, "grad_norm": 0.50390625, "learning_rate": 0.0014426478217507848, "loss": 0.1601, "step": 36694 }, { "epoch": 0.06506504110448864, "grad_norm": 0.5078125, "learning_rate": 0.001442592784675728, "loss": 0.1962, "step": 36696 }, { "epoch": 0.06506858726979846, "grad_norm": 0.9140625, "learning_rate": 0.001442537746102369, "loss": 0.1833, "step": 36698 }, { "epoch": 0.06507213343510827, "grad_norm": 0.63671875, "learning_rate": 0.0014424827060309491, "loss": 0.2309, "step": 36700 }, { "epoch": 0.06507567960041809, "grad_norm": 0.8125, "learning_rate": 0.0014424276644617088, "loss": 0.2105, "step": 36702 }, { "epoch": 0.0650792257657279, "grad_norm": 0.96875, "learning_rate": 0.0014423726213948886, "loss": 0.2095, "step": 36704 }, { "epoch": 0.06508277193103772, "grad_norm": 0.349609375, "learning_rate": 0.0014423175768307295, "loss": 0.2679, "step": 36706 }, { "epoch": 0.06508631809634755, "grad_norm": 0.828125, "learning_rate": 0.0014422625307694724, "loss": 0.2106, "step": 36708 }, { "epoch": 0.06508986426165736, "grad_norm": 0.427734375, "learning_rate": 0.0014422074832113577, "loss": 0.1973, "step": 36710 }, { "epoch": 0.06509341042696717, "grad_norm": 0.3359375, "learning_rate": 0.0014421524341566262, "loss": 0.1878, "step": 36712 }, { "epoch": 0.06509695659227699, "grad_norm": 0.53125, "learning_rate": 0.0014420973836055187, "loss": 0.1534, "step": 36714 }, { "epoch": 0.0651005027575868, "grad_norm": 0.35546875, "learning_rate": 0.001442042331558276, "loss": 0.1745, "step": 36716 }, { "epoch": 0.06510404892289662, "grad_norm": 0.4296875, "learning_rate": 0.001441987278015139, "loss": 0.146, "step": 36718 }, { "epoch": 0.06510759508820643, "grad_norm": 0.56640625, "learning_rate": 0.0014419322229763478, "loss": 0.1967, "step": 36720 }, { "epoch": 0.06511114125351625, "grad_norm": 0.56640625, "learning_rate": 0.001441877166442144, "loss": 0.1887, "step": 36722 }, { "epoch": 0.06511468741882606, "grad_norm": 0.296875, "learning_rate": 0.0014418221084127679, "loss": 0.1995, "step": 36724 }, { "epoch": 0.06511823358413588, "grad_norm": 0.3984375, "learning_rate": 0.0014417670488884604, "loss": 0.1698, "step": 36726 }, { "epoch": 0.06512177974944569, "grad_norm": 0.73046875, "learning_rate": 0.0014417119878694627, "loss": 0.1583, "step": 36728 }, { "epoch": 0.0651253259147555, "grad_norm": 1.28125, "learning_rate": 0.001441656925356015, "loss": 0.2448, "step": 36730 }, { "epoch": 0.06512887208006532, "grad_norm": 0.6875, "learning_rate": 0.0014416018613483588, "loss": 0.2002, "step": 36732 }, { "epoch": 0.06513241824537513, "grad_norm": 0.240234375, "learning_rate": 0.001441546795846734, "loss": 0.1711, "step": 36734 }, { "epoch": 0.06513596441068495, "grad_norm": 0.28125, "learning_rate": 0.001441491728851382, "loss": 0.1842, "step": 36736 }, { "epoch": 0.06513951057599476, "grad_norm": 1.109375, "learning_rate": 0.0014414366603625434, "loss": 0.4712, "step": 36738 }, { "epoch": 0.06514305674130458, "grad_norm": 0.49609375, "learning_rate": 0.0014413815903804596, "loss": 0.1786, "step": 36740 }, { "epoch": 0.06514660290661439, "grad_norm": 0.23828125, "learning_rate": 0.0014413265189053706, "loss": 0.1591, "step": 36742 }, { "epoch": 0.06515014907192421, "grad_norm": 0.36328125, "learning_rate": 0.0014412714459375179, "loss": 0.2035, "step": 36744 }, { "epoch": 0.06515369523723402, "grad_norm": 0.28515625, "learning_rate": 0.0014412163714771417, "loss": 0.2669, "step": 36746 }, { "epoch": 0.06515724140254384, "grad_norm": 0.44140625, "learning_rate": 0.0014411612955244838, "loss": 0.2392, "step": 36748 }, { "epoch": 0.06516078756785365, "grad_norm": 0.369140625, "learning_rate": 0.0014411062180797846, "loss": 0.2053, "step": 36750 }, { "epoch": 0.06516433373316347, "grad_norm": 0.71484375, "learning_rate": 0.001441051139143285, "loss": 0.2135, "step": 36752 }, { "epoch": 0.0651678798984733, "grad_norm": 0.8828125, "learning_rate": 0.0014409960587152253, "loss": 0.1991, "step": 36754 }, { "epoch": 0.06517142606378311, "grad_norm": 0.5, "learning_rate": 0.0014409409767958475, "loss": 0.2116, "step": 36756 }, { "epoch": 0.06517497222909292, "grad_norm": 0.70703125, "learning_rate": 0.0014408858933853913, "loss": 0.1963, "step": 36758 }, { "epoch": 0.06517851839440274, "grad_norm": 4.75, "learning_rate": 0.0014408308084840986, "loss": 0.2669, "step": 36760 }, { "epoch": 0.06518206455971255, "grad_norm": 0.859375, "learning_rate": 0.0014407757220922099, "loss": 0.1617, "step": 36762 }, { "epoch": 0.06518561072502237, "grad_norm": 0.345703125, "learning_rate": 0.0014407206342099662, "loss": 0.1233, "step": 36764 }, { "epoch": 0.06518915689033218, "grad_norm": 2.421875, "learning_rate": 0.001440665544837608, "loss": 0.2675, "step": 36766 }, { "epoch": 0.065192703055642, "grad_norm": 1.0234375, "learning_rate": 0.0014406104539753768, "loss": 0.2098, "step": 36768 }, { "epoch": 0.06519624922095181, "grad_norm": 0.24609375, "learning_rate": 0.0014405553616235137, "loss": 0.1872, "step": 36770 }, { "epoch": 0.06519979538626162, "grad_norm": 1.34375, "learning_rate": 0.0014405002677822588, "loss": 0.2578, "step": 36772 }, { "epoch": 0.06520334155157144, "grad_norm": 0.7734375, "learning_rate": 0.0014404451724518534, "loss": 0.1846, "step": 36774 }, { "epoch": 0.06520688771688125, "grad_norm": 0.46484375, "learning_rate": 0.0014403900756325388, "loss": 0.2092, "step": 36776 }, { "epoch": 0.06521043388219107, "grad_norm": 0.6640625, "learning_rate": 0.001440334977324556, "loss": 0.4931, "step": 36778 }, { "epoch": 0.06521398004750088, "grad_norm": 1.296875, "learning_rate": 0.0014402798775281452, "loss": 0.3459, "step": 36780 }, { "epoch": 0.0652175262128107, "grad_norm": 1.328125, "learning_rate": 0.001440224776243548, "loss": 0.2499, "step": 36782 }, { "epoch": 0.06522107237812051, "grad_norm": 0.37890625, "learning_rate": 0.0014401696734710054, "loss": 0.151, "step": 36784 }, { "epoch": 0.06522461854343033, "grad_norm": 1.09375, "learning_rate": 0.0014401145692107583, "loss": 0.3122, "step": 36786 }, { "epoch": 0.06522816470874014, "grad_norm": 0.6796875, "learning_rate": 0.0014400594634630475, "loss": 0.2105, "step": 36788 }, { "epoch": 0.06523171087404996, "grad_norm": 0.43359375, "learning_rate": 0.0014400043562281142, "loss": 0.1764, "step": 36790 }, { "epoch": 0.06523525703935977, "grad_norm": 0.224609375, "learning_rate": 0.0014399492475061994, "loss": 0.1613, "step": 36792 }, { "epoch": 0.06523880320466958, "grad_norm": 0.3359375, "learning_rate": 0.001439894137297544, "loss": 0.1534, "step": 36794 }, { "epoch": 0.0652423493699794, "grad_norm": 8.3125, "learning_rate": 0.001439839025602389, "loss": 0.3257, "step": 36796 }, { "epoch": 0.06524589553528921, "grad_norm": 0.287109375, "learning_rate": 0.0014397839124209754, "loss": 0.1682, "step": 36798 }, { "epoch": 0.06524944170059904, "grad_norm": 0.61328125, "learning_rate": 0.0014397287977535448, "loss": 0.1645, "step": 36800 }, { "epoch": 0.06525298786590886, "grad_norm": 0.4453125, "learning_rate": 0.0014396736816003374, "loss": 0.1693, "step": 36802 }, { "epoch": 0.06525653403121867, "grad_norm": 0.388671875, "learning_rate": 0.0014396185639615946, "loss": 0.1807, "step": 36804 }, { "epoch": 0.06526008019652849, "grad_norm": 0.52734375, "learning_rate": 0.0014395634448375577, "loss": 0.2584, "step": 36806 }, { "epoch": 0.0652636263618383, "grad_norm": 0.703125, "learning_rate": 0.0014395083242284674, "loss": 0.3236, "step": 36808 }, { "epoch": 0.06526717252714812, "grad_norm": 1.203125, "learning_rate": 0.0014394532021345652, "loss": 0.1885, "step": 36810 }, { "epoch": 0.06527071869245793, "grad_norm": 0.5390625, "learning_rate": 0.0014393980785560915, "loss": 0.2197, "step": 36812 }, { "epoch": 0.06527426485776774, "grad_norm": 0.451171875, "learning_rate": 0.0014393429534932875, "loss": 0.1711, "step": 36814 }, { "epoch": 0.06527781102307756, "grad_norm": 0.8671875, "learning_rate": 0.0014392878269463949, "loss": 0.1565, "step": 36816 }, { "epoch": 0.06528135718838737, "grad_norm": 0.65625, "learning_rate": 0.0014392326989156547, "loss": 0.1561, "step": 36818 }, { "epoch": 0.06528490335369719, "grad_norm": 0.2021484375, "learning_rate": 0.0014391775694013072, "loss": 0.4676, "step": 36820 }, { "epoch": 0.065288449519007, "grad_norm": 0.447265625, "learning_rate": 0.0014391224384035944, "loss": 0.2118, "step": 36822 }, { "epoch": 0.06529199568431682, "grad_norm": 2.140625, "learning_rate": 0.0014390673059227568, "loss": 0.2482, "step": 36824 }, { "epoch": 0.06529554184962663, "grad_norm": 0.3046875, "learning_rate": 0.001439012171959036, "loss": 0.1639, "step": 36826 }, { "epoch": 0.06529908801493645, "grad_norm": 0.5234375, "learning_rate": 0.0014389570365126725, "loss": 0.2031, "step": 36828 }, { "epoch": 0.06530263418024626, "grad_norm": 0.298828125, "learning_rate": 0.0014389018995839083, "loss": 0.1613, "step": 36830 }, { "epoch": 0.06530618034555608, "grad_norm": 0.76171875, "learning_rate": 0.0014388467611729835, "loss": 0.1766, "step": 36832 }, { "epoch": 0.06530972651086589, "grad_norm": 0.765625, "learning_rate": 0.0014387916212801402, "loss": 0.1868, "step": 36834 }, { "epoch": 0.0653132726761757, "grad_norm": 0.142578125, "learning_rate": 0.001438736479905619, "loss": 0.1627, "step": 36836 }, { "epoch": 0.06531681884148552, "grad_norm": 0.255859375, "learning_rate": 0.0014386813370496612, "loss": 0.2737, "step": 36838 }, { "epoch": 0.06532036500679533, "grad_norm": 0.57421875, "learning_rate": 0.0014386261927125078, "loss": 0.225, "step": 36840 }, { "epoch": 0.06532391117210515, "grad_norm": 0.56640625, "learning_rate": 0.0014385710468944005, "loss": 0.2013, "step": 36842 }, { "epoch": 0.06532745733741498, "grad_norm": 1.296875, "learning_rate": 0.00143851589959558, "loss": 0.2708, "step": 36844 }, { "epoch": 0.06533100350272479, "grad_norm": 1.8515625, "learning_rate": 0.0014384607508162875, "loss": 0.2835, "step": 36846 }, { "epoch": 0.0653345496680346, "grad_norm": 0.31640625, "learning_rate": 0.0014384056005567643, "loss": 0.178, "step": 36848 }, { "epoch": 0.06533809583334442, "grad_norm": 0.310546875, "learning_rate": 0.0014383504488172515, "loss": 0.1696, "step": 36850 }, { "epoch": 0.06534164199865423, "grad_norm": 0.69140625, "learning_rate": 0.0014382952955979901, "loss": 0.214, "step": 36852 }, { "epoch": 0.06534518816396405, "grad_norm": 0.47265625, "learning_rate": 0.0014382401408992221, "loss": 0.2086, "step": 36854 }, { "epoch": 0.06534873432927386, "grad_norm": 0.359375, "learning_rate": 0.0014381849847211882, "loss": 0.3033, "step": 36856 }, { "epoch": 0.06535228049458368, "grad_norm": 0.43359375, "learning_rate": 0.0014381298270641292, "loss": 0.2393, "step": 36858 }, { "epoch": 0.06535582665989349, "grad_norm": 0.61328125, "learning_rate": 0.001438074667928287, "loss": 0.4743, "step": 36860 }, { "epoch": 0.06535937282520331, "grad_norm": 1.59375, "learning_rate": 0.0014380195073139027, "loss": 0.2464, "step": 36862 }, { "epoch": 0.06536291899051312, "grad_norm": 0.515625, "learning_rate": 0.0014379643452212172, "loss": 0.2763, "step": 36864 }, { "epoch": 0.06536646515582294, "grad_norm": 0.359375, "learning_rate": 0.0014379091816504718, "loss": 0.1868, "step": 36866 }, { "epoch": 0.06537001132113275, "grad_norm": 0.59765625, "learning_rate": 0.0014378540166019083, "loss": 0.3379, "step": 36868 }, { "epoch": 0.06537355748644257, "grad_norm": 0.2216796875, "learning_rate": 0.0014377988500757675, "loss": 0.17, "step": 36870 }, { "epoch": 0.06537710365175238, "grad_norm": 1.3046875, "learning_rate": 0.0014377436820722906, "loss": 0.2509, "step": 36872 }, { "epoch": 0.0653806498170622, "grad_norm": 0.361328125, "learning_rate": 0.0014376885125917192, "loss": 0.1757, "step": 36874 }, { "epoch": 0.06538419598237201, "grad_norm": 0.62890625, "learning_rate": 0.0014376333416342944, "loss": 0.1473, "step": 36876 }, { "epoch": 0.06538774214768182, "grad_norm": 0.33203125, "learning_rate": 0.0014375781692002572, "loss": 0.1444, "step": 36878 }, { "epoch": 0.06539128831299164, "grad_norm": 0.58203125, "learning_rate": 0.0014375229952898493, "loss": 0.2071, "step": 36880 }, { "epoch": 0.06539483447830145, "grad_norm": 1.1796875, "learning_rate": 0.0014374678199033121, "loss": 0.1963, "step": 36882 }, { "epoch": 0.06539838064361127, "grad_norm": 0.462890625, "learning_rate": 0.0014374126430408867, "loss": 0.2466, "step": 36884 }, { "epoch": 0.06540192680892108, "grad_norm": 0.48046875, "learning_rate": 0.001437357464702814, "loss": 0.1869, "step": 36886 }, { "epoch": 0.0654054729742309, "grad_norm": 0.490234375, "learning_rate": 0.001437302284889336, "loss": 0.1961, "step": 36888 }, { "epoch": 0.06540901913954072, "grad_norm": 0.78515625, "learning_rate": 0.0014372471036006936, "loss": 0.2019, "step": 36890 }, { "epoch": 0.06541256530485054, "grad_norm": 1.78125, "learning_rate": 0.0014371919208371285, "loss": 0.2876, "step": 36892 }, { "epoch": 0.06541611147016035, "grad_norm": 0.6875, "learning_rate": 0.0014371367365988814, "loss": 0.3217, "step": 36894 }, { "epoch": 0.06541965763547017, "grad_norm": 0.283203125, "learning_rate": 0.0014370815508861943, "loss": 0.2015, "step": 36896 }, { "epoch": 0.06542320380077998, "grad_norm": 0.2412109375, "learning_rate": 0.0014370263636993084, "loss": 0.1598, "step": 36898 }, { "epoch": 0.0654267499660898, "grad_norm": 0.78515625, "learning_rate": 0.0014369711750384648, "loss": 0.219, "step": 36900 }, { "epoch": 0.06543029613139961, "grad_norm": 1.328125, "learning_rate": 0.0014369159849039052, "loss": 0.2115, "step": 36902 }, { "epoch": 0.06543384229670943, "grad_norm": 0.515625, "learning_rate": 0.0014368607932958707, "loss": 0.1687, "step": 36904 }, { "epoch": 0.06543738846201924, "grad_norm": 0.490234375, "learning_rate": 0.0014368056002146024, "loss": 0.2331, "step": 36906 }, { "epoch": 0.06544093462732906, "grad_norm": 0.31640625, "learning_rate": 0.0014367504056603424, "loss": 0.2033, "step": 36908 }, { "epoch": 0.06544448079263887, "grad_norm": 0.8125, "learning_rate": 0.0014366952096333317, "loss": 0.1894, "step": 36910 }, { "epoch": 0.06544802695794869, "grad_norm": 0.267578125, "learning_rate": 0.0014366400121338116, "loss": 0.166, "step": 36912 }, { "epoch": 0.0654515731232585, "grad_norm": 0.427734375, "learning_rate": 0.001436584813162024, "loss": 0.1709, "step": 36914 }, { "epoch": 0.06545511928856831, "grad_norm": 0.3046875, "learning_rate": 0.0014365296127182095, "loss": 0.2494, "step": 36916 }, { "epoch": 0.06545866545387813, "grad_norm": 0.287109375, "learning_rate": 0.0014364744108026101, "loss": 0.1956, "step": 36918 }, { "epoch": 0.06546221161918794, "grad_norm": 0.2294921875, "learning_rate": 0.0014364192074154674, "loss": 0.1928, "step": 36920 }, { "epoch": 0.06546575778449776, "grad_norm": 1.1796875, "learning_rate": 0.001436364002557022, "loss": 0.2667, "step": 36922 }, { "epoch": 0.06546930394980757, "grad_norm": 0.451171875, "learning_rate": 0.001436308796227516, "loss": 0.1691, "step": 36924 }, { "epoch": 0.06547285011511739, "grad_norm": 1.5546875, "learning_rate": 0.0014362535884271906, "loss": 0.2395, "step": 36926 }, { "epoch": 0.0654763962804272, "grad_norm": 0.71875, "learning_rate": 0.0014361983791562874, "loss": 0.1559, "step": 36928 }, { "epoch": 0.06547994244573702, "grad_norm": 0.236328125, "learning_rate": 0.001436143168415048, "loss": 0.2033, "step": 36930 }, { "epoch": 0.06548348861104683, "grad_norm": 0.2470703125, "learning_rate": 0.0014360879562037134, "loss": 0.4137, "step": 36932 }, { "epoch": 0.06548703477635665, "grad_norm": 0.8984375, "learning_rate": 0.0014360327425225253, "loss": 0.2932, "step": 36934 }, { "epoch": 0.06549058094166647, "grad_norm": 0.63671875, "learning_rate": 0.001435977527371725, "loss": 0.1701, "step": 36936 }, { "epoch": 0.06549412710697629, "grad_norm": 0.1796875, "learning_rate": 0.0014359223107515548, "loss": 0.1234, "step": 36938 }, { "epoch": 0.0654976732722861, "grad_norm": 0.427734375, "learning_rate": 0.0014358670926622548, "loss": 0.1607, "step": 36940 }, { "epoch": 0.06550121943759592, "grad_norm": 0.390625, "learning_rate": 0.0014358118731040677, "loss": 0.198, "step": 36942 }, { "epoch": 0.06550476560290573, "grad_norm": 1.484375, "learning_rate": 0.0014357566520772343, "loss": 0.4045, "step": 36944 }, { "epoch": 0.06550831176821555, "grad_norm": 0.71484375, "learning_rate": 0.0014357014295819964, "loss": 0.2255, "step": 36946 }, { "epoch": 0.06551185793352536, "grad_norm": 0.6171875, "learning_rate": 0.0014356462056185956, "loss": 0.2362, "step": 36948 }, { "epoch": 0.06551540409883518, "grad_norm": 0.2021484375, "learning_rate": 0.001435590980187273, "loss": 0.162, "step": 36950 }, { "epoch": 0.06551895026414499, "grad_norm": 0.47265625, "learning_rate": 0.0014355357532882705, "loss": 0.1692, "step": 36952 }, { "epoch": 0.0655224964294548, "grad_norm": 1.765625, "learning_rate": 0.0014354805249218296, "loss": 0.2169, "step": 36954 }, { "epoch": 0.06552604259476462, "grad_norm": 0.404296875, "learning_rate": 0.0014354252950881917, "loss": 0.1652, "step": 36956 }, { "epoch": 0.06552958876007443, "grad_norm": 0.404296875, "learning_rate": 0.0014353700637875983, "loss": 0.1906, "step": 36958 }, { "epoch": 0.06553313492538425, "grad_norm": 0.6484375, "learning_rate": 0.001435314831020291, "loss": 0.1449, "step": 36960 }, { "epoch": 0.06553668109069406, "grad_norm": 0.275390625, "learning_rate": 0.0014352595967865116, "loss": 0.1546, "step": 36962 }, { "epoch": 0.06554022725600388, "grad_norm": 0.40234375, "learning_rate": 0.0014352043610865014, "loss": 0.1678, "step": 36964 }, { "epoch": 0.06554377342131369, "grad_norm": 0.37109375, "learning_rate": 0.001435149123920502, "loss": 0.1878, "step": 36966 }, { "epoch": 0.0655473195866235, "grad_norm": 0.375, "learning_rate": 0.001435093885288755, "loss": 0.2004, "step": 36968 }, { "epoch": 0.06555086575193332, "grad_norm": 0.126953125, "learning_rate": 0.001435038645191502, "loss": 0.1305, "step": 36970 }, { "epoch": 0.06555441191724314, "grad_norm": 0.1787109375, "learning_rate": 0.0014349834036289845, "loss": 0.1715, "step": 36972 }, { "epoch": 0.06555795808255295, "grad_norm": 0.1953125, "learning_rate": 0.0014349281606014445, "loss": 0.1893, "step": 36974 }, { "epoch": 0.06556150424786276, "grad_norm": 0.484375, "learning_rate": 0.0014348729161091233, "loss": 0.1976, "step": 36976 }, { "epoch": 0.06556505041317258, "grad_norm": 0.388671875, "learning_rate": 0.0014348176701522621, "loss": 0.1367, "step": 36978 }, { "epoch": 0.06556859657848241, "grad_norm": 0.45703125, "learning_rate": 0.001434762422731103, "loss": 0.1574, "step": 36980 }, { "epoch": 0.06557214274379222, "grad_norm": 0.5390625, "learning_rate": 0.0014347071738458877, "loss": 0.1931, "step": 36982 }, { "epoch": 0.06557568890910204, "grad_norm": 0.8046875, "learning_rate": 0.0014346519234968574, "loss": 0.1649, "step": 36984 }, { "epoch": 0.06557923507441185, "grad_norm": 1.7421875, "learning_rate": 0.001434596671684254, "loss": 0.2059, "step": 36986 }, { "epoch": 0.06558278123972167, "grad_norm": 1.3046875, "learning_rate": 0.001434541418408319, "loss": 0.1895, "step": 36988 }, { "epoch": 0.06558632740503148, "grad_norm": 0.345703125, "learning_rate": 0.0014344861636692945, "loss": 0.2404, "step": 36990 }, { "epoch": 0.0655898735703413, "grad_norm": 0.6796875, "learning_rate": 0.001434430907467422, "loss": 0.1777, "step": 36992 }, { "epoch": 0.06559341973565111, "grad_norm": 0.7265625, "learning_rate": 0.0014343756498029425, "loss": 0.2195, "step": 36994 }, { "epoch": 0.06559696590096092, "grad_norm": 0.353515625, "learning_rate": 0.0014343203906760982, "loss": 0.1507, "step": 36996 }, { "epoch": 0.06560051206627074, "grad_norm": 0.625, "learning_rate": 0.0014342651300871308, "loss": 0.3201, "step": 36998 }, { "epoch": 0.06560405823158055, "grad_norm": 0.51953125, "learning_rate": 0.0014342098680362816, "loss": 0.2513, "step": 37000 }, { "epoch": 0.06560760439689037, "grad_norm": 0.400390625, "learning_rate": 0.0014341546045237932, "loss": 0.1966, "step": 37002 }, { "epoch": 0.06561115056220018, "grad_norm": 0.349609375, "learning_rate": 0.001434099339549906, "loss": 0.2417, "step": 37004 }, { "epoch": 0.06561469672751, "grad_norm": 0.72265625, "learning_rate": 0.0014340440731148627, "loss": 0.1518, "step": 37006 }, { "epoch": 0.06561824289281981, "grad_norm": 0.76171875, "learning_rate": 0.0014339888052189045, "loss": 0.2141, "step": 37008 }, { "epoch": 0.06562178905812963, "grad_norm": 0.9453125, "learning_rate": 0.0014339335358622737, "loss": 0.1896, "step": 37010 }, { "epoch": 0.06562533522343944, "grad_norm": 0.796875, "learning_rate": 0.0014338782650452114, "loss": 0.1915, "step": 37012 }, { "epoch": 0.06562888138874926, "grad_norm": 0.94921875, "learning_rate": 0.0014338229927679592, "loss": 0.2066, "step": 37014 }, { "epoch": 0.06563242755405907, "grad_norm": 0.45703125, "learning_rate": 0.0014337677190307593, "loss": 0.269, "step": 37016 }, { "epoch": 0.06563597371936888, "grad_norm": 0.392578125, "learning_rate": 0.001433712443833853, "loss": 0.2019, "step": 37018 }, { "epoch": 0.0656395198846787, "grad_norm": 0.328125, "learning_rate": 0.0014336571671774827, "loss": 0.1379, "step": 37020 }, { "epoch": 0.06564306604998851, "grad_norm": 0.30078125, "learning_rate": 0.0014336018890618895, "loss": 0.1924, "step": 37022 }, { "epoch": 0.06564661221529833, "grad_norm": 0.46875, "learning_rate": 0.0014335466094873158, "loss": 0.2258, "step": 37024 }, { "epoch": 0.06565015838060816, "grad_norm": 0.88671875, "learning_rate": 0.0014334913284540026, "loss": 0.2199, "step": 37026 }, { "epoch": 0.06565370454591797, "grad_norm": 1.7109375, "learning_rate": 0.0014334360459621923, "loss": 0.2587, "step": 37028 }, { "epoch": 0.06565725071122779, "grad_norm": 0.349609375, "learning_rate": 0.0014333807620121265, "loss": 0.1502, "step": 37030 }, { "epoch": 0.0656607968765376, "grad_norm": 0.48828125, "learning_rate": 0.0014333254766040464, "loss": 0.2223, "step": 37032 }, { "epoch": 0.06566434304184741, "grad_norm": 0.220703125, "learning_rate": 0.0014332701897381945, "loss": 0.1599, "step": 37034 }, { "epoch": 0.06566788920715723, "grad_norm": 0.52734375, "learning_rate": 0.0014332149014148127, "loss": 0.1919, "step": 37036 }, { "epoch": 0.06567143537246704, "grad_norm": 0.76953125, "learning_rate": 0.001433159611634142, "loss": 0.1781, "step": 37038 }, { "epoch": 0.06567498153777686, "grad_norm": 0.328125, "learning_rate": 0.001433104320396425, "loss": 0.2281, "step": 37040 }, { "epoch": 0.06567852770308667, "grad_norm": 0.271484375, "learning_rate": 0.0014330490277019033, "loss": 0.1492, "step": 37042 }, { "epoch": 0.06568207386839649, "grad_norm": 0.498046875, "learning_rate": 0.0014329937335508182, "loss": 0.1522, "step": 37044 }, { "epoch": 0.0656856200337063, "grad_norm": 1.078125, "learning_rate": 0.0014329384379434124, "loss": 0.2447, "step": 37046 }, { "epoch": 0.06568916619901612, "grad_norm": 0.7265625, "learning_rate": 0.0014328831408799267, "loss": 0.1964, "step": 37048 }, { "epoch": 0.06569271236432593, "grad_norm": 0.46875, "learning_rate": 0.0014328278423606042, "loss": 0.202, "step": 37050 }, { "epoch": 0.06569625852963575, "grad_norm": 0.302734375, "learning_rate": 0.001432772542385686, "loss": 0.13, "step": 37052 }, { "epoch": 0.06569980469494556, "grad_norm": 0.26171875, "learning_rate": 0.0014327172409554132, "loss": 0.1608, "step": 37054 }, { "epoch": 0.06570335086025537, "grad_norm": 1.3125, "learning_rate": 0.0014326619380700292, "loss": 0.1521, "step": 37056 }, { "epoch": 0.06570689702556519, "grad_norm": 0.30078125, "learning_rate": 0.0014326066337297745, "loss": 0.2051, "step": 37058 }, { "epoch": 0.065710443190875, "grad_norm": 0.53515625, "learning_rate": 0.0014325513279348921, "loss": 0.1873, "step": 37060 }, { "epoch": 0.06571398935618482, "grad_norm": 0.455078125, "learning_rate": 0.001432496020685623, "loss": 0.1577, "step": 37062 }, { "epoch": 0.06571753552149463, "grad_norm": 0.3671875, "learning_rate": 0.0014324407119822098, "loss": 0.1653, "step": 37064 }, { "epoch": 0.06572108168680445, "grad_norm": 0.546875, "learning_rate": 0.0014323854018248939, "loss": 0.1857, "step": 37066 }, { "epoch": 0.06572462785211426, "grad_norm": 0.35546875, "learning_rate": 0.0014323300902139174, "loss": 0.1571, "step": 37068 }, { "epoch": 0.06572817401742408, "grad_norm": 0.255859375, "learning_rate": 0.001432274777149522, "loss": 0.158, "step": 37070 }, { "epoch": 0.0657317201827339, "grad_norm": 0.357421875, "learning_rate": 0.0014322194626319499, "loss": 0.1847, "step": 37072 }, { "epoch": 0.06573526634804372, "grad_norm": 0.54296875, "learning_rate": 0.0014321641466614424, "loss": 0.2224, "step": 37074 }, { "epoch": 0.06573881251335353, "grad_norm": 0.4375, "learning_rate": 0.0014321088292382422, "loss": 0.1258, "step": 37076 }, { "epoch": 0.06574235867866335, "grad_norm": 0.388671875, "learning_rate": 0.001432053510362591, "loss": 0.1971, "step": 37078 }, { "epoch": 0.06574590484397316, "grad_norm": 1.703125, "learning_rate": 0.0014319981900347302, "loss": 0.1584, "step": 37080 }, { "epoch": 0.06574945100928298, "grad_norm": 0.66796875, "learning_rate": 0.0014319428682549023, "loss": 0.2502, "step": 37082 }, { "epoch": 0.06575299717459279, "grad_norm": 0.65625, "learning_rate": 0.0014318875450233495, "loss": 0.2016, "step": 37084 }, { "epoch": 0.0657565433399026, "grad_norm": 0.458984375, "learning_rate": 0.0014318322203403133, "loss": 0.2383, "step": 37086 }, { "epoch": 0.06576008950521242, "grad_norm": 0.271484375, "learning_rate": 0.0014317768942060354, "loss": 0.2305, "step": 37088 }, { "epoch": 0.06576363567052224, "grad_norm": 2.5, "learning_rate": 0.001431721566620758, "loss": 0.3525, "step": 37090 }, { "epoch": 0.06576718183583205, "grad_norm": 0.84375, "learning_rate": 0.0014316662375847237, "loss": 0.2073, "step": 37092 }, { "epoch": 0.06577072800114186, "grad_norm": 0.625, "learning_rate": 0.0014316109070981733, "loss": 0.1671, "step": 37094 }, { "epoch": 0.06577427416645168, "grad_norm": 0.359375, "learning_rate": 0.0014315555751613501, "loss": 0.2918, "step": 37096 }, { "epoch": 0.0657778203317615, "grad_norm": 0.52734375, "learning_rate": 0.0014315002417744946, "loss": 0.1837, "step": 37098 }, { "epoch": 0.06578136649707131, "grad_norm": 1.46875, "learning_rate": 0.0014314449069378504, "loss": 0.4547, "step": 37100 }, { "epoch": 0.06578491266238112, "grad_norm": 0.73046875, "learning_rate": 0.001431389570651658, "loss": 0.1828, "step": 37102 }, { "epoch": 0.06578845882769094, "grad_norm": 0.216796875, "learning_rate": 0.0014313342329161606, "loss": 0.1248, "step": 37104 }, { "epoch": 0.06579200499300075, "grad_norm": 0.357421875, "learning_rate": 0.0014312788937315994, "loss": 0.1561, "step": 37106 }, { "epoch": 0.06579555115831057, "grad_norm": 0.427734375, "learning_rate": 0.001431223553098217, "loss": 0.1659, "step": 37108 }, { "epoch": 0.06579909732362038, "grad_norm": 0.3359375, "learning_rate": 0.001431168211016255, "loss": 0.1749, "step": 37110 }, { "epoch": 0.0658026434889302, "grad_norm": 1.484375, "learning_rate": 0.0014311128674859557, "loss": 0.2233, "step": 37112 }, { "epoch": 0.06580618965424001, "grad_norm": 0.36328125, "learning_rate": 0.0014310575225075612, "loss": 0.1978, "step": 37114 }, { "epoch": 0.06580973581954984, "grad_norm": 0.33203125, "learning_rate": 0.0014310021760813132, "loss": 0.1675, "step": 37116 }, { "epoch": 0.06581328198485965, "grad_norm": 0.388671875, "learning_rate": 0.001430946828207454, "loss": 0.1823, "step": 37118 }, { "epoch": 0.06581682815016947, "grad_norm": 0.39453125, "learning_rate": 0.0014308914788862255, "loss": 0.1979, "step": 37120 }, { "epoch": 0.06582037431547928, "grad_norm": 0.2177734375, "learning_rate": 0.00143083612811787, "loss": 0.186, "step": 37122 }, { "epoch": 0.0658239204807891, "grad_norm": 0.6796875, "learning_rate": 0.0014307807759026297, "loss": 0.175, "step": 37124 }, { "epoch": 0.06582746664609891, "grad_norm": 0.52734375, "learning_rate": 0.0014307254222407462, "loss": 0.1963, "step": 37126 }, { "epoch": 0.06583101281140873, "grad_norm": 1.4140625, "learning_rate": 0.0014306700671324617, "loss": 0.2441, "step": 37128 }, { "epoch": 0.06583455897671854, "grad_norm": 0.55859375, "learning_rate": 0.0014306147105780185, "loss": 0.1848, "step": 37130 }, { "epoch": 0.06583810514202836, "grad_norm": 0.2890625, "learning_rate": 0.0014305593525776583, "loss": 0.2553, "step": 37132 }, { "epoch": 0.06584165130733817, "grad_norm": 0.3984375, "learning_rate": 0.0014305039931316239, "loss": 0.2166, "step": 37134 }, { "epoch": 0.06584519747264798, "grad_norm": 0.8046875, "learning_rate": 0.001430448632240157, "loss": 0.1738, "step": 37136 }, { "epoch": 0.0658487436379578, "grad_norm": 0.78515625, "learning_rate": 0.0014303932699034996, "loss": 0.2454, "step": 37138 }, { "epoch": 0.06585228980326761, "grad_norm": 1.84375, "learning_rate": 0.0014303379061218937, "loss": 0.2256, "step": 37140 }, { "epoch": 0.06585583596857743, "grad_norm": 1.1640625, "learning_rate": 0.0014302825408955822, "loss": 0.2771, "step": 37142 }, { "epoch": 0.06585938213388724, "grad_norm": 1.0, "learning_rate": 0.0014302271742248065, "loss": 0.1846, "step": 37144 }, { "epoch": 0.06586292829919706, "grad_norm": 0.43359375, "learning_rate": 0.0014301718061098087, "loss": 0.2014, "step": 37146 }, { "epoch": 0.06586647446450687, "grad_norm": 0.421875, "learning_rate": 0.0014301164365508314, "loss": 0.1954, "step": 37148 }, { "epoch": 0.06587002062981669, "grad_norm": 0.306640625, "learning_rate": 0.001430061065548117, "loss": 0.1855, "step": 37150 }, { "epoch": 0.0658735667951265, "grad_norm": 0.33203125, "learning_rate": 0.0014300056931019066, "loss": 0.1771, "step": 37152 }, { "epoch": 0.06587711296043632, "grad_norm": 0.4765625, "learning_rate": 0.0014299503192124433, "loss": 0.2113, "step": 37154 }, { "epoch": 0.06588065912574613, "grad_norm": 0.337890625, "learning_rate": 0.0014298949438799687, "loss": 0.2012, "step": 37156 }, { "epoch": 0.06588420529105594, "grad_norm": 0.53125, "learning_rate": 0.0014298395671047256, "loss": 0.1979, "step": 37158 }, { "epoch": 0.06588775145636576, "grad_norm": 0.37890625, "learning_rate": 0.0014297841888869557, "loss": 0.159, "step": 37160 }, { "epoch": 0.06589129762167559, "grad_norm": 2.203125, "learning_rate": 0.0014297288092269013, "loss": 0.1849, "step": 37162 }, { "epoch": 0.0658948437869854, "grad_norm": 0.408203125, "learning_rate": 0.0014296734281248043, "loss": 0.2035, "step": 37164 }, { "epoch": 0.06589838995229522, "grad_norm": 0.365234375, "learning_rate": 0.0014296180455809079, "loss": 0.149, "step": 37166 }, { "epoch": 0.06590193611760503, "grad_norm": 0.38671875, "learning_rate": 0.0014295626615954528, "loss": 0.2075, "step": 37168 }, { "epoch": 0.06590548228291485, "grad_norm": 0.361328125, "learning_rate": 0.0014295072761686828, "loss": 0.1954, "step": 37170 }, { "epoch": 0.06590902844822466, "grad_norm": 0.32421875, "learning_rate": 0.001429451889300839, "loss": 0.1515, "step": 37172 }, { "epoch": 0.06591257461353447, "grad_norm": 0.62109375, "learning_rate": 0.0014293965009921639, "loss": 0.1751, "step": 37174 }, { "epoch": 0.06591612077884429, "grad_norm": 0.423828125, "learning_rate": 0.0014293411112429, "loss": 0.1332, "step": 37176 }, { "epoch": 0.0659196669441541, "grad_norm": 1.328125, "learning_rate": 0.0014292857200532893, "loss": 0.2566, "step": 37178 }, { "epoch": 0.06592321310946392, "grad_norm": 1.1875, "learning_rate": 0.0014292303274235744, "loss": 0.2818, "step": 37180 }, { "epoch": 0.06592675927477373, "grad_norm": 3.515625, "learning_rate": 0.0014291749333539972, "loss": 0.4083, "step": 37182 }, { "epoch": 0.06593030544008355, "grad_norm": 0.328125, "learning_rate": 0.0014291195378448, "loss": 0.1939, "step": 37184 }, { "epoch": 0.06593385160539336, "grad_norm": 0.58984375, "learning_rate": 0.0014290641408962249, "loss": 0.2074, "step": 37186 }, { "epoch": 0.06593739777070318, "grad_norm": 0.2158203125, "learning_rate": 0.0014290087425085146, "loss": 0.1692, "step": 37188 }, { "epoch": 0.06594094393601299, "grad_norm": 1.8671875, "learning_rate": 0.001428953342681911, "loss": 0.1932, "step": 37190 }, { "epoch": 0.0659444901013228, "grad_norm": 0.71875, "learning_rate": 0.0014288979414166568, "loss": 0.2129, "step": 37192 }, { "epoch": 0.06594803626663262, "grad_norm": 0.474609375, "learning_rate": 0.001428842538712994, "loss": 0.1735, "step": 37194 }, { "epoch": 0.06595158243194243, "grad_norm": 0.4921875, "learning_rate": 0.0014287871345711647, "loss": 0.2009, "step": 37196 }, { "epoch": 0.06595512859725225, "grad_norm": 0.30078125, "learning_rate": 0.0014287317289914116, "loss": 0.1426, "step": 37198 }, { "epoch": 0.06595867476256206, "grad_norm": 2.25, "learning_rate": 0.0014286763219739772, "loss": 0.2912, "step": 37200 }, { "epoch": 0.06596222092787188, "grad_norm": 0.6875, "learning_rate": 0.001428620913519103, "loss": 0.1292, "step": 37202 }, { "epoch": 0.0659657670931817, "grad_norm": 0.55859375, "learning_rate": 0.0014285655036270321, "loss": 0.1911, "step": 37204 }, { "epoch": 0.06596931325849151, "grad_norm": 0.56640625, "learning_rate": 0.0014285100922980063, "loss": 0.2575, "step": 37206 }, { "epoch": 0.06597285942380134, "grad_norm": 0.314453125, "learning_rate": 0.0014284546795322684, "loss": 0.2502, "step": 37208 }, { "epoch": 0.06597640558911115, "grad_norm": 0.65234375, "learning_rate": 0.0014283992653300603, "loss": 0.2582, "step": 37210 }, { "epoch": 0.06597995175442097, "grad_norm": 0.3046875, "learning_rate": 0.0014283438496916249, "loss": 0.2272, "step": 37212 }, { "epoch": 0.06598349791973078, "grad_norm": 0.515625, "learning_rate": 0.0014282884326172038, "loss": 0.1343, "step": 37214 }, { "epoch": 0.0659870440850406, "grad_norm": 0.318359375, "learning_rate": 0.00142823301410704, "loss": 0.2322, "step": 37216 }, { "epoch": 0.06599059025035041, "grad_norm": 0.99609375, "learning_rate": 0.001428177594161376, "loss": 0.1733, "step": 37218 }, { "epoch": 0.06599413641566022, "grad_norm": 1.0859375, "learning_rate": 0.0014281221727804536, "loss": 0.1542, "step": 37220 }, { "epoch": 0.06599768258097004, "grad_norm": 0.357421875, "learning_rate": 0.0014280667499645152, "loss": 0.1363, "step": 37222 }, { "epoch": 0.06600122874627985, "grad_norm": 0.494140625, "learning_rate": 0.0014280113257138037, "loss": 0.2046, "step": 37224 }, { "epoch": 0.06600477491158967, "grad_norm": 0.5390625, "learning_rate": 0.0014279559000285609, "loss": 0.1261, "step": 37226 }, { "epoch": 0.06600832107689948, "grad_norm": 0.2890625, "learning_rate": 0.0014279004729090296, "loss": 0.3132, "step": 37228 }, { "epoch": 0.0660118672422093, "grad_norm": 0.40234375, "learning_rate": 0.001427845044355452, "loss": 0.2867, "step": 37230 }, { "epoch": 0.06601541340751911, "grad_norm": 0.2236328125, "learning_rate": 0.0014277896143680708, "loss": 0.2209, "step": 37232 }, { "epoch": 0.06601895957282893, "grad_norm": 0.33984375, "learning_rate": 0.001427734182947128, "loss": 0.2246, "step": 37234 }, { "epoch": 0.06602250573813874, "grad_norm": 0.4453125, "learning_rate": 0.0014276787500928667, "loss": 0.1621, "step": 37236 }, { "epoch": 0.06602605190344855, "grad_norm": 0.75, "learning_rate": 0.0014276233158055285, "loss": 0.219, "step": 37238 }, { "epoch": 0.06602959806875837, "grad_norm": 0.9140625, "learning_rate": 0.0014275678800853564, "loss": 0.3027, "step": 37240 }, { "epoch": 0.06603314423406818, "grad_norm": 0.408203125, "learning_rate": 0.0014275124429325924, "loss": 0.1264, "step": 37242 }, { "epoch": 0.066036690399378, "grad_norm": 0.421875, "learning_rate": 0.0014274570043474797, "loss": 0.2091, "step": 37244 }, { "epoch": 0.06604023656468781, "grad_norm": 2.296875, "learning_rate": 0.00142740156433026, "loss": 0.2309, "step": 37246 }, { "epoch": 0.06604378272999763, "grad_norm": 1.265625, "learning_rate": 0.0014273461228811758, "loss": 0.4331, "step": 37248 }, { "epoch": 0.06604732889530744, "grad_norm": 0.490234375, "learning_rate": 0.0014272906800004699, "loss": 0.2109, "step": 37250 }, { "epoch": 0.06605087506061727, "grad_norm": 0.6328125, "learning_rate": 0.001427235235688385, "loss": 0.241, "step": 37252 }, { "epoch": 0.06605442122592708, "grad_norm": 0.287109375, "learning_rate": 0.0014271797899451633, "loss": 0.2312, "step": 37254 }, { "epoch": 0.0660579673912369, "grad_norm": 0.5703125, "learning_rate": 0.0014271243427710469, "loss": 0.1624, "step": 37256 }, { "epoch": 0.06606151355654671, "grad_norm": 0.375, "learning_rate": 0.0014270688941662786, "loss": 0.1598, "step": 37258 }, { "epoch": 0.06606505972185653, "grad_norm": 0.66796875, "learning_rate": 0.001427013444131101, "loss": 0.2129, "step": 37260 }, { "epoch": 0.06606860588716634, "grad_norm": 1.09375, "learning_rate": 0.001426957992665757, "loss": 0.129, "step": 37262 }, { "epoch": 0.06607215205247616, "grad_norm": 3.5, "learning_rate": 0.001426902539770488, "loss": 0.2001, "step": 37264 }, { "epoch": 0.06607569821778597, "grad_norm": 0.337890625, "learning_rate": 0.0014268470854455375, "loss": 0.187, "step": 37266 }, { "epoch": 0.06607924438309579, "grad_norm": 0.26171875, "learning_rate": 0.001426791629691148, "loss": 0.4083, "step": 37268 }, { "epoch": 0.0660827905484056, "grad_norm": 1.3359375, "learning_rate": 0.0014267361725075613, "loss": 0.2454, "step": 37270 }, { "epoch": 0.06608633671371542, "grad_norm": 0.5, "learning_rate": 0.0014266807138950204, "loss": 0.2099, "step": 37272 }, { "epoch": 0.06608988287902523, "grad_norm": 0.6328125, "learning_rate": 0.0014266252538537678, "loss": 0.1557, "step": 37274 }, { "epoch": 0.06609342904433504, "grad_norm": 1.3984375, "learning_rate": 0.0014265697923840461, "loss": 0.2451, "step": 37276 }, { "epoch": 0.06609697520964486, "grad_norm": 0.59765625, "learning_rate": 0.001426514329486098, "loss": 0.1939, "step": 37278 }, { "epoch": 0.06610052137495467, "grad_norm": 0.66015625, "learning_rate": 0.0014264588651601658, "loss": 0.194, "step": 37280 }, { "epoch": 0.06610406754026449, "grad_norm": 0.33203125, "learning_rate": 0.0014264033994064922, "loss": 0.1969, "step": 37282 }, { "epoch": 0.0661076137055743, "grad_norm": 0.6640625, "learning_rate": 0.0014263479322253194, "loss": 0.1824, "step": 37284 }, { "epoch": 0.06611115987088412, "grad_norm": 0.765625, "learning_rate": 0.0014262924636168905, "loss": 0.1981, "step": 37286 }, { "epoch": 0.06611470603619393, "grad_norm": 0.30078125, "learning_rate": 0.001426236993581448, "loss": 0.1568, "step": 37288 }, { "epoch": 0.06611825220150375, "grad_norm": 0.33984375, "learning_rate": 0.0014261815221192345, "loss": 0.1602, "step": 37290 }, { "epoch": 0.06612179836681356, "grad_norm": 0.404296875, "learning_rate": 0.0014261260492304923, "loss": 0.1939, "step": 37292 }, { "epoch": 0.06612534453212338, "grad_norm": 0.66796875, "learning_rate": 0.0014260705749154644, "loss": 0.3056, "step": 37294 }, { "epoch": 0.06612889069743319, "grad_norm": 0.259765625, "learning_rate": 0.001426015099174393, "loss": 0.2168, "step": 37296 }, { "epoch": 0.06613243686274302, "grad_norm": 1.21875, "learning_rate": 0.001425959622007521, "loss": 0.1704, "step": 37298 }, { "epoch": 0.06613598302805283, "grad_norm": 0.9140625, "learning_rate": 0.0014259041434150908, "loss": 0.1694, "step": 37300 }, { "epoch": 0.06613952919336265, "grad_norm": 0.466796875, "learning_rate": 0.0014258486633973455, "loss": 0.1942, "step": 37302 }, { "epoch": 0.06614307535867246, "grad_norm": 0.279296875, "learning_rate": 0.001425793181954527, "loss": 0.1581, "step": 37304 }, { "epoch": 0.06614662152398228, "grad_norm": 0.2236328125, "learning_rate": 0.0014257376990868787, "loss": 0.1621, "step": 37306 }, { "epoch": 0.06615016768929209, "grad_norm": 0.392578125, "learning_rate": 0.0014256822147946427, "loss": 0.1962, "step": 37308 }, { "epoch": 0.0661537138546019, "grad_norm": 0.341796875, "learning_rate": 0.001425626729078062, "loss": 0.1821, "step": 37310 }, { "epoch": 0.06615726001991172, "grad_norm": 0.453125, "learning_rate": 0.0014255712419373795, "loss": 0.2438, "step": 37312 }, { "epoch": 0.06616080618522154, "grad_norm": 1.6953125, "learning_rate": 0.0014255157533728368, "loss": 0.2581, "step": 37314 }, { "epoch": 0.06616435235053135, "grad_norm": 0.291015625, "learning_rate": 0.001425460263384678, "loss": 0.1508, "step": 37316 }, { "epoch": 0.06616789851584116, "grad_norm": 0.302734375, "learning_rate": 0.0014254047719731444, "loss": 0.2272, "step": 37318 }, { "epoch": 0.06617144468115098, "grad_norm": 0.2373046875, "learning_rate": 0.0014253492791384795, "loss": 0.213, "step": 37320 }, { "epoch": 0.0661749908464608, "grad_norm": 5.46875, "learning_rate": 0.0014252937848809259, "loss": 0.3193, "step": 37322 }, { "epoch": 0.06617853701177061, "grad_norm": 0.197265625, "learning_rate": 0.0014252382892007262, "loss": 0.1669, "step": 37324 }, { "epoch": 0.06618208317708042, "grad_norm": 1.1484375, "learning_rate": 0.0014251827920981233, "loss": 0.2215, "step": 37326 }, { "epoch": 0.06618562934239024, "grad_norm": 0.25, "learning_rate": 0.0014251272935733596, "loss": 0.1623, "step": 37328 }, { "epoch": 0.06618917550770005, "grad_norm": 1.0625, "learning_rate": 0.0014250717936266782, "loss": 0.2267, "step": 37330 }, { "epoch": 0.06619272167300987, "grad_norm": 0.318359375, "learning_rate": 0.0014250162922583214, "loss": 0.21, "step": 37332 }, { "epoch": 0.06619626783831968, "grad_norm": 0.265625, "learning_rate": 0.0014249607894685323, "loss": 0.2047, "step": 37334 }, { "epoch": 0.0661998140036295, "grad_norm": 0.8984375, "learning_rate": 0.0014249052852575534, "loss": 0.2141, "step": 37336 }, { "epoch": 0.06620336016893931, "grad_norm": 0.58203125, "learning_rate": 0.0014248497796256275, "loss": 0.3066, "step": 37338 }, { "epoch": 0.06620690633424912, "grad_norm": 0.390625, "learning_rate": 0.0014247942725729972, "loss": 0.313, "step": 37340 }, { "epoch": 0.06621045249955894, "grad_norm": 0.92578125, "learning_rate": 0.0014247387640999056, "loss": 0.2108, "step": 37342 }, { "epoch": 0.06621399866486877, "grad_norm": 0.703125, "learning_rate": 0.0014246832542065952, "loss": 0.1537, "step": 37344 }, { "epoch": 0.06621754483017858, "grad_norm": 0.42578125, "learning_rate": 0.001424627742893309, "loss": 0.1979, "step": 37346 }, { "epoch": 0.0662210909954884, "grad_norm": 0.35546875, "learning_rate": 0.0014245722301602893, "loss": 0.1326, "step": 37348 }, { "epoch": 0.06622463716079821, "grad_norm": 0.21484375, "learning_rate": 0.0014245167160077796, "loss": 0.2366, "step": 37350 }, { "epoch": 0.06622818332610803, "grad_norm": 0.2412109375, "learning_rate": 0.001424461200436022, "loss": 0.3163, "step": 37352 }, { "epoch": 0.06623172949141784, "grad_norm": 0.453125, "learning_rate": 0.0014244056834452595, "loss": 0.1565, "step": 37354 }, { "epoch": 0.06623527565672765, "grad_norm": 0.640625, "learning_rate": 0.001424350165035735, "loss": 0.1595, "step": 37356 }, { "epoch": 0.06623882182203747, "grad_norm": 0.28125, "learning_rate": 0.0014242946452076915, "loss": 0.1767, "step": 37358 }, { "epoch": 0.06624236798734728, "grad_norm": 0.75390625, "learning_rate": 0.0014242391239613714, "loss": 0.1787, "step": 37360 }, { "epoch": 0.0662459141526571, "grad_norm": 0.546875, "learning_rate": 0.0014241836012970177, "loss": 0.1628, "step": 37362 }, { "epoch": 0.06624946031796691, "grad_norm": 0.294921875, "learning_rate": 0.0014241280772148734, "loss": 0.1385, "step": 37364 }, { "epoch": 0.06625300648327673, "grad_norm": 1.34375, "learning_rate": 0.0014240725517151813, "loss": 0.1858, "step": 37366 }, { "epoch": 0.06625655264858654, "grad_norm": 1.3125, "learning_rate": 0.0014240170247981838, "loss": 0.1873, "step": 37368 }, { "epoch": 0.06626009881389636, "grad_norm": 1.5, "learning_rate": 0.0014239614964641242, "loss": 0.2223, "step": 37370 }, { "epoch": 0.06626364497920617, "grad_norm": 0.5625, "learning_rate": 0.0014239059667132453, "loss": 0.1777, "step": 37372 }, { "epoch": 0.06626719114451599, "grad_norm": 0.7421875, "learning_rate": 0.0014238504355457895, "loss": 0.2262, "step": 37374 }, { "epoch": 0.0662707373098258, "grad_norm": 0.171875, "learning_rate": 0.0014237949029620002, "loss": 0.2039, "step": 37376 }, { "epoch": 0.06627428347513561, "grad_norm": 1.1328125, "learning_rate": 0.00142373936896212, "loss": 0.1545, "step": 37378 }, { "epoch": 0.06627782964044543, "grad_norm": 1.5234375, "learning_rate": 0.001423683833546392, "loss": 0.1821, "step": 37380 }, { "epoch": 0.06628137580575524, "grad_norm": 0.271484375, "learning_rate": 0.001423628296715059, "loss": 0.1692, "step": 37382 }, { "epoch": 0.06628492197106506, "grad_norm": 0.5234375, "learning_rate": 0.0014235727584683635, "loss": 0.2125, "step": 37384 }, { "epoch": 0.06628846813637487, "grad_norm": 0.373046875, "learning_rate": 0.001423517218806549, "loss": 0.2159, "step": 37386 }, { "epoch": 0.0662920143016847, "grad_norm": 0.28515625, "learning_rate": 0.0014234616777298581, "loss": 0.1527, "step": 37388 }, { "epoch": 0.06629556046699452, "grad_norm": 0.31640625, "learning_rate": 0.0014234061352385336, "loss": 0.161, "step": 37390 }, { "epoch": 0.06629910663230433, "grad_norm": 0.2177734375, "learning_rate": 0.0014233505913328187, "loss": 0.1688, "step": 37392 }, { "epoch": 0.06630265279761414, "grad_norm": 0.3515625, "learning_rate": 0.0014232950460129556, "loss": 0.2113, "step": 37394 }, { "epoch": 0.06630619896292396, "grad_norm": 0.32421875, "learning_rate": 0.0014232394992791885, "loss": 0.1484, "step": 37396 }, { "epoch": 0.06630974512823377, "grad_norm": 0.2138671875, "learning_rate": 0.0014231839511317588, "loss": 0.1443, "step": 37398 }, { "epoch": 0.06631329129354359, "grad_norm": 0.70703125, "learning_rate": 0.0014231284015709107, "loss": 0.4504, "step": 37400 }, { "epoch": 0.0663168374588534, "grad_norm": 0.23828125, "learning_rate": 0.0014230728505968868, "loss": 0.1718, "step": 37402 }, { "epoch": 0.06632038362416322, "grad_norm": 0.515625, "learning_rate": 0.0014230172982099296, "loss": 0.1779, "step": 37404 }, { "epoch": 0.06632392978947303, "grad_norm": 0.439453125, "learning_rate": 0.0014229617444102825, "loss": 0.2164, "step": 37406 }, { "epoch": 0.06632747595478285, "grad_norm": 0.318359375, "learning_rate": 0.0014229061891981884, "loss": 0.1896, "step": 37408 }, { "epoch": 0.06633102212009266, "grad_norm": 0.52734375, "learning_rate": 0.0014228506325738902, "loss": 0.2448, "step": 37410 }, { "epoch": 0.06633456828540248, "grad_norm": 0.40625, "learning_rate": 0.0014227950745376308, "loss": 0.1804, "step": 37412 }, { "epoch": 0.06633811445071229, "grad_norm": 0.58984375, "learning_rate": 0.001422739515089653, "loss": 0.2256, "step": 37414 }, { "epoch": 0.0663416606160221, "grad_norm": 0.392578125, "learning_rate": 0.0014226839542302006, "loss": 0.1826, "step": 37416 }, { "epoch": 0.06634520678133192, "grad_norm": 5.625, "learning_rate": 0.0014226283919595154, "loss": 0.2268, "step": 37418 }, { "epoch": 0.06634875294664173, "grad_norm": 0.388671875, "learning_rate": 0.0014225728282778413, "loss": 0.1687, "step": 37420 }, { "epoch": 0.06635229911195155, "grad_norm": 0.359375, "learning_rate": 0.0014225172631854208, "loss": 0.1465, "step": 37422 }, { "epoch": 0.06635584527726136, "grad_norm": 0.18359375, "learning_rate": 0.0014224616966824976, "loss": 0.2361, "step": 37424 }, { "epoch": 0.06635939144257118, "grad_norm": 0.80859375, "learning_rate": 0.001422406128769314, "loss": 0.2917, "step": 37426 }, { "epoch": 0.06636293760788099, "grad_norm": 0.400390625, "learning_rate": 0.0014223505594461133, "loss": 0.2081, "step": 37428 }, { "epoch": 0.0663664837731908, "grad_norm": 0.9453125, "learning_rate": 0.0014222949887131384, "loss": 0.2403, "step": 37430 }, { "epoch": 0.06637002993850062, "grad_norm": 1.859375, "learning_rate": 0.0014222394165706326, "loss": 0.2442, "step": 37432 }, { "epoch": 0.06637357610381045, "grad_norm": 0.53515625, "learning_rate": 0.0014221838430188386, "loss": 0.1891, "step": 37434 }, { "epoch": 0.06637712226912026, "grad_norm": 0.453125, "learning_rate": 0.001422128268058, "loss": 0.2332, "step": 37436 }, { "epoch": 0.06638066843443008, "grad_norm": 0.47265625, "learning_rate": 0.0014220726916883586, "loss": 0.1994, "step": 37438 }, { "epoch": 0.0663842145997399, "grad_norm": 0.240234375, "learning_rate": 0.001422017113910159, "loss": 0.1556, "step": 37440 }, { "epoch": 0.06638776076504971, "grad_norm": 0.349609375, "learning_rate": 0.0014219615347236436, "loss": 0.1421, "step": 37442 }, { "epoch": 0.06639130693035952, "grad_norm": 0.51953125, "learning_rate": 0.0014219059541290553, "loss": 0.1607, "step": 37444 }, { "epoch": 0.06639485309566934, "grad_norm": 0.306640625, "learning_rate": 0.0014218503721266376, "loss": 0.1928, "step": 37446 }, { "epoch": 0.06639839926097915, "grad_norm": 1.8359375, "learning_rate": 0.001421794788716633, "loss": 0.3405, "step": 37448 }, { "epoch": 0.06640194542628897, "grad_norm": 0.333984375, "learning_rate": 0.0014217392038992848, "loss": 0.1559, "step": 37450 }, { "epoch": 0.06640549159159878, "grad_norm": 1.1796875, "learning_rate": 0.0014216836176748364, "loss": 0.2223, "step": 37452 }, { "epoch": 0.0664090377569086, "grad_norm": 0.427734375, "learning_rate": 0.001421628030043531, "loss": 0.1661, "step": 37454 }, { "epoch": 0.06641258392221841, "grad_norm": 0.59765625, "learning_rate": 0.001421572441005611, "loss": 0.2503, "step": 37456 }, { "epoch": 0.06641613008752822, "grad_norm": 0.345703125, "learning_rate": 0.0014215168505613202, "loss": 0.1865, "step": 37458 }, { "epoch": 0.06641967625283804, "grad_norm": 0.578125, "learning_rate": 0.0014214612587109012, "loss": 0.1688, "step": 37460 }, { "epoch": 0.06642322241814785, "grad_norm": 0.30078125, "learning_rate": 0.0014214056654545976, "loss": 0.1882, "step": 37462 }, { "epoch": 0.06642676858345767, "grad_norm": 0.3828125, "learning_rate": 0.0014213500707926524, "loss": 0.1713, "step": 37464 }, { "epoch": 0.06643031474876748, "grad_norm": 0.94140625, "learning_rate": 0.0014212944747253086, "loss": 0.2066, "step": 37466 }, { "epoch": 0.0664338609140773, "grad_norm": 1.2734375, "learning_rate": 0.001421238877252809, "loss": 0.169, "step": 37468 }, { "epoch": 0.06643740707938711, "grad_norm": 0.40234375, "learning_rate": 0.0014211832783753975, "loss": 0.2077, "step": 37470 }, { "epoch": 0.06644095324469693, "grad_norm": 0.3671875, "learning_rate": 0.0014211276780933166, "loss": 0.2005, "step": 37472 }, { "epoch": 0.06644449941000674, "grad_norm": 0.3515625, "learning_rate": 0.0014210720764068104, "loss": 0.1488, "step": 37474 }, { "epoch": 0.06644804557531656, "grad_norm": 0.369140625, "learning_rate": 0.0014210164733161209, "loss": 0.3038, "step": 37476 }, { "epoch": 0.06645159174062637, "grad_norm": 0.2412109375, "learning_rate": 0.001420960868821492, "loss": 0.2109, "step": 37478 }, { "epoch": 0.0664551379059362, "grad_norm": 0.294921875, "learning_rate": 0.0014209052629231668, "loss": 0.1579, "step": 37480 }, { "epoch": 0.06645868407124601, "grad_norm": 0.318359375, "learning_rate": 0.0014208496556213882, "loss": 0.1618, "step": 37482 }, { "epoch": 0.06646223023655583, "grad_norm": 0.1953125, "learning_rate": 0.0014207940469163995, "loss": 0.1736, "step": 37484 }, { "epoch": 0.06646577640186564, "grad_norm": 0.2470703125, "learning_rate": 0.0014207384368084444, "loss": 0.2026, "step": 37486 }, { "epoch": 0.06646932256717546, "grad_norm": 0.25, "learning_rate": 0.001420682825297765, "loss": 0.1649, "step": 37488 }, { "epoch": 0.06647286873248527, "grad_norm": 0.515625, "learning_rate": 0.0014206272123846058, "loss": 0.2174, "step": 37490 }, { "epoch": 0.06647641489779509, "grad_norm": 0.447265625, "learning_rate": 0.0014205715980692092, "loss": 0.2247, "step": 37492 }, { "epoch": 0.0664799610631049, "grad_norm": 0.45703125, "learning_rate": 0.0014205159823518188, "loss": 0.2282, "step": 37494 }, { "epoch": 0.06648350722841471, "grad_norm": 1.6875, "learning_rate": 0.0014204603652326773, "loss": 0.2191, "step": 37496 }, { "epoch": 0.06648705339372453, "grad_norm": 0.3671875, "learning_rate": 0.0014204047467120285, "loss": 0.1725, "step": 37498 }, { "epoch": 0.06649059955903434, "grad_norm": 0.5546875, "learning_rate": 0.0014203491267901157, "loss": 0.1582, "step": 37500 }, { "epoch": 0.06649414572434416, "grad_norm": 0.296875, "learning_rate": 0.0014202935054671818, "loss": 0.2006, "step": 37502 }, { "epoch": 0.06649769188965397, "grad_norm": 3.1875, "learning_rate": 0.0014202378827434697, "loss": 0.3244, "step": 37504 }, { "epoch": 0.06650123805496379, "grad_norm": 0.400390625, "learning_rate": 0.0014201822586192238, "loss": 0.2808, "step": 37506 }, { "epoch": 0.0665047842202736, "grad_norm": 0.44140625, "learning_rate": 0.0014201266330946861, "loss": 0.2018, "step": 37508 }, { "epoch": 0.06650833038558342, "grad_norm": 1.7109375, "learning_rate": 0.0014200710061701009, "loss": 0.3386, "step": 37510 }, { "epoch": 0.06651187655089323, "grad_norm": 0.310546875, "learning_rate": 0.0014200153778457106, "loss": 0.2099, "step": 37512 }, { "epoch": 0.06651542271620305, "grad_norm": 0.490234375, "learning_rate": 0.0014199597481217591, "loss": 0.1743, "step": 37514 }, { "epoch": 0.06651896888151286, "grad_norm": 5.25, "learning_rate": 0.0014199041169984897, "loss": 0.4069, "step": 37516 }, { "epoch": 0.06652251504682268, "grad_norm": 0.58203125, "learning_rate": 0.0014198484844761454, "loss": 0.2001, "step": 37518 }, { "epoch": 0.06652606121213249, "grad_norm": 0.5546875, "learning_rate": 0.0014197928505549696, "loss": 0.171, "step": 37520 }, { "epoch": 0.0665296073774423, "grad_norm": 0.82421875, "learning_rate": 0.0014197372152352057, "loss": 0.184, "step": 37522 }, { "epoch": 0.06653315354275213, "grad_norm": 0.48046875, "learning_rate": 0.001419681578517097, "loss": 0.2236, "step": 37524 }, { "epoch": 0.06653669970806195, "grad_norm": 0.314453125, "learning_rate": 0.0014196259404008864, "loss": 0.2118, "step": 37526 }, { "epoch": 0.06654024587337176, "grad_norm": 0.25390625, "learning_rate": 0.0014195703008868178, "loss": 0.1564, "step": 37528 }, { "epoch": 0.06654379203868158, "grad_norm": 0.54296875, "learning_rate": 0.0014195146599751342, "loss": 0.1861, "step": 37530 }, { "epoch": 0.06654733820399139, "grad_norm": 0.6484375, "learning_rate": 0.0014194590176660796, "loss": 0.2699, "step": 37532 }, { "epoch": 0.0665508843693012, "grad_norm": 0.458984375, "learning_rate": 0.0014194033739598963, "loss": 0.2387, "step": 37534 }, { "epoch": 0.06655443053461102, "grad_norm": 0.48046875, "learning_rate": 0.0014193477288568285, "loss": 0.208, "step": 37536 }, { "epoch": 0.06655797669992083, "grad_norm": 1.65625, "learning_rate": 0.001419292082357119, "loss": 0.2327, "step": 37538 }, { "epoch": 0.06656152286523065, "grad_norm": 1.1640625, "learning_rate": 0.0014192364344610114, "loss": 0.1486, "step": 37540 }, { "epoch": 0.06656506903054046, "grad_norm": 4.625, "learning_rate": 0.001419180785168749, "loss": 0.1827, "step": 37542 }, { "epoch": 0.06656861519585028, "grad_norm": 0.2578125, "learning_rate": 0.0014191251344805756, "loss": 0.1442, "step": 37544 }, { "epoch": 0.06657216136116009, "grad_norm": 1.7890625, "learning_rate": 0.001419069482396734, "loss": 0.191, "step": 37546 }, { "epoch": 0.06657570752646991, "grad_norm": 0.609375, "learning_rate": 0.0014190138289174679, "loss": 0.1833, "step": 37548 }, { "epoch": 0.06657925369177972, "grad_norm": 0.43359375, "learning_rate": 0.0014189581740430203, "loss": 0.198, "step": 37550 }, { "epoch": 0.06658279985708954, "grad_norm": 0.51953125, "learning_rate": 0.0014189025177736355, "loss": 0.2372, "step": 37552 }, { "epoch": 0.06658634602239935, "grad_norm": 0.1767578125, "learning_rate": 0.0014188468601095561, "loss": 0.1634, "step": 37554 }, { "epoch": 0.06658989218770917, "grad_norm": 0.44140625, "learning_rate": 0.0014187912010510256, "loss": 0.2132, "step": 37556 }, { "epoch": 0.06659343835301898, "grad_norm": 0.392578125, "learning_rate": 0.0014187355405982877, "loss": 0.3887, "step": 37558 }, { "epoch": 0.0665969845183288, "grad_norm": 0.828125, "learning_rate": 0.0014186798787515856, "loss": 0.2035, "step": 37560 }, { "epoch": 0.06660053068363861, "grad_norm": 0.65234375, "learning_rate": 0.0014186242155111628, "loss": 0.2017, "step": 37562 }, { "epoch": 0.06660407684894842, "grad_norm": 0.3046875, "learning_rate": 0.0014185685508772627, "loss": 0.1746, "step": 37564 }, { "epoch": 0.06660762301425824, "grad_norm": 0.29296875, "learning_rate": 0.001418512884850129, "loss": 0.3041, "step": 37566 }, { "epoch": 0.06661116917956805, "grad_norm": 0.51953125, "learning_rate": 0.0014184572174300047, "loss": 0.2298, "step": 37568 }, { "epoch": 0.06661471534487788, "grad_norm": 1.2421875, "learning_rate": 0.0014184015486171338, "loss": 0.3456, "step": 37570 }, { "epoch": 0.0666182615101877, "grad_norm": 0.56640625, "learning_rate": 0.0014183458784117592, "loss": 0.2807, "step": 37572 }, { "epoch": 0.06662180767549751, "grad_norm": 0.53125, "learning_rate": 0.0014182902068141249, "loss": 0.1707, "step": 37574 }, { "epoch": 0.06662535384080732, "grad_norm": 1.4140625, "learning_rate": 0.0014182345338244742, "loss": 0.2557, "step": 37576 }, { "epoch": 0.06662890000611714, "grad_norm": 0.7109375, "learning_rate": 0.0014181788594430501, "loss": 0.2071, "step": 37578 }, { "epoch": 0.06663244617142695, "grad_norm": 0.326171875, "learning_rate": 0.001418123183670097, "loss": 0.1587, "step": 37580 }, { "epoch": 0.06663599233673677, "grad_norm": 2.09375, "learning_rate": 0.0014180675065058573, "loss": 0.2365, "step": 37582 }, { "epoch": 0.06663953850204658, "grad_norm": 0.365234375, "learning_rate": 0.0014180118279505753, "loss": 0.1756, "step": 37584 }, { "epoch": 0.0666430846673564, "grad_norm": 0.73828125, "learning_rate": 0.0014179561480044942, "loss": 0.2499, "step": 37586 }, { "epoch": 0.06664663083266621, "grad_norm": 0.271484375, "learning_rate": 0.001417900466667858, "loss": 0.2207, "step": 37588 }, { "epoch": 0.06665017699797603, "grad_norm": 0.7578125, "learning_rate": 0.0014178447839409095, "loss": 0.1859, "step": 37590 }, { "epoch": 0.06665372316328584, "grad_norm": 1.625, "learning_rate": 0.0014177890998238925, "loss": 0.2766, "step": 37592 }, { "epoch": 0.06665726932859566, "grad_norm": 0.53125, "learning_rate": 0.0014177334143170507, "loss": 0.1801, "step": 37594 }, { "epoch": 0.06666081549390547, "grad_norm": 0.734375, "learning_rate": 0.0014176777274206279, "loss": 0.2163, "step": 37596 }, { "epoch": 0.06666436165921528, "grad_norm": 0.2314453125, "learning_rate": 0.0014176220391348667, "loss": 0.2238, "step": 37598 }, { "epoch": 0.0666679078245251, "grad_norm": 0.6171875, "learning_rate": 0.0014175663494600114, "loss": 0.1737, "step": 37600 }, { "epoch": 0.06667145398983491, "grad_norm": 1.0, "learning_rate": 0.001417510658396305, "loss": 0.2874, "step": 37602 }, { "epoch": 0.06667500015514473, "grad_norm": 0.26171875, "learning_rate": 0.0014174549659439917, "loss": 0.151, "step": 37604 }, { "epoch": 0.06667854632045454, "grad_norm": 0.5078125, "learning_rate": 0.0014173992721033149, "loss": 0.1412, "step": 37606 }, { "epoch": 0.06668209248576436, "grad_norm": 0.5625, "learning_rate": 0.001417343576874518, "loss": 0.2405, "step": 37608 }, { "epoch": 0.06668563865107417, "grad_norm": 0.984375, "learning_rate": 0.0014172878802578446, "loss": 0.1942, "step": 37610 }, { "epoch": 0.06668918481638399, "grad_norm": 0.466796875, "learning_rate": 0.001417232182253538, "loss": 0.1877, "step": 37612 }, { "epoch": 0.0666927309816938, "grad_norm": 0.60546875, "learning_rate": 0.0014171764828618425, "loss": 0.1726, "step": 37614 }, { "epoch": 0.06669627714700363, "grad_norm": 0.314453125, "learning_rate": 0.0014171207820830012, "loss": 0.1234, "step": 37616 }, { "epoch": 0.06669982331231344, "grad_norm": 0.328125, "learning_rate": 0.0014170650799172578, "loss": 0.1873, "step": 37618 }, { "epoch": 0.06670336947762326, "grad_norm": 2.71875, "learning_rate": 0.0014170093763648556, "loss": 0.3767, "step": 37620 }, { "epoch": 0.06670691564293307, "grad_norm": 0.341796875, "learning_rate": 0.0014169536714260392, "loss": 0.1729, "step": 37622 }, { "epoch": 0.06671046180824289, "grad_norm": 0.3984375, "learning_rate": 0.001416897965101051, "loss": 0.1846, "step": 37624 }, { "epoch": 0.0667140079735527, "grad_norm": 0.34375, "learning_rate": 0.0014168422573901354, "loss": 0.1853, "step": 37626 }, { "epoch": 0.06671755413886252, "grad_norm": 0.5703125, "learning_rate": 0.0014167865482935357, "loss": 0.1324, "step": 37628 }, { "epoch": 0.06672110030417233, "grad_norm": 0.275390625, "learning_rate": 0.0014167308378114961, "loss": 0.2048, "step": 37630 }, { "epoch": 0.06672464646948215, "grad_norm": 0.1279296875, "learning_rate": 0.0014166751259442592, "loss": 0.1168, "step": 37632 }, { "epoch": 0.06672819263479196, "grad_norm": 0.828125, "learning_rate": 0.0014166194126920696, "loss": 0.2124, "step": 37634 }, { "epoch": 0.06673173880010178, "grad_norm": 0.63671875, "learning_rate": 0.0014165636980551701, "loss": 0.1795, "step": 37636 }, { "epoch": 0.06673528496541159, "grad_norm": 0.30859375, "learning_rate": 0.0014165079820338054, "loss": 0.207, "step": 37638 }, { "epoch": 0.0667388311307214, "grad_norm": 0.921875, "learning_rate": 0.0014164522646282183, "loss": 0.1883, "step": 37640 }, { "epoch": 0.06674237729603122, "grad_norm": 0.48046875, "learning_rate": 0.0014163965458386532, "loss": 0.221, "step": 37642 }, { "epoch": 0.06674592346134103, "grad_norm": 1.6171875, "learning_rate": 0.001416340825665353, "loss": 0.1809, "step": 37644 }, { "epoch": 0.06674946962665085, "grad_norm": 0.71484375, "learning_rate": 0.0014162851041085618, "loss": 0.2412, "step": 37646 }, { "epoch": 0.06675301579196066, "grad_norm": 0.9453125, "learning_rate": 0.0014162293811685236, "loss": 0.2272, "step": 37648 }, { "epoch": 0.06675656195727048, "grad_norm": 0.55078125, "learning_rate": 0.0014161736568454818, "loss": 0.174, "step": 37650 }, { "epoch": 0.06676010812258029, "grad_norm": 0.8984375, "learning_rate": 0.0014161179311396796, "loss": 0.1594, "step": 37652 }, { "epoch": 0.0667636542878901, "grad_norm": 0.671875, "learning_rate": 0.0014160622040513618, "loss": 0.1876, "step": 37654 }, { "epoch": 0.06676720045319992, "grad_norm": 0.6640625, "learning_rate": 0.001416006475580771, "loss": 0.2195, "step": 37656 }, { "epoch": 0.06677074661850974, "grad_norm": 0.291015625, "learning_rate": 0.0014159507457281516, "loss": 0.2887, "step": 37658 }, { "epoch": 0.06677429278381956, "grad_norm": 0.59765625, "learning_rate": 0.001415895014493747, "loss": 0.1456, "step": 37660 }, { "epoch": 0.06677783894912938, "grad_norm": 0.279296875, "learning_rate": 0.0014158392818778014, "loss": 0.2402, "step": 37662 }, { "epoch": 0.06678138511443919, "grad_norm": 1.1328125, "learning_rate": 0.001415783547880558, "loss": 0.443, "step": 37664 }, { "epoch": 0.06678493127974901, "grad_norm": 0.2265625, "learning_rate": 0.0014157278125022613, "loss": 0.3335, "step": 37666 }, { "epoch": 0.06678847744505882, "grad_norm": 0.392578125, "learning_rate": 0.001415672075743154, "loss": 0.1977, "step": 37668 }, { "epoch": 0.06679202361036864, "grad_norm": 0.373046875, "learning_rate": 0.0014156163376034805, "loss": 0.1847, "step": 37670 }, { "epoch": 0.06679556977567845, "grad_norm": 0.26171875, "learning_rate": 0.0014155605980834844, "loss": 0.2925, "step": 37672 }, { "epoch": 0.06679911594098827, "grad_norm": 0.470703125, "learning_rate": 0.00141550485718341, "loss": 0.2119, "step": 37674 }, { "epoch": 0.06680266210629808, "grad_norm": 0.87890625, "learning_rate": 0.0014154491149035002, "loss": 0.2032, "step": 37676 }, { "epoch": 0.0668062082716079, "grad_norm": 0.458984375, "learning_rate": 0.0014153933712439993, "loss": 0.2193, "step": 37678 }, { "epoch": 0.06680975443691771, "grad_norm": 0.310546875, "learning_rate": 0.001415337626205151, "loss": 0.206, "step": 37680 }, { "epoch": 0.06681330060222752, "grad_norm": 1.703125, "learning_rate": 0.001415281879787199, "loss": 0.2849, "step": 37682 }, { "epoch": 0.06681684676753734, "grad_norm": 0.2890625, "learning_rate": 0.0014152261319903873, "loss": 0.194, "step": 37684 }, { "epoch": 0.06682039293284715, "grad_norm": 0.87890625, "learning_rate": 0.0014151703828149595, "loss": 0.2511, "step": 37686 }, { "epoch": 0.06682393909815697, "grad_norm": 0.40625, "learning_rate": 0.00141511463226116, "loss": 0.1932, "step": 37688 }, { "epoch": 0.06682748526346678, "grad_norm": 0.734375, "learning_rate": 0.0014150588803292315, "loss": 0.206, "step": 37690 }, { "epoch": 0.0668310314287766, "grad_norm": 0.447265625, "learning_rate": 0.0014150031270194188, "loss": 0.2043, "step": 37692 }, { "epoch": 0.06683457759408641, "grad_norm": 0.33984375, "learning_rate": 0.0014149473723319651, "loss": 0.2395, "step": 37694 }, { "epoch": 0.06683812375939623, "grad_norm": 0.259765625, "learning_rate": 0.0014148916162671147, "loss": 0.1422, "step": 37696 }, { "epoch": 0.06684166992470604, "grad_norm": 0.427734375, "learning_rate": 0.0014148358588251114, "loss": 0.2655, "step": 37698 }, { "epoch": 0.06684521609001585, "grad_norm": 0.376953125, "learning_rate": 0.0014147801000061989, "loss": 0.1459, "step": 37700 }, { "epoch": 0.06684876225532567, "grad_norm": 0.322265625, "learning_rate": 0.001414724339810621, "loss": 0.2364, "step": 37702 }, { "epoch": 0.06685230842063548, "grad_norm": 1.515625, "learning_rate": 0.001414668578238622, "loss": 0.2242, "step": 37704 }, { "epoch": 0.06685585458594531, "grad_norm": 0.39453125, "learning_rate": 0.001414612815290445, "loss": 0.1553, "step": 37706 }, { "epoch": 0.06685940075125513, "grad_norm": 1.7265625, "learning_rate": 0.0014145570509663343, "loss": 0.2396, "step": 37708 }, { "epoch": 0.06686294691656494, "grad_norm": 0.9765625, "learning_rate": 0.0014145012852665338, "loss": 0.1837, "step": 37710 }, { "epoch": 0.06686649308187476, "grad_norm": 1.75, "learning_rate": 0.0014144455181912877, "loss": 0.5242, "step": 37712 }, { "epoch": 0.06687003924718457, "grad_norm": 1.453125, "learning_rate": 0.0014143897497408391, "loss": 0.2209, "step": 37714 }, { "epoch": 0.06687358541249439, "grad_norm": 0.2265625, "learning_rate": 0.0014143339799154325, "loss": 0.1891, "step": 37716 }, { "epoch": 0.0668771315778042, "grad_norm": 0.84375, "learning_rate": 0.001414278208715312, "loss": 0.2593, "step": 37718 }, { "epoch": 0.06688067774311401, "grad_norm": 0.345703125, "learning_rate": 0.0014142224361407207, "loss": 0.1632, "step": 37720 }, { "epoch": 0.06688422390842383, "grad_norm": 0.302734375, "learning_rate": 0.0014141666621919034, "loss": 0.2295, "step": 37722 }, { "epoch": 0.06688777007373364, "grad_norm": 1.6796875, "learning_rate": 0.0014141108868691032, "loss": 0.2882, "step": 37724 }, { "epoch": 0.06689131623904346, "grad_norm": 0.3046875, "learning_rate": 0.0014140551101725644, "loss": 0.2254, "step": 37726 }, { "epoch": 0.06689486240435327, "grad_norm": 0.376953125, "learning_rate": 0.0014139993321025314, "loss": 0.2393, "step": 37728 }, { "epoch": 0.06689840856966309, "grad_norm": 0.63671875, "learning_rate": 0.0014139435526592475, "loss": 0.1908, "step": 37730 }, { "epoch": 0.0669019547349729, "grad_norm": 0.380859375, "learning_rate": 0.0014138877718429571, "loss": 0.2203, "step": 37732 }, { "epoch": 0.06690550090028272, "grad_norm": 0.56640625, "learning_rate": 0.0014138319896539033, "loss": 0.3896, "step": 37734 }, { "epoch": 0.06690904706559253, "grad_norm": 0.349609375, "learning_rate": 0.0014137762060923314, "loss": 0.2426, "step": 37736 }, { "epoch": 0.06691259323090235, "grad_norm": 2.625, "learning_rate": 0.0014137204211584842, "loss": 0.282, "step": 37738 }, { "epoch": 0.06691613939621216, "grad_norm": 0.78515625, "learning_rate": 0.0014136646348526063, "loss": 0.1812, "step": 37740 }, { "epoch": 0.06691968556152197, "grad_norm": 0.361328125, "learning_rate": 0.0014136088471749413, "loss": 0.2213, "step": 37742 }, { "epoch": 0.06692323172683179, "grad_norm": 1.2109375, "learning_rate": 0.0014135530581257337, "loss": 0.1624, "step": 37744 }, { "epoch": 0.0669267778921416, "grad_norm": 0.494140625, "learning_rate": 0.0014134972677052266, "loss": 0.1981, "step": 37746 }, { "epoch": 0.06693032405745142, "grad_norm": 0.578125, "learning_rate": 0.001413441475913665, "loss": 0.2082, "step": 37748 }, { "epoch": 0.06693387022276123, "grad_norm": 0.95703125, "learning_rate": 0.0014133856827512922, "loss": 0.4842, "step": 37750 }, { "epoch": 0.06693741638807106, "grad_norm": 0.44921875, "learning_rate": 0.0014133298882183527, "loss": 0.1709, "step": 37752 }, { "epoch": 0.06694096255338088, "grad_norm": 0.40234375, "learning_rate": 0.00141327409231509, "loss": 0.171, "step": 37754 }, { "epoch": 0.06694450871869069, "grad_norm": 0.75390625, "learning_rate": 0.0014132182950417488, "loss": 0.1958, "step": 37756 }, { "epoch": 0.0669480548840005, "grad_norm": 2.828125, "learning_rate": 0.0014131624963985723, "loss": 0.3072, "step": 37758 }, { "epoch": 0.06695160104931032, "grad_norm": 0.251953125, "learning_rate": 0.001413106696385805, "loss": 0.176, "step": 37760 }, { "epoch": 0.06695514721462013, "grad_norm": 0.5859375, "learning_rate": 0.0014130508950036913, "loss": 0.2251, "step": 37762 }, { "epoch": 0.06695869337992995, "grad_norm": 0.490234375, "learning_rate": 0.0014129950922524743, "loss": 0.2637, "step": 37764 }, { "epoch": 0.06696223954523976, "grad_norm": 0.6484375, "learning_rate": 0.001412939288132399, "loss": 0.1544, "step": 37766 }, { "epoch": 0.06696578571054958, "grad_norm": 0.447265625, "learning_rate": 0.0014128834826437088, "loss": 0.1755, "step": 37768 }, { "epoch": 0.06696933187585939, "grad_norm": 0.439453125, "learning_rate": 0.0014128276757866477, "loss": 0.2043, "step": 37770 }, { "epoch": 0.0669728780411692, "grad_norm": 0.2392578125, "learning_rate": 0.0014127718675614606, "loss": 0.2637, "step": 37772 }, { "epoch": 0.06697642420647902, "grad_norm": 0.28125, "learning_rate": 0.0014127160579683908, "loss": 0.1467, "step": 37774 }, { "epoch": 0.06697997037178884, "grad_norm": 0.34765625, "learning_rate": 0.0014126602470076826, "loss": 0.1942, "step": 37776 }, { "epoch": 0.06698351653709865, "grad_norm": 0.5703125, "learning_rate": 0.0014126044346795801, "loss": 0.2474, "step": 37778 }, { "epoch": 0.06698706270240846, "grad_norm": 1.2265625, "learning_rate": 0.0014125486209843276, "loss": 0.1811, "step": 37780 }, { "epoch": 0.06699060886771828, "grad_norm": 0.85546875, "learning_rate": 0.0014124928059221686, "loss": 0.2249, "step": 37782 }, { "epoch": 0.0669941550330281, "grad_norm": 0.82421875, "learning_rate": 0.0014124369894933477, "loss": 0.198, "step": 37784 }, { "epoch": 0.06699770119833791, "grad_norm": 0.294921875, "learning_rate": 0.001412381171698109, "loss": 0.1259, "step": 37786 }, { "epoch": 0.06700124736364772, "grad_norm": 0.859375, "learning_rate": 0.0014123253525366963, "loss": 0.1821, "step": 37788 }, { "epoch": 0.06700479352895754, "grad_norm": 0.46484375, "learning_rate": 0.0014122695320093542, "loss": 0.1887, "step": 37790 }, { "epoch": 0.06700833969426735, "grad_norm": 0.54296875, "learning_rate": 0.0014122137101163265, "loss": 0.1649, "step": 37792 }, { "epoch": 0.06701188585957717, "grad_norm": 0.9921875, "learning_rate": 0.0014121578868578574, "loss": 0.2041, "step": 37794 }, { "epoch": 0.067015432024887, "grad_norm": 1.546875, "learning_rate": 0.0014121020622341908, "loss": 0.2977, "step": 37796 }, { "epoch": 0.06701897819019681, "grad_norm": 0.314453125, "learning_rate": 0.0014120462362455714, "loss": 0.1509, "step": 37798 }, { "epoch": 0.06702252435550662, "grad_norm": 0.181640625, "learning_rate": 0.0014119904088922429, "loss": 0.1859, "step": 37800 }, { "epoch": 0.06702607052081644, "grad_norm": 1.03125, "learning_rate": 0.0014119345801744494, "loss": 0.1934, "step": 37802 }, { "epoch": 0.06702961668612625, "grad_norm": 0.5, "learning_rate": 0.0014118787500924355, "loss": 0.1915, "step": 37804 }, { "epoch": 0.06703316285143607, "grad_norm": 0.314453125, "learning_rate": 0.0014118229186464448, "loss": 0.1977, "step": 37806 }, { "epoch": 0.06703670901674588, "grad_norm": 1.1171875, "learning_rate": 0.0014117670858367218, "loss": 0.1561, "step": 37808 }, { "epoch": 0.0670402551820557, "grad_norm": 0.5625, "learning_rate": 0.001411711251663511, "loss": 0.2105, "step": 37810 }, { "epoch": 0.06704380134736551, "grad_norm": 0.61328125, "learning_rate": 0.0014116554161270561, "loss": 0.2465, "step": 37812 }, { "epoch": 0.06704734751267533, "grad_norm": 1.265625, "learning_rate": 0.0014115995792276014, "loss": 0.4522, "step": 37814 }, { "epoch": 0.06705089367798514, "grad_norm": 0.466796875, "learning_rate": 0.0014115437409653912, "loss": 0.1883, "step": 37816 }, { "epoch": 0.06705443984329496, "grad_norm": 0.546875, "learning_rate": 0.0014114879013406695, "loss": 0.1698, "step": 37818 }, { "epoch": 0.06705798600860477, "grad_norm": 0.419921875, "learning_rate": 0.0014114320603536808, "loss": 0.1622, "step": 37820 }, { "epoch": 0.06706153217391458, "grad_norm": 0.453125, "learning_rate": 0.001411376218004669, "loss": 0.1824, "step": 37822 }, { "epoch": 0.0670650783392244, "grad_norm": 1.0, "learning_rate": 0.0014113203742938786, "loss": 0.2754, "step": 37824 }, { "epoch": 0.06706862450453421, "grad_norm": 0.345703125, "learning_rate": 0.0014112645292215537, "loss": 0.1593, "step": 37826 }, { "epoch": 0.06707217066984403, "grad_norm": 0.8203125, "learning_rate": 0.0014112086827879388, "loss": 0.1871, "step": 37828 }, { "epoch": 0.06707571683515384, "grad_norm": 0.2353515625, "learning_rate": 0.0014111528349932774, "loss": 0.1927, "step": 37830 }, { "epoch": 0.06707926300046366, "grad_norm": 0.78515625, "learning_rate": 0.0014110969858378145, "loss": 0.272, "step": 37832 }, { "epoch": 0.06708280916577347, "grad_norm": 2.09375, "learning_rate": 0.001411041135321794, "loss": 0.2612, "step": 37834 }, { "epoch": 0.06708635533108329, "grad_norm": 0.212890625, "learning_rate": 0.0014109852834454605, "loss": 0.1881, "step": 37836 }, { "epoch": 0.0670899014963931, "grad_norm": 0.486328125, "learning_rate": 0.001410929430209058, "loss": 0.1776, "step": 37838 }, { "epoch": 0.06709344766170292, "grad_norm": 0.28515625, "learning_rate": 0.0014108735756128304, "loss": 0.139, "step": 37840 }, { "epoch": 0.06709699382701274, "grad_norm": 0.7890625, "learning_rate": 0.001410817719657023, "loss": 0.1707, "step": 37842 }, { "epoch": 0.06710053999232256, "grad_norm": 0.38671875, "learning_rate": 0.0014107618623418789, "loss": 0.2294, "step": 37844 }, { "epoch": 0.06710408615763237, "grad_norm": 0.30078125, "learning_rate": 0.0014107060036676429, "loss": 0.1751, "step": 37846 }, { "epoch": 0.06710763232294219, "grad_norm": 0.37109375, "learning_rate": 0.0014106501436345594, "loss": 0.2709, "step": 37848 }, { "epoch": 0.067111178488252, "grad_norm": 1.6171875, "learning_rate": 0.0014105942822428728, "loss": 0.4506, "step": 37850 }, { "epoch": 0.06711472465356182, "grad_norm": 0.36328125, "learning_rate": 0.001410538419492827, "loss": 0.1542, "step": 37852 }, { "epoch": 0.06711827081887163, "grad_norm": 0.3515625, "learning_rate": 0.0014104825553846667, "loss": 0.311, "step": 37854 }, { "epoch": 0.06712181698418145, "grad_norm": 0.283203125, "learning_rate": 0.001410426689918636, "loss": 0.1437, "step": 37856 }, { "epoch": 0.06712536314949126, "grad_norm": 1.7890625, "learning_rate": 0.0014103708230949793, "loss": 0.1941, "step": 37858 }, { "epoch": 0.06712890931480107, "grad_norm": 0.87109375, "learning_rate": 0.0014103149549139409, "loss": 0.2397, "step": 37860 }, { "epoch": 0.06713245548011089, "grad_norm": 0.1845703125, "learning_rate": 0.0014102590853757651, "loss": 0.1483, "step": 37862 }, { "epoch": 0.0671360016454207, "grad_norm": 1.96875, "learning_rate": 0.0014102032144806965, "loss": 0.1891, "step": 37864 }, { "epoch": 0.06713954781073052, "grad_norm": 0.1474609375, "learning_rate": 0.0014101473422289787, "loss": 0.1202, "step": 37866 }, { "epoch": 0.06714309397604033, "grad_norm": 0.9921875, "learning_rate": 0.0014100914686208572, "loss": 0.2429, "step": 37868 }, { "epoch": 0.06714664014135015, "grad_norm": 0.38671875, "learning_rate": 0.0014100355936565753, "loss": 0.1764, "step": 37870 }, { "epoch": 0.06715018630665996, "grad_norm": 0.44140625, "learning_rate": 0.0014099797173363782, "loss": 0.1865, "step": 37872 }, { "epoch": 0.06715373247196978, "grad_norm": 0.46875, "learning_rate": 0.0014099238396605097, "loss": 0.1758, "step": 37874 }, { "epoch": 0.06715727863727959, "grad_norm": 0.734375, "learning_rate": 0.0014098679606292142, "loss": 0.1934, "step": 37876 }, { "epoch": 0.0671608248025894, "grad_norm": 0.44921875, "learning_rate": 0.0014098120802427362, "loss": 0.1662, "step": 37878 }, { "epoch": 0.06716437096789922, "grad_norm": 0.6484375, "learning_rate": 0.0014097561985013203, "loss": 0.1884, "step": 37880 }, { "epoch": 0.06716791713320903, "grad_norm": 0.64453125, "learning_rate": 0.0014097003154052107, "loss": 0.1588, "step": 37882 }, { "epoch": 0.06717146329851885, "grad_norm": 1.0390625, "learning_rate": 0.0014096444309546516, "loss": 0.2228, "step": 37884 }, { "epoch": 0.06717500946382866, "grad_norm": 0.578125, "learning_rate": 0.0014095885451498877, "loss": 0.1757, "step": 37886 }, { "epoch": 0.06717855562913849, "grad_norm": 0.2138671875, "learning_rate": 0.0014095326579911635, "loss": 0.1384, "step": 37888 }, { "epoch": 0.0671821017944483, "grad_norm": 0.35546875, "learning_rate": 0.0014094767694787234, "loss": 0.1763, "step": 37890 }, { "epoch": 0.06718564795975812, "grad_norm": 0.328125, "learning_rate": 0.0014094208796128114, "loss": 0.2442, "step": 37892 }, { "epoch": 0.06718919412506794, "grad_norm": 1.0, "learning_rate": 0.0014093649883936724, "loss": 0.1778, "step": 37894 }, { "epoch": 0.06719274029037775, "grad_norm": 0.228515625, "learning_rate": 0.0014093090958215504, "loss": 0.1752, "step": 37896 }, { "epoch": 0.06719628645568756, "grad_norm": 0.41796875, "learning_rate": 0.0014092532018966902, "loss": 0.2082, "step": 37898 }, { "epoch": 0.06719983262099738, "grad_norm": 0.5, "learning_rate": 0.0014091973066193359, "loss": 0.1871, "step": 37900 }, { "epoch": 0.0672033787863072, "grad_norm": 0.2890625, "learning_rate": 0.0014091414099897323, "loss": 0.1953, "step": 37902 }, { "epoch": 0.06720692495161701, "grad_norm": 1.71875, "learning_rate": 0.001409085512008124, "loss": 0.2654, "step": 37904 }, { "epoch": 0.06721047111692682, "grad_norm": 0.53125, "learning_rate": 0.0014090296126747551, "loss": 0.2025, "step": 37906 }, { "epoch": 0.06721401728223664, "grad_norm": 0.1982421875, "learning_rate": 0.00140897371198987, "loss": 0.1808, "step": 37908 }, { "epoch": 0.06721756344754645, "grad_norm": 0.52734375, "learning_rate": 0.0014089178099537137, "loss": 0.1908, "step": 37910 }, { "epoch": 0.06722110961285627, "grad_norm": 0.5859375, "learning_rate": 0.00140886190656653, "loss": 0.1686, "step": 37912 }, { "epoch": 0.06722465577816608, "grad_norm": 0.640625, "learning_rate": 0.001408806001828564, "loss": 0.1784, "step": 37914 }, { "epoch": 0.0672282019434759, "grad_norm": 0.2578125, "learning_rate": 0.0014087500957400598, "loss": 0.1764, "step": 37916 }, { "epoch": 0.06723174810878571, "grad_norm": 1.484375, "learning_rate": 0.0014086941883012618, "loss": 0.2464, "step": 37918 }, { "epoch": 0.06723529427409553, "grad_norm": 0.35546875, "learning_rate": 0.0014086382795124152, "loss": 0.1793, "step": 37920 }, { "epoch": 0.06723884043940534, "grad_norm": 0.20703125, "learning_rate": 0.0014085823693737636, "loss": 0.177, "step": 37922 }, { "epoch": 0.06724238660471515, "grad_norm": 0.7890625, "learning_rate": 0.0014085264578855522, "loss": 0.1918, "step": 37924 }, { "epoch": 0.06724593277002497, "grad_norm": 0.390625, "learning_rate": 0.0014084705450480254, "loss": 0.1857, "step": 37926 }, { "epoch": 0.06724947893533478, "grad_norm": 0.46875, "learning_rate": 0.001408414630861427, "loss": 0.2024, "step": 37928 }, { "epoch": 0.0672530251006446, "grad_norm": 0.251953125, "learning_rate": 0.0014083587153260028, "loss": 0.1362, "step": 37930 }, { "epoch": 0.06725657126595443, "grad_norm": 1.1875, "learning_rate": 0.0014083027984419967, "loss": 0.2532, "step": 37932 }, { "epoch": 0.06726011743126424, "grad_norm": 1.1171875, "learning_rate": 0.0014082468802096531, "loss": 0.1993, "step": 37934 }, { "epoch": 0.06726366359657406, "grad_norm": 0.2216796875, "learning_rate": 0.0014081909606292165, "loss": 0.1701, "step": 37936 }, { "epoch": 0.06726720976188387, "grad_norm": 0.63671875, "learning_rate": 0.0014081350397009316, "loss": 0.1771, "step": 37938 }, { "epoch": 0.06727075592719368, "grad_norm": 0.267578125, "learning_rate": 0.0014080791174250434, "loss": 0.15, "step": 37940 }, { "epoch": 0.0672743020925035, "grad_norm": 0.4375, "learning_rate": 0.0014080231938017958, "loss": 0.1878, "step": 37942 }, { "epoch": 0.06727784825781331, "grad_norm": 0.478515625, "learning_rate": 0.0014079672688314339, "loss": 0.3073, "step": 37944 }, { "epoch": 0.06728139442312313, "grad_norm": 0.41796875, "learning_rate": 0.0014079113425142019, "loss": 0.1538, "step": 37946 }, { "epoch": 0.06728494058843294, "grad_norm": 0.328125, "learning_rate": 0.0014078554148503446, "loss": 0.189, "step": 37948 }, { "epoch": 0.06728848675374276, "grad_norm": 0.34765625, "learning_rate": 0.0014077994858401066, "loss": 0.2102, "step": 37950 }, { "epoch": 0.06729203291905257, "grad_norm": 0.259765625, "learning_rate": 0.001407743555483732, "loss": 0.1926, "step": 37952 }, { "epoch": 0.06729557908436239, "grad_norm": 0.9296875, "learning_rate": 0.0014076876237814665, "loss": 0.2533, "step": 37954 }, { "epoch": 0.0672991252496722, "grad_norm": 1.21875, "learning_rate": 0.0014076316907335536, "loss": 0.2249, "step": 37956 }, { "epoch": 0.06730267141498202, "grad_norm": 0.3671875, "learning_rate": 0.0014075757563402388, "loss": 0.234, "step": 37958 }, { "epoch": 0.06730621758029183, "grad_norm": 0.224609375, "learning_rate": 0.0014075198206017659, "loss": 0.2358, "step": 37960 }, { "epoch": 0.06730976374560164, "grad_norm": 0.34375, "learning_rate": 0.00140746388351838, "loss": 0.2069, "step": 37962 }, { "epoch": 0.06731330991091146, "grad_norm": 0.2890625, "learning_rate": 0.001407407945090326, "loss": 0.1968, "step": 37964 }, { "epoch": 0.06731685607622127, "grad_norm": 0.30078125, "learning_rate": 0.001407352005317848, "loss": 0.2091, "step": 37966 }, { "epoch": 0.06732040224153109, "grad_norm": 0.2470703125, "learning_rate": 0.0014072960642011905, "loss": 0.1699, "step": 37968 }, { "epoch": 0.0673239484068409, "grad_norm": 0.37890625, "learning_rate": 0.001407240121740599, "loss": 0.159, "step": 37970 }, { "epoch": 0.06732749457215072, "grad_norm": 0.255859375, "learning_rate": 0.0014071841779363173, "loss": 0.1777, "step": 37972 }, { "epoch": 0.06733104073746053, "grad_norm": 0.201171875, "learning_rate": 0.0014071282327885908, "loss": 0.1944, "step": 37974 }, { "epoch": 0.06733458690277035, "grad_norm": 0.466796875, "learning_rate": 0.0014070722862976636, "loss": 0.2113, "step": 37976 }, { "epoch": 0.06733813306808017, "grad_norm": 0.203125, "learning_rate": 0.001407016338463781, "loss": 0.231, "step": 37978 }, { "epoch": 0.06734167923338999, "grad_norm": 0.3046875, "learning_rate": 0.0014069603892871865, "loss": 0.2113, "step": 37980 }, { "epoch": 0.0673452253986998, "grad_norm": 1.59375, "learning_rate": 0.001406904438768126, "loss": 0.2414, "step": 37982 }, { "epoch": 0.06734877156400962, "grad_norm": 0.2734375, "learning_rate": 0.0014068484869068439, "loss": 0.2097, "step": 37984 }, { "epoch": 0.06735231772931943, "grad_norm": 0.53125, "learning_rate": 0.0014067925337035846, "loss": 0.2418, "step": 37986 }, { "epoch": 0.06735586389462925, "grad_norm": 0.2197265625, "learning_rate": 0.001406736579158593, "loss": 0.1535, "step": 37988 }, { "epoch": 0.06735941005993906, "grad_norm": 1.2890625, "learning_rate": 0.0014066806232721136, "loss": 0.3729, "step": 37990 }, { "epoch": 0.06736295622524888, "grad_norm": 0.24609375, "learning_rate": 0.0014066246660443913, "loss": 0.1553, "step": 37992 }, { "epoch": 0.06736650239055869, "grad_norm": 0.30078125, "learning_rate": 0.0014065687074756713, "loss": 0.2318, "step": 37994 }, { "epoch": 0.0673700485558685, "grad_norm": 0.453125, "learning_rate": 0.0014065127475661973, "loss": 0.1901, "step": 37996 }, { "epoch": 0.06737359472117832, "grad_norm": 0.57421875, "learning_rate": 0.0014064567863162148, "loss": 0.1613, "step": 37998 }, { "epoch": 0.06737714088648813, "grad_norm": 0.1796875, "learning_rate": 0.0014064008237259682, "loss": 0.1549, "step": 38000 }, { "epoch": 0.06738068705179795, "grad_norm": 0.6015625, "learning_rate": 0.0014063448597957024, "loss": 0.146, "step": 38002 }, { "epoch": 0.06738423321710776, "grad_norm": 0.201171875, "learning_rate": 0.0014062888945256624, "loss": 0.1683, "step": 38004 }, { "epoch": 0.06738777938241758, "grad_norm": 0.169921875, "learning_rate": 0.0014062329279160926, "loss": 0.2238, "step": 38006 }, { "epoch": 0.0673913255477274, "grad_norm": 0.404296875, "learning_rate": 0.0014061769599672377, "loss": 0.2846, "step": 38008 }, { "epoch": 0.06739487171303721, "grad_norm": 0.1748046875, "learning_rate": 0.0014061209906793429, "loss": 0.5551, "step": 38010 }, { "epoch": 0.06739841787834702, "grad_norm": 0.255859375, "learning_rate": 0.0014060650200526524, "loss": 0.2026, "step": 38012 }, { "epoch": 0.06740196404365684, "grad_norm": 0.345703125, "learning_rate": 0.0014060090480874112, "loss": 0.1679, "step": 38014 }, { "epoch": 0.06740551020896665, "grad_norm": 1.8359375, "learning_rate": 0.0014059530747838641, "loss": 0.1761, "step": 38016 }, { "epoch": 0.06740905637427647, "grad_norm": 1.71875, "learning_rate": 0.0014058971001422564, "loss": 0.3153, "step": 38018 }, { "epoch": 0.06741260253958628, "grad_norm": 0.431640625, "learning_rate": 0.0014058411241628325, "loss": 0.1588, "step": 38020 }, { "epoch": 0.0674161487048961, "grad_norm": 0.24609375, "learning_rate": 0.0014057851468458366, "loss": 0.1653, "step": 38022 }, { "epoch": 0.06741969487020592, "grad_norm": 0.466796875, "learning_rate": 0.0014057291681915148, "loss": 0.1444, "step": 38024 }, { "epoch": 0.06742324103551574, "grad_norm": 0.3125, "learning_rate": 0.0014056731882001107, "loss": 0.2766, "step": 38026 }, { "epoch": 0.06742678720082555, "grad_norm": 0.3671875, "learning_rate": 0.00140561720687187, "loss": 0.1719, "step": 38028 }, { "epoch": 0.06743033336613537, "grad_norm": 0.89453125, "learning_rate": 0.0014055612242070369, "loss": 0.2115, "step": 38030 }, { "epoch": 0.06743387953144518, "grad_norm": 1.3203125, "learning_rate": 0.0014055052402058563, "loss": 0.3211, "step": 38032 }, { "epoch": 0.067437425696755, "grad_norm": 0.1171875, "learning_rate": 0.0014054492548685735, "loss": 0.1905, "step": 38034 }, { "epoch": 0.06744097186206481, "grad_norm": 1.0078125, "learning_rate": 0.0014053932681954333, "loss": 0.1506, "step": 38036 }, { "epoch": 0.06744451802737463, "grad_norm": 1.1015625, "learning_rate": 0.0014053372801866798, "loss": 0.4415, "step": 38038 }, { "epoch": 0.06744806419268444, "grad_norm": 0.326171875, "learning_rate": 0.001405281290842559, "loss": 0.2139, "step": 38040 }, { "epoch": 0.06745161035799425, "grad_norm": 0.671875, "learning_rate": 0.0014052253001633152, "loss": 0.1976, "step": 38042 }, { "epoch": 0.06745515652330407, "grad_norm": 0.341796875, "learning_rate": 0.0014051693081491925, "loss": 0.2087, "step": 38044 }, { "epoch": 0.06745870268861388, "grad_norm": 0.294921875, "learning_rate": 0.001405113314800437, "loss": 0.148, "step": 38046 }, { "epoch": 0.0674622488539237, "grad_norm": 0.84765625, "learning_rate": 0.001405057320117293, "loss": 0.2992, "step": 38048 }, { "epoch": 0.06746579501923351, "grad_norm": 0.208984375, "learning_rate": 0.0014050013241000054, "loss": 0.1409, "step": 38050 }, { "epoch": 0.06746934118454333, "grad_norm": 1.0390625, "learning_rate": 0.0014049453267488195, "loss": 0.1579, "step": 38052 }, { "epoch": 0.06747288734985314, "grad_norm": 0.4609375, "learning_rate": 0.0014048893280639795, "loss": 0.1945, "step": 38054 }, { "epoch": 0.06747643351516296, "grad_norm": 0.4453125, "learning_rate": 0.0014048333280457309, "loss": 0.1646, "step": 38056 }, { "epoch": 0.06747997968047277, "grad_norm": 0.275390625, "learning_rate": 0.0014047773266943181, "loss": 0.2174, "step": 38058 }, { "epoch": 0.06748352584578259, "grad_norm": 0.2431640625, "learning_rate": 0.0014047213240099869, "loss": 0.1712, "step": 38060 }, { "epoch": 0.0674870720110924, "grad_norm": 1.2421875, "learning_rate": 0.0014046653199929812, "loss": 0.2986, "step": 38062 }, { "epoch": 0.06749061817640221, "grad_norm": 0.443359375, "learning_rate": 0.0014046093146435463, "loss": 0.1652, "step": 38064 }, { "epoch": 0.06749416434171203, "grad_norm": 0.484375, "learning_rate": 0.0014045533079619272, "loss": 0.1476, "step": 38066 }, { "epoch": 0.06749771050702186, "grad_norm": 0.4140625, "learning_rate": 0.0014044972999483689, "loss": 0.2063, "step": 38068 }, { "epoch": 0.06750125667233167, "grad_norm": 0.34765625, "learning_rate": 0.0014044412906031166, "loss": 0.2072, "step": 38070 }, { "epoch": 0.06750480283764149, "grad_norm": 0.3515625, "learning_rate": 0.0014043852799264145, "loss": 0.1803, "step": 38072 }, { "epoch": 0.0675083490029513, "grad_norm": 0.703125, "learning_rate": 0.001404329267918508, "loss": 0.2009, "step": 38074 }, { "epoch": 0.06751189516826112, "grad_norm": 0.75390625, "learning_rate": 0.0014042732545796425, "loss": 0.1728, "step": 38076 }, { "epoch": 0.06751544133357093, "grad_norm": 0.7265625, "learning_rate": 0.0014042172399100622, "loss": 0.1489, "step": 38078 }, { "epoch": 0.06751898749888074, "grad_norm": 0.2734375, "learning_rate": 0.0014041612239100124, "loss": 0.2742, "step": 38080 }, { "epoch": 0.06752253366419056, "grad_norm": 0.37109375, "learning_rate": 0.001404105206579738, "loss": 0.167, "step": 38082 }, { "epoch": 0.06752607982950037, "grad_norm": 0.72265625, "learning_rate": 0.0014040491879194843, "loss": 0.2185, "step": 38084 }, { "epoch": 0.06752962599481019, "grad_norm": 0.921875, "learning_rate": 0.001403993167929496, "loss": 0.1936, "step": 38086 }, { "epoch": 0.06753317216012, "grad_norm": 0.32421875, "learning_rate": 0.0014039371466100182, "loss": 0.1794, "step": 38088 }, { "epoch": 0.06753671832542982, "grad_norm": 0.24609375, "learning_rate": 0.0014038811239612957, "loss": 0.1902, "step": 38090 }, { "epoch": 0.06754026449073963, "grad_norm": 0.68359375, "learning_rate": 0.0014038250999835735, "loss": 0.1845, "step": 38092 }, { "epoch": 0.06754381065604945, "grad_norm": 0.390625, "learning_rate": 0.0014037690746770972, "loss": 0.1408, "step": 38094 }, { "epoch": 0.06754735682135926, "grad_norm": 0.408203125, "learning_rate": 0.0014037130480421113, "loss": 0.2018, "step": 38096 }, { "epoch": 0.06755090298666908, "grad_norm": 0.47265625, "learning_rate": 0.001403657020078861, "loss": 0.2839, "step": 38098 }, { "epoch": 0.06755444915197889, "grad_norm": 0.283203125, "learning_rate": 0.0014036009907875914, "loss": 0.1785, "step": 38100 }, { "epoch": 0.0675579953172887, "grad_norm": 0.4453125, "learning_rate": 0.001403544960168547, "loss": 0.2157, "step": 38102 }, { "epoch": 0.06756154148259852, "grad_norm": 0.6484375, "learning_rate": 0.0014034889282219735, "loss": 0.3959, "step": 38104 }, { "epoch": 0.06756508764790833, "grad_norm": 0.515625, "learning_rate": 0.0014034328949481156, "loss": 0.1897, "step": 38106 }, { "epoch": 0.06756863381321815, "grad_norm": 0.416015625, "learning_rate": 0.0014033768603472186, "loss": 0.1691, "step": 38108 }, { "epoch": 0.06757217997852796, "grad_norm": 0.30859375, "learning_rate": 0.0014033208244195274, "loss": 0.1608, "step": 38110 }, { "epoch": 0.06757572614383778, "grad_norm": 2.875, "learning_rate": 0.0014032647871652871, "loss": 0.288, "step": 38112 }, { "epoch": 0.0675792723091476, "grad_norm": 0.5234375, "learning_rate": 0.001403208748584743, "loss": 0.1209, "step": 38114 }, { "epoch": 0.06758281847445742, "grad_norm": 0.66796875, "learning_rate": 0.0014031527086781398, "loss": 0.2391, "step": 38116 }, { "epoch": 0.06758636463976724, "grad_norm": 0.27734375, "learning_rate": 0.0014030966674457228, "loss": 0.1977, "step": 38118 }, { "epoch": 0.06758991080507705, "grad_norm": 0.365234375, "learning_rate": 0.0014030406248877368, "loss": 0.1569, "step": 38120 }, { "epoch": 0.06759345697038686, "grad_norm": 1.0390625, "learning_rate": 0.0014029845810044272, "loss": 0.2053, "step": 38122 }, { "epoch": 0.06759700313569668, "grad_norm": 0.359375, "learning_rate": 0.0014029285357960392, "loss": 0.1207, "step": 38124 }, { "epoch": 0.0676005493010065, "grad_norm": 0.259765625, "learning_rate": 0.0014028724892628176, "loss": 0.165, "step": 38126 }, { "epoch": 0.06760409546631631, "grad_norm": 0.51953125, "learning_rate": 0.0014028164414050075, "loss": 0.2324, "step": 38128 }, { "epoch": 0.06760764163162612, "grad_norm": 0.625, "learning_rate": 0.0014027603922228547, "loss": 0.1968, "step": 38130 }, { "epoch": 0.06761118779693594, "grad_norm": 0.70703125, "learning_rate": 0.0014027043417166034, "loss": 0.2596, "step": 38132 }, { "epoch": 0.06761473396224575, "grad_norm": 0.412109375, "learning_rate": 0.0014026482898864994, "loss": 0.2205, "step": 38134 }, { "epoch": 0.06761828012755557, "grad_norm": 0.5234375, "learning_rate": 0.0014025922367327873, "loss": 0.2112, "step": 38136 }, { "epoch": 0.06762182629286538, "grad_norm": 0.6953125, "learning_rate": 0.0014025361822557127, "loss": 0.1881, "step": 38138 }, { "epoch": 0.0676253724581752, "grad_norm": 0.609375, "learning_rate": 0.0014024801264555205, "loss": 0.3466, "step": 38140 }, { "epoch": 0.06762891862348501, "grad_norm": 0.302734375, "learning_rate": 0.001402424069332456, "loss": 0.2739, "step": 38142 }, { "epoch": 0.06763246478879482, "grad_norm": 1.03125, "learning_rate": 0.0014023680108867643, "loss": 0.1499, "step": 38144 }, { "epoch": 0.06763601095410464, "grad_norm": 1.0078125, "learning_rate": 0.0014023119511186904, "loss": 0.3911, "step": 38146 }, { "epoch": 0.06763955711941445, "grad_norm": 1.1875, "learning_rate": 0.0014022558900284796, "loss": 0.1605, "step": 38148 }, { "epoch": 0.06764310328472427, "grad_norm": 0.72265625, "learning_rate": 0.0014021998276163771, "loss": 0.1519, "step": 38150 }, { "epoch": 0.06764664945003408, "grad_norm": 0.46484375, "learning_rate": 0.0014021437638826286, "loss": 0.1927, "step": 38152 }, { "epoch": 0.0676501956153439, "grad_norm": 1.7578125, "learning_rate": 0.0014020876988274783, "loss": 0.2798, "step": 38154 }, { "epoch": 0.06765374178065371, "grad_norm": 0.6015625, "learning_rate": 0.001402031632451172, "loss": 0.2424, "step": 38156 }, { "epoch": 0.06765728794596353, "grad_norm": 1.03125, "learning_rate": 0.0014019755647539547, "loss": 0.1948, "step": 38158 }, { "epoch": 0.06766083411127335, "grad_norm": 0.439453125, "learning_rate": 0.0014019194957360717, "loss": 0.1654, "step": 38160 }, { "epoch": 0.06766438027658317, "grad_norm": 1.3671875, "learning_rate": 0.0014018634253977684, "loss": 0.2451, "step": 38162 }, { "epoch": 0.06766792644189298, "grad_norm": 1.3125, "learning_rate": 0.0014018073537392895, "loss": 0.274, "step": 38164 }, { "epoch": 0.0676714726072028, "grad_norm": 0.40625, "learning_rate": 0.001401751280760881, "loss": 0.1472, "step": 38166 }, { "epoch": 0.06767501877251261, "grad_norm": 0.25390625, "learning_rate": 0.001401695206462787, "loss": 0.361, "step": 38168 }, { "epoch": 0.06767856493782243, "grad_norm": 0.251953125, "learning_rate": 0.0014016391308452538, "loss": 0.1655, "step": 38170 }, { "epoch": 0.06768211110313224, "grad_norm": 0.474609375, "learning_rate": 0.0014015830539085266, "loss": 0.227, "step": 38172 }, { "epoch": 0.06768565726844206, "grad_norm": 0.2001953125, "learning_rate": 0.0014015269756528501, "loss": 0.1828, "step": 38174 }, { "epoch": 0.06768920343375187, "grad_norm": 0.298828125, "learning_rate": 0.0014014708960784694, "loss": 0.1842, "step": 38176 }, { "epoch": 0.06769274959906169, "grad_norm": 0.34375, "learning_rate": 0.0014014148151856305, "loss": 0.2086, "step": 38178 }, { "epoch": 0.0676962957643715, "grad_norm": 0.57421875, "learning_rate": 0.001401358732974578, "loss": 0.1567, "step": 38180 }, { "epoch": 0.06769984192968131, "grad_norm": 0.65625, "learning_rate": 0.0014013026494455575, "loss": 0.1712, "step": 38182 }, { "epoch": 0.06770338809499113, "grad_norm": 0.2265625, "learning_rate": 0.0014012465645988144, "loss": 0.2317, "step": 38184 }, { "epoch": 0.06770693426030094, "grad_norm": 1.59375, "learning_rate": 0.0014011904784345937, "loss": 0.2767, "step": 38186 }, { "epoch": 0.06771048042561076, "grad_norm": 0.72265625, "learning_rate": 0.0014011343909531406, "loss": 0.2158, "step": 38188 }, { "epoch": 0.06771402659092057, "grad_norm": 2.140625, "learning_rate": 0.0014010783021547008, "loss": 0.2056, "step": 38190 }, { "epoch": 0.06771757275623039, "grad_norm": 0.75390625, "learning_rate": 0.0014010222120395194, "loss": 0.1804, "step": 38192 }, { "epoch": 0.0677211189215402, "grad_norm": 0.84375, "learning_rate": 0.0014009661206078419, "loss": 0.2379, "step": 38194 }, { "epoch": 0.06772466508685002, "grad_norm": 0.490234375, "learning_rate": 0.0014009100278599134, "loss": 0.3141, "step": 38196 }, { "epoch": 0.06772821125215983, "grad_norm": 0.36328125, "learning_rate": 0.0014008539337959788, "loss": 0.1913, "step": 38198 }, { "epoch": 0.06773175741746965, "grad_norm": 0.376953125, "learning_rate": 0.0014007978384162844, "loss": 0.2258, "step": 38200 }, { "epoch": 0.06773530358277946, "grad_norm": 0.447265625, "learning_rate": 0.0014007417417210744, "loss": 0.2604, "step": 38202 }, { "epoch": 0.06773884974808929, "grad_norm": 0.291015625, "learning_rate": 0.001400685643710595, "loss": 0.2234, "step": 38204 }, { "epoch": 0.0677423959133991, "grad_norm": 0.57421875, "learning_rate": 0.001400629544385091, "loss": 0.1863, "step": 38206 }, { "epoch": 0.06774594207870892, "grad_norm": 0.306640625, "learning_rate": 0.0014005734437448083, "loss": 0.1819, "step": 38208 }, { "epoch": 0.06774948824401873, "grad_norm": 0.921875, "learning_rate": 0.0014005173417899921, "loss": 0.1733, "step": 38210 }, { "epoch": 0.06775303440932855, "grad_norm": 1.8125, "learning_rate": 0.0014004612385208875, "loss": 0.222, "step": 38212 }, { "epoch": 0.06775658057463836, "grad_norm": 0.765625, "learning_rate": 0.0014004051339377398, "loss": 0.1889, "step": 38214 }, { "epoch": 0.06776012673994818, "grad_norm": 0.462890625, "learning_rate": 0.0014003490280407945, "loss": 0.2515, "step": 38216 }, { "epoch": 0.06776367290525799, "grad_norm": 0.5703125, "learning_rate": 0.001400292920830297, "loss": 0.1981, "step": 38218 }, { "epoch": 0.0677672190705678, "grad_norm": 1.515625, "learning_rate": 0.001400236812306493, "loss": 0.2434, "step": 38220 }, { "epoch": 0.06777076523587762, "grad_norm": 0.734375, "learning_rate": 0.0014001807024696272, "loss": 0.1548, "step": 38222 }, { "epoch": 0.06777431140118743, "grad_norm": 0.9609375, "learning_rate": 0.0014001245913199457, "loss": 0.3761, "step": 38224 }, { "epoch": 0.06777785756649725, "grad_norm": 0.26953125, "learning_rate": 0.0014000684788576935, "loss": 0.1539, "step": 38226 }, { "epoch": 0.06778140373180706, "grad_norm": 0.9765625, "learning_rate": 0.001400012365083116, "loss": 0.3315, "step": 38228 }, { "epoch": 0.06778494989711688, "grad_norm": 0.2890625, "learning_rate": 0.0013999562499964586, "loss": 0.1445, "step": 38230 }, { "epoch": 0.06778849606242669, "grad_norm": 0.73828125, "learning_rate": 0.001399900133597967, "loss": 0.1767, "step": 38232 }, { "epoch": 0.06779204222773651, "grad_norm": 0.40234375, "learning_rate": 0.0013998440158878862, "loss": 0.184, "step": 38234 }, { "epoch": 0.06779558839304632, "grad_norm": 1.125, "learning_rate": 0.0013997878968664622, "loss": 0.4462, "step": 38236 }, { "epoch": 0.06779913455835614, "grad_norm": 1.546875, "learning_rate": 0.0013997317765339396, "loss": 0.171, "step": 38238 }, { "epoch": 0.06780268072366595, "grad_norm": 0.578125, "learning_rate": 0.0013996756548905647, "loss": 0.2151, "step": 38240 }, { "epoch": 0.06780622688897577, "grad_norm": 0.625, "learning_rate": 0.0013996195319365824, "loss": 0.4022, "step": 38242 }, { "epoch": 0.06780977305428558, "grad_norm": 0.51171875, "learning_rate": 0.0013995634076722384, "loss": 0.1521, "step": 38244 }, { "epoch": 0.0678133192195954, "grad_norm": 0.4765625, "learning_rate": 0.0013995072820977778, "loss": 0.1848, "step": 38246 }, { "epoch": 0.06781686538490521, "grad_norm": 0.2421875, "learning_rate": 0.0013994511552134466, "loss": 0.177, "step": 38248 }, { "epoch": 0.06782041155021504, "grad_norm": 0.1982421875, "learning_rate": 0.0013993950270194898, "loss": 0.1775, "step": 38250 }, { "epoch": 0.06782395771552485, "grad_norm": 0.33203125, "learning_rate": 0.0013993388975161532, "loss": 0.1328, "step": 38252 }, { "epoch": 0.06782750388083467, "grad_norm": 0.6796875, "learning_rate": 0.001399282766703682, "loss": 0.2512, "step": 38254 }, { "epoch": 0.06783105004614448, "grad_norm": 0.3046875, "learning_rate": 0.001399226634582322, "loss": 0.1851, "step": 38256 }, { "epoch": 0.0678345962114543, "grad_norm": 1.4375, "learning_rate": 0.0013991705011523184, "loss": 0.1973, "step": 38258 }, { "epoch": 0.06783814237676411, "grad_norm": 0.361328125, "learning_rate": 0.0013991143664139167, "loss": 0.2042, "step": 38260 }, { "epoch": 0.06784168854207392, "grad_norm": 6.5625, "learning_rate": 0.0013990582303673625, "loss": 0.3079, "step": 38262 }, { "epoch": 0.06784523470738374, "grad_norm": 1.015625, "learning_rate": 0.0013990020930129015, "loss": 0.2447, "step": 38264 }, { "epoch": 0.06784878087269355, "grad_norm": 0.51171875, "learning_rate": 0.0013989459543507791, "loss": 0.1873, "step": 38266 }, { "epoch": 0.06785232703800337, "grad_norm": 0.546875, "learning_rate": 0.001398889814381241, "loss": 0.1975, "step": 38268 }, { "epoch": 0.06785587320331318, "grad_norm": 0.41796875, "learning_rate": 0.001398833673104532, "loss": 0.1761, "step": 38270 }, { "epoch": 0.067859419368623, "grad_norm": 0.482421875, "learning_rate": 0.001398777530520898, "loss": 0.2234, "step": 38272 }, { "epoch": 0.06786296553393281, "grad_norm": 0.734375, "learning_rate": 0.0013987213866305849, "loss": 0.2178, "step": 38274 }, { "epoch": 0.06786651169924263, "grad_norm": 0.2275390625, "learning_rate": 0.0013986652414338374, "loss": 0.1797, "step": 38276 }, { "epoch": 0.06787005786455244, "grad_norm": 0.2109375, "learning_rate": 0.0013986090949309022, "loss": 0.2624, "step": 38278 }, { "epoch": 0.06787360402986226, "grad_norm": 0.30078125, "learning_rate": 0.0013985529471220242, "loss": 0.1474, "step": 38280 }, { "epoch": 0.06787715019517207, "grad_norm": 0.92578125, "learning_rate": 0.0013984967980074491, "loss": 0.2142, "step": 38282 }, { "epoch": 0.06788069636048188, "grad_norm": 0.37109375, "learning_rate": 0.0013984406475874224, "loss": 0.2321, "step": 38284 }, { "epoch": 0.0678842425257917, "grad_norm": 0.2890625, "learning_rate": 0.0013983844958621895, "loss": 0.3348, "step": 38286 }, { "epoch": 0.06788778869110151, "grad_norm": 1.109375, "learning_rate": 0.0013983283428319964, "loss": 0.2325, "step": 38288 }, { "epoch": 0.06789133485641133, "grad_norm": 0.404296875, "learning_rate": 0.001398272188497088, "loss": 0.17, "step": 38290 }, { "epoch": 0.06789488102172114, "grad_norm": 0.44921875, "learning_rate": 0.0013982160328577108, "loss": 0.2045, "step": 38292 }, { "epoch": 0.06789842718703096, "grad_norm": 0.3203125, "learning_rate": 0.0013981598759141094, "loss": 0.1807, "step": 38294 }, { "epoch": 0.06790197335234079, "grad_norm": 0.89453125, "learning_rate": 0.00139810371766653, "loss": 0.252, "step": 38296 }, { "epoch": 0.0679055195176506, "grad_norm": 0.380859375, "learning_rate": 0.0013980475581152182, "loss": 0.1487, "step": 38298 }, { "epoch": 0.06790906568296042, "grad_norm": 0.3359375, "learning_rate": 0.0013979913972604196, "loss": 0.1867, "step": 38300 }, { "epoch": 0.06791261184827023, "grad_norm": 0.45703125, "learning_rate": 0.0013979352351023797, "loss": 0.2257, "step": 38302 }, { "epoch": 0.06791615801358004, "grad_norm": 0.1845703125, "learning_rate": 0.0013978790716413442, "loss": 0.1502, "step": 38304 }, { "epoch": 0.06791970417888986, "grad_norm": 0.259765625, "learning_rate": 0.0013978229068775585, "loss": 0.1935, "step": 38306 }, { "epoch": 0.06792325034419967, "grad_norm": 0.2265625, "learning_rate": 0.0013977667408112684, "loss": 0.2329, "step": 38308 }, { "epoch": 0.06792679650950949, "grad_norm": 0.3203125, "learning_rate": 0.0013977105734427197, "loss": 0.2516, "step": 38310 }, { "epoch": 0.0679303426748193, "grad_norm": 0.2490234375, "learning_rate": 0.0013976544047721578, "loss": 0.1339, "step": 38312 }, { "epoch": 0.06793388884012912, "grad_norm": 0.88671875, "learning_rate": 0.0013975982347998283, "loss": 0.2951, "step": 38314 }, { "epoch": 0.06793743500543893, "grad_norm": 0.263671875, "learning_rate": 0.0013975420635259771, "loss": 0.1864, "step": 38316 }, { "epoch": 0.06794098117074875, "grad_norm": 1.4296875, "learning_rate": 0.0013974858909508497, "loss": 0.2524, "step": 38318 }, { "epoch": 0.06794452733605856, "grad_norm": 0.275390625, "learning_rate": 0.001397429717074692, "loss": 0.1751, "step": 38320 }, { "epoch": 0.06794807350136838, "grad_norm": 0.283203125, "learning_rate": 0.0013973735418977493, "loss": 0.1674, "step": 38322 }, { "epoch": 0.06795161966667819, "grad_norm": 0.328125, "learning_rate": 0.0013973173654202675, "loss": 0.1916, "step": 38324 }, { "epoch": 0.067955165831988, "grad_norm": 1.4140625, "learning_rate": 0.0013972611876424924, "loss": 0.2069, "step": 38326 }, { "epoch": 0.06795871199729782, "grad_norm": 0.294921875, "learning_rate": 0.0013972050085646693, "loss": 0.1723, "step": 38328 }, { "epoch": 0.06796225816260763, "grad_norm": 0.81640625, "learning_rate": 0.001397148828187044, "loss": 0.2891, "step": 38330 }, { "epoch": 0.06796580432791745, "grad_norm": 0.5625, "learning_rate": 0.0013970926465098627, "loss": 0.2552, "step": 38332 }, { "epoch": 0.06796935049322726, "grad_norm": 0.59375, "learning_rate": 0.0013970364635333705, "loss": 0.2104, "step": 38334 }, { "epoch": 0.06797289665853708, "grad_norm": 0.515625, "learning_rate": 0.0013969802792578134, "loss": 0.2075, "step": 38336 }, { "epoch": 0.06797644282384689, "grad_norm": 0.26171875, "learning_rate": 0.001396924093683437, "loss": 0.1632, "step": 38338 }, { "epoch": 0.06797998898915672, "grad_norm": 1.046875, "learning_rate": 0.0013968679068104876, "loss": 0.4355, "step": 38340 }, { "epoch": 0.06798353515446653, "grad_norm": 0.37890625, "learning_rate": 0.00139681171863921, "loss": 0.1994, "step": 38342 }, { "epoch": 0.06798708131977635, "grad_norm": 0.2392578125, "learning_rate": 0.0013967555291698503, "loss": 0.1779, "step": 38344 }, { "epoch": 0.06799062748508616, "grad_norm": 0.93359375, "learning_rate": 0.0013966993384026546, "loss": 0.2307, "step": 38346 }, { "epoch": 0.06799417365039598, "grad_norm": 0.306640625, "learning_rate": 0.0013966431463378678, "loss": 0.2312, "step": 38348 }, { "epoch": 0.06799771981570579, "grad_norm": 0.29296875, "learning_rate": 0.0013965869529757365, "loss": 0.2151, "step": 38350 }, { "epoch": 0.06800126598101561, "grad_norm": 0.234375, "learning_rate": 0.001396530758316506, "loss": 0.2118, "step": 38352 }, { "epoch": 0.06800481214632542, "grad_norm": 0.4375, "learning_rate": 0.0013964745623604225, "loss": 0.2759, "step": 38354 }, { "epoch": 0.06800835831163524, "grad_norm": 0.7734375, "learning_rate": 0.0013964183651077311, "loss": 0.2909, "step": 38356 }, { "epoch": 0.06801190447694505, "grad_norm": 0.2255859375, "learning_rate": 0.0013963621665586783, "loss": 0.2046, "step": 38358 }, { "epoch": 0.06801545064225487, "grad_norm": 1.1953125, "learning_rate": 0.0013963059667135094, "loss": 0.238, "step": 38360 }, { "epoch": 0.06801899680756468, "grad_norm": 2.3125, "learning_rate": 0.0013962497655724702, "loss": 0.3522, "step": 38362 }, { "epoch": 0.0680225429728745, "grad_norm": 0.3984375, "learning_rate": 0.0013961935631358066, "loss": 0.1794, "step": 38364 }, { "epoch": 0.06802608913818431, "grad_norm": 0.2890625, "learning_rate": 0.0013961373594037645, "loss": 0.1895, "step": 38366 }, { "epoch": 0.06802963530349412, "grad_norm": 0.2109375, "learning_rate": 0.0013960811543765896, "loss": 0.134, "step": 38368 }, { "epoch": 0.06803318146880394, "grad_norm": 0.234375, "learning_rate": 0.0013960249480545277, "loss": 0.2408, "step": 38370 }, { "epoch": 0.06803672763411375, "grad_norm": 0.318359375, "learning_rate": 0.0013959687404378248, "loss": 0.1731, "step": 38372 }, { "epoch": 0.06804027379942357, "grad_norm": 0.37109375, "learning_rate": 0.0013959125315267265, "loss": 0.2128, "step": 38374 }, { "epoch": 0.06804381996473338, "grad_norm": 0.279296875, "learning_rate": 0.0013958563213214783, "loss": 0.1669, "step": 38376 }, { "epoch": 0.0680473661300432, "grad_norm": 0.431640625, "learning_rate": 0.001395800109822327, "loss": 0.161, "step": 38378 }, { "epoch": 0.06805091229535301, "grad_norm": 0.447265625, "learning_rate": 0.0013957438970295178, "loss": 0.2179, "step": 38380 }, { "epoch": 0.06805445846066283, "grad_norm": 0.31640625, "learning_rate": 0.0013956876829432962, "loss": 0.2019, "step": 38382 }, { "epoch": 0.06805800462597264, "grad_norm": 0.50390625, "learning_rate": 0.0013956314675639084, "loss": 0.2087, "step": 38384 }, { "epoch": 0.06806155079128247, "grad_norm": 0.298828125, "learning_rate": 0.0013955752508916003, "loss": 0.1963, "step": 38386 }, { "epoch": 0.06806509695659228, "grad_norm": 0.5703125, "learning_rate": 0.001395519032926618, "loss": 0.1453, "step": 38388 }, { "epoch": 0.0680686431219021, "grad_norm": 0.453125, "learning_rate": 0.001395462813669207, "loss": 0.1736, "step": 38390 }, { "epoch": 0.06807218928721191, "grad_norm": 0.4453125, "learning_rate": 0.0013954065931196132, "loss": 0.1956, "step": 38392 }, { "epoch": 0.06807573545252173, "grad_norm": 1.078125, "learning_rate": 0.0013953503712780828, "loss": 0.1756, "step": 38394 }, { "epoch": 0.06807928161783154, "grad_norm": 0.71875, "learning_rate": 0.001395294148144861, "loss": 0.1888, "step": 38396 }, { "epoch": 0.06808282778314136, "grad_norm": 1.7734375, "learning_rate": 0.0013952379237201944, "loss": 0.2469, "step": 38398 }, { "epoch": 0.06808637394845117, "grad_norm": 0.3515625, "learning_rate": 0.0013951816980043285, "loss": 0.1884, "step": 38400 }, { "epoch": 0.06808992011376098, "grad_norm": 0.6171875, "learning_rate": 0.0013951254709975095, "loss": 0.1928, "step": 38402 }, { "epoch": 0.0680934662790708, "grad_norm": 0.578125, "learning_rate": 0.0013950692426999832, "loss": 0.1648, "step": 38404 }, { "epoch": 0.06809701244438061, "grad_norm": 1.453125, "learning_rate": 0.001395013013111995, "loss": 0.3896, "step": 38406 }, { "epoch": 0.06810055860969043, "grad_norm": 0.310546875, "learning_rate": 0.0013949567822337916, "loss": 0.2046, "step": 38408 }, { "epoch": 0.06810410477500024, "grad_norm": 0.6328125, "learning_rate": 0.0013949005500656184, "loss": 0.1951, "step": 38410 }, { "epoch": 0.06810765094031006, "grad_norm": 0.625, "learning_rate": 0.0013948443166077215, "loss": 0.2136, "step": 38412 }, { "epoch": 0.06811119710561987, "grad_norm": 0.6875, "learning_rate": 0.0013947880818603472, "loss": 0.1817, "step": 38414 }, { "epoch": 0.06811474327092969, "grad_norm": 0.78125, "learning_rate": 0.0013947318458237407, "loss": 0.3948, "step": 38416 }, { "epoch": 0.0681182894362395, "grad_norm": 0.2021484375, "learning_rate": 0.0013946756084981483, "loss": 0.1687, "step": 38418 }, { "epoch": 0.06812183560154932, "grad_norm": 0.478515625, "learning_rate": 0.001394619369883816, "loss": 0.1481, "step": 38420 }, { "epoch": 0.06812538176685913, "grad_norm": 0.46875, "learning_rate": 0.0013945631299809899, "loss": 0.1857, "step": 38422 }, { "epoch": 0.06812892793216895, "grad_norm": 0.97265625, "learning_rate": 0.0013945068887899156, "loss": 0.2757, "step": 38424 }, { "epoch": 0.06813247409747876, "grad_norm": 0.396484375, "learning_rate": 0.001394450646310839, "loss": 0.2142, "step": 38426 }, { "epoch": 0.06813602026278857, "grad_norm": 0.34375, "learning_rate": 0.0013943944025440066, "loss": 0.1548, "step": 38428 }, { "epoch": 0.06813956642809839, "grad_norm": 0.1513671875, "learning_rate": 0.0013943381574896638, "loss": 0.1506, "step": 38430 }, { "epoch": 0.06814311259340822, "grad_norm": 0.40234375, "learning_rate": 0.0013942819111480574, "loss": 0.1351, "step": 38432 }, { "epoch": 0.06814665875871803, "grad_norm": 0.369140625, "learning_rate": 0.0013942256635194326, "loss": 0.1807, "step": 38434 }, { "epoch": 0.06815020492402785, "grad_norm": 4.09375, "learning_rate": 0.0013941694146040356, "loss": 0.2372, "step": 38436 }, { "epoch": 0.06815375108933766, "grad_norm": 0.365234375, "learning_rate": 0.0013941131644021124, "loss": 0.1709, "step": 38438 }, { "epoch": 0.06815729725464748, "grad_norm": 0.59765625, "learning_rate": 0.0013940569129139093, "loss": 0.218, "step": 38440 }, { "epoch": 0.06816084341995729, "grad_norm": 0.5859375, "learning_rate": 0.0013940006601396717, "loss": 0.2435, "step": 38442 }, { "epoch": 0.0681643895852671, "grad_norm": 0.375, "learning_rate": 0.001393944406079646, "loss": 0.2094, "step": 38444 }, { "epoch": 0.06816793575057692, "grad_norm": 2.375, "learning_rate": 0.0013938881507340787, "loss": 0.2277, "step": 38446 }, { "epoch": 0.06817148191588673, "grad_norm": 0.322265625, "learning_rate": 0.001393831894103215, "loss": 0.1835, "step": 38448 }, { "epoch": 0.06817502808119655, "grad_norm": 0.27734375, "learning_rate": 0.001393775636187301, "loss": 0.2094, "step": 38450 }, { "epoch": 0.06817857424650636, "grad_norm": 0.640625, "learning_rate": 0.0013937193769865835, "loss": 0.2516, "step": 38452 }, { "epoch": 0.06818212041181618, "grad_norm": 0.96875, "learning_rate": 0.001393663116501308, "loss": 0.2791, "step": 38454 }, { "epoch": 0.06818566657712599, "grad_norm": 0.40625, "learning_rate": 0.0013936068547317204, "loss": 0.2281, "step": 38456 }, { "epoch": 0.0681892127424358, "grad_norm": 0.51171875, "learning_rate": 0.0013935505916780671, "loss": 0.1932, "step": 38458 }, { "epoch": 0.06819275890774562, "grad_norm": 0.859375, "learning_rate": 0.0013934943273405938, "loss": 0.2413, "step": 38460 }, { "epoch": 0.06819630507305544, "grad_norm": 1.359375, "learning_rate": 0.001393438061719547, "loss": 0.2003, "step": 38462 }, { "epoch": 0.06819985123836525, "grad_norm": 0.5546875, "learning_rate": 0.0013933817948151724, "loss": 0.1785, "step": 38464 }, { "epoch": 0.06820339740367506, "grad_norm": 0.384765625, "learning_rate": 0.0013933255266277166, "loss": 0.1791, "step": 38466 }, { "epoch": 0.06820694356898488, "grad_norm": 0.88671875, "learning_rate": 0.001393269257157425, "loss": 0.3308, "step": 38468 }, { "epoch": 0.0682104897342947, "grad_norm": 0.65234375, "learning_rate": 0.0013932129864045442, "loss": 0.2037, "step": 38470 }, { "epoch": 0.06821403589960451, "grad_norm": 0.92578125, "learning_rate": 0.0013931567143693202, "loss": 0.1881, "step": 38472 }, { "epoch": 0.06821758206491432, "grad_norm": 0.357421875, "learning_rate": 0.0013931004410519987, "loss": 0.2012, "step": 38474 }, { "epoch": 0.06822112823022415, "grad_norm": 0.1708984375, "learning_rate": 0.0013930441664528263, "loss": 0.1844, "step": 38476 }, { "epoch": 0.06822467439553397, "grad_norm": 0.6640625, "learning_rate": 0.001392987890572049, "loss": 0.2094, "step": 38478 }, { "epoch": 0.06822822056084378, "grad_norm": 0.291015625, "learning_rate": 0.0013929316134099125, "loss": 0.2119, "step": 38480 }, { "epoch": 0.0682317667261536, "grad_norm": 0.71484375, "learning_rate": 0.0013928753349666638, "loss": 0.1917, "step": 38482 }, { "epoch": 0.06823531289146341, "grad_norm": 0.173828125, "learning_rate": 0.0013928190552425482, "loss": 0.1582, "step": 38484 }, { "epoch": 0.06823885905677322, "grad_norm": 0.6015625, "learning_rate": 0.0013927627742378124, "loss": 0.1718, "step": 38486 }, { "epoch": 0.06824240522208304, "grad_norm": 0.53125, "learning_rate": 0.0013927064919527022, "loss": 0.1957, "step": 38488 }, { "epoch": 0.06824595138739285, "grad_norm": 0.875, "learning_rate": 0.0013926502083874638, "loss": 0.3304, "step": 38490 }, { "epoch": 0.06824949755270267, "grad_norm": 0.244140625, "learning_rate": 0.001392593923542343, "loss": 0.1809, "step": 38492 }, { "epoch": 0.06825304371801248, "grad_norm": 1.0859375, "learning_rate": 0.0013925376374175867, "loss": 0.2992, "step": 38494 }, { "epoch": 0.0682565898833223, "grad_norm": 7.8125, "learning_rate": 0.0013924813500134409, "loss": 0.2119, "step": 38496 }, { "epoch": 0.06826013604863211, "grad_norm": 4.125, "learning_rate": 0.0013924250613301511, "loss": 0.39, "step": 38498 }, { "epoch": 0.06826368221394193, "grad_norm": 0.5078125, "learning_rate": 0.001392368771367964, "loss": 0.1599, "step": 38500 }, { "epoch": 0.06826722837925174, "grad_norm": 0.28125, "learning_rate": 0.0013923124801271261, "loss": 0.4339, "step": 38502 }, { "epoch": 0.06827077454456155, "grad_norm": 0.3125, "learning_rate": 0.0013922561876078827, "loss": 0.1554, "step": 38504 }, { "epoch": 0.06827432070987137, "grad_norm": 1.28125, "learning_rate": 0.001392199893810481, "loss": 0.3344, "step": 38506 }, { "epoch": 0.06827786687518118, "grad_norm": 1.28125, "learning_rate": 0.0013921435987351665, "loss": 0.3097, "step": 38508 }, { "epoch": 0.068281413040491, "grad_norm": 0.376953125, "learning_rate": 0.0013920873023821857, "loss": 0.2352, "step": 38510 }, { "epoch": 0.06828495920580081, "grad_norm": 0.2353515625, "learning_rate": 0.0013920310047517846, "loss": 0.1384, "step": 38512 }, { "epoch": 0.06828850537111063, "grad_norm": 0.4765625, "learning_rate": 0.0013919747058442093, "loss": 0.1821, "step": 38514 }, { "epoch": 0.06829205153642044, "grad_norm": 0.56640625, "learning_rate": 0.0013919184056597064, "loss": 0.2065, "step": 38516 }, { "epoch": 0.06829559770173026, "grad_norm": 0.50390625, "learning_rate": 0.001391862104198522, "loss": 0.2006, "step": 38518 }, { "epoch": 0.06829914386704007, "grad_norm": 0.341796875, "learning_rate": 0.001391805801460902, "loss": 0.171, "step": 38520 }, { "epoch": 0.0683026900323499, "grad_norm": 0.28515625, "learning_rate": 0.0013917494974470936, "loss": 0.2022, "step": 38522 }, { "epoch": 0.06830623619765971, "grad_norm": 0.5078125, "learning_rate": 0.0013916931921573417, "loss": 0.1755, "step": 38524 }, { "epoch": 0.06830978236296953, "grad_norm": 1.1953125, "learning_rate": 0.0013916368855918936, "loss": 0.2213, "step": 38526 }, { "epoch": 0.06831332852827934, "grad_norm": 0.318359375, "learning_rate": 0.0013915805777509952, "loss": 0.2017, "step": 38528 }, { "epoch": 0.06831687469358916, "grad_norm": 1.046875, "learning_rate": 0.0013915242686348924, "loss": 0.2866, "step": 38530 }, { "epoch": 0.06832042085889897, "grad_norm": 0.40625, "learning_rate": 0.0013914679582438321, "loss": 0.2487, "step": 38532 }, { "epoch": 0.06832396702420879, "grad_norm": 0.75, "learning_rate": 0.00139141164657806, "loss": 0.2134, "step": 38534 }, { "epoch": 0.0683275131895186, "grad_norm": 0.3125, "learning_rate": 0.0013913553336378226, "loss": 0.1881, "step": 38536 }, { "epoch": 0.06833105935482842, "grad_norm": 0.296875, "learning_rate": 0.0013912990194233663, "loss": 0.1552, "step": 38538 }, { "epoch": 0.06833460552013823, "grad_norm": 0.345703125, "learning_rate": 0.0013912427039349375, "loss": 0.205, "step": 38540 }, { "epoch": 0.06833815168544805, "grad_norm": 0.37890625, "learning_rate": 0.0013911863871727819, "loss": 0.1956, "step": 38542 }, { "epoch": 0.06834169785075786, "grad_norm": 0.1767578125, "learning_rate": 0.0013911300691371467, "loss": 0.151, "step": 38544 }, { "epoch": 0.06834524401606767, "grad_norm": 0.26953125, "learning_rate": 0.0013910737498282772, "loss": 0.1941, "step": 38546 }, { "epoch": 0.06834879018137749, "grad_norm": 0.37890625, "learning_rate": 0.0013910174292464206, "loss": 0.1779, "step": 38548 }, { "epoch": 0.0683523363466873, "grad_norm": 0.58203125, "learning_rate": 0.0013909611073918225, "loss": 0.1621, "step": 38550 }, { "epoch": 0.06835588251199712, "grad_norm": 0.359375, "learning_rate": 0.0013909047842647299, "loss": 0.1613, "step": 38552 }, { "epoch": 0.06835942867730693, "grad_norm": 0.703125, "learning_rate": 0.0013908484598653883, "loss": 0.2225, "step": 38554 }, { "epoch": 0.06836297484261675, "grad_norm": 0.369140625, "learning_rate": 0.0013907921341940446, "loss": 0.1658, "step": 38556 }, { "epoch": 0.06836652100792656, "grad_norm": 0.427734375, "learning_rate": 0.001390735807250945, "loss": 0.1558, "step": 38558 }, { "epoch": 0.06837006717323638, "grad_norm": 0.42578125, "learning_rate": 0.001390679479036336, "loss": 0.1836, "step": 38560 }, { "epoch": 0.06837361333854619, "grad_norm": 0.359375, "learning_rate": 0.0013906231495504637, "loss": 0.1618, "step": 38562 }, { "epoch": 0.068377159503856, "grad_norm": 0.74609375, "learning_rate": 0.0013905668187935748, "loss": 0.2254, "step": 38564 }, { "epoch": 0.06838070566916582, "grad_norm": 0.78515625, "learning_rate": 0.001390510486765915, "loss": 0.1782, "step": 38566 }, { "epoch": 0.06838425183447565, "grad_norm": 0.212890625, "learning_rate": 0.0013904541534677315, "loss": 0.2047, "step": 38568 }, { "epoch": 0.06838779799978546, "grad_norm": 0.53515625, "learning_rate": 0.00139039781889927, "loss": 0.2004, "step": 38570 }, { "epoch": 0.06839134416509528, "grad_norm": 0.53515625, "learning_rate": 0.0013903414830607772, "loss": 0.1965, "step": 38572 }, { "epoch": 0.06839489033040509, "grad_norm": 0.30078125, "learning_rate": 0.0013902851459524994, "loss": 0.1595, "step": 38574 }, { "epoch": 0.0683984364957149, "grad_norm": 0.279296875, "learning_rate": 0.0013902288075746828, "loss": 0.1686, "step": 38576 }, { "epoch": 0.06840198266102472, "grad_norm": 0.765625, "learning_rate": 0.001390172467927574, "loss": 0.1831, "step": 38578 }, { "epoch": 0.06840552882633454, "grad_norm": 0.294921875, "learning_rate": 0.0013901161270114197, "loss": 0.1399, "step": 38580 }, { "epoch": 0.06840907499164435, "grad_norm": 0.43359375, "learning_rate": 0.001390059784826466, "loss": 0.2341, "step": 38582 }, { "epoch": 0.06841262115695416, "grad_norm": 0.33203125, "learning_rate": 0.001390003441372959, "loss": 0.1851, "step": 38584 }, { "epoch": 0.06841616732226398, "grad_norm": 0.228515625, "learning_rate": 0.0013899470966511453, "loss": 0.2308, "step": 38586 }, { "epoch": 0.0684197134875738, "grad_norm": 0.2099609375, "learning_rate": 0.0013898907506612718, "loss": 0.1891, "step": 38588 }, { "epoch": 0.06842325965288361, "grad_norm": 0.2041015625, "learning_rate": 0.0013898344034035841, "loss": 0.1586, "step": 38590 }, { "epoch": 0.06842680581819342, "grad_norm": 0.94921875, "learning_rate": 0.0013897780548783297, "loss": 0.2378, "step": 38592 }, { "epoch": 0.06843035198350324, "grad_norm": 0.435546875, "learning_rate": 0.001389721705085754, "loss": 0.2102, "step": 38594 }, { "epoch": 0.06843389814881305, "grad_norm": 0.73046875, "learning_rate": 0.0013896653540261038, "loss": 0.112, "step": 38596 }, { "epoch": 0.06843744431412287, "grad_norm": 0.48046875, "learning_rate": 0.0013896090016996257, "loss": 0.1799, "step": 38598 }, { "epoch": 0.06844099047943268, "grad_norm": 0.36328125, "learning_rate": 0.001389552648106566, "loss": 0.2451, "step": 38600 }, { "epoch": 0.0684445366447425, "grad_norm": 0.31640625, "learning_rate": 0.0013894962932471716, "loss": 0.1903, "step": 38602 }, { "epoch": 0.06844808281005231, "grad_norm": 0.345703125, "learning_rate": 0.0013894399371216879, "loss": 0.1838, "step": 38604 }, { "epoch": 0.06845162897536212, "grad_norm": 0.470703125, "learning_rate": 0.0013893835797303621, "loss": 0.1922, "step": 38606 }, { "epoch": 0.06845517514067194, "grad_norm": 0.373046875, "learning_rate": 0.0013893272210734413, "loss": 0.3005, "step": 38608 }, { "epoch": 0.06845872130598175, "grad_norm": 1.1953125, "learning_rate": 0.001389270861151171, "loss": 0.2285, "step": 38610 }, { "epoch": 0.06846226747129157, "grad_norm": 0.796875, "learning_rate": 0.0013892144999637975, "loss": 0.2986, "step": 38612 }, { "epoch": 0.0684658136366014, "grad_norm": 0.46875, "learning_rate": 0.0013891581375115683, "loss": 0.1625, "step": 38614 }, { "epoch": 0.06846935980191121, "grad_norm": 0.90234375, "learning_rate": 0.0013891017737947288, "loss": 0.2712, "step": 38616 }, { "epoch": 0.06847290596722103, "grad_norm": 0.328125, "learning_rate": 0.0013890454088135266, "loss": 0.2401, "step": 38618 }, { "epoch": 0.06847645213253084, "grad_norm": 1.1015625, "learning_rate": 0.0013889890425682076, "loss": 0.2564, "step": 38620 }, { "epoch": 0.06847999829784066, "grad_norm": 0.37109375, "learning_rate": 0.0013889326750590187, "loss": 0.2566, "step": 38622 }, { "epoch": 0.06848354446315047, "grad_norm": 0.26171875, "learning_rate": 0.0013888763062862056, "loss": 0.1777, "step": 38624 }, { "epoch": 0.06848709062846028, "grad_norm": 0.2890625, "learning_rate": 0.0013888199362500156, "loss": 0.2204, "step": 38626 }, { "epoch": 0.0684906367937701, "grad_norm": 1.1328125, "learning_rate": 0.0013887635649506946, "loss": 0.2267, "step": 38628 }, { "epoch": 0.06849418295907991, "grad_norm": 0.9296875, "learning_rate": 0.0013887071923884898, "loss": 0.2019, "step": 38630 }, { "epoch": 0.06849772912438973, "grad_norm": 0.494140625, "learning_rate": 0.0013886508185636476, "loss": 0.2052, "step": 38632 }, { "epoch": 0.06850127528969954, "grad_norm": 0.58984375, "learning_rate": 0.0013885944434764142, "loss": 0.16, "step": 38634 }, { "epoch": 0.06850482145500936, "grad_norm": 0.57421875, "learning_rate": 0.0013885380671270363, "loss": 0.2023, "step": 38636 }, { "epoch": 0.06850836762031917, "grad_norm": 0.62109375, "learning_rate": 0.0013884816895157605, "loss": 0.171, "step": 38638 }, { "epoch": 0.06851191378562899, "grad_norm": 0.400390625, "learning_rate": 0.0013884253106428333, "loss": 0.1549, "step": 38640 }, { "epoch": 0.0685154599509388, "grad_norm": 0.5625, "learning_rate": 0.0013883689305085015, "loss": 0.3093, "step": 38642 }, { "epoch": 0.06851900611624862, "grad_norm": 0.212890625, "learning_rate": 0.0013883125491130114, "loss": 0.2038, "step": 38644 }, { "epoch": 0.06852255228155843, "grad_norm": 0.392578125, "learning_rate": 0.0013882561664566096, "loss": 0.146, "step": 38646 }, { "epoch": 0.06852609844686824, "grad_norm": 1.625, "learning_rate": 0.0013881997825395428, "loss": 0.2772, "step": 38648 }, { "epoch": 0.06852964461217806, "grad_norm": 0.50390625, "learning_rate": 0.0013881433973620576, "loss": 0.2073, "step": 38650 }, { "epoch": 0.06853319077748787, "grad_norm": 0.35546875, "learning_rate": 0.0013880870109244006, "loss": 0.2884, "step": 38652 }, { "epoch": 0.06853673694279769, "grad_norm": 0.1953125, "learning_rate": 0.0013880306232268184, "loss": 0.1797, "step": 38654 }, { "epoch": 0.0685402831081075, "grad_norm": 0.9140625, "learning_rate": 0.0013879742342695575, "loss": 0.254, "step": 38656 }, { "epoch": 0.06854382927341733, "grad_norm": 2.25, "learning_rate": 0.0013879178440528645, "loss": 0.2781, "step": 38658 }, { "epoch": 0.06854737543872715, "grad_norm": 0.484375, "learning_rate": 0.0013878614525769863, "loss": 0.208, "step": 38660 }, { "epoch": 0.06855092160403696, "grad_norm": 0.5859375, "learning_rate": 0.0013878050598421694, "loss": 0.1603, "step": 38662 }, { "epoch": 0.06855446776934677, "grad_norm": 0.36328125, "learning_rate": 0.00138774866584866, "loss": 0.2282, "step": 38664 }, { "epoch": 0.06855801393465659, "grad_norm": 0.333984375, "learning_rate": 0.0013876922705967052, "loss": 0.1922, "step": 38666 }, { "epoch": 0.0685615600999664, "grad_norm": 0.349609375, "learning_rate": 0.0013876358740865513, "loss": 0.1808, "step": 38668 }, { "epoch": 0.06856510626527622, "grad_norm": 0.47265625, "learning_rate": 0.0013875794763184455, "loss": 0.1742, "step": 38670 }, { "epoch": 0.06856865243058603, "grad_norm": 0.34375, "learning_rate": 0.001387523077292634, "loss": 0.1383, "step": 38672 }, { "epoch": 0.06857219859589585, "grad_norm": 0.3515625, "learning_rate": 0.001387466677009364, "loss": 0.1723, "step": 38674 }, { "epoch": 0.06857574476120566, "grad_norm": 0.279296875, "learning_rate": 0.0013874102754688812, "loss": 0.185, "step": 38676 }, { "epoch": 0.06857929092651548, "grad_norm": 0.427734375, "learning_rate": 0.0013873538726714328, "loss": 0.1956, "step": 38678 }, { "epoch": 0.06858283709182529, "grad_norm": 0.73046875, "learning_rate": 0.0013872974686172654, "loss": 0.1889, "step": 38680 }, { "epoch": 0.0685863832571351, "grad_norm": 1.1640625, "learning_rate": 0.0013872410633066262, "loss": 0.7169, "step": 38682 }, { "epoch": 0.06858992942244492, "grad_norm": 0.53125, "learning_rate": 0.001387184656739761, "loss": 0.168, "step": 38684 }, { "epoch": 0.06859347558775473, "grad_norm": 1.421875, "learning_rate": 0.0013871282489169173, "loss": 0.1486, "step": 38686 }, { "epoch": 0.06859702175306455, "grad_norm": 0.4140625, "learning_rate": 0.0013870718398383412, "loss": 0.1885, "step": 38688 }, { "epoch": 0.06860056791837436, "grad_norm": 0.298828125, "learning_rate": 0.0013870154295042797, "loss": 0.1668, "step": 38690 }, { "epoch": 0.06860411408368418, "grad_norm": 1.546875, "learning_rate": 0.001386959017914979, "loss": 0.3711, "step": 38692 }, { "epoch": 0.06860766024899399, "grad_norm": 0.58984375, "learning_rate": 0.0013869026050706867, "loss": 0.2045, "step": 38694 }, { "epoch": 0.06861120641430381, "grad_norm": 0.61328125, "learning_rate": 0.0013868461909716493, "loss": 0.3048, "step": 38696 }, { "epoch": 0.06861475257961362, "grad_norm": 0.306640625, "learning_rate": 0.001386789775618113, "loss": 0.1788, "step": 38698 }, { "epoch": 0.06861829874492344, "grad_norm": 0.75390625, "learning_rate": 0.0013867333590103245, "loss": 0.208, "step": 38700 }, { "epoch": 0.06862184491023325, "grad_norm": 0.365234375, "learning_rate": 0.0013866769411485312, "loss": 0.3026, "step": 38702 }, { "epoch": 0.06862539107554308, "grad_norm": 0.46875, "learning_rate": 0.0013866205220329794, "loss": 0.1746, "step": 38704 }, { "epoch": 0.0686289372408529, "grad_norm": 0.32421875, "learning_rate": 0.0013865641016639159, "loss": 0.1726, "step": 38706 }, { "epoch": 0.06863248340616271, "grad_norm": 0.5234375, "learning_rate": 0.0013865076800415874, "loss": 0.181, "step": 38708 }, { "epoch": 0.06863602957147252, "grad_norm": 0.494140625, "learning_rate": 0.001386451257166241, "loss": 0.1777, "step": 38710 }, { "epoch": 0.06863957573678234, "grad_norm": 1.53125, "learning_rate": 0.001386394833038123, "loss": 0.5013, "step": 38712 }, { "epoch": 0.06864312190209215, "grad_norm": 3.15625, "learning_rate": 0.0013863384076574802, "loss": 0.3899, "step": 38714 }, { "epoch": 0.06864666806740197, "grad_norm": 0.41796875, "learning_rate": 0.0013862819810245603, "loss": 0.3849, "step": 38716 }, { "epoch": 0.06865021423271178, "grad_norm": 0.84375, "learning_rate": 0.0013862255531396084, "loss": 0.2061, "step": 38718 }, { "epoch": 0.0686537603980216, "grad_norm": 0.466796875, "learning_rate": 0.0013861691240028727, "loss": 0.2131, "step": 38720 }, { "epoch": 0.06865730656333141, "grad_norm": 0.86328125, "learning_rate": 0.001386112693614599, "loss": 0.2721, "step": 38722 }, { "epoch": 0.06866085272864123, "grad_norm": 1.0234375, "learning_rate": 0.001386056261975035, "loss": 0.1928, "step": 38724 }, { "epoch": 0.06866439889395104, "grad_norm": 0.5078125, "learning_rate": 0.001385999829084427, "loss": 0.2387, "step": 38726 }, { "epoch": 0.06866794505926085, "grad_norm": 0.59765625, "learning_rate": 0.0013859433949430216, "loss": 0.1539, "step": 38728 }, { "epoch": 0.06867149122457067, "grad_norm": 0.83203125, "learning_rate": 0.0013858869595510662, "loss": 0.2554, "step": 38730 }, { "epoch": 0.06867503738988048, "grad_norm": 0.5546875, "learning_rate": 0.001385830522908807, "loss": 0.2179, "step": 38732 }, { "epoch": 0.0686785835551903, "grad_norm": 0.48828125, "learning_rate": 0.0013857740850164914, "loss": 0.1837, "step": 38734 }, { "epoch": 0.06868212972050011, "grad_norm": 0.36328125, "learning_rate": 0.0013857176458743662, "loss": 0.1722, "step": 38736 }, { "epoch": 0.06868567588580993, "grad_norm": 0.431640625, "learning_rate": 0.0013856612054826774, "loss": 0.2289, "step": 38738 }, { "epoch": 0.06868922205111974, "grad_norm": 0.255859375, "learning_rate": 0.001385604763841673, "loss": 0.3891, "step": 38740 }, { "epoch": 0.06869276821642956, "grad_norm": 1.828125, "learning_rate": 0.0013855483209515987, "loss": 0.2045, "step": 38742 }, { "epoch": 0.06869631438173937, "grad_norm": 0.46875, "learning_rate": 0.0013854918768127023, "loss": 0.2637, "step": 38744 }, { "epoch": 0.06869986054704919, "grad_norm": 0.52734375, "learning_rate": 0.0013854354314252297, "loss": 0.283, "step": 38746 }, { "epoch": 0.068703406712359, "grad_norm": 0.365234375, "learning_rate": 0.001385378984789429, "loss": 0.2111, "step": 38748 }, { "epoch": 0.06870695287766883, "grad_norm": 0.330078125, "learning_rate": 0.0013853225369055463, "loss": 0.1722, "step": 38750 }, { "epoch": 0.06871049904297864, "grad_norm": 1.53125, "learning_rate": 0.0013852660877738284, "loss": 0.2091, "step": 38752 }, { "epoch": 0.06871404520828846, "grad_norm": 0.373046875, "learning_rate": 0.0013852096373945222, "loss": 0.1948, "step": 38754 }, { "epoch": 0.06871759137359827, "grad_norm": 0.296875, "learning_rate": 0.001385153185767875, "loss": 0.1899, "step": 38756 }, { "epoch": 0.06872113753890809, "grad_norm": 1.2265625, "learning_rate": 0.0013850967328941331, "loss": 0.2329, "step": 38758 }, { "epoch": 0.0687246837042179, "grad_norm": 0.294921875, "learning_rate": 0.0013850402787735439, "loss": 0.1819, "step": 38760 }, { "epoch": 0.06872822986952772, "grad_norm": 0.26171875, "learning_rate": 0.001384983823406354, "loss": 0.187, "step": 38762 }, { "epoch": 0.06873177603483753, "grad_norm": 0.287109375, "learning_rate": 0.00138492736679281, "loss": 0.1824, "step": 38764 }, { "epoch": 0.06873532220014734, "grad_norm": 0.384765625, "learning_rate": 0.0013848709089331597, "loss": 0.1288, "step": 38766 }, { "epoch": 0.06873886836545716, "grad_norm": 0.75, "learning_rate": 0.0013848144498276498, "loss": 0.244, "step": 38768 }, { "epoch": 0.06874241453076697, "grad_norm": 0.1953125, "learning_rate": 0.0013847579894765265, "loss": 0.2181, "step": 38770 }, { "epoch": 0.06874596069607679, "grad_norm": 0.29296875, "learning_rate": 0.0013847015278800373, "loss": 0.2719, "step": 38772 }, { "epoch": 0.0687495068613866, "grad_norm": 0.84375, "learning_rate": 0.0013846450650384287, "loss": 0.1907, "step": 38774 }, { "epoch": 0.06875305302669642, "grad_norm": 0.6953125, "learning_rate": 0.0013845886009519483, "loss": 0.1059, "step": 38776 }, { "epoch": 0.06875659919200623, "grad_norm": 0.2451171875, "learning_rate": 0.0013845321356208424, "loss": 0.1556, "step": 38778 }, { "epoch": 0.06876014535731605, "grad_norm": 0.76953125, "learning_rate": 0.0013844756690453584, "loss": 0.1781, "step": 38780 }, { "epoch": 0.06876369152262586, "grad_norm": 0.447265625, "learning_rate": 0.001384419201225743, "loss": 0.2238, "step": 38782 }, { "epoch": 0.06876723768793568, "grad_norm": 0.296875, "learning_rate": 0.001384362732162243, "loss": 0.1513, "step": 38784 }, { "epoch": 0.06877078385324549, "grad_norm": 0.431640625, "learning_rate": 0.0013843062618551058, "loss": 0.2141, "step": 38786 }, { "epoch": 0.0687743300185553, "grad_norm": 0.26953125, "learning_rate": 0.0013842497903045782, "loss": 0.2192, "step": 38788 }, { "epoch": 0.06877787618386512, "grad_norm": 0.546875, "learning_rate": 0.0013841933175109075, "loss": 0.2019, "step": 38790 }, { "epoch": 0.06878142234917493, "grad_norm": 0.46875, "learning_rate": 0.0013841368434743399, "loss": 0.1661, "step": 38792 }, { "epoch": 0.06878496851448476, "grad_norm": 1.0078125, "learning_rate": 0.0013840803681951226, "loss": 0.268, "step": 38794 }, { "epoch": 0.06878851467979458, "grad_norm": 0.56640625, "learning_rate": 0.001384023891673503, "loss": 0.2013, "step": 38796 }, { "epoch": 0.06879206084510439, "grad_norm": 1.21875, "learning_rate": 0.001383967413909728, "loss": 0.2282, "step": 38798 }, { "epoch": 0.0687956070104142, "grad_norm": 0.3046875, "learning_rate": 0.001383910934904044, "loss": 0.1804, "step": 38800 }, { "epoch": 0.06879915317572402, "grad_norm": 0.2265625, "learning_rate": 0.001383854454656699, "loss": 0.2195, "step": 38802 }, { "epoch": 0.06880269934103384, "grad_norm": 0.396484375, "learning_rate": 0.001383797973167939, "loss": 0.2714, "step": 38804 }, { "epoch": 0.06880624550634365, "grad_norm": 0.3203125, "learning_rate": 0.0013837414904380122, "loss": 0.2148, "step": 38806 }, { "epoch": 0.06880979167165346, "grad_norm": 0.42578125, "learning_rate": 0.0013836850064671644, "loss": 0.2032, "step": 38808 }, { "epoch": 0.06881333783696328, "grad_norm": 1.609375, "learning_rate": 0.0013836285212556435, "loss": 0.1533, "step": 38810 }, { "epoch": 0.0688168840022731, "grad_norm": 0.298828125, "learning_rate": 0.001383572034803696, "loss": 0.2029, "step": 38812 }, { "epoch": 0.06882043016758291, "grad_norm": 0.67578125, "learning_rate": 0.0013835155471115692, "loss": 0.2191, "step": 38814 }, { "epoch": 0.06882397633289272, "grad_norm": 0.2578125, "learning_rate": 0.00138345905817951, "loss": 0.2209, "step": 38816 }, { "epoch": 0.06882752249820254, "grad_norm": 1.1875, "learning_rate": 0.0013834025680077655, "loss": 0.2542, "step": 38818 }, { "epoch": 0.06883106866351235, "grad_norm": 0.63671875, "learning_rate": 0.001383346076596583, "loss": 0.1697, "step": 38820 }, { "epoch": 0.06883461482882217, "grad_norm": 0.828125, "learning_rate": 0.001383289583946209, "loss": 0.2327, "step": 38822 }, { "epoch": 0.06883816099413198, "grad_norm": 0.306640625, "learning_rate": 0.0013832330900568912, "loss": 0.1478, "step": 38824 }, { "epoch": 0.0688417071594418, "grad_norm": 0.419921875, "learning_rate": 0.0013831765949288764, "loss": 0.2718, "step": 38826 }, { "epoch": 0.06884525332475161, "grad_norm": 0.359375, "learning_rate": 0.0013831200985624118, "loss": 0.1641, "step": 38828 }, { "epoch": 0.06884879949006142, "grad_norm": 0.91015625, "learning_rate": 0.001383063600957744, "loss": 0.1828, "step": 38830 }, { "epoch": 0.06885234565537124, "grad_norm": 0.5703125, "learning_rate": 0.0013830071021151205, "loss": 0.2324, "step": 38832 }, { "epoch": 0.06885589182068105, "grad_norm": 1.5, "learning_rate": 0.0013829506020347885, "loss": 0.1831, "step": 38834 }, { "epoch": 0.06885943798599087, "grad_norm": 0.310546875, "learning_rate": 0.0013828941007169948, "loss": 0.2253, "step": 38836 }, { "epoch": 0.06886298415130068, "grad_norm": 0.59765625, "learning_rate": 0.0013828375981619868, "loss": 0.2929, "step": 38838 }, { "epoch": 0.06886653031661051, "grad_norm": 0.30078125, "learning_rate": 0.001382781094370011, "loss": 0.1126, "step": 38840 }, { "epoch": 0.06887007648192033, "grad_norm": 0.51171875, "learning_rate": 0.0013827245893413154, "loss": 0.1649, "step": 38842 }, { "epoch": 0.06887362264723014, "grad_norm": 0.97265625, "learning_rate": 0.0013826680830761465, "loss": 0.2236, "step": 38844 }, { "epoch": 0.06887716881253995, "grad_norm": 0.453125, "learning_rate": 0.0013826115755747517, "loss": 0.1491, "step": 38846 }, { "epoch": 0.06888071497784977, "grad_norm": 0.46875, "learning_rate": 0.0013825550668373778, "loss": 0.1669, "step": 38848 }, { "epoch": 0.06888426114315958, "grad_norm": 0.396484375, "learning_rate": 0.0013824985568642726, "loss": 0.1996, "step": 38850 }, { "epoch": 0.0688878073084694, "grad_norm": 1.0390625, "learning_rate": 0.0013824420456556824, "loss": 0.2398, "step": 38852 }, { "epoch": 0.06889135347377921, "grad_norm": 0.400390625, "learning_rate": 0.0013823855332118547, "loss": 0.1732, "step": 38854 }, { "epoch": 0.06889489963908903, "grad_norm": 0.65234375, "learning_rate": 0.0013823290195330367, "loss": 0.2042, "step": 38856 }, { "epoch": 0.06889844580439884, "grad_norm": 0.25390625, "learning_rate": 0.001382272504619476, "loss": 0.197, "step": 38858 }, { "epoch": 0.06890199196970866, "grad_norm": 1.7109375, "learning_rate": 0.0013822159884714189, "loss": 0.2177, "step": 38860 }, { "epoch": 0.06890553813501847, "grad_norm": 0.359375, "learning_rate": 0.001382159471089113, "loss": 0.1863, "step": 38862 }, { "epoch": 0.06890908430032829, "grad_norm": 0.2578125, "learning_rate": 0.001382102952472806, "loss": 0.1719, "step": 38864 }, { "epoch": 0.0689126304656381, "grad_norm": 0.255859375, "learning_rate": 0.0013820464326227437, "loss": 0.1712, "step": 38866 }, { "epoch": 0.06891617663094791, "grad_norm": 0.470703125, "learning_rate": 0.001381989911539175, "loss": 0.4004, "step": 38868 }, { "epoch": 0.06891972279625773, "grad_norm": 0.259765625, "learning_rate": 0.0013819333892223457, "loss": 0.1878, "step": 38870 }, { "epoch": 0.06892326896156754, "grad_norm": 0.2041015625, "learning_rate": 0.0013818768656725035, "loss": 0.1938, "step": 38872 }, { "epoch": 0.06892681512687736, "grad_norm": 0.76171875, "learning_rate": 0.001381820340889896, "loss": 0.166, "step": 38874 }, { "epoch": 0.06893036129218717, "grad_norm": 0.9296875, "learning_rate": 0.0013817638148747695, "loss": 0.2672, "step": 38876 }, { "epoch": 0.06893390745749699, "grad_norm": 0.3359375, "learning_rate": 0.0013817072876273723, "loss": 0.1814, "step": 38878 }, { "epoch": 0.0689374536228068, "grad_norm": 0.216796875, "learning_rate": 0.0013816507591479508, "loss": 0.142, "step": 38880 }, { "epoch": 0.06894099978811662, "grad_norm": 0.318359375, "learning_rate": 0.0013815942294367525, "loss": 0.1413, "step": 38882 }, { "epoch": 0.06894454595342643, "grad_norm": 0.81640625, "learning_rate": 0.0013815376984940247, "loss": 0.2014, "step": 38884 }, { "epoch": 0.06894809211873626, "grad_norm": 0.365234375, "learning_rate": 0.0013814811663200141, "loss": 0.2333, "step": 38886 }, { "epoch": 0.06895163828404607, "grad_norm": 0.435546875, "learning_rate": 0.001381424632914969, "loss": 0.2058, "step": 38888 }, { "epoch": 0.06895518444935589, "grad_norm": 0.134765625, "learning_rate": 0.0013813680982791357, "loss": 0.1578, "step": 38890 }, { "epoch": 0.0689587306146657, "grad_norm": 0.451171875, "learning_rate": 0.0013813115624127618, "loss": 0.2366, "step": 38892 }, { "epoch": 0.06896227677997552, "grad_norm": 0.376953125, "learning_rate": 0.0013812550253160946, "loss": 0.1827, "step": 38894 }, { "epoch": 0.06896582294528533, "grad_norm": 1.9453125, "learning_rate": 0.0013811984869893813, "loss": 0.1987, "step": 38896 }, { "epoch": 0.06896936911059515, "grad_norm": 0.921875, "learning_rate": 0.001381141947432869, "loss": 0.2456, "step": 38898 }, { "epoch": 0.06897291527590496, "grad_norm": 0.5859375, "learning_rate": 0.0013810854066468052, "loss": 0.1645, "step": 38900 }, { "epoch": 0.06897646144121478, "grad_norm": 0.43359375, "learning_rate": 0.0013810288646314375, "loss": 0.1397, "step": 38902 }, { "epoch": 0.06898000760652459, "grad_norm": 0.37109375, "learning_rate": 0.0013809723213870125, "loss": 0.2136, "step": 38904 }, { "epoch": 0.0689835537718344, "grad_norm": 0.3046875, "learning_rate": 0.0013809157769137774, "loss": 0.1348, "step": 38906 }, { "epoch": 0.06898709993714422, "grad_norm": 2.0, "learning_rate": 0.0013808592312119802, "loss": 0.2627, "step": 38908 }, { "epoch": 0.06899064610245403, "grad_norm": 0.201171875, "learning_rate": 0.001380802684281868, "loss": 0.1499, "step": 38910 }, { "epoch": 0.06899419226776385, "grad_norm": 0.70703125, "learning_rate": 0.0013807461361236878, "loss": 0.1544, "step": 38912 }, { "epoch": 0.06899773843307366, "grad_norm": 0.6640625, "learning_rate": 0.0013806895867376867, "loss": 0.1776, "step": 38914 }, { "epoch": 0.06900128459838348, "grad_norm": 0.60546875, "learning_rate": 0.001380633036124113, "loss": 0.1805, "step": 38916 }, { "epoch": 0.06900483076369329, "grad_norm": 0.443359375, "learning_rate": 0.001380576484283213, "loss": 0.2728, "step": 38918 }, { "epoch": 0.0690083769290031, "grad_norm": 0.3515625, "learning_rate": 0.0013805199312152349, "loss": 0.1943, "step": 38920 }, { "epoch": 0.06901192309431292, "grad_norm": 0.91015625, "learning_rate": 0.0013804633769204254, "loss": 0.2868, "step": 38922 }, { "epoch": 0.06901546925962274, "grad_norm": 0.71875, "learning_rate": 0.001380406821399032, "loss": 0.1875, "step": 38924 }, { "epoch": 0.06901901542493255, "grad_norm": 0.42578125, "learning_rate": 0.0013803502646513015, "loss": 0.2583, "step": 38926 }, { "epoch": 0.06902256159024237, "grad_norm": 0.58984375, "learning_rate": 0.0013802937066774825, "loss": 0.2339, "step": 38928 }, { "epoch": 0.0690261077555522, "grad_norm": 0.796875, "learning_rate": 0.0013802371474778213, "loss": 0.1866, "step": 38930 }, { "epoch": 0.06902965392086201, "grad_norm": 0.244140625, "learning_rate": 0.0013801805870525656, "loss": 0.2355, "step": 38932 }, { "epoch": 0.06903320008617182, "grad_norm": 0.359375, "learning_rate": 0.0013801240254019626, "loss": 0.1801, "step": 38934 }, { "epoch": 0.06903674625148164, "grad_norm": 0.54296875, "learning_rate": 0.0013800674625262604, "loss": 0.2283, "step": 38936 }, { "epoch": 0.06904029241679145, "grad_norm": 0.1533203125, "learning_rate": 0.0013800108984257054, "loss": 0.1347, "step": 38938 }, { "epoch": 0.06904383858210127, "grad_norm": 0.65625, "learning_rate": 0.0013799543331005455, "loss": 0.1374, "step": 38940 }, { "epoch": 0.06904738474741108, "grad_norm": 0.30078125, "learning_rate": 0.0013798977665510277, "loss": 0.131, "step": 38942 }, { "epoch": 0.0690509309127209, "grad_norm": 3.375, "learning_rate": 0.0013798411987774003, "loss": 0.2717, "step": 38944 }, { "epoch": 0.06905447707803071, "grad_norm": 0.30078125, "learning_rate": 0.0013797846297799092, "loss": 0.1871, "step": 38946 }, { "epoch": 0.06905802324334052, "grad_norm": 0.267578125, "learning_rate": 0.001379728059558803, "loss": 0.1699, "step": 38948 }, { "epoch": 0.06906156940865034, "grad_norm": 0.546875, "learning_rate": 0.0013796714881143288, "loss": 0.2035, "step": 38950 }, { "epoch": 0.06906511557396015, "grad_norm": 0.640625, "learning_rate": 0.001379614915446734, "loss": 0.2158, "step": 38952 }, { "epoch": 0.06906866173926997, "grad_norm": 1.046875, "learning_rate": 0.0013795583415562658, "loss": 0.2759, "step": 38954 }, { "epoch": 0.06907220790457978, "grad_norm": 0.419921875, "learning_rate": 0.0013795017664431722, "loss": 0.1518, "step": 38956 }, { "epoch": 0.0690757540698896, "grad_norm": 2.21875, "learning_rate": 0.0013794451901077, "loss": 0.2249, "step": 38958 }, { "epoch": 0.06907930023519941, "grad_norm": 0.373046875, "learning_rate": 0.0013793886125500966, "loss": 0.1615, "step": 38960 }, { "epoch": 0.06908284640050923, "grad_norm": 0.4765625, "learning_rate": 0.00137933203377061, "loss": 0.1975, "step": 38962 }, { "epoch": 0.06908639256581904, "grad_norm": 0.56640625, "learning_rate": 0.0013792754537694872, "loss": 0.2532, "step": 38964 }, { "epoch": 0.06908993873112886, "grad_norm": 1.75, "learning_rate": 0.0013792188725469758, "loss": 0.2386, "step": 38966 }, { "epoch": 0.06909348489643867, "grad_norm": 0.328125, "learning_rate": 0.0013791622901033235, "loss": 0.158, "step": 38968 }, { "epoch": 0.06909703106174848, "grad_norm": 1.65625, "learning_rate": 0.0013791057064387773, "loss": 0.2005, "step": 38970 }, { "epoch": 0.0691005772270583, "grad_norm": 2.328125, "learning_rate": 0.0013790491215535847, "loss": 0.2941, "step": 38972 }, { "epoch": 0.06910412339236811, "grad_norm": 0.400390625, "learning_rate": 0.0013789925354479936, "loss": 0.2034, "step": 38974 }, { "epoch": 0.06910766955767794, "grad_norm": 0.38671875, "learning_rate": 0.0013789359481222513, "loss": 0.1746, "step": 38976 }, { "epoch": 0.06911121572298776, "grad_norm": 0.33984375, "learning_rate": 0.001378879359576605, "loss": 0.2583, "step": 38978 }, { "epoch": 0.06911476188829757, "grad_norm": 0.66015625, "learning_rate": 0.0013788227698113022, "loss": 0.1874, "step": 38980 }, { "epoch": 0.06911830805360739, "grad_norm": 0.51171875, "learning_rate": 0.001378766178826591, "loss": 0.2246, "step": 38982 }, { "epoch": 0.0691218542189172, "grad_norm": 0.44921875, "learning_rate": 0.0013787095866227177, "loss": 0.1803, "step": 38984 }, { "epoch": 0.06912540038422701, "grad_norm": 0.7109375, "learning_rate": 0.0013786529931999311, "loss": 0.1907, "step": 38986 }, { "epoch": 0.06912894654953683, "grad_norm": 1.1171875, "learning_rate": 0.0013785963985584783, "loss": 0.2449, "step": 38988 }, { "epoch": 0.06913249271484664, "grad_norm": 0.345703125, "learning_rate": 0.0013785398026986064, "loss": 0.173, "step": 38990 }, { "epoch": 0.06913603888015646, "grad_norm": 0.453125, "learning_rate": 0.0013784832056205631, "loss": 0.192, "step": 38992 }, { "epoch": 0.06913958504546627, "grad_norm": 1.0234375, "learning_rate": 0.0013784266073245965, "loss": 0.2923, "step": 38994 }, { "epoch": 0.06914313121077609, "grad_norm": 0.2470703125, "learning_rate": 0.0013783700078109533, "loss": 0.1412, "step": 38996 }, { "epoch": 0.0691466773760859, "grad_norm": 0.357421875, "learning_rate": 0.0013783134070798815, "loss": 0.2173, "step": 38998 }, { "epoch": 0.06915022354139572, "grad_norm": 0.84375, "learning_rate": 0.0013782568051316287, "loss": 0.2496, "step": 39000 }, { "epoch": 0.06915376970670553, "grad_norm": 0.388671875, "learning_rate": 0.0013782002019664416, "loss": 0.1667, "step": 39002 }, { "epoch": 0.06915731587201535, "grad_norm": 1.875, "learning_rate": 0.0013781435975845691, "loss": 0.392, "step": 39004 }, { "epoch": 0.06916086203732516, "grad_norm": 0.93359375, "learning_rate": 0.0013780869919862578, "loss": 0.205, "step": 39006 }, { "epoch": 0.06916440820263497, "grad_norm": 0.38671875, "learning_rate": 0.0013780303851717555, "loss": 0.2089, "step": 39008 }, { "epoch": 0.06916795436794479, "grad_norm": 2.625, "learning_rate": 0.0013779737771413099, "loss": 0.2644, "step": 39010 }, { "epoch": 0.0691715005332546, "grad_norm": 4.71875, "learning_rate": 0.0013779171678951682, "loss": 0.2357, "step": 39012 }, { "epoch": 0.06917504669856442, "grad_norm": 0.2392578125, "learning_rate": 0.0013778605574335787, "loss": 0.1713, "step": 39014 }, { "epoch": 0.06917859286387423, "grad_norm": 0.421875, "learning_rate": 0.0013778039457567883, "loss": 0.1755, "step": 39016 }, { "epoch": 0.06918213902918405, "grad_norm": 0.765625, "learning_rate": 0.0013777473328650447, "loss": 0.2225, "step": 39018 }, { "epoch": 0.06918568519449386, "grad_norm": 0.55078125, "learning_rate": 0.0013776907187585956, "loss": 0.229, "step": 39020 }, { "epoch": 0.06918923135980369, "grad_norm": 0.353515625, "learning_rate": 0.0013776341034376887, "loss": 0.2176, "step": 39022 }, { "epoch": 0.0691927775251135, "grad_norm": 0.486328125, "learning_rate": 0.0013775774869025715, "loss": 0.2182, "step": 39024 }, { "epoch": 0.06919632369042332, "grad_norm": 0.255859375, "learning_rate": 0.0013775208691534915, "loss": 0.1483, "step": 39026 }, { "epoch": 0.06919986985573313, "grad_norm": 0.30078125, "learning_rate": 0.0013774642501906966, "loss": 0.2045, "step": 39028 }, { "epoch": 0.06920341602104295, "grad_norm": 0.298828125, "learning_rate": 0.0013774076300144343, "loss": 0.2157, "step": 39030 }, { "epoch": 0.06920696218635276, "grad_norm": 0.2734375, "learning_rate": 0.001377351008624952, "loss": 0.2509, "step": 39032 }, { "epoch": 0.06921050835166258, "grad_norm": 0.95703125, "learning_rate": 0.0013772943860224974, "loss": 0.218, "step": 39034 }, { "epoch": 0.06921405451697239, "grad_norm": 0.302734375, "learning_rate": 0.0013772377622073184, "loss": 0.1836, "step": 39036 }, { "epoch": 0.06921760068228221, "grad_norm": 0.734375, "learning_rate": 0.001377181137179662, "loss": 0.2329, "step": 39038 }, { "epoch": 0.06922114684759202, "grad_norm": 0.95703125, "learning_rate": 0.001377124510939777, "loss": 0.2342, "step": 39040 }, { "epoch": 0.06922469301290184, "grad_norm": 6.125, "learning_rate": 0.00137706788348791, "loss": 0.3604, "step": 39042 }, { "epoch": 0.06922823917821165, "grad_norm": 0.458984375, "learning_rate": 0.001377011254824309, "loss": 0.2617, "step": 39044 }, { "epoch": 0.06923178534352147, "grad_norm": 0.96875, "learning_rate": 0.0013769546249492218, "loss": 0.1644, "step": 39046 }, { "epoch": 0.06923533150883128, "grad_norm": 0.7265625, "learning_rate": 0.001376897993862896, "loss": 0.2211, "step": 39048 }, { "epoch": 0.0692388776741411, "grad_norm": 0.1953125, "learning_rate": 0.0013768413615655793, "loss": 0.1609, "step": 39050 }, { "epoch": 0.06924242383945091, "grad_norm": 0.34765625, "learning_rate": 0.0013767847280575193, "loss": 0.3056, "step": 39052 }, { "epoch": 0.06924597000476072, "grad_norm": 2.234375, "learning_rate": 0.0013767280933389635, "loss": 0.2036, "step": 39054 }, { "epoch": 0.06924951617007054, "grad_norm": 0.345703125, "learning_rate": 0.0013766714574101595, "loss": 0.1643, "step": 39056 }, { "epoch": 0.06925306233538035, "grad_norm": 0.40625, "learning_rate": 0.0013766148202713556, "loss": 0.2271, "step": 39058 }, { "epoch": 0.06925660850069017, "grad_norm": 0.263671875, "learning_rate": 0.001376558181922799, "loss": 0.2597, "step": 39060 }, { "epoch": 0.06926015466599998, "grad_norm": 0.6328125, "learning_rate": 0.0013765015423647376, "loss": 0.2582, "step": 39062 }, { "epoch": 0.0692637008313098, "grad_norm": 0.498046875, "learning_rate": 0.0013764449015974192, "loss": 0.154, "step": 39064 }, { "epoch": 0.06926724699661962, "grad_norm": 0.64453125, "learning_rate": 0.0013763882596210912, "loss": 0.1708, "step": 39066 }, { "epoch": 0.06927079316192944, "grad_norm": 0.322265625, "learning_rate": 0.0013763316164360016, "loss": 0.206, "step": 39068 }, { "epoch": 0.06927433932723925, "grad_norm": 1.171875, "learning_rate": 0.001376274972042398, "loss": 0.1629, "step": 39070 }, { "epoch": 0.06927788549254907, "grad_norm": 0.1826171875, "learning_rate": 0.001376218326440528, "loss": 0.128, "step": 39072 }, { "epoch": 0.06928143165785888, "grad_norm": 3.4375, "learning_rate": 0.0013761616796306396, "loss": 0.2082, "step": 39074 }, { "epoch": 0.0692849778231687, "grad_norm": 0.275390625, "learning_rate": 0.0013761050316129805, "loss": 0.2223, "step": 39076 }, { "epoch": 0.06928852398847851, "grad_norm": 0.326171875, "learning_rate": 0.0013760483823877982, "loss": 0.1573, "step": 39078 }, { "epoch": 0.06929207015378833, "grad_norm": 0.6328125, "learning_rate": 0.001375991731955341, "loss": 0.2425, "step": 39080 }, { "epoch": 0.06929561631909814, "grad_norm": 0.5390625, "learning_rate": 0.001375935080315856, "loss": 0.2031, "step": 39082 }, { "epoch": 0.06929916248440796, "grad_norm": 1.1171875, "learning_rate": 0.0013758784274695913, "loss": 0.345, "step": 39084 }, { "epoch": 0.06930270864971777, "grad_norm": 0.279296875, "learning_rate": 0.0013758217734167943, "loss": 0.2171, "step": 39086 }, { "epoch": 0.06930625481502758, "grad_norm": 0.298828125, "learning_rate": 0.0013757651181577138, "loss": 0.1624, "step": 39088 }, { "epoch": 0.0693098009803374, "grad_norm": 0.5546875, "learning_rate": 0.0013757084616925963, "loss": 0.1815, "step": 39090 }, { "epoch": 0.06931334714564721, "grad_norm": 0.361328125, "learning_rate": 0.0013756518040216902, "loss": 0.2404, "step": 39092 }, { "epoch": 0.06931689331095703, "grad_norm": 0.7734375, "learning_rate": 0.0013755951451452433, "loss": 0.2177, "step": 39094 }, { "epoch": 0.06932043947626684, "grad_norm": 0.6953125, "learning_rate": 0.0013755384850635034, "loss": 0.1606, "step": 39096 }, { "epoch": 0.06932398564157666, "grad_norm": 0.380859375, "learning_rate": 0.0013754818237767179, "loss": 0.2213, "step": 39098 }, { "epoch": 0.06932753180688647, "grad_norm": 0.388671875, "learning_rate": 0.0013754251612851352, "loss": 0.2233, "step": 39100 }, { "epoch": 0.06933107797219629, "grad_norm": 0.275390625, "learning_rate": 0.0013753684975890026, "loss": 0.1577, "step": 39102 }, { "epoch": 0.0693346241375061, "grad_norm": 0.1982421875, "learning_rate": 0.0013753118326885684, "loss": 0.2061, "step": 39104 }, { "epoch": 0.06933817030281592, "grad_norm": 0.7265625, "learning_rate": 0.0013752551665840804, "loss": 0.156, "step": 39106 }, { "epoch": 0.06934171646812573, "grad_norm": 0.5390625, "learning_rate": 0.0013751984992757859, "loss": 0.2068, "step": 39108 }, { "epoch": 0.06934526263343554, "grad_norm": 0.419921875, "learning_rate": 0.0013751418307639328, "loss": 0.1798, "step": 39110 }, { "epoch": 0.06934880879874537, "grad_norm": 0.41015625, "learning_rate": 0.0013750851610487697, "loss": 0.1749, "step": 39112 }, { "epoch": 0.06935235496405519, "grad_norm": 0.390625, "learning_rate": 0.0013750284901305433, "loss": 0.1805, "step": 39114 }, { "epoch": 0.069355901129365, "grad_norm": 0.2373046875, "learning_rate": 0.001374971818009502, "loss": 0.1463, "step": 39116 }, { "epoch": 0.06935944729467482, "grad_norm": 1.515625, "learning_rate": 0.0013749151446858941, "loss": 0.2651, "step": 39118 }, { "epoch": 0.06936299345998463, "grad_norm": 1.578125, "learning_rate": 0.0013748584701599668, "loss": 0.2021, "step": 39120 }, { "epoch": 0.06936653962529445, "grad_norm": 0.61328125, "learning_rate": 0.0013748017944319685, "loss": 0.2095, "step": 39122 }, { "epoch": 0.06937008579060426, "grad_norm": 0.392578125, "learning_rate": 0.0013747451175021466, "loss": 0.1892, "step": 39124 }, { "epoch": 0.06937363195591408, "grad_norm": 0.55859375, "learning_rate": 0.0013746884393707492, "loss": 0.2083, "step": 39126 }, { "epoch": 0.06937717812122389, "grad_norm": 0.59375, "learning_rate": 0.0013746317600380241, "loss": 0.2751, "step": 39128 }, { "epoch": 0.0693807242865337, "grad_norm": 0.328125, "learning_rate": 0.001374575079504219, "loss": 0.161, "step": 39130 }, { "epoch": 0.06938427045184352, "grad_norm": 0.427734375, "learning_rate": 0.0013745183977695822, "loss": 0.1875, "step": 39132 }, { "epoch": 0.06938781661715333, "grad_norm": 2.875, "learning_rate": 0.0013744617148343614, "loss": 0.3237, "step": 39134 }, { "epoch": 0.06939136278246315, "grad_norm": 1.8671875, "learning_rate": 0.0013744050306988044, "loss": 0.2617, "step": 39136 }, { "epoch": 0.06939490894777296, "grad_norm": 0.271484375, "learning_rate": 0.0013743483453631594, "loss": 0.1934, "step": 39138 }, { "epoch": 0.06939845511308278, "grad_norm": 1.3515625, "learning_rate": 0.0013742916588276738, "loss": 0.1688, "step": 39140 }, { "epoch": 0.06940200127839259, "grad_norm": 0.58203125, "learning_rate": 0.0013742349710925959, "loss": 0.1909, "step": 39142 }, { "epoch": 0.0694055474437024, "grad_norm": 0.2734375, "learning_rate": 0.0013741782821581735, "loss": 0.2134, "step": 39144 }, { "epoch": 0.06940909360901222, "grad_norm": 0.2431640625, "learning_rate": 0.0013741215920246548, "loss": 0.2087, "step": 39146 }, { "epoch": 0.06941263977432204, "grad_norm": 0.1748046875, "learning_rate": 0.0013740649006922872, "loss": 0.1546, "step": 39148 }, { "epoch": 0.06941618593963185, "grad_norm": 0.67578125, "learning_rate": 0.0013740082081613193, "loss": 0.1477, "step": 39150 }, { "epoch": 0.06941973210494166, "grad_norm": 0.5625, "learning_rate": 0.001373951514431998, "loss": 0.314, "step": 39152 }, { "epoch": 0.06942327827025148, "grad_norm": 0.5, "learning_rate": 0.0013738948195045723, "loss": 0.3834, "step": 39154 }, { "epoch": 0.0694268244355613, "grad_norm": 1.8671875, "learning_rate": 0.00137383812337929, "loss": 0.2823, "step": 39156 }, { "epoch": 0.06943037060087112, "grad_norm": 0.51171875, "learning_rate": 0.0013737814260563984, "loss": 0.1531, "step": 39158 }, { "epoch": 0.06943391676618094, "grad_norm": 0.87890625, "learning_rate": 0.001373724727536146, "loss": 0.2378, "step": 39160 }, { "epoch": 0.06943746293149075, "grad_norm": 2.015625, "learning_rate": 0.0013736680278187807, "loss": 0.2065, "step": 39162 }, { "epoch": 0.06944100909680057, "grad_norm": 0.609375, "learning_rate": 0.0013736113269045506, "loss": 0.1978, "step": 39164 }, { "epoch": 0.06944455526211038, "grad_norm": 2.828125, "learning_rate": 0.0013735546247937035, "loss": 0.2105, "step": 39166 }, { "epoch": 0.0694481014274202, "grad_norm": 0.34375, "learning_rate": 0.001373497921486487, "loss": 0.1961, "step": 39168 }, { "epoch": 0.06945164759273001, "grad_norm": 0.94921875, "learning_rate": 0.0013734412169831493, "loss": 0.1592, "step": 39170 }, { "epoch": 0.06945519375803982, "grad_norm": 0.48828125, "learning_rate": 0.0013733845112839389, "loss": 0.1611, "step": 39172 }, { "epoch": 0.06945873992334964, "grad_norm": 0.9609375, "learning_rate": 0.0013733278043891036, "loss": 0.2299, "step": 39174 }, { "epoch": 0.06946228608865945, "grad_norm": 0.447265625, "learning_rate": 0.001373271096298891, "loss": 0.188, "step": 39176 }, { "epoch": 0.06946583225396927, "grad_norm": 0.4375, "learning_rate": 0.0013732143870135495, "loss": 0.2266, "step": 39178 }, { "epoch": 0.06946937841927908, "grad_norm": 0.41796875, "learning_rate": 0.0013731576765333266, "loss": 0.1917, "step": 39180 }, { "epoch": 0.0694729245845889, "grad_norm": 0.65625, "learning_rate": 0.0013731009648584713, "loss": 0.1405, "step": 39182 }, { "epoch": 0.06947647074989871, "grad_norm": 0.40625, "learning_rate": 0.0013730442519892308, "loss": 0.2033, "step": 39184 }, { "epoch": 0.06948001691520853, "grad_norm": 0.4765625, "learning_rate": 0.0013729875379258534, "loss": 0.1868, "step": 39186 }, { "epoch": 0.06948356308051834, "grad_norm": 0.4609375, "learning_rate": 0.0013729308226685868, "loss": 0.1652, "step": 39188 }, { "epoch": 0.06948710924582815, "grad_norm": 0.34375, "learning_rate": 0.0013728741062176792, "loss": 0.171, "step": 39190 }, { "epoch": 0.06949065541113797, "grad_norm": 0.55859375, "learning_rate": 0.001372817388573379, "loss": 0.1863, "step": 39192 }, { "epoch": 0.06949420157644778, "grad_norm": 0.640625, "learning_rate": 0.001372760669735934, "loss": 0.1805, "step": 39194 }, { "epoch": 0.0694977477417576, "grad_norm": 0.3984375, "learning_rate": 0.0013727039497055925, "loss": 0.1568, "step": 39196 }, { "epoch": 0.06950129390706741, "grad_norm": 0.421875, "learning_rate": 0.0013726472284826023, "loss": 0.1686, "step": 39198 }, { "epoch": 0.06950484007237723, "grad_norm": 0.39453125, "learning_rate": 0.001372590506067211, "loss": 0.1999, "step": 39200 }, { "epoch": 0.06950838623768706, "grad_norm": 0.357421875, "learning_rate": 0.0013725337824596676, "loss": 0.287, "step": 39202 }, { "epoch": 0.06951193240299687, "grad_norm": 0.61328125, "learning_rate": 0.0013724770576602195, "loss": 0.1853, "step": 39204 }, { "epoch": 0.06951547856830669, "grad_norm": 3.265625, "learning_rate": 0.001372420331669115, "loss": 0.3077, "step": 39206 }, { "epoch": 0.0695190247336165, "grad_norm": 0.3828125, "learning_rate": 0.0013723636044866026, "loss": 0.1991, "step": 39208 }, { "epoch": 0.06952257089892631, "grad_norm": 0.54296875, "learning_rate": 0.0013723068761129296, "loss": 0.2168, "step": 39210 }, { "epoch": 0.06952611706423613, "grad_norm": 0.50390625, "learning_rate": 0.0013722501465483446, "loss": 0.1849, "step": 39212 }, { "epoch": 0.06952966322954594, "grad_norm": 4.21875, "learning_rate": 0.0013721934157930956, "loss": 0.319, "step": 39214 }, { "epoch": 0.06953320939485576, "grad_norm": 0.37109375, "learning_rate": 0.0013721366838474309, "loss": 0.1526, "step": 39216 }, { "epoch": 0.06953675556016557, "grad_norm": 0.31640625, "learning_rate": 0.0013720799507115984, "loss": 0.1966, "step": 39218 }, { "epoch": 0.06954030172547539, "grad_norm": 0.7734375, "learning_rate": 0.001372023216385846, "loss": 0.1945, "step": 39220 }, { "epoch": 0.0695438478907852, "grad_norm": 0.2412109375, "learning_rate": 0.0013719664808704221, "loss": 0.1684, "step": 39222 }, { "epoch": 0.06954739405609502, "grad_norm": 0.169921875, "learning_rate": 0.0013719097441655752, "loss": 0.2218, "step": 39224 }, { "epoch": 0.06955094022140483, "grad_norm": 0.7734375, "learning_rate": 0.0013718530062715524, "loss": 0.2157, "step": 39226 }, { "epoch": 0.06955448638671465, "grad_norm": 0.470703125, "learning_rate": 0.0013717962671886028, "loss": 0.1772, "step": 39228 }, { "epoch": 0.06955803255202446, "grad_norm": 0.828125, "learning_rate": 0.001371739526916974, "loss": 0.312, "step": 39230 }, { "epoch": 0.06956157871733427, "grad_norm": 0.57421875, "learning_rate": 0.0013716827854569147, "loss": 0.256, "step": 39232 }, { "epoch": 0.06956512488264409, "grad_norm": 0.5625, "learning_rate": 0.0013716260428086723, "loss": 0.2076, "step": 39234 }, { "epoch": 0.0695686710479539, "grad_norm": 0.828125, "learning_rate": 0.0013715692989724956, "loss": 0.2129, "step": 39236 }, { "epoch": 0.06957221721326372, "grad_norm": 1.2421875, "learning_rate": 0.001371512553948633, "loss": 0.2132, "step": 39238 }, { "epoch": 0.06957576337857353, "grad_norm": 0.3671875, "learning_rate": 0.0013714558077373316, "loss": 0.177, "step": 39240 }, { "epoch": 0.06957930954388335, "grad_norm": 0.9296875, "learning_rate": 0.0013713990603388402, "loss": 0.3473, "step": 39242 }, { "epoch": 0.06958285570919316, "grad_norm": 2.328125, "learning_rate": 0.0013713423117534072, "loss": 0.2037, "step": 39244 }, { "epoch": 0.06958640187450298, "grad_norm": 0.51953125, "learning_rate": 0.0013712855619812804, "loss": 0.1548, "step": 39246 }, { "epoch": 0.0695899480398128, "grad_norm": 0.455078125, "learning_rate": 0.001371228811022708, "loss": 0.1804, "step": 39248 }, { "epoch": 0.06959349420512262, "grad_norm": 0.1533203125, "learning_rate": 0.0013711720588779385, "loss": 0.125, "step": 39250 }, { "epoch": 0.06959704037043243, "grad_norm": 0.35546875, "learning_rate": 0.00137111530554722, "loss": 0.2229, "step": 39252 }, { "epoch": 0.06960058653574225, "grad_norm": 0.81640625, "learning_rate": 0.0013710585510308005, "loss": 0.191, "step": 39254 }, { "epoch": 0.06960413270105206, "grad_norm": 0.2890625, "learning_rate": 0.0013710017953289285, "loss": 0.2911, "step": 39256 }, { "epoch": 0.06960767886636188, "grad_norm": 0.1865234375, "learning_rate": 0.0013709450384418518, "loss": 0.3502, "step": 39258 }, { "epoch": 0.06961122503167169, "grad_norm": 0.283203125, "learning_rate": 0.001370888280369819, "loss": 0.2258, "step": 39260 }, { "epoch": 0.0696147711969815, "grad_norm": 1.390625, "learning_rate": 0.0013708315211130781, "loss": 0.2306, "step": 39262 }, { "epoch": 0.06961831736229132, "grad_norm": 0.5078125, "learning_rate": 0.0013707747606718779, "loss": 0.2524, "step": 39264 }, { "epoch": 0.06962186352760114, "grad_norm": 5.40625, "learning_rate": 0.0013707179990464658, "loss": 0.2523, "step": 39266 }, { "epoch": 0.06962540969291095, "grad_norm": 0.7421875, "learning_rate": 0.0013706612362370905, "loss": 0.2019, "step": 39268 }, { "epoch": 0.06962895585822076, "grad_norm": 0.70703125, "learning_rate": 0.001370604472244, "loss": 0.2609, "step": 39270 }, { "epoch": 0.06963250202353058, "grad_norm": 0.439453125, "learning_rate": 0.001370547707067443, "loss": 0.1699, "step": 39272 }, { "epoch": 0.0696360481888404, "grad_norm": 0.232421875, "learning_rate": 0.0013704909407076674, "loss": 0.216, "step": 39274 }, { "epoch": 0.06963959435415021, "grad_norm": 0.59765625, "learning_rate": 0.0013704341731649214, "loss": 0.1723, "step": 39276 }, { "epoch": 0.06964314051946002, "grad_norm": 0.458984375, "learning_rate": 0.0013703774044394536, "loss": 0.2022, "step": 39278 }, { "epoch": 0.06964668668476984, "grad_norm": 0.322265625, "learning_rate": 0.001370320634531512, "loss": 0.1496, "step": 39280 }, { "epoch": 0.06965023285007965, "grad_norm": 0.6875, "learning_rate": 0.001370263863441345, "loss": 0.1989, "step": 39282 }, { "epoch": 0.06965377901538947, "grad_norm": 0.318359375, "learning_rate": 0.0013702070911692007, "loss": 0.2432, "step": 39284 }, { "epoch": 0.06965732518069928, "grad_norm": 0.6640625, "learning_rate": 0.0013701503177153277, "loss": 0.1603, "step": 39286 }, { "epoch": 0.0696608713460091, "grad_norm": 1.1171875, "learning_rate": 0.0013700935430799742, "loss": 0.2809, "step": 39288 }, { "epoch": 0.06966441751131891, "grad_norm": 1.5234375, "learning_rate": 0.0013700367672633883, "loss": 0.2048, "step": 39290 }, { "epoch": 0.06966796367662872, "grad_norm": 0.53515625, "learning_rate": 0.0013699799902658185, "loss": 0.1746, "step": 39292 }, { "epoch": 0.06967150984193855, "grad_norm": 0.35546875, "learning_rate": 0.0013699232120875132, "loss": 0.2209, "step": 39294 }, { "epoch": 0.06967505600724837, "grad_norm": 13.375, "learning_rate": 0.0013698664327287202, "loss": 0.32, "step": 39296 }, { "epoch": 0.06967860217255818, "grad_norm": 0.9921875, "learning_rate": 0.0013698096521896886, "loss": 0.2308, "step": 39298 }, { "epoch": 0.069682148337868, "grad_norm": 0.515625, "learning_rate": 0.001369752870470666, "loss": 0.2316, "step": 39300 }, { "epoch": 0.06968569450317781, "grad_norm": 0.67578125, "learning_rate": 0.0013696960875719008, "loss": 0.1946, "step": 39302 }, { "epoch": 0.06968924066848763, "grad_norm": 0.71484375, "learning_rate": 0.0013696393034936419, "loss": 0.6397, "step": 39304 }, { "epoch": 0.06969278683379744, "grad_norm": 0.90234375, "learning_rate": 0.0013695825182361372, "loss": 0.1963, "step": 39306 }, { "epoch": 0.06969633299910725, "grad_norm": 0.51171875, "learning_rate": 0.0013695257317996353, "loss": 0.1907, "step": 39308 }, { "epoch": 0.06969987916441707, "grad_norm": 0.2734375, "learning_rate": 0.0013694689441843843, "loss": 0.2383, "step": 39310 }, { "epoch": 0.06970342532972688, "grad_norm": 0.65625, "learning_rate": 0.0013694121553906326, "loss": 0.1667, "step": 39312 }, { "epoch": 0.0697069714950367, "grad_norm": 0.2431640625, "learning_rate": 0.001369355365418629, "loss": 0.1556, "step": 39314 }, { "epoch": 0.06971051766034651, "grad_norm": 0.8984375, "learning_rate": 0.001369298574268621, "loss": 0.2431, "step": 39316 }, { "epoch": 0.06971406382565633, "grad_norm": 0.6015625, "learning_rate": 0.001369241781940858, "loss": 0.1601, "step": 39318 }, { "epoch": 0.06971760999096614, "grad_norm": 0.306640625, "learning_rate": 0.0013691849884355876, "loss": 0.196, "step": 39320 }, { "epoch": 0.06972115615627596, "grad_norm": 0.671875, "learning_rate": 0.0013691281937530581, "loss": 0.1584, "step": 39322 }, { "epoch": 0.06972470232158577, "grad_norm": 1.4921875, "learning_rate": 0.0013690713978935186, "loss": 0.2903, "step": 39324 }, { "epoch": 0.06972824848689559, "grad_norm": 0.53515625, "learning_rate": 0.001369014600857217, "loss": 0.2248, "step": 39326 }, { "epoch": 0.0697317946522054, "grad_norm": 0.48828125, "learning_rate": 0.0013689578026444015, "loss": 0.2133, "step": 39328 }, { "epoch": 0.06973534081751522, "grad_norm": 0.609375, "learning_rate": 0.0013689010032553212, "loss": 0.2924, "step": 39330 }, { "epoch": 0.06973888698282503, "grad_norm": 0.310546875, "learning_rate": 0.0013688442026902241, "loss": 0.1257, "step": 39332 }, { "epoch": 0.06974243314813484, "grad_norm": 0.640625, "learning_rate": 0.0013687874009493583, "loss": 0.209, "step": 39334 }, { "epoch": 0.06974597931344466, "grad_norm": 0.24609375, "learning_rate": 0.001368730598032973, "loss": 0.1893, "step": 39336 }, { "epoch": 0.06974952547875449, "grad_norm": 0.3984375, "learning_rate": 0.0013686737939413158, "loss": 0.1759, "step": 39338 }, { "epoch": 0.0697530716440643, "grad_norm": 0.60546875, "learning_rate": 0.0013686169886746354, "loss": 0.2258, "step": 39340 }, { "epoch": 0.06975661780937412, "grad_norm": 0.97265625, "learning_rate": 0.0013685601822331806, "loss": 0.1558, "step": 39342 }, { "epoch": 0.06976016397468393, "grad_norm": 0.6796875, "learning_rate": 0.0013685033746171993, "loss": 0.155, "step": 39344 }, { "epoch": 0.06976371013999375, "grad_norm": 0.41796875, "learning_rate": 0.0013684465658269406, "loss": 0.4398, "step": 39346 }, { "epoch": 0.06976725630530356, "grad_norm": 0.30078125, "learning_rate": 0.0013683897558626522, "loss": 0.1279, "step": 39348 }, { "epoch": 0.06977080247061337, "grad_norm": 0.80859375, "learning_rate": 0.0013683329447245832, "loss": 0.219, "step": 39350 }, { "epoch": 0.06977434863592319, "grad_norm": 0.4375, "learning_rate": 0.0013682761324129815, "loss": 0.2159, "step": 39352 }, { "epoch": 0.069777894801233, "grad_norm": 0.453125, "learning_rate": 0.0013682193189280961, "loss": 0.1797, "step": 39354 }, { "epoch": 0.06978144096654282, "grad_norm": 1.203125, "learning_rate": 0.001368162504270175, "loss": 0.2075, "step": 39356 }, { "epoch": 0.06978498713185263, "grad_norm": 0.953125, "learning_rate": 0.001368105688439467, "loss": 0.4457, "step": 39358 }, { "epoch": 0.06978853329716245, "grad_norm": 0.34375, "learning_rate": 0.00136804887143622, "loss": 0.3058, "step": 39360 }, { "epoch": 0.06979207946247226, "grad_norm": 0.5625, "learning_rate": 0.0013679920532606834, "loss": 0.2225, "step": 39362 }, { "epoch": 0.06979562562778208, "grad_norm": 0.30078125, "learning_rate": 0.001367935233913105, "loss": 0.2052, "step": 39364 }, { "epoch": 0.06979917179309189, "grad_norm": 0.2236328125, "learning_rate": 0.0013678784133937338, "loss": 0.2366, "step": 39366 }, { "epoch": 0.0698027179584017, "grad_norm": 0.240234375, "learning_rate": 0.0013678215917028178, "loss": 0.2147, "step": 39368 }, { "epoch": 0.06980626412371152, "grad_norm": 0.296875, "learning_rate": 0.0013677647688406058, "loss": 0.2027, "step": 39370 }, { "epoch": 0.06980981028902133, "grad_norm": 0.57421875, "learning_rate": 0.0013677079448073464, "loss": 0.1955, "step": 39372 }, { "epoch": 0.06981335645433115, "grad_norm": 0.90625, "learning_rate": 0.0013676511196032875, "loss": 0.2022, "step": 39374 }, { "epoch": 0.06981690261964096, "grad_norm": 0.380859375, "learning_rate": 0.0013675942932286785, "loss": 0.174, "step": 39376 }, { "epoch": 0.06982044878495078, "grad_norm": 0.93359375, "learning_rate": 0.0013675374656837674, "loss": 0.33, "step": 39378 }, { "epoch": 0.06982399495026059, "grad_norm": 0.73046875, "learning_rate": 0.0013674806369688027, "loss": 0.1911, "step": 39380 }, { "epoch": 0.06982754111557041, "grad_norm": 0.76171875, "learning_rate": 0.001367423807084033, "loss": 0.2315, "step": 39382 }, { "epoch": 0.06983108728088024, "grad_norm": 0.36328125, "learning_rate": 0.001367366976029707, "loss": 0.1508, "step": 39384 }, { "epoch": 0.06983463344619005, "grad_norm": 0.42578125, "learning_rate": 0.0013673101438060732, "loss": 0.2059, "step": 39386 }, { "epoch": 0.06983817961149986, "grad_norm": 1.0, "learning_rate": 0.0013672533104133805, "loss": 0.1699, "step": 39388 }, { "epoch": 0.06984172577680968, "grad_norm": 0.484375, "learning_rate": 0.0013671964758518766, "loss": 0.2582, "step": 39390 }, { "epoch": 0.0698452719421195, "grad_norm": 0.9375, "learning_rate": 0.0013671396401218104, "loss": 0.2007, "step": 39392 }, { "epoch": 0.06984881810742931, "grad_norm": 0.47265625, "learning_rate": 0.0013670828032234306, "loss": 0.3647, "step": 39394 }, { "epoch": 0.06985236427273912, "grad_norm": 0.427734375, "learning_rate": 0.001367025965156986, "loss": 0.1801, "step": 39396 }, { "epoch": 0.06985591043804894, "grad_norm": 0.52734375, "learning_rate": 0.0013669691259227247, "loss": 0.2043, "step": 39398 }, { "epoch": 0.06985945660335875, "grad_norm": 1.40625, "learning_rate": 0.001366912285520896, "loss": 0.2448, "step": 39400 }, { "epoch": 0.06986300276866857, "grad_norm": 0.2353515625, "learning_rate": 0.0013668554439517475, "loss": 0.2215, "step": 39402 }, { "epoch": 0.06986654893397838, "grad_norm": 0.859375, "learning_rate": 0.0013667986012155288, "loss": 0.2539, "step": 39404 }, { "epoch": 0.0698700950992882, "grad_norm": 0.9453125, "learning_rate": 0.0013667417573124876, "loss": 0.2454, "step": 39406 }, { "epoch": 0.06987364126459801, "grad_norm": 0.2294921875, "learning_rate": 0.001366684912242873, "loss": 0.1922, "step": 39408 }, { "epoch": 0.06987718742990782, "grad_norm": 0.302734375, "learning_rate": 0.0013666280660069331, "loss": 0.1912, "step": 39410 }, { "epoch": 0.06988073359521764, "grad_norm": 0.345703125, "learning_rate": 0.0013665712186049176, "loss": 0.1741, "step": 39412 }, { "epoch": 0.06988427976052745, "grad_norm": 1.1015625, "learning_rate": 0.001366514370037074, "loss": 0.2303, "step": 39414 }, { "epoch": 0.06988782592583727, "grad_norm": 0.5703125, "learning_rate": 0.0013664575203036515, "loss": 0.1642, "step": 39416 }, { "epoch": 0.06989137209114708, "grad_norm": 0.369140625, "learning_rate": 0.0013664006694048985, "loss": 0.1562, "step": 39418 }, { "epoch": 0.0698949182564569, "grad_norm": 0.578125, "learning_rate": 0.0013663438173410639, "loss": 0.1852, "step": 39420 }, { "epoch": 0.06989846442176671, "grad_norm": 0.458984375, "learning_rate": 0.0013662869641123958, "loss": 0.1733, "step": 39422 }, { "epoch": 0.06990201058707653, "grad_norm": 0.51171875, "learning_rate": 0.0013662301097191437, "loss": 0.1842, "step": 39424 }, { "epoch": 0.06990555675238634, "grad_norm": 4.625, "learning_rate": 0.0013661732541615556, "loss": 0.3214, "step": 39426 }, { "epoch": 0.06990910291769616, "grad_norm": 0.484375, "learning_rate": 0.0013661163974398802, "loss": 0.2017, "step": 39428 }, { "epoch": 0.06991264908300598, "grad_norm": 0.66015625, "learning_rate": 0.001366059539554366, "loss": 0.3421, "step": 39430 }, { "epoch": 0.0699161952483158, "grad_norm": 0.47265625, "learning_rate": 0.001366002680505262, "loss": 0.2036, "step": 39432 }, { "epoch": 0.06991974141362561, "grad_norm": 0.205078125, "learning_rate": 0.0013659458202928173, "loss": 0.1866, "step": 39434 }, { "epoch": 0.06992328757893543, "grad_norm": 0.51953125, "learning_rate": 0.0013658889589172796, "loss": 0.2605, "step": 39436 }, { "epoch": 0.06992683374424524, "grad_norm": 0.462890625, "learning_rate": 0.001365832096378898, "loss": 0.1619, "step": 39438 }, { "epoch": 0.06993037990955506, "grad_norm": 0.298828125, "learning_rate": 0.0013657752326779215, "loss": 0.1864, "step": 39440 }, { "epoch": 0.06993392607486487, "grad_norm": 0.3515625, "learning_rate": 0.0013657183678145981, "loss": 0.2278, "step": 39442 }, { "epoch": 0.06993747224017469, "grad_norm": 0.3046875, "learning_rate": 0.0013656615017891773, "loss": 0.1632, "step": 39444 }, { "epoch": 0.0699410184054845, "grad_norm": 0.8125, "learning_rate": 0.0013656046346019076, "loss": 0.1935, "step": 39446 }, { "epoch": 0.06994456457079432, "grad_norm": 0.58984375, "learning_rate": 0.001365547766253037, "loss": 0.1681, "step": 39448 }, { "epoch": 0.06994811073610413, "grad_norm": 0.31640625, "learning_rate": 0.0013654908967428148, "loss": 0.1887, "step": 39450 }, { "epoch": 0.06995165690141394, "grad_norm": 1.1015625, "learning_rate": 0.0013654340260714896, "loss": 0.2333, "step": 39452 }, { "epoch": 0.06995520306672376, "grad_norm": 0.42578125, "learning_rate": 0.0013653771542393104, "loss": 0.1781, "step": 39454 }, { "epoch": 0.06995874923203357, "grad_norm": 0.4921875, "learning_rate": 0.0013653202812465258, "loss": 0.2361, "step": 39456 }, { "epoch": 0.06996229539734339, "grad_norm": 0.2734375, "learning_rate": 0.0013652634070933842, "loss": 0.1574, "step": 39458 }, { "epoch": 0.0699658415626532, "grad_norm": 0.41796875, "learning_rate": 0.0013652065317801346, "loss": 0.1966, "step": 39460 }, { "epoch": 0.06996938772796302, "grad_norm": 1.2265625, "learning_rate": 0.0013651496553070254, "loss": 0.238, "step": 39462 }, { "epoch": 0.06997293389327283, "grad_norm": 0.458984375, "learning_rate": 0.0013650927776743059, "loss": 0.2204, "step": 39464 }, { "epoch": 0.06997648005858265, "grad_norm": 0.36328125, "learning_rate": 0.0013650358988822244, "loss": 0.2259, "step": 39466 }, { "epoch": 0.06998002622389246, "grad_norm": 0.68359375, "learning_rate": 0.00136497901893103, "loss": 0.167, "step": 39468 }, { "epoch": 0.06998357238920228, "grad_norm": 0.291015625, "learning_rate": 0.0013649221378209714, "loss": 0.1739, "step": 39470 }, { "epoch": 0.06998711855451209, "grad_norm": 9.0625, "learning_rate": 0.0013648652555522968, "loss": 0.197, "step": 39472 }, { "epoch": 0.06999066471982192, "grad_norm": 3.421875, "learning_rate": 0.0013648083721252559, "loss": 0.1339, "step": 39474 }, { "epoch": 0.06999421088513173, "grad_norm": 0.2294921875, "learning_rate": 0.0013647514875400966, "loss": 0.2285, "step": 39476 }, { "epoch": 0.06999775705044155, "grad_norm": 0.341796875, "learning_rate": 0.0013646946017970684, "loss": 0.2904, "step": 39478 }, { "epoch": 0.07000130321575136, "grad_norm": 0.298828125, "learning_rate": 0.0013646377148964198, "loss": 0.2153, "step": 39480 }, { "epoch": 0.07000484938106118, "grad_norm": 0.33984375, "learning_rate": 0.0013645808268383994, "loss": 0.2854, "step": 39482 }, { "epoch": 0.07000839554637099, "grad_norm": 0.2060546875, "learning_rate": 0.0013645239376232558, "loss": 0.1225, "step": 39484 }, { "epoch": 0.0700119417116808, "grad_norm": 0.462890625, "learning_rate": 0.0013644670472512388, "loss": 0.1992, "step": 39486 }, { "epoch": 0.07001548787699062, "grad_norm": 0.88671875, "learning_rate": 0.0013644101557225962, "loss": 0.2525, "step": 39488 }, { "epoch": 0.07001903404230043, "grad_norm": 0.375, "learning_rate": 0.0013643532630375774, "loss": 0.3063, "step": 39490 }, { "epoch": 0.07002258020761025, "grad_norm": 0.6015625, "learning_rate": 0.0013642963691964307, "loss": 0.1981, "step": 39492 }, { "epoch": 0.07002612637292006, "grad_norm": 0.255859375, "learning_rate": 0.0013642394741994054, "loss": 0.1511, "step": 39494 }, { "epoch": 0.07002967253822988, "grad_norm": 0.48828125, "learning_rate": 0.0013641825780467498, "loss": 0.1825, "step": 39496 }, { "epoch": 0.07003321870353969, "grad_norm": 0.3203125, "learning_rate": 0.0013641256807387133, "loss": 0.2005, "step": 39498 }, { "epoch": 0.07003676486884951, "grad_norm": 0.55078125, "learning_rate": 0.0013640687822755447, "loss": 0.3158, "step": 39500 }, { "epoch": 0.07004031103415932, "grad_norm": 0.73828125, "learning_rate": 0.0013640118826574925, "loss": 0.1923, "step": 39502 }, { "epoch": 0.07004385719946914, "grad_norm": 0.54296875, "learning_rate": 0.0013639549818848054, "loss": 0.1865, "step": 39504 }, { "epoch": 0.07004740336477895, "grad_norm": 1.28125, "learning_rate": 0.001363898079957733, "loss": 0.1819, "step": 39506 }, { "epoch": 0.07005094953008877, "grad_norm": 0.365234375, "learning_rate": 0.0013638411768765234, "loss": 0.2091, "step": 39508 }, { "epoch": 0.07005449569539858, "grad_norm": 0.828125, "learning_rate": 0.0013637842726414258, "loss": 0.2809, "step": 39510 }, { "epoch": 0.0700580418607084, "grad_norm": 0.416015625, "learning_rate": 0.0013637273672526889, "loss": 0.1813, "step": 39512 }, { "epoch": 0.07006158802601821, "grad_norm": 0.59375, "learning_rate": 0.0013636704607105617, "loss": 0.2378, "step": 39514 }, { "epoch": 0.07006513419132802, "grad_norm": 0.365234375, "learning_rate": 0.0013636135530152931, "loss": 0.193, "step": 39516 }, { "epoch": 0.07006868035663784, "grad_norm": 0.54296875, "learning_rate": 0.001363556644167132, "loss": 0.1873, "step": 39518 }, { "epoch": 0.07007222652194767, "grad_norm": 1.21875, "learning_rate": 0.0013634997341663273, "loss": 0.2261, "step": 39520 }, { "epoch": 0.07007577268725748, "grad_norm": 0.25390625, "learning_rate": 0.0013634428230131278, "loss": 0.1574, "step": 39522 }, { "epoch": 0.0700793188525673, "grad_norm": 0.48046875, "learning_rate": 0.0013633859107077821, "loss": 0.174, "step": 39524 }, { "epoch": 0.07008286501787711, "grad_norm": 0.322265625, "learning_rate": 0.0013633289972505398, "loss": 0.2614, "step": 39526 }, { "epoch": 0.07008641118318693, "grad_norm": 0.52734375, "learning_rate": 0.0013632720826416494, "loss": 0.1636, "step": 39528 }, { "epoch": 0.07008995734849674, "grad_norm": 0.27734375, "learning_rate": 0.0013632151668813597, "loss": 0.2678, "step": 39530 }, { "epoch": 0.07009350351380655, "grad_norm": 0.353515625, "learning_rate": 0.0013631582499699197, "loss": 0.2319, "step": 39532 }, { "epoch": 0.07009704967911637, "grad_norm": 0.87109375, "learning_rate": 0.0013631013319075785, "loss": 0.1722, "step": 39534 }, { "epoch": 0.07010059584442618, "grad_norm": 1.109375, "learning_rate": 0.0013630444126945848, "loss": 0.1678, "step": 39536 }, { "epoch": 0.070104142009736, "grad_norm": 1.90625, "learning_rate": 0.0013629874923311877, "loss": 0.1674, "step": 39538 }, { "epoch": 0.07010768817504581, "grad_norm": 0.4375, "learning_rate": 0.0013629305708176363, "loss": 0.1808, "step": 39540 }, { "epoch": 0.07011123434035563, "grad_norm": 1.6328125, "learning_rate": 0.0013628736481541789, "loss": 0.1827, "step": 39542 }, { "epoch": 0.07011478050566544, "grad_norm": 0.470703125, "learning_rate": 0.0013628167243410652, "loss": 0.1685, "step": 39544 }, { "epoch": 0.07011832667097526, "grad_norm": 6.59375, "learning_rate": 0.0013627597993785435, "loss": 0.2463, "step": 39546 }, { "epoch": 0.07012187283628507, "grad_norm": 0.306640625, "learning_rate": 0.0013627028732668633, "loss": 0.1795, "step": 39548 }, { "epoch": 0.07012541900159489, "grad_norm": 0.2294921875, "learning_rate": 0.0013626459460062733, "loss": 0.3245, "step": 39550 }, { "epoch": 0.0701289651669047, "grad_norm": 0.734375, "learning_rate": 0.0013625890175970224, "loss": 0.1695, "step": 39552 }, { "epoch": 0.07013251133221451, "grad_norm": 0.453125, "learning_rate": 0.0013625320880393597, "loss": 0.1603, "step": 39554 }, { "epoch": 0.07013605749752433, "grad_norm": 0.3203125, "learning_rate": 0.0013624751573335344, "loss": 0.1927, "step": 39556 }, { "epoch": 0.07013960366283414, "grad_norm": 0.6796875, "learning_rate": 0.0013624182254797948, "loss": 0.1898, "step": 39558 }, { "epoch": 0.07014314982814396, "grad_norm": 1.296875, "learning_rate": 0.0013623612924783905, "loss": 0.1913, "step": 39560 }, { "epoch": 0.07014669599345377, "grad_norm": 0.212890625, "learning_rate": 0.0013623043583295702, "loss": 0.1882, "step": 39562 }, { "epoch": 0.07015024215876359, "grad_norm": 0.333984375, "learning_rate": 0.0013622474230335832, "loss": 0.2216, "step": 39564 }, { "epoch": 0.07015378832407342, "grad_norm": 0.796875, "learning_rate": 0.001362190486590678, "loss": 0.2112, "step": 39566 }, { "epoch": 0.07015733448938323, "grad_norm": 0.2314453125, "learning_rate": 0.0013621335490011043, "loss": 0.1427, "step": 39568 }, { "epoch": 0.07016088065469304, "grad_norm": 1.0078125, "learning_rate": 0.0013620766102651104, "loss": 0.5895, "step": 39570 }, { "epoch": 0.07016442682000286, "grad_norm": 0.36328125, "learning_rate": 0.0013620196703829459, "loss": 0.1945, "step": 39572 }, { "epoch": 0.07016797298531267, "grad_norm": 0.466796875, "learning_rate": 0.0013619627293548595, "loss": 0.2072, "step": 39574 }, { "epoch": 0.07017151915062249, "grad_norm": 0.5703125, "learning_rate": 0.0013619057871811003, "loss": 0.1916, "step": 39576 }, { "epoch": 0.0701750653159323, "grad_norm": 0.59375, "learning_rate": 0.0013618488438619171, "loss": 0.2227, "step": 39578 }, { "epoch": 0.07017861148124212, "grad_norm": 0.71484375, "learning_rate": 0.0013617918993975592, "loss": 0.2108, "step": 39580 }, { "epoch": 0.07018215764655193, "grad_norm": 0.80078125, "learning_rate": 0.0013617349537882757, "loss": 0.2064, "step": 39582 }, { "epoch": 0.07018570381186175, "grad_norm": 0.58203125, "learning_rate": 0.0013616780070343155, "loss": 0.1652, "step": 39584 }, { "epoch": 0.07018924997717156, "grad_norm": 0.98046875, "learning_rate": 0.0013616210591359279, "loss": 0.2287, "step": 39586 }, { "epoch": 0.07019279614248138, "grad_norm": 0.333984375, "learning_rate": 0.0013615641100933615, "loss": 0.2367, "step": 39588 }, { "epoch": 0.07019634230779119, "grad_norm": 0.46875, "learning_rate": 0.0013615071599068658, "loss": 0.1744, "step": 39590 }, { "epoch": 0.070199888473101, "grad_norm": 4.65625, "learning_rate": 0.0013614502085766895, "loss": 0.1652, "step": 39592 }, { "epoch": 0.07020343463841082, "grad_norm": 3.03125, "learning_rate": 0.0013613932561030821, "loss": 0.2077, "step": 39594 }, { "epoch": 0.07020698080372063, "grad_norm": 0.392578125, "learning_rate": 0.0013613363024862924, "loss": 0.1896, "step": 39596 }, { "epoch": 0.07021052696903045, "grad_norm": 1.1171875, "learning_rate": 0.0013612793477265693, "loss": 0.4711, "step": 39598 }, { "epoch": 0.07021407313434026, "grad_norm": 0.458984375, "learning_rate": 0.0013612223918241621, "loss": 0.1817, "step": 39600 }, { "epoch": 0.07021761929965008, "grad_norm": 0.37890625, "learning_rate": 0.00136116543477932, "loss": 0.1665, "step": 39602 }, { "epoch": 0.07022116546495989, "grad_norm": 0.6875, "learning_rate": 0.001361108476592292, "loss": 0.2127, "step": 39604 }, { "epoch": 0.0702247116302697, "grad_norm": 1.78125, "learning_rate": 0.001361051517263327, "loss": 0.3103, "step": 39606 }, { "epoch": 0.07022825779557952, "grad_norm": 0.2373046875, "learning_rate": 0.0013609945567926746, "loss": 0.2234, "step": 39608 }, { "epoch": 0.07023180396088935, "grad_norm": 0.197265625, "learning_rate": 0.0013609375951805834, "loss": 0.1792, "step": 39610 }, { "epoch": 0.07023535012619916, "grad_norm": 0.671875, "learning_rate": 0.001360880632427303, "loss": 0.2279, "step": 39612 }, { "epoch": 0.07023889629150898, "grad_norm": 0.51171875, "learning_rate": 0.0013608236685330821, "loss": 0.2064, "step": 39614 }, { "epoch": 0.0702424424568188, "grad_norm": 1.4453125, "learning_rate": 0.00136076670349817, "loss": 0.4225, "step": 39616 }, { "epoch": 0.07024598862212861, "grad_norm": 0.8046875, "learning_rate": 0.0013607097373228157, "loss": 0.2006, "step": 39618 }, { "epoch": 0.07024953478743842, "grad_norm": 0.71875, "learning_rate": 0.0013606527700072686, "loss": 0.2223, "step": 39620 }, { "epoch": 0.07025308095274824, "grad_norm": 0.302734375, "learning_rate": 0.0013605958015517777, "loss": 0.1673, "step": 39622 }, { "epoch": 0.07025662711805805, "grad_norm": 1.671875, "learning_rate": 0.0013605388319565916, "loss": 0.4129, "step": 39624 }, { "epoch": 0.07026017328336787, "grad_norm": 0.498046875, "learning_rate": 0.0013604818612219605, "loss": 0.18, "step": 39626 }, { "epoch": 0.07026371944867768, "grad_norm": 0.328125, "learning_rate": 0.001360424889348133, "loss": 0.1757, "step": 39628 }, { "epoch": 0.0702672656139875, "grad_norm": 0.33203125, "learning_rate": 0.0013603679163353584, "loss": 0.2172, "step": 39630 }, { "epoch": 0.07027081177929731, "grad_norm": 0.2578125, "learning_rate": 0.0013603109421838858, "loss": 0.1741, "step": 39632 }, { "epoch": 0.07027435794460712, "grad_norm": 0.345703125, "learning_rate": 0.001360253966893964, "loss": 0.1808, "step": 39634 }, { "epoch": 0.07027790410991694, "grad_norm": 0.494140625, "learning_rate": 0.0013601969904658427, "loss": 0.2741, "step": 39636 }, { "epoch": 0.07028145027522675, "grad_norm": 0.8828125, "learning_rate": 0.001360140012899771, "loss": 0.1832, "step": 39638 }, { "epoch": 0.07028499644053657, "grad_norm": 0.6015625, "learning_rate": 0.0013600830341959978, "loss": 0.1978, "step": 39640 }, { "epoch": 0.07028854260584638, "grad_norm": 0.41796875, "learning_rate": 0.0013600260543547727, "loss": 0.1647, "step": 39642 }, { "epoch": 0.0702920887711562, "grad_norm": 1.140625, "learning_rate": 0.0013599690733763446, "loss": 0.2417, "step": 39644 }, { "epoch": 0.07029563493646601, "grad_norm": 1.640625, "learning_rate": 0.0013599120912609626, "loss": 0.271, "step": 39646 }, { "epoch": 0.07029918110177583, "grad_norm": 0.3984375, "learning_rate": 0.0013598551080088765, "loss": 0.2414, "step": 39648 }, { "epoch": 0.07030272726708564, "grad_norm": 0.29296875, "learning_rate": 0.0013597981236203348, "loss": 0.159, "step": 39650 }, { "epoch": 0.07030627343239546, "grad_norm": 0.34765625, "learning_rate": 0.0013597411380955869, "loss": 0.1927, "step": 39652 }, { "epoch": 0.07030981959770527, "grad_norm": 0.48828125, "learning_rate": 0.0013596841514348825, "loss": 0.2081, "step": 39654 }, { "epoch": 0.0703133657630151, "grad_norm": 0.2119140625, "learning_rate": 0.0013596271636384701, "loss": 0.2242, "step": 39656 }, { "epoch": 0.07031691192832491, "grad_norm": 0.1796875, "learning_rate": 0.0013595701747065993, "loss": 0.1477, "step": 39658 }, { "epoch": 0.07032045809363473, "grad_norm": 0.22265625, "learning_rate": 0.0013595131846395197, "loss": 0.2141, "step": 39660 }, { "epoch": 0.07032400425894454, "grad_norm": 0.294921875, "learning_rate": 0.00135945619343748, "loss": 0.1962, "step": 39662 }, { "epoch": 0.07032755042425436, "grad_norm": 2.375, "learning_rate": 0.0013593992011007295, "loss": 0.2309, "step": 39664 }, { "epoch": 0.07033109658956417, "grad_norm": 1.59375, "learning_rate": 0.0013593422076295177, "loss": 0.2472, "step": 39666 }, { "epoch": 0.07033464275487399, "grad_norm": 0.7109375, "learning_rate": 0.0013592852130240936, "loss": 0.2368, "step": 39668 }, { "epoch": 0.0703381889201838, "grad_norm": 0.46875, "learning_rate": 0.0013592282172847068, "loss": 0.2088, "step": 39670 }, { "epoch": 0.07034173508549361, "grad_norm": 0.4296875, "learning_rate": 0.0013591712204116062, "loss": 0.1185, "step": 39672 }, { "epoch": 0.07034528125080343, "grad_norm": 0.314453125, "learning_rate": 0.0013591142224050412, "loss": 0.1967, "step": 39674 }, { "epoch": 0.07034882741611324, "grad_norm": 0.77734375, "learning_rate": 0.0013590572232652612, "loss": 0.1457, "step": 39676 }, { "epoch": 0.07035237358142306, "grad_norm": 0.2392578125, "learning_rate": 0.0013590002229925153, "loss": 0.2271, "step": 39678 }, { "epoch": 0.07035591974673287, "grad_norm": 0.53125, "learning_rate": 0.001358943221587053, "loss": 0.1882, "step": 39680 }, { "epoch": 0.07035946591204269, "grad_norm": 0.1328125, "learning_rate": 0.0013588862190491231, "loss": 0.1912, "step": 39682 }, { "epoch": 0.0703630120773525, "grad_norm": 0.32421875, "learning_rate": 0.0013588292153789757, "loss": 0.2855, "step": 39684 }, { "epoch": 0.07036655824266232, "grad_norm": 1.3046875, "learning_rate": 0.0013587722105768594, "loss": 0.3307, "step": 39686 }, { "epoch": 0.07037010440797213, "grad_norm": 0.3984375, "learning_rate": 0.001358715204643024, "loss": 0.2412, "step": 39688 }, { "epoch": 0.07037365057328195, "grad_norm": 0.35546875, "learning_rate": 0.0013586581975777185, "loss": 0.188, "step": 39690 }, { "epoch": 0.07037719673859176, "grad_norm": 0.314453125, "learning_rate": 0.001358601189381192, "loss": 0.1547, "step": 39692 }, { "epoch": 0.07038074290390157, "grad_norm": 0.40625, "learning_rate": 0.0013585441800536943, "loss": 0.1995, "step": 39694 }, { "epoch": 0.07038428906921139, "grad_norm": 0.25390625, "learning_rate": 0.0013584871695954746, "loss": 0.2243, "step": 39696 }, { "epoch": 0.0703878352345212, "grad_norm": 0.205078125, "learning_rate": 0.0013584301580067822, "loss": 0.165, "step": 39698 }, { "epoch": 0.07039138139983102, "grad_norm": 0.31640625, "learning_rate": 0.0013583731452878663, "loss": 0.1742, "step": 39700 }, { "epoch": 0.07039492756514085, "grad_norm": 1.1875, "learning_rate": 0.0013583161314389763, "loss": 0.2424, "step": 39702 }, { "epoch": 0.07039847373045066, "grad_norm": 0.640625, "learning_rate": 0.0013582591164603617, "loss": 0.179, "step": 39704 }, { "epoch": 0.07040201989576048, "grad_norm": 1.484375, "learning_rate": 0.0013582021003522718, "loss": 0.2594, "step": 39706 }, { "epoch": 0.07040556606107029, "grad_norm": 0.419921875, "learning_rate": 0.0013581450831149557, "loss": 0.1977, "step": 39708 }, { "epoch": 0.0704091122263801, "grad_norm": 0.412109375, "learning_rate": 0.0013580880647486632, "loss": 0.2201, "step": 39710 }, { "epoch": 0.07041265839168992, "grad_norm": 0.2080078125, "learning_rate": 0.0013580310452536433, "loss": 0.199, "step": 39712 }, { "epoch": 0.07041620455699973, "grad_norm": 0.53515625, "learning_rate": 0.0013579740246301452, "loss": 0.2607, "step": 39714 }, { "epoch": 0.07041975072230955, "grad_norm": 0.5078125, "learning_rate": 0.001357917002878419, "loss": 0.1325, "step": 39716 }, { "epoch": 0.07042329688761936, "grad_norm": 0.302734375, "learning_rate": 0.0013578599799987131, "loss": 0.1655, "step": 39718 }, { "epoch": 0.07042684305292918, "grad_norm": 0.359375, "learning_rate": 0.001357802955991278, "loss": 0.1606, "step": 39720 }, { "epoch": 0.07043038921823899, "grad_norm": 0.62109375, "learning_rate": 0.0013577459308563625, "loss": 0.2562, "step": 39722 }, { "epoch": 0.0704339353835488, "grad_norm": 0.30859375, "learning_rate": 0.0013576889045942158, "loss": 0.1527, "step": 39724 }, { "epoch": 0.07043748154885862, "grad_norm": 0.63671875, "learning_rate": 0.0013576318772050874, "loss": 0.2225, "step": 39726 }, { "epoch": 0.07044102771416844, "grad_norm": 0.423828125, "learning_rate": 0.0013575748486892272, "loss": 0.1977, "step": 39728 }, { "epoch": 0.07044457387947825, "grad_norm": 0.5078125, "learning_rate": 0.001357517819046884, "loss": 0.2292, "step": 39730 }, { "epoch": 0.07044812004478807, "grad_norm": 1.1328125, "learning_rate": 0.0013574607882783074, "loss": 0.1325, "step": 39732 }, { "epoch": 0.07045166621009788, "grad_norm": 0.4609375, "learning_rate": 0.001357403756383747, "loss": 0.1762, "step": 39734 }, { "epoch": 0.0704552123754077, "grad_norm": 0.76953125, "learning_rate": 0.0013573467233634518, "loss": 0.2099, "step": 39736 }, { "epoch": 0.07045875854071751, "grad_norm": 0.431640625, "learning_rate": 0.0013572896892176717, "loss": 0.4089, "step": 39738 }, { "epoch": 0.07046230470602732, "grad_norm": 0.73046875, "learning_rate": 0.001357232653946656, "loss": 0.2257, "step": 39740 }, { "epoch": 0.07046585087133714, "grad_norm": 1.2421875, "learning_rate": 0.0013571756175506542, "loss": 0.2263, "step": 39742 }, { "epoch": 0.07046939703664695, "grad_norm": 1.0703125, "learning_rate": 0.0013571185800299154, "loss": 0.2004, "step": 39744 }, { "epoch": 0.07047294320195678, "grad_norm": 0.1884765625, "learning_rate": 0.0013570615413846894, "loss": 0.1677, "step": 39746 }, { "epoch": 0.0704764893672666, "grad_norm": 0.29296875, "learning_rate": 0.0013570045016152257, "loss": 0.2597, "step": 39748 }, { "epoch": 0.07048003553257641, "grad_norm": 0.349609375, "learning_rate": 0.001356947460721773, "loss": 0.2302, "step": 39750 }, { "epoch": 0.07048358169788622, "grad_norm": 0.49609375, "learning_rate": 0.0013568904187045818, "loss": 0.2152, "step": 39752 }, { "epoch": 0.07048712786319604, "grad_norm": 0.5, "learning_rate": 0.0013568333755639012, "loss": 0.1797, "step": 39754 }, { "epoch": 0.07049067402850585, "grad_norm": 0.58984375, "learning_rate": 0.0013567763312999806, "loss": 0.2028, "step": 39756 }, { "epoch": 0.07049422019381567, "grad_norm": 0.283203125, "learning_rate": 0.0013567192859130692, "loss": 0.1669, "step": 39758 }, { "epoch": 0.07049776635912548, "grad_norm": 0.314453125, "learning_rate": 0.0013566622394034172, "loss": 0.2858, "step": 39760 }, { "epoch": 0.0705013125244353, "grad_norm": 0.302734375, "learning_rate": 0.0013566051917712734, "loss": 0.2104, "step": 39762 }, { "epoch": 0.07050485868974511, "grad_norm": 0.31640625, "learning_rate": 0.0013565481430168876, "loss": 0.1802, "step": 39764 }, { "epoch": 0.07050840485505493, "grad_norm": 0.8203125, "learning_rate": 0.001356491093140509, "loss": 0.2437, "step": 39766 }, { "epoch": 0.07051195102036474, "grad_norm": 1.21875, "learning_rate": 0.0013564340421423876, "loss": 0.2587, "step": 39768 }, { "epoch": 0.07051549718567456, "grad_norm": 0.400390625, "learning_rate": 0.0013563769900227724, "loss": 0.1474, "step": 39770 }, { "epoch": 0.07051904335098437, "grad_norm": 0.447265625, "learning_rate": 0.0013563199367819135, "loss": 0.218, "step": 39772 }, { "epoch": 0.07052258951629418, "grad_norm": 0.84765625, "learning_rate": 0.0013562628824200598, "loss": 0.2109, "step": 39774 }, { "epoch": 0.070526135681604, "grad_norm": 2.25, "learning_rate": 0.0013562058269374613, "loss": 0.4406, "step": 39776 }, { "epoch": 0.07052968184691381, "grad_norm": 0.322265625, "learning_rate": 0.0013561487703343672, "loss": 0.2028, "step": 39778 }, { "epoch": 0.07053322801222363, "grad_norm": 0.4765625, "learning_rate": 0.0013560917126110273, "loss": 0.1721, "step": 39780 }, { "epoch": 0.07053677417753344, "grad_norm": 1.0, "learning_rate": 0.0013560346537676911, "loss": 0.1861, "step": 39782 }, { "epoch": 0.07054032034284326, "grad_norm": 0.828125, "learning_rate": 0.0013559775938046077, "loss": 0.3128, "step": 39784 }, { "epoch": 0.07054386650815307, "grad_norm": 0.353515625, "learning_rate": 0.0013559205327220271, "loss": 0.1694, "step": 39786 }, { "epoch": 0.07054741267346289, "grad_norm": 0.3046875, "learning_rate": 0.0013558634705201989, "loss": 0.1614, "step": 39788 }, { "epoch": 0.0705509588387727, "grad_norm": 0.453125, "learning_rate": 0.0013558064071993723, "loss": 0.1783, "step": 39790 }, { "epoch": 0.07055450500408253, "grad_norm": 0.68359375, "learning_rate": 0.0013557493427597974, "loss": 0.1892, "step": 39792 }, { "epoch": 0.07055805116939234, "grad_norm": 0.431640625, "learning_rate": 0.0013556922772017233, "loss": 0.1349, "step": 39794 }, { "epoch": 0.07056159733470216, "grad_norm": 4.40625, "learning_rate": 0.0013556352105253992, "loss": 0.2353, "step": 39796 }, { "epoch": 0.07056514350001197, "grad_norm": 0.2158203125, "learning_rate": 0.0013555781427310757, "loss": 0.3057, "step": 39798 }, { "epoch": 0.07056868966532179, "grad_norm": 0.56640625, "learning_rate": 0.001355521073819002, "loss": 0.1646, "step": 39800 }, { "epoch": 0.0705722358306316, "grad_norm": 0.47265625, "learning_rate": 0.0013554640037894273, "loss": 0.2572, "step": 39802 }, { "epoch": 0.07057578199594142, "grad_norm": 1.9140625, "learning_rate": 0.0013554069326426016, "loss": 0.278, "step": 39804 }, { "epoch": 0.07057932816125123, "grad_norm": 0.20703125, "learning_rate": 0.0013553498603787741, "loss": 0.1884, "step": 39806 }, { "epoch": 0.07058287432656105, "grad_norm": 0.314453125, "learning_rate": 0.0013552927869981946, "loss": 0.1953, "step": 39808 }, { "epoch": 0.07058642049187086, "grad_norm": 0.318359375, "learning_rate": 0.0013552357125011132, "loss": 0.1982, "step": 39810 }, { "epoch": 0.07058996665718067, "grad_norm": 0.369140625, "learning_rate": 0.0013551786368877783, "loss": 0.1608, "step": 39812 }, { "epoch": 0.07059351282249049, "grad_norm": 0.54296875, "learning_rate": 0.001355121560158441, "loss": 0.1723, "step": 39814 }, { "epoch": 0.0705970589878003, "grad_norm": 0.2314453125, "learning_rate": 0.00135506448231335, "loss": 0.2112, "step": 39816 }, { "epoch": 0.07060060515311012, "grad_norm": 0.5546875, "learning_rate": 0.001355007403352755, "loss": 0.1876, "step": 39818 }, { "epoch": 0.07060415131841993, "grad_norm": 0.296875, "learning_rate": 0.0013549503232769056, "loss": 0.1794, "step": 39820 }, { "epoch": 0.07060769748372975, "grad_norm": 0.39453125, "learning_rate": 0.0013548932420860518, "loss": 0.2297, "step": 39822 }, { "epoch": 0.07061124364903956, "grad_norm": 0.271484375, "learning_rate": 0.0013548361597804429, "loss": 0.1717, "step": 39824 }, { "epoch": 0.07061478981434938, "grad_norm": 0.5234375, "learning_rate": 0.0013547790763603286, "loss": 0.1983, "step": 39826 }, { "epoch": 0.07061833597965919, "grad_norm": 0.4375, "learning_rate": 0.0013547219918259585, "loss": 0.2296, "step": 39828 }, { "epoch": 0.070621882144969, "grad_norm": 1.828125, "learning_rate": 0.0013546649061775828, "loss": 0.2149, "step": 39830 }, { "epoch": 0.07062542831027882, "grad_norm": 0.490234375, "learning_rate": 0.0013546078194154505, "loss": 0.1873, "step": 39832 }, { "epoch": 0.07062897447558864, "grad_norm": 0.7421875, "learning_rate": 0.0013545507315398115, "loss": 0.1988, "step": 39834 }, { "epoch": 0.07063252064089845, "grad_norm": 0.4453125, "learning_rate": 0.0013544936425509155, "loss": 0.1657, "step": 39836 }, { "epoch": 0.07063606680620828, "grad_norm": 0.29296875, "learning_rate": 0.0013544365524490124, "loss": 0.232, "step": 39838 }, { "epoch": 0.07063961297151809, "grad_norm": 0.333984375, "learning_rate": 0.001354379461234351, "loss": 0.1607, "step": 39840 }, { "epoch": 0.07064315913682791, "grad_norm": 0.61328125, "learning_rate": 0.001354322368907182, "loss": 0.1918, "step": 39842 }, { "epoch": 0.07064670530213772, "grad_norm": 0.474609375, "learning_rate": 0.0013542652754677544, "loss": 0.2861, "step": 39844 }, { "epoch": 0.07065025146744754, "grad_norm": 0.66015625, "learning_rate": 0.0013542081809163188, "loss": 0.1896, "step": 39846 }, { "epoch": 0.07065379763275735, "grad_norm": 1.3046875, "learning_rate": 0.0013541510852531236, "loss": 0.2449, "step": 39848 }, { "epoch": 0.07065734379806717, "grad_norm": 0.4375, "learning_rate": 0.0013540939884784196, "loss": 0.2238, "step": 39850 }, { "epoch": 0.07066088996337698, "grad_norm": 0.75, "learning_rate": 0.0013540368905924556, "loss": 0.2319, "step": 39852 }, { "epoch": 0.0706644361286868, "grad_norm": 0.62890625, "learning_rate": 0.001353979791595482, "loss": 0.1616, "step": 39854 }, { "epoch": 0.07066798229399661, "grad_norm": 0.83984375, "learning_rate": 0.0013539226914877488, "loss": 0.1497, "step": 39856 }, { "epoch": 0.07067152845930642, "grad_norm": 0.392578125, "learning_rate": 0.001353865590269505, "loss": 0.143, "step": 39858 }, { "epoch": 0.07067507462461624, "grad_norm": 0.392578125, "learning_rate": 0.0013538084879410003, "loss": 0.1695, "step": 39860 }, { "epoch": 0.07067862078992605, "grad_norm": 0.6640625, "learning_rate": 0.001353751384502485, "loss": 0.1952, "step": 39862 }, { "epoch": 0.07068216695523587, "grad_norm": 0.4765625, "learning_rate": 0.0013536942799542082, "loss": 0.1807, "step": 39864 }, { "epoch": 0.07068571312054568, "grad_norm": 0.412109375, "learning_rate": 0.0013536371742964202, "loss": 0.2531, "step": 39866 }, { "epoch": 0.0706892592858555, "grad_norm": 0.435546875, "learning_rate": 0.0013535800675293706, "loss": 0.1658, "step": 39868 }, { "epoch": 0.07069280545116531, "grad_norm": 0.443359375, "learning_rate": 0.0013535229596533087, "loss": 0.2321, "step": 39870 }, { "epoch": 0.07069635161647513, "grad_norm": 0.419921875, "learning_rate": 0.0013534658506684852, "loss": 0.2076, "step": 39872 }, { "epoch": 0.07069989778178494, "grad_norm": 0.53515625, "learning_rate": 0.0013534087405751491, "loss": 0.4276, "step": 39874 }, { "epoch": 0.07070344394709475, "grad_norm": 0.345703125, "learning_rate": 0.0013533516293735505, "loss": 0.2574, "step": 39876 }, { "epoch": 0.07070699011240457, "grad_norm": 0.404296875, "learning_rate": 0.0013532945170639384, "loss": 0.1819, "step": 39878 }, { "epoch": 0.07071053627771438, "grad_norm": 0.48046875, "learning_rate": 0.0013532374036465638, "loss": 0.1842, "step": 39880 }, { "epoch": 0.07071408244302421, "grad_norm": 0.5859375, "learning_rate": 0.0013531802891216759, "loss": 0.1948, "step": 39882 }, { "epoch": 0.07071762860833403, "grad_norm": 0.2890625, "learning_rate": 0.001353123173489524, "loss": 0.1846, "step": 39884 }, { "epoch": 0.07072117477364384, "grad_norm": 0.2470703125, "learning_rate": 0.0013530660567503587, "loss": 0.2093, "step": 39886 }, { "epoch": 0.07072472093895366, "grad_norm": 0.306640625, "learning_rate": 0.0013530089389044296, "loss": 0.1894, "step": 39888 }, { "epoch": 0.07072826710426347, "grad_norm": 1.8125, "learning_rate": 0.001352951819951986, "loss": 0.2638, "step": 39890 }, { "epoch": 0.07073181326957328, "grad_norm": 0.5625, "learning_rate": 0.0013528946998932786, "loss": 0.1793, "step": 39892 }, { "epoch": 0.0707353594348831, "grad_norm": 0.55078125, "learning_rate": 0.0013528375787285566, "loss": 0.1712, "step": 39894 }, { "epoch": 0.07073890560019291, "grad_norm": 0.408203125, "learning_rate": 0.0013527804564580695, "loss": 0.1586, "step": 39896 }, { "epoch": 0.07074245176550273, "grad_norm": 0.271484375, "learning_rate": 0.0013527233330820677, "loss": 0.1525, "step": 39898 }, { "epoch": 0.07074599793081254, "grad_norm": 0.7109375, "learning_rate": 0.001352666208600801, "loss": 0.2459, "step": 39900 }, { "epoch": 0.07074954409612236, "grad_norm": 0.8515625, "learning_rate": 0.0013526090830145187, "loss": 0.1932, "step": 39902 }, { "epoch": 0.07075309026143217, "grad_norm": 0.349609375, "learning_rate": 0.0013525519563234712, "loss": 0.203, "step": 39904 }, { "epoch": 0.07075663642674199, "grad_norm": 0.67578125, "learning_rate": 0.0013524948285279083, "loss": 0.2397, "step": 39906 }, { "epoch": 0.0707601825920518, "grad_norm": 1.6328125, "learning_rate": 0.0013524376996280794, "loss": 0.2204, "step": 39908 }, { "epoch": 0.07076372875736162, "grad_norm": 0.6171875, "learning_rate": 0.0013523805696242347, "loss": 0.2321, "step": 39910 }, { "epoch": 0.07076727492267143, "grad_norm": 0.265625, "learning_rate": 0.0013523234385166242, "loss": 0.2023, "step": 39912 }, { "epoch": 0.07077082108798124, "grad_norm": 0.2578125, "learning_rate": 0.0013522663063054974, "loss": 0.2345, "step": 39914 }, { "epoch": 0.07077436725329106, "grad_norm": 0.54296875, "learning_rate": 0.0013522091729911043, "loss": 0.1223, "step": 39916 }, { "epoch": 0.07077791341860087, "grad_norm": 0.439453125, "learning_rate": 0.0013521520385736947, "loss": 0.1566, "step": 39918 }, { "epoch": 0.07078145958391069, "grad_norm": 0.609375, "learning_rate": 0.0013520949030535188, "loss": 0.2546, "step": 39920 }, { "epoch": 0.0707850057492205, "grad_norm": 0.63671875, "learning_rate": 0.0013520377664308259, "loss": 0.1791, "step": 39922 }, { "epoch": 0.07078855191453032, "grad_norm": 0.2236328125, "learning_rate": 0.0013519806287058665, "loss": 0.1919, "step": 39924 }, { "epoch": 0.07079209807984013, "grad_norm": 0.2412109375, "learning_rate": 0.00135192348987889, "loss": 0.2205, "step": 39926 }, { "epoch": 0.07079564424514996, "grad_norm": 0.66796875, "learning_rate": 0.0013518663499501467, "loss": 0.1903, "step": 39928 }, { "epoch": 0.07079919041045978, "grad_norm": 2.140625, "learning_rate": 0.0013518092089198862, "loss": 0.1992, "step": 39930 }, { "epoch": 0.07080273657576959, "grad_norm": 0.609375, "learning_rate": 0.0013517520667883586, "loss": 0.1736, "step": 39932 }, { "epoch": 0.0708062827410794, "grad_norm": 0.65625, "learning_rate": 0.0013516949235558133, "loss": 0.2434, "step": 39934 }, { "epoch": 0.07080982890638922, "grad_norm": 0.365234375, "learning_rate": 0.001351637779222501, "loss": 0.2039, "step": 39936 }, { "epoch": 0.07081337507169903, "grad_norm": 0.38671875, "learning_rate": 0.0013515806337886709, "loss": 0.2317, "step": 39938 }, { "epoch": 0.07081692123700885, "grad_norm": 0.330078125, "learning_rate": 0.0013515234872545734, "loss": 0.1936, "step": 39940 }, { "epoch": 0.07082046740231866, "grad_norm": 0.271484375, "learning_rate": 0.0013514663396204582, "loss": 0.1989, "step": 39942 }, { "epoch": 0.07082401356762848, "grad_norm": 0.75390625, "learning_rate": 0.0013514091908865755, "loss": 0.2187, "step": 39944 }, { "epoch": 0.07082755973293829, "grad_norm": 0.412109375, "learning_rate": 0.0013513520410531749, "loss": 0.2441, "step": 39946 }, { "epoch": 0.0708311058982481, "grad_norm": 0.55859375, "learning_rate": 0.0013512948901205064, "loss": 0.2104, "step": 39948 }, { "epoch": 0.07083465206355792, "grad_norm": 0.412109375, "learning_rate": 0.0013512377380888202, "loss": 0.2011, "step": 39950 }, { "epoch": 0.07083819822886774, "grad_norm": 0.5703125, "learning_rate": 0.0013511805849583663, "loss": 0.1808, "step": 39952 }, { "epoch": 0.07084174439417755, "grad_norm": 0.94140625, "learning_rate": 0.0013511234307293939, "loss": 0.1694, "step": 39954 }, { "epoch": 0.07084529055948736, "grad_norm": 0.73046875, "learning_rate": 0.0013510662754021537, "loss": 0.2588, "step": 39956 }, { "epoch": 0.07084883672479718, "grad_norm": 2.921875, "learning_rate": 0.0013510091189768954, "loss": 0.4586, "step": 39958 }, { "epoch": 0.070852382890107, "grad_norm": 1.59375, "learning_rate": 0.0013509519614538688, "loss": 0.2665, "step": 39960 }, { "epoch": 0.07085592905541681, "grad_norm": 0.486328125, "learning_rate": 0.0013508948028333244, "loss": 0.2802, "step": 39962 }, { "epoch": 0.07085947522072662, "grad_norm": 0.78515625, "learning_rate": 0.001350837643115512, "loss": 0.1708, "step": 39964 }, { "epoch": 0.07086302138603644, "grad_norm": 2.109375, "learning_rate": 0.0013507804823006814, "loss": 0.3963, "step": 39966 }, { "epoch": 0.07086656755134625, "grad_norm": 0.45703125, "learning_rate": 0.0013507233203890823, "loss": 0.1948, "step": 39968 }, { "epoch": 0.07087011371665607, "grad_norm": 0.47265625, "learning_rate": 0.0013506661573809654, "loss": 0.2886, "step": 39970 }, { "epoch": 0.07087365988196588, "grad_norm": 0.388671875, "learning_rate": 0.0013506089932765802, "loss": 0.1551, "step": 39972 }, { "epoch": 0.07087720604727571, "grad_norm": 0.47265625, "learning_rate": 0.0013505518280761768, "loss": 0.2885, "step": 39974 }, { "epoch": 0.07088075221258552, "grad_norm": 0.41015625, "learning_rate": 0.0013504946617800052, "loss": 0.2086, "step": 39976 }, { "epoch": 0.07088429837789534, "grad_norm": 0.67578125, "learning_rate": 0.0013504374943883156, "loss": 0.1693, "step": 39978 }, { "epoch": 0.07088784454320515, "grad_norm": 0.26171875, "learning_rate": 0.0013503803259013575, "loss": 0.1964, "step": 39980 }, { "epoch": 0.07089139070851497, "grad_norm": 0.57421875, "learning_rate": 0.0013503231563193815, "loss": 0.2173, "step": 39982 }, { "epoch": 0.07089493687382478, "grad_norm": 1.5703125, "learning_rate": 0.0013502659856426378, "loss": 0.1788, "step": 39984 }, { "epoch": 0.0708984830391346, "grad_norm": 2.171875, "learning_rate": 0.0013502088138713756, "loss": 0.281, "step": 39986 }, { "epoch": 0.07090202920444441, "grad_norm": 1.28125, "learning_rate": 0.0013501516410058454, "loss": 0.1778, "step": 39988 }, { "epoch": 0.07090557536975423, "grad_norm": 0.54296875, "learning_rate": 0.0013500944670462975, "loss": 0.2102, "step": 39990 }, { "epoch": 0.07090912153506404, "grad_norm": 0.4765625, "learning_rate": 0.0013500372919929816, "loss": 0.1924, "step": 39992 }, { "epoch": 0.07091266770037385, "grad_norm": 0.66015625, "learning_rate": 0.0013499801158461478, "loss": 0.2229, "step": 39994 }, { "epoch": 0.07091621386568367, "grad_norm": 0.58984375, "learning_rate": 0.001349922938606046, "loss": 0.2235, "step": 39996 }, { "epoch": 0.07091976003099348, "grad_norm": 0.498046875, "learning_rate": 0.0013498657602729267, "loss": 0.2502, "step": 39998 }, { "epoch": 0.0709233061963033, "grad_norm": 0.7578125, "learning_rate": 0.0013498085808470394, "loss": 0.1928, "step": 40000 }, { "epoch": 0.07092685236161311, "grad_norm": 0.2578125, "learning_rate": 0.0013497514003286346, "loss": 0.2354, "step": 40002 }, { "epoch": 0.07093039852692293, "grad_norm": 1.9609375, "learning_rate": 0.0013496942187179624, "loss": 0.2223, "step": 40004 }, { "epoch": 0.07093394469223274, "grad_norm": 0.5859375, "learning_rate": 0.0013496370360152727, "loss": 0.2018, "step": 40006 }, { "epoch": 0.07093749085754256, "grad_norm": 0.275390625, "learning_rate": 0.0013495798522208157, "loss": 0.1474, "step": 40008 }, { "epoch": 0.07094103702285237, "grad_norm": 0.3046875, "learning_rate": 0.0013495226673348412, "loss": 0.2059, "step": 40010 }, { "epoch": 0.07094458318816219, "grad_norm": 0.5234375, "learning_rate": 0.0013494654813575994, "loss": 0.1858, "step": 40012 }, { "epoch": 0.070948129353472, "grad_norm": 0.82421875, "learning_rate": 0.0013494082942893406, "loss": 0.2039, "step": 40014 }, { "epoch": 0.07095167551878181, "grad_norm": 0.3203125, "learning_rate": 0.0013493511061303145, "loss": 0.1271, "step": 40016 }, { "epoch": 0.07095522168409164, "grad_norm": 0.361328125, "learning_rate": 0.0013492939168807724, "loss": 0.2199, "step": 40018 }, { "epoch": 0.07095876784940146, "grad_norm": 1.0859375, "learning_rate": 0.0013492367265409623, "loss": 0.2153, "step": 40020 }, { "epoch": 0.07096231401471127, "grad_norm": 0.3125, "learning_rate": 0.0013491795351111364, "loss": 0.2096, "step": 40022 }, { "epoch": 0.07096586018002109, "grad_norm": 0.341796875, "learning_rate": 0.001349122342591544, "loss": 0.1386, "step": 40024 }, { "epoch": 0.0709694063453309, "grad_norm": 0.2451171875, "learning_rate": 0.0013490651489824349, "loss": 0.1994, "step": 40026 }, { "epoch": 0.07097295251064072, "grad_norm": 0.66015625, "learning_rate": 0.0013490079542840595, "loss": 0.1639, "step": 40028 }, { "epoch": 0.07097649867595053, "grad_norm": 2.046875, "learning_rate": 0.0013489507584966678, "loss": 0.2441, "step": 40030 }, { "epoch": 0.07098004484126035, "grad_norm": 0.416015625, "learning_rate": 0.0013488935616205102, "loss": 0.2256, "step": 40032 }, { "epoch": 0.07098359100657016, "grad_norm": 5.34375, "learning_rate": 0.001348836363655837, "loss": 0.2626, "step": 40034 }, { "epoch": 0.07098713717187997, "grad_norm": 3.21875, "learning_rate": 0.0013487791646028978, "loss": 0.3327, "step": 40036 }, { "epoch": 0.07099068333718979, "grad_norm": 0.67578125, "learning_rate": 0.0013487219644619428, "loss": 0.2064, "step": 40038 }, { "epoch": 0.0709942295024996, "grad_norm": 0.3515625, "learning_rate": 0.0013486647632332228, "loss": 0.1445, "step": 40040 }, { "epoch": 0.07099777566780942, "grad_norm": 0.5703125, "learning_rate": 0.0013486075609169877, "loss": 0.1962, "step": 40042 }, { "epoch": 0.07100132183311923, "grad_norm": 0.37890625, "learning_rate": 0.0013485503575134871, "loss": 0.1823, "step": 40044 }, { "epoch": 0.07100486799842905, "grad_norm": 1.1015625, "learning_rate": 0.001348493153022972, "loss": 0.3903, "step": 40046 }, { "epoch": 0.07100841416373886, "grad_norm": 0.34765625, "learning_rate": 0.001348435947445692, "loss": 0.1661, "step": 40048 }, { "epoch": 0.07101196032904868, "grad_norm": 0.58984375, "learning_rate": 0.0013483787407818973, "loss": 0.1873, "step": 40050 }, { "epoch": 0.07101550649435849, "grad_norm": 0.498046875, "learning_rate": 0.0013483215330318385, "loss": 0.1846, "step": 40052 }, { "epoch": 0.0710190526596683, "grad_norm": 1.46875, "learning_rate": 0.0013482643241957656, "loss": 0.1852, "step": 40054 }, { "epoch": 0.07102259882497812, "grad_norm": 0.51953125, "learning_rate": 0.0013482071142739288, "loss": 0.1306, "step": 40056 }, { "epoch": 0.07102614499028793, "grad_norm": 0.26953125, "learning_rate": 0.0013481499032665777, "loss": 0.2301, "step": 40058 }, { "epoch": 0.07102969115559775, "grad_norm": 0.494140625, "learning_rate": 0.0013480926911739637, "loss": 0.2063, "step": 40060 }, { "epoch": 0.07103323732090756, "grad_norm": 0.201171875, "learning_rate": 0.0013480354779963363, "loss": 0.1468, "step": 40062 }, { "epoch": 0.07103678348621739, "grad_norm": 0.291015625, "learning_rate": 0.0013479782637339458, "loss": 0.1792, "step": 40064 }, { "epoch": 0.0710403296515272, "grad_norm": 0.21484375, "learning_rate": 0.0013479210483870422, "loss": 0.1711, "step": 40066 }, { "epoch": 0.07104387581683702, "grad_norm": 0.35546875, "learning_rate": 0.001347863831955876, "loss": 0.2095, "step": 40068 }, { "epoch": 0.07104742198214684, "grad_norm": 0.28125, "learning_rate": 0.0013478066144406973, "loss": 0.2072, "step": 40070 }, { "epoch": 0.07105096814745665, "grad_norm": 0.251953125, "learning_rate": 0.0013477493958417566, "loss": 0.2671, "step": 40072 }, { "epoch": 0.07105451431276646, "grad_norm": 0.388671875, "learning_rate": 0.0013476921761593037, "loss": 0.231, "step": 40074 }, { "epoch": 0.07105806047807628, "grad_norm": 0.349609375, "learning_rate": 0.0013476349553935894, "loss": 0.1842, "step": 40076 }, { "epoch": 0.0710616066433861, "grad_norm": 1.8359375, "learning_rate": 0.0013475777335448637, "loss": 0.2328, "step": 40078 }, { "epoch": 0.07106515280869591, "grad_norm": 1.9296875, "learning_rate": 0.0013475205106133765, "loss": 0.2561, "step": 40080 }, { "epoch": 0.07106869897400572, "grad_norm": 1.859375, "learning_rate": 0.0013474632865993785, "loss": 0.2955, "step": 40082 }, { "epoch": 0.07107224513931554, "grad_norm": 1.2421875, "learning_rate": 0.0013474060615031199, "loss": 0.1974, "step": 40084 }, { "epoch": 0.07107579130462535, "grad_norm": 0.314453125, "learning_rate": 0.0013473488353248507, "loss": 0.1873, "step": 40086 }, { "epoch": 0.07107933746993517, "grad_norm": 0.27734375, "learning_rate": 0.0013472916080648214, "loss": 0.121, "step": 40088 }, { "epoch": 0.07108288363524498, "grad_norm": 0.279296875, "learning_rate": 0.0013472343797232824, "loss": 0.1732, "step": 40090 }, { "epoch": 0.0710864298005548, "grad_norm": 0.2578125, "learning_rate": 0.0013471771503004838, "loss": 0.1818, "step": 40092 }, { "epoch": 0.07108997596586461, "grad_norm": 0.455078125, "learning_rate": 0.0013471199197966758, "loss": 0.1985, "step": 40094 }, { "epoch": 0.07109352213117442, "grad_norm": 0.83984375, "learning_rate": 0.0013470626882121088, "loss": 0.2139, "step": 40096 }, { "epoch": 0.07109706829648424, "grad_norm": 0.59765625, "learning_rate": 0.0013470054555470335, "loss": 0.4669, "step": 40098 }, { "epoch": 0.07110061446179405, "grad_norm": 1.03125, "learning_rate": 0.0013469482218016996, "loss": 0.4394, "step": 40100 }, { "epoch": 0.07110416062710387, "grad_norm": 0.375, "learning_rate": 0.0013468909869763575, "loss": 0.1889, "step": 40102 }, { "epoch": 0.07110770679241368, "grad_norm": 0.2490234375, "learning_rate": 0.0013468337510712577, "loss": 0.2205, "step": 40104 }, { "epoch": 0.0711112529577235, "grad_norm": 0.98828125, "learning_rate": 0.00134677651408665, "loss": 0.2248, "step": 40106 }, { "epoch": 0.07111479912303331, "grad_norm": 0.365234375, "learning_rate": 0.001346719276022786, "loss": 0.1902, "step": 40108 }, { "epoch": 0.07111834528834314, "grad_norm": 0.43359375, "learning_rate": 0.0013466620368799147, "loss": 0.1305, "step": 40110 }, { "epoch": 0.07112189145365296, "grad_norm": 0.40234375, "learning_rate": 0.001346604796658287, "loss": 0.1666, "step": 40112 }, { "epoch": 0.07112543761896277, "grad_norm": 1.234375, "learning_rate": 0.0013465475553581531, "loss": 0.1898, "step": 40114 }, { "epoch": 0.07112898378427258, "grad_norm": 1.3203125, "learning_rate": 0.0013464903129797637, "loss": 0.2034, "step": 40116 }, { "epoch": 0.0711325299495824, "grad_norm": 0.5546875, "learning_rate": 0.0013464330695233688, "loss": 0.1737, "step": 40118 }, { "epoch": 0.07113607611489221, "grad_norm": 0.466796875, "learning_rate": 0.0013463758249892186, "loss": 0.2532, "step": 40120 }, { "epoch": 0.07113962228020203, "grad_norm": 0.5546875, "learning_rate": 0.0013463185793775637, "loss": 0.1879, "step": 40122 }, { "epoch": 0.07114316844551184, "grad_norm": 0.74609375, "learning_rate": 0.001346261332688654, "loss": 0.2033, "step": 40124 }, { "epoch": 0.07114671461082166, "grad_norm": 0.5625, "learning_rate": 0.001346204084922741, "loss": 0.1706, "step": 40126 }, { "epoch": 0.07115026077613147, "grad_norm": 0.447265625, "learning_rate": 0.001346146836080074, "loss": 0.1548, "step": 40128 }, { "epoch": 0.07115380694144129, "grad_norm": 0.46484375, "learning_rate": 0.0013460895861609036, "loss": 0.1819, "step": 40130 }, { "epoch": 0.0711573531067511, "grad_norm": 0.2373046875, "learning_rate": 0.0013460323351654805, "loss": 0.209, "step": 40132 }, { "epoch": 0.07116089927206092, "grad_norm": 0.64453125, "learning_rate": 0.001345975083094055, "loss": 0.2712, "step": 40134 }, { "epoch": 0.07116444543737073, "grad_norm": 0.251953125, "learning_rate": 0.0013459178299468774, "loss": 0.1871, "step": 40136 }, { "epoch": 0.07116799160268054, "grad_norm": 1.0, "learning_rate": 0.0013458605757241976, "loss": 0.2765, "step": 40138 }, { "epoch": 0.07117153776799036, "grad_norm": 0.287109375, "learning_rate": 0.0013458033204262667, "loss": 0.147, "step": 40140 }, { "epoch": 0.07117508393330017, "grad_norm": 1.0859375, "learning_rate": 0.001345746064053335, "loss": 0.2862, "step": 40142 }, { "epoch": 0.07117863009860999, "grad_norm": 1.2890625, "learning_rate": 0.0013456888066056525, "loss": 0.3539, "step": 40144 }, { "epoch": 0.0711821762639198, "grad_norm": 0.275390625, "learning_rate": 0.00134563154808347, "loss": 0.1349, "step": 40146 }, { "epoch": 0.07118572242922962, "grad_norm": 0.396484375, "learning_rate": 0.0013455742884870376, "loss": 0.2638, "step": 40148 }, { "epoch": 0.07118926859453943, "grad_norm": 0.265625, "learning_rate": 0.0013455170278166065, "loss": 0.332, "step": 40150 }, { "epoch": 0.07119281475984925, "grad_norm": 0.60546875, "learning_rate": 0.001345459766072426, "loss": 0.1879, "step": 40152 }, { "epoch": 0.07119636092515907, "grad_norm": 0.2890625, "learning_rate": 0.0013454025032547472, "loss": 0.1985, "step": 40154 }, { "epoch": 0.07119990709046889, "grad_norm": 0.345703125, "learning_rate": 0.0013453452393638206, "loss": 0.1757, "step": 40156 }, { "epoch": 0.0712034532557787, "grad_norm": 0.68359375, "learning_rate": 0.0013452879743998962, "loss": 0.1844, "step": 40158 }, { "epoch": 0.07120699942108852, "grad_norm": 0.2578125, "learning_rate": 0.0013452307083632244, "loss": 0.1539, "step": 40160 }, { "epoch": 0.07121054558639833, "grad_norm": 0.2578125, "learning_rate": 0.0013451734412540564, "loss": 0.1911, "step": 40162 }, { "epoch": 0.07121409175170815, "grad_norm": 0.365234375, "learning_rate": 0.001345116173072642, "loss": 0.3409, "step": 40164 }, { "epoch": 0.07121763791701796, "grad_norm": 0.384765625, "learning_rate": 0.001345058903819232, "loss": 0.1577, "step": 40166 }, { "epoch": 0.07122118408232778, "grad_norm": 0.31640625, "learning_rate": 0.0013450016334940764, "loss": 0.2546, "step": 40168 }, { "epoch": 0.07122473024763759, "grad_norm": 0.43359375, "learning_rate": 0.0013449443620974263, "loss": 0.2311, "step": 40170 }, { "epoch": 0.0712282764129474, "grad_norm": 2.859375, "learning_rate": 0.0013448870896295316, "loss": 0.1926, "step": 40172 }, { "epoch": 0.07123182257825722, "grad_norm": 0.26171875, "learning_rate": 0.0013448298160906433, "loss": 0.1379, "step": 40174 }, { "epoch": 0.07123536874356703, "grad_norm": 0.375, "learning_rate": 0.0013447725414810112, "loss": 0.1992, "step": 40176 }, { "epoch": 0.07123891490887685, "grad_norm": 0.43359375, "learning_rate": 0.0013447152658008865, "loss": 0.2278, "step": 40178 }, { "epoch": 0.07124246107418666, "grad_norm": 0.5546875, "learning_rate": 0.0013446579890505192, "loss": 0.1916, "step": 40180 }, { "epoch": 0.07124600723949648, "grad_norm": 0.33203125, "learning_rate": 0.00134460071123016, "loss": 0.153, "step": 40182 }, { "epoch": 0.07124955340480629, "grad_norm": 0.375, "learning_rate": 0.0013445434323400593, "loss": 0.1951, "step": 40184 }, { "epoch": 0.07125309957011611, "grad_norm": 0.416015625, "learning_rate": 0.0013444861523804678, "loss": 0.179, "step": 40186 }, { "epoch": 0.07125664573542592, "grad_norm": 0.484375, "learning_rate": 0.001344428871351636, "loss": 0.1586, "step": 40188 }, { "epoch": 0.07126019190073574, "grad_norm": 0.42578125, "learning_rate": 0.001344371589253814, "loss": 0.1497, "step": 40190 }, { "epoch": 0.07126373806604555, "grad_norm": 1.0859375, "learning_rate": 0.0013443143060872526, "loss": 0.1483, "step": 40192 }, { "epoch": 0.07126728423135537, "grad_norm": 0.60546875, "learning_rate": 0.0013442570218522027, "loss": 0.4609, "step": 40194 }, { "epoch": 0.07127083039666518, "grad_norm": 0.375, "learning_rate": 0.0013441997365489142, "loss": 0.1719, "step": 40196 }, { "epoch": 0.071274376561975, "grad_norm": 0.72265625, "learning_rate": 0.001344142450177638, "loss": 0.2207, "step": 40198 }, { "epoch": 0.07127792272728482, "grad_norm": 1.609375, "learning_rate": 0.0013440851627386245, "loss": 0.4367, "step": 40200 }, { "epoch": 0.07128146889259464, "grad_norm": 0.251953125, "learning_rate": 0.0013440278742321244, "loss": 0.133, "step": 40202 }, { "epoch": 0.07128501505790445, "grad_norm": 0.302734375, "learning_rate": 0.0013439705846583878, "loss": 0.1933, "step": 40204 }, { "epoch": 0.07128856122321427, "grad_norm": 0.466796875, "learning_rate": 0.0013439132940176659, "loss": 0.2138, "step": 40206 }, { "epoch": 0.07129210738852408, "grad_norm": 0.255859375, "learning_rate": 0.001343856002310209, "loss": 0.2445, "step": 40208 }, { "epoch": 0.0712956535538339, "grad_norm": 0.314453125, "learning_rate": 0.0013437987095362672, "loss": 0.2399, "step": 40210 }, { "epoch": 0.07129919971914371, "grad_norm": 0.50390625, "learning_rate": 0.0013437414156960918, "loss": 0.2024, "step": 40212 }, { "epoch": 0.07130274588445353, "grad_norm": 0.57421875, "learning_rate": 0.001343684120789933, "loss": 0.2011, "step": 40214 }, { "epoch": 0.07130629204976334, "grad_norm": 0.83203125, "learning_rate": 0.0013436268248180412, "loss": 0.1483, "step": 40216 }, { "epoch": 0.07130983821507315, "grad_norm": 0.5859375, "learning_rate": 0.0013435695277806672, "loss": 0.2048, "step": 40218 }, { "epoch": 0.07131338438038297, "grad_norm": 1.4453125, "learning_rate": 0.0013435122296780618, "loss": 0.2256, "step": 40220 }, { "epoch": 0.07131693054569278, "grad_norm": 0.5234375, "learning_rate": 0.001343454930510475, "loss": 0.2218, "step": 40222 }, { "epoch": 0.0713204767110026, "grad_norm": 0.326171875, "learning_rate": 0.001343397630278158, "loss": 0.154, "step": 40224 }, { "epoch": 0.07132402287631241, "grad_norm": 2.03125, "learning_rate": 0.001343340328981361, "loss": 0.2788, "step": 40226 }, { "epoch": 0.07132756904162223, "grad_norm": 0.82421875, "learning_rate": 0.001343283026620335, "loss": 0.3385, "step": 40228 }, { "epoch": 0.07133111520693204, "grad_norm": 0.39453125, "learning_rate": 0.0013432257231953304, "loss": 0.207, "step": 40230 }, { "epoch": 0.07133466137224186, "grad_norm": 0.53515625, "learning_rate": 0.0013431684187065979, "loss": 0.1742, "step": 40232 }, { "epoch": 0.07133820753755167, "grad_norm": 0.578125, "learning_rate": 0.0013431111131543873, "loss": 0.2584, "step": 40234 }, { "epoch": 0.07134175370286149, "grad_norm": 0.46875, "learning_rate": 0.0013430538065389503, "loss": 0.1486, "step": 40236 }, { "epoch": 0.0713452998681713, "grad_norm": 0.9453125, "learning_rate": 0.001342996498860537, "loss": 0.1609, "step": 40238 }, { "epoch": 0.07134884603348111, "grad_norm": 0.3203125, "learning_rate": 0.0013429391901193983, "loss": 0.1823, "step": 40240 }, { "epoch": 0.07135239219879093, "grad_norm": 1.25, "learning_rate": 0.0013428818803157847, "loss": 0.2317, "step": 40242 }, { "epoch": 0.07135593836410074, "grad_norm": 0.6796875, "learning_rate": 0.0013428245694499469, "loss": 0.203, "step": 40244 }, { "epoch": 0.07135948452941057, "grad_norm": 0.65234375, "learning_rate": 0.0013427672575221353, "loss": 0.2965, "step": 40246 }, { "epoch": 0.07136303069472039, "grad_norm": 0.7890625, "learning_rate": 0.0013427099445326007, "loss": 0.1921, "step": 40248 }, { "epoch": 0.0713665768600302, "grad_norm": 1.078125, "learning_rate": 0.0013426526304815942, "loss": 0.2169, "step": 40250 }, { "epoch": 0.07137012302534002, "grad_norm": 0.609375, "learning_rate": 0.0013425953153693659, "loss": 0.2054, "step": 40252 }, { "epoch": 0.07137366919064983, "grad_norm": 0.302734375, "learning_rate": 0.0013425379991961661, "loss": 0.2181, "step": 40254 }, { "epoch": 0.07137721535595964, "grad_norm": 0.287109375, "learning_rate": 0.0013424806819622465, "loss": 0.1932, "step": 40256 }, { "epoch": 0.07138076152126946, "grad_norm": 0.33203125, "learning_rate": 0.001342423363667857, "loss": 0.2389, "step": 40258 }, { "epoch": 0.07138430768657927, "grad_norm": 0.73828125, "learning_rate": 0.0013423660443132488, "loss": 0.2431, "step": 40260 }, { "epoch": 0.07138785385188909, "grad_norm": 0.9375, "learning_rate": 0.001342308723898672, "loss": 0.1815, "step": 40262 }, { "epoch": 0.0713914000171989, "grad_norm": 0.205078125, "learning_rate": 0.0013422514024243777, "loss": 0.1432, "step": 40264 }, { "epoch": 0.07139494618250872, "grad_norm": 0.486328125, "learning_rate": 0.0013421940798906168, "loss": 0.2577, "step": 40266 }, { "epoch": 0.07139849234781853, "grad_norm": 0.486328125, "learning_rate": 0.0013421367562976392, "loss": 0.2062, "step": 40268 }, { "epoch": 0.07140203851312835, "grad_norm": 0.341796875, "learning_rate": 0.0013420794316456961, "loss": 0.16, "step": 40270 }, { "epoch": 0.07140558467843816, "grad_norm": 0.326171875, "learning_rate": 0.0013420221059350385, "loss": 0.2026, "step": 40272 }, { "epoch": 0.07140913084374798, "grad_norm": 0.39453125, "learning_rate": 0.0013419647791659166, "loss": 0.1878, "step": 40274 }, { "epoch": 0.07141267700905779, "grad_norm": 2.75, "learning_rate": 0.0013419074513385813, "loss": 0.1858, "step": 40276 }, { "epoch": 0.0714162231743676, "grad_norm": 0.400390625, "learning_rate": 0.0013418501224532833, "loss": 0.2026, "step": 40278 }, { "epoch": 0.07141976933967742, "grad_norm": 0.796875, "learning_rate": 0.0013417927925102736, "loss": 0.4541, "step": 40280 }, { "epoch": 0.07142331550498723, "grad_norm": 0.359375, "learning_rate": 0.001341735461509802, "loss": 0.2149, "step": 40282 }, { "epoch": 0.07142686167029705, "grad_norm": 0.9453125, "learning_rate": 0.0013416781294521205, "loss": 0.231, "step": 40284 }, { "epoch": 0.07143040783560686, "grad_norm": 0.74609375, "learning_rate": 0.0013416207963374794, "loss": 0.2131, "step": 40286 }, { "epoch": 0.07143395400091668, "grad_norm": 0.419921875, "learning_rate": 0.0013415634621661292, "loss": 0.1819, "step": 40288 }, { "epoch": 0.0714375001662265, "grad_norm": 1.59375, "learning_rate": 0.0013415061269383202, "loss": 0.1977, "step": 40290 }, { "epoch": 0.07144104633153632, "grad_norm": 2.46875, "learning_rate": 0.001341448790654304, "loss": 0.3351, "step": 40292 }, { "epoch": 0.07144459249684613, "grad_norm": 0.32421875, "learning_rate": 0.001341391453314331, "loss": 0.252, "step": 40294 }, { "epoch": 0.07144813866215595, "grad_norm": 0.462890625, "learning_rate": 0.0013413341149186523, "loss": 0.2049, "step": 40296 }, { "epoch": 0.07145168482746576, "grad_norm": 0.443359375, "learning_rate": 0.001341276775467518, "loss": 0.163, "step": 40298 }, { "epoch": 0.07145523099277558, "grad_norm": 0.359375, "learning_rate": 0.0013412194349611793, "loss": 0.2085, "step": 40300 }, { "epoch": 0.0714587771580854, "grad_norm": 0.1396484375, "learning_rate": 0.001341162093399887, "loss": 0.153, "step": 40302 }, { "epoch": 0.07146232332339521, "grad_norm": 1.7890625, "learning_rate": 0.0013411047507838917, "loss": 0.1887, "step": 40304 }, { "epoch": 0.07146586948870502, "grad_norm": 0.515625, "learning_rate": 0.0013410474071134447, "loss": 0.2239, "step": 40306 }, { "epoch": 0.07146941565401484, "grad_norm": 0.99609375, "learning_rate": 0.0013409900623887958, "loss": 0.2187, "step": 40308 }, { "epoch": 0.07147296181932465, "grad_norm": 0.7578125, "learning_rate": 0.0013409327166101965, "loss": 0.2109, "step": 40310 }, { "epoch": 0.07147650798463447, "grad_norm": 0.35546875, "learning_rate": 0.0013408753697778978, "loss": 0.2174, "step": 40312 }, { "epoch": 0.07148005414994428, "grad_norm": 1.1484375, "learning_rate": 0.0013408180218921495, "loss": 0.4291, "step": 40314 }, { "epoch": 0.0714836003152541, "grad_norm": 0.50390625, "learning_rate": 0.0013407606729532034, "loss": 0.2038, "step": 40316 }, { "epoch": 0.07148714648056391, "grad_norm": 0.369140625, "learning_rate": 0.0013407033229613102, "loss": 0.1855, "step": 40318 }, { "epoch": 0.07149069264587372, "grad_norm": 1.2890625, "learning_rate": 0.0013406459719167202, "loss": 0.1618, "step": 40320 }, { "epoch": 0.07149423881118354, "grad_norm": 0.48828125, "learning_rate": 0.0013405886198196845, "loss": 0.1814, "step": 40322 }, { "epoch": 0.07149778497649335, "grad_norm": 0.455078125, "learning_rate": 0.0013405312666704538, "loss": 0.2155, "step": 40324 }, { "epoch": 0.07150133114180317, "grad_norm": 0.5234375, "learning_rate": 0.0013404739124692796, "loss": 0.3462, "step": 40326 }, { "epoch": 0.07150487730711298, "grad_norm": 0.412109375, "learning_rate": 0.0013404165572164118, "loss": 0.2232, "step": 40328 }, { "epoch": 0.0715084234724228, "grad_norm": 0.52734375, "learning_rate": 0.0013403592009121016, "loss": 0.1761, "step": 40330 }, { "epoch": 0.07151196963773261, "grad_norm": 0.455078125, "learning_rate": 0.0013403018435565997, "loss": 0.1795, "step": 40332 }, { "epoch": 0.07151551580304243, "grad_norm": 0.408203125, "learning_rate": 0.0013402444851501574, "loss": 0.2882, "step": 40334 }, { "epoch": 0.07151906196835225, "grad_norm": 0.53515625, "learning_rate": 0.001340187125693025, "loss": 0.2407, "step": 40336 }, { "epoch": 0.07152260813366207, "grad_norm": 0.859375, "learning_rate": 0.0013401297651854543, "loss": 0.1491, "step": 40338 }, { "epoch": 0.07152615429897188, "grad_norm": 0.443359375, "learning_rate": 0.001340072403627695, "loss": 0.2113, "step": 40340 }, { "epoch": 0.0715297004642817, "grad_norm": 0.921875, "learning_rate": 0.0013400150410199986, "loss": 0.2485, "step": 40342 }, { "epoch": 0.07153324662959151, "grad_norm": 0.48828125, "learning_rate": 0.0013399576773626153, "loss": 0.18, "step": 40344 }, { "epoch": 0.07153679279490133, "grad_norm": 0.486328125, "learning_rate": 0.0013399003126557973, "loss": 0.1727, "step": 40346 }, { "epoch": 0.07154033896021114, "grad_norm": 0.3046875, "learning_rate": 0.0013398429468997942, "loss": 0.4327, "step": 40348 }, { "epoch": 0.07154388512552096, "grad_norm": 0.5390625, "learning_rate": 0.0013397855800948576, "loss": 0.2121, "step": 40350 }, { "epoch": 0.07154743129083077, "grad_norm": 0.4140625, "learning_rate": 0.0013397282122412377, "loss": 0.2119, "step": 40352 }, { "epoch": 0.07155097745614059, "grad_norm": 0.5, "learning_rate": 0.0013396708433391863, "loss": 0.2414, "step": 40354 }, { "epoch": 0.0715545236214504, "grad_norm": 0.30078125, "learning_rate": 0.0013396134733889536, "loss": 0.258, "step": 40356 }, { "epoch": 0.07155806978676021, "grad_norm": 0.5703125, "learning_rate": 0.0013395561023907912, "loss": 0.2878, "step": 40358 }, { "epoch": 0.07156161595207003, "grad_norm": 0.5, "learning_rate": 0.0013394987303449494, "loss": 0.1462, "step": 40360 }, { "epoch": 0.07156516211737984, "grad_norm": 0.4140625, "learning_rate": 0.001339441357251679, "loss": 0.1919, "step": 40362 }, { "epoch": 0.07156870828268966, "grad_norm": 0.80859375, "learning_rate": 0.001339383983111231, "loss": 0.2032, "step": 40364 }, { "epoch": 0.07157225444799947, "grad_norm": 0.416015625, "learning_rate": 0.0013393266079238574, "loss": 0.2108, "step": 40366 }, { "epoch": 0.07157580061330929, "grad_norm": 0.4765625, "learning_rate": 0.0013392692316898074, "loss": 0.1585, "step": 40368 }, { "epoch": 0.0715793467786191, "grad_norm": 0.458984375, "learning_rate": 0.0013392118544093329, "loss": 0.1658, "step": 40370 }, { "epoch": 0.07158289294392892, "grad_norm": 0.3359375, "learning_rate": 0.0013391544760826845, "loss": 0.1495, "step": 40372 }, { "epoch": 0.07158643910923873, "grad_norm": 0.2041015625, "learning_rate": 0.0013390970967101137, "loss": 0.2032, "step": 40374 }, { "epoch": 0.07158998527454855, "grad_norm": 0.734375, "learning_rate": 0.001339039716291871, "loss": 0.2321, "step": 40376 }, { "epoch": 0.07159353143985836, "grad_norm": 0.4453125, "learning_rate": 0.0013389823348282078, "loss": 0.1745, "step": 40378 }, { "epoch": 0.07159707760516817, "grad_norm": 0.376953125, "learning_rate": 0.0013389249523193742, "loss": 0.1981, "step": 40380 }, { "epoch": 0.071600623770478, "grad_norm": 0.80078125, "learning_rate": 0.001338867568765622, "loss": 0.1894, "step": 40382 }, { "epoch": 0.07160416993578782, "grad_norm": 0.2392578125, "learning_rate": 0.0013388101841672014, "loss": 0.2552, "step": 40384 }, { "epoch": 0.07160771610109763, "grad_norm": 0.1875, "learning_rate": 0.001338752798524364, "loss": 0.2004, "step": 40386 }, { "epoch": 0.07161126226640745, "grad_norm": 0.365234375, "learning_rate": 0.0013386954118373604, "loss": 0.134, "step": 40388 }, { "epoch": 0.07161480843171726, "grad_norm": 0.275390625, "learning_rate": 0.001338638024106442, "loss": 0.1462, "step": 40390 }, { "epoch": 0.07161835459702708, "grad_norm": 0.453125, "learning_rate": 0.0013385806353318594, "loss": 0.3199, "step": 40392 }, { "epoch": 0.07162190076233689, "grad_norm": 0.69140625, "learning_rate": 0.0013385232455138635, "loss": 0.2003, "step": 40394 }, { "epoch": 0.0716254469276467, "grad_norm": 0.28515625, "learning_rate": 0.0013384658546527054, "loss": 0.2072, "step": 40396 }, { "epoch": 0.07162899309295652, "grad_norm": 0.8515625, "learning_rate": 0.0013384084627486366, "loss": 0.1852, "step": 40398 }, { "epoch": 0.07163253925826633, "grad_norm": 0.431640625, "learning_rate": 0.0013383510698019075, "loss": 0.1617, "step": 40400 }, { "epoch": 0.07163608542357615, "grad_norm": 0.30859375, "learning_rate": 0.001338293675812769, "loss": 0.1661, "step": 40402 }, { "epoch": 0.07163963158888596, "grad_norm": 0.390625, "learning_rate": 0.001338236280781473, "loss": 0.1856, "step": 40404 }, { "epoch": 0.07164317775419578, "grad_norm": 0.84375, "learning_rate": 0.001338178884708269, "loss": 0.182, "step": 40406 }, { "epoch": 0.07164672391950559, "grad_norm": 0.51953125, "learning_rate": 0.0013381214875934097, "loss": 0.1964, "step": 40408 }, { "epoch": 0.0716502700848154, "grad_norm": 0.267578125, "learning_rate": 0.0013380640894371447, "loss": 0.1603, "step": 40410 }, { "epoch": 0.07165381625012522, "grad_norm": 0.38671875, "learning_rate": 0.0013380066902397261, "loss": 0.1644, "step": 40412 }, { "epoch": 0.07165736241543504, "grad_norm": 0.423828125, "learning_rate": 0.0013379492900014044, "loss": 0.1474, "step": 40414 }, { "epoch": 0.07166090858074485, "grad_norm": 0.28515625, "learning_rate": 0.0013378918887224304, "loss": 0.2764, "step": 40416 }, { "epoch": 0.07166445474605466, "grad_norm": 0.66015625, "learning_rate": 0.001337834486403056, "loss": 0.1828, "step": 40418 }, { "epoch": 0.07166800091136448, "grad_norm": 0.69921875, "learning_rate": 0.0013377770830435315, "loss": 0.1789, "step": 40420 }, { "epoch": 0.0716715470766743, "grad_norm": 2.0625, "learning_rate": 0.0013377196786441082, "loss": 0.2913, "step": 40422 }, { "epoch": 0.07167509324198411, "grad_norm": 0.5234375, "learning_rate": 0.001337662273205037, "loss": 0.1811, "step": 40424 }, { "epoch": 0.07167863940729394, "grad_norm": 0.88671875, "learning_rate": 0.001337604866726569, "loss": 0.2383, "step": 40426 }, { "epoch": 0.07168218557260375, "grad_norm": 1.5625, "learning_rate": 0.0013375474592089555, "loss": 0.1943, "step": 40428 }, { "epoch": 0.07168573173791357, "grad_norm": 0.671875, "learning_rate": 0.0013374900506524473, "loss": 0.2054, "step": 40430 }, { "epoch": 0.07168927790322338, "grad_norm": 0.333984375, "learning_rate": 0.0013374326410572957, "loss": 0.2231, "step": 40432 }, { "epoch": 0.0716928240685332, "grad_norm": 0.2431640625, "learning_rate": 0.0013373752304237515, "loss": 0.1719, "step": 40434 }, { "epoch": 0.07169637023384301, "grad_norm": 0.1298828125, "learning_rate": 0.001337317818752066, "loss": 0.173, "step": 40436 }, { "epoch": 0.07169991639915282, "grad_norm": 0.46484375, "learning_rate": 0.00133726040604249, "loss": 0.2042, "step": 40438 }, { "epoch": 0.07170346256446264, "grad_norm": 0.357421875, "learning_rate": 0.0013372029922952748, "loss": 0.1768, "step": 40440 }, { "epoch": 0.07170700872977245, "grad_norm": 0.3671875, "learning_rate": 0.0013371455775106717, "loss": 0.1978, "step": 40442 }, { "epoch": 0.07171055489508227, "grad_norm": 0.2294921875, "learning_rate": 0.0013370881616889317, "loss": 0.2152, "step": 40444 }, { "epoch": 0.07171410106039208, "grad_norm": 0.267578125, "learning_rate": 0.0013370307448303056, "loss": 0.1914, "step": 40446 }, { "epoch": 0.0717176472257019, "grad_norm": 0.4375, "learning_rate": 0.0013369733269350448, "loss": 0.2191, "step": 40448 }, { "epoch": 0.07172119339101171, "grad_norm": 0.279296875, "learning_rate": 0.0013369159080034, "loss": 0.1756, "step": 40450 }, { "epoch": 0.07172473955632153, "grad_norm": 0.99609375, "learning_rate": 0.0013368584880356232, "loss": 0.2039, "step": 40452 }, { "epoch": 0.07172828572163134, "grad_norm": 0.34765625, "learning_rate": 0.0013368010670319645, "loss": 0.1793, "step": 40454 }, { "epoch": 0.07173183188694116, "grad_norm": 0.5234375, "learning_rate": 0.0013367436449926759, "loss": 0.1986, "step": 40456 }, { "epoch": 0.07173537805225097, "grad_norm": 0.45703125, "learning_rate": 0.0013366862219180077, "loss": 0.4052, "step": 40458 }, { "epoch": 0.07173892421756078, "grad_norm": 0.62109375, "learning_rate": 0.0013366287978082115, "loss": 0.2113, "step": 40460 }, { "epoch": 0.0717424703828706, "grad_norm": 0.318359375, "learning_rate": 0.0013365713726635386, "loss": 0.2094, "step": 40462 }, { "epoch": 0.07174601654818041, "grad_norm": 0.60546875, "learning_rate": 0.0013365139464842398, "loss": 0.2186, "step": 40464 }, { "epoch": 0.07174956271349023, "grad_norm": 1.296875, "learning_rate": 0.0013364565192705664, "loss": 0.2018, "step": 40466 }, { "epoch": 0.07175310887880004, "grad_norm": 0.462890625, "learning_rate": 0.0013363990910227695, "loss": 0.2018, "step": 40468 }, { "epoch": 0.07175665504410986, "grad_norm": 0.8984375, "learning_rate": 0.0013363416617411003, "loss": 0.2381, "step": 40470 }, { "epoch": 0.07176020120941969, "grad_norm": 0.3046875, "learning_rate": 0.00133628423142581, "loss": 0.1441, "step": 40472 }, { "epoch": 0.0717637473747295, "grad_norm": 0.349609375, "learning_rate": 0.0013362268000771502, "loss": 0.2083, "step": 40474 }, { "epoch": 0.07176729354003931, "grad_norm": 0.3203125, "learning_rate": 0.0013361693676953708, "loss": 0.2606, "step": 40476 }, { "epoch": 0.07177083970534913, "grad_norm": 0.357421875, "learning_rate": 0.0013361119342807243, "loss": 0.1737, "step": 40478 }, { "epoch": 0.07177438587065894, "grad_norm": 0.287109375, "learning_rate": 0.001336054499833461, "loss": 0.2063, "step": 40480 }, { "epoch": 0.07177793203596876, "grad_norm": 0.54296875, "learning_rate": 0.0013359970643538326, "loss": 0.1612, "step": 40482 }, { "epoch": 0.07178147820127857, "grad_norm": 0.25390625, "learning_rate": 0.0013359396278420901, "loss": 0.129, "step": 40484 }, { "epoch": 0.07178502436658839, "grad_norm": 0.1904296875, "learning_rate": 0.0013358821902984847, "loss": 0.1459, "step": 40486 }, { "epoch": 0.0717885705318982, "grad_norm": 1.453125, "learning_rate": 0.0013358247517232674, "loss": 0.3365, "step": 40488 }, { "epoch": 0.07179211669720802, "grad_norm": 0.5078125, "learning_rate": 0.00133576731211669, "loss": 0.1793, "step": 40490 }, { "epoch": 0.07179566286251783, "grad_norm": 0.85546875, "learning_rate": 0.0013357098714790031, "loss": 0.1647, "step": 40492 }, { "epoch": 0.07179920902782765, "grad_norm": 0.5546875, "learning_rate": 0.0013356524298104584, "loss": 0.1161, "step": 40494 }, { "epoch": 0.07180275519313746, "grad_norm": 1.328125, "learning_rate": 0.0013355949871113066, "loss": 0.2879, "step": 40496 }, { "epoch": 0.07180630135844727, "grad_norm": 0.48046875, "learning_rate": 0.0013355375433817993, "loss": 0.2431, "step": 40498 }, { "epoch": 0.07180984752375709, "grad_norm": 0.33203125, "learning_rate": 0.0013354800986221875, "loss": 0.2221, "step": 40500 }, { "epoch": 0.0718133936890669, "grad_norm": 0.45703125, "learning_rate": 0.0013354226528327227, "loss": 0.2369, "step": 40502 }, { "epoch": 0.07181693985437672, "grad_norm": 0.328125, "learning_rate": 0.0013353652060136556, "loss": 0.2804, "step": 40504 }, { "epoch": 0.07182048601968653, "grad_norm": 0.66796875, "learning_rate": 0.0013353077581652383, "loss": 0.452, "step": 40506 }, { "epoch": 0.07182403218499635, "grad_norm": 0.66015625, "learning_rate": 0.0013352503092877212, "loss": 0.127, "step": 40508 }, { "epoch": 0.07182757835030616, "grad_norm": 0.84765625, "learning_rate": 0.0013351928593813558, "loss": 0.1951, "step": 40510 }, { "epoch": 0.07183112451561598, "grad_norm": 0.466796875, "learning_rate": 0.001335135408446394, "loss": 0.1744, "step": 40512 }, { "epoch": 0.07183467068092579, "grad_norm": 0.98046875, "learning_rate": 0.001335077956483086, "loss": 0.1789, "step": 40514 }, { "epoch": 0.0718382168462356, "grad_norm": 0.328125, "learning_rate": 0.0013350205034916836, "loss": 0.2047, "step": 40516 }, { "epoch": 0.07184176301154543, "grad_norm": 0.54296875, "learning_rate": 0.0013349630494724383, "loss": 0.1489, "step": 40518 }, { "epoch": 0.07184530917685525, "grad_norm": 0.447265625, "learning_rate": 0.001334905594425601, "loss": 0.2128, "step": 40520 }, { "epoch": 0.07184885534216506, "grad_norm": 0.6640625, "learning_rate": 0.0013348481383514228, "loss": 0.153, "step": 40522 }, { "epoch": 0.07185240150747488, "grad_norm": 15.625, "learning_rate": 0.0013347906812501559, "loss": 0.3786, "step": 40524 }, { "epoch": 0.07185594767278469, "grad_norm": 0.173828125, "learning_rate": 0.0013347332231220504, "loss": 0.1792, "step": 40526 }, { "epoch": 0.07185949383809451, "grad_norm": 0.447265625, "learning_rate": 0.0013346757639673581, "loss": 0.1418, "step": 40528 }, { "epoch": 0.07186304000340432, "grad_norm": 0.3515625, "learning_rate": 0.0013346183037863308, "loss": 0.1875, "step": 40530 }, { "epoch": 0.07186658616871414, "grad_norm": 1.4453125, "learning_rate": 0.001334560842579219, "loss": 0.2598, "step": 40532 }, { "epoch": 0.07187013233402395, "grad_norm": 0.2177734375, "learning_rate": 0.0013345033803462743, "loss": 0.1806, "step": 40534 }, { "epoch": 0.07187367849933377, "grad_norm": 0.53125, "learning_rate": 0.0013344459170877478, "loss": 0.1552, "step": 40536 }, { "epoch": 0.07187722466464358, "grad_norm": 0.427734375, "learning_rate": 0.0013343884528038918, "loss": 0.354, "step": 40538 }, { "epoch": 0.0718807708299534, "grad_norm": 0.396484375, "learning_rate": 0.0013343309874949561, "loss": 0.1471, "step": 40540 }, { "epoch": 0.07188431699526321, "grad_norm": 1.2734375, "learning_rate": 0.0013342735211611927, "loss": 0.1983, "step": 40542 }, { "epoch": 0.07188786316057302, "grad_norm": 0.6015625, "learning_rate": 0.0013342160538028533, "loss": 0.2025, "step": 40544 }, { "epoch": 0.07189140932588284, "grad_norm": 1.0859375, "learning_rate": 0.001334158585420189, "loss": 0.2548, "step": 40546 }, { "epoch": 0.07189495549119265, "grad_norm": 0.498046875, "learning_rate": 0.0013341011160134513, "loss": 0.1968, "step": 40548 }, { "epoch": 0.07189850165650247, "grad_norm": 0.23828125, "learning_rate": 0.001334043645582891, "loss": 0.2005, "step": 40550 }, { "epoch": 0.07190204782181228, "grad_norm": 0.443359375, "learning_rate": 0.00133398617412876, "loss": 0.1808, "step": 40552 }, { "epoch": 0.0719055939871221, "grad_norm": 0.228515625, "learning_rate": 0.001333928701651309, "loss": 0.1302, "step": 40554 }, { "epoch": 0.07190914015243191, "grad_norm": 0.3203125, "learning_rate": 0.0013338712281507899, "loss": 0.1553, "step": 40556 }, { "epoch": 0.07191268631774173, "grad_norm": 0.416015625, "learning_rate": 0.001333813753627454, "loss": 0.1707, "step": 40558 }, { "epoch": 0.07191623248305154, "grad_norm": 0.57421875, "learning_rate": 0.0013337562780815523, "loss": 0.178, "step": 40560 }, { "epoch": 0.07191977864836137, "grad_norm": 0.40625, "learning_rate": 0.0013336988015133365, "loss": 0.1644, "step": 40562 }, { "epoch": 0.07192332481367118, "grad_norm": 0.85546875, "learning_rate": 0.001333641323923058, "loss": 0.2205, "step": 40564 }, { "epoch": 0.071926870978981, "grad_norm": 0.353515625, "learning_rate": 0.001333583845310968, "loss": 0.2067, "step": 40566 }, { "epoch": 0.07193041714429081, "grad_norm": 0.703125, "learning_rate": 0.0013335263656773185, "loss": 0.2905, "step": 40568 }, { "epoch": 0.07193396330960063, "grad_norm": 0.30078125, "learning_rate": 0.0013334688850223592, "loss": 0.1828, "step": 40570 }, { "epoch": 0.07193750947491044, "grad_norm": 0.498046875, "learning_rate": 0.0013334114033463436, "loss": 0.2946, "step": 40572 }, { "epoch": 0.07194105564022026, "grad_norm": 0.244140625, "learning_rate": 0.0013333539206495215, "loss": 0.2257, "step": 40574 }, { "epoch": 0.07194460180553007, "grad_norm": 0.90625, "learning_rate": 0.0013332964369321453, "loss": 0.2205, "step": 40576 }, { "epoch": 0.07194814797083988, "grad_norm": 0.341796875, "learning_rate": 0.0013332389521944657, "loss": 0.1987, "step": 40578 }, { "epoch": 0.0719516941361497, "grad_norm": 1.53125, "learning_rate": 0.0013331814664367346, "loss": 0.248, "step": 40580 }, { "epoch": 0.07195524030145951, "grad_norm": 1.859375, "learning_rate": 0.001333123979659203, "loss": 0.3464, "step": 40582 }, { "epoch": 0.07195878646676933, "grad_norm": 0.27734375, "learning_rate": 0.0013330664918621228, "loss": 0.229, "step": 40584 }, { "epoch": 0.07196233263207914, "grad_norm": 1.3515625, "learning_rate": 0.0013330090030457449, "loss": 0.2228, "step": 40586 }, { "epoch": 0.07196587879738896, "grad_norm": 7.3125, "learning_rate": 0.0013329515132103212, "loss": 0.15, "step": 40588 }, { "epoch": 0.07196942496269877, "grad_norm": 2.484375, "learning_rate": 0.0013328940223561027, "loss": 0.1937, "step": 40590 }, { "epoch": 0.07197297112800859, "grad_norm": 0.263671875, "learning_rate": 0.0013328365304833413, "loss": 0.168, "step": 40592 }, { "epoch": 0.0719765172933184, "grad_norm": 0.212890625, "learning_rate": 0.0013327790375922879, "loss": 0.2218, "step": 40594 }, { "epoch": 0.07198006345862822, "grad_norm": 0.2294921875, "learning_rate": 0.0013327215436831944, "loss": 0.1479, "step": 40596 }, { "epoch": 0.07198360962393803, "grad_norm": 0.41796875, "learning_rate": 0.0013326640487563118, "loss": 0.1407, "step": 40598 }, { "epoch": 0.07198715578924784, "grad_norm": 1.109375, "learning_rate": 0.0013326065528118918, "loss": 0.2762, "step": 40600 }, { "epoch": 0.07199070195455766, "grad_norm": 0.8984375, "learning_rate": 0.001332549055850186, "loss": 0.1769, "step": 40602 }, { "epoch": 0.07199424811986747, "grad_norm": 0.220703125, "learning_rate": 0.0013324915578714458, "loss": 0.152, "step": 40604 }, { "epoch": 0.07199779428517729, "grad_norm": 0.41796875, "learning_rate": 0.0013324340588759225, "loss": 0.2479, "step": 40606 }, { "epoch": 0.07200134045048712, "grad_norm": 0.2392578125, "learning_rate": 0.0013323765588638676, "loss": 0.2137, "step": 40608 }, { "epoch": 0.07200488661579693, "grad_norm": 0.291015625, "learning_rate": 0.0013323190578355324, "loss": 0.1459, "step": 40610 }, { "epoch": 0.07200843278110675, "grad_norm": 0.625, "learning_rate": 0.001332261555791169, "loss": 0.1702, "step": 40612 }, { "epoch": 0.07201197894641656, "grad_norm": 0.53515625, "learning_rate": 0.001332204052731028, "loss": 0.1968, "step": 40614 }, { "epoch": 0.07201552511172638, "grad_norm": 0.5234375, "learning_rate": 0.0013321465486553618, "loss": 0.2233, "step": 40616 }, { "epoch": 0.07201907127703619, "grad_norm": 0.3515625, "learning_rate": 0.001332089043564421, "loss": 0.174, "step": 40618 }, { "epoch": 0.072022617442346, "grad_norm": 0.85546875, "learning_rate": 0.001332031537458458, "loss": 0.2079, "step": 40620 }, { "epoch": 0.07202616360765582, "grad_norm": 0.49609375, "learning_rate": 0.0013319740303377237, "loss": 0.1656, "step": 40622 }, { "epoch": 0.07202970977296563, "grad_norm": 5.1875, "learning_rate": 0.0013319165222024697, "loss": 0.4063, "step": 40624 }, { "epoch": 0.07203325593827545, "grad_norm": 0.4375, "learning_rate": 0.0013318590130529475, "loss": 0.1516, "step": 40626 }, { "epoch": 0.07203680210358526, "grad_norm": 0.69921875, "learning_rate": 0.0013318015028894086, "loss": 0.1321, "step": 40628 }, { "epoch": 0.07204034826889508, "grad_norm": 0.6328125, "learning_rate": 0.0013317439917121046, "loss": 0.1993, "step": 40630 }, { "epoch": 0.07204389443420489, "grad_norm": 0.765625, "learning_rate": 0.001331686479521287, "loss": 0.1662, "step": 40632 }, { "epoch": 0.0720474405995147, "grad_norm": 0.609375, "learning_rate": 0.0013316289663172075, "loss": 0.2164, "step": 40634 }, { "epoch": 0.07205098676482452, "grad_norm": 0.298828125, "learning_rate": 0.001331571452100117, "loss": 0.1711, "step": 40636 }, { "epoch": 0.07205453293013434, "grad_norm": 0.296875, "learning_rate": 0.0013315139368702679, "loss": 0.1696, "step": 40638 }, { "epoch": 0.07205807909544415, "grad_norm": 0.255859375, "learning_rate": 0.0013314564206279112, "loss": 0.1381, "step": 40640 }, { "epoch": 0.07206162526075396, "grad_norm": 0.48828125, "learning_rate": 0.0013313989033732988, "loss": 0.4364, "step": 40642 }, { "epoch": 0.07206517142606378, "grad_norm": 0.30078125, "learning_rate": 0.0013313413851066818, "loss": 0.1562, "step": 40644 }, { "epoch": 0.0720687175913736, "grad_norm": 0.318359375, "learning_rate": 0.001331283865828312, "loss": 0.2784, "step": 40646 }, { "epoch": 0.07207226375668341, "grad_norm": 0.365234375, "learning_rate": 0.0013312263455384405, "loss": 0.2154, "step": 40648 }, { "epoch": 0.07207580992199322, "grad_norm": 0.49609375, "learning_rate": 0.0013311688242373197, "loss": 0.1716, "step": 40650 }, { "epoch": 0.07207935608730304, "grad_norm": 0.482421875, "learning_rate": 0.0013311113019252006, "loss": 0.1718, "step": 40652 }, { "epoch": 0.07208290225261287, "grad_norm": 0.59375, "learning_rate": 0.0013310537786023352, "loss": 0.2423, "step": 40654 }, { "epoch": 0.07208644841792268, "grad_norm": 0.38671875, "learning_rate": 0.0013309962542689745, "loss": 0.1469, "step": 40656 }, { "epoch": 0.0720899945832325, "grad_norm": 0.3984375, "learning_rate": 0.0013309387289253702, "loss": 0.1761, "step": 40658 }, { "epoch": 0.07209354074854231, "grad_norm": 0.349609375, "learning_rate": 0.0013308812025717746, "loss": 0.3742, "step": 40660 }, { "epoch": 0.07209708691385212, "grad_norm": 1.265625, "learning_rate": 0.0013308236752084383, "loss": 0.2723, "step": 40662 }, { "epoch": 0.07210063307916194, "grad_norm": 4.46875, "learning_rate": 0.0013307661468356135, "loss": 0.1758, "step": 40664 }, { "epoch": 0.07210417924447175, "grad_norm": 0.50390625, "learning_rate": 0.0013307086174535515, "loss": 0.4247, "step": 40666 }, { "epoch": 0.07210772540978157, "grad_norm": 0.7734375, "learning_rate": 0.0013306510870625039, "loss": 0.1558, "step": 40668 }, { "epoch": 0.07211127157509138, "grad_norm": 1.125, "learning_rate": 0.0013305935556627226, "loss": 0.2917, "step": 40670 }, { "epoch": 0.0721148177404012, "grad_norm": 0.51953125, "learning_rate": 0.001330536023254459, "loss": 0.2518, "step": 40672 }, { "epoch": 0.07211836390571101, "grad_norm": 0.28125, "learning_rate": 0.0013304784898379649, "loss": 0.1432, "step": 40674 }, { "epoch": 0.07212191007102083, "grad_norm": 0.431640625, "learning_rate": 0.0013304209554134915, "loss": 0.2052, "step": 40676 }, { "epoch": 0.07212545623633064, "grad_norm": 0.55859375, "learning_rate": 0.0013303634199812908, "loss": 0.2489, "step": 40678 }, { "epoch": 0.07212900240164045, "grad_norm": 0.5703125, "learning_rate": 0.0013303058835416144, "loss": 0.3614, "step": 40680 }, { "epoch": 0.07213254856695027, "grad_norm": 0.734375, "learning_rate": 0.0013302483460947139, "loss": 0.1621, "step": 40682 }, { "epoch": 0.07213609473226008, "grad_norm": 0.30078125, "learning_rate": 0.0013301908076408407, "loss": 0.1896, "step": 40684 }, { "epoch": 0.0721396408975699, "grad_norm": 0.71875, "learning_rate": 0.0013301332681802467, "loss": 0.289, "step": 40686 }, { "epoch": 0.07214318706287971, "grad_norm": 0.3828125, "learning_rate": 0.0013300757277131833, "loss": 0.1378, "step": 40688 }, { "epoch": 0.07214673322818953, "grad_norm": 0.263671875, "learning_rate": 0.0013300181862399023, "loss": 0.2043, "step": 40690 }, { "epoch": 0.07215027939349934, "grad_norm": 0.6171875, "learning_rate": 0.0013299606437606557, "loss": 0.2068, "step": 40692 }, { "epoch": 0.07215382555880916, "grad_norm": 0.2421875, "learning_rate": 0.0013299031002756948, "loss": 0.1636, "step": 40694 }, { "epoch": 0.07215737172411897, "grad_norm": 0.404296875, "learning_rate": 0.001329845555785271, "loss": 0.1801, "step": 40696 }, { "epoch": 0.0721609178894288, "grad_norm": 0.69921875, "learning_rate": 0.0013297880102896365, "loss": 0.2359, "step": 40698 }, { "epoch": 0.07216446405473861, "grad_norm": 0.375, "learning_rate": 0.0013297304637890424, "loss": 0.1644, "step": 40700 }, { "epoch": 0.07216801022004843, "grad_norm": 0.90234375, "learning_rate": 0.0013296729162837408, "loss": 0.3703, "step": 40702 }, { "epoch": 0.07217155638535824, "grad_norm": 0.26953125, "learning_rate": 0.0013296153677739833, "loss": 0.2579, "step": 40704 }, { "epoch": 0.07217510255066806, "grad_norm": 0.2109375, "learning_rate": 0.001329557818260022, "loss": 0.2181, "step": 40706 }, { "epoch": 0.07217864871597787, "grad_norm": 0.31640625, "learning_rate": 0.0013295002677421073, "loss": 0.2043, "step": 40708 }, { "epoch": 0.07218219488128769, "grad_norm": 0.6328125, "learning_rate": 0.0013294427162204921, "loss": 0.2533, "step": 40710 }, { "epoch": 0.0721857410465975, "grad_norm": 0.265625, "learning_rate": 0.0013293851636954279, "loss": 0.2189, "step": 40712 }, { "epoch": 0.07218928721190732, "grad_norm": 1.2890625, "learning_rate": 0.0013293276101671663, "loss": 0.2353, "step": 40714 }, { "epoch": 0.07219283337721713, "grad_norm": 0.341796875, "learning_rate": 0.001329270055635959, "loss": 0.2074, "step": 40716 }, { "epoch": 0.07219637954252695, "grad_norm": 0.482421875, "learning_rate": 0.0013292125001020572, "loss": 0.1905, "step": 40718 }, { "epoch": 0.07219992570783676, "grad_norm": 0.3984375, "learning_rate": 0.0013291549435657133, "loss": 0.1922, "step": 40720 }, { "epoch": 0.07220347187314657, "grad_norm": 1.203125, "learning_rate": 0.0013290973860271788, "loss": 0.2915, "step": 40722 }, { "epoch": 0.07220701803845639, "grad_norm": 0.66015625, "learning_rate": 0.0013290398274867054, "loss": 0.1177, "step": 40724 }, { "epoch": 0.0722105642037662, "grad_norm": 0.306640625, "learning_rate": 0.0013289822679445448, "loss": 0.2329, "step": 40726 }, { "epoch": 0.07221411036907602, "grad_norm": 0.333984375, "learning_rate": 0.001328924707400949, "loss": 0.1412, "step": 40728 }, { "epoch": 0.07221765653438583, "grad_norm": 0.474609375, "learning_rate": 0.0013288671458561692, "loss": 0.2341, "step": 40730 }, { "epoch": 0.07222120269969565, "grad_norm": 1.703125, "learning_rate": 0.0013288095833104577, "loss": 0.2184, "step": 40732 }, { "epoch": 0.07222474886500546, "grad_norm": 0.369140625, "learning_rate": 0.001328752019764066, "loss": 0.1927, "step": 40734 }, { "epoch": 0.07222829503031528, "grad_norm": 0.287109375, "learning_rate": 0.0013286944552172457, "loss": 0.164, "step": 40736 }, { "epoch": 0.07223184119562509, "grad_norm": 0.326171875, "learning_rate": 0.0013286368896702485, "loss": 0.1282, "step": 40738 }, { "epoch": 0.0722353873609349, "grad_norm": 1.0234375, "learning_rate": 0.0013285793231233266, "loss": 0.2402, "step": 40740 }, { "epoch": 0.07223893352624472, "grad_norm": 5.5625, "learning_rate": 0.0013285217555767314, "loss": 0.4325, "step": 40742 }, { "epoch": 0.07224247969155455, "grad_norm": 0.3984375, "learning_rate": 0.0013284641870307148, "loss": 0.1414, "step": 40744 }, { "epoch": 0.07224602585686436, "grad_norm": 0.4765625, "learning_rate": 0.0013284066174855287, "loss": 0.2032, "step": 40746 }, { "epoch": 0.07224957202217418, "grad_norm": 0.36328125, "learning_rate": 0.0013283490469414249, "loss": 0.1789, "step": 40748 }, { "epoch": 0.07225311818748399, "grad_norm": 0.36328125, "learning_rate": 0.0013282914753986544, "loss": 0.1865, "step": 40750 }, { "epoch": 0.0722566643527938, "grad_norm": 0.40625, "learning_rate": 0.0013282339028574698, "loss": 0.2234, "step": 40752 }, { "epoch": 0.07226021051810362, "grad_norm": 0.6484375, "learning_rate": 0.001328176329318123, "loss": 0.1767, "step": 40754 }, { "epoch": 0.07226375668341344, "grad_norm": 0.59765625, "learning_rate": 0.0013281187547808655, "loss": 0.2269, "step": 40756 }, { "epoch": 0.07226730284872325, "grad_norm": 0.412109375, "learning_rate": 0.0013280611792459486, "loss": 0.2028, "step": 40758 }, { "epoch": 0.07227084901403306, "grad_norm": 0.326171875, "learning_rate": 0.001328003602713625, "loss": 0.1692, "step": 40760 }, { "epoch": 0.07227439517934288, "grad_norm": 0.7421875, "learning_rate": 0.0013279460251841458, "loss": 0.1991, "step": 40762 }, { "epoch": 0.0722779413446527, "grad_norm": 0.44140625, "learning_rate": 0.0013278884466577635, "loss": 0.1581, "step": 40764 }, { "epoch": 0.07228148750996251, "grad_norm": 0.48828125, "learning_rate": 0.001327830867134729, "loss": 0.2097, "step": 40766 }, { "epoch": 0.07228503367527232, "grad_norm": 0.703125, "learning_rate": 0.001327773286615295, "loss": 0.2232, "step": 40768 }, { "epoch": 0.07228857984058214, "grad_norm": 3.0, "learning_rate": 0.0013277157050997125, "loss": 0.2982, "step": 40770 }, { "epoch": 0.07229212600589195, "grad_norm": 0.45703125, "learning_rate": 0.0013276581225882343, "loss": 0.1571, "step": 40772 }, { "epoch": 0.07229567217120177, "grad_norm": 0.2216796875, "learning_rate": 0.0013276005390811115, "loss": 0.1392, "step": 40774 }, { "epoch": 0.07229921833651158, "grad_norm": 0.9375, "learning_rate": 0.0013275429545785962, "loss": 0.2031, "step": 40776 }, { "epoch": 0.0723027645018214, "grad_norm": 0.5234375, "learning_rate": 0.00132748536908094, "loss": 0.2434, "step": 40778 }, { "epoch": 0.07230631066713121, "grad_norm": 1.03125, "learning_rate": 0.001327427782588395, "loss": 0.1717, "step": 40780 }, { "epoch": 0.07230985683244102, "grad_norm": 0.462890625, "learning_rate": 0.0013273701951012132, "loss": 0.1701, "step": 40782 }, { "epoch": 0.07231340299775084, "grad_norm": 0.67578125, "learning_rate": 0.0013273126066196462, "loss": 0.1955, "step": 40784 }, { "epoch": 0.07231694916306065, "grad_norm": 1.3984375, "learning_rate": 0.0013272550171439454, "loss": 0.2336, "step": 40786 }, { "epoch": 0.07232049532837047, "grad_norm": 0.6015625, "learning_rate": 0.0013271974266743638, "loss": 0.2082, "step": 40788 }, { "epoch": 0.0723240414936803, "grad_norm": 0.384765625, "learning_rate": 0.0013271398352111526, "loss": 0.1795, "step": 40790 }, { "epoch": 0.07232758765899011, "grad_norm": 1.8984375, "learning_rate": 0.001327082242754563, "loss": 0.1634, "step": 40792 }, { "epoch": 0.07233113382429993, "grad_norm": 0.271484375, "learning_rate": 0.0013270246493048484, "loss": 0.1806, "step": 40794 }, { "epoch": 0.07233467998960974, "grad_norm": 0.2119140625, "learning_rate": 0.0013269670548622591, "loss": 0.2244, "step": 40796 }, { "epoch": 0.07233822615491955, "grad_norm": 1.046875, "learning_rate": 0.0013269094594270485, "loss": 0.2348, "step": 40798 }, { "epoch": 0.07234177232022937, "grad_norm": 0.439453125, "learning_rate": 0.0013268518629994672, "loss": 0.2088, "step": 40800 }, { "epoch": 0.07234531848553918, "grad_norm": 0.306640625, "learning_rate": 0.001326794265579768, "loss": 0.1745, "step": 40802 }, { "epoch": 0.072348864650849, "grad_norm": 0.40625, "learning_rate": 0.0013267366671682022, "loss": 0.1843, "step": 40804 }, { "epoch": 0.07235241081615881, "grad_norm": 0.326171875, "learning_rate": 0.0013266790677650218, "loss": 0.1851, "step": 40806 }, { "epoch": 0.07235595698146863, "grad_norm": 1.640625, "learning_rate": 0.001326621467370479, "loss": 0.3173, "step": 40808 }, { "epoch": 0.07235950314677844, "grad_norm": 0.55859375, "learning_rate": 0.0013265638659848257, "loss": 0.1823, "step": 40810 }, { "epoch": 0.07236304931208826, "grad_norm": 0.32421875, "learning_rate": 0.0013265062636083132, "loss": 0.167, "step": 40812 }, { "epoch": 0.07236659547739807, "grad_norm": 0.34765625, "learning_rate": 0.0013264486602411942, "loss": 0.1839, "step": 40814 }, { "epoch": 0.07237014164270789, "grad_norm": 1.4609375, "learning_rate": 0.0013263910558837203, "loss": 0.4663, "step": 40816 }, { "epoch": 0.0723736878080177, "grad_norm": 0.61328125, "learning_rate": 0.0013263334505361434, "loss": 0.2036, "step": 40818 }, { "epoch": 0.07237723397332751, "grad_norm": 0.2578125, "learning_rate": 0.0013262758441987153, "loss": 0.1635, "step": 40820 }, { "epoch": 0.07238078013863733, "grad_norm": 0.359375, "learning_rate": 0.0013262182368716882, "loss": 0.1764, "step": 40822 }, { "epoch": 0.07238432630394714, "grad_norm": 0.57421875, "learning_rate": 0.0013261606285553138, "loss": 0.1915, "step": 40824 }, { "epoch": 0.07238787246925696, "grad_norm": 1.671875, "learning_rate": 0.0013261030192498447, "loss": 0.186, "step": 40826 }, { "epoch": 0.07239141863456677, "grad_norm": 0.267578125, "learning_rate": 0.0013260454089555319, "loss": 0.1712, "step": 40828 }, { "epoch": 0.07239496479987659, "grad_norm": 0.23828125, "learning_rate": 0.0013259877976726279, "loss": 0.2373, "step": 40830 }, { "epoch": 0.0723985109651864, "grad_norm": 0.74609375, "learning_rate": 0.0013259301854013843, "loss": 0.1912, "step": 40832 }, { "epoch": 0.07240205713049623, "grad_norm": 0.494140625, "learning_rate": 0.0013258725721420534, "loss": 0.21, "step": 40834 }, { "epoch": 0.07240560329580605, "grad_norm": 0.51171875, "learning_rate": 0.0013258149578948874, "loss": 0.1984, "step": 40836 }, { "epoch": 0.07240914946111586, "grad_norm": 0.34375, "learning_rate": 0.0013257573426601378, "loss": 0.2686, "step": 40838 }, { "epoch": 0.07241269562642567, "grad_norm": 0.234375, "learning_rate": 0.0013256997264380565, "loss": 0.2149, "step": 40840 }, { "epoch": 0.07241624179173549, "grad_norm": 0.41015625, "learning_rate": 0.0013256421092288958, "loss": 0.1508, "step": 40842 }, { "epoch": 0.0724197879570453, "grad_norm": 0.357421875, "learning_rate": 0.0013255844910329074, "loss": 0.1665, "step": 40844 }, { "epoch": 0.07242333412235512, "grad_norm": 0.51953125, "learning_rate": 0.0013255268718503437, "loss": 0.1642, "step": 40846 }, { "epoch": 0.07242688028766493, "grad_norm": 0.5078125, "learning_rate": 0.0013254692516814566, "loss": 0.1885, "step": 40848 }, { "epoch": 0.07243042645297475, "grad_norm": 0.275390625, "learning_rate": 0.0013254116305264978, "loss": 0.1656, "step": 40850 }, { "epoch": 0.07243397261828456, "grad_norm": 0.275390625, "learning_rate": 0.0013253540083857191, "loss": 0.1757, "step": 40852 }, { "epoch": 0.07243751878359438, "grad_norm": 0.55859375, "learning_rate": 0.0013252963852593736, "loss": 0.1373, "step": 40854 }, { "epoch": 0.07244106494890419, "grad_norm": 0.33203125, "learning_rate": 0.0013252387611477122, "loss": 0.1654, "step": 40856 }, { "epoch": 0.072444611114214, "grad_norm": 0.294921875, "learning_rate": 0.0013251811360509872, "loss": 0.392, "step": 40858 }, { "epoch": 0.07244815727952382, "grad_norm": 1.71875, "learning_rate": 0.0013251235099694507, "loss": 0.1904, "step": 40860 }, { "epoch": 0.07245170344483363, "grad_norm": 0.205078125, "learning_rate": 0.0013250658829033549, "loss": 0.1631, "step": 40862 }, { "epoch": 0.07245524961014345, "grad_norm": 0.94921875, "learning_rate": 0.0013250082548529516, "loss": 0.2241, "step": 40864 }, { "epoch": 0.07245879577545326, "grad_norm": 1.78125, "learning_rate": 0.0013249506258184928, "loss": 0.1949, "step": 40866 }, { "epoch": 0.07246234194076308, "grad_norm": 0.5390625, "learning_rate": 0.001324892995800231, "loss": 0.1971, "step": 40868 }, { "epoch": 0.07246588810607289, "grad_norm": 0.23046875, "learning_rate": 0.0013248353647984177, "loss": 0.2473, "step": 40870 }, { "epoch": 0.07246943427138271, "grad_norm": 0.65625, "learning_rate": 0.001324777732813305, "loss": 0.166, "step": 40872 }, { "epoch": 0.07247298043669252, "grad_norm": 0.283203125, "learning_rate": 0.0013247200998451449, "loss": 0.1646, "step": 40874 }, { "epoch": 0.07247652660200234, "grad_norm": 0.65234375, "learning_rate": 0.0013246624658941899, "loss": 0.2155, "step": 40876 }, { "epoch": 0.07248007276731215, "grad_norm": 0.69921875, "learning_rate": 0.0013246048309606916, "loss": 0.1975, "step": 40878 }, { "epoch": 0.07248361893262198, "grad_norm": 1.609375, "learning_rate": 0.0013245471950449022, "loss": 0.3094, "step": 40880 }, { "epoch": 0.0724871650979318, "grad_norm": 0.57421875, "learning_rate": 0.001324489558147074, "loss": 0.2094, "step": 40882 }, { "epoch": 0.07249071126324161, "grad_norm": 0.2353515625, "learning_rate": 0.0013244319202674589, "loss": 0.2338, "step": 40884 }, { "epoch": 0.07249425742855142, "grad_norm": 0.66796875, "learning_rate": 0.0013243742814063088, "loss": 0.1885, "step": 40886 }, { "epoch": 0.07249780359386124, "grad_norm": 0.2890625, "learning_rate": 0.0013243166415638759, "loss": 0.2242, "step": 40888 }, { "epoch": 0.07250134975917105, "grad_norm": 1.125, "learning_rate": 0.001324259000740412, "loss": 0.272, "step": 40890 }, { "epoch": 0.07250489592448087, "grad_norm": 1.640625, "learning_rate": 0.0013242013589361702, "loss": 0.3084, "step": 40892 }, { "epoch": 0.07250844208979068, "grad_norm": 0.2138671875, "learning_rate": 0.0013241437161514015, "loss": 0.2045, "step": 40894 }, { "epoch": 0.0725119882551005, "grad_norm": 0.39453125, "learning_rate": 0.0013240860723863583, "loss": 0.2116, "step": 40896 }, { "epoch": 0.07251553442041031, "grad_norm": 0.6796875, "learning_rate": 0.001324028427641293, "loss": 0.1809, "step": 40898 }, { "epoch": 0.07251908058572012, "grad_norm": 0.5859375, "learning_rate": 0.0013239707819164572, "loss": 0.2194, "step": 40900 }, { "epoch": 0.07252262675102994, "grad_norm": 0.87890625, "learning_rate": 0.0013239131352121036, "loss": 0.2141, "step": 40902 }, { "epoch": 0.07252617291633975, "grad_norm": 0.8515625, "learning_rate": 0.001323855487528484, "loss": 0.2051, "step": 40904 }, { "epoch": 0.07252971908164957, "grad_norm": 0.40234375, "learning_rate": 0.0013237978388658505, "loss": 0.2212, "step": 40906 }, { "epoch": 0.07253326524695938, "grad_norm": 0.43359375, "learning_rate": 0.0013237401892244552, "loss": 0.2414, "step": 40908 }, { "epoch": 0.0725368114122692, "grad_norm": 0.9921875, "learning_rate": 0.0013236825386045501, "loss": 0.2058, "step": 40910 }, { "epoch": 0.07254035757757901, "grad_norm": 0.353515625, "learning_rate": 0.0013236248870063879, "loss": 0.1986, "step": 40912 }, { "epoch": 0.07254390374288883, "grad_norm": 1.0703125, "learning_rate": 0.00132356723443022, "loss": 0.2091, "step": 40914 }, { "epoch": 0.07254744990819864, "grad_norm": 0.36328125, "learning_rate": 0.001323509580876299, "loss": 0.1487, "step": 40916 }, { "epoch": 0.07255099607350846, "grad_norm": 1.1328125, "learning_rate": 0.0013234519263448767, "loss": 0.398, "step": 40918 }, { "epoch": 0.07255454223881827, "grad_norm": 0.291015625, "learning_rate": 0.0013233942708362058, "loss": 0.17, "step": 40920 }, { "epoch": 0.07255808840412808, "grad_norm": 0.5, "learning_rate": 0.001323336614350538, "loss": 0.1947, "step": 40922 }, { "epoch": 0.0725616345694379, "grad_norm": 0.40234375, "learning_rate": 0.0013232789568881256, "loss": 0.1682, "step": 40924 }, { "epoch": 0.07256518073474773, "grad_norm": 2.140625, "learning_rate": 0.0013232212984492207, "loss": 0.2488, "step": 40926 }, { "epoch": 0.07256872690005754, "grad_norm": 1.6328125, "learning_rate": 0.0013231636390340755, "loss": 0.2694, "step": 40928 }, { "epoch": 0.07257227306536736, "grad_norm": 0.57421875, "learning_rate": 0.0013231059786429423, "loss": 0.1879, "step": 40930 }, { "epoch": 0.07257581923067717, "grad_norm": 0.2041015625, "learning_rate": 0.001323048317276073, "loss": 0.2041, "step": 40932 }, { "epoch": 0.07257936539598699, "grad_norm": 0.427734375, "learning_rate": 0.00132299065493372, "loss": 0.216, "step": 40934 }, { "epoch": 0.0725829115612968, "grad_norm": 3.703125, "learning_rate": 0.0013229329916161353, "loss": 0.2242, "step": 40936 }, { "epoch": 0.07258645772660662, "grad_norm": 0.53515625, "learning_rate": 0.0013228753273235713, "loss": 0.2047, "step": 40938 }, { "epoch": 0.07259000389191643, "grad_norm": 0.78125, "learning_rate": 0.00132281766205628, "loss": 0.1518, "step": 40940 }, { "epoch": 0.07259355005722624, "grad_norm": 0.36328125, "learning_rate": 0.0013227599958145137, "loss": 0.1452, "step": 40942 }, { "epoch": 0.07259709622253606, "grad_norm": 0.404296875, "learning_rate": 0.0013227023285985247, "loss": 0.1888, "step": 40944 }, { "epoch": 0.07260064238784587, "grad_norm": 0.95703125, "learning_rate": 0.0013226446604085648, "loss": 0.256, "step": 40946 }, { "epoch": 0.07260418855315569, "grad_norm": 0.8125, "learning_rate": 0.0013225869912448867, "loss": 0.1763, "step": 40948 }, { "epoch": 0.0726077347184655, "grad_norm": 0.462890625, "learning_rate": 0.0013225293211077422, "loss": 0.1634, "step": 40950 }, { "epoch": 0.07261128088377532, "grad_norm": 0.240234375, "learning_rate": 0.0013224716499973838, "loss": 0.1794, "step": 40952 }, { "epoch": 0.07261482704908513, "grad_norm": 0.71484375, "learning_rate": 0.001322413977914064, "loss": 0.2006, "step": 40954 }, { "epoch": 0.07261837321439495, "grad_norm": 0.427734375, "learning_rate": 0.001322356304858034, "loss": 0.2308, "step": 40956 }, { "epoch": 0.07262191937970476, "grad_norm": 1.015625, "learning_rate": 0.0013222986308295472, "loss": 0.2353, "step": 40958 }, { "epoch": 0.07262546554501458, "grad_norm": 0.90625, "learning_rate": 0.0013222409558288552, "loss": 0.2724, "step": 40960 }, { "epoch": 0.07262901171032439, "grad_norm": 0.42578125, "learning_rate": 0.00132218327985621, "loss": 0.1991, "step": 40962 }, { "epoch": 0.0726325578756342, "grad_norm": 0.212890625, "learning_rate": 0.0013221256029118647, "loss": 0.1672, "step": 40964 }, { "epoch": 0.07263610404094402, "grad_norm": 0.189453125, "learning_rate": 0.0013220679249960707, "loss": 0.1702, "step": 40966 }, { "epoch": 0.07263965020625383, "grad_norm": 0.2353515625, "learning_rate": 0.0013220102461090808, "loss": 0.1769, "step": 40968 }, { "epoch": 0.07264319637156365, "grad_norm": 0.39453125, "learning_rate": 0.0013219525662511468, "loss": 0.2057, "step": 40970 }, { "epoch": 0.07264674253687348, "grad_norm": 0.9765625, "learning_rate": 0.0013218948854225214, "loss": 0.2164, "step": 40972 }, { "epoch": 0.07265028870218329, "grad_norm": 0.8515625, "learning_rate": 0.0013218372036234564, "loss": 0.2061, "step": 40974 }, { "epoch": 0.0726538348674931, "grad_norm": 0.55078125, "learning_rate": 0.001321779520854205, "loss": 0.165, "step": 40976 }, { "epoch": 0.07265738103280292, "grad_norm": 0.244140625, "learning_rate": 0.0013217218371150182, "loss": 0.1627, "step": 40978 }, { "epoch": 0.07266092719811273, "grad_norm": 3.46875, "learning_rate": 0.0013216641524061489, "loss": 0.2285, "step": 40980 }, { "epoch": 0.07266447336342255, "grad_norm": 0.36328125, "learning_rate": 0.0013216064667278496, "loss": 0.1458, "step": 40982 }, { "epoch": 0.07266801952873236, "grad_norm": 0.6953125, "learning_rate": 0.0013215487800803721, "loss": 0.2164, "step": 40984 }, { "epoch": 0.07267156569404218, "grad_norm": 0.6171875, "learning_rate": 0.0013214910924639691, "loss": 0.1336, "step": 40986 }, { "epoch": 0.07267511185935199, "grad_norm": 0.2470703125, "learning_rate": 0.0013214334038788927, "loss": 0.176, "step": 40988 }, { "epoch": 0.07267865802466181, "grad_norm": 1.078125, "learning_rate": 0.0013213757143253951, "loss": 0.145, "step": 40990 }, { "epoch": 0.07268220418997162, "grad_norm": 1.125, "learning_rate": 0.0013213180238037287, "loss": 0.2553, "step": 40992 }, { "epoch": 0.07268575035528144, "grad_norm": 0.86328125, "learning_rate": 0.001321260332314146, "loss": 0.2065, "step": 40994 }, { "epoch": 0.07268929652059125, "grad_norm": 0.318359375, "learning_rate": 0.001321202639856899, "loss": 0.1321, "step": 40996 }, { "epoch": 0.07269284268590107, "grad_norm": 2.0, "learning_rate": 0.0013211449464322403, "loss": 0.2983, "step": 40998 }, { "epoch": 0.07269638885121088, "grad_norm": 0.53515625, "learning_rate": 0.0013210872520404218, "loss": 0.2136, "step": 41000 }, { "epoch": 0.0726999350165207, "grad_norm": 1.265625, "learning_rate": 0.0013210295566816964, "loss": 0.2561, "step": 41002 }, { "epoch": 0.07270348118183051, "grad_norm": 0.89453125, "learning_rate": 0.0013209718603563157, "loss": 0.2371, "step": 41004 }, { "epoch": 0.07270702734714032, "grad_norm": 0.91796875, "learning_rate": 0.0013209141630645327, "loss": 0.4018, "step": 41006 }, { "epoch": 0.07271057351245014, "grad_norm": 0.2138671875, "learning_rate": 0.0013208564648065993, "loss": 0.1566, "step": 41008 }, { "epoch": 0.07271411967775995, "grad_norm": 0.75, "learning_rate": 0.0013207987655827682, "loss": 0.221, "step": 41010 }, { "epoch": 0.07271766584306977, "grad_norm": 0.2421875, "learning_rate": 0.0013207410653932913, "loss": 0.1432, "step": 41012 }, { "epoch": 0.07272121200837958, "grad_norm": 0.58984375, "learning_rate": 0.0013206833642384216, "loss": 0.2617, "step": 41014 }, { "epoch": 0.07272475817368941, "grad_norm": 0.357421875, "learning_rate": 0.0013206256621184108, "loss": 0.161, "step": 41016 }, { "epoch": 0.07272830433899923, "grad_norm": 0.92578125, "learning_rate": 0.0013205679590335118, "loss": 0.2427, "step": 41018 }, { "epoch": 0.07273185050430904, "grad_norm": 0.578125, "learning_rate": 0.0013205102549839762, "loss": 0.1577, "step": 41020 }, { "epoch": 0.07273539666961885, "grad_norm": 0.46484375, "learning_rate": 0.0013204525499700573, "loss": 0.1241, "step": 41022 }, { "epoch": 0.07273894283492867, "grad_norm": 0.5859375, "learning_rate": 0.0013203948439920066, "loss": 0.2029, "step": 41024 }, { "epoch": 0.07274248900023848, "grad_norm": 1.0234375, "learning_rate": 0.0013203371370500772, "loss": 0.1665, "step": 41026 }, { "epoch": 0.0727460351655483, "grad_norm": 0.59765625, "learning_rate": 0.0013202794291445209, "loss": 0.1667, "step": 41028 }, { "epoch": 0.07274958133085811, "grad_norm": 0.208984375, "learning_rate": 0.0013202217202755904, "loss": 0.1605, "step": 41030 }, { "epoch": 0.07275312749616793, "grad_norm": 0.162109375, "learning_rate": 0.001320164010443538, "loss": 0.2901, "step": 41032 }, { "epoch": 0.07275667366147774, "grad_norm": 0.546875, "learning_rate": 0.0013201062996486162, "loss": 0.1899, "step": 41034 }, { "epoch": 0.07276021982678756, "grad_norm": 0.44921875, "learning_rate": 0.0013200485878910773, "loss": 0.1849, "step": 41036 }, { "epoch": 0.07276376599209737, "grad_norm": 0.25390625, "learning_rate": 0.0013199908751711739, "loss": 0.1961, "step": 41038 }, { "epoch": 0.07276731215740719, "grad_norm": 0.2275390625, "learning_rate": 0.0013199331614891577, "loss": 0.1398, "step": 41040 }, { "epoch": 0.072770858322717, "grad_norm": 0.287109375, "learning_rate": 0.0013198754468452818, "loss": 0.1614, "step": 41042 }, { "epoch": 0.07277440448802681, "grad_norm": 0.55859375, "learning_rate": 0.0013198177312397984, "loss": 0.2253, "step": 41044 }, { "epoch": 0.07277795065333663, "grad_norm": 0.2001953125, "learning_rate": 0.0013197600146729598, "loss": 0.4288, "step": 41046 }, { "epoch": 0.07278149681864644, "grad_norm": 0.515625, "learning_rate": 0.0013197022971450187, "loss": 0.2431, "step": 41048 }, { "epoch": 0.07278504298395626, "grad_norm": 0.490234375, "learning_rate": 0.0013196445786562276, "loss": 0.1883, "step": 41050 }, { "epoch": 0.07278858914926607, "grad_norm": 0.76171875, "learning_rate": 0.0013195868592068385, "loss": 0.3122, "step": 41052 }, { "epoch": 0.07279213531457589, "grad_norm": 0.9765625, "learning_rate": 0.001319529138797104, "loss": 0.1774, "step": 41054 }, { "epoch": 0.0727956814798857, "grad_norm": 0.55859375, "learning_rate": 0.0013194714174272766, "loss": 0.1905, "step": 41056 }, { "epoch": 0.07279922764519552, "grad_norm": 0.703125, "learning_rate": 0.0013194136950976086, "loss": 0.158, "step": 41058 }, { "epoch": 0.07280277381050533, "grad_norm": 1.015625, "learning_rate": 0.0013193559718083527, "loss": 0.3494, "step": 41060 }, { "epoch": 0.07280631997581516, "grad_norm": 1.8671875, "learning_rate": 0.001319298247559761, "loss": 0.2368, "step": 41062 }, { "epoch": 0.07280986614112497, "grad_norm": 0.57421875, "learning_rate": 0.0013192405223520865, "loss": 0.2575, "step": 41064 }, { "epoch": 0.07281341230643479, "grad_norm": 1.5234375, "learning_rate": 0.001319182796185581, "loss": 0.3297, "step": 41066 }, { "epoch": 0.0728169584717446, "grad_norm": 0.279296875, "learning_rate": 0.0013191250690604975, "loss": 0.1404, "step": 41068 }, { "epoch": 0.07282050463705442, "grad_norm": 3.71875, "learning_rate": 0.001319067340977088, "loss": 0.3984, "step": 41070 }, { "epoch": 0.07282405080236423, "grad_norm": 0.80859375, "learning_rate": 0.0013190096119356054, "loss": 0.2398, "step": 41072 }, { "epoch": 0.07282759696767405, "grad_norm": 0.6015625, "learning_rate": 0.0013189518819363018, "loss": 0.3268, "step": 41074 }, { "epoch": 0.07283114313298386, "grad_norm": 4.3125, "learning_rate": 0.0013188941509794298, "loss": 0.3831, "step": 41076 }, { "epoch": 0.07283468929829368, "grad_norm": 0.5703125, "learning_rate": 0.0013188364190652422, "loss": 0.2231, "step": 41078 }, { "epoch": 0.07283823546360349, "grad_norm": 0.7265625, "learning_rate": 0.001318778686193991, "loss": 0.2163, "step": 41080 }, { "epoch": 0.0728417816289133, "grad_norm": 0.2119140625, "learning_rate": 0.001318720952365929, "loss": 0.1993, "step": 41082 }, { "epoch": 0.07284532779422312, "grad_norm": 0.380859375, "learning_rate": 0.0013186632175813087, "loss": 0.177, "step": 41084 }, { "epoch": 0.07284887395953293, "grad_norm": 1.9921875, "learning_rate": 0.001318605481840382, "loss": 0.2076, "step": 41086 }, { "epoch": 0.07285242012484275, "grad_norm": 1.0625, "learning_rate": 0.0013185477451434025, "loss": 0.2971, "step": 41088 }, { "epoch": 0.07285596629015256, "grad_norm": 0.25390625, "learning_rate": 0.001318490007490622, "loss": 0.3296, "step": 41090 }, { "epoch": 0.07285951245546238, "grad_norm": 1.375, "learning_rate": 0.001318432268882293, "loss": 0.2084, "step": 41092 }, { "epoch": 0.07286305862077219, "grad_norm": 0.302734375, "learning_rate": 0.0013183745293186679, "loss": 0.1381, "step": 41094 }, { "epoch": 0.072866604786082, "grad_norm": 0.28515625, "learning_rate": 0.0013183167887999999, "loss": 0.2011, "step": 41096 }, { "epoch": 0.07287015095139182, "grad_norm": 0.5625, "learning_rate": 0.001318259047326541, "loss": 0.1627, "step": 41098 }, { "epoch": 0.07287369711670164, "grad_norm": 0.4296875, "learning_rate": 0.0013182013048985437, "loss": 0.142, "step": 41100 }, { "epoch": 0.07287724328201145, "grad_norm": 0.51171875, "learning_rate": 0.0013181435615162603, "loss": 0.2254, "step": 41102 }, { "epoch": 0.07288078944732126, "grad_norm": 0.44140625, "learning_rate": 0.001318085817179944, "loss": 0.1791, "step": 41104 }, { "epoch": 0.07288433561263108, "grad_norm": 0.31640625, "learning_rate": 0.001318028071889847, "loss": 0.2108, "step": 41106 }, { "epoch": 0.07288788177794091, "grad_norm": 1.25, "learning_rate": 0.0013179703256462221, "loss": 0.2689, "step": 41108 }, { "epoch": 0.07289142794325072, "grad_norm": 3.125, "learning_rate": 0.0013179125784493213, "loss": 0.2566, "step": 41110 }, { "epoch": 0.07289497410856054, "grad_norm": 0.330078125, "learning_rate": 0.0013178548302993978, "loss": 0.1727, "step": 41112 }, { "epoch": 0.07289852027387035, "grad_norm": 0.48046875, "learning_rate": 0.0013177970811967033, "loss": 0.1442, "step": 41114 }, { "epoch": 0.07290206643918017, "grad_norm": 0.53515625, "learning_rate": 0.0013177393311414912, "loss": 0.2195, "step": 41116 }, { "epoch": 0.07290561260448998, "grad_norm": 1.1640625, "learning_rate": 0.0013176815801340135, "loss": 0.4977, "step": 41118 }, { "epoch": 0.0729091587697998, "grad_norm": 0.58203125, "learning_rate": 0.0013176238281745235, "loss": 0.2233, "step": 41120 }, { "epoch": 0.07291270493510961, "grad_norm": 0.375, "learning_rate": 0.001317566075263273, "loss": 0.1613, "step": 41122 }, { "epoch": 0.07291625110041942, "grad_norm": 0.83984375, "learning_rate": 0.0013175083214005147, "loss": 0.2001, "step": 41124 }, { "epoch": 0.07291979726572924, "grad_norm": 0.72265625, "learning_rate": 0.0013174505665865016, "loss": 0.1667, "step": 41126 }, { "epoch": 0.07292334343103905, "grad_norm": 0.330078125, "learning_rate": 0.0013173928108214858, "loss": 0.1293, "step": 41128 }, { "epoch": 0.07292688959634887, "grad_norm": 1.2734375, "learning_rate": 0.0013173350541057205, "loss": 0.2438, "step": 41130 }, { "epoch": 0.07293043576165868, "grad_norm": 0.376953125, "learning_rate": 0.0013172772964394573, "loss": 0.1818, "step": 41132 }, { "epoch": 0.0729339819269685, "grad_norm": 0.5, "learning_rate": 0.00131721953782295, "loss": 0.1736, "step": 41134 }, { "epoch": 0.07293752809227831, "grad_norm": 1.1484375, "learning_rate": 0.0013171617782564502, "loss": 0.174, "step": 41136 }, { "epoch": 0.07294107425758813, "grad_norm": 0.4296875, "learning_rate": 0.0013171040177402114, "loss": 0.1552, "step": 41138 }, { "epoch": 0.07294462042289794, "grad_norm": 0.267578125, "learning_rate": 0.0013170462562744855, "loss": 0.1686, "step": 41140 }, { "epoch": 0.07294816658820776, "grad_norm": 0.25390625, "learning_rate": 0.0013169884938595252, "loss": 0.1776, "step": 41142 }, { "epoch": 0.07295171275351757, "grad_norm": 0.45703125, "learning_rate": 0.0013169307304955833, "loss": 0.1464, "step": 41144 }, { "epoch": 0.07295525891882738, "grad_norm": 1.484375, "learning_rate": 0.0013168729661829127, "loss": 0.1942, "step": 41146 }, { "epoch": 0.0729588050841372, "grad_norm": 0.46875, "learning_rate": 0.0013168152009217654, "loss": 0.1812, "step": 41148 }, { "epoch": 0.07296235124944701, "grad_norm": 0.298828125, "learning_rate": 0.0013167574347123946, "loss": 0.161, "step": 41150 }, { "epoch": 0.07296589741475684, "grad_norm": 0.71484375, "learning_rate": 0.0013166996675550522, "loss": 0.215, "step": 41152 }, { "epoch": 0.07296944358006666, "grad_norm": 1.4140625, "learning_rate": 0.0013166418994499918, "loss": 0.1688, "step": 41154 }, { "epoch": 0.07297298974537647, "grad_norm": 0.76953125, "learning_rate": 0.0013165841303974654, "loss": 0.1668, "step": 41156 }, { "epoch": 0.07297653591068629, "grad_norm": 0.265625, "learning_rate": 0.001316526360397726, "loss": 0.2041, "step": 41158 }, { "epoch": 0.0729800820759961, "grad_norm": 1.1640625, "learning_rate": 0.0013164685894510259, "loss": 0.2378, "step": 41160 }, { "epoch": 0.07298362824130591, "grad_norm": 0.7734375, "learning_rate": 0.0013164108175576178, "loss": 0.1961, "step": 41162 }, { "epoch": 0.07298717440661573, "grad_norm": 1.171875, "learning_rate": 0.0013163530447177548, "loss": 0.1275, "step": 41164 }, { "epoch": 0.07299072057192554, "grad_norm": 0.71484375, "learning_rate": 0.001316295270931689, "loss": 0.2371, "step": 41166 }, { "epoch": 0.07299426673723536, "grad_norm": 0.30859375, "learning_rate": 0.0013162374961996734, "loss": 0.1293, "step": 41168 }, { "epoch": 0.07299781290254517, "grad_norm": 0.39453125, "learning_rate": 0.0013161797205219607, "loss": 0.1908, "step": 41170 }, { "epoch": 0.07300135906785499, "grad_norm": 0.37109375, "learning_rate": 0.0013161219438988033, "loss": 0.1988, "step": 41172 }, { "epoch": 0.0730049052331648, "grad_norm": 0.43359375, "learning_rate": 0.0013160641663304542, "loss": 0.183, "step": 41174 }, { "epoch": 0.07300845139847462, "grad_norm": 0.453125, "learning_rate": 0.0013160063878171657, "loss": 0.1932, "step": 41176 }, { "epoch": 0.07301199756378443, "grad_norm": 0.484375, "learning_rate": 0.001315948608359191, "loss": 0.1775, "step": 41178 }, { "epoch": 0.07301554372909425, "grad_norm": 0.6328125, "learning_rate": 0.0013158908279567825, "loss": 0.189, "step": 41180 }, { "epoch": 0.07301908989440406, "grad_norm": 0.45703125, "learning_rate": 0.001315833046610193, "loss": 0.2757, "step": 41182 }, { "epoch": 0.07302263605971387, "grad_norm": 0.54296875, "learning_rate": 0.001315775264319675, "loss": 0.2798, "step": 41184 }, { "epoch": 0.07302618222502369, "grad_norm": 0.41796875, "learning_rate": 0.0013157174810854814, "loss": 0.1842, "step": 41186 }, { "epoch": 0.0730297283903335, "grad_norm": 0.8203125, "learning_rate": 0.0013156596969078646, "loss": 0.1971, "step": 41188 }, { "epoch": 0.07303327455564332, "grad_norm": 0.546875, "learning_rate": 0.001315601911787078, "loss": 0.1729, "step": 41190 }, { "epoch": 0.07303682072095313, "grad_norm": 0.68359375, "learning_rate": 0.0013155441257233734, "loss": 0.1827, "step": 41192 }, { "epoch": 0.07304036688626295, "grad_norm": 0.5859375, "learning_rate": 0.0013154863387170041, "loss": 0.2658, "step": 41194 }, { "epoch": 0.07304391305157276, "grad_norm": 0.55078125, "learning_rate": 0.001315428550768223, "loss": 0.2529, "step": 41196 }, { "epoch": 0.07304745921688259, "grad_norm": 1.9296875, "learning_rate": 0.0013153707618772824, "loss": 0.2399, "step": 41198 }, { "epoch": 0.0730510053821924, "grad_norm": 0.40625, "learning_rate": 0.001315312972044435, "loss": 0.1756, "step": 41200 }, { "epoch": 0.07305455154750222, "grad_norm": 0.83203125, "learning_rate": 0.001315255181269934, "loss": 0.2159, "step": 41202 }, { "epoch": 0.07305809771281203, "grad_norm": 0.9375, "learning_rate": 0.0013151973895540318, "loss": 0.3578, "step": 41204 }, { "epoch": 0.07306164387812185, "grad_norm": 0.447265625, "learning_rate": 0.0013151395968969812, "loss": 0.2046, "step": 41206 }, { "epoch": 0.07306519004343166, "grad_norm": 0.2490234375, "learning_rate": 0.0013150818032990348, "loss": 0.1681, "step": 41208 }, { "epoch": 0.07306873620874148, "grad_norm": 0.58984375, "learning_rate": 0.0013150240087604456, "loss": 0.1496, "step": 41210 }, { "epoch": 0.07307228237405129, "grad_norm": 0.37890625, "learning_rate": 0.0013149662132814666, "loss": 0.1566, "step": 41212 }, { "epoch": 0.0730758285393611, "grad_norm": 1.3046875, "learning_rate": 0.00131490841686235, "loss": 0.2143, "step": 41214 }, { "epoch": 0.07307937470467092, "grad_norm": 0.43359375, "learning_rate": 0.0013148506195033488, "loss": 0.1567, "step": 41216 }, { "epoch": 0.07308292086998074, "grad_norm": 0.48828125, "learning_rate": 0.0013147928212047157, "loss": 0.2904, "step": 41218 }, { "epoch": 0.07308646703529055, "grad_norm": 0.71484375, "learning_rate": 0.0013147350219667039, "loss": 0.2083, "step": 41220 }, { "epoch": 0.07309001320060036, "grad_norm": 0.484375, "learning_rate": 0.0013146772217895657, "loss": 0.1961, "step": 41222 }, { "epoch": 0.07309355936591018, "grad_norm": 0.255859375, "learning_rate": 0.001314619420673554, "loss": 0.2287, "step": 41224 }, { "epoch": 0.07309710553122, "grad_norm": 0.33984375, "learning_rate": 0.0013145616186189216, "loss": 0.1655, "step": 41226 }, { "epoch": 0.07310065169652981, "grad_norm": 0.609375, "learning_rate": 0.0013145038156259213, "loss": 0.163, "step": 41228 }, { "epoch": 0.07310419786183962, "grad_norm": 0.58984375, "learning_rate": 0.0013144460116948059, "loss": 0.1916, "step": 41230 }, { "epoch": 0.07310774402714944, "grad_norm": 0.48046875, "learning_rate": 0.0013143882068258283, "loss": 0.17, "step": 41232 }, { "epoch": 0.07311129019245925, "grad_norm": 0.38671875, "learning_rate": 0.0013143304010192409, "loss": 0.18, "step": 41234 }, { "epoch": 0.07311483635776907, "grad_norm": 0.78125, "learning_rate": 0.0013142725942752972, "loss": 0.2144, "step": 41236 }, { "epoch": 0.07311838252307888, "grad_norm": 0.52734375, "learning_rate": 0.0013142147865942496, "loss": 0.1978, "step": 41238 }, { "epoch": 0.0731219286883887, "grad_norm": 0.4140625, "learning_rate": 0.0013141569779763509, "loss": 0.2616, "step": 41240 }, { "epoch": 0.07312547485369851, "grad_norm": 0.51953125, "learning_rate": 0.0013140991684218537, "loss": 0.151, "step": 41242 }, { "epoch": 0.07312902101900834, "grad_norm": 1.0859375, "learning_rate": 0.0013140413579310114, "loss": 0.2011, "step": 41244 }, { "epoch": 0.07313256718431815, "grad_norm": 0.1923828125, "learning_rate": 0.0013139835465040763, "loss": 0.1406, "step": 41246 }, { "epoch": 0.07313611334962797, "grad_norm": 0.50390625, "learning_rate": 0.0013139257341413019, "loss": 0.1509, "step": 41248 }, { "epoch": 0.07313965951493778, "grad_norm": 0.35546875, "learning_rate": 0.0013138679208429401, "loss": 0.1778, "step": 41250 }, { "epoch": 0.0731432056802476, "grad_norm": 0.171875, "learning_rate": 0.0013138101066092445, "loss": 0.1633, "step": 41252 }, { "epoch": 0.07314675184555741, "grad_norm": 1.3125, "learning_rate": 0.0013137522914404675, "loss": 0.3091, "step": 41254 }, { "epoch": 0.07315029801086723, "grad_norm": 0.26171875, "learning_rate": 0.0013136944753368626, "loss": 0.1967, "step": 41256 }, { "epoch": 0.07315384417617704, "grad_norm": 1.2109375, "learning_rate": 0.0013136366582986818, "loss": 0.207, "step": 41258 }, { "epoch": 0.07315739034148686, "grad_norm": 0.4921875, "learning_rate": 0.0013135788403261786, "loss": 0.2128, "step": 41260 }, { "epoch": 0.07316093650679667, "grad_norm": 0.2421875, "learning_rate": 0.0013135210214196055, "loss": 0.1666, "step": 41262 }, { "epoch": 0.07316448267210648, "grad_norm": 2.875, "learning_rate": 0.0013134632015792153, "loss": 0.2081, "step": 41264 }, { "epoch": 0.0731680288374163, "grad_norm": 0.279296875, "learning_rate": 0.0013134053808052612, "loss": 0.1918, "step": 41266 }, { "epoch": 0.07317157500272611, "grad_norm": 0.314453125, "learning_rate": 0.001313347559097996, "loss": 0.1763, "step": 41268 }, { "epoch": 0.07317512116803593, "grad_norm": 0.236328125, "learning_rate": 0.0013132897364576724, "loss": 0.1663, "step": 41270 }, { "epoch": 0.07317866733334574, "grad_norm": 0.71484375, "learning_rate": 0.0013132319128845433, "loss": 0.1898, "step": 41272 }, { "epoch": 0.07318221349865556, "grad_norm": 0.8828125, "learning_rate": 0.0013131740883788616, "loss": 0.1926, "step": 41274 }, { "epoch": 0.07318575966396537, "grad_norm": 0.28125, "learning_rate": 0.0013131162629408807, "loss": 0.1861, "step": 41276 }, { "epoch": 0.07318930582927519, "grad_norm": 0.267578125, "learning_rate": 0.0013130584365708528, "loss": 0.1474, "step": 41278 }, { "epoch": 0.073192851994585, "grad_norm": 0.30859375, "learning_rate": 0.0013130006092690312, "loss": 0.1917, "step": 41280 }, { "epoch": 0.07319639815989482, "grad_norm": 0.6484375, "learning_rate": 0.0013129427810356683, "loss": 0.3905, "step": 41282 }, { "epoch": 0.07319994432520463, "grad_norm": 0.2275390625, "learning_rate": 0.0013128849518710178, "loss": 0.1955, "step": 41284 }, { "epoch": 0.07320349049051444, "grad_norm": 1.1328125, "learning_rate": 0.0013128271217753317, "loss": 0.273, "step": 41286 }, { "epoch": 0.07320703665582427, "grad_norm": 0.24609375, "learning_rate": 0.0013127692907488637, "loss": 0.1351, "step": 41288 }, { "epoch": 0.07321058282113409, "grad_norm": 0.3515625, "learning_rate": 0.0013127114587918663, "loss": 0.1933, "step": 41290 }, { "epoch": 0.0732141289864439, "grad_norm": 1.625, "learning_rate": 0.001312653625904593, "loss": 0.2184, "step": 41292 }, { "epoch": 0.07321767515175372, "grad_norm": 1.6015625, "learning_rate": 0.0013125957920872957, "loss": 0.2104, "step": 41294 }, { "epoch": 0.07322122131706353, "grad_norm": 0.48046875, "learning_rate": 0.001312537957340228, "loss": 0.1686, "step": 41296 }, { "epoch": 0.07322476748237335, "grad_norm": 1.0546875, "learning_rate": 0.001312480121663643, "loss": 0.1977, "step": 41298 }, { "epoch": 0.07322831364768316, "grad_norm": 0.1845703125, "learning_rate": 0.0013124222850577928, "loss": 0.1475, "step": 41300 }, { "epoch": 0.07323185981299297, "grad_norm": 0.40625, "learning_rate": 0.0013123644475229316, "loss": 0.1922, "step": 41302 }, { "epoch": 0.07323540597830279, "grad_norm": 0.5, "learning_rate": 0.0013123066090593113, "loss": 0.2174, "step": 41304 }, { "epoch": 0.0732389521436126, "grad_norm": 0.63671875, "learning_rate": 0.0013122487696671853, "loss": 0.2012, "step": 41306 }, { "epoch": 0.07324249830892242, "grad_norm": 0.78515625, "learning_rate": 0.001312190929346806, "loss": 0.257, "step": 41308 }, { "epoch": 0.07324604447423223, "grad_norm": 0.451171875, "learning_rate": 0.0013121330880984277, "loss": 0.1976, "step": 41310 }, { "epoch": 0.07324959063954205, "grad_norm": 0.400390625, "learning_rate": 0.001312075245922302, "loss": 0.2025, "step": 41312 }, { "epoch": 0.07325313680485186, "grad_norm": 0.369140625, "learning_rate": 0.0013120174028186824, "loss": 0.1719, "step": 41314 }, { "epoch": 0.07325668297016168, "grad_norm": 0.36328125, "learning_rate": 0.0013119595587878217, "loss": 0.3545, "step": 41316 }, { "epoch": 0.07326022913547149, "grad_norm": 2.65625, "learning_rate": 0.0013119017138299733, "loss": 0.319, "step": 41318 }, { "epoch": 0.0732637753007813, "grad_norm": 0.216796875, "learning_rate": 0.0013118438679453896, "loss": 0.24, "step": 41320 }, { "epoch": 0.07326732146609112, "grad_norm": 0.8671875, "learning_rate": 0.0013117860211343242, "loss": 0.195, "step": 41322 }, { "epoch": 0.07327086763140093, "grad_norm": 0.255859375, "learning_rate": 0.0013117281733970294, "loss": 0.1463, "step": 41324 }, { "epoch": 0.07327441379671075, "grad_norm": 0.5546875, "learning_rate": 0.001311670324733759, "loss": 0.1874, "step": 41326 }, { "epoch": 0.07327795996202056, "grad_norm": 0.427734375, "learning_rate": 0.001311612475144765, "loss": 0.2333, "step": 41328 }, { "epoch": 0.07328150612733038, "grad_norm": 0.4296875, "learning_rate": 0.0013115546246303016, "loss": 0.2583, "step": 41330 }, { "epoch": 0.0732850522926402, "grad_norm": 0.625, "learning_rate": 0.0013114967731906208, "loss": 0.2637, "step": 41332 }, { "epoch": 0.07328859845795002, "grad_norm": 0.1337890625, "learning_rate": 0.001311438920825976, "loss": 0.1361, "step": 41334 }, { "epoch": 0.07329214462325984, "grad_norm": 3.5, "learning_rate": 0.00131138106753662, "loss": 0.2473, "step": 41336 }, { "epoch": 0.07329569078856965, "grad_norm": 0.408203125, "learning_rate": 0.0013113232133228063, "loss": 0.1688, "step": 41338 }, { "epoch": 0.07329923695387947, "grad_norm": 4.4375, "learning_rate": 0.0013112653581847872, "loss": 0.2558, "step": 41340 }, { "epoch": 0.07330278311918928, "grad_norm": 0.30859375, "learning_rate": 0.0013112075021228166, "loss": 0.1707, "step": 41342 }, { "epoch": 0.0733063292844991, "grad_norm": 0.73828125, "learning_rate": 0.001311149645137147, "loss": 0.2194, "step": 41344 }, { "epoch": 0.07330987544980891, "grad_norm": 0.92578125, "learning_rate": 0.0013110917872280311, "loss": 0.2074, "step": 41346 }, { "epoch": 0.07331342161511872, "grad_norm": 0.318359375, "learning_rate": 0.0013110339283957226, "loss": 0.1688, "step": 41348 }, { "epoch": 0.07331696778042854, "grad_norm": 0.85546875, "learning_rate": 0.0013109760686404745, "loss": 0.1948, "step": 41350 }, { "epoch": 0.07332051394573835, "grad_norm": 0.2421875, "learning_rate": 0.0013109182079625394, "loss": 0.1757, "step": 41352 }, { "epoch": 0.07332406011104817, "grad_norm": 0.61328125, "learning_rate": 0.0013108603463621706, "loss": 0.2578, "step": 41354 }, { "epoch": 0.07332760627635798, "grad_norm": 0.298828125, "learning_rate": 0.0013108024838396212, "loss": 0.1661, "step": 41356 }, { "epoch": 0.0733311524416678, "grad_norm": 0.365234375, "learning_rate": 0.0013107446203951443, "loss": 0.1811, "step": 41358 }, { "epoch": 0.07333469860697761, "grad_norm": 1.0390625, "learning_rate": 0.0013106867560289924, "loss": 0.1941, "step": 41360 }, { "epoch": 0.07333824477228743, "grad_norm": 1.5546875, "learning_rate": 0.0013106288907414195, "loss": 0.3684, "step": 41362 }, { "epoch": 0.07334179093759724, "grad_norm": 0.3671875, "learning_rate": 0.0013105710245326777, "loss": 0.1849, "step": 41364 }, { "epoch": 0.07334533710290705, "grad_norm": 0.45703125, "learning_rate": 0.0013105131574030209, "loss": 0.1698, "step": 41366 }, { "epoch": 0.07334888326821687, "grad_norm": 1.4453125, "learning_rate": 0.0013104552893527018, "loss": 0.2705, "step": 41368 }, { "epoch": 0.07335242943352668, "grad_norm": 0.859375, "learning_rate": 0.0013103974203819732, "loss": 0.2174, "step": 41370 }, { "epoch": 0.0733559755988365, "grad_norm": 0.7421875, "learning_rate": 0.0013103395504910891, "loss": 0.2096, "step": 41372 }, { "epoch": 0.07335952176414631, "grad_norm": 0.7421875, "learning_rate": 0.0013102816796803016, "loss": 0.2023, "step": 41374 }, { "epoch": 0.07336306792945613, "grad_norm": 0.255859375, "learning_rate": 0.001310223807949864, "loss": 0.2426, "step": 41376 }, { "epoch": 0.07336661409476594, "grad_norm": 0.369140625, "learning_rate": 0.00131016593530003, "loss": 0.1549, "step": 41378 }, { "epoch": 0.07337016026007577, "grad_norm": 0.78515625, "learning_rate": 0.0013101080617310518, "loss": 0.167, "step": 41380 }, { "epoch": 0.07337370642538558, "grad_norm": 1.4296875, "learning_rate": 0.001310050187243183, "loss": 0.2515, "step": 41382 }, { "epoch": 0.0733772525906954, "grad_norm": 0.19140625, "learning_rate": 0.0013099923118366772, "loss": 0.1436, "step": 41384 }, { "epoch": 0.07338079875600521, "grad_norm": 0.48046875, "learning_rate": 0.0013099344355117866, "loss": 0.2421, "step": 41386 }, { "epoch": 0.07338434492131503, "grad_norm": 0.67578125, "learning_rate": 0.0013098765582687647, "loss": 0.2051, "step": 41388 }, { "epoch": 0.07338789108662484, "grad_norm": 0.6171875, "learning_rate": 0.0013098186801078651, "loss": 0.1689, "step": 41390 }, { "epoch": 0.07339143725193466, "grad_norm": 0.65625, "learning_rate": 0.0013097608010293402, "loss": 0.1641, "step": 41392 }, { "epoch": 0.07339498341724447, "grad_norm": 0.333984375, "learning_rate": 0.0013097029210334436, "loss": 0.2361, "step": 41394 }, { "epoch": 0.07339852958255429, "grad_norm": 0.4296875, "learning_rate": 0.001309645040120428, "loss": 0.1949, "step": 41396 }, { "epoch": 0.0734020757478641, "grad_norm": 0.51953125, "learning_rate": 0.0013095871582905465, "loss": 0.1869, "step": 41398 }, { "epoch": 0.07340562191317392, "grad_norm": 0.67578125, "learning_rate": 0.0013095292755440529, "loss": 0.203, "step": 41400 }, { "epoch": 0.07340916807848373, "grad_norm": 1.0703125, "learning_rate": 0.0013094713918811998, "loss": 0.2526, "step": 41402 }, { "epoch": 0.07341271424379354, "grad_norm": 0.478515625, "learning_rate": 0.0013094135073022405, "loss": 0.197, "step": 41404 }, { "epoch": 0.07341626040910336, "grad_norm": 0.310546875, "learning_rate": 0.0013093556218074284, "loss": 0.163, "step": 41406 }, { "epoch": 0.07341980657441317, "grad_norm": 0.59765625, "learning_rate": 0.0013092977353970164, "loss": 0.2107, "step": 41408 }, { "epoch": 0.07342335273972299, "grad_norm": 3.484375, "learning_rate": 0.0013092398480712573, "loss": 0.3958, "step": 41410 }, { "epoch": 0.0734268989050328, "grad_norm": 0.39453125, "learning_rate": 0.001309181959830405, "loss": 0.202, "step": 41412 }, { "epoch": 0.07343044507034262, "grad_norm": 0.91796875, "learning_rate": 0.001309124070674712, "loss": 0.1483, "step": 41414 }, { "epoch": 0.07343399123565243, "grad_norm": 0.3671875, "learning_rate": 0.0013090661806044322, "loss": 0.3227, "step": 41416 }, { "epoch": 0.07343753740096225, "grad_norm": 0.380859375, "learning_rate": 0.0013090082896198179, "loss": 0.1505, "step": 41418 }, { "epoch": 0.07344108356627206, "grad_norm": 0.953125, "learning_rate": 0.001308950397721123, "loss": 0.1649, "step": 41420 }, { "epoch": 0.07344462973158188, "grad_norm": 0.5859375, "learning_rate": 0.0013088925049086003, "loss": 0.1981, "step": 41422 }, { "epoch": 0.0734481758968917, "grad_norm": 0.44921875, "learning_rate": 0.0013088346111825033, "loss": 0.1996, "step": 41424 }, { "epoch": 0.07345172206220152, "grad_norm": 1.328125, "learning_rate": 0.001308776716543085, "loss": 0.2283, "step": 41426 }, { "epoch": 0.07345526822751133, "grad_norm": 0.2138671875, "learning_rate": 0.0013087188209905986, "loss": 0.1287, "step": 41428 }, { "epoch": 0.07345881439282115, "grad_norm": 0.953125, "learning_rate": 0.0013086609245252974, "loss": 0.1926, "step": 41430 }, { "epoch": 0.07346236055813096, "grad_norm": 0.412109375, "learning_rate": 0.0013086030271474345, "loss": 0.1916, "step": 41432 }, { "epoch": 0.07346590672344078, "grad_norm": 0.69921875, "learning_rate": 0.001308545128857263, "loss": 0.1537, "step": 41434 }, { "epoch": 0.07346945288875059, "grad_norm": 1.0234375, "learning_rate": 0.0013084872296550364, "loss": 0.2194, "step": 41436 }, { "epoch": 0.0734729990540604, "grad_norm": 0.419921875, "learning_rate": 0.0013084293295410074, "loss": 0.2291, "step": 41438 }, { "epoch": 0.07347654521937022, "grad_norm": 0.76171875, "learning_rate": 0.00130837142851543, "loss": 0.1757, "step": 41440 }, { "epoch": 0.07348009138468004, "grad_norm": 0.8125, "learning_rate": 0.0013083135265785566, "loss": 0.1978, "step": 41442 }, { "epoch": 0.07348363754998985, "grad_norm": 0.83984375, "learning_rate": 0.001308255623730641, "loss": 0.2408, "step": 41444 }, { "epoch": 0.07348718371529966, "grad_norm": 0.291015625, "learning_rate": 0.0013081977199719365, "loss": 0.2416, "step": 41446 }, { "epoch": 0.07349072988060948, "grad_norm": 0.232421875, "learning_rate": 0.001308139815302696, "loss": 0.1467, "step": 41448 }, { "epoch": 0.0734942760459193, "grad_norm": 0.40234375, "learning_rate": 0.0013080819097231729, "loss": 0.1686, "step": 41450 }, { "epoch": 0.07349782221122911, "grad_norm": 0.54296875, "learning_rate": 0.0013080240032336203, "loss": 0.2018, "step": 41452 }, { "epoch": 0.07350136837653892, "grad_norm": 0.291015625, "learning_rate": 0.0013079660958342913, "loss": 0.1724, "step": 41454 }, { "epoch": 0.07350491454184874, "grad_norm": 0.3671875, "learning_rate": 0.00130790818752544, "loss": 0.2432, "step": 41456 }, { "epoch": 0.07350846070715855, "grad_norm": 0.326171875, "learning_rate": 0.0013078502783073183, "loss": 0.1736, "step": 41458 }, { "epoch": 0.07351200687246837, "grad_norm": 0.3046875, "learning_rate": 0.0013077923681801807, "loss": 0.1754, "step": 41460 }, { "epoch": 0.07351555303777818, "grad_norm": 0.419921875, "learning_rate": 0.0013077344571442797, "loss": 0.2174, "step": 41462 }, { "epoch": 0.073519099203088, "grad_norm": 0.55078125, "learning_rate": 0.0013076765451998694, "loss": 0.1755, "step": 41464 }, { "epoch": 0.07352264536839781, "grad_norm": 0.369140625, "learning_rate": 0.0013076186323472021, "loss": 0.2056, "step": 41466 }, { "epoch": 0.07352619153370762, "grad_norm": 0.75, "learning_rate": 0.0013075607185865316, "loss": 0.1787, "step": 41468 }, { "epoch": 0.07352973769901745, "grad_norm": 0.318359375, "learning_rate": 0.001307502803918111, "loss": 0.1623, "step": 41470 }, { "epoch": 0.07353328386432727, "grad_norm": 0.75, "learning_rate": 0.0013074448883421935, "loss": 0.2601, "step": 41472 }, { "epoch": 0.07353683002963708, "grad_norm": 1.9375, "learning_rate": 0.001307386971859033, "loss": 0.2961, "step": 41474 }, { "epoch": 0.0735403761949469, "grad_norm": 0.244140625, "learning_rate": 0.001307329054468882, "loss": 0.1887, "step": 41476 }, { "epoch": 0.07354392236025671, "grad_norm": 0.2451171875, "learning_rate": 0.0013072711361719943, "loss": 0.2729, "step": 41478 }, { "epoch": 0.07354746852556653, "grad_norm": 0.88671875, "learning_rate": 0.0013072132169686228, "loss": 0.1662, "step": 41480 }, { "epoch": 0.07355101469087634, "grad_norm": 1.0859375, "learning_rate": 0.0013071552968590217, "loss": 0.2103, "step": 41482 }, { "epoch": 0.07355456085618615, "grad_norm": 0.47265625, "learning_rate": 0.001307097375843443, "loss": 0.2655, "step": 41484 }, { "epoch": 0.07355810702149597, "grad_norm": 0.453125, "learning_rate": 0.001307039453922141, "loss": 0.2527, "step": 41486 }, { "epoch": 0.07356165318680578, "grad_norm": 0.359375, "learning_rate": 0.0013069815310953684, "loss": 0.2048, "step": 41488 }, { "epoch": 0.0735651993521156, "grad_norm": 0.369140625, "learning_rate": 0.0013069236073633792, "loss": 0.2179, "step": 41490 }, { "epoch": 0.07356874551742541, "grad_norm": 0.251953125, "learning_rate": 0.0013068656827264261, "loss": 0.172, "step": 41492 }, { "epoch": 0.07357229168273523, "grad_norm": 0.3515625, "learning_rate": 0.0013068077571847627, "loss": 0.4337, "step": 41494 }, { "epoch": 0.07357583784804504, "grad_norm": 0.466796875, "learning_rate": 0.0013067498307386422, "loss": 0.1824, "step": 41496 }, { "epoch": 0.07357938401335486, "grad_norm": 1.4140625, "learning_rate": 0.0013066919033883182, "loss": 0.2228, "step": 41498 }, { "epoch": 0.07358293017866467, "grad_norm": 0.3046875, "learning_rate": 0.0013066339751340442, "loss": 0.1906, "step": 41500 }, { "epoch": 0.07358647634397449, "grad_norm": 0.88671875, "learning_rate": 0.0013065760459760726, "loss": 0.2293, "step": 41502 }, { "epoch": 0.0735900225092843, "grad_norm": 0.53125, "learning_rate": 0.0013065181159146576, "loss": 0.2007, "step": 41504 }, { "epoch": 0.07359356867459411, "grad_norm": 1.21875, "learning_rate": 0.0013064601849500526, "loss": 0.1621, "step": 41506 }, { "epoch": 0.07359711483990393, "grad_norm": 0.41015625, "learning_rate": 0.0013064022530825102, "loss": 0.195, "step": 41508 }, { "epoch": 0.07360066100521374, "grad_norm": 0.443359375, "learning_rate": 0.0013063443203122844, "loss": 0.1923, "step": 41510 }, { "epoch": 0.07360420717052356, "grad_norm": 0.82421875, "learning_rate": 0.0013062863866396283, "loss": 0.144, "step": 41512 }, { "epoch": 0.07360775333583337, "grad_norm": 0.50390625, "learning_rate": 0.0013062284520647957, "loss": 0.1976, "step": 41514 }, { "epoch": 0.0736112995011432, "grad_norm": 0.77734375, "learning_rate": 0.0013061705165880392, "loss": 0.2153, "step": 41516 }, { "epoch": 0.07361484566645302, "grad_norm": 0.3515625, "learning_rate": 0.0013061125802096131, "loss": 0.1656, "step": 41518 }, { "epoch": 0.07361839183176283, "grad_norm": 0.4765625, "learning_rate": 0.0013060546429297703, "loss": 0.1617, "step": 41520 }, { "epoch": 0.07362193799707265, "grad_norm": 0.4296875, "learning_rate": 0.001305996704748764, "loss": 0.1797, "step": 41522 }, { "epoch": 0.07362548416238246, "grad_norm": 0.56640625, "learning_rate": 0.0013059387656668476, "loss": 0.1703, "step": 41524 }, { "epoch": 0.07362903032769227, "grad_norm": 0.30078125, "learning_rate": 0.001305880825684275, "loss": 0.1916, "step": 41526 }, { "epoch": 0.07363257649300209, "grad_norm": 0.94140625, "learning_rate": 0.0013058228848012987, "loss": 0.5319, "step": 41528 }, { "epoch": 0.0736361226583119, "grad_norm": 0.5859375, "learning_rate": 0.0013057649430181732, "loss": 0.2023, "step": 41530 }, { "epoch": 0.07363966882362172, "grad_norm": 0.4921875, "learning_rate": 0.001305707000335151, "loss": 0.2101, "step": 41532 }, { "epoch": 0.07364321498893153, "grad_norm": 0.435546875, "learning_rate": 0.001305649056752486, "loss": 0.2106, "step": 41534 }, { "epoch": 0.07364676115424135, "grad_norm": 1.6796875, "learning_rate": 0.0013055911122704315, "loss": 0.3871, "step": 41536 }, { "epoch": 0.07365030731955116, "grad_norm": 0.263671875, "learning_rate": 0.001305533166889241, "loss": 0.1505, "step": 41538 }, { "epoch": 0.07365385348486098, "grad_norm": 0.6796875, "learning_rate": 0.001305475220609168, "loss": 0.1405, "step": 41540 }, { "epoch": 0.07365739965017079, "grad_norm": 0.7265625, "learning_rate": 0.0013054172734304655, "loss": 0.205, "step": 41542 }, { "epoch": 0.0736609458154806, "grad_norm": 0.640625, "learning_rate": 0.0013053593253533869, "loss": 0.211, "step": 41544 }, { "epoch": 0.07366449198079042, "grad_norm": 0.41796875, "learning_rate": 0.001305301376378186, "loss": 0.2117, "step": 41546 }, { "epoch": 0.07366803814610023, "grad_norm": 0.5078125, "learning_rate": 0.0013052434265051166, "loss": 0.2186, "step": 41548 }, { "epoch": 0.07367158431141005, "grad_norm": 0.451171875, "learning_rate": 0.0013051854757344312, "loss": 0.2099, "step": 41550 }, { "epoch": 0.07367513047671986, "grad_norm": 0.486328125, "learning_rate": 0.0013051275240663837, "loss": 0.1504, "step": 41552 }, { "epoch": 0.07367867664202968, "grad_norm": 0.57421875, "learning_rate": 0.0013050695715012277, "loss": 0.1412, "step": 41554 }, { "epoch": 0.07368222280733949, "grad_norm": 0.3515625, "learning_rate": 0.0013050116180392166, "loss": 0.2693, "step": 41556 }, { "epoch": 0.07368576897264931, "grad_norm": 1.484375, "learning_rate": 0.001304953663680604, "loss": 0.2521, "step": 41558 }, { "epoch": 0.07368931513795914, "grad_norm": 0.8515625, "learning_rate": 0.0013048957084256427, "loss": 0.469, "step": 41560 }, { "epoch": 0.07369286130326895, "grad_norm": 0.50390625, "learning_rate": 0.0013048377522745867, "loss": 0.2491, "step": 41562 }, { "epoch": 0.07369640746857876, "grad_norm": 0.43359375, "learning_rate": 0.0013047797952276895, "loss": 0.1541, "step": 41564 }, { "epoch": 0.07369995363388858, "grad_norm": 0.396484375, "learning_rate": 0.001304721837285204, "loss": 0.1849, "step": 41566 }, { "epoch": 0.0737034997991984, "grad_norm": 0.53515625, "learning_rate": 0.0013046638784473845, "loss": 0.18, "step": 41568 }, { "epoch": 0.07370704596450821, "grad_norm": 0.33203125, "learning_rate": 0.0013046059187144842, "loss": 0.1587, "step": 41570 }, { "epoch": 0.07371059212981802, "grad_norm": 0.40625, "learning_rate": 0.001304547958086756, "loss": 0.266, "step": 41572 }, { "epoch": 0.07371413829512784, "grad_norm": 0.5625, "learning_rate": 0.001304489996564454, "loss": 0.1868, "step": 41574 }, { "epoch": 0.07371768446043765, "grad_norm": 0.30859375, "learning_rate": 0.001304432034147832, "loss": 0.1385, "step": 41576 }, { "epoch": 0.07372123062574747, "grad_norm": 0.369140625, "learning_rate": 0.0013043740708371424, "loss": 0.1593, "step": 41578 }, { "epoch": 0.07372477679105728, "grad_norm": 0.5703125, "learning_rate": 0.0013043161066326396, "loss": 0.2435, "step": 41580 }, { "epoch": 0.0737283229563671, "grad_norm": 0.6328125, "learning_rate": 0.0013042581415345766, "loss": 0.1711, "step": 41582 }, { "epoch": 0.07373186912167691, "grad_norm": 0.1640625, "learning_rate": 0.0013042001755432075, "loss": 0.1306, "step": 41584 }, { "epoch": 0.07373541528698672, "grad_norm": 0.69921875, "learning_rate": 0.0013041422086587852, "loss": 0.1609, "step": 41586 }, { "epoch": 0.07373896145229654, "grad_norm": 0.48046875, "learning_rate": 0.0013040842408815639, "loss": 0.1783, "step": 41588 }, { "epoch": 0.07374250761760635, "grad_norm": 0.396484375, "learning_rate": 0.0013040262722117959, "loss": 0.2472, "step": 41590 }, { "epoch": 0.07374605378291617, "grad_norm": 0.27734375, "learning_rate": 0.0013039683026497362, "loss": 0.1674, "step": 41592 }, { "epoch": 0.07374959994822598, "grad_norm": 1.484375, "learning_rate": 0.0013039103321956375, "loss": 0.2132, "step": 41594 }, { "epoch": 0.0737531461135358, "grad_norm": 0.5703125, "learning_rate": 0.0013038523608497532, "loss": 0.2473, "step": 41596 }, { "epoch": 0.07375669227884561, "grad_norm": 0.232421875, "learning_rate": 0.001303794388612337, "loss": 0.2141, "step": 41598 }, { "epoch": 0.07376023844415543, "grad_norm": 0.73828125, "learning_rate": 0.001303736415483643, "loss": 0.2428, "step": 41600 }, { "epoch": 0.07376378460946524, "grad_norm": 1.2890625, "learning_rate": 0.0013036784414639237, "loss": 0.315, "step": 41602 }, { "epoch": 0.07376733077477506, "grad_norm": 0.416015625, "learning_rate": 0.0013036204665534336, "loss": 0.1687, "step": 41604 }, { "epoch": 0.07377087694008488, "grad_norm": 0.59765625, "learning_rate": 0.0013035624907524258, "loss": 0.1882, "step": 41606 }, { "epoch": 0.0737744231053947, "grad_norm": 1.4765625, "learning_rate": 0.0013035045140611537, "loss": 0.2244, "step": 41608 }, { "epoch": 0.07377796927070451, "grad_norm": 0.328125, "learning_rate": 0.0013034465364798714, "loss": 0.1763, "step": 41610 }, { "epoch": 0.07378151543601433, "grad_norm": 0.498046875, "learning_rate": 0.0013033885580088318, "loss": 0.147, "step": 41612 }, { "epoch": 0.07378506160132414, "grad_norm": 0.29296875, "learning_rate": 0.001303330578648289, "loss": 0.1603, "step": 41614 }, { "epoch": 0.07378860776663396, "grad_norm": 0.2470703125, "learning_rate": 0.0013032725983984964, "loss": 0.1505, "step": 41616 }, { "epoch": 0.07379215393194377, "grad_norm": 0.84765625, "learning_rate": 0.0013032146172597073, "loss": 0.1766, "step": 41618 }, { "epoch": 0.07379570009725359, "grad_norm": 1.0859375, "learning_rate": 0.0013031566352321759, "loss": 0.2008, "step": 41620 }, { "epoch": 0.0737992462625634, "grad_norm": 8.875, "learning_rate": 0.0013030986523161548, "loss": 0.407, "step": 41622 }, { "epoch": 0.07380279242787322, "grad_norm": 0.31640625, "learning_rate": 0.0013030406685118985, "loss": 0.2424, "step": 41624 }, { "epoch": 0.07380633859318303, "grad_norm": 0.296875, "learning_rate": 0.00130298268381966, "loss": 0.2426, "step": 41626 }, { "epoch": 0.07380988475849284, "grad_norm": 0.34765625, "learning_rate": 0.0013029246982396937, "loss": 0.1817, "step": 41628 }, { "epoch": 0.07381343092380266, "grad_norm": 0.294921875, "learning_rate": 0.0013028667117722522, "loss": 0.1691, "step": 41630 }, { "epoch": 0.07381697708911247, "grad_norm": 1.9296875, "learning_rate": 0.0013028087244175896, "loss": 0.4154, "step": 41632 }, { "epoch": 0.07382052325442229, "grad_norm": 0.33203125, "learning_rate": 0.0013027507361759597, "loss": 0.1563, "step": 41634 }, { "epoch": 0.0738240694197321, "grad_norm": 0.314453125, "learning_rate": 0.0013026927470476154, "loss": 0.2112, "step": 41636 }, { "epoch": 0.07382761558504192, "grad_norm": 0.267578125, "learning_rate": 0.001302634757032811, "loss": 0.1768, "step": 41638 }, { "epoch": 0.07383116175035173, "grad_norm": 2.03125, "learning_rate": 0.0013025767661317997, "loss": 0.2398, "step": 41640 }, { "epoch": 0.07383470791566155, "grad_norm": 0.337890625, "learning_rate": 0.0013025187743448358, "loss": 0.13, "step": 41642 }, { "epoch": 0.07383825408097136, "grad_norm": 1.1015625, "learning_rate": 0.0013024607816721722, "loss": 0.2094, "step": 41644 }, { "epoch": 0.07384180024628118, "grad_norm": 0.66015625, "learning_rate": 0.0013024027881140625, "loss": 0.212, "step": 41646 }, { "epoch": 0.07384534641159099, "grad_norm": 0.486328125, "learning_rate": 0.0013023447936707606, "loss": 0.2846, "step": 41648 }, { "epoch": 0.0738488925769008, "grad_norm": 0.220703125, "learning_rate": 0.0013022867983425205, "loss": 0.1892, "step": 41650 }, { "epoch": 0.07385243874221063, "grad_norm": 0.796875, "learning_rate": 0.001302228802129595, "loss": 0.3134, "step": 41652 }, { "epoch": 0.07385598490752045, "grad_norm": 0.388671875, "learning_rate": 0.0013021708050322384, "loss": 0.1769, "step": 41654 }, { "epoch": 0.07385953107283026, "grad_norm": 0.60546875, "learning_rate": 0.0013021128070507043, "loss": 0.1415, "step": 41656 }, { "epoch": 0.07386307723814008, "grad_norm": 0.287109375, "learning_rate": 0.001302054808185246, "loss": 0.1613, "step": 41658 }, { "epoch": 0.07386662340344989, "grad_norm": 0.1533203125, "learning_rate": 0.001301996808436117, "loss": 0.1766, "step": 41660 }, { "epoch": 0.0738701695687597, "grad_norm": 0.408203125, "learning_rate": 0.001301938807803572, "loss": 0.2233, "step": 41662 }, { "epoch": 0.07387371573406952, "grad_norm": 1.015625, "learning_rate": 0.0013018808062878633, "loss": 0.2121, "step": 41664 }, { "epoch": 0.07387726189937933, "grad_norm": 4.1875, "learning_rate": 0.0013018228038892455, "loss": 0.2692, "step": 41666 }, { "epoch": 0.07388080806468915, "grad_norm": 0.5234375, "learning_rate": 0.001301764800607972, "loss": 0.2077, "step": 41668 }, { "epoch": 0.07388435422999896, "grad_norm": 1.59375, "learning_rate": 0.0013017067964442965, "loss": 0.2287, "step": 41670 }, { "epoch": 0.07388790039530878, "grad_norm": 0.57421875, "learning_rate": 0.0013016487913984724, "loss": 0.2085, "step": 41672 }, { "epoch": 0.07389144656061859, "grad_norm": 0.2578125, "learning_rate": 0.001301590785470754, "loss": 0.2337, "step": 41674 }, { "epoch": 0.07389499272592841, "grad_norm": 1.28125, "learning_rate": 0.0013015327786613945, "loss": 0.1977, "step": 41676 }, { "epoch": 0.07389853889123822, "grad_norm": 0.369140625, "learning_rate": 0.0013014747709706475, "loss": 0.1578, "step": 41678 }, { "epoch": 0.07390208505654804, "grad_norm": 1.234375, "learning_rate": 0.0013014167623987668, "loss": 0.2107, "step": 41680 }, { "epoch": 0.07390563122185785, "grad_norm": 0.37890625, "learning_rate": 0.0013013587529460064, "loss": 0.192, "step": 41682 }, { "epoch": 0.07390917738716767, "grad_norm": 0.392578125, "learning_rate": 0.0013013007426126196, "loss": 0.1559, "step": 41684 }, { "epoch": 0.07391272355247748, "grad_norm": 0.376953125, "learning_rate": 0.0013012427313988607, "loss": 0.1922, "step": 41686 }, { "epoch": 0.0739162697177873, "grad_norm": 0.419921875, "learning_rate": 0.0013011847193049827, "loss": 0.162, "step": 41688 }, { "epoch": 0.07391981588309711, "grad_norm": 0.474609375, "learning_rate": 0.0013011267063312395, "loss": 0.1536, "step": 41690 }, { "epoch": 0.07392336204840692, "grad_norm": 3.78125, "learning_rate": 0.001301068692477885, "loss": 0.2023, "step": 41692 }, { "epoch": 0.07392690821371674, "grad_norm": 0.33984375, "learning_rate": 0.001301010677745173, "loss": 0.1975, "step": 41694 }, { "epoch": 0.07393045437902657, "grad_norm": 0.45703125, "learning_rate": 0.0013009526621333568, "loss": 0.1923, "step": 41696 }, { "epoch": 0.07393400054433638, "grad_norm": 0.46484375, "learning_rate": 0.0013008946456426905, "loss": 0.2382, "step": 41698 }, { "epoch": 0.0739375467096462, "grad_norm": 0.33984375, "learning_rate": 0.0013008366282734275, "loss": 0.214, "step": 41700 }, { "epoch": 0.07394109287495601, "grad_norm": 0.8125, "learning_rate": 0.001300778610025822, "loss": 0.1894, "step": 41702 }, { "epoch": 0.07394463904026582, "grad_norm": 0.482421875, "learning_rate": 0.0013007205909001274, "loss": 0.2744, "step": 41704 }, { "epoch": 0.07394818520557564, "grad_norm": 0.326171875, "learning_rate": 0.0013006625708965975, "loss": 0.2038, "step": 41706 }, { "epoch": 0.07395173137088545, "grad_norm": 0.39453125, "learning_rate": 0.0013006045500154864, "loss": 0.1836, "step": 41708 }, { "epoch": 0.07395527753619527, "grad_norm": 0.9453125, "learning_rate": 0.0013005465282570471, "loss": 0.2459, "step": 41710 }, { "epoch": 0.07395882370150508, "grad_norm": 0.318359375, "learning_rate": 0.0013004885056215338, "loss": 0.2233, "step": 41712 }, { "epoch": 0.0739623698668149, "grad_norm": 1.1171875, "learning_rate": 0.0013004304821092006, "loss": 0.2476, "step": 41714 }, { "epoch": 0.07396591603212471, "grad_norm": 0.462890625, "learning_rate": 0.0013003724577203006, "loss": 0.1714, "step": 41716 }, { "epoch": 0.07396946219743453, "grad_norm": 0.318359375, "learning_rate": 0.0013003144324550877, "loss": 0.1709, "step": 41718 }, { "epoch": 0.07397300836274434, "grad_norm": 0.376953125, "learning_rate": 0.0013002564063138162, "loss": 0.1613, "step": 41720 }, { "epoch": 0.07397655452805416, "grad_norm": 0.349609375, "learning_rate": 0.0013001983792967392, "loss": 0.1715, "step": 41722 }, { "epoch": 0.07398010069336397, "grad_norm": 0.3515625, "learning_rate": 0.001300140351404111, "loss": 0.3701, "step": 41724 }, { "epoch": 0.07398364685867378, "grad_norm": 0.76953125, "learning_rate": 0.0013000823226361853, "loss": 0.3282, "step": 41726 }, { "epoch": 0.0739871930239836, "grad_norm": 0.65625, "learning_rate": 0.0013000242929932155, "loss": 0.1586, "step": 41728 }, { "epoch": 0.07399073918929341, "grad_norm": 0.578125, "learning_rate": 0.0012999662624754555, "loss": 0.2042, "step": 41730 }, { "epoch": 0.07399428535460323, "grad_norm": 0.67578125, "learning_rate": 0.0012999082310831596, "loss": 0.2139, "step": 41732 }, { "epoch": 0.07399783151991304, "grad_norm": 1.4609375, "learning_rate": 0.0012998501988165808, "loss": 0.3931, "step": 41734 }, { "epoch": 0.07400137768522286, "grad_norm": 0.41796875, "learning_rate": 0.0012997921656759737, "loss": 0.2923, "step": 41736 }, { "epoch": 0.07400492385053267, "grad_norm": 0.5234375, "learning_rate": 0.0012997341316615914, "loss": 0.1712, "step": 41738 }, { "epoch": 0.07400847001584249, "grad_norm": 0.5, "learning_rate": 0.0012996760967736883, "loss": 0.1714, "step": 41740 }, { "epoch": 0.07401201618115232, "grad_norm": 0.55078125, "learning_rate": 0.0012996180610125177, "loss": 0.154, "step": 41742 }, { "epoch": 0.07401556234646213, "grad_norm": 0.333984375, "learning_rate": 0.001299560024378334, "loss": 0.203, "step": 41744 }, { "epoch": 0.07401910851177194, "grad_norm": 0.76953125, "learning_rate": 0.0012995019868713902, "loss": 0.2139, "step": 41746 }, { "epoch": 0.07402265467708176, "grad_norm": 0.259765625, "learning_rate": 0.001299443948491941, "loss": 0.2132, "step": 41748 }, { "epoch": 0.07402620084239157, "grad_norm": 0.5390625, "learning_rate": 0.0012993859092402394, "loss": 0.1691, "step": 41750 }, { "epoch": 0.07402974700770139, "grad_norm": 0.79296875, "learning_rate": 0.00129932786911654, "loss": 0.2244, "step": 41752 }, { "epoch": 0.0740332931730112, "grad_norm": 0.33203125, "learning_rate": 0.001299269828121096, "loss": 0.1479, "step": 41754 }, { "epoch": 0.07403683933832102, "grad_norm": 0.294921875, "learning_rate": 0.001299211786254162, "loss": 0.2465, "step": 41756 }, { "epoch": 0.07404038550363083, "grad_norm": 1.6328125, "learning_rate": 0.001299153743515991, "loss": 0.4845, "step": 41758 }, { "epoch": 0.07404393166894065, "grad_norm": 1.734375, "learning_rate": 0.001299095699906837, "loss": 0.3232, "step": 41760 }, { "epoch": 0.07404747783425046, "grad_norm": 0.859375, "learning_rate": 0.0012990376554269545, "loss": 0.1713, "step": 41762 }, { "epoch": 0.07405102399956028, "grad_norm": 1.03125, "learning_rate": 0.001298979610076597, "loss": 0.3793, "step": 41764 }, { "epoch": 0.07405457016487009, "grad_norm": 0.318359375, "learning_rate": 0.0012989215638560176, "loss": 0.1891, "step": 41766 }, { "epoch": 0.0740581163301799, "grad_norm": 0.361328125, "learning_rate": 0.0012988635167654715, "loss": 0.1348, "step": 41768 }, { "epoch": 0.07406166249548972, "grad_norm": 0.5703125, "learning_rate": 0.0012988054688052115, "loss": 0.1665, "step": 41770 }, { "epoch": 0.07406520866079953, "grad_norm": 0.1640625, "learning_rate": 0.0012987474199754922, "loss": 0.1445, "step": 41772 }, { "epoch": 0.07406875482610935, "grad_norm": 0.44140625, "learning_rate": 0.0012986893702765666, "loss": 0.1702, "step": 41774 }, { "epoch": 0.07407230099141916, "grad_norm": 0.5078125, "learning_rate": 0.0012986313197086894, "loss": 0.1625, "step": 41776 }, { "epoch": 0.07407584715672898, "grad_norm": 0.58984375, "learning_rate": 0.0012985732682721143, "loss": 0.3832, "step": 41778 }, { "epoch": 0.07407939332203879, "grad_norm": 0.318359375, "learning_rate": 0.001298515215967095, "loss": 0.1683, "step": 41780 }, { "epoch": 0.0740829394873486, "grad_norm": 0.5703125, "learning_rate": 0.0012984571627938854, "loss": 0.2195, "step": 41782 }, { "epoch": 0.07408648565265842, "grad_norm": 0.47265625, "learning_rate": 0.0012983991087527397, "loss": 0.172, "step": 41784 }, { "epoch": 0.07409003181796824, "grad_norm": 0.3046875, "learning_rate": 0.001298341053843911, "loss": 0.213, "step": 41786 }, { "epoch": 0.07409357798327806, "grad_norm": 0.326171875, "learning_rate": 0.0012982829980676543, "loss": 0.2067, "step": 41788 }, { "epoch": 0.07409712414858788, "grad_norm": 0.251953125, "learning_rate": 0.0012982249414242226, "loss": 0.1564, "step": 41790 }, { "epoch": 0.07410067031389769, "grad_norm": 0.796875, "learning_rate": 0.0012981668839138704, "loss": 0.2352, "step": 41792 }, { "epoch": 0.07410421647920751, "grad_norm": 0.44140625, "learning_rate": 0.001298108825536851, "loss": 0.1937, "step": 41794 }, { "epoch": 0.07410776264451732, "grad_norm": 0.48828125, "learning_rate": 0.0012980507662934192, "loss": 0.1855, "step": 41796 }, { "epoch": 0.07411130880982714, "grad_norm": 0.62109375, "learning_rate": 0.0012979927061838278, "loss": 0.1858, "step": 41798 }, { "epoch": 0.07411485497513695, "grad_norm": 0.474609375, "learning_rate": 0.0012979346452083316, "loss": 0.185, "step": 41800 }, { "epoch": 0.07411840114044677, "grad_norm": 0.388671875, "learning_rate": 0.0012978765833671843, "loss": 0.1804, "step": 41802 }, { "epoch": 0.07412194730575658, "grad_norm": 0.7578125, "learning_rate": 0.0012978185206606397, "loss": 0.2615, "step": 41804 }, { "epoch": 0.0741254934710664, "grad_norm": 0.224609375, "learning_rate": 0.0012977604570889516, "loss": 0.2223, "step": 41806 }, { "epoch": 0.07412903963637621, "grad_norm": 0.263671875, "learning_rate": 0.0012977023926523743, "loss": 0.24, "step": 41808 }, { "epoch": 0.07413258580168602, "grad_norm": 0.333984375, "learning_rate": 0.0012976443273511615, "loss": 0.1959, "step": 41810 }, { "epoch": 0.07413613196699584, "grad_norm": 0.79296875, "learning_rate": 0.0012975862611855676, "loss": 0.2345, "step": 41812 }, { "epoch": 0.07413967813230565, "grad_norm": 0.59375, "learning_rate": 0.0012975281941558457, "loss": 0.413, "step": 41814 }, { "epoch": 0.07414322429761547, "grad_norm": 0.294921875, "learning_rate": 0.0012974701262622504, "loss": 0.2165, "step": 41816 }, { "epoch": 0.07414677046292528, "grad_norm": 0.60546875, "learning_rate": 0.0012974120575050354, "loss": 0.1855, "step": 41818 }, { "epoch": 0.0741503166282351, "grad_norm": 0.240234375, "learning_rate": 0.001297353987884455, "loss": 0.1762, "step": 41820 }, { "epoch": 0.07415386279354491, "grad_norm": 0.5546875, "learning_rate": 0.0012972959174007627, "loss": 0.2409, "step": 41822 }, { "epoch": 0.07415740895885473, "grad_norm": 0.625, "learning_rate": 0.0012972378460542125, "loss": 0.2073, "step": 41824 }, { "epoch": 0.07416095512416454, "grad_norm": 0.310546875, "learning_rate": 0.0012971797738450589, "loss": 0.1074, "step": 41826 }, { "epoch": 0.07416450128947435, "grad_norm": 0.38671875, "learning_rate": 0.0012971217007735549, "loss": 0.1876, "step": 41828 }, { "epoch": 0.07416804745478417, "grad_norm": 0.91796875, "learning_rate": 0.0012970636268399558, "loss": 0.198, "step": 41830 }, { "epoch": 0.074171593620094, "grad_norm": 0.3671875, "learning_rate": 0.0012970055520445142, "loss": 0.2636, "step": 41832 }, { "epoch": 0.07417513978540381, "grad_norm": 0.56640625, "learning_rate": 0.0012969474763874853, "loss": 0.2263, "step": 41834 }, { "epoch": 0.07417868595071363, "grad_norm": 0.47265625, "learning_rate": 0.001296889399869122, "loss": 0.1915, "step": 41836 }, { "epoch": 0.07418223211602344, "grad_norm": 1.2265625, "learning_rate": 0.0012968313224896795, "loss": 0.2822, "step": 41838 }, { "epoch": 0.07418577828133326, "grad_norm": 0.53515625, "learning_rate": 0.0012967732442494108, "loss": 0.1541, "step": 41840 }, { "epoch": 0.07418932444664307, "grad_norm": 2.828125, "learning_rate": 0.0012967151651485703, "loss": 0.4151, "step": 41842 }, { "epoch": 0.07419287061195289, "grad_norm": 0.96875, "learning_rate": 0.0012966570851874117, "loss": 0.1527, "step": 41844 }, { "epoch": 0.0741964167772627, "grad_norm": 3.234375, "learning_rate": 0.0012965990043661895, "loss": 0.2146, "step": 41846 }, { "epoch": 0.07419996294257251, "grad_norm": 1.0703125, "learning_rate": 0.001296540922685157, "loss": 0.1999, "step": 41848 }, { "epoch": 0.07420350910788233, "grad_norm": 0.1875, "learning_rate": 0.0012964828401445694, "loss": 0.2832, "step": 41850 }, { "epoch": 0.07420705527319214, "grad_norm": 0.50390625, "learning_rate": 0.0012964247567446795, "loss": 0.2381, "step": 41852 }, { "epoch": 0.07421060143850196, "grad_norm": 0.234375, "learning_rate": 0.0012963666724857418, "loss": 0.2024, "step": 41854 }, { "epoch": 0.07421414760381177, "grad_norm": 0.439453125, "learning_rate": 0.0012963085873680105, "loss": 0.2063, "step": 41856 }, { "epoch": 0.07421769376912159, "grad_norm": 0.486328125, "learning_rate": 0.0012962505013917396, "loss": 0.1937, "step": 41858 }, { "epoch": 0.0742212399344314, "grad_norm": 0.2255859375, "learning_rate": 0.0012961924145571827, "loss": 0.1153, "step": 41860 }, { "epoch": 0.07422478609974122, "grad_norm": 0.47265625, "learning_rate": 0.0012961343268645944, "loss": 0.2612, "step": 41862 }, { "epoch": 0.07422833226505103, "grad_norm": 1.6875, "learning_rate": 0.0012960762383142281, "loss": 0.2389, "step": 41864 }, { "epoch": 0.07423187843036085, "grad_norm": 0.3046875, "learning_rate": 0.0012960181489063386, "loss": 0.1313, "step": 41866 }, { "epoch": 0.07423542459567066, "grad_norm": 0.73828125, "learning_rate": 0.0012959600586411792, "loss": 0.2036, "step": 41868 }, { "epoch": 0.07423897076098047, "grad_norm": 0.419921875, "learning_rate": 0.0012959019675190047, "loss": 0.1463, "step": 41870 }, { "epoch": 0.07424251692629029, "grad_norm": 1.0, "learning_rate": 0.0012958438755400685, "loss": 0.2278, "step": 41872 }, { "epoch": 0.0742460630916001, "grad_norm": 0.369140625, "learning_rate": 0.001295785782704625, "loss": 0.1877, "step": 41874 }, { "epoch": 0.07424960925690992, "grad_norm": 0.220703125, "learning_rate": 0.0012957276890129285, "loss": 0.1522, "step": 41876 }, { "epoch": 0.07425315542221975, "grad_norm": 0.5234375, "learning_rate": 0.0012956695944652328, "loss": 0.1413, "step": 41878 }, { "epoch": 0.07425670158752956, "grad_norm": 0.36328125, "learning_rate": 0.0012956114990617916, "loss": 0.12, "step": 41880 }, { "epoch": 0.07426024775283938, "grad_norm": 1.3203125, "learning_rate": 0.0012955534028028595, "loss": 0.2496, "step": 41882 }, { "epoch": 0.07426379391814919, "grad_norm": 0.57421875, "learning_rate": 0.0012954953056886905, "loss": 0.2067, "step": 41884 }, { "epoch": 0.074267340083459, "grad_norm": 0.326171875, "learning_rate": 0.0012954372077195384, "loss": 0.2133, "step": 41886 }, { "epoch": 0.07427088624876882, "grad_norm": 0.33203125, "learning_rate": 0.0012953791088956574, "loss": 0.1738, "step": 41888 }, { "epoch": 0.07427443241407863, "grad_norm": 0.298828125, "learning_rate": 0.0012953210092173017, "loss": 0.145, "step": 41890 }, { "epoch": 0.07427797857938845, "grad_norm": 0.455078125, "learning_rate": 0.0012952629086847257, "loss": 0.1775, "step": 41892 }, { "epoch": 0.07428152474469826, "grad_norm": 0.93359375, "learning_rate": 0.001295204807298183, "loss": 0.17, "step": 41894 }, { "epoch": 0.07428507091000808, "grad_norm": 0.640625, "learning_rate": 0.001295146705057928, "loss": 0.1695, "step": 41896 }, { "epoch": 0.07428861707531789, "grad_norm": 0.546875, "learning_rate": 0.0012950886019642144, "loss": 0.1659, "step": 41898 }, { "epoch": 0.0742921632406277, "grad_norm": 0.58984375, "learning_rate": 0.0012950304980172964, "loss": 0.2128, "step": 41900 }, { "epoch": 0.07429570940593752, "grad_norm": 0.62890625, "learning_rate": 0.0012949723932174286, "loss": 0.1573, "step": 41902 }, { "epoch": 0.07429925557124734, "grad_norm": 0.46484375, "learning_rate": 0.001294914287564865, "loss": 0.2487, "step": 41904 }, { "epoch": 0.07430280173655715, "grad_norm": 0.294921875, "learning_rate": 0.0012948561810598592, "loss": 0.1684, "step": 41906 }, { "epoch": 0.07430634790186696, "grad_norm": 0.1826171875, "learning_rate": 0.0012947980737026657, "loss": 0.1624, "step": 41908 }, { "epoch": 0.07430989406717678, "grad_norm": 8.5625, "learning_rate": 0.0012947399654935382, "loss": 0.236, "step": 41910 }, { "epoch": 0.0743134402324866, "grad_norm": 1.25, "learning_rate": 0.001294681856432732, "loss": 0.2123, "step": 41912 }, { "epoch": 0.07431698639779641, "grad_norm": 1.484375, "learning_rate": 0.0012946237465204999, "loss": 0.2139, "step": 41914 }, { "epoch": 0.07432053256310622, "grad_norm": 0.40625, "learning_rate": 0.0012945656357570968, "loss": 0.1838, "step": 41916 }, { "epoch": 0.07432407872841604, "grad_norm": 3.359375, "learning_rate": 0.0012945075241427764, "loss": 0.4082, "step": 41918 }, { "epoch": 0.07432762489372585, "grad_norm": 0.51953125, "learning_rate": 0.0012944494116777937, "loss": 0.1426, "step": 41920 }, { "epoch": 0.07433117105903567, "grad_norm": 0.234375, "learning_rate": 0.0012943912983624014, "loss": 0.1257, "step": 41922 }, { "epoch": 0.0743347172243455, "grad_norm": 0.421875, "learning_rate": 0.0012943331841968548, "loss": 0.1391, "step": 41924 }, { "epoch": 0.07433826338965531, "grad_norm": 0.6171875, "learning_rate": 0.001294275069181408, "loss": 0.1782, "step": 41926 }, { "epoch": 0.07434180955496512, "grad_norm": 0.443359375, "learning_rate": 0.0012942169533163143, "loss": 0.1644, "step": 41928 }, { "epoch": 0.07434535572027494, "grad_norm": 0.2578125, "learning_rate": 0.0012941588366018294, "loss": 0.1407, "step": 41930 }, { "epoch": 0.07434890188558475, "grad_norm": 0.296875, "learning_rate": 0.0012941007190382058, "loss": 0.2769, "step": 41932 }, { "epoch": 0.07435244805089457, "grad_norm": 0.314453125, "learning_rate": 0.0012940426006256985, "loss": 0.1269, "step": 41934 }, { "epoch": 0.07435599421620438, "grad_norm": 0.66015625, "learning_rate": 0.0012939844813645617, "loss": 0.1599, "step": 41936 }, { "epoch": 0.0743595403815142, "grad_norm": 0.443359375, "learning_rate": 0.0012939263612550492, "loss": 0.2148, "step": 41938 }, { "epoch": 0.07436308654682401, "grad_norm": 0.6953125, "learning_rate": 0.0012938682402974155, "loss": 0.2553, "step": 41940 }, { "epoch": 0.07436663271213383, "grad_norm": 0.43359375, "learning_rate": 0.0012938101184919151, "loss": 0.1834, "step": 41942 }, { "epoch": 0.07437017887744364, "grad_norm": 0.42578125, "learning_rate": 0.0012937519958388015, "loss": 0.1781, "step": 41944 }, { "epoch": 0.07437372504275346, "grad_norm": 0.29296875, "learning_rate": 0.0012936938723383292, "loss": 0.196, "step": 41946 }, { "epoch": 0.07437727120806327, "grad_norm": 0.427734375, "learning_rate": 0.0012936357479907525, "loss": 0.1467, "step": 41948 }, { "epoch": 0.07438081737337308, "grad_norm": 0.265625, "learning_rate": 0.0012935776227963255, "loss": 0.1758, "step": 41950 }, { "epoch": 0.0743843635386829, "grad_norm": 0.294921875, "learning_rate": 0.0012935194967553028, "loss": 0.2258, "step": 41952 }, { "epoch": 0.07438790970399271, "grad_norm": 1.859375, "learning_rate": 0.0012934613698679376, "loss": 0.2101, "step": 41954 }, { "epoch": 0.07439145586930253, "grad_norm": 0.47265625, "learning_rate": 0.001293403242134485, "loss": 0.1821, "step": 41956 }, { "epoch": 0.07439500203461234, "grad_norm": 0.96484375, "learning_rate": 0.001293345113555199, "loss": 0.2871, "step": 41958 }, { "epoch": 0.07439854819992216, "grad_norm": 0.91015625, "learning_rate": 0.001293286984130334, "loss": 0.1656, "step": 41960 }, { "epoch": 0.07440209436523197, "grad_norm": 0.46484375, "learning_rate": 0.0012932288538601436, "loss": 0.1777, "step": 41962 }, { "epoch": 0.07440564053054179, "grad_norm": 0.6484375, "learning_rate": 0.0012931707227448826, "loss": 0.3514, "step": 41964 }, { "epoch": 0.0744091866958516, "grad_norm": 0.4765625, "learning_rate": 0.0012931125907848049, "loss": 0.2044, "step": 41966 }, { "epoch": 0.07441273286116143, "grad_norm": 0.6875, "learning_rate": 0.0012930544579801652, "loss": 0.1947, "step": 41968 }, { "epoch": 0.07441627902647124, "grad_norm": 0.53125, "learning_rate": 0.0012929963243312175, "loss": 0.198, "step": 41970 }, { "epoch": 0.07441982519178106, "grad_norm": 0.294921875, "learning_rate": 0.0012929381898382154, "loss": 0.1771, "step": 41972 }, { "epoch": 0.07442337135709087, "grad_norm": 0.482421875, "learning_rate": 0.0012928800545014144, "loss": 0.2462, "step": 41974 }, { "epoch": 0.07442691752240069, "grad_norm": 0.380859375, "learning_rate": 0.0012928219183210678, "loss": 0.2421, "step": 41976 }, { "epoch": 0.0744304636877105, "grad_norm": 0.4296875, "learning_rate": 0.0012927637812974301, "loss": 0.1626, "step": 41978 }, { "epoch": 0.07443400985302032, "grad_norm": 0.78125, "learning_rate": 0.0012927056434307556, "loss": 0.2204, "step": 41980 }, { "epoch": 0.07443755601833013, "grad_norm": 1.875, "learning_rate": 0.0012926475047212987, "loss": 0.2978, "step": 41982 }, { "epoch": 0.07444110218363995, "grad_norm": 0.28515625, "learning_rate": 0.001292589365169313, "loss": 0.1667, "step": 41984 }, { "epoch": 0.07444464834894976, "grad_norm": 1.265625, "learning_rate": 0.0012925312247750538, "loss": 0.2516, "step": 41986 }, { "epoch": 0.07444819451425957, "grad_norm": 0.287109375, "learning_rate": 0.001292473083538775, "loss": 0.1875, "step": 41988 }, { "epoch": 0.07445174067956939, "grad_norm": 0.828125, "learning_rate": 0.0012924149414607305, "loss": 0.248, "step": 41990 }, { "epoch": 0.0744552868448792, "grad_norm": 0.94140625, "learning_rate": 0.0012923567985411747, "loss": 0.1865, "step": 41992 }, { "epoch": 0.07445883301018902, "grad_norm": 0.30859375, "learning_rate": 0.0012922986547803622, "loss": 0.1638, "step": 41994 }, { "epoch": 0.07446237917549883, "grad_norm": 0.361328125, "learning_rate": 0.0012922405101785468, "loss": 0.1401, "step": 41996 }, { "epoch": 0.07446592534080865, "grad_norm": 0.27734375, "learning_rate": 0.0012921823647359834, "loss": 0.2051, "step": 41998 }, { "epoch": 0.07446947150611846, "grad_norm": 0.384765625, "learning_rate": 0.0012921242184529256, "loss": 0.2274, "step": 42000 }, { "epoch": 0.07447301767142828, "grad_norm": 0.357421875, "learning_rate": 0.0012920660713296283, "loss": 0.1615, "step": 42002 }, { "epoch": 0.07447656383673809, "grad_norm": 3.28125, "learning_rate": 0.0012920079233663456, "loss": 0.227, "step": 42004 }, { "epoch": 0.0744801100020479, "grad_norm": 0.6015625, "learning_rate": 0.0012919497745633318, "loss": 0.146, "step": 42006 }, { "epoch": 0.07448365616735772, "grad_norm": 0.671875, "learning_rate": 0.0012918916249208413, "loss": 0.2583, "step": 42008 }, { "epoch": 0.07448720233266753, "grad_norm": 0.71875, "learning_rate": 0.0012918334744391282, "loss": 0.1965, "step": 42010 }, { "epoch": 0.07449074849797735, "grad_norm": 4.3125, "learning_rate": 0.0012917753231184465, "loss": 0.2462, "step": 42012 }, { "epoch": 0.07449429466328718, "grad_norm": 0.337890625, "learning_rate": 0.0012917171709590513, "loss": 0.1468, "step": 42014 }, { "epoch": 0.07449784082859699, "grad_norm": 0.44140625, "learning_rate": 0.0012916590179611965, "loss": 0.1594, "step": 42016 }, { "epoch": 0.0745013869939068, "grad_norm": 0.349609375, "learning_rate": 0.0012916008641251364, "loss": 0.1672, "step": 42018 }, { "epoch": 0.07450493315921662, "grad_norm": 0.298828125, "learning_rate": 0.0012915427094511254, "loss": 0.1635, "step": 42020 }, { "epoch": 0.07450847932452644, "grad_norm": 0.396484375, "learning_rate": 0.0012914845539394183, "loss": 0.177, "step": 42022 }, { "epoch": 0.07451202548983625, "grad_norm": 0.50390625, "learning_rate": 0.0012914263975902687, "loss": 0.1803, "step": 42024 }, { "epoch": 0.07451557165514607, "grad_norm": 0.5078125, "learning_rate": 0.0012913682404039313, "loss": 0.1664, "step": 42026 }, { "epoch": 0.07451911782045588, "grad_norm": 0.9375, "learning_rate": 0.00129131008238066, "loss": 0.2185, "step": 42028 }, { "epoch": 0.0745226639857657, "grad_norm": 0.3984375, "learning_rate": 0.0012912519235207098, "loss": 0.188, "step": 42030 }, { "epoch": 0.07452621015107551, "grad_norm": 0.279296875, "learning_rate": 0.0012911937638243347, "loss": 0.1546, "step": 42032 }, { "epoch": 0.07452975631638532, "grad_norm": 1.5546875, "learning_rate": 0.0012911356032917895, "loss": 0.3341, "step": 42034 }, { "epoch": 0.07453330248169514, "grad_norm": 0.201171875, "learning_rate": 0.0012910774419233276, "loss": 0.2115, "step": 42036 }, { "epoch": 0.07453684864700495, "grad_norm": 0.34765625, "learning_rate": 0.0012910192797192042, "loss": 0.1567, "step": 42038 }, { "epoch": 0.07454039481231477, "grad_norm": 0.79296875, "learning_rate": 0.0012909611166796735, "loss": 0.1468, "step": 42040 }, { "epoch": 0.07454394097762458, "grad_norm": 0.419921875, "learning_rate": 0.0012909029528049899, "loss": 0.1656, "step": 42042 }, { "epoch": 0.0745474871429344, "grad_norm": 0.28515625, "learning_rate": 0.0012908447880954075, "loss": 0.1677, "step": 42044 }, { "epoch": 0.07455103330824421, "grad_norm": 4.46875, "learning_rate": 0.0012907866225511811, "loss": 0.3275, "step": 42046 }, { "epoch": 0.07455457947355403, "grad_norm": 1.1796875, "learning_rate": 0.0012907284561725645, "loss": 0.164, "step": 42048 }, { "epoch": 0.07455812563886384, "grad_norm": 0.62109375, "learning_rate": 0.0012906702889598123, "loss": 0.2011, "step": 42050 }, { "epoch": 0.07456167180417365, "grad_norm": 0.4296875, "learning_rate": 0.0012906121209131794, "loss": 0.1824, "step": 42052 }, { "epoch": 0.07456521796948347, "grad_norm": 1.65625, "learning_rate": 0.0012905539520329198, "loss": 0.167, "step": 42054 }, { "epoch": 0.07456876413479328, "grad_norm": 0.2890625, "learning_rate": 0.0012904957823192877, "loss": 0.1773, "step": 42056 }, { "epoch": 0.0745723103001031, "grad_norm": 0.326171875, "learning_rate": 0.0012904376117725375, "loss": 0.1813, "step": 42058 }, { "epoch": 0.07457585646541293, "grad_norm": 0.326171875, "learning_rate": 0.0012903794403929243, "loss": 0.1519, "step": 42060 }, { "epoch": 0.07457940263072274, "grad_norm": 0.76953125, "learning_rate": 0.001290321268180702, "loss": 0.3091, "step": 42062 }, { "epoch": 0.07458294879603256, "grad_norm": 0.46484375, "learning_rate": 0.001290263095136125, "loss": 0.198, "step": 42064 }, { "epoch": 0.07458649496134237, "grad_norm": 0.27734375, "learning_rate": 0.001290204921259447, "loss": 0.2487, "step": 42066 }, { "epoch": 0.07459004112665218, "grad_norm": 0.609375, "learning_rate": 0.001290146746550924, "loss": 0.1892, "step": 42068 }, { "epoch": 0.074593587291962, "grad_norm": 0.369140625, "learning_rate": 0.0012900885710108094, "loss": 0.1343, "step": 42070 }, { "epoch": 0.07459713345727181, "grad_norm": 0.5390625, "learning_rate": 0.0012900303946393574, "loss": 0.1957, "step": 42072 }, { "epoch": 0.07460067962258163, "grad_norm": 0.62890625, "learning_rate": 0.0012899722174368234, "loss": 0.117, "step": 42074 }, { "epoch": 0.07460422578789144, "grad_norm": 0.640625, "learning_rate": 0.001289914039403461, "loss": 0.2055, "step": 42076 }, { "epoch": 0.07460777195320126, "grad_norm": 1.3359375, "learning_rate": 0.0012898558605395247, "loss": 0.242, "step": 42078 }, { "epoch": 0.07461131811851107, "grad_norm": 0.81640625, "learning_rate": 0.0012897976808452696, "loss": 0.2211, "step": 42080 }, { "epoch": 0.07461486428382089, "grad_norm": 0.185546875, "learning_rate": 0.0012897395003209496, "loss": 0.1409, "step": 42082 }, { "epoch": 0.0746184104491307, "grad_norm": 0.46875, "learning_rate": 0.001289681318966819, "loss": 0.1606, "step": 42084 }, { "epoch": 0.07462195661444052, "grad_norm": 0.2734375, "learning_rate": 0.0012896231367831325, "loss": 0.1963, "step": 42086 }, { "epoch": 0.07462550277975033, "grad_norm": 0.306640625, "learning_rate": 0.0012895649537701449, "loss": 0.1922, "step": 42088 }, { "epoch": 0.07462904894506014, "grad_norm": 1.3828125, "learning_rate": 0.0012895067699281097, "loss": 0.227, "step": 42090 }, { "epoch": 0.07463259511036996, "grad_norm": 0.9765625, "learning_rate": 0.0012894485852572824, "loss": 0.1655, "step": 42092 }, { "epoch": 0.07463614127567977, "grad_norm": 0.73046875, "learning_rate": 0.0012893903997579167, "loss": 0.2411, "step": 42094 }, { "epoch": 0.07463968744098959, "grad_norm": 0.71875, "learning_rate": 0.0012893322134302678, "loss": 0.1625, "step": 42096 }, { "epoch": 0.0746432336062994, "grad_norm": 1.0078125, "learning_rate": 0.0012892740262745895, "loss": 0.2536, "step": 42098 }, { "epoch": 0.07464677977160922, "grad_norm": 0.71875, "learning_rate": 0.001289215838291137, "loss": 0.165, "step": 42100 }, { "epoch": 0.07465032593691903, "grad_norm": 0.39453125, "learning_rate": 0.0012891576494801639, "loss": 0.2024, "step": 42102 }, { "epoch": 0.07465387210222886, "grad_norm": 2.078125, "learning_rate": 0.0012890994598419255, "loss": 0.1805, "step": 42104 }, { "epoch": 0.07465741826753867, "grad_norm": 0.482421875, "learning_rate": 0.0012890412693766754, "loss": 0.2072, "step": 42106 }, { "epoch": 0.07466096443284849, "grad_norm": 2.875, "learning_rate": 0.0012889830780846688, "loss": 0.2151, "step": 42108 }, { "epoch": 0.0746645105981583, "grad_norm": 0.73828125, "learning_rate": 0.00128892488596616, "loss": 0.3085, "step": 42110 }, { "epoch": 0.07466805676346812, "grad_norm": 0.326171875, "learning_rate": 0.0012888666930214036, "loss": 0.1677, "step": 42112 }, { "epoch": 0.07467160292877793, "grad_norm": 0.546875, "learning_rate": 0.0012888084992506536, "loss": 0.1543, "step": 42114 }, { "epoch": 0.07467514909408775, "grad_norm": 0.53125, "learning_rate": 0.0012887503046541656, "loss": 0.2562, "step": 42116 }, { "epoch": 0.07467869525939756, "grad_norm": 0.31640625, "learning_rate": 0.0012886921092321929, "loss": 0.2136, "step": 42118 }, { "epoch": 0.07468224142470738, "grad_norm": 0.66796875, "learning_rate": 0.0012886339129849907, "loss": 0.2083, "step": 42120 }, { "epoch": 0.07468578759001719, "grad_norm": 0.44921875, "learning_rate": 0.0012885757159128131, "loss": 0.1956, "step": 42122 }, { "epoch": 0.074689333755327, "grad_norm": 0.5078125, "learning_rate": 0.001288517518015915, "loss": 0.2008, "step": 42124 }, { "epoch": 0.07469287992063682, "grad_norm": 1.0234375, "learning_rate": 0.001288459319294551, "loss": 0.2181, "step": 42126 }, { "epoch": 0.07469642608594664, "grad_norm": 0.4921875, "learning_rate": 0.0012884011197489748, "loss": 0.1815, "step": 42128 }, { "epoch": 0.07469997225125645, "grad_norm": 0.69921875, "learning_rate": 0.001288342919379442, "loss": 0.2193, "step": 42130 }, { "epoch": 0.07470351841656626, "grad_norm": 1.609375, "learning_rate": 0.0012882847181862067, "loss": 0.262, "step": 42132 }, { "epoch": 0.07470706458187608, "grad_norm": 0.90234375, "learning_rate": 0.0012882265161695233, "loss": 0.204, "step": 42134 }, { "epoch": 0.0747106107471859, "grad_norm": 0.341796875, "learning_rate": 0.0012881683133296464, "loss": 0.174, "step": 42136 }, { "epoch": 0.07471415691249571, "grad_norm": 1.5625, "learning_rate": 0.001288110109666831, "loss": 0.1992, "step": 42138 }, { "epoch": 0.07471770307780552, "grad_norm": 0.396484375, "learning_rate": 0.0012880519051813306, "loss": 0.1708, "step": 42140 }, { "epoch": 0.07472124924311534, "grad_norm": 0.302734375, "learning_rate": 0.0012879936998734009, "loss": 0.1612, "step": 42142 }, { "epoch": 0.07472479540842515, "grad_norm": 0.3203125, "learning_rate": 0.0012879354937432956, "loss": 0.2452, "step": 42144 }, { "epoch": 0.07472834157373497, "grad_norm": 0.32421875, "learning_rate": 0.00128787728679127, "loss": 0.1884, "step": 42146 }, { "epoch": 0.07473188773904478, "grad_norm": 0.921875, "learning_rate": 0.0012878190790175778, "loss": 0.2035, "step": 42148 }, { "epoch": 0.07473543390435461, "grad_norm": 11.1875, "learning_rate": 0.0012877608704224745, "loss": 0.2472, "step": 42150 }, { "epoch": 0.07473898006966442, "grad_norm": 0.416015625, "learning_rate": 0.001287702661006214, "loss": 0.1866, "step": 42152 }, { "epoch": 0.07474252623497424, "grad_norm": 0.56640625, "learning_rate": 0.0012876444507690513, "loss": 0.183, "step": 42154 }, { "epoch": 0.07474607240028405, "grad_norm": 0.2392578125, "learning_rate": 0.0012875862397112403, "loss": 0.2162, "step": 42156 }, { "epoch": 0.07474961856559387, "grad_norm": 0.61328125, "learning_rate": 0.0012875280278330367, "loss": 0.4303, "step": 42158 }, { "epoch": 0.07475316473090368, "grad_norm": 0.5546875, "learning_rate": 0.001287469815134694, "loss": 0.4304, "step": 42160 }, { "epoch": 0.0747567108962135, "grad_norm": 0.26953125, "learning_rate": 0.0012874116016164672, "loss": 0.2118, "step": 42162 }, { "epoch": 0.07476025706152331, "grad_norm": 0.86328125, "learning_rate": 0.0012873533872786109, "loss": 0.2016, "step": 42164 }, { "epoch": 0.07476380322683313, "grad_norm": 0.435546875, "learning_rate": 0.00128729517212138, "loss": 0.1684, "step": 42166 }, { "epoch": 0.07476734939214294, "grad_norm": 0.79296875, "learning_rate": 0.0012872369561450286, "loss": 0.4251, "step": 42168 }, { "epoch": 0.07477089555745275, "grad_norm": 0.765625, "learning_rate": 0.0012871787393498116, "loss": 0.1948, "step": 42170 }, { "epoch": 0.07477444172276257, "grad_norm": 0.40625, "learning_rate": 0.0012871205217359833, "loss": 0.2283, "step": 42172 }, { "epoch": 0.07477798788807238, "grad_norm": 0.56640625, "learning_rate": 0.0012870623033037988, "loss": 0.2233, "step": 42174 }, { "epoch": 0.0747815340533822, "grad_norm": 0.48046875, "learning_rate": 0.0012870040840535124, "loss": 0.2142, "step": 42176 }, { "epoch": 0.07478508021869201, "grad_norm": 0.28125, "learning_rate": 0.001286945863985379, "loss": 0.2005, "step": 42178 }, { "epoch": 0.07478862638400183, "grad_norm": 0.5546875, "learning_rate": 0.0012868876430996525, "loss": 0.3014, "step": 42180 }, { "epoch": 0.07479217254931164, "grad_norm": 0.65625, "learning_rate": 0.0012868294213965882, "loss": 0.235, "step": 42182 }, { "epoch": 0.07479571871462146, "grad_norm": 0.35546875, "learning_rate": 0.0012867711988764406, "loss": 0.1859, "step": 42184 }, { "epoch": 0.07479926487993127, "grad_norm": 0.330078125, "learning_rate": 0.0012867129755394643, "loss": 0.1471, "step": 42186 }, { "epoch": 0.07480281104524109, "grad_norm": 0.34375, "learning_rate": 0.0012866547513859135, "loss": 0.142, "step": 42188 }, { "epoch": 0.0748063572105509, "grad_norm": 0.3671875, "learning_rate": 0.0012865965264160434, "loss": 0.2588, "step": 42190 }, { "epoch": 0.07480990337586071, "grad_norm": 1.1015625, "learning_rate": 0.001286538300630109, "loss": 0.3685, "step": 42192 }, { "epoch": 0.07481344954117053, "grad_norm": 0.54296875, "learning_rate": 0.001286480074028364, "loss": 0.1783, "step": 42194 }, { "epoch": 0.07481699570648036, "grad_norm": 0.2099609375, "learning_rate": 0.0012864218466110636, "loss": 0.1415, "step": 42196 }, { "epoch": 0.07482054187179017, "grad_norm": 0.267578125, "learning_rate": 0.0012863636183784624, "loss": 0.1764, "step": 42198 }, { "epoch": 0.07482408803709999, "grad_norm": 1.46875, "learning_rate": 0.0012863053893308147, "loss": 0.3061, "step": 42200 }, { "epoch": 0.0748276342024098, "grad_norm": 0.478515625, "learning_rate": 0.0012862471594683755, "loss": 0.2335, "step": 42202 }, { "epoch": 0.07483118036771962, "grad_norm": 0.490234375, "learning_rate": 0.0012861889287913994, "loss": 0.2382, "step": 42204 }, { "epoch": 0.07483472653302943, "grad_norm": 1.15625, "learning_rate": 0.0012861306973001414, "loss": 0.21, "step": 42206 }, { "epoch": 0.07483827269833924, "grad_norm": 0.2734375, "learning_rate": 0.0012860724649948554, "loss": 0.147, "step": 42208 }, { "epoch": 0.07484181886364906, "grad_norm": 0.35546875, "learning_rate": 0.001286014231875797, "loss": 0.1654, "step": 42210 }, { "epoch": 0.07484536502895887, "grad_norm": 1.0234375, "learning_rate": 0.0012859559979432201, "loss": 0.2191, "step": 42212 }, { "epoch": 0.07484891119426869, "grad_norm": 0.421875, "learning_rate": 0.00128589776319738, "loss": 0.1887, "step": 42214 }, { "epoch": 0.0748524573595785, "grad_norm": 0.51953125, "learning_rate": 0.0012858395276385307, "loss": 0.2005, "step": 42216 }, { "epoch": 0.07485600352488832, "grad_norm": 0.83203125, "learning_rate": 0.0012857812912669272, "loss": 0.2229, "step": 42218 }, { "epoch": 0.07485954969019813, "grad_norm": 0.1767578125, "learning_rate": 0.0012857230540828245, "loss": 0.1632, "step": 42220 }, { "epoch": 0.07486309585550795, "grad_norm": 0.6796875, "learning_rate": 0.0012856648160864768, "loss": 0.2001, "step": 42222 }, { "epoch": 0.07486664202081776, "grad_norm": 0.61328125, "learning_rate": 0.001285606577278139, "loss": 0.2185, "step": 42224 }, { "epoch": 0.07487018818612758, "grad_norm": 1.671875, "learning_rate": 0.001285548337658066, "loss": 0.3426, "step": 42226 }, { "epoch": 0.07487373435143739, "grad_norm": 0.2119140625, "learning_rate": 0.0012854900972265125, "loss": 0.1967, "step": 42228 }, { "epoch": 0.0748772805167472, "grad_norm": 0.6484375, "learning_rate": 0.0012854318559837328, "loss": 0.1622, "step": 42230 }, { "epoch": 0.07488082668205702, "grad_norm": 0.35546875, "learning_rate": 0.0012853736139299824, "loss": 0.1656, "step": 42232 }, { "epoch": 0.07488437284736683, "grad_norm": 1.375, "learning_rate": 0.0012853153710655149, "loss": 0.3565, "step": 42234 }, { "epoch": 0.07488791901267665, "grad_norm": 2.375, "learning_rate": 0.001285257127390586, "loss": 0.1807, "step": 42236 }, { "epoch": 0.07489146517798646, "grad_norm": 0.80859375, "learning_rate": 0.0012851988829054495, "loss": 0.1965, "step": 42238 }, { "epoch": 0.07489501134329629, "grad_norm": 1.0, "learning_rate": 0.0012851406376103611, "loss": 0.2008, "step": 42240 }, { "epoch": 0.0748985575086061, "grad_norm": 0.64453125, "learning_rate": 0.0012850823915055752, "loss": 0.1815, "step": 42242 }, { "epoch": 0.07490210367391592, "grad_norm": 0.263671875, "learning_rate": 0.001285024144591346, "loss": 0.3021, "step": 42244 }, { "epoch": 0.07490564983922574, "grad_norm": 0.4296875, "learning_rate": 0.0012849658968679288, "loss": 0.224, "step": 42246 }, { "epoch": 0.07490919600453555, "grad_norm": 0.1806640625, "learning_rate": 0.0012849076483355784, "loss": 0.1739, "step": 42248 }, { "epoch": 0.07491274216984536, "grad_norm": 0.41796875, "learning_rate": 0.0012848493989945494, "loss": 0.2166, "step": 42250 }, { "epoch": 0.07491628833515518, "grad_norm": 0.291015625, "learning_rate": 0.0012847911488450966, "loss": 0.1657, "step": 42252 }, { "epoch": 0.074919834500465, "grad_norm": 1.6875, "learning_rate": 0.0012847328978874742, "loss": 0.1781, "step": 42254 }, { "epoch": 0.07492338066577481, "grad_norm": 1.2578125, "learning_rate": 0.0012846746461219376, "loss": 0.1335, "step": 42256 }, { "epoch": 0.07492692683108462, "grad_norm": 0.1806640625, "learning_rate": 0.0012846163935487413, "loss": 0.2328, "step": 42258 }, { "epoch": 0.07493047299639444, "grad_norm": 0.828125, "learning_rate": 0.0012845581401681402, "loss": 0.1999, "step": 42260 }, { "epoch": 0.07493401916170425, "grad_norm": 0.546875, "learning_rate": 0.0012844998859803888, "loss": 0.1602, "step": 42262 }, { "epoch": 0.07493756532701407, "grad_norm": 0.396484375, "learning_rate": 0.0012844416309857425, "loss": 0.2156, "step": 42264 }, { "epoch": 0.07494111149232388, "grad_norm": 0.91796875, "learning_rate": 0.0012843833751844554, "loss": 0.2227, "step": 42266 }, { "epoch": 0.0749446576576337, "grad_norm": 0.388671875, "learning_rate": 0.0012843251185767825, "loss": 0.2043, "step": 42268 }, { "epoch": 0.07494820382294351, "grad_norm": 0.421875, "learning_rate": 0.0012842668611629789, "loss": 0.21, "step": 42270 }, { "epoch": 0.07495174998825332, "grad_norm": 0.5625, "learning_rate": 0.0012842086029432988, "loss": 0.1669, "step": 42272 }, { "epoch": 0.07495529615356314, "grad_norm": 0.76953125, "learning_rate": 0.0012841503439179973, "loss": 0.1929, "step": 42274 }, { "epoch": 0.07495884231887295, "grad_norm": 0.8203125, "learning_rate": 0.0012840920840873295, "loss": 0.1821, "step": 42276 }, { "epoch": 0.07496238848418277, "grad_norm": 0.1640625, "learning_rate": 0.001284033823451549, "loss": 0.145, "step": 42278 }, { "epoch": 0.07496593464949258, "grad_norm": 0.42578125, "learning_rate": 0.0012839755620109121, "loss": 0.1837, "step": 42280 }, { "epoch": 0.0749694808148024, "grad_norm": 2.015625, "learning_rate": 0.0012839172997656729, "loss": 0.4665, "step": 42282 }, { "epoch": 0.07497302698011221, "grad_norm": 0.1748046875, "learning_rate": 0.0012838590367160864, "loss": 0.3774, "step": 42284 }, { "epoch": 0.07497657314542204, "grad_norm": 0.52734375, "learning_rate": 0.001283800772862407, "loss": 0.2111, "step": 42286 }, { "epoch": 0.07498011931073185, "grad_norm": 0.3515625, "learning_rate": 0.00128374250820489, "loss": 0.3313, "step": 42288 }, { "epoch": 0.07498366547604167, "grad_norm": 0.2734375, "learning_rate": 0.00128368424274379, "loss": 0.1741, "step": 42290 }, { "epoch": 0.07498721164135148, "grad_norm": 0.404296875, "learning_rate": 0.0012836259764793614, "loss": 0.1843, "step": 42292 }, { "epoch": 0.0749907578066613, "grad_norm": 0.40625, "learning_rate": 0.0012835677094118595, "loss": 0.1398, "step": 42294 }, { "epoch": 0.07499430397197111, "grad_norm": 1.203125, "learning_rate": 0.0012835094415415396, "loss": 0.2254, "step": 42296 }, { "epoch": 0.07499785013728093, "grad_norm": 0.306640625, "learning_rate": 0.0012834511728686555, "loss": 0.1347, "step": 42298 }, { "epoch": 0.07500139630259074, "grad_norm": 0.369140625, "learning_rate": 0.0012833929033934629, "loss": 0.1995, "step": 42300 }, { "epoch": 0.07500494246790056, "grad_norm": 0.302734375, "learning_rate": 0.0012833346331162158, "loss": 0.1848, "step": 42302 }, { "epoch": 0.07500848863321037, "grad_norm": 0.671875, "learning_rate": 0.0012832763620371699, "loss": 0.211, "step": 42304 }, { "epoch": 0.07501203479852019, "grad_norm": 1.2109375, "learning_rate": 0.0012832180901565797, "loss": 0.1967, "step": 42306 }, { "epoch": 0.07501558096383, "grad_norm": 0.68359375, "learning_rate": 0.0012831598174747, "loss": 0.1633, "step": 42308 }, { "epoch": 0.07501912712913981, "grad_norm": 0.494140625, "learning_rate": 0.0012831015439917852, "loss": 0.1795, "step": 42310 }, { "epoch": 0.07502267329444963, "grad_norm": 0.27734375, "learning_rate": 0.001283043269708091, "loss": 0.1403, "step": 42312 }, { "epoch": 0.07502621945975944, "grad_norm": 0.8203125, "learning_rate": 0.0012829849946238718, "loss": 0.1656, "step": 42314 }, { "epoch": 0.07502976562506926, "grad_norm": 2.140625, "learning_rate": 0.0012829267187393822, "loss": 0.1761, "step": 42316 }, { "epoch": 0.07503331179037907, "grad_norm": 0.27734375, "learning_rate": 0.0012828684420548779, "loss": 0.2058, "step": 42318 }, { "epoch": 0.07503685795568889, "grad_norm": 0.376953125, "learning_rate": 0.001282810164570613, "loss": 0.231, "step": 42320 }, { "epoch": 0.0750404041209987, "grad_norm": 0.54296875, "learning_rate": 0.001282751886286843, "loss": 0.3435, "step": 42322 }, { "epoch": 0.07504395028630852, "grad_norm": 0.28125, "learning_rate": 0.0012826936072038218, "loss": 0.1962, "step": 42324 }, { "epoch": 0.07504749645161833, "grad_norm": 1.0859375, "learning_rate": 0.0012826353273218051, "loss": 0.1726, "step": 42326 }, { "epoch": 0.07505104261692815, "grad_norm": 0.73828125, "learning_rate": 0.0012825770466410478, "loss": 0.1241, "step": 42328 }, { "epoch": 0.07505458878223796, "grad_norm": 0.291015625, "learning_rate": 0.0012825187651618042, "loss": 0.1947, "step": 42330 }, { "epoch": 0.07505813494754779, "grad_norm": 0.26171875, "learning_rate": 0.0012824604828843296, "loss": 0.1729, "step": 42332 }, { "epoch": 0.0750616811128576, "grad_norm": 0.404296875, "learning_rate": 0.001282402199808879, "loss": 0.1465, "step": 42334 }, { "epoch": 0.07506522727816742, "grad_norm": 0.40234375, "learning_rate": 0.001282343915935707, "loss": 0.1718, "step": 42336 }, { "epoch": 0.07506877344347723, "grad_norm": 0.7265625, "learning_rate": 0.0012822856312650688, "loss": 0.5937, "step": 42338 }, { "epoch": 0.07507231960878705, "grad_norm": 0.2138671875, "learning_rate": 0.0012822273457972191, "loss": 0.1331, "step": 42340 }, { "epoch": 0.07507586577409686, "grad_norm": 0.298828125, "learning_rate": 0.0012821690595324127, "loss": 0.2612, "step": 42342 }, { "epoch": 0.07507941193940668, "grad_norm": 0.609375, "learning_rate": 0.0012821107724709049, "loss": 0.1654, "step": 42344 }, { "epoch": 0.07508295810471649, "grad_norm": 0.314453125, "learning_rate": 0.0012820524846129501, "loss": 0.1824, "step": 42346 }, { "epoch": 0.0750865042700263, "grad_norm": 0.380859375, "learning_rate": 0.0012819941959588037, "loss": 0.2536, "step": 42348 }, { "epoch": 0.07509005043533612, "grad_norm": 0.55078125, "learning_rate": 0.0012819359065087202, "loss": 0.1782, "step": 42350 }, { "epoch": 0.07509359660064593, "grad_norm": 0.279296875, "learning_rate": 0.0012818776162629548, "loss": 0.1541, "step": 42352 }, { "epoch": 0.07509714276595575, "grad_norm": 2.375, "learning_rate": 0.0012818193252217625, "loss": 0.1511, "step": 42354 }, { "epoch": 0.07510068893126556, "grad_norm": 0.341796875, "learning_rate": 0.0012817610333853975, "loss": 0.2191, "step": 42356 }, { "epoch": 0.07510423509657538, "grad_norm": 0.19140625, "learning_rate": 0.001281702740754116, "loss": 0.169, "step": 42358 }, { "epoch": 0.07510778126188519, "grad_norm": 3.578125, "learning_rate": 0.0012816444473281719, "loss": 0.2582, "step": 42360 }, { "epoch": 0.07511132742719501, "grad_norm": 0.34765625, "learning_rate": 0.0012815861531078206, "loss": 0.2601, "step": 42362 }, { "epoch": 0.07511487359250482, "grad_norm": 0.578125, "learning_rate": 0.001281527858093317, "loss": 0.1459, "step": 42364 }, { "epoch": 0.07511841975781464, "grad_norm": 0.2158203125, "learning_rate": 0.001281469562284916, "loss": 0.2203, "step": 42366 }, { "epoch": 0.07512196592312445, "grad_norm": 0.423828125, "learning_rate": 0.0012814112656828725, "loss": 0.1219, "step": 42368 }, { "epoch": 0.07512551208843427, "grad_norm": 0.431640625, "learning_rate": 0.0012813529682874417, "loss": 0.2039, "step": 42370 }, { "epoch": 0.07512905825374408, "grad_norm": 0.87890625, "learning_rate": 0.001281294670098878, "loss": 0.1484, "step": 42372 }, { "epoch": 0.0751326044190539, "grad_norm": 1.234375, "learning_rate": 0.0012812363711174367, "loss": 0.2295, "step": 42374 }, { "epoch": 0.07513615058436372, "grad_norm": 0.80859375, "learning_rate": 0.0012811780713433733, "loss": 0.2054, "step": 42376 }, { "epoch": 0.07513969674967354, "grad_norm": 0.140625, "learning_rate": 0.0012811197707769417, "loss": 0.2062, "step": 42378 }, { "epoch": 0.07514324291498335, "grad_norm": 0.93359375, "learning_rate": 0.0012810614694183978, "loss": 0.2158, "step": 42380 }, { "epoch": 0.07514678908029317, "grad_norm": 1.1328125, "learning_rate": 0.001281003167267996, "loss": 0.1932, "step": 42382 }, { "epoch": 0.07515033524560298, "grad_norm": 0.81640625, "learning_rate": 0.0012809448643259916, "loss": 0.1934, "step": 42384 }, { "epoch": 0.0751538814109128, "grad_norm": 0.90625, "learning_rate": 0.0012808865605926391, "loss": 0.2318, "step": 42386 }, { "epoch": 0.07515742757622261, "grad_norm": 1.1953125, "learning_rate": 0.0012808282560681943, "loss": 0.1849, "step": 42388 }, { "epoch": 0.07516097374153242, "grad_norm": 0.345703125, "learning_rate": 0.0012807699507529117, "loss": 0.1748, "step": 42390 }, { "epoch": 0.07516451990684224, "grad_norm": 0.74609375, "learning_rate": 0.0012807116446470462, "loss": 0.4074, "step": 42392 }, { "epoch": 0.07516806607215205, "grad_norm": 0.490234375, "learning_rate": 0.001280653337750853, "loss": 0.2145, "step": 42394 }, { "epoch": 0.07517161223746187, "grad_norm": 1.2265625, "learning_rate": 0.001280595030064587, "loss": 0.385, "step": 42396 }, { "epoch": 0.07517515840277168, "grad_norm": 1.5234375, "learning_rate": 0.0012805367215885032, "loss": 0.1821, "step": 42398 }, { "epoch": 0.0751787045680815, "grad_norm": 1.578125, "learning_rate": 0.0012804784123228567, "loss": 0.389, "step": 42400 }, { "epoch": 0.07518225073339131, "grad_norm": 0.4921875, "learning_rate": 0.0012804201022679025, "loss": 0.2685, "step": 42402 }, { "epoch": 0.07518579689870113, "grad_norm": 0.37109375, "learning_rate": 0.0012803617914238956, "loss": 0.1971, "step": 42404 }, { "epoch": 0.07518934306401094, "grad_norm": 0.197265625, "learning_rate": 0.0012803034797910908, "loss": 0.1469, "step": 42406 }, { "epoch": 0.07519288922932076, "grad_norm": 2.90625, "learning_rate": 0.0012802451673697437, "loss": 0.3639, "step": 42408 }, { "epoch": 0.07519643539463057, "grad_norm": 0.98828125, "learning_rate": 0.0012801868541601085, "loss": 0.2194, "step": 42410 }, { "epoch": 0.07519998155994038, "grad_norm": 0.34765625, "learning_rate": 0.0012801285401624408, "loss": 0.2308, "step": 42412 }, { "epoch": 0.0752035277252502, "grad_norm": 0.181640625, "learning_rate": 0.0012800702253769956, "loss": 0.1327, "step": 42414 }, { "epoch": 0.07520707389056001, "grad_norm": 1.3046875, "learning_rate": 0.0012800119098040276, "loss": 0.2042, "step": 42416 }, { "epoch": 0.07521062005586983, "grad_norm": 0.73828125, "learning_rate": 0.0012799535934437925, "loss": 0.235, "step": 42418 }, { "epoch": 0.07521416622117964, "grad_norm": 0.33203125, "learning_rate": 0.0012798952762965447, "loss": 0.1713, "step": 42420 }, { "epoch": 0.07521771238648947, "grad_norm": 0.53515625, "learning_rate": 0.0012798369583625394, "loss": 0.3517, "step": 42422 }, { "epoch": 0.07522125855179929, "grad_norm": 0.4296875, "learning_rate": 0.0012797786396420317, "loss": 0.179, "step": 42424 }, { "epoch": 0.0752248047171091, "grad_norm": 0.28515625, "learning_rate": 0.0012797203201352768, "loss": 0.2067, "step": 42426 }, { "epoch": 0.07522835088241892, "grad_norm": 0.177734375, "learning_rate": 0.0012796619998425294, "loss": 0.1951, "step": 42428 }, { "epoch": 0.07523189704772873, "grad_norm": 0.546875, "learning_rate": 0.0012796036787640448, "loss": 0.153, "step": 42430 }, { "epoch": 0.07523544321303854, "grad_norm": 0.40234375, "learning_rate": 0.001279545356900078, "loss": 0.2008, "step": 42432 }, { "epoch": 0.07523898937834836, "grad_norm": 0.392578125, "learning_rate": 0.0012794870342508842, "loss": 0.2075, "step": 42434 }, { "epoch": 0.07524253554365817, "grad_norm": 0.91015625, "learning_rate": 0.0012794287108167184, "loss": 0.2377, "step": 42436 }, { "epoch": 0.07524608170896799, "grad_norm": 0.36328125, "learning_rate": 0.0012793703865978357, "loss": 0.1786, "step": 42438 }, { "epoch": 0.0752496278742778, "grad_norm": 0.2890625, "learning_rate": 0.0012793120615944911, "loss": 0.2304, "step": 42440 }, { "epoch": 0.07525317403958762, "grad_norm": 0.6171875, "learning_rate": 0.0012792537358069394, "loss": 0.2378, "step": 42442 }, { "epoch": 0.07525672020489743, "grad_norm": 0.302734375, "learning_rate": 0.0012791954092354365, "loss": 0.2135, "step": 42444 }, { "epoch": 0.07526026637020725, "grad_norm": 0.388671875, "learning_rate": 0.0012791370818802366, "loss": 0.1239, "step": 42446 }, { "epoch": 0.07526381253551706, "grad_norm": 0.66015625, "learning_rate": 0.0012790787537415953, "loss": 0.3373, "step": 42448 }, { "epoch": 0.07526735870082688, "grad_norm": 0.65625, "learning_rate": 0.001279020424819767, "loss": 0.1624, "step": 42450 }, { "epoch": 0.07527090486613669, "grad_norm": 0.625, "learning_rate": 0.0012789620951150083, "loss": 0.2468, "step": 42452 }, { "epoch": 0.0752744510314465, "grad_norm": 0.412109375, "learning_rate": 0.0012789037646275727, "loss": 0.1606, "step": 42454 }, { "epoch": 0.07527799719675632, "grad_norm": 0.83984375, "learning_rate": 0.0012788454333577163, "loss": 0.2313, "step": 42456 }, { "epoch": 0.07528154336206613, "grad_norm": 0.515625, "learning_rate": 0.0012787871013056938, "loss": 0.1586, "step": 42458 }, { "epoch": 0.07528508952737595, "grad_norm": 0.50390625, "learning_rate": 0.0012787287684717604, "loss": 0.1791, "step": 42460 }, { "epoch": 0.07528863569268576, "grad_norm": 0.3359375, "learning_rate": 0.0012786704348561706, "loss": 0.1814, "step": 42462 }, { "epoch": 0.07529218185799558, "grad_norm": 0.91796875, "learning_rate": 0.0012786121004591808, "loss": 0.1615, "step": 42464 }, { "epoch": 0.07529572802330539, "grad_norm": 0.8125, "learning_rate": 0.0012785537652810453, "loss": 0.2668, "step": 42466 }, { "epoch": 0.07529927418861522, "grad_norm": 0.2470703125, "learning_rate": 0.001278495429322019, "loss": 0.1933, "step": 42468 }, { "epoch": 0.07530282035392503, "grad_norm": 0.2412109375, "learning_rate": 0.0012784370925823577, "loss": 0.184, "step": 42470 }, { "epoch": 0.07530636651923485, "grad_norm": 0.396484375, "learning_rate": 0.0012783787550623162, "loss": 0.1752, "step": 42472 }, { "epoch": 0.07530991268454466, "grad_norm": 0.28125, "learning_rate": 0.0012783204167621496, "loss": 0.1596, "step": 42474 }, { "epoch": 0.07531345884985448, "grad_norm": 0.193359375, "learning_rate": 0.001278262077682113, "loss": 0.1501, "step": 42476 }, { "epoch": 0.07531700501516429, "grad_norm": 0.2119140625, "learning_rate": 0.0012782037378224617, "loss": 0.1723, "step": 42478 }, { "epoch": 0.07532055118047411, "grad_norm": 0.61328125, "learning_rate": 0.0012781453971834505, "loss": 0.1756, "step": 42480 }, { "epoch": 0.07532409734578392, "grad_norm": 0.71875, "learning_rate": 0.0012780870557653353, "loss": 0.2349, "step": 42482 }, { "epoch": 0.07532764351109374, "grad_norm": 0.8125, "learning_rate": 0.00127802871356837, "loss": 0.1788, "step": 42484 }, { "epoch": 0.07533118967640355, "grad_norm": 1.0390625, "learning_rate": 0.0012779703705928114, "loss": 0.2329, "step": 42486 }, { "epoch": 0.07533473584171337, "grad_norm": 0.318359375, "learning_rate": 0.0012779120268389132, "loss": 0.1906, "step": 42488 }, { "epoch": 0.07533828200702318, "grad_norm": 0.32421875, "learning_rate": 0.0012778536823069315, "loss": 0.2032, "step": 42490 }, { "epoch": 0.075341828172333, "grad_norm": 1.3828125, "learning_rate": 0.001277795336997121, "loss": 0.2295, "step": 42492 }, { "epoch": 0.07534537433764281, "grad_norm": 0.462890625, "learning_rate": 0.0012777369909097367, "loss": 0.1574, "step": 42494 }, { "epoch": 0.07534892050295262, "grad_norm": 1.734375, "learning_rate": 0.0012776786440450343, "loss": 0.2568, "step": 42496 }, { "epoch": 0.07535246666826244, "grad_norm": 0.2490234375, "learning_rate": 0.0012776202964032686, "loss": 0.2658, "step": 42498 }, { "epoch": 0.07535601283357225, "grad_norm": 0.3125, "learning_rate": 0.0012775619479846948, "loss": 0.2763, "step": 42500 }, { "epoch": 0.07535955899888207, "grad_norm": 2.015625, "learning_rate": 0.0012775035987895684, "loss": 0.3642, "step": 42502 }, { "epoch": 0.07536310516419188, "grad_norm": 0.859375, "learning_rate": 0.0012774452488181444, "loss": 0.233, "step": 42504 }, { "epoch": 0.0753666513295017, "grad_norm": 0.97265625, "learning_rate": 0.0012773868980706779, "loss": 0.1814, "step": 42506 }, { "epoch": 0.07537019749481151, "grad_norm": 0.65625, "learning_rate": 0.001277328546547424, "loss": 0.2075, "step": 42508 }, { "epoch": 0.07537374366012133, "grad_norm": 0.4609375, "learning_rate": 0.0012772701942486383, "loss": 0.1932, "step": 42510 }, { "epoch": 0.07537728982543115, "grad_norm": 0.2236328125, "learning_rate": 0.0012772118411745754, "loss": 0.2045, "step": 42512 }, { "epoch": 0.07538083599074097, "grad_norm": 0.2734375, "learning_rate": 0.0012771534873254912, "loss": 0.1594, "step": 42514 }, { "epoch": 0.07538438215605078, "grad_norm": 1.515625, "learning_rate": 0.0012770951327016405, "loss": 0.2304, "step": 42516 }, { "epoch": 0.0753879283213606, "grad_norm": 0.376953125, "learning_rate": 0.0012770367773032784, "loss": 0.1805, "step": 42518 }, { "epoch": 0.07539147448667041, "grad_norm": 0.43359375, "learning_rate": 0.00127697842113066, "loss": 0.2589, "step": 42520 }, { "epoch": 0.07539502065198023, "grad_norm": 0.875, "learning_rate": 0.0012769200641840413, "loss": 0.2142, "step": 42522 }, { "epoch": 0.07539856681729004, "grad_norm": 0.79296875, "learning_rate": 0.0012768617064636766, "loss": 0.177, "step": 42524 }, { "epoch": 0.07540211298259986, "grad_norm": 0.25, "learning_rate": 0.001276803347969822, "loss": 0.1738, "step": 42526 }, { "epoch": 0.07540565914790967, "grad_norm": 1.6171875, "learning_rate": 0.001276744988702732, "loss": 0.1564, "step": 42528 }, { "epoch": 0.07540920531321949, "grad_norm": 0.458984375, "learning_rate": 0.001276686628662662, "loss": 0.1901, "step": 42530 }, { "epoch": 0.0754127514785293, "grad_norm": 0.259765625, "learning_rate": 0.0012766282678498674, "loss": 0.1454, "step": 42532 }, { "epoch": 0.07541629764383911, "grad_norm": 0.408203125, "learning_rate": 0.0012765699062646035, "loss": 0.1503, "step": 42534 }, { "epoch": 0.07541984380914893, "grad_norm": 0.33203125, "learning_rate": 0.001276511543907125, "loss": 0.1364, "step": 42536 }, { "epoch": 0.07542338997445874, "grad_norm": 0.423828125, "learning_rate": 0.001276453180777688, "loss": 0.1728, "step": 42538 }, { "epoch": 0.07542693613976856, "grad_norm": 0.5625, "learning_rate": 0.0012763948168765468, "loss": 0.2077, "step": 42540 }, { "epoch": 0.07543048230507837, "grad_norm": 3.921875, "learning_rate": 0.0012763364522039575, "loss": 0.1927, "step": 42542 }, { "epoch": 0.07543402847038819, "grad_norm": 0.37109375, "learning_rate": 0.0012762780867601748, "loss": 0.2214, "step": 42544 }, { "epoch": 0.075437574635698, "grad_norm": 0.3203125, "learning_rate": 0.0012762197205454541, "loss": 0.2144, "step": 42546 }, { "epoch": 0.07544112080100782, "grad_norm": 0.546875, "learning_rate": 0.001276161353560051, "loss": 0.2287, "step": 42548 }, { "epoch": 0.07544466696631763, "grad_norm": 0.228515625, "learning_rate": 0.0012761029858042205, "loss": 0.1532, "step": 42550 }, { "epoch": 0.07544821313162745, "grad_norm": 0.69140625, "learning_rate": 0.0012760446172782177, "loss": 0.2189, "step": 42552 }, { "epoch": 0.07545175929693726, "grad_norm": 0.734375, "learning_rate": 0.0012759862479822977, "loss": 0.2065, "step": 42554 }, { "epoch": 0.07545530546224707, "grad_norm": 0.330078125, "learning_rate": 0.0012759278779167166, "loss": 0.1624, "step": 42556 }, { "epoch": 0.0754588516275569, "grad_norm": 1.0390625, "learning_rate": 0.0012758695070817286, "loss": 0.24, "step": 42558 }, { "epoch": 0.07546239779286672, "grad_norm": 0.458984375, "learning_rate": 0.0012758111354775898, "loss": 0.1715, "step": 42560 }, { "epoch": 0.07546594395817653, "grad_norm": 0.384765625, "learning_rate": 0.001275752763104555, "loss": 0.2684, "step": 42562 }, { "epoch": 0.07546949012348635, "grad_norm": 0.37890625, "learning_rate": 0.00127569438996288, "loss": 0.2173, "step": 42564 }, { "epoch": 0.07547303628879616, "grad_norm": 0.6796875, "learning_rate": 0.0012756360160528198, "loss": 0.2336, "step": 42566 }, { "epoch": 0.07547658245410598, "grad_norm": 0.578125, "learning_rate": 0.0012755776413746298, "loss": 0.1895, "step": 42568 }, { "epoch": 0.07548012861941579, "grad_norm": 0.484375, "learning_rate": 0.0012755192659285649, "loss": 0.1859, "step": 42570 }, { "epoch": 0.0754836747847256, "grad_norm": 0.275390625, "learning_rate": 0.0012754608897148807, "loss": 0.1717, "step": 42572 }, { "epoch": 0.07548722095003542, "grad_norm": 0.77734375, "learning_rate": 0.0012754025127338325, "loss": 0.1757, "step": 42574 }, { "epoch": 0.07549076711534523, "grad_norm": 0.265625, "learning_rate": 0.0012753441349856755, "loss": 0.191, "step": 42576 }, { "epoch": 0.07549431328065505, "grad_norm": 0.40625, "learning_rate": 0.0012752857564706655, "loss": 0.2593, "step": 42578 }, { "epoch": 0.07549785944596486, "grad_norm": 0.439453125, "learning_rate": 0.001275227377189057, "loss": 0.2669, "step": 42580 }, { "epoch": 0.07550140561127468, "grad_norm": 0.5390625, "learning_rate": 0.0012751689971411057, "loss": 0.1764, "step": 42582 }, { "epoch": 0.07550495177658449, "grad_norm": 0.271484375, "learning_rate": 0.0012751106163270676, "loss": 0.1663, "step": 42584 }, { "epoch": 0.0755084979418943, "grad_norm": 0.65234375, "learning_rate": 0.001275052234747197, "loss": 0.1666, "step": 42586 }, { "epoch": 0.07551204410720412, "grad_norm": 0.46484375, "learning_rate": 0.0012749938524017495, "loss": 0.2436, "step": 42588 }, { "epoch": 0.07551559027251394, "grad_norm": 1.0625, "learning_rate": 0.0012749354692909805, "loss": 0.298, "step": 42590 }, { "epoch": 0.07551913643782375, "grad_norm": 0.294921875, "learning_rate": 0.0012748770854151454, "loss": 0.3844, "step": 42592 }, { "epoch": 0.07552268260313356, "grad_norm": 0.62890625, "learning_rate": 0.0012748187007744997, "loss": 0.2258, "step": 42594 }, { "epoch": 0.07552622876844338, "grad_norm": 0.26953125, "learning_rate": 0.0012747603153692984, "loss": 0.2056, "step": 42596 }, { "epoch": 0.0755297749337532, "grad_norm": 0.259765625, "learning_rate": 0.001274701929199797, "loss": 0.2037, "step": 42598 }, { "epoch": 0.07553332109906301, "grad_norm": 0.326171875, "learning_rate": 0.0012746435422662508, "loss": 0.2411, "step": 42600 }, { "epoch": 0.07553686726437282, "grad_norm": 0.404296875, "learning_rate": 0.0012745851545689151, "loss": 0.1712, "step": 42602 }, { "epoch": 0.07554041342968265, "grad_norm": 0.31640625, "learning_rate": 0.0012745267661080457, "loss": 0.122, "step": 42604 }, { "epoch": 0.07554395959499247, "grad_norm": 0.92578125, "learning_rate": 0.0012744683768838973, "loss": 0.1145, "step": 42606 }, { "epoch": 0.07554750576030228, "grad_norm": 0.90625, "learning_rate": 0.0012744099868967258, "loss": 0.2163, "step": 42608 }, { "epoch": 0.0755510519256121, "grad_norm": 0.330078125, "learning_rate": 0.001274351596146786, "loss": 0.1814, "step": 42610 }, { "epoch": 0.07555459809092191, "grad_norm": 0.34375, "learning_rate": 0.001274293204634334, "loss": 0.2555, "step": 42612 }, { "epoch": 0.07555814425623172, "grad_norm": 0.75390625, "learning_rate": 0.0012742348123596244, "loss": 0.1901, "step": 42614 }, { "epoch": 0.07556169042154154, "grad_norm": 0.671875, "learning_rate": 0.0012741764193229133, "loss": 0.1814, "step": 42616 }, { "epoch": 0.07556523658685135, "grad_norm": 0.1484375, "learning_rate": 0.001274118025524455, "loss": 0.2705, "step": 42618 }, { "epoch": 0.07556878275216117, "grad_norm": 0.322265625, "learning_rate": 0.0012740596309645062, "loss": 0.1546, "step": 42620 }, { "epoch": 0.07557232891747098, "grad_norm": 0.337890625, "learning_rate": 0.0012740012356433218, "loss": 0.2137, "step": 42622 }, { "epoch": 0.0755758750827808, "grad_norm": 0.1533203125, "learning_rate": 0.0012739428395611568, "loss": 0.2073, "step": 42624 }, { "epoch": 0.07557942124809061, "grad_norm": 0.353515625, "learning_rate": 0.001273884442718267, "loss": 0.1932, "step": 42626 }, { "epoch": 0.07558296741340043, "grad_norm": 0.416015625, "learning_rate": 0.0012738260451149075, "loss": 0.1814, "step": 42628 }, { "epoch": 0.07558651357871024, "grad_norm": 0.365234375, "learning_rate": 0.0012737676467513338, "loss": 0.3042, "step": 42630 }, { "epoch": 0.07559005974402006, "grad_norm": 0.9375, "learning_rate": 0.0012737092476278015, "loss": 0.187, "step": 42632 }, { "epoch": 0.07559360590932987, "grad_norm": 0.640625, "learning_rate": 0.0012736508477445656, "loss": 0.1933, "step": 42634 }, { "epoch": 0.07559715207463968, "grad_norm": 1.265625, "learning_rate": 0.001273592447101882, "loss": 0.2289, "step": 42636 }, { "epoch": 0.0756006982399495, "grad_norm": 0.5078125, "learning_rate": 0.001273534045700006, "loss": 0.1959, "step": 42638 }, { "epoch": 0.07560424440525931, "grad_norm": 1.8046875, "learning_rate": 0.0012734756435391927, "loss": 0.2773, "step": 42640 }, { "epoch": 0.07560779057056913, "grad_norm": 0.3203125, "learning_rate": 0.0012734172406196978, "loss": 0.2118, "step": 42642 }, { "epoch": 0.07561133673587894, "grad_norm": 0.66015625, "learning_rate": 0.0012733588369417766, "loss": 0.1757, "step": 42644 }, { "epoch": 0.07561488290118876, "grad_norm": 0.2294921875, "learning_rate": 0.0012733004325056843, "loss": 0.1403, "step": 42646 }, { "epoch": 0.07561842906649859, "grad_norm": 0.390625, "learning_rate": 0.0012732420273116767, "loss": 0.2253, "step": 42648 }, { "epoch": 0.0756219752318084, "grad_norm": 0.46875, "learning_rate": 0.0012731836213600094, "loss": 0.1766, "step": 42650 }, { "epoch": 0.07562552139711821, "grad_norm": 1.453125, "learning_rate": 0.0012731252146509372, "loss": 0.2317, "step": 42652 }, { "epoch": 0.07562906756242803, "grad_norm": 0.51171875, "learning_rate": 0.001273066807184716, "loss": 0.1947, "step": 42654 }, { "epoch": 0.07563261372773784, "grad_norm": 0.3671875, "learning_rate": 0.001273008398961601, "loss": 0.2196, "step": 42656 }, { "epoch": 0.07563615989304766, "grad_norm": 1.484375, "learning_rate": 0.0012729499899818478, "loss": 0.2991, "step": 42658 }, { "epoch": 0.07563970605835747, "grad_norm": 1.0625, "learning_rate": 0.001272891580245712, "loss": 0.1942, "step": 42660 }, { "epoch": 0.07564325222366729, "grad_norm": 2.140625, "learning_rate": 0.0012728331697534488, "loss": 0.1694, "step": 42662 }, { "epoch": 0.0756467983889771, "grad_norm": 0.74609375, "learning_rate": 0.0012727747585053134, "loss": 0.1876, "step": 42664 }, { "epoch": 0.07565034455428692, "grad_norm": 0.44140625, "learning_rate": 0.001272716346501562, "loss": 0.2308, "step": 42666 }, { "epoch": 0.07565389071959673, "grad_norm": 0.35546875, "learning_rate": 0.001272657933742449, "loss": 0.1656, "step": 42668 }, { "epoch": 0.07565743688490655, "grad_norm": 1.5546875, "learning_rate": 0.0012725995202282313, "loss": 0.1531, "step": 42670 }, { "epoch": 0.07566098305021636, "grad_norm": 1.8984375, "learning_rate": 0.001272541105959163, "loss": 0.3338, "step": 42672 }, { "epoch": 0.07566452921552617, "grad_norm": 0.51953125, "learning_rate": 0.0012724826909355003, "loss": 0.1947, "step": 42674 }, { "epoch": 0.07566807538083599, "grad_norm": 0.470703125, "learning_rate": 0.0012724242751574984, "loss": 0.2385, "step": 42676 }, { "epoch": 0.0756716215461458, "grad_norm": 0.68359375, "learning_rate": 0.001272365858625413, "loss": 0.2208, "step": 42678 }, { "epoch": 0.07567516771145562, "grad_norm": 1.015625, "learning_rate": 0.0012723074413394995, "loss": 0.2107, "step": 42680 }, { "epoch": 0.07567871387676543, "grad_norm": 0.3203125, "learning_rate": 0.0012722490233000132, "loss": 0.2854, "step": 42682 }, { "epoch": 0.07568226004207525, "grad_norm": 1.3515625, "learning_rate": 0.0012721906045072096, "loss": 0.1887, "step": 42684 }, { "epoch": 0.07568580620738506, "grad_norm": 0.828125, "learning_rate": 0.0012721321849613447, "loss": 0.1831, "step": 42686 }, { "epoch": 0.07568935237269488, "grad_norm": 0.26171875, "learning_rate": 0.0012720737646626735, "loss": 0.1676, "step": 42688 }, { "epoch": 0.07569289853800469, "grad_norm": 0.609375, "learning_rate": 0.0012720153436114512, "loss": 0.2101, "step": 42690 }, { "epoch": 0.0756964447033145, "grad_norm": 0.5859375, "learning_rate": 0.001271956921807934, "loss": 0.3826, "step": 42692 }, { "epoch": 0.07569999086862433, "grad_norm": 0.353515625, "learning_rate": 0.0012718984992523771, "loss": 0.2135, "step": 42694 }, { "epoch": 0.07570353703393415, "grad_norm": 0.251953125, "learning_rate": 0.001271840075945036, "loss": 0.1794, "step": 42696 }, { "epoch": 0.07570708319924396, "grad_norm": 0.5078125, "learning_rate": 0.0012717816518861664, "loss": 0.1736, "step": 42698 }, { "epoch": 0.07571062936455378, "grad_norm": 0.1787109375, "learning_rate": 0.0012717232270760238, "loss": 0.1773, "step": 42700 }, { "epoch": 0.07571417552986359, "grad_norm": 0.349609375, "learning_rate": 0.0012716648015148632, "loss": 0.1839, "step": 42702 }, { "epoch": 0.0757177216951734, "grad_norm": 1.5546875, "learning_rate": 0.0012716063752029405, "loss": 0.5216, "step": 42704 }, { "epoch": 0.07572126786048322, "grad_norm": 0.26171875, "learning_rate": 0.0012715479481405112, "loss": 0.2473, "step": 42706 }, { "epoch": 0.07572481402579304, "grad_norm": 0.416015625, "learning_rate": 0.0012714895203278307, "loss": 0.1913, "step": 42708 }, { "epoch": 0.07572836019110285, "grad_norm": 0.396484375, "learning_rate": 0.0012714310917651549, "loss": 0.1761, "step": 42710 }, { "epoch": 0.07573190635641266, "grad_norm": 0.58203125, "learning_rate": 0.001271372662452739, "loss": 0.1513, "step": 42712 }, { "epoch": 0.07573545252172248, "grad_norm": 0.94140625, "learning_rate": 0.0012713142323908387, "loss": 0.4719, "step": 42714 }, { "epoch": 0.0757389986870323, "grad_norm": 1.1328125, "learning_rate": 0.001271255801579709, "loss": 0.3397, "step": 42716 }, { "epoch": 0.07574254485234211, "grad_norm": 0.2578125, "learning_rate": 0.0012711973700196065, "loss": 0.227, "step": 42718 }, { "epoch": 0.07574609101765192, "grad_norm": 0.2177734375, "learning_rate": 0.0012711389377107858, "loss": 0.199, "step": 42720 }, { "epoch": 0.07574963718296174, "grad_norm": 0.6875, "learning_rate": 0.0012710805046535028, "loss": 0.1479, "step": 42722 }, { "epoch": 0.07575318334827155, "grad_norm": 0.37109375, "learning_rate": 0.0012710220708480133, "loss": 0.2549, "step": 42724 }, { "epoch": 0.07575672951358137, "grad_norm": 0.66015625, "learning_rate": 0.0012709636362945725, "loss": 0.2152, "step": 42726 }, { "epoch": 0.07576027567889118, "grad_norm": 0.6171875, "learning_rate": 0.001270905200993436, "loss": 0.2135, "step": 42728 }, { "epoch": 0.075763821844201, "grad_norm": 0.99609375, "learning_rate": 0.0012708467649448591, "loss": 0.273, "step": 42730 }, { "epoch": 0.07576736800951081, "grad_norm": 0.54296875, "learning_rate": 0.001270788328149098, "loss": 0.2016, "step": 42732 }, { "epoch": 0.07577091417482062, "grad_norm": 1.8984375, "learning_rate": 0.0012707298906064085, "loss": 0.308, "step": 42734 }, { "epoch": 0.07577446034013044, "grad_norm": 0.33984375, "learning_rate": 0.001270671452317045, "loss": 0.139, "step": 42736 }, { "epoch": 0.07577800650544025, "grad_norm": 0.703125, "learning_rate": 0.0012706130132812638, "loss": 0.1724, "step": 42738 }, { "epoch": 0.07578155267075008, "grad_norm": 0.1669921875, "learning_rate": 0.0012705545734993204, "loss": 0.1564, "step": 42740 }, { "epoch": 0.0757850988360599, "grad_norm": 0.21484375, "learning_rate": 0.0012704961329714703, "loss": 0.1633, "step": 42742 }, { "epoch": 0.07578864500136971, "grad_norm": 0.72265625, "learning_rate": 0.0012704376916979692, "loss": 0.2522, "step": 42744 }, { "epoch": 0.07579219116667953, "grad_norm": 1.671875, "learning_rate": 0.0012703792496790724, "loss": 0.2184, "step": 42746 }, { "epoch": 0.07579573733198934, "grad_norm": 0.435546875, "learning_rate": 0.0012703208069150362, "loss": 0.1554, "step": 42748 }, { "epoch": 0.07579928349729916, "grad_norm": 0.296875, "learning_rate": 0.0012702623634061154, "loss": 0.1645, "step": 42750 }, { "epoch": 0.07580282966260897, "grad_norm": 0.3515625, "learning_rate": 0.0012702039191525663, "loss": 0.3486, "step": 42752 }, { "epoch": 0.07580637582791878, "grad_norm": 1.0546875, "learning_rate": 0.0012701454741546438, "loss": 0.2079, "step": 42754 }, { "epoch": 0.0758099219932286, "grad_norm": 0.291015625, "learning_rate": 0.0012700870284126041, "loss": 0.1759, "step": 42756 }, { "epoch": 0.07581346815853841, "grad_norm": 1.4296875, "learning_rate": 0.0012700285819267022, "loss": 0.178, "step": 42758 }, { "epoch": 0.07581701432384823, "grad_norm": 0.337890625, "learning_rate": 0.0012699701346971942, "loss": 0.1803, "step": 42760 }, { "epoch": 0.07582056048915804, "grad_norm": 0.6484375, "learning_rate": 0.0012699116867243355, "loss": 0.1985, "step": 42762 }, { "epoch": 0.07582410665446786, "grad_norm": 0.4453125, "learning_rate": 0.001269853238008382, "loss": 0.1613, "step": 42764 }, { "epoch": 0.07582765281977767, "grad_norm": 0.359375, "learning_rate": 0.0012697947885495883, "loss": 0.2048, "step": 42766 }, { "epoch": 0.07583119898508749, "grad_norm": 0.45703125, "learning_rate": 0.0012697363383482117, "loss": 0.2137, "step": 42768 }, { "epoch": 0.0758347451503973, "grad_norm": 6.09375, "learning_rate": 0.0012696778874045067, "loss": 0.3181, "step": 42770 }, { "epoch": 0.07583829131570712, "grad_norm": 0.55859375, "learning_rate": 0.0012696194357187292, "loss": 0.1735, "step": 42772 }, { "epoch": 0.07584183748101693, "grad_norm": 0.435546875, "learning_rate": 0.0012695609832911348, "loss": 0.1923, "step": 42774 }, { "epoch": 0.07584538364632674, "grad_norm": 1.0703125, "learning_rate": 0.0012695025301219793, "loss": 0.1824, "step": 42776 }, { "epoch": 0.07584892981163656, "grad_norm": 1.5625, "learning_rate": 0.0012694440762115178, "loss": 0.2069, "step": 42778 }, { "epoch": 0.07585247597694637, "grad_norm": 0.201171875, "learning_rate": 0.0012693856215600069, "loss": 0.1325, "step": 42780 }, { "epoch": 0.07585602214225619, "grad_norm": 0.265625, "learning_rate": 0.0012693271661677011, "loss": 0.1564, "step": 42782 }, { "epoch": 0.07585956830756602, "grad_norm": 0.28125, "learning_rate": 0.001269268710034857, "loss": 0.2878, "step": 42784 }, { "epoch": 0.07586311447287583, "grad_norm": 0.62109375, "learning_rate": 0.0012692102531617296, "loss": 0.171, "step": 42786 }, { "epoch": 0.07586666063818565, "grad_norm": 0.9765625, "learning_rate": 0.001269151795548575, "loss": 0.1907, "step": 42788 }, { "epoch": 0.07587020680349546, "grad_norm": 0.1806640625, "learning_rate": 0.0012690933371956486, "loss": 0.1342, "step": 42790 }, { "epoch": 0.07587375296880527, "grad_norm": 0.69140625, "learning_rate": 0.0012690348781032063, "loss": 0.2638, "step": 42792 }, { "epoch": 0.07587729913411509, "grad_norm": 0.27734375, "learning_rate": 0.0012689764182715038, "loss": 0.2399, "step": 42794 }, { "epoch": 0.0758808452994249, "grad_norm": 0.453125, "learning_rate": 0.0012689179577007963, "loss": 0.1557, "step": 42796 }, { "epoch": 0.07588439146473472, "grad_norm": 0.42578125, "learning_rate": 0.00126885949639134, "loss": 0.2135, "step": 42798 }, { "epoch": 0.07588793763004453, "grad_norm": 0.154296875, "learning_rate": 0.0012688010343433903, "loss": 0.141, "step": 42800 }, { "epoch": 0.07589148379535435, "grad_norm": 0.326171875, "learning_rate": 0.0012687425715572028, "loss": 0.1724, "step": 42802 }, { "epoch": 0.07589502996066416, "grad_norm": 0.62890625, "learning_rate": 0.0012686841080330332, "loss": 0.1791, "step": 42804 }, { "epoch": 0.07589857612597398, "grad_norm": 1.2265625, "learning_rate": 0.0012686256437711375, "loss": 0.3222, "step": 42806 }, { "epoch": 0.07590212229128379, "grad_norm": 1.4609375, "learning_rate": 0.0012685671787717712, "loss": 0.3161, "step": 42808 }, { "epoch": 0.0759056684565936, "grad_norm": 0.515625, "learning_rate": 0.0012685087130351897, "loss": 0.1418, "step": 42810 }, { "epoch": 0.07590921462190342, "grad_norm": 0.26953125, "learning_rate": 0.0012684502465616495, "loss": 0.1508, "step": 42812 }, { "epoch": 0.07591276078721323, "grad_norm": 0.71875, "learning_rate": 0.0012683917793514052, "loss": 0.2613, "step": 42814 }, { "epoch": 0.07591630695252305, "grad_norm": 0.796875, "learning_rate": 0.0012683333114047134, "loss": 0.2078, "step": 42816 }, { "epoch": 0.07591985311783286, "grad_norm": 1.3984375, "learning_rate": 0.0012682748427218294, "loss": 0.2017, "step": 42818 }, { "epoch": 0.07592339928314268, "grad_norm": 1.140625, "learning_rate": 0.0012682163733030087, "loss": 0.2297, "step": 42820 }, { "epoch": 0.07592694544845249, "grad_norm": 1.640625, "learning_rate": 0.0012681579031485078, "loss": 0.1842, "step": 42822 }, { "epoch": 0.07593049161376231, "grad_norm": 0.439453125, "learning_rate": 0.0012680994322585815, "loss": 0.176, "step": 42824 }, { "epoch": 0.07593403777907212, "grad_norm": 0.375, "learning_rate": 0.0012680409606334864, "loss": 0.1594, "step": 42826 }, { "epoch": 0.07593758394438194, "grad_norm": 0.671875, "learning_rate": 0.0012679824882734774, "loss": 0.1417, "step": 42828 }, { "epoch": 0.07594113010969177, "grad_norm": 0.5703125, "learning_rate": 0.0012679240151788106, "loss": 0.1633, "step": 42830 }, { "epoch": 0.07594467627500158, "grad_norm": 0.306640625, "learning_rate": 0.0012678655413497417, "loss": 0.1594, "step": 42832 }, { "epoch": 0.0759482224403114, "grad_norm": 0.30859375, "learning_rate": 0.0012678070667865265, "loss": 0.1596, "step": 42834 }, { "epoch": 0.07595176860562121, "grad_norm": 0.263671875, "learning_rate": 0.0012677485914894206, "loss": 0.1625, "step": 42836 }, { "epoch": 0.07595531477093102, "grad_norm": 1.0703125, "learning_rate": 0.0012676901154586799, "loss": 0.4905, "step": 42838 }, { "epoch": 0.07595886093624084, "grad_norm": 0.34765625, "learning_rate": 0.0012676316386945597, "loss": 0.1883, "step": 42840 }, { "epoch": 0.07596240710155065, "grad_norm": 3.765625, "learning_rate": 0.0012675731611973167, "loss": 0.3016, "step": 42842 }, { "epoch": 0.07596595326686047, "grad_norm": 1.390625, "learning_rate": 0.0012675146829672055, "loss": 0.2753, "step": 42844 }, { "epoch": 0.07596949943217028, "grad_norm": 0.59375, "learning_rate": 0.0012674562040044823, "loss": 0.1688, "step": 42846 }, { "epoch": 0.0759730455974801, "grad_norm": 0.37109375, "learning_rate": 0.0012673977243094037, "loss": 0.202, "step": 42848 }, { "epoch": 0.07597659176278991, "grad_norm": 0.8046875, "learning_rate": 0.001267339243882224, "loss": 0.209, "step": 42850 }, { "epoch": 0.07598013792809973, "grad_norm": 0.5546875, "learning_rate": 0.0012672807627231997, "loss": 0.1805, "step": 42852 }, { "epoch": 0.07598368409340954, "grad_norm": 0.375, "learning_rate": 0.001267222280832587, "loss": 0.1871, "step": 42854 }, { "epoch": 0.07598723025871935, "grad_norm": 1.984375, "learning_rate": 0.0012671637982106406, "loss": 0.2728, "step": 42856 }, { "epoch": 0.07599077642402917, "grad_norm": 0.439453125, "learning_rate": 0.0012671053148576171, "loss": 0.1416, "step": 42858 }, { "epoch": 0.07599432258933898, "grad_norm": 0.296875, "learning_rate": 0.001267046830773772, "loss": 0.2251, "step": 42860 }, { "epoch": 0.0759978687546488, "grad_norm": 0.45703125, "learning_rate": 0.001266988345959361, "loss": 0.186, "step": 42862 }, { "epoch": 0.07600141491995861, "grad_norm": 0.359375, "learning_rate": 0.00126692986041464, "loss": 0.1748, "step": 42864 }, { "epoch": 0.07600496108526843, "grad_norm": 0.875, "learning_rate": 0.0012668713741398648, "loss": 0.2187, "step": 42866 }, { "epoch": 0.07600850725057824, "grad_norm": 0.59765625, "learning_rate": 0.0012668128871352912, "loss": 0.1683, "step": 42868 }, { "epoch": 0.07601205341588806, "grad_norm": 0.40625, "learning_rate": 0.001266754399401175, "loss": 0.2086, "step": 42870 }, { "epoch": 0.07601559958119787, "grad_norm": 0.5390625, "learning_rate": 0.0012666959109377718, "loss": 0.1825, "step": 42872 }, { "epoch": 0.07601914574650769, "grad_norm": 0.51171875, "learning_rate": 0.0012666374217453376, "loss": 0.2122, "step": 42874 }, { "epoch": 0.07602269191181751, "grad_norm": 0.296875, "learning_rate": 0.0012665789318241278, "loss": 0.1812, "step": 42876 }, { "epoch": 0.07602623807712733, "grad_norm": 0.5390625, "learning_rate": 0.0012665204411743988, "loss": 0.2722, "step": 42878 }, { "epoch": 0.07602978424243714, "grad_norm": 0.314453125, "learning_rate": 0.0012664619497964058, "loss": 0.1598, "step": 42880 }, { "epoch": 0.07603333040774696, "grad_norm": 0.2041015625, "learning_rate": 0.0012664034576904053, "loss": 0.2698, "step": 42882 }, { "epoch": 0.07603687657305677, "grad_norm": 0.76171875, "learning_rate": 0.0012663449648566525, "loss": 0.2027, "step": 42884 }, { "epoch": 0.07604042273836659, "grad_norm": 0.416015625, "learning_rate": 0.001266286471295404, "loss": 0.2789, "step": 42886 }, { "epoch": 0.0760439689036764, "grad_norm": 0.5234375, "learning_rate": 0.0012662279770069146, "loss": 0.1425, "step": 42888 }, { "epoch": 0.07604751506898622, "grad_norm": 0.361328125, "learning_rate": 0.0012661694819914404, "loss": 0.2457, "step": 42890 }, { "epoch": 0.07605106123429603, "grad_norm": 0.345703125, "learning_rate": 0.0012661109862492376, "loss": 0.1099, "step": 42892 }, { "epoch": 0.07605460739960584, "grad_norm": 0.267578125, "learning_rate": 0.001266052489780562, "loss": 0.1589, "step": 42894 }, { "epoch": 0.07605815356491566, "grad_norm": 0.90234375, "learning_rate": 0.001265993992585669, "loss": 0.1828, "step": 42896 }, { "epoch": 0.07606169973022547, "grad_norm": 0.6953125, "learning_rate": 0.0012659354946648147, "loss": 0.1861, "step": 42898 }, { "epoch": 0.07606524589553529, "grad_norm": 0.373046875, "learning_rate": 0.0012658769960182553, "loss": 0.1886, "step": 42900 }, { "epoch": 0.0760687920608451, "grad_norm": 0.265625, "learning_rate": 0.0012658184966462458, "loss": 0.1727, "step": 42902 }, { "epoch": 0.07607233822615492, "grad_norm": 0.515625, "learning_rate": 0.0012657599965490431, "loss": 0.3147, "step": 42904 }, { "epoch": 0.07607588439146473, "grad_norm": 0.51171875, "learning_rate": 0.001265701495726902, "loss": 0.1263, "step": 42906 }, { "epoch": 0.07607943055677455, "grad_norm": 0.7109375, "learning_rate": 0.001265642994180079, "loss": 0.194, "step": 42908 }, { "epoch": 0.07608297672208436, "grad_norm": 0.45703125, "learning_rate": 0.0012655844919088294, "loss": 0.2206, "step": 42910 }, { "epoch": 0.07608652288739418, "grad_norm": 0.26171875, "learning_rate": 0.00126552598891341, "loss": 0.2097, "step": 42912 }, { "epoch": 0.07609006905270399, "grad_norm": 0.296875, "learning_rate": 0.0012654674851940756, "loss": 0.2224, "step": 42914 }, { "epoch": 0.0760936152180138, "grad_norm": 0.328125, "learning_rate": 0.0012654089807510828, "loss": 0.1357, "step": 42916 }, { "epoch": 0.07609716138332362, "grad_norm": 0.6875, "learning_rate": 0.0012653504755846868, "loss": 0.2215, "step": 42918 }, { "epoch": 0.07610070754863345, "grad_norm": 1.3046875, "learning_rate": 0.0012652919696951445, "loss": 0.3636, "step": 42920 }, { "epoch": 0.07610425371394326, "grad_norm": 0.396484375, "learning_rate": 0.0012652334630827106, "loss": 0.1452, "step": 42922 }, { "epoch": 0.07610779987925308, "grad_norm": 0.283203125, "learning_rate": 0.0012651749557476418, "loss": 0.1931, "step": 42924 }, { "epoch": 0.07611134604456289, "grad_norm": 0.6640625, "learning_rate": 0.0012651164476901935, "loss": 0.1851, "step": 42926 }, { "epoch": 0.0761148922098727, "grad_norm": 1.359375, "learning_rate": 0.0012650579389106218, "loss": 0.321, "step": 42928 }, { "epoch": 0.07611843837518252, "grad_norm": 0.578125, "learning_rate": 0.0012649994294091826, "loss": 0.1545, "step": 42930 }, { "epoch": 0.07612198454049234, "grad_norm": 0.421875, "learning_rate": 0.001264940919186132, "loss": 0.2635, "step": 42932 }, { "epoch": 0.07612553070580215, "grad_norm": 0.7890625, "learning_rate": 0.001264882408241725, "loss": 0.1463, "step": 42934 }, { "epoch": 0.07612907687111196, "grad_norm": 0.546875, "learning_rate": 0.0012648238965762186, "loss": 0.2246, "step": 42936 }, { "epoch": 0.07613262303642178, "grad_norm": 0.4375, "learning_rate": 0.0012647653841898682, "loss": 0.2318, "step": 42938 }, { "epoch": 0.0761361692017316, "grad_norm": 0.3046875, "learning_rate": 0.0012647068710829297, "loss": 0.1568, "step": 42940 }, { "epoch": 0.07613971536704141, "grad_norm": 2.0, "learning_rate": 0.001264648357255659, "loss": 0.2502, "step": 42942 }, { "epoch": 0.07614326153235122, "grad_norm": 0.35546875, "learning_rate": 0.001264589842708312, "loss": 0.1931, "step": 42944 }, { "epoch": 0.07614680769766104, "grad_norm": 0.55859375, "learning_rate": 0.0012645313274411445, "loss": 0.1602, "step": 42946 }, { "epoch": 0.07615035386297085, "grad_norm": 0.265625, "learning_rate": 0.0012644728114544128, "loss": 0.4019, "step": 42948 }, { "epoch": 0.07615390002828067, "grad_norm": 0.490234375, "learning_rate": 0.0012644142947483724, "loss": 0.1833, "step": 42950 }, { "epoch": 0.07615744619359048, "grad_norm": 0.27734375, "learning_rate": 0.0012643557773232795, "loss": 0.542, "step": 42952 }, { "epoch": 0.0761609923589003, "grad_norm": 0.7890625, "learning_rate": 0.0012642972591793895, "loss": 0.2183, "step": 42954 }, { "epoch": 0.07616453852421011, "grad_norm": 0.4453125, "learning_rate": 0.001264238740316959, "loss": 0.2151, "step": 42956 }, { "epoch": 0.07616808468951992, "grad_norm": 0.1962890625, "learning_rate": 0.001264180220736244, "loss": 0.1517, "step": 42958 }, { "epoch": 0.07617163085482974, "grad_norm": 1.3828125, "learning_rate": 0.0012641217004374994, "loss": 0.2212, "step": 42960 }, { "epoch": 0.07617517702013955, "grad_norm": 0.34375, "learning_rate": 0.0012640631794209824, "loss": 0.2196, "step": 42962 }, { "epoch": 0.07617872318544937, "grad_norm": 0.2080078125, "learning_rate": 0.001264004657686948, "loss": 0.1859, "step": 42964 }, { "epoch": 0.0761822693507592, "grad_norm": 1.1875, "learning_rate": 0.0012639461352356525, "loss": 0.2409, "step": 42966 }, { "epoch": 0.07618581551606901, "grad_norm": 0.66796875, "learning_rate": 0.001263887612067352, "loss": 0.2258, "step": 42968 }, { "epoch": 0.07618936168137883, "grad_norm": 0.93359375, "learning_rate": 0.0012638290881823026, "loss": 0.271, "step": 42970 }, { "epoch": 0.07619290784668864, "grad_norm": 0.9765625, "learning_rate": 0.0012637705635807593, "loss": 0.4535, "step": 42972 }, { "epoch": 0.07619645401199845, "grad_norm": 2.53125, "learning_rate": 0.0012637120382629789, "loss": 0.2089, "step": 42974 }, { "epoch": 0.07620000017730827, "grad_norm": 0.40625, "learning_rate": 0.001263653512229217, "loss": 0.2487, "step": 42976 }, { "epoch": 0.07620354634261808, "grad_norm": 0.306640625, "learning_rate": 0.00126359498547973, "loss": 0.1995, "step": 42978 }, { "epoch": 0.0762070925079279, "grad_norm": 0.5, "learning_rate": 0.0012635364580147735, "loss": 0.2615, "step": 42980 }, { "epoch": 0.07621063867323771, "grad_norm": 0.265625, "learning_rate": 0.0012634779298346035, "loss": 0.2012, "step": 42982 }, { "epoch": 0.07621418483854753, "grad_norm": 2.390625, "learning_rate": 0.0012634194009394758, "loss": 0.3588, "step": 42984 }, { "epoch": 0.07621773100385734, "grad_norm": 0.63671875, "learning_rate": 0.001263360871329647, "loss": 0.1658, "step": 42986 }, { "epoch": 0.07622127716916716, "grad_norm": 0.314453125, "learning_rate": 0.0012633023410053724, "loss": 0.1683, "step": 42988 }, { "epoch": 0.07622482333447697, "grad_norm": 1.125, "learning_rate": 0.001263243809966908, "loss": 0.1672, "step": 42990 }, { "epoch": 0.07622836949978679, "grad_norm": 0.73046875, "learning_rate": 0.00126318527821451, "loss": 0.1353, "step": 42992 }, { "epoch": 0.0762319156650966, "grad_norm": 0.244140625, "learning_rate": 0.0012631267457484348, "loss": 0.1855, "step": 42994 }, { "epoch": 0.07623546183040641, "grad_norm": 0.267578125, "learning_rate": 0.001263068212568938, "loss": 0.1886, "step": 42996 }, { "epoch": 0.07623900799571623, "grad_norm": 0.7109375, "learning_rate": 0.0012630096786762752, "loss": 0.2342, "step": 42998 }, { "epoch": 0.07624255416102604, "grad_norm": 5.9375, "learning_rate": 0.0012629511440707026, "loss": 0.2733, "step": 43000 }, { "epoch": 0.07624610032633586, "grad_norm": 0.34375, "learning_rate": 0.0012628926087524769, "loss": 0.1662, "step": 43002 }, { "epoch": 0.07624964649164567, "grad_norm": 1.1015625, "learning_rate": 0.001262834072721853, "loss": 0.1853, "step": 43004 }, { "epoch": 0.07625319265695549, "grad_norm": 0.3359375, "learning_rate": 0.001262775535979088, "loss": 0.1564, "step": 43006 }, { "epoch": 0.0762567388222653, "grad_norm": 0.6953125, "learning_rate": 0.0012627169985244368, "loss": 0.241, "step": 43008 }, { "epoch": 0.07626028498757512, "grad_norm": 0.474609375, "learning_rate": 0.0012626584603581564, "loss": 0.2, "step": 43010 }, { "epoch": 0.07626383115288494, "grad_norm": 0.6796875, "learning_rate": 0.001262599921480502, "loss": 0.1754, "step": 43012 }, { "epoch": 0.07626737731819476, "grad_norm": 0.423828125, "learning_rate": 0.0012625413818917303, "loss": 0.1497, "step": 43014 }, { "epoch": 0.07627092348350457, "grad_norm": 0.73828125, "learning_rate": 0.0012624828415920971, "loss": 0.1606, "step": 43016 }, { "epoch": 0.07627446964881439, "grad_norm": 2.6875, "learning_rate": 0.001262424300581858, "loss": 0.2597, "step": 43018 }, { "epoch": 0.0762780158141242, "grad_norm": 0.5234375, "learning_rate": 0.0012623657588612693, "loss": 0.3192, "step": 43020 }, { "epoch": 0.07628156197943402, "grad_norm": 0.6953125, "learning_rate": 0.0012623072164305873, "loss": 0.2405, "step": 43022 }, { "epoch": 0.07628510814474383, "grad_norm": 0.80078125, "learning_rate": 0.001262248673290068, "loss": 0.198, "step": 43024 }, { "epoch": 0.07628865431005365, "grad_norm": 1.15625, "learning_rate": 0.0012621901294399668, "loss": 0.2473, "step": 43026 }, { "epoch": 0.07629220047536346, "grad_norm": 0.68359375, "learning_rate": 0.0012621315848805406, "loss": 0.1973, "step": 43028 }, { "epoch": 0.07629574664067328, "grad_norm": 0.50390625, "learning_rate": 0.0012620730396120448, "loss": 0.2108, "step": 43030 }, { "epoch": 0.07629929280598309, "grad_norm": 0.23046875, "learning_rate": 0.0012620144936347358, "loss": 0.1947, "step": 43032 }, { "epoch": 0.0763028389712929, "grad_norm": 0.5390625, "learning_rate": 0.0012619559469488693, "loss": 0.2329, "step": 43034 }, { "epoch": 0.07630638513660272, "grad_norm": 0.423828125, "learning_rate": 0.001261897399554702, "loss": 0.2048, "step": 43036 }, { "epoch": 0.07630993130191253, "grad_norm": 0.64453125, "learning_rate": 0.0012618388514524892, "loss": 0.1965, "step": 43038 }, { "epoch": 0.07631347746722235, "grad_norm": 1.4375, "learning_rate": 0.0012617803026424871, "loss": 0.3464, "step": 43040 }, { "epoch": 0.07631702363253216, "grad_norm": 0.6875, "learning_rate": 0.001261721753124952, "loss": 0.1755, "step": 43042 }, { "epoch": 0.07632056979784198, "grad_norm": 0.51171875, "learning_rate": 0.00126166320290014, "loss": 0.1913, "step": 43044 }, { "epoch": 0.07632411596315179, "grad_norm": 0.19921875, "learning_rate": 0.0012616046519683074, "loss": 0.1753, "step": 43046 }, { "epoch": 0.0763276621284616, "grad_norm": 0.337890625, "learning_rate": 0.0012615461003297093, "loss": 0.2345, "step": 43048 }, { "epoch": 0.07633120829377142, "grad_norm": 0.30859375, "learning_rate": 0.0012614875479846027, "loss": 0.212, "step": 43050 }, { "epoch": 0.07633475445908124, "grad_norm": 0.341796875, "learning_rate": 0.0012614289949332432, "loss": 0.1991, "step": 43052 }, { "epoch": 0.07633830062439105, "grad_norm": 0.2392578125, "learning_rate": 0.0012613704411758874, "loss": 0.1231, "step": 43054 }, { "epoch": 0.07634184678970088, "grad_norm": 0.2451171875, "learning_rate": 0.0012613118867127909, "loss": 0.1953, "step": 43056 }, { "epoch": 0.0763453929550107, "grad_norm": 0.6796875, "learning_rate": 0.0012612533315442098, "loss": 0.1864, "step": 43058 }, { "epoch": 0.07634893912032051, "grad_norm": 0.365234375, "learning_rate": 0.0012611947756704, "loss": 0.2145, "step": 43060 }, { "epoch": 0.07635248528563032, "grad_norm": 0.39453125, "learning_rate": 0.0012611362190916183, "loss": 0.2276, "step": 43062 }, { "epoch": 0.07635603145094014, "grad_norm": 0.208984375, "learning_rate": 0.0012610776618081202, "loss": 0.1937, "step": 43064 }, { "epoch": 0.07635957761624995, "grad_norm": 0.8828125, "learning_rate": 0.0012610191038201623, "loss": 0.2159, "step": 43066 }, { "epoch": 0.07636312378155977, "grad_norm": 0.478515625, "learning_rate": 0.0012609605451280002, "loss": 0.1843, "step": 43068 }, { "epoch": 0.07636666994686958, "grad_norm": 1.9921875, "learning_rate": 0.00126090198573189, "loss": 0.1967, "step": 43070 }, { "epoch": 0.0763702161121794, "grad_norm": 0.77734375, "learning_rate": 0.001260843425632088, "loss": 0.3737, "step": 43072 }, { "epoch": 0.07637376227748921, "grad_norm": 0.2255859375, "learning_rate": 0.0012607848648288503, "loss": 0.1966, "step": 43074 }, { "epoch": 0.07637730844279902, "grad_norm": 0.2431640625, "learning_rate": 0.0012607263033224331, "loss": 0.1571, "step": 43076 }, { "epoch": 0.07638085460810884, "grad_norm": 0.453125, "learning_rate": 0.0012606677411130922, "loss": 0.177, "step": 43078 }, { "epoch": 0.07638440077341865, "grad_norm": 0.1962890625, "learning_rate": 0.0012606091782010843, "loss": 0.171, "step": 43080 }, { "epoch": 0.07638794693872847, "grad_norm": 0.73046875, "learning_rate": 0.0012605506145866648, "loss": 0.2442, "step": 43082 }, { "epoch": 0.07639149310403828, "grad_norm": 0.427734375, "learning_rate": 0.0012604920502700906, "loss": 0.2047, "step": 43084 }, { "epoch": 0.0763950392693481, "grad_norm": 0.45703125, "learning_rate": 0.001260433485251617, "loss": 0.1595, "step": 43086 }, { "epoch": 0.07639858543465791, "grad_norm": 0.4921875, "learning_rate": 0.0012603749195315007, "loss": 0.183, "step": 43088 }, { "epoch": 0.07640213159996773, "grad_norm": 0.263671875, "learning_rate": 0.0012603163531099974, "loss": 0.187, "step": 43090 }, { "epoch": 0.07640567776527754, "grad_norm": 0.51953125, "learning_rate": 0.0012602577859873638, "loss": 0.2479, "step": 43092 }, { "epoch": 0.07640922393058736, "grad_norm": 0.30859375, "learning_rate": 0.0012601992181638557, "loss": 0.1974, "step": 43094 }, { "epoch": 0.07641277009589717, "grad_norm": 3.0, "learning_rate": 0.0012601406496397293, "loss": 0.3292, "step": 43096 }, { "epoch": 0.07641631626120698, "grad_norm": 2.28125, "learning_rate": 0.0012600820804152405, "loss": 0.2178, "step": 43098 }, { "epoch": 0.0764198624265168, "grad_norm": 1.125, "learning_rate": 0.0012600235104906457, "loss": 0.3066, "step": 43100 }, { "epoch": 0.07642340859182663, "grad_norm": 0.73828125, "learning_rate": 0.001259964939866201, "loss": 0.1758, "step": 43102 }, { "epoch": 0.07642695475713644, "grad_norm": 4.5, "learning_rate": 0.0012599063685421628, "loss": 0.23, "step": 43104 }, { "epoch": 0.07643050092244626, "grad_norm": 0.58203125, "learning_rate": 0.0012598477965187866, "loss": 0.1764, "step": 43106 }, { "epoch": 0.07643404708775607, "grad_norm": 0.8359375, "learning_rate": 0.0012597892237963296, "loss": 0.2398, "step": 43108 }, { "epoch": 0.07643759325306589, "grad_norm": 0.84375, "learning_rate": 0.0012597306503750471, "loss": 0.2778, "step": 43110 }, { "epoch": 0.0764411394183757, "grad_norm": 0.2216796875, "learning_rate": 0.0012596720762551954, "loss": 0.1963, "step": 43112 }, { "epoch": 0.07644468558368551, "grad_norm": 1.1796875, "learning_rate": 0.001259613501437031, "loss": 0.2184, "step": 43114 }, { "epoch": 0.07644823174899533, "grad_norm": 0.52734375, "learning_rate": 0.0012595549259208094, "loss": 0.1721, "step": 43116 }, { "epoch": 0.07645177791430514, "grad_norm": 0.380859375, "learning_rate": 0.0012594963497067878, "loss": 0.2108, "step": 43118 }, { "epoch": 0.07645532407961496, "grad_norm": 2.703125, "learning_rate": 0.0012594377727952214, "loss": 0.2623, "step": 43120 }, { "epoch": 0.07645887024492477, "grad_norm": 0.2890625, "learning_rate": 0.001259379195186367, "loss": 0.1991, "step": 43122 }, { "epoch": 0.07646241641023459, "grad_norm": 0.388671875, "learning_rate": 0.0012593206168804805, "loss": 0.1442, "step": 43124 }, { "epoch": 0.0764659625755444, "grad_norm": 0.6875, "learning_rate": 0.0012592620378778182, "loss": 0.2038, "step": 43126 }, { "epoch": 0.07646950874085422, "grad_norm": 0.67578125, "learning_rate": 0.0012592034581786363, "loss": 0.1838, "step": 43128 }, { "epoch": 0.07647305490616403, "grad_norm": 4.28125, "learning_rate": 0.0012591448777831908, "loss": 0.2287, "step": 43130 }, { "epoch": 0.07647660107147385, "grad_norm": 0.435546875, "learning_rate": 0.0012590862966917382, "loss": 0.1809, "step": 43132 }, { "epoch": 0.07648014723678366, "grad_norm": 0.482421875, "learning_rate": 0.0012590277149045345, "loss": 0.1781, "step": 43134 }, { "epoch": 0.07648369340209348, "grad_norm": 0.84765625, "learning_rate": 0.001258969132421836, "loss": 0.2176, "step": 43136 }, { "epoch": 0.07648723956740329, "grad_norm": 0.94140625, "learning_rate": 0.0012589105492438986, "loss": 0.1857, "step": 43138 }, { "epoch": 0.0764907857327131, "grad_norm": 2.109375, "learning_rate": 0.001258851965370979, "loss": 0.216, "step": 43140 }, { "epoch": 0.07649433189802292, "grad_norm": 0.7578125, "learning_rate": 0.0012587933808033331, "loss": 0.2248, "step": 43142 }, { "epoch": 0.07649787806333273, "grad_norm": 1.65625, "learning_rate": 0.0012587347955412172, "loss": 0.2327, "step": 43144 }, { "epoch": 0.07650142422864255, "grad_norm": 0.3828125, "learning_rate": 0.0012586762095848877, "loss": 0.1649, "step": 43146 }, { "epoch": 0.07650497039395238, "grad_norm": 0.69140625, "learning_rate": 0.0012586176229346005, "loss": 0.1837, "step": 43148 }, { "epoch": 0.07650851655926219, "grad_norm": 0.62109375, "learning_rate": 0.0012585590355906119, "loss": 0.2208, "step": 43150 }, { "epoch": 0.076512062724572, "grad_norm": 0.33203125, "learning_rate": 0.001258500447553178, "loss": 0.2433, "step": 43152 }, { "epoch": 0.07651560888988182, "grad_norm": 0.58984375, "learning_rate": 0.0012584418588225555, "loss": 0.2391, "step": 43154 }, { "epoch": 0.07651915505519163, "grad_norm": 0.55078125, "learning_rate": 0.0012583832693990001, "loss": 0.2124, "step": 43156 }, { "epoch": 0.07652270122050145, "grad_norm": 0.287109375, "learning_rate": 0.0012583246792827688, "loss": 0.2171, "step": 43158 }, { "epoch": 0.07652624738581126, "grad_norm": 0.6015625, "learning_rate": 0.0012582660884741166, "loss": 0.1565, "step": 43160 }, { "epoch": 0.07652979355112108, "grad_norm": 0.58203125, "learning_rate": 0.001258207496973301, "loss": 0.2374, "step": 43162 }, { "epoch": 0.07653333971643089, "grad_norm": 0.63671875, "learning_rate": 0.0012581489047805777, "loss": 0.2092, "step": 43164 }, { "epoch": 0.07653688588174071, "grad_norm": 0.79296875, "learning_rate": 0.0012580903118962028, "loss": 0.215, "step": 43166 }, { "epoch": 0.07654043204705052, "grad_norm": 0.6015625, "learning_rate": 0.0012580317183204324, "loss": 0.1492, "step": 43168 }, { "epoch": 0.07654397821236034, "grad_norm": 0.6953125, "learning_rate": 0.0012579731240535234, "loss": 0.2604, "step": 43170 }, { "epoch": 0.07654752437767015, "grad_norm": 0.263671875, "learning_rate": 0.0012579145290957316, "loss": 0.2393, "step": 43172 }, { "epoch": 0.07655107054297997, "grad_norm": 0.1689453125, "learning_rate": 0.0012578559334473136, "loss": 0.3144, "step": 43174 }, { "epoch": 0.07655461670828978, "grad_norm": 0.26953125, "learning_rate": 0.001257797337108525, "loss": 0.1454, "step": 43176 }, { "epoch": 0.0765581628735996, "grad_norm": 0.51953125, "learning_rate": 0.001257738740079623, "loss": 0.2378, "step": 43178 }, { "epoch": 0.07656170903890941, "grad_norm": 0.41796875, "learning_rate": 0.001257680142360863, "loss": 0.2038, "step": 43180 }, { "epoch": 0.07656525520421922, "grad_norm": 0.408203125, "learning_rate": 0.0012576215439525017, "loss": 0.2258, "step": 43182 }, { "epoch": 0.07656880136952904, "grad_norm": 0.2265625, "learning_rate": 0.0012575629448547956, "loss": 0.1899, "step": 43184 }, { "epoch": 0.07657234753483885, "grad_norm": 0.5859375, "learning_rate": 0.0012575043450680006, "loss": 0.2279, "step": 43186 }, { "epoch": 0.07657589370014867, "grad_norm": 0.5234375, "learning_rate": 0.001257445744592373, "loss": 0.1845, "step": 43188 }, { "epoch": 0.07657943986545848, "grad_norm": 0.169921875, "learning_rate": 0.001257387143428169, "loss": 0.1981, "step": 43190 }, { "epoch": 0.07658298603076831, "grad_norm": 0.1982421875, "learning_rate": 0.0012573285415756455, "loss": 0.1302, "step": 43192 }, { "epoch": 0.07658653219607812, "grad_norm": 0.302734375, "learning_rate": 0.0012572699390350581, "loss": 0.1866, "step": 43194 }, { "epoch": 0.07659007836138794, "grad_norm": 0.52734375, "learning_rate": 0.0012572113358066633, "loss": 0.1662, "step": 43196 }, { "epoch": 0.07659362452669775, "grad_norm": 1.109375, "learning_rate": 0.0012571527318907174, "loss": 0.2797, "step": 43198 }, { "epoch": 0.07659717069200757, "grad_norm": 0.5, "learning_rate": 0.0012570941272874766, "loss": 0.2011, "step": 43200 }, { "epoch": 0.07660071685731738, "grad_norm": 0.77734375, "learning_rate": 0.0012570355219971975, "loss": 0.1831, "step": 43202 }, { "epoch": 0.0766042630226272, "grad_norm": 0.19921875, "learning_rate": 0.0012569769160201366, "loss": 0.1792, "step": 43204 }, { "epoch": 0.07660780918793701, "grad_norm": 0.5234375, "learning_rate": 0.0012569183093565498, "loss": 0.2388, "step": 43206 }, { "epoch": 0.07661135535324683, "grad_norm": 0.3984375, "learning_rate": 0.0012568597020066927, "loss": 0.1951, "step": 43208 }, { "epoch": 0.07661490151855664, "grad_norm": 0.267578125, "learning_rate": 0.0012568010939708232, "loss": 0.2303, "step": 43210 }, { "epoch": 0.07661844768386646, "grad_norm": 0.25390625, "learning_rate": 0.0012567424852491962, "loss": 0.2477, "step": 43212 }, { "epoch": 0.07662199384917627, "grad_norm": 0.1748046875, "learning_rate": 0.001256683875842069, "loss": 0.3563, "step": 43214 }, { "epoch": 0.07662554001448608, "grad_norm": 0.6484375, "learning_rate": 0.0012566252657496974, "loss": 0.1702, "step": 43216 }, { "epoch": 0.0766290861797959, "grad_norm": 0.98828125, "learning_rate": 0.0012565666549723378, "loss": 0.2291, "step": 43218 }, { "epoch": 0.07663263234510571, "grad_norm": 0.609375, "learning_rate": 0.0012565080435102468, "loss": 0.2497, "step": 43220 }, { "epoch": 0.07663617851041553, "grad_norm": 5.65625, "learning_rate": 0.0012564494313636802, "loss": 0.2416, "step": 43222 }, { "epoch": 0.07663972467572534, "grad_norm": 0.48828125, "learning_rate": 0.001256390818532895, "loss": 0.2383, "step": 43224 }, { "epoch": 0.07664327084103516, "grad_norm": 0.400390625, "learning_rate": 0.0012563322050181472, "loss": 0.2091, "step": 43226 }, { "epoch": 0.07664681700634497, "grad_norm": 1.0390625, "learning_rate": 0.0012562735908196929, "loss": 0.2229, "step": 43228 }, { "epoch": 0.07665036317165479, "grad_norm": 0.34765625, "learning_rate": 0.0012562149759377888, "loss": 0.1968, "step": 43230 }, { "epoch": 0.0766539093369646, "grad_norm": 0.5390625, "learning_rate": 0.0012561563603726911, "loss": 0.2019, "step": 43232 }, { "epoch": 0.07665745550227442, "grad_norm": 0.5625, "learning_rate": 0.001256097744124656, "loss": 0.2091, "step": 43234 }, { "epoch": 0.07666100166758423, "grad_norm": 0.283203125, "learning_rate": 0.0012560391271939407, "loss": 0.2547, "step": 43236 }, { "epoch": 0.07666454783289406, "grad_norm": 0.41015625, "learning_rate": 0.0012559805095808004, "loss": 0.1924, "step": 43238 }, { "epoch": 0.07666809399820387, "grad_norm": 1.3359375, "learning_rate": 0.001255921891285492, "loss": 0.2551, "step": 43240 }, { "epoch": 0.07667164016351369, "grad_norm": 0.60546875, "learning_rate": 0.001255863272308272, "loss": 0.2217, "step": 43242 }, { "epoch": 0.0766751863288235, "grad_norm": 0.55078125, "learning_rate": 0.0012558046526493965, "loss": 0.1663, "step": 43244 }, { "epoch": 0.07667873249413332, "grad_norm": 0.48828125, "learning_rate": 0.0012557460323091216, "loss": 0.1926, "step": 43246 }, { "epoch": 0.07668227865944313, "grad_norm": 0.4765625, "learning_rate": 0.0012556874112877043, "loss": 0.3423, "step": 43248 }, { "epoch": 0.07668582482475295, "grad_norm": 0.341796875, "learning_rate": 0.0012556287895854007, "loss": 0.2445, "step": 43250 }, { "epoch": 0.07668937099006276, "grad_norm": 1.2265625, "learning_rate": 0.0012555701672024673, "loss": 0.2674, "step": 43252 }, { "epoch": 0.07669291715537258, "grad_norm": 0.376953125, "learning_rate": 0.0012555115441391603, "loss": 0.1742, "step": 43254 }, { "epoch": 0.07669646332068239, "grad_norm": 0.296875, "learning_rate": 0.0012554529203957362, "loss": 0.206, "step": 43256 }, { "epoch": 0.0767000094859922, "grad_norm": 1.0625, "learning_rate": 0.001255394295972451, "loss": 0.2527, "step": 43258 }, { "epoch": 0.07670355565130202, "grad_norm": 0.5703125, "learning_rate": 0.0012553356708695617, "loss": 0.3608, "step": 43260 }, { "epoch": 0.07670710181661183, "grad_norm": 1.25, "learning_rate": 0.001255277045087324, "loss": 0.1426, "step": 43262 }, { "epoch": 0.07671064798192165, "grad_norm": 0.34765625, "learning_rate": 0.0012552184186259954, "loss": 0.1512, "step": 43264 }, { "epoch": 0.07671419414723146, "grad_norm": 0.29296875, "learning_rate": 0.0012551597914858311, "loss": 0.1585, "step": 43266 }, { "epoch": 0.07671774031254128, "grad_norm": 1.359375, "learning_rate": 0.001255101163667088, "loss": 0.3263, "step": 43268 }, { "epoch": 0.07672128647785109, "grad_norm": 1.296875, "learning_rate": 0.0012550425351700225, "loss": 0.1719, "step": 43270 }, { "epoch": 0.0767248326431609, "grad_norm": 0.2216796875, "learning_rate": 0.0012549839059948914, "loss": 0.2052, "step": 43272 }, { "epoch": 0.07672837880847072, "grad_norm": 0.330078125, "learning_rate": 0.0012549252761419503, "loss": 0.1222, "step": 43274 }, { "epoch": 0.07673192497378054, "grad_norm": 0.341796875, "learning_rate": 0.001254866645611456, "loss": 0.2564, "step": 43276 }, { "epoch": 0.07673547113909035, "grad_norm": 0.53125, "learning_rate": 0.0012548080144036654, "loss": 0.2159, "step": 43278 }, { "epoch": 0.07673901730440016, "grad_norm": 0.71875, "learning_rate": 0.0012547493825188342, "loss": 0.1705, "step": 43280 }, { "epoch": 0.07674256346970998, "grad_norm": 2.21875, "learning_rate": 0.0012546907499572189, "loss": 0.2123, "step": 43282 }, { "epoch": 0.07674610963501981, "grad_norm": 0.314453125, "learning_rate": 0.0012546321167190763, "loss": 0.214, "step": 43284 }, { "epoch": 0.07674965580032962, "grad_norm": 0.30859375, "learning_rate": 0.0012545734828046627, "loss": 0.1801, "step": 43286 }, { "epoch": 0.07675320196563944, "grad_norm": 0.77734375, "learning_rate": 0.0012545148482142345, "loss": 0.1918, "step": 43288 }, { "epoch": 0.07675674813094925, "grad_norm": 0.71484375, "learning_rate": 0.0012544562129480478, "loss": 0.4194, "step": 43290 }, { "epoch": 0.07676029429625907, "grad_norm": 0.408203125, "learning_rate": 0.0012543975770063598, "loss": 0.2298, "step": 43292 }, { "epoch": 0.07676384046156888, "grad_norm": 0.458984375, "learning_rate": 0.001254338940389426, "loss": 0.1889, "step": 43294 }, { "epoch": 0.0767673866268787, "grad_norm": 0.51171875, "learning_rate": 0.0012542803030975037, "loss": 0.1655, "step": 43296 }, { "epoch": 0.07677093279218851, "grad_norm": 0.318359375, "learning_rate": 0.0012542216651308489, "loss": 0.1715, "step": 43298 }, { "epoch": 0.07677447895749832, "grad_norm": 0.25, "learning_rate": 0.0012541630264897182, "loss": 0.1409, "step": 43300 }, { "epoch": 0.07677802512280814, "grad_norm": 0.447265625, "learning_rate": 0.001254104387174368, "loss": 0.1732, "step": 43302 }, { "epoch": 0.07678157128811795, "grad_norm": 0.25, "learning_rate": 0.0012540457471850544, "loss": 0.2402, "step": 43304 }, { "epoch": 0.07678511745342777, "grad_norm": 1.7421875, "learning_rate": 0.0012539871065220343, "loss": 0.2574, "step": 43306 }, { "epoch": 0.07678866361873758, "grad_norm": 0.28125, "learning_rate": 0.0012539284651855638, "loss": 0.1838, "step": 43308 }, { "epoch": 0.0767922097840474, "grad_norm": 0.7734375, "learning_rate": 0.0012538698231759, "loss": 0.3256, "step": 43310 }, { "epoch": 0.07679575594935721, "grad_norm": 0.3359375, "learning_rate": 0.0012538111804932988, "loss": 0.2205, "step": 43312 }, { "epoch": 0.07679930211466703, "grad_norm": 0.3984375, "learning_rate": 0.001253752537138017, "loss": 0.2212, "step": 43314 }, { "epoch": 0.07680284827997684, "grad_norm": 0.263671875, "learning_rate": 0.0012536938931103107, "loss": 0.1541, "step": 43316 }, { "epoch": 0.07680639444528665, "grad_norm": 0.51953125, "learning_rate": 0.0012536352484104369, "loss": 0.2136, "step": 43318 }, { "epoch": 0.07680994061059647, "grad_norm": 0.1728515625, "learning_rate": 0.0012535766030386515, "loss": 0.1655, "step": 43320 }, { "epoch": 0.07681348677590628, "grad_norm": 1.03125, "learning_rate": 0.0012535179569952114, "loss": 0.2598, "step": 43322 }, { "epoch": 0.0768170329412161, "grad_norm": 2.453125, "learning_rate": 0.001253459310280373, "loss": 0.2637, "step": 43324 }, { "epoch": 0.07682057910652591, "grad_norm": 0.427734375, "learning_rate": 0.0012534006628943924, "loss": 0.2085, "step": 43326 }, { "epoch": 0.07682412527183574, "grad_norm": 0.66015625, "learning_rate": 0.0012533420148375263, "loss": 0.2145, "step": 43328 }, { "epoch": 0.07682767143714556, "grad_norm": 0.64453125, "learning_rate": 0.0012532833661100318, "loss": 0.2019, "step": 43330 }, { "epoch": 0.07683121760245537, "grad_norm": 0.671875, "learning_rate": 0.0012532247167121645, "loss": 0.188, "step": 43332 }, { "epoch": 0.07683476376776519, "grad_norm": 0.404296875, "learning_rate": 0.0012531660666441813, "loss": 0.2247, "step": 43334 }, { "epoch": 0.076838309933075, "grad_norm": 0.5859375, "learning_rate": 0.0012531074159063391, "loss": 0.1976, "step": 43336 }, { "epoch": 0.07684185609838481, "grad_norm": 0.451171875, "learning_rate": 0.001253048764498894, "loss": 0.1937, "step": 43338 }, { "epoch": 0.07684540226369463, "grad_norm": 0.60546875, "learning_rate": 0.0012529901124221018, "loss": 0.2753, "step": 43340 }, { "epoch": 0.07684894842900444, "grad_norm": 1.3046875, "learning_rate": 0.0012529314596762204, "loss": 0.2532, "step": 43342 }, { "epoch": 0.07685249459431426, "grad_norm": 0.166015625, "learning_rate": 0.0012528728062615054, "loss": 0.1854, "step": 43344 }, { "epoch": 0.07685604075962407, "grad_norm": 1.1796875, "learning_rate": 0.0012528141521782135, "loss": 0.1866, "step": 43346 }, { "epoch": 0.07685958692493389, "grad_norm": 0.189453125, "learning_rate": 0.0012527554974266014, "loss": 0.1817, "step": 43348 }, { "epoch": 0.0768631330902437, "grad_norm": 0.64453125, "learning_rate": 0.0012526968420069253, "loss": 0.1674, "step": 43350 }, { "epoch": 0.07686667925555352, "grad_norm": 0.46875, "learning_rate": 0.001252638185919442, "loss": 0.1666, "step": 43352 }, { "epoch": 0.07687022542086333, "grad_norm": 0.4453125, "learning_rate": 0.001252579529164408, "loss": 0.2531, "step": 43354 }, { "epoch": 0.07687377158617315, "grad_norm": 3.234375, "learning_rate": 0.0012525208717420798, "loss": 0.2611, "step": 43356 }, { "epoch": 0.07687731775148296, "grad_norm": 4.3125, "learning_rate": 0.0012524622136527138, "loss": 0.2622, "step": 43358 }, { "epoch": 0.07688086391679277, "grad_norm": 0.67578125, "learning_rate": 0.0012524035548965664, "loss": 0.2097, "step": 43360 }, { "epoch": 0.07688441008210259, "grad_norm": 4.25, "learning_rate": 0.0012523448954738948, "loss": 0.2265, "step": 43362 }, { "epoch": 0.0768879562474124, "grad_norm": 0.59375, "learning_rate": 0.0012522862353849548, "loss": 0.1575, "step": 43364 }, { "epoch": 0.07689150241272222, "grad_norm": 0.322265625, "learning_rate": 0.0012522275746300034, "loss": 0.1685, "step": 43366 }, { "epoch": 0.07689504857803203, "grad_norm": 0.23046875, "learning_rate": 0.001252168913209297, "loss": 0.2024, "step": 43368 }, { "epoch": 0.07689859474334185, "grad_norm": 0.330078125, "learning_rate": 0.0012521102511230922, "loss": 0.2948, "step": 43370 }, { "epoch": 0.07690214090865166, "grad_norm": 0.2451171875, "learning_rate": 0.0012520515883716454, "loss": 0.1528, "step": 43372 }, { "epoch": 0.07690568707396149, "grad_norm": 0.30078125, "learning_rate": 0.0012519929249552135, "loss": 0.1777, "step": 43374 }, { "epoch": 0.0769092332392713, "grad_norm": 0.412109375, "learning_rate": 0.0012519342608740527, "loss": 0.1832, "step": 43376 }, { "epoch": 0.07691277940458112, "grad_norm": 0.361328125, "learning_rate": 0.0012518755961284198, "loss": 0.2201, "step": 43378 }, { "epoch": 0.07691632556989093, "grad_norm": 0.2470703125, "learning_rate": 0.001251816930718571, "loss": 0.1513, "step": 43380 }, { "epoch": 0.07691987173520075, "grad_norm": 0.57421875, "learning_rate": 0.0012517582646447635, "loss": 0.2225, "step": 43382 }, { "epoch": 0.07692341790051056, "grad_norm": 0.326171875, "learning_rate": 0.0012516995979072532, "loss": 0.1542, "step": 43384 }, { "epoch": 0.07692696406582038, "grad_norm": 0.7890625, "learning_rate": 0.001251640930506297, "loss": 0.1727, "step": 43386 }, { "epoch": 0.07693051023113019, "grad_norm": 0.76953125, "learning_rate": 0.0012515822624421516, "loss": 0.202, "step": 43388 }, { "epoch": 0.07693405639644, "grad_norm": 0.150390625, "learning_rate": 0.0012515235937150732, "loss": 0.1374, "step": 43390 }, { "epoch": 0.07693760256174982, "grad_norm": 0.37890625, "learning_rate": 0.0012514649243253189, "loss": 0.178, "step": 43392 }, { "epoch": 0.07694114872705964, "grad_norm": 0.6484375, "learning_rate": 0.001251406254273145, "loss": 0.1627, "step": 43394 }, { "epoch": 0.07694469489236945, "grad_norm": 1.15625, "learning_rate": 0.001251347583558808, "loss": 0.2476, "step": 43396 }, { "epoch": 0.07694824105767926, "grad_norm": 3.203125, "learning_rate": 0.0012512889121825644, "loss": 0.1694, "step": 43398 }, { "epoch": 0.07695178722298908, "grad_norm": 0.3203125, "learning_rate": 0.0012512302401446712, "loss": 0.1593, "step": 43400 }, { "epoch": 0.0769553333882989, "grad_norm": 1.59375, "learning_rate": 0.0012511715674453847, "loss": 0.2462, "step": 43402 }, { "epoch": 0.07695887955360871, "grad_norm": 0.97265625, "learning_rate": 0.0012511128940849614, "loss": 0.207, "step": 43404 }, { "epoch": 0.07696242571891852, "grad_norm": 0.34375, "learning_rate": 0.0012510542200636584, "loss": 0.1774, "step": 43406 }, { "epoch": 0.07696597188422834, "grad_norm": 1.4375, "learning_rate": 0.001250995545381732, "loss": 0.2119, "step": 43408 }, { "epoch": 0.07696951804953815, "grad_norm": 0.56640625, "learning_rate": 0.0012509368700394385, "loss": 0.2696, "step": 43410 }, { "epoch": 0.07697306421484797, "grad_norm": 0.40625, "learning_rate": 0.0012508781940370351, "loss": 0.1429, "step": 43412 }, { "epoch": 0.07697661038015778, "grad_norm": 0.2080078125, "learning_rate": 0.0012508195173747776, "loss": 0.1658, "step": 43414 }, { "epoch": 0.0769801565454676, "grad_norm": 0.50390625, "learning_rate": 0.0012507608400529238, "loss": 0.1885, "step": 43416 }, { "epoch": 0.07698370271077741, "grad_norm": 0.1953125, "learning_rate": 0.001250702162071729, "loss": 0.1714, "step": 43418 }, { "epoch": 0.07698724887608724, "grad_norm": 0.302734375, "learning_rate": 0.001250643483431451, "loss": 0.2876, "step": 43420 }, { "epoch": 0.07699079504139705, "grad_norm": 1.859375, "learning_rate": 0.0012505848041323455, "loss": 0.1957, "step": 43422 }, { "epoch": 0.07699434120670687, "grad_norm": 0.490234375, "learning_rate": 0.0012505261241746702, "loss": 0.2141, "step": 43424 }, { "epoch": 0.07699788737201668, "grad_norm": 0.328125, "learning_rate": 0.0012504674435586804, "loss": 0.1348, "step": 43426 }, { "epoch": 0.0770014335373265, "grad_norm": 1.09375, "learning_rate": 0.0012504087622846338, "loss": 0.274, "step": 43428 }, { "epoch": 0.07700497970263631, "grad_norm": 0.205078125, "learning_rate": 0.0012503500803527866, "loss": 0.1553, "step": 43430 }, { "epoch": 0.07700852586794613, "grad_norm": 0.251953125, "learning_rate": 0.0012502913977633953, "loss": 0.1698, "step": 43432 }, { "epoch": 0.07701207203325594, "grad_norm": 0.59375, "learning_rate": 0.0012502327145167166, "loss": 0.2288, "step": 43434 }, { "epoch": 0.07701561819856576, "grad_norm": 0.328125, "learning_rate": 0.0012501740306130077, "loss": 0.2145, "step": 43436 }, { "epoch": 0.07701916436387557, "grad_norm": 0.3125, "learning_rate": 0.0012501153460525245, "loss": 0.2227, "step": 43438 }, { "epoch": 0.07702271052918538, "grad_norm": 0.298828125, "learning_rate": 0.0012500566608355241, "loss": 0.1802, "step": 43440 }, { "epoch": 0.0770262566944952, "grad_norm": 1.703125, "learning_rate": 0.001249997974962263, "loss": 0.3473, "step": 43442 }, { "epoch": 0.07702980285980501, "grad_norm": 0.373046875, "learning_rate": 0.0012499392884329978, "loss": 0.2199, "step": 43444 }, { "epoch": 0.07703334902511483, "grad_norm": 0.345703125, "learning_rate": 0.0012498806012479855, "loss": 0.128, "step": 43446 }, { "epoch": 0.07703689519042464, "grad_norm": 0.23828125, "learning_rate": 0.0012498219134074824, "loss": 0.2289, "step": 43448 }, { "epoch": 0.07704044135573446, "grad_norm": 0.302734375, "learning_rate": 0.0012497632249117451, "loss": 0.2088, "step": 43450 }, { "epoch": 0.07704398752104427, "grad_norm": 0.326171875, "learning_rate": 0.0012497045357610305, "loss": 0.2718, "step": 43452 }, { "epoch": 0.07704753368635409, "grad_norm": 0.63671875, "learning_rate": 0.0012496458459555953, "loss": 0.2192, "step": 43454 }, { "epoch": 0.0770510798516639, "grad_norm": 0.35546875, "learning_rate": 0.0012495871554956963, "loss": 0.2392, "step": 43456 }, { "epoch": 0.07705462601697372, "grad_norm": 0.41015625, "learning_rate": 0.0012495284643815896, "loss": 0.1877, "step": 43458 }, { "epoch": 0.07705817218228353, "grad_norm": 2.265625, "learning_rate": 0.0012494697726135324, "loss": 0.2447, "step": 43460 }, { "epoch": 0.07706171834759334, "grad_norm": 0.58203125, "learning_rate": 0.0012494110801917814, "loss": 0.2122, "step": 43462 }, { "epoch": 0.07706526451290316, "grad_norm": 0.25, "learning_rate": 0.001249352387116593, "loss": 0.1993, "step": 43464 }, { "epoch": 0.07706881067821299, "grad_norm": 1.4453125, "learning_rate": 0.001249293693388224, "loss": 0.3757, "step": 43466 }, { "epoch": 0.0770723568435228, "grad_norm": 0.46484375, "learning_rate": 0.0012492349990069309, "loss": 0.2074, "step": 43468 }, { "epoch": 0.07707590300883262, "grad_norm": 0.98828125, "learning_rate": 0.001249176303972971, "loss": 0.2543, "step": 43470 }, { "epoch": 0.07707944917414243, "grad_norm": 0.462890625, "learning_rate": 0.0012491176082866005, "loss": 0.1468, "step": 43472 }, { "epoch": 0.07708299533945225, "grad_norm": 0.40234375, "learning_rate": 0.0012490589119480758, "loss": 0.1844, "step": 43474 }, { "epoch": 0.07708654150476206, "grad_norm": 0.9609375, "learning_rate": 0.0012490002149576542, "loss": 0.3584, "step": 43476 }, { "epoch": 0.07709008767007187, "grad_norm": 0.92578125, "learning_rate": 0.0012489415173155922, "loss": 0.1863, "step": 43478 }, { "epoch": 0.07709363383538169, "grad_norm": 0.326171875, "learning_rate": 0.0012488828190221465, "loss": 0.273, "step": 43480 }, { "epoch": 0.0770971800006915, "grad_norm": 1.0, "learning_rate": 0.0012488241200775743, "loss": 0.2389, "step": 43482 }, { "epoch": 0.07710072616600132, "grad_norm": 0.310546875, "learning_rate": 0.0012487654204821315, "loss": 0.1324, "step": 43484 }, { "epoch": 0.07710427233131113, "grad_norm": 0.3203125, "learning_rate": 0.001248706720236075, "loss": 0.1993, "step": 43486 }, { "epoch": 0.07710781849662095, "grad_norm": 0.4375, "learning_rate": 0.0012486480193396618, "loss": 0.1962, "step": 43488 }, { "epoch": 0.07711136466193076, "grad_norm": 0.28515625, "learning_rate": 0.0012485893177931488, "loss": 0.2214, "step": 43490 }, { "epoch": 0.07711491082724058, "grad_norm": 0.57421875, "learning_rate": 0.001248530615596792, "loss": 0.2129, "step": 43492 }, { "epoch": 0.07711845699255039, "grad_norm": 1.328125, "learning_rate": 0.001248471912750849, "loss": 0.2791, "step": 43494 }, { "epoch": 0.0771220031578602, "grad_norm": 0.921875, "learning_rate": 0.0012484132092555756, "loss": 0.1638, "step": 43496 }, { "epoch": 0.07712554932317002, "grad_norm": 0.734375, "learning_rate": 0.0012483545051112292, "loss": 0.2096, "step": 43498 }, { "epoch": 0.07712909548847983, "grad_norm": 1.9375, "learning_rate": 0.0012482958003180663, "loss": 0.3633, "step": 43500 }, { "epoch": 0.07713264165378965, "grad_norm": 0.3671875, "learning_rate": 0.0012482370948763441, "loss": 0.1725, "step": 43502 }, { "epoch": 0.07713618781909946, "grad_norm": 3.53125, "learning_rate": 0.0012481783887863189, "loss": 0.6282, "step": 43504 }, { "epoch": 0.07713973398440928, "grad_norm": 1.265625, "learning_rate": 0.0012481196820482475, "loss": 0.3816, "step": 43506 }, { "epoch": 0.07714328014971909, "grad_norm": 0.48828125, "learning_rate": 0.001248060974662386, "loss": 0.2287, "step": 43508 }, { "epoch": 0.07714682631502892, "grad_norm": 0.412109375, "learning_rate": 0.0012480022666289925, "loss": 0.1994, "step": 43510 }, { "epoch": 0.07715037248033874, "grad_norm": 0.212890625, "learning_rate": 0.0012479435579483228, "loss": 0.203, "step": 43512 }, { "epoch": 0.07715391864564855, "grad_norm": 0.326171875, "learning_rate": 0.001247884848620634, "loss": 0.1827, "step": 43514 }, { "epoch": 0.07715746481095836, "grad_norm": 0.291015625, "learning_rate": 0.0012478261386461827, "loss": 0.2696, "step": 43516 }, { "epoch": 0.07716101097626818, "grad_norm": 0.28125, "learning_rate": 0.0012477674280252258, "loss": 0.1822, "step": 43518 }, { "epoch": 0.077164557141578, "grad_norm": 0.31640625, "learning_rate": 0.0012477087167580198, "loss": 0.193, "step": 43520 }, { "epoch": 0.07716810330688781, "grad_norm": 0.2578125, "learning_rate": 0.0012476500048448221, "loss": 0.1942, "step": 43522 }, { "epoch": 0.07717164947219762, "grad_norm": 0.62109375, "learning_rate": 0.0012475912922858886, "loss": 0.2169, "step": 43524 }, { "epoch": 0.07717519563750744, "grad_norm": 0.248046875, "learning_rate": 0.0012475325790814768, "loss": 0.2669, "step": 43526 }, { "epoch": 0.07717874180281725, "grad_norm": 0.98046875, "learning_rate": 0.001247473865231843, "loss": 0.1991, "step": 43528 }, { "epoch": 0.07718228796812707, "grad_norm": 1.2265625, "learning_rate": 0.0012474151507372443, "loss": 0.2096, "step": 43530 }, { "epoch": 0.07718583413343688, "grad_norm": 0.3984375, "learning_rate": 0.001247356435597937, "loss": 0.279, "step": 43532 }, { "epoch": 0.0771893802987467, "grad_norm": 0.484375, "learning_rate": 0.0012472977198141786, "loss": 0.2449, "step": 43534 }, { "epoch": 0.07719292646405651, "grad_norm": 0.2412109375, "learning_rate": 0.0012472390033862256, "loss": 0.2265, "step": 43536 }, { "epoch": 0.07719647262936633, "grad_norm": 0.306640625, "learning_rate": 0.0012471802863143347, "loss": 0.1514, "step": 43538 }, { "epoch": 0.07720001879467614, "grad_norm": 0.275390625, "learning_rate": 0.0012471215685987626, "loss": 0.1868, "step": 43540 }, { "epoch": 0.07720356495998595, "grad_norm": 0.271484375, "learning_rate": 0.0012470628502397666, "loss": 0.2134, "step": 43542 }, { "epoch": 0.07720711112529577, "grad_norm": 0.265625, "learning_rate": 0.0012470041312376023, "loss": 0.1561, "step": 43544 }, { "epoch": 0.07721065729060558, "grad_norm": 0.8515625, "learning_rate": 0.0012469454115925278, "loss": 0.1882, "step": 43546 }, { "epoch": 0.0772142034559154, "grad_norm": 0.380859375, "learning_rate": 0.0012468866913047994, "loss": 0.1548, "step": 43548 }, { "epoch": 0.07721774962122521, "grad_norm": 0.318359375, "learning_rate": 0.001246827970374674, "loss": 0.1848, "step": 43550 }, { "epoch": 0.07722129578653503, "grad_norm": 0.72265625, "learning_rate": 0.0012467692488024084, "loss": 0.1932, "step": 43552 }, { "epoch": 0.07722484195184484, "grad_norm": 0.265625, "learning_rate": 0.001246710526588259, "loss": 0.2868, "step": 43554 }, { "epoch": 0.07722838811715467, "grad_norm": 0.328125, "learning_rate": 0.0012466518037324832, "loss": 0.2129, "step": 43556 }, { "epoch": 0.07723193428246448, "grad_norm": 0.298828125, "learning_rate": 0.001246593080235338, "loss": 0.1878, "step": 43558 }, { "epoch": 0.0772354804477743, "grad_norm": 0.171875, "learning_rate": 0.0012465343560970794, "loss": 0.1951, "step": 43560 }, { "epoch": 0.07723902661308411, "grad_norm": 1.15625, "learning_rate": 0.0012464756313179644, "loss": 0.2135, "step": 43562 }, { "epoch": 0.07724257277839393, "grad_norm": 0.2275390625, "learning_rate": 0.0012464169058982507, "loss": 0.1713, "step": 43564 }, { "epoch": 0.07724611894370374, "grad_norm": 0.390625, "learning_rate": 0.001246358179838194, "loss": 0.1803, "step": 43566 }, { "epoch": 0.07724966510901356, "grad_norm": 0.3984375, "learning_rate": 0.0012462994531380518, "loss": 0.1925, "step": 43568 }, { "epoch": 0.07725321127432337, "grad_norm": 0.2255859375, "learning_rate": 0.0012462407257980805, "loss": 0.1715, "step": 43570 }, { "epoch": 0.07725675743963319, "grad_norm": 0.55078125, "learning_rate": 0.0012461819978185375, "loss": 0.1702, "step": 43572 }, { "epoch": 0.077260303604943, "grad_norm": 0.4140625, "learning_rate": 0.0012461232691996793, "loss": 0.2004, "step": 43574 }, { "epoch": 0.07726384977025282, "grad_norm": 0.45703125, "learning_rate": 0.001246064539941763, "loss": 0.2121, "step": 43576 }, { "epoch": 0.07726739593556263, "grad_norm": 0.263671875, "learning_rate": 0.001246005810045045, "loss": 0.1915, "step": 43578 }, { "epoch": 0.07727094210087244, "grad_norm": 0.53125, "learning_rate": 0.001245947079509783, "loss": 0.2001, "step": 43580 }, { "epoch": 0.07727448826618226, "grad_norm": 0.30078125, "learning_rate": 0.0012458883483362324, "loss": 0.1962, "step": 43582 }, { "epoch": 0.07727803443149207, "grad_norm": 0.45703125, "learning_rate": 0.0012458296165246512, "loss": 0.2133, "step": 43584 }, { "epoch": 0.07728158059680189, "grad_norm": 1.1484375, "learning_rate": 0.0012457708840752963, "loss": 0.2669, "step": 43586 }, { "epoch": 0.0772851267621117, "grad_norm": 0.462890625, "learning_rate": 0.0012457121509884238, "loss": 0.2568, "step": 43588 }, { "epoch": 0.07728867292742152, "grad_norm": 0.353515625, "learning_rate": 0.0012456534172642908, "loss": 0.1702, "step": 43590 }, { "epoch": 0.07729221909273133, "grad_norm": 0.53515625, "learning_rate": 0.001245594682903155, "loss": 0.2113, "step": 43592 }, { "epoch": 0.07729576525804115, "grad_norm": 0.3203125, "learning_rate": 0.0012455359479052722, "loss": 0.1436, "step": 43594 }, { "epoch": 0.07729931142335096, "grad_norm": 0.30078125, "learning_rate": 0.0012454772122709, "loss": 0.2388, "step": 43596 }, { "epoch": 0.07730285758866078, "grad_norm": 0.392578125, "learning_rate": 0.0012454184760002946, "loss": 0.2233, "step": 43598 }, { "epoch": 0.07730640375397059, "grad_norm": 0.376953125, "learning_rate": 0.0012453597390937137, "loss": 0.1531, "step": 43600 }, { "epoch": 0.07730994991928042, "grad_norm": 0.65234375, "learning_rate": 0.0012453010015514132, "loss": 0.1807, "step": 43602 }, { "epoch": 0.07731349608459023, "grad_norm": 0.291015625, "learning_rate": 0.001245242263373651, "loss": 0.1871, "step": 43604 }, { "epoch": 0.07731704224990005, "grad_norm": 1.125, "learning_rate": 0.0012451835245606833, "loss": 0.2409, "step": 43606 }, { "epoch": 0.07732058841520986, "grad_norm": 0.421875, "learning_rate": 0.0012451247851127671, "loss": 0.2501, "step": 43608 }, { "epoch": 0.07732413458051968, "grad_norm": 0.40234375, "learning_rate": 0.0012450660450301595, "loss": 0.2561, "step": 43610 }, { "epoch": 0.07732768074582949, "grad_norm": 0.2373046875, "learning_rate": 0.0012450073043131173, "loss": 0.2086, "step": 43612 }, { "epoch": 0.0773312269111393, "grad_norm": 0.484375, "learning_rate": 0.0012449485629618974, "loss": 0.1683, "step": 43614 }, { "epoch": 0.07733477307644912, "grad_norm": 0.197265625, "learning_rate": 0.0012448898209767567, "loss": 0.1687, "step": 43616 }, { "epoch": 0.07733831924175893, "grad_norm": 0.74609375, "learning_rate": 0.001244831078357952, "loss": 0.2315, "step": 43618 }, { "epoch": 0.07734186540706875, "grad_norm": 0.39453125, "learning_rate": 0.0012447723351057404, "loss": 0.1571, "step": 43620 }, { "epoch": 0.07734541157237856, "grad_norm": 0.404296875, "learning_rate": 0.0012447135912203784, "loss": 0.1409, "step": 43622 }, { "epoch": 0.07734895773768838, "grad_norm": 0.220703125, "learning_rate": 0.0012446548467021235, "loss": 0.1984, "step": 43624 }, { "epoch": 0.0773525039029982, "grad_norm": 0.494140625, "learning_rate": 0.0012445961015512322, "loss": 0.2317, "step": 43626 }, { "epoch": 0.07735605006830801, "grad_norm": 0.8125, "learning_rate": 0.0012445373557679615, "loss": 0.2296, "step": 43628 }, { "epoch": 0.07735959623361782, "grad_norm": 0.22265625, "learning_rate": 0.0012444786093525684, "loss": 0.1774, "step": 43630 }, { "epoch": 0.07736314239892764, "grad_norm": 0.7578125, "learning_rate": 0.00124441986230531, "loss": 0.3073, "step": 43632 }, { "epoch": 0.07736668856423745, "grad_norm": 0.1630859375, "learning_rate": 0.001244361114626443, "loss": 0.134, "step": 43634 }, { "epoch": 0.07737023472954727, "grad_norm": 0.68359375, "learning_rate": 0.0012443023663162242, "loss": 0.2252, "step": 43636 }, { "epoch": 0.07737378089485708, "grad_norm": 0.25390625, "learning_rate": 0.0012442436173749104, "loss": 0.1761, "step": 43638 }, { "epoch": 0.0773773270601669, "grad_norm": 0.302734375, "learning_rate": 0.0012441848678027595, "loss": 0.1704, "step": 43640 }, { "epoch": 0.07738087322547671, "grad_norm": 0.515625, "learning_rate": 0.001244126117600027, "loss": 0.161, "step": 43642 }, { "epoch": 0.07738441939078652, "grad_norm": 0.328125, "learning_rate": 0.001244067366766971, "loss": 0.2223, "step": 43644 }, { "epoch": 0.07738796555609635, "grad_norm": 0.291015625, "learning_rate": 0.0012440086153038476, "loss": 0.1584, "step": 43646 }, { "epoch": 0.07739151172140617, "grad_norm": 0.25390625, "learning_rate": 0.0012439498632109145, "loss": 0.1605, "step": 43648 }, { "epoch": 0.07739505788671598, "grad_norm": 0.30859375, "learning_rate": 0.0012438911104884285, "loss": 0.1983, "step": 43650 }, { "epoch": 0.0773986040520258, "grad_norm": 0.73828125, "learning_rate": 0.001243832357136646, "loss": 0.4176, "step": 43652 }, { "epoch": 0.07740215021733561, "grad_norm": 0.703125, "learning_rate": 0.0012437736031558246, "loss": 0.1913, "step": 43654 }, { "epoch": 0.07740569638264543, "grad_norm": 0.30078125, "learning_rate": 0.0012437148485462208, "loss": 0.0959, "step": 43656 }, { "epoch": 0.07740924254795524, "grad_norm": 1.3203125, "learning_rate": 0.001243656093308092, "loss": 0.1912, "step": 43658 }, { "epoch": 0.07741278871326505, "grad_norm": 0.84375, "learning_rate": 0.0012435973374416945, "loss": 0.1988, "step": 43660 }, { "epoch": 0.07741633487857487, "grad_norm": 0.32421875, "learning_rate": 0.001243538580947286, "loss": 0.268, "step": 43662 }, { "epoch": 0.07741988104388468, "grad_norm": 0.671875, "learning_rate": 0.0012434798238251225, "loss": 0.157, "step": 43664 }, { "epoch": 0.0774234272091945, "grad_norm": 0.7734375, "learning_rate": 0.0012434210660754624, "loss": 0.2011, "step": 43666 }, { "epoch": 0.07742697337450431, "grad_norm": 0.384765625, "learning_rate": 0.0012433623076985613, "loss": 0.18, "step": 43668 }, { "epoch": 0.07743051953981413, "grad_norm": 1.1953125, "learning_rate": 0.001243303548694677, "loss": 0.1536, "step": 43670 }, { "epoch": 0.07743406570512394, "grad_norm": 1.15625, "learning_rate": 0.0012432447890640664, "loss": 0.2163, "step": 43672 }, { "epoch": 0.07743761187043376, "grad_norm": 0.50390625, "learning_rate": 0.001243186028806986, "loss": 0.2174, "step": 43674 }, { "epoch": 0.07744115803574357, "grad_norm": 0.359375, "learning_rate": 0.0012431272679236931, "loss": 0.4993, "step": 43676 }, { "epoch": 0.07744470420105339, "grad_norm": 0.6328125, "learning_rate": 0.0012430685064144448, "loss": 0.2441, "step": 43678 }, { "epoch": 0.0774482503663632, "grad_norm": 0.427734375, "learning_rate": 0.0012430097442794978, "loss": 0.2633, "step": 43680 }, { "epoch": 0.07745179653167301, "grad_norm": 0.58203125, "learning_rate": 0.0012429509815191093, "loss": 0.1985, "step": 43682 }, { "epoch": 0.07745534269698283, "grad_norm": 0.4921875, "learning_rate": 0.0012428922181335362, "loss": 0.2041, "step": 43684 }, { "epoch": 0.07745888886229264, "grad_norm": 0.208984375, "learning_rate": 0.0012428334541230357, "loss": 0.201, "step": 43686 }, { "epoch": 0.07746243502760246, "grad_norm": 1.3203125, "learning_rate": 0.0012427746894878645, "loss": 0.2904, "step": 43688 }, { "epoch": 0.07746598119291227, "grad_norm": 25.875, "learning_rate": 0.0012427159242282798, "loss": 0.2998, "step": 43690 }, { "epoch": 0.0774695273582221, "grad_norm": 0.6953125, "learning_rate": 0.0012426571583445385, "loss": 0.1525, "step": 43692 }, { "epoch": 0.07747307352353192, "grad_norm": 0.357421875, "learning_rate": 0.001242598391836898, "loss": 0.1592, "step": 43694 }, { "epoch": 0.07747661968884173, "grad_norm": 0.23046875, "learning_rate": 0.0012425396247056147, "loss": 0.1429, "step": 43696 }, { "epoch": 0.07748016585415154, "grad_norm": 1.578125, "learning_rate": 0.0012424808569509458, "loss": 0.4403, "step": 43698 }, { "epoch": 0.07748371201946136, "grad_norm": 0.28515625, "learning_rate": 0.0012424220885731484, "loss": 0.2869, "step": 43700 }, { "epoch": 0.07748725818477117, "grad_norm": 0.45703125, "learning_rate": 0.0012423633195724796, "loss": 0.1879, "step": 43702 }, { "epoch": 0.07749080435008099, "grad_norm": 0.5390625, "learning_rate": 0.001242304549949196, "loss": 0.2883, "step": 43704 }, { "epoch": 0.0774943505153908, "grad_norm": 0.1552734375, "learning_rate": 0.0012422457797035553, "loss": 0.1575, "step": 43706 }, { "epoch": 0.07749789668070062, "grad_norm": 0.69921875, "learning_rate": 0.0012421870088358143, "loss": 0.2946, "step": 43708 }, { "epoch": 0.07750144284601043, "grad_norm": 0.10888671875, "learning_rate": 0.0012421282373462298, "loss": 0.1312, "step": 43710 }, { "epoch": 0.07750498901132025, "grad_norm": 0.2314453125, "learning_rate": 0.0012420694652350588, "loss": 0.1719, "step": 43712 }, { "epoch": 0.07750853517663006, "grad_norm": 0.478515625, "learning_rate": 0.0012420106925025585, "loss": 0.1954, "step": 43714 }, { "epoch": 0.07751208134193988, "grad_norm": 0.33984375, "learning_rate": 0.0012419519191489861, "loss": 0.1841, "step": 43716 }, { "epoch": 0.07751562750724969, "grad_norm": 0.296875, "learning_rate": 0.0012418931451745982, "loss": 0.2561, "step": 43718 }, { "epoch": 0.0775191736725595, "grad_norm": 0.2890625, "learning_rate": 0.0012418343705796523, "loss": 0.2001, "step": 43720 }, { "epoch": 0.07752271983786932, "grad_norm": 0.298828125, "learning_rate": 0.0012417755953644054, "loss": 0.1604, "step": 43722 }, { "epoch": 0.07752626600317913, "grad_norm": 0.61328125, "learning_rate": 0.001241716819529114, "loss": 0.2104, "step": 43724 }, { "epoch": 0.07752981216848895, "grad_norm": 0.349609375, "learning_rate": 0.0012416580430740355, "loss": 0.265, "step": 43726 }, { "epoch": 0.07753335833379876, "grad_norm": 0.388671875, "learning_rate": 0.0012415992659994277, "loss": 0.201, "step": 43728 }, { "epoch": 0.07753690449910858, "grad_norm": 0.42578125, "learning_rate": 0.0012415404883055463, "loss": 0.137, "step": 43730 }, { "epoch": 0.07754045066441839, "grad_norm": 0.37109375, "learning_rate": 0.001241481709992649, "loss": 0.1984, "step": 43732 }, { "epoch": 0.0775439968297282, "grad_norm": 3.34375, "learning_rate": 0.0012414229310609933, "loss": 0.4385, "step": 43734 }, { "epoch": 0.07754754299503802, "grad_norm": 1.3359375, "learning_rate": 0.0012413641515108358, "loss": 0.2675, "step": 43736 }, { "epoch": 0.07755108916034785, "grad_norm": 0.6328125, "learning_rate": 0.0012413053713424334, "loss": 0.2398, "step": 43738 }, { "epoch": 0.07755463532565766, "grad_norm": 0.84765625, "learning_rate": 0.0012412465905560434, "loss": 0.263, "step": 43740 }, { "epoch": 0.07755818149096748, "grad_norm": 0.8203125, "learning_rate": 0.001241187809151923, "loss": 0.2, "step": 43742 }, { "epoch": 0.0775617276562773, "grad_norm": 0.5546875, "learning_rate": 0.0012411290271303291, "loss": 0.1725, "step": 43744 }, { "epoch": 0.07756527382158711, "grad_norm": 0.359375, "learning_rate": 0.0012410702444915188, "loss": 0.1766, "step": 43746 }, { "epoch": 0.07756881998689692, "grad_norm": 0.404296875, "learning_rate": 0.0012410114612357492, "loss": 0.2198, "step": 43748 }, { "epoch": 0.07757236615220674, "grad_norm": 0.703125, "learning_rate": 0.0012409526773632773, "loss": 0.2103, "step": 43750 }, { "epoch": 0.07757591231751655, "grad_norm": 0.349609375, "learning_rate": 0.0012408938928743605, "loss": 0.1565, "step": 43752 }, { "epoch": 0.07757945848282637, "grad_norm": 0.49609375, "learning_rate": 0.0012408351077692551, "loss": 0.1624, "step": 43754 }, { "epoch": 0.07758300464813618, "grad_norm": 2.15625, "learning_rate": 0.0012407763220482195, "loss": 0.3137, "step": 43756 }, { "epoch": 0.077586550813446, "grad_norm": 0.220703125, "learning_rate": 0.0012407175357115096, "loss": 0.1654, "step": 43758 }, { "epoch": 0.07759009697875581, "grad_norm": 0.267578125, "learning_rate": 0.001240658748759383, "loss": 0.2154, "step": 43760 }, { "epoch": 0.07759364314406562, "grad_norm": 1.5078125, "learning_rate": 0.0012405999611920966, "loss": 0.2622, "step": 43762 }, { "epoch": 0.07759718930937544, "grad_norm": 0.29296875, "learning_rate": 0.0012405411730099078, "loss": 0.1693, "step": 43764 }, { "epoch": 0.07760073547468525, "grad_norm": 0.83203125, "learning_rate": 0.0012404823842130737, "loss": 0.2331, "step": 43766 }, { "epoch": 0.07760428163999507, "grad_norm": 0.375, "learning_rate": 0.001240423594801851, "loss": 0.2349, "step": 43768 }, { "epoch": 0.07760782780530488, "grad_norm": 2.140625, "learning_rate": 0.001240364804776497, "loss": 0.6832, "step": 43770 }, { "epoch": 0.0776113739706147, "grad_norm": 0.208984375, "learning_rate": 0.001240306014137269, "loss": 0.2512, "step": 43772 }, { "epoch": 0.07761492013592451, "grad_norm": 0.86328125, "learning_rate": 0.0012402472228844241, "loss": 0.1755, "step": 43774 }, { "epoch": 0.07761846630123433, "grad_norm": 0.67578125, "learning_rate": 0.0012401884310182193, "loss": 0.1883, "step": 43776 }, { "epoch": 0.07762201246654414, "grad_norm": 0.484375, "learning_rate": 0.0012401296385389116, "loss": 0.1541, "step": 43778 }, { "epoch": 0.07762555863185396, "grad_norm": 1.890625, "learning_rate": 0.0012400708454467582, "loss": 0.2813, "step": 43780 }, { "epoch": 0.07762910479716378, "grad_norm": 0.4375, "learning_rate": 0.0012400120517420165, "loss": 0.2385, "step": 43782 }, { "epoch": 0.0776326509624736, "grad_norm": 0.578125, "learning_rate": 0.0012399532574249431, "loss": 0.208, "step": 43784 }, { "epoch": 0.07763619712778341, "grad_norm": 2.96875, "learning_rate": 0.001239894462495796, "loss": 0.3592, "step": 43786 }, { "epoch": 0.07763974329309323, "grad_norm": 0.150390625, "learning_rate": 0.0012398356669548313, "loss": 0.1484, "step": 43788 }, { "epoch": 0.07764328945840304, "grad_norm": 0.400390625, "learning_rate": 0.0012397768708023067, "loss": 0.1486, "step": 43790 }, { "epoch": 0.07764683562371286, "grad_norm": 0.51171875, "learning_rate": 0.0012397180740384795, "loss": 0.2296, "step": 43792 }, { "epoch": 0.07765038178902267, "grad_norm": 0.75390625, "learning_rate": 0.001239659276663606, "loss": 0.2141, "step": 43794 }, { "epoch": 0.07765392795433249, "grad_norm": 0.25390625, "learning_rate": 0.0012396004786779447, "loss": 0.1808, "step": 43796 }, { "epoch": 0.0776574741196423, "grad_norm": 0.625, "learning_rate": 0.0012395416800817514, "loss": 0.2143, "step": 43798 }, { "epoch": 0.07766102028495211, "grad_norm": 0.392578125, "learning_rate": 0.0012394828808752843, "loss": 0.1878, "step": 43800 }, { "epoch": 0.07766456645026193, "grad_norm": 0.7890625, "learning_rate": 0.0012394240810588, "loss": 0.2075, "step": 43802 }, { "epoch": 0.07766811261557174, "grad_norm": 0.328125, "learning_rate": 0.0012393652806325557, "loss": 0.1932, "step": 43804 }, { "epoch": 0.07767165878088156, "grad_norm": 0.5078125, "learning_rate": 0.0012393064795968086, "loss": 0.1867, "step": 43806 }, { "epoch": 0.07767520494619137, "grad_norm": 0.30859375, "learning_rate": 0.0012392476779518155, "loss": 0.2202, "step": 43808 }, { "epoch": 0.07767875111150119, "grad_norm": 0.283203125, "learning_rate": 0.0012391888756978343, "loss": 0.1812, "step": 43810 }, { "epoch": 0.077682297276811, "grad_norm": 0.4375, "learning_rate": 0.0012391300728351218, "loss": 0.1861, "step": 43812 }, { "epoch": 0.07768584344212082, "grad_norm": 0.2138671875, "learning_rate": 0.0012390712693639355, "loss": 0.1826, "step": 43814 }, { "epoch": 0.07768938960743063, "grad_norm": 0.66015625, "learning_rate": 0.0012390124652845317, "loss": 0.1622, "step": 43816 }, { "epoch": 0.07769293577274045, "grad_norm": 0.51953125, "learning_rate": 0.0012389536605971684, "loss": 0.2144, "step": 43818 }, { "epoch": 0.07769648193805026, "grad_norm": 0.55078125, "learning_rate": 0.0012388948553021027, "loss": 0.1556, "step": 43820 }, { "epoch": 0.07770002810336007, "grad_norm": 0.53125, "learning_rate": 0.0012388360493995913, "loss": 0.2318, "step": 43822 }, { "epoch": 0.07770357426866989, "grad_norm": 0.48828125, "learning_rate": 0.0012387772428898915, "loss": 0.2609, "step": 43824 }, { "epoch": 0.0777071204339797, "grad_norm": 0.7109375, "learning_rate": 0.001238718435773261, "loss": 0.1368, "step": 43826 }, { "epoch": 0.07771066659928953, "grad_norm": 0.224609375, "learning_rate": 0.0012386596280499565, "loss": 0.1547, "step": 43828 }, { "epoch": 0.07771421276459935, "grad_norm": 0.337890625, "learning_rate": 0.0012386008197202351, "loss": 0.2404, "step": 43830 }, { "epoch": 0.07771775892990916, "grad_norm": 0.66015625, "learning_rate": 0.0012385420107843546, "loss": 0.2167, "step": 43832 }, { "epoch": 0.07772130509521898, "grad_norm": 0.84765625, "learning_rate": 0.0012384832012425715, "loss": 0.2657, "step": 43834 }, { "epoch": 0.07772485126052879, "grad_norm": 0.50390625, "learning_rate": 0.0012384243910951434, "loss": 0.1417, "step": 43836 }, { "epoch": 0.0777283974258386, "grad_norm": 2.328125, "learning_rate": 0.0012383655803423277, "loss": 0.2152, "step": 43838 }, { "epoch": 0.07773194359114842, "grad_norm": 0.50390625, "learning_rate": 0.0012383067689843812, "loss": 0.1722, "step": 43840 }, { "epoch": 0.07773548975645823, "grad_norm": 0.36328125, "learning_rate": 0.0012382479570215612, "loss": 0.2183, "step": 43842 }, { "epoch": 0.07773903592176805, "grad_norm": 0.3203125, "learning_rate": 0.0012381891444541249, "loss": 0.2383, "step": 43844 }, { "epoch": 0.07774258208707786, "grad_norm": 0.984375, "learning_rate": 0.0012381303312823295, "loss": 0.2476, "step": 43846 }, { "epoch": 0.07774612825238768, "grad_norm": 0.5625, "learning_rate": 0.0012380715175064321, "loss": 0.1481, "step": 43848 }, { "epoch": 0.07774967441769749, "grad_norm": 0.392578125, "learning_rate": 0.0012380127031266905, "loss": 0.1864, "step": 43850 }, { "epoch": 0.07775322058300731, "grad_norm": 0.39453125, "learning_rate": 0.0012379538881433613, "loss": 0.1788, "step": 43852 }, { "epoch": 0.07775676674831712, "grad_norm": 0.248046875, "learning_rate": 0.0012378950725567019, "loss": 0.128, "step": 43854 }, { "epoch": 0.07776031291362694, "grad_norm": 0.39453125, "learning_rate": 0.0012378362563669695, "loss": 0.1779, "step": 43856 }, { "epoch": 0.07776385907893675, "grad_norm": 0.255859375, "learning_rate": 0.0012377774395744216, "loss": 0.2651, "step": 43858 }, { "epoch": 0.07776740524424657, "grad_norm": 0.34375, "learning_rate": 0.0012377186221793153, "loss": 0.1577, "step": 43860 }, { "epoch": 0.07777095140955638, "grad_norm": 1.3203125, "learning_rate": 0.0012376598041819074, "loss": 0.1996, "step": 43862 }, { "epoch": 0.0777744975748662, "grad_norm": 0.82421875, "learning_rate": 0.0012376009855824554, "loss": 0.1892, "step": 43864 }, { "epoch": 0.07777804374017601, "grad_norm": 0.70703125, "learning_rate": 0.001237542166381217, "loss": 0.1637, "step": 43866 }, { "epoch": 0.07778158990548582, "grad_norm": 0.427734375, "learning_rate": 0.0012374833465784488, "loss": 0.1641, "step": 43868 }, { "epoch": 0.07778513607079564, "grad_norm": 0.8515625, "learning_rate": 0.0012374245261744083, "loss": 0.2443, "step": 43870 }, { "epoch": 0.07778868223610545, "grad_norm": 0.271484375, "learning_rate": 0.0012373657051693529, "loss": 0.2836, "step": 43872 }, { "epoch": 0.07779222840141528, "grad_norm": 0.1787109375, "learning_rate": 0.0012373068835635395, "loss": 0.2609, "step": 43874 }, { "epoch": 0.0777957745667251, "grad_norm": 1.1015625, "learning_rate": 0.001237248061357226, "loss": 0.2558, "step": 43876 }, { "epoch": 0.07779932073203491, "grad_norm": 0.296875, "learning_rate": 0.0012371892385506689, "loss": 0.1685, "step": 43878 }, { "epoch": 0.07780286689734472, "grad_norm": 0.765625, "learning_rate": 0.0012371304151441256, "loss": 0.1603, "step": 43880 }, { "epoch": 0.07780641306265454, "grad_norm": 0.85546875, "learning_rate": 0.001237071591137854, "loss": 0.1795, "step": 43882 }, { "epoch": 0.07780995922796435, "grad_norm": 0.384765625, "learning_rate": 0.00123701276653211, "loss": 0.164, "step": 43884 }, { "epoch": 0.07781350539327417, "grad_norm": 0.494140625, "learning_rate": 0.0012369539413271526, "loss": 0.4686, "step": 43886 }, { "epoch": 0.07781705155858398, "grad_norm": 0.416015625, "learning_rate": 0.0012368951155232377, "loss": 0.2307, "step": 43888 }, { "epoch": 0.0778205977238938, "grad_norm": 0.384765625, "learning_rate": 0.0012368362891206235, "loss": 0.2173, "step": 43890 }, { "epoch": 0.07782414388920361, "grad_norm": 0.359375, "learning_rate": 0.0012367774621195665, "loss": 0.3516, "step": 43892 }, { "epoch": 0.07782769005451343, "grad_norm": 0.306640625, "learning_rate": 0.0012367186345203242, "loss": 0.1968, "step": 43894 }, { "epoch": 0.07783123621982324, "grad_norm": 0.23046875, "learning_rate": 0.0012366598063231545, "loss": 0.1707, "step": 43896 }, { "epoch": 0.07783478238513306, "grad_norm": 0.8984375, "learning_rate": 0.001236600977528314, "loss": 0.2472, "step": 43898 }, { "epoch": 0.07783832855044287, "grad_norm": 0.515625, "learning_rate": 0.00123654214813606, "loss": 0.1336, "step": 43900 }, { "epoch": 0.07784187471575268, "grad_norm": 0.5703125, "learning_rate": 0.0012364833181466501, "loss": 0.1666, "step": 43902 }, { "epoch": 0.0778454208810625, "grad_norm": 3.265625, "learning_rate": 0.0012364244875603413, "loss": 0.2281, "step": 43904 }, { "epoch": 0.07784896704637231, "grad_norm": 0.69140625, "learning_rate": 0.0012363656563773911, "loss": 0.1596, "step": 43906 }, { "epoch": 0.07785251321168213, "grad_norm": 0.60546875, "learning_rate": 0.0012363068245980569, "loss": 0.2018, "step": 43908 }, { "epoch": 0.07785605937699194, "grad_norm": 0.61328125, "learning_rate": 0.0012362479922225957, "loss": 0.136, "step": 43910 }, { "epoch": 0.07785960554230176, "grad_norm": 0.37890625, "learning_rate": 0.0012361891592512651, "loss": 0.1947, "step": 43912 }, { "epoch": 0.07786315170761157, "grad_norm": 0.2890625, "learning_rate": 0.0012361303256843222, "loss": 0.1727, "step": 43914 }, { "epoch": 0.07786669787292139, "grad_norm": 0.7578125, "learning_rate": 0.001236071491522024, "loss": 0.1992, "step": 43916 }, { "epoch": 0.07787024403823121, "grad_norm": 0.5625, "learning_rate": 0.0012360126567646285, "loss": 0.1937, "step": 43918 }, { "epoch": 0.07787379020354103, "grad_norm": 0.33984375, "learning_rate": 0.0012359538214123925, "loss": 0.1439, "step": 43920 }, { "epoch": 0.07787733636885084, "grad_norm": 0.6875, "learning_rate": 0.0012358949854655735, "loss": 0.2036, "step": 43922 }, { "epoch": 0.07788088253416066, "grad_norm": 0.65625, "learning_rate": 0.0012358361489244285, "loss": 0.1672, "step": 43924 }, { "epoch": 0.07788442869947047, "grad_norm": 0.52734375, "learning_rate": 0.0012357773117892153, "loss": 0.1676, "step": 43926 }, { "epoch": 0.07788797486478029, "grad_norm": 0.33203125, "learning_rate": 0.001235718474060191, "loss": 0.2347, "step": 43928 }, { "epoch": 0.0778915210300901, "grad_norm": 0.2177734375, "learning_rate": 0.0012356596357376128, "loss": 0.1784, "step": 43930 }, { "epoch": 0.07789506719539992, "grad_norm": 0.44921875, "learning_rate": 0.0012356007968217385, "loss": 0.1951, "step": 43932 }, { "epoch": 0.07789861336070973, "grad_norm": 1.84375, "learning_rate": 0.0012355419573128249, "loss": 0.2739, "step": 43934 }, { "epoch": 0.07790215952601955, "grad_norm": 0.60546875, "learning_rate": 0.0012354831172111296, "loss": 0.1616, "step": 43936 }, { "epoch": 0.07790570569132936, "grad_norm": 0.2890625, "learning_rate": 0.0012354242765169096, "loss": 0.1562, "step": 43938 }, { "epoch": 0.07790925185663918, "grad_norm": 0.75, "learning_rate": 0.0012353654352304227, "loss": 0.1206, "step": 43940 }, { "epoch": 0.07791279802194899, "grad_norm": 0.83203125, "learning_rate": 0.0012353065933519256, "loss": 0.2814, "step": 43942 }, { "epoch": 0.0779163441872588, "grad_norm": 0.2451171875, "learning_rate": 0.0012352477508816766, "loss": 0.157, "step": 43944 }, { "epoch": 0.07791989035256862, "grad_norm": 0.92578125, "learning_rate": 0.0012351889078199322, "loss": 0.186, "step": 43946 }, { "epoch": 0.07792343651787843, "grad_norm": 0.470703125, "learning_rate": 0.00123513006416695, "loss": 0.2041, "step": 43948 }, { "epoch": 0.07792698268318825, "grad_norm": 2.59375, "learning_rate": 0.0012350712199229875, "loss": 0.2208, "step": 43950 }, { "epoch": 0.07793052884849806, "grad_norm": 0.451171875, "learning_rate": 0.001235012375088302, "loss": 0.2571, "step": 43952 }, { "epoch": 0.07793407501380788, "grad_norm": 0.3203125, "learning_rate": 0.0012349535296631508, "loss": 0.1705, "step": 43954 }, { "epoch": 0.07793762117911769, "grad_norm": 0.390625, "learning_rate": 0.0012348946836477914, "loss": 0.2065, "step": 43956 }, { "epoch": 0.0779411673444275, "grad_norm": 0.625, "learning_rate": 0.0012348358370424806, "loss": 0.1398, "step": 43958 }, { "epoch": 0.07794471350973732, "grad_norm": 0.546875, "learning_rate": 0.0012347769898474765, "loss": 0.2053, "step": 43960 }, { "epoch": 0.07794825967504714, "grad_norm": 0.28515625, "learning_rate": 0.0012347181420630357, "loss": 0.162, "step": 43962 }, { "epoch": 0.07795180584035696, "grad_norm": 1.0625, "learning_rate": 0.0012346592936894165, "loss": 0.354, "step": 43964 }, { "epoch": 0.07795535200566678, "grad_norm": 0.76953125, "learning_rate": 0.0012346004447268755, "loss": 0.3264, "step": 43966 }, { "epoch": 0.07795889817097659, "grad_norm": 1.515625, "learning_rate": 0.0012345415951756705, "loss": 0.3395, "step": 43968 }, { "epoch": 0.07796244433628641, "grad_norm": 1.140625, "learning_rate": 0.0012344827450360587, "loss": 0.3585, "step": 43970 }, { "epoch": 0.07796599050159622, "grad_norm": 0.98046875, "learning_rate": 0.0012344238943082975, "loss": 0.1577, "step": 43972 }, { "epoch": 0.07796953666690604, "grad_norm": 0.271484375, "learning_rate": 0.0012343650429926442, "loss": 0.205, "step": 43974 }, { "epoch": 0.07797308283221585, "grad_norm": 0.296875, "learning_rate": 0.0012343061910893563, "loss": 0.1825, "step": 43976 }, { "epoch": 0.07797662899752567, "grad_norm": 0.56640625, "learning_rate": 0.001234247338598691, "loss": 0.1967, "step": 43978 }, { "epoch": 0.07798017516283548, "grad_norm": 0.3046875, "learning_rate": 0.0012341884855209058, "loss": 0.2454, "step": 43980 }, { "epoch": 0.0779837213281453, "grad_norm": 0.5546875, "learning_rate": 0.0012341296318562581, "loss": 0.2044, "step": 43982 }, { "epoch": 0.07798726749345511, "grad_norm": 0.59375, "learning_rate": 0.0012340707776050054, "loss": 0.4246, "step": 43984 }, { "epoch": 0.07799081365876492, "grad_norm": 0.546875, "learning_rate": 0.0012340119227674052, "loss": 0.1581, "step": 43986 }, { "epoch": 0.07799435982407474, "grad_norm": 0.31640625, "learning_rate": 0.0012339530673437145, "loss": 0.1483, "step": 43988 }, { "epoch": 0.07799790598938455, "grad_norm": 0.27734375, "learning_rate": 0.0012338942113341912, "loss": 0.166, "step": 43990 }, { "epoch": 0.07800145215469437, "grad_norm": 0.376953125, "learning_rate": 0.0012338353547390918, "loss": 0.1856, "step": 43992 }, { "epoch": 0.07800499832000418, "grad_norm": 0.1962890625, "learning_rate": 0.0012337764975586744, "loss": 0.3584, "step": 43994 }, { "epoch": 0.078008544485314, "grad_norm": 0.75390625, "learning_rate": 0.0012337176397931966, "loss": 0.2122, "step": 43996 }, { "epoch": 0.07801209065062381, "grad_norm": 1.484375, "learning_rate": 0.0012336587814429155, "loss": 0.2576, "step": 43998 }, { "epoch": 0.07801563681593363, "grad_norm": 0.384765625, "learning_rate": 0.0012335999225080883, "loss": 0.3096, "step": 44000 }, { "epoch": 0.07801918298124344, "grad_norm": 0.2890625, "learning_rate": 0.0012335410629889727, "loss": 0.1398, "step": 44002 }, { "epoch": 0.07802272914655325, "grad_norm": 0.36328125, "learning_rate": 0.0012334822028858261, "loss": 0.2016, "step": 44004 }, { "epoch": 0.07802627531186307, "grad_norm": 0.48046875, "learning_rate": 0.0012334233421989061, "loss": 0.1616, "step": 44006 }, { "epoch": 0.07802982147717288, "grad_norm": 0.421875, "learning_rate": 0.0012333644809284698, "loss": 0.1629, "step": 44008 }, { "epoch": 0.07803336764248271, "grad_norm": 0.6875, "learning_rate": 0.0012333056190747746, "loss": 0.2565, "step": 44010 }, { "epoch": 0.07803691380779253, "grad_norm": 0.330078125, "learning_rate": 0.0012332467566380783, "loss": 0.1325, "step": 44012 }, { "epoch": 0.07804045997310234, "grad_norm": 0.515625, "learning_rate": 0.0012331878936186378, "loss": 0.1524, "step": 44014 }, { "epoch": 0.07804400613841216, "grad_norm": 0.41796875, "learning_rate": 0.0012331290300167108, "loss": 0.1776, "step": 44016 }, { "epoch": 0.07804755230372197, "grad_norm": 0.7421875, "learning_rate": 0.001233070165832555, "loss": 0.1914, "step": 44018 }, { "epoch": 0.07805109846903178, "grad_norm": 0.41796875, "learning_rate": 0.0012330113010664272, "loss": 0.5608, "step": 44020 }, { "epoch": 0.0780546446343416, "grad_norm": 0.330078125, "learning_rate": 0.0012329524357185858, "loss": 0.1722, "step": 44022 }, { "epoch": 0.07805819079965141, "grad_norm": 0.171875, "learning_rate": 0.001232893569789287, "loss": 0.1849, "step": 44024 }, { "epoch": 0.07806173696496123, "grad_norm": 0.6640625, "learning_rate": 0.0012328347032787895, "loss": 0.2374, "step": 44026 }, { "epoch": 0.07806528313027104, "grad_norm": 0.34765625, "learning_rate": 0.00123277583618735, "loss": 0.1994, "step": 44028 }, { "epoch": 0.07806882929558086, "grad_norm": 0.275390625, "learning_rate": 0.0012327169685152262, "loss": 0.1699, "step": 44030 }, { "epoch": 0.07807237546089067, "grad_norm": 0.8046875, "learning_rate": 0.001232658100262675, "loss": 0.2076, "step": 44032 }, { "epoch": 0.07807592162620049, "grad_norm": 0.306640625, "learning_rate": 0.001232599231429955, "loss": 0.1848, "step": 44034 }, { "epoch": 0.0780794677915103, "grad_norm": 0.7265625, "learning_rate": 0.0012325403620173227, "loss": 0.1802, "step": 44036 }, { "epoch": 0.07808301395682012, "grad_norm": 0.259765625, "learning_rate": 0.0012324814920250356, "loss": 0.219, "step": 44038 }, { "epoch": 0.07808656012212993, "grad_norm": 0.765625, "learning_rate": 0.0012324226214533514, "loss": 0.2727, "step": 44040 }, { "epoch": 0.07809010628743975, "grad_norm": 0.2353515625, "learning_rate": 0.001232363750302528, "loss": 0.1698, "step": 44042 }, { "epoch": 0.07809365245274956, "grad_norm": 1.6171875, "learning_rate": 0.001232304878572822, "loss": 0.1896, "step": 44044 }, { "epoch": 0.07809719861805937, "grad_norm": 0.52734375, "learning_rate": 0.0012322460062644915, "loss": 0.1523, "step": 44046 }, { "epoch": 0.07810074478336919, "grad_norm": 0.2314453125, "learning_rate": 0.001232187133377794, "loss": 0.1878, "step": 44048 }, { "epoch": 0.078104290948679, "grad_norm": 1.4453125, "learning_rate": 0.0012321282599129866, "loss": 0.2694, "step": 44050 }, { "epoch": 0.07810783711398882, "grad_norm": 0.4921875, "learning_rate": 0.001232069385870327, "loss": 0.1635, "step": 44052 }, { "epoch": 0.07811138327929865, "grad_norm": 0.283203125, "learning_rate": 0.0012320105112500726, "loss": 0.1832, "step": 44054 }, { "epoch": 0.07811492944460846, "grad_norm": 0.439453125, "learning_rate": 0.0012319516360524807, "loss": 0.1808, "step": 44056 }, { "epoch": 0.07811847560991828, "grad_norm": 0.421875, "learning_rate": 0.001231892760277809, "loss": 0.1809, "step": 44058 }, { "epoch": 0.07812202177522809, "grad_norm": 0.25, "learning_rate": 0.0012318338839263153, "loss": 0.1448, "step": 44060 }, { "epoch": 0.0781255679405379, "grad_norm": 0.298828125, "learning_rate": 0.0012317750069982564, "loss": 0.1757, "step": 44062 }, { "epoch": 0.07812911410584772, "grad_norm": 1.5625, "learning_rate": 0.0012317161294938905, "loss": 0.3332, "step": 44064 }, { "epoch": 0.07813266027115753, "grad_norm": 0.2294921875, "learning_rate": 0.0012316572514134748, "loss": 0.1519, "step": 44066 }, { "epoch": 0.07813620643646735, "grad_norm": 0.287109375, "learning_rate": 0.0012315983727572664, "loss": 0.2141, "step": 44068 }, { "epoch": 0.07813975260177716, "grad_norm": 0.37890625, "learning_rate": 0.0012315394935255232, "loss": 0.2772, "step": 44070 }, { "epoch": 0.07814329876708698, "grad_norm": 1.1171875, "learning_rate": 0.0012314806137185027, "loss": 0.2364, "step": 44072 }, { "epoch": 0.07814684493239679, "grad_norm": 0.2099609375, "learning_rate": 0.0012314217333364626, "loss": 0.1817, "step": 44074 }, { "epoch": 0.0781503910977066, "grad_norm": 0.3828125, "learning_rate": 0.00123136285237966, "loss": 0.1568, "step": 44076 }, { "epoch": 0.07815393726301642, "grad_norm": 0.29296875, "learning_rate": 0.0012313039708483527, "loss": 0.1692, "step": 44078 }, { "epoch": 0.07815748342832624, "grad_norm": 0.7265625, "learning_rate": 0.001231245088742798, "loss": 0.1682, "step": 44080 }, { "epoch": 0.07816102959363605, "grad_norm": 0.328125, "learning_rate": 0.0012311862060632535, "loss": 0.1687, "step": 44082 }, { "epoch": 0.07816457575894586, "grad_norm": 0.60546875, "learning_rate": 0.001231127322809977, "loss": 0.2728, "step": 44084 }, { "epoch": 0.07816812192425568, "grad_norm": 0.466796875, "learning_rate": 0.0012310684389832253, "loss": 0.3572, "step": 44086 }, { "epoch": 0.0781716680895655, "grad_norm": 0.87890625, "learning_rate": 0.0012310095545832566, "loss": 0.2325, "step": 44088 }, { "epoch": 0.07817521425487531, "grad_norm": 0.287109375, "learning_rate": 0.0012309506696103283, "loss": 0.15, "step": 44090 }, { "epoch": 0.07817876042018512, "grad_norm": 1.65625, "learning_rate": 0.0012308917840646977, "loss": 0.2459, "step": 44092 }, { "epoch": 0.07818230658549494, "grad_norm": 0.2060546875, "learning_rate": 0.0012308328979466225, "loss": 0.1759, "step": 44094 }, { "epoch": 0.07818585275080475, "grad_norm": 0.8125, "learning_rate": 0.0012307740112563604, "loss": 0.2184, "step": 44096 }, { "epoch": 0.07818939891611457, "grad_norm": 0.375, "learning_rate": 0.0012307151239941683, "loss": 0.1874, "step": 44098 }, { "epoch": 0.0781929450814244, "grad_norm": 0.54296875, "learning_rate": 0.0012306562361603047, "loss": 0.1509, "step": 44100 }, { "epoch": 0.07819649124673421, "grad_norm": 3.09375, "learning_rate": 0.0012305973477550265, "loss": 0.4378, "step": 44102 }, { "epoch": 0.07820003741204402, "grad_norm": 0.2451171875, "learning_rate": 0.0012305384587785913, "loss": 0.1562, "step": 44104 }, { "epoch": 0.07820358357735384, "grad_norm": 0.2470703125, "learning_rate": 0.0012304795692312566, "loss": 0.1543, "step": 44106 }, { "epoch": 0.07820712974266365, "grad_norm": 0.59375, "learning_rate": 0.00123042067911328, "loss": 0.208, "step": 44108 }, { "epoch": 0.07821067590797347, "grad_norm": 1.5859375, "learning_rate": 0.001230361788424919, "loss": 0.2731, "step": 44110 }, { "epoch": 0.07821422207328328, "grad_norm": 0.32421875, "learning_rate": 0.0012303028971664314, "loss": 0.1861, "step": 44112 }, { "epoch": 0.0782177682385931, "grad_norm": 0.73046875, "learning_rate": 0.0012302440053380747, "loss": 0.1975, "step": 44114 }, { "epoch": 0.07822131440390291, "grad_norm": 0.412109375, "learning_rate": 0.0012301851129401064, "loss": 0.1973, "step": 44116 }, { "epoch": 0.07822486056921273, "grad_norm": 0.3046875, "learning_rate": 0.0012301262199727841, "loss": 0.1478, "step": 44118 }, { "epoch": 0.07822840673452254, "grad_norm": 0.2353515625, "learning_rate": 0.0012300673264363652, "loss": 0.1624, "step": 44120 }, { "epoch": 0.07823195289983235, "grad_norm": 1.359375, "learning_rate": 0.0012300084323311074, "loss": 0.2039, "step": 44122 }, { "epoch": 0.07823549906514217, "grad_norm": 0.29296875, "learning_rate": 0.001229949537657268, "loss": 0.1762, "step": 44124 }, { "epoch": 0.07823904523045198, "grad_norm": 0.64453125, "learning_rate": 0.001229890642415105, "loss": 0.2265, "step": 44126 }, { "epoch": 0.0782425913957618, "grad_norm": 0.30078125, "learning_rate": 0.001229831746604876, "loss": 0.1602, "step": 44128 }, { "epoch": 0.07824613756107161, "grad_norm": 1.1328125, "learning_rate": 0.0012297728502268378, "loss": 0.3035, "step": 44130 }, { "epoch": 0.07824968372638143, "grad_norm": 1.1875, "learning_rate": 0.001229713953281249, "loss": 0.2659, "step": 44132 }, { "epoch": 0.07825322989169124, "grad_norm": 0.30859375, "learning_rate": 0.0012296550557683663, "loss": 0.2165, "step": 44134 }, { "epoch": 0.07825677605700106, "grad_norm": 0.25390625, "learning_rate": 0.0012295961576884481, "loss": 0.1705, "step": 44136 }, { "epoch": 0.07826032222231087, "grad_norm": 0.54296875, "learning_rate": 0.0012295372590417518, "loss": 0.1417, "step": 44138 }, { "epoch": 0.07826386838762069, "grad_norm": 0.296875, "learning_rate": 0.0012294783598285344, "loss": 0.2231, "step": 44140 }, { "epoch": 0.0782674145529305, "grad_norm": 0.80078125, "learning_rate": 0.0012294194600490538, "loss": 0.1928, "step": 44142 }, { "epoch": 0.07827096071824031, "grad_norm": 0.490234375, "learning_rate": 0.0012293605597035677, "loss": 0.1953, "step": 44144 }, { "epoch": 0.07827450688355014, "grad_norm": 0.1748046875, "learning_rate": 0.0012293016587923335, "loss": 0.1695, "step": 44146 }, { "epoch": 0.07827805304885996, "grad_norm": 0.33203125, "learning_rate": 0.0012292427573156093, "loss": 0.1751, "step": 44148 }, { "epoch": 0.07828159921416977, "grad_norm": 1.1484375, "learning_rate": 0.001229183855273652, "loss": 0.2645, "step": 44150 }, { "epoch": 0.07828514537947959, "grad_norm": 0.5078125, "learning_rate": 0.0012291249526667199, "loss": 0.2074, "step": 44152 }, { "epoch": 0.0782886915447894, "grad_norm": 0.33984375, "learning_rate": 0.0012290660494950704, "loss": 0.1667, "step": 44154 }, { "epoch": 0.07829223771009922, "grad_norm": 0.201171875, "learning_rate": 0.0012290071457589605, "loss": 0.1406, "step": 44156 }, { "epoch": 0.07829578387540903, "grad_norm": 0.57421875, "learning_rate": 0.0012289482414586485, "loss": 0.147, "step": 44158 }, { "epoch": 0.07829933004071885, "grad_norm": 0.2890625, "learning_rate": 0.0012288893365943918, "loss": 0.1789, "step": 44160 }, { "epoch": 0.07830287620602866, "grad_norm": 0.9921875, "learning_rate": 0.001228830431166448, "loss": 0.2022, "step": 44162 }, { "epoch": 0.07830642237133847, "grad_norm": 0.578125, "learning_rate": 0.0012287715251750747, "loss": 0.1928, "step": 44164 }, { "epoch": 0.07830996853664829, "grad_norm": 1.0546875, "learning_rate": 0.0012287126186205294, "loss": 0.4551, "step": 44166 }, { "epoch": 0.0783135147019581, "grad_norm": 0.59765625, "learning_rate": 0.0012286537115030701, "loss": 0.1955, "step": 44168 }, { "epoch": 0.07831706086726792, "grad_norm": 0.2236328125, "learning_rate": 0.0012285948038229541, "loss": 0.2041, "step": 44170 }, { "epoch": 0.07832060703257773, "grad_norm": 0.59765625, "learning_rate": 0.001228535895580439, "loss": 0.1894, "step": 44172 }, { "epoch": 0.07832415319788755, "grad_norm": 0.5625, "learning_rate": 0.0012284769867757829, "loss": 0.3237, "step": 44174 }, { "epoch": 0.07832769936319736, "grad_norm": 0.392578125, "learning_rate": 0.001228418077409243, "loss": 0.1731, "step": 44176 }, { "epoch": 0.07833124552850718, "grad_norm": 1.2421875, "learning_rate": 0.001228359167481077, "loss": 0.1541, "step": 44178 }, { "epoch": 0.07833479169381699, "grad_norm": 0.4453125, "learning_rate": 0.0012283002569915423, "loss": 0.1867, "step": 44180 }, { "epoch": 0.0783383378591268, "grad_norm": 0.546875, "learning_rate": 0.0012282413459408972, "loss": 0.1744, "step": 44182 }, { "epoch": 0.07834188402443662, "grad_norm": 0.6015625, "learning_rate": 0.0012281824343293986, "loss": 0.1522, "step": 44184 }, { "epoch": 0.07834543018974643, "grad_norm": 0.361328125, "learning_rate": 0.0012281235221573046, "loss": 0.2065, "step": 44186 }, { "epoch": 0.07834897635505625, "grad_norm": 0.43359375, "learning_rate": 0.0012280646094248726, "loss": 0.1875, "step": 44188 }, { "epoch": 0.07835252252036608, "grad_norm": 1.421875, "learning_rate": 0.0012280056961323606, "loss": 0.2834, "step": 44190 }, { "epoch": 0.07835606868567589, "grad_norm": 0.384765625, "learning_rate": 0.0012279467822800259, "loss": 0.145, "step": 44192 }, { "epoch": 0.0783596148509857, "grad_norm": 0.6484375, "learning_rate": 0.0012278878678681265, "loss": 0.1696, "step": 44194 }, { "epoch": 0.07836316101629552, "grad_norm": 2.5, "learning_rate": 0.0012278289528969198, "loss": 0.2415, "step": 44196 }, { "epoch": 0.07836670718160534, "grad_norm": 0.6015625, "learning_rate": 0.0012277700373666636, "loss": 0.2059, "step": 44198 }, { "epoch": 0.07837025334691515, "grad_norm": 1.171875, "learning_rate": 0.0012277111212776152, "loss": 0.1809, "step": 44200 }, { "epoch": 0.07837379951222496, "grad_norm": 0.421875, "learning_rate": 0.0012276522046300326, "loss": 0.1701, "step": 44202 }, { "epoch": 0.07837734567753478, "grad_norm": 0.41015625, "learning_rate": 0.0012275932874241734, "loss": 0.196, "step": 44204 }, { "epoch": 0.0783808918428446, "grad_norm": 0.462890625, "learning_rate": 0.0012275343696602954, "loss": 0.2472, "step": 44206 }, { "epoch": 0.07838443800815441, "grad_norm": 0.6015625, "learning_rate": 0.001227475451338656, "loss": 0.2176, "step": 44208 }, { "epoch": 0.07838798417346422, "grad_norm": 0.73828125, "learning_rate": 0.001227416532459513, "loss": 0.1812, "step": 44210 }, { "epoch": 0.07839153033877404, "grad_norm": 0.369140625, "learning_rate": 0.0012273576130231241, "loss": 0.1526, "step": 44212 }, { "epoch": 0.07839507650408385, "grad_norm": 16.125, "learning_rate": 0.0012272986930297472, "loss": 0.1933, "step": 44214 }, { "epoch": 0.07839862266939367, "grad_norm": 0.5078125, "learning_rate": 0.00122723977247964, "loss": 0.1909, "step": 44216 }, { "epoch": 0.07840216883470348, "grad_norm": 0.1884765625, "learning_rate": 0.0012271808513730593, "loss": 0.1584, "step": 44218 }, { "epoch": 0.0784057150000133, "grad_norm": 0.2890625, "learning_rate": 0.0012271219297102635, "loss": 0.1361, "step": 44220 }, { "epoch": 0.07840926116532311, "grad_norm": 0.2421875, "learning_rate": 0.0012270630074915107, "loss": 0.187, "step": 44222 }, { "epoch": 0.07841280733063292, "grad_norm": 0.32421875, "learning_rate": 0.0012270040847170577, "loss": 0.1508, "step": 44224 }, { "epoch": 0.07841635349594274, "grad_norm": 0.287109375, "learning_rate": 0.0012269451613871629, "loss": 0.1958, "step": 44226 }, { "epoch": 0.07841989966125255, "grad_norm": 0.48828125, "learning_rate": 0.0012268862375020835, "loss": 0.2241, "step": 44228 }, { "epoch": 0.07842344582656237, "grad_norm": 1.1328125, "learning_rate": 0.0012268273130620776, "loss": 0.2499, "step": 44230 }, { "epoch": 0.07842699199187218, "grad_norm": 0.419921875, "learning_rate": 0.0012267683880674025, "loss": 0.2202, "step": 44232 }, { "epoch": 0.078430538157182, "grad_norm": 0.671875, "learning_rate": 0.0012267094625183158, "loss": 0.1681, "step": 44234 }, { "epoch": 0.07843408432249183, "grad_norm": 0.259765625, "learning_rate": 0.0012266505364150761, "loss": 0.1415, "step": 44236 }, { "epoch": 0.07843763048780164, "grad_norm": 0.640625, "learning_rate": 0.0012265916097579403, "loss": 0.1407, "step": 44238 }, { "epoch": 0.07844117665311146, "grad_norm": 0.8125, "learning_rate": 0.0012265326825471664, "loss": 0.2215, "step": 44240 }, { "epoch": 0.07844472281842127, "grad_norm": 0.314453125, "learning_rate": 0.0012264737547830118, "loss": 0.2346, "step": 44242 }, { "epoch": 0.07844826898373108, "grad_norm": 0.361328125, "learning_rate": 0.001226414826465735, "loss": 0.2412, "step": 44244 }, { "epoch": 0.0784518151490409, "grad_norm": 0.55078125, "learning_rate": 0.0012263558975955924, "loss": 0.1833, "step": 44246 }, { "epoch": 0.07845536131435071, "grad_norm": 0.32421875, "learning_rate": 0.001226296968172843, "loss": 0.2167, "step": 44248 }, { "epoch": 0.07845890747966053, "grad_norm": 1.6875, "learning_rate": 0.001226238038197744, "loss": 0.1639, "step": 44250 }, { "epoch": 0.07846245364497034, "grad_norm": 1.015625, "learning_rate": 0.0012261791076705532, "loss": 0.1505, "step": 44252 }, { "epoch": 0.07846599981028016, "grad_norm": 0.58984375, "learning_rate": 0.001226120176591528, "loss": 0.1524, "step": 44254 }, { "epoch": 0.07846954597558997, "grad_norm": 0.515625, "learning_rate": 0.001226061244960927, "loss": 0.1867, "step": 44256 }, { "epoch": 0.07847309214089979, "grad_norm": 0.2451171875, "learning_rate": 0.0012260023127790066, "loss": 0.2192, "step": 44258 }, { "epoch": 0.0784766383062096, "grad_norm": 0.80859375, "learning_rate": 0.0012259433800460256, "loss": 0.2341, "step": 44260 }, { "epoch": 0.07848018447151942, "grad_norm": 0.58984375, "learning_rate": 0.0012258844467622414, "loss": 0.1826, "step": 44262 }, { "epoch": 0.07848373063682923, "grad_norm": 0.3359375, "learning_rate": 0.0012258255129279116, "loss": 0.1595, "step": 44264 }, { "epoch": 0.07848727680213904, "grad_norm": 0.7578125, "learning_rate": 0.0012257665785432943, "loss": 0.2051, "step": 44266 }, { "epoch": 0.07849082296744886, "grad_norm": 0.6953125, "learning_rate": 0.0012257076436086472, "loss": 0.2343, "step": 44268 }, { "epoch": 0.07849436913275867, "grad_norm": 0.63671875, "learning_rate": 0.0012256487081242276, "loss": 0.1507, "step": 44270 }, { "epoch": 0.07849791529806849, "grad_norm": 0.7109375, "learning_rate": 0.0012255897720902938, "loss": 0.1808, "step": 44272 }, { "epoch": 0.0785014614633783, "grad_norm": 0.2294921875, "learning_rate": 0.0012255308355071028, "loss": 0.1519, "step": 44274 }, { "epoch": 0.07850500762868812, "grad_norm": 0.412109375, "learning_rate": 0.0012254718983749134, "loss": 0.1915, "step": 44276 }, { "epoch": 0.07850855379399793, "grad_norm": 0.296875, "learning_rate": 0.0012254129606939824, "loss": 0.1936, "step": 44278 }, { "epoch": 0.07851209995930775, "grad_norm": 0.625, "learning_rate": 0.001225354022464568, "loss": 0.1712, "step": 44280 }, { "epoch": 0.07851564612461757, "grad_norm": 0.6796875, "learning_rate": 0.0012252950836869282, "loss": 0.2302, "step": 44282 }, { "epoch": 0.07851919228992739, "grad_norm": 0.498046875, "learning_rate": 0.0012252361443613202, "loss": 0.2189, "step": 44284 }, { "epoch": 0.0785227384552372, "grad_norm": 0.2578125, "learning_rate": 0.001225177204488002, "loss": 0.1774, "step": 44286 }, { "epoch": 0.07852628462054702, "grad_norm": 0.42578125, "learning_rate": 0.0012251182640672317, "loss": 0.1623, "step": 44288 }, { "epoch": 0.07852983078585683, "grad_norm": 0.36328125, "learning_rate": 0.0012250593230992669, "loss": 0.2354, "step": 44290 }, { "epoch": 0.07853337695116665, "grad_norm": 0.244140625, "learning_rate": 0.0012250003815843652, "loss": 0.1424, "step": 44292 }, { "epoch": 0.07853692311647646, "grad_norm": 0.28515625, "learning_rate": 0.001224941439522784, "loss": 0.1728, "step": 44294 }, { "epoch": 0.07854046928178628, "grad_norm": 0.361328125, "learning_rate": 0.0012248824969147817, "loss": 0.1976, "step": 44296 }, { "epoch": 0.07854401544709609, "grad_norm": 0.328125, "learning_rate": 0.001224823553760616, "loss": 0.379, "step": 44298 }, { "epoch": 0.0785475616124059, "grad_norm": 0.3828125, "learning_rate": 0.0012247646100605448, "loss": 0.2251, "step": 44300 }, { "epoch": 0.07855110777771572, "grad_norm": 0.2412109375, "learning_rate": 0.001224705665814825, "loss": 0.1977, "step": 44302 }, { "epoch": 0.07855465394302553, "grad_norm": 1.609375, "learning_rate": 0.0012246467210237158, "loss": 0.3025, "step": 44304 }, { "epoch": 0.07855820010833535, "grad_norm": 0.390625, "learning_rate": 0.0012245877756874737, "loss": 0.2302, "step": 44306 }, { "epoch": 0.07856174627364516, "grad_norm": 0.3359375, "learning_rate": 0.0012245288298063572, "loss": 0.1292, "step": 44308 }, { "epoch": 0.07856529243895498, "grad_norm": 0.482421875, "learning_rate": 0.0012244698833806242, "loss": 0.3673, "step": 44310 }, { "epoch": 0.07856883860426479, "grad_norm": 0.6640625, "learning_rate": 0.0012244109364105322, "loss": 0.226, "step": 44312 }, { "epoch": 0.07857238476957461, "grad_norm": 0.345703125, "learning_rate": 0.0012243519888963388, "loss": 0.1658, "step": 44314 }, { "epoch": 0.07857593093488442, "grad_norm": 0.69140625, "learning_rate": 0.001224293040838302, "loss": 0.1876, "step": 44316 }, { "epoch": 0.07857947710019424, "grad_norm": 1.0234375, "learning_rate": 0.0012242340922366797, "loss": 0.2075, "step": 44318 }, { "epoch": 0.07858302326550405, "grad_norm": 0.251953125, "learning_rate": 0.0012241751430917295, "loss": 0.1773, "step": 44320 }, { "epoch": 0.07858656943081387, "grad_norm": 2.796875, "learning_rate": 0.0012241161934037096, "loss": 0.2625, "step": 44322 }, { "epoch": 0.07859011559612368, "grad_norm": 0.53515625, "learning_rate": 0.0012240572431728776, "loss": 0.1959, "step": 44324 }, { "epoch": 0.07859366176143351, "grad_norm": 0.29296875, "learning_rate": 0.0012239982923994914, "loss": 0.1434, "step": 44326 }, { "epoch": 0.07859720792674332, "grad_norm": 1.65625, "learning_rate": 0.0012239393410838084, "loss": 0.2247, "step": 44328 }, { "epoch": 0.07860075409205314, "grad_norm": 0.275390625, "learning_rate": 0.0012238803892260868, "loss": 0.3316, "step": 44330 }, { "epoch": 0.07860430025736295, "grad_norm": 1.390625, "learning_rate": 0.0012238214368265842, "loss": 0.291, "step": 44332 }, { "epoch": 0.07860784642267277, "grad_norm": 0.62109375, "learning_rate": 0.0012237624838855588, "loss": 0.1541, "step": 44334 }, { "epoch": 0.07861139258798258, "grad_norm": 0.431640625, "learning_rate": 0.001223703530403268, "loss": 0.2267, "step": 44336 }, { "epoch": 0.0786149387532924, "grad_norm": 0.22265625, "learning_rate": 0.0012236445763799697, "loss": 0.1631, "step": 44338 }, { "epoch": 0.07861848491860221, "grad_norm": 0.42578125, "learning_rate": 0.001223585621815922, "loss": 0.2138, "step": 44340 }, { "epoch": 0.07862203108391203, "grad_norm": 0.34765625, "learning_rate": 0.0012235266667113829, "loss": 0.1716, "step": 44342 }, { "epoch": 0.07862557724922184, "grad_norm": 0.6875, "learning_rate": 0.0012234677110666095, "loss": 0.1849, "step": 44344 }, { "epoch": 0.07862912341453165, "grad_norm": 0.6328125, "learning_rate": 0.0012234087548818604, "loss": 0.1936, "step": 44346 }, { "epoch": 0.07863266957984147, "grad_norm": 0.453125, "learning_rate": 0.0012233497981573927, "loss": 0.1808, "step": 44348 }, { "epoch": 0.07863621574515128, "grad_norm": 0.23828125, "learning_rate": 0.0012232908408934648, "loss": 0.2572, "step": 44350 }, { "epoch": 0.0786397619104611, "grad_norm": 0.3984375, "learning_rate": 0.0012232318830903343, "loss": 0.1956, "step": 44352 }, { "epoch": 0.07864330807577091, "grad_norm": 0.53125, "learning_rate": 0.0012231729247482593, "loss": 0.3447, "step": 44354 }, { "epoch": 0.07864685424108073, "grad_norm": 0.37109375, "learning_rate": 0.0012231139658674972, "loss": 0.2494, "step": 44356 }, { "epoch": 0.07865040040639054, "grad_norm": 0.376953125, "learning_rate": 0.0012230550064483065, "loss": 0.1548, "step": 44358 }, { "epoch": 0.07865394657170036, "grad_norm": 0.36328125, "learning_rate": 0.0012229960464909444, "loss": 0.1929, "step": 44360 }, { "epoch": 0.07865749273701017, "grad_norm": 0.73828125, "learning_rate": 0.001222937085995669, "loss": 0.2571, "step": 44362 }, { "epoch": 0.07866103890231999, "grad_norm": 0.427734375, "learning_rate": 0.0012228781249627382, "loss": 0.1893, "step": 44364 }, { "epoch": 0.0786645850676298, "grad_norm": 0.55859375, "learning_rate": 0.0012228191633924103, "loss": 0.2091, "step": 44366 }, { "epoch": 0.07866813123293961, "grad_norm": 0.498046875, "learning_rate": 0.0012227602012849423, "loss": 0.1706, "step": 44368 }, { "epoch": 0.07867167739824943, "grad_norm": 0.30859375, "learning_rate": 0.0012227012386405926, "loss": 0.2203, "step": 44370 }, { "epoch": 0.07867522356355926, "grad_norm": 0.70703125, "learning_rate": 0.001222642275459619, "loss": 0.1923, "step": 44372 }, { "epoch": 0.07867876972886907, "grad_norm": 0.29296875, "learning_rate": 0.0012225833117422793, "loss": 0.1735, "step": 44374 }, { "epoch": 0.07868231589417889, "grad_norm": 1.578125, "learning_rate": 0.0012225243474888313, "loss": 0.3383, "step": 44376 }, { "epoch": 0.0786858620594887, "grad_norm": 0.263671875, "learning_rate": 0.0012224653826995331, "loss": 0.1831, "step": 44378 }, { "epoch": 0.07868940822479852, "grad_norm": 0.8515625, "learning_rate": 0.0012224064173746423, "loss": 0.2498, "step": 44380 }, { "epoch": 0.07869295439010833, "grad_norm": 0.2314453125, "learning_rate": 0.0012223474515144171, "loss": 0.1599, "step": 44382 }, { "epoch": 0.07869650055541814, "grad_norm": 0.89453125, "learning_rate": 0.0012222884851191152, "loss": 0.1257, "step": 44384 }, { "epoch": 0.07870004672072796, "grad_norm": 0.50390625, "learning_rate": 0.0012222295181889943, "loss": 0.2341, "step": 44386 }, { "epoch": 0.07870359288603777, "grad_norm": 0.1796875, "learning_rate": 0.0012221705507243126, "loss": 0.2409, "step": 44388 }, { "epoch": 0.07870713905134759, "grad_norm": 0.4375, "learning_rate": 0.001222111582725328, "loss": 0.1694, "step": 44390 }, { "epoch": 0.0787106852166574, "grad_norm": 0.2412109375, "learning_rate": 0.001222052614192298, "loss": 0.1257, "step": 44392 }, { "epoch": 0.07871423138196722, "grad_norm": 0.380859375, "learning_rate": 0.001221993645125481, "loss": 0.2225, "step": 44394 }, { "epoch": 0.07871777754727703, "grad_norm": 0.369140625, "learning_rate": 0.0012219346755251346, "loss": 0.2193, "step": 44396 }, { "epoch": 0.07872132371258685, "grad_norm": 0.3203125, "learning_rate": 0.0012218757053915166, "loss": 0.2669, "step": 44398 }, { "epoch": 0.07872486987789666, "grad_norm": 0.373046875, "learning_rate": 0.0012218167347248852, "loss": 0.2178, "step": 44400 }, { "epoch": 0.07872841604320648, "grad_norm": 0.2197265625, "learning_rate": 0.0012217577635254982, "loss": 0.1881, "step": 44402 }, { "epoch": 0.07873196220851629, "grad_norm": 0.54296875, "learning_rate": 0.0012216987917936135, "loss": 0.1572, "step": 44404 }, { "epoch": 0.0787355083738261, "grad_norm": 0.8671875, "learning_rate": 0.0012216398195294888, "loss": 0.1803, "step": 44406 }, { "epoch": 0.07873905453913592, "grad_norm": 0.625, "learning_rate": 0.0012215808467333825, "loss": 0.1547, "step": 44408 }, { "epoch": 0.07874260070444573, "grad_norm": 1.1328125, "learning_rate": 0.0012215218734055518, "loss": 0.2237, "step": 44410 }, { "epoch": 0.07874614686975555, "grad_norm": 0.59765625, "learning_rate": 0.0012214628995462554, "loss": 0.1764, "step": 44412 }, { "epoch": 0.07874969303506536, "grad_norm": 0.1943359375, "learning_rate": 0.0012214039251557504, "loss": 0.1767, "step": 44414 }, { "epoch": 0.07875323920037518, "grad_norm": 0.1923828125, "learning_rate": 0.0012213449502342954, "loss": 0.1953, "step": 44416 }, { "epoch": 0.078756785365685, "grad_norm": 0.88671875, "learning_rate": 0.0012212859747821484, "loss": 0.229, "step": 44418 }, { "epoch": 0.07876033153099482, "grad_norm": 0.404296875, "learning_rate": 0.0012212269987995667, "loss": 0.1888, "step": 44420 }, { "epoch": 0.07876387769630463, "grad_norm": 0.404296875, "learning_rate": 0.0012211680222868082, "loss": 0.1798, "step": 44422 }, { "epoch": 0.07876742386161445, "grad_norm": 0.7734375, "learning_rate": 0.0012211090452441315, "loss": 0.1761, "step": 44424 }, { "epoch": 0.07877097002692426, "grad_norm": 0.267578125, "learning_rate": 0.001221050067671794, "loss": 0.2042, "step": 44426 }, { "epoch": 0.07877451619223408, "grad_norm": 3.90625, "learning_rate": 0.0012209910895700541, "loss": 0.2524, "step": 44428 }, { "epoch": 0.0787780623575439, "grad_norm": 0.28125, "learning_rate": 0.0012209321109391692, "loss": 0.1649, "step": 44430 }, { "epoch": 0.07878160852285371, "grad_norm": 0.2890625, "learning_rate": 0.0012208731317793974, "loss": 0.2056, "step": 44432 }, { "epoch": 0.07878515468816352, "grad_norm": 0.6171875, "learning_rate": 0.001220814152090997, "loss": 0.2286, "step": 44434 }, { "epoch": 0.07878870085347334, "grad_norm": 0.31640625, "learning_rate": 0.0012207551718742256, "loss": 0.2059, "step": 44436 }, { "epoch": 0.07879224701878315, "grad_norm": 0.72265625, "learning_rate": 0.0012206961911293414, "loss": 0.2167, "step": 44438 }, { "epoch": 0.07879579318409297, "grad_norm": 0.2060546875, "learning_rate": 0.001220637209856602, "loss": 0.1444, "step": 44440 }, { "epoch": 0.07879933934940278, "grad_norm": 0.2333984375, "learning_rate": 0.0012205782280562654, "loss": 0.1928, "step": 44442 }, { "epoch": 0.0788028855147126, "grad_norm": 0.3359375, "learning_rate": 0.0012205192457285898, "loss": 0.1935, "step": 44444 }, { "epoch": 0.07880643168002241, "grad_norm": 0.63671875, "learning_rate": 0.0012204602628738328, "loss": 0.2066, "step": 44446 }, { "epoch": 0.07880997784533222, "grad_norm": 0.44140625, "learning_rate": 0.001220401279492253, "loss": 0.1518, "step": 44448 }, { "epoch": 0.07881352401064204, "grad_norm": 0.57421875, "learning_rate": 0.0012203422955841077, "loss": 0.2369, "step": 44450 }, { "epoch": 0.07881707017595185, "grad_norm": 0.71484375, "learning_rate": 0.0012202833111496553, "loss": 0.1709, "step": 44452 }, { "epoch": 0.07882061634126167, "grad_norm": 0.72265625, "learning_rate": 0.0012202243261891532, "loss": 0.1982, "step": 44454 }, { "epoch": 0.07882416250657148, "grad_norm": 0.71875, "learning_rate": 0.0012201653407028599, "loss": 0.2196, "step": 44456 }, { "epoch": 0.0788277086718813, "grad_norm": 1.078125, "learning_rate": 0.0012201063546910332, "loss": 0.2459, "step": 44458 }, { "epoch": 0.07883125483719111, "grad_norm": 0.1708984375, "learning_rate": 0.0012200473681539313, "loss": 0.1622, "step": 44460 }, { "epoch": 0.07883480100250094, "grad_norm": 0.25390625, "learning_rate": 0.0012199883810918115, "loss": 0.1798, "step": 44462 }, { "epoch": 0.07883834716781075, "grad_norm": 0.333984375, "learning_rate": 0.0012199293935049324, "loss": 0.1725, "step": 44464 }, { "epoch": 0.07884189333312057, "grad_norm": 0.30078125, "learning_rate": 0.001219870405393552, "loss": 0.1846, "step": 44466 }, { "epoch": 0.07884543949843038, "grad_norm": 0.56640625, "learning_rate": 0.001219811416757928, "loss": 0.1881, "step": 44468 }, { "epoch": 0.0788489856637402, "grad_norm": 2.484375, "learning_rate": 0.001219752427598318, "loss": 0.196, "step": 44470 }, { "epoch": 0.07885253182905001, "grad_norm": 0.98828125, "learning_rate": 0.0012196934379149812, "loss": 0.2914, "step": 44472 }, { "epoch": 0.07885607799435983, "grad_norm": 0.337890625, "learning_rate": 0.0012196344477081742, "loss": 0.2266, "step": 44474 }, { "epoch": 0.07885962415966964, "grad_norm": 1.1171875, "learning_rate": 0.001219575456978156, "loss": 0.4136, "step": 44476 }, { "epoch": 0.07886317032497946, "grad_norm": 0.5078125, "learning_rate": 0.001219516465725184, "loss": 0.1725, "step": 44478 }, { "epoch": 0.07886671649028927, "grad_norm": 0.181640625, "learning_rate": 0.0012194574739495165, "loss": 0.1618, "step": 44480 }, { "epoch": 0.07887026265559909, "grad_norm": 0.6875, "learning_rate": 0.0012193984816514115, "loss": 0.1588, "step": 44482 }, { "epoch": 0.0788738088209089, "grad_norm": 2.609375, "learning_rate": 0.0012193394888311267, "loss": 0.2522, "step": 44484 }, { "epoch": 0.07887735498621871, "grad_norm": 0.279296875, "learning_rate": 0.0012192804954889202, "loss": 0.2048, "step": 44486 }, { "epoch": 0.07888090115152853, "grad_norm": 0.2060546875, "learning_rate": 0.0012192215016250502, "loss": 0.1699, "step": 44488 }, { "epoch": 0.07888444731683834, "grad_norm": 0.3984375, "learning_rate": 0.001219162507239775, "loss": 0.2451, "step": 44490 }, { "epoch": 0.07888799348214816, "grad_norm": 0.64453125, "learning_rate": 0.0012191035123333518, "loss": 0.2477, "step": 44492 }, { "epoch": 0.07889153964745797, "grad_norm": 0.5703125, "learning_rate": 0.0012190445169060393, "loss": 0.2017, "step": 44494 }, { "epoch": 0.07889508581276779, "grad_norm": 0.255859375, "learning_rate": 0.001218985520958095, "loss": 0.3788, "step": 44496 }, { "epoch": 0.0788986319780776, "grad_norm": 0.49609375, "learning_rate": 0.001218926524489777, "loss": 0.1462, "step": 44498 }, { "epoch": 0.07890217814338742, "grad_norm": 1.015625, "learning_rate": 0.0012188675275013436, "loss": 0.1653, "step": 44500 }, { "epoch": 0.07890572430869723, "grad_norm": 0.59765625, "learning_rate": 0.001218808529993053, "loss": 0.1698, "step": 44502 }, { "epoch": 0.07890927047400705, "grad_norm": 0.447265625, "learning_rate": 0.0012187495319651625, "loss": 0.1661, "step": 44504 }, { "epoch": 0.07891281663931686, "grad_norm": 0.6796875, "learning_rate": 0.0012186905334179306, "loss": 0.1758, "step": 44506 }, { "epoch": 0.07891636280462669, "grad_norm": 0.33984375, "learning_rate": 0.0012186315343516152, "loss": 0.1868, "step": 44508 }, { "epoch": 0.0789199089699365, "grad_norm": 0.5859375, "learning_rate": 0.0012185725347664744, "loss": 0.1718, "step": 44510 }, { "epoch": 0.07892345513524632, "grad_norm": 0.68359375, "learning_rate": 0.0012185135346627666, "loss": 0.1853, "step": 44512 }, { "epoch": 0.07892700130055613, "grad_norm": 0.859375, "learning_rate": 0.0012184545340407494, "loss": 0.2327, "step": 44514 }, { "epoch": 0.07893054746586595, "grad_norm": 0.314453125, "learning_rate": 0.0012183955329006801, "loss": 0.1678, "step": 44516 }, { "epoch": 0.07893409363117576, "grad_norm": 2.09375, "learning_rate": 0.0012183365312428184, "loss": 0.1916, "step": 44518 }, { "epoch": 0.07893763979648558, "grad_norm": 0.43359375, "learning_rate": 0.0012182775290674208, "loss": 0.186, "step": 44520 }, { "epoch": 0.07894118596179539, "grad_norm": 0.494140625, "learning_rate": 0.0012182185263747466, "loss": 0.1719, "step": 44522 }, { "epoch": 0.0789447321271052, "grad_norm": 2.03125, "learning_rate": 0.0012181595231650526, "loss": 0.1501, "step": 44524 }, { "epoch": 0.07894827829241502, "grad_norm": 0.54296875, "learning_rate": 0.001218100519438598, "loss": 0.4054, "step": 44526 }, { "epoch": 0.07895182445772483, "grad_norm": 0.609375, "learning_rate": 0.0012180415151956398, "loss": 0.1818, "step": 44528 }, { "epoch": 0.07895537062303465, "grad_norm": 10.625, "learning_rate": 0.001217982510436437, "loss": 0.1972, "step": 44530 }, { "epoch": 0.07895891678834446, "grad_norm": 0.87890625, "learning_rate": 0.0012179235051612471, "loss": 0.1913, "step": 44532 }, { "epoch": 0.07896246295365428, "grad_norm": 0.1728515625, "learning_rate": 0.0012178644993703284, "loss": 0.2535, "step": 44534 }, { "epoch": 0.07896600911896409, "grad_norm": 1.78125, "learning_rate": 0.0012178054930639386, "loss": 0.5426, "step": 44536 }, { "epoch": 0.0789695552842739, "grad_norm": 0.75390625, "learning_rate": 0.0012177464862423363, "loss": 0.3335, "step": 44538 }, { "epoch": 0.07897310144958372, "grad_norm": 0.78515625, "learning_rate": 0.001217687478905779, "loss": 0.2334, "step": 44540 }, { "epoch": 0.07897664761489354, "grad_norm": 0.494140625, "learning_rate": 0.0012176284710545251, "loss": 0.1577, "step": 44542 }, { "epoch": 0.07898019378020335, "grad_norm": 0.9375, "learning_rate": 0.0012175694626888327, "loss": 0.2029, "step": 44544 }, { "epoch": 0.07898373994551317, "grad_norm": 0.283203125, "learning_rate": 0.0012175104538089596, "loss": 0.161, "step": 44546 }, { "epoch": 0.07898728611082298, "grad_norm": 0.63671875, "learning_rate": 0.001217451444415164, "loss": 0.2466, "step": 44548 }, { "epoch": 0.0789908322761328, "grad_norm": 2.609375, "learning_rate": 0.0012173924345077044, "loss": 0.2694, "step": 44550 }, { "epoch": 0.07899437844144261, "grad_norm": 0.921875, "learning_rate": 0.0012173334240868383, "loss": 0.204, "step": 44552 }, { "epoch": 0.07899792460675244, "grad_norm": 0.447265625, "learning_rate": 0.0012172744131528242, "loss": 0.1871, "step": 44554 }, { "epoch": 0.07900147077206225, "grad_norm": 0.294921875, "learning_rate": 0.0012172154017059192, "loss": 0.1755, "step": 44556 }, { "epoch": 0.07900501693737207, "grad_norm": 0.9453125, "learning_rate": 0.0012171563897463826, "loss": 0.1367, "step": 44558 }, { "epoch": 0.07900856310268188, "grad_norm": 0.328125, "learning_rate": 0.0012170973772744717, "loss": 0.1748, "step": 44560 }, { "epoch": 0.0790121092679917, "grad_norm": 0.43359375, "learning_rate": 0.0012170383642904454, "loss": 0.1694, "step": 44562 }, { "epoch": 0.07901565543330151, "grad_norm": 1.9921875, "learning_rate": 0.001216979350794561, "loss": 0.2361, "step": 44564 }, { "epoch": 0.07901920159861132, "grad_norm": 0.322265625, "learning_rate": 0.0012169203367870768, "loss": 0.1486, "step": 44566 }, { "epoch": 0.07902274776392114, "grad_norm": 0.41796875, "learning_rate": 0.0012168613222682512, "loss": 0.2215, "step": 44568 }, { "epoch": 0.07902629392923095, "grad_norm": 0.3046875, "learning_rate": 0.0012168023072383419, "loss": 0.1836, "step": 44570 }, { "epoch": 0.07902984009454077, "grad_norm": 0.271484375, "learning_rate": 0.0012167432916976072, "loss": 0.1374, "step": 44572 }, { "epoch": 0.07903338625985058, "grad_norm": 0.3671875, "learning_rate": 0.0012166842756463048, "loss": 0.1978, "step": 44574 }, { "epoch": 0.0790369324251604, "grad_norm": 0.58203125, "learning_rate": 0.0012166252590846936, "loss": 0.1532, "step": 44576 }, { "epoch": 0.07904047859047021, "grad_norm": 1.203125, "learning_rate": 0.001216566242013031, "loss": 0.3239, "step": 44578 }, { "epoch": 0.07904402475578003, "grad_norm": 0.478515625, "learning_rate": 0.0012165072244315757, "loss": 0.1836, "step": 44580 }, { "epoch": 0.07904757092108984, "grad_norm": 0.326171875, "learning_rate": 0.001216448206340585, "loss": 0.1569, "step": 44582 }, { "epoch": 0.07905111708639966, "grad_norm": 0.2890625, "learning_rate": 0.0012163891877403179, "loss": 0.1708, "step": 44584 }, { "epoch": 0.07905466325170947, "grad_norm": 0.4765625, "learning_rate": 0.001216330168631032, "loss": 0.2592, "step": 44586 }, { "epoch": 0.07905820941701928, "grad_norm": 0.3671875, "learning_rate": 0.0012162711490129857, "loss": 0.15, "step": 44588 }, { "epoch": 0.0790617555823291, "grad_norm": 0.65234375, "learning_rate": 0.0012162121288864366, "loss": 0.203, "step": 44590 }, { "epoch": 0.07906530174763891, "grad_norm": 0.408203125, "learning_rate": 0.0012161531082516433, "loss": 0.1675, "step": 44592 }, { "epoch": 0.07906884791294873, "grad_norm": 0.5546875, "learning_rate": 0.0012160940871088636, "loss": 0.1991, "step": 44594 }, { "epoch": 0.07907239407825854, "grad_norm": 1.8046875, "learning_rate": 0.0012160350654583558, "loss": 0.2083, "step": 44596 }, { "epoch": 0.07907594024356837, "grad_norm": 0.44140625, "learning_rate": 0.0012159760433003784, "loss": 0.1516, "step": 44598 }, { "epoch": 0.07907948640887819, "grad_norm": 0.1796875, "learning_rate": 0.0012159170206351888, "loss": 0.1688, "step": 44600 }, { "epoch": 0.079083032574188, "grad_norm": 0.53125, "learning_rate": 0.0012158579974630454, "loss": 0.1724, "step": 44602 }, { "epoch": 0.07908657873949781, "grad_norm": 0.48046875, "learning_rate": 0.0012157989737842068, "loss": 0.155, "step": 44604 }, { "epoch": 0.07909012490480763, "grad_norm": 0.4921875, "learning_rate": 0.0012157399495989305, "loss": 0.1332, "step": 44606 }, { "epoch": 0.07909367107011744, "grad_norm": 1.0859375, "learning_rate": 0.0012156809249074753, "loss": 0.1708, "step": 44608 }, { "epoch": 0.07909721723542726, "grad_norm": 0.2890625, "learning_rate": 0.0012156218997100984, "loss": 0.2365, "step": 44610 }, { "epoch": 0.07910076340073707, "grad_norm": 0.314453125, "learning_rate": 0.0012155628740070585, "loss": 0.4034, "step": 44612 }, { "epoch": 0.07910430956604689, "grad_norm": 0.318359375, "learning_rate": 0.0012155038477986138, "loss": 0.189, "step": 44614 }, { "epoch": 0.0791078557313567, "grad_norm": 0.64453125, "learning_rate": 0.0012154448210850227, "loss": 0.2117, "step": 44616 }, { "epoch": 0.07911140189666652, "grad_norm": 0.453125, "learning_rate": 0.0012153857938665426, "loss": 0.1582, "step": 44618 }, { "epoch": 0.07911494806197633, "grad_norm": 0.47265625, "learning_rate": 0.0012153267661434322, "loss": 0.1902, "step": 44620 }, { "epoch": 0.07911849422728615, "grad_norm": 0.51171875, "learning_rate": 0.0012152677379159496, "loss": 0.1773, "step": 44622 }, { "epoch": 0.07912204039259596, "grad_norm": 0.77734375, "learning_rate": 0.0012152087091843527, "loss": 0.1589, "step": 44624 }, { "epoch": 0.07912558655790577, "grad_norm": 0.431640625, "learning_rate": 0.0012151496799489, "loss": 0.1953, "step": 44626 }, { "epoch": 0.07912913272321559, "grad_norm": 0.443359375, "learning_rate": 0.0012150906502098496, "loss": 0.1762, "step": 44628 }, { "epoch": 0.0791326788885254, "grad_norm": 0.298828125, "learning_rate": 0.0012150316199674592, "loss": 0.179, "step": 44630 }, { "epoch": 0.07913622505383522, "grad_norm": 4.125, "learning_rate": 0.0012149725892219873, "loss": 0.4707, "step": 44632 }, { "epoch": 0.07913977121914503, "grad_norm": 0.298828125, "learning_rate": 0.0012149135579736924, "loss": 0.1733, "step": 44634 }, { "epoch": 0.07914331738445485, "grad_norm": 0.50390625, "learning_rate": 0.001214854526222832, "loss": 0.1676, "step": 44636 }, { "epoch": 0.07914686354976466, "grad_norm": 1.9609375, "learning_rate": 0.0012147954939696647, "loss": 0.1954, "step": 44638 }, { "epoch": 0.07915040971507448, "grad_norm": 0.2333984375, "learning_rate": 0.001214736461214449, "loss": 0.2371, "step": 44640 }, { "epoch": 0.07915395588038429, "grad_norm": 0.65625, "learning_rate": 0.0012146774279574422, "loss": 0.3616, "step": 44642 }, { "epoch": 0.07915750204569412, "grad_norm": 0.400390625, "learning_rate": 0.0012146183941989033, "loss": 0.1332, "step": 44644 }, { "epoch": 0.07916104821100393, "grad_norm": 0.37890625, "learning_rate": 0.0012145593599390899, "loss": 0.14, "step": 44646 }, { "epoch": 0.07916459437631375, "grad_norm": 0.447265625, "learning_rate": 0.0012145003251782605, "loss": 0.2951, "step": 44648 }, { "epoch": 0.07916814054162356, "grad_norm": 0.9609375, "learning_rate": 0.001214441289916673, "loss": 0.1894, "step": 44650 }, { "epoch": 0.07917168670693338, "grad_norm": 1.0546875, "learning_rate": 0.0012143822541545858, "loss": 0.2709, "step": 44652 }, { "epoch": 0.07917523287224319, "grad_norm": 0.267578125, "learning_rate": 0.0012143232178922572, "loss": 0.1612, "step": 44654 }, { "epoch": 0.07917877903755301, "grad_norm": 1.0, "learning_rate": 0.001214264181129945, "loss": 0.2546, "step": 44656 }, { "epoch": 0.07918232520286282, "grad_norm": 0.79296875, "learning_rate": 0.0012142051438679081, "loss": 0.1651, "step": 44658 }, { "epoch": 0.07918587136817264, "grad_norm": 0.76171875, "learning_rate": 0.0012141461061064038, "loss": 0.2117, "step": 44660 }, { "epoch": 0.07918941753348245, "grad_norm": 0.27734375, "learning_rate": 0.001214087067845691, "loss": 0.2181, "step": 44662 }, { "epoch": 0.07919296369879227, "grad_norm": 0.232421875, "learning_rate": 0.0012140280290860277, "loss": 0.2224, "step": 44664 }, { "epoch": 0.07919650986410208, "grad_norm": 0.345703125, "learning_rate": 0.0012139689898276718, "loss": 0.1821, "step": 44666 }, { "epoch": 0.0792000560294119, "grad_norm": 1.1484375, "learning_rate": 0.001213909950070882, "loss": 0.1921, "step": 44668 }, { "epoch": 0.07920360219472171, "grad_norm": 0.609375, "learning_rate": 0.001213850909815916, "loss": 0.184, "step": 44670 }, { "epoch": 0.07920714836003152, "grad_norm": 0.2431640625, "learning_rate": 0.0012137918690630323, "loss": 0.197, "step": 44672 }, { "epoch": 0.07921069452534134, "grad_norm": 0.470703125, "learning_rate": 0.001213732827812489, "loss": 0.1993, "step": 44674 }, { "epoch": 0.07921424069065115, "grad_norm": 1.15625, "learning_rate": 0.0012136737860645444, "loss": 0.2726, "step": 44676 }, { "epoch": 0.07921778685596097, "grad_norm": 1.9921875, "learning_rate": 0.001213614743819457, "loss": 0.1815, "step": 44678 }, { "epoch": 0.07922133302127078, "grad_norm": 1.75, "learning_rate": 0.0012135557010774845, "loss": 0.2969, "step": 44680 }, { "epoch": 0.0792248791865806, "grad_norm": 0.31640625, "learning_rate": 0.0012134966578388853, "loss": 0.3256, "step": 44682 }, { "epoch": 0.07922842535189041, "grad_norm": 1.1015625, "learning_rate": 0.0012134376141039174, "loss": 0.2201, "step": 44684 }, { "epoch": 0.07923197151720023, "grad_norm": 0.357421875, "learning_rate": 0.0012133785698728397, "loss": 0.2177, "step": 44686 }, { "epoch": 0.07923551768251004, "grad_norm": 0.453125, "learning_rate": 0.0012133195251459096, "loss": 0.1736, "step": 44688 }, { "epoch": 0.07923906384781987, "grad_norm": 0.349609375, "learning_rate": 0.001213260479923386, "loss": 0.1802, "step": 44690 }, { "epoch": 0.07924261001312968, "grad_norm": 0.232421875, "learning_rate": 0.0012132014342055267, "loss": 0.164, "step": 44692 }, { "epoch": 0.0792461561784395, "grad_norm": 0.494140625, "learning_rate": 0.0012131423879925903, "loss": 0.174, "step": 44694 }, { "epoch": 0.07924970234374931, "grad_norm": 0.3203125, "learning_rate": 0.0012130833412848346, "loss": 0.2579, "step": 44696 }, { "epoch": 0.07925324850905913, "grad_norm": 0.65234375, "learning_rate": 0.0012130242940825181, "loss": 0.1863, "step": 44698 }, { "epoch": 0.07925679467436894, "grad_norm": 0.361328125, "learning_rate": 0.0012129652463858992, "loss": 0.2421, "step": 44700 }, { "epoch": 0.07926034083967876, "grad_norm": 0.44140625, "learning_rate": 0.0012129061981952355, "loss": 0.1532, "step": 44702 }, { "epoch": 0.07926388700498857, "grad_norm": 0.283203125, "learning_rate": 0.001212847149510786, "loss": 0.2103, "step": 44704 }, { "epoch": 0.07926743317029838, "grad_norm": 2.671875, "learning_rate": 0.0012127881003328087, "loss": 0.3094, "step": 44706 }, { "epoch": 0.0792709793356082, "grad_norm": 0.3515625, "learning_rate": 0.0012127290506615614, "loss": 0.1958, "step": 44708 }, { "epoch": 0.07927452550091801, "grad_norm": 1.0, "learning_rate": 0.001212670000497303, "loss": 0.1755, "step": 44710 }, { "epoch": 0.07927807166622783, "grad_norm": 3.265625, "learning_rate": 0.0012126109498402913, "loss": 0.3853, "step": 44712 }, { "epoch": 0.07928161783153764, "grad_norm": 0.28515625, "learning_rate": 0.0012125518986907848, "loss": 0.1888, "step": 44714 }, { "epoch": 0.07928516399684746, "grad_norm": 1.484375, "learning_rate": 0.0012124928470490417, "loss": 0.3766, "step": 44716 }, { "epoch": 0.07928871016215727, "grad_norm": 1.1015625, "learning_rate": 0.0012124337949153202, "loss": 0.1882, "step": 44718 }, { "epoch": 0.07929225632746709, "grad_norm": 0.296875, "learning_rate": 0.0012123747422898784, "loss": 0.1784, "step": 44720 }, { "epoch": 0.0792958024927769, "grad_norm": 0.361328125, "learning_rate": 0.0012123156891729752, "loss": 0.1882, "step": 44722 }, { "epoch": 0.07929934865808672, "grad_norm": 0.59765625, "learning_rate": 0.0012122566355648682, "loss": 0.1565, "step": 44724 }, { "epoch": 0.07930289482339653, "grad_norm": 0.376953125, "learning_rate": 0.0012121975814658157, "loss": 0.1643, "step": 44726 }, { "epoch": 0.07930644098870634, "grad_norm": 0.2314453125, "learning_rate": 0.0012121385268760764, "loss": 0.1594, "step": 44728 }, { "epoch": 0.07930998715401616, "grad_norm": 0.23046875, "learning_rate": 0.0012120794717959084, "loss": 0.1688, "step": 44730 }, { "epoch": 0.07931353331932597, "grad_norm": 0.578125, "learning_rate": 0.0012120204162255693, "loss": 0.1595, "step": 44732 }, { "epoch": 0.0793170794846358, "grad_norm": 0.470703125, "learning_rate": 0.0012119613601653183, "loss": 0.2332, "step": 44734 }, { "epoch": 0.07932062564994562, "grad_norm": 0.1728515625, "learning_rate": 0.0012119023036154136, "loss": 0.1445, "step": 44736 }, { "epoch": 0.07932417181525543, "grad_norm": 0.93359375, "learning_rate": 0.001211843246576113, "loss": 0.1803, "step": 44738 }, { "epoch": 0.07932771798056525, "grad_norm": 0.2275390625, "learning_rate": 0.0012117841890476753, "loss": 0.1515, "step": 44740 }, { "epoch": 0.07933126414587506, "grad_norm": 0.54296875, "learning_rate": 0.0012117251310303579, "loss": 0.2061, "step": 44742 }, { "epoch": 0.07933481031118488, "grad_norm": 3.625, "learning_rate": 0.0012116660725244202, "loss": 0.3262, "step": 44744 }, { "epoch": 0.07933835647649469, "grad_norm": 0.23046875, "learning_rate": 0.0012116070135301197, "loss": 0.1329, "step": 44746 }, { "epoch": 0.0793419026418045, "grad_norm": 0.271484375, "learning_rate": 0.001211547954047715, "loss": 0.1685, "step": 44748 }, { "epoch": 0.07934544880711432, "grad_norm": 0.90234375, "learning_rate": 0.0012114888940774641, "loss": 0.2283, "step": 44750 }, { "epoch": 0.07934899497242413, "grad_norm": 2.078125, "learning_rate": 0.0012114298336196262, "loss": 0.2036, "step": 44752 }, { "epoch": 0.07935254113773395, "grad_norm": 0.54296875, "learning_rate": 0.0012113707726744585, "loss": 0.1871, "step": 44754 }, { "epoch": 0.07935608730304376, "grad_norm": 0.5, "learning_rate": 0.00121131171124222, "loss": 0.1548, "step": 44756 }, { "epoch": 0.07935963346835358, "grad_norm": 1.828125, "learning_rate": 0.0012112526493231683, "loss": 0.2994, "step": 44758 }, { "epoch": 0.07936317963366339, "grad_norm": 0.34765625, "learning_rate": 0.0012111935869175622, "loss": 0.1838, "step": 44760 }, { "epoch": 0.0793667257989732, "grad_norm": 0.37890625, "learning_rate": 0.00121113452402566, "loss": 0.1649, "step": 44762 }, { "epoch": 0.07937027196428302, "grad_norm": 0.75390625, "learning_rate": 0.0012110754606477203, "loss": 0.2118, "step": 44764 }, { "epoch": 0.07937381812959284, "grad_norm": 1.75, "learning_rate": 0.0012110163967840005, "loss": 0.3524, "step": 44766 }, { "epoch": 0.07937736429490265, "grad_norm": 0.45703125, "learning_rate": 0.0012109573324347598, "loss": 0.1623, "step": 44768 }, { "epoch": 0.07938091046021246, "grad_norm": 0.255859375, "learning_rate": 0.001210898267600256, "loss": 0.1362, "step": 44770 }, { "epoch": 0.07938445662552228, "grad_norm": 1.078125, "learning_rate": 0.001210839202280748, "loss": 0.2739, "step": 44772 }, { "epoch": 0.0793880027908321, "grad_norm": 1.265625, "learning_rate": 0.0012107801364764934, "loss": 0.3574, "step": 44774 }, { "epoch": 0.07939154895614191, "grad_norm": 0.671875, "learning_rate": 0.001210721070187751, "loss": 0.2067, "step": 44776 }, { "epoch": 0.07939509512145172, "grad_norm": 0.30859375, "learning_rate": 0.0012106620034147786, "loss": 0.174, "step": 44778 }, { "epoch": 0.07939864128676155, "grad_norm": 0.96875, "learning_rate": 0.0012106029361578351, "loss": 0.2287, "step": 44780 }, { "epoch": 0.07940218745207137, "grad_norm": 0.154296875, "learning_rate": 0.0012105438684171786, "loss": 0.1714, "step": 44782 }, { "epoch": 0.07940573361738118, "grad_norm": 0.515625, "learning_rate": 0.0012104848001930676, "loss": 0.1965, "step": 44784 }, { "epoch": 0.079409279782691, "grad_norm": 1.078125, "learning_rate": 0.00121042573148576, "loss": 0.2615, "step": 44786 }, { "epoch": 0.07941282594800081, "grad_norm": 0.341796875, "learning_rate": 0.0012103666622955145, "loss": 0.2164, "step": 44788 }, { "epoch": 0.07941637211331062, "grad_norm": 0.55078125, "learning_rate": 0.0012103075926225891, "loss": 0.1517, "step": 44790 }, { "epoch": 0.07941991827862044, "grad_norm": 0.94140625, "learning_rate": 0.0012102485224672428, "loss": 0.3252, "step": 44792 }, { "epoch": 0.07942346444393025, "grad_norm": 0.38671875, "learning_rate": 0.0012101894518297333, "loss": 0.1375, "step": 44794 }, { "epoch": 0.07942701060924007, "grad_norm": 1.234375, "learning_rate": 0.0012101303807103192, "loss": 0.297, "step": 44796 }, { "epoch": 0.07943055677454988, "grad_norm": 3.296875, "learning_rate": 0.0012100713091092587, "loss": 0.2134, "step": 44798 }, { "epoch": 0.0794341029398597, "grad_norm": 0.66796875, "learning_rate": 0.00121001223702681, "loss": 0.1866, "step": 44800 }, { "epoch": 0.07943764910516951, "grad_norm": 1.4140625, "learning_rate": 0.001209953164463232, "loss": 0.1844, "step": 44802 }, { "epoch": 0.07944119527047933, "grad_norm": 0.453125, "learning_rate": 0.0012098940914187827, "loss": 0.1481, "step": 44804 }, { "epoch": 0.07944474143578914, "grad_norm": 0.37890625, "learning_rate": 0.0012098350178937204, "loss": 0.1734, "step": 44806 }, { "epoch": 0.07944828760109895, "grad_norm": 0.384765625, "learning_rate": 0.0012097759438883034, "loss": 0.1965, "step": 44808 }, { "epoch": 0.07945183376640877, "grad_norm": 1.078125, "learning_rate": 0.0012097168694027902, "loss": 0.182, "step": 44810 }, { "epoch": 0.07945537993171858, "grad_norm": 0.205078125, "learning_rate": 0.0012096577944374397, "loss": 0.2406, "step": 44812 }, { "epoch": 0.0794589260970284, "grad_norm": 1.28125, "learning_rate": 0.001209598718992509, "loss": 0.1919, "step": 44814 }, { "epoch": 0.07946247226233821, "grad_norm": 0.2421875, "learning_rate": 0.0012095396430682576, "loss": 0.1835, "step": 44816 }, { "epoch": 0.07946601842764803, "grad_norm": 0.85546875, "learning_rate": 0.001209480566664943, "loss": 0.2983, "step": 44818 }, { "epoch": 0.07946956459295784, "grad_norm": 2.125, "learning_rate": 0.0012094214897828246, "loss": 0.1417, "step": 44820 }, { "epoch": 0.07947311075826766, "grad_norm": 0.28125, "learning_rate": 0.0012093624124221599, "loss": 0.2158, "step": 44822 }, { "epoch": 0.07947665692357747, "grad_norm": 1.390625, "learning_rate": 0.0012093033345832072, "loss": 0.2771, "step": 44824 }, { "epoch": 0.0794802030888873, "grad_norm": 0.19140625, "learning_rate": 0.0012092442562662256, "loss": 0.1342, "step": 44826 }, { "epoch": 0.07948374925419711, "grad_norm": 0.2734375, "learning_rate": 0.0012091851774714728, "loss": 0.1971, "step": 44828 }, { "epoch": 0.07948729541950693, "grad_norm": 0.271484375, "learning_rate": 0.0012091260981992077, "loss": 0.1355, "step": 44830 }, { "epoch": 0.07949084158481674, "grad_norm": 0.291015625, "learning_rate": 0.0012090670184496885, "loss": 0.141, "step": 44832 }, { "epoch": 0.07949438775012656, "grad_norm": 1.125, "learning_rate": 0.0012090079382231736, "loss": 0.2022, "step": 44834 }, { "epoch": 0.07949793391543637, "grad_norm": 0.42578125, "learning_rate": 0.0012089488575199208, "loss": 0.198, "step": 44836 }, { "epoch": 0.07950148008074619, "grad_norm": 0.298828125, "learning_rate": 0.0012088897763401893, "loss": 0.2324, "step": 44838 }, { "epoch": 0.079505026246056, "grad_norm": 0.265625, "learning_rate": 0.001208830694684237, "loss": 0.1674, "step": 44840 }, { "epoch": 0.07950857241136582, "grad_norm": 0.451171875, "learning_rate": 0.0012087716125523226, "loss": 0.1906, "step": 44842 }, { "epoch": 0.07951211857667563, "grad_norm": 0.6640625, "learning_rate": 0.0012087125299447045, "loss": 0.2123, "step": 44844 }, { "epoch": 0.07951566474198545, "grad_norm": 0.482421875, "learning_rate": 0.0012086534468616406, "loss": 0.1975, "step": 44846 }, { "epoch": 0.07951921090729526, "grad_norm": 0.48828125, "learning_rate": 0.0012085943633033899, "loss": 0.2004, "step": 44848 }, { "epoch": 0.07952275707260507, "grad_norm": 0.2236328125, "learning_rate": 0.0012085352792702106, "loss": 0.3544, "step": 44850 }, { "epoch": 0.07952630323791489, "grad_norm": 0.265625, "learning_rate": 0.0012084761947623605, "loss": 0.1985, "step": 44852 }, { "epoch": 0.0795298494032247, "grad_norm": 0.35546875, "learning_rate": 0.0012084171097800991, "loss": 0.1949, "step": 44854 }, { "epoch": 0.07953339556853452, "grad_norm": 0.359375, "learning_rate": 0.001208358024323684, "loss": 0.156, "step": 44856 }, { "epoch": 0.07953694173384433, "grad_norm": 0.546875, "learning_rate": 0.0012082989383933739, "loss": 0.5041, "step": 44858 }, { "epoch": 0.07954048789915415, "grad_norm": 0.625, "learning_rate": 0.0012082398519894272, "loss": 0.1929, "step": 44860 }, { "epoch": 0.07954403406446396, "grad_norm": 0.193359375, "learning_rate": 0.0012081807651121023, "loss": 0.2058, "step": 44862 }, { "epoch": 0.07954758022977378, "grad_norm": 0.1572265625, "learning_rate": 0.001208121677761657, "loss": 0.1581, "step": 44864 }, { "epoch": 0.07955112639508359, "grad_norm": 0.2470703125, "learning_rate": 0.001208062589938351, "loss": 0.1688, "step": 44866 }, { "epoch": 0.0795546725603934, "grad_norm": 0.5625, "learning_rate": 0.001208003501642442, "loss": 0.2118, "step": 44868 }, { "epoch": 0.07955821872570323, "grad_norm": 0.267578125, "learning_rate": 0.001207944412874188, "loss": 0.1722, "step": 44870 }, { "epoch": 0.07956176489101305, "grad_norm": 0.6640625, "learning_rate": 0.001207885323633848, "loss": 0.2159, "step": 44872 }, { "epoch": 0.07956531105632286, "grad_norm": 1.0859375, "learning_rate": 0.0012078262339216804, "loss": 0.2494, "step": 44874 }, { "epoch": 0.07956885722163268, "grad_norm": 0.484375, "learning_rate": 0.0012077671437379432, "loss": 0.2511, "step": 44876 }, { "epoch": 0.07957240338694249, "grad_norm": 0.3359375, "learning_rate": 0.0012077080530828951, "loss": 0.1908, "step": 44878 }, { "epoch": 0.0795759495522523, "grad_norm": 0.5859375, "learning_rate": 0.0012076489619567948, "loss": 0.2299, "step": 44880 }, { "epoch": 0.07957949571756212, "grad_norm": 0.353515625, "learning_rate": 0.0012075898703599007, "loss": 0.1649, "step": 44882 }, { "epoch": 0.07958304188287194, "grad_norm": 0.51953125, "learning_rate": 0.0012075307782924702, "loss": 0.1539, "step": 44884 }, { "epoch": 0.07958658804818175, "grad_norm": 0.609375, "learning_rate": 0.0012074716857547632, "loss": 0.2033, "step": 44886 }, { "epoch": 0.07959013421349156, "grad_norm": 0.85546875, "learning_rate": 0.0012074125927470374, "loss": 0.2389, "step": 44888 }, { "epoch": 0.07959368037880138, "grad_norm": 0.63671875, "learning_rate": 0.0012073534992695515, "loss": 0.2014, "step": 44890 }, { "epoch": 0.0795972265441112, "grad_norm": 1.4453125, "learning_rate": 0.001207294405322563, "loss": 0.2491, "step": 44892 }, { "epoch": 0.07960077270942101, "grad_norm": 1.6796875, "learning_rate": 0.0012072353109063318, "loss": 0.1767, "step": 44894 }, { "epoch": 0.07960431887473082, "grad_norm": 0.859375, "learning_rate": 0.0012071762160211152, "loss": 0.4247, "step": 44896 }, { "epoch": 0.07960786504004064, "grad_norm": 0.455078125, "learning_rate": 0.0012071171206671724, "loss": 0.1254, "step": 44898 }, { "epoch": 0.07961141120535045, "grad_norm": 0.63671875, "learning_rate": 0.0012070580248447615, "loss": 0.2337, "step": 44900 }, { "epoch": 0.07961495737066027, "grad_norm": 0.6953125, "learning_rate": 0.0012069989285541406, "loss": 0.1727, "step": 44902 }, { "epoch": 0.07961850353597008, "grad_norm": 0.26171875, "learning_rate": 0.0012069398317955688, "loss": 0.2612, "step": 44904 }, { "epoch": 0.0796220497012799, "grad_norm": 0.498046875, "learning_rate": 0.0012068807345693045, "loss": 0.1443, "step": 44906 }, { "epoch": 0.07962559586658971, "grad_norm": 0.279296875, "learning_rate": 0.001206821636875606, "loss": 0.1903, "step": 44908 }, { "epoch": 0.07962914203189952, "grad_norm": 0.50390625, "learning_rate": 0.0012067625387147314, "loss": 0.235, "step": 44910 }, { "epoch": 0.07963268819720934, "grad_norm": 0.341796875, "learning_rate": 0.0012067034400869397, "loss": 0.1701, "step": 44912 }, { "epoch": 0.07963623436251915, "grad_norm": 0.2294921875, "learning_rate": 0.0012066443409924888, "loss": 0.3947, "step": 44914 }, { "epoch": 0.07963978052782898, "grad_norm": 0.7578125, "learning_rate": 0.0012065852414316375, "loss": 0.4102, "step": 44916 }, { "epoch": 0.0796433266931388, "grad_norm": 0.271484375, "learning_rate": 0.0012065261414046446, "loss": 0.1659, "step": 44918 }, { "epoch": 0.07964687285844861, "grad_norm": 0.412109375, "learning_rate": 0.0012064670409117682, "loss": 0.2014, "step": 44920 }, { "epoch": 0.07965041902375843, "grad_norm": 4.25, "learning_rate": 0.0012064079399532665, "loss": 0.2358, "step": 44922 }, { "epoch": 0.07965396518906824, "grad_norm": 1.0625, "learning_rate": 0.0012063488385293986, "loss": 0.2684, "step": 44924 }, { "epoch": 0.07965751135437805, "grad_norm": 0.42578125, "learning_rate": 0.0012062897366404229, "loss": 0.2381, "step": 44926 }, { "epoch": 0.07966105751968787, "grad_norm": 0.75, "learning_rate": 0.0012062306342865972, "loss": 0.2085, "step": 44928 }, { "epoch": 0.07966460368499768, "grad_norm": 0.85546875, "learning_rate": 0.0012061715314681805, "loss": 0.4383, "step": 44930 }, { "epoch": 0.0796681498503075, "grad_norm": 0.60546875, "learning_rate": 0.001206112428185431, "loss": 0.1755, "step": 44932 }, { "epoch": 0.07967169601561731, "grad_norm": 0.33984375, "learning_rate": 0.0012060533244386076, "loss": 0.2139, "step": 44934 }, { "epoch": 0.07967524218092713, "grad_norm": 3.625, "learning_rate": 0.0012059942202279686, "loss": 0.1395, "step": 44936 }, { "epoch": 0.07967878834623694, "grad_norm": 0.97265625, "learning_rate": 0.001205935115553772, "loss": 0.1855, "step": 44938 }, { "epoch": 0.07968233451154676, "grad_norm": 0.298828125, "learning_rate": 0.0012058760104162775, "loss": 0.201, "step": 44940 }, { "epoch": 0.07968588067685657, "grad_norm": 0.490234375, "learning_rate": 0.0012058169048157423, "loss": 0.2168, "step": 44942 }, { "epoch": 0.07968942684216639, "grad_norm": 0.201171875, "learning_rate": 0.0012057577987524258, "loss": 0.1893, "step": 44944 }, { "epoch": 0.0796929730074762, "grad_norm": 0.33984375, "learning_rate": 0.0012056986922265856, "loss": 0.179, "step": 44946 }, { "epoch": 0.07969651917278602, "grad_norm": 0.84375, "learning_rate": 0.0012056395852384813, "loss": 0.2223, "step": 44948 }, { "epoch": 0.07970006533809583, "grad_norm": 1.3046875, "learning_rate": 0.0012055804777883702, "loss": 0.2749, "step": 44950 }, { "epoch": 0.07970361150340564, "grad_norm": 0.408203125, "learning_rate": 0.001205521369876512, "loss": 0.1938, "step": 44952 }, { "epoch": 0.07970715766871546, "grad_norm": 1.2890625, "learning_rate": 0.0012054622615031642, "loss": 0.2538, "step": 44954 }, { "epoch": 0.07971070383402527, "grad_norm": 0.140625, "learning_rate": 0.0012054031526685857, "loss": 0.1412, "step": 44956 }, { "epoch": 0.07971424999933509, "grad_norm": 0.27734375, "learning_rate": 0.001205344043373035, "loss": 0.1956, "step": 44958 }, { "epoch": 0.0797177961646449, "grad_norm": 0.84375, "learning_rate": 0.0012052849336167712, "loss": 0.2743, "step": 44960 }, { "epoch": 0.07972134232995473, "grad_norm": 0.50390625, "learning_rate": 0.0012052258234000522, "loss": 0.1397, "step": 44962 }, { "epoch": 0.07972488849526455, "grad_norm": 1.8984375, "learning_rate": 0.0012051667127231363, "loss": 0.223, "step": 44964 }, { "epoch": 0.07972843466057436, "grad_norm": 1.71875, "learning_rate": 0.001205107601586282, "loss": 0.2009, "step": 44966 }, { "epoch": 0.07973198082588417, "grad_norm": 0.8203125, "learning_rate": 0.0012050484899897484, "loss": 0.3823, "step": 44968 }, { "epoch": 0.07973552699119399, "grad_norm": 0.6015625, "learning_rate": 0.001204989377933794, "loss": 0.1823, "step": 44970 }, { "epoch": 0.0797390731565038, "grad_norm": 1.8359375, "learning_rate": 0.0012049302654186767, "loss": 0.1938, "step": 44972 }, { "epoch": 0.07974261932181362, "grad_norm": 1.078125, "learning_rate": 0.0012048711524446554, "loss": 0.3285, "step": 44974 }, { "epoch": 0.07974616548712343, "grad_norm": 0.44921875, "learning_rate": 0.0012048120390119885, "loss": 0.2012, "step": 44976 }, { "epoch": 0.07974971165243325, "grad_norm": 1.609375, "learning_rate": 0.0012047529251209346, "loss": 0.3275, "step": 44978 }, { "epoch": 0.07975325781774306, "grad_norm": 0.13671875, "learning_rate": 0.0012046938107717526, "loss": 0.1371, "step": 44980 }, { "epoch": 0.07975680398305288, "grad_norm": 0.2490234375, "learning_rate": 0.0012046346959647006, "loss": 0.1963, "step": 44982 }, { "epoch": 0.07976035014836269, "grad_norm": 0.6328125, "learning_rate": 0.0012045755807000371, "loss": 0.1645, "step": 44984 }, { "epoch": 0.0797638963136725, "grad_norm": 1.6328125, "learning_rate": 0.0012045164649780204, "loss": 0.2559, "step": 44986 }, { "epoch": 0.07976744247898232, "grad_norm": 0.42578125, "learning_rate": 0.0012044573487989098, "loss": 0.3795, "step": 44988 }, { "epoch": 0.07977098864429213, "grad_norm": 0.1767578125, "learning_rate": 0.0012043982321629635, "loss": 0.1875, "step": 44990 }, { "epoch": 0.07977453480960195, "grad_norm": 0.466796875, "learning_rate": 0.0012043391150704397, "loss": 0.2123, "step": 44992 }, { "epoch": 0.07977808097491176, "grad_norm": 0.302734375, "learning_rate": 0.0012042799975215974, "loss": 0.1983, "step": 44994 }, { "epoch": 0.07978162714022158, "grad_norm": 0.455078125, "learning_rate": 0.001204220879516695, "loss": 0.2125, "step": 44996 }, { "epoch": 0.07978517330553139, "grad_norm": 0.51953125, "learning_rate": 0.001204161761055991, "loss": 0.1748, "step": 44998 }, { "epoch": 0.07978871947084121, "grad_norm": 0.294921875, "learning_rate": 0.0012041026421397438, "loss": 0.1434, "step": 45000 }, { "epoch": 0.07979226563615102, "grad_norm": 1.2109375, "learning_rate": 0.0012040435227682125, "loss": 0.1653, "step": 45002 }, { "epoch": 0.07979581180146084, "grad_norm": 0.3359375, "learning_rate": 0.0012039844029416548, "loss": 0.1516, "step": 45004 }, { "epoch": 0.07979935796677066, "grad_norm": 1.3359375, "learning_rate": 0.00120392528266033, "loss": 0.1549, "step": 45006 }, { "epoch": 0.07980290413208048, "grad_norm": 0.265625, "learning_rate": 0.0012038661619244962, "loss": 0.1536, "step": 45008 }, { "epoch": 0.0798064502973903, "grad_norm": 0.85546875, "learning_rate": 0.0012038070407344123, "loss": 0.2282, "step": 45010 }, { "epoch": 0.07980999646270011, "grad_norm": 0.287109375, "learning_rate": 0.0012037479190903364, "loss": 0.153, "step": 45012 }, { "epoch": 0.07981354262800992, "grad_norm": 0.296875, "learning_rate": 0.0012036887969925278, "loss": 0.2166, "step": 45014 }, { "epoch": 0.07981708879331974, "grad_norm": 0.2138671875, "learning_rate": 0.0012036296744412443, "loss": 0.1595, "step": 45016 }, { "epoch": 0.07982063495862955, "grad_norm": 0.208984375, "learning_rate": 0.0012035705514367452, "loss": 0.1645, "step": 45018 }, { "epoch": 0.07982418112393937, "grad_norm": 1.921875, "learning_rate": 0.0012035114279792883, "loss": 0.178, "step": 45020 }, { "epoch": 0.07982772728924918, "grad_norm": 0.240234375, "learning_rate": 0.0012034523040691328, "loss": 0.1531, "step": 45022 }, { "epoch": 0.079831273454559, "grad_norm": 0.3828125, "learning_rate": 0.0012033931797065366, "loss": 0.1598, "step": 45024 }, { "epoch": 0.07983481961986881, "grad_norm": 0.333984375, "learning_rate": 0.001203334054891759, "loss": 0.2034, "step": 45026 }, { "epoch": 0.07983836578517862, "grad_norm": 3.046875, "learning_rate": 0.0012032749296250582, "loss": 0.2283, "step": 45028 }, { "epoch": 0.07984191195048844, "grad_norm": 0.2890625, "learning_rate": 0.0012032158039066928, "loss": 0.1685, "step": 45030 }, { "epoch": 0.07984545811579825, "grad_norm": 2.09375, "learning_rate": 0.0012031566777369216, "loss": 0.2715, "step": 45032 }, { "epoch": 0.07984900428110807, "grad_norm": 0.3125, "learning_rate": 0.0012030975511160029, "loss": 0.3249, "step": 45034 }, { "epoch": 0.07985255044641788, "grad_norm": 0.2373046875, "learning_rate": 0.0012030384240441956, "loss": 0.1576, "step": 45036 }, { "epoch": 0.0798560966117277, "grad_norm": 0.50390625, "learning_rate": 0.001202979296521758, "loss": 0.1819, "step": 45038 }, { "epoch": 0.07985964277703751, "grad_norm": 0.251953125, "learning_rate": 0.0012029201685489484, "loss": 0.1829, "step": 45040 }, { "epoch": 0.07986318894234733, "grad_norm": 0.42578125, "learning_rate": 0.0012028610401260261, "loss": 0.1794, "step": 45042 }, { "epoch": 0.07986673510765714, "grad_norm": 0.3984375, "learning_rate": 0.0012028019112532494, "loss": 0.2278, "step": 45044 }, { "epoch": 0.07987028127296696, "grad_norm": 0.8828125, "learning_rate": 0.0012027427819308767, "loss": 0.1902, "step": 45046 }, { "epoch": 0.07987382743827677, "grad_norm": 2.40625, "learning_rate": 0.0012026836521591665, "loss": 0.3409, "step": 45048 }, { "epoch": 0.07987737360358659, "grad_norm": 0.462890625, "learning_rate": 0.0012026245219383782, "loss": 0.2617, "step": 45050 }, { "epoch": 0.07988091976889641, "grad_norm": 0.30078125, "learning_rate": 0.0012025653912687696, "loss": 0.1849, "step": 45052 }, { "epoch": 0.07988446593420623, "grad_norm": 0.189453125, "learning_rate": 0.0012025062601505993, "loss": 0.3512, "step": 45054 }, { "epoch": 0.07988801209951604, "grad_norm": 0.1953125, "learning_rate": 0.0012024471285841268, "loss": 0.128, "step": 45056 }, { "epoch": 0.07989155826482586, "grad_norm": 2.1875, "learning_rate": 0.0012023879965696097, "loss": 0.2211, "step": 45058 }, { "epoch": 0.07989510443013567, "grad_norm": 0.431640625, "learning_rate": 0.0012023288641073067, "loss": 0.1701, "step": 45060 }, { "epoch": 0.07989865059544549, "grad_norm": 0.73828125, "learning_rate": 0.001202269731197477, "loss": 0.1767, "step": 45062 }, { "epoch": 0.0799021967607553, "grad_norm": 1.546875, "learning_rate": 0.0012022105978403788, "loss": 0.1956, "step": 45064 }, { "epoch": 0.07990574292606512, "grad_norm": 0.365234375, "learning_rate": 0.001202151464036271, "loss": 0.2484, "step": 45066 }, { "epoch": 0.07990928909137493, "grad_norm": 1.4140625, "learning_rate": 0.0012020923297854117, "loss": 0.3155, "step": 45068 }, { "epoch": 0.07991283525668474, "grad_norm": 0.271484375, "learning_rate": 0.0012020331950880598, "loss": 0.2324, "step": 45070 }, { "epoch": 0.07991638142199456, "grad_norm": 0.7265625, "learning_rate": 0.0012019740599444745, "loss": 0.1859, "step": 45072 }, { "epoch": 0.07991992758730437, "grad_norm": 0.265625, "learning_rate": 0.0012019149243549137, "loss": 0.1444, "step": 45074 }, { "epoch": 0.07992347375261419, "grad_norm": 1.8046875, "learning_rate": 0.001201855788319636, "loss": 0.3091, "step": 45076 }, { "epoch": 0.079927019917924, "grad_norm": 0.61328125, "learning_rate": 0.0012017966518389003, "loss": 0.2341, "step": 45078 }, { "epoch": 0.07993056608323382, "grad_norm": 0.7578125, "learning_rate": 0.0012017375149129654, "loss": 0.2017, "step": 45080 }, { "epoch": 0.07993411224854363, "grad_norm": 0.427734375, "learning_rate": 0.0012016783775420895, "loss": 0.1655, "step": 45082 }, { "epoch": 0.07993765841385345, "grad_norm": 0.609375, "learning_rate": 0.0012016192397265315, "loss": 0.2175, "step": 45084 }, { "epoch": 0.07994120457916326, "grad_norm": 0.84765625, "learning_rate": 0.0012015601014665497, "loss": 0.2321, "step": 45086 }, { "epoch": 0.07994475074447308, "grad_norm": 0.271484375, "learning_rate": 0.0012015009627624033, "loss": 0.1786, "step": 45088 }, { "epoch": 0.07994829690978289, "grad_norm": 0.76171875, "learning_rate": 0.0012014418236143507, "loss": 0.2227, "step": 45090 }, { "epoch": 0.0799518430750927, "grad_norm": 0.419921875, "learning_rate": 0.0012013826840226508, "loss": 0.1825, "step": 45092 }, { "epoch": 0.07995538924040252, "grad_norm": 0.30859375, "learning_rate": 0.0012013235439875614, "loss": 0.147, "step": 45094 }, { "epoch": 0.07995893540571233, "grad_norm": 0.70703125, "learning_rate": 0.0012012644035093419, "loss": 0.1714, "step": 45096 }, { "epoch": 0.07996248157102216, "grad_norm": 0.287109375, "learning_rate": 0.0012012052625882505, "loss": 0.206, "step": 45098 }, { "epoch": 0.07996602773633198, "grad_norm": 0.365234375, "learning_rate": 0.0012011461212245464, "loss": 0.1269, "step": 45100 }, { "epoch": 0.07996957390164179, "grad_norm": 0.330078125, "learning_rate": 0.0012010869794184876, "loss": 0.1497, "step": 45102 }, { "epoch": 0.0799731200669516, "grad_norm": 0.25390625, "learning_rate": 0.0012010278371703332, "loss": 0.1695, "step": 45104 }, { "epoch": 0.07997666623226142, "grad_norm": 0.201171875, "learning_rate": 0.0012009686944803417, "loss": 0.162, "step": 45106 }, { "epoch": 0.07998021239757123, "grad_norm": 0.30078125, "learning_rate": 0.0012009095513487718, "loss": 0.2079, "step": 45108 }, { "epoch": 0.07998375856288105, "grad_norm": 0.423828125, "learning_rate": 0.0012008504077758823, "loss": 0.1866, "step": 45110 }, { "epoch": 0.07998730472819086, "grad_norm": 0.7890625, "learning_rate": 0.0012007912637619317, "loss": 0.14, "step": 45112 }, { "epoch": 0.07999085089350068, "grad_norm": 0.54296875, "learning_rate": 0.0012007321193071782, "loss": 0.1762, "step": 45114 }, { "epoch": 0.07999439705881049, "grad_norm": 0.51171875, "learning_rate": 0.0012006729744118813, "loss": 0.1399, "step": 45116 }, { "epoch": 0.07999794322412031, "grad_norm": 0.30078125, "learning_rate": 0.001200613829076299, "loss": 0.1413, "step": 45118 }, { "epoch": 0.08000148938943012, "grad_norm": 0.43359375, "learning_rate": 0.0012005546833006907, "loss": 0.2885, "step": 45120 }, { "epoch": 0.08000503555473994, "grad_norm": 1.5, "learning_rate": 0.0012004955370853144, "loss": 0.1815, "step": 45122 }, { "epoch": 0.08000858172004975, "grad_norm": 0.296875, "learning_rate": 0.001200436390430429, "loss": 0.3208, "step": 45124 }, { "epoch": 0.08001212788535957, "grad_norm": 0.263671875, "learning_rate": 0.001200377243336293, "loss": 0.1898, "step": 45126 }, { "epoch": 0.08001567405066938, "grad_norm": 0.263671875, "learning_rate": 0.0012003180958031657, "loss": 0.1796, "step": 45128 }, { "epoch": 0.0800192202159792, "grad_norm": 0.53125, "learning_rate": 0.0012002589478313048, "loss": 0.1785, "step": 45130 }, { "epoch": 0.08002276638128901, "grad_norm": 0.3359375, "learning_rate": 0.0012001997994209696, "loss": 0.1842, "step": 45132 }, { "epoch": 0.08002631254659882, "grad_norm": 0.94140625, "learning_rate": 0.0012001406505724188, "loss": 0.2248, "step": 45134 }, { "epoch": 0.08002985871190864, "grad_norm": 0.48828125, "learning_rate": 0.0012000815012859112, "loss": 0.1936, "step": 45136 }, { "epoch": 0.08003340487721845, "grad_norm": 0.41796875, "learning_rate": 0.0012000223515617051, "loss": 0.2463, "step": 45138 }, { "epoch": 0.08003695104252827, "grad_norm": 2.890625, "learning_rate": 0.001199963201400059, "loss": 0.4313, "step": 45140 }, { "epoch": 0.0800404972078381, "grad_norm": 0.486328125, "learning_rate": 0.0011999040508012323, "loss": 0.1641, "step": 45142 }, { "epoch": 0.08004404337314791, "grad_norm": 0.345703125, "learning_rate": 0.001199844899765483, "loss": 0.2528, "step": 45144 }, { "epoch": 0.08004758953845773, "grad_norm": 0.72265625, "learning_rate": 0.0011997857482930704, "loss": 0.1559, "step": 45146 }, { "epoch": 0.08005113570376754, "grad_norm": 0.34765625, "learning_rate": 0.0011997265963842527, "loss": 0.1656, "step": 45148 }, { "epoch": 0.08005468186907735, "grad_norm": 3.625, "learning_rate": 0.001199667444039289, "loss": 0.2433, "step": 45150 }, { "epoch": 0.08005822803438717, "grad_norm": 1.0234375, "learning_rate": 0.0011996082912584373, "loss": 0.1478, "step": 45152 }, { "epoch": 0.08006177419969698, "grad_norm": 0.7109375, "learning_rate": 0.0011995491380419577, "loss": 0.226, "step": 45154 }, { "epoch": 0.0800653203650068, "grad_norm": 0.26171875, "learning_rate": 0.0011994899843901072, "loss": 0.4367, "step": 45156 }, { "epoch": 0.08006886653031661, "grad_norm": 0.236328125, "learning_rate": 0.0011994308303031455, "loss": 0.2084, "step": 45158 }, { "epoch": 0.08007241269562643, "grad_norm": 0.60546875, "learning_rate": 0.0011993716757813314, "loss": 0.1381, "step": 45160 }, { "epoch": 0.08007595886093624, "grad_norm": 0.8046875, "learning_rate": 0.001199312520824923, "loss": 0.1286, "step": 45162 }, { "epoch": 0.08007950502624606, "grad_norm": 0.2890625, "learning_rate": 0.001199253365434179, "loss": 0.2183, "step": 45164 }, { "epoch": 0.08008305119155587, "grad_norm": 0.34375, "learning_rate": 0.0011991942096093592, "loss": 0.1669, "step": 45166 }, { "epoch": 0.08008659735686569, "grad_norm": 0.55078125, "learning_rate": 0.001199135053350721, "loss": 0.2038, "step": 45168 }, { "epoch": 0.0800901435221755, "grad_norm": 0.27734375, "learning_rate": 0.001199075896658524, "loss": 0.1544, "step": 45170 }, { "epoch": 0.08009368968748531, "grad_norm": 0.6328125, "learning_rate": 0.0011990167395330264, "loss": 0.2103, "step": 45172 }, { "epoch": 0.08009723585279513, "grad_norm": 0.421875, "learning_rate": 0.0011989575819744873, "loss": 0.2111, "step": 45174 }, { "epoch": 0.08010078201810494, "grad_norm": 0.181640625, "learning_rate": 0.0011988984239831649, "loss": 0.2891, "step": 45176 }, { "epoch": 0.08010432818341476, "grad_norm": 1.265625, "learning_rate": 0.0011988392655593184, "loss": 0.2118, "step": 45178 }, { "epoch": 0.08010787434872457, "grad_norm": 0.2578125, "learning_rate": 0.0011987801067032062, "loss": 0.1495, "step": 45180 }, { "epoch": 0.08011142051403439, "grad_norm": 0.77734375, "learning_rate": 0.0011987209474150873, "loss": 0.1955, "step": 45182 }, { "epoch": 0.0801149666793442, "grad_norm": 0.20703125, "learning_rate": 0.0011986617876952204, "loss": 0.1794, "step": 45184 }, { "epoch": 0.08011851284465402, "grad_norm": 1.0859375, "learning_rate": 0.0011986026275438639, "loss": 0.2169, "step": 45186 }, { "epoch": 0.08012205900996384, "grad_norm": 1.9921875, "learning_rate": 0.0011985434669612772, "loss": 0.2416, "step": 45188 }, { "epoch": 0.08012560517527366, "grad_norm": 0.66015625, "learning_rate": 0.0011984843059477186, "loss": 0.1843, "step": 45190 }, { "epoch": 0.08012915134058347, "grad_norm": 0.2197265625, "learning_rate": 0.0011984251445034465, "loss": 0.2267, "step": 45192 }, { "epoch": 0.08013269750589329, "grad_norm": 0.318359375, "learning_rate": 0.00119836598262872, "loss": 0.1379, "step": 45194 }, { "epoch": 0.0801362436712031, "grad_norm": 0.341796875, "learning_rate": 0.0011983068203237981, "loss": 0.1871, "step": 45196 }, { "epoch": 0.08013978983651292, "grad_norm": 2.234375, "learning_rate": 0.0011982476575889392, "loss": 0.2514, "step": 45198 }, { "epoch": 0.08014333600182273, "grad_norm": 0.48828125, "learning_rate": 0.001198188494424402, "loss": 0.1643, "step": 45200 }, { "epoch": 0.08014688216713255, "grad_norm": 0.287109375, "learning_rate": 0.0011981293308304453, "loss": 0.1464, "step": 45202 }, { "epoch": 0.08015042833244236, "grad_norm": 0.26171875, "learning_rate": 0.0011980701668073282, "loss": 0.1406, "step": 45204 }, { "epoch": 0.08015397449775218, "grad_norm": 0.1533203125, "learning_rate": 0.001198011002355309, "loss": 0.1968, "step": 45206 }, { "epoch": 0.08015752066306199, "grad_norm": 0.7421875, "learning_rate": 0.0011979518374746465, "loss": 0.173, "step": 45208 }, { "epoch": 0.0801610668283718, "grad_norm": 0.33984375, "learning_rate": 0.0011978926721655995, "loss": 0.1898, "step": 45210 }, { "epoch": 0.08016461299368162, "grad_norm": 0.373046875, "learning_rate": 0.0011978335064284271, "loss": 0.2705, "step": 45212 }, { "epoch": 0.08016815915899143, "grad_norm": 0.322265625, "learning_rate": 0.0011977743402633878, "loss": 0.137, "step": 45214 }, { "epoch": 0.08017170532430125, "grad_norm": 0.51171875, "learning_rate": 0.00119771517367074, "loss": 0.2025, "step": 45216 }, { "epoch": 0.08017525148961106, "grad_norm": 0.447265625, "learning_rate": 0.001197656006650743, "loss": 0.1773, "step": 45218 }, { "epoch": 0.08017879765492088, "grad_norm": 0.3671875, "learning_rate": 0.0011975968392036551, "loss": 0.1627, "step": 45220 }, { "epoch": 0.08018234382023069, "grad_norm": 0.5625, "learning_rate": 0.0011975376713297358, "loss": 0.2443, "step": 45222 }, { "epoch": 0.0801858899855405, "grad_norm": 1.375, "learning_rate": 0.001197478503029243, "loss": 0.2319, "step": 45224 }, { "epoch": 0.08018943615085032, "grad_norm": 0.330078125, "learning_rate": 0.0011974193343024361, "loss": 0.1348, "step": 45226 }, { "epoch": 0.08019298231616014, "grad_norm": 0.455078125, "learning_rate": 0.0011973601651495734, "loss": 0.2335, "step": 45228 }, { "epoch": 0.08019652848146995, "grad_norm": 0.71875, "learning_rate": 0.001197300995570914, "loss": 0.23, "step": 45230 }, { "epoch": 0.08020007464677976, "grad_norm": 0.2451171875, "learning_rate": 0.0011972418255667163, "loss": 0.3882, "step": 45232 }, { "epoch": 0.0802036208120896, "grad_norm": 0.3828125, "learning_rate": 0.0011971826551372398, "loss": 0.1901, "step": 45234 }, { "epoch": 0.08020716697739941, "grad_norm": 0.267578125, "learning_rate": 0.0011971234842827424, "loss": 0.1802, "step": 45236 }, { "epoch": 0.08021071314270922, "grad_norm": 0.39453125, "learning_rate": 0.0011970643130034832, "loss": 0.1666, "step": 45238 }, { "epoch": 0.08021425930801904, "grad_norm": 0.76171875, "learning_rate": 0.0011970051412997216, "loss": 0.2821, "step": 45240 }, { "epoch": 0.08021780547332885, "grad_norm": 0.3046875, "learning_rate": 0.0011969459691717158, "loss": 0.1676, "step": 45242 }, { "epoch": 0.08022135163863867, "grad_norm": 0.76953125, "learning_rate": 0.0011968867966197245, "loss": 0.2461, "step": 45244 }, { "epoch": 0.08022489780394848, "grad_norm": 1.34375, "learning_rate": 0.0011968276236440064, "loss": 0.2932, "step": 45246 }, { "epoch": 0.0802284439692583, "grad_norm": 0.267578125, "learning_rate": 0.0011967684502448208, "loss": 0.2056, "step": 45248 }, { "epoch": 0.08023199013456811, "grad_norm": 0.25, "learning_rate": 0.0011967092764224262, "loss": 0.1753, "step": 45250 }, { "epoch": 0.08023553629987792, "grad_norm": 0.1845703125, "learning_rate": 0.001196650102177081, "loss": 0.1807, "step": 45252 }, { "epoch": 0.08023908246518774, "grad_norm": 0.326171875, "learning_rate": 0.0011965909275090448, "loss": 0.1973, "step": 45254 }, { "epoch": 0.08024262863049755, "grad_norm": 0.447265625, "learning_rate": 0.0011965317524185758, "loss": 0.3607, "step": 45256 }, { "epoch": 0.08024617479580737, "grad_norm": 0.30859375, "learning_rate": 0.0011964725769059333, "loss": 0.1903, "step": 45258 }, { "epoch": 0.08024972096111718, "grad_norm": 0.375, "learning_rate": 0.0011964134009713754, "loss": 0.187, "step": 45260 }, { "epoch": 0.080253267126427, "grad_norm": 1.171875, "learning_rate": 0.0011963542246151617, "loss": 0.2302, "step": 45262 }, { "epoch": 0.08025681329173681, "grad_norm": 1.140625, "learning_rate": 0.0011962950478375503, "loss": 0.2732, "step": 45264 }, { "epoch": 0.08026035945704663, "grad_norm": 0.236328125, "learning_rate": 0.0011962358706388003, "loss": 0.1796, "step": 45266 }, { "epoch": 0.08026390562235644, "grad_norm": 0.41015625, "learning_rate": 0.0011961766930191707, "loss": 0.155, "step": 45268 }, { "epoch": 0.08026745178766626, "grad_norm": 0.30859375, "learning_rate": 0.0011961175149789198, "loss": 0.1962, "step": 45270 }, { "epoch": 0.08027099795297607, "grad_norm": 0.41796875, "learning_rate": 0.0011960583365183068, "loss": 0.2028, "step": 45272 }, { "epoch": 0.08027454411828588, "grad_norm": 0.73046875, "learning_rate": 0.0011959991576375904, "loss": 0.1576, "step": 45274 }, { "epoch": 0.0802780902835957, "grad_norm": 4.53125, "learning_rate": 0.00119593997833703, "loss": 0.2124, "step": 45276 }, { "epoch": 0.08028163644890553, "grad_norm": 0.40234375, "learning_rate": 0.0011958807986168832, "loss": 0.175, "step": 45278 }, { "epoch": 0.08028518261421534, "grad_norm": 0.3671875, "learning_rate": 0.0011958216184774096, "loss": 0.1801, "step": 45280 }, { "epoch": 0.08028872877952516, "grad_norm": 0.359375, "learning_rate": 0.001195762437918868, "loss": 0.2127, "step": 45282 }, { "epoch": 0.08029227494483497, "grad_norm": 0.5390625, "learning_rate": 0.0011957032569415173, "loss": 0.201, "step": 45284 }, { "epoch": 0.08029582111014479, "grad_norm": 0.375, "learning_rate": 0.001195644075545616, "loss": 0.1912, "step": 45286 }, { "epoch": 0.0802993672754546, "grad_norm": 0.384765625, "learning_rate": 0.001195584893731423, "loss": 0.1816, "step": 45288 }, { "epoch": 0.08030291344076441, "grad_norm": 1.1015625, "learning_rate": 0.001195525711499197, "loss": 0.1311, "step": 45290 }, { "epoch": 0.08030645960607423, "grad_norm": 0.5390625, "learning_rate": 0.0011954665288491978, "loss": 0.206, "step": 45292 }, { "epoch": 0.08031000577138404, "grad_norm": 0.369140625, "learning_rate": 0.0011954073457816825, "loss": 0.23, "step": 45294 }, { "epoch": 0.08031355193669386, "grad_norm": 0.2470703125, "learning_rate": 0.0011953481622969115, "loss": 0.4227, "step": 45296 }, { "epoch": 0.08031709810200367, "grad_norm": 0.5390625, "learning_rate": 0.0011952889783951428, "loss": 0.206, "step": 45298 }, { "epoch": 0.08032064426731349, "grad_norm": 3.984375, "learning_rate": 0.0011952297940766357, "loss": 0.2045, "step": 45300 }, { "epoch": 0.0803241904326233, "grad_norm": 0.74609375, "learning_rate": 0.0011951706093416483, "loss": 0.2085, "step": 45302 }, { "epoch": 0.08032773659793312, "grad_norm": 0.384765625, "learning_rate": 0.0011951114241904404, "loss": 0.1781, "step": 45304 }, { "epoch": 0.08033128276324293, "grad_norm": 0.4921875, "learning_rate": 0.00119505223862327, "loss": 0.1506, "step": 45306 }, { "epoch": 0.08033482892855275, "grad_norm": 0.28125, "learning_rate": 0.0011949930526403967, "loss": 0.1558, "step": 45308 }, { "epoch": 0.08033837509386256, "grad_norm": 0.2275390625, "learning_rate": 0.0011949338662420787, "loss": 0.1749, "step": 45310 }, { "epoch": 0.08034192125917237, "grad_norm": 0.8515625, "learning_rate": 0.001194874679428575, "loss": 0.1518, "step": 45312 }, { "epoch": 0.08034546742448219, "grad_norm": 0.302734375, "learning_rate": 0.001194815492200145, "loss": 0.1398, "step": 45314 }, { "epoch": 0.080349013589792, "grad_norm": 1.3359375, "learning_rate": 0.0011947563045570468, "loss": 0.2039, "step": 45316 }, { "epoch": 0.08035255975510182, "grad_norm": 1.7109375, "learning_rate": 0.0011946971164995398, "loss": 0.5373, "step": 45318 }, { "epoch": 0.08035610592041163, "grad_norm": 0.287109375, "learning_rate": 0.0011946379280278823, "loss": 0.158, "step": 45320 }, { "epoch": 0.08035965208572145, "grad_norm": 0.134765625, "learning_rate": 0.0011945787391423339, "loss": 0.2034, "step": 45322 }, { "epoch": 0.08036319825103128, "grad_norm": 0.45703125, "learning_rate": 0.0011945195498431526, "loss": 0.1575, "step": 45324 }, { "epoch": 0.08036674441634109, "grad_norm": 0.57421875, "learning_rate": 0.001194460360130598, "loss": 0.2054, "step": 45326 }, { "epoch": 0.0803702905816509, "grad_norm": 1.546875, "learning_rate": 0.0011944011700049284, "loss": 0.1715, "step": 45328 }, { "epoch": 0.08037383674696072, "grad_norm": 1.5234375, "learning_rate": 0.001194341979466403, "loss": 0.1473, "step": 45330 }, { "epoch": 0.08037738291227053, "grad_norm": 0.330078125, "learning_rate": 0.0011942827885152805, "loss": 0.1899, "step": 45332 }, { "epoch": 0.08038092907758035, "grad_norm": 0.365234375, "learning_rate": 0.00119422359715182, "loss": 0.4067, "step": 45334 }, { "epoch": 0.08038447524289016, "grad_norm": 0.328125, "learning_rate": 0.0011941644053762804, "loss": 0.1955, "step": 45336 }, { "epoch": 0.08038802140819998, "grad_norm": 0.6328125, "learning_rate": 0.00119410521318892, "loss": 0.2619, "step": 45338 }, { "epoch": 0.08039156757350979, "grad_norm": 0.51953125, "learning_rate": 0.0011940460205899983, "loss": 0.1922, "step": 45340 }, { "epoch": 0.0803951137388196, "grad_norm": 1.3203125, "learning_rate": 0.001193986827579774, "loss": 0.2384, "step": 45342 }, { "epoch": 0.08039865990412942, "grad_norm": 0.423828125, "learning_rate": 0.0011939276341585057, "loss": 0.2187, "step": 45344 }, { "epoch": 0.08040220606943924, "grad_norm": 0.52734375, "learning_rate": 0.0011938684403264527, "loss": 0.1835, "step": 45346 }, { "epoch": 0.08040575223474905, "grad_norm": 0.3515625, "learning_rate": 0.0011938092460838732, "loss": 0.1654, "step": 45348 }, { "epoch": 0.08040929840005887, "grad_norm": 0.34765625, "learning_rate": 0.001193750051431027, "loss": 0.2062, "step": 45350 }, { "epoch": 0.08041284456536868, "grad_norm": 0.255859375, "learning_rate": 0.0011936908563681724, "loss": 0.2585, "step": 45352 }, { "epoch": 0.0804163907306785, "grad_norm": 0.49609375, "learning_rate": 0.0011936316608955684, "loss": 0.165, "step": 45354 }, { "epoch": 0.08041993689598831, "grad_norm": 0.77734375, "learning_rate": 0.001193572465013474, "loss": 0.2017, "step": 45356 }, { "epoch": 0.08042348306129812, "grad_norm": 0.7734375, "learning_rate": 0.0011935132687221483, "loss": 0.2078, "step": 45358 }, { "epoch": 0.08042702922660794, "grad_norm": 0.2197265625, "learning_rate": 0.0011934540720218492, "loss": 0.2147, "step": 45360 }, { "epoch": 0.08043057539191775, "grad_norm": 0.36328125, "learning_rate": 0.0011933948749128366, "loss": 0.1742, "step": 45362 }, { "epoch": 0.08043412155722757, "grad_norm": 0.388671875, "learning_rate": 0.001193335677395369, "loss": 0.2114, "step": 45364 }, { "epoch": 0.08043766772253738, "grad_norm": 0.28125, "learning_rate": 0.0011932764794697055, "loss": 0.1762, "step": 45366 }, { "epoch": 0.0804412138878472, "grad_norm": 0.361328125, "learning_rate": 0.0011932172811361045, "loss": 0.1589, "step": 45368 }, { "epoch": 0.08044476005315702, "grad_norm": 0.44921875, "learning_rate": 0.001193158082394826, "loss": 0.2141, "step": 45370 }, { "epoch": 0.08044830621846684, "grad_norm": 0.416015625, "learning_rate": 0.0011930988832461276, "loss": 0.2212, "step": 45372 }, { "epoch": 0.08045185238377665, "grad_norm": 0.5859375, "learning_rate": 0.0011930396836902688, "loss": 0.1697, "step": 45374 }, { "epoch": 0.08045539854908647, "grad_norm": 0.44140625, "learning_rate": 0.0011929804837275085, "loss": 0.1657, "step": 45376 }, { "epoch": 0.08045894471439628, "grad_norm": 2.03125, "learning_rate": 0.0011929212833581056, "loss": 0.3358, "step": 45378 }, { "epoch": 0.0804624908797061, "grad_norm": 0.263671875, "learning_rate": 0.0011928620825823189, "loss": 0.1892, "step": 45380 }, { "epoch": 0.08046603704501591, "grad_norm": 0.3359375, "learning_rate": 0.0011928028814004075, "loss": 0.168, "step": 45382 }, { "epoch": 0.08046958321032573, "grad_norm": 0.1806640625, "learning_rate": 0.00119274367981263, "loss": 0.1416, "step": 45384 }, { "epoch": 0.08047312937563554, "grad_norm": 0.9453125, "learning_rate": 0.0011926844778192458, "loss": 0.182, "step": 45386 }, { "epoch": 0.08047667554094536, "grad_norm": 0.23046875, "learning_rate": 0.0011926252754205133, "loss": 0.1883, "step": 45388 }, { "epoch": 0.08048022170625517, "grad_norm": 0.333984375, "learning_rate": 0.001192566072616692, "loss": 0.2126, "step": 45390 }, { "epoch": 0.08048376787156498, "grad_norm": 1.109375, "learning_rate": 0.00119250686940804, "loss": 0.358, "step": 45392 }, { "epoch": 0.0804873140368748, "grad_norm": 2.96875, "learning_rate": 0.0011924476657948174, "loss": 0.2688, "step": 45394 }, { "epoch": 0.08049086020218461, "grad_norm": 0.5546875, "learning_rate": 0.0011923884617772815, "loss": 0.2975, "step": 45396 }, { "epoch": 0.08049440636749443, "grad_norm": 0.263671875, "learning_rate": 0.0011923292573556927, "loss": 0.2504, "step": 45398 }, { "epoch": 0.08049795253280424, "grad_norm": 0.396484375, "learning_rate": 0.0011922700525303091, "loss": 0.1676, "step": 45400 }, { "epoch": 0.08050149869811406, "grad_norm": 0.498046875, "learning_rate": 0.0011922108473013903, "loss": 0.225, "step": 45402 }, { "epoch": 0.08050504486342387, "grad_norm": 0.34375, "learning_rate": 0.0011921516416691941, "loss": 0.1809, "step": 45404 }, { "epoch": 0.08050859102873369, "grad_norm": 0.474609375, "learning_rate": 0.0011920924356339806, "loss": 0.1655, "step": 45406 }, { "epoch": 0.0805121371940435, "grad_norm": 0.33984375, "learning_rate": 0.0011920332291960086, "loss": 0.1134, "step": 45408 }, { "epoch": 0.08051568335935332, "grad_norm": 1.5859375, "learning_rate": 0.0011919740223555362, "loss": 0.4777, "step": 45410 }, { "epoch": 0.08051922952466313, "grad_norm": 0.43359375, "learning_rate": 0.0011919148151128232, "loss": 0.325, "step": 45412 }, { "epoch": 0.08052277568997296, "grad_norm": 0.65234375, "learning_rate": 0.001191855607468128, "loss": 0.214, "step": 45414 }, { "epoch": 0.08052632185528277, "grad_norm": 0.283203125, "learning_rate": 0.0011917963994217096, "loss": 0.2298, "step": 45416 }, { "epoch": 0.08052986802059259, "grad_norm": 0.91796875, "learning_rate": 0.001191737190973827, "loss": 0.1791, "step": 45418 }, { "epoch": 0.0805334141859024, "grad_norm": 0.5234375, "learning_rate": 0.0011916779821247398, "loss": 0.2348, "step": 45420 }, { "epoch": 0.08053696035121222, "grad_norm": 0.58203125, "learning_rate": 0.0011916187728747058, "loss": 0.2442, "step": 45422 }, { "epoch": 0.08054050651652203, "grad_norm": 0.2294921875, "learning_rate": 0.0011915595632239846, "loss": 0.1679, "step": 45424 }, { "epoch": 0.08054405268183185, "grad_norm": 0.61328125, "learning_rate": 0.0011915003531728352, "loss": 0.2039, "step": 45426 }, { "epoch": 0.08054759884714166, "grad_norm": 0.349609375, "learning_rate": 0.0011914411427215164, "loss": 0.1918, "step": 45428 }, { "epoch": 0.08055114501245147, "grad_norm": 2.296875, "learning_rate": 0.0011913819318702871, "loss": 0.2297, "step": 45430 }, { "epoch": 0.08055469117776129, "grad_norm": 0.384765625, "learning_rate": 0.0011913227206194065, "loss": 0.1474, "step": 45432 }, { "epoch": 0.0805582373430711, "grad_norm": 0.2490234375, "learning_rate": 0.001191263508969133, "loss": 0.3031, "step": 45434 }, { "epoch": 0.08056178350838092, "grad_norm": 0.640625, "learning_rate": 0.0011912042969197264, "loss": 0.1854, "step": 45436 }, { "epoch": 0.08056532967369073, "grad_norm": 0.306640625, "learning_rate": 0.0011911450844714444, "loss": 0.1691, "step": 45438 }, { "epoch": 0.08056887583900055, "grad_norm": 0.98046875, "learning_rate": 0.0011910858716245474, "loss": 0.2107, "step": 45440 }, { "epoch": 0.08057242200431036, "grad_norm": 0.447265625, "learning_rate": 0.0011910266583792936, "loss": 0.1702, "step": 45442 }, { "epoch": 0.08057596816962018, "grad_norm": 1.1484375, "learning_rate": 0.0011909674447359419, "loss": 0.279, "step": 45444 }, { "epoch": 0.08057951433492999, "grad_norm": 0.65234375, "learning_rate": 0.0011909082306947516, "loss": 0.1946, "step": 45446 }, { "epoch": 0.0805830605002398, "grad_norm": 0.298828125, "learning_rate": 0.0011908490162559815, "loss": 0.1795, "step": 45448 }, { "epoch": 0.08058660666554962, "grad_norm": 0.49609375, "learning_rate": 0.0011907898014198906, "loss": 0.1962, "step": 45450 }, { "epoch": 0.08059015283085944, "grad_norm": 0.5390625, "learning_rate": 0.001190730586186738, "loss": 0.2497, "step": 45452 }, { "epoch": 0.08059369899616925, "grad_norm": 0.69140625, "learning_rate": 0.001190671370556782, "loss": 0.2255, "step": 45454 }, { "epoch": 0.08059724516147906, "grad_norm": 0.3046875, "learning_rate": 0.0011906121545302826, "loss": 0.1602, "step": 45456 }, { "epoch": 0.08060079132678888, "grad_norm": 0.447265625, "learning_rate": 0.0011905529381074978, "loss": 0.225, "step": 45458 }, { "epoch": 0.08060433749209871, "grad_norm": 0.5078125, "learning_rate": 0.0011904937212886873, "loss": 0.1512, "step": 45460 }, { "epoch": 0.08060788365740852, "grad_norm": 0.609375, "learning_rate": 0.00119043450407411, "loss": 0.1864, "step": 45462 }, { "epoch": 0.08061142982271834, "grad_norm": 0.478515625, "learning_rate": 0.0011903752864640242, "loss": 0.1789, "step": 45464 }, { "epoch": 0.08061497598802815, "grad_norm": 0.482421875, "learning_rate": 0.00119031606845869, "loss": 0.1279, "step": 45466 }, { "epoch": 0.08061852215333797, "grad_norm": 0.384765625, "learning_rate": 0.0011902568500583655, "loss": 0.1992, "step": 45468 }, { "epoch": 0.08062206831864778, "grad_norm": 1.5390625, "learning_rate": 0.0011901976312633101, "loss": 0.2036, "step": 45470 }, { "epoch": 0.0806256144839576, "grad_norm": 1.1640625, "learning_rate": 0.0011901384120737825, "loss": 0.2104, "step": 45472 }, { "epoch": 0.08062916064926741, "grad_norm": 0.28125, "learning_rate": 0.0011900791924900418, "loss": 0.1386, "step": 45474 }, { "epoch": 0.08063270681457722, "grad_norm": 0.5546875, "learning_rate": 0.001190019972512347, "loss": 0.2414, "step": 45476 }, { "epoch": 0.08063625297988704, "grad_norm": 0.73828125, "learning_rate": 0.0011899607521409573, "loss": 0.1955, "step": 45478 }, { "epoch": 0.08063979914519685, "grad_norm": 0.443359375, "learning_rate": 0.0011899015313761316, "loss": 0.1935, "step": 45480 }, { "epoch": 0.08064334531050667, "grad_norm": 0.47265625, "learning_rate": 0.0011898423102181283, "loss": 0.1777, "step": 45482 }, { "epoch": 0.08064689147581648, "grad_norm": 0.302734375, "learning_rate": 0.0011897830886672075, "loss": 0.3514, "step": 45484 }, { "epoch": 0.0806504376411263, "grad_norm": 0.890625, "learning_rate": 0.0011897238667236277, "loss": 0.2463, "step": 45486 }, { "epoch": 0.08065398380643611, "grad_norm": 0.40625, "learning_rate": 0.0011896646443876474, "loss": 0.2039, "step": 45488 }, { "epoch": 0.08065752997174593, "grad_norm": 2.484375, "learning_rate": 0.0011896054216595261, "loss": 0.2505, "step": 45490 }, { "epoch": 0.08066107613705574, "grad_norm": 0.51953125, "learning_rate": 0.0011895461985395228, "loss": 0.1914, "step": 45492 }, { "epoch": 0.08066462230236555, "grad_norm": 0.67578125, "learning_rate": 0.0011894869750278965, "loss": 0.1769, "step": 45494 }, { "epoch": 0.08066816846767537, "grad_norm": 0.162109375, "learning_rate": 0.001189427751124906, "loss": 0.1587, "step": 45496 }, { "epoch": 0.08067171463298518, "grad_norm": 0.4296875, "learning_rate": 0.0011893685268308109, "loss": 0.1761, "step": 45498 }, { "epoch": 0.080675260798295, "grad_norm": 0.1455078125, "learning_rate": 0.0011893093021458692, "loss": 0.1255, "step": 45500 }, { "epoch": 0.08067880696360481, "grad_norm": 2.546875, "learning_rate": 0.0011892500770703408, "loss": 0.2552, "step": 45502 }, { "epoch": 0.08068235312891463, "grad_norm": 0.54296875, "learning_rate": 0.0011891908516044843, "loss": 0.1857, "step": 45504 }, { "epoch": 0.08068589929422446, "grad_norm": 3.09375, "learning_rate": 0.0011891316257485588, "loss": 0.2695, "step": 45506 }, { "epoch": 0.08068944545953427, "grad_norm": 0.361328125, "learning_rate": 0.0011890723995028234, "loss": 0.1644, "step": 45508 }, { "epoch": 0.08069299162484408, "grad_norm": 0.59375, "learning_rate": 0.0011890131728675374, "loss": 0.1816, "step": 45510 }, { "epoch": 0.0806965377901539, "grad_norm": 0.3125, "learning_rate": 0.001188953945842959, "loss": 0.1937, "step": 45512 }, { "epoch": 0.08070008395546371, "grad_norm": 0.63671875, "learning_rate": 0.001188894718429348, "loss": 0.167, "step": 45514 }, { "epoch": 0.08070363012077353, "grad_norm": 0.50390625, "learning_rate": 0.0011888354906269627, "loss": 0.2302, "step": 45516 }, { "epoch": 0.08070717628608334, "grad_norm": 0.2412109375, "learning_rate": 0.001188776262436063, "loss": 0.2285, "step": 45518 }, { "epoch": 0.08071072245139316, "grad_norm": 0.6796875, "learning_rate": 0.0011887170338569072, "loss": 0.2025, "step": 45520 }, { "epoch": 0.08071426861670297, "grad_norm": 0.146484375, "learning_rate": 0.001188657804889755, "loss": 0.1997, "step": 45522 }, { "epoch": 0.08071781478201279, "grad_norm": 0.953125, "learning_rate": 0.001188598575534865, "loss": 0.2094, "step": 45524 }, { "epoch": 0.0807213609473226, "grad_norm": 0.7421875, "learning_rate": 0.0011885393457924962, "loss": 0.4677, "step": 45526 }, { "epoch": 0.08072490711263242, "grad_norm": 0.52734375, "learning_rate": 0.0011884801156629076, "loss": 0.1637, "step": 45528 }, { "epoch": 0.08072845327794223, "grad_norm": 0.150390625, "learning_rate": 0.0011884208851463585, "loss": 0.2184, "step": 45530 }, { "epoch": 0.08073199944325204, "grad_norm": 0.44140625, "learning_rate": 0.0011883616542431078, "loss": 0.1851, "step": 45532 }, { "epoch": 0.08073554560856186, "grad_norm": 0.275390625, "learning_rate": 0.0011883024229534145, "loss": 0.1443, "step": 45534 }, { "epoch": 0.08073909177387167, "grad_norm": 1.28125, "learning_rate": 0.0011882431912775375, "loss": 0.2317, "step": 45536 }, { "epoch": 0.08074263793918149, "grad_norm": 0.3984375, "learning_rate": 0.0011881839592157363, "loss": 0.1716, "step": 45538 }, { "epoch": 0.0807461841044913, "grad_norm": 0.515625, "learning_rate": 0.0011881247267682695, "loss": 0.1479, "step": 45540 }, { "epoch": 0.08074973026980112, "grad_norm": 0.326171875, "learning_rate": 0.0011880654939353966, "loss": 0.1428, "step": 45542 }, { "epoch": 0.08075327643511093, "grad_norm": 0.29296875, "learning_rate": 0.0011880062607173762, "loss": 0.1499, "step": 45544 }, { "epoch": 0.08075682260042075, "grad_norm": 0.2041015625, "learning_rate": 0.001187947027114468, "loss": 0.1203, "step": 45546 }, { "epoch": 0.08076036876573056, "grad_norm": 0.353515625, "learning_rate": 0.0011878877931269301, "loss": 0.1996, "step": 45548 }, { "epoch": 0.08076391493104039, "grad_norm": 0.259765625, "learning_rate": 0.0011878285587550222, "loss": 0.1913, "step": 45550 }, { "epoch": 0.0807674610963502, "grad_norm": 0.61328125, "learning_rate": 0.0011877693239990027, "loss": 0.1704, "step": 45552 }, { "epoch": 0.08077100726166002, "grad_norm": 0.625, "learning_rate": 0.0011877100888591317, "loss": 0.1751, "step": 45554 }, { "epoch": 0.08077455342696983, "grad_norm": 0.69921875, "learning_rate": 0.0011876508533356675, "loss": 0.1572, "step": 45556 }, { "epoch": 0.08077809959227965, "grad_norm": 0.45703125, "learning_rate": 0.0011875916174288697, "loss": 0.1592, "step": 45558 }, { "epoch": 0.08078164575758946, "grad_norm": 0.2890625, "learning_rate": 0.001187532381138997, "loss": 0.4194, "step": 45560 }, { "epoch": 0.08078519192289928, "grad_norm": 0.3125, "learning_rate": 0.0011874731444663084, "loss": 0.2179, "step": 45562 }, { "epoch": 0.08078873808820909, "grad_norm": 0.65234375, "learning_rate": 0.0011874139074110627, "loss": 0.2191, "step": 45564 }, { "epoch": 0.0807922842535189, "grad_norm": 0.640625, "learning_rate": 0.0011873546699735199, "loss": 0.189, "step": 45566 }, { "epoch": 0.08079583041882872, "grad_norm": 0.28125, "learning_rate": 0.001187295432153938, "loss": 0.2445, "step": 45568 }, { "epoch": 0.08079937658413854, "grad_norm": 0.50390625, "learning_rate": 0.001187236193952577, "loss": 0.2118, "step": 45570 }, { "epoch": 0.08080292274944835, "grad_norm": 0.494140625, "learning_rate": 0.0011871769553696953, "loss": 0.2573, "step": 45572 }, { "epoch": 0.08080646891475816, "grad_norm": 0.6015625, "learning_rate": 0.0011871177164055522, "loss": 0.1771, "step": 45574 }, { "epoch": 0.08081001508006798, "grad_norm": 0.49609375, "learning_rate": 0.0011870584770604072, "loss": 0.2137, "step": 45576 }, { "epoch": 0.0808135612453778, "grad_norm": 0.435546875, "learning_rate": 0.0011869992373345188, "loss": 0.2071, "step": 45578 }, { "epoch": 0.08081710741068761, "grad_norm": 0.228515625, "learning_rate": 0.0011869399972281463, "loss": 0.1593, "step": 45580 }, { "epoch": 0.08082065357599742, "grad_norm": 2.453125, "learning_rate": 0.0011868807567415487, "loss": 0.2302, "step": 45582 }, { "epoch": 0.08082419974130724, "grad_norm": 0.43359375, "learning_rate": 0.001186821515874985, "loss": 0.1697, "step": 45584 }, { "epoch": 0.08082774590661705, "grad_norm": 1.453125, "learning_rate": 0.0011867622746287146, "loss": 0.2529, "step": 45586 }, { "epoch": 0.08083129207192687, "grad_norm": 0.27734375, "learning_rate": 0.0011867030330029965, "loss": 0.1657, "step": 45588 }, { "epoch": 0.08083483823723668, "grad_norm": 0.376953125, "learning_rate": 0.0011866437909980893, "loss": 0.1862, "step": 45590 }, { "epoch": 0.0808383844025465, "grad_norm": 0.51171875, "learning_rate": 0.0011865845486142528, "loss": 0.2037, "step": 45592 }, { "epoch": 0.08084193056785631, "grad_norm": 0.63671875, "learning_rate": 0.0011865253058517457, "loss": 0.1589, "step": 45594 }, { "epoch": 0.08084547673316614, "grad_norm": 0.2373046875, "learning_rate": 0.001186466062710827, "loss": 0.1906, "step": 45596 }, { "epoch": 0.08084902289847595, "grad_norm": 0.2490234375, "learning_rate": 0.0011864068191917564, "loss": 0.1721, "step": 45598 }, { "epoch": 0.08085256906378577, "grad_norm": 0.11181640625, "learning_rate": 0.0011863475752947924, "loss": 0.1298, "step": 45600 }, { "epoch": 0.08085611522909558, "grad_norm": 0.5625, "learning_rate": 0.0011862883310201939, "loss": 0.2065, "step": 45602 }, { "epoch": 0.0808596613944054, "grad_norm": 1.953125, "learning_rate": 0.001186229086368221, "loss": 0.2644, "step": 45604 }, { "epoch": 0.08086320755971521, "grad_norm": 0.29296875, "learning_rate": 0.0011861698413391315, "loss": 0.1807, "step": 45606 }, { "epoch": 0.08086675372502503, "grad_norm": 0.6171875, "learning_rate": 0.0011861105959331858, "loss": 0.4054, "step": 45608 }, { "epoch": 0.08087029989033484, "grad_norm": 0.259765625, "learning_rate": 0.0011860513501506418, "loss": 0.1716, "step": 45610 }, { "epoch": 0.08087384605564465, "grad_norm": 0.353515625, "learning_rate": 0.0011859921039917593, "loss": 0.135, "step": 45612 }, { "epoch": 0.08087739222095447, "grad_norm": 2.234375, "learning_rate": 0.0011859328574567974, "loss": 0.3103, "step": 45614 }, { "epoch": 0.08088093838626428, "grad_norm": 0.474609375, "learning_rate": 0.0011858736105460153, "loss": 0.1598, "step": 45616 }, { "epoch": 0.0808844845515741, "grad_norm": 0.65234375, "learning_rate": 0.0011858143632596718, "loss": 0.2102, "step": 45618 }, { "epoch": 0.08088803071688391, "grad_norm": 1.421875, "learning_rate": 0.0011857551155980259, "loss": 0.2087, "step": 45620 }, { "epoch": 0.08089157688219373, "grad_norm": 1.09375, "learning_rate": 0.0011856958675613373, "loss": 0.2626, "step": 45622 }, { "epoch": 0.08089512304750354, "grad_norm": 0.55859375, "learning_rate": 0.0011856366191498645, "loss": 0.2456, "step": 45624 }, { "epoch": 0.08089866921281336, "grad_norm": 1.0234375, "learning_rate": 0.001185577370363867, "loss": 0.1974, "step": 45626 }, { "epoch": 0.08090221537812317, "grad_norm": 0.3671875, "learning_rate": 0.0011855181212036036, "loss": 0.2101, "step": 45628 }, { "epoch": 0.08090576154343299, "grad_norm": 0.423828125, "learning_rate": 0.0011854588716693336, "loss": 0.1535, "step": 45630 }, { "epoch": 0.0809093077087428, "grad_norm": 0.1689453125, "learning_rate": 0.001185399621761316, "loss": 0.1793, "step": 45632 }, { "epoch": 0.08091285387405261, "grad_norm": 1.9296875, "learning_rate": 0.0011853403714798105, "loss": 0.3316, "step": 45634 }, { "epoch": 0.08091640003936243, "grad_norm": 2.3125, "learning_rate": 0.0011852811208250755, "loss": 0.2505, "step": 45636 }, { "epoch": 0.08091994620467224, "grad_norm": 0.1787109375, "learning_rate": 0.0011852218697973706, "loss": 0.2084, "step": 45638 }, { "epoch": 0.08092349236998206, "grad_norm": 1.046875, "learning_rate": 0.0011851626183969548, "loss": 0.1996, "step": 45640 }, { "epoch": 0.08092703853529189, "grad_norm": 0.75390625, "learning_rate": 0.0011851033666240868, "loss": 0.1636, "step": 45642 }, { "epoch": 0.0809305847006017, "grad_norm": 0.43359375, "learning_rate": 0.0011850441144790264, "loss": 0.1856, "step": 45644 }, { "epoch": 0.08093413086591152, "grad_norm": 0.291015625, "learning_rate": 0.0011849848619620324, "loss": 0.2004, "step": 45646 }, { "epoch": 0.08093767703122133, "grad_norm": 0.44921875, "learning_rate": 0.001184925609073364, "loss": 0.451, "step": 45648 }, { "epoch": 0.08094122319653115, "grad_norm": 0.193359375, "learning_rate": 0.0011848663558132802, "loss": 0.1688, "step": 45650 }, { "epoch": 0.08094476936184096, "grad_norm": 0.3203125, "learning_rate": 0.0011848071021820406, "loss": 0.1773, "step": 45652 }, { "epoch": 0.08094831552715077, "grad_norm": 0.330078125, "learning_rate": 0.0011847478481799036, "loss": 0.119, "step": 45654 }, { "epoch": 0.08095186169246059, "grad_norm": 0.330078125, "learning_rate": 0.0011846885938071288, "loss": 0.183, "step": 45656 }, { "epoch": 0.0809554078577704, "grad_norm": 0.4765625, "learning_rate": 0.0011846293390639754, "loss": 0.2506, "step": 45658 }, { "epoch": 0.08095895402308022, "grad_norm": 0.671875, "learning_rate": 0.0011845700839507023, "loss": 0.1692, "step": 45660 }, { "epoch": 0.08096250018839003, "grad_norm": 0.69140625, "learning_rate": 0.001184510828467569, "loss": 0.22, "step": 45662 }, { "epoch": 0.08096604635369985, "grad_norm": 5.0625, "learning_rate": 0.0011844515726148342, "loss": 0.3033, "step": 45664 }, { "epoch": 0.08096959251900966, "grad_norm": 0.6953125, "learning_rate": 0.0011843923163927575, "loss": 0.1578, "step": 45666 }, { "epoch": 0.08097313868431948, "grad_norm": 0.69140625, "learning_rate": 0.0011843330598015976, "loss": 0.1751, "step": 45668 }, { "epoch": 0.08097668484962929, "grad_norm": 0.1728515625, "learning_rate": 0.0011842738028416137, "loss": 0.151, "step": 45670 }, { "epoch": 0.0809802310149391, "grad_norm": 0.291015625, "learning_rate": 0.0011842145455130655, "loss": 0.2252, "step": 45672 }, { "epoch": 0.08098377718024892, "grad_norm": 0.353515625, "learning_rate": 0.001184155287816212, "loss": 0.149, "step": 45674 }, { "epoch": 0.08098732334555873, "grad_norm": 0.4375, "learning_rate": 0.0011840960297513116, "loss": 0.1473, "step": 45676 }, { "epoch": 0.08099086951086855, "grad_norm": 0.66015625, "learning_rate": 0.0011840367713186245, "loss": 0.1939, "step": 45678 }, { "epoch": 0.08099441567617836, "grad_norm": 0.416015625, "learning_rate": 0.001183977512518409, "loss": 0.1806, "step": 45680 }, { "epoch": 0.08099796184148818, "grad_norm": 0.255859375, "learning_rate": 0.0011839182533509248, "loss": 0.216, "step": 45682 }, { "epoch": 0.08100150800679799, "grad_norm": 0.765625, "learning_rate": 0.0011838589938164307, "loss": 0.1523, "step": 45684 }, { "epoch": 0.08100505417210782, "grad_norm": 0.2060546875, "learning_rate": 0.0011837997339151866, "loss": 0.1962, "step": 45686 }, { "epoch": 0.08100860033741764, "grad_norm": 0.30859375, "learning_rate": 0.0011837404736474507, "loss": 0.1852, "step": 45688 }, { "epoch": 0.08101214650272745, "grad_norm": 0.408203125, "learning_rate": 0.0011836812130134827, "loss": 0.2473, "step": 45690 }, { "epoch": 0.08101569266803726, "grad_norm": 0.55859375, "learning_rate": 0.0011836219520135417, "loss": 0.2346, "step": 45692 }, { "epoch": 0.08101923883334708, "grad_norm": 0.55078125, "learning_rate": 0.001183562690647887, "loss": 0.2113, "step": 45694 }, { "epoch": 0.0810227849986569, "grad_norm": 0.57421875, "learning_rate": 0.0011835034289167771, "loss": 0.1833, "step": 45696 }, { "epoch": 0.08102633116396671, "grad_norm": 0.75, "learning_rate": 0.0011834441668204722, "loss": 0.2128, "step": 45698 }, { "epoch": 0.08102987732927652, "grad_norm": 0.2119140625, "learning_rate": 0.001183384904359231, "loss": 0.1663, "step": 45700 }, { "epoch": 0.08103342349458634, "grad_norm": 0.66015625, "learning_rate": 0.0011833256415333124, "loss": 0.181, "step": 45702 }, { "epoch": 0.08103696965989615, "grad_norm": 0.271484375, "learning_rate": 0.001183266378342976, "loss": 0.1496, "step": 45704 }, { "epoch": 0.08104051582520597, "grad_norm": 0.419921875, "learning_rate": 0.001183207114788481, "loss": 0.1923, "step": 45706 }, { "epoch": 0.08104406199051578, "grad_norm": 0.439453125, "learning_rate": 0.0011831478508700858, "loss": 0.2143, "step": 45708 }, { "epoch": 0.0810476081558256, "grad_norm": 0.2294921875, "learning_rate": 0.001183088586588051, "loss": 0.14, "step": 45710 }, { "epoch": 0.08105115432113541, "grad_norm": 0.51953125, "learning_rate": 0.0011830293219426346, "loss": 0.1709, "step": 45712 }, { "epoch": 0.08105470048644522, "grad_norm": 0.291015625, "learning_rate": 0.0011829700569340963, "loss": 0.1514, "step": 45714 }, { "epoch": 0.08105824665175504, "grad_norm": 0.65234375, "learning_rate": 0.0011829107915626949, "loss": 0.168, "step": 45716 }, { "epoch": 0.08106179281706485, "grad_norm": 0.259765625, "learning_rate": 0.00118285152582869, "loss": 0.2058, "step": 45718 }, { "epoch": 0.08106533898237467, "grad_norm": 0.1884765625, "learning_rate": 0.0011827922597323407, "loss": 0.1785, "step": 45720 }, { "epoch": 0.08106888514768448, "grad_norm": 1.1953125, "learning_rate": 0.001182732993273906, "loss": 0.2484, "step": 45722 }, { "epoch": 0.0810724313129943, "grad_norm": 0.369140625, "learning_rate": 0.0011826737264536453, "loss": 0.2086, "step": 45724 }, { "epoch": 0.08107597747830411, "grad_norm": 0.30078125, "learning_rate": 0.001182614459271818, "loss": 0.1863, "step": 45726 }, { "epoch": 0.08107952364361393, "grad_norm": 0.259765625, "learning_rate": 0.0011825551917286827, "loss": 0.1842, "step": 45728 }, { "epoch": 0.08108306980892374, "grad_norm": 1.359375, "learning_rate": 0.0011824959238244993, "loss": 0.3421, "step": 45730 }, { "epoch": 0.08108661597423357, "grad_norm": 0.234375, "learning_rate": 0.0011824366555595264, "loss": 0.1752, "step": 45732 }, { "epoch": 0.08109016213954338, "grad_norm": 0.365234375, "learning_rate": 0.0011823773869340233, "loss": 0.2064, "step": 45734 }, { "epoch": 0.0810937083048532, "grad_norm": 0.91015625, "learning_rate": 0.0011823181179482496, "loss": 0.2731, "step": 45736 }, { "epoch": 0.08109725447016301, "grad_norm": 0.3515625, "learning_rate": 0.0011822588486024644, "loss": 0.1951, "step": 45738 }, { "epoch": 0.08110080063547283, "grad_norm": 0.60546875, "learning_rate": 0.0011821995788969264, "loss": 0.2031, "step": 45740 }, { "epoch": 0.08110434680078264, "grad_norm": 1.6015625, "learning_rate": 0.001182140308831895, "loss": 0.2013, "step": 45742 }, { "epoch": 0.08110789296609246, "grad_norm": 0.38671875, "learning_rate": 0.0011820810384076303, "loss": 0.15, "step": 45744 }, { "epoch": 0.08111143913140227, "grad_norm": 1.2578125, "learning_rate": 0.0011820217676243908, "loss": 0.2428, "step": 45746 }, { "epoch": 0.08111498529671209, "grad_norm": 0.359375, "learning_rate": 0.0011819624964824354, "loss": 0.21, "step": 45748 }, { "epoch": 0.0811185314620219, "grad_norm": 0.9296875, "learning_rate": 0.0011819032249820235, "loss": 0.1895, "step": 45750 }, { "epoch": 0.08112207762733172, "grad_norm": 0.5234375, "learning_rate": 0.0011818439531234147, "loss": 0.3437, "step": 45752 }, { "epoch": 0.08112562379264153, "grad_norm": 0.478515625, "learning_rate": 0.0011817846809068677, "loss": 0.2104, "step": 45754 }, { "epoch": 0.08112916995795134, "grad_norm": 0.474609375, "learning_rate": 0.0011817254083326422, "loss": 0.2028, "step": 45756 }, { "epoch": 0.08113271612326116, "grad_norm": 0.84375, "learning_rate": 0.0011816661354009973, "loss": 0.3845, "step": 45758 }, { "epoch": 0.08113626228857097, "grad_norm": 1.8203125, "learning_rate": 0.0011816068621121922, "loss": 0.2371, "step": 45760 }, { "epoch": 0.08113980845388079, "grad_norm": 0.84765625, "learning_rate": 0.001181547588466486, "loss": 0.1945, "step": 45762 }, { "epoch": 0.0811433546191906, "grad_norm": 0.74609375, "learning_rate": 0.0011814883144641382, "loss": 0.1389, "step": 45764 }, { "epoch": 0.08114690078450042, "grad_norm": 0.1650390625, "learning_rate": 0.0011814290401054078, "loss": 0.169, "step": 45766 }, { "epoch": 0.08115044694981023, "grad_norm": 0.890625, "learning_rate": 0.0011813697653905541, "loss": 0.1707, "step": 45768 }, { "epoch": 0.08115399311512005, "grad_norm": 0.291015625, "learning_rate": 0.0011813104903198363, "loss": 0.1805, "step": 45770 }, { "epoch": 0.08115753928042986, "grad_norm": 0.486328125, "learning_rate": 0.0011812512148935133, "loss": 0.2678, "step": 45772 }, { "epoch": 0.08116108544573968, "grad_norm": 0.33203125, "learning_rate": 0.0011811919391118448, "loss": 0.1502, "step": 45774 }, { "epoch": 0.08116463161104949, "grad_norm": 1.7890625, "learning_rate": 0.0011811326629750903, "loss": 0.2413, "step": 45776 }, { "epoch": 0.08116817777635932, "grad_norm": 0.56640625, "learning_rate": 0.001181073386483508, "loss": 0.1721, "step": 45778 }, { "epoch": 0.08117172394166913, "grad_norm": 0.404296875, "learning_rate": 0.0011810141096373584, "loss": 0.1544, "step": 45780 }, { "epoch": 0.08117527010697895, "grad_norm": 0.296875, "learning_rate": 0.0011809548324369, "loss": 0.2096, "step": 45782 }, { "epoch": 0.08117881627228876, "grad_norm": 0.1962890625, "learning_rate": 0.0011808955548823922, "loss": 0.3092, "step": 45784 }, { "epoch": 0.08118236243759858, "grad_norm": 0.384765625, "learning_rate": 0.001180836276974094, "loss": 0.2094, "step": 45786 }, { "epoch": 0.08118590860290839, "grad_norm": 0.5234375, "learning_rate": 0.0011807769987122651, "loss": 0.1397, "step": 45788 }, { "epoch": 0.0811894547682182, "grad_norm": 0.2294921875, "learning_rate": 0.0011807177200971644, "loss": 0.1826, "step": 45790 }, { "epoch": 0.08119300093352802, "grad_norm": 0.5703125, "learning_rate": 0.0011806584411290513, "loss": 0.2051, "step": 45792 }, { "epoch": 0.08119654709883783, "grad_norm": 0.2158203125, "learning_rate": 0.001180599161808185, "loss": 0.3116, "step": 45794 }, { "epoch": 0.08120009326414765, "grad_norm": 0.25390625, "learning_rate": 0.001180539882134825, "loss": 0.1784, "step": 45796 }, { "epoch": 0.08120363942945746, "grad_norm": 0.375, "learning_rate": 0.0011804806021092303, "loss": 0.2293, "step": 45798 }, { "epoch": 0.08120718559476728, "grad_norm": 0.51953125, "learning_rate": 0.00118042132173166, "loss": 0.1818, "step": 45800 }, { "epoch": 0.08121073176007709, "grad_norm": 1.875, "learning_rate": 0.0011803620410023737, "loss": 0.2399, "step": 45802 }, { "epoch": 0.08121427792538691, "grad_norm": 1.140625, "learning_rate": 0.0011803027599216302, "loss": 0.23, "step": 45804 }, { "epoch": 0.08121782409069672, "grad_norm": 0.2392578125, "learning_rate": 0.0011802434784896896, "loss": 0.232, "step": 45806 }, { "epoch": 0.08122137025600654, "grad_norm": 0.458984375, "learning_rate": 0.0011801841967068103, "loss": 0.1975, "step": 45808 }, { "epoch": 0.08122491642131635, "grad_norm": 0.357421875, "learning_rate": 0.0011801249145732523, "loss": 0.1623, "step": 45810 }, { "epoch": 0.08122846258662617, "grad_norm": 0.37109375, "learning_rate": 0.001180065632089274, "loss": 0.1689, "step": 45812 }, { "epoch": 0.08123200875193598, "grad_norm": 0.83203125, "learning_rate": 0.001180006349255135, "loss": 0.1767, "step": 45814 }, { "epoch": 0.0812355549172458, "grad_norm": 0.50390625, "learning_rate": 0.0011799470660710952, "loss": 0.1842, "step": 45816 }, { "epoch": 0.08123910108255561, "grad_norm": 0.60546875, "learning_rate": 0.0011798877825374128, "loss": 0.212, "step": 45818 }, { "epoch": 0.08124264724786542, "grad_norm": 0.54296875, "learning_rate": 0.0011798284986543482, "loss": 0.2654, "step": 45820 }, { "epoch": 0.08124619341317524, "grad_norm": 1.28125, "learning_rate": 0.0011797692144221601, "loss": 0.3951, "step": 45822 }, { "epoch": 0.08124973957848507, "grad_norm": 0.455078125, "learning_rate": 0.0011797099298411075, "loss": 0.1818, "step": 45824 }, { "epoch": 0.08125328574379488, "grad_norm": 0.2734375, "learning_rate": 0.0011796506449114502, "loss": 0.249, "step": 45826 }, { "epoch": 0.0812568319091047, "grad_norm": 0.81640625, "learning_rate": 0.001179591359633447, "loss": 0.4378, "step": 45828 }, { "epoch": 0.08126037807441451, "grad_norm": 0.42578125, "learning_rate": 0.0011795320740073576, "loss": 0.1726, "step": 45830 }, { "epoch": 0.08126392423972432, "grad_norm": 0.703125, "learning_rate": 0.0011794727880334408, "loss": 0.2042, "step": 45832 }, { "epoch": 0.08126747040503414, "grad_norm": 0.322265625, "learning_rate": 0.0011794135017119565, "loss": 0.2108, "step": 45834 }, { "epoch": 0.08127101657034395, "grad_norm": 0.28125, "learning_rate": 0.0011793542150431633, "loss": 0.1498, "step": 45836 }, { "epoch": 0.08127456273565377, "grad_norm": 0.828125, "learning_rate": 0.0011792949280273212, "loss": 0.2074, "step": 45838 }, { "epoch": 0.08127810890096358, "grad_norm": 0.38671875, "learning_rate": 0.0011792356406646893, "loss": 0.2994, "step": 45840 }, { "epoch": 0.0812816550662734, "grad_norm": 0.2353515625, "learning_rate": 0.0011791763529555269, "loss": 0.2672, "step": 45842 }, { "epoch": 0.08128520123158321, "grad_norm": 0.61328125, "learning_rate": 0.0011791170649000924, "loss": 0.2976, "step": 45844 }, { "epoch": 0.08128874739689303, "grad_norm": 0.451171875, "learning_rate": 0.0011790577764986463, "loss": 0.2095, "step": 45846 }, { "epoch": 0.08129229356220284, "grad_norm": 0.169921875, "learning_rate": 0.0011789984877514472, "loss": 0.1417, "step": 45848 }, { "epoch": 0.08129583972751266, "grad_norm": 0.6640625, "learning_rate": 0.0011789391986587548, "loss": 0.2365, "step": 45850 }, { "epoch": 0.08129938589282247, "grad_norm": 0.3203125, "learning_rate": 0.0011788799092208278, "loss": 0.2006, "step": 45852 }, { "epoch": 0.08130293205813229, "grad_norm": 0.365234375, "learning_rate": 0.0011788206194379262, "loss": 0.1846, "step": 45854 }, { "epoch": 0.0813064782234421, "grad_norm": 0.40234375, "learning_rate": 0.001178761329310309, "loss": 0.193, "step": 45856 }, { "epoch": 0.08131002438875191, "grad_norm": 1.4140625, "learning_rate": 0.0011787020388382354, "loss": 0.2941, "step": 45858 }, { "epoch": 0.08131357055406173, "grad_norm": 0.322265625, "learning_rate": 0.001178642748021965, "loss": 0.1829, "step": 45860 }, { "epoch": 0.08131711671937154, "grad_norm": 0.50390625, "learning_rate": 0.0011785834568617567, "loss": 0.1591, "step": 45862 }, { "epoch": 0.08132066288468136, "grad_norm": 0.46484375, "learning_rate": 0.0011785241653578702, "loss": 0.1598, "step": 45864 }, { "epoch": 0.08132420904999117, "grad_norm": 0.275390625, "learning_rate": 0.0011784648735105646, "loss": 0.1325, "step": 45866 }, { "epoch": 0.081327755215301, "grad_norm": 0.6015625, "learning_rate": 0.0011784055813200988, "loss": 0.3557, "step": 45868 }, { "epoch": 0.08133130138061082, "grad_norm": 0.98828125, "learning_rate": 0.001178346288786733, "loss": 0.215, "step": 45870 }, { "epoch": 0.08133484754592063, "grad_norm": 0.333984375, "learning_rate": 0.0011782869959107258, "loss": 0.1942, "step": 45872 }, { "epoch": 0.08133839371123044, "grad_norm": 1.984375, "learning_rate": 0.001178227702692337, "loss": 0.2749, "step": 45874 }, { "epoch": 0.08134193987654026, "grad_norm": 0.671875, "learning_rate": 0.0011781684091318255, "loss": 0.2366, "step": 45876 }, { "epoch": 0.08134548604185007, "grad_norm": 0.408203125, "learning_rate": 0.0011781091152294506, "loss": 0.1908, "step": 45878 }, { "epoch": 0.08134903220715989, "grad_norm": 0.73046875, "learning_rate": 0.0011780498209854721, "loss": 0.1324, "step": 45880 }, { "epoch": 0.0813525783724697, "grad_norm": 0.419921875, "learning_rate": 0.0011779905264001492, "loss": 0.2711, "step": 45882 }, { "epoch": 0.08135612453777952, "grad_norm": 1.9609375, "learning_rate": 0.0011779312314737405, "loss": 0.2091, "step": 45884 }, { "epoch": 0.08135967070308933, "grad_norm": 1.2890625, "learning_rate": 0.0011778719362065062, "loss": 0.3085, "step": 45886 }, { "epoch": 0.08136321686839915, "grad_norm": 0.30859375, "learning_rate": 0.001177812640598705, "loss": 0.2143, "step": 45888 }, { "epoch": 0.08136676303370896, "grad_norm": 0.357421875, "learning_rate": 0.0011777533446505968, "loss": 0.1424, "step": 45890 }, { "epoch": 0.08137030919901878, "grad_norm": 0.41015625, "learning_rate": 0.0011776940483624405, "loss": 0.1863, "step": 45892 }, { "epoch": 0.08137385536432859, "grad_norm": 0.6953125, "learning_rate": 0.0011776347517344957, "loss": 0.1823, "step": 45894 }, { "epoch": 0.0813774015296384, "grad_norm": 0.51171875, "learning_rate": 0.0011775754547670213, "loss": 0.1399, "step": 45896 }, { "epoch": 0.08138094769494822, "grad_norm": 0.625, "learning_rate": 0.0011775161574602772, "loss": 0.2066, "step": 45898 }, { "epoch": 0.08138449386025803, "grad_norm": 1.234375, "learning_rate": 0.0011774568598145227, "loss": 0.1441, "step": 45900 }, { "epoch": 0.08138804002556785, "grad_norm": 0.431640625, "learning_rate": 0.0011773975618300163, "loss": 0.3043, "step": 45902 }, { "epoch": 0.08139158619087766, "grad_norm": 0.345703125, "learning_rate": 0.0011773382635070183, "loss": 0.1776, "step": 45904 }, { "epoch": 0.08139513235618748, "grad_norm": 0.80859375, "learning_rate": 0.0011772789648457876, "loss": 0.1993, "step": 45906 }, { "epoch": 0.08139867852149729, "grad_norm": 0.89453125, "learning_rate": 0.0011772196658465836, "loss": 0.3401, "step": 45908 }, { "epoch": 0.0814022246868071, "grad_norm": 0.55078125, "learning_rate": 0.001177160366509665, "loss": 0.2094, "step": 45910 }, { "epoch": 0.08140577085211692, "grad_norm": 2.03125, "learning_rate": 0.0011771010668352926, "loss": 0.4858, "step": 45912 }, { "epoch": 0.08140931701742675, "grad_norm": 0.380859375, "learning_rate": 0.0011770417668237247, "loss": 0.309, "step": 45914 }, { "epoch": 0.08141286318273656, "grad_norm": 0.376953125, "learning_rate": 0.0011769824664752211, "loss": 0.1942, "step": 45916 }, { "epoch": 0.08141640934804638, "grad_norm": 0.75390625, "learning_rate": 0.0011769231657900406, "loss": 0.285, "step": 45918 }, { "epoch": 0.0814199555133562, "grad_norm": 0.283203125, "learning_rate": 0.0011768638647684428, "loss": 0.1779, "step": 45920 }, { "epoch": 0.08142350167866601, "grad_norm": 0.70703125, "learning_rate": 0.0011768045634106873, "loss": 0.1898, "step": 45922 }, { "epoch": 0.08142704784397582, "grad_norm": 0.279296875, "learning_rate": 0.0011767452617170331, "loss": 0.1886, "step": 45924 }, { "epoch": 0.08143059400928564, "grad_norm": 0.5859375, "learning_rate": 0.0011766859596877394, "loss": 0.1708, "step": 45926 }, { "epoch": 0.08143414017459545, "grad_norm": 1.2578125, "learning_rate": 0.0011766266573230665, "loss": 0.2601, "step": 45928 }, { "epoch": 0.08143768633990527, "grad_norm": 0.67578125, "learning_rate": 0.0011765673546232727, "loss": 0.245, "step": 45930 }, { "epoch": 0.08144123250521508, "grad_norm": 4.0625, "learning_rate": 0.0011765080515886179, "loss": 0.4374, "step": 45932 }, { "epoch": 0.0814447786705249, "grad_norm": 0.80078125, "learning_rate": 0.0011764487482193613, "loss": 0.1876, "step": 45934 }, { "epoch": 0.08144832483583471, "grad_norm": 0.353515625, "learning_rate": 0.0011763894445157626, "loss": 0.1573, "step": 45936 }, { "epoch": 0.08145187100114452, "grad_norm": 0.384765625, "learning_rate": 0.00117633014047808, "loss": 0.244, "step": 45938 }, { "epoch": 0.08145541716645434, "grad_norm": 2.25, "learning_rate": 0.0011762708361065747, "loss": 0.2499, "step": 45940 }, { "epoch": 0.08145896333176415, "grad_norm": 0.94140625, "learning_rate": 0.0011762115314015043, "loss": 0.2154, "step": 45942 }, { "epoch": 0.08146250949707397, "grad_norm": 1.625, "learning_rate": 0.0011761522263631294, "loss": 0.3794, "step": 45944 }, { "epoch": 0.08146605566238378, "grad_norm": 0.64453125, "learning_rate": 0.0011760929209917086, "loss": 0.145, "step": 45946 }, { "epoch": 0.0814696018276936, "grad_norm": 0.71484375, "learning_rate": 0.0011760336152875017, "loss": 0.2074, "step": 45948 }, { "epoch": 0.08147314799300341, "grad_norm": 0.3828125, "learning_rate": 0.001175974309250768, "loss": 0.1773, "step": 45950 }, { "epoch": 0.08147669415831323, "grad_norm": 1.0625, "learning_rate": 0.001175915002881767, "loss": 0.3089, "step": 45952 }, { "epoch": 0.08148024032362304, "grad_norm": 0.42578125, "learning_rate": 0.0011758556961807573, "loss": 0.1439, "step": 45954 }, { "epoch": 0.08148378648893286, "grad_norm": 0.451171875, "learning_rate": 0.0011757963891479994, "loss": 0.1425, "step": 45956 }, { "epoch": 0.08148733265424267, "grad_norm": 0.76171875, "learning_rate": 0.001175737081783752, "loss": 0.1921, "step": 45958 }, { "epoch": 0.0814908788195525, "grad_norm": 0.734375, "learning_rate": 0.0011756777740882744, "loss": 0.1867, "step": 45960 }, { "epoch": 0.08149442498486231, "grad_norm": 3.5625, "learning_rate": 0.0011756184660618259, "loss": 0.2532, "step": 45962 }, { "epoch": 0.08149797115017213, "grad_norm": 0.30078125, "learning_rate": 0.0011755591577046664, "loss": 0.1804, "step": 45964 }, { "epoch": 0.08150151731548194, "grad_norm": 0.515625, "learning_rate": 0.0011754998490170554, "loss": 0.2178, "step": 45966 }, { "epoch": 0.08150506348079176, "grad_norm": 0.1533203125, "learning_rate": 0.0011754405399992515, "loss": 0.1558, "step": 45968 }, { "epoch": 0.08150860964610157, "grad_norm": 0.423828125, "learning_rate": 0.0011753812306515147, "loss": 0.1844, "step": 45970 }, { "epoch": 0.08151215581141139, "grad_norm": 1.4453125, "learning_rate": 0.0011753219209741043, "loss": 0.1665, "step": 45972 }, { "epoch": 0.0815157019767212, "grad_norm": 0.55078125, "learning_rate": 0.0011752626109672792, "loss": 0.1514, "step": 45974 }, { "epoch": 0.08151924814203101, "grad_norm": 0.392578125, "learning_rate": 0.0011752033006312997, "loss": 0.1826, "step": 45976 }, { "epoch": 0.08152279430734083, "grad_norm": 2.328125, "learning_rate": 0.001175143989966424, "loss": 0.2148, "step": 45978 }, { "epoch": 0.08152634047265064, "grad_norm": 0.380859375, "learning_rate": 0.0011750846789729127, "loss": 0.229, "step": 45980 }, { "epoch": 0.08152988663796046, "grad_norm": 0.59765625, "learning_rate": 0.001175025367651024, "loss": 0.1923, "step": 45982 }, { "epoch": 0.08153343280327027, "grad_norm": 1.03125, "learning_rate": 0.0011749660560010187, "loss": 0.3076, "step": 45984 }, { "epoch": 0.08153697896858009, "grad_norm": 0.73046875, "learning_rate": 0.0011749067440231546, "loss": 0.257, "step": 45986 }, { "epoch": 0.0815405251338899, "grad_norm": 1.046875, "learning_rate": 0.0011748474317176925, "loss": 0.1226, "step": 45988 }, { "epoch": 0.08154407129919972, "grad_norm": 0.40625, "learning_rate": 0.0011747881190848912, "loss": 0.1724, "step": 45990 }, { "epoch": 0.08154761746450953, "grad_norm": 0.1806640625, "learning_rate": 0.0011747288061250097, "loss": 0.1864, "step": 45992 }, { "epoch": 0.08155116362981935, "grad_norm": 0.5703125, "learning_rate": 0.001174669492838308, "loss": 0.1997, "step": 45994 }, { "epoch": 0.08155470979512916, "grad_norm": 0.376953125, "learning_rate": 0.0011746101792250454, "loss": 0.1651, "step": 45996 }, { "epoch": 0.08155825596043897, "grad_norm": 0.8359375, "learning_rate": 0.0011745508652854812, "loss": 0.2164, "step": 45998 }, { "epoch": 0.08156180212574879, "grad_norm": 0.287109375, "learning_rate": 0.0011744915510198749, "loss": 0.2303, "step": 46000 }, { "epoch": 0.0815653482910586, "grad_norm": 0.1376953125, "learning_rate": 0.0011744322364284857, "loss": 0.1453, "step": 46002 }, { "epoch": 0.08156889445636843, "grad_norm": 0.82421875, "learning_rate": 0.0011743729215115731, "loss": 0.2132, "step": 46004 }, { "epoch": 0.08157244062167825, "grad_norm": 0.408203125, "learning_rate": 0.0011743136062693966, "loss": 0.1992, "step": 46006 }, { "epoch": 0.08157598678698806, "grad_norm": 0.16796875, "learning_rate": 0.0011742542907022156, "loss": 0.1327, "step": 46008 }, { "epoch": 0.08157953295229788, "grad_norm": 0.2578125, "learning_rate": 0.0011741949748102895, "loss": 0.2003, "step": 46010 }, { "epoch": 0.08158307911760769, "grad_norm": 0.8828125, "learning_rate": 0.0011741356585938775, "loss": 0.2759, "step": 46012 }, { "epoch": 0.0815866252829175, "grad_norm": 0.5, "learning_rate": 0.0011740763420532395, "loss": 0.1813, "step": 46014 }, { "epoch": 0.08159017144822732, "grad_norm": 0.98828125, "learning_rate": 0.0011740170251886342, "loss": 0.1994, "step": 46016 }, { "epoch": 0.08159371761353713, "grad_norm": 0.330078125, "learning_rate": 0.0011739577080003216, "loss": 0.2356, "step": 46018 }, { "epoch": 0.08159726377884695, "grad_norm": 0.81640625, "learning_rate": 0.001173898390488561, "loss": 0.2706, "step": 46020 }, { "epoch": 0.08160080994415676, "grad_norm": 1.2109375, "learning_rate": 0.0011738390726536117, "loss": 0.2112, "step": 46022 }, { "epoch": 0.08160435610946658, "grad_norm": 0.34375, "learning_rate": 0.001173779754495733, "loss": 0.1277, "step": 46024 }, { "epoch": 0.08160790227477639, "grad_norm": 0.439453125, "learning_rate": 0.001173720436015185, "loss": 0.2034, "step": 46026 }, { "epoch": 0.0816114484400862, "grad_norm": 0.88671875, "learning_rate": 0.0011736611172122261, "loss": 0.1631, "step": 46028 }, { "epoch": 0.08161499460539602, "grad_norm": 0.515625, "learning_rate": 0.0011736017980871168, "loss": 0.2862, "step": 46030 }, { "epoch": 0.08161854077070584, "grad_norm": 0.306640625, "learning_rate": 0.0011735424786401157, "loss": 0.178, "step": 46032 }, { "epoch": 0.08162208693601565, "grad_norm": 0.30078125, "learning_rate": 0.0011734831588714827, "loss": 0.1898, "step": 46034 }, { "epoch": 0.08162563310132546, "grad_norm": 0.28125, "learning_rate": 0.0011734238387814766, "loss": 0.2098, "step": 46036 }, { "epoch": 0.08162917926663528, "grad_norm": 2.15625, "learning_rate": 0.0011733645183703576, "loss": 0.2601, "step": 46038 }, { "epoch": 0.0816327254319451, "grad_norm": 0.63671875, "learning_rate": 0.0011733051976383847, "loss": 0.1786, "step": 46040 }, { "epoch": 0.08163627159725491, "grad_norm": 0.94140625, "learning_rate": 0.0011732458765858177, "loss": 0.1961, "step": 46042 }, { "epoch": 0.08163981776256472, "grad_norm": 0.294921875, "learning_rate": 0.0011731865552129152, "loss": 0.1706, "step": 46044 }, { "epoch": 0.08164336392787454, "grad_norm": 1.0546875, "learning_rate": 0.001173127233519938, "loss": 0.2082, "step": 46046 }, { "epoch": 0.08164691009318435, "grad_norm": 0.310546875, "learning_rate": 0.0011730679115071444, "loss": 0.2517, "step": 46048 }, { "epoch": 0.08165045625849418, "grad_norm": 0.578125, "learning_rate": 0.0011730085891747944, "loss": 0.1869, "step": 46050 }, { "epoch": 0.081654002423804, "grad_norm": 0.52734375, "learning_rate": 0.0011729492665231466, "loss": 0.1645, "step": 46052 }, { "epoch": 0.08165754858911381, "grad_norm": 0.64453125, "learning_rate": 0.0011728899435524617, "loss": 0.1936, "step": 46054 }, { "epoch": 0.08166109475442362, "grad_norm": 0.6640625, "learning_rate": 0.0011728306202629982, "loss": 0.1322, "step": 46056 }, { "epoch": 0.08166464091973344, "grad_norm": 0.5546875, "learning_rate": 0.001172771296655016, "loss": 0.1961, "step": 46058 }, { "epoch": 0.08166818708504325, "grad_norm": 1.609375, "learning_rate": 0.0011727119727287744, "loss": 0.2552, "step": 46060 }, { "epoch": 0.08167173325035307, "grad_norm": 0.37890625, "learning_rate": 0.0011726526484845327, "loss": 0.1835, "step": 46062 }, { "epoch": 0.08167527941566288, "grad_norm": 0.421875, "learning_rate": 0.0011725933239225509, "loss": 0.2075, "step": 46064 }, { "epoch": 0.0816788255809727, "grad_norm": 0.498046875, "learning_rate": 0.001172533999043088, "loss": 0.237, "step": 46066 }, { "epoch": 0.08168237174628251, "grad_norm": 0.2490234375, "learning_rate": 0.0011724746738464033, "loss": 0.1656, "step": 46068 }, { "epoch": 0.08168591791159233, "grad_norm": 0.4140625, "learning_rate": 0.0011724153483327567, "loss": 0.2222, "step": 46070 }, { "epoch": 0.08168946407690214, "grad_norm": 0.5625, "learning_rate": 0.001172356022502407, "loss": 0.1753, "step": 46072 }, { "epoch": 0.08169301024221196, "grad_norm": 0.1796875, "learning_rate": 0.0011722966963556146, "loss": 0.194, "step": 46074 }, { "epoch": 0.08169655640752177, "grad_norm": 0.3828125, "learning_rate": 0.001172237369892638, "loss": 0.1888, "step": 46076 }, { "epoch": 0.08170010257283158, "grad_norm": 0.484375, "learning_rate": 0.0011721780431137373, "loss": 0.313, "step": 46078 }, { "epoch": 0.0817036487381414, "grad_norm": 0.46484375, "learning_rate": 0.0011721187160191717, "loss": 0.1645, "step": 46080 }, { "epoch": 0.08170719490345121, "grad_norm": 0.796875, "learning_rate": 0.001172059388609201, "loss": 0.1802, "step": 46082 }, { "epoch": 0.08171074106876103, "grad_norm": 0.349609375, "learning_rate": 0.0011720000608840843, "loss": 0.2989, "step": 46084 }, { "epoch": 0.08171428723407084, "grad_norm": 0.1943359375, "learning_rate": 0.001171940732844081, "loss": 0.1956, "step": 46086 }, { "epoch": 0.08171783339938066, "grad_norm": 1.375, "learning_rate": 0.0011718814044894509, "loss": 0.1982, "step": 46088 }, { "epoch": 0.08172137956469047, "grad_norm": 0.455078125, "learning_rate": 0.0011718220758204529, "loss": 0.1515, "step": 46090 }, { "epoch": 0.08172492573000029, "grad_norm": 1.203125, "learning_rate": 0.0011717627468373472, "loss": 0.4931, "step": 46092 }, { "epoch": 0.0817284718953101, "grad_norm": 0.87890625, "learning_rate": 0.001171703417540393, "loss": 0.197, "step": 46094 }, { "epoch": 0.08173201806061993, "grad_norm": 0.78125, "learning_rate": 0.0011716440879298495, "loss": 0.1924, "step": 46096 }, { "epoch": 0.08173556422592974, "grad_norm": 0.40625, "learning_rate": 0.0011715847580059764, "loss": 0.1535, "step": 46098 }, { "epoch": 0.08173911039123956, "grad_norm": 0.39453125, "learning_rate": 0.0011715254277690332, "loss": 0.2652, "step": 46100 }, { "epoch": 0.08174265655654937, "grad_norm": 0.40234375, "learning_rate": 0.0011714660972192794, "loss": 0.1425, "step": 46102 }, { "epoch": 0.08174620272185919, "grad_norm": 0.359375, "learning_rate": 0.0011714067663569745, "loss": 0.1523, "step": 46104 }, { "epoch": 0.081749748887169, "grad_norm": 0.5625, "learning_rate": 0.0011713474351823776, "loss": 0.1767, "step": 46106 }, { "epoch": 0.08175329505247882, "grad_norm": 0.49609375, "learning_rate": 0.0011712881036957485, "loss": 0.1573, "step": 46108 }, { "epoch": 0.08175684121778863, "grad_norm": 0.97265625, "learning_rate": 0.0011712287718973467, "loss": 0.1614, "step": 46110 }, { "epoch": 0.08176038738309845, "grad_norm": 0.6015625, "learning_rate": 0.0011711694397874316, "loss": 0.1519, "step": 46112 }, { "epoch": 0.08176393354840826, "grad_norm": 0.33984375, "learning_rate": 0.0011711101073662627, "loss": 0.1554, "step": 46114 }, { "epoch": 0.08176747971371807, "grad_norm": 0.6484375, "learning_rate": 0.0011710507746340996, "loss": 0.1905, "step": 46116 }, { "epoch": 0.08177102587902789, "grad_norm": 0.60546875, "learning_rate": 0.0011709914415912015, "loss": 0.1633, "step": 46118 }, { "epoch": 0.0817745720443377, "grad_norm": 0.98828125, "learning_rate": 0.0011709321082378282, "loss": 0.562, "step": 46120 }, { "epoch": 0.08177811820964752, "grad_norm": 0.49609375, "learning_rate": 0.0011708727745742393, "loss": 0.1804, "step": 46122 }, { "epoch": 0.08178166437495733, "grad_norm": 0.6796875, "learning_rate": 0.0011708134406006939, "loss": 0.1676, "step": 46124 }, { "epoch": 0.08178521054026715, "grad_norm": 0.208984375, "learning_rate": 0.0011707541063174513, "loss": 0.165, "step": 46126 }, { "epoch": 0.08178875670557696, "grad_norm": 1.625, "learning_rate": 0.0011706947717247719, "loss": 0.2031, "step": 46128 }, { "epoch": 0.08179230287088678, "grad_norm": 1.0078125, "learning_rate": 0.001170635436822914, "loss": 0.1549, "step": 46130 }, { "epoch": 0.08179584903619659, "grad_norm": 0.185546875, "learning_rate": 0.0011705761016121384, "loss": 0.2274, "step": 46132 }, { "epoch": 0.0817993952015064, "grad_norm": 0.91015625, "learning_rate": 0.001170516766092703, "loss": 0.2016, "step": 46134 }, { "epoch": 0.08180294136681622, "grad_norm": 0.447265625, "learning_rate": 0.0011704574302648693, "loss": 0.167, "step": 46136 }, { "epoch": 0.08180648753212603, "grad_norm": 0.52734375, "learning_rate": 0.001170398094128895, "loss": 0.208, "step": 46138 }, { "epoch": 0.08181003369743586, "grad_norm": 0.51171875, "learning_rate": 0.0011703387576850407, "loss": 0.1949, "step": 46140 }, { "epoch": 0.08181357986274568, "grad_norm": 0.1943359375, "learning_rate": 0.0011702794209335653, "loss": 0.1968, "step": 46142 }, { "epoch": 0.08181712602805549, "grad_norm": 0.82421875, "learning_rate": 0.0011702200838747288, "loss": 0.3775, "step": 46144 }, { "epoch": 0.08182067219336531, "grad_norm": 0.2294921875, "learning_rate": 0.0011701607465087903, "loss": 0.1942, "step": 46146 }, { "epoch": 0.08182421835867512, "grad_norm": 0.83203125, "learning_rate": 0.0011701014088360093, "loss": 0.1911, "step": 46148 }, { "epoch": 0.08182776452398494, "grad_norm": 1.71875, "learning_rate": 0.0011700420708566457, "loss": 0.188, "step": 46150 }, { "epoch": 0.08183131068929475, "grad_norm": 0.51171875, "learning_rate": 0.0011699827325709587, "loss": 0.1426, "step": 46152 }, { "epoch": 0.08183485685460457, "grad_norm": 0.435546875, "learning_rate": 0.0011699233939792076, "loss": 0.1878, "step": 46154 }, { "epoch": 0.08183840301991438, "grad_norm": 0.498046875, "learning_rate": 0.0011698640550816526, "loss": 0.1962, "step": 46156 }, { "epoch": 0.0818419491852242, "grad_norm": 0.65625, "learning_rate": 0.0011698047158785524, "loss": 0.189, "step": 46158 }, { "epoch": 0.08184549535053401, "grad_norm": 0.234375, "learning_rate": 0.001169745376370167, "loss": 0.192, "step": 46160 }, { "epoch": 0.08184904151584382, "grad_norm": 0.74609375, "learning_rate": 0.0011696860365567562, "loss": 0.2097, "step": 46162 }, { "epoch": 0.08185258768115364, "grad_norm": 0.498046875, "learning_rate": 0.001169626696438579, "loss": 0.2291, "step": 46164 }, { "epoch": 0.08185613384646345, "grad_norm": 1.7265625, "learning_rate": 0.001169567356015895, "loss": 0.3396, "step": 46166 }, { "epoch": 0.08185968001177327, "grad_norm": 0.4140625, "learning_rate": 0.0011695080152889638, "loss": 0.1405, "step": 46168 }, { "epoch": 0.08186322617708308, "grad_norm": 0.8671875, "learning_rate": 0.0011694486742580448, "loss": 0.1694, "step": 46170 }, { "epoch": 0.0818667723423929, "grad_norm": 0.84765625, "learning_rate": 0.0011693893329233975, "loss": 0.1709, "step": 46172 }, { "epoch": 0.08187031850770271, "grad_norm": 0.427734375, "learning_rate": 0.0011693299912852821, "loss": 0.1765, "step": 46174 }, { "epoch": 0.08187386467301253, "grad_norm": 0.341796875, "learning_rate": 0.0011692706493439573, "loss": 0.447, "step": 46176 }, { "epoch": 0.08187741083832234, "grad_norm": 0.2294921875, "learning_rate": 0.001169211307099683, "loss": 0.1483, "step": 46178 }, { "epoch": 0.08188095700363215, "grad_norm": 0.447265625, "learning_rate": 0.0011691519645527185, "loss": 0.1883, "step": 46180 }, { "epoch": 0.08188450316894197, "grad_norm": 0.4375, "learning_rate": 0.0011690926217033239, "loss": 0.1036, "step": 46182 }, { "epoch": 0.08188804933425178, "grad_norm": 0.265625, "learning_rate": 0.0011690332785517575, "loss": 0.1897, "step": 46184 }, { "epoch": 0.08189159549956161, "grad_norm": 0.40234375, "learning_rate": 0.0011689739350982803, "loss": 0.3377, "step": 46186 }, { "epoch": 0.08189514166487143, "grad_norm": 0.15234375, "learning_rate": 0.0011689145913431507, "loss": 0.1536, "step": 46188 }, { "epoch": 0.08189868783018124, "grad_norm": 0.9765625, "learning_rate": 0.0011688552472866292, "loss": 0.1547, "step": 46190 }, { "epoch": 0.08190223399549106, "grad_norm": 0.2119140625, "learning_rate": 0.0011687959029289745, "loss": 0.1618, "step": 46192 }, { "epoch": 0.08190578016080087, "grad_norm": 0.412109375, "learning_rate": 0.0011687365582704467, "loss": 0.1581, "step": 46194 }, { "epoch": 0.08190932632611068, "grad_norm": 0.85546875, "learning_rate": 0.0011686772133113052, "loss": 0.1744, "step": 46196 }, { "epoch": 0.0819128724914205, "grad_norm": 1.359375, "learning_rate": 0.001168617868051809, "loss": 0.1516, "step": 46198 }, { "epoch": 0.08191641865673031, "grad_norm": 0.46484375, "learning_rate": 0.0011685585224922184, "loss": 0.2126, "step": 46200 }, { "epoch": 0.08191996482204013, "grad_norm": 0.79296875, "learning_rate": 0.0011684991766327927, "loss": 0.2481, "step": 46202 }, { "epoch": 0.08192351098734994, "grad_norm": 0.2470703125, "learning_rate": 0.0011684398304737914, "loss": 0.2565, "step": 46204 }, { "epoch": 0.08192705715265976, "grad_norm": 2.421875, "learning_rate": 0.0011683804840154739, "loss": 0.1769, "step": 46206 }, { "epoch": 0.08193060331796957, "grad_norm": 0.486328125, "learning_rate": 0.0011683211372581, "loss": 0.1571, "step": 46208 }, { "epoch": 0.08193414948327939, "grad_norm": 0.70703125, "learning_rate": 0.001168261790201929, "loss": 0.1549, "step": 46210 }, { "epoch": 0.0819376956485892, "grad_norm": 0.486328125, "learning_rate": 0.0011682024428472204, "loss": 0.298, "step": 46212 }, { "epoch": 0.08194124181389902, "grad_norm": 0.4140625, "learning_rate": 0.0011681430951942344, "loss": 0.1732, "step": 46214 }, { "epoch": 0.08194478797920883, "grad_norm": 0.328125, "learning_rate": 0.00116808374724323, "loss": 0.1894, "step": 46216 }, { "epoch": 0.08194833414451864, "grad_norm": 0.421875, "learning_rate": 0.0011680243989944667, "loss": 0.1608, "step": 46218 }, { "epoch": 0.08195188030982846, "grad_norm": 0.29296875, "learning_rate": 0.001167965050448204, "loss": 0.1784, "step": 46220 }, { "epoch": 0.08195542647513827, "grad_norm": 0.259765625, "learning_rate": 0.0011679057016047022, "loss": 0.2406, "step": 46222 }, { "epoch": 0.08195897264044809, "grad_norm": 0.349609375, "learning_rate": 0.0011678463524642199, "loss": 0.1685, "step": 46224 }, { "epoch": 0.0819625188057579, "grad_norm": 0.25390625, "learning_rate": 0.001167787003027017, "loss": 0.1688, "step": 46226 }, { "epoch": 0.08196606497106772, "grad_norm": 0.416015625, "learning_rate": 0.0011677276532933531, "loss": 0.2096, "step": 46228 }, { "epoch": 0.08196961113637753, "grad_norm": 1.0234375, "learning_rate": 0.001167668303263488, "loss": 0.3177, "step": 46230 }, { "epoch": 0.08197315730168736, "grad_norm": 0.359375, "learning_rate": 0.001167608952937681, "loss": 0.1904, "step": 46232 }, { "epoch": 0.08197670346699718, "grad_norm": 0.19140625, "learning_rate": 0.0011675496023161916, "loss": 0.1519, "step": 46234 }, { "epoch": 0.08198024963230699, "grad_norm": 0.255859375, "learning_rate": 0.0011674902513992798, "loss": 0.1449, "step": 46236 }, { "epoch": 0.0819837957976168, "grad_norm": 0.24609375, "learning_rate": 0.0011674309001872046, "loss": 0.1278, "step": 46238 }, { "epoch": 0.08198734196292662, "grad_norm": 0.6875, "learning_rate": 0.0011673715486802258, "loss": 0.2069, "step": 46240 }, { "epoch": 0.08199088812823643, "grad_norm": 0.2275390625, "learning_rate": 0.0011673121968786028, "loss": 0.1622, "step": 46242 }, { "epoch": 0.08199443429354625, "grad_norm": 0.5390625, "learning_rate": 0.0011672528447825956, "loss": 0.1416, "step": 46244 }, { "epoch": 0.08199798045885606, "grad_norm": 0.3984375, "learning_rate": 0.0011671934923924634, "loss": 0.278, "step": 46246 }, { "epoch": 0.08200152662416588, "grad_norm": 0.3125, "learning_rate": 0.001167134139708466, "loss": 0.2027, "step": 46248 }, { "epoch": 0.08200507278947569, "grad_norm": 0.62890625, "learning_rate": 0.0011670747867308627, "loss": 0.2145, "step": 46250 }, { "epoch": 0.0820086189547855, "grad_norm": 0.93359375, "learning_rate": 0.0011670154334599136, "loss": 0.2675, "step": 46252 }, { "epoch": 0.08201216512009532, "grad_norm": 0.40625, "learning_rate": 0.001166956079895878, "loss": 0.1858, "step": 46254 }, { "epoch": 0.08201571128540514, "grad_norm": 0.42578125, "learning_rate": 0.0011668967260390149, "loss": 0.1732, "step": 46256 }, { "epoch": 0.08201925745071495, "grad_norm": 0.33203125, "learning_rate": 0.0011668373718895844, "loss": 0.1716, "step": 46258 }, { "epoch": 0.08202280361602476, "grad_norm": 0.30078125, "learning_rate": 0.0011667780174478462, "loss": 0.1256, "step": 46260 }, { "epoch": 0.08202634978133458, "grad_norm": 0.439453125, "learning_rate": 0.0011667186627140598, "loss": 0.286, "step": 46262 }, { "epoch": 0.0820298959466444, "grad_norm": 1.2265625, "learning_rate": 0.0011666593076884847, "loss": 0.2304, "step": 46264 }, { "epoch": 0.08203344211195421, "grad_norm": 0.42578125, "learning_rate": 0.0011665999523713805, "loss": 0.1791, "step": 46266 }, { "epoch": 0.08203698827726402, "grad_norm": 0.373046875, "learning_rate": 0.001166540596763007, "loss": 0.1848, "step": 46268 }, { "epoch": 0.08204053444257384, "grad_norm": 0.23046875, "learning_rate": 0.0011664812408636233, "loss": 0.1998, "step": 46270 }, { "epoch": 0.08204408060788365, "grad_norm": 0.37890625, "learning_rate": 0.0011664218846734894, "loss": 0.1873, "step": 46272 }, { "epoch": 0.08204762677319347, "grad_norm": 0.67578125, "learning_rate": 0.0011663625281928647, "loss": 0.2311, "step": 46274 }, { "epoch": 0.0820511729385033, "grad_norm": 0.58984375, "learning_rate": 0.001166303171422009, "loss": 0.1729, "step": 46276 }, { "epoch": 0.08205471910381311, "grad_norm": 1.34375, "learning_rate": 0.0011662438143611815, "loss": 0.4734, "step": 46278 }, { "epoch": 0.08205826526912292, "grad_norm": 0.3203125, "learning_rate": 0.0011661844570106422, "loss": 0.1713, "step": 46280 }, { "epoch": 0.08206181143443274, "grad_norm": 1.6015625, "learning_rate": 0.0011661250993706502, "loss": 0.2313, "step": 46282 }, { "epoch": 0.08206535759974255, "grad_norm": 0.875, "learning_rate": 0.001166065741441466, "loss": 0.1991, "step": 46284 }, { "epoch": 0.08206890376505237, "grad_norm": 0.28125, "learning_rate": 0.001166006383223348, "loss": 0.2026, "step": 46286 }, { "epoch": 0.08207244993036218, "grad_norm": 0.55078125, "learning_rate": 0.0011659470247165568, "loss": 0.175, "step": 46288 }, { "epoch": 0.082075996095672, "grad_norm": 0.62890625, "learning_rate": 0.0011658876659213517, "loss": 0.1671, "step": 46290 }, { "epoch": 0.08207954226098181, "grad_norm": 0.29296875, "learning_rate": 0.001165828306837992, "loss": 0.1552, "step": 46292 }, { "epoch": 0.08208308842629163, "grad_norm": 0.212890625, "learning_rate": 0.0011657689474667376, "loss": 0.3556, "step": 46294 }, { "epoch": 0.08208663459160144, "grad_norm": 2.609375, "learning_rate": 0.001165709587807848, "loss": 0.3747, "step": 46296 }, { "epoch": 0.08209018075691125, "grad_norm": 0.31640625, "learning_rate": 0.0011656502278615827, "loss": 0.2052, "step": 46298 }, { "epoch": 0.08209372692222107, "grad_norm": 0.32421875, "learning_rate": 0.001165590867628202, "loss": 0.1892, "step": 46300 }, { "epoch": 0.08209727308753088, "grad_norm": 1.4375, "learning_rate": 0.0011655315071079644, "loss": 0.2188, "step": 46302 }, { "epoch": 0.0821008192528407, "grad_norm": 0.302734375, "learning_rate": 0.0011654721463011301, "loss": 0.1732, "step": 46304 }, { "epoch": 0.08210436541815051, "grad_norm": 0.66015625, "learning_rate": 0.0011654127852079588, "loss": 0.2901, "step": 46306 }, { "epoch": 0.08210791158346033, "grad_norm": 0.65625, "learning_rate": 0.0011653534238287098, "loss": 0.1681, "step": 46308 }, { "epoch": 0.08211145774877014, "grad_norm": 0.48828125, "learning_rate": 0.0011652940621636433, "loss": 0.1841, "step": 46310 }, { "epoch": 0.08211500391407996, "grad_norm": 0.369140625, "learning_rate": 0.0011652347002130184, "loss": 0.1863, "step": 46312 }, { "epoch": 0.08211855007938977, "grad_norm": 0.412109375, "learning_rate": 0.0011651753379770943, "loss": 0.5548, "step": 46314 }, { "epoch": 0.08212209624469959, "grad_norm": 0.259765625, "learning_rate": 0.0011651159754561317, "loss": 0.1749, "step": 46316 }, { "epoch": 0.0821256424100094, "grad_norm": 1.21875, "learning_rate": 0.0011650566126503892, "loss": 0.1602, "step": 46318 }, { "epoch": 0.08212918857531921, "grad_norm": 0.40234375, "learning_rate": 0.001164997249560127, "loss": 0.181, "step": 46320 }, { "epoch": 0.08213273474062904, "grad_norm": 0.5625, "learning_rate": 0.0011649378861856047, "loss": 0.2501, "step": 46322 }, { "epoch": 0.08213628090593886, "grad_norm": 0.6875, "learning_rate": 0.001164878522527082, "loss": 0.201, "step": 46324 }, { "epoch": 0.08213982707124867, "grad_norm": 1.6484375, "learning_rate": 0.0011648191585848181, "loss": 0.3055, "step": 46326 }, { "epoch": 0.08214337323655849, "grad_norm": 0.3125, "learning_rate": 0.0011647597943590727, "loss": 0.1545, "step": 46328 }, { "epoch": 0.0821469194018683, "grad_norm": 0.50390625, "learning_rate": 0.0011647004298501059, "loss": 0.1994, "step": 46330 }, { "epoch": 0.08215046556717812, "grad_norm": 0.333984375, "learning_rate": 0.0011646410650581764, "loss": 0.2148, "step": 46332 }, { "epoch": 0.08215401173248793, "grad_norm": 0.421875, "learning_rate": 0.0011645816999835451, "loss": 0.2528, "step": 46334 }, { "epoch": 0.08215755789779774, "grad_norm": 0.34765625, "learning_rate": 0.0011645223346264707, "loss": 0.1908, "step": 46336 }, { "epoch": 0.08216110406310756, "grad_norm": 1.015625, "learning_rate": 0.0011644629689872132, "loss": 0.2413, "step": 46338 }, { "epoch": 0.08216465022841737, "grad_norm": 1.4375, "learning_rate": 0.0011644036030660318, "loss": 0.3507, "step": 46340 }, { "epoch": 0.08216819639372719, "grad_norm": 0.306640625, "learning_rate": 0.0011643442368631867, "loss": 0.1894, "step": 46342 }, { "epoch": 0.082171742559037, "grad_norm": 1.09375, "learning_rate": 0.0011642848703789376, "loss": 0.1782, "step": 46344 }, { "epoch": 0.08217528872434682, "grad_norm": 0.51171875, "learning_rate": 0.0011642255036135433, "loss": 0.179, "step": 46346 }, { "epoch": 0.08217883488965663, "grad_norm": 0.291015625, "learning_rate": 0.001164166136567264, "loss": 0.1836, "step": 46348 }, { "epoch": 0.08218238105496645, "grad_norm": 0.248046875, "learning_rate": 0.0011641067692403596, "loss": 0.2158, "step": 46350 }, { "epoch": 0.08218592722027626, "grad_norm": 0.234375, "learning_rate": 0.0011640474016330889, "loss": 0.2051, "step": 46352 }, { "epoch": 0.08218947338558608, "grad_norm": 2.53125, "learning_rate": 0.0011639880337457127, "loss": 0.2181, "step": 46354 }, { "epoch": 0.08219301955089589, "grad_norm": 0.333984375, "learning_rate": 0.0011639286655784895, "loss": 0.124, "step": 46356 }, { "epoch": 0.0821965657162057, "grad_norm": 0.51953125, "learning_rate": 0.0011638692971316798, "loss": 0.2235, "step": 46358 }, { "epoch": 0.08220011188151552, "grad_norm": 0.48828125, "learning_rate": 0.0011638099284055426, "loss": 0.1858, "step": 46360 }, { "epoch": 0.08220365804682533, "grad_norm": 0.33203125, "learning_rate": 0.0011637505594003382, "loss": 0.2725, "step": 46362 }, { "epoch": 0.08220720421213515, "grad_norm": 0.58203125, "learning_rate": 0.0011636911901163256, "loss": 0.2046, "step": 46364 }, { "epoch": 0.08221075037744496, "grad_norm": 0.68359375, "learning_rate": 0.001163631820553765, "loss": 0.2195, "step": 46366 }, { "epoch": 0.08221429654275479, "grad_norm": 0.97265625, "learning_rate": 0.0011635724507129154, "loss": 0.1569, "step": 46368 }, { "epoch": 0.0822178427080646, "grad_norm": 0.3125, "learning_rate": 0.001163513080594037, "loss": 0.1447, "step": 46370 }, { "epoch": 0.08222138887337442, "grad_norm": 0.5546875, "learning_rate": 0.0011634537101973895, "loss": 0.1598, "step": 46372 }, { "epoch": 0.08222493503868424, "grad_norm": 0.25390625, "learning_rate": 0.0011633943395232321, "loss": 0.2083, "step": 46374 }, { "epoch": 0.08222848120399405, "grad_norm": 0.314453125, "learning_rate": 0.0011633349685718246, "loss": 0.1996, "step": 46376 }, { "epoch": 0.08223202736930386, "grad_norm": 0.484375, "learning_rate": 0.0011632755973434267, "loss": 0.1979, "step": 46378 }, { "epoch": 0.08223557353461368, "grad_norm": 0.53125, "learning_rate": 0.0011632162258382983, "loss": 0.255, "step": 46380 }, { "epoch": 0.0822391196999235, "grad_norm": 0.3828125, "learning_rate": 0.0011631568540566989, "loss": 0.1362, "step": 46382 }, { "epoch": 0.08224266586523331, "grad_norm": 0.70703125, "learning_rate": 0.0011630974819988882, "loss": 0.2688, "step": 46384 }, { "epoch": 0.08224621203054312, "grad_norm": 0.3203125, "learning_rate": 0.0011630381096651254, "loss": 0.1999, "step": 46386 }, { "epoch": 0.08224975819585294, "grad_norm": 1.0078125, "learning_rate": 0.0011629787370556707, "loss": 0.2802, "step": 46388 }, { "epoch": 0.08225330436116275, "grad_norm": 0.185546875, "learning_rate": 0.001162919364170784, "loss": 0.1247, "step": 46390 }, { "epoch": 0.08225685052647257, "grad_norm": 0.234375, "learning_rate": 0.0011628599910107236, "loss": 0.1361, "step": 46392 }, { "epoch": 0.08226039669178238, "grad_norm": 0.65625, "learning_rate": 0.0011628006175757508, "loss": 0.166, "step": 46394 }, { "epoch": 0.0822639428570922, "grad_norm": 0.98046875, "learning_rate": 0.0011627412438661242, "loss": 0.207, "step": 46396 }, { "epoch": 0.08226748902240201, "grad_norm": 0.2275390625, "learning_rate": 0.0011626818698821045, "loss": 0.1438, "step": 46398 }, { "epoch": 0.08227103518771182, "grad_norm": 0.478515625, "learning_rate": 0.00116262249562395, "loss": 0.219, "step": 46400 }, { "epoch": 0.08227458135302164, "grad_norm": 1.6484375, "learning_rate": 0.0011625631210919214, "loss": 0.4463, "step": 46402 }, { "epoch": 0.08227812751833145, "grad_norm": 0.44140625, "learning_rate": 0.001162503746286278, "loss": 0.1818, "step": 46404 }, { "epoch": 0.08228167368364127, "grad_norm": 0.251953125, "learning_rate": 0.0011624443712072796, "loss": 0.1527, "step": 46406 }, { "epoch": 0.08228521984895108, "grad_norm": 0.9453125, "learning_rate": 0.0011623849958551855, "loss": 0.2764, "step": 46408 }, { "epoch": 0.0822887660142609, "grad_norm": 0.59375, "learning_rate": 0.001162325620230256, "loss": 0.1479, "step": 46410 }, { "epoch": 0.08229231217957073, "grad_norm": 0.5625, "learning_rate": 0.0011622662443327503, "loss": 0.2058, "step": 46412 }, { "epoch": 0.08229585834488054, "grad_norm": 0.263671875, "learning_rate": 0.001162206868162928, "loss": 0.1824, "step": 46414 }, { "epoch": 0.08229940451019035, "grad_norm": 0.67578125, "learning_rate": 0.0011621474917210496, "loss": 0.3529, "step": 46416 }, { "epoch": 0.08230295067550017, "grad_norm": 0.482421875, "learning_rate": 0.0011620881150073736, "loss": 0.1595, "step": 46418 }, { "epoch": 0.08230649684080998, "grad_norm": 0.2060546875, "learning_rate": 0.0011620287380221606, "loss": 0.2764, "step": 46420 }, { "epoch": 0.0823100430061198, "grad_norm": 0.466796875, "learning_rate": 0.0011619693607656696, "loss": 0.164, "step": 46422 }, { "epoch": 0.08231358917142961, "grad_norm": 0.86328125, "learning_rate": 0.001161909983238161, "loss": 0.215, "step": 46424 }, { "epoch": 0.08231713533673943, "grad_norm": 0.67578125, "learning_rate": 0.0011618506054398938, "loss": 0.2096, "step": 46426 }, { "epoch": 0.08232068150204924, "grad_norm": 0.56640625, "learning_rate": 0.0011617912273711281, "loss": 0.203, "step": 46428 }, { "epoch": 0.08232422766735906, "grad_norm": 0.369140625, "learning_rate": 0.0011617318490321233, "loss": 0.1824, "step": 46430 }, { "epoch": 0.08232777383266887, "grad_norm": 0.341796875, "learning_rate": 0.0011616724704231393, "loss": 0.226, "step": 46432 }, { "epoch": 0.08233131999797869, "grad_norm": 0.1787109375, "learning_rate": 0.0011616130915444358, "loss": 0.1628, "step": 46434 }, { "epoch": 0.0823348661632885, "grad_norm": 0.42578125, "learning_rate": 0.0011615537123962725, "loss": 0.2246, "step": 46436 }, { "epoch": 0.08233841232859831, "grad_norm": 0.44921875, "learning_rate": 0.001161494332978909, "loss": 0.1729, "step": 46438 }, { "epoch": 0.08234195849390813, "grad_norm": 0.498046875, "learning_rate": 0.0011614349532926055, "loss": 0.2455, "step": 46440 }, { "epoch": 0.08234550465921794, "grad_norm": 0.56640625, "learning_rate": 0.0011613755733376203, "loss": 0.1718, "step": 46442 }, { "epoch": 0.08234905082452776, "grad_norm": 1.15625, "learning_rate": 0.0011613161931142144, "loss": 0.213, "step": 46444 }, { "epoch": 0.08235259698983757, "grad_norm": 0.671875, "learning_rate": 0.0011612568126226467, "loss": 0.2034, "step": 46446 }, { "epoch": 0.08235614315514739, "grad_norm": 0.38671875, "learning_rate": 0.001161197431863178, "loss": 0.3075, "step": 46448 }, { "epoch": 0.0823596893204572, "grad_norm": 0.51171875, "learning_rate": 0.0011611380508360666, "loss": 0.1788, "step": 46450 }, { "epoch": 0.08236323548576702, "grad_norm": 0.2392578125, "learning_rate": 0.0011610786695415735, "loss": 0.2861, "step": 46452 }, { "epoch": 0.08236678165107683, "grad_norm": 0.294921875, "learning_rate": 0.0011610192879799573, "loss": 0.211, "step": 46454 }, { "epoch": 0.08237032781638665, "grad_norm": 2.453125, "learning_rate": 0.0011609599061514783, "loss": 0.5537, "step": 46456 }, { "epoch": 0.08237387398169647, "grad_norm": 0.54296875, "learning_rate": 0.0011609005240563965, "loss": 0.2042, "step": 46458 }, { "epoch": 0.08237742014700629, "grad_norm": 0.1943359375, "learning_rate": 0.0011608411416949708, "loss": 0.1821, "step": 46460 }, { "epoch": 0.0823809663123161, "grad_norm": 0.447265625, "learning_rate": 0.001160781759067461, "loss": 0.247, "step": 46462 }, { "epoch": 0.08238451247762592, "grad_norm": 0.859375, "learning_rate": 0.0011607223761741278, "loss": 0.1519, "step": 46464 }, { "epoch": 0.08238805864293573, "grad_norm": 0.423828125, "learning_rate": 0.0011606629930152295, "loss": 0.2014, "step": 46466 }, { "epoch": 0.08239160480824555, "grad_norm": 2.109375, "learning_rate": 0.0011606036095910268, "loss": 0.2286, "step": 46468 }, { "epoch": 0.08239515097355536, "grad_norm": 0.3046875, "learning_rate": 0.0011605442259017792, "loss": 0.1941, "step": 46470 }, { "epoch": 0.08239869713886518, "grad_norm": 0.35546875, "learning_rate": 0.0011604848419477462, "loss": 0.1425, "step": 46472 }, { "epoch": 0.08240224330417499, "grad_norm": 0.5859375, "learning_rate": 0.0011604254577291875, "loss": 0.1696, "step": 46474 }, { "epoch": 0.0824057894694848, "grad_norm": 0.57421875, "learning_rate": 0.0011603660732463633, "loss": 0.2397, "step": 46476 }, { "epoch": 0.08240933563479462, "grad_norm": 1.5078125, "learning_rate": 0.0011603066884995328, "loss": 0.3168, "step": 46478 }, { "epoch": 0.08241288180010443, "grad_norm": 0.314453125, "learning_rate": 0.0011602473034889559, "loss": 0.2591, "step": 46480 }, { "epoch": 0.08241642796541425, "grad_norm": 0.2421875, "learning_rate": 0.0011601879182148922, "loss": 0.1847, "step": 46482 }, { "epoch": 0.08241997413072406, "grad_norm": 0.2421875, "learning_rate": 0.0011601285326776015, "loss": 0.1441, "step": 46484 }, { "epoch": 0.08242352029603388, "grad_norm": 1.71875, "learning_rate": 0.0011600691468773437, "loss": 0.2372, "step": 46486 }, { "epoch": 0.08242706646134369, "grad_norm": 0.42578125, "learning_rate": 0.0011600097608143783, "loss": 0.2274, "step": 46488 }, { "epoch": 0.08243061262665351, "grad_norm": 0.23046875, "learning_rate": 0.0011599503744889649, "loss": 0.16, "step": 46490 }, { "epoch": 0.08243415879196332, "grad_norm": 0.6015625, "learning_rate": 0.0011598909879013634, "loss": 0.2329, "step": 46492 }, { "epoch": 0.08243770495727314, "grad_norm": 0.365234375, "learning_rate": 0.0011598316010518336, "loss": 0.1388, "step": 46494 }, { "epoch": 0.08244125112258295, "grad_norm": 0.390625, "learning_rate": 0.001159772213940635, "loss": 0.1446, "step": 46496 }, { "epoch": 0.08244479728789277, "grad_norm": 0.314453125, "learning_rate": 0.0011597128265680276, "loss": 0.1842, "step": 46498 }, { "epoch": 0.08244834345320258, "grad_norm": 0.33984375, "learning_rate": 0.001159653438934271, "loss": 0.1316, "step": 46500 }, { "epoch": 0.0824518896185124, "grad_norm": 0.71875, "learning_rate": 0.0011595940510396247, "loss": 0.1742, "step": 46502 }, { "epoch": 0.08245543578382222, "grad_norm": 0.703125, "learning_rate": 0.0011595346628843488, "loss": 0.4092, "step": 46504 }, { "epoch": 0.08245898194913204, "grad_norm": 0.33984375, "learning_rate": 0.0011594752744687028, "loss": 0.1999, "step": 46506 }, { "epoch": 0.08246252811444185, "grad_norm": 0.4609375, "learning_rate": 0.0011594158857929466, "loss": 0.1694, "step": 46508 }, { "epoch": 0.08246607427975167, "grad_norm": 0.8984375, "learning_rate": 0.0011593564968573398, "loss": 0.1415, "step": 46510 }, { "epoch": 0.08246962044506148, "grad_norm": 0.2177734375, "learning_rate": 0.0011592971076621421, "loss": 0.1584, "step": 46512 }, { "epoch": 0.0824731666103713, "grad_norm": 0.73046875, "learning_rate": 0.0011592377182076134, "loss": 0.2126, "step": 46514 }, { "epoch": 0.08247671277568111, "grad_norm": 1.4296875, "learning_rate": 0.001159178328494013, "loss": 0.2044, "step": 46516 }, { "epoch": 0.08248025894099092, "grad_norm": 0.263671875, "learning_rate": 0.0011591189385216016, "loss": 0.1617, "step": 46518 }, { "epoch": 0.08248380510630074, "grad_norm": 0.60546875, "learning_rate": 0.0011590595482906378, "loss": 0.2224, "step": 46520 }, { "epoch": 0.08248735127161055, "grad_norm": 0.41796875, "learning_rate": 0.001159000157801382, "loss": 0.2207, "step": 46522 }, { "epoch": 0.08249089743692037, "grad_norm": 0.34375, "learning_rate": 0.0011589407670540936, "loss": 0.1998, "step": 46524 }, { "epoch": 0.08249444360223018, "grad_norm": 0.34765625, "learning_rate": 0.001158881376049033, "loss": 0.1688, "step": 46526 }, { "epoch": 0.08249798976754, "grad_norm": 0.94921875, "learning_rate": 0.001158821984786459, "loss": 0.1583, "step": 46528 }, { "epoch": 0.08250153593284981, "grad_norm": 0.265625, "learning_rate": 0.0011587625932666318, "loss": 0.1804, "step": 46530 }, { "epoch": 0.08250508209815963, "grad_norm": 0.93359375, "learning_rate": 0.0011587032014898115, "loss": 0.2943, "step": 46532 }, { "epoch": 0.08250862826346944, "grad_norm": 1.046875, "learning_rate": 0.0011586438094562574, "loss": 0.2638, "step": 46534 }, { "epoch": 0.08251217442877926, "grad_norm": 0.255859375, "learning_rate": 0.0011585844171662291, "loss": 0.1373, "step": 46536 }, { "epoch": 0.08251572059408907, "grad_norm": 0.294921875, "learning_rate": 0.0011585250246199867, "loss": 0.2027, "step": 46538 }, { "epoch": 0.08251926675939888, "grad_norm": 0.240234375, "learning_rate": 0.00115846563181779, "loss": 0.1524, "step": 46540 }, { "epoch": 0.0825228129247087, "grad_norm": 0.53515625, "learning_rate": 0.0011584062387598984, "loss": 0.173, "step": 46542 }, { "epoch": 0.08252635909001851, "grad_norm": 2.515625, "learning_rate": 0.0011583468454465717, "loss": 0.3153, "step": 46544 }, { "epoch": 0.08252990525532833, "grad_norm": 1.25, "learning_rate": 0.0011582874518780702, "loss": 0.1776, "step": 46546 }, { "epoch": 0.08253345142063816, "grad_norm": 0.33203125, "learning_rate": 0.001158228058054653, "loss": 0.1866, "step": 46548 }, { "epoch": 0.08253699758594797, "grad_norm": 1.1640625, "learning_rate": 0.00115816866397658, "loss": 0.2485, "step": 46550 }, { "epoch": 0.08254054375125779, "grad_norm": 0.80859375, "learning_rate": 0.0011581092696441113, "loss": 0.2117, "step": 46552 }, { "epoch": 0.0825440899165676, "grad_norm": 0.859375, "learning_rate": 0.0011580498750575067, "loss": 0.3915, "step": 46554 }, { "epoch": 0.08254763608187742, "grad_norm": 0.3046875, "learning_rate": 0.001157990480217025, "loss": 0.176, "step": 46556 }, { "epoch": 0.08255118224718723, "grad_norm": 0.73046875, "learning_rate": 0.0011579310851229268, "loss": 0.2649, "step": 46558 }, { "epoch": 0.08255472841249704, "grad_norm": 0.89453125, "learning_rate": 0.0011578716897754715, "loss": 0.2197, "step": 46560 }, { "epoch": 0.08255827457780686, "grad_norm": 0.51171875, "learning_rate": 0.0011578122941749197, "loss": 0.2629, "step": 46562 }, { "epoch": 0.08256182074311667, "grad_norm": 1.0234375, "learning_rate": 0.00115775289832153, "loss": 0.4003, "step": 46564 }, { "epoch": 0.08256536690842649, "grad_norm": 0.2109375, "learning_rate": 0.0011576935022155625, "loss": 0.1751, "step": 46566 }, { "epoch": 0.0825689130737363, "grad_norm": 0.6640625, "learning_rate": 0.0011576341058572774, "loss": 0.2936, "step": 46568 }, { "epoch": 0.08257245923904612, "grad_norm": 0.2275390625, "learning_rate": 0.0011575747092469344, "loss": 0.2244, "step": 46570 }, { "epoch": 0.08257600540435593, "grad_norm": 0.416015625, "learning_rate": 0.001157515312384793, "loss": 0.1894, "step": 46572 }, { "epoch": 0.08257955156966575, "grad_norm": 0.22265625, "learning_rate": 0.0011574559152711127, "loss": 0.172, "step": 46574 }, { "epoch": 0.08258309773497556, "grad_norm": 1.2734375, "learning_rate": 0.001157396517906154, "loss": 0.1901, "step": 46576 }, { "epoch": 0.08258664390028538, "grad_norm": 0.1572265625, "learning_rate": 0.0011573371202901759, "loss": 0.2272, "step": 46578 }, { "epoch": 0.08259019006559519, "grad_norm": 1.1796875, "learning_rate": 0.0011572777224234387, "loss": 0.2483, "step": 46580 }, { "epoch": 0.082593736230905, "grad_norm": 1.7578125, "learning_rate": 0.001157218324306202, "loss": 0.487, "step": 46582 }, { "epoch": 0.08259728239621482, "grad_norm": 0.6875, "learning_rate": 0.0011571589259387256, "loss": 0.182, "step": 46584 }, { "epoch": 0.08260082856152463, "grad_norm": 0.2578125, "learning_rate": 0.0011570995273212693, "loss": 0.2227, "step": 46586 }, { "epoch": 0.08260437472683445, "grad_norm": 0.2392578125, "learning_rate": 0.0011570401284540929, "loss": 0.1827, "step": 46588 }, { "epoch": 0.08260792089214426, "grad_norm": 0.2470703125, "learning_rate": 0.001156980729337456, "loss": 0.1552, "step": 46590 }, { "epoch": 0.08261146705745408, "grad_norm": 0.3984375, "learning_rate": 0.0011569213299716186, "loss": 0.15, "step": 46592 }, { "epoch": 0.0826150132227639, "grad_norm": 1.5390625, "learning_rate": 0.00115686193035684, "loss": 0.2492, "step": 46594 }, { "epoch": 0.08261855938807372, "grad_norm": 0.30078125, "learning_rate": 0.001156802530493381, "loss": 0.1922, "step": 46596 }, { "epoch": 0.08262210555338353, "grad_norm": 0.5859375, "learning_rate": 0.0011567431303815, "loss": 0.2465, "step": 46598 }, { "epoch": 0.08262565171869335, "grad_norm": 0.30078125, "learning_rate": 0.0011566837300214581, "loss": 0.2008, "step": 46600 }, { "epoch": 0.08262919788400316, "grad_norm": 0.4140625, "learning_rate": 0.0011566243294135142, "loss": 0.212, "step": 46602 }, { "epoch": 0.08263274404931298, "grad_norm": 0.6171875, "learning_rate": 0.0011565649285579286, "loss": 0.2144, "step": 46604 }, { "epoch": 0.08263629021462279, "grad_norm": 1.6171875, "learning_rate": 0.0011565055274549607, "loss": 0.3148, "step": 46606 }, { "epoch": 0.08263983637993261, "grad_norm": 1.3671875, "learning_rate": 0.0011564461261048702, "loss": 0.2154, "step": 46608 }, { "epoch": 0.08264338254524242, "grad_norm": 0.3203125, "learning_rate": 0.0011563867245079173, "loss": 0.1662, "step": 46610 }, { "epoch": 0.08264692871055224, "grad_norm": 0.22265625, "learning_rate": 0.001156327322664362, "loss": 0.1932, "step": 46612 }, { "epoch": 0.08265047487586205, "grad_norm": 1.28125, "learning_rate": 0.0011562679205744633, "loss": 0.4326, "step": 46614 }, { "epoch": 0.08265402104117187, "grad_norm": 0.283203125, "learning_rate": 0.0011562085182384816, "loss": 0.1637, "step": 46616 }, { "epoch": 0.08265756720648168, "grad_norm": 0.251953125, "learning_rate": 0.001156149115656676, "loss": 0.1886, "step": 46618 }, { "epoch": 0.0826611133717915, "grad_norm": 0.384765625, "learning_rate": 0.0011560897128293072, "loss": 0.1934, "step": 46620 }, { "epoch": 0.08266465953710131, "grad_norm": 0.4921875, "learning_rate": 0.0011560303097566346, "loss": 0.1926, "step": 46622 }, { "epoch": 0.08266820570241112, "grad_norm": 0.7265625, "learning_rate": 0.001155970906438918, "loss": 0.1515, "step": 46624 }, { "epoch": 0.08267175186772094, "grad_norm": 0.453125, "learning_rate": 0.001155911502876417, "loss": 0.2042, "step": 46626 }, { "epoch": 0.08267529803303075, "grad_norm": 0.25, "learning_rate": 0.001155852099069392, "loss": 0.1709, "step": 46628 }, { "epoch": 0.08267884419834057, "grad_norm": 0.6640625, "learning_rate": 0.0011557926950181018, "loss": 0.1489, "step": 46630 }, { "epoch": 0.08268239036365038, "grad_norm": 0.359375, "learning_rate": 0.001155733290722807, "loss": 0.1689, "step": 46632 }, { "epoch": 0.0826859365289602, "grad_norm": 0.32421875, "learning_rate": 0.001155673886183767, "loss": 0.2047, "step": 46634 }, { "epoch": 0.08268948269427001, "grad_norm": 0.322265625, "learning_rate": 0.001155614481401242, "loss": 0.1394, "step": 46636 }, { "epoch": 0.08269302885957983, "grad_norm": 0.2490234375, "learning_rate": 0.0011555550763754914, "loss": 0.1915, "step": 46638 }, { "epoch": 0.08269657502488965, "grad_norm": 1.03125, "learning_rate": 0.0011554956711067753, "loss": 0.2438, "step": 46640 }, { "epoch": 0.08270012119019947, "grad_norm": 0.6953125, "learning_rate": 0.0011554362655953531, "loss": 0.3025, "step": 46642 }, { "epoch": 0.08270366735550928, "grad_norm": 0.62109375, "learning_rate": 0.0011553768598414852, "loss": 0.2222, "step": 46644 }, { "epoch": 0.0827072135208191, "grad_norm": 1.03125, "learning_rate": 0.001155317453845431, "loss": 0.2544, "step": 46646 }, { "epoch": 0.08271075968612891, "grad_norm": 0.6953125, "learning_rate": 0.0011552580476074503, "loss": 0.1835, "step": 46648 }, { "epoch": 0.08271430585143873, "grad_norm": 1.1171875, "learning_rate": 0.001155198641127803, "loss": 0.174, "step": 46650 }, { "epoch": 0.08271785201674854, "grad_norm": 1.015625, "learning_rate": 0.0011551392344067491, "loss": 0.2038, "step": 46652 }, { "epoch": 0.08272139818205836, "grad_norm": 0.67578125, "learning_rate": 0.0011550798274445478, "loss": 0.1347, "step": 46654 }, { "epoch": 0.08272494434736817, "grad_norm": 0.375, "learning_rate": 0.0011550204202414598, "loss": 0.163, "step": 46656 }, { "epoch": 0.08272849051267799, "grad_norm": 0.251953125, "learning_rate": 0.0011549610127977441, "loss": 0.1878, "step": 46658 }, { "epoch": 0.0827320366779878, "grad_norm": 0.216796875, "learning_rate": 0.0011549016051136608, "loss": 0.1527, "step": 46660 }, { "epoch": 0.08273558284329761, "grad_norm": 0.53125, "learning_rate": 0.00115484219718947, "loss": 0.2471, "step": 46662 }, { "epoch": 0.08273912900860743, "grad_norm": 8.75, "learning_rate": 0.0011547827890254314, "loss": 0.2546, "step": 46664 }, { "epoch": 0.08274267517391724, "grad_norm": 0.220703125, "learning_rate": 0.0011547233806218046, "loss": 0.1598, "step": 46666 }, { "epoch": 0.08274622133922706, "grad_norm": 0.265625, "learning_rate": 0.0011546639719788494, "loss": 0.1707, "step": 46668 }, { "epoch": 0.08274976750453687, "grad_norm": 0.369140625, "learning_rate": 0.0011546045630968257, "loss": 0.1636, "step": 46670 }, { "epoch": 0.08275331366984669, "grad_norm": 0.36328125, "learning_rate": 0.0011545451539759935, "loss": 0.1848, "step": 46672 }, { "epoch": 0.0827568598351565, "grad_norm": 0.40234375, "learning_rate": 0.0011544857446166123, "loss": 0.1665, "step": 46674 }, { "epoch": 0.08276040600046632, "grad_norm": 0.40234375, "learning_rate": 0.0011544263350189423, "loss": 0.3163, "step": 46676 }, { "epoch": 0.08276395216577613, "grad_norm": 0.49609375, "learning_rate": 0.0011543669251832432, "loss": 0.1343, "step": 46678 }, { "epoch": 0.08276749833108595, "grad_norm": 0.2353515625, "learning_rate": 0.0011543075151097745, "loss": 0.2272, "step": 46680 }, { "epoch": 0.08277104449639576, "grad_norm": 0.1845703125, "learning_rate": 0.0011542481047987962, "loss": 0.1398, "step": 46682 }, { "epoch": 0.08277459066170559, "grad_norm": 1.5546875, "learning_rate": 0.001154188694250569, "loss": 0.261, "step": 46684 }, { "epoch": 0.0827781368270154, "grad_norm": 0.30078125, "learning_rate": 0.0011541292834653513, "loss": 0.1852, "step": 46686 }, { "epoch": 0.08278168299232522, "grad_norm": 0.734375, "learning_rate": 0.0011540698724434033, "loss": 0.1919, "step": 46688 }, { "epoch": 0.08278522915763503, "grad_norm": 1.0078125, "learning_rate": 0.0011540104611849854, "loss": 0.267, "step": 46690 }, { "epoch": 0.08278877532294485, "grad_norm": 0.640625, "learning_rate": 0.001153951049690357, "loss": 0.1576, "step": 46692 }, { "epoch": 0.08279232148825466, "grad_norm": 0.82421875, "learning_rate": 0.001153891637959778, "loss": 0.1985, "step": 46694 }, { "epoch": 0.08279586765356448, "grad_norm": 0.380859375, "learning_rate": 0.0011538322259935087, "loss": 0.1922, "step": 46696 }, { "epoch": 0.08279941381887429, "grad_norm": 0.392578125, "learning_rate": 0.0011537728137918082, "loss": 0.2057, "step": 46698 }, { "epoch": 0.0828029599841841, "grad_norm": 0.34375, "learning_rate": 0.0011537134013549365, "loss": 0.2121, "step": 46700 }, { "epoch": 0.08280650614949392, "grad_norm": 0.2001953125, "learning_rate": 0.0011536539886831542, "loss": 0.1698, "step": 46702 }, { "epoch": 0.08281005231480373, "grad_norm": 0.294921875, "learning_rate": 0.0011535945757767197, "loss": 0.1145, "step": 46704 }, { "epoch": 0.08281359848011355, "grad_norm": 0.6328125, "learning_rate": 0.0011535351626358941, "loss": 0.1426, "step": 46706 }, { "epoch": 0.08281714464542336, "grad_norm": 0.4453125, "learning_rate": 0.0011534757492609364, "loss": 0.1651, "step": 46708 }, { "epoch": 0.08282069081073318, "grad_norm": 0.369140625, "learning_rate": 0.0011534163356521074, "loss": 0.2039, "step": 46710 }, { "epoch": 0.08282423697604299, "grad_norm": 0.7265625, "learning_rate": 0.001153356921809666, "loss": 0.2012, "step": 46712 }, { "epoch": 0.0828277831413528, "grad_norm": 1.015625, "learning_rate": 0.0011532975077338725, "loss": 0.2516, "step": 46714 }, { "epoch": 0.08283132930666262, "grad_norm": 0.185546875, "learning_rate": 0.0011532380934249867, "loss": 0.1743, "step": 46716 }, { "epoch": 0.08283487547197244, "grad_norm": 0.46484375, "learning_rate": 0.0011531786788832685, "loss": 0.2352, "step": 46718 }, { "epoch": 0.08283842163728225, "grad_norm": 0.2470703125, "learning_rate": 0.0011531192641089775, "loss": 0.4596, "step": 46720 }, { "epoch": 0.08284196780259206, "grad_norm": 0.1689453125, "learning_rate": 0.001153059849102374, "loss": 0.1866, "step": 46722 }, { "epoch": 0.08284551396790188, "grad_norm": 0.3203125, "learning_rate": 0.001153000433863717, "loss": 0.1403, "step": 46724 }, { "epoch": 0.0828490601332117, "grad_norm": 0.2236328125, "learning_rate": 0.0011529410183932674, "loss": 0.1445, "step": 46726 }, { "epoch": 0.08285260629852151, "grad_norm": 0.43359375, "learning_rate": 0.0011528816026912844, "loss": 0.1296, "step": 46728 }, { "epoch": 0.08285615246383134, "grad_norm": 0.53515625, "learning_rate": 0.0011528221867580278, "loss": 0.1833, "step": 46730 }, { "epoch": 0.08285969862914115, "grad_norm": 0.51171875, "learning_rate": 0.0011527627705937575, "loss": 0.1656, "step": 46732 }, { "epoch": 0.08286324479445097, "grad_norm": 0.349609375, "learning_rate": 0.0011527033541987339, "loss": 0.1693, "step": 46734 }, { "epoch": 0.08286679095976078, "grad_norm": 0.671875, "learning_rate": 0.0011526439375732162, "loss": 0.1707, "step": 46736 }, { "epoch": 0.0828703371250706, "grad_norm": 0.6875, "learning_rate": 0.0011525845207174647, "loss": 0.1859, "step": 46738 }, { "epoch": 0.08287388329038041, "grad_norm": 0.5078125, "learning_rate": 0.001152525103631739, "loss": 0.1972, "step": 46740 }, { "epoch": 0.08287742945569022, "grad_norm": 0.7578125, "learning_rate": 0.001152465686316299, "loss": 0.2076, "step": 46742 }, { "epoch": 0.08288097562100004, "grad_norm": 0.419921875, "learning_rate": 0.0011524062687714044, "loss": 0.1811, "step": 46744 }, { "epoch": 0.08288452178630985, "grad_norm": 3.25, "learning_rate": 0.0011523468509973152, "loss": 0.1708, "step": 46746 }, { "epoch": 0.08288806795161967, "grad_norm": 0.474609375, "learning_rate": 0.0011522874329942913, "loss": 0.1609, "step": 46748 }, { "epoch": 0.08289161411692948, "grad_norm": 0.46484375, "learning_rate": 0.0011522280147625926, "loss": 0.1662, "step": 46750 }, { "epoch": 0.0828951602822393, "grad_norm": 0.5625, "learning_rate": 0.001152168596302479, "loss": 0.1595, "step": 46752 }, { "epoch": 0.08289870644754911, "grad_norm": 0.326171875, "learning_rate": 0.0011521091776142104, "loss": 0.2205, "step": 46754 }, { "epoch": 0.08290225261285893, "grad_norm": 0.494140625, "learning_rate": 0.001152049758698046, "loss": 0.3778, "step": 46756 }, { "epoch": 0.08290579877816874, "grad_norm": 0.23046875, "learning_rate": 0.0011519903395542468, "loss": 0.1819, "step": 46758 }, { "epoch": 0.08290934494347856, "grad_norm": 0.98046875, "learning_rate": 0.001151930920183072, "loss": 0.2159, "step": 46760 }, { "epoch": 0.08291289110878837, "grad_norm": 0.1962890625, "learning_rate": 0.0011518715005847812, "loss": 0.2106, "step": 46762 }, { "epoch": 0.08291643727409818, "grad_norm": 0.19140625, "learning_rate": 0.0011518120807596345, "loss": 0.1801, "step": 46764 }, { "epoch": 0.082919983439408, "grad_norm": 0.3515625, "learning_rate": 0.0011517526607078919, "loss": 0.1308, "step": 46766 }, { "epoch": 0.08292352960471781, "grad_norm": 1.15625, "learning_rate": 0.0011516932404298135, "loss": 0.2411, "step": 46768 }, { "epoch": 0.08292707577002763, "grad_norm": 0.322265625, "learning_rate": 0.0011516338199256588, "loss": 0.2156, "step": 46770 }, { "epoch": 0.08293062193533744, "grad_norm": 1.40625, "learning_rate": 0.0011515743991956878, "loss": 0.2159, "step": 46772 }, { "epoch": 0.08293416810064726, "grad_norm": 0.255859375, "learning_rate": 0.00115151497824016, "loss": 0.1754, "step": 46774 }, { "epoch": 0.08293771426595709, "grad_norm": 0.18359375, "learning_rate": 0.001151455557059336, "loss": 0.1097, "step": 46776 }, { "epoch": 0.0829412604312669, "grad_norm": 0.271484375, "learning_rate": 0.001151396135653475, "loss": 0.1755, "step": 46778 }, { "epoch": 0.08294480659657671, "grad_norm": 0.189453125, "learning_rate": 0.0011513367140228374, "loss": 0.1387, "step": 46780 }, { "epoch": 0.08294835276188653, "grad_norm": 0.6484375, "learning_rate": 0.0011512772921676827, "loss": 0.1675, "step": 46782 }, { "epoch": 0.08295189892719634, "grad_norm": 1.6171875, "learning_rate": 0.0011512178700882711, "loss": 0.2531, "step": 46784 }, { "epoch": 0.08295544509250616, "grad_norm": 2.75, "learning_rate": 0.001151158447784862, "loss": 0.1921, "step": 46786 }, { "epoch": 0.08295899125781597, "grad_norm": 0.365234375, "learning_rate": 0.0011510990252577159, "loss": 0.3618, "step": 46788 }, { "epoch": 0.08296253742312579, "grad_norm": 0.365234375, "learning_rate": 0.001151039602507092, "loss": 0.2222, "step": 46790 }, { "epoch": 0.0829660835884356, "grad_norm": 0.263671875, "learning_rate": 0.0011509801795332507, "loss": 0.1629, "step": 46792 }, { "epoch": 0.08296962975374542, "grad_norm": 0.71484375, "learning_rate": 0.0011509207563364517, "loss": 0.202, "step": 46794 }, { "epoch": 0.08297317591905523, "grad_norm": 0.2197265625, "learning_rate": 0.0011508613329169548, "loss": 0.1477, "step": 46796 }, { "epoch": 0.08297672208436505, "grad_norm": 0.306640625, "learning_rate": 0.00115080190927502, "loss": 0.2414, "step": 46798 }, { "epoch": 0.08298026824967486, "grad_norm": 1.0625, "learning_rate": 0.0011507424854109072, "loss": 0.3612, "step": 46800 }, { "epoch": 0.08298381441498467, "grad_norm": 1.2734375, "learning_rate": 0.0011506830613248762, "loss": 0.1877, "step": 46802 }, { "epoch": 0.08298736058029449, "grad_norm": 0.189453125, "learning_rate": 0.0011506236370171872, "loss": 0.167, "step": 46804 }, { "epoch": 0.0829909067456043, "grad_norm": 0.29296875, "learning_rate": 0.0011505642124880994, "loss": 0.1948, "step": 46806 }, { "epoch": 0.08299445291091412, "grad_norm": 0.470703125, "learning_rate": 0.0011505047877378733, "loss": 0.1667, "step": 46808 }, { "epoch": 0.08299799907622393, "grad_norm": 1.15625, "learning_rate": 0.0011504453627667684, "loss": 0.1657, "step": 46810 }, { "epoch": 0.08300154524153375, "grad_norm": 0.3203125, "learning_rate": 0.0011503859375750452, "loss": 0.1774, "step": 46812 }, { "epoch": 0.08300509140684356, "grad_norm": 0.2470703125, "learning_rate": 0.001150326512162963, "loss": 0.1759, "step": 46814 }, { "epoch": 0.08300863757215338, "grad_norm": 0.298828125, "learning_rate": 0.0011502670865307818, "loss": 0.2188, "step": 46816 }, { "epoch": 0.08301218373746319, "grad_norm": 1.3046875, "learning_rate": 0.0011502076606787616, "loss": 0.3454, "step": 46818 }, { "epoch": 0.08301572990277302, "grad_norm": 1.109375, "learning_rate": 0.0011501482346071622, "loss": 0.2259, "step": 46820 }, { "epoch": 0.08301927606808283, "grad_norm": 0.2099609375, "learning_rate": 0.0011500888083162434, "loss": 0.1385, "step": 46822 }, { "epoch": 0.08302282223339265, "grad_norm": 0.447265625, "learning_rate": 0.0011500293818062658, "loss": 0.1738, "step": 46824 }, { "epoch": 0.08302636839870246, "grad_norm": 0.248046875, "learning_rate": 0.001149969955077488, "loss": 0.1411, "step": 46826 }, { "epoch": 0.08302991456401228, "grad_norm": 0.294921875, "learning_rate": 0.0011499105281301712, "loss": 0.2204, "step": 46828 }, { "epoch": 0.08303346072932209, "grad_norm": 0.3203125, "learning_rate": 0.0011498511009645745, "loss": 0.2022, "step": 46830 }, { "epoch": 0.0830370068946319, "grad_norm": 0.58984375, "learning_rate": 0.0011497916735809582, "loss": 0.1641, "step": 46832 }, { "epoch": 0.08304055305994172, "grad_norm": 0.2734375, "learning_rate": 0.001149732245979582, "loss": 0.3196, "step": 46834 }, { "epoch": 0.08304409922525154, "grad_norm": 0.60546875, "learning_rate": 0.0011496728181607054, "loss": 0.2341, "step": 46836 }, { "epoch": 0.08304764539056135, "grad_norm": 0.4921875, "learning_rate": 0.0011496133901245894, "loss": 0.1461, "step": 46838 }, { "epoch": 0.08305119155587116, "grad_norm": 0.416015625, "learning_rate": 0.0011495539618714928, "loss": 0.1676, "step": 46840 }, { "epoch": 0.08305473772118098, "grad_norm": 0.224609375, "learning_rate": 0.0011494945334016764, "loss": 0.1586, "step": 46842 }, { "epoch": 0.0830582838864908, "grad_norm": 0.263671875, "learning_rate": 0.001149435104715399, "loss": 0.2638, "step": 46844 }, { "epoch": 0.08306183005180061, "grad_norm": 0.1845703125, "learning_rate": 0.001149375675812922, "loss": 0.1735, "step": 46846 }, { "epoch": 0.08306537621711042, "grad_norm": 0.515625, "learning_rate": 0.0011493162466945036, "loss": 0.1917, "step": 46848 }, { "epoch": 0.08306892238242024, "grad_norm": 0.34765625, "learning_rate": 0.0011492568173604055, "loss": 0.1345, "step": 46850 }, { "epoch": 0.08307246854773005, "grad_norm": 0.279296875, "learning_rate": 0.001149197387810886, "loss": 0.167, "step": 46852 }, { "epoch": 0.08307601471303987, "grad_norm": 0.255859375, "learning_rate": 0.0011491379580462063, "loss": 0.1848, "step": 46854 }, { "epoch": 0.08307956087834968, "grad_norm": 0.1796875, "learning_rate": 0.0011490785280666251, "loss": 0.1497, "step": 46856 }, { "epoch": 0.0830831070436595, "grad_norm": 0.376953125, "learning_rate": 0.0011490190978724034, "loss": 0.1654, "step": 46858 }, { "epoch": 0.08308665320896931, "grad_norm": 1.4375, "learning_rate": 0.0011489596674638001, "loss": 0.2383, "step": 46860 }, { "epoch": 0.08309019937427913, "grad_norm": 0.70703125, "learning_rate": 0.0011489002368410758, "loss": 0.2154, "step": 46862 }, { "epoch": 0.08309374553958894, "grad_norm": 0.28515625, "learning_rate": 0.0011488408060044903, "loss": 0.1859, "step": 46864 }, { "epoch": 0.08309729170489877, "grad_norm": 0.388671875, "learning_rate": 0.001148781374954304, "loss": 0.1382, "step": 46866 }, { "epoch": 0.08310083787020858, "grad_norm": 0.85546875, "learning_rate": 0.0011487219436907756, "loss": 0.2145, "step": 46868 }, { "epoch": 0.0831043840355184, "grad_norm": 0.2890625, "learning_rate": 0.0011486625122141662, "loss": 0.1647, "step": 46870 }, { "epoch": 0.08310793020082821, "grad_norm": 0.248046875, "learning_rate": 0.001148603080524735, "loss": 0.1928, "step": 46872 }, { "epoch": 0.08311147636613803, "grad_norm": 0.76953125, "learning_rate": 0.0011485436486227425, "loss": 0.1758, "step": 46874 }, { "epoch": 0.08311502253144784, "grad_norm": 0.578125, "learning_rate": 0.0011484842165084479, "loss": 0.1979, "step": 46876 }, { "epoch": 0.08311856869675766, "grad_norm": 0.51171875, "learning_rate": 0.0011484247841821117, "loss": 0.1655, "step": 46878 }, { "epoch": 0.08312211486206747, "grad_norm": 0.78125, "learning_rate": 0.0011483653516439934, "loss": 0.1701, "step": 46880 }, { "epoch": 0.08312566102737728, "grad_norm": 0.123046875, "learning_rate": 0.0011483059188943535, "loss": 0.1686, "step": 46882 }, { "epoch": 0.0831292071926871, "grad_norm": 1.2578125, "learning_rate": 0.0011482464859334511, "loss": 0.4198, "step": 46884 }, { "epoch": 0.08313275335799691, "grad_norm": 0.5, "learning_rate": 0.001148187052761547, "loss": 0.1664, "step": 46886 }, { "epoch": 0.08313629952330673, "grad_norm": 0.71875, "learning_rate": 0.0011481276193789005, "loss": 0.2095, "step": 46888 }, { "epoch": 0.08313984568861654, "grad_norm": 0.2138671875, "learning_rate": 0.0011480681857857717, "loss": 0.1636, "step": 46890 }, { "epoch": 0.08314339185392636, "grad_norm": 0.3828125, "learning_rate": 0.0011480087519824206, "loss": 0.2067, "step": 46892 }, { "epoch": 0.08314693801923617, "grad_norm": 0.1689453125, "learning_rate": 0.0011479493179691073, "loss": 0.1518, "step": 46894 }, { "epoch": 0.08315048418454599, "grad_norm": 0.81640625, "learning_rate": 0.0011478898837460913, "loss": 0.1792, "step": 46896 }, { "epoch": 0.0831540303498558, "grad_norm": 0.236328125, "learning_rate": 0.001147830449313633, "loss": 0.1863, "step": 46898 }, { "epoch": 0.08315757651516562, "grad_norm": 0.9765625, "learning_rate": 0.001147771014671992, "loss": 0.2412, "step": 46900 }, { "epoch": 0.08316112268047543, "grad_norm": 0.4296875, "learning_rate": 0.0011477115798214283, "loss": 0.1734, "step": 46902 }, { "epoch": 0.08316466884578524, "grad_norm": 0.2109375, "learning_rate": 0.0011476521447622019, "loss": 0.1491, "step": 46904 }, { "epoch": 0.08316821501109506, "grad_norm": 0.6328125, "learning_rate": 0.001147592709494573, "loss": 0.1773, "step": 46906 }, { "epoch": 0.08317176117640487, "grad_norm": 0.48828125, "learning_rate": 0.0011475332740188008, "loss": 0.153, "step": 46908 }, { "epoch": 0.08317530734171469, "grad_norm": 5.21875, "learning_rate": 0.0011474738383351456, "loss": 0.293, "step": 46910 }, { "epoch": 0.08317885350702452, "grad_norm": 0.26171875, "learning_rate": 0.0011474144024438678, "loss": 0.2243, "step": 46912 }, { "epoch": 0.08318239967233433, "grad_norm": 0.2216796875, "learning_rate": 0.0011473549663452264, "loss": 0.2233, "step": 46914 }, { "epoch": 0.08318594583764415, "grad_norm": 0.4296875, "learning_rate": 0.0011472955300394826, "loss": 0.1891, "step": 46916 }, { "epoch": 0.08318949200295396, "grad_norm": 1.1328125, "learning_rate": 0.001147236093526895, "loss": 0.2078, "step": 46918 }, { "epoch": 0.08319303816826377, "grad_norm": 0.380859375, "learning_rate": 0.0011471766568077245, "loss": 0.1704, "step": 46920 }, { "epoch": 0.08319658433357359, "grad_norm": 0.37890625, "learning_rate": 0.0011471172198822305, "loss": 0.1771, "step": 46922 }, { "epoch": 0.0832001304988834, "grad_norm": 0.466796875, "learning_rate": 0.0011470577827506732, "loss": 0.2139, "step": 46924 }, { "epoch": 0.08320367666419322, "grad_norm": 2.25, "learning_rate": 0.0011469983454133127, "loss": 0.4479, "step": 46926 }, { "epoch": 0.08320722282950303, "grad_norm": 0.43359375, "learning_rate": 0.0011469389078704087, "loss": 0.1639, "step": 46928 }, { "epoch": 0.08321076899481285, "grad_norm": 0.45703125, "learning_rate": 0.0011468794701222208, "loss": 0.1323, "step": 46930 }, { "epoch": 0.08321431516012266, "grad_norm": 0.419921875, "learning_rate": 0.0011468200321690099, "loss": 0.1603, "step": 46932 }, { "epoch": 0.08321786132543248, "grad_norm": 0.53125, "learning_rate": 0.001146760594011035, "loss": 0.1758, "step": 46934 }, { "epoch": 0.08322140749074229, "grad_norm": 2.34375, "learning_rate": 0.0011467011556485565, "loss": 0.3164, "step": 46936 }, { "epoch": 0.0832249536560521, "grad_norm": 0.55859375, "learning_rate": 0.0011466417170818341, "loss": 0.1672, "step": 46938 }, { "epoch": 0.08322849982136192, "grad_norm": 1.0625, "learning_rate": 0.0011465822783111282, "loss": 0.1547, "step": 46940 }, { "epoch": 0.08323204598667173, "grad_norm": 0.73046875, "learning_rate": 0.0011465228393366985, "loss": 0.2106, "step": 46942 }, { "epoch": 0.08323559215198155, "grad_norm": 0.1640625, "learning_rate": 0.0011464634001588049, "loss": 0.1731, "step": 46944 }, { "epoch": 0.08323913831729136, "grad_norm": 0.2734375, "learning_rate": 0.001146403960777707, "loss": 0.1577, "step": 46946 }, { "epoch": 0.08324268448260118, "grad_norm": 0.58984375, "learning_rate": 0.0011463445211936658, "loss": 0.1765, "step": 46948 }, { "epoch": 0.083246230647911, "grad_norm": 1.96875, "learning_rate": 0.00114628508140694, "loss": 0.154, "step": 46950 }, { "epoch": 0.08324977681322081, "grad_norm": 1.7109375, "learning_rate": 0.0011462256414177905, "loss": 0.284, "step": 46952 }, { "epoch": 0.08325332297853062, "grad_norm": 0.9765625, "learning_rate": 0.0011461662012264763, "loss": 0.1683, "step": 46954 }, { "epoch": 0.08325686914384045, "grad_norm": 0.5390625, "learning_rate": 0.0011461067608332584, "loss": 0.1621, "step": 46956 }, { "epoch": 0.08326041530915027, "grad_norm": 0.29296875, "learning_rate": 0.0011460473202383962, "loss": 0.1862, "step": 46958 }, { "epoch": 0.08326396147446008, "grad_norm": 0.4921875, "learning_rate": 0.0011459878794421502, "loss": 0.1349, "step": 46960 }, { "epoch": 0.0832675076397699, "grad_norm": 0.408203125, "learning_rate": 0.0011459284384447797, "loss": 0.1641, "step": 46962 }, { "epoch": 0.08327105380507971, "grad_norm": 0.349609375, "learning_rate": 0.0011458689972465449, "loss": 0.1654, "step": 46964 }, { "epoch": 0.08327459997038952, "grad_norm": 0.494140625, "learning_rate": 0.0011458095558477056, "loss": 0.1823, "step": 46966 }, { "epoch": 0.08327814613569934, "grad_norm": 0.3359375, "learning_rate": 0.0011457501142485217, "loss": 0.1676, "step": 46968 }, { "epoch": 0.08328169230100915, "grad_norm": 2.296875, "learning_rate": 0.0011456906724492536, "loss": 0.3335, "step": 46970 }, { "epoch": 0.08328523846631897, "grad_norm": 0.22265625, "learning_rate": 0.0011456312304501612, "loss": 0.1636, "step": 46972 }, { "epoch": 0.08328878463162878, "grad_norm": 0.33984375, "learning_rate": 0.001145571788251504, "loss": 0.1895, "step": 46974 }, { "epoch": 0.0832923307969386, "grad_norm": 0.234375, "learning_rate": 0.0011455123458535424, "loss": 0.199, "step": 46976 }, { "epoch": 0.08329587696224841, "grad_norm": 1.984375, "learning_rate": 0.0011454529032565361, "loss": 0.2023, "step": 46978 }, { "epoch": 0.08329942312755823, "grad_norm": 0.3125, "learning_rate": 0.0011453934604607457, "loss": 0.1863, "step": 46980 }, { "epoch": 0.08330296929286804, "grad_norm": 0.265625, "learning_rate": 0.0011453340174664305, "loss": 0.1568, "step": 46982 }, { "epoch": 0.08330651545817785, "grad_norm": 0.4765625, "learning_rate": 0.0011452745742738504, "loss": 0.3106, "step": 46984 }, { "epoch": 0.08331006162348767, "grad_norm": 0.259765625, "learning_rate": 0.0011452151308832654, "loss": 0.1416, "step": 46986 }, { "epoch": 0.08331360778879748, "grad_norm": 0.28515625, "learning_rate": 0.0011451556872949362, "loss": 0.1719, "step": 46988 }, { "epoch": 0.0833171539541073, "grad_norm": 0.3046875, "learning_rate": 0.001145096243509122, "loss": 0.1675, "step": 46990 }, { "epoch": 0.08332070011941711, "grad_norm": 0.458984375, "learning_rate": 0.001145036799526083, "loss": 0.1905, "step": 46992 }, { "epoch": 0.08332424628472693, "grad_norm": 0.8125, "learning_rate": 0.001144977355346079, "loss": 0.1821, "step": 46994 }, { "epoch": 0.08332779245003674, "grad_norm": 0.7578125, "learning_rate": 0.0011449179109693705, "loss": 0.2909, "step": 46996 }, { "epoch": 0.08333133861534656, "grad_norm": 11.375, "learning_rate": 0.001144858466396217, "loss": 0.1833, "step": 46998 }, { "epoch": 0.08333488478065637, "grad_norm": 0.50390625, "learning_rate": 0.0011447990216268788, "loss": 0.2462, "step": 47000 }, { "epoch": 0.0833384309459662, "grad_norm": 0.259765625, "learning_rate": 0.0011447395766616157, "loss": 0.2116, "step": 47002 }, { "epoch": 0.08334197711127601, "grad_norm": 0.2275390625, "learning_rate": 0.0011446801315006876, "loss": 0.1744, "step": 47004 }, { "epoch": 0.08334552327658583, "grad_norm": 0.263671875, "learning_rate": 0.0011446206861443545, "loss": 0.2364, "step": 47006 }, { "epoch": 0.08334906944189564, "grad_norm": 0.451171875, "learning_rate": 0.0011445612405928765, "loss": 0.1838, "step": 47008 }, { "epoch": 0.08335261560720546, "grad_norm": 0.70703125, "learning_rate": 0.0011445017948465135, "loss": 0.2362, "step": 47010 }, { "epoch": 0.08335616177251527, "grad_norm": 1.1484375, "learning_rate": 0.0011444423489055254, "loss": 0.2742, "step": 47012 }, { "epoch": 0.08335970793782509, "grad_norm": 0.36328125, "learning_rate": 0.0011443829027701724, "loss": 0.1602, "step": 47014 }, { "epoch": 0.0833632541031349, "grad_norm": 1.59375, "learning_rate": 0.0011443234564407143, "loss": 0.2128, "step": 47016 }, { "epoch": 0.08336680026844472, "grad_norm": 0.375, "learning_rate": 0.0011442640099174112, "loss": 0.1985, "step": 47018 }, { "epoch": 0.08337034643375453, "grad_norm": 0.244140625, "learning_rate": 0.0011442045632005231, "loss": 0.1621, "step": 47020 }, { "epoch": 0.08337389259906434, "grad_norm": 0.62890625, "learning_rate": 0.00114414511629031, "loss": 0.2702, "step": 47022 }, { "epoch": 0.08337743876437416, "grad_norm": 0.5234375, "learning_rate": 0.0011440856691870313, "loss": 0.1562, "step": 47024 }, { "epoch": 0.08338098492968397, "grad_norm": 0.7421875, "learning_rate": 0.001144026221890948, "loss": 0.1741, "step": 47026 }, { "epoch": 0.08338453109499379, "grad_norm": 0.48828125, "learning_rate": 0.0011439667744023193, "loss": 0.2177, "step": 47028 }, { "epoch": 0.0833880772603036, "grad_norm": 0.35546875, "learning_rate": 0.0011439073267214058, "loss": 0.1767, "step": 47030 }, { "epoch": 0.08339162342561342, "grad_norm": 0.55078125, "learning_rate": 0.0011438478788484668, "loss": 0.2738, "step": 47032 }, { "epoch": 0.08339516959092323, "grad_norm": 0.51953125, "learning_rate": 0.0011437884307837628, "loss": 0.382, "step": 47034 }, { "epoch": 0.08339871575623305, "grad_norm": 0.94921875, "learning_rate": 0.0011437289825275533, "loss": 0.1975, "step": 47036 }, { "epoch": 0.08340226192154286, "grad_norm": 0.333984375, "learning_rate": 0.001143669534080099, "loss": 0.1884, "step": 47038 }, { "epoch": 0.08340580808685268, "grad_norm": 0.7578125, "learning_rate": 0.0011436100854416598, "loss": 0.1896, "step": 47040 }, { "epoch": 0.08340935425216249, "grad_norm": 1.0390625, "learning_rate": 0.001143550636612495, "loss": 0.1936, "step": 47042 }, { "epoch": 0.0834129004174723, "grad_norm": 0.44921875, "learning_rate": 0.001143491187592865, "loss": 0.2063, "step": 47044 }, { "epoch": 0.08341644658278212, "grad_norm": 0.30078125, "learning_rate": 0.0011434317383830302, "loss": 0.1445, "step": 47046 }, { "epoch": 0.08341999274809195, "grad_norm": 0.671875, "learning_rate": 0.0011433722889832498, "loss": 0.1596, "step": 47048 }, { "epoch": 0.08342353891340176, "grad_norm": 0.2412109375, "learning_rate": 0.0011433128393937844, "loss": 0.1838, "step": 47050 }, { "epoch": 0.08342708507871158, "grad_norm": 0.46484375, "learning_rate": 0.0011432533896148934, "loss": 0.1716, "step": 47052 }, { "epoch": 0.08343063124402139, "grad_norm": 0.419921875, "learning_rate": 0.0011431939396468376, "loss": 0.1585, "step": 47054 }, { "epoch": 0.0834341774093312, "grad_norm": 0.498046875, "learning_rate": 0.0011431344894898767, "loss": 0.1759, "step": 47056 }, { "epoch": 0.08343772357464102, "grad_norm": 0.318359375, "learning_rate": 0.0011430750391442706, "loss": 0.1813, "step": 47058 }, { "epoch": 0.08344126973995084, "grad_norm": 2.03125, "learning_rate": 0.0011430155886102788, "loss": 0.3487, "step": 47060 }, { "epoch": 0.08344481590526065, "grad_norm": 0.326171875, "learning_rate": 0.0011429561378881621, "loss": 0.2413, "step": 47062 }, { "epoch": 0.08344836207057046, "grad_norm": 0.33203125, "learning_rate": 0.0011428966869781803, "loss": 0.1819, "step": 47064 }, { "epoch": 0.08345190823588028, "grad_norm": 0.353515625, "learning_rate": 0.0011428372358805931, "loss": 0.2055, "step": 47066 }, { "epoch": 0.0834554544011901, "grad_norm": 0.255859375, "learning_rate": 0.001142777784595661, "loss": 0.1657, "step": 47068 }, { "epoch": 0.08345900056649991, "grad_norm": 0.298828125, "learning_rate": 0.0011427183331236437, "loss": 0.1649, "step": 47070 }, { "epoch": 0.08346254673180972, "grad_norm": 0.4609375, "learning_rate": 0.0011426588814648006, "loss": 0.1699, "step": 47072 }, { "epoch": 0.08346609289711954, "grad_norm": 0.51171875, "learning_rate": 0.001142599429619393, "loss": 0.2069, "step": 47074 }, { "epoch": 0.08346963906242935, "grad_norm": 0.314453125, "learning_rate": 0.0011425399775876802, "loss": 0.1649, "step": 47076 }, { "epoch": 0.08347318522773917, "grad_norm": 4.625, "learning_rate": 0.0011424805253699222, "loss": 0.1879, "step": 47078 }, { "epoch": 0.08347673139304898, "grad_norm": 0.19140625, "learning_rate": 0.0011424210729663788, "loss": 0.1847, "step": 47080 }, { "epoch": 0.0834802775583588, "grad_norm": 0.259765625, "learning_rate": 0.0011423616203773104, "loss": 0.2043, "step": 47082 }, { "epoch": 0.08348382372366861, "grad_norm": 0.5078125, "learning_rate": 0.001142302167602977, "loss": 0.1824, "step": 47084 }, { "epoch": 0.08348736988897842, "grad_norm": 0.1728515625, "learning_rate": 0.0011422427146436382, "loss": 0.3641, "step": 47086 }, { "epoch": 0.08349091605428824, "grad_norm": 0.58203125, "learning_rate": 0.0011421832614995546, "loss": 0.2165, "step": 47088 }, { "epoch": 0.08349446221959805, "grad_norm": 0.6328125, "learning_rate": 0.0011421238081709857, "loss": 0.2488, "step": 47090 }, { "epoch": 0.08349800838490788, "grad_norm": 0.7265625, "learning_rate": 0.0011420643546581918, "loss": 0.1965, "step": 47092 }, { "epoch": 0.0835015545502177, "grad_norm": 0.44921875, "learning_rate": 0.0011420049009614331, "loss": 0.2034, "step": 47094 }, { "epoch": 0.08350510071552751, "grad_norm": 0.486328125, "learning_rate": 0.0011419454470809694, "loss": 0.179, "step": 47096 }, { "epoch": 0.08350864688083733, "grad_norm": 1.0859375, "learning_rate": 0.0011418859930170601, "loss": 0.1815, "step": 47098 }, { "epoch": 0.08351219304614714, "grad_norm": 0.7109375, "learning_rate": 0.0011418265387699662, "loss": 0.202, "step": 47100 }, { "epoch": 0.08351573921145695, "grad_norm": 1.328125, "learning_rate": 0.0011417670843399473, "loss": 0.2507, "step": 47102 }, { "epoch": 0.08351928537676677, "grad_norm": 0.60546875, "learning_rate": 0.0011417076297272634, "loss": 0.1769, "step": 47104 }, { "epoch": 0.08352283154207658, "grad_norm": 1.21875, "learning_rate": 0.0011416481749321745, "loss": 0.2889, "step": 47106 }, { "epoch": 0.0835263777073864, "grad_norm": 2.609375, "learning_rate": 0.001141588719954941, "loss": 0.4277, "step": 47108 }, { "epoch": 0.08352992387269621, "grad_norm": 0.3984375, "learning_rate": 0.001141529264795822, "loss": 0.2553, "step": 47110 }, { "epoch": 0.08353347003800603, "grad_norm": 0.275390625, "learning_rate": 0.0011414698094550787, "loss": 0.1978, "step": 47112 }, { "epoch": 0.08353701620331584, "grad_norm": 0.41796875, "learning_rate": 0.0011414103539329702, "loss": 0.1793, "step": 47114 }, { "epoch": 0.08354056236862566, "grad_norm": 0.345703125, "learning_rate": 0.0011413508982297568, "loss": 0.1803, "step": 47116 }, { "epoch": 0.08354410853393547, "grad_norm": 0.224609375, "learning_rate": 0.0011412914423456985, "loss": 0.1674, "step": 47118 }, { "epoch": 0.08354765469924529, "grad_norm": 0.28515625, "learning_rate": 0.001141231986281056, "loss": 0.2495, "step": 47120 }, { "epoch": 0.0835512008645551, "grad_norm": 0.216796875, "learning_rate": 0.0011411725300360881, "loss": 0.1584, "step": 47122 }, { "epoch": 0.08355474702986491, "grad_norm": 0.30859375, "learning_rate": 0.0011411130736110559, "loss": 0.203, "step": 47124 }, { "epoch": 0.08355829319517473, "grad_norm": 0.54296875, "learning_rate": 0.0011410536170062186, "loss": 0.1971, "step": 47126 }, { "epoch": 0.08356183936048454, "grad_norm": 0.515625, "learning_rate": 0.0011409941602218373, "loss": 0.1683, "step": 47128 }, { "epoch": 0.08356538552579436, "grad_norm": 0.2470703125, "learning_rate": 0.0011409347032581708, "loss": 0.2049, "step": 47130 }, { "epoch": 0.08356893169110417, "grad_norm": 1.3046875, "learning_rate": 0.00114087524611548, "loss": 0.1605, "step": 47132 }, { "epoch": 0.08357247785641399, "grad_norm": 0.25390625, "learning_rate": 0.0011408157887940243, "loss": 0.1674, "step": 47134 }, { "epoch": 0.0835760240217238, "grad_norm": 0.279296875, "learning_rate": 0.0011407563312940644, "loss": 0.1472, "step": 47136 }, { "epoch": 0.08357957018703363, "grad_norm": 0.1640625, "learning_rate": 0.0011406968736158596, "loss": 0.1787, "step": 47138 }, { "epoch": 0.08358311635234345, "grad_norm": 0.251953125, "learning_rate": 0.0011406374157596703, "loss": 0.1926, "step": 47140 }, { "epoch": 0.08358666251765326, "grad_norm": 0.875, "learning_rate": 0.0011405779577257568, "loss": 0.1992, "step": 47142 }, { "epoch": 0.08359020868296307, "grad_norm": 1.6015625, "learning_rate": 0.001140518499514379, "loss": 0.21, "step": 47144 }, { "epoch": 0.08359375484827289, "grad_norm": 1.71875, "learning_rate": 0.0011404590411257965, "loss": 0.2927, "step": 47146 }, { "epoch": 0.0835973010135827, "grad_norm": 0.57421875, "learning_rate": 0.0011403995825602698, "loss": 0.1544, "step": 47148 }, { "epoch": 0.08360084717889252, "grad_norm": 0.35546875, "learning_rate": 0.0011403401238180588, "loss": 0.2121, "step": 47150 }, { "epoch": 0.08360439334420233, "grad_norm": 1.796875, "learning_rate": 0.0011402806648994237, "loss": 0.4749, "step": 47152 }, { "epoch": 0.08360793950951215, "grad_norm": 0.50390625, "learning_rate": 0.001140221205804624, "loss": 0.2003, "step": 47154 }, { "epoch": 0.08361148567482196, "grad_norm": 0.1962890625, "learning_rate": 0.0011401617465339204, "loss": 0.1396, "step": 47156 }, { "epoch": 0.08361503184013178, "grad_norm": 0.171875, "learning_rate": 0.0011401022870875723, "loss": 0.1869, "step": 47158 }, { "epoch": 0.08361857800544159, "grad_norm": 0.224609375, "learning_rate": 0.0011400428274658406, "loss": 0.1404, "step": 47160 }, { "epoch": 0.0836221241707514, "grad_norm": 0.43359375, "learning_rate": 0.0011399833676689845, "loss": 0.2161, "step": 47162 }, { "epoch": 0.08362567033606122, "grad_norm": 0.3046875, "learning_rate": 0.0011399239076972643, "loss": 0.2661, "step": 47164 }, { "epoch": 0.08362921650137103, "grad_norm": 0.33203125, "learning_rate": 0.00113986444755094, "loss": 0.1694, "step": 47166 }, { "epoch": 0.08363276266668085, "grad_norm": 0.69921875, "learning_rate": 0.0011398049872302723, "loss": 0.1737, "step": 47168 }, { "epoch": 0.08363630883199066, "grad_norm": 0.734375, "learning_rate": 0.0011397455267355204, "loss": 0.2423, "step": 47170 }, { "epoch": 0.08363985499730048, "grad_norm": 0.2470703125, "learning_rate": 0.0011396860660669447, "loss": 0.1428, "step": 47172 }, { "epoch": 0.08364340116261029, "grad_norm": 0.408203125, "learning_rate": 0.0011396266052248053, "loss": 0.1872, "step": 47174 }, { "epoch": 0.08364694732792011, "grad_norm": 0.69921875, "learning_rate": 0.001139567144209362, "loss": 0.1871, "step": 47176 }, { "epoch": 0.08365049349322992, "grad_norm": 0.392578125, "learning_rate": 0.001139507683020875, "loss": 0.1728, "step": 47178 }, { "epoch": 0.08365403965853974, "grad_norm": 0.53515625, "learning_rate": 0.0011394482216596042, "loss": 0.146, "step": 47180 }, { "epoch": 0.08365758582384955, "grad_norm": 0.3671875, "learning_rate": 0.00113938876012581, "loss": 0.3355, "step": 47182 }, { "epoch": 0.08366113198915938, "grad_norm": 0.248046875, "learning_rate": 0.0011393292984197521, "loss": 0.1605, "step": 47184 }, { "epoch": 0.0836646781544692, "grad_norm": 1.078125, "learning_rate": 0.001139269836541691, "loss": 0.1953, "step": 47186 }, { "epoch": 0.08366822431977901, "grad_norm": 0.3671875, "learning_rate": 0.0011392103744918862, "loss": 0.228, "step": 47188 }, { "epoch": 0.08367177048508882, "grad_norm": 0.16796875, "learning_rate": 0.0011391509122705983, "loss": 0.2064, "step": 47190 }, { "epoch": 0.08367531665039864, "grad_norm": 0.17578125, "learning_rate": 0.0011390914498780865, "loss": 0.1582, "step": 47192 }, { "epoch": 0.08367886281570845, "grad_norm": 0.6484375, "learning_rate": 0.0011390319873146118, "loss": 0.1666, "step": 47194 }, { "epoch": 0.08368240898101827, "grad_norm": 0.2060546875, "learning_rate": 0.0011389725245804336, "loss": 0.1599, "step": 47196 }, { "epoch": 0.08368595514632808, "grad_norm": 1.1640625, "learning_rate": 0.0011389130616758125, "loss": 0.19, "step": 47198 }, { "epoch": 0.0836895013116379, "grad_norm": 0.47265625, "learning_rate": 0.001138853598601008, "loss": 0.1615, "step": 47200 }, { "epoch": 0.08369304747694771, "grad_norm": 2.359375, "learning_rate": 0.0011387941353562805, "loss": 0.3523, "step": 47202 }, { "epoch": 0.08369659364225752, "grad_norm": 0.54296875, "learning_rate": 0.0011387346719418901, "loss": 0.1309, "step": 47204 }, { "epoch": 0.08370013980756734, "grad_norm": 0.51171875, "learning_rate": 0.0011386752083580969, "loss": 0.1456, "step": 47206 }, { "epoch": 0.08370368597287715, "grad_norm": 0.30859375, "learning_rate": 0.0011386157446051607, "loss": 0.1315, "step": 47208 }, { "epoch": 0.08370723213818697, "grad_norm": 0.435546875, "learning_rate": 0.0011385562806833414, "loss": 0.2211, "step": 47210 }, { "epoch": 0.08371077830349678, "grad_norm": 0.322265625, "learning_rate": 0.0011384968165928993, "loss": 0.2256, "step": 47212 }, { "epoch": 0.0837143244688066, "grad_norm": 0.3984375, "learning_rate": 0.001138437352334095, "loss": 0.1277, "step": 47214 }, { "epoch": 0.08371787063411641, "grad_norm": 0.71484375, "learning_rate": 0.0011383778879071875, "loss": 0.226, "step": 47216 }, { "epoch": 0.08372141679942623, "grad_norm": 0.177734375, "learning_rate": 0.0011383184233124378, "loss": 0.1931, "step": 47218 }, { "epoch": 0.08372496296473604, "grad_norm": 0.447265625, "learning_rate": 0.001138258958550105, "loss": 0.2917, "step": 47220 }, { "epoch": 0.08372850913004586, "grad_norm": 0.5546875, "learning_rate": 0.0011381994936204504, "loss": 0.1549, "step": 47222 }, { "epoch": 0.08373205529535567, "grad_norm": 0.734375, "learning_rate": 0.001138140028523733, "loss": 0.2144, "step": 47224 }, { "epoch": 0.08373560146066548, "grad_norm": 0.419921875, "learning_rate": 0.0011380805632602135, "loss": 0.1652, "step": 47226 }, { "epoch": 0.08373914762597531, "grad_norm": 0.734375, "learning_rate": 0.0011380210978301517, "loss": 0.1829, "step": 47228 }, { "epoch": 0.08374269379128513, "grad_norm": 0.423828125, "learning_rate": 0.0011379616322338077, "loss": 0.2723, "step": 47230 }, { "epoch": 0.08374623995659494, "grad_norm": 0.201171875, "learning_rate": 0.0011379021664714415, "loss": 0.2002, "step": 47232 }, { "epoch": 0.08374978612190476, "grad_norm": 0.43359375, "learning_rate": 0.0011378427005433137, "loss": 0.3498, "step": 47234 }, { "epoch": 0.08375333228721457, "grad_norm": 0.51171875, "learning_rate": 0.001137783234449683, "loss": 0.1736, "step": 47236 }, { "epoch": 0.08375687845252439, "grad_norm": 2.4375, "learning_rate": 0.001137723768190811, "loss": 0.2948, "step": 47238 }, { "epoch": 0.0837604246178342, "grad_norm": 0.392578125, "learning_rate": 0.001137664301766957, "loss": 0.1887, "step": 47240 }, { "epoch": 0.08376397078314402, "grad_norm": 0.345703125, "learning_rate": 0.0011376048351783813, "loss": 0.2287, "step": 47242 }, { "epoch": 0.08376751694845383, "grad_norm": 0.546875, "learning_rate": 0.0011375453684253438, "loss": 0.1488, "step": 47244 }, { "epoch": 0.08377106311376364, "grad_norm": 0.2216796875, "learning_rate": 0.0011374859015081046, "loss": 0.1734, "step": 47246 }, { "epoch": 0.08377460927907346, "grad_norm": 0.62890625, "learning_rate": 0.001137426434426924, "loss": 0.2326, "step": 47248 }, { "epoch": 0.08377815544438327, "grad_norm": 0.5390625, "learning_rate": 0.001137366967182062, "loss": 0.1629, "step": 47250 }, { "epoch": 0.08378170160969309, "grad_norm": 0.400390625, "learning_rate": 0.0011373074997737783, "loss": 0.1746, "step": 47252 }, { "epoch": 0.0837852477750029, "grad_norm": 0.353515625, "learning_rate": 0.0011372480322023332, "loss": 0.155, "step": 47254 }, { "epoch": 0.08378879394031272, "grad_norm": 0.30078125, "learning_rate": 0.001137188564467987, "loss": 0.2478, "step": 47256 }, { "epoch": 0.08379234010562253, "grad_norm": 0.57421875, "learning_rate": 0.0011371290965709999, "loss": 0.355, "step": 47258 }, { "epoch": 0.08379588627093235, "grad_norm": 0.34375, "learning_rate": 0.0011370696285116315, "loss": 0.1931, "step": 47260 }, { "epoch": 0.08379943243624216, "grad_norm": 0.380859375, "learning_rate": 0.0011370101602901422, "loss": 0.1241, "step": 47262 }, { "epoch": 0.08380297860155198, "grad_norm": 0.49609375, "learning_rate": 0.0011369506919067917, "loss": 0.2031, "step": 47264 }, { "epoch": 0.08380652476686179, "grad_norm": 2.609375, "learning_rate": 0.0011368912233618402, "loss": 0.3724, "step": 47266 }, { "epoch": 0.0838100709321716, "grad_norm": 0.234375, "learning_rate": 0.0011368317546555484, "loss": 0.1516, "step": 47268 }, { "epoch": 0.08381361709748142, "grad_norm": 0.3984375, "learning_rate": 0.0011367722857881758, "loss": 0.2942, "step": 47270 }, { "epoch": 0.08381716326279123, "grad_norm": 0.30859375, "learning_rate": 0.0011367128167599826, "loss": 0.315, "step": 47272 }, { "epoch": 0.08382070942810106, "grad_norm": 0.25, "learning_rate": 0.0011366533475712284, "loss": 0.1728, "step": 47274 }, { "epoch": 0.08382425559341088, "grad_norm": 1.828125, "learning_rate": 0.001136593878222174, "loss": 0.2207, "step": 47276 }, { "epoch": 0.08382780175872069, "grad_norm": 0.236328125, "learning_rate": 0.001136534408713079, "loss": 0.157, "step": 47278 }, { "epoch": 0.0838313479240305, "grad_norm": 0.255859375, "learning_rate": 0.0011364749390442043, "loss": 0.1793, "step": 47280 }, { "epoch": 0.08383489408934032, "grad_norm": 0.31640625, "learning_rate": 0.0011364154692158092, "loss": 0.1513, "step": 47282 }, { "epoch": 0.08383844025465013, "grad_norm": 1.0234375, "learning_rate": 0.0011363559992281541, "loss": 0.2184, "step": 47284 }, { "epoch": 0.08384198641995995, "grad_norm": 0.474609375, "learning_rate": 0.0011362965290814986, "loss": 0.2366, "step": 47286 }, { "epoch": 0.08384553258526976, "grad_norm": 0.484375, "learning_rate": 0.0011362370587761036, "loss": 0.1841, "step": 47288 }, { "epoch": 0.08384907875057958, "grad_norm": 0.369140625, "learning_rate": 0.0011361775883122283, "loss": 0.1804, "step": 47290 }, { "epoch": 0.08385262491588939, "grad_norm": 1.140625, "learning_rate": 0.0011361181176901334, "loss": 0.2291, "step": 47292 }, { "epoch": 0.08385617108119921, "grad_norm": 0.44921875, "learning_rate": 0.0011360586469100788, "loss": 0.1713, "step": 47294 }, { "epoch": 0.08385971724650902, "grad_norm": 1.6953125, "learning_rate": 0.0011359991759723248, "loss": 0.2164, "step": 47296 }, { "epoch": 0.08386326341181884, "grad_norm": 0.55859375, "learning_rate": 0.001135939704877131, "loss": 0.1867, "step": 47298 }, { "epoch": 0.08386680957712865, "grad_norm": 0.447265625, "learning_rate": 0.001135880233624758, "loss": 0.1527, "step": 47300 }, { "epoch": 0.08387035574243847, "grad_norm": 0.5390625, "learning_rate": 0.001135820762215466, "loss": 0.1627, "step": 47302 }, { "epoch": 0.08387390190774828, "grad_norm": 0.40625, "learning_rate": 0.0011357612906495145, "loss": 0.1632, "step": 47304 }, { "epoch": 0.0838774480730581, "grad_norm": 0.40625, "learning_rate": 0.0011357018189271637, "loss": 0.3064, "step": 47306 }, { "epoch": 0.08388099423836791, "grad_norm": 0.56640625, "learning_rate": 0.0011356423470486744, "loss": 0.1373, "step": 47308 }, { "epoch": 0.08388454040367772, "grad_norm": 0.2021484375, "learning_rate": 0.0011355828750143056, "loss": 0.2194, "step": 47310 }, { "epoch": 0.08388808656898754, "grad_norm": 1.2265625, "learning_rate": 0.0011355234028243182, "loss": 0.1781, "step": 47312 }, { "epoch": 0.08389163273429735, "grad_norm": 0.265625, "learning_rate": 0.0011354639304789723, "loss": 0.191, "step": 47314 }, { "epoch": 0.08389517889960717, "grad_norm": 0.2431640625, "learning_rate": 0.0011354044579785275, "loss": 0.2903, "step": 47316 }, { "epoch": 0.08389872506491698, "grad_norm": 0.16015625, "learning_rate": 0.0011353449853232446, "loss": 0.1741, "step": 47318 }, { "epoch": 0.08390227123022681, "grad_norm": 0.45703125, "learning_rate": 0.0011352855125133828, "loss": 0.2106, "step": 47320 }, { "epoch": 0.08390581739553662, "grad_norm": 1.4921875, "learning_rate": 0.0011352260395492025, "loss": 0.2006, "step": 47322 }, { "epoch": 0.08390936356084644, "grad_norm": 0.2470703125, "learning_rate": 0.0011351665664309644, "loss": 0.166, "step": 47324 }, { "epoch": 0.08391290972615625, "grad_norm": 0.60546875, "learning_rate": 0.0011351070931589278, "loss": 0.2467, "step": 47326 }, { "epoch": 0.08391645589146607, "grad_norm": 1.109375, "learning_rate": 0.0011350476197333535, "loss": 0.2146, "step": 47328 }, { "epoch": 0.08392000205677588, "grad_norm": 1.078125, "learning_rate": 0.0011349881461545008, "loss": 0.2844, "step": 47330 }, { "epoch": 0.0839235482220857, "grad_norm": 1.046875, "learning_rate": 0.001134928672422631, "loss": 0.2253, "step": 47332 }, { "epoch": 0.08392709438739551, "grad_norm": 0.439453125, "learning_rate": 0.0011348691985380028, "loss": 0.1686, "step": 47334 }, { "epoch": 0.08393064055270533, "grad_norm": 0.27734375, "learning_rate": 0.0011348097245008773, "loss": 0.1633, "step": 47336 }, { "epoch": 0.08393418671801514, "grad_norm": 0.2890625, "learning_rate": 0.0011347502503115143, "loss": 0.1774, "step": 47338 }, { "epoch": 0.08393773288332496, "grad_norm": 0.357421875, "learning_rate": 0.0011346907759701738, "loss": 0.3286, "step": 47340 }, { "epoch": 0.08394127904863477, "grad_norm": 0.40234375, "learning_rate": 0.0011346313014771162, "loss": 0.1623, "step": 47342 }, { "epoch": 0.08394482521394458, "grad_norm": 0.1943359375, "learning_rate": 0.001134571826832601, "loss": 0.2094, "step": 47344 }, { "epoch": 0.0839483713792544, "grad_norm": 0.408203125, "learning_rate": 0.0011345123520368893, "loss": 0.2735, "step": 47346 }, { "epoch": 0.08395191754456421, "grad_norm": 1.0390625, "learning_rate": 0.00113445287709024, "loss": 0.2351, "step": 47348 }, { "epoch": 0.08395546370987403, "grad_norm": 0.275390625, "learning_rate": 0.001134393401992914, "loss": 0.1722, "step": 47350 }, { "epoch": 0.08395900987518384, "grad_norm": 0.2021484375, "learning_rate": 0.0011343339267451712, "loss": 0.1517, "step": 47352 }, { "epoch": 0.08396255604049366, "grad_norm": 0.1875, "learning_rate": 0.001134274451347272, "loss": 0.208, "step": 47354 }, { "epoch": 0.08396610220580347, "grad_norm": 0.26171875, "learning_rate": 0.0011342149757994765, "loss": 0.1675, "step": 47356 }, { "epoch": 0.08396964837111329, "grad_norm": 0.263671875, "learning_rate": 0.0011341555001020442, "loss": 0.1516, "step": 47358 }, { "epoch": 0.0839731945364231, "grad_norm": 0.38671875, "learning_rate": 0.0011340960242552354, "loss": 0.1409, "step": 47360 }, { "epoch": 0.08397674070173292, "grad_norm": 1.359375, "learning_rate": 0.0011340365482593107, "loss": 0.3538, "step": 47362 }, { "epoch": 0.08398028686704274, "grad_norm": 0.494140625, "learning_rate": 0.0011339770721145296, "loss": 0.1572, "step": 47364 }, { "epoch": 0.08398383303235256, "grad_norm": 0.373046875, "learning_rate": 0.001133917595821153, "loss": 0.2331, "step": 47366 }, { "epoch": 0.08398737919766237, "grad_norm": 0.30078125, "learning_rate": 0.00113385811937944, "loss": 0.1749, "step": 47368 }, { "epoch": 0.08399092536297219, "grad_norm": 0.380859375, "learning_rate": 0.001133798642789652, "loss": 0.2114, "step": 47370 }, { "epoch": 0.083994471528282, "grad_norm": 0.546875, "learning_rate": 0.0011337391660520473, "loss": 0.1594, "step": 47372 }, { "epoch": 0.08399801769359182, "grad_norm": 0.74609375, "learning_rate": 0.001133679689166888, "loss": 0.2942, "step": 47374 }, { "epoch": 0.08400156385890163, "grad_norm": 0.16015625, "learning_rate": 0.001133620212134433, "loss": 0.1542, "step": 47376 }, { "epoch": 0.08400511002421145, "grad_norm": 0.427734375, "learning_rate": 0.0011335607349549429, "loss": 0.1671, "step": 47378 }, { "epoch": 0.08400865618952126, "grad_norm": 0.1923828125, "learning_rate": 0.0011335012576286772, "loss": 0.1504, "step": 47380 }, { "epoch": 0.08401220235483108, "grad_norm": 0.85546875, "learning_rate": 0.0011334417801558967, "loss": 0.196, "step": 47382 }, { "epoch": 0.08401574852014089, "grad_norm": 0.287109375, "learning_rate": 0.001133382302536861, "loss": 0.2562, "step": 47384 }, { "epoch": 0.0840192946854507, "grad_norm": 0.66796875, "learning_rate": 0.001133322824771831, "loss": 0.3185, "step": 47386 }, { "epoch": 0.08402284085076052, "grad_norm": 0.59375, "learning_rate": 0.001133263346861066, "loss": 0.2251, "step": 47388 }, { "epoch": 0.08402638701607033, "grad_norm": 0.24609375, "learning_rate": 0.0011332038688048266, "loss": 0.1626, "step": 47390 }, { "epoch": 0.08402993318138015, "grad_norm": 0.71875, "learning_rate": 0.0011331443906033728, "loss": 0.1952, "step": 47392 }, { "epoch": 0.08403347934668996, "grad_norm": 0.45703125, "learning_rate": 0.0011330849122569645, "loss": 0.2148, "step": 47394 }, { "epoch": 0.08403702551199978, "grad_norm": 0.2060546875, "learning_rate": 0.0011330254337658624, "loss": 0.1268, "step": 47396 }, { "epoch": 0.08404057167730959, "grad_norm": 0.22265625, "learning_rate": 0.0011329659551303258, "loss": 0.1779, "step": 47398 }, { "epoch": 0.0840441178426194, "grad_norm": 0.2734375, "learning_rate": 0.0011329064763506151, "loss": 0.1677, "step": 47400 }, { "epoch": 0.08404766400792922, "grad_norm": 0.353515625, "learning_rate": 0.0011328469974269911, "loss": 0.2875, "step": 47402 }, { "epoch": 0.08405121017323904, "grad_norm": 0.54296875, "learning_rate": 0.001132787518359713, "loss": 0.1826, "step": 47404 }, { "epoch": 0.08405475633854885, "grad_norm": 0.84375, "learning_rate": 0.0011327280391490416, "loss": 0.3227, "step": 47406 }, { "epoch": 0.08405830250385866, "grad_norm": 0.349609375, "learning_rate": 0.0011326685597952367, "loss": 0.1722, "step": 47408 }, { "epoch": 0.08406184866916849, "grad_norm": 0.1845703125, "learning_rate": 0.0011326090802985585, "loss": 0.1733, "step": 47410 }, { "epoch": 0.08406539483447831, "grad_norm": 0.466796875, "learning_rate": 0.0011325496006592672, "loss": 0.2023, "step": 47412 }, { "epoch": 0.08406894099978812, "grad_norm": 0.62109375, "learning_rate": 0.0011324901208776231, "loss": 0.175, "step": 47414 }, { "epoch": 0.08407248716509794, "grad_norm": 2.546875, "learning_rate": 0.0011324306409538855, "loss": 0.3191, "step": 47416 }, { "epoch": 0.08407603333040775, "grad_norm": 0.53125, "learning_rate": 0.0011323711608883151, "loss": 0.1916, "step": 47418 }, { "epoch": 0.08407957949571757, "grad_norm": 0.61328125, "learning_rate": 0.0011323116806811727, "loss": 0.2397, "step": 47420 }, { "epoch": 0.08408312566102738, "grad_norm": 1.046875, "learning_rate": 0.001132252200332717, "loss": 0.2381, "step": 47422 }, { "epoch": 0.0840866718263372, "grad_norm": 0.263671875, "learning_rate": 0.0011321927198432095, "loss": 0.2312, "step": 47424 }, { "epoch": 0.08409021799164701, "grad_norm": 0.220703125, "learning_rate": 0.0011321332392129093, "loss": 0.1643, "step": 47426 }, { "epoch": 0.08409376415695682, "grad_norm": 0.1591796875, "learning_rate": 0.0011320737584420775, "loss": 0.1586, "step": 47428 }, { "epoch": 0.08409731032226664, "grad_norm": 0.984375, "learning_rate": 0.0011320142775309734, "loss": 0.1785, "step": 47430 }, { "epoch": 0.08410085648757645, "grad_norm": 0.5703125, "learning_rate": 0.0011319547964798574, "loss": 0.2734, "step": 47432 }, { "epoch": 0.08410440265288627, "grad_norm": 0.296875, "learning_rate": 0.00113189531528899, "loss": 0.2144, "step": 47434 }, { "epoch": 0.08410794881819608, "grad_norm": 0.328125, "learning_rate": 0.0011318358339586308, "loss": 0.2041, "step": 47436 }, { "epoch": 0.0841114949835059, "grad_norm": 0.455078125, "learning_rate": 0.0011317763524890398, "loss": 0.255, "step": 47438 }, { "epoch": 0.08411504114881571, "grad_norm": 0.310546875, "learning_rate": 0.0011317168708804783, "loss": 0.2131, "step": 47440 }, { "epoch": 0.08411858731412553, "grad_norm": 0.57421875, "learning_rate": 0.001131657389133205, "loss": 0.197, "step": 47442 }, { "epoch": 0.08412213347943534, "grad_norm": 0.609375, "learning_rate": 0.0011315979072474807, "loss": 0.2145, "step": 47444 }, { "epoch": 0.08412567964474515, "grad_norm": 0.58984375, "learning_rate": 0.0011315384252235656, "loss": 0.1744, "step": 47446 }, { "epoch": 0.08412922581005497, "grad_norm": 0.287109375, "learning_rate": 0.0011314789430617199, "loss": 0.1546, "step": 47448 }, { "epoch": 0.08413277197536478, "grad_norm": 0.2314453125, "learning_rate": 0.0011314194607622034, "loss": 0.1409, "step": 47450 }, { "epoch": 0.0841363181406746, "grad_norm": 0.2333984375, "learning_rate": 0.0011313599783252767, "loss": 0.1355, "step": 47452 }, { "epoch": 0.08413986430598441, "grad_norm": 0.3359375, "learning_rate": 0.0011313004957511994, "loss": 0.1522, "step": 47454 }, { "epoch": 0.08414341047129424, "grad_norm": 0.5546875, "learning_rate": 0.001131241013040232, "loss": 0.207, "step": 47456 }, { "epoch": 0.08414695663660406, "grad_norm": 0.53515625, "learning_rate": 0.0011311815301926346, "loss": 0.1927, "step": 47458 }, { "epoch": 0.08415050280191387, "grad_norm": 0.72265625, "learning_rate": 0.0011311220472086671, "loss": 0.1751, "step": 47460 }, { "epoch": 0.08415404896722369, "grad_norm": 0.50390625, "learning_rate": 0.0011310625640885898, "loss": 0.222, "step": 47462 }, { "epoch": 0.0841575951325335, "grad_norm": 0.7421875, "learning_rate": 0.0011310030808326633, "loss": 0.2041, "step": 47464 }, { "epoch": 0.08416114129784331, "grad_norm": 0.48046875, "learning_rate": 0.0011309435974411468, "loss": 0.1401, "step": 47466 }, { "epoch": 0.08416468746315313, "grad_norm": 0.267578125, "learning_rate": 0.0011308841139143016, "loss": 0.1493, "step": 47468 }, { "epoch": 0.08416823362846294, "grad_norm": 0.4921875, "learning_rate": 0.0011308246302523868, "loss": 0.1719, "step": 47470 }, { "epoch": 0.08417177979377276, "grad_norm": 0.33984375, "learning_rate": 0.0011307651464556635, "loss": 0.195, "step": 47472 }, { "epoch": 0.08417532595908257, "grad_norm": 1.6953125, "learning_rate": 0.0011307056625243905, "loss": 0.2645, "step": 47474 }, { "epoch": 0.08417887212439239, "grad_norm": 0.33203125, "learning_rate": 0.0011306461784588294, "loss": 0.1943, "step": 47476 }, { "epoch": 0.0841824182897022, "grad_norm": 0.6796875, "learning_rate": 0.0011305866942592393, "loss": 0.2074, "step": 47478 }, { "epoch": 0.08418596445501202, "grad_norm": 0.56640625, "learning_rate": 0.001130527209925881, "loss": 0.1955, "step": 47480 }, { "epoch": 0.08418951062032183, "grad_norm": 0.2373046875, "learning_rate": 0.0011304677254590143, "loss": 0.2771, "step": 47482 }, { "epoch": 0.08419305678563165, "grad_norm": 0.3671875, "learning_rate": 0.0011304082408588995, "loss": 0.1462, "step": 47484 }, { "epoch": 0.08419660295094146, "grad_norm": 0.251953125, "learning_rate": 0.0011303487561257967, "loss": 0.148, "step": 47486 }, { "epoch": 0.08420014911625127, "grad_norm": 0.357421875, "learning_rate": 0.0011302892712599661, "loss": 0.129, "step": 47488 }, { "epoch": 0.08420369528156109, "grad_norm": 0.2255859375, "learning_rate": 0.001130229786261668, "loss": 0.1488, "step": 47490 }, { "epoch": 0.0842072414468709, "grad_norm": 0.478515625, "learning_rate": 0.0011301703011311623, "loss": 0.1726, "step": 47492 }, { "epoch": 0.08421078761218072, "grad_norm": 2.625, "learning_rate": 0.001130110815868709, "loss": 0.2441, "step": 47494 }, { "epoch": 0.08421433377749053, "grad_norm": 0.1923828125, "learning_rate": 0.0011300513304745687, "loss": 0.1564, "step": 47496 }, { "epoch": 0.08421787994280035, "grad_norm": 0.498046875, "learning_rate": 0.0011299918449490013, "loss": 0.1779, "step": 47498 }, { "epoch": 0.08422142610811018, "grad_norm": 0.1943359375, "learning_rate": 0.0011299323592922669, "loss": 0.1417, "step": 47500 }, { "epoch": 0.08422497227341999, "grad_norm": 0.48046875, "learning_rate": 0.0011298728735046257, "loss": 0.1988, "step": 47502 }, { "epoch": 0.0842285184387298, "grad_norm": 0.267578125, "learning_rate": 0.0011298133875863382, "loss": 0.3473, "step": 47504 }, { "epoch": 0.08423206460403962, "grad_norm": 0.486328125, "learning_rate": 0.001129753901537664, "loss": 0.1993, "step": 47506 }, { "epoch": 0.08423561076934943, "grad_norm": 0.1962890625, "learning_rate": 0.0011296944153588637, "loss": 0.2077, "step": 47508 }, { "epoch": 0.08423915693465925, "grad_norm": 0.482421875, "learning_rate": 0.001129634929050197, "loss": 0.1582, "step": 47510 }, { "epoch": 0.08424270309996906, "grad_norm": 0.404296875, "learning_rate": 0.0011295754426119244, "loss": 0.1732, "step": 47512 }, { "epoch": 0.08424624926527888, "grad_norm": 0.4296875, "learning_rate": 0.0011295159560443062, "loss": 0.2534, "step": 47514 }, { "epoch": 0.08424979543058869, "grad_norm": 0.404296875, "learning_rate": 0.001129456469347602, "loss": 0.1587, "step": 47516 }, { "epoch": 0.0842533415958985, "grad_norm": 0.40625, "learning_rate": 0.001129396982522073, "loss": 0.1773, "step": 47518 }, { "epoch": 0.08425688776120832, "grad_norm": 0.59765625, "learning_rate": 0.0011293374955679782, "loss": 0.4567, "step": 47520 }, { "epoch": 0.08426043392651814, "grad_norm": 0.2294921875, "learning_rate": 0.001129278008485578, "loss": 0.1742, "step": 47522 }, { "epoch": 0.08426398009182795, "grad_norm": 0.62890625, "learning_rate": 0.0011292185212751333, "loss": 0.167, "step": 47524 }, { "epoch": 0.08426752625713776, "grad_norm": 0.546875, "learning_rate": 0.0011291590339369037, "loss": 0.1558, "step": 47526 }, { "epoch": 0.08427107242244758, "grad_norm": 0.765625, "learning_rate": 0.0011290995464711489, "loss": 0.3171, "step": 47528 }, { "epoch": 0.0842746185877574, "grad_norm": 0.2451171875, "learning_rate": 0.00112904005887813, "loss": 0.4318, "step": 47530 }, { "epoch": 0.08427816475306721, "grad_norm": 0.6484375, "learning_rate": 0.0011289805711581066, "loss": 0.2325, "step": 47532 }, { "epoch": 0.08428171091837702, "grad_norm": 0.298828125, "learning_rate": 0.0011289210833113393, "loss": 0.124, "step": 47534 }, { "epoch": 0.08428525708368684, "grad_norm": 0.64453125, "learning_rate": 0.0011288615953380877, "loss": 0.1775, "step": 47536 }, { "epoch": 0.08428880324899665, "grad_norm": 0.443359375, "learning_rate": 0.0011288021072386122, "loss": 0.1331, "step": 47538 }, { "epoch": 0.08429234941430647, "grad_norm": 0.953125, "learning_rate": 0.0011287426190131733, "loss": 0.1702, "step": 47540 }, { "epoch": 0.08429589557961628, "grad_norm": 3.078125, "learning_rate": 0.0011286831306620306, "loss": 0.3227, "step": 47542 }, { "epoch": 0.0842994417449261, "grad_norm": 0.37890625, "learning_rate": 0.0011286236421854448, "loss": 0.2147, "step": 47544 }, { "epoch": 0.08430298791023592, "grad_norm": 0.703125, "learning_rate": 0.0011285641535836756, "loss": 0.3055, "step": 47546 }, { "epoch": 0.08430653407554574, "grad_norm": 2.9375, "learning_rate": 0.0011285046648569837, "loss": 0.4606, "step": 47548 }, { "epoch": 0.08431008024085555, "grad_norm": 0.435546875, "learning_rate": 0.0011284451760056287, "loss": 0.1883, "step": 47550 }, { "epoch": 0.08431362640616537, "grad_norm": 0.7890625, "learning_rate": 0.001128385687029871, "loss": 0.1947, "step": 47552 }, { "epoch": 0.08431717257147518, "grad_norm": 0.458984375, "learning_rate": 0.0011283261979299708, "loss": 0.2316, "step": 47554 }, { "epoch": 0.084320718736785, "grad_norm": 0.5390625, "learning_rate": 0.0011282667087061883, "loss": 0.2282, "step": 47556 }, { "epoch": 0.08432426490209481, "grad_norm": 0.41796875, "learning_rate": 0.0011282072193587835, "loss": 0.1739, "step": 47558 }, { "epoch": 0.08432781106740463, "grad_norm": 0.267578125, "learning_rate": 0.0011281477298880167, "loss": 0.2417, "step": 47560 }, { "epoch": 0.08433135723271444, "grad_norm": 1.328125, "learning_rate": 0.0011280882402941483, "loss": 0.2761, "step": 47562 }, { "epoch": 0.08433490339802426, "grad_norm": 0.390625, "learning_rate": 0.0011280287505774384, "loss": 0.2192, "step": 47564 }, { "epoch": 0.08433844956333407, "grad_norm": 0.1787109375, "learning_rate": 0.001127969260738147, "loss": 0.1417, "step": 47566 }, { "epoch": 0.08434199572864388, "grad_norm": 0.353515625, "learning_rate": 0.0011279097707765338, "loss": 0.2784, "step": 47568 }, { "epoch": 0.0843455418939537, "grad_norm": 0.3359375, "learning_rate": 0.0011278502806928601, "loss": 0.1982, "step": 47570 }, { "epoch": 0.08434908805926351, "grad_norm": 0.5703125, "learning_rate": 0.0011277907904873848, "loss": 0.2931, "step": 47572 }, { "epoch": 0.08435263422457333, "grad_norm": 0.54296875, "learning_rate": 0.0011277313001603693, "loss": 0.2067, "step": 47574 }, { "epoch": 0.08435618038988314, "grad_norm": 1.0390625, "learning_rate": 0.0011276718097120726, "loss": 0.4419, "step": 47576 }, { "epoch": 0.08435972655519296, "grad_norm": 1.4609375, "learning_rate": 0.0011276123191427561, "loss": 0.4312, "step": 47578 }, { "epoch": 0.08436327272050277, "grad_norm": 0.50390625, "learning_rate": 0.0011275528284526793, "loss": 0.2411, "step": 47580 }, { "epoch": 0.08436681888581259, "grad_norm": 0.33203125, "learning_rate": 0.001127493337642102, "loss": 0.1721, "step": 47582 }, { "epoch": 0.0843703650511224, "grad_norm": 3.15625, "learning_rate": 0.001127433846711285, "loss": 0.206, "step": 47584 }, { "epoch": 0.08437391121643222, "grad_norm": 0.2734375, "learning_rate": 0.0011273743556604883, "loss": 0.1708, "step": 47586 }, { "epoch": 0.08437745738174203, "grad_norm": 0.49609375, "learning_rate": 0.0011273148644899721, "loss": 0.197, "step": 47588 }, { "epoch": 0.08438100354705184, "grad_norm": 0.67578125, "learning_rate": 0.0011272553731999965, "loss": 0.1901, "step": 47590 }, { "epoch": 0.08438454971236167, "grad_norm": 0.359375, "learning_rate": 0.0011271958817908217, "loss": 0.2443, "step": 47592 }, { "epoch": 0.08438809587767149, "grad_norm": 0.71875, "learning_rate": 0.001127136390262708, "loss": 0.1909, "step": 47594 }, { "epoch": 0.0843916420429813, "grad_norm": 0.51953125, "learning_rate": 0.0011270768986159155, "loss": 0.1771, "step": 47596 }, { "epoch": 0.08439518820829112, "grad_norm": 0.376953125, "learning_rate": 0.0011270174068507045, "loss": 0.1698, "step": 47598 }, { "epoch": 0.08439873437360093, "grad_norm": 0.7734375, "learning_rate": 0.0011269579149673348, "loss": 0.2115, "step": 47600 }, { "epoch": 0.08440228053891075, "grad_norm": 0.57421875, "learning_rate": 0.001126898422966067, "loss": 0.2327, "step": 47602 }, { "epoch": 0.08440582670422056, "grad_norm": 0.2041015625, "learning_rate": 0.001126838930847161, "loss": 0.1976, "step": 47604 }, { "epoch": 0.08440937286953037, "grad_norm": 0.4375, "learning_rate": 0.0011267794386108768, "loss": 0.1582, "step": 47606 }, { "epoch": 0.08441291903484019, "grad_norm": 0.2734375, "learning_rate": 0.0011267199462574756, "loss": 0.2074, "step": 47608 }, { "epoch": 0.08441646520015, "grad_norm": 0.640625, "learning_rate": 0.0011266604537872164, "loss": 0.13, "step": 47610 }, { "epoch": 0.08442001136545982, "grad_norm": 0.47265625, "learning_rate": 0.0011266009612003604, "loss": 0.1528, "step": 47612 }, { "epoch": 0.08442355753076963, "grad_norm": 0.1708984375, "learning_rate": 0.0011265414684971664, "loss": 0.1401, "step": 47614 }, { "epoch": 0.08442710369607945, "grad_norm": 2.125, "learning_rate": 0.001126481975677896, "loss": 0.2259, "step": 47616 }, { "epoch": 0.08443064986138926, "grad_norm": 0.890625, "learning_rate": 0.0011264224827428088, "loss": 0.2138, "step": 47618 }, { "epoch": 0.08443419602669908, "grad_norm": 0.66796875, "learning_rate": 0.0011263629896921651, "loss": 0.2048, "step": 47620 }, { "epoch": 0.08443774219200889, "grad_norm": 0.4609375, "learning_rate": 0.0011263034965262246, "loss": 0.3624, "step": 47622 }, { "epoch": 0.0844412883573187, "grad_norm": 0.5546875, "learning_rate": 0.001126244003245248, "loss": 0.168, "step": 47624 }, { "epoch": 0.08444483452262852, "grad_norm": 0.298828125, "learning_rate": 0.0011261845098494956, "loss": 0.1476, "step": 47626 }, { "epoch": 0.08444838068793833, "grad_norm": 0.447265625, "learning_rate": 0.0011261250163392275, "loss": 0.1695, "step": 47628 }, { "epoch": 0.08445192685324815, "grad_norm": 0.51953125, "learning_rate": 0.001126065522714703, "loss": 0.2025, "step": 47630 }, { "epoch": 0.08445547301855796, "grad_norm": 0.384765625, "learning_rate": 0.0011260060289761838, "loss": 0.1732, "step": 47632 }, { "epoch": 0.08445901918386778, "grad_norm": 4.03125, "learning_rate": 0.001125946535123929, "loss": 0.1161, "step": 47634 }, { "epoch": 0.0844625653491776, "grad_norm": 0.373046875, "learning_rate": 0.0011258870411581991, "loss": 0.3116, "step": 47636 }, { "epoch": 0.08446611151448742, "grad_norm": 0.99609375, "learning_rate": 0.0011258275470792545, "loss": 0.2432, "step": 47638 }, { "epoch": 0.08446965767979724, "grad_norm": 1.578125, "learning_rate": 0.0011257680528873554, "loss": 0.3542, "step": 47640 }, { "epoch": 0.08447320384510705, "grad_norm": 0.40625, "learning_rate": 0.0011257085585827614, "loss": 0.1942, "step": 47642 }, { "epoch": 0.08447675001041687, "grad_norm": 0.427734375, "learning_rate": 0.001125649064165733, "loss": 0.1777, "step": 47644 }, { "epoch": 0.08448029617572668, "grad_norm": 0.2578125, "learning_rate": 0.0011255895696365308, "loss": 0.1917, "step": 47646 }, { "epoch": 0.0844838423410365, "grad_norm": 0.5078125, "learning_rate": 0.0011255300749954144, "loss": 0.199, "step": 47648 }, { "epoch": 0.08448738850634631, "grad_norm": 0.55078125, "learning_rate": 0.0011254705802426447, "loss": 0.2069, "step": 47650 }, { "epoch": 0.08449093467165612, "grad_norm": 0.66796875, "learning_rate": 0.0011254110853784812, "loss": 0.1905, "step": 47652 }, { "epoch": 0.08449448083696594, "grad_norm": 0.376953125, "learning_rate": 0.0011253515904031844, "loss": 0.1531, "step": 47654 }, { "epoch": 0.08449802700227575, "grad_norm": 0.6875, "learning_rate": 0.0011252920953170144, "loss": 0.165, "step": 47656 }, { "epoch": 0.08450157316758557, "grad_norm": 0.5546875, "learning_rate": 0.0011252326001202318, "loss": 0.1996, "step": 47658 }, { "epoch": 0.08450511933289538, "grad_norm": 0.9140625, "learning_rate": 0.0011251731048130963, "loss": 0.2194, "step": 47660 }, { "epoch": 0.0845086654982052, "grad_norm": 0.5078125, "learning_rate": 0.001125113609395868, "loss": 0.1977, "step": 47662 }, { "epoch": 0.08451221166351501, "grad_norm": 0.55859375, "learning_rate": 0.0011250541138688077, "loss": 0.2104, "step": 47664 }, { "epoch": 0.08451575782882483, "grad_norm": 0.3359375, "learning_rate": 0.0011249946182321751, "loss": 0.1947, "step": 47666 }, { "epoch": 0.08451930399413464, "grad_norm": 0.341796875, "learning_rate": 0.0011249351224862302, "loss": 0.1997, "step": 47668 }, { "epoch": 0.08452285015944445, "grad_norm": 0.359375, "learning_rate": 0.001124875626631234, "loss": 0.1451, "step": 47670 }, { "epoch": 0.08452639632475427, "grad_norm": 0.56640625, "learning_rate": 0.0011248161306674465, "loss": 0.1592, "step": 47672 }, { "epoch": 0.08452994249006408, "grad_norm": 1.796875, "learning_rate": 0.0011247566345951275, "loss": 0.1687, "step": 47674 }, { "epoch": 0.0845334886553739, "grad_norm": 0.416015625, "learning_rate": 0.001124697138414537, "loss": 0.232, "step": 47676 }, { "epoch": 0.08453703482068371, "grad_norm": 0.33984375, "learning_rate": 0.001124637642125936, "loss": 0.2135, "step": 47678 }, { "epoch": 0.08454058098599353, "grad_norm": 0.96484375, "learning_rate": 0.0011245781457295838, "loss": 0.2111, "step": 47680 }, { "epoch": 0.08454412715130336, "grad_norm": 0.25, "learning_rate": 0.0011245186492257412, "loss": 0.1651, "step": 47682 }, { "epoch": 0.08454767331661317, "grad_norm": 0.546875, "learning_rate": 0.0011244591526146683, "loss": 0.2514, "step": 47684 }, { "epoch": 0.08455121948192298, "grad_norm": 4.09375, "learning_rate": 0.0011243996558966253, "loss": 0.158, "step": 47686 }, { "epoch": 0.0845547656472328, "grad_norm": 0.359375, "learning_rate": 0.0011243401590718726, "loss": 0.2795, "step": 47688 }, { "epoch": 0.08455831181254261, "grad_norm": 0.1494140625, "learning_rate": 0.00112428066214067, "loss": 0.1395, "step": 47690 }, { "epoch": 0.08456185797785243, "grad_norm": 0.39453125, "learning_rate": 0.001124221165103278, "loss": 0.182, "step": 47692 }, { "epoch": 0.08456540414316224, "grad_norm": 0.6015625, "learning_rate": 0.0011241616679599567, "loss": 0.2145, "step": 47694 }, { "epoch": 0.08456895030847206, "grad_norm": 0.26171875, "learning_rate": 0.001124102170710966, "loss": 0.1582, "step": 47696 }, { "epoch": 0.08457249647378187, "grad_norm": 0.255859375, "learning_rate": 0.0011240426733565665, "loss": 0.1554, "step": 47698 }, { "epoch": 0.08457604263909169, "grad_norm": 1.171875, "learning_rate": 0.0011239831758970186, "loss": 0.2132, "step": 47700 }, { "epoch": 0.0845795888044015, "grad_norm": 0.4296875, "learning_rate": 0.001123923678332582, "loss": 0.1719, "step": 47702 }, { "epoch": 0.08458313496971132, "grad_norm": 0.240234375, "learning_rate": 0.001123864180663517, "loss": 0.199, "step": 47704 }, { "epoch": 0.08458668113502113, "grad_norm": 0.271484375, "learning_rate": 0.0011238046828900843, "loss": 0.1877, "step": 47706 }, { "epoch": 0.08459022730033094, "grad_norm": 0.53515625, "learning_rate": 0.0011237451850125433, "loss": 0.165, "step": 47708 }, { "epoch": 0.08459377346564076, "grad_norm": 0.361328125, "learning_rate": 0.001123685687031155, "loss": 0.2015, "step": 47710 }, { "epoch": 0.08459731963095057, "grad_norm": 0.365234375, "learning_rate": 0.0011236261889461792, "loss": 0.16, "step": 47712 }, { "epoch": 0.08460086579626039, "grad_norm": 0.3046875, "learning_rate": 0.0011235666907578764, "loss": 0.2383, "step": 47714 }, { "epoch": 0.0846044119615702, "grad_norm": 0.55078125, "learning_rate": 0.001123507192466506, "loss": 0.2155, "step": 47716 }, { "epoch": 0.08460795812688002, "grad_norm": 0.322265625, "learning_rate": 0.0011234476940723292, "loss": 0.1678, "step": 47718 }, { "epoch": 0.08461150429218983, "grad_norm": 0.91796875, "learning_rate": 0.0011233881955756055, "loss": 0.1581, "step": 47720 }, { "epoch": 0.08461505045749965, "grad_norm": 0.31640625, "learning_rate": 0.0011233286969765958, "loss": 0.1417, "step": 47722 }, { "epoch": 0.08461859662280946, "grad_norm": 0.1787109375, "learning_rate": 0.0011232691982755597, "loss": 0.1344, "step": 47724 }, { "epoch": 0.08462214278811928, "grad_norm": 0.357421875, "learning_rate": 0.0011232096994727577, "loss": 0.1815, "step": 47726 }, { "epoch": 0.0846256889534291, "grad_norm": 0.28125, "learning_rate": 0.00112315020056845, "loss": 0.2609, "step": 47728 }, { "epoch": 0.08462923511873892, "grad_norm": 0.296875, "learning_rate": 0.0011230907015628965, "loss": 0.1865, "step": 47730 }, { "epoch": 0.08463278128404873, "grad_norm": 2.0625, "learning_rate": 0.0011230312024563581, "loss": 0.2922, "step": 47732 }, { "epoch": 0.08463632744935855, "grad_norm": 1.2734375, "learning_rate": 0.0011229717032490945, "loss": 0.2129, "step": 47734 }, { "epoch": 0.08463987361466836, "grad_norm": 0.56640625, "learning_rate": 0.001122912203941366, "loss": 0.2174, "step": 47736 }, { "epoch": 0.08464341977997818, "grad_norm": 0.41015625, "learning_rate": 0.0011228527045334326, "loss": 0.2222, "step": 47738 }, { "epoch": 0.08464696594528799, "grad_norm": 0.37890625, "learning_rate": 0.0011227932050255549, "loss": 0.2036, "step": 47740 }, { "epoch": 0.0846505121105978, "grad_norm": 0.388671875, "learning_rate": 0.001122733705417993, "loss": 0.1772, "step": 47742 }, { "epoch": 0.08465405827590762, "grad_norm": 1.2265625, "learning_rate": 0.0011226742057110068, "loss": 0.2133, "step": 47744 }, { "epoch": 0.08465760444121743, "grad_norm": 0.255859375, "learning_rate": 0.001122614705904857, "loss": 0.1843, "step": 47746 }, { "epoch": 0.08466115060652725, "grad_norm": 0.3359375, "learning_rate": 0.0011225552059998037, "loss": 0.1707, "step": 47748 }, { "epoch": 0.08466469677183706, "grad_norm": 0.34765625, "learning_rate": 0.001122495705996107, "loss": 0.163, "step": 47750 }, { "epoch": 0.08466824293714688, "grad_norm": 0.1884765625, "learning_rate": 0.0011224362058940272, "loss": 0.1433, "step": 47752 }, { "epoch": 0.0846717891024567, "grad_norm": 1.6640625, "learning_rate": 0.0011223767056938244, "loss": 0.2521, "step": 47754 }, { "epoch": 0.08467533526776651, "grad_norm": 0.447265625, "learning_rate": 0.0011223172053957588, "loss": 0.2108, "step": 47756 }, { "epoch": 0.08467888143307632, "grad_norm": 0.26171875, "learning_rate": 0.0011222577050000906, "loss": 0.1906, "step": 47758 }, { "epoch": 0.08468242759838614, "grad_norm": 0.1884765625, "learning_rate": 0.0011221982045070804, "loss": 0.1781, "step": 47760 }, { "epoch": 0.08468597376369595, "grad_norm": 0.5625, "learning_rate": 0.0011221387039169882, "loss": 0.2097, "step": 47762 }, { "epoch": 0.08468951992900577, "grad_norm": 0.2578125, "learning_rate": 0.001122079203230074, "loss": 0.2228, "step": 47764 }, { "epoch": 0.08469306609431558, "grad_norm": 0.384765625, "learning_rate": 0.0011220197024465984, "loss": 0.1737, "step": 47766 }, { "epoch": 0.0846966122596254, "grad_norm": 0.28515625, "learning_rate": 0.0011219602015668214, "loss": 0.232, "step": 47768 }, { "epoch": 0.08470015842493521, "grad_norm": 0.41796875, "learning_rate": 0.0011219007005910029, "loss": 0.1977, "step": 47770 }, { "epoch": 0.08470370459024504, "grad_norm": 0.310546875, "learning_rate": 0.0011218411995194035, "loss": 0.1811, "step": 47772 }, { "epoch": 0.08470725075555485, "grad_norm": 0.578125, "learning_rate": 0.0011217816983522834, "loss": 0.1848, "step": 47774 }, { "epoch": 0.08471079692086467, "grad_norm": 1.9921875, "learning_rate": 0.001121722197089903, "loss": 0.2461, "step": 47776 }, { "epoch": 0.08471434308617448, "grad_norm": 0.330078125, "learning_rate": 0.001121662695732522, "loss": 0.223, "step": 47778 }, { "epoch": 0.0847178892514843, "grad_norm": 0.25, "learning_rate": 0.0011216031942804015, "loss": 0.1332, "step": 47780 }, { "epoch": 0.08472143541679411, "grad_norm": 0.2890625, "learning_rate": 0.0011215436927338005, "loss": 0.1788, "step": 47782 }, { "epoch": 0.08472498158210393, "grad_norm": 0.1298828125, "learning_rate": 0.0011214841910929805, "loss": 0.1084, "step": 47784 }, { "epoch": 0.08472852774741374, "grad_norm": 0.2734375, "learning_rate": 0.001121424689358201, "loss": 0.1615, "step": 47786 }, { "epoch": 0.08473207391272355, "grad_norm": 0.3984375, "learning_rate": 0.0011213651875297225, "loss": 0.1825, "step": 47788 }, { "epoch": 0.08473562007803337, "grad_norm": 0.2158203125, "learning_rate": 0.0011213056856078046, "loss": 0.1422, "step": 47790 }, { "epoch": 0.08473916624334318, "grad_norm": 0.5703125, "learning_rate": 0.001121246183592708, "loss": 0.2643, "step": 47792 }, { "epoch": 0.084742712408653, "grad_norm": 0.447265625, "learning_rate": 0.0011211866814846933, "loss": 0.2016, "step": 47794 }, { "epoch": 0.08474625857396281, "grad_norm": 0.66015625, "learning_rate": 0.0011211271792840203, "loss": 0.4068, "step": 47796 }, { "epoch": 0.08474980473927263, "grad_norm": 0.486328125, "learning_rate": 0.001121067676990949, "loss": 0.176, "step": 47798 }, { "epoch": 0.08475335090458244, "grad_norm": 0.59375, "learning_rate": 0.0011210081746057404, "loss": 0.2713, "step": 47800 }, { "epoch": 0.08475689706989226, "grad_norm": 0.36328125, "learning_rate": 0.0011209486721286536, "loss": 0.315, "step": 47802 }, { "epoch": 0.08476044323520207, "grad_norm": 0.490234375, "learning_rate": 0.0011208891695599502, "loss": 0.1783, "step": 47804 }, { "epoch": 0.08476398940051189, "grad_norm": 0.419921875, "learning_rate": 0.0011208296668998893, "loss": 0.2022, "step": 47806 }, { "epoch": 0.0847675355658217, "grad_norm": 0.42578125, "learning_rate": 0.0011207701641487318, "loss": 0.146, "step": 47808 }, { "epoch": 0.08477108173113151, "grad_norm": 0.8125, "learning_rate": 0.0011207106613067373, "loss": 0.1628, "step": 47810 }, { "epoch": 0.08477462789644133, "grad_norm": 0.55078125, "learning_rate": 0.0011206511583741665, "loss": 0.1729, "step": 47812 }, { "epoch": 0.08477817406175114, "grad_norm": 0.37890625, "learning_rate": 0.0011205916553512795, "loss": 0.2085, "step": 47814 }, { "epoch": 0.08478172022706096, "grad_norm": 0.287109375, "learning_rate": 0.0011205321522383368, "loss": 0.1924, "step": 47816 }, { "epoch": 0.08478526639237079, "grad_norm": 0.44921875, "learning_rate": 0.001120472649035598, "loss": 0.1639, "step": 47818 }, { "epoch": 0.0847888125576806, "grad_norm": 1.078125, "learning_rate": 0.001120413145743324, "loss": 0.1953, "step": 47820 }, { "epoch": 0.08479235872299042, "grad_norm": 1.9609375, "learning_rate": 0.0011203536423617748, "loss": 0.2435, "step": 47822 }, { "epoch": 0.08479590488830023, "grad_norm": 0.6484375, "learning_rate": 0.0011202941388912106, "loss": 0.1878, "step": 47824 }, { "epoch": 0.08479945105361004, "grad_norm": 0.60546875, "learning_rate": 0.0011202346353318916, "loss": 0.1769, "step": 47826 }, { "epoch": 0.08480299721891986, "grad_norm": 0.369140625, "learning_rate": 0.0011201751316840783, "loss": 0.2013, "step": 47828 }, { "epoch": 0.08480654338422967, "grad_norm": 0.48828125, "learning_rate": 0.00112011562794803, "loss": 0.1423, "step": 47830 }, { "epoch": 0.08481008954953949, "grad_norm": 0.328125, "learning_rate": 0.001120056124124008, "loss": 0.2551, "step": 47832 }, { "epoch": 0.0848136357148493, "grad_norm": 0.546875, "learning_rate": 0.0011199966202122722, "loss": 0.1391, "step": 47834 }, { "epoch": 0.08481718188015912, "grad_norm": 0.474609375, "learning_rate": 0.0011199371162130827, "loss": 0.2196, "step": 47836 }, { "epoch": 0.08482072804546893, "grad_norm": 0.421875, "learning_rate": 0.0011198776121267, "loss": 0.1703, "step": 47838 }, { "epoch": 0.08482427421077875, "grad_norm": 0.54296875, "learning_rate": 0.001119818107953384, "loss": 0.1912, "step": 47840 }, { "epoch": 0.08482782037608856, "grad_norm": 0.640625, "learning_rate": 0.0011197586036933956, "loss": 0.141, "step": 47842 }, { "epoch": 0.08483136654139838, "grad_norm": 1.1328125, "learning_rate": 0.001119699099346994, "loss": 0.2166, "step": 47844 }, { "epoch": 0.08483491270670819, "grad_norm": 0.365234375, "learning_rate": 0.0011196395949144402, "loss": 0.1123, "step": 47846 }, { "epoch": 0.084838458872018, "grad_norm": 0.380859375, "learning_rate": 0.001119580090395994, "loss": 0.1347, "step": 47848 }, { "epoch": 0.08484200503732782, "grad_norm": 0.6328125, "learning_rate": 0.0011195205857919162, "loss": 0.1684, "step": 47850 }, { "epoch": 0.08484555120263763, "grad_norm": 0.408203125, "learning_rate": 0.0011194610811024663, "loss": 0.1183, "step": 47852 }, { "epoch": 0.08484909736794745, "grad_norm": 0.62890625, "learning_rate": 0.0011194015763279052, "loss": 0.2616, "step": 47854 }, { "epoch": 0.08485264353325726, "grad_norm": 0.66015625, "learning_rate": 0.0011193420714684927, "loss": 0.2665, "step": 47856 }, { "epoch": 0.08485618969856708, "grad_norm": 1.7734375, "learning_rate": 0.0011192825665244895, "loss": 0.1986, "step": 47858 }, { "epoch": 0.08485973586387689, "grad_norm": 1.3828125, "learning_rate": 0.0011192230614961553, "loss": 0.2094, "step": 47860 }, { "epoch": 0.0848632820291867, "grad_norm": 0.55859375, "learning_rate": 0.001119163556383751, "loss": 0.1585, "step": 47862 }, { "epoch": 0.08486682819449654, "grad_norm": 1.7265625, "learning_rate": 0.0011191040511875359, "loss": 0.2574, "step": 47864 }, { "epoch": 0.08487037435980635, "grad_norm": 0.3125, "learning_rate": 0.001119044545907771, "loss": 0.1915, "step": 47866 }, { "epoch": 0.08487392052511616, "grad_norm": 0.89453125, "learning_rate": 0.0011189850405447163, "loss": 0.2131, "step": 47868 }, { "epoch": 0.08487746669042598, "grad_norm": 0.416015625, "learning_rate": 0.001118925535098632, "loss": 0.1832, "step": 47870 }, { "epoch": 0.0848810128557358, "grad_norm": 1.1640625, "learning_rate": 0.0011188660295697783, "loss": 0.2035, "step": 47872 }, { "epoch": 0.08488455902104561, "grad_norm": 1.140625, "learning_rate": 0.001118806523958416, "loss": 0.2057, "step": 47874 }, { "epoch": 0.08488810518635542, "grad_norm": 0.474609375, "learning_rate": 0.0011187470182648046, "loss": 0.2088, "step": 47876 }, { "epoch": 0.08489165135166524, "grad_norm": 1.5078125, "learning_rate": 0.0011186875124892047, "loss": 0.2627, "step": 47878 }, { "epoch": 0.08489519751697505, "grad_norm": 0.5625, "learning_rate": 0.0011186280066318765, "loss": 0.1545, "step": 47880 }, { "epoch": 0.08489874368228487, "grad_norm": 0.26171875, "learning_rate": 0.00111856850069308, "loss": 0.1905, "step": 47882 }, { "epoch": 0.08490228984759468, "grad_norm": 0.671875, "learning_rate": 0.0011185089946730757, "loss": 0.2658, "step": 47884 }, { "epoch": 0.0849058360129045, "grad_norm": 0.171875, "learning_rate": 0.0011184494885721238, "loss": 0.158, "step": 47886 }, { "epoch": 0.08490938217821431, "grad_norm": 0.609375, "learning_rate": 0.0011183899823904847, "loss": 0.1999, "step": 47888 }, { "epoch": 0.08491292834352412, "grad_norm": 0.421875, "learning_rate": 0.0011183304761284187, "loss": 0.1786, "step": 47890 }, { "epoch": 0.08491647450883394, "grad_norm": 0.482421875, "learning_rate": 0.0011182709697861854, "loss": 0.2426, "step": 47892 }, { "epoch": 0.08492002067414375, "grad_norm": 0.4375, "learning_rate": 0.001118211463364046, "loss": 0.2394, "step": 47894 }, { "epoch": 0.08492356683945357, "grad_norm": 0.462890625, "learning_rate": 0.0011181519568622597, "loss": 0.2167, "step": 47896 }, { "epoch": 0.08492711300476338, "grad_norm": 1.296875, "learning_rate": 0.0011180924502810874, "loss": 0.237, "step": 47898 }, { "epoch": 0.0849306591700732, "grad_norm": 0.56640625, "learning_rate": 0.0011180329436207897, "loss": 0.1608, "step": 47900 }, { "epoch": 0.08493420533538301, "grad_norm": 0.29296875, "learning_rate": 0.001117973436881626, "loss": 0.3132, "step": 47902 }, { "epoch": 0.08493775150069283, "grad_norm": 1.90625, "learning_rate": 0.0011179139300638568, "loss": 0.2949, "step": 47904 }, { "epoch": 0.08494129766600264, "grad_norm": 0.6875, "learning_rate": 0.0011178544231677428, "loss": 0.164, "step": 47906 }, { "epoch": 0.08494484383131247, "grad_norm": 0.3984375, "learning_rate": 0.0011177949161935434, "loss": 0.2065, "step": 47908 }, { "epoch": 0.08494838999662228, "grad_norm": 1.1015625, "learning_rate": 0.0011177354091415198, "loss": 0.2207, "step": 47910 }, { "epoch": 0.0849519361619321, "grad_norm": 0.48046875, "learning_rate": 0.0011176759020119316, "loss": 0.2516, "step": 47912 }, { "epoch": 0.08495548232724191, "grad_norm": 0.2294921875, "learning_rate": 0.0011176163948050395, "loss": 0.1843, "step": 47914 }, { "epoch": 0.08495902849255173, "grad_norm": 0.427734375, "learning_rate": 0.0011175568875211035, "loss": 0.1743, "step": 47916 }, { "epoch": 0.08496257465786154, "grad_norm": 0.224609375, "learning_rate": 0.0011174973801603841, "loss": 0.1832, "step": 47918 }, { "epoch": 0.08496612082317136, "grad_norm": 0.25, "learning_rate": 0.001117437872723141, "loss": 0.1511, "step": 47920 }, { "epoch": 0.08496966698848117, "grad_norm": 1.4296875, "learning_rate": 0.0011173783652096347, "loss": 0.2168, "step": 47922 }, { "epoch": 0.08497321315379099, "grad_norm": 0.388671875, "learning_rate": 0.0011173188576201257, "loss": 0.1654, "step": 47924 }, { "epoch": 0.0849767593191008, "grad_norm": 1.390625, "learning_rate": 0.001117259349954874, "loss": 0.2642, "step": 47926 }, { "epoch": 0.08498030548441061, "grad_norm": 0.3046875, "learning_rate": 0.00111719984221414, "loss": 0.1774, "step": 47928 }, { "epoch": 0.08498385164972043, "grad_norm": 0.36328125, "learning_rate": 0.0011171403343981836, "loss": 0.1255, "step": 47930 }, { "epoch": 0.08498739781503024, "grad_norm": 0.2578125, "learning_rate": 0.0011170808265072655, "loss": 0.1235, "step": 47932 }, { "epoch": 0.08499094398034006, "grad_norm": 1.109375, "learning_rate": 0.001117021318541646, "loss": 0.2625, "step": 47934 }, { "epoch": 0.08499449014564987, "grad_norm": 0.28125, "learning_rate": 0.001116961810501585, "loss": 0.174, "step": 47936 }, { "epoch": 0.08499803631095969, "grad_norm": 1.34375, "learning_rate": 0.0011169023023873427, "loss": 0.2475, "step": 47938 }, { "epoch": 0.0850015824762695, "grad_norm": 0.7734375, "learning_rate": 0.00111684279419918, "loss": 0.1866, "step": 47940 }, { "epoch": 0.08500512864157932, "grad_norm": 0.8828125, "learning_rate": 0.0011167832859373564, "loss": 0.2791, "step": 47942 }, { "epoch": 0.08500867480688913, "grad_norm": 0.251953125, "learning_rate": 0.0011167237776021324, "loss": 0.242, "step": 47944 }, { "epoch": 0.08501222097219895, "grad_norm": 0.6484375, "learning_rate": 0.0011166642691937683, "loss": 0.2186, "step": 47946 }, { "epoch": 0.08501576713750876, "grad_norm": 0.3203125, "learning_rate": 0.0011166047607125244, "loss": 0.2025, "step": 47948 }, { "epoch": 0.08501931330281857, "grad_norm": 0.7109375, "learning_rate": 0.001116545252158661, "loss": 0.1703, "step": 47950 }, { "epoch": 0.08502285946812839, "grad_norm": 0.251953125, "learning_rate": 0.0011164857435324383, "loss": 0.1782, "step": 47952 }, { "epoch": 0.08502640563343822, "grad_norm": 0.56640625, "learning_rate": 0.0011164262348341168, "loss": 0.1624, "step": 47954 }, { "epoch": 0.08502995179874803, "grad_norm": 0.25, "learning_rate": 0.001116366726063956, "loss": 0.1391, "step": 47956 }, { "epoch": 0.08503349796405785, "grad_norm": 0.1826171875, "learning_rate": 0.0011163072172222168, "loss": 0.1468, "step": 47958 }, { "epoch": 0.08503704412936766, "grad_norm": 0.3203125, "learning_rate": 0.0011162477083091595, "loss": 0.1408, "step": 47960 }, { "epoch": 0.08504059029467748, "grad_norm": 0.85546875, "learning_rate": 0.0011161881993250438, "loss": 0.1992, "step": 47962 }, { "epoch": 0.08504413645998729, "grad_norm": 0.76171875, "learning_rate": 0.0011161286902701306, "loss": 0.2098, "step": 47964 }, { "epoch": 0.0850476826252971, "grad_norm": 0.2197265625, "learning_rate": 0.0011160691811446797, "loss": 0.1573, "step": 47966 }, { "epoch": 0.08505122879060692, "grad_norm": 0.5703125, "learning_rate": 0.0011160096719489516, "loss": 0.1381, "step": 47968 }, { "epoch": 0.08505477495591673, "grad_norm": 4.875, "learning_rate": 0.0011159501626832064, "loss": 0.3294, "step": 47970 }, { "epoch": 0.08505832112122655, "grad_norm": 0.33203125, "learning_rate": 0.0011158906533477047, "loss": 0.14, "step": 47972 }, { "epoch": 0.08506186728653636, "grad_norm": 0.263671875, "learning_rate": 0.0011158311439427068, "loss": 0.2676, "step": 47974 }, { "epoch": 0.08506541345184618, "grad_norm": 0.240234375, "learning_rate": 0.0011157716344684723, "loss": 0.1775, "step": 47976 }, { "epoch": 0.08506895961715599, "grad_norm": 0.90234375, "learning_rate": 0.0011157121249252614, "loss": 0.2284, "step": 47978 }, { "epoch": 0.08507250578246581, "grad_norm": 0.25390625, "learning_rate": 0.0011156526153133352, "loss": 0.1792, "step": 47980 }, { "epoch": 0.08507605194777562, "grad_norm": 1.25, "learning_rate": 0.0011155931056329536, "loss": 0.1748, "step": 47982 }, { "epoch": 0.08507959811308544, "grad_norm": 0.1689453125, "learning_rate": 0.0011155335958843767, "loss": 0.1492, "step": 47984 }, { "epoch": 0.08508314427839525, "grad_norm": 0.482421875, "learning_rate": 0.0011154740860678648, "loss": 0.1737, "step": 47986 }, { "epoch": 0.08508669044370507, "grad_norm": 0.6953125, "learning_rate": 0.0011154145761836788, "loss": 0.1917, "step": 47988 }, { "epoch": 0.08509023660901488, "grad_norm": 0.263671875, "learning_rate": 0.0011153550662320775, "loss": 0.219, "step": 47990 }, { "epoch": 0.0850937827743247, "grad_norm": 0.6015625, "learning_rate": 0.0011152955562133227, "loss": 0.2097, "step": 47992 }, { "epoch": 0.08509732893963451, "grad_norm": 1.4375, "learning_rate": 0.001115236046127674, "loss": 0.2342, "step": 47994 }, { "epoch": 0.08510087510494432, "grad_norm": 0.388671875, "learning_rate": 0.0011151765359753915, "loss": 0.2133, "step": 47996 }, { "epoch": 0.08510442127025414, "grad_norm": 0.328125, "learning_rate": 0.0011151170257567358, "loss": 0.1591, "step": 47998 }, { "epoch": 0.08510796743556397, "grad_norm": 0.353515625, "learning_rate": 0.0011150575154719668, "loss": 0.1287, "step": 48000 }, { "epoch": 0.08511151360087378, "grad_norm": 0.90625, "learning_rate": 0.0011149980051213448, "loss": 0.2651, "step": 48002 }, { "epoch": 0.0851150597661836, "grad_norm": 0.56640625, "learning_rate": 0.0011149384947051304, "loss": 0.229, "step": 48004 }, { "epoch": 0.08511860593149341, "grad_norm": 0.765625, "learning_rate": 0.001114878984223584, "loss": 0.2511, "step": 48006 }, { "epoch": 0.08512215209680322, "grad_norm": 0.455078125, "learning_rate": 0.0011148194736769652, "loss": 0.2164, "step": 48008 }, { "epoch": 0.08512569826211304, "grad_norm": 0.408203125, "learning_rate": 0.001114759963065535, "loss": 0.1778, "step": 48010 }, { "epoch": 0.08512924442742285, "grad_norm": 0.484375, "learning_rate": 0.001114700452389553, "loss": 0.2956, "step": 48012 }, { "epoch": 0.08513279059273267, "grad_norm": 0.921875, "learning_rate": 0.00111464094164928, "loss": 0.1886, "step": 48014 }, { "epoch": 0.08513633675804248, "grad_norm": 0.78515625, "learning_rate": 0.0011145814308449757, "loss": 0.1493, "step": 48016 }, { "epoch": 0.0851398829233523, "grad_norm": 0.28125, "learning_rate": 0.0011145219199769011, "loss": 0.1659, "step": 48018 }, { "epoch": 0.08514342908866211, "grad_norm": 0.73828125, "learning_rate": 0.0011144624090453157, "loss": 0.174, "step": 48020 }, { "epoch": 0.08514697525397193, "grad_norm": 0.71875, "learning_rate": 0.0011144028980504803, "loss": 0.1621, "step": 48022 }, { "epoch": 0.08515052141928174, "grad_norm": 0.859375, "learning_rate": 0.001114343386992655, "loss": 0.1704, "step": 48024 }, { "epoch": 0.08515406758459156, "grad_norm": 0.337890625, "learning_rate": 0.0011142838758720999, "loss": 0.1665, "step": 48026 }, { "epoch": 0.08515761374990137, "grad_norm": 0.490234375, "learning_rate": 0.0011142243646890756, "loss": 0.2042, "step": 48028 }, { "epoch": 0.08516115991521118, "grad_norm": 0.388671875, "learning_rate": 0.0011141648534438422, "loss": 0.1612, "step": 48030 }, { "epoch": 0.085164706080521, "grad_norm": 0.478515625, "learning_rate": 0.0011141053421366597, "loss": 0.2047, "step": 48032 }, { "epoch": 0.08516825224583081, "grad_norm": 0.6015625, "learning_rate": 0.001114045830767789, "loss": 0.2571, "step": 48034 }, { "epoch": 0.08517179841114063, "grad_norm": 1.5625, "learning_rate": 0.0011139863193374896, "loss": 0.2566, "step": 48036 }, { "epoch": 0.08517534457645044, "grad_norm": 0.6484375, "learning_rate": 0.0011139268078460226, "loss": 0.1663, "step": 48038 }, { "epoch": 0.08517889074176026, "grad_norm": 0.359375, "learning_rate": 0.001113867296293647, "loss": 0.135, "step": 48040 }, { "epoch": 0.08518243690707007, "grad_norm": 0.447265625, "learning_rate": 0.001113807784680625, "loss": 0.1592, "step": 48042 }, { "epoch": 0.0851859830723799, "grad_norm": 0.322265625, "learning_rate": 0.001113748273007215, "loss": 0.1867, "step": 48044 }, { "epoch": 0.08518952923768972, "grad_norm": 0.55859375, "learning_rate": 0.0011136887612736783, "loss": 0.2061, "step": 48046 }, { "epoch": 0.08519307540299953, "grad_norm": 0.375, "learning_rate": 0.001113629249480275, "loss": 0.2217, "step": 48048 }, { "epoch": 0.08519662156830934, "grad_norm": 0.7421875, "learning_rate": 0.0011135697376272653, "loss": 0.1846, "step": 48050 }, { "epoch": 0.08520016773361916, "grad_norm": 0.76171875, "learning_rate": 0.0011135102257149093, "loss": 0.2357, "step": 48052 }, { "epoch": 0.08520371389892897, "grad_norm": 0.361328125, "learning_rate": 0.0011134507137434675, "loss": 0.1502, "step": 48054 }, { "epoch": 0.08520726006423879, "grad_norm": 0.349609375, "learning_rate": 0.0011133912017132, "loss": 0.1735, "step": 48056 }, { "epoch": 0.0852108062295486, "grad_norm": 0.57421875, "learning_rate": 0.0011133316896243673, "loss": 0.1618, "step": 48058 }, { "epoch": 0.08521435239485842, "grad_norm": 0.609375, "learning_rate": 0.001113272177477229, "loss": 0.1999, "step": 48060 }, { "epoch": 0.08521789856016823, "grad_norm": 0.486328125, "learning_rate": 0.0011132126652720466, "loss": 0.2977, "step": 48062 }, { "epoch": 0.08522144472547805, "grad_norm": 0.357421875, "learning_rate": 0.0011131531530090793, "loss": 0.1724, "step": 48064 }, { "epoch": 0.08522499089078786, "grad_norm": 0.5859375, "learning_rate": 0.001113093640688588, "loss": 0.1872, "step": 48066 }, { "epoch": 0.08522853705609768, "grad_norm": 0.28515625, "learning_rate": 0.0011130341283108328, "loss": 0.2018, "step": 48068 }, { "epoch": 0.08523208322140749, "grad_norm": 0.3671875, "learning_rate": 0.0011129746158760735, "loss": 0.1982, "step": 48070 }, { "epoch": 0.0852356293867173, "grad_norm": 0.3828125, "learning_rate": 0.001112915103384571, "loss": 0.1989, "step": 48072 }, { "epoch": 0.08523917555202712, "grad_norm": 0.376953125, "learning_rate": 0.0011128555908365854, "loss": 0.2607, "step": 48074 }, { "epoch": 0.08524272171733693, "grad_norm": 0.62890625, "learning_rate": 0.0011127960782323766, "loss": 0.1406, "step": 48076 }, { "epoch": 0.08524626788264675, "grad_norm": 0.435546875, "learning_rate": 0.0011127365655722056, "loss": 0.197, "step": 48078 }, { "epoch": 0.08524981404795656, "grad_norm": 0.412109375, "learning_rate": 0.001112677052856332, "loss": 0.2208, "step": 48080 }, { "epoch": 0.08525336021326638, "grad_norm": 0.35546875, "learning_rate": 0.0011126175400850162, "loss": 0.194, "step": 48082 }, { "epoch": 0.08525690637857619, "grad_norm": 0.1796875, "learning_rate": 0.0011125580272585188, "loss": 0.1541, "step": 48084 }, { "epoch": 0.085260452543886, "grad_norm": 0.74609375, "learning_rate": 0.0011124985143771001, "loss": 0.1428, "step": 48086 }, { "epoch": 0.08526399870919582, "grad_norm": 0.64453125, "learning_rate": 0.00111243900144102, "loss": 0.1841, "step": 48088 }, { "epoch": 0.08526754487450565, "grad_norm": 0.265625, "learning_rate": 0.0011123794884505386, "loss": 0.197, "step": 48090 }, { "epoch": 0.08527109103981546, "grad_norm": 0.2373046875, "learning_rate": 0.001112319975405917, "loss": 0.1827, "step": 48092 }, { "epoch": 0.08527463720512528, "grad_norm": 1.359375, "learning_rate": 0.0011122604623074147, "loss": 0.2405, "step": 48094 }, { "epoch": 0.08527818337043509, "grad_norm": 0.25390625, "learning_rate": 0.0011122009491552924, "loss": 0.1561, "step": 48096 }, { "epoch": 0.08528172953574491, "grad_norm": 0.337890625, "learning_rate": 0.00111214143594981, "loss": 0.2165, "step": 48098 }, { "epoch": 0.08528527570105472, "grad_norm": 0.3828125, "learning_rate": 0.0011120819226912281, "loss": 0.2219, "step": 48100 }, { "epoch": 0.08528882186636454, "grad_norm": 0.478515625, "learning_rate": 0.001112022409379807, "loss": 0.1329, "step": 48102 }, { "epoch": 0.08529236803167435, "grad_norm": 0.458984375, "learning_rate": 0.0011119628960158069, "loss": 0.2213, "step": 48104 }, { "epoch": 0.08529591419698417, "grad_norm": 0.2578125, "learning_rate": 0.001111903382599488, "loss": 0.2199, "step": 48106 }, { "epoch": 0.08529946036229398, "grad_norm": 4.0, "learning_rate": 0.0011118438691311108, "loss": 0.3293, "step": 48108 }, { "epoch": 0.0853030065276038, "grad_norm": 0.36328125, "learning_rate": 0.001111784355610935, "loss": 0.1792, "step": 48110 }, { "epoch": 0.08530655269291361, "grad_norm": 0.138671875, "learning_rate": 0.0011117248420392215, "loss": 0.1287, "step": 48112 }, { "epoch": 0.08531009885822342, "grad_norm": 0.2333984375, "learning_rate": 0.0011116653284162302, "loss": 0.1249, "step": 48114 }, { "epoch": 0.08531364502353324, "grad_norm": 0.5703125, "learning_rate": 0.0011116058147422217, "loss": 0.1773, "step": 48116 }, { "epoch": 0.08531719118884305, "grad_norm": 0.333984375, "learning_rate": 0.0011115463010174561, "loss": 0.1763, "step": 48118 }, { "epoch": 0.08532073735415287, "grad_norm": 1.2890625, "learning_rate": 0.0011114867872421936, "loss": 0.2395, "step": 48120 }, { "epoch": 0.08532428351946268, "grad_norm": 0.484375, "learning_rate": 0.0011114272734166946, "loss": 0.1524, "step": 48122 }, { "epoch": 0.0853278296847725, "grad_norm": 0.4609375, "learning_rate": 0.0011113677595412195, "loss": 0.1675, "step": 48124 }, { "epoch": 0.08533137585008231, "grad_norm": 0.51171875, "learning_rate": 0.0011113082456160283, "loss": 0.1502, "step": 48126 }, { "epoch": 0.08533492201539213, "grad_norm": 0.431640625, "learning_rate": 0.0011112487316413814, "loss": 0.1259, "step": 48128 }, { "epoch": 0.08533846818070194, "grad_norm": 0.216796875, "learning_rate": 0.0011111892176175388, "loss": 0.1394, "step": 48130 }, { "epoch": 0.08534201434601175, "grad_norm": 1.3203125, "learning_rate": 0.0011111297035447615, "loss": 0.2342, "step": 48132 }, { "epoch": 0.08534556051132157, "grad_norm": 0.26171875, "learning_rate": 0.0011110701894233089, "loss": 0.1861, "step": 48134 }, { "epoch": 0.0853491066766314, "grad_norm": 0.37890625, "learning_rate": 0.0011110106752534419, "loss": 0.1501, "step": 48136 }, { "epoch": 0.08535265284194121, "grad_norm": 0.1884765625, "learning_rate": 0.0011109511610354207, "loss": 0.1379, "step": 48138 }, { "epoch": 0.08535619900725103, "grad_norm": 1.2578125, "learning_rate": 0.0011108916467695057, "loss": 0.1987, "step": 48140 }, { "epoch": 0.08535974517256084, "grad_norm": 1.140625, "learning_rate": 0.0011108321324559568, "loss": 0.1574, "step": 48142 }, { "epoch": 0.08536329133787066, "grad_norm": 0.57421875, "learning_rate": 0.0011107726180950342, "loss": 0.1639, "step": 48144 }, { "epoch": 0.08536683750318047, "grad_norm": 0.408203125, "learning_rate": 0.0011107131036869986, "loss": 0.1221, "step": 48146 }, { "epoch": 0.08537038366849029, "grad_norm": 0.67578125, "learning_rate": 0.0011106535892321102, "loss": 0.2001, "step": 48148 }, { "epoch": 0.0853739298338001, "grad_norm": 0.2216796875, "learning_rate": 0.001110594074730629, "loss": 0.1681, "step": 48150 }, { "epoch": 0.08537747599910991, "grad_norm": 0.63671875, "learning_rate": 0.0011105345601828155, "loss": 0.213, "step": 48152 }, { "epoch": 0.08538102216441973, "grad_norm": 0.1943359375, "learning_rate": 0.0011104750455889299, "loss": 0.1331, "step": 48154 }, { "epoch": 0.08538456832972954, "grad_norm": 1.046875, "learning_rate": 0.0011104155309492327, "loss": 0.1857, "step": 48156 }, { "epoch": 0.08538811449503936, "grad_norm": 0.60546875, "learning_rate": 0.001110356016263984, "loss": 0.2145, "step": 48158 }, { "epoch": 0.08539166066034917, "grad_norm": 0.59375, "learning_rate": 0.001110296501533444, "loss": 0.1933, "step": 48160 }, { "epoch": 0.08539520682565899, "grad_norm": 0.314453125, "learning_rate": 0.0011102369867578735, "loss": 0.1464, "step": 48162 }, { "epoch": 0.0853987529909688, "grad_norm": 0.5078125, "learning_rate": 0.001110177471937532, "loss": 0.2573, "step": 48164 }, { "epoch": 0.08540229915627862, "grad_norm": 0.69140625, "learning_rate": 0.0011101179570726799, "loss": 0.1193, "step": 48166 }, { "epoch": 0.08540584532158843, "grad_norm": 0.28515625, "learning_rate": 0.0011100584421635782, "loss": 0.1544, "step": 48168 }, { "epoch": 0.08540939148689825, "grad_norm": 0.52734375, "learning_rate": 0.0011099989272104864, "loss": 0.1448, "step": 48170 }, { "epoch": 0.08541293765220806, "grad_norm": 0.515625, "learning_rate": 0.001109939412213665, "loss": 0.2474, "step": 48172 }, { "epoch": 0.08541648381751787, "grad_norm": 0.5234375, "learning_rate": 0.0011098798971733747, "loss": 0.1625, "step": 48174 }, { "epoch": 0.08542002998282769, "grad_norm": 0.4453125, "learning_rate": 0.0011098203820898754, "loss": 0.1667, "step": 48176 }, { "epoch": 0.0854235761481375, "grad_norm": 0.2099609375, "learning_rate": 0.0011097608669634277, "loss": 0.1773, "step": 48178 }, { "epoch": 0.08542712231344732, "grad_norm": 0.416015625, "learning_rate": 0.001109701351794291, "loss": 0.4169, "step": 48180 }, { "epoch": 0.08543066847875715, "grad_norm": 0.22265625, "learning_rate": 0.0011096418365827267, "loss": 0.1708, "step": 48182 }, { "epoch": 0.08543421464406696, "grad_norm": 0.4921875, "learning_rate": 0.0011095823213289945, "loss": 0.2015, "step": 48184 }, { "epoch": 0.08543776080937678, "grad_norm": 3.5625, "learning_rate": 0.0011095228060333546, "loss": 0.2461, "step": 48186 }, { "epoch": 0.08544130697468659, "grad_norm": 0.765625, "learning_rate": 0.0011094632906960677, "loss": 0.2404, "step": 48188 }, { "epoch": 0.0854448531399964, "grad_norm": 0.5859375, "learning_rate": 0.001109403775317394, "loss": 0.2163, "step": 48190 }, { "epoch": 0.08544839930530622, "grad_norm": 0.60546875, "learning_rate": 0.0011093442598975934, "loss": 0.1324, "step": 48192 }, { "epoch": 0.08545194547061603, "grad_norm": 0.146484375, "learning_rate": 0.0011092847444369267, "loss": 0.1782, "step": 48194 }, { "epoch": 0.08545549163592585, "grad_norm": 0.162109375, "learning_rate": 0.0011092252289356536, "loss": 0.1542, "step": 48196 }, { "epoch": 0.08545903780123566, "grad_norm": 0.33203125, "learning_rate": 0.0011091657133940347, "loss": 0.1336, "step": 48198 }, { "epoch": 0.08546258396654548, "grad_norm": 0.5546875, "learning_rate": 0.0011091061978123304, "loss": 0.1628, "step": 48200 }, { "epoch": 0.08546613013185529, "grad_norm": 1.21875, "learning_rate": 0.0011090466821908013, "loss": 0.1968, "step": 48202 }, { "epoch": 0.0854696762971651, "grad_norm": 0.330078125, "learning_rate": 0.0011089871665297066, "loss": 0.173, "step": 48204 }, { "epoch": 0.08547322246247492, "grad_norm": 0.486328125, "learning_rate": 0.0011089276508293076, "loss": 0.1945, "step": 48206 }, { "epoch": 0.08547676862778474, "grad_norm": 0.31640625, "learning_rate": 0.0011088681350898643, "loss": 0.1222, "step": 48208 }, { "epoch": 0.08548031479309455, "grad_norm": 0.458984375, "learning_rate": 0.0011088086193116367, "loss": 0.2319, "step": 48210 }, { "epoch": 0.08548386095840436, "grad_norm": 0.341796875, "learning_rate": 0.0011087491034948854, "loss": 0.165, "step": 48212 }, { "epoch": 0.08548740712371418, "grad_norm": 0.2177734375, "learning_rate": 0.0011086895876398705, "loss": 0.2168, "step": 48214 }, { "epoch": 0.085490953289024, "grad_norm": 0.7421875, "learning_rate": 0.0011086300717468528, "loss": 0.1863, "step": 48216 }, { "epoch": 0.08549449945433381, "grad_norm": 1.1171875, "learning_rate": 0.0011085705558160918, "loss": 0.3501, "step": 48218 }, { "epoch": 0.08549804561964362, "grad_norm": 0.466796875, "learning_rate": 0.0011085110398478483, "loss": 0.1955, "step": 48220 }, { "epoch": 0.08550159178495344, "grad_norm": 1.0703125, "learning_rate": 0.0011084515238423823, "loss": 0.1763, "step": 48222 }, { "epoch": 0.08550513795026325, "grad_norm": 0.318359375, "learning_rate": 0.0011083920077999544, "loss": 0.214, "step": 48224 }, { "epoch": 0.08550868411557308, "grad_norm": 0.306640625, "learning_rate": 0.001108332491720825, "loss": 0.218, "step": 48226 }, { "epoch": 0.0855122302808829, "grad_norm": 0.220703125, "learning_rate": 0.0011082729756052533, "loss": 0.1241, "step": 48228 }, { "epoch": 0.08551577644619271, "grad_norm": 0.83984375, "learning_rate": 0.001108213459453501, "loss": 0.217, "step": 48230 }, { "epoch": 0.08551932261150252, "grad_norm": 0.2119140625, "learning_rate": 0.0011081539432658278, "loss": 0.2448, "step": 48232 }, { "epoch": 0.08552286877681234, "grad_norm": 0.5546875, "learning_rate": 0.001108094427042494, "loss": 0.1429, "step": 48234 }, { "epoch": 0.08552641494212215, "grad_norm": 0.62890625, "learning_rate": 0.0011080349107837598, "loss": 0.1908, "step": 48236 }, { "epoch": 0.08552996110743197, "grad_norm": 0.58984375, "learning_rate": 0.0011079753944898856, "loss": 0.1509, "step": 48238 }, { "epoch": 0.08553350727274178, "grad_norm": 0.40625, "learning_rate": 0.0011079158781611314, "loss": 0.1793, "step": 48240 }, { "epoch": 0.0855370534380516, "grad_norm": 0.4453125, "learning_rate": 0.0011078563617977581, "loss": 0.1535, "step": 48242 }, { "epoch": 0.08554059960336141, "grad_norm": 0.27734375, "learning_rate": 0.0011077968454000252, "loss": 0.2885, "step": 48244 }, { "epoch": 0.08554414576867123, "grad_norm": 0.294921875, "learning_rate": 0.001107737328968194, "loss": 0.1625, "step": 48246 }, { "epoch": 0.08554769193398104, "grad_norm": 0.4375, "learning_rate": 0.0011076778125025236, "loss": 0.1947, "step": 48248 }, { "epoch": 0.08555123809929085, "grad_norm": 0.447265625, "learning_rate": 0.0011076182960032754, "loss": 0.1434, "step": 48250 }, { "epoch": 0.08555478426460067, "grad_norm": 0.46484375, "learning_rate": 0.001107558779470709, "loss": 0.1571, "step": 48252 }, { "epoch": 0.08555833042991048, "grad_norm": 0.5625, "learning_rate": 0.0011074992629050851, "loss": 0.1697, "step": 48254 }, { "epoch": 0.0855618765952203, "grad_norm": 0.64453125, "learning_rate": 0.0011074397463066636, "loss": 0.1501, "step": 48256 }, { "epoch": 0.08556542276053011, "grad_norm": 0.482421875, "learning_rate": 0.001107380229675705, "loss": 0.1999, "step": 48258 }, { "epoch": 0.08556896892583993, "grad_norm": 0.4296875, "learning_rate": 0.0011073207130124696, "loss": 0.1379, "step": 48260 }, { "epoch": 0.08557251509114974, "grad_norm": 1.1484375, "learning_rate": 0.0011072611963172175, "loss": 0.2539, "step": 48262 }, { "epoch": 0.08557606125645956, "grad_norm": 0.8203125, "learning_rate": 0.0011072016795902093, "loss": 0.196, "step": 48264 }, { "epoch": 0.08557960742176937, "grad_norm": 0.341796875, "learning_rate": 0.0011071421628317051, "loss": 0.1963, "step": 48266 }, { "epoch": 0.08558315358707919, "grad_norm": 0.3046875, "learning_rate": 0.0011070826460419653, "loss": 0.1746, "step": 48268 }, { "epoch": 0.085586699752389, "grad_norm": 0.4765625, "learning_rate": 0.0011070231292212498, "loss": 0.1761, "step": 48270 }, { "epoch": 0.08559024591769883, "grad_norm": 0.2734375, "learning_rate": 0.0011069636123698194, "loss": 0.1851, "step": 48272 }, { "epoch": 0.08559379208300864, "grad_norm": 0.94921875, "learning_rate": 0.0011069040954879345, "loss": 0.2328, "step": 48274 }, { "epoch": 0.08559733824831846, "grad_norm": 0.2275390625, "learning_rate": 0.0011068445785758548, "loss": 0.1869, "step": 48276 }, { "epoch": 0.08560088441362827, "grad_norm": 0.4921875, "learning_rate": 0.001106785061633841, "loss": 0.17, "step": 48278 }, { "epoch": 0.08560443057893809, "grad_norm": 0.39453125, "learning_rate": 0.001106725544662153, "loss": 0.1517, "step": 48280 }, { "epoch": 0.0856079767442479, "grad_norm": 0.51171875, "learning_rate": 0.0011066660276610517, "loss": 0.2489, "step": 48282 }, { "epoch": 0.08561152290955772, "grad_norm": 1.0, "learning_rate": 0.001106606510630797, "loss": 0.3669, "step": 48284 }, { "epoch": 0.08561506907486753, "grad_norm": 0.64453125, "learning_rate": 0.0011065469935716491, "loss": 0.1991, "step": 48286 }, { "epoch": 0.08561861524017735, "grad_norm": 0.64453125, "learning_rate": 0.0011064874764838686, "loss": 0.1871, "step": 48288 }, { "epoch": 0.08562216140548716, "grad_norm": 0.298828125, "learning_rate": 0.0011064279593677156, "loss": 0.2054, "step": 48290 }, { "epoch": 0.08562570757079697, "grad_norm": 0.373046875, "learning_rate": 0.0011063684422234505, "loss": 0.1655, "step": 48292 }, { "epoch": 0.08562925373610679, "grad_norm": 0.41015625, "learning_rate": 0.0011063089250513336, "loss": 0.4262, "step": 48294 }, { "epoch": 0.0856327999014166, "grad_norm": 0.67578125, "learning_rate": 0.0011062494078516252, "loss": 0.2613, "step": 48296 }, { "epoch": 0.08563634606672642, "grad_norm": 0.271484375, "learning_rate": 0.001106189890624585, "loss": 0.1428, "step": 48298 }, { "epoch": 0.08563989223203623, "grad_norm": 0.41796875, "learning_rate": 0.0011061303733704744, "loss": 0.1446, "step": 48300 }, { "epoch": 0.08564343839734605, "grad_norm": 4.25, "learning_rate": 0.0011060708560895528, "loss": 0.3254, "step": 48302 }, { "epoch": 0.08564698456265586, "grad_norm": 0.427734375, "learning_rate": 0.001106011338782081, "loss": 0.1934, "step": 48304 }, { "epoch": 0.08565053072796568, "grad_norm": 0.359375, "learning_rate": 0.0011059518214483186, "loss": 0.1949, "step": 48306 }, { "epoch": 0.08565407689327549, "grad_norm": 0.22265625, "learning_rate": 0.001105892304088527, "loss": 0.1639, "step": 48308 }, { "epoch": 0.0856576230585853, "grad_norm": 0.2734375, "learning_rate": 0.0011058327867029653, "loss": 0.1982, "step": 48310 }, { "epoch": 0.08566116922389512, "grad_norm": 0.4609375, "learning_rate": 0.0011057732692918952, "loss": 0.1793, "step": 48312 }, { "epoch": 0.08566471538920493, "grad_norm": 0.224609375, "learning_rate": 0.0011057137518555755, "loss": 0.1626, "step": 48314 }, { "epoch": 0.08566826155451475, "grad_norm": 15.5625, "learning_rate": 0.0011056542343942672, "loss": 0.4609, "step": 48316 }, { "epoch": 0.08567180771982458, "grad_norm": 0.3359375, "learning_rate": 0.0011055947169082308, "loss": 0.1948, "step": 48318 }, { "epoch": 0.08567535388513439, "grad_norm": 1.09375, "learning_rate": 0.0011055351993977263, "loss": 0.2294, "step": 48320 }, { "epoch": 0.0856789000504442, "grad_norm": 1.1953125, "learning_rate": 0.001105475681863014, "loss": 0.1768, "step": 48322 }, { "epoch": 0.08568244621575402, "grad_norm": 0.921875, "learning_rate": 0.0011054161643043543, "loss": 0.1918, "step": 48324 }, { "epoch": 0.08568599238106384, "grad_norm": 0.451171875, "learning_rate": 0.0011053566467220074, "loss": 0.2415, "step": 48326 }, { "epoch": 0.08568953854637365, "grad_norm": 0.27734375, "learning_rate": 0.0011052971291162336, "loss": 0.1864, "step": 48328 }, { "epoch": 0.08569308471168346, "grad_norm": 0.2578125, "learning_rate": 0.0011052376114872933, "loss": 0.1938, "step": 48330 }, { "epoch": 0.08569663087699328, "grad_norm": 0.412109375, "learning_rate": 0.0011051780938354469, "loss": 0.1574, "step": 48332 }, { "epoch": 0.0857001770423031, "grad_norm": 0.92578125, "learning_rate": 0.0011051185761609542, "loss": 0.1959, "step": 48334 }, { "epoch": 0.08570372320761291, "grad_norm": 0.76171875, "learning_rate": 0.001105059058464076, "loss": 0.2839, "step": 48336 }, { "epoch": 0.08570726937292272, "grad_norm": 0.52734375, "learning_rate": 0.0011049995407450724, "loss": 0.1962, "step": 48338 }, { "epoch": 0.08571081553823254, "grad_norm": 0.55078125, "learning_rate": 0.0011049400230042036, "loss": 0.1909, "step": 48340 }, { "epoch": 0.08571436170354235, "grad_norm": 0.392578125, "learning_rate": 0.00110488050524173, "loss": 0.173, "step": 48342 }, { "epoch": 0.08571790786885217, "grad_norm": 0.515625, "learning_rate": 0.0011048209874579117, "loss": 0.1982, "step": 48344 }, { "epoch": 0.08572145403416198, "grad_norm": 0.482421875, "learning_rate": 0.00110476146965301, "loss": 0.2255, "step": 48346 }, { "epoch": 0.0857250001994718, "grad_norm": 0.392578125, "learning_rate": 0.001104701951827284, "loss": 0.2688, "step": 48348 }, { "epoch": 0.08572854636478161, "grad_norm": 0.2890625, "learning_rate": 0.0011046424339809942, "loss": 0.1985, "step": 48350 }, { "epoch": 0.08573209253009142, "grad_norm": 0.44140625, "learning_rate": 0.001104582916114401, "loss": 0.2056, "step": 48352 }, { "epoch": 0.08573563869540124, "grad_norm": 0.330078125, "learning_rate": 0.0011045233982277653, "loss": 0.1738, "step": 48354 }, { "epoch": 0.08573918486071105, "grad_norm": 0.298828125, "learning_rate": 0.0011044638803213462, "loss": 0.2255, "step": 48356 }, { "epoch": 0.08574273102602087, "grad_norm": 0.62109375, "learning_rate": 0.0011044043623954052, "loss": 0.2087, "step": 48358 }, { "epoch": 0.08574627719133068, "grad_norm": 1.265625, "learning_rate": 0.001104344844450202, "loss": 0.2244, "step": 48360 }, { "epoch": 0.08574982335664051, "grad_norm": 2.375, "learning_rate": 0.0011042853264859969, "loss": 0.2943, "step": 48362 }, { "epoch": 0.08575336952195033, "grad_norm": 0.32421875, "learning_rate": 0.00110422580850305, "loss": 0.2028, "step": 48364 }, { "epoch": 0.08575691568726014, "grad_norm": 0.30859375, "learning_rate": 0.0011041662905016223, "loss": 0.2424, "step": 48366 }, { "epoch": 0.08576046185256996, "grad_norm": 0.2890625, "learning_rate": 0.0011041067724819735, "loss": 0.1491, "step": 48368 }, { "epoch": 0.08576400801787977, "grad_norm": 1.0703125, "learning_rate": 0.0011040472544443642, "loss": 0.2942, "step": 48370 }, { "epoch": 0.08576755418318958, "grad_norm": 0.6484375, "learning_rate": 0.0011039877363890542, "loss": 0.149, "step": 48372 }, { "epoch": 0.0857711003484994, "grad_norm": 0.318359375, "learning_rate": 0.0011039282183163046, "loss": 0.1725, "step": 48374 }, { "epoch": 0.08577464651380921, "grad_norm": 0.314453125, "learning_rate": 0.001103868700226375, "loss": 0.1661, "step": 48376 }, { "epoch": 0.08577819267911903, "grad_norm": 0.431640625, "learning_rate": 0.0011038091821195261, "loss": 0.279, "step": 48378 }, { "epoch": 0.08578173884442884, "grad_norm": 0.53125, "learning_rate": 0.001103749663996018, "loss": 0.1464, "step": 48380 }, { "epoch": 0.08578528500973866, "grad_norm": 0.255859375, "learning_rate": 0.0011036901458561109, "loss": 0.153, "step": 48382 }, { "epoch": 0.08578883117504847, "grad_norm": 1.125, "learning_rate": 0.0011036306277000653, "loss": 0.1678, "step": 48384 }, { "epoch": 0.08579237734035829, "grad_norm": 0.376953125, "learning_rate": 0.0011035711095281416, "loss": 0.1865, "step": 48386 }, { "epoch": 0.0857959235056681, "grad_norm": 0.275390625, "learning_rate": 0.0011035115913405997, "loss": 0.13, "step": 48388 }, { "epoch": 0.08579946967097792, "grad_norm": 0.9140625, "learning_rate": 0.0011034520731377005, "loss": 0.237, "step": 48390 }, { "epoch": 0.08580301583628773, "grad_norm": 0.279296875, "learning_rate": 0.0011033925549197032, "loss": 0.171, "step": 48392 }, { "epoch": 0.08580656200159754, "grad_norm": 0.193359375, "learning_rate": 0.0011033330366868696, "loss": 0.1601, "step": 48394 }, { "epoch": 0.08581010816690736, "grad_norm": 0.451171875, "learning_rate": 0.0011032735184394588, "loss": 0.1707, "step": 48396 }, { "epoch": 0.08581365433221717, "grad_norm": 1.2890625, "learning_rate": 0.0011032140001777317, "loss": 0.1547, "step": 48398 }, { "epoch": 0.08581720049752699, "grad_norm": 0.53515625, "learning_rate": 0.0011031544819019485, "loss": 0.177, "step": 48400 }, { "epoch": 0.0858207466628368, "grad_norm": 0.408203125, "learning_rate": 0.0011030949636123696, "loss": 0.1822, "step": 48402 }, { "epoch": 0.08582429282814662, "grad_norm": 0.318359375, "learning_rate": 0.0011030354453092546, "loss": 0.2167, "step": 48404 }, { "epoch": 0.08582783899345643, "grad_norm": 0.341796875, "learning_rate": 0.0011029759269928648, "loss": 0.1567, "step": 48406 }, { "epoch": 0.08583138515876626, "grad_norm": 0.361328125, "learning_rate": 0.0011029164086634596, "loss": 0.191, "step": 48408 }, { "epoch": 0.08583493132407607, "grad_norm": 0.31640625, "learning_rate": 0.0011028568903213001, "loss": 0.1655, "step": 48410 }, { "epoch": 0.08583847748938589, "grad_norm": 0.5546875, "learning_rate": 0.001102797371966646, "loss": 0.189, "step": 48412 }, { "epoch": 0.0858420236546957, "grad_norm": 0.326171875, "learning_rate": 0.0011027378535997578, "loss": 0.1743, "step": 48414 }, { "epoch": 0.08584556982000552, "grad_norm": 0.484375, "learning_rate": 0.001102678335220896, "loss": 0.2297, "step": 48416 }, { "epoch": 0.08584911598531533, "grad_norm": 0.208984375, "learning_rate": 0.0011026188168303207, "loss": 0.1714, "step": 48418 }, { "epoch": 0.08585266215062515, "grad_norm": 0.796875, "learning_rate": 0.001102559298428292, "loss": 0.1899, "step": 48420 }, { "epoch": 0.08585620831593496, "grad_norm": 4.0625, "learning_rate": 0.0011024997800150706, "loss": 0.4617, "step": 48422 }, { "epoch": 0.08585975448124478, "grad_norm": 0.54296875, "learning_rate": 0.0011024402615909168, "loss": 0.1672, "step": 48424 }, { "epoch": 0.08586330064655459, "grad_norm": 1.8984375, "learning_rate": 0.0011023807431560903, "loss": 0.3999, "step": 48426 }, { "epoch": 0.0858668468118644, "grad_norm": 0.84375, "learning_rate": 0.001102321224710852, "loss": 0.1886, "step": 48428 }, { "epoch": 0.08587039297717422, "grad_norm": 0.353515625, "learning_rate": 0.001102261706255462, "loss": 0.2875, "step": 48430 }, { "epoch": 0.08587393914248403, "grad_norm": 0.51171875, "learning_rate": 0.0011022021877901806, "loss": 0.1308, "step": 48432 }, { "epoch": 0.08587748530779385, "grad_norm": 0.359375, "learning_rate": 0.001102142669315268, "loss": 0.1853, "step": 48434 }, { "epoch": 0.08588103147310366, "grad_norm": 0.35546875, "learning_rate": 0.0011020831508309849, "loss": 0.1253, "step": 48436 }, { "epoch": 0.08588457763841348, "grad_norm": 0.33984375, "learning_rate": 0.0011020236323375909, "loss": 0.2136, "step": 48438 }, { "epoch": 0.08588812380372329, "grad_norm": 0.93359375, "learning_rate": 0.0011019641138353472, "loss": 0.1706, "step": 48440 }, { "epoch": 0.08589166996903311, "grad_norm": 0.39453125, "learning_rate": 0.0011019045953245135, "loss": 0.184, "step": 48442 }, { "epoch": 0.08589521613434292, "grad_norm": 0.46875, "learning_rate": 0.00110184507680535, "loss": 0.1978, "step": 48444 }, { "epoch": 0.08589876229965274, "grad_norm": 0.2314453125, "learning_rate": 0.0011017855582781174, "loss": 0.1541, "step": 48446 }, { "epoch": 0.08590230846496255, "grad_norm": 0.40625, "learning_rate": 0.0011017260397430755, "loss": 0.1533, "step": 48448 }, { "epoch": 0.08590585463027237, "grad_norm": 5.25, "learning_rate": 0.001101666521200485, "loss": 0.2161, "step": 48450 }, { "epoch": 0.08590940079558218, "grad_norm": 0.287109375, "learning_rate": 0.0011016070026506064, "loss": 0.1991, "step": 48452 }, { "epoch": 0.08591294696089201, "grad_norm": 0.859375, "learning_rate": 0.0011015474840936994, "loss": 0.1322, "step": 48454 }, { "epoch": 0.08591649312620182, "grad_norm": 0.52734375, "learning_rate": 0.0011014879655300248, "loss": 0.3502, "step": 48456 }, { "epoch": 0.08592003929151164, "grad_norm": 0.1376953125, "learning_rate": 0.0011014284469598424, "loss": 0.2333, "step": 48458 }, { "epoch": 0.08592358545682145, "grad_norm": 0.208984375, "learning_rate": 0.001101368928383413, "loss": 0.1386, "step": 48460 }, { "epoch": 0.08592713162213127, "grad_norm": 0.380859375, "learning_rate": 0.0011013094098009967, "loss": 0.2863, "step": 48462 }, { "epoch": 0.08593067778744108, "grad_norm": 0.7734375, "learning_rate": 0.0011012498912128538, "loss": 0.1681, "step": 48464 }, { "epoch": 0.0859342239527509, "grad_norm": 0.37109375, "learning_rate": 0.0011011903726192446, "loss": 0.2023, "step": 48466 }, { "epoch": 0.08593777011806071, "grad_norm": 0.455078125, "learning_rate": 0.0011011308540204298, "loss": 0.2109, "step": 48468 }, { "epoch": 0.08594131628337053, "grad_norm": 1.140625, "learning_rate": 0.0011010713354166685, "loss": 0.2583, "step": 48470 }, { "epoch": 0.08594486244868034, "grad_norm": 0.48046875, "learning_rate": 0.0011010118168082225, "loss": 0.2935, "step": 48472 }, { "epoch": 0.08594840861399015, "grad_norm": 0.81640625, "learning_rate": 0.0011009522981953509, "loss": 0.1853, "step": 48474 }, { "epoch": 0.08595195477929997, "grad_norm": 0.337890625, "learning_rate": 0.001100892779578315, "loss": 0.2114, "step": 48476 }, { "epoch": 0.08595550094460978, "grad_norm": 1.1015625, "learning_rate": 0.001100833260957374, "loss": 0.2339, "step": 48478 }, { "epoch": 0.0859590471099196, "grad_norm": 0.50390625, "learning_rate": 0.0011007737423327892, "loss": 0.1909, "step": 48480 }, { "epoch": 0.08596259327522941, "grad_norm": 0.4140625, "learning_rate": 0.0011007142237048205, "loss": 0.3065, "step": 48482 }, { "epoch": 0.08596613944053923, "grad_norm": 2.46875, "learning_rate": 0.001100654705073728, "loss": 0.216, "step": 48484 }, { "epoch": 0.08596968560584904, "grad_norm": 0.203125, "learning_rate": 0.0011005951864397723, "loss": 0.164, "step": 48486 }, { "epoch": 0.08597323177115886, "grad_norm": 0.5859375, "learning_rate": 0.001100535667803214, "loss": 0.3116, "step": 48488 }, { "epoch": 0.08597677793646867, "grad_norm": 0.28125, "learning_rate": 0.0011004761491643122, "loss": 0.2157, "step": 48490 }, { "epoch": 0.08598032410177849, "grad_norm": 2.25, "learning_rate": 0.0011004166305233285, "loss": 0.219, "step": 48492 }, { "epoch": 0.0859838702670883, "grad_norm": 0.361328125, "learning_rate": 0.0011003571118805226, "loss": 0.225, "step": 48494 }, { "epoch": 0.08598741643239811, "grad_norm": 0.1796875, "learning_rate": 0.001100297593236155, "loss": 0.1611, "step": 48496 }, { "epoch": 0.08599096259770794, "grad_norm": 0.59375, "learning_rate": 0.001100238074590486, "loss": 0.2643, "step": 48498 }, { "epoch": 0.08599450876301776, "grad_norm": 0.275390625, "learning_rate": 0.0011001785559437757, "loss": 0.1742, "step": 48500 }, { "epoch": 0.08599805492832757, "grad_norm": 0.396484375, "learning_rate": 0.0011001190372962844, "loss": 0.1548, "step": 48502 }, { "epoch": 0.08600160109363739, "grad_norm": 0.73046875, "learning_rate": 0.0011000595186482726, "loss": 0.2452, "step": 48504 }, { "epoch": 0.0860051472589472, "grad_norm": 0.8125, "learning_rate": 0.0011, "loss": 0.2213, "step": 48506 }, { "epoch": 0.08600869342425702, "grad_norm": 0.212890625, "learning_rate": 0.0010999404813517278, "loss": 0.1806, "step": 48508 }, { "epoch": 0.08601223958956683, "grad_norm": 0.318359375, "learning_rate": 0.001099880962703716, "loss": 0.1279, "step": 48510 }, { "epoch": 0.08601578575487664, "grad_norm": 0.5234375, "learning_rate": 0.001099821444056225, "loss": 0.2283, "step": 48512 }, { "epoch": 0.08601933192018646, "grad_norm": 0.51953125, "learning_rate": 0.0010997619254095143, "loss": 0.3027, "step": 48514 }, { "epoch": 0.08602287808549627, "grad_norm": 0.9609375, "learning_rate": 0.0010997024067638452, "loss": 0.177, "step": 48516 }, { "epoch": 0.08602642425080609, "grad_norm": 0.375, "learning_rate": 0.0010996428881194776, "loss": 0.1752, "step": 48518 }, { "epoch": 0.0860299704161159, "grad_norm": 0.50390625, "learning_rate": 0.0010995833694766716, "loss": 0.2099, "step": 48520 }, { "epoch": 0.08603351658142572, "grad_norm": 0.220703125, "learning_rate": 0.001099523850835688, "loss": 0.1788, "step": 48522 }, { "epoch": 0.08603706274673553, "grad_norm": 0.2197265625, "learning_rate": 0.0010994643321967862, "loss": 0.1636, "step": 48524 }, { "epoch": 0.08604060891204535, "grad_norm": 0.333984375, "learning_rate": 0.0010994048135602277, "loss": 0.1214, "step": 48526 }, { "epoch": 0.08604415507735516, "grad_norm": 1.0703125, "learning_rate": 0.0010993452949262723, "loss": 0.2123, "step": 48528 }, { "epoch": 0.08604770124266498, "grad_norm": 0.2265625, "learning_rate": 0.0010992857762951798, "loss": 0.2248, "step": 48530 }, { "epoch": 0.08605124740797479, "grad_norm": 0.427734375, "learning_rate": 0.0010992262576672107, "loss": 0.1967, "step": 48532 }, { "epoch": 0.0860547935732846, "grad_norm": 0.80078125, "learning_rate": 0.0010991667390426259, "loss": 0.2271, "step": 48534 }, { "epoch": 0.08605833973859442, "grad_norm": 0.140625, "learning_rate": 0.0010991072204216855, "loss": 0.1451, "step": 48536 }, { "epoch": 0.08606188590390423, "grad_norm": 0.447265625, "learning_rate": 0.0010990477018046492, "loss": 0.2062, "step": 48538 }, { "epoch": 0.08606543206921405, "grad_norm": 0.21875, "learning_rate": 0.001098988183191778, "loss": 0.1551, "step": 48540 }, { "epoch": 0.08606897823452386, "grad_norm": 0.328125, "learning_rate": 0.0010989286645833314, "loss": 0.1807, "step": 48542 }, { "epoch": 0.08607252439983369, "grad_norm": 0.29296875, "learning_rate": 0.0010988691459795708, "loss": 0.1927, "step": 48544 }, { "epoch": 0.0860760705651435, "grad_norm": 0.265625, "learning_rate": 0.0010988096273807555, "loss": 0.1778, "step": 48546 }, { "epoch": 0.08607961673045332, "grad_norm": 0.78125, "learning_rate": 0.001098750108787146, "loss": 0.2181, "step": 48548 }, { "epoch": 0.08608316289576314, "grad_norm": 0.32421875, "learning_rate": 0.0010986905901990034, "loss": 0.2127, "step": 48550 }, { "epoch": 0.08608670906107295, "grad_norm": 0.73828125, "learning_rate": 0.001098631071616587, "loss": 0.2938, "step": 48552 }, { "epoch": 0.08609025522638276, "grad_norm": 0.1875, "learning_rate": 0.001098571553040158, "loss": 0.2818, "step": 48554 }, { "epoch": 0.08609380139169258, "grad_norm": 0.1298828125, "learning_rate": 0.0010985120344699755, "loss": 0.1537, "step": 48556 }, { "epoch": 0.0860973475570024, "grad_norm": 0.66015625, "learning_rate": 0.0010984525159063005, "loss": 0.2053, "step": 48558 }, { "epoch": 0.08610089372231221, "grad_norm": 1.265625, "learning_rate": 0.001098392997349394, "loss": 0.2862, "step": 48560 }, { "epoch": 0.08610443988762202, "grad_norm": 0.17578125, "learning_rate": 0.0010983334787995152, "loss": 0.1459, "step": 48562 }, { "epoch": 0.08610798605293184, "grad_norm": 0.283203125, "learning_rate": 0.0010982739602569244, "loss": 0.1993, "step": 48564 }, { "epoch": 0.08611153221824165, "grad_norm": 0.5625, "learning_rate": 0.001098214441721883, "loss": 0.1537, "step": 48566 }, { "epoch": 0.08611507838355147, "grad_norm": 0.384765625, "learning_rate": 0.0010981549231946503, "loss": 0.1549, "step": 48568 }, { "epoch": 0.08611862454886128, "grad_norm": 0.55078125, "learning_rate": 0.0010980954046754868, "loss": 0.1593, "step": 48570 }, { "epoch": 0.0861221707141711, "grad_norm": 0.34765625, "learning_rate": 0.0010980358861646532, "loss": 0.2112, "step": 48572 }, { "epoch": 0.08612571687948091, "grad_norm": 0.3671875, "learning_rate": 0.001097976367662409, "loss": 0.2272, "step": 48574 }, { "epoch": 0.08612926304479072, "grad_norm": 0.28515625, "learning_rate": 0.0010979168491690155, "loss": 0.1882, "step": 48576 }, { "epoch": 0.08613280921010054, "grad_norm": 0.154296875, "learning_rate": 0.0010978573306847322, "loss": 0.1403, "step": 48578 }, { "epoch": 0.08613635537541035, "grad_norm": 0.328125, "learning_rate": 0.0010977978122098195, "loss": 0.2043, "step": 48580 }, { "epoch": 0.08613990154072017, "grad_norm": 0.419921875, "learning_rate": 0.0010977382937445383, "loss": 0.1768, "step": 48582 }, { "epoch": 0.08614344770602998, "grad_norm": 1.328125, "learning_rate": 0.0010976787752891484, "loss": 0.2107, "step": 48584 }, { "epoch": 0.0861469938713398, "grad_norm": 0.59765625, "learning_rate": 0.00109761925684391, "loss": 0.2093, "step": 48586 }, { "epoch": 0.08615054003664961, "grad_norm": 1.125, "learning_rate": 0.0010975597384090836, "loss": 0.1708, "step": 48588 }, { "epoch": 0.08615408620195944, "grad_norm": 0.35546875, "learning_rate": 0.0010975002199849295, "loss": 0.1335, "step": 48590 }, { "epoch": 0.08615763236726925, "grad_norm": 0.2490234375, "learning_rate": 0.0010974407015717082, "loss": 0.1713, "step": 48592 }, { "epoch": 0.08616117853257907, "grad_norm": 0.18359375, "learning_rate": 0.0010973811831696796, "loss": 0.1992, "step": 48594 }, { "epoch": 0.08616472469788888, "grad_norm": 0.486328125, "learning_rate": 0.0010973216647791042, "loss": 0.1484, "step": 48596 }, { "epoch": 0.0861682708631987, "grad_norm": 0.96484375, "learning_rate": 0.001097262146400242, "loss": 0.3323, "step": 48598 }, { "epoch": 0.08617181702850851, "grad_norm": 0.28125, "learning_rate": 0.001097202628033354, "loss": 0.1736, "step": 48600 }, { "epoch": 0.08617536319381833, "grad_norm": 0.6328125, "learning_rate": 0.0010971431096787004, "loss": 0.3408, "step": 48602 }, { "epoch": 0.08617890935912814, "grad_norm": 0.259765625, "learning_rate": 0.0010970835913365405, "loss": 0.172, "step": 48604 }, { "epoch": 0.08618245552443796, "grad_norm": 0.341796875, "learning_rate": 0.0010970240730071355, "loss": 0.1769, "step": 48606 }, { "epoch": 0.08618600168974777, "grad_norm": 0.46484375, "learning_rate": 0.0010969645546907453, "loss": 0.2094, "step": 48608 }, { "epoch": 0.08618954785505759, "grad_norm": 0.421875, "learning_rate": 0.001096905036387631, "loss": 0.175, "step": 48610 }, { "epoch": 0.0861930940203674, "grad_norm": 0.3046875, "learning_rate": 0.0010968455180980518, "loss": 0.1585, "step": 48612 }, { "epoch": 0.08619664018567721, "grad_norm": 0.62890625, "learning_rate": 0.0010967859998222685, "loss": 0.161, "step": 48614 }, { "epoch": 0.08620018635098703, "grad_norm": 0.2197265625, "learning_rate": 0.0010967264815605414, "loss": 0.2177, "step": 48616 }, { "epoch": 0.08620373251629684, "grad_norm": 0.3984375, "learning_rate": 0.0010966669633131307, "loss": 0.1532, "step": 48618 }, { "epoch": 0.08620727868160666, "grad_norm": 0.66796875, "learning_rate": 0.0010966074450802967, "loss": 0.1713, "step": 48620 }, { "epoch": 0.08621082484691647, "grad_norm": 0.341796875, "learning_rate": 0.0010965479268623, "loss": 0.179, "step": 48622 }, { "epoch": 0.08621437101222629, "grad_norm": 0.2333984375, "learning_rate": 0.0010964884086594002, "loss": 0.165, "step": 48624 }, { "epoch": 0.0862179171775361, "grad_norm": 0.86328125, "learning_rate": 0.001096428890471859, "loss": 0.3584, "step": 48626 }, { "epoch": 0.08622146334284592, "grad_norm": 0.2080078125, "learning_rate": 0.0010963693722999348, "loss": 0.2076, "step": 48628 }, { "epoch": 0.08622500950815573, "grad_norm": 0.490234375, "learning_rate": 0.0010963098541438895, "loss": 0.1279, "step": 48630 }, { "epoch": 0.08622855567346555, "grad_norm": 0.392578125, "learning_rate": 0.0010962503360039825, "loss": 0.2293, "step": 48632 }, { "epoch": 0.08623210183877537, "grad_norm": 0.2216796875, "learning_rate": 0.0010961908178804744, "loss": 0.1787, "step": 48634 }, { "epoch": 0.08623564800408519, "grad_norm": 1.546875, "learning_rate": 0.0010961312997736252, "loss": 0.3364, "step": 48636 }, { "epoch": 0.086239194169395, "grad_norm": 0.30078125, "learning_rate": 0.0010960717816836955, "loss": 0.1513, "step": 48638 }, { "epoch": 0.08624274033470482, "grad_norm": 0.404296875, "learning_rate": 0.0010960122636109457, "loss": 0.2305, "step": 48640 }, { "epoch": 0.08624628650001463, "grad_norm": 0.1923828125, "learning_rate": 0.0010959527455556362, "loss": 0.1412, "step": 48642 }, { "epoch": 0.08624983266532445, "grad_norm": 0.4375, "learning_rate": 0.0010958932275180267, "loss": 0.3239, "step": 48644 }, { "epoch": 0.08625337883063426, "grad_norm": 0.455078125, "learning_rate": 0.0010958337094983779, "loss": 0.1594, "step": 48646 }, { "epoch": 0.08625692499594408, "grad_norm": 0.279296875, "learning_rate": 0.00109577419149695, "loss": 0.1438, "step": 48648 }, { "epoch": 0.08626047116125389, "grad_norm": 0.234375, "learning_rate": 0.0010957146735140035, "loss": 0.1687, "step": 48650 }, { "epoch": 0.0862640173265637, "grad_norm": 0.423828125, "learning_rate": 0.0010956551555497982, "loss": 0.1425, "step": 48652 }, { "epoch": 0.08626756349187352, "grad_norm": 1.890625, "learning_rate": 0.001095595637604595, "loss": 0.1894, "step": 48654 }, { "epoch": 0.08627110965718333, "grad_norm": 0.302734375, "learning_rate": 0.0010955361196786537, "loss": 0.2172, "step": 48656 }, { "epoch": 0.08627465582249315, "grad_norm": 1.1875, "learning_rate": 0.0010954766017722353, "loss": 0.2026, "step": 48658 }, { "epoch": 0.08627820198780296, "grad_norm": 0.251953125, "learning_rate": 0.001095417083885599, "loss": 0.2649, "step": 48660 }, { "epoch": 0.08628174815311278, "grad_norm": 0.546875, "learning_rate": 0.0010953575660190061, "loss": 0.1812, "step": 48662 }, { "epoch": 0.08628529431842259, "grad_norm": 0.490234375, "learning_rate": 0.0010952980481727164, "loss": 0.2278, "step": 48664 }, { "epoch": 0.0862888404837324, "grad_norm": 0.265625, "learning_rate": 0.0010952385303469902, "loss": 0.193, "step": 48666 }, { "epoch": 0.08629238664904222, "grad_norm": 0.6875, "learning_rate": 0.0010951790125420882, "loss": 0.1932, "step": 48668 }, { "epoch": 0.08629593281435204, "grad_norm": 0.5625, "learning_rate": 0.00109511949475827, "loss": 0.2465, "step": 48670 }, { "epoch": 0.08629947897966185, "grad_norm": 0.490234375, "learning_rate": 0.0010950599769957966, "loss": 0.191, "step": 48672 }, { "epoch": 0.08630302514497167, "grad_norm": 0.51171875, "learning_rate": 0.0010950004592549275, "loss": 0.1741, "step": 48674 }, { "epoch": 0.08630657131028148, "grad_norm": 0.24609375, "learning_rate": 0.0010949409415359243, "loss": 0.168, "step": 48676 }, { "epoch": 0.0863101174755913, "grad_norm": 0.2275390625, "learning_rate": 0.0010948814238390461, "loss": 0.1687, "step": 48678 }, { "epoch": 0.08631366364090112, "grad_norm": 0.373046875, "learning_rate": 0.0010948219061645533, "loss": 0.1703, "step": 48680 }, { "epoch": 0.08631720980621094, "grad_norm": 0.74609375, "learning_rate": 0.0010947623885127068, "loss": 0.1684, "step": 48682 }, { "epoch": 0.08632075597152075, "grad_norm": 0.546875, "learning_rate": 0.0010947028708837665, "loss": 0.2822, "step": 48684 }, { "epoch": 0.08632430213683057, "grad_norm": 0.298828125, "learning_rate": 0.001094643353277993, "loss": 0.189, "step": 48686 }, { "epoch": 0.08632784830214038, "grad_norm": 0.37109375, "learning_rate": 0.0010945838356956458, "loss": 0.1656, "step": 48688 }, { "epoch": 0.0863313944674502, "grad_norm": 0.373046875, "learning_rate": 0.001094524318136986, "loss": 0.333, "step": 48690 }, { "epoch": 0.08633494063276001, "grad_norm": 0.8515625, "learning_rate": 0.0010944648006022742, "loss": 0.2017, "step": 48692 }, { "epoch": 0.08633848679806982, "grad_norm": 0.2216796875, "learning_rate": 0.0010944052830917695, "loss": 0.1601, "step": 48694 }, { "epoch": 0.08634203296337964, "grad_norm": 0.439453125, "learning_rate": 0.001094345765605733, "loss": 0.1937, "step": 48696 }, { "epoch": 0.08634557912868945, "grad_norm": 0.486328125, "learning_rate": 0.0010942862481444246, "loss": 0.1347, "step": 48698 }, { "epoch": 0.08634912529399927, "grad_norm": 1.34375, "learning_rate": 0.0010942267307081054, "loss": 0.2311, "step": 48700 }, { "epoch": 0.08635267145930908, "grad_norm": 0.4375, "learning_rate": 0.0010941672132970346, "loss": 0.1766, "step": 48702 }, { "epoch": 0.0863562176246189, "grad_norm": 0.5390625, "learning_rate": 0.0010941076959114734, "loss": 0.1801, "step": 48704 }, { "epoch": 0.08635976378992871, "grad_norm": 0.72265625, "learning_rate": 0.0010940481785516813, "loss": 0.2222, "step": 48706 }, { "epoch": 0.08636330995523853, "grad_norm": 0.435546875, "learning_rate": 0.0010939886612179198, "loss": 0.2221, "step": 48708 }, { "epoch": 0.08636685612054834, "grad_norm": 0.5625, "learning_rate": 0.0010939291439104476, "loss": 0.1908, "step": 48710 }, { "epoch": 0.08637040228585816, "grad_norm": 0.29296875, "learning_rate": 0.0010938696266295257, "loss": 0.2349, "step": 48712 }, { "epoch": 0.08637394845116797, "grad_norm": 0.28515625, "learning_rate": 0.001093810109375415, "loss": 0.1534, "step": 48714 }, { "epoch": 0.08637749461647778, "grad_norm": 1.0, "learning_rate": 0.0010937505921483754, "loss": 0.2074, "step": 48716 }, { "epoch": 0.0863810407817876, "grad_norm": 0.306640625, "learning_rate": 0.0010936910749486666, "loss": 0.1969, "step": 48718 }, { "epoch": 0.08638458694709741, "grad_norm": 0.2080078125, "learning_rate": 0.0010936315577765494, "loss": 0.1449, "step": 48720 }, { "epoch": 0.08638813311240723, "grad_norm": 0.5, "learning_rate": 0.0010935720406322843, "loss": 0.1889, "step": 48722 }, { "epoch": 0.08639167927771704, "grad_norm": 0.458984375, "learning_rate": 0.0010935125235161315, "loss": 0.1781, "step": 48724 }, { "epoch": 0.08639522544302687, "grad_norm": 0.32421875, "learning_rate": 0.0010934530064283512, "loss": 0.2351, "step": 48726 }, { "epoch": 0.08639877160833669, "grad_norm": 0.22265625, "learning_rate": 0.0010933934893692032, "loss": 0.1555, "step": 48728 }, { "epoch": 0.0864023177736465, "grad_norm": 0.52734375, "learning_rate": 0.0010933339723389484, "loss": 0.2263, "step": 48730 }, { "epoch": 0.08640586393895631, "grad_norm": 0.19921875, "learning_rate": 0.0010932744553378473, "loss": 0.1836, "step": 48732 }, { "epoch": 0.08640941010426613, "grad_norm": 0.419921875, "learning_rate": 0.0010932149383661594, "loss": 0.1395, "step": 48734 }, { "epoch": 0.08641295626957594, "grad_norm": 0.345703125, "learning_rate": 0.0010931554214241453, "loss": 0.1993, "step": 48736 }, { "epoch": 0.08641650243488576, "grad_norm": 0.369140625, "learning_rate": 0.0010930959045120658, "loss": 0.1689, "step": 48738 }, { "epoch": 0.08642004860019557, "grad_norm": 0.56640625, "learning_rate": 0.0010930363876301807, "loss": 0.2095, "step": 48740 }, { "epoch": 0.08642359476550539, "grad_norm": 1.203125, "learning_rate": 0.0010929768707787504, "loss": 0.189, "step": 48742 }, { "epoch": 0.0864271409308152, "grad_norm": 0.298828125, "learning_rate": 0.001092917353958035, "loss": 0.1454, "step": 48744 }, { "epoch": 0.08643068709612502, "grad_norm": 0.29296875, "learning_rate": 0.0010928578371682952, "loss": 0.1685, "step": 48746 }, { "epoch": 0.08643423326143483, "grad_norm": 1.1328125, "learning_rate": 0.001092798320409791, "loss": 0.2708, "step": 48748 }, { "epoch": 0.08643777942674465, "grad_norm": 0.35546875, "learning_rate": 0.0010927388036827829, "loss": 0.1414, "step": 48750 }, { "epoch": 0.08644132559205446, "grad_norm": 0.640625, "learning_rate": 0.0010926792869875306, "loss": 0.1761, "step": 48752 }, { "epoch": 0.08644487175736427, "grad_norm": 0.53515625, "learning_rate": 0.001092619770324295, "loss": 0.1363, "step": 48754 }, { "epoch": 0.08644841792267409, "grad_norm": 0.58203125, "learning_rate": 0.0010925602536933368, "loss": 0.1605, "step": 48756 }, { "epoch": 0.0864519640879839, "grad_norm": 0.2333984375, "learning_rate": 0.0010925007370949152, "loss": 0.1424, "step": 48758 }, { "epoch": 0.08645551025329372, "grad_norm": 0.78515625, "learning_rate": 0.001092441220529291, "loss": 0.211, "step": 48760 }, { "epoch": 0.08645905641860353, "grad_norm": 0.6328125, "learning_rate": 0.0010923817039967248, "loss": 0.1858, "step": 48762 }, { "epoch": 0.08646260258391335, "grad_norm": 1.4296875, "learning_rate": 0.0010923221874974763, "loss": 0.2291, "step": 48764 }, { "epoch": 0.08646614874922316, "grad_norm": 1.5546875, "learning_rate": 0.0010922626710318064, "loss": 0.2923, "step": 48766 }, { "epoch": 0.08646969491453298, "grad_norm": 1.4453125, "learning_rate": 0.0010922031545999749, "loss": 0.2677, "step": 48768 }, { "epoch": 0.0864732410798428, "grad_norm": 0.427734375, "learning_rate": 0.001092143638202242, "loss": 0.1585, "step": 48770 }, { "epoch": 0.08647678724515262, "grad_norm": 0.4296875, "learning_rate": 0.0010920841218388685, "loss": 0.1754, "step": 48772 }, { "epoch": 0.08648033341046243, "grad_norm": 0.87109375, "learning_rate": 0.0010920246055101147, "loss": 0.1896, "step": 48774 }, { "epoch": 0.08648387957577225, "grad_norm": 0.36328125, "learning_rate": 0.0010919650892162406, "loss": 0.1826, "step": 48776 }, { "epoch": 0.08648742574108206, "grad_norm": 0.43359375, "learning_rate": 0.0010919055729575063, "loss": 0.2226, "step": 48778 }, { "epoch": 0.08649097190639188, "grad_norm": 0.3046875, "learning_rate": 0.0010918460567341723, "loss": 0.1714, "step": 48780 }, { "epoch": 0.08649451807170169, "grad_norm": 0.2314453125, "learning_rate": 0.0010917865405464992, "loss": 0.1621, "step": 48782 }, { "epoch": 0.08649806423701151, "grad_norm": 0.5234375, "learning_rate": 0.0010917270243947469, "loss": 0.2207, "step": 48784 }, { "epoch": 0.08650161040232132, "grad_norm": 0.2041015625, "learning_rate": 0.0010916675082791756, "loss": 0.1516, "step": 48786 }, { "epoch": 0.08650515656763114, "grad_norm": 0.26171875, "learning_rate": 0.0010916079922000457, "loss": 0.1971, "step": 48788 }, { "epoch": 0.08650870273294095, "grad_norm": 0.25390625, "learning_rate": 0.001091548476157618, "loss": 0.163, "step": 48790 }, { "epoch": 0.08651224889825077, "grad_norm": 0.2431640625, "learning_rate": 0.001091488960152152, "loss": 0.1567, "step": 48792 }, { "epoch": 0.08651579506356058, "grad_norm": 0.34375, "learning_rate": 0.0010914294441839083, "loss": 0.1742, "step": 48794 }, { "epoch": 0.0865193412288704, "grad_norm": 0.81640625, "learning_rate": 0.0010913699282531475, "loss": 0.2904, "step": 48796 }, { "epoch": 0.08652288739418021, "grad_norm": 0.248046875, "learning_rate": 0.0010913104123601296, "loss": 0.2002, "step": 48798 }, { "epoch": 0.08652643355949002, "grad_norm": 0.23046875, "learning_rate": 0.001091250896505115, "loss": 0.1823, "step": 48800 }, { "epoch": 0.08652997972479984, "grad_norm": 0.43359375, "learning_rate": 0.0010911913806883635, "loss": 0.1748, "step": 48802 }, { "epoch": 0.08653352589010965, "grad_norm": 0.328125, "learning_rate": 0.0010911318649101359, "loss": 0.2029, "step": 48804 }, { "epoch": 0.08653707205541947, "grad_norm": 2.359375, "learning_rate": 0.0010910723491706928, "loss": 0.258, "step": 48806 }, { "epoch": 0.08654061822072928, "grad_norm": 0.90625, "learning_rate": 0.0010910128334702936, "loss": 0.1774, "step": 48808 }, { "epoch": 0.0865441643860391, "grad_norm": 0.703125, "learning_rate": 0.001090953317809199, "loss": 0.1653, "step": 48810 }, { "epoch": 0.08654771055134891, "grad_norm": 1.3828125, "learning_rate": 0.0010908938021876695, "loss": 0.2222, "step": 48812 }, { "epoch": 0.08655125671665873, "grad_norm": 0.8359375, "learning_rate": 0.0010908342866059655, "loss": 0.2568, "step": 48814 }, { "epoch": 0.08655480288196855, "grad_norm": 0.375, "learning_rate": 0.0010907747710643466, "loss": 0.1475, "step": 48816 }, { "epoch": 0.08655834904727837, "grad_norm": 0.53125, "learning_rate": 0.0010907152555630737, "loss": 0.3855, "step": 48818 }, { "epoch": 0.08656189521258818, "grad_norm": 0.349609375, "learning_rate": 0.0010906557401024067, "loss": 0.2523, "step": 48820 }, { "epoch": 0.086565441377898, "grad_norm": 0.443359375, "learning_rate": 0.0010905962246826064, "loss": 0.1728, "step": 48822 }, { "epoch": 0.08656898754320781, "grad_norm": 1.40625, "learning_rate": 0.0010905367093039325, "loss": 0.2541, "step": 48824 }, { "epoch": 0.08657253370851763, "grad_norm": 0.4296875, "learning_rate": 0.0010904771939666453, "loss": 0.3226, "step": 48826 }, { "epoch": 0.08657607987382744, "grad_norm": 0.287109375, "learning_rate": 0.0010904176786710057, "loss": 0.211, "step": 48828 }, { "epoch": 0.08657962603913726, "grad_norm": 0.322265625, "learning_rate": 0.0010903581634172737, "loss": 0.2089, "step": 48830 }, { "epoch": 0.08658317220444707, "grad_norm": 2.46875, "learning_rate": 0.0010902986482057091, "loss": 0.3208, "step": 48832 }, { "epoch": 0.08658671836975688, "grad_norm": 0.498046875, "learning_rate": 0.0010902391330365729, "loss": 0.1855, "step": 48834 }, { "epoch": 0.0865902645350667, "grad_norm": 0.71484375, "learning_rate": 0.0010901796179101247, "loss": 0.2128, "step": 48836 }, { "epoch": 0.08659381070037651, "grad_norm": 0.2099609375, "learning_rate": 0.0010901201028266257, "loss": 0.2241, "step": 48838 }, { "epoch": 0.08659735686568633, "grad_norm": 0.2421875, "learning_rate": 0.0010900605877863351, "loss": 0.1959, "step": 48840 }, { "epoch": 0.08660090303099614, "grad_norm": 0.404296875, "learning_rate": 0.0010900010727895137, "loss": 0.1499, "step": 48842 }, { "epoch": 0.08660444919630596, "grad_norm": 0.390625, "learning_rate": 0.001089941557836422, "loss": 0.2229, "step": 48844 }, { "epoch": 0.08660799536161577, "grad_norm": 0.20703125, "learning_rate": 0.00108988204292732, "loss": 0.1513, "step": 48846 }, { "epoch": 0.08661154152692559, "grad_norm": 0.55859375, "learning_rate": 0.0010898225280624686, "loss": 0.1708, "step": 48848 }, { "epoch": 0.0866150876922354, "grad_norm": 0.84765625, "learning_rate": 0.0010897630132421267, "loss": 0.1664, "step": 48850 }, { "epoch": 0.08661863385754522, "grad_norm": 0.32421875, "learning_rate": 0.001089703498466556, "loss": 0.1488, "step": 48852 }, { "epoch": 0.08662218002285503, "grad_norm": 0.35546875, "learning_rate": 0.0010896439837360162, "loss": 0.1407, "step": 48854 }, { "epoch": 0.08662572618816484, "grad_norm": 0.31640625, "learning_rate": 0.0010895844690507676, "loss": 0.1775, "step": 48856 }, { "epoch": 0.08662927235347466, "grad_norm": 0.74609375, "learning_rate": 0.00108952495441107, "loss": 0.2847, "step": 48858 }, { "epoch": 0.08663281851878447, "grad_norm": 0.2470703125, "learning_rate": 0.0010894654398171846, "loss": 0.1364, "step": 48860 }, { "epoch": 0.0866363646840943, "grad_norm": 0.47265625, "learning_rate": 0.0010894059252693712, "loss": 0.1719, "step": 48862 }, { "epoch": 0.08663991084940412, "grad_norm": 1.2421875, "learning_rate": 0.0010893464107678901, "loss": 0.415, "step": 48864 }, { "epoch": 0.08664345701471393, "grad_norm": 0.46484375, "learning_rate": 0.0010892868963130015, "loss": 0.1927, "step": 48866 }, { "epoch": 0.08664700318002375, "grad_norm": 0.484375, "learning_rate": 0.0010892273819049659, "loss": 0.2758, "step": 48868 }, { "epoch": 0.08665054934533356, "grad_norm": 0.34765625, "learning_rate": 0.0010891678675440436, "loss": 0.3137, "step": 48870 }, { "epoch": 0.08665409551064338, "grad_norm": 0.55859375, "learning_rate": 0.0010891083532304946, "loss": 0.2279, "step": 48872 }, { "epoch": 0.08665764167595319, "grad_norm": 0.484375, "learning_rate": 0.0010890488389645795, "loss": 0.2171, "step": 48874 }, { "epoch": 0.086661187841263, "grad_norm": 0.2333984375, "learning_rate": 0.001088989324746558, "loss": 0.1592, "step": 48876 }, { "epoch": 0.08666473400657282, "grad_norm": 0.3828125, "learning_rate": 0.0010889298105766913, "loss": 0.1589, "step": 48878 }, { "epoch": 0.08666828017188263, "grad_norm": 3.53125, "learning_rate": 0.001088870296455239, "loss": 0.3029, "step": 48880 }, { "epoch": 0.08667182633719245, "grad_norm": 0.265625, "learning_rate": 0.0010888107823824613, "loss": 0.1309, "step": 48882 }, { "epoch": 0.08667537250250226, "grad_norm": 0.45703125, "learning_rate": 0.001088751268358619, "loss": 0.1641, "step": 48884 }, { "epoch": 0.08667891866781208, "grad_norm": 0.189453125, "learning_rate": 0.0010886917543839719, "loss": 0.1648, "step": 48886 }, { "epoch": 0.08668246483312189, "grad_norm": 0.4140625, "learning_rate": 0.001088632240458781, "loss": 0.2518, "step": 48888 }, { "epoch": 0.0866860109984317, "grad_norm": 0.494140625, "learning_rate": 0.0010885727265833056, "loss": 0.2279, "step": 48890 }, { "epoch": 0.08668955716374152, "grad_norm": 0.2890625, "learning_rate": 0.0010885132127578065, "loss": 0.1208, "step": 48892 }, { "epoch": 0.08669310332905134, "grad_norm": 1.6015625, "learning_rate": 0.001088453698982544, "loss": 0.1688, "step": 48894 }, { "epoch": 0.08669664949436115, "grad_norm": 0.37109375, "learning_rate": 0.0010883941852577789, "loss": 0.2222, "step": 48896 }, { "epoch": 0.08670019565967096, "grad_norm": 0.228515625, "learning_rate": 0.00108833467158377, "loss": 0.1825, "step": 48898 }, { "epoch": 0.08670374182498078, "grad_norm": 3.28125, "learning_rate": 0.0010882751579607789, "loss": 0.5534, "step": 48900 }, { "epoch": 0.0867072879902906, "grad_norm": 0.9453125, "learning_rate": 0.0010882156443890651, "loss": 0.1436, "step": 48902 }, { "epoch": 0.08671083415560041, "grad_norm": 0.1982421875, "learning_rate": 0.0010881561308688896, "loss": 0.1646, "step": 48904 }, { "epoch": 0.08671438032091024, "grad_norm": 0.31640625, "learning_rate": 0.0010880966174005122, "loss": 0.2599, "step": 48906 }, { "epoch": 0.08671792648622005, "grad_norm": 0.376953125, "learning_rate": 0.0010880371039841935, "loss": 0.1621, "step": 48908 }, { "epoch": 0.08672147265152987, "grad_norm": 0.6328125, "learning_rate": 0.001087977590620193, "loss": 0.1176, "step": 48910 }, { "epoch": 0.08672501881683968, "grad_norm": 0.267578125, "learning_rate": 0.0010879180773087722, "loss": 0.1737, "step": 48912 }, { "epoch": 0.0867285649821495, "grad_norm": 0.208984375, "learning_rate": 0.0010878585640501904, "loss": 0.1238, "step": 48914 }, { "epoch": 0.08673211114745931, "grad_norm": 0.34765625, "learning_rate": 0.001087799050844708, "loss": 0.1617, "step": 48916 }, { "epoch": 0.08673565731276912, "grad_norm": 0.73046875, "learning_rate": 0.0010877395376925854, "loss": 0.1769, "step": 48918 }, { "epoch": 0.08673920347807894, "grad_norm": 2.171875, "learning_rate": 0.0010876800245940834, "loss": 0.2334, "step": 48920 }, { "epoch": 0.08674274964338875, "grad_norm": 0.2890625, "learning_rate": 0.0010876205115494615, "loss": 0.2189, "step": 48922 }, { "epoch": 0.08674629580869857, "grad_norm": 0.326171875, "learning_rate": 0.0010875609985589803, "loss": 0.303, "step": 48924 }, { "epoch": 0.08674984197400838, "grad_norm": 0.482421875, "learning_rate": 0.0010875014856229002, "loss": 0.2432, "step": 48926 }, { "epoch": 0.0867533881393182, "grad_norm": 0.66796875, "learning_rate": 0.0010874419727414813, "loss": 0.1743, "step": 48928 }, { "epoch": 0.08675693430462801, "grad_norm": 0.97265625, "learning_rate": 0.001087382459914984, "loss": 0.2393, "step": 48930 }, { "epoch": 0.08676048046993783, "grad_norm": 0.30859375, "learning_rate": 0.0010873229471436683, "loss": 0.1428, "step": 48932 }, { "epoch": 0.08676402663524764, "grad_norm": 1.3046875, "learning_rate": 0.0010872634344277947, "loss": 0.2371, "step": 48934 }, { "epoch": 0.08676757280055745, "grad_norm": 1.4921875, "learning_rate": 0.0010872039217676233, "loss": 0.3052, "step": 48936 }, { "epoch": 0.08677111896586727, "grad_norm": 0.474609375, "learning_rate": 0.0010871444091634151, "loss": 0.296, "step": 48938 }, { "epoch": 0.08677466513117708, "grad_norm": 0.51171875, "learning_rate": 0.0010870848966154291, "loss": 0.1639, "step": 48940 }, { "epoch": 0.0867782112964869, "grad_norm": 0.267578125, "learning_rate": 0.0010870253841239266, "loss": 0.2208, "step": 48942 }, { "epoch": 0.08678175746179671, "grad_norm": 0.91015625, "learning_rate": 0.0010869658716891675, "loss": 0.2408, "step": 48944 }, { "epoch": 0.08678530362710653, "grad_norm": 0.8046875, "learning_rate": 0.0010869063593114121, "loss": 0.2536, "step": 48946 }, { "epoch": 0.08678884979241634, "grad_norm": 0.212890625, "learning_rate": 0.0010868468469909208, "loss": 0.1878, "step": 48948 }, { "epoch": 0.08679239595772616, "grad_norm": 0.5078125, "learning_rate": 0.0010867873347279536, "loss": 0.1781, "step": 48950 }, { "epoch": 0.08679594212303599, "grad_norm": 0.4453125, "learning_rate": 0.0010867278225227708, "loss": 0.1865, "step": 48952 }, { "epoch": 0.0867994882883458, "grad_norm": 1.5859375, "learning_rate": 0.0010866683103756332, "loss": 0.3186, "step": 48954 }, { "epoch": 0.08680303445365561, "grad_norm": 0.4296875, "learning_rate": 0.0010866087982868004, "loss": 0.15, "step": 48956 }, { "epoch": 0.08680658061896543, "grad_norm": 0.484375, "learning_rate": 0.0010865492862565326, "loss": 0.1717, "step": 48958 }, { "epoch": 0.08681012678427524, "grad_norm": 0.419921875, "learning_rate": 0.0010864897742850908, "loss": 0.2188, "step": 48960 }, { "epoch": 0.08681367294958506, "grad_norm": 0.1376953125, "learning_rate": 0.001086430262372735, "loss": 0.1558, "step": 48962 }, { "epoch": 0.08681721911489487, "grad_norm": 1.8515625, "learning_rate": 0.001086370750519725, "loss": 0.3042, "step": 48964 }, { "epoch": 0.08682076528020469, "grad_norm": 0.66796875, "learning_rate": 0.0010863112387263219, "loss": 0.2229, "step": 48966 }, { "epoch": 0.0868243114455145, "grad_norm": 0.7734375, "learning_rate": 0.001086251726992785, "loss": 0.2437, "step": 48968 }, { "epoch": 0.08682785761082432, "grad_norm": 0.453125, "learning_rate": 0.0010861922153193755, "loss": 0.1555, "step": 48970 }, { "epoch": 0.08683140377613413, "grad_norm": 0.345703125, "learning_rate": 0.0010861327037063528, "loss": 0.1782, "step": 48972 }, { "epoch": 0.08683494994144395, "grad_norm": 0.26953125, "learning_rate": 0.001086073192153978, "loss": 0.2124, "step": 48974 }, { "epoch": 0.08683849610675376, "grad_norm": 0.259765625, "learning_rate": 0.0010860136806625106, "loss": 0.2375, "step": 48976 }, { "epoch": 0.08684204227206357, "grad_norm": 0.25390625, "learning_rate": 0.0010859541692322114, "loss": 0.1378, "step": 48978 }, { "epoch": 0.08684558843737339, "grad_norm": 0.59765625, "learning_rate": 0.0010858946578633404, "loss": 0.2267, "step": 48980 }, { "epoch": 0.0868491346026832, "grad_norm": 1.7109375, "learning_rate": 0.001085835146556158, "loss": 0.4263, "step": 48982 }, { "epoch": 0.08685268076799302, "grad_norm": 0.5859375, "learning_rate": 0.0010857756353109247, "loss": 0.1839, "step": 48984 }, { "epoch": 0.08685622693330283, "grad_norm": 0.259765625, "learning_rate": 0.0010857161241279003, "loss": 0.1915, "step": 48986 }, { "epoch": 0.08685977309861265, "grad_norm": 0.296875, "learning_rate": 0.0010856566130073454, "loss": 0.148, "step": 48988 }, { "epoch": 0.08686331926392246, "grad_norm": 1.203125, "learning_rate": 0.0010855971019495198, "loss": 0.2728, "step": 48990 }, { "epoch": 0.08686686542923228, "grad_norm": 0.55859375, "learning_rate": 0.0010855375909546844, "loss": 0.1805, "step": 48992 }, { "epoch": 0.08687041159454209, "grad_norm": 0.51171875, "learning_rate": 0.0010854780800230994, "loss": 0.1716, "step": 48994 }, { "epoch": 0.0868739577598519, "grad_norm": 1.4765625, "learning_rate": 0.0010854185691550244, "loss": 0.255, "step": 48996 }, { "epoch": 0.08687750392516173, "grad_norm": 0.49609375, "learning_rate": 0.0010853590583507202, "loss": 0.1567, "step": 48998 }, { "epoch": 0.08688105009047155, "grad_norm": 0.828125, "learning_rate": 0.0010852995476104473, "loss": 0.1815, "step": 49000 }, { "epoch": 0.08688459625578136, "grad_norm": 0.546875, "learning_rate": 0.0010852400369344654, "loss": 0.1843, "step": 49002 }, { "epoch": 0.08688814242109118, "grad_norm": 3.203125, "learning_rate": 0.001085180526323035, "loss": 0.1986, "step": 49004 }, { "epoch": 0.08689168858640099, "grad_norm": 0.71875, "learning_rate": 0.0010851210157764163, "loss": 0.2096, "step": 49006 }, { "epoch": 0.0868952347517108, "grad_norm": 0.671875, "learning_rate": 0.0010850615052948693, "loss": 0.1603, "step": 49008 }, { "epoch": 0.08689878091702062, "grad_norm": 0.40234375, "learning_rate": 0.001085001994878655, "loss": 0.2052, "step": 49010 }, { "epoch": 0.08690232708233044, "grad_norm": 0.4765625, "learning_rate": 0.0010849424845280336, "loss": 0.155, "step": 49012 }, { "epoch": 0.08690587324764025, "grad_norm": 0.51171875, "learning_rate": 0.0010848829742432647, "loss": 0.1774, "step": 49014 }, { "epoch": 0.08690941941295006, "grad_norm": 0.345703125, "learning_rate": 0.0010848234640246086, "loss": 0.1781, "step": 49016 }, { "epoch": 0.08691296557825988, "grad_norm": 0.29296875, "learning_rate": 0.0010847639538723263, "loss": 0.2018, "step": 49018 }, { "epoch": 0.0869165117435697, "grad_norm": 0.98046875, "learning_rate": 0.0010847044437866775, "loss": 0.1813, "step": 49020 }, { "epoch": 0.08692005790887951, "grad_norm": 0.373046875, "learning_rate": 0.0010846449337679224, "loss": 0.2766, "step": 49022 }, { "epoch": 0.08692360407418932, "grad_norm": 0.33984375, "learning_rate": 0.0010845854238163216, "loss": 0.2234, "step": 49024 }, { "epoch": 0.08692715023949914, "grad_norm": 0.494140625, "learning_rate": 0.001084525913932135, "loss": 0.1938, "step": 49026 }, { "epoch": 0.08693069640480895, "grad_norm": 0.30859375, "learning_rate": 0.0010844664041156236, "loss": 0.1902, "step": 49028 }, { "epoch": 0.08693424257011877, "grad_norm": 0.2158203125, "learning_rate": 0.0010844068943670466, "loss": 0.2067, "step": 49030 }, { "epoch": 0.08693778873542858, "grad_norm": 0.44921875, "learning_rate": 0.001084347384686665, "loss": 0.1781, "step": 49032 }, { "epoch": 0.0869413349007384, "grad_norm": 0.390625, "learning_rate": 0.0010842878750747386, "loss": 0.2648, "step": 49034 }, { "epoch": 0.08694488106604821, "grad_norm": 0.9140625, "learning_rate": 0.0010842283655315285, "loss": 0.2054, "step": 49036 }, { "epoch": 0.08694842723135802, "grad_norm": 0.30859375, "learning_rate": 0.0010841688560572936, "loss": 0.1389, "step": 49038 }, { "epoch": 0.08695197339666784, "grad_norm": 0.486328125, "learning_rate": 0.0010841093466522954, "loss": 0.1693, "step": 49040 }, { "epoch": 0.08695551956197767, "grad_norm": 0.177734375, "learning_rate": 0.0010840498373167935, "loss": 0.1548, "step": 49042 }, { "epoch": 0.08695906572728748, "grad_norm": 0.44140625, "learning_rate": 0.0010839903280510487, "loss": 0.1836, "step": 49044 }, { "epoch": 0.0869626118925973, "grad_norm": 0.490234375, "learning_rate": 0.0010839308188553204, "loss": 0.2119, "step": 49046 }, { "epoch": 0.08696615805790711, "grad_norm": 0.64453125, "learning_rate": 0.0010838713097298695, "loss": 0.1999, "step": 49048 }, { "epoch": 0.08696970422321693, "grad_norm": 0.443359375, "learning_rate": 0.0010838118006749563, "loss": 0.196, "step": 49050 }, { "epoch": 0.08697325038852674, "grad_norm": 0.55078125, "learning_rate": 0.0010837522916908409, "loss": 0.1737, "step": 49052 }, { "epoch": 0.08697679655383656, "grad_norm": 0.98828125, "learning_rate": 0.0010836927827777834, "loss": 0.4018, "step": 49054 }, { "epoch": 0.08698034271914637, "grad_norm": 0.27734375, "learning_rate": 0.001083633273936044, "loss": 0.1311, "step": 49056 }, { "epoch": 0.08698388888445618, "grad_norm": 0.515625, "learning_rate": 0.0010835737651658836, "loss": 0.1563, "step": 49058 }, { "epoch": 0.086987435049766, "grad_norm": 0.7734375, "learning_rate": 0.0010835142564675616, "loss": 0.2151, "step": 49060 }, { "epoch": 0.08699098121507581, "grad_norm": 0.875, "learning_rate": 0.001083454747841339, "loss": 0.2118, "step": 49062 }, { "epoch": 0.08699452738038563, "grad_norm": 0.36328125, "learning_rate": 0.0010833952392874755, "loss": 0.1969, "step": 49064 }, { "epoch": 0.08699807354569544, "grad_norm": 0.380859375, "learning_rate": 0.0010833357308062317, "loss": 0.1723, "step": 49066 }, { "epoch": 0.08700161971100526, "grad_norm": 0.427734375, "learning_rate": 0.001083276222397868, "loss": 0.158, "step": 49068 }, { "epoch": 0.08700516587631507, "grad_norm": 0.796875, "learning_rate": 0.0010832167140626438, "loss": 0.1686, "step": 49070 }, { "epoch": 0.08700871204162489, "grad_norm": 0.2890625, "learning_rate": 0.0010831572058008201, "loss": 0.1605, "step": 49072 }, { "epoch": 0.0870122582069347, "grad_norm": 0.3671875, "learning_rate": 0.0010830976976126572, "loss": 0.1939, "step": 49074 }, { "epoch": 0.08701580437224452, "grad_norm": 0.5703125, "learning_rate": 0.0010830381894984154, "loss": 0.1563, "step": 49076 }, { "epoch": 0.08701935053755433, "grad_norm": 0.212890625, "learning_rate": 0.0010829786814583542, "loss": 0.1811, "step": 49078 }, { "epoch": 0.08702289670286414, "grad_norm": 0.291015625, "learning_rate": 0.0010829191734927346, "loss": 0.204, "step": 49080 }, { "epoch": 0.08702644286817396, "grad_norm": 0.3671875, "learning_rate": 0.0010828596656018163, "loss": 0.2062, "step": 49082 }, { "epoch": 0.08702998903348377, "grad_norm": 0.275390625, "learning_rate": 0.0010828001577858606, "loss": 0.1784, "step": 49084 }, { "epoch": 0.08703353519879359, "grad_norm": 0.453125, "learning_rate": 0.0010827406500451265, "loss": 0.1464, "step": 49086 }, { "epoch": 0.08703708136410342, "grad_norm": 0.302734375, "learning_rate": 0.0010826811423798747, "loss": 0.1381, "step": 49088 }, { "epoch": 0.08704062752941323, "grad_norm": 0.73828125, "learning_rate": 0.0010826216347903654, "loss": 0.1632, "step": 49090 }, { "epoch": 0.08704417369472305, "grad_norm": 0.26953125, "learning_rate": 0.0010825621272768596, "loss": 0.1436, "step": 49092 }, { "epoch": 0.08704771986003286, "grad_norm": 0.3125, "learning_rate": 0.0010825026198396162, "loss": 0.1923, "step": 49094 }, { "epoch": 0.08705126602534267, "grad_norm": 0.25390625, "learning_rate": 0.0010824431124788966, "loss": 0.1674, "step": 49096 }, { "epoch": 0.08705481219065249, "grad_norm": 0.37890625, "learning_rate": 0.0010823836051949605, "loss": 0.167, "step": 49098 }, { "epoch": 0.0870583583559623, "grad_norm": 0.6484375, "learning_rate": 0.0010823240979880681, "loss": 0.1874, "step": 49100 }, { "epoch": 0.08706190452127212, "grad_norm": 0.349609375, "learning_rate": 0.0010822645908584805, "loss": 0.2011, "step": 49102 }, { "epoch": 0.08706545068658193, "grad_norm": 0.73046875, "learning_rate": 0.0010822050838064565, "loss": 0.214, "step": 49104 }, { "epoch": 0.08706899685189175, "grad_norm": 0.37890625, "learning_rate": 0.0010821455768322574, "loss": 0.1548, "step": 49106 }, { "epoch": 0.08707254301720156, "grad_norm": 0.55078125, "learning_rate": 0.0010820860699361431, "loss": 0.1783, "step": 49108 }, { "epoch": 0.08707608918251138, "grad_norm": 0.32421875, "learning_rate": 0.0010820265631183742, "loss": 0.1515, "step": 49110 }, { "epoch": 0.08707963534782119, "grad_norm": 0.197265625, "learning_rate": 0.0010819670563792108, "loss": 0.1567, "step": 49112 }, { "epoch": 0.087083181513131, "grad_norm": 0.47265625, "learning_rate": 0.0010819075497189128, "loss": 0.1231, "step": 49114 }, { "epoch": 0.08708672767844082, "grad_norm": 0.3203125, "learning_rate": 0.0010818480431377404, "loss": 0.1263, "step": 49116 }, { "epoch": 0.08709027384375063, "grad_norm": 0.1669921875, "learning_rate": 0.0010817885366359546, "loss": 0.1568, "step": 49118 }, { "epoch": 0.08709382000906045, "grad_norm": 0.44921875, "learning_rate": 0.0010817290302138148, "loss": 0.1767, "step": 49120 }, { "epoch": 0.08709736617437026, "grad_norm": 0.2392578125, "learning_rate": 0.0010816695238715817, "loss": 0.1788, "step": 49122 }, { "epoch": 0.08710091233968008, "grad_norm": 0.1767578125, "learning_rate": 0.0010816100176095152, "loss": 0.1509, "step": 49124 }, { "epoch": 0.08710445850498989, "grad_norm": 0.314453125, "learning_rate": 0.0010815505114278763, "loss": 0.1947, "step": 49126 }, { "epoch": 0.08710800467029971, "grad_norm": 0.310546875, "learning_rate": 0.0010814910053269244, "loss": 0.1584, "step": 49128 }, { "epoch": 0.08711155083560952, "grad_norm": 1.1015625, "learning_rate": 0.0010814314993069201, "loss": 0.2555, "step": 49130 }, { "epoch": 0.08711509700091934, "grad_norm": 0.2890625, "learning_rate": 0.001081371993368124, "loss": 0.2153, "step": 49132 }, { "epoch": 0.08711864316622916, "grad_norm": 0.50390625, "learning_rate": 0.0010813124875107955, "loss": 0.2447, "step": 49134 }, { "epoch": 0.08712218933153898, "grad_norm": 0.67578125, "learning_rate": 0.0010812529817351957, "loss": 0.1509, "step": 49136 }, { "epoch": 0.0871257354968488, "grad_norm": 0.462890625, "learning_rate": 0.0010811934760415842, "loss": 0.132, "step": 49138 }, { "epoch": 0.08712928166215861, "grad_norm": 0.7734375, "learning_rate": 0.0010811339704302214, "loss": 0.1966, "step": 49140 }, { "epoch": 0.08713282782746842, "grad_norm": 0.83203125, "learning_rate": 0.0010810744649013683, "loss": 0.1728, "step": 49142 }, { "epoch": 0.08713637399277824, "grad_norm": 0.26953125, "learning_rate": 0.001081014959455284, "loss": 0.2056, "step": 49144 }, { "epoch": 0.08713992015808805, "grad_norm": 0.5234375, "learning_rate": 0.0010809554540922293, "loss": 0.1958, "step": 49146 }, { "epoch": 0.08714346632339787, "grad_norm": 0.2265625, "learning_rate": 0.001080895948812464, "loss": 0.1691, "step": 49148 }, { "epoch": 0.08714701248870768, "grad_norm": 0.7578125, "learning_rate": 0.0010808364436162494, "loss": 0.2682, "step": 49150 }, { "epoch": 0.0871505586540175, "grad_norm": 0.72265625, "learning_rate": 0.0010807769385038448, "loss": 0.2267, "step": 49152 }, { "epoch": 0.08715410481932731, "grad_norm": 0.3046875, "learning_rate": 0.0010807174334755108, "loss": 0.166, "step": 49154 }, { "epoch": 0.08715765098463713, "grad_norm": 0.2412109375, "learning_rate": 0.0010806579285315072, "loss": 0.2112, "step": 49156 }, { "epoch": 0.08716119714994694, "grad_norm": 0.50390625, "learning_rate": 0.0010805984236720951, "loss": 0.289, "step": 49158 }, { "epoch": 0.08716474331525675, "grad_norm": 0.287109375, "learning_rate": 0.0010805389188975338, "loss": 0.1265, "step": 49160 }, { "epoch": 0.08716828948056657, "grad_norm": 0.359375, "learning_rate": 0.0010804794142080841, "loss": 0.174, "step": 49162 }, { "epoch": 0.08717183564587638, "grad_norm": 1.125, "learning_rate": 0.0010804199096040058, "loss": 0.186, "step": 49164 }, { "epoch": 0.0871753818111862, "grad_norm": 0.51171875, "learning_rate": 0.0010803604050855602, "loss": 0.1598, "step": 49166 }, { "epoch": 0.08717892797649601, "grad_norm": 0.30078125, "learning_rate": 0.0010803009006530061, "loss": 0.4546, "step": 49168 }, { "epoch": 0.08718247414180583, "grad_norm": 0.259765625, "learning_rate": 0.001080241396306605, "loss": 0.1544, "step": 49170 }, { "epoch": 0.08718602030711564, "grad_norm": 2.359375, "learning_rate": 0.001080181892046616, "loss": 0.2581, "step": 49172 }, { "epoch": 0.08718956647242546, "grad_norm": 0.75, "learning_rate": 0.0010801223878733003, "loss": 0.2802, "step": 49174 }, { "epoch": 0.08719311263773527, "grad_norm": 0.298828125, "learning_rate": 0.0010800628837869173, "loss": 0.1575, "step": 49176 }, { "epoch": 0.0871966588030451, "grad_norm": 0.82421875, "learning_rate": 0.001080003379787728, "loss": 0.2281, "step": 49178 }, { "epoch": 0.08720020496835491, "grad_norm": 0.259765625, "learning_rate": 0.001079943875875992, "loss": 0.1954, "step": 49180 }, { "epoch": 0.08720375113366473, "grad_norm": 1.40625, "learning_rate": 0.0010798843720519697, "loss": 0.4447, "step": 49182 }, { "epoch": 0.08720729729897454, "grad_norm": 0.9921875, "learning_rate": 0.0010798248683159223, "loss": 0.1433, "step": 49184 }, { "epoch": 0.08721084346428436, "grad_norm": 0.380859375, "learning_rate": 0.0010797653646681085, "loss": 0.3965, "step": 49186 }, { "epoch": 0.08721438962959417, "grad_norm": 1.453125, "learning_rate": 0.0010797058611087896, "loss": 0.3735, "step": 49188 }, { "epoch": 0.08721793579490399, "grad_norm": 0.53125, "learning_rate": 0.001079646357638225, "loss": 0.2626, "step": 49190 }, { "epoch": 0.0872214819602138, "grad_norm": 0.9296875, "learning_rate": 0.0010795868542566762, "loss": 0.2007, "step": 49192 }, { "epoch": 0.08722502812552362, "grad_norm": 0.23828125, "learning_rate": 0.0010795273509644021, "loss": 0.1686, "step": 49194 }, { "epoch": 0.08722857429083343, "grad_norm": 0.283203125, "learning_rate": 0.0010794678477616634, "loss": 0.226, "step": 49196 }, { "epoch": 0.08723212045614324, "grad_norm": 0.59375, "learning_rate": 0.0010794083446487204, "loss": 0.1855, "step": 49198 }, { "epoch": 0.08723566662145306, "grad_norm": 0.28125, "learning_rate": 0.0010793488416258336, "loss": 0.1698, "step": 49200 }, { "epoch": 0.08723921278676287, "grad_norm": 0.404296875, "learning_rate": 0.0010792893386932629, "loss": 0.2158, "step": 49202 }, { "epoch": 0.08724275895207269, "grad_norm": 1.6953125, "learning_rate": 0.0010792298358512685, "loss": 0.3938, "step": 49204 }, { "epoch": 0.0872463051173825, "grad_norm": 0.435546875, "learning_rate": 0.0010791703331001108, "loss": 0.1564, "step": 49206 }, { "epoch": 0.08724985128269232, "grad_norm": 0.2099609375, "learning_rate": 0.0010791108304400502, "loss": 0.2055, "step": 49208 }, { "epoch": 0.08725339744800213, "grad_norm": 2.828125, "learning_rate": 0.0010790513278713465, "loss": 0.2818, "step": 49210 }, { "epoch": 0.08725694361331195, "grad_norm": 0.26171875, "learning_rate": 0.00107899182539426, "loss": 0.1687, "step": 49212 }, { "epoch": 0.08726048977862176, "grad_norm": 2.8125, "learning_rate": 0.0010789323230090508, "loss": 0.3767, "step": 49214 }, { "epoch": 0.08726403594393158, "grad_norm": 0.66796875, "learning_rate": 0.0010788728207159802, "loss": 0.1629, "step": 49216 }, { "epoch": 0.08726758210924139, "grad_norm": 0.39453125, "learning_rate": 0.001078813318515307, "loss": 0.2115, "step": 49218 }, { "epoch": 0.0872711282745512, "grad_norm": 0.83984375, "learning_rate": 0.001078753816407292, "loss": 0.1673, "step": 49220 }, { "epoch": 0.08727467443986102, "grad_norm": 0.62890625, "learning_rate": 0.0010786943143921955, "loss": 0.3404, "step": 49222 }, { "epoch": 0.08727822060517085, "grad_norm": 0.40625, "learning_rate": 0.001078634812470278, "loss": 0.2147, "step": 49224 }, { "epoch": 0.08728176677048066, "grad_norm": 0.34765625, "learning_rate": 0.0010785753106417992, "loss": 0.183, "step": 49226 }, { "epoch": 0.08728531293579048, "grad_norm": 0.59765625, "learning_rate": 0.0010785158089070197, "loss": 0.1888, "step": 49228 }, { "epoch": 0.08728885910110029, "grad_norm": 0.2138671875, "learning_rate": 0.0010784563072661995, "loss": 0.2595, "step": 49230 }, { "epoch": 0.0872924052664101, "grad_norm": 1.2265625, "learning_rate": 0.001078396805719599, "loss": 0.1868, "step": 49232 }, { "epoch": 0.08729595143171992, "grad_norm": 0.51953125, "learning_rate": 0.001078337304267478, "loss": 0.1769, "step": 49234 }, { "epoch": 0.08729949759702973, "grad_norm": 1.015625, "learning_rate": 0.001078277802910097, "loss": 0.1366, "step": 49236 }, { "epoch": 0.08730304376233955, "grad_norm": 0.87890625, "learning_rate": 0.0010782183016477165, "loss": 0.1883, "step": 49238 }, { "epoch": 0.08730658992764936, "grad_norm": 0.61328125, "learning_rate": 0.0010781588004805968, "loss": 0.1666, "step": 49240 }, { "epoch": 0.08731013609295918, "grad_norm": 0.466796875, "learning_rate": 0.0010780992994089975, "loss": 0.1849, "step": 49242 }, { "epoch": 0.087313682258269, "grad_norm": 0.2109375, "learning_rate": 0.0010780397984331792, "loss": 0.1467, "step": 49244 }, { "epoch": 0.08731722842357881, "grad_norm": 0.3046875, "learning_rate": 0.001077980297553402, "loss": 0.222, "step": 49246 }, { "epoch": 0.08732077458888862, "grad_norm": 0.51171875, "learning_rate": 0.0010779207967699263, "loss": 0.215, "step": 49248 }, { "epoch": 0.08732432075419844, "grad_norm": 0.443359375, "learning_rate": 0.0010778612960830121, "loss": 0.1827, "step": 49250 }, { "epoch": 0.08732786691950825, "grad_norm": 0.66015625, "learning_rate": 0.0010778017954929196, "loss": 0.306, "step": 49252 }, { "epoch": 0.08733141308481807, "grad_norm": 0.40234375, "learning_rate": 0.001077742294999909, "loss": 0.2001, "step": 49254 }, { "epoch": 0.08733495925012788, "grad_norm": 0.87109375, "learning_rate": 0.0010776827946042415, "loss": 0.2006, "step": 49256 }, { "epoch": 0.0873385054154377, "grad_norm": 0.52734375, "learning_rate": 0.001077623294306176, "loss": 0.1566, "step": 49258 }, { "epoch": 0.08734205158074751, "grad_norm": 0.5, "learning_rate": 0.0010775637941059727, "loss": 0.2673, "step": 49260 }, { "epoch": 0.08734559774605732, "grad_norm": 0.60546875, "learning_rate": 0.0010775042940038932, "loss": 0.2342, "step": 49262 }, { "epoch": 0.08734914391136714, "grad_norm": 0.5390625, "learning_rate": 0.0010774447940001964, "loss": 0.2308, "step": 49264 }, { "epoch": 0.08735269007667695, "grad_norm": 0.361328125, "learning_rate": 0.0010773852940951431, "loss": 0.1705, "step": 49266 }, { "epoch": 0.08735623624198677, "grad_norm": 0.5, "learning_rate": 0.0010773257942889934, "loss": 0.2159, "step": 49268 }, { "epoch": 0.0873597824072966, "grad_norm": 1.796875, "learning_rate": 0.0010772662945820073, "loss": 0.2665, "step": 49270 }, { "epoch": 0.08736332857260641, "grad_norm": 0.671875, "learning_rate": 0.001077206794974445, "loss": 0.1869, "step": 49272 }, { "epoch": 0.08736687473791623, "grad_norm": 2.40625, "learning_rate": 0.0010771472954665677, "loss": 0.2066, "step": 49274 }, { "epoch": 0.08737042090322604, "grad_norm": 0.384765625, "learning_rate": 0.0010770877960586346, "loss": 0.1482, "step": 49276 }, { "epoch": 0.08737396706853585, "grad_norm": 0.5390625, "learning_rate": 0.0010770282967509058, "loss": 0.2218, "step": 49278 }, { "epoch": 0.08737751323384567, "grad_norm": 1.53125, "learning_rate": 0.001076968797543642, "loss": 0.2086, "step": 49280 }, { "epoch": 0.08738105939915548, "grad_norm": 0.98046875, "learning_rate": 0.0010769092984371036, "loss": 0.2463, "step": 49282 }, { "epoch": 0.0873846055644653, "grad_norm": 0.236328125, "learning_rate": 0.0010768497994315504, "loss": 0.144, "step": 49284 }, { "epoch": 0.08738815172977511, "grad_norm": 1.265625, "learning_rate": 0.0010767903005272426, "loss": 0.2949, "step": 49286 }, { "epoch": 0.08739169789508493, "grad_norm": 0.3203125, "learning_rate": 0.0010767308017244404, "loss": 0.2104, "step": 49288 }, { "epoch": 0.08739524406039474, "grad_norm": 0.546875, "learning_rate": 0.0010766713030234047, "loss": 0.179, "step": 49290 }, { "epoch": 0.08739879022570456, "grad_norm": 0.310546875, "learning_rate": 0.0010766118044243946, "loss": 0.1535, "step": 49292 }, { "epoch": 0.08740233639101437, "grad_norm": 0.9375, "learning_rate": 0.001076552305927671, "loss": 0.2312, "step": 49294 }, { "epoch": 0.08740588255632419, "grad_norm": 0.205078125, "learning_rate": 0.0010764928075334941, "loss": 0.4115, "step": 49296 }, { "epoch": 0.087409428721634, "grad_norm": 0.283203125, "learning_rate": 0.0010764333092421244, "loss": 0.1679, "step": 49298 }, { "epoch": 0.08741297488694381, "grad_norm": 1.2890625, "learning_rate": 0.001076373811053821, "loss": 0.208, "step": 49300 }, { "epoch": 0.08741652105225363, "grad_norm": 0.419921875, "learning_rate": 0.0010763143129688454, "loss": 0.2371, "step": 49302 }, { "epoch": 0.08742006721756344, "grad_norm": 1.1328125, "learning_rate": 0.0010762548149874567, "loss": 0.2442, "step": 49304 }, { "epoch": 0.08742361338287326, "grad_norm": 0.6796875, "learning_rate": 0.001076195317109916, "loss": 0.2141, "step": 49306 }, { "epoch": 0.08742715954818307, "grad_norm": 0.28515625, "learning_rate": 0.0010761358193364832, "loss": 0.4757, "step": 49308 }, { "epoch": 0.08743070571349289, "grad_norm": 1.7578125, "learning_rate": 0.0010760763216674182, "loss": 0.2039, "step": 49310 }, { "epoch": 0.0874342518788027, "grad_norm": 0.6484375, "learning_rate": 0.0010760168241029816, "loss": 0.1942, "step": 49312 }, { "epoch": 0.08743779804411253, "grad_norm": 0.66015625, "learning_rate": 0.0010759573266434337, "loss": 0.2244, "step": 49314 }, { "epoch": 0.08744134420942234, "grad_norm": 0.443359375, "learning_rate": 0.0010758978292890343, "loss": 0.1693, "step": 49316 }, { "epoch": 0.08744489037473216, "grad_norm": 0.267578125, "learning_rate": 0.0010758383320400436, "loss": 0.1999, "step": 49318 }, { "epoch": 0.08744843654004197, "grad_norm": 0.267578125, "learning_rate": 0.0010757788348967224, "loss": 0.1713, "step": 49320 }, { "epoch": 0.08745198270535179, "grad_norm": 0.91796875, "learning_rate": 0.00107571933785933, "loss": 0.2923, "step": 49322 }, { "epoch": 0.0874555288706616, "grad_norm": 1.265625, "learning_rate": 0.0010756598409281278, "loss": 0.1806, "step": 49324 }, { "epoch": 0.08745907503597142, "grad_norm": 0.58203125, "learning_rate": 0.0010756003441033748, "loss": 0.2111, "step": 49326 }, { "epoch": 0.08746262120128123, "grad_norm": 0.45703125, "learning_rate": 0.0010755408473853317, "loss": 0.2479, "step": 49328 }, { "epoch": 0.08746616736659105, "grad_norm": 0.5234375, "learning_rate": 0.0010754813507742591, "loss": 0.1907, "step": 49330 }, { "epoch": 0.08746971353190086, "grad_norm": 1.8359375, "learning_rate": 0.0010754218542704166, "loss": 0.3059, "step": 49332 }, { "epoch": 0.08747325969721068, "grad_norm": 0.52734375, "learning_rate": 0.0010753623578740644, "loss": 0.2217, "step": 49334 }, { "epoch": 0.08747680586252049, "grad_norm": 0.40234375, "learning_rate": 0.0010753028615854631, "loss": 0.1818, "step": 49336 }, { "epoch": 0.0874803520278303, "grad_norm": 0.57421875, "learning_rate": 0.001075243365404873, "loss": 0.1979, "step": 49338 }, { "epoch": 0.08748389819314012, "grad_norm": 0.7578125, "learning_rate": 0.0010751838693325538, "loss": 0.4358, "step": 49340 }, { "epoch": 0.08748744435844993, "grad_norm": 0.478515625, "learning_rate": 0.001075124373368766, "loss": 0.163, "step": 49342 }, { "epoch": 0.08749099052375975, "grad_norm": 0.2294921875, "learning_rate": 0.0010750648775137697, "loss": 0.2265, "step": 49344 }, { "epoch": 0.08749453668906956, "grad_norm": 0.52734375, "learning_rate": 0.001075005381767825, "loss": 0.1932, "step": 49346 }, { "epoch": 0.08749808285437938, "grad_norm": 0.3671875, "learning_rate": 0.0010749458861311926, "loss": 0.1798, "step": 49348 }, { "epoch": 0.08750162901968919, "grad_norm": 0.3359375, "learning_rate": 0.001074886390604132, "loss": 0.2153, "step": 49350 }, { "epoch": 0.087505175184999, "grad_norm": 0.640625, "learning_rate": 0.0010748268951869039, "loss": 0.1951, "step": 49352 }, { "epoch": 0.08750872135030882, "grad_norm": 0.484375, "learning_rate": 0.0010747673998797685, "loss": 0.1233, "step": 49354 }, { "epoch": 0.08751226751561864, "grad_norm": 0.1982421875, "learning_rate": 0.0010747079046829857, "loss": 0.1513, "step": 49356 }, { "epoch": 0.08751581368092845, "grad_norm": 0.390625, "learning_rate": 0.0010746484095968158, "loss": 0.1782, "step": 49358 }, { "epoch": 0.08751935984623828, "grad_norm": 0.423828125, "learning_rate": 0.001074588914621519, "loss": 0.1237, "step": 49360 }, { "epoch": 0.0875229060115481, "grad_norm": 0.6015625, "learning_rate": 0.0010745294197573555, "loss": 0.1596, "step": 49362 }, { "epoch": 0.08752645217685791, "grad_norm": 1.109375, "learning_rate": 0.0010744699250045857, "loss": 0.3277, "step": 49364 }, { "epoch": 0.08752999834216772, "grad_norm": 0.515625, "learning_rate": 0.0010744104303634697, "loss": 0.2337, "step": 49366 }, { "epoch": 0.08753354450747754, "grad_norm": 0.56640625, "learning_rate": 0.001074350935834267, "loss": 0.2027, "step": 49368 }, { "epoch": 0.08753709067278735, "grad_norm": 0.578125, "learning_rate": 0.0010742914414172387, "loss": 0.203, "step": 49370 }, { "epoch": 0.08754063683809717, "grad_norm": 0.44140625, "learning_rate": 0.0010742319471126452, "loss": 0.1664, "step": 49372 }, { "epoch": 0.08754418300340698, "grad_norm": 0.8828125, "learning_rate": 0.0010741724529207458, "loss": 0.2466, "step": 49374 }, { "epoch": 0.0875477291687168, "grad_norm": 0.36328125, "learning_rate": 0.001074112958841801, "loss": 0.2022, "step": 49376 }, { "epoch": 0.08755127533402661, "grad_norm": 0.298828125, "learning_rate": 0.0010740534648760712, "loss": 0.2085, "step": 49378 }, { "epoch": 0.08755482149933642, "grad_norm": 0.427734375, "learning_rate": 0.0010739939710238168, "loss": 0.1749, "step": 49380 }, { "epoch": 0.08755836766464624, "grad_norm": 0.396484375, "learning_rate": 0.0010739344772852971, "loss": 0.251, "step": 49382 }, { "epoch": 0.08756191382995605, "grad_norm": 0.236328125, "learning_rate": 0.0010738749836607731, "loss": 0.3179, "step": 49384 }, { "epoch": 0.08756545999526587, "grad_norm": 0.224609375, "learning_rate": 0.0010738154901505045, "loss": 0.1727, "step": 49386 }, { "epoch": 0.08756900616057568, "grad_norm": 0.302734375, "learning_rate": 0.0010737559967547521, "loss": 0.2122, "step": 49388 }, { "epoch": 0.0875725523258855, "grad_norm": 0.875, "learning_rate": 0.0010736965034737755, "loss": 0.1704, "step": 49390 }, { "epoch": 0.08757609849119531, "grad_norm": 0.388671875, "learning_rate": 0.0010736370103078352, "loss": 0.4133, "step": 49392 }, { "epoch": 0.08757964465650513, "grad_norm": 0.48828125, "learning_rate": 0.0010735775172571913, "loss": 0.1723, "step": 49394 }, { "epoch": 0.08758319082181494, "grad_norm": 0.302734375, "learning_rate": 0.001073518024322104, "loss": 0.1881, "step": 49396 }, { "epoch": 0.08758673698712476, "grad_norm": 0.80859375, "learning_rate": 0.0010734585315028335, "loss": 0.1943, "step": 49398 }, { "epoch": 0.08759028315243457, "grad_norm": 0.296875, "learning_rate": 0.00107339903879964, "loss": 0.2163, "step": 49400 }, { "epoch": 0.08759382931774438, "grad_norm": 0.2294921875, "learning_rate": 0.0010733395462127835, "loss": 0.1509, "step": 49402 }, { "epoch": 0.0875973754830542, "grad_norm": 0.462890625, "learning_rate": 0.0010732800537425248, "loss": 0.1642, "step": 49404 }, { "epoch": 0.08760092164836403, "grad_norm": 0.251953125, "learning_rate": 0.001073220561389123, "loss": 0.164, "step": 49406 }, { "epoch": 0.08760446781367384, "grad_norm": 0.25, "learning_rate": 0.001073161069152839, "loss": 0.1864, "step": 49408 }, { "epoch": 0.08760801397898366, "grad_norm": 0.59765625, "learning_rate": 0.0010731015770339331, "loss": 0.2087, "step": 49410 }, { "epoch": 0.08761156014429347, "grad_norm": 0.91796875, "learning_rate": 0.0010730420850326653, "loss": 0.2552, "step": 49412 }, { "epoch": 0.08761510630960329, "grad_norm": 0.25390625, "learning_rate": 0.0010729825931492958, "loss": 0.182, "step": 49414 }, { "epoch": 0.0876186524749131, "grad_norm": 1.0078125, "learning_rate": 0.0010729231013840849, "loss": 0.4166, "step": 49416 }, { "epoch": 0.08762219864022291, "grad_norm": 0.34375, "learning_rate": 0.0010728636097372922, "loss": 0.1828, "step": 49418 }, { "epoch": 0.08762574480553273, "grad_norm": 1.6953125, "learning_rate": 0.0010728041182091785, "loss": 0.1774, "step": 49420 }, { "epoch": 0.08762929097084254, "grad_norm": 0.283203125, "learning_rate": 0.0010727446268000039, "loss": 0.2366, "step": 49422 }, { "epoch": 0.08763283713615236, "grad_norm": 0.7265625, "learning_rate": 0.0010726851355100282, "loss": 0.1797, "step": 49424 }, { "epoch": 0.08763638330146217, "grad_norm": 0.5546875, "learning_rate": 0.0010726256443395117, "loss": 0.1898, "step": 49426 }, { "epoch": 0.08763992946677199, "grad_norm": 1.4453125, "learning_rate": 0.0010725661532887153, "loss": 0.194, "step": 49428 }, { "epoch": 0.0876434756320818, "grad_norm": 0.35546875, "learning_rate": 0.0010725066623578983, "loss": 0.1859, "step": 49430 }, { "epoch": 0.08764702179739162, "grad_norm": 0.337890625, "learning_rate": 0.001072447171547321, "loss": 0.1559, "step": 49432 }, { "epoch": 0.08765056796270143, "grad_norm": 0.46484375, "learning_rate": 0.0010723876808572442, "loss": 0.2041, "step": 49434 }, { "epoch": 0.08765411412801125, "grad_norm": 0.578125, "learning_rate": 0.0010723281902879273, "loss": 0.119, "step": 49436 }, { "epoch": 0.08765766029332106, "grad_norm": 0.294921875, "learning_rate": 0.0010722686998396315, "loss": 0.1756, "step": 49438 }, { "epoch": 0.08766120645863087, "grad_norm": 0.35546875, "learning_rate": 0.0010722092095126156, "loss": 0.2093, "step": 49440 }, { "epoch": 0.08766475262394069, "grad_norm": 0.796875, "learning_rate": 0.0010721497193071404, "loss": 0.2336, "step": 49442 }, { "epoch": 0.0876682987892505, "grad_norm": 0.3046875, "learning_rate": 0.0010720902292234663, "loss": 0.1495, "step": 49444 }, { "epoch": 0.08767184495456032, "grad_norm": 0.7421875, "learning_rate": 0.0010720307392618536, "loss": 0.158, "step": 49446 }, { "epoch": 0.08767539111987013, "grad_norm": 2.265625, "learning_rate": 0.0010719712494225617, "loss": 0.2101, "step": 49448 }, { "epoch": 0.08767893728517996, "grad_norm": 0.3203125, "learning_rate": 0.0010719117597058519, "loss": 0.1658, "step": 49450 }, { "epoch": 0.08768248345048978, "grad_norm": 1.578125, "learning_rate": 0.0010718522701119832, "loss": 0.1766, "step": 49452 }, { "epoch": 0.08768602961579959, "grad_norm": 0.78515625, "learning_rate": 0.0010717927806412168, "loss": 0.1326, "step": 49454 }, { "epoch": 0.0876895757811094, "grad_norm": 0.515625, "learning_rate": 0.001071733291293812, "loss": 0.261, "step": 49456 }, { "epoch": 0.08769312194641922, "grad_norm": 0.5234375, "learning_rate": 0.0010716738020700293, "loss": 0.3625, "step": 49458 }, { "epoch": 0.08769666811172903, "grad_norm": 0.16015625, "learning_rate": 0.0010716143129701292, "loss": 0.1314, "step": 49460 }, { "epoch": 0.08770021427703885, "grad_norm": 0.64453125, "learning_rate": 0.0010715548239943718, "loss": 0.1721, "step": 49462 }, { "epoch": 0.08770376044234866, "grad_norm": 0.3359375, "learning_rate": 0.0010714953351430169, "loss": 0.137, "step": 49464 }, { "epoch": 0.08770730660765848, "grad_norm": 0.291015625, "learning_rate": 0.0010714358464163243, "loss": 0.2252, "step": 49466 }, { "epoch": 0.08771085277296829, "grad_norm": 1.4921875, "learning_rate": 0.0010713763578145555, "loss": 0.2386, "step": 49468 }, { "epoch": 0.08771439893827811, "grad_norm": 0.25390625, "learning_rate": 0.0010713168693379696, "loss": 0.1994, "step": 49470 }, { "epoch": 0.08771794510358792, "grad_norm": 0.3046875, "learning_rate": 0.001071257380986827, "loss": 0.1934, "step": 49472 }, { "epoch": 0.08772149126889774, "grad_norm": 0.455078125, "learning_rate": 0.001071197892761388, "loss": 0.2177, "step": 49474 }, { "epoch": 0.08772503743420755, "grad_norm": 1.1015625, "learning_rate": 0.0010711384046619124, "loss": 0.1709, "step": 49476 }, { "epoch": 0.08772858359951737, "grad_norm": 0.76953125, "learning_rate": 0.0010710789166886612, "loss": 0.2023, "step": 49478 }, { "epoch": 0.08773212976482718, "grad_norm": 0.890625, "learning_rate": 0.0010710194288418935, "loss": 0.2071, "step": 49480 }, { "epoch": 0.087735675930137, "grad_norm": 0.9296875, "learning_rate": 0.00107095994112187, "loss": 0.4871, "step": 49482 }, { "epoch": 0.08773922209544681, "grad_norm": 0.578125, "learning_rate": 0.001070900453528851, "loss": 0.1526, "step": 49484 }, { "epoch": 0.08774276826075662, "grad_norm": 0.3984375, "learning_rate": 0.001070840966063097, "loss": 0.2574, "step": 49486 }, { "epoch": 0.08774631442606644, "grad_norm": 1.1328125, "learning_rate": 0.0010707814787248669, "loss": 0.1505, "step": 49488 }, { "epoch": 0.08774986059137625, "grad_norm": 0.302734375, "learning_rate": 0.001070721991514422, "loss": 0.1594, "step": 49490 }, { "epoch": 0.08775340675668607, "grad_norm": 0.75, "learning_rate": 0.001070662504432022, "loss": 0.2131, "step": 49492 }, { "epoch": 0.08775695292199588, "grad_norm": 2.8125, "learning_rate": 0.0010706030174779276, "loss": 0.1475, "step": 49494 }, { "epoch": 0.08776049908730571, "grad_norm": 0.8203125, "learning_rate": 0.001070543530652398, "loss": 0.2545, "step": 49496 }, { "epoch": 0.08776404525261552, "grad_norm": 1.0, "learning_rate": 0.001070484043955694, "loss": 0.1584, "step": 49498 }, { "epoch": 0.08776759141792534, "grad_norm": 0.37890625, "learning_rate": 0.0010704245573880755, "loss": 0.1822, "step": 49500 }, { "epoch": 0.08777113758323515, "grad_norm": 0.5234375, "learning_rate": 0.001070365070949803, "loss": 0.197, "step": 49502 }, { "epoch": 0.08777468374854497, "grad_norm": 0.76953125, "learning_rate": 0.0010703055846411366, "loss": 0.2161, "step": 49504 }, { "epoch": 0.08777822991385478, "grad_norm": 0.193359375, "learning_rate": 0.0010702460984623361, "loss": 0.1514, "step": 49506 }, { "epoch": 0.0877817760791646, "grad_norm": 0.32421875, "learning_rate": 0.0010701866124136622, "loss": 0.1732, "step": 49508 }, { "epoch": 0.08778532224447441, "grad_norm": 0.17578125, "learning_rate": 0.0010701271264953744, "loss": 0.1835, "step": 49510 }, { "epoch": 0.08778886840978423, "grad_norm": 0.287109375, "learning_rate": 0.0010700676407077335, "loss": 0.223, "step": 49512 }, { "epoch": 0.08779241457509404, "grad_norm": 0.349609375, "learning_rate": 0.001070008155050999, "loss": 0.2251, "step": 49514 }, { "epoch": 0.08779596074040386, "grad_norm": 0.216796875, "learning_rate": 0.0010699486695254314, "loss": 0.2973, "step": 49516 }, { "epoch": 0.08779950690571367, "grad_norm": 0.3359375, "learning_rate": 0.001069889184131291, "loss": 0.2376, "step": 49518 }, { "epoch": 0.08780305307102348, "grad_norm": 0.408203125, "learning_rate": 0.001069829698868838, "loss": 0.2163, "step": 49520 }, { "epoch": 0.0878065992363333, "grad_norm": 0.359375, "learning_rate": 0.0010697702137383322, "loss": 0.1685, "step": 49522 }, { "epoch": 0.08781014540164311, "grad_norm": 0.33984375, "learning_rate": 0.001069710728740034, "loss": 0.1622, "step": 49524 }, { "epoch": 0.08781369156695293, "grad_norm": 0.6796875, "learning_rate": 0.0010696512438742032, "loss": 0.1833, "step": 49526 }, { "epoch": 0.08781723773226274, "grad_norm": 0.3046875, "learning_rate": 0.0010695917591411008, "loss": 0.1819, "step": 49528 }, { "epoch": 0.08782078389757256, "grad_norm": 0.89453125, "learning_rate": 0.0010695322745409859, "loss": 0.2494, "step": 49530 }, { "epoch": 0.08782433006288237, "grad_norm": 0.2890625, "learning_rate": 0.0010694727900741193, "loss": 0.1934, "step": 49532 }, { "epoch": 0.08782787622819219, "grad_norm": 0.4765625, "learning_rate": 0.0010694133057407606, "loss": 0.2291, "step": 49534 }, { "epoch": 0.087831422393502, "grad_norm": 2.25, "learning_rate": 0.0010693538215411711, "loss": 0.1529, "step": 49536 }, { "epoch": 0.08783496855881182, "grad_norm": 0.69921875, "learning_rate": 0.0010692943374756097, "loss": 0.1696, "step": 49538 }, { "epoch": 0.08783851472412163, "grad_norm": 0.38671875, "learning_rate": 0.001069234853544337, "loss": 0.1548, "step": 49540 }, { "epoch": 0.08784206088943146, "grad_norm": 0.23828125, "learning_rate": 0.0010691753697476133, "loss": 0.2126, "step": 49542 }, { "epoch": 0.08784560705474127, "grad_norm": 0.65234375, "learning_rate": 0.0010691158860856986, "loss": 0.2079, "step": 49544 }, { "epoch": 0.08784915322005109, "grad_norm": 0.53125, "learning_rate": 0.0010690564025588533, "loss": 0.2377, "step": 49546 }, { "epoch": 0.0878526993853609, "grad_norm": 0.328125, "learning_rate": 0.001068996919167337, "loss": 0.15, "step": 49548 }, { "epoch": 0.08785624555067072, "grad_norm": 0.2490234375, "learning_rate": 0.00106893743591141, "loss": 0.17, "step": 49550 }, { "epoch": 0.08785979171598053, "grad_norm": 0.291015625, "learning_rate": 0.0010688779527913332, "loss": 0.1584, "step": 49552 }, { "epoch": 0.08786333788129035, "grad_norm": 0.462890625, "learning_rate": 0.0010688184698073658, "loss": 0.2695, "step": 49554 }, { "epoch": 0.08786688404660016, "grad_norm": 0.2451171875, "learning_rate": 0.0010687589869597682, "loss": 0.1608, "step": 49556 }, { "epoch": 0.08787043021190998, "grad_norm": 0.470703125, "learning_rate": 0.0010686995042488007, "loss": 0.221, "step": 49558 }, { "epoch": 0.08787397637721979, "grad_norm": 0.388671875, "learning_rate": 0.001068640021674724, "loss": 0.3071, "step": 49560 }, { "epoch": 0.0878775225425296, "grad_norm": 1.9765625, "learning_rate": 0.0010685805392377967, "loss": 0.279, "step": 49562 }, { "epoch": 0.08788106870783942, "grad_norm": 0.435546875, "learning_rate": 0.0010685210569382805, "loss": 0.1473, "step": 49564 }, { "epoch": 0.08788461487314923, "grad_norm": 0.3515625, "learning_rate": 0.0010684615747764345, "loss": 0.1257, "step": 49566 }, { "epoch": 0.08788816103845905, "grad_norm": 0.2001953125, "learning_rate": 0.0010684020927525198, "loss": 0.1616, "step": 49568 }, { "epoch": 0.08789170720376886, "grad_norm": 0.466796875, "learning_rate": 0.0010683426108667954, "loss": 0.1411, "step": 49570 }, { "epoch": 0.08789525336907868, "grad_norm": 2.65625, "learning_rate": 0.001068283129119522, "loss": 0.1826, "step": 49572 }, { "epoch": 0.08789879953438849, "grad_norm": 2.6875, "learning_rate": 0.00106822364751096, "loss": 0.2269, "step": 49574 }, { "epoch": 0.0879023456996983, "grad_norm": 0.443359375, "learning_rate": 0.0010681641660413696, "loss": 0.2079, "step": 49576 }, { "epoch": 0.08790589186500812, "grad_norm": 0.56640625, "learning_rate": 0.0010681046847110104, "loss": 0.2139, "step": 49578 }, { "epoch": 0.08790943803031794, "grad_norm": 0.5625, "learning_rate": 0.0010680452035201427, "loss": 0.1732, "step": 49580 }, { "epoch": 0.08791298419562775, "grad_norm": 0.39453125, "learning_rate": 0.001067985722469027, "loss": 0.1704, "step": 49582 }, { "epoch": 0.08791653036093756, "grad_norm": 0.291015625, "learning_rate": 0.0010679262415579228, "loss": 0.1196, "step": 49584 }, { "epoch": 0.08792007652624739, "grad_norm": 0.8671875, "learning_rate": 0.0010678667607870909, "loss": 0.1865, "step": 49586 }, { "epoch": 0.08792362269155721, "grad_norm": 0.439453125, "learning_rate": 0.0010678072801567908, "loss": 0.1338, "step": 49588 }, { "epoch": 0.08792716885686702, "grad_norm": 0.828125, "learning_rate": 0.001067747799667283, "loss": 0.1414, "step": 49590 }, { "epoch": 0.08793071502217684, "grad_norm": 0.28125, "learning_rate": 0.001067688319318828, "loss": 0.2572, "step": 49592 }, { "epoch": 0.08793426118748665, "grad_norm": 2.140625, "learning_rate": 0.001067628839111685, "loss": 0.5188, "step": 49594 }, { "epoch": 0.08793780735279647, "grad_norm": 0.2099609375, "learning_rate": 0.0010675693590461149, "loss": 0.1347, "step": 49596 }, { "epoch": 0.08794135351810628, "grad_norm": 0.201171875, "learning_rate": 0.0010675098791223774, "loss": 0.2071, "step": 49598 }, { "epoch": 0.0879448996834161, "grad_norm": 0.392578125, "learning_rate": 0.001067450399340733, "loss": 0.2031, "step": 49600 }, { "epoch": 0.08794844584872591, "grad_norm": 0.328125, "learning_rate": 0.0010673909197014418, "loss": 0.1938, "step": 49602 }, { "epoch": 0.08795199201403572, "grad_norm": 1.2578125, "learning_rate": 0.0010673314402047636, "loss": 0.202, "step": 49604 }, { "epoch": 0.08795553817934554, "grad_norm": 0.1669921875, "learning_rate": 0.0010672719608509585, "loss": 0.1489, "step": 49606 }, { "epoch": 0.08795908434465535, "grad_norm": 0.57421875, "learning_rate": 0.0010672124816402868, "loss": 0.2926, "step": 49608 }, { "epoch": 0.08796263050996517, "grad_norm": 0.357421875, "learning_rate": 0.0010671530025730095, "loss": 0.1737, "step": 49610 }, { "epoch": 0.08796617667527498, "grad_norm": 0.466796875, "learning_rate": 0.001067093523649385, "loss": 0.2106, "step": 49612 }, { "epoch": 0.0879697228405848, "grad_norm": 0.59375, "learning_rate": 0.0010670340448696745, "loss": 0.2375, "step": 49614 }, { "epoch": 0.08797326900589461, "grad_norm": 0.427734375, "learning_rate": 0.0010669745662341382, "loss": 0.1912, "step": 49616 }, { "epoch": 0.08797681517120443, "grad_norm": 0.75390625, "learning_rate": 0.0010669150877430358, "loss": 0.2129, "step": 49618 }, { "epoch": 0.08798036133651424, "grad_norm": 0.2392578125, "learning_rate": 0.0010668556093966276, "loss": 0.1863, "step": 49620 }, { "epoch": 0.08798390750182405, "grad_norm": 0.5546875, "learning_rate": 0.0010667961311951735, "loss": 0.1532, "step": 49622 }, { "epoch": 0.08798745366713387, "grad_norm": 0.314453125, "learning_rate": 0.001066736653138934, "loss": 0.19, "step": 49624 }, { "epoch": 0.08799099983244368, "grad_norm": 0.38671875, "learning_rate": 0.0010666771752281695, "loss": 0.1783, "step": 49626 }, { "epoch": 0.0879945459977535, "grad_norm": 0.5390625, "learning_rate": 0.001066617697463139, "loss": 0.1855, "step": 49628 }, { "epoch": 0.08799809216306331, "grad_norm": 0.408203125, "learning_rate": 0.0010665582198441034, "loss": 0.1784, "step": 49630 }, { "epoch": 0.08800163832837314, "grad_norm": 0.412109375, "learning_rate": 0.0010664987423713228, "loss": 0.1661, "step": 49632 }, { "epoch": 0.08800518449368296, "grad_norm": 2.671875, "learning_rate": 0.0010664392650450577, "loss": 0.2887, "step": 49634 }, { "epoch": 0.08800873065899277, "grad_norm": 0.51953125, "learning_rate": 0.0010663797878655672, "loss": 0.1525, "step": 49636 }, { "epoch": 0.08801227682430258, "grad_norm": 0.416015625, "learning_rate": 0.0010663203108331122, "loss": 0.2154, "step": 49638 }, { "epoch": 0.0880158229896124, "grad_norm": 0.263671875, "learning_rate": 0.0010662608339479524, "loss": 0.1705, "step": 49640 }, { "epoch": 0.08801936915492221, "grad_norm": 0.1728515625, "learning_rate": 0.0010662013572103489, "loss": 0.2124, "step": 49642 }, { "epoch": 0.08802291532023203, "grad_norm": 0.22265625, "learning_rate": 0.00106614188062056, "loss": 0.1688, "step": 49644 }, { "epoch": 0.08802646148554184, "grad_norm": 0.2216796875, "learning_rate": 0.0010660824041788472, "loss": 0.1655, "step": 49646 }, { "epoch": 0.08803000765085166, "grad_norm": 0.38671875, "learning_rate": 0.0010660229278854703, "loss": 0.1536, "step": 49648 }, { "epoch": 0.08803355381616147, "grad_norm": 2.15625, "learning_rate": 0.0010659634517406896, "loss": 0.4084, "step": 49650 }, { "epoch": 0.08803709998147129, "grad_norm": 1.15625, "learning_rate": 0.0010659039757447647, "loss": 0.1776, "step": 49652 }, { "epoch": 0.0880406461467811, "grad_norm": 0.53515625, "learning_rate": 0.001065844499897956, "loss": 0.2089, "step": 49654 }, { "epoch": 0.08804419231209092, "grad_norm": 1.5, "learning_rate": 0.0010657850242005238, "loss": 0.2007, "step": 49656 }, { "epoch": 0.08804773847740073, "grad_norm": 2.53125, "learning_rate": 0.001065725548652728, "loss": 0.2155, "step": 49658 }, { "epoch": 0.08805128464271055, "grad_norm": 0.6640625, "learning_rate": 0.001065666073254829, "loss": 0.1602, "step": 49660 }, { "epoch": 0.08805483080802036, "grad_norm": 0.2734375, "learning_rate": 0.0010656065980070859, "loss": 0.2417, "step": 49662 }, { "epoch": 0.08805837697333017, "grad_norm": 2.25, "learning_rate": 0.0010655471229097599, "loss": 0.3037, "step": 49664 }, { "epoch": 0.08806192313863999, "grad_norm": 0.8046875, "learning_rate": 0.0010654876479631113, "loss": 0.2164, "step": 49666 }, { "epoch": 0.0880654693039498, "grad_norm": 2.546875, "learning_rate": 0.0010654281731673992, "loss": 0.2991, "step": 49668 }, { "epoch": 0.08806901546925962, "grad_norm": 0.45703125, "learning_rate": 0.0010653686985228842, "loss": 0.1797, "step": 49670 }, { "epoch": 0.08807256163456943, "grad_norm": 0.3046875, "learning_rate": 0.0010653092240298261, "loss": 0.1884, "step": 49672 }, { "epoch": 0.08807610779987925, "grad_norm": 0.8046875, "learning_rate": 0.0010652497496884859, "loss": 0.1881, "step": 49674 }, { "epoch": 0.08807965396518906, "grad_norm": 0.7109375, "learning_rate": 0.0010651902754991228, "loss": 0.2127, "step": 49676 }, { "epoch": 0.08808320013049889, "grad_norm": 0.306640625, "learning_rate": 0.0010651308014619973, "loss": 0.2939, "step": 49678 }, { "epoch": 0.0880867462958087, "grad_norm": 0.34375, "learning_rate": 0.0010650713275773694, "loss": 0.2273, "step": 49680 }, { "epoch": 0.08809029246111852, "grad_norm": 0.85546875, "learning_rate": 0.001065011853845499, "loss": 0.1475, "step": 49682 }, { "epoch": 0.08809383862642833, "grad_norm": 1.2421875, "learning_rate": 0.001064952380266647, "loss": 0.2427, "step": 49684 }, { "epoch": 0.08809738479173815, "grad_norm": 2.21875, "learning_rate": 0.0010648929068410725, "loss": 0.1679, "step": 49686 }, { "epoch": 0.08810093095704796, "grad_norm": 0.1796875, "learning_rate": 0.001064833433569036, "loss": 0.2155, "step": 49688 }, { "epoch": 0.08810447712235778, "grad_norm": 0.1669921875, "learning_rate": 0.0010647739604507976, "loss": 0.1549, "step": 49690 }, { "epoch": 0.08810802328766759, "grad_norm": 0.34765625, "learning_rate": 0.0010647144874866177, "loss": 0.152, "step": 49692 }, { "epoch": 0.0881115694529774, "grad_norm": 0.21484375, "learning_rate": 0.001064655014676756, "loss": 0.2098, "step": 49694 }, { "epoch": 0.08811511561828722, "grad_norm": 0.54296875, "learning_rate": 0.0010645955420214728, "loss": 0.1266, "step": 49696 }, { "epoch": 0.08811866178359704, "grad_norm": 0.98828125, "learning_rate": 0.0010645360695210279, "loss": 0.2707, "step": 49698 }, { "epoch": 0.08812220794890685, "grad_norm": 0.265625, "learning_rate": 0.001064476597175682, "loss": 0.2253, "step": 49700 }, { "epoch": 0.08812575411421666, "grad_norm": 1.046875, "learning_rate": 0.0010644171249856945, "loss": 0.2054, "step": 49702 }, { "epoch": 0.08812930027952648, "grad_norm": 1.265625, "learning_rate": 0.001064357652951326, "loss": 0.2012, "step": 49704 }, { "epoch": 0.0881328464448363, "grad_norm": 2.0, "learning_rate": 0.0010642981810728362, "loss": 0.2408, "step": 49706 }, { "epoch": 0.08813639261014611, "grad_norm": 0.263671875, "learning_rate": 0.0010642387093504858, "loss": 0.2145, "step": 49708 }, { "epoch": 0.08813993877545592, "grad_norm": 0.6171875, "learning_rate": 0.0010641792377845342, "loss": 0.221, "step": 49710 }, { "epoch": 0.08814348494076574, "grad_norm": 2.71875, "learning_rate": 0.001064119766375242, "loss": 0.4624, "step": 49712 }, { "epoch": 0.08814703110607555, "grad_norm": 0.38671875, "learning_rate": 0.001064060295122869, "loss": 0.2586, "step": 49714 }, { "epoch": 0.08815057727138537, "grad_norm": 0.73828125, "learning_rate": 0.0010640008240276758, "loss": 0.2226, "step": 49716 }, { "epoch": 0.08815412343669518, "grad_norm": 0.328125, "learning_rate": 0.0010639413530899213, "loss": 0.162, "step": 49718 }, { "epoch": 0.088157669602005, "grad_norm": 0.6953125, "learning_rate": 0.001063881882309867, "loss": 0.1605, "step": 49720 }, { "epoch": 0.08816121576731482, "grad_norm": 0.49609375, "learning_rate": 0.0010638224116877716, "loss": 0.2021, "step": 49722 }, { "epoch": 0.08816476193262464, "grad_norm": 1.640625, "learning_rate": 0.001063762941223897, "loss": 0.3174, "step": 49724 }, { "epoch": 0.08816830809793445, "grad_norm": 1.34375, "learning_rate": 0.0010637034709185017, "loss": 0.2351, "step": 49726 }, { "epoch": 0.08817185426324427, "grad_norm": 0.349609375, "learning_rate": 0.0010636440007718462, "loss": 0.1656, "step": 49728 }, { "epoch": 0.08817540042855408, "grad_norm": 0.37109375, "learning_rate": 0.001063584530784191, "loss": 0.2065, "step": 49730 }, { "epoch": 0.0881789465938639, "grad_norm": 0.81640625, "learning_rate": 0.0010635250609557958, "loss": 0.2149, "step": 49732 }, { "epoch": 0.08818249275917371, "grad_norm": 0.310546875, "learning_rate": 0.0010634655912869209, "loss": 0.2379, "step": 49734 }, { "epoch": 0.08818603892448353, "grad_norm": 0.4296875, "learning_rate": 0.0010634061217778261, "loss": 0.1678, "step": 49736 }, { "epoch": 0.08818958508979334, "grad_norm": 0.2080078125, "learning_rate": 0.0010633466524287718, "loss": 0.2206, "step": 49738 }, { "epoch": 0.08819313125510315, "grad_norm": 0.296875, "learning_rate": 0.0010632871832400182, "loss": 0.1556, "step": 49740 }, { "epoch": 0.08819667742041297, "grad_norm": 0.197265625, "learning_rate": 0.0010632277142118248, "loss": 0.1791, "step": 49742 }, { "epoch": 0.08820022358572278, "grad_norm": 0.71484375, "learning_rate": 0.0010631682453444518, "loss": 0.3601, "step": 49744 }, { "epoch": 0.0882037697510326, "grad_norm": 0.447265625, "learning_rate": 0.0010631087766381595, "loss": 0.1441, "step": 49746 }, { "epoch": 0.08820731591634241, "grad_norm": 0.2353515625, "learning_rate": 0.0010630493080932087, "loss": 0.1734, "step": 49748 }, { "epoch": 0.08821086208165223, "grad_norm": 1.671875, "learning_rate": 0.0010629898397098581, "loss": 0.2633, "step": 49750 }, { "epoch": 0.08821440824696204, "grad_norm": 0.4140625, "learning_rate": 0.0010629303714883686, "loss": 0.1924, "step": 49752 }, { "epoch": 0.08821795441227186, "grad_norm": 0.361328125, "learning_rate": 0.0010628709034290003, "loss": 0.1851, "step": 49754 }, { "epoch": 0.08822150057758167, "grad_norm": 0.3125, "learning_rate": 0.0010628114355320131, "loss": 0.1507, "step": 49756 }, { "epoch": 0.08822504674289149, "grad_norm": 0.2109375, "learning_rate": 0.001062751967797667, "loss": 0.1433, "step": 49758 }, { "epoch": 0.0882285929082013, "grad_norm": 0.4609375, "learning_rate": 0.001062692500226222, "loss": 0.1425, "step": 49760 }, { "epoch": 0.08823213907351111, "grad_norm": 0.390625, "learning_rate": 0.0010626330328179384, "loss": 0.1502, "step": 49762 }, { "epoch": 0.08823568523882093, "grad_norm": 0.98046875, "learning_rate": 0.001062573565573076, "loss": 0.2004, "step": 49764 }, { "epoch": 0.08823923140413074, "grad_norm": 0.294921875, "learning_rate": 0.0010625140984918955, "loss": 0.1838, "step": 49766 }, { "epoch": 0.08824277756944057, "grad_norm": 0.8671875, "learning_rate": 0.0010624546315746563, "loss": 0.1546, "step": 49768 }, { "epoch": 0.08824632373475039, "grad_norm": 0.859375, "learning_rate": 0.001062395164821619, "loss": 0.275, "step": 49770 }, { "epoch": 0.0882498699000602, "grad_norm": 0.23046875, "learning_rate": 0.001062335698233043, "loss": 0.1564, "step": 49772 }, { "epoch": 0.08825341606537002, "grad_norm": 0.5390625, "learning_rate": 0.0010622762318091895, "loss": 0.1985, "step": 49774 }, { "epoch": 0.08825696223067983, "grad_norm": 0.42578125, "learning_rate": 0.0010622167655503173, "loss": 0.2325, "step": 49776 }, { "epoch": 0.08826050839598965, "grad_norm": 0.236328125, "learning_rate": 0.0010621572994566869, "loss": 0.1928, "step": 49778 }, { "epoch": 0.08826405456129946, "grad_norm": 0.2333984375, "learning_rate": 0.0010620978335285586, "loss": 0.1838, "step": 49780 }, { "epoch": 0.08826760072660927, "grad_norm": 0.1728515625, "learning_rate": 0.0010620383677661925, "loss": 0.2442, "step": 49782 }, { "epoch": 0.08827114689191909, "grad_norm": 0.322265625, "learning_rate": 0.0010619789021698485, "loss": 0.1914, "step": 49784 }, { "epoch": 0.0882746930572289, "grad_norm": 0.345703125, "learning_rate": 0.0010619194367397866, "loss": 0.2193, "step": 49786 }, { "epoch": 0.08827823922253872, "grad_norm": 1.25, "learning_rate": 0.001061859971476267, "loss": 0.3542, "step": 49788 }, { "epoch": 0.08828178538784853, "grad_norm": 0.185546875, "learning_rate": 0.0010618005063795497, "loss": 0.3142, "step": 49790 }, { "epoch": 0.08828533155315835, "grad_norm": 0.44921875, "learning_rate": 0.001061741041449895, "loss": 0.1548, "step": 49792 }, { "epoch": 0.08828887771846816, "grad_norm": 0.71484375, "learning_rate": 0.0010616815766875626, "loss": 0.1777, "step": 49794 }, { "epoch": 0.08829242388377798, "grad_norm": 1.2109375, "learning_rate": 0.0010616221120928124, "loss": 0.1912, "step": 49796 }, { "epoch": 0.08829597004908779, "grad_norm": 0.2353515625, "learning_rate": 0.0010615626476659056, "loss": 0.1882, "step": 49798 }, { "epoch": 0.0882995162143976, "grad_norm": 0.5859375, "learning_rate": 0.0010615031834071008, "loss": 0.2053, "step": 49800 }, { "epoch": 0.08830306237970742, "grad_norm": 0.51953125, "learning_rate": 0.0010614437193166588, "loss": 0.2183, "step": 49802 }, { "epoch": 0.08830660854501723, "grad_norm": 3.3125, "learning_rate": 0.00106138425539484, "loss": 0.1558, "step": 49804 }, { "epoch": 0.08831015471032705, "grad_norm": 0.37890625, "learning_rate": 0.0010613247916419035, "loss": 0.1735, "step": 49806 }, { "epoch": 0.08831370087563686, "grad_norm": 0.408203125, "learning_rate": 0.0010612653280581102, "loss": 0.1828, "step": 49808 }, { "epoch": 0.08831724704094668, "grad_norm": 0.6796875, "learning_rate": 0.0010612058646437196, "loss": 0.1617, "step": 49810 }, { "epoch": 0.08832079320625649, "grad_norm": 1.0546875, "learning_rate": 0.001061146401398992, "loss": 0.1673, "step": 49812 }, { "epoch": 0.08832433937156632, "grad_norm": 0.50390625, "learning_rate": 0.0010610869383241878, "loss": 0.2135, "step": 49814 }, { "epoch": 0.08832788553687614, "grad_norm": 0.291015625, "learning_rate": 0.0010610274754195665, "loss": 0.2675, "step": 49816 }, { "epoch": 0.08833143170218595, "grad_norm": 2.9375, "learning_rate": 0.0010609680126853884, "loss": 0.268, "step": 49818 }, { "epoch": 0.08833497786749576, "grad_norm": 0.36328125, "learning_rate": 0.0010609085501219135, "loss": 0.189, "step": 49820 }, { "epoch": 0.08833852403280558, "grad_norm": 0.357421875, "learning_rate": 0.0010608490877294023, "loss": 0.1742, "step": 49822 }, { "epoch": 0.0883420701981154, "grad_norm": 0.625, "learning_rate": 0.001060789625508114, "loss": 0.1412, "step": 49824 }, { "epoch": 0.08834561636342521, "grad_norm": 0.412109375, "learning_rate": 0.0010607301634583094, "loss": 0.2006, "step": 49826 }, { "epoch": 0.08834916252873502, "grad_norm": 0.8671875, "learning_rate": 0.001060670701580248, "loss": 0.3222, "step": 49828 }, { "epoch": 0.08835270869404484, "grad_norm": 0.796875, "learning_rate": 0.0010606112398741905, "loss": 0.1925, "step": 49830 }, { "epoch": 0.08835625485935465, "grad_norm": 0.349609375, "learning_rate": 0.001060551778340396, "loss": 0.1514, "step": 49832 }, { "epoch": 0.08835980102466447, "grad_norm": 0.43359375, "learning_rate": 0.0010604923169791251, "loss": 0.1894, "step": 49834 }, { "epoch": 0.08836334718997428, "grad_norm": 0.91015625, "learning_rate": 0.0010604328557906383, "loss": 0.1753, "step": 49836 }, { "epoch": 0.0883668933552841, "grad_norm": 1.9453125, "learning_rate": 0.0010603733947751953, "loss": 0.2584, "step": 49838 }, { "epoch": 0.08837043952059391, "grad_norm": 0.376953125, "learning_rate": 0.0010603139339330557, "loss": 0.1684, "step": 49840 }, { "epoch": 0.08837398568590372, "grad_norm": 0.40625, "learning_rate": 0.0010602544732644797, "loss": 0.1926, "step": 49842 }, { "epoch": 0.08837753185121354, "grad_norm": 0.4609375, "learning_rate": 0.0010601950127697279, "loss": 0.1909, "step": 49844 }, { "epoch": 0.08838107801652335, "grad_norm": 1.0078125, "learning_rate": 0.0010601355524490598, "loss": 0.23, "step": 49846 }, { "epoch": 0.08838462418183317, "grad_norm": 0.6171875, "learning_rate": 0.0010600760923027358, "loss": 0.1938, "step": 49848 }, { "epoch": 0.08838817034714298, "grad_norm": 0.6328125, "learning_rate": 0.0010600166323310159, "loss": 0.1935, "step": 49850 }, { "epoch": 0.0883917165124528, "grad_norm": 0.400390625, "learning_rate": 0.0010599571725341597, "loss": 0.234, "step": 49852 }, { "epoch": 0.08839526267776261, "grad_norm": 0.9609375, "learning_rate": 0.0010598977129124276, "loss": 0.1562, "step": 49854 }, { "epoch": 0.08839880884307243, "grad_norm": 0.6484375, "learning_rate": 0.00105983825346608, "loss": 0.3394, "step": 49856 }, { "epoch": 0.08840235500838226, "grad_norm": 0.474609375, "learning_rate": 0.0010597787941953763, "loss": 0.2053, "step": 49858 }, { "epoch": 0.08840590117369207, "grad_norm": 1.7265625, "learning_rate": 0.0010597193351005768, "loss": 0.285, "step": 49860 }, { "epoch": 0.08840944733900188, "grad_norm": 0.23828125, "learning_rate": 0.0010596598761819414, "loss": 0.1417, "step": 49862 }, { "epoch": 0.0884129935043117, "grad_norm": 1.0625, "learning_rate": 0.0010596004174397305, "loss": 0.2061, "step": 49864 }, { "epoch": 0.08841653966962151, "grad_norm": 0.291015625, "learning_rate": 0.0010595409588742039, "loss": 0.2587, "step": 49866 }, { "epoch": 0.08842008583493133, "grad_norm": 0.71484375, "learning_rate": 0.0010594815004856214, "loss": 0.1841, "step": 49868 }, { "epoch": 0.08842363200024114, "grad_norm": 0.65234375, "learning_rate": 0.001059422042274243, "loss": 0.2529, "step": 49870 }, { "epoch": 0.08842717816555096, "grad_norm": 0.55078125, "learning_rate": 0.0010593625842403298, "loss": 0.2445, "step": 49872 }, { "epoch": 0.08843072433086077, "grad_norm": 0.466796875, "learning_rate": 0.0010593031263841407, "loss": 0.1612, "step": 49874 }, { "epoch": 0.08843427049617059, "grad_norm": 3.4375, "learning_rate": 0.001059243668705936, "loss": 0.3073, "step": 49876 }, { "epoch": 0.0884378166614804, "grad_norm": 0.22265625, "learning_rate": 0.001059184211205976, "loss": 0.1643, "step": 49878 }, { "epoch": 0.08844136282679022, "grad_norm": 0.79296875, "learning_rate": 0.0010591247538845206, "loss": 0.1452, "step": 49880 }, { "epoch": 0.08844490899210003, "grad_norm": 0.498046875, "learning_rate": 0.0010590652967418296, "loss": 0.2464, "step": 49882 }, { "epoch": 0.08844845515740984, "grad_norm": 0.1845703125, "learning_rate": 0.001059005839778163, "loss": 0.1894, "step": 49884 }, { "epoch": 0.08845200132271966, "grad_norm": 0.52734375, "learning_rate": 0.001058946382993781, "loss": 0.1777, "step": 49886 }, { "epoch": 0.08845554748802947, "grad_norm": 0.375, "learning_rate": 0.0010588869263889445, "loss": 0.2104, "step": 49888 }, { "epoch": 0.08845909365333929, "grad_norm": 0.63671875, "learning_rate": 0.001058827469963912, "loss": 0.1961, "step": 49890 }, { "epoch": 0.0884626398186491, "grad_norm": 0.45703125, "learning_rate": 0.0010587680137189445, "loss": 0.2261, "step": 49892 }, { "epoch": 0.08846618598395892, "grad_norm": 0.91796875, "learning_rate": 0.0010587085576543012, "loss": 0.1443, "step": 49894 }, { "epoch": 0.08846973214926873, "grad_norm": 0.259765625, "learning_rate": 0.0010586491017702435, "loss": 0.1494, "step": 49896 }, { "epoch": 0.08847327831457855, "grad_norm": 0.23828125, "learning_rate": 0.0010585896460670301, "loss": 0.1527, "step": 49898 }, { "epoch": 0.08847682447988836, "grad_norm": 0.2470703125, "learning_rate": 0.0010585301905449219, "loss": 0.191, "step": 49900 }, { "epoch": 0.08848037064519818, "grad_norm": 0.388671875, "learning_rate": 0.001058470735204178, "loss": 0.1797, "step": 49902 }, { "epoch": 0.088483916810508, "grad_norm": 2.6875, "learning_rate": 0.0010584112800450598, "loss": 0.3251, "step": 49904 }, { "epoch": 0.08848746297581782, "grad_norm": 0.5234375, "learning_rate": 0.0010583518250678256, "loss": 0.1947, "step": 49906 }, { "epoch": 0.08849100914112763, "grad_norm": 1.4140625, "learning_rate": 0.0010582923702727367, "loss": 0.1607, "step": 49908 }, { "epoch": 0.08849455530643745, "grad_norm": 0.4140625, "learning_rate": 0.0010582329156600527, "loss": 0.1646, "step": 49910 }, { "epoch": 0.08849810147174726, "grad_norm": 0.38671875, "learning_rate": 0.001058173461230034, "loss": 0.1827, "step": 49912 }, { "epoch": 0.08850164763705708, "grad_norm": 0.478515625, "learning_rate": 0.00105811400698294, "loss": 0.132, "step": 49914 }, { "epoch": 0.08850519380236689, "grad_norm": 0.44921875, "learning_rate": 0.001058054552919031, "loss": 0.2202, "step": 49916 }, { "epoch": 0.0885087399676767, "grad_norm": 0.5078125, "learning_rate": 0.0010579950990385672, "loss": 0.2198, "step": 49918 }, { "epoch": 0.08851228613298652, "grad_norm": 0.458984375, "learning_rate": 0.0010579356453418083, "loss": 0.2713, "step": 49920 }, { "epoch": 0.08851583229829633, "grad_norm": 0.2412109375, "learning_rate": 0.0010578761918290146, "loss": 0.122, "step": 49922 }, { "epoch": 0.08851937846360615, "grad_norm": 0.486328125, "learning_rate": 0.0010578167385004455, "loss": 0.1345, "step": 49924 }, { "epoch": 0.08852292462891596, "grad_norm": 0.361328125, "learning_rate": 0.0010577572853563617, "loss": 0.1831, "step": 49926 }, { "epoch": 0.08852647079422578, "grad_norm": 0.3671875, "learning_rate": 0.0010576978323970235, "loss": 0.1731, "step": 49928 }, { "epoch": 0.08853001695953559, "grad_norm": 0.490234375, "learning_rate": 0.00105763837962269, "loss": 0.1709, "step": 49930 }, { "epoch": 0.08853356312484541, "grad_norm": 0.447265625, "learning_rate": 0.0010575789270336213, "loss": 0.1881, "step": 49932 }, { "epoch": 0.08853710929015522, "grad_norm": 1.703125, "learning_rate": 0.0010575194746300781, "loss": 0.3402, "step": 49934 }, { "epoch": 0.08854065545546504, "grad_norm": 0.5859375, "learning_rate": 0.0010574600224123201, "loss": 0.3146, "step": 49936 }, { "epoch": 0.08854420162077485, "grad_norm": 0.40625, "learning_rate": 0.0010574005703806071, "loss": 0.1798, "step": 49938 }, { "epoch": 0.08854774778608467, "grad_norm": 0.5390625, "learning_rate": 0.0010573411185351993, "loss": 0.2209, "step": 49940 }, { "epoch": 0.08855129395139448, "grad_norm": 0.60546875, "learning_rate": 0.0010572816668763568, "loss": 0.1569, "step": 49942 }, { "epoch": 0.0885548401167043, "grad_norm": 0.46875, "learning_rate": 0.0010572222154043391, "loss": 0.3733, "step": 49944 }, { "epoch": 0.08855838628201411, "grad_norm": 0.478515625, "learning_rate": 0.001057162764119407, "loss": 0.2088, "step": 49946 }, { "epoch": 0.08856193244732392, "grad_norm": 0.51953125, "learning_rate": 0.00105710331302182, "loss": 0.182, "step": 49948 }, { "epoch": 0.08856547861263375, "grad_norm": 0.400390625, "learning_rate": 0.001057043862111838, "loss": 0.1562, "step": 49950 }, { "epoch": 0.08856902477794357, "grad_norm": 0.345703125, "learning_rate": 0.001056984411389721, "loss": 0.1641, "step": 49952 }, { "epoch": 0.08857257094325338, "grad_norm": 0.376953125, "learning_rate": 0.0010569249608557302, "loss": 0.1498, "step": 49954 }, { "epoch": 0.0885761171085632, "grad_norm": 0.296875, "learning_rate": 0.0010568655105101237, "loss": 0.1702, "step": 49956 }, { "epoch": 0.08857966327387301, "grad_norm": 0.287109375, "learning_rate": 0.0010568060603531625, "loss": 0.1776, "step": 49958 }, { "epoch": 0.08858320943918283, "grad_norm": 1.0234375, "learning_rate": 0.0010567466103851065, "loss": 0.2358, "step": 49960 }, { "epoch": 0.08858675560449264, "grad_norm": 0.890625, "learning_rate": 0.0010566871606062162, "loss": 0.3494, "step": 49962 }, { "epoch": 0.08859030176980245, "grad_norm": 0.46875, "learning_rate": 0.0010566277110167506, "loss": 0.1469, "step": 49964 }, { "epoch": 0.08859384793511227, "grad_norm": 0.181640625, "learning_rate": 0.0010565682616169702, "loss": 0.2199, "step": 49966 }, { "epoch": 0.08859739410042208, "grad_norm": 1.0390625, "learning_rate": 0.001056508812407135, "loss": 0.3808, "step": 49968 }, { "epoch": 0.0886009402657319, "grad_norm": 1.109375, "learning_rate": 0.001056449363387505, "loss": 0.1857, "step": 49970 }, { "epoch": 0.08860448643104171, "grad_norm": 3.125, "learning_rate": 0.0010563899145583405, "loss": 0.1691, "step": 49972 }, { "epoch": 0.08860803259635153, "grad_norm": 0.6640625, "learning_rate": 0.001056330465919901, "loss": 0.2075, "step": 49974 }, { "epoch": 0.08861157876166134, "grad_norm": 0.357421875, "learning_rate": 0.0010562710174724466, "loss": 0.1796, "step": 49976 }, { "epoch": 0.08861512492697116, "grad_norm": 1.1484375, "learning_rate": 0.0010562115692162378, "loss": 0.3646, "step": 49978 }, { "epoch": 0.08861867109228097, "grad_norm": 0.6015625, "learning_rate": 0.0010561521211515336, "loss": 0.2051, "step": 49980 }, { "epoch": 0.08862221725759079, "grad_norm": 1.1875, "learning_rate": 0.0010560926732785946, "loss": 0.2504, "step": 49982 }, { "epoch": 0.0886257634229006, "grad_norm": 0.71484375, "learning_rate": 0.0010560332255976808, "loss": 0.2052, "step": 49984 }, { "epoch": 0.08862930958821041, "grad_norm": 0.349609375, "learning_rate": 0.0010559737781090524, "loss": 0.1619, "step": 49986 }, { "epoch": 0.08863285575352023, "grad_norm": 0.447265625, "learning_rate": 0.0010559143308129688, "loss": 0.2907, "step": 49988 }, { "epoch": 0.08863640191883004, "grad_norm": 0.388671875, "learning_rate": 0.0010558548837096904, "loss": 0.188, "step": 49990 }, { "epoch": 0.08863994808413986, "grad_norm": 10.8125, "learning_rate": 0.001055795436799477, "loss": 0.4607, "step": 49992 }, { "epoch": 0.08864349424944969, "grad_norm": 0.494140625, "learning_rate": 0.0010557359900825892, "loss": 0.2233, "step": 49994 }, { "epoch": 0.0886470404147595, "grad_norm": 0.279296875, "learning_rate": 0.001055676543559286, "loss": 0.1878, "step": 49996 }, { "epoch": 0.08865058658006932, "grad_norm": 0.703125, "learning_rate": 0.0010556170972298277, "loss": 0.2305, "step": 49998 }, { "epoch": 0.08865413274537913, "grad_norm": 0.255859375, "learning_rate": 0.0010555576510944747, "loss": 0.1773, "step": 50000 }, { "epoch": 0.08865767891068894, "grad_norm": 0.484375, "learning_rate": 0.0010554982051534868, "loss": 0.1806, "step": 50002 }, { "epoch": 0.08866122507599876, "grad_norm": 0.234375, "learning_rate": 0.001055438759407124, "loss": 0.206, "step": 50004 }, { "epoch": 0.08866477124130857, "grad_norm": 0.388671875, "learning_rate": 0.0010553793138556456, "loss": 0.1928, "step": 50006 }, { "epoch": 0.08866831740661839, "grad_norm": 2.140625, "learning_rate": 0.0010553198684993125, "loss": 0.233, "step": 50008 }, { "epoch": 0.0886718635719282, "grad_norm": 0.423828125, "learning_rate": 0.0010552604233383847, "loss": 0.2348, "step": 50010 }, { "epoch": 0.08867540973723802, "grad_norm": 0.404296875, "learning_rate": 0.0010552009783731213, "loss": 0.1318, "step": 50012 }, { "epoch": 0.08867895590254783, "grad_norm": 0.6875, "learning_rate": 0.001055141533603783, "loss": 0.2861, "step": 50014 }, { "epoch": 0.08868250206785765, "grad_norm": 0.671875, "learning_rate": 0.0010550820890306296, "loss": 0.2233, "step": 50016 }, { "epoch": 0.08868604823316746, "grad_norm": 0.359375, "learning_rate": 0.0010550226446539208, "loss": 0.162, "step": 50018 }, { "epoch": 0.08868959439847728, "grad_norm": 0.50390625, "learning_rate": 0.0010549632004739173, "loss": 0.2143, "step": 50020 }, { "epoch": 0.08869314056378709, "grad_norm": 0.359375, "learning_rate": 0.0010549037564908782, "loss": 0.1652, "step": 50022 }, { "epoch": 0.0886966867290969, "grad_norm": 0.5234375, "learning_rate": 0.001054844312705064, "loss": 0.1584, "step": 50024 }, { "epoch": 0.08870023289440672, "grad_norm": 0.63671875, "learning_rate": 0.0010547848691167345, "loss": 0.1611, "step": 50026 }, { "epoch": 0.08870377905971653, "grad_norm": 0.5078125, "learning_rate": 0.0010547254257261501, "loss": 0.2312, "step": 50028 }, { "epoch": 0.08870732522502635, "grad_norm": 0.2041015625, "learning_rate": 0.00105466598253357, "loss": 0.1311, "step": 50030 }, { "epoch": 0.08871087139033616, "grad_norm": 0.318359375, "learning_rate": 0.0010546065395392547, "loss": 0.2188, "step": 50032 }, { "epoch": 0.08871441755564598, "grad_norm": 1.3984375, "learning_rate": 0.0010545470967434638, "loss": 0.3097, "step": 50034 }, { "epoch": 0.08871796372095579, "grad_norm": 1.2734375, "learning_rate": 0.001054487654146458, "loss": 0.3379, "step": 50036 }, { "epoch": 0.0887215098862656, "grad_norm": 0.515625, "learning_rate": 0.0010544282117484962, "loss": 0.1193, "step": 50038 }, { "epoch": 0.08872505605157543, "grad_norm": 0.44921875, "learning_rate": 0.0010543687695498392, "loss": 0.1574, "step": 50040 }, { "epoch": 0.08872860221688525, "grad_norm": 0.33203125, "learning_rate": 0.0010543093275507465, "loss": 0.186, "step": 50042 }, { "epoch": 0.08873214838219506, "grad_norm": 0.2265625, "learning_rate": 0.0010542498857514786, "loss": 0.1985, "step": 50044 }, { "epoch": 0.08873569454750488, "grad_norm": 0.38671875, "learning_rate": 0.001054190444152295, "loss": 0.2092, "step": 50046 }, { "epoch": 0.0887392407128147, "grad_norm": 0.39453125, "learning_rate": 0.0010541310027534555, "loss": 0.1791, "step": 50048 }, { "epoch": 0.08874278687812451, "grad_norm": 0.310546875, "learning_rate": 0.0010540715615552205, "loss": 0.1903, "step": 50050 }, { "epoch": 0.08874633304343432, "grad_norm": 0.49609375, "learning_rate": 0.0010540121205578502, "loss": 0.1727, "step": 50052 }, { "epoch": 0.08874987920874414, "grad_norm": 0.48828125, "learning_rate": 0.001053952679761604, "loss": 0.2193, "step": 50054 }, { "epoch": 0.08875342537405395, "grad_norm": 0.337890625, "learning_rate": 0.0010538932391667415, "loss": 0.2002, "step": 50056 }, { "epoch": 0.08875697153936377, "grad_norm": 1.1953125, "learning_rate": 0.0010538337987735236, "loss": 0.1766, "step": 50058 }, { "epoch": 0.08876051770467358, "grad_norm": 0.44921875, "learning_rate": 0.00105377435858221, "loss": 0.2019, "step": 50060 }, { "epoch": 0.0887640638699834, "grad_norm": 0.318359375, "learning_rate": 0.0010537149185930604, "loss": 0.1406, "step": 50062 }, { "epoch": 0.08876761003529321, "grad_norm": 0.33203125, "learning_rate": 0.0010536554788063346, "loss": 0.2046, "step": 50064 }, { "epoch": 0.08877115620060302, "grad_norm": 0.2392578125, "learning_rate": 0.001053596039222293, "loss": 0.1698, "step": 50066 }, { "epoch": 0.08877470236591284, "grad_norm": 0.494140625, "learning_rate": 0.0010535365998411955, "loss": 0.2322, "step": 50068 }, { "epoch": 0.08877824853122265, "grad_norm": 0.68359375, "learning_rate": 0.0010534771606633018, "loss": 0.1939, "step": 50070 }, { "epoch": 0.08878179469653247, "grad_norm": 0.3125, "learning_rate": 0.001053417721688872, "loss": 0.1499, "step": 50072 }, { "epoch": 0.08878534086184228, "grad_norm": 0.2001953125, "learning_rate": 0.0010533582829181658, "loss": 0.2863, "step": 50074 }, { "epoch": 0.0887888870271521, "grad_norm": 0.85546875, "learning_rate": 0.001053298844351444, "loss": 0.207, "step": 50076 }, { "epoch": 0.08879243319246191, "grad_norm": 0.458984375, "learning_rate": 0.0010532394059889654, "loss": 0.154, "step": 50078 }, { "epoch": 0.08879597935777173, "grad_norm": 0.734375, "learning_rate": 0.0010531799678309903, "loss": 0.1653, "step": 50080 }, { "epoch": 0.08879952552308154, "grad_norm": 0.365234375, "learning_rate": 0.001053120529877779, "loss": 0.1556, "step": 50082 }, { "epoch": 0.08880307168839136, "grad_norm": 0.328125, "learning_rate": 0.0010530610921295918, "loss": 0.1412, "step": 50084 }, { "epoch": 0.08880661785370118, "grad_norm": 0.328125, "learning_rate": 0.0010530016545866874, "loss": 0.1549, "step": 50086 }, { "epoch": 0.088810164019011, "grad_norm": 0.388671875, "learning_rate": 0.001052942217249327, "loss": 0.1615, "step": 50088 }, { "epoch": 0.08881371018432081, "grad_norm": 1.2109375, "learning_rate": 0.0010528827801177696, "loss": 0.236, "step": 50090 }, { "epoch": 0.08881725634963063, "grad_norm": 0.306640625, "learning_rate": 0.001052823343192276, "loss": 0.1553, "step": 50092 }, { "epoch": 0.08882080251494044, "grad_norm": 1.3515625, "learning_rate": 0.0010527639064731053, "loss": 0.2274, "step": 50094 }, { "epoch": 0.08882434868025026, "grad_norm": 0.7109375, "learning_rate": 0.001052704469960518, "loss": 0.1706, "step": 50096 }, { "epoch": 0.08882789484556007, "grad_norm": 0.408203125, "learning_rate": 0.0010526450336547735, "loss": 0.2602, "step": 50098 }, { "epoch": 0.08883144101086989, "grad_norm": 0.220703125, "learning_rate": 0.0010525855975561322, "loss": 0.2106, "step": 50100 }, { "epoch": 0.0888349871761797, "grad_norm": 0.6015625, "learning_rate": 0.0010525261616648545, "loss": 0.1815, "step": 50102 }, { "epoch": 0.08883853334148951, "grad_norm": 0.1767578125, "learning_rate": 0.0010524667259811996, "loss": 0.2402, "step": 50104 }, { "epoch": 0.08884207950679933, "grad_norm": 0.5, "learning_rate": 0.0010524072905054277, "loss": 0.3159, "step": 50106 }, { "epoch": 0.08884562567210914, "grad_norm": 0.2177734375, "learning_rate": 0.0010523478552377983, "loss": 0.1616, "step": 50108 }, { "epoch": 0.08884917183741896, "grad_norm": 1.2578125, "learning_rate": 0.001052288420178572, "loss": 0.2115, "step": 50110 }, { "epoch": 0.08885271800272877, "grad_norm": 0.640625, "learning_rate": 0.0010522289853280083, "loss": 0.1938, "step": 50112 }, { "epoch": 0.08885626416803859, "grad_norm": 0.66015625, "learning_rate": 0.001052169550686367, "loss": 0.3099, "step": 50114 }, { "epoch": 0.0888598103333484, "grad_norm": 0.51953125, "learning_rate": 0.0010521101162539086, "loss": 0.1906, "step": 50116 }, { "epoch": 0.08886335649865822, "grad_norm": 0.6875, "learning_rate": 0.001052050682030893, "loss": 0.1942, "step": 50118 }, { "epoch": 0.08886690266396803, "grad_norm": 0.240234375, "learning_rate": 0.0010519912480175795, "loss": 0.1489, "step": 50120 }, { "epoch": 0.08887044882927785, "grad_norm": 0.97265625, "learning_rate": 0.0010519318142142282, "loss": 0.3227, "step": 50122 }, { "epoch": 0.08887399499458766, "grad_norm": 0.31640625, "learning_rate": 0.0010518723806210997, "loss": 0.1476, "step": 50124 }, { "epoch": 0.08887754115989747, "grad_norm": 1.3984375, "learning_rate": 0.0010518129472384535, "loss": 0.2416, "step": 50126 }, { "epoch": 0.08888108732520729, "grad_norm": 0.205078125, "learning_rate": 0.0010517535140665492, "loss": 0.1481, "step": 50128 }, { "epoch": 0.08888463349051712, "grad_norm": 0.421875, "learning_rate": 0.001051694081105647, "loss": 0.1978, "step": 50130 }, { "epoch": 0.08888817965582693, "grad_norm": 0.359375, "learning_rate": 0.0010516346483560065, "loss": 0.1911, "step": 50132 }, { "epoch": 0.08889172582113675, "grad_norm": 0.546875, "learning_rate": 0.0010515752158178889, "loss": 0.1524, "step": 50134 }, { "epoch": 0.08889527198644656, "grad_norm": 0.58984375, "learning_rate": 0.0010515157834915523, "loss": 0.3869, "step": 50136 }, { "epoch": 0.08889881815175638, "grad_norm": 0.337890625, "learning_rate": 0.0010514563513772577, "loss": 0.2173, "step": 50138 }, { "epoch": 0.08890236431706619, "grad_norm": 0.373046875, "learning_rate": 0.001051396919475265, "loss": 0.2851, "step": 50140 }, { "epoch": 0.088905910482376, "grad_norm": 0.61328125, "learning_rate": 0.001051337487785834, "loss": 0.1649, "step": 50142 }, { "epoch": 0.08890945664768582, "grad_norm": 3.140625, "learning_rate": 0.0010512780563092245, "loss": 0.2654, "step": 50144 }, { "epoch": 0.08891300281299563, "grad_norm": 0.28515625, "learning_rate": 0.0010512186250456962, "loss": 0.2302, "step": 50146 }, { "epoch": 0.08891654897830545, "grad_norm": 0.494140625, "learning_rate": 0.0010511591939955094, "loss": 0.2421, "step": 50148 }, { "epoch": 0.08892009514361526, "grad_norm": 0.2373046875, "learning_rate": 0.0010510997631589245, "loss": 0.2102, "step": 50150 }, { "epoch": 0.08892364130892508, "grad_norm": 0.26171875, "learning_rate": 0.0010510403325362002, "loss": 0.1419, "step": 50152 }, { "epoch": 0.08892718747423489, "grad_norm": 0.1767578125, "learning_rate": 0.0010509809021275972, "loss": 0.1521, "step": 50154 }, { "epoch": 0.0889307336395447, "grad_norm": 0.4609375, "learning_rate": 0.001050921471933375, "loss": 0.2159, "step": 50156 }, { "epoch": 0.08893427980485452, "grad_norm": 0.75, "learning_rate": 0.0010508620419537945, "loss": 0.3153, "step": 50158 }, { "epoch": 0.08893782597016434, "grad_norm": 0.2080078125, "learning_rate": 0.001050802612189114, "loss": 0.1476, "step": 50160 }, { "epoch": 0.08894137213547415, "grad_norm": 0.40625, "learning_rate": 0.001050743182639595, "loss": 0.1663, "step": 50162 }, { "epoch": 0.08894491830078396, "grad_norm": 0.1591796875, "learning_rate": 0.0010506837533054963, "loss": 0.1375, "step": 50164 }, { "epoch": 0.08894846446609378, "grad_norm": 0.44140625, "learning_rate": 0.0010506243241870784, "loss": 0.2722, "step": 50166 }, { "epoch": 0.0889520106314036, "grad_norm": 0.4140625, "learning_rate": 0.001050564895284601, "loss": 0.207, "step": 50168 }, { "epoch": 0.08895555679671341, "grad_norm": 0.2109375, "learning_rate": 0.001050505466598324, "loss": 0.1863, "step": 50170 }, { "epoch": 0.08895910296202322, "grad_norm": 0.20703125, "learning_rate": 0.001050446038128507, "loss": 0.1814, "step": 50172 }, { "epoch": 0.08896264912733304, "grad_norm": 3.296875, "learning_rate": 0.0010503866098754112, "loss": 0.2836, "step": 50174 }, { "epoch": 0.08896619529264287, "grad_norm": 0.5859375, "learning_rate": 0.0010503271818392945, "loss": 0.1777, "step": 50176 }, { "epoch": 0.08896974145795268, "grad_norm": 0.53125, "learning_rate": 0.0010502677540204183, "loss": 0.2625, "step": 50178 }, { "epoch": 0.0889732876232625, "grad_norm": 0.296875, "learning_rate": 0.0010502083264190421, "loss": 0.1613, "step": 50180 }, { "epoch": 0.08897683378857231, "grad_norm": 1.1796875, "learning_rate": 0.0010501488990354258, "loss": 0.2466, "step": 50182 }, { "epoch": 0.08898037995388212, "grad_norm": 0.1796875, "learning_rate": 0.0010500894718698294, "loss": 0.1406, "step": 50184 }, { "epoch": 0.08898392611919194, "grad_norm": 0.291015625, "learning_rate": 0.0010500300449225123, "loss": 0.1695, "step": 50186 }, { "epoch": 0.08898747228450175, "grad_norm": 1.1171875, "learning_rate": 0.0010499706181937348, "loss": 0.2176, "step": 50188 }, { "epoch": 0.08899101844981157, "grad_norm": 0.51171875, "learning_rate": 0.0010499111916837565, "loss": 0.1608, "step": 50190 }, { "epoch": 0.08899456461512138, "grad_norm": 0.5625, "learning_rate": 0.0010498517653928382, "loss": 0.1913, "step": 50192 }, { "epoch": 0.0889981107804312, "grad_norm": 1.9140625, "learning_rate": 0.0010497923393212388, "loss": 0.2794, "step": 50194 }, { "epoch": 0.08900165694574101, "grad_norm": 0.328125, "learning_rate": 0.0010497329134692185, "loss": 0.1701, "step": 50196 }, { "epoch": 0.08900520311105083, "grad_norm": 0.34375, "learning_rate": 0.0010496734878370374, "loss": 0.2006, "step": 50198 }, { "epoch": 0.08900874927636064, "grad_norm": 0.376953125, "learning_rate": 0.001049614062424955, "loss": 0.2027, "step": 50200 }, { "epoch": 0.08901229544167046, "grad_norm": 0.765625, "learning_rate": 0.001049554637233232, "loss": 0.1161, "step": 50202 }, { "epoch": 0.08901584160698027, "grad_norm": 1.5234375, "learning_rate": 0.0010494952122621268, "loss": 0.2731, "step": 50204 }, { "epoch": 0.08901938777229008, "grad_norm": 0.22265625, "learning_rate": 0.0010494357875119008, "loss": 0.1354, "step": 50206 }, { "epoch": 0.0890229339375999, "grad_norm": 0.482421875, "learning_rate": 0.0010493763629828132, "loss": 0.362, "step": 50208 }, { "epoch": 0.08902648010290971, "grad_norm": 0.353515625, "learning_rate": 0.001049316938675124, "loss": 0.1697, "step": 50210 }, { "epoch": 0.08903002626821953, "grad_norm": 0.353515625, "learning_rate": 0.0010492575145890927, "loss": 0.1646, "step": 50212 }, { "epoch": 0.08903357243352934, "grad_norm": 0.37890625, "learning_rate": 0.0010491980907249801, "loss": 0.2678, "step": 50214 }, { "epoch": 0.08903711859883916, "grad_norm": 0.5625, "learning_rate": 0.0010491386670830453, "loss": 0.2084, "step": 50216 }, { "epoch": 0.08904066476414897, "grad_norm": 0.4296875, "learning_rate": 0.0010490792436635486, "loss": 0.1516, "step": 50218 }, { "epoch": 0.08904421092945879, "grad_norm": 1.5703125, "learning_rate": 0.0010490198204667497, "loss": 0.2956, "step": 50220 }, { "epoch": 0.08904775709476861, "grad_norm": 0.2470703125, "learning_rate": 0.0010489603974929082, "loss": 0.2074, "step": 50222 }, { "epoch": 0.08905130326007843, "grad_norm": 0.296875, "learning_rate": 0.0010489009747422847, "loss": 0.2007, "step": 50224 }, { "epoch": 0.08905484942538824, "grad_norm": 0.4453125, "learning_rate": 0.0010488415522151382, "loss": 0.2291, "step": 50226 }, { "epoch": 0.08905839559069806, "grad_norm": 0.69140625, "learning_rate": 0.001048782129911729, "loss": 0.2025, "step": 50228 }, { "epoch": 0.08906194175600787, "grad_norm": 0.27734375, "learning_rate": 0.0010487227078323175, "loss": 0.1379, "step": 50230 }, { "epoch": 0.08906548792131769, "grad_norm": 0.451171875, "learning_rate": 0.001048663285977163, "loss": 0.1802, "step": 50232 }, { "epoch": 0.0890690340866275, "grad_norm": 0.302734375, "learning_rate": 0.001048603864346525, "loss": 0.1516, "step": 50234 }, { "epoch": 0.08907258025193732, "grad_norm": 0.796875, "learning_rate": 0.0010485444429406644, "loss": 0.3092, "step": 50236 }, { "epoch": 0.08907612641724713, "grad_norm": 1.0, "learning_rate": 0.00104848502175984, "loss": 0.242, "step": 50238 }, { "epoch": 0.08907967258255695, "grad_norm": 0.625, "learning_rate": 0.0010484256008043127, "loss": 0.2158, "step": 50240 }, { "epoch": 0.08908321874786676, "grad_norm": 0.201171875, "learning_rate": 0.0010483661800743415, "loss": 0.5312, "step": 50242 }, { "epoch": 0.08908676491317657, "grad_norm": 0.5859375, "learning_rate": 0.0010483067595701866, "loss": 0.2038, "step": 50244 }, { "epoch": 0.08909031107848639, "grad_norm": 0.20703125, "learning_rate": 0.001048247339292108, "loss": 0.1614, "step": 50246 }, { "epoch": 0.0890938572437962, "grad_norm": 0.80078125, "learning_rate": 0.0010481879192403658, "loss": 0.2805, "step": 50248 }, { "epoch": 0.08909740340910602, "grad_norm": 0.212890625, "learning_rate": 0.0010481284994152192, "loss": 0.1464, "step": 50250 }, { "epoch": 0.08910094957441583, "grad_norm": 0.40234375, "learning_rate": 0.0010480690798169282, "loss": 0.1739, "step": 50252 }, { "epoch": 0.08910449573972565, "grad_norm": 0.4140625, "learning_rate": 0.0010480096604457533, "loss": 0.2088, "step": 50254 }, { "epoch": 0.08910804190503546, "grad_norm": 0.361328125, "learning_rate": 0.0010479502413019539, "loss": 0.1504, "step": 50256 }, { "epoch": 0.08911158807034528, "grad_norm": 0.546875, "learning_rate": 0.0010478908223857902, "loss": 0.169, "step": 50258 }, { "epoch": 0.08911513423565509, "grad_norm": 0.984375, "learning_rate": 0.001047831403697521, "loss": 0.2338, "step": 50260 }, { "epoch": 0.0891186804009649, "grad_norm": 0.296875, "learning_rate": 0.0010477719852374074, "loss": 0.1955, "step": 50262 }, { "epoch": 0.08912222656627472, "grad_norm": 0.4921875, "learning_rate": 0.0010477125670057088, "loss": 0.1541, "step": 50264 }, { "epoch": 0.08912577273158455, "grad_norm": 2.21875, "learning_rate": 0.0010476531490026851, "loss": 0.2767, "step": 50266 }, { "epoch": 0.08912931889689436, "grad_norm": 0.423828125, "learning_rate": 0.001047593731228596, "loss": 0.1701, "step": 50268 }, { "epoch": 0.08913286506220418, "grad_norm": 0.4140625, "learning_rate": 0.0010475343136837013, "loss": 0.1481, "step": 50270 }, { "epoch": 0.08913641122751399, "grad_norm": 0.2373046875, "learning_rate": 0.0010474748963682611, "loss": 0.1258, "step": 50272 }, { "epoch": 0.08913995739282381, "grad_norm": 1.21875, "learning_rate": 0.0010474154792825354, "loss": 0.2745, "step": 50274 }, { "epoch": 0.08914350355813362, "grad_norm": 0.43359375, "learning_rate": 0.001047356062426784, "loss": 0.1719, "step": 50276 }, { "epoch": 0.08914704972344344, "grad_norm": 0.51171875, "learning_rate": 0.0010472966458012663, "loss": 0.3227, "step": 50278 }, { "epoch": 0.08915059588875325, "grad_norm": 0.18359375, "learning_rate": 0.0010472372294062422, "loss": 0.18, "step": 50280 }, { "epoch": 0.08915414205406307, "grad_norm": 1.6796875, "learning_rate": 0.0010471778132419725, "loss": 0.2196, "step": 50282 }, { "epoch": 0.08915768821937288, "grad_norm": 0.275390625, "learning_rate": 0.001047118397308716, "loss": 0.1366, "step": 50284 }, { "epoch": 0.0891612343846827, "grad_norm": 0.427734375, "learning_rate": 0.0010470589816067325, "loss": 0.2116, "step": 50286 }, { "epoch": 0.08916478054999251, "grad_norm": 2.5625, "learning_rate": 0.0010469995661362829, "loss": 0.3562, "step": 50288 }, { "epoch": 0.08916832671530232, "grad_norm": 0.1181640625, "learning_rate": 0.0010469401508976263, "loss": 0.1342, "step": 50290 }, { "epoch": 0.08917187288061214, "grad_norm": 0.43359375, "learning_rate": 0.0010468807358910224, "loss": 0.1852, "step": 50292 }, { "epoch": 0.08917541904592195, "grad_norm": 0.60546875, "learning_rate": 0.0010468213211167316, "loss": 0.3078, "step": 50294 }, { "epoch": 0.08917896521123177, "grad_norm": 0.373046875, "learning_rate": 0.0010467619065750134, "loss": 0.1681, "step": 50296 }, { "epoch": 0.08918251137654158, "grad_norm": 1.1875, "learning_rate": 0.0010467024922661276, "loss": 0.1547, "step": 50298 }, { "epoch": 0.0891860575418514, "grad_norm": 0.46484375, "learning_rate": 0.0010466430781903343, "loss": 0.1708, "step": 50300 }, { "epoch": 0.08918960370716121, "grad_norm": 0.55859375, "learning_rate": 0.0010465836643478927, "loss": 0.1658, "step": 50302 }, { "epoch": 0.08919314987247103, "grad_norm": 0.224609375, "learning_rate": 0.0010465242507390635, "loss": 0.1533, "step": 50304 }, { "epoch": 0.08919669603778084, "grad_norm": 0.46875, "learning_rate": 0.0010464648373641064, "loss": 0.1972, "step": 50306 }, { "epoch": 0.08920024220309065, "grad_norm": 0.1943359375, "learning_rate": 0.0010464054242232805, "loss": 0.1882, "step": 50308 }, { "epoch": 0.08920378836840047, "grad_norm": 1.6640625, "learning_rate": 0.0010463460113168464, "loss": 0.2184, "step": 50310 }, { "epoch": 0.0892073345337103, "grad_norm": 2.90625, "learning_rate": 0.0010462865986450636, "loss": 0.219, "step": 50312 }, { "epoch": 0.08921088069902011, "grad_norm": 0.357421875, "learning_rate": 0.0010462271862081921, "loss": 0.2286, "step": 50314 }, { "epoch": 0.08921442686432993, "grad_norm": 0.32421875, "learning_rate": 0.001046167774006492, "loss": 0.1866, "step": 50316 }, { "epoch": 0.08921797302963974, "grad_norm": 0.3828125, "learning_rate": 0.001046108362040222, "loss": 0.2271, "step": 50318 }, { "epoch": 0.08922151919494956, "grad_norm": 0.2470703125, "learning_rate": 0.001046048950309643, "loss": 0.1943, "step": 50320 }, { "epoch": 0.08922506536025937, "grad_norm": 0.32421875, "learning_rate": 0.0010459895388150152, "loss": 0.1623, "step": 50322 }, { "epoch": 0.08922861152556918, "grad_norm": 0.88671875, "learning_rate": 0.0010459301275565968, "loss": 0.1714, "step": 50324 }, { "epoch": 0.089232157690879, "grad_norm": 0.546875, "learning_rate": 0.001045870716534649, "loss": 0.2219, "step": 50326 }, { "epoch": 0.08923570385618881, "grad_norm": 0.7109375, "learning_rate": 0.0010458113057494316, "loss": 0.1339, "step": 50328 }, { "epoch": 0.08923925002149863, "grad_norm": 0.828125, "learning_rate": 0.0010457518952012037, "loss": 0.225, "step": 50330 }, { "epoch": 0.08924279618680844, "grad_norm": 0.267578125, "learning_rate": 0.0010456924848902258, "loss": 0.1929, "step": 50332 }, { "epoch": 0.08924634235211826, "grad_norm": 2.515625, "learning_rate": 0.001045633074816757, "loss": 0.2894, "step": 50334 }, { "epoch": 0.08924988851742807, "grad_norm": 1.3359375, "learning_rate": 0.0010455736649810576, "loss": 0.2552, "step": 50336 }, { "epoch": 0.08925343468273789, "grad_norm": 0.2373046875, "learning_rate": 0.0010455142553833878, "loss": 0.2027, "step": 50338 }, { "epoch": 0.0892569808480477, "grad_norm": 0.2158203125, "learning_rate": 0.0010454548460240068, "loss": 0.15, "step": 50340 }, { "epoch": 0.08926052701335752, "grad_norm": 0.478515625, "learning_rate": 0.0010453954369031744, "loss": 0.5615, "step": 50342 }, { "epoch": 0.08926407317866733, "grad_norm": 0.42578125, "learning_rate": 0.0010453360280211505, "loss": 0.2131, "step": 50344 }, { "epoch": 0.08926761934397714, "grad_norm": 0.5, "learning_rate": 0.0010452766193781958, "loss": 0.1767, "step": 50346 }, { "epoch": 0.08927116550928696, "grad_norm": 0.37890625, "learning_rate": 0.0010452172109745687, "loss": 0.2028, "step": 50348 }, { "epoch": 0.08927471167459677, "grad_norm": 0.82421875, "learning_rate": 0.0010451578028105302, "loss": 0.2083, "step": 50350 }, { "epoch": 0.08927825783990659, "grad_norm": 0.6484375, "learning_rate": 0.0010450983948863393, "loss": 0.1556, "step": 50352 }, { "epoch": 0.0892818040052164, "grad_norm": 0.74609375, "learning_rate": 0.0010450389872022558, "loss": 0.1686, "step": 50354 }, { "epoch": 0.08928535017052622, "grad_norm": 0.55859375, "learning_rate": 0.0010449795797585406, "loss": 0.188, "step": 50356 }, { "epoch": 0.08928889633583605, "grad_norm": 0.71875, "learning_rate": 0.0010449201725554523, "loss": 0.1367, "step": 50358 }, { "epoch": 0.08929244250114586, "grad_norm": 2.0625, "learning_rate": 0.0010448607655932512, "loss": 0.1941, "step": 50360 }, { "epoch": 0.08929598866645568, "grad_norm": 19.5, "learning_rate": 0.0010448013588721969, "loss": 0.2306, "step": 50362 }, { "epoch": 0.08929953483176549, "grad_norm": 0.609375, "learning_rate": 0.0010447419523925499, "loss": 0.2649, "step": 50364 }, { "epoch": 0.0893030809970753, "grad_norm": 0.79296875, "learning_rate": 0.0010446825461545692, "loss": 0.208, "step": 50366 }, { "epoch": 0.08930662716238512, "grad_norm": 0.65234375, "learning_rate": 0.0010446231401585152, "loss": 0.3048, "step": 50368 }, { "epoch": 0.08931017332769493, "grad_norm": 0.91015625, "learning_rate": 0.0010445637344046468, "loss": 0.214, "step": 50370 }, { "epoch": 0.08931371949300475, "grad_norm": 1.9140625, "learning_rate": 0.0010445043288932253, "loss": 0.3553, "step": 50372 }, { "epoch": 0.08931726565831456, "grad_norm": 0.259765625, "learning_rate": 0.0010444449236245087, "loss": 0.1424, "step": 50374 }, { "epoch": 0.08932081182362438, "grad_norm": 0.52734375, "learning_rate": 0.001044385518598758, "loss": 0.1693, "step": 50376 }, { "epoch": 0.08932435798893419, "grad_norm": 0.28125, "learning_rate": 0.001044326113816233, "loss": 0.1433, "step": 50378 }, { "epoch": 0.089327904154244, "grad_norm": 1.3828125, "learning_rate": 0.0010442667092771933, "loss": 0.2072, "step": 50380 }, { "epoch": 0.08933145031955382, "grad_norm": 0.31640625, "learning_rate": 0.0010442073049818984, "loss": 0.2003, "step": 50382 }, { "epoch": 0.08933499648486364, "grad_norm": 0.271484375, "learning_rate": 0.0010441479009306084, "loss": 0.1907, "step": 50384 }, { "epoch": 0.08933854265017345, "grad_norm": 1.1484375, "learning_rate": 0.001044088497123583, "loss": 0.1602, "step": 50386 }, { "epoch": 0.08934208881548326, "grad_norm": 0.7734375, "learning_rate": 0.0010440290935610822, "loss": 0.1347, "step": 50388 }, { "epoch": 0.08934563498079308, "grad_norm": 0.31640625, "learning_rate": 0.0010439696902433656, "loss": 0.136, "step": 50390 }, { "epoch": 0.0893491811461029, "grad_norm": 0.404296875, "learning_rate": 0.001043910287170693, "loss": 0.1786, "step": 50392 }, { "epoch": 0.08935272731141271, "grad_norm": 0.4609375, "learning_rate": 0.0010438508843433239, "loss": 0.3837, "step": 50394 }, { "epoch": 0.08935627347672252, "grad_norm": 0.373046875, "learning_rate": 0.0010437914817615188, "loss": 0.1751, "step": 50396 }, { "epoch": 0.08935981964203234, "grad_norm": 0.302734375, "learning_rate": 0.001043732079425537, "loss": 0.1538, "step": 50398 }, { "epoch": 0.08936336580734215, "grad_norm": 0.38671875, "learning_rate": 0.0010436726773356384, "loss": 0.1971, "step": 50400 }, { "epoch": 0.08936691197265198, "grad_norm": 0.25390625, "learning_rate": 0.0010436132754920826, "loss": 0.1617, "step": 50402 }, { "epoch": 0.0893704581379618, "grad_norm": 0.34765625, "learning_rate": 0.00104355387389513, "loss": 0.1446, "step": 50404 }, { "epoch": 0.08937400430327161, "grad_norm": 0.484375, "learning_rate": 0.0010434944725450394, "loss": 0.1371, "step": 50406 }, { "epoch": 0.08937755046858142, "grad_norm": 0.30078125, "learning_rate": 0.0010434350714420718, "loss": 0.1203, "step": 50408 }, { "epoch": 0.08938109663389124, "grad_norm": 0.2412109375, "learning_rate": 0.001043375670586486, "loss": 0.1489, "step": 50410 }, { "epoch": 0.08938464279920105, "grad_norm": 0.58203125, "learning_rate": 0.0010433162699785424, "loss": 0.1284, "step": 50412 }, { "epoch": 0.08938818896451087, "grad_norm": 1.0859375, "learning_rate": 0.0010432568696185, "loss": 0.1519, "step": 50414 }, { "epoch": 0.08939173512982068, "grad_norm": 0.21484375, "learning_rate": 0.0010431974695066194, "loss": 0.1759, "step": 50416 }, { "epoch": 0.0893952812951305, "grad_norm": 0.279296875, "learning_rate": 0.0010431380696431598, "loss": 0.2217, "step": 50418 }, { "epoch": 0.08939882746044031, "grad_norm": 0.322265625, "learning_rate": 0.0010430786700283815, "loss": 0.1833, "step": 50420 }, { "epoch": 0.08940237362575013, "grad_norm": 0.37109375, "learning_rate": 0.001043019270662544, "loss": 0.1859, "step": 50422 }, { "epoch": 0.08940591979105994, "grad_norm": 0.404296875, "learning_rate": 0.0010429598715459075, "loss": 0.1687, "step": 50424 }, { "epoch": 0.08940946595636975, "grad_norm": 0.77734375, "learning_rate": 0.0010429004726787308, "loss": 0.2223, "step": 50426 }, { "epoch": 0.08941301212167957, "grad_norm": 0.25390625, "learning_rate": 0.0010428410740612746, "loss": 0.1786, "step": 50428 }, { "epoch": 0.08941655828698938, "grad_norm": 0.58203125, "learning_rate": 0.0010427816756937982, "loss": 0.1877, "step": 50430 }, { "epoch": 0.0894201044522992, "grad_norm": 0.875, "learning_rate": 0.0010427222775765614, "loss": 0.2102, "step": 50432 }, { "epoch": 0.08942365061760901, "grad_norm": 0.451171875, "learning_rate": 0.001042662879709824, "loss": 0.1724, "step": 50434 }, { "epoch": 0.08942719678291883, "grad_norm": 2.78125, "learning_rate": 0.0010426034820938462, "loss": 0.2032, "step": 50436 }, { "epoch": 0.08943074294822864, "grad_norm": 0.294921875, "learning_rate": 0.0010425440847288874, "loss": 0.1452, "step": 50438 }, { "epoch": 0.08943428911353846, "grad_norm": 0.228515625, "learning_rate": 0.0010424846876152074, "loss": 0.1652, "step": 50440 }, { "epoch": 0.08943783527884827, "grad_norm": 0.359375, "learning_rate": 0.0010424252907530658, "loss": 0.1943, "step": 50442 }, { "epoch": 0.08944138144415809, "grad_norm": 3.234375, "learning_rate": 0.0010423658941427225, "loss": 0.2739, "step": 50444 }, { "epoch": 0.0894449276094679, "grad_norm": 0.263671875, "learning_rate": 0.0010423064977844376, "loss": 0.1977, "step": 50446 }, { "epoch": 0.08944847377477773, "grad_norm": 0.5625, "learning_rate": 0.0010422471016784704, "loss": 0.1756, "step": 50448 }, { "epoch": 0.08945201994008754, "grad_norm": 0.51953125, "learning_rate": 0.0010421877058250806, "loss": 0.225, "step": 50450 }, { "epoch": 0.08945556610539736, "grad_norm": 0.447265625, "learning_rate": 0.0010421283102245284, "loss": 0.1537, "step": 50452 }, { "epoch": 0.08945911227070717, "grad_norm": 0.400390625, "learning_rate": 0.0010420689148770736, "loss": 0.2254, "step": 50454 }, { "epoch": 0.08946265843601699, "grad_norm": 0.458984375, "learning_rate": 0.0010420095197829753, "loss": 0.1944, "step": 50456 }, { "epoch": 0.0894662046013268, "grad_norm": 2.390625, "learning_rate": 0.0010419501249424939, "loss": 0.3574, "step": 50458 }, { "epoch": 0.08946975076663662, "grad_norm": 1.0546875, "learning_rate": 0.0010418907303558888, "loss": 0.1794, "step": 50460 }, { "epoch": 0.08947329693194643, "grad_norm": 0.8359375, "learning_rate": 0.0010418313360234203, "loss": 0.2281, "step": 50462 }, { "epoch": 0.08947684309725625, "grad_norm": 0.408203125, "learning_rate": 0.0010417719419453474, "loss": 0.1696, "step": 50464 }, { "epoch": 0.08948038926256606, "grad_norm": 0.53125, "learning_rate": 0.00104171254812193, "loss": 0.1452, "step": 50466 }, { "epoch": 0.08948393542787587, "grad_norm": 1.015625, "learning_rate": 0.0010416531545534282, "loss": 0.2944, "step": 50468 }, { "epoch": 0.08948748159318569, "grad_norm": 0.39453125, "learning_rate": 0.001041593761240102, "loss": 0.1798, "step": 50470 }, { "epoch": 0.0894910277584955, "grad_norm": 0.73046875, "learning_rate": 0.0010415343681822102, "loss": 0.1485, "step": 50472 }, { "epoch": 0.08949457392380532, "grad_norm": 0.2119140625, "learning_rate": 0.0010414749753800134, "loss": 0.2218, "step": 50474 }, { "epoch": 0.08949812008911513, "grad_norm": 0.640625, "learning_rate": 0.0010414155828337708, "loss": 0.1882, "step": 50476 }, { "epoch": 0.08950166625442495, "grad_norm": 0.64453125, "learning_rate": 0.001041356190543743, "loss": 0.2808, "step": 50478 }, { "epoch": 0.08950521241973476, "grad_norm": 0.6875, "learning_rate": 0.0010412967985101887, "loss": 0.188, "step": 50480 }, { "epoch": 0.08950875858504458, "grad_norm": 0.3203125, "learning_rate": 0.0010412374067333683, "loss": 0.2223, "step": 50482 }, { "epoch": 0.08951230475035439, "grad_norm": 1.625, "learning_rate": 0.001041178015213541, "loss": 0.2464, "step": 50484 }, { "epoch": 0.0895158509156642, "grad_norm": 0.63671875, "learning_rate": 0.0010411186239509677, "loss": 0.1769, "step": 50486 }, { "epoch": 0.08951939708097402, "grad_norm": 0.4140625, "learning_rate": 0.0010410592329459065, "loss": 0.206, "step": 50488 }, { "epoch": 0.08952294324628383, "grad_norm": 1.046875, "learning_rate": 0.0010409998421986183, "loss": 0.3168, "step": 50490 }, { "epoch": 0.08952648941159365, "grad_norm": 0.41015625, "learning_rate": 0.0010409404517093622, "loss": 0.1794, "step": 50492 }, { "epoch": 0.08953003557690348, "grad_norm": 2.40625, "learning_rate": 0.0010408810614783988, "loss": 0.3095, "step": 50494 }, { "epoch": 0.08953358174221329, "grad_norm": 0.1455078125, "learning_rate": 0.001040821671505987, "loss": 0.1327, "step": 50496 }, { "epoch": 0.0895371279075231, "grad_norm": 0.46484375, "learning_rate": 0.001040762281792387, "loss": 0.182, "step": 50498 }, { "epoch": 0.08954067407283292, "grad_norm": 0.357421875, "learning_rate": 0.0010407028923378578, "loss": 0.1798, "step": 50500 }, { "epoch": 0.08954422023814274, "grad_norm": 0.2490234375, "learning_rate": 0.0010406435031426607, "loss": 0.1924, "step": 50502 }, { "epoch": 0.08954776640345255, "grad_norm": 0.2353515625, "learning_rate": 0.0010405841142070538, "loss": 0.1945, "step": 50504 }, { "epoch": 0.08955131256876236, "grad_norm": 0.66015625, "learning_rate": 0.0010405247255312973, "loss": 0.1573, "step": 50506 }, { "epoch": 0.08955485873407218, "grad_norm": 0.5234375, "learning_rate": 0.0010404653371156513, "loss": 0.1595, "step": 50508 }, { "epoch": 0.089558404899382, "grad_norm": 0.375, "learning_rate": 0.0010404059489603757, "loss": 0.1687, "step": 50510 }, { "epoch": 0.08956195106469181, "grad_norm": 0.82421875, "learning_rate": 0.0010403465610657294, "loss": 0.2413, "step": 50512 }, { "epoch": 0.08956549723000162, "grad_norm": 0.96875, "learning_rate": 0.0010402871734319725, "loss": 0.2075, "step": 50514 }, { "epoch": 0.08956904339531144, "grad_norm": 0.1630859375, "learning_rate": 0.0010402277860593653, "loss": 0.1366, "step": 50516 }, { "epoch": 0.08957258956062125, "grad_norm": 0.29296875, "learning_rate": 0.001040168398948167, "loss": 0.177, "step": 50518 }, { "epoch": 0.08957613572593107, "grad_norm": 0.478515625, "learning_rate": 0.0010401090120986372, "loss": 0.202, "step": 50520 }, { "epoch": 0.08957968189124088, "grad_norm": 0.255859375, "learning_rate": 0.0010400496255110357, "loss": 0.1906, "step": 50522 }, { "epoch": 0.0895832280565507, "grad_norm": 0.201171875, "learning_rate": 0.0010399902391856222, "loss": 0.2022, "step": 50524 }, { "epoch": 0.08958677422186051, "grad_norm": 0.263671875, "learning_rate": 0.0010399308531226566, "loss": 0.1698, "step": 50526 }, { "epoch": 0.08959032038717032, "grad_norm": 0.4765625, "learning_rate": 0.0010398714673223988, "loss": 0.2413, "step": 50528 }, { "epoch": 0.08959386655248014, "grad_norm": 0.421875, "learning_rate": 0.0010398120817851082, "loss": 0.16, "step": 50530 }, { "epoch": 0.08959741271778995, "grad_norm": 0.796875, "learning_rate": 0.0010397526965110445, "loss": 0.1873, "step": 50532 }, { "epoch": 0.08960095888309977, "grad_norm": 0.28125, "learning_rate": 0.0010396933115004675, "loss": 0.1646, "step": 50534 }, { "epoch": 0.08960450504840958, "grad_norm": 1.03125, "learning_rate": 0.001039633926753637, "loss": 0.188, "step": 50536 }, { "epoch": 0.08960805121371941, "grad_norm": 0.68359375, "learning_rate": 0.0010395745422708128, "loss": 0.1642, "step": 50538 }, { "epoch": 0.08961159737902923, "grad_norm": 0.349609375, "learning_rate": 0.0010395151580522542, "loss": 0.2082, "step": 50540 }, { "epoch": 0.08961514354433904, "grad_norm": 0.427734375, "learning_rate": 0.0010394557740982212, "loss": 0.1679, "step": 50542 }, { "epoch": 0.08961868970964885, "grad_norm": 0.384765625, "learning_rate": 0.0010393963904089735, "loss": 0.207, "step": 50544 }, { "epoch": 0.08962223587495867, "grad_norm": 0.173828125, "learning_rate": 0.0010393370069847708, "loss": 0.1925, "step": 50546 }, { "epoch": 0.08962578204026848, "grad_norm": 0.294921875, "learning_rate": 0.0010392776238258728, "loss": 0.1916, "step": 50548 }, { "epoch": 0.0896293282055783, "grad_norm": 0.2001953125, "learning_rate": 0.001039218240932539, "loss": 0.182, "step": 50550 }, { "epoch": 0.08963287437088811, "grad_norm": 0.291015625, "learning_rate": 0.0010391588583050298, "loss": 0.2104, "step": 50552 }, { "epoch": 0.08963642053619793, "grad_norm": 0.1376953125, "learning_rate": 0.001039099475943604, "loss": 0.1605, "step": 50554 }, { "epoch": 0.08963996670150774, "grad_norm": 0.5546875, "learning_rate": 0.0010390400938485218, "loss": 0.138, "step": 50556 }, { "epoch": 0.08964351286681756, "grad_norm": 0.49609375, "learning_rate": 0.0010389807120200426, "loss": 0.2131, "step": 50558 }, { "epoch": 0.08964705903212737, "grad_norm": 0.5546875, "learning_rate": 0.001038921330458427, "loss": 0.231, "step": 50560 }, { "epoch": 0.08965060519743719, "grad_norm": 0.455078125, "learning_rate": 0.0010388619491639335, "loss": 0.2464, "step": 50562 }, { "epoch": 0.089654151362747, "grad_norm": 0.4453125, "learning_rate": 0.0010388025681368222, "loss": 0.1848, "step": 50564 }, { "epoch": 0.08965769752805682, "grad_norm": 1.15625, "learning_rate": 0.001038743187377353, "loss": 0.2336, "step": 50566 }, { "epoch": 0.08966124369336663, "grad_norm": 0.421875, "learning_rate": 0.001038683806885786, "loss": 0.2423, "step": 50568 }, { "epoch": 0.08966478985867644, "grad_norm": 0.46875, "learning_rate": 0.0010386244266623798, "loss": 0.1474, "step": 50570 }, { "epoch": 0.08966833602398626, "grad_norm": 0.7421875, "learning_rate": 0.001038565046707395, "loss": 0.3463, "step": 50572 }, { "epoch": 0.08967188218929607, "grad_norm": 0.453125, "learning_rate": 0.001038505667021091, "loss": 0.1958, "step": 50574 }, { "epoch": 0.08967542835460589, "grad_norm": 0.234375, "learning_rate": 0.0010384462876037275, "loss": 0.1677, "step": 50576 }, { "epoch": 0.0896789745199157, "grad_norm": 0.416015625, "learning_rate": 0.0010383869084555643, "loss": 0.1937, "step": 50578 }, { "epoch": 0.08968252068522552, "grad_norm": 0.228515625, "learning_rate": 0.0010383275295768608, "loss": 0.3057, "step": 50580 }, { "epoch": 0.08968606685053533, "grad_norm": 0.326171875, "learning_rate": 0.0010382681509678766, "loss": 0.1862, "step": 50582 }, { "epoch": 0.08968961301584516, "grad_norm": 0.236328125, "learning_rate": 0.0010382087726288722, "loss": 0.1467, "step": 50584 }, { "epoch": 0.08969315918115497, "grad_norm": 1.125, "learning_rate": 0.0010381493945601063, "loss": 0.2009, "step": 50586 }, { "epoch": 0.08969670534646479, "grad_norm": 0.439453125, "learning_rate": 0.0010380900167618393, "loss": 0.1856, "step": 50588 }, { "epoch": 0.0897002515117746, "grad_norm": 0.76171875, "learning_rate": 0.0010380306392343305, "loss": 0.2699, "step": 50590 }, { "epoch": 0.08970379767708442, "grad_norm": 0.283203125, "learning_rate": 0.0010379712619778398, "loss": 0.183, "step": 50592 }, { "epoch": 0.08970734384239423, "grad_norm": 0.546875, "learning_rate": 0.0010379118849926266, "loss": 0.2379, "step": 50594 }, { "epoch": 0.08971089000770405, "grad_norm": 0.32421875, "learning_rate": 0.0010378525082789507, "loss": 0.2138, "step": 50596 }, { "epoch": 0.08971443617301386, "grad_norm": 0.94140625, "learning_rate": 0.0010377931318370718, "loss": 0.1792, "step": 50598 }, { "epoch": 0.08971798233832368, "grad_norm": 2.0625, "learning_rate": 0.0010377337556672502, "loss": 0.1703, "step": 50600 }, { "epoch": 0.08972152850363349, "grad_norm": 2.21875, "learning_rate": 0.0010376743797697442, "loss": 0.3501, "step": 50602 }, { "epoch": 0.0897250746689433, "grad_norm": 0.64453125, "learning_rate": 0.0010376150041448145, "loss": 0.163, "step": 50604 }, { "epoch": 0.08972862083425312, "grad_norm": 0.6796875, "learning_rate": 0.0010375556287927205, "loss": 0.1659, "step": 50606 }, { "epoch": 0.08973216699956293, "grad_norm": 0.1787109375, "learning_rate": 0.0010374962537137223, "loss": 0.1804, "step": 50608 }, { "epoch": 0.08973571316487275, "grad_norm": 0.2255859375, "learning_rate": 0.0010374368789080788, "loss": 0.1774, "step": 50610 }, { "epoch": 0.08973925933018256, "grad_norm": 0.287109375, "learning_rate": 0.0010373775043760502, "loss": 0.1487, "step": 50612 }, { "epoch": 0.08974280549549238, "grad_norm": 0.458984375, "learning_rate": 0.001037318130117896, "loss": 0.1316, "step": 50614 }, { "epoch": 0.08974635166080219, "grad_norm": 0.7109375, "learning_rate": 0.0010372587561338757, "loss": 0.2364, "step": 50616 }, { "epoch": 0.08974989782611201, "grad_norm": 1.2734375, "learning_rate": 0.0010371993824242495, "loss": 0.1456, "step": 50618 }, { "epoch": 0.08975344399142182, "grad_norm": 1.5703125, "learning_rate": 0.0010371400089892765, "loss": 0.1865, "step": 50620 }, { "epoch": 0.08975699015673164, "grad_norm": 0.33984375, "learning_rate": 0.0010370806358292164, "loss": 0.2837, "step": 50622 }, { "epoch": 0.08976053632204145, "grad_norm": 0.5, "learning_rate": 0.0010370212629443292, "loss": 0.1972, "step": 50624 }, { "epoch": 0.08976408248735127, "grad_norm": 0.18359375, "learning_rate": 0.0010369618903348747, "loss": 0.1454, "step": 50626 }, { "epoch": 0.08976762865266108, "grad_norm": 0.1884765625, "learning_rate": 0.001036902518001112, "loss": 0.1344, "step": 50628 }, { "epoch": 0.08977117481797091, "grad_norm": 0.52734375, "learning_rate": 0.0010368431459433012, "loss": 0.2156, "step": 50630 }, { "epoch": 0.08977472098328072, "grad_norm": 0.2421875, "learning_rate": 0.0010367837741617016, "loss": 0.18, "step": 50632 }, { "epoch": 0.08977826714859054, "grad_norm": 0.2490234375, "learning_rate": 0.0010367244026565734, "loss": 0.151, "step": 50634 }, { "epoch": 0.08978181331390035, "grad_norm": 0.29296875, "learning_rate": 0.0010366650314281756, "loss": 0.1473, "step": 50636 }, { "epoch": 0.08978535947921017, "grad_norm": 0.2470703125, "learning_rate": 0.0010366056604767682, "loss": 0.1516, "step": 50638 }, { "epoch": 0.08978890564451998, "grad_norm": 0.6015625, "learning_rate": 0.0010365462898026105, "loss": 0.1877, "step": 50640 }, { "epoch": 0.0897924518098298, "grad_norm": 0.1484375, "learning_rate": 0.0010364869194059631, "loss": 0.1217, "step": 50642 }, { "epoch": 0.08979599797513961, "grad_norm": 0.7890625, "learning_rate": 0.0010364275492870847, "loss": 0.22, "step": 50644 }, { "epoch": 0.08979954414044942, "grad_norm": 0.435546875, "learning_rate": 0.0010363681794462352, "loss": 0.1795, "step": 50646 }, { "epoch": 0.08980309030575924, "grad_norm": 0.7265625, "learning_rate": 0.0010363088098836743, "loss": 0.2359, "step": 50648 }, { "epoch": 0.08980663647106905, "grad_norm": 0.51171875, "learning_rate": 0.0010362494405996622, "loss": 0.1804, "step": 50650 }, { "epoch": 0.08981018263637887, "grad_norm": 0.2890625, "learning_rate": 0.0010361900715944573, "loss": 0.1972, "step": 50652 }, { "epoch": 0.08981372880168868, "grad_norm": 0.8359375, "learning_rate": 0.0010361307028683205, "loss": 0.2126, "step": 50654 }, { "epoch": 0.0898172749669985, "grad_norm": 0.365234375, "learning_rate": 0.0010360713344215104, "loss": 0.16, "step": 50656 }, { "epoch": 0.08982082113230831, "grad_norm": 0.251953125, "learning_rate": 0.0010360119662542878, "loss": 0.1577, "step": 50658 }, { "epoch": 0.08982436729761813, "grad_norm": 0.345703125, "learning_rate": 0.001035952598366911, "loss": 0.2015, "step": 50660 }, { "epoch": 0.08982791346292794, "grad_norm": 0.396484375, "learning_rate": 0.0010358932307596407, "loss": 0.1888, "step": 50662 }, { "epoch": 0.08983145962823776, "grad_norm": 0.90234375, "learning_rate": 0.001035833863432736, "loss": 0.1997, "step": 50664 }, { "epoch": 0.08983500579354757, "grad_norm": 0.2353515625, "learning_rate": 0.001035774496386457, "loss": 0.2102, "step": 50666 }, { "epoch": 0.08983855195885738, "grad_norm": 0.21484375, "learning_rate": 0.001035715129621063, "loss": 0.1532, "step": 50668 }, { "epoch": 0.0898420981241672, "grad_norm": 0.51953125, "learning_rate": 0.0010356557631368136, "loss": 0.1892, "step": 50670 }, { "epoch": 0.08984564428947701, "grad_norm": 0.408203125, "learning_rate": 0.0010355963969339683, "loss": 0.172, "step": 50672 }, { "epoch": 0.08984919045478683, "grad_norm": 0.54296875, "learning_rate": 0.0010355370310127874, "loss": 0.2102, "step": 50674 }, { "epoch": 0.08985273662009666, "grad_norm": 0.6953125, "learning_rate": 0.0010354776653735296, "loss": 0.3479, "step": 50676 }, { "epoch": 0.08985628278540647, "grad_norm": 0.625, "learning_rate": 0.0010354183000164552, "loss": 0.2387, "step": 50678 }, { "epoch": 0.08985982895071629, "grad_norm": 0.228515625, "learning_rate": 0.0010353589349418235, "loss": 0.1367, "step": 50680 }, { "epoch": 0.0898633751160261, "grad_norm": 0.3828125, "learning_rate": 0.0010352995701498947, "loss": 0.143, "step": 50682 }, { "epoch": 0.08986692128133592, "grad_norm": 1.2734375, "learning_rate": 0.0010352402056409277, "loss": 0.1696, "step": 50684 }, { "epoch": 0.08987046744664573, "grad_norm": 1.65625, "learning_rate": 0.0010351808414151824, "loss": 0.2328, "step": 50686 }, { "epoch": 0.08987401361195554, "grad_norm": 0.46484375, "learning_rate": 0.0010351214774729184, "loss": 0.1468, "step": 50688 }, { "epoch": 0.08987755977726536, "grad_norm": 3.53125, "learning_rate": 0.0010350621138143952, "loss": 0.2135, "step": 50690 }, { "epoch": 0.08988110594257517, "grad_norm": 0.5234375, "learning_rate": 0.0010350027504398731, "loss": 0.1456, "step": 50692 }, { "epoch": 0.08988465210788499, "grad_norm": 0.82421875, "learning_rate": 0.0010349433873496111, "loss": 0.1881, "step": 50694 }, { "epoch": 0.0898881982731948, "grad_norm": 0.67578125, "learning_rate": 0.0010348840245438687, "loss": 0.2131, "step": 50696 }, { "epoch": 0.08989174443850462, "grad_norm": 0.3984375, "learning_rate": 0.0010348246620229056, "loss": 0.1645, "step": 50698 }, { "epoch": 0.08989529060381443, "grad_norm": 1.9296875, "learning_rate": 0.0010347652997869821, "loss": 0.3964, "step": 50700 }, { "epoch": 0.08989883676912425, "grad_norm": 0.248046875, "learning_rate": 0.001034705937836357, "loss": 0.2239, "step": 50702 }, { "epoch": 0.08990238293443406, "grad_norm": 0.7578125, "learning_rate": 0.0010346465761712901, "loss": 0.332, "step": 50704 }, { "epoch": 0.08990592909974388, "grad_norm": 0.44140625, "learning_rate": 0.0010345872147920414, "loss": 0.1792, "step": 50706 }, { "epoch": 0.08990947526505369, "grad_norm": 0.39453125, "learning_rate": 0.0010345278536988702, "loss": 0.1854, "step": 50708 }, { "epoch": 0.0899130214303635, "grad_norm": 0.25, "learning_rate": 0.0010344684928920358, "loss": 0.2102, "step": 50710 }, { "epoch": 0.08991656759567332, "grad_norm": 0.77734375, "learning_rate": 0.0010344091323717984, "loss": 0.1576, "step": 50712 }, { "epoch": 0.08992011376098313, "grad_norm": 0.314453125, "learning_rate": 0.0010343497721384172, "loss": 0.1354, "step": 50714 }, { "epoch": 0.08992365992629295, "grad_norm": 0.7265625, "learning_rate": 0.0010342904121921521, "loss": 0.1761, "step": 50716 }, { "epoch": 0.08992720609160276, "grad_norm": 0.34375, "learning_rate": 0.0010342310525332625, "loss": 0.1894, "step": 50718 }, { "epoch": 0.08993075225691259, "grad_norm": 0.609375, "learning_rate": 0.001034171693162008, "loss": 0.1511, "step": 50720 }, { "epoch": 0.0899342984222224, "grad_norm": 0.640625, "learning_rate": 0.0010341123340786484, "loss": 0.212, "step": 50722 }, { "epoch": 0.08993784458753222, "grad_norm": 0.5, "learning_rate": 0.0010340529752834433, "loss": 0.2009, "step": 50724 }, { "epoch": 0.08994139075284203, "grad_norm": 0.9921875, "learning_rate": 0.001033993616776652, "loss": 0.2469, "step": 50726 }, { "epoch": 0.08994493691815185, "grad_norm": 0.265625, "learning_rate": 0.0010339342585585342, "loss": 0.1376, "step": 50728 }, { "epoch": 0.08994848308346166, "grad_norm": 0.30859375, "learning_rate": 0.0010338749006293497, "loss": 0.1615, "step": 50730 }, { "epoch": 0.08995202924877148, "grad_norm": 0.310546875, "learning_rate": 0.0010338155429893583, "loss": 0.1715, "step": 50732 }, { "epoch": 0.08995557541408129, "grad_norm": 1.0625, "learning_rate": 0.0010337561856388188, "loss": 0.2357, "step": 50734 }, { "epoch": 0.08995912157939111, "grad_norm": 0.451171875, "learning_rate": 0.0010336968285779913, "loss": 0.2034, "step": 50736 }, { "epoch": 0.08996266774470092, "grad_norm": 0.50390625, "learning_rate": 0.0010336374718071353, "loss": 0.2218, "step": 50738 }, { "epoch": 0.08996621391001074, "grad_norm": 0.2578125, "learning_rate": 0.001033578115326511, "loss": 0.1674, "step": 50740 }, { "epoch": 0.08996976007532055, "grad_norm": 1.9765625, "learning_rate": 0.0010335187591363768, "loss": 0.3644, "step": 50742 }, { "epoch": 0.08997330624063037, "grad_norm": 0.80859375, "learning_rate": 0.0010334594032369933, "loss": 0.1878, "step": 50744 }, { "epoch": 0.08997685240594018, "grad_norm": 0.53125, "learning_rate": 0.0010334000476286197, "loss": 0.2757, "step": 50746 }, { "epoch": 0.08998039857125, "grad_norm": 0.2177734375, "learning_rate": 0.0010333406923115156, "loss": 0.1991, "step": 50748 }, { "epoch": 0.08998394473655981, "grad_norm": 0.89453125, "learning_rate": 0.0010332813372859403, "loss": 0.162, "step": 50750 }, { "epoch": 0.08998749090186962, "grad_norm": 0.236328125, "learning_rate": 0.0010332219825521538, "loss": 0.2076, "step": 50752 }, { "epoch": 0.08999103706717944, "grad_norm": 0.2392578125, "learning_rate": 0.0010331626281104157, "loss": 0.1327, "step": 50754 }, { "epoch": 0.08999458323248925, "grad_norm": 0.55078125, "learning_rate": 0.0010331032739609857, "loss": 0.1425, "step": 50756 }, { "epoch": 0.08999812939779907, "grad_norm": 0.29296875, "learning_rate": 0.0010330439201041226, "loss": 0.1912, "step": 50758 }, { "epoch": 0.09000167556310888, "grad_norm": 0.40234375, "learning_rate": 0.0010329845665400868, "loss": 0.1408, "step": 50760 }, { "epoch": 0.0900052217284187, "grad_norm": 0.390625, "learning_rate": 0.0010329252132691375, "loss": 0.1961, "step": 50762 }, { "epoch": 0.09000876789372851, "grad_norm": 0.296875, "learning_rate": 0.0010328658602915344, "loss": 0.1686, "step": 50764 }, { "epoch": 0.09001231405903834, "grad_norm": 0.5078125, "learning_rate": 0.0010328065076075368, "loss": 0.1847, "step": 50766 }, { "epoch": 0.09001586022434815, "grad_norm": 0.50390625, "learning_rate": 0.0010327471552174046, "loss": 0.2371, "step": 50768 }, { "epoch": 0.09001940638965797, "grad_norm": 1.21875, "learning_rate": 0.001032687803121397, "loss": 0.1947, "step": 50770 }, { "epoch": 0.09002295255496778, "grad_norm": 2.109375, "learning_rate": 0.0010326284513197744, "loss": 0.2728, "step": 50772 }, { "epoch": 0.0900264987202776, "grad_norm": 0.51953125, "learning_rate": 0.0010325690998127957, "loss": 0.2154, "step": 50774 }, { "epoch": 0.09003004488558741, "grad_norm": 0.2119140625, "learning_rate": 0.0010325097486007205, "loss": 0.195, "step": 50776 }, { "epoch": 0.09003359105089723, "grad_norm": 0.5390625, "learning_rate": 0.0010324503976838085, "loss": 0.2231, "step": 50778 }, { "epoch": 0.09003713721620704, "grad_norm": 0.81640625, "learning_rate": 0.001032391047062319, "loss": 0.1571, "step": 50780 }, { "epoch": 0.09004068338151686, "grad_norm": 0.2890625, "learning_rate": 0.0010323316967365125, "loss": 0.1797, "step": 50782 }, { "epoch": 0.09004422954682667, "grad_norm": 0.177734375, "learning_rate": 0.001032272346706647, "loss": 0.1748, "step": 50784 }, { "epoch": 0.09004777571213649, "grad_norm": 0.435546875, "learning_rate": 0.0010322129969729831, "loss": 0.1398, "step": 50786 }, { "epoch": 0.0900513218774463, "grad_norm": 0.26953125, "learning_rate": 0.0010321536475357803, "loss": 0.162, "step": 50788 }, { "epoch": 0.09005486804275611, "grad_norm": 0.44140625, "learning_rate": 0.0010320942983952984, "loss": 0.175, "step": 50790 }, { "epoch": 0.09005841420806593, "grad_norm": 0.419921875, "learning_rate": 0.001032034949551796, "loss": 0.1794, "step": 50792 }, { "epoch": 0.09006196037337574, "grad_norm": 0.232421875, "learning_rate": 0.0010319756010055334, "loss": 0.1494, "step": 50794 }, { "epoch": 0.09006550653868556, "grad_norm": 0.3984375, "learning_rate": 0.0010319162527567703, "loss": 0.1682, "step": 50796 }, { "epoch": 0.09006905270399537, "grad_norm": 0.197265625, "learning_rate": 0.0010318569048057657, "loss": 0.134, "step": 50798 }, { "epoch": 0.09007259886930519, "grad_norm": 0.71484375, "learning_rate": 0.0010317975571527795, "loss": 0.2317, "step": 50800 }, { "epoch": 0.090076145034615, "grad_norm": 0.265625, "learning_rate": 0.0010317382097980712, "loss": 0.2225, "step": 50802 }, { "epoch": 0.09007969119992482, "grad_norm": 0.3359375, "learning_rate": 0.0010316788627419001, "loss": 0.1519, "step": 50804 }, { "epoch": 0.09008323736523463, "grad_norm": 0.43359375, "learning_rate": 0.0010316195159845265, "loss": 0.2082, "step": 50806 }, { "epoch": 0.09008678353054445, "grad_norm": 0.54296875, "learning_rate": 0.001031560169526209, "loss": 0.5227, "step": 50808 }, { "epoch": 0.09009032969585426, "grad_norm": 1.2109375, "learning_rate": 0.0010315008233672074, "loss": 0.1751, "step": 50810 }, { "epoch": 0.09009387586116409, "grad_norm": 1.1484375, "learning_rate": 0.0010314414775077815, "loss": 0.3788, "step": 50812 }, { "epoch": 0.0900974220264739, "grad_norm": 0.453125, "learning_rate": 0.001031382131948191, "loss": 0.2085, "step": 50814 }, { "epoch": 0.09010096819178372, "grad_norm": 0.52734375, "learning_rate": 0.0010313227866886952, "loss": 0.1746, "step": 50816 }, { "epoch": 0.09010451435709353, "grad_norm": 0.298828125, "learning_rate": 0.0010312634417295535, "loss": 0.2001, "step": 50818 }, { "epoch": 0.09010806052240335, "grad_norm": 0.5625, "learning_rate": 0.0010312040970710255, "loss": 0.1653, "step": 50820 }, { "epoch": 0.09011160668771316, "grad_norm": 0.5859375, "learning_rate": 0.0010311447527133713, "loss": 0.1641, "step": 50822 }, { "epoch": 0.09011515285302298, "grad_norm": 0.3203125, "learning_rate": 0.0010310854086568494, "loss": 0.1953, "step": 50824 }, { "epoch": 0.09011869901833279, "grad_norm": 1.046875, "learning_rate": 0.00103102606490172, "loss": 0.2612, "step": 50826 }, { "epoch": 0.0901222451836426, "grad_norm": 0.255859375, "learning_rate": 0.0010309667214482424, "loss": 0.1746, "step": 50828 }, { "epoch": 0.09012579134895242, "grad_norm": 0.4140625, "learning_rate": 0.0010309073782966767, "loss": 0.1586, "step": 50830 }, { "epoch": 0.09012933751426223, "grad_norm": 0.85546875, "learning_rate": 0.0010308480354472816, "loss": 0.1848, "step": 50832 }, { "epoch": 0.09013288367957205, "grad_norm": 0.1953125, "learning_rate": 0.0010307886929003171, "loss": 0.3749, "step": 50834 }, { "epoch": 0.09013642984488186, "grad_norm": 0.466796875, "learning_rate": 0.0010307293506560428, "loss": 0.1678, "step": 50836 }, { "epoch": 0.09013997601019168, "grad_norm": 0.265625, "learning_rate": 0.001030670008714718, "loss": 0.2123, "step": 50838 }, { "epoch": 0.09014352217550149, "grad_norm": 0.345703125, "learning_rate": 0.0010306106670766025, "loss": 0.171, "step": 50840 }, { "epoch": 0.0901470683408113, "grad_norm": 0.21484375, "learning_rate": 0.0010305513257419553, "loss": 0.1823, "step": 50842 }, { "epoch": 0.09015061450612112, "grad_norm": 0.4140625, "learning_rate": 0.0010304919847110364, "loss": 0.1183, "step": 50844 }, { "epoch": 0.09015416067143094, "grad_norm": 0.84765625, "learning_rate": 0.0010304326439841054, "loss": 0.1716, "step": 50846 }, { "epoch": 0.09015770683674075, "grad_norm": 0.326171875, "learning_rate": 0.0010303733035614212, "loss": 0.128, "step": 50848 }, { "epoch": 0.09016125300205056, "grad_norm": 0.255859375, "learning_rate": 0.001030313963443244, "loss": 0.1566, "step": 50850 }, { "epoch": 0.09016479916736038, "grad_norm": 0.73828125, "learning_rate": 0.001030254623629833, "loss": 0.4804, "step": 50852 }, { "epoch": 0.0901683453326702, "grad_norm": 0.291015625, "learning_rate": 0.0010301952841214477, "loss": 0.2436, "step": 50854 }, { "epoch": 0.09017189149798002, "grad_norm": 0.30859375, "learning_rate": 0.0010301359449183482, "loss": 0.1963, "step": 50856 }, { "epoch": 0.09017543766328984, "grad_norm": 0.25390625, "learning_rate": 0.0010300766060207926, "loss": 0.1368, "step": 50858 }, { "epoch": 0.09017898382859965, "grad_norm": 0.25, "learning_rate": 0.0010300172674290416, "loss": 0.1681, "step": 50860 }, { "epoch": 0.09018252999390947, "grad_norm": 1.1171875, "learning_rate": 0.0010299579291433545, "loss": 0.2636, "step": 50862 }, { "epoch": 0.09018607615921928, "grad_norm": 0.69140625, "learning_rate": 0.001029898591163991, "loss": 0.2084, "step": 50864 }, { "epoch": 0.0901896223245291, "grad_norm": 0.62109375, "learning_rate": 0.00102983925349121, "loss": 0.1665, "step": 50866 }, { "epoch": 0.09019316848983891, "grad_norm": 0.57421875, "learning_rate": 0.0010297799161252715, "loss": 0.1517, "step": 50868 }, { "epoch": 0.09019671465514872, "grad_norm": 0.55859375, "learning_rate": 0.0010297205790664348, "loss": 0.1803, "step": 50870 }, { "epoch": 0.09020026082045854, "grad_norm": 0.44140625, "learning_rate": 0.0010296612423149596, "loss": 0.1957, "step": 50872 }, { "epoch": 0.09020380698576835, "grad_norm": 0.5078125, "learning_rate": 0.0010296019058711053, "loss": 0.1568, "step": 50874 }, { "epoch": 0.09020735315107817, "grad_norm": 0.224609375, "learning_rate": 0.001029542569735131, "loss": 0.1516, "step": 50876 }, { "epoch": 0.09021089931638798, "grad_norm": 0.671875, "learning_rate": 0.0010294832339072968, "loss": 0.2323, "step": 50878 }, { "epoch": 0.0902144454816978, "grad_norm": 0.388671875, "learning_rate": 0.0010294238983878624, "loss": 0.2726, "step": 50880 }, { "epoch": 0.09021799164700761, "grad_norm": 0.298828125, "learning_rate": 0.001029364563177086, "loss": 0.1681, "step": 50882 }, { "epoch": 0.09022153781231743, "grad_norm": 0.26171875, "learning_rate": 0.0010293052282752285, "loss": 0.1489, "step": 50884 }, { "epoch": 0.09022508397762724, "grad_norm": 0.39453125, "learning_rate": 0.0010292458936825486, "loss": 0.173, "step": 50886 }, { "epoch": 0.09022863014293706, "grad_norm": 0.302734375, "learning_rate": 0.0010291865593993067, "loss": 0.1401, "step": 50888 }, { "epoch": 0.09023217630824687, "grad_norm": 0.244140625, "learning_rate": 0.0010291272254257609, "loss": 0.1764, "step": 50890 }, { "epoch": 0.09023572247355668, "grad_norm": 0.41015625, "learning_rate": 0.001029067891762172, "loss": 0.1673, "step": 50892 }, { "epoch": 0.0902392686388665, "grad_norm": 0.95703125, "learning_rate": 0.0010290085584087985, "loss": 0.2154, "step": 50894 }, { "epoch": 0.09024281480417631, "grad_norm": 0.333984375, "learning_rate": 0.0010289492253659008, "loss": 0.1161, "step": 50896 }, { "epoch": 0.09024636096948613, "grad_norm": 0.26953125, "learning_rate": 0.0010288898926337376, "loss": 0.1702, "step": 50898 }, { "epoch": 0.09024990713479594, "grad_norm": 0.45703125, "learning_rate": 0.0010288305602125685, "loss": 0.2119, "step": 50900 }, { "epoch": 0.09025345330010577, "grad_norm": 0.5859375, "learning_rate": 0.0010287712281026532, "loss": 0.2084, "step": 50902 }, { "epoch": 0.09025699946541559, "grad_norm": 0.2294921875, "learning_rate": 0.0010287118963042518, "loss": 0.2736, "step": 50904 }, { "epoch": 0.0902605456307254, "grad_norm": 0.46484375, "learning_rate": 0.001028652564817623, "loss": 0.1303, "step": 50906 }, { "epoch": 0.09026409179603521, "grad_norm": 0.7734375, "learning_rate": 0.0010285932336430257, "loss": 0.205, "step": 50908 }, { "epoch": 0.09026763796134503, "grad_norm": 1.734375, "learning_rate": 0.0010285339027807207, "loss": 0.3623, "step": 50910 }, { "epoch": 0.09027118412665484, "grad_norm": 0.478515625, "learning_rate": 0.001028474572230967, "loss": 0.2784, "step": 50912 }, { "epoch": 0.09027473029196466, "grad_norm": 0.98828125, "learning_rate": 0.001028415241994024, "loss": 0.2094, "step": 50914 }, { "epoch": 0.09027827645727447, "grad_norm": 0.98046875, "learning_rate": 0.0010283559120701506, "loss": 0.1905, "step": 50916 }, { "epoch": 0.09028182262258429, "grad_norm": 1.1328125, "learning_rate": 0.0010282965824596072, "loss": 0.1606, "step": 50918 }, { "epoch": 0.0902853687878941, "grad_norm": 0.87890625, "learning_rate": 0.0010282372531626532, "loss": 0.1702, "step": 50920 }, { "epoch": 0.09028891495320392, "grad_norm": 0.84765625, "learning_rate": 0.0010281779241795473, "loss": 0.1903, "step": 50922 }, { "epoch": 0.09029246111851373, "grad_norm": 0.38671875, "learning_rate": 0.0010281185955105495, "loss": 0.1932, "step": 50924 }, { "epoch": 0.09029600728382355, "grad_norm": 1.1015625, "learning_rate": 0.001028059267155919, "loss": 0.1906, "step": 50926 }, { "epoch": 0.09029955344913336, "grad_norm": 0.60546875, "learning_rate": 0.001027999939115916, "loss": 0.1781, "step": 50928 }, { "epoch": 0.09030309961444317, "grad_norm": 0.439453125, "learning_rate": 0.0010279406113907993, "loss": 0.2086, "step": 50930 }, { "epoch": 0.09030664577975299, "grad_norm": 0.28125, "learning_rate": 0.0010278812839808285, "loss": 0.1628, "step": 50932 }, { "epoch": 0.0903101919450628, "grad_norm": 0.326171875, "learning_rate": 0.0010278219568862629, "loss": 0.1582, "step": 50934 }, { "epoch": 0.09031373811037262, "grad_norm": 1.3515625, "learning_rate": 0.0010277626301073623, "loss": 0.2642, "step": 50936 }, { "epoch": 0.09031728427568243, "grad_norm": 0.1943359375, "learning_rate": 0.001027703303644386, "loss": 0.1405, "step": 50938 }, { "epoch": 0.09032083044099225, "grad_norm": 0.462890625, "learning_rate": 0.0010276439774975932, "loss": 0.2063, "step": 50940 }, { "epoch": 0.09032437660630206, "grad_norm": 0.765625, "learning_rate": 0.0010275846516672436, "loss": 0.1849, "step": 50942 }, { "epoch": 0.09032792277161188, "grad_norm": 0.67578125, "learning_rate": 0.001027525326153597, "loss": 0.2219, "step": 50944 }, { "epoch": 0.09033146893692169, "grad_norm": 1.453125, "learning_rate": 0.0010274660009569124, "loss": 0.2107, "step": 50946 }, { "epoch": 0.09033501510223152, "grad_norm": 1.890625, "learning_rate": 0.0010274066760774495, "loss": 0.2909, "step": 50948 }, { "epoch": 0.09033856126754133, "grad_norm": 0.58984375, "learning_rate": 0.0010273473515154674, "loss": 0.5016, "step": 50950 }, { "epoch": 0.09034210743285115, "grad_norm": 0.3125, "learning_rate": 0.0010272880272712258, "loss": 0.2069, "step": 50952 }, { "epoch": 0.09034565359816096, "grad_norm": 0.55078125, "learning_rate": 0.0010272287033449843, "loss": 0.1372, "step": 50954 }, { "epoch": 0.09034919976347078, "grad_norm": 0.390625, "learning_rate": 0.0010271693797370021, "loss": 0.176, "step": 50956 }, { "epoch": 0.09035274592878059, "grad_norm": 0.27734375, "learning_rate": 0.0010271100564475386, "loss": 0.1474, "step": 50958 }, { "epoch": 0.0903562920940904, "grad_norm": 0.50390625, "learning_rate": 0.0010270507334768533, "loss": 0.2611, "step": 50960 }, { "epoch": 0.09035983825940022, "grad_norm": 0.2412109375, "learning_rate": 0.0010269914108252062, "loss": 0.1766, "step": 50962 }, { "epoch": 0.09036338442471004, "grad_norm": 0.408203125, "learning_rate": 0.001026932088492856, "loss": 0.1714, "step": 50964 }, { "epoch": 0.09036693059001985, "grad_norm": 0.54296875, "learning_rate": 0.0010268727664800624, "loss": 0.1964, "step": 50966 }, { "epoch": 0.09037047675532967, "grad_norm": 0.486328125, "learning_rate": 0.0010268134447870847, "loss": 0.1285, "step": 50968 }, { "epoch": 0.09037402292063948, "grad_norm": 0.419921875, "learning_rate": 0.001026754123414183, "loss": 0.1278, "step": 50970 }, { "epoch": 0.0903775690859493, "grad_norm": 1.7734375, "learning_rate": 0.0010266948023616154, "loss": 0.1815, "step": 50972 }, { "epoch": 0.09038111525125911, "grad_norm": 0.72265625, "learning_rate": 0.0010266354816296426, "loss": 0.2171, "step": 50974 }, { "epoch": 0.09038466141656892, "grad_norm": 1.6484375, "learning_rate": 0.0010265761612185233, "loss": 0.3476, "step": 50976 }, { "epoch": 0.09038820758187874, "grad_norm": 1.015625, "learning_rate": 0.0010265168411285179, "loss": 0.1675, "step": 50978 }, { "epoch": 0.09039175374718855, "grad_norm": 0.9453125, "learning_rate": 0.0010264575213598847, "loss": 0.1611, "step": 50980 }, { "epoch": 0.09039529991249837, "grad_norm": 0.28515625, "learning_rate": 0.0010263982019128833, "loss": 0.1489, "step": 50982 }, { "epoch": 0.09039884607780818, "grad_norm": 1.4375, "learning_rate": 0.0010263388827877738, "loss": 0.204, "step": 50984 }, { "epoch": 0.090402392243118, "grad_norm": 0.376953125, "learning_rate": 0.0010262795639848152, "loss": 0.1728, "step": 50986 }, { "epoch": 0.09040593840842781, "grad_norm": 0.3125, "learning_rate": 0.001026220245504267, "loss": 0.1824, "step": 50988 }, { "epoch": 0.09040948457373763, "grad_norm": 0.439453125, "learning_rate": 0.0010261609273463886, "loss": 0.2274, "step": 50990 }, { "epoch": 0.09041303073904745, "grad_norm": 0.326171875, "learning_rate": 0.0010261016095114391, "loss": 0.1612, "step": 50992 }, { "epoch": 0.09041657690435727, "grad_norm": 0.365234375, "learning_rate": 0.0010260422919996788, "loss": 0.2173, "step": 50994 }, { "epoch": 0.09042012306966708, "grad_norm": 0.73828125, "learning_rate": 0.0010259829748113661, "loss": 0.2359, "step": 50996 }, { "epoch": 0.0904236692349769, "grad_norm": 0.46875, "learning_rate": 0.0010259236579467609, "loss": 0.1701, "step": 50998 }, { "epoch": 0.09042721540028671, "grad_norm": 0.2412109375, "learning_rate": 0.0010258643414061226, "loss": 0.1177, "step": 51000 }, { "epoch": 0.09043076156559653, "grad_norm": 0.4140625, "learning_rate": 0.0010258050251897109, "loss": 0.2203, "step": 51002 }, { "epoch": 0.09043430773090634, "grad_norm": 0.96875, "learning_rate": 0.0010257457092977846, "loss": 0.3289, "step": 51004 }, { "epoch": 0.09043785389621616, "grad_norm": 0.6015625, "learning_rate": 0.0010256863937306035, "loss": 0.211, "step": 51006 }, { "epoch": 0.09044140006152597, "grad_norm": 0.69140625, "learning_rate": 0.001025627078488427, "loss": 0.2094, "step": 51008 }, { "epoch": 0.09044494622683578, "grad_norm": 0.70703125, "learning_rate": 0.0010255677635715148, "loss": 0.2003, "step": 51010 }, { "epoch": 0.0904484923921456, "grad_norm": 0.4375, "learning_rate": 0.0010255084489801255, "loss": 0.2107, "step": 51012 }, { "epoch": 0.09045203855745541, "grad_norm": 0.59375, "learning_rate": 0.001025449134714519, "loss": 0.2027, "step": 51014 }, { "epoch": 0.09045558472276523, "grad_norm": 0.578125, "learning_rate": 0.0010253898207749545, "loss": 0.2167, "step": 51016 }, { "epoch": 0.09045913088807504, "grad_norm": 0.7578125, "learning_rate": 0.0010253305071616923, "loss": 0.2334, "step": 51018 }, { "epoch": 0.09046267705338486, "grad_norm": 0.29296875, "learning_rate": 0.0010252711938749904, "loss": 0.2345, "step": 51020 }, { "epoch": 0.09046622321869467, "grad_norm": 2.640625, "learning_rate": 0.001025211880915109, "loss": 0.3669, "step": 51022 }, { "epoch": 0.09046976938400449, "grad_norm": 0.8203125, "learning_rate": 0.0010251525682823078, "loss": 0.1785, "step": 51024 }, { "epoch": 0.0904733155493143, "grad_norm": 0.28515625, "learning_rate": 0.0010250932559768453, "loss": 0.1655, "step": 51026 }, { "epoch": 0.09047686171462412, "grad_norm": 0.248046875, "learning_rate": 0.001025033943998982, "loss": 0.1629, "step": 51028 }, { "epoch": 0.09048040787993393, "grad_norm": 0.8203125, "learning_rate": 0.001024974632348976, "loss": 0.1996, "step": 51030 }, { "epoch": 0.09048395404524374, "grad_norm": 1.3203125, "learning_rate": 0.0010249153210270877, "loss": 0.2564, "step": 51032 }, { "epoch": 0.09048750021055356, "grad_norm": 0.484375, "learning_rate": 0.001024856010033576, "loss": 0.151, "step": 51034 }, { "epoch": 0.09049104637586337, "grad_norm": 0.92578125, "learning_rate": 0.0010247966993687009, "loss": 0.1636, "step": 51036 }, { "epoch": 0.0904945925411732, "grad_norm": 0.765625, "learning_rate": 0.0010247373890327207, "loss": 0.1602, "step": 51038 }, { "epoch": 0.09049813870648302, "grad_norm": 0.859375, "learning_rate": 0.001024678079025896, "loss": 0.2115, "step": 51040 }, { "epoch": 0.09050168487179283, "grad_norm": 0.4765625, "learning_rate": 0.0010246187693484855, "loss": 0.1527, "step": 51042 }, { "epoch": 0.09050523103710265, "grad_norm": 0.625, "learning_rate": 0.0010245594600007488, "loss": 0.1852, "step": 51044 }, { "epoch": 0.09050877720241246, "grad_norm": 0.22265625, "learning_rate": 0.001024500150982945, "loss": 0.1744, "step": 51046 }, { "epoch": 0.09051232336772227, "grad_norm": 0.66796875, "learning_rate": 0.0010244408422953335, "loss": 0.2131, "step": 51048 }, { "epoch": 0.09051586953303209, "grad_norm": 0.31640625, "learning_rate": 0.001024381533938174, "loss": 0.1335, "step": 51050 }, { "epoch": 0.0905194156983419, "grad_norm": 0.54296875, "learning_rate": 0.0010243222259117262, "loss": 0.3889, "step": 51052 }, { "epoch": 0.09052296186365172, "grad_norm": 2.359375, "learning_rate": 0.0010242629182162487, "loss": 0.2579, "step": 51054 }, { "epoch": 0.09052650802896153, "grad_norm": 1.2109375, "learning_rate": 0.001024203610852001, "loss": 0.1554, "step": 51056 }, { "epoch": 0.09053005419427135, "grad_norm": 0.6484375, "learning_rate": 0.0010241443038192428, "loss": 0.1996, "step": 51058 }, { "epoch": 0.09053360035958116, "grad_norm": 0.41796875, "learning_rate": 0.0010240849971182337, "loss": 0.1771, "step": 51060 }, { "epoch": 0.09053714652489098, "grad_norm": 0.2490234375, "learning_rate": 0.0010240256907492321, "loss": 0.2043, "step": 51062 }, { "epoch": 0.09054069269020079, "grad_norm": 0.376953125, "learning_rate": 0.0010239663847124985, "loss": 0.4302, "step": 51064 }, { "epoch": 0.0905442388555106, "grad_norm": 0.77734375, "learning_rate": 0.0010239070790082913, "loss": 0.2064, "step": 51066 }, { "epoch": 0.09054778502082042, "grad_norm": 0.275390625, "learning_rate": 0.001023847773636871, "loss": 0.1725, "step": 51068 }, { "epoch": 0.09055133118613024, "grad_norm": 0.94921875, "learning_rate": 0.0010237884685984958, "loss": 0.2725, "step": 51070 }, { "epoch": 0.09055487735144005, "grad_norm": 0.318359375, "learning_rate": 0.0010237291638934257, "loss": 0.1655, "step": 51072 }, { "epoch": 0.09055842351674986, "grad_norm": 0.7890625, "learning_rate": 0.0010236698595219198, "loss": 0.2466, "step": 51074 }, { "epoch": 0.09056196968205968, "grad_norm": 1.2734375, "learning_rate": 0.001023610555484238, "loss": 0.3087, "step": 51076 }, { "epoch": 0.0905655158473695, "grad_norm": 0.66796875, "learning_rate": 0.0010235512517806388, "loss": 0.2527, "step": 51078 }, { "epoch": 0.09056906201267931, "grad_norm": 0.45703125, "learning_rate": 0.0010234919484113823, "loss": 0.2252, "step": 51080 }, { "epoch": 0.09057260817798912, "grad_norm": 0.451171875, "learning_rate": 0.0010234326453767272, "loss": 0.287, "step": 51082 }, { "epoch": 0.09057615434329895, "grad_norm": 0.361328125, "learning_rate": 0.001023373342676934, "loss": 0.1724, "step": 51084 }, { "epoch": 0.09057970050860877, "grad_norm": 0.458984375, "learning_rate": 0.0010233140403122607, "loss": 0.151, "step": 51086 }, { "epoch": 0.09058324667391858, "grad_norm": 0.314453125, "learning_rate": 0.0010232547382829672, "loss": 0.1842, "step": 51088 }, { "epoch": 0.0905867928392284, "grad_norm": 0.220703125, "learning_rate": 0.001023195436589313, "loss": 0.2379, "step": 51090 }, { "epoch": 0.09059033900453821, "grad_norm": 0.2080078125, "learning_rate": 0.0010231361352315575, "loss": 0.1468, "step": 51092 }, { "epoch": 0.09059388516984802, "grad_norm": 0.62890625, "learning_rate": 0.00102307683420996, "loss": 0.2301, "step": 51094 }, { "epoch": 0.09059743133515784, "grad_norm": 0.2275390625, "learning_rate": 0.0010230175335247792, "loss": 0.2504, "step": 51096 }, { "epoch": 0.09060097750046765, "grad_norm": 0.31640625, "learning_rate": 0.0010229582331762754, "loss": 0.1652, "step": 51098 }, { "epoch": 0.09060452366577747, "grad_norm": 0.404296875, "learning_rate": 0.0010228989331647076, "loss": 0.2142, "step": 51100 }, { "epoch": 0.09060806983108728, "grad_norm": 0.251953125, "learning_rate": 0.0010228396334903348, "loss": 0.1672, "step": 51102 }, { "epoch": 0.0906116159963971, "grad_norm": 0.33203125, "learning_rate": 0.001022780334153417, "loss": 0.246, "step": 51104 }, { "epoch": 0.09061516216170691, "grad_norm": 0.2099609375, "learning_rate": 0.0010227210351542127, "loss": 0.1337, "step": 51106 }, { "epoch": 0.09061870832701673, "grad_norm": 0.330078125, "learning_rate": 0.0010226617364929818, "loss": 0.1603, "step": 51108 }, { "epoch": 0.09062225449232654, "grad_norm": 0.224609375, "learning_rate": 0.0010226024381699839, "loss": 0.1638, "step": 51110 }, { "epoch": 0.09062580065763635, "grad_norm": 0.4609375, "learning_rate": 0.0010225431401854777, "loss": 0.1787, "step": 51112 }, { "epoch": 0.09062934682294617, "grad_norm": 0.69921875, "learning_rate": 0.001022483842539723, "loss": 0.2416, "step": 51114 }, { "epoch": 0.09063289298825598, "grad_norm": 1.90625, "learning_rate": 0.0010224245452329786, "loss": 0.2378, "step": 51116 }, { "epoch": 0.0906364391535658, "grad_norm": 0.455078125, "learning_rate": 0.0010223652482655046, "loss": 0.2795, "step": 51118 }, { "epoch": 0.09063998531887561, "grad_norm": 0.703125, "learning_rate": 0.0010223059516375596, "loss": 0.2909, "step": 51120 }, { "epoch": 0.09064353148418543, "grad_norm": 0.326171875, "learning_rate": 0.001022246655349403, "loss": 0.151, "step": 51122 }, { "epoch": 0.09064707764949524, "grad_norm": 0.263671875, "learning_rate": 0.0010221873594012949, "loss": 0.1226, "step": 51124 }, { "epoch": 0.09065062381480506, "grad_norm": 0.439453125, "learning_rate": 0.0010221280637934941, "loss": 0.178, "step": 51126 }, { "epoch": 0.09065416998011488, "grad_norm": 0.2490234375, "learning_rate": 0.0010220687685262597, "loss": 0.1576, "step": 51128 }, { "epoch": 0.0906577161454247, "grad_norm": 0.4296875, "learning_rate": 0.0010220094735998511, "loss": 0.1701, "step": 51130 }, { "epoch": 0.09066126231073451, "grad_norm": 1.140625, "learning_rate": 0.001021950179014528, "loss": 0.3283, "step": 51132 }, { "epoch": 0.09066480847604433, "grad_norm": 0.91015625, "learning_rate": 0.0010218908847705495, "loss": 0.3989, "step": 51134 }, { "epoch": 0.09066835464135414, "grad_norm": 0.56640625, "learning_rate": 0.0010218315908681749, "loss": 0.2503, "step": 51136 }, { "epoch": 0.09067190080666396, "grad_norm": 0.5703125, "learning_rate": 0.0010217722973076633, "loss": 0.158, "step": 51138 }, { "epoch": 0.09067544697197377, "grad_norm": 0.2578125, "learning_rate": 0.001021713004089274, "loss": 0.1457, "step": 51140 }, { "epoch": 0.09067899313728359, "grad_norm": 0.55859375, "learning_rate": 0.0010216537112132673, "loss": 0.1511, "step": 51142 }, { "epoch": 0.0906825393025934, "grad_norm": 0.55078125, "learning_rate": 0.0010215944186799013, "loss": 0.1872, "step": 51144 }, { "epoch": 0.09068608546790322, "grad_norm": 0.46484375, "learning_rate": 0.0010215351264894357, "loss": 0.217, "step": 51146 }, { "epoch": 0.09068963163321303, "grad_norm": 1.1015625, "learning_rate": 0.0010214758346421297, "loss": 0.2124, "step": 51148 }, { "epoch": 0.09069317779852284, "grad_norm": 0.328125, "learning_rate": 0.0010214165431382434, "loss": 0.2091, "step": 51150 }, { "epoch": 0.09069672396383266, "grad_norm": 0.65234375, "learning_rate": 0.0010213572519780349, "loss": 0.2218, "step": 51152 }, { "epoch": 0.09070027012914247, "grad_norm": 1.3046875, "learning_rate": 0.0010212979611617645, "loss": 0.2553, "step": 51154 }, { "epoch": 0.09070381629445229, "grad_norm": 0.390625, "learning_rate": 0.001021238670689691, "loss": 0.2001, "step": 51156 }, { "epoch": 0.0907073624597621, "grad_norm": 0.546875, "learning_rate": 0.001021179380562074, "loss": 0.1903, "step": 51158 }, { "epoch": 0.09071090862507192, "grad_norm": 0.2373046875, "learning_rate": 0.0010211200907791723, "loss": 0.1731, "step": 51160 }, { "epoch": 0.09071445479038173, "grad_norm": 0.43359375, "learning_rate": 0.0010210608013412454, "loss": 0.1638, "step": 51162 }, { "epoch": 0.09071800095569155, "grad_norm": 0.83984375, "learning_rate": 0.001021001512248553, "loss": 0.2479, "step": 51164 }, { "epoch": 0.09072154712100136, "grad_norm": 0.73046875, "learning_rate": 0.001020942223501354, "loss": 0.1758, "step": 51166 }, { "epoch": 0.09072509328631118, "grad_norm": 0.77734375, "learning_rate": 0.0010208829350999077, "loss": 0.1718, "step": 51168 }, { "epoch": 0.09072863945162099, "grad_norm": 0.392578125, "learning_rate": 0.0010208236470444737, "loss": 0.2129, "step": 51170 }, { "epoch": 0.0907321856169308, "grad_norm": 0.3515625, "learning_rate": 0.0010207643593353108, "loss": 0.3308, "step": 51172 }, { "epoch": 0.09073573178224063, "grad_norm": 0.494140625, "learning_rate": 0.0010207050719726787, "loss": 0.2061, "step": 51174 }, { "epoch": 0.09073927794755045, "grad_norm": 0.64453125, "learning_rate": 0.0010206457849568367, "loss": 0.1544, "step": 51176 }, { "epoch": 0.09074282411286026, "grad_norm": 0.23828125, "learning_rate": 0.0010205864982880438, "loss": 0.2566, "step": 51178 }, { "epoch": 0.09074637027817008, "grad_norm": 0.5859375, "learning_rate": 0.0010205272119665591, "loss": 0.2082, "step": 51180 }, { "epoch": 0.09074991644347989, "grad_norm": 0.302734375, "learning_rate": 0.001020467925992643, "loss": 0.1376, "step": 51182 }, { "epoch": 0.0907534626087897, "grad_norm": 0.333984375, "learning_rate": 0.0010204086403665534, "loss": 0.1482, "step": 51184 }, { "epoch": 0.09075700877409952, "grad_norm": 0.1767578125, "learning_rate": 0.0010203493550885502, "loss": 0.1286, "step": 51186 }, { "epoch": 0.09076055493940934, "grad_norm": 0.251953125, "learning_rate": 0.0010202900701588927, "loss": 0.1526, "step": 51188 }, { "epoch": 0.09076410110471915, "grad_norm": 0.482421875, "learning_rate": 0.0010202307855778404, "loss": 0.1772, "step": 51190 }, { "epoch": 0.09076764727002896, "grad_norm": 0.44140625, "learning_rate": 0.001020171501345652, "loss": 0.1628, "step": 51192 }, { "epoch": 0.09077119343533878, "grad_norm": 0.373046875, "learning_rate": 0.001020112217462587, "loss": 0.1882, "step": 51194 }, { "epoch": 0.0907747396006486, "grad_norm": 0.46484375, "learning_rate": 0.0010200529339289052, "loss": 0.1827, "step": 51196 }, { "epoch": 0.09077828576595841, "grad_norm": 0.197265625, "learning_rate": 0.0010199936507448647, "loss": 0.1383, "step": 51198 }, { "epoch": 0.09078183193126822, "grad_norm": 0.458984375, "learning_rate": 0.0010199343679107265, "loss": 0.2201, "step": 51200 }, { "epoch": 0.09078537809657804, "grad_norm": 0.2236328125, "learning_rate": 0.0010198750854267483, "loss": 0.1403, "step": 51202 }, { "epoch": 0.09078892426188785, "grad_norm": 0.58203125, "learning_rate": 0.0010198158032931896, "loss": 0.2001, "step": 51204 }, { "epoch": 0.09079247042719767, "grad_norm": 0.54296875, "learning_rate": 0.0010197565215103105, "loss": 0.358, "step": 51206 }, { "epoch": 0.09079601659250748, "grad_norm": 0.6015625, "learning_rate": 0.0010196972400783697, "loss": 0.1991, "step": 51208 }, { "epoch": 0.0907995627578173, "grad_norm": 0.26171875, "learning_rate": 0.0010196379589976266, "loss": 0.2274, "step": 51210 }, { "epoch": 0.09080310892312711, "grad_norm": 0.29296875, "learning_rate": 0.00101957867826834, "loss": 0.2307, "step": 51212 }, { "epoch": 0.09080665508843692, "grad_norm": 0.62109375, "learning_rate": 0.0010195193978907698, "loss": 0.1721, "step": 51214 }, { "epoch": 0.09081020125374674, "grad_norm": 1.375, "learning_rate": 0.0010194601178651753, "loss": 0.3131, "step": 51216 }, { "epoch": 0.09081374741905655, "grad_norm": 0.439453125, "learning_rate": 0.001019400838191815, "loss": 0.1929, "step": 51218 }, { "epoch": 0.09081729358436638, "grad_norm": 0.34375, "learning_rate": 0.0010193415588709486, "loss": 0.1378, "step": 51220 }, { "epoch": 0.0908208397496762, "grad_norm": 0.43359375, "learning_rate": 0.0010192822799028355, "loss": 0.1764, "step": 51222 }, { "epoch": 0.09082438591498601, "grad_norm": 0.455078125, "learning_rate": 0.0010192230012877352, "loss": 0.1687, "step": 51224 }, { "epoch": 0.09082793208029583, "grad_norm": 0.384765625, "learning_rate": 0.001019163723025906, "loss": 0.1617, "step": 51226 }, { "epoch": 0.09083147824560564, "grad_norm": 0.36328125, "learning_rate": 0.0010191044451176082, "loss": 0.214, "step": 51228 }, { "epoch": 0.09083502441091545, "grad_norm": 0.388671875, "learning_rate": 0.0010190451675631, "loss": 0.2674, "step": 51230 }, { "epoch": 0.09083857057622527, "grad_norm": 1.21875, "learning_rate": 0.001018985890362642, "loss": 0.2864, "step": 51232 }, { "epoch": 0.09084211674153508, "grad_norm": 0.51953125, "learning_rate": 0.001018926613516492, "loss": 0.1434, "step": 51234 }, { "epoch": 0.0908456629068449, "grad_norm": 0.55078125, "learning_rate": 0.0010188673370249103, "loss": 0.1216, "step": 51236 }, { "epoch": 0.09084920907215471, "grad_norm": 0.8046875, "learning_rate": 0.0010188080608881551, "loss": 0.2564, "step": 51238 }, { "epoch": 0.09085275523746453, "grad_norm": 0.54296875, "learning_rate": 0.0010187487851064869, "loss": 0.1788, "step": 51240 }, { "epoch": 0.09085630140277434, "grad_norm": 0.5, "learning_rate": 0.0010186895096801643, "loss": 0.1882, "step": 51242 }, { "epoch": 0.09085984756808416, "grad_norm": 0.2001953125, "learning_rate": 0.0010186302346094462, "loss": 0.1936, "step": 51244 }, { "epoch": 0.09086339373339397, "grad_norm": 0.322265625, "learning_rate": 0.0010185709598945925, "loss": 0.1895, "step": 51246 }, { "epoch": 0.09086693989870379, "grad_norm": 0.384765625, "learning_rate": 0.001018511685535862, "loss": 0.1417, "step": 51248 }, { "epoch": 0.0908704860640136, "grad_norm": 0.36328125, "learning_rate": 0.001018452411533514, "loss": 0.1571, "step": 51250 }, { "epoch": 0.09087403222932341, "grad_norm": 1.0, "learning_rate": 0.001018393137887808, "loss": 0.1824, "step": 51252 }, { "epoch": 0.09087757839463323, "grad_norm": 0.57421875, "learning_rate": 0.0010183338645990026, "loss": 0.2393, "step": 51254 }, { "epoch": 0.09088112455994304, "grad_norm": 0.24609375, "learning_rate": 0.001018274591667358, "loss": 0.1489, "step": 51256 }, { "epoch": 0.09088467072525286, "grad_norm": 3.234375, "learning_rate": 0.0010182153190931322, "loss": 0.3346, "step": 51258 }, { "epoch": 0.09088821689056267, "grad_norm": 0.80078125, "learning_rate": 0.0010181560468765856, "loss": 0.5651, "step": 51260 }, { "epoch": 0.09089176305587249, "grad_norm": 0.294921875, "learning_rate": 0.0010180967750179767, "loss": 0.1563, "step": 51262 }, { "epoch": 0.09089530922118232, "grad_norm": 0.890625, "learning_rate": 0.0010180375035175652, "loss": 0.1789, "step": 51264 }, { "epoch": 0.09089885538649213, "grad_norm": 0.291015625, "learning_rate": 0.0010179782323756098, "loss": 0.2015, "step": 51266 }, { "epoch": 0.09090240155180195, "grad_norm": 0.62890625, "learning_rate": 0.0010179189615923698, "loss": 0.1692, "step": 51268 }, { "epoch": 0.09090594771711176, "grad_norm": 7.75, "learning_rate": 0.0010178596911681048, "loss": 0.2291, "step": 51270 }, { "epoch": 0.09090949388242157, "grad_norm": 0.263671875, "learning_rate": 0.001017800421103074, "loss": 0.1447, "step": 51272 }, { "epoch": 0.09091304004773139, "grad_norm": 0.73828125, "learning_rate": 0.0010177411513975362, "loss": 0.2003, "step": 51274 }, { "epoch": 0.0909165862130412, "grad_norm": 0.2470703125, "learning_rate": 0.0010176818820517505, "loss": 0.1716, "step": 51276 }, { "epoch": 0.09092013237835102, "grad_norm": 0.63671875, "learning_rate": 0.0010176226130659766, "loss": 0.1896, "step": 51278 }, { "epoch": 0.09092367854366083, "grad_norm": 0.482421875, "learning_rate": 0.0010175633444404737, "loss": 0.4044, "step": 51280 }, { "epoch": 0.09092722470897065, "grad_norm": 1.015625, "learning_rate": 0.001017504076175501, "loss": 0.2957, "step": 51282 }, { "epoch": 0.09093077087428046, "grad_norm": 0.294921875, "learning_rate": 0.0010174448082713172, "loss": 0.1523, "step": 51284 }, { "epoch": 0.09093431703959028, "grad_norm": 0.8515625, "learning_rate": 0.0010173855407281823, "loss": 0.1469, "step": 51286 }, { "epoch": 0.09093786320490009, "grad_norm": 0.25, "learning_rate": 0.0010173262735463547, "loss": 0.1684, "step": 51288 }, { "epoch": 0.0909414093702099, "grad_norm": 0.71484375, "learning_rate": 0.0010172670067260944, "loss": 0.265, "step": 51290 }, { "epoch": 0.09094495553551972, "grad_norm": 0.28515625, "learning_rate": 0.0010172077402676597, "loss": 0.1482, "step": 51292 }, { "epoch": 0.09094850170082953, "grad_norm": 0.498046875, "learning_rate": 0.0010171484741713101, "loss": 0.2158, "step": 51294 }, { "epoch": 0.09095204786613935, "grad_norm": 0.46484375, "learning_rate": 0.001017089208437305, "loss": 0.2213, "step": 51296 }, { "epoch": 0.09095559403144916, "grad_norm": 0.419921875, "learning_rate": 0.001017029943065904, "loss": 0.1889, "step": 51298 }, { "epoch": 0.09095914019675898, "grad_norm": 2.734375, "learning_rate": 0.0010169706780573655, "loss": 0.225, "step": 51300 }, { "epoch": 0.09096268636206879, "grad_norm": 0.3515625, "learning_rate": 0.0010169114134119493, "loss": 0.1423, "step": 51302 }, { "epoch": 0.09096623252737861, "grad_norm": 0.9765625, "learning_rate": 0.001016852149129914, "loss": 0.1809, "step": 51304 }, { "epoch": 0.09096977869268842, "grad_norm": 0.419921875, "learning_rate": 0.0010167928852115196, "loss": 0.2332, "step": 51306 }, { "epoch": 0.09097332485799824, "grad_norm": 0.38671875, "learning_rate": 0.0010167336216570242, "loss": 0.2039, "step": 51308 }, { "epoch": 0.09097687102330806, "grad_norm": 0.4921875, "learning_rate": 0.0010166743584666877, "loss": 0.1886, "step": 51310 }, { "epoch": 0.09098041718861788, "grad_norm": 0.1435546875, "learning_rate": 0.0010166150956407692, "loss": 0.1624, "step": 51312 }, { "epoch": 0.0909839633539277, "grad_norm": 0.26171875, "learning_rate": 0.0010165558331795281, "loss": 0.1775, "step": 51314 }, { "epoch": 0.09098750951923751, "grad_norm": 0.41015625, "learning_rate": 0.001016496571083223, "loss": 0.174, "step": 51316 }, { "epoch": 0.09099105568454732, "grad_norm": 0.2578125, "learning_rate": 0.0010164373093521134, "loss": 0.1786, "step": 51318 }, { "epoch": 0.09099460184985714, "grad_norm": 0.1962890625, "learning_rate": 0.0010163780479864586, "loss": 0.3043, "step": 51320 }, { "epoch": 0.09099814801516695, "grad_norm": 0.2021484375, "learning_rate": 0.0010163187869865176, "loss": 0.1923, "step": 51322 }, { "epoch": 0.09100169418047677, "grad_norm": 0.546875, "learning_rate": 0.00101625952635255, "loss": 0.2036, "step": 51324 }, { "epoch": 0.09100524034578658, "grad_norm": 0.369140625, "learning_rate": 0.0010162002660848138, "loss": 0.129, "step": 51326 }, { "epoch": 0.0910087865110964, "grad_norm": 0.640625, "learning_rate": 0.0010161410061835692, "loss": 0.1782, "step": 51328 }, { "epoch": 0.09101233267640621, "grad_norm": 0.33984375, "learning_rate": 0.0010160817466490757, "loss": 0.2451, "step": 51330 }, { "epoch": 0.09101587884171602, "grad_norm": 0.2021484375, "learning_rate": 0.0010160224874815912, "loss": 0.1656, "step": 51332 }, { "epoch": 0.09101942500702584, "grad_norm": 0.9296875, "learning_rate": 0.0010159632286813759, "loss": 0.1421, "step": 51334 }, { "epoch": 0.09102297117233565, "grad_norm": 0.2080078125, "learning_rate": 0.0010159039702486883, "loss": 0.212, "step": 51336 }, { "epoch": 0.09102651733764547, "grad_norm": 7.15625, "learning_rate": 0.0010158447121837886, "loss": 0.3255, "step": 51338 }, { "epoch": 0.09103006350295528, "grad_norm": 0.546875, "learning_rate": 0.0010157854544869346, "loss": 0.1834, "step": 51340 }, { "epoch": 0.0910336096682651, "grad_norm": 0.416015625, "learning_rate": 0.0010157261971583864, "loss": 0.2098, "step": 51342 }, { "epoch": 0.09103715583357491, "grad_norm": 0.400390625, "learning_rate": 0.0010156669401984026, "loss": 0.1678, "step": 51344 }, { "epoch": 0.09104070199888473, "grad_norm": 1.1875, "learning_rate": 0.0010156076836072433, "loss": 0.206, "step": 51346 }, { "epoch": 0.09104424816419454, "grad_norm": 0.578125, "learning_rate": 0.0010155484273851663, "loss": 0.2635, "step": 51348 }, { "epoch": 0.09104779432950436, "grad_norm": 0.3359375, "learning_rate": 0.0010154891715324313, "loss": 0.1603, "step": 51350 }, { "epoch": 0.09105134049481417, "grad_norm": 0.76171875, "learning_rate": 0.0010154299160492976, "loss": 0.1339, "step": 51352 }, { "epoch": 0.09105488666012398, "grad_norm": 0.44921875, "learning_rate": 0.001015370660936025, "loss": 0.2413, "step": 51354 }, { "epoch": 0.09105843282543381, "grad_norm": 0.2041015625, "learning_rate": 0.0010153114061928715, "loss": 0.1646, "step": 51356 }, { "epoch": 0.09106197899074363, "grad_norm": 0.38671875, "learning_rate": 0.0010152521518200965, "loss": 0.1768, "step": 51358 }, { "epoch": 0.09106552515605344, "grad_norm": 0.48828125, "learning_rate": 0.0010151928978179598, "loss": 0.2111, "step": 51360 }, { "epoch": 0.09106907132136326, "grad_norm": 0.3203125, "learning_rate": 0.00101513364418672, "loss": 0.1869, "step": 51362 }, { "epoch": 0.09107261748667307, "grad_norm": 0.50390625, "learning_rate": 0.0010150743909266363, "loss": 0.1916, "step": 51364 }, { "epoch": 0.09107616365198289, "grad_norm": 0.248046875, "learning_rate": 0.0010150151380379677, "loss": 0.1578, "step": 51366 }, { "epoch": 0.0910797098172927, "grad_norm": 0.412109375, "learning_rate": 0.0010149558855209735, "loss": 0.2758, "step": 51368 }, { "epoch": 0.09108325598260252, "grad_norm": 0.26953125, "learning_rate": 0.001014896633375913, "loss": 0.1554, "step": 51370 }, { "epoch": 0.09108680214791233, "grad_norm": 0.291015625, "learning_rate": 0.0010148373816030456, "loss": 0.1693, "step": 51372 }, { "epoch": 0.09109034831322214, "grad_norm": 0.61328125, "learning_rate": 0.0010147781302026295, "loss": 0.1515, "step": 51374 }, { "epoch": 0.09109389447853196, "grad_norm": 0.1630859375, "learning_rate": 0.0010147188791749247, "loss": 0.1372, "step": 51376 }, { "epoch": 0.09109744064384177, "grad_norm": 0.37890625, "learning_rate": 0.0010146596285201897, "loss": 0.1712, "step": 51378 }, { "epoch": 0.09110098680915159, "grad_norm": 0.2080078125, "learning_rate": 0.0010146003782386841, "loss": 0.1498, "step": 51380 }, { "epoch": 0.0911045329744614, "grad_norm": 0.376953125, "learning_rate": 0.0010145411283306667, "loss": 0.1942, "step": 51382 }, { "epoch": 0.09110807913977122, "grad_norm": 1.0234375, "learning_rate": 0.0010144818787963966, "loss": 0.1599, "step": 51384 }, { "epoch": 0.09111162530508103, "grad_norm": 0.7265625, "learning_rate": 0.0010144226296361332, "loss": 0.186, "step": 51386 }, { "epoch": 0.09111517147039085, "grad_norm": 0.14453125, "learning_rate": 0.0010143633808501358, "loss": 0.1472, "step": 51388 }, { "epoch": 0.09111871763570066, "grad_norm": 0.48046875, "learning_rate": 0.001014304132438663, "loss": 0.1406, "step": 51390 }, { "epoch": 0.09112226380101048, "grad_norm": 0.345703125, "learning_rate": 0.001014244884401974, "loss": 0.1495, "step": 51392 }, { "epoch": 0.09112580996632029, "grad_norm": 1.4296875, "learning_rate": 0.0010141856367403283, "loss": 0.1802, "step": 51394 }, { "epoch": 0.0911293561316301, "grad_norm": 0.328125, "learning_rate": 0.0010141263894539849, "loss": 0.1378, "step": 51396 }, { "epoch": 0.09113290229693992, "grad_norm": 0.255859375, "learning_rate": 0.0010140671425432027, "loss": 0.1825, "step": 51398 }, { "epoch": 0.09113644846224975, "grad_norm": 0.294921875, "learning_rate": 0.0010140078960082408, "loss": 0.1466, "step": 51400 }, { "epoch": 0.09113999462755956, "grad_norm": 0.28515625, "learning_rate": 0.0010139486498493581, "loss": 0.2209, "step": 51402 }, { "epoch": 0.09114354079286938, "grad_norm": 0.44921875, "learning_rate": 0.0010138894040668148, "loss": 0.168, "step": 51404 }, { "epoch": 0.09114708695817919, "grad_norm": 0.396484375, "learning_rate": 0.0010138301586608688, "loss": 0.2064, "step": 51406 }, { "epoch": 0.091150633123489, "grad_norm": 0.474609375, "learning_rate": 0.0010137709136317794, "loss": 0.2036, "step": 51408 }, { "epoch": 0.09115417928879882, "grad_norm": 0.392578125, "learning_rate": 0.001013711668979806, "loss": 0.3219, "step": 51410 }, { "epoch": 0.09115772545410863, "grad_norm": 0.294921875, "learning_rate": 0.001013652424705208, "loss": 0.1878, "step": 51412 }, { "epoch": 0.09116127161941845, "grad_norm": 0.6171875, "learning_rate": 0.0010135931808082437, "loss": 0.1633, "step": 51414 }, { "epoch": 0.09116481778472826, "grad_norm": 0.58203125, "learning_rate": 0.001013533937289173, "loss": 0.2028, "step": 51416 }, { "epoch": 0.09116836395003808, "grad_norm": 0.466796875, "learning_rate": 0.0010134746941482544, "loss": 0.2224, "step": 51418 }, { "epoch": 0.09117191011534789, "grad_norm": 0.5, "learning_rate": 0.0010134154513857477, "loss": 0.1646, "step": 51420 }, { "epoch": 0.09117545628065771, "grad_norm": 0.220703125, "learning_rate": 0.001013356209001911, "loss": 0.185, "step": 51422 }, { "epoch": 0.09117900244596752, "grad_norm": 0.87890625, "learning_rate": 0.0010132969669970039, "loss": 0.2213, "step": 51424 }, { "epoch": 0.09118254861127734, "grad_norm": 0.5859375, "learning_rate": 0.0010132377253712853, "loss": 0.1846, "step": 51426 }, { "epoch": 0.09118609477658715, "grad_norm": 0.88671875, "learning_rate": 0.0010131784841250152, "loss": 0.2484, "step": 51428 }, { "epoch": 0.09118964094189697, "grad_norm": 0.38671875, "learning_rate": 0.0010131192432584514, "loss": 0.1957, "step": 51430 }, { "epoch": 0.09119318710720678, "grad_norm": 0.2578125, "learning_rate": 0.001013060002771854, "loss": 0.1521, "step": 51432 }, { "epoch": 0.0911967332725166, "grad_norm": 0.2470703125, "learning_rate": 0.0010130007626654813, "loss": 0.1529, "step": 51434 }, { "epoch": 0.09120027943782641, "grad_norm": 0.8828125, "learning_rate": 0.0010129415229395932, "loss": 0.2202, "step": 51436 }, { "epoch": 0.09120382560313622, "grad_norm": 0.337890625, "learning_rate": 0.0010128822835944477, "loss": 0.1395, "step": 51438 }, { "epoch": 0.09120737176844604, "grad_norm": 0.423828125, "learning_rate": 0.0010128230446303048, "loss": 0.201, "step": 51440 }, { "epoch": 0.09121091793375585, "grad_norm": 0.59765625, "learning_rate": 0.0010127638060474231, "loss": 0.2214, "step": 51442 }, { "epoch": 0.09121446409906567, "grad_norm": 0.96484375, "learning_rate": 0.0010127045678460617, "loss": 0.2247, "step": 51444 }, { "epoch": 0.0912180102643755, "grad_norm": 0.34765625, "learning_rate": 0.0010126453300264807, "loss": 0.1549, "step": 51446 }, { "epoch": 0.09122155642968531, "grad_norm": 0.416015625, "learning_rate": 0.0010125860925889374, "loss": 0.4183, "step": 51448 }, { "epoch": 0.09122510259499512, "grad_norm": 0.271484375, "learning_rate": 0.001012526855533692, "loss": 0.1931, "step": 51450 }, { "epoch": 0.09122864876030494, "grad_norm": 0.30859375, "learning_rate": 0.0010124676188610034, "loss": 0.1334, "step": 51452 }, { "epoch": 0.09123219492561475, "grad_norm": 0.58203125, "learning_rate": 0.0010124083825711306, "loss": 0.2104, "step": 51454 }, { "epoch": 0.09123574109092457, "grad_norm": 0.275390625, "learning_rate": 0.0010123491466643326, "loss": 0.2075, "step": 51456 }, { "epoch": 0.09123928725623438, "grad_norm": 0.6640625, "learning_rate": 0.0010122899111408682, "loss": 0.1554, "step": 51458 }, { "epoch": 0.0912428334215442, "grad_norm": 0.38671875, "learning_rate": 0.0010122306760009972, "loss": 0.1879, "step": 51460 }, { "epoch": 0.09124637958685401, "grad_norm": 0.326171875, "learning_rate": 0.0010121714412449783, "loss": 0.189, "step": 51462 }, { "epoch": 0.09124992575216383, "grad_norm": 0.298828125, "learning_rate": 0.0010121122068730704, "loss": 0.177, "step": 51464 }, { "epoch": 0.09125347191747364, "grad_norm": 1.296875, "learning_rate": 0.0010120529728855324, "loss": 0.2517, "step": 51466 }, { "epoch": 0.09125701808278346, "grad_norm": 0.36328125, "learning_rate": 0.0010119937392826237, "loss": 0.1838, "step": 51468 }, { "epoch": 0.09126056424809327, "grad_norm": 0.875, "learning_rate": 0.0010119345060646036, "loss": 0.2726, "step": 51470 }, { "epoch": 0.09126411041340309, "grad_norm": 0.373046875, "learning_rate": 0.0010118752732317306, "loss": 0.2354, "step": 51472 }, { "epoch": 0.0912676565787129, "grad_norm": 0.4140625, "learning_rate": 0.0010118160407842638, "loss": 0.1581, "step": 51474 }, { "epoch": 0.09127120274402271, "grad_norm": 0.546875, "learning_rate": 0.0010117568087224624, "loss": 0.2393, "step": 51476 }, { "epoch": 0.09127474890933253, "grad_norm": 0.6796875, "learning_rate": 0.0010116975770465858, "loss": 0.205, "step": 51478 }, { "epoch": 0.09127829507464234, "grad_norm": 0.1796875, "learning_rate": 0.0010116383457568926, "loss": 0.1301, "step": 51480 }, { "epoch": 0.09128184123995216, "grad_norm": 0.478515625, "learning_rate": 0.0010115791148536417, "loss": 0.3124, "step": 51482 }, { "epoch": 0.09128538740526197, "grad_norm": 0.33984375, "learning_rate": 0.0010115198843370925, "loss": 0.2089, "step": 51484 }, { "epoch": 0.09128893357057179, "grad_norm": 0.357421875, "learning_rate": 0.0010114606542075042, "loss": 0.1682, "step": 51486 }, { "epoch": 0.0912924797358816, "grad_norm": 1.25, "learning_rate": 0.0010114014244651352, "loss": 0.3435, "step": 51488 }, { "epoch": 0.09129602590119142, "grad_norm": 0.72265625, "learning_rate": 0.0010113421951102454, "loss": 0.3021, "step": 51490 }, { "epoch": 0.09129957206650124, "grad_norm": 0.5390625, "learning_rate": 0.0010112829661430927, "loss": 0.2251, "step": 51492 }, { "epoch": 0.09130311823181106, "grad_norm": 0.408203125, "learning_rate": 0.0010112237375639373, "loss": 0.2612, "step": 51494 }, { "epoch": 0.09130666439712087, "grad_norm": 1.5234375, "learning_rate": 0.0010111645093730372, "loss": 0.2216, "step": 51496 }, { "epoch": 0.09131021056243069, "grad_norm": 0.369140625, "learning_rate": 0.0010111052815706522, "loss": 0.1602, "step": 51498 }, { "epoch": 0.0913137567277405, "grad_norm": 0.435546875, "learning_rate": 0.001011046054157041, "loss": 0.155, "step": 51500 }, { "epoch": 0.09131730289305032, "grad_norm": 0.279296875, "learning_rate": 0.0010109868271324632, "loss": 0.1952, "step": 51502 }, { "epoch": 0.09132084905836013, "grad_norm": 0.9375, "learning_rate": 0.0010109276004971765, "loss": 0.1839, "step": 51504 }, { "epoch": 0.09132439522366995, "grad_norm": 0.30859375, "learning_rate": 0.0010108683742514413, "loss": 0.1791, "step": 51506 }, { "epoch": 0.09132794138897976, "grad_norm": 0.498046875, "learning_rate": 0.0010108091483955158, "loss": 0.189, "step": 51508 }, { "epoch": 0.09133148755428958, "grad_norm": 0.61328125, "learning_rate": 0.0010107499229296596, "loss": 0.1933, "step": 51510 }, { "epoch": 0.09133503371959939, "grad_norm": 0.267578125, "learning_rate": 0.001010690697854131, "loss": 0.1502, "step": 51512 }, { "epoch": 0.0913385798849092, "grad_norm": 0.29296875, "learning_rate": 0.0010106314731691895, "loss": 0.1934, "step": 51514 }, { "epoch": 0.09134212605021902, "grad_norm": 0.283203125, "learning_rate": 0.0010105722488750939, "loss": 0.1893, "step": 51516 }, { "epoch": 0.09134567221552883, "grad_norm": 0.76171875, "learning_rate": 0.001010513024972104, "loss": 0.2066, "step": 51518 }, { "epoch": 0.09134921838083865, "grad_norm": 0.267578125, "learning_rate": 0.0010104538014604773, "loss": 0.1276, "step": 51520 }, { "epoch": 0.09135276454614846, "grad_norm": 0.3046875, "learning_rate": 0.001010394578340474, "loss": 0.1454, "step": 51522 }, { "epoch": 0.09135631071145828, "grad_norm": 0.8203125, "learning_rate": 0.0010103353556123527, "loss": 0.2307, "step": 51524 }, { "epoch": 0.09135985687676809, "grad_norm": 0.162109375, "learning_rate": 0.0010102761332763726, "loss": 0.1558, "step": 51526 }, { "epoch": 0.0913634030420779, "grad_norm": 1.65625, "learning_rate": 0.0010102169113327926, "loss": 0.2521, "step": 51528 }, { "epoch": 0.09136694920738772, "grad_norm": 0.353515625, "learning_rate": 0.0010101576897818716, "loss": 0.1909, "step": 51530 }, { "epoch": 0.09137049537269754, "grad_norm": 1.6328125, "learning_rate": 0.0010100984686238687, "loss": 0.2977, "step": 51532 }, { "epoch": 0.09137404153800735, "grad_norm": 0.333984375, "learning_rate": 0.0010100392478590429, "loss": 0.17, "step": 51534 }, { "epoch": 0.09137758770331718, "grad_norm": 0.447265625, "learning_rate": 0.0010099800274876534, "loss": 0.1981, "step": 51536 }, { "epoch": 0.09138113386862699, "grad_norm": 3.796875, "learning_rate": 0.0010099208075099585, "loss": 0.3599, "step": 51538 }, { "epoch": 0.09138468003393681, "grad_norm": 0.296875, "learning_rate": 0.0010098615879262178, "loss": 0.1491, "step": 51540 }, { "epoch": 0.09138822619924662, "grad_norm": 0.5, "learning_rate": 0.00100980236873669, "loss": 0.1347, "step": 51542 }, { "epoch": 0.09139177236455644, "grad_norm": 0.5078125, "learning_rate": 0.0010097431499416346, "loss": 0.2331, "step": 51544 }, { "epoch": 0.09139531852986625, "grad_norm": 0.546875, "learning_rate": 0.0010096839315413102, "loss": 0.1882, "step": 51546 }, { "epoch": 0.09139886469517607, "grad_norm": 2.171875, "learning_rate": 0.0010096247135359757, "loss": 0.2179, "step": 51548 }, { "epoch": 0.09140241086048588, "grad_norm": 1.046875, "learning_rate": 0.0010095654959258902, "loss": 0.1657, "step": 51550 }, { "epoch": 0.0914059570257957, "grad_norm": 2.015625, "learning_rate": 0.001009506278711313, "loss": 0.2467, "step": 51552 }, { "epoch": 0.09140950319110551, "grad_norm": 0.357421875, "learning_rate": 0.0010094470618925025, "loss": 0.2166, "step": 51554 }, { "epoch": 0.09141304935641532, "grad_norm": 0.54296875, "learning_rate": 0.001009387845469718, "loss": 0.1522, "step": 51556 }, { "epoch": 0.09141659552172514, "grad_norm": 0.671875, "learning_rate": 0.001009328629443218, "loss": 0.5705, "step": 51558 }, { "epoch": 0.09142014168703495, "grad_norm": 0.314453125, "learning_rate": 0.0010092694138132624, "loss": 0.1522, "step": 51560 }, { "epoch": 0.09142368785234477, "grad_norm": 0.330078125, "learning_rate": 0.0010092101985801096, "loss": 0.2255, "step": 51562 }, { "epoch": 0.09142723401765458, "grad_norm": 0.58203125, "learning_rate": 0.0010091509837440186, "loss": 0.2007, "step": 51564 }, { "epoch": 0.0914307801829644, "grad_norm": 1.109375, "learning_rate": 0.0010090917693052486, "loss": 0.1587, "step": 51566 }, { "epoch": 0.09143432634827421, "grad_norm": 0.5625, "learning_rate": 0.0010090325552640585, "loss": 0.1823, "step": 51568 }, { "epoch": 0.09143787251358403, "grad_norm": 0.68359375, "learning_rate": 0.0010089733416207068, "loss": 0.2009, "step": 51570 }, { "epoch": 0.09144141867889384, "grad_norm": 0.365234375, "learning_rate": 0.001008914128375453, "loss": 0.1405, "step": 51572 }, { "epoch": 0.09144496484420366, "grad_norm": 0.42578125, "learning_rate": 0.0010088549155285555, "loss": 0.1836, "step": 51574 }, { "epoch": 0.09144851100951347, "grad_norm": 1.1328125, "learning_rate": 0.0010087957030802744, "loss": 0.3288, "step": 51576 }, { "epoch": 0.09145205717482328, "grad_norm": 0.52734375, "learning_rate": 0.001008736491030867, "loss": 0.1992, "step": 51578 }, { "epoch": 0.0914556033401331, "grad_norm": 0.43359375, "learning_rate": 0.0010086772793805939, "loss": 0.1756, "step": 51580 }, { "epoch": 0.09145914950544293, "grad_norm": 0.333984375, "learning_rate": 0.001008618068129713, "loss": 0.2009, "step": 51582 }, { "epoch": 0.09146269567075274, "grad_norm": 1.0234375, "learning_rate": 0.0010085588572784838, "loss": 0.2101, "step": 51584 }, { "epoch": 0.09146624183606256, "grad_norm": 0.375, "learning_rate": 0.0010084996468271651, "loss": 0.196, "step": 51586 }, { "epoch": 0.09146978800137237, "grad_norm": 0.466796875, "learning_rate": 0.0010084404367760155, "loss": 0.1987, "step": 51588 }, { "epoch": 0.09147333416668219, "grad_norm": 0.2890625, "learning_rate": 0.0010083812271252943, "loss": 0.1918, "step": 51590 }, { "epoch": 0.091476880331992, "grad_norm": 0.376953125, "learning_rate": 0.0010083220178752608, "loss": 0.1436, "step": 51592 }, { "epoch": 0.09148042649730181, "grad_norm": 0.4765625, "learning_rate": 0.001008262809026173, "loss": 0.2147, "step": 51594 }, { "epoch": 0.09148397266261163, "grad_norm": 0.3359375, "learning_rate": 0.0010082036005782907, "loss": 0.1474, "step": 51596 }, { "epoch": 0.09148751882792144, "grad_norm": 0.2412109375, "learning_rate": 0.0010081443925318723, "loss": 0.4299, "step": 51598 }, { "epoch": 0.09149106499323126, "grad_norm": 0.3046875, "learning_rate": 0.0010080851848871773, "loss": 0.1233, "step": 51600 }, { "epoch": 0.09149461115854107, "grad_norm": 0.66796875, "learning_rate": 0.001008025977644464, "loss": 0.2825, "step": 51602 }, { "epoch": 0.09149815732385089, "grad_norm": 4.65625, "learning_rate": 0.001007966770803992, "loss": 0.3147, "step": 51604 }, { "epoch": 0.0915017034891607, "grad_norm": 0.353515625, "learning_rate": 0.0010079075643660194, "loss": 0.1394, "step": 51606 }, { "epoch": 0.09150524965447052, "grad_norm": 0.3359375, "learning_rate": 0.0010078483583308062, "loss": 0.1732, "step": 51608 }, { "epoch": 0.09150879581978033, "grad_norm": 0.357421875, "learning_rate": 0.0010077891526986103, "loss": 0.1507, "step": 51610 }, { "epoch": 0.09151234198509015, "grad_norm": 0.29296875, "learning_rate": 0.001007729947469691, "loss": 0.1682, "step": 51612 }, { "epoch": 0.09151588815039996, "grad_norm": 0.4921875, "learning_rate": 0.0010076707426443074, "loss": 0.1732, "step": 51614 }, { "epoch": 0.09151943431570977, "grad_norm": 0.384765625, "learning_rate": 0.0010076115382227184, "loss": 0.1672, "step": 51616 }, { "epoch": 0.09152298048101959, "grad_norm": 0.2392578125, "learning_rate": 0.0010075523342051834, "loss": 0.2293, "step": 51618 }, { "epoch": 0.0915265266463294, "grad_norm": 0.34375, "learning_rate": 0.00100749313059196, "loss": 0.1861, "step": 51620 }, { "epoch": 0.09153007281163922, "grad_norm": 1.140625, "learning_rate": 0.0010074339273833082, "loss": 0.3121, "step": 51622 }, { "epoch": 0.09153361897694903, "grad_norm": 0.431640625, "learning_rate": 0.0010073747245794866, "loss": 0.1637, "step": 51624 }, { "epoch": 0.09153716514225885, "grad_norm": 0.376953125, "learning_rate": 0.0010073155221807545, "loss": 0.1968, "step": 51626 }, { "epoch": 0.09154071130756868, "grad_norm": 0.55859375, "learning_rate": 0.00100725632018737, "loss": 0.2321, "step": 51628 }, { "epoch": 0.09154425747287849, "grad_norm": 0.48046875, "learning_rate": 0.0010071971185995926, "loss": 0.1514, "step": 51630 }, { "epoch": 0.0915478036381883, "grad_norm": 0.6796875, "learning_rate": 0.001007137917417681, "loss": 0.1747, "step": 51632 }, { "epoch": 0.09155134980349812, "grad_norm": 0.6328125, "learning_rate": 0.001007078716641895, "loss": 0.1952, "step": 51634 }, { "epoch": 0.09155489596880793, "grad_norm": 0.703125, "learning_rate": 0.0010070195162724918, "loss": 0.1681, "step": 51636 }, { "epoch": 0.09155844213411775, "grad_norm": 0.703125, "learning_rate": 0.0010069603163097313, "loss": 0.2135, "step": 51638 }, { "epoch": 0.09156198829942756, "grad_norm": 0.380859375, "learning_rate": 0.0010069011167538726, "loss": 0.2407, "step": 51640 }, { "epoch": 0.09156553446473738, "grad_norm": 0.353515625, "learning_rate": 0.0010068419176051744, "loss": 0.145, "step": 51642 }, { "epoch": 0.09156908063004719, "grad_norm": 0.6015625, "learning_rate": 0.0010067827188638956, "loss": 0.1674, "step": 51644 }, { "epoch": 0.091572626795357, "grad_norm": 0.462890625, "learning_rate": 0.0010067235205302946, "loss": 0.1719, "step": 51646 }, { "epoch": 0.09157617296066682, "grad_norm": 0.337890625, "learning_rate": 0.001006664322604631, "loss": 0.2164, "step": 51648 }, { "epoch": 0.09157971912597664, "grad_norm": 0.431640625, "learning_rate": 0.0010066051250871638, "loss": 0.2079, "step": 51650 }, { "epoch": 0.09158326529128645, "grad_norm": 0.181640625, "learning_rate": 0.001006545927978151, "loss": 0.2129, "step": 51652 }, { "epoch": 0.09158681145659626, "grad_norm": 1.5234375, "learning_rate": 0.001006486731277852, "loss": 0.364, "step": 51654 }, { "epoch": 0.09159035762190608, "grad_norm": 0.6796875, "learning_rate": 0.0010064275349865263, "loss": 0.1833, "step": 51656 }, { "epoch": 0.0915939037872159, "grad_norm": 2.015625, "learning_rate": 0.001006368339104432, "loss": 0.1857, "step": 51658 }, { "epoch": 0.09159744995252571, "grad_norm": 0.310546875, "learning_rate": 0.001006309143631828, "loss": 0.1646, "step": 51660 }, { "epoch": 0.09160099611783552, "grad_norm": 0.478515625, "learning_rate": 0.001006249948568973, "loss": 0.3003, "step": 51662 }, { "epoch": 0.09160454228314534, "grad_norm": 0.263671875, "learning_rate": 0.0010061907539161267, "loss": 0.1903, "step": 51664 }, { "epoch": 0.09160808844845515, "grad_norm": 0.50390625, "learning_rate": 0.001006131559673548, "loss": 0.1417, "step": 51666 }, { "epoch": 0.09161163461376497, "grad_norm": 0.5859375, "learning_rate": 0.0010060723658414945, "loss": 0.1909, "step": 51668 }, { "epoch": 0.09161518077907478, "grad_norm": 0.80078125, "learning_rate": 0.0010060131724202261, "loss": 0.2213, "step": 51670 }, { "epoch": 0.09161872694438461, "grad_norm": 0.3671875, "learning_rate": 0.0010059539794100016, "loss": 0.1622, "step": 51672 }, { "epoch": 0.09162227310969442, "grad_norm": 0.9921875, "learning_rate": 0.00100589478681108, "loss": 0.1762, "step": 51674 }, { "epoch": 0.09162581927500424, "grad_norm": 0.337890625, "learning_rate": 0.00100583559462372, "loss": 0.1867, "step": 51676 }, { "epoch": 0.09162936544031405, "grad_norm": 0.490234375, "learning_rate": 0.0010057764028481803, "loss": 0.1841, "step": 51678 }, { "epoch": 0.09163291160562387, "grad_norm": 0.5234375, "learning_rate": 0.0010057172114847194, "loss": 0.1711, "step": 51680 }, { "epoch": 0.09163645777093368, "grad_norm": 0.45703125, "learning_rate": 0.0010056580205335974, "loss": 0.1976, "step": 51682 }, { "epoch": 0.0916400039362435, "grad_norm": 0.3671875, "learning_rate": 0.0010055988299950717, "loss": 0.2061, "step": 51684 }, { "epoch": 0.09164355010155331, "grad_norm": 0.228515625, "learning_rate": 0.0010055396398694024, "loss": 0.1295, "step": 51686 }, { "epoch": 0.09164709626686313, "grad_norm": 0.169921875, "learning_rate": 0.0010054804501568476, "loss": 0.1571, "step": 51688 }, { "epoch": 0.09165064243217294, "grad_norm": 0.72265625, "learning_rate": 0.0010054212608576667, "loss": 0.1956, "step": 51690 }, { "epoch": 0.09165418859748276, "grad_norm": 0.95703125, "learning_rate": 0.001005362071972118, "loss": 0.1924, "step": 51692 }, { "epoch": 0.09165773476279257, "grad_norm": 2.890625, "learning_rate": 0.0010053028835004603, "loss": 0.2443, "step": 51694 }, { "epoch": 0.09166128092810238, "grad_norm": 0.51171875, "learning_rate": 0.0010052436954429533, "loss": 0.164, "step": 51696 }, { "epoch": 0.0916648270934122, "grad_norm": 0.34765625, "learning_rate": 0.0010051845077998552, "loss": 0.2522, "step": 51698 }, { "epoch": 0.09166837325872201, "grad_norm": 0.1728515625, "learning_rate": 0.001005125320571425, "loss": 0.195, "step": 51700 }, { "epoch": 0.09167191942403183, "grad_norm": 0.99609375, "learning_rate": 0.0010050661337579216, "loss": 0.3444, "step": 51702 }, { "epoch": 0.09167546558934164, "grad_norm": 0.72265625, "learning_rate": 0.0010050069473596034, "loss": 0.1547, "step": 51704 }, { "epoch": 0.09167901175465146, "grad_norm": 0.28515625, "learning_rate": 0.0010049477613767299, "loss": 0.157, "step": 51706 }, { "epoch": 0.09168255791996127, "grad_norm": 0.2353515625, "learning_rate": 0.00100488857580956, "loss": 0.1274, "step": 51708 }, { "epoch": 0.09168610408527109, "grad_norm": 1.9375, "learning_rate": 0.0010048293906583519, "loss": 0.2626, "step": 51710 }, { "epoch": 0.0916896502505809, "grad_norm": 0.578125, "learning_rate": 0.0010047702059233646, "loss": 0.1936, "step": 51712 }, { "epoch": 0.09169319641589072, "grad_norm": 0.53515625, "learning_rate": 0.0010047110216048574, "loss": 0.1806, "step": 51714 }, { "epoch": 0.09169674258120053, "grad_norm": 0.361328125, "learning_rate": 0.0010046518377030888, "loss": 0.1579, "step": 51716 }, { "epoch": 0.09170028874651036, "grad_norm": 0.333984375, "learning_rate": 0.0010045926542183177, "loss": 0.1972, "step": 51718 }, { "epoch": 0.09170383491182017, "grad_norm": 0.73046875, "learning_rate": 0.0010045334711508028, "loss": 0.2122, "step": 51720 }, { "epoch": 0.09170738107712999, "grad_norm": 0.388671875, "learning_rate": 0.0010044742885008027, "loss": 0.2444, "step": 51722 }, { "epoch": 0.0917109272424398, "grad_norm": 0.28515625, "learning_rate": 0.0010044151062685773, "loss": 0.1949, "step": 51724 }, { "epoch": 0.09171447340774962, "grad_norm": 0.482421875, "learning_rate": 0.0010043559244543843, "loss": 0.277, "step": 51726 }, { "epoch": 0.09171801957305943, "grad_norm": 1.0, "learning_rate": 0.0010042967430584828, "loss": 0.2671, "step": 51728 }, { "epoch": 0.09172156573836925, "grad_norm": 0.265625, "learning_rate": 0.001004237562081132, "loss": 0.2425, "step": 51730 }, { "epoch": 0.09172511190367906, "grad_norm": 0.404296875, "learning_rate": 0.0010041783815225905, "loss": 0.1775, "step": 51732 }, { "epoch": 0.09172865806898887, "grad_norm": 0.66015625, "learning_rate": 0.0010041192013831171, "loss": 0.2809, "step": 51734 }, { "epoch": 0.09173220423429869, "grad_norm": 0.4375, "learning_rate": 0.0010040600216629704, "loss": 0.177, "step": 51736 }, { "epoch": 0.0917357503996085, "grad_norm": 0.4375, "learning_rate": 0.0010040008423624093, "loss": 0.129, "step": 51738 }, { "epoch": 0.09173929656491832, "grad_norm": 0.6328125, "learning_rate": 0.0010039416634816934, "loss": 0.2147, "step": 51740 }, { "epoch": 0.09174284273022813, "grad_norm": 0.609375, "learning_rate": 0.0010038824850210804, "loss": 0.1485, "step": 51742 }, { "epoch": 0.09174638889553795, "grad_norm": 0.404296875, "learning_rate": 0.0010038233069808295, "loss": 0.1788, "step": 51744 }, { "epoch": 0.09174993506084776, "grad_norm": 0.216796875, "learning_rate": 0.0010037641293611996, "loss": 0.1516, "step": 51746 }, { "epoch": 0.09175348122615758, "grad_norm": 1.0078125, "learning_rate": 0.0010037049521624502, "loss": 0.2108, "step": 51748 }, { "epoch": 0.09175702739146739, "grad_norm": 0.37890625, "learning_rate": 0.0010036457753848386, "loss": 0.1543, "step": 51750 }, { "epoch": 0.0917605735567772, "grad_norm": 1.109375, "learning_rate": 0.0010035865990286248, "loss": 0.195, "step": 51752 }, { "epoch": 0.09176411972208702, "grad_norm": 1.984375, "learning_rate": 0.0010035274230940669, "loss": 0.2335, "step": 51754 }, { "epoch": 0.09176766588739683, "grad_norm": 0.58203125, "learning_rate": 0.0010034682475814245, "loss": 0.361, "step": 51756 }, { "epoch": 0.09177121205270665, "grad_norm": 0.2109375, "learning_rate": 0.0010034090724909555, "loss": 0.1404, "step": 51758 }, { "epoch": 0.09177475821801646, "grad_norm": 0.314453125, "learning_rate": 0.001003349897822919, "loss": 0.1839, "step": 51760 }, { "epoch": 0.09177830438332628, "grad_norm": 0.234375, "learning_rate": 0.0010032907235775742, "loss": 0.1988, "step": 51762 }, { "epoch": 0.0917818505486361, "grad_norm": 0.5546875, "learning_rate": 0.0010032315497551796, "loss": 0.1594, "step": 51764 }, { "epoch": 0.09178539671394592, "grad_norm": 1.015625, "learning_rate": 0.0010031723763559938, "loss": 0.1518, "step": 51766 }, { "epoch": 0.09178894287925574, "grad_norm": 0.33984375, "learning_rate": 0.0010031132033802759, "loss": 0.1573, "step": 51768 }, { "epoch": 0.09179248904456555, "grad_norm": 0.2236328125, "learning_rate": 0.0010030540308282846, "loss": 0.1798, "step": 51770 }, { "epoch": 0.09179603520987537, "grad_norm": 0.2470703125, "learning_rate": 0.0010029948587002785, "loss": 0.1416, "step": 51772 }, { "epoch": 0.09179958137518518, "grad_norm": 0.369140625, "learning_rate": 0.0010029356869965167, "loss": 0.1843, "step": 51774 }, { "epoch": 0.091803127540495, "grad_norm": 0.166015625, "learning_rate": 0.001002876515717258, "loss": 0.4411, "step": 51776 }, { "epoch": 0.09180667370580481, "grad_norm": 0.6875, "learning_rate": 0.0010028173448627603, "loss": 0.1966, "step": 51778 }, { "epoch": 0.09181021987111462, "grad_norm": 0.359375, "learning_rate": 0.0010027581744332837, "loss": 0.1841, "step": 51780 }, { "epoch": 0.09181376603642444, "grad_norm": 0.453125, "learning_rate": 0.0010026990044290866, "loss": 0.2171, "step": 51782 }, { "epoch": 0.09181731220173425, "grad_norm": 14.125, "learning_rate": 0.001002639834850427, "loss": 0.2186, "step": 51784 }, { "epoch": 0.09182085836704407, "grad_norm": 0.83203125, "learning_rate": 0.0010025806656975642, "loss": 0.1492, "step": 51786 }, { "epoch": 0.09182440453235388, "grad_norm": 0.41796875, "learning_rate": 0.0010025214969707572, "loss": 0.2025, "step": 51788 }, { "epoch": 0.0918279506976637, "grad_norm": 0.8359375, "learning_rate": 0.0010024623286702646, "loss": 0.2298, "step": 51790 }, { "epoch": 0.09183149686297351, "grad_norm": 3.453125, "learning_rate": 0.001002403160796345, "loss": 0.2809, "step": 51792 }, { "epoch": 0.09183504302828333, "grad_norm": 0.4765625, "learning_rate": 0.0010023439933492574, "loss": 0.1701, "step": 51794 }, { "epoch": 0.09183858919359314, "grad_norm": 0.259765625, "learning_rate": 0.00100228482632926, "loss": 0.1805, "step": 51796 }, { "epoch": 0.09184213535890295, "grad_norm": 0.224609375, "learning_rate": 0.0010022256597366128, "loss": 0.1624, "step": 51798 }, { "epoch": 0.09184568152421277, "grad_norm": 0.486328125, "learning_rate": 0.001002166493571573, "loss": 0.2081, "step": 51800 }, { "epoch": 0.09184922768952258, "grad_norm": 0.57421875, "learning_rate": 0.0010021073278344004, "loss": 0.2075, "step": 51802 }, { "epoch": 0.0918527738548324, "grad_norm": 0.33984375, "learning_rate": 0.0010020481625253534, "loss": 0.1799, "step": 51804 }, { "epoch": 0.09185632002014221, "grad_norm": 0.71484375, "learning_rate": 0.0010019889976446913, "loss": 0.1414, "step": 51806 }, { "epoch": 0.09185986618545204, "grad_norm": 0.255859375, "learning_rate": 0.0010019298331926721, "loss": 0.1241, "step": 51808 }, { "epoch": 0.09186341235076186, "grad_norm": 2.03125, "learning_rate": 0.001001870669169555, "loss": 0.2272, "step": 51810 }, { "epoch": 0.09186695851607167, "grad_norm": 0.162109375, "learning_rate": 0.0010018115055755981, "loss": 0.1072, "step": 51812 }, { "epoch": 0.09187050468138148, "grad_norm": 0.8984375, "learning_rate": 0.0010017523424110614, "loss": 0.2694, "step": 51814 }, { "epoch": 0.0918740508466913, "grad_norm": 0.80078125, "learning_rate": 0.0010016931796762024, "loss": 0.3905, "step": 51816 }, { "epoch": 0.09187759701200111, "grad_norm": 0.515625, "learning_rate": 0.0010016340173712801, "loss": 0.2203, "step": 51818 }, { "epoch": 0.09188114317731093, "grad_norm": 0.5390625, "learning_rate": 0.0010015748554965537, "loss": 0.1496, "step": 51820 }, { "epoch": 0.09188468934262074, "grad_norm": 0.2451171875, "learning_rate": 0.001001515694052282, "loss": 0.1229, "step": 51822 }, { "epoch": 0.09188823550793056, "grad_norm": 0.345703125, "learning_rate": 0.0010014565330387231, "loss": 0.2024, "step": 51824 }, { "epoch": 0.09189178167324037, "grad_norm": 0.83203125, "learning_rate": 0.001001397372456136, "loss": 0.2061, "step": 51826 }, { "epoch": 0.09189532783855019, "grad_norm": 0.19140625, "learning_rate": 0.00100133821230478, "loss": 0.1834, "step": 51828 }, { "epoch": 0.09189887400386, "grad_norm": 0.2294921875, "learning_rate": 0.0010012790525849129, "loss": 0.2198, "step": 51830 }, { "epoch": 0.09190242016916982, "grad_norm": 0.4375, "learning_rate": 0.001001219893296794, "loss": 0.2347, "step": 51832 }, { "epoch": 0.09190596633447963, "grad_norm": 0.462890625, "learning_rate": 0.001001160734440682, "loss": 0.1977, "step": 51834 }, { "epoch": 0.09190951249978944, "grad_norm": 0.1875, "learning_rate": 0.0010011015760168353, "loss": 0.237, "step": 51836 }, { "epoch": 0.09191305866509926, "grad_norm": 0.359375, "learning_rate": 0.0010010424180255132, "loss": 0.2007, "step": 51838 }, { "epoch": 0.09191660483040907, "grad_norm": 0.2294921875, "learning_rate": 0.0010009832604669737, "loss": 0.1426, "step": 51840 }, { "epoch": 0.09192015099571889, "grad_norm": 0.43359375, "learning_rate": 0.0010009241033414763, "loss": 0.2348, "step": 51842 }, { "epoch": 0.0919236971610287, "grad_norm": 0.404296875, "learning_rate": 0.001000864946649279, "loss": 0.1802, "step": 51844 }, { "epoch": 0.09192724332633852, "grad_norm": 0.486328125, "learning_rate": 0.0010008057903906412, "loss": 0.1732, "step": 51846 }, { "epoch": 0.09193078949164833, "grad_norm": 0.53125, "learning_rate": 0.001000746634565821, "loss": 0.1514, "step": 51848 }, { "epoch": 0.09193433565695815, "grad_norm": 0.189453125, "learning_rate": 0.0010006874791750773, "loss": 0.1661, "step": 51850 }, { "epoch": 0.09193788182226796, "grad_norm": 0.169921875, "learning_rate": 0.001000628324218669, "loss": 0.1517, "step": 51852 }, { "epoch": 0.09194142798757779, "grad_norm": 0.283203125, "learning_rate": 0.0010005691696968549, "loss": 0.1271, "step": 51854 }, { "epoch": 0.0919449741528876, "grad_norm": 0.5546875, "learning_rate": 0.001000510015609893, "loss": 0.1928, "step": 51856 }, { "epoch": 0.09194852031819742, "grad_norm": 0.2490234375, "learning_rate": 0.0010004508619580429, "loss": 0.1865, "step": 51858 }, { "epoch": 0.09195206648350723, "grad_norm": 0.291015625, "learning_rate": 0.0010003917087415624, "loss": 0.1957, "step": 51860 }, { "epoch": 0.09195561264881705, "grad_norm": 0.130859375, "learning_rate": 0.001000332555960711, "loss": 0.1601, "step": 51862 }, { "epoch": 0.09195915881412686, "grad_norm": 0.408203125, "learning_rate": 0.0010002734036157475, "loss": 0.1286, "step": 51864 }, { "epoch": 0.09196270497943668, "grad_norm": 0.96484375, "learning_rate": 0.00100021425170693, "loss": 0.221, "step": 51866 }, { "epoch": 0.09196625114474649, "grad_norm": 0.5625, "learning_rate": 0.001000155100234517, "loss": 0.2033, "step": 51868 }, { "epoch": 0.0919697973100563, "grad_norm": 0.2109375, "learning_rate": 0.0010000959491987679, "loss": 0.1977, "step": 51870 }, { "epoch": 0.09197334347536612, "grad_norm": 0.41015625, "learning_rate": 0.0010000367985999413, "loss": 0.1687, "step": 51872 }, { "epoch": 0.09197688964067594, "grad_norm": 1.0546875, "learning_rate": 0.0009999776484382954, "loss": 0.1492, "step": 51874 }, { "epoch": 0.09198043580598575, "grad_norm": 1.21875, "learning_rate": 0.000999918498714089, "loss": 0.2476, "step": 51876 }, { "epoch": 0.09198398197129556, "grad_norm": 0.373046875, "learning_rate": 0.000999859349427581, "loss": 0.2411, "step": 51878 }, { "epoch": 0.09198752813660538, "grad_norm": 1.3203125, "learning_rate": 0.0009998002005790305, "loss": 0.1579, "step": 51880 }, { "epoch": 0.0919910743019152, "grad_norm": 0.328125, "learning_rate": 0.0009997410521686953, "loss": 0.1955, "step": 51882 }, { "epoch": 0.09199462046722501, "grad_norm": 1.71875, "learning_rate": 0.000999681904196835, "loss": 0.4095, "step": 51884 }, { "epoch": 0.09199816663253482, "grad_norm": 0.38671875, "learning_rate": 0.000999622756663707, "loss": 0.1738, "step": 51886 }, { "epoch": 0.09200171279784464, "grad_norm": 0.46484375, "learning_rate": 0.0009995636095695714, "loss": 0.1743, "step": 51888 }, { "epoch": 0.09200525896315445, "grad_norm": 0.70703125, "learning_rate": 0.000999504462914686, "loss": 0.3001, "step": 51890 }, { "epoch": 0.09200880512846427, "grad_norm": 0.455078125, "learning_rate": 0.0009994453166993094, "loss": 0.1458, "step": 51892 }, { "epoch": 0.09201235129377408, "grad_norm": 0.435546875, "learning_rate": 0.0009993861709237009, "loss": 0.2121, "step": 51894 }, { "epoch": 0.0920158974590839, "grad_norm": 0.45703125, "learning_rate": 0.000999327025588119, "loss": 0.2481, "step": 51896 }, { "epoch": 0.09201944362439371, "grad_norm": 0.294921875, "learning_rate": 0.000999267880692822, "loss": 0.1363, "step": 51898 }, { "epoch": 0.09202298978970354, "grad_norm": 0.36328125, "learning_rate": 0.0009992087362380687, "loss": 0.1877, "step": 51900 }, { "epoch": 0.09202653595501335, "grad_norm": 0.298828125, "learning_rate": 0.0009991495922241178, "loss": 0.1418, "step": 51902 }, { "epoch": 0.09203008212032317, "grad_norm": 0.734375, "learning_rate": 0.0009990904486512282, "loss": 0.211, "step": 51904 }, { "epoch": 0.09203362828563298, "grad_norm": 0.435546875, "learning_rate": 0.0009990313055196585, "loss": 0.2166, "step": 51906 }, { "epoch": 0.0920371744509428, "grad_norm": 1.5234375, "learning_rate": 0.000998972162829667, "loss": 0.3178, "step": 51908 }, { "epoch": 0.09204072061625261, "grad_norm": 0.2431640625, "learning_rate": 0.0009989130205815126, "loss": 0.2283, "step": 51910 }, { "epoch": 0.09204426678156243, "grad_norm": 0.56640625, "learning_rate": 0.000998853878775454, "loss": 0.1212, "step": 51912 }, { "epoch": 0.09204781294687224, "grad_norm": 0.59765625, "learning_rate": 0.0009987947374117496, "loss": 0.1591, "step": 51914 }, { "epoch": 0.09205135911218205, "grad_norm": 0.486328125, "learning_rate": 0.0009987355964906585, "loss": 0.2631, "step": 51916 }, { "epoch": 0.09205490527749187, "grad_norm": 0.25390625, "learning_rate": 0.0009986764560124387, "loss": 0.1666, "step": 51918 }, { "epoch": 0.09205845144280168, "grad_norm": 0.71484375, "learning_rate": 0.0009986173159773498, "loss": 0.2122, "step": 51920 }, { "epoch": 0.0920619976081115, "grad_norm": 0.48828125, "learning_rate": 0.0009985581763856492, "loss": 0.145, "step": 51922 }, { "epoch": 0.09206554377342131, "grad_norm": 0.259765625, "learning_rate": 0.0009984990372375966, "loss": 0.1956, "step": 51924 }, { "epoch": 0.09206908993873113, "grad_norm": 0.50390625, "learning_rate": 0.00099843989853345, "loss": 0.1775, "step": 51926 }, { "epoch": 0.09207263610404094, "grad_norm": 0.55078125, "learning_rate": 0.0009983807602734686, "loss": 0.1939, "step": 51928 }, { "epoch": 0.09207618226935076, "grad_norm": 0.27734375, "learning_rate": 0.0009983216224579109, "loss": 0.1672, "step": 51930 }, { "epoch": 0.09207972843466057, "grad_norm": 0.3515625, "learning_rate": 0.0009982624850870348, "loss": 0.1955, "step": 51932 }, { "epoch": 0.09208327459997039, "grad_norm": 0.259765625, "learning_rate": 0.0009982033481610996, "loss": 0.1584, "step": 51934 }, { "epoch": 0.0920868207652802, "grad_norm": 0.341796875, "learning_rate": 0.0009981442116803643, "loss": 0.1741, "step": 51936 }, { "epoch": 0.09209036693059001, "grad_norm": 1.8046875, "learning_rate": 0.0009980850756450864, "loss": 0.2852, "step": 51938 }, { "epoch": 0.09209391309589983, "grad_norm": 0.98828125, "learning_rate": 0.0009980259400555259, "loss": 0.2023, "step": 51940 }, { "epoch": 0.09209745926120964, "grad_norm": 0.451171875, "learning_rate": 0.0009979668049119401, "loss": 0.2245, "step": 51942 }, { "epoch": 0.09210100542651947, "grad_norm": 0.69921875, "learning_rate": 0.0009979076702145886, "loss": 0.1698, "step": 51944 }, { "epoch": 0.09210455159182929, "grad_norm": 0.5703125, "learning_rate": 0.0009978485359637295, "loss": 0.2887, "step": 51946 }, { "epoch": 0.0921080977571391, "grad_norm": 0.19140625, "learning_rate": 0.0009977894021596214, "loss": 0.1516, "step": 51948 }, { "epoch": 0.09211164392244892, "grad_norm": 0.1611328125, "learning_rate": 0.000997730268802523, "loss": 0.1442, "step": 51950 }, { "epoch": 0.09211519008775873, "grad_norm": 0.462890625, "learning_rate": 0.0009976711358926933, "loss": 0.1714, "step": 51952 }, { "epoch": 0.09211873625306854, "grad_norm": 0.427734375, "learning_rate": 0.0009976120034303909, "loss": 0.1771, "step": 51954 }, { "epoch": 0.09212228241837836, "grad_norm": 0.310546875, "learning_rate": 0.0009975528714158738, "loss": 0.1304, "step": 51956 }, { "epoch": 0.09212582858368817, "grad_norm": 0.9921875, "learning_rate": 0.0009974937398494006, "loss": 0.2301, "step": 51958 }, { "epoch": 0.09212937474899799, "grad_norm": 0.58203125, "learning_rate": 0.0009974346087312307, "loss": 0.2613, "step": 51960 }, { "epoch": 0.0921329209143078, "grad_norm": 0.3359375, "learning_rate": 0.0009973754780616222, "loss": 0.1628, "step": 51962 }, { "epoch": 0.09213646707961762, "grad_norm": 0.2421875, "learning_rate": 0.0009973163478408337, "loss": 0.1359, "step": 51964 }, { "epoch": 0.09214001324492743, "grad_norm": 0.1767578125, "learning_rate": 0.0009972572180691236, "loss": 0.1276, "step": 51966 }, { "epoch": 0.09214355941023725, "grad_norm": 0.1943359375, "learning_rate": 0.000997198088746751, "loss": 0.1562, "step": 51968 }, { "epoch": 0.09214710557554706, "grad_norm": 1.296875, "learning_rate": 0.0009971389598739742, "loss": 0.2552, "step": 51970 }, { "epoch": 0.09215065174085688, "grad_norm": 0.359375, "learning_rate": 0.0009970798314510518, "loss": 0.1778, "step": 51972 }, { "epoch": 0.09215419790616669, "grad_norm": 0.404296875, "learning_rate": 0.0009970207034782424, "loss": 0.2222, "step": 51974 }, { "epoch": 0.0921577440714765, "grad_norm": 0.197265625, "learning_rate": 0.0009969615759558045, "loss": 0.1289, "step": 51976 }, { "epoch": 0.09216129023678632, "grad_norm": 0.34765625, "learning_rate": 0.0009969024488839973, "loss": 0.1552, "step": 51978 }, { "epoch": 0.09216483640209613, "grad_norm": 0.474609375, "learning_rate": 0.0009968433222630787, "loss": 0.3873, "step": 51980 }, { "epoch": 0.09216838256740595, "grad_norm": 0.34765625, "learning_rate": 0.0009967841960933073, "loss": 0.1551, "step": 51982 }, { "epoch": 0.09217192873271576, "grad_norm": 0.2353515625, "learning_rate": 0.0009967250703749417, "loss": 0.2165, "step": 51984 }, { "epoch": 0.09217547489802558, "grad_norm": 0.333984375, "learning_rate": 0.0009966659451082411, "loss": 0.139, "step": 51986 }, { "epoch": 0.09217902106333539, "grad_norm": 0.2421875, "learning_rate": 0.0009966068202934635, "loss": 0.1794, "step": 51988 }, { "epoch": 0.09218256722864522, "grad_norm": 0.2021484375, "learning_rate": 0.0009965476959308674, "loss": 0.1525, "step": 51990 }, { "epoch": 0.09218611339395504, "grad_norm": 0.291015625, "learning_rate": 0.000996488572020712, "loss": 0.1389, "step": 51992 }, { "epoch": 0.09218965955926485, "grad_norm": 0.671875, "learning_rate": 0.0009964294485632552, "loss": 0.2184, "step": 51994 }, { "epoch": 0.09219320572457466, "grad_norm": 0.33984375, "learning_rate": 0.0009963703255587558, "loss": 0.1737, "step": 51996 }, { "epoch": 0.09219675188988448, "grad_norm": 0.546875, "learning_rate": 0.0009963112030074723, "loss": 0.2306, "step": 51998 }, { "epoch": 0.0922002980551943, "grad_norm": 0.76171875, "learning_rate": 0.0009962520809096633, "loss": 0.1734, "step": 52000 }, { "epoch": 0.09220384422050411, "grad_norm": 0.404296875, "learning_rate": 0.000996192959265588, "loss": 0.1826, "step": 52002 }, { "epoch": 0.09220739038581392, "grad_norm": 0.1982421875, "learning_rate": 0.000996133838075504, "loss": 0.2056, "step": 52004 }, { "epoch": 0.09221093655112374, "grad_norm": 0.361328125, "learning_rate": 0.00099607471733967, "loss": 0.1901, "step": 52006 }, { "epoch": 0.09221448271643355, "grad_norm": 0.59375, "learning_rate": 0.000996015597058345, "loss": 0.1477, "step": 52008 }, { "epoch": 0.09221802888174337, "grad_norm": 0.314453125, "learning_rate": 0.000995956477231788, "loss": 0.3976, "step": 52010 }, { "epoch": 0.09222157504705318, "grad_norm": 1.015625, "learning_rate": 0.0009958973578602561, "loss": 0.2034, "step": 52012 }, { "epoch": 0.092225121212363, "grad_norm": 0.734375, "learning_rate": 0.0009958382389440093, "loss": 0.1786, "step": 52014 }, { "epoch": 0.09222866737767281, "grad_norm": 1.0625, "learning_rate": 0.000995779120483305, "loss": 0.2168, "step": 52016 }, { "epoch": 0.09223221354298262, "grad_norm": 0.291015625, "learning_rate": 0.0009957200024784028, "loss": 0.1742, "step": 52018 }, { "epoch": 0.09223575970829244, "grad_norm": 0.25390625, "learning_rate": 0.0009956608849295606, "loss": 0.1723, "step": 52020 }, { "epoch": 0.09223930587360225, "grad_norm": 0.66015625, "learning_rate": 0.0009956017678370368, "loss": 0.1655, "step": 52022 }, { "epoch": 0.09224285203891207, "grad_norm": 1.515625, "learning_rate": 0.00099554265120109, "loss": 0.2923, "step": 52024 }, { "epoch": 0.09224639820422188, "grad_norm": 0.80078125, "learning_rate": 0.0009954835350219798, "loss": 0.1942, "step": 52026 }, { "epoch": 0.0922499443695317, "grad_norm": 0.453125, "learning_rate": 0.0009954244192999632, "loss": 0.1964, "step": 52028 }, { "epoch": 0.09225349053484151, "grad_norm": 0.45703125, "learning_rate": 0.0009953653040352998, "loss": 0.2141, "step": 52030 }, { "epoch": 0.09225703670015133, "grad_norm": 0.435546875, "learning_rate": 0.0009953061892282477, "loss": 0.2524, "step": 52032 }, { "epoch": 0.09226058286546114, "grad_norm": 0.671875, "learning_rate": 0.0009952470748790655, "loss": 0.1621, "step": 52034 }, { "epoch": 0.09226412903077097, "grad_norm": 0.294921875, "learning_rate": 0.0009951879609880118, "loss": 0.1638, "step": 52036 }, { "epoch": 0.09226767519608078, "grad_norm": 0.2138671875, "learning_rate": 0.000995128847555345, "loss": 0.1925, "step": 52038 }, { "epoch": 0.0922712213613906, "grad_norm": 0.26953125, "learning_rate": 0.0009950697345813236, "loss": 0.1985, "step": 52040 }, { "epoch": 0.09227476752670041, "grad_norm": 0.330078125, "learning_rate": 0.0009950106220662062, "loss": 0.2198, "step": 52042 }, { "epoch": 0.09227831369201023, "grad_norm": 0.283203125, "learning_rate": 0.0009949515100102517, "loss": 0.191, "step": 52044 }, { "epoch": 0.09228185985732004, "grad_norm": 0.5703125, "learning_rate": 0.0009948923984137179, "loss": 0.1788, "step": 52046 }, { "epoch": 0.09228540602262986, "grad_norm": 0.369140625, "learning_rate": 0.0009948332872768638, "loss": 0.1559, "step": 52048 }, { "epoch": 0.09228895218793967, "grad_norm": 1.953125, "learning_rate": 0.0009947741765999481, "loss": 0.2679, "step": 52050 }, { "epoch": 0.09229249835324949, "grad_norm": 0.765625, "learning_rate": 0.000994715066383229, "loss": 0.2374, "step": 52052 }, { "epoch": 0.0922960445185593, "grad_norm": 0.55078125, "learning_rate": 0.0009946559566269648, "loss": 0.2015, "step": 52054 }, { "epoch": 0.09229959068386911, "grad_norm": 0.3359375, "learning_rate": 0.0009945968473314142, "loss": 0.1278, "step": 52056 }, { "epoch": 0.09230313684917893, "grad_norm": 0.89453125, "learning_rate": 0.000994537738496836, "loss": 0.4042, "step": 52058 }, { "epoch": 0.09230668301448874, "grad_norm": 0.42578125, "learning_rate": 0.0009944786301234886, "loss": 0.1904, "step": 52060 }, { "epoch": 0.09231022917979856, "grad_norm": 0.271484375, "learning_rate": 0.00099441952221163, "loss": 0.1141, "step": 52062 }, { "epoch": 0.09231377534510837, "grad_norm": 0.447265625, "learning_rate": 0.000994360414761519, "loss": 0.3083, "step": 52064 }, { "epoch": 0.09231732151041819, "grad_norm": 0.87890625, "learning_rate": 0.0009943013077734141, "loss": 0.1871, "step": 52066 }, { "epoch": 0.092320867675728, "grad_norm": 0.384765625, "learning_rate": 0.0009942422012475746, "loss": 0.1673, "step": 52068 }, { "epoch": 0.09232441384103782, "grad_norm": 0.4375, "learning_rate": 0.0009941830951842577, "loss": 0.1752, "step": 52070 }, { "epoch": 0.09232796000634763, "grad_norm": 0.38671875, "learning_rate": 0.0009941239895837229, "loss": 0.1728, "step": 52072 }, { "epoch": 0.09233150617165745, "grad_norm": 0.21484375, "learning_rate": 0.0009940648844462276, "loss": 0.1382, "step": 52074 }, { "epoch": 0.09233505233696726, "grad_norm": 0.59375, "learning_rate": 0.000994005779772032, "loss": 0.1751, "step": 52076 }, { "epoch": 0.09233859850227707, "grad_norm": 0.453125, "learning_rate": 0.0009939466755613925, "loss": 0.2739, "step": 52078 }, { "epoch": 0.0923421446675869, "grad_norm": 0.51171875, "learning_rate": 0.000993887571814569, "loss": 0.187, "step": 52080 }, { "epoch": 0.09234569083289672, "grad_norm": 1.0703125, "learning_rate": 0.0009938284685318197, "loss": 0.4441, "step": 52082 }, { "epoch": 0.09234923699820653, "grad_norm": 0.205078125, "learning_rate": 0.0009937693657134032, "loss": 0.2378, "step": 52084 }, { "epoch": 0.09235278316351635, "grad_norm": 0.267578125, "learning_rate": 0.0009937102633595775, "loss": 0.562, "step": 52086 }, { "epoch": 0.09235632932882616, "grad_norm": 0.56640625, "learning_rate": 0.0009936511614706013, "loss": 0.2189, "step": 52088 }, { "epoch": 0.09235987549413598, "grad_norm": 0.5234375, "learning_rate": 0.0009935920600467334, "loss": 0.2712, "step": 52090 }, { "epoch": 0.09236342165944579, "grad_norm": 0.41015625, "learning_rate": 0.000993532959088232, "loss": 0.1727, "step": 52092 }, { "epoch": 0.0923669678247556, "grad_norm": 0.2412109375, "learning_rate": 0.0009934738585953555, "loss": 0.2381, "step": 52094 }, { "epoch": 0.09237051399006542, "grad_norm": 0.93359375, "learning_rate": 0.0009934147585683624, "loss": 0.157, "step": 52096 }, { "epoch": 0.09237406015537523, "grad_norm": 0.96484375, "learning_rate": 0.0009933556590075113, "loss": 0.1825, "step": 52098 }, { "epoch": 0.09237760632068505, "grad_norm": 0.314453125, "learning_rate": 0.0009932965599130609, "loss": 0.2057, "step": 52100 }, { "epoch": 0.09238115248599486, "grad_norm": 0.2294921875, "learning_rate": 0.0009932374612852688, "loss": 0.1345, "step": 52102 }, { "epoch": 0.09238469865130468, "grad_norm": 0.4375, "learning_rate": 0.0009931783631243942, "loss": 0.1423, "step": 52104 }, { "epoch": 0.09238824481661449, "grad_norm": 0.2265625, "learning_rate": 0.0009931192654306954, "loss": 0.1295, "step": 52106 }, { "epoch": 0.09239179098192431, "grad_norm": 0.376953125, "learning_rate": 0.000993060168204431, "loss": 0.1879, "step": 52108 }, { "epoch": 0.09239533714723412, "grad_norm": 0.515625, "learning_rate": 0.0009930010714458595, "loss": 0.2385, "step": 52110 }, { "epoch": 0.09239888331254394, "grad_norm": 0.98828125, "learning_rate": 0.000992941975155239, "loss": 0.2849, "step": 52112 }, { "epoch": 0.09240242947785375, "grad_norm": 1.2890625, "learning_rate": 0.0009928828793328277, "loss": 0.2499, "step": 52114 }, { "epoch": 0.09240597564316357, "grad_norm": 0.76171875, "learning_rate": 0.0009928237839788847, "loss": 0.146, "step": 52116 }, { "epoch": 0.09240952180847338, "grad_norm": 0.470703125, "learning_rate": 0.0009927646890936686, "loss": 0.1744, "step": 52118 }, { "epoch": 0.0924130679737832, "grad_norm": 0.78125, "learning_rate": 0.000992705594677437, "loss": 0.3022, "step": 52120 }, { "epoch": 0.09241661413909301, "grad_norm": 0.66015625, "learning_rate": 0.0009926465007304489, "loss": 0.1226, "step": 52122 }, { "epoch": 0.09242016030440282, "grad_norm": 0.263671875, "learning_rate": 0.0009925874072529628, "loss": 0.3212, "step": 52124 }, { "epoch": 0.09242370646971265, "grad_norm": 0.46875, "learning_rate": 0.0009925283142452367, "loss": 0.159, "step": 52126 }, { "epoch": 0.09242725263502247, "grad_norm": 0.416015625, "learning_rate": 0.0009924692217075297, "loss": 0.1915, "step": 52128 }, { "epoch": 0.09243079880033228, "grad_norm": 0.17578125, "learning_rate": 0.0009924101296400999, "loss": 0.1173, "step": 52130 }, { "epoch": 0.0924343449656421, "grad_norm": 0.3828125, "learning_rate": 0.0009923510380432051, "loss": 0.1887, "step": 52132 }, { "epoch": 0.09243789113095191, "grad_norm": 0.435546875, "learning_rate": 0.000992291946917105, "loss": 0.1729, "step": 52134 }, { "epoch": 0.09244143729626172, "grad_norm": 0.5078125, "learning_rate": 0.0009922328562620571, "loss": 0.1491, "step": 52136 }, { "epoch": 0.09244498346157154, "grad_norm": 0.34375, "learning_rate": 0.00099217376607832, "loss": 0.2041, "step": 52138 }, { "epoch": 0.09244852962688135, "grad_norm": 3.6875, "learning_rate": 0.000992114676366152, "loss": 0.3109, "step": 52140 }, { "epoch": 0.09245207579219117, "grad_norm": 0.53515625, "learning_rate": 0.0009920555871258123, "loss": 0.1675, "step": 52142 }, { "epoch": 0.09245562195750098, "grad_norm": 0.4140625, "learning_rate": 0.0009919964983575585, "loss": 0.1925, "step": 52144 }, { "epoch": 0.0924591681228108, "grad_norm": 0.79296875, "learning_rate": 0.0009919374100616492, "loss": 0.2552, "step": 52146 }, { "epoch": 0.09246271428812061, "grad_norm": 0.201171875, "learning_rate": 0.0009918783222383426, "loss": 0.2043, "step": 52148 }, { "epoch": 0.09246626045343043, "grad_norm": 1.71875, "learning_rate": 0.0009918192348878983, "loss": 0.4878, "step": 52150 }, { "epoch": 0.09246980661874024, "grad_norm": 0.91796875, "learning_rate": 0.0009917601480105731, "loss": 0.1982, "step": 52152 }, { "epoch": 0.09247335278405006, "grad_norm": 0.5390625, "learning_rate": 0.0009917010616066263, "loss": 0.1846, "step": 52154 }, { "epoch": 0.09247689894935987, "grad_norm": 0.267578125, "learning_rate": 0.000991641975676316, "loss": 0.1777, "step": 52156 }, { "epoch": 0.09248044511466968, "grad_norm": 0.41015625, "learning_rate": 0.000991582890219901, "loss": 0.2153, "step": 52158 }, { "epoch": 0.0924839912799795, "grad_norm": 0.28125, "learning_rate": 0.0009915238052376394, "loss": 0.2313, "step": 52160 }, { "epoch": 0.09248753744528931, "grad_norm": 0.58984375, "learning_rate": 0.0009914647207297897, "loss": 0.2048, "step": 52162 }, { "epoch": 0.09249108361059913, "grad_norm": 0.423828125, "learning_rate": 0.0009914056366966103, "loss": 0.2495, "step": 52164 }, { "epoch": 0.09249462977590894, "grad_norm": 0.77734375, "learning_rate": 0.0009913465531383597, "loss": 0.2387, "step": 52166 }, { "epoch": 0.09249817594121876, "grad_norm": 0.294921875, "learning_rate": 0.000991287470055296, "loss": 0.1867, "step": 52168 }, { "epoch": 0.09250172210652857, "grad_norm": 1.015625, "learning_rate": 0.0009912283874476775, "loss": 0.2293, "step": 52170 }, { "epoch": 0.0925052682718384, "grad_norm": 0.9765625, "learning_rate": 0.000991169305315763, "loss": 0.177, "step": 52172 }, { "epoch": 0.09250881443714822, "grad_norm": 0.419921875, "learning_rate": 0.000991110223659811, "loss": 0.1552, "step": 52174 }, { "epoch": 0.09251236060245803, "grad_norm": 0.259765625, "learning_rate": 0.0009910511424800793, "loss": 0.1282, "step": 52176 }, { "epoch": 0.09251590676776784, "grad_norm": 0.515625, "learning_rate": 0.000990992061776827, "loss": 0.2134, "step": 52178 }, { "epoch": 0.09251945293307766, "grad_norm": 0.21875, "learning_rate": 0.0009909329815503117, "loss": 0.1687, "step": 52180 }, { "epoch": 0.09252299909838747, "grad_norm": 0.51953125, "learning_rate": 0.0009908739018007924, "loss": 0.1684, "step": 52182 }, { "epoch": 0.09252654526369729, "grad_norm": 0.466796875, "learning_rate": 0.0009908148225285273, "loss": 0.1756, "step": 52184 }, { "epoch": 0.0925300914290071, "grad_norm": 0.578125, "learning_rate": 0.0009907557437337747, "loss": 0.1206, "step": 52186 }, { "epoch": 0.09253363759431692, "grad_norm": 0.375, "learning_rate": 0.0009906966654167927, "loss": 0.1642, "step": 52188 }, { "epoch": 0.09253718375962673, "grad_norm": 0.330078125, "learning_rate": 0.0009906375875778407, "loss": 0.1406, "step": 52190 }, { "epoch": 0.09254072992493655, "grad_norm": 0.30078125, "learning_rate": 0.0009905785102171758, "loss": 0.1414, "step": 52192 }, { "epoch": 0.09254427609024636, "grad_norm": 1.265625, "learning_rate": 0.0009905194333350569, "loss": 0.292, "step": 52194 }, { "epoch": 0.09254782225555618, "grad_norm": 0.5625, "learning_rate": 0.0009904603569317425, "loss": 0.17, "step": 52196 }, { "epoch": 0.09255136842086599, "grad_norm": 1.078125, "learning_rate": 0.0009904012810074908, "loss": 0.2628, "step": 52198 }, { "epoch": 0.0925549145861758, "grad_norm": 1.5078125, "learning_rate": 0.0009903422055625606, "loss": 0.2465, "step": 52200 }, { "epoch": 0.09255846075148562, "grad_norm": 0.16796875, "learning_rate": 0.0009902831305972097, "loss": 0.1408, "step": 52202 }, { "epoch": 0.09256200691679543, "grad_norm": 0.96484375, "learning_rate": 0.0009902240561116967, "loss": 0.3531, "step": 52204 }, { "epoch": 0.09256555308210525, "grad_norm": 0.5, "learning_rate": 0.0009901649821062797, "loss": 0.1755, "step": 52206 }, { "epoch": 0.09256909924741506, "grad_norm": 0.43359375, "learning_rate": 0.0009901059085812177, "loss": 0.2742, "step": 52208 }, { "epoch": 0.09257264541272488, "grad_norm": 0.47265625, "learning_rate": 0.0009900468355367683, "loss": 0.1695, "step": 52210 }, { "epoch": 0.09257619157803469, "grad_norm": 0.515625, "learning_rate": 0.00098998776297319, "loss": 0.2334, "step": 52212 }, { "epoch": 0.0925797377433445, "grad_norm": 0.4765625, "learning_rate": 0.0009899286908907415, "loss": 0.1824, "step": 52214 }, { "epoch": 0.09258328390865433, "grad_norm": 0.43359375, "learning_rate": 0.0009898696192896812, "loss": 0.1616, "step": 52216 }, { "epoch": 0.09258683007396415, "grad_norm": 0.2373046875, "learning_rate": 0.000989810548170267, "loss": 0.3733, "step": 52218 }, { "epoch": 0.09259037623927396, "grad_norm": 0.8203125, "learning_rate": 0.0009897514775327573, "loss": 0.2259, "step": 52220 }, { "epoch": 0.09259392240458378, "grad_norm": 0.279296875, "learning_rate": 0.0009896924073774108, "loss": 0.1571, "step": 52222 }, { "epoch": 0.09259746856989359, "grad_norm": 0.3984375, "learning_rate": 0.0009896333377044859, "loss": 0.1524, "step": 52224 }, { "epoch": 0.09260101473520341, "grad_norm": 0.376953125, "learning_rate": 0.0009895742685142404, "loss": 0.1613, "step": 52226 }, { "epoch": 0.09260456090051322, "grad_norm": 0.392578125, "learning_rate": 0.0009895151998069328, "loss": 0.1893, "step": 52228 }, { "epoch": 0.09260810706582304, "grad_norm": 0.34375, "learning_rate": 0.0009894561315828213, "loss": 0.2052, "step": 52230 }, { "epoch": 0.09261165323113285, "grad_norm": 0.6953125, "learning_rate": 0.0009893970638421652, "loss": 0.1618, "step": 52232 }, { "epoch": 0.09261519939644267, "grad_norm": 0.25, "learning_rate": 0.0009893379965852215, "loss": 0.1783, "step": 52234 }, { "epoch": 0.09261874556175248, "grad_norm": 0.423828125, "learning_rate": 0.000989278929812249, "loss": 0.2212, "step": 52236 }, { "epoch": 0.0926222917270623, "grad_norm": 0.44140625, "learning_rate": 0.0009892198635235068, "loss": 0.1162, "step": 52238 }, { "epoch": 0.09262583789237211, "grad_norm": 0.216796875, "learning_rate": 0.0009891607977192522, "loss": 0.1588, "step": 52240 }, { "epoch": 0.09262938405768192, "grad_norm": 0.66015625, "learning_rate": 0.000989101732399744, "loss": 0.1424, "step": 52242 }, { "epoch": 0.09263293022299174, "grad_norm": 0.6328125, "learning_rate": 0.0009890426675652404, "loss": 0.1606, "step": 52244 }, { "epoch": 0.09263647638830155, "grad_norm": 0.23046875, "learning_rate": 0.0009889836032159994, "loss": 0.1611, "step": 52246 }, { "epoch": 0.09264002255361137, "grad_norm": 0.482421875, "learning_rate": 0.0009889245393522803, "loss": 0.1591, "step": 52248 }, { "epoch": 0.09264356871892118, "grad_norm": 0.51953125, "learning_rate": 0.00098886547597434, "loss": 0.1725, "step": 52250 }, { "epoch": 0.092647114884231, "grad_norm": 0.279296875, "learning_rate": 0.0009888064130824379, "loss": 0.1569, "step": 52252 }, { "epoch": 0.09265066104954081, "grad_norm": 0.59375, "learning_rate": 0.0009887473506768319, "loss": 0.1718, "step": 52254 }, { "epoch": 0.09265420721485063, "grad_norm": 0.25, "learning_rate": 0.0009886882887577806, "loss": 0.1965, "step": 52256 }, { "epoch": 0.09265775338016044, "grad_norm": 0.498046875, "learning_rate": 0.0009886292273255416, "loss": 0.2126, "step": 52258 }, { "epoch": 0.09266129954547025, "grad_norm": 0.54296875, "learning_rate": 0.0009885701663803741, "loss": 0.165, "step": 52260 }, { "epoch": 0.09266484571078008, "grad_norm": 0.6171875, "learning_rate": 0.0009885111059225358, "loss": 0.2166, "step": 52262 }, { "epoch": 0.0926683918760899, "grad_norm": 0.494140625, "learning_rate": 0.0009884520459522855, "loss": 0.1773, "step": 52264 }, { "epoch": 0.09267193804139971, "grad_norm": 3.625, "learning_rate": 0.0009883929864698807, "loss": 0.2466, "step": 52266 }, { "epoch": 0.09267548420670953, "grad_norm": 0.259765625, "learning_rate": 0.00098833392747558, "loss": 0.1772, "step": 52268 }, { "epoch": 0.09267903037201934, "grad_norm": 0.376953125, "learning_rate": 0.000988274868969642, "loss": 0.2104, "step": 52270 }, { "epoch": 0.09268257653732916, "grad_norm": 0.41796875, "learning_rate": 0.0009882158109523252, "loss": 0.1953, "step": 52272 }, { "epoch": 0.09268612270263897, "grad_norm": 0.361328125, "learning_rate": 0.000988156753423887, "loss": 0.2231, "step": 52274 }, { "epoch": 0.09268966886794879, "grad_norm": 0.68359375, "learning_rate": 0.0009880976963845868, "loss": 0.1457, "step": 52276 }, { "epoch": 0.0926932150332586, "grad_norm": 1.5, "learning_rate": 0.0009880386398346817, "loss": 0.353, "step": 52278 }, { "epoch": 0.09269676119856841, "grad_norm": 2.296875, "learning_rate": 0.0009879795837744304, "loss": 0.225, "step": 52280 }, { "epoch": 0.09270030736387823, "grad_norm": 1.8828125, "learning_rate": 0.0009879205282040922, "loss": 0.2083, "step": 52282 }, { "epoch": 0.09270385352918804, "grad_norm": 0.76953125, "learning_rate": 0.000987861473123924, "loss": 0.1544, "step": 52284 }, { "epoch": 0.09270739969449786, "grad_norm": 0.2734375, "learning_rate": 0.0009878024185341844, "loss": 0.1494, "step": 52286 }, { "epoch": 0.09271094585980767, "grad_norm": 0.43359375, "learning_rate": 0.000987743364435132, "loss": 0.1501, "step": 52288 }, { "epoch": 0.09271449202511749, "grad_norm": 0.83984375, "learning_rate": 0.0009876843108270253, "loss": 0.1518, "step": 52290 }, { "epoch": 0.0927180381904273, "grad_norm": 0.78125, "learning_rate": 0.0009876252577101215, "loss": 0.2016, "step": 52292 }, { "epoch": 0.09272158435573712, "grad_norm": 0.3984375, "learning_rate": 0.0009875662050846802, "loss": 0.1821, "step": 52294 }, { "epoch": 0.09272513052104693, "grad_norm": 0.83984375, "learning_rate": 0.0009875071529509584, "loss": 0.1921, "step": 52296 }, { "epoch": 0.09272867668635675, "grad_norm": 0.365234375, "learning_rate": 0.0009874481013092155, "loss": 0.1818, "step": 52298 }, { "epoch": 0.09273222285166656, "grad_norm": 0.419921875, "learning_rate": 0.0009873890501597088, "loss": 0.1763, "step": 52300 }, { "epoch": 0.09273576901697637, "grad_norm": 0.259765625, "learning_rate": 0.0009873299995026973, "loss": 0.3224, "step": 52302 }, { "epoch": 0.09273931518228619, "grad_norm": 1.3515625, "learning_rate": 0.0009872709493384385, "loss": 0.2235, "step": 52304 }, { "epoch": 0.092742861347596, "grad_norm": 0.30859375, "learning_rate": 0.0009872118996671916, "loss": 0.1635, "step": 52306 }, { "epoch": 0.09274640751290583, "grad_norm": 0.4375, "learning_rate": 0.0009871528504892142, "loss": 0.1206, "step": 52308 }, { "epoch": 0.09274995367821565, "grad_norm": 0.333984375, "learning_rate": 0.0009870938018047644, "loss": 0.3376, "step": 52310 }, { "epoch": 0.09275349984352546, "grad_norm": 0.34765625, "learning_rate": 0.0009870347536141012, "loss": 0.1977, "step": 52312 }, { "epoch": 0.09275704600883528, "grad_norm": 0.28515625, "learning_rate": 0.000986975705917482, "loss": 0.1541, "step": 52314 }, { "epoch": 0.09276059217414509, "grad_norm": 0.15625, "learning_rate": 0.0009869166587151658, "loss": 0.1345, "step": 52316 }, { "epoch": 0.0927641383394549, "grad_norm": 1.0625, "learning_rate": 0.00098685761200741, "loss": 0.2529, "step": 52318 }, { "epoch": 0.09276768450476472, "grad_norm": 0.1953125, "learning_rate": 0.0009867985657944732, "loss": 0.153, "step": 52320 }, { "epoch": 0.09277123067007453, "grad_norm": 0.44140625, "learning_rate": 0.0009867395200766143, "loss": 0.187, "step": 52322 }, { "epoch": 0.09277477683538435, "grad_norm": 0.419921875, "learning_rate": 0.0009866804748540905, "loss": 0.1884, "step": 52324 }, { "epoch": 0.09277832300069416, "grad_norm": 0.57421875, "learning_rate": 0.0009866214301271605, "loss": 0.1748, "step": 52326 }, { "epoch": 0.09278186916600398, "grad_norm": 1.53125, "learning_rate": 0.0009865623858960823, "loss": 0.2102, "step": 52328 }, { "epoch": 0.09278541533131379, "grad_norm": 0.166015625, "learning_rate": 0.000986503342161115, "loss": 0.1922, "step": 52330 }, { "epoch": 0.0927889614966236, "grad_norm": 0.20703125, "learning_rate": 0.0009864442989225158, "loss": 0.2186, "step": 52332 }, { "epoch": 0.09279250766193342, "grad_norm": 0.439453125, "learning_rate": 0.0009863852561805434, "loss": 0.1868, "step": 52334 }, { "epoch": 0.09279605382724324, "grad_norm": 0.47265625, "learning_rate": 0.0009863262139354553, "loss": 0.1936, "step": 52336 }, { "epoch": 0.09279959999255305, "grad_norm": 0.91015625, "learning_rate": 0.0009862671721875113, "loss": 0.2524, "step": 52338 }, { "epoch": 0.09280314615786286, "grad_norm": 0.248046875, "learning_rate": 0.000986208130936968, "loss": 0.185, "step": 52340 }, { "epoch": 0.09280669232317268, "grad_norm": 0.453125, "learning_rate": 0.0009861490901840841, "loss": 0.1863, "step": 52342 }, { "epoch": 0.0928102384884825, "grad_norm": 0.6484375, "learning_rate": 0.0009860900499291181, "loss": 0.2205, "step": 52344 }, { "epoch": 0.09281378465379231, "grad_norm": 1.328125, "learning_rate": 0.0009860310101723285, "loss": 0.1704, "step": 52346 }, { "epoch": 0.09281733081910212, "grad_norm": 0.859375, "learning_rate": 0.0009859719709139725, "loss": 0.3387, "step": 52348 }, { "epoch": 0.09282087698441194, "grad_norm": 0.7578125, "learning_rate": 0.0009859129321543093, "loss": 0.1602, "step": 52350 }, { "epoch": 0.09282442314972177, "grad_norm": 0.51953125, "learning_rate": 0.0009858538938935962, "loss": 0.2155, "step": 52352 }, { "epoch": 0.09282796931503158, "grad_norm": 0.322265625, "learning_rate": 0.0009857948561320924, "loss": 0.1719, "step": 52354 }, { "epoch": 0.0928315154803414, "grad_norm": 0.2314453125, "learning_rate": 0.000985735818870055, "loss": 0.1497, "step": 52356 }, { "epoch": 0.09283506164565121, "grad_norm": 0.58984375, "learning_rate": 0.000985676782107743, "loss": 0.1513, "step": 52358 }, { "epoch": 0.09283860781096102, "grad_norm": 0.4296875, "learning_rate": 0.0009856177458454143, "loss": 0.2664, "step": 52360 }, { "epoch": 0.09284215397627084, "grad_norm": 0.61328125, "learning_rate": 0.0009855587100833275, "loss": 0.1654, "step": 52362 }, { "epoch": 0.09284570014158065, "grad_norm": 0.177734375, "learning_rate": 0.0009854996748217399, "loss": 0.1725, "step": 52364 }, { "epoch": 0.09284924630689047, "grad_norm": 0.57421875, "learning_rate": 0.0009854406400609105, "loss": 0.1434, "step": 52366 }, { "epoch": 0.09285279247220028, "grad_norm": 1.546875, "learning_rate": 0.0009853816058010972, "loss": 0.2312, "step": 52368 }, { "epoch": 0.0928563386375101, "grad_norm": 6.65625, "learning_rate": 0.000985322572042558, "loss": 0.4198, "step": 52370 }, { "epoch": 0.09285988480281991, "grad_norm": 1.2265625, "learning_rate": 0.0009852635387855514, "loss": 0.1772, "step": 52372 }, { "epoch": 0.09286343096812973, "grad_norm": 0.369140625, "learning_rate": 0.0009852045060303354, "loss": 0.1742, "step": 52374 }, { "epoch": 0.09286697713343954, "grad_norm": 0.341796875, "learning_rate": 0.0009851454737771679, "loss": 0.1874, "step": 52376 }, { "epoch": 0.09287052329874936, "grad_norm": 0.80859375, "learning_rate": 0.0009850864420263077, "loss": 0.4938, "step": 52378 }, { "epoch": 0.09287406946405917, "grad_norm": 0.244140625, "learning_rate": 0.0009850274107780128, "loss": 0.2045, "step": 52380 }, { "epoch": 0.09287761562936898, "grad_norm": 0.400390625, "learning_rate": 0.0009849683800325412, "loss": 0.187, "step": 52382 }, { "epoch": 0.0928811617946788, "grad_norm": 0.474609375, "learning_rate": 0.0009849093497901506, "loss": 0.1733, "step": 52384 }, { "epoch": 0.09288470795998861, "grad_norm": 1.09375, "learning_rate": 0.0009848503200511, "loss": 0.2309, "step": 52386 }, { "epoch": 0.09288825412529843, "grad_norm": 0.4140625, "learning_rate": 0.0009847912908156477, "loss": 0.1916, "step": 52388 }, { "epoch": 0.09289180029060824, "grad_norm": 0.3046875, "learning_rate": 0.000984732262084051, "loss": 0.1555, "step": 52390 }, { "epoch": 0.09289534645591806, "grad_norm": 0.60546875, "learning_rate": 0.0009846732338565677, "loss": 0.1654, "step": 52392 }, { "epoch": 0.09289889262122787, "grad_norm": 0.294921875, "learning_rate": 0.0009846142061334575, "loss": 0.1806, "step": 52394 }, { "epoch": 0.09290243878653769, "grad_norm": 0.72265625, "learning_rate": 0.000984555178914978, "loss": 0.175, "step": 52396 }, { "epoch": 0.09290598495184751, "grad_norm": 2.3125, "learning_rate": 0.0009844961522013861, "loss": 0.2626, "step": 52398 }, { "epoch": 0.09290953111715733, "grad_norm": 0.474609375, "learning_rate": 0.0009844371259929415, "loss": 0.2194, "step": 52400 }, { "epoch": 0.09291307728246714, "grad_norm": 0.56640625, "learning_rate": 0.0009843781002899018, "loss": 0.1479, "step": 52402 }, { "epoch": 0.09291662344777696, "grad_norm": 0.3046875, "learning_rate": 0.0009843190750925253, "loss": 0.16, "step": 52404 }, { "epoch": 0.09292016961308677, "grad_norm": 0.48828125, "learning_rate": 0.0009842600504010694, "loss": 0.3035, "step": 52406 }, { "epoch": 0.09292371577839659, "grad_norm": 0.33984375, "learning_rate": 0.0009842010262157935, "loss": 0.1851, "step": 52408 }, { "epoch": 0.0929272619437064, "grad_norm": 0.65625, "learning_rate": 0.0009841420025369545, "loss": 0.1495, "step": 52410 }, { "epoch": 0.09293080810901622, "grad_norm": 0.37109375, "learning_rate": 0.0009840829793648115, "loss": 0.2819, "step": 52412 }, { "epoch": 0.09293435427432603, "grad_norm": 0.275390625, "learning_rate": 0.0009840239566996222, "loss": 0.1532, "step": 52414 }, { "epoch": 0.09293790043963585, "grad_norm": 0.369140625, "learning_rate": 0.000983964934541644, "loss": 0.1765, "step": 52416 }, { "epoch": 0.09294144660494566, "grad_norm": 0.26953125, "learning_rate": 0.0009839059128911364, "loss": 0.1872, "step": 52418 }, { "epoch": 0.09294499277025547, "grad_norm": 0.53125, "learning_rate": 0.000983846891748357, "loss": 0.186, "step": 52420 }, { "epoch": 0.09294853893556529, "grad_norm": 0.25, "learning_rate": 0.0009837878711135635, "loss": 0.1656, "step": 52422 }, { "epoch": 0.0929520851008751, "grad_norm": 0.9140625, "learning_rate": 0.0009837288509870145, "loss": 0.1867, "step": 52424 }, { "epoch": 0.09295563126618492, "grad_norm": 1.828125, "learning_rate": 0.000983669831368968, "loss": 0.2137, "step": 52426 }, { "epoch": 0.09295917743149473, "grad_norm": 0.3515625, "learning_rate": 0.0009836108122596823, "loss": 0.1675, "step": 52428 }, { "epoch": 0.09296272359680455, "grad_norm": 0.435546875, "learning_rate": 0.000983551793659415, "loss": 0.1941, "step": 52430 }, { "epoch": 0.09296626976211436, "grad_norm": 0.25390625, "learning_rate": 0.0009834927755684247, "loss": 0.2072, "step": 52432 }, { "epoch": 0.09296981592742418, "grad_norm": 0.1796875, "learning_rate": 0.0009834337579869687, "loss": 0.1318, "step": 52434 }, { "epoch": 0.09297336209273399, "grad_norm": 0.66015625, "learning_rate": 0.0009833747409153067, "loss": 0.2001, "step": 52436 }, { "epoch": 0.0929769082580438, "grad_norm": 0.271484375, "learning_rate": 0.0009833157243536953, "loss": 0.1516, "step": 52438 }, { "epoch": 0.09298045442335362, "grad_norm": 0.55078125, "learning_rate": 0.0009832567083023931, "loss": 0.1745, "step": 52440 }, { "epoch": 0.09298400058866343, "grad_norm": 0.41015625, "learning_rate": 0.0009831976927616585, "loss": 0.2117, "step": 52442 }, { "epoch": 0.09298754675397326, "grad_norm": 0.47265625, "learning_rate": 0.0009831386777317493, "loss": 0.151, "step": 52444 }, { "epoch": 0.09299109291928308, "grad_norm": 0.26953125, "learning_rate": 0.0009830796632129236, "loss": 0.2277, "step": 52446 }, { "epoch": 0.09299463908459289, "grad_norm": 0.2578125, "learning_rate": 0.0009830206492054392, "loss": 0.1384, "step": 52448 }, { "epoch": 0.0929981852499027, "grad_norm": 0.318359375, "learning_rate": 0.0009829616357095547, "loss": 0.1853, "step": 52450 }, { "epoch": 0.09300173141521252, "grad_norm": 1.171875, "learning_rate": 0.000982902622725528, "loss": 0.1879, "step": 52452 }, { "epoch": 0.09300527758052234, "grad_norm": 0.490234375, "learning_rate": 0.0009828436102536176, "loss": 0.2376, "step": 52454 }, { "epoch": 0.09300882374583215, "grad_norm": 0.365234375, "learning_rate": 0.000982784598294081, "loss": 0.2186, "step": 52456 }, { "epoch": 0.09301236991114196, "grad_norm": 0.43359375, "learning_rate": 0.0009827255868471764, "loss": 0.2338, "step": 52458 }, { "epoch": 0.09301591607645178, "grad_norm": 0.609375, "learning_rate": 0.0009826665759131619, "loss": 0.1951, "step": 52460 }, { "epoch": 0.0930194622417616, "grad_norm": 0.18359375, "learning_rate": 0.0009826075654922957, "loss": 0.1424, "step": 52462 }, { "epoch": 0.09302300840707141, "grad_norm": 3.640625, "learning_rate": 0.000982548555584836, "loss": 0.2951, "step": 52464 }, { "epoch": 0.09302655457238122, "grad_norm": 0.220703125, "learning_rate": 0.0009824895461910403, "loss": 0.182, "step": 52466 }, { "epoch": 0.09303010073769104, "grad_norm": 0.287109375, "learning_rate": 0.0009824305373111672, "loss": 0.1879, "step": 52468 }, { "epoch": 0.09303364690300085, "grad_norm": 0.71484375, "learning_rate": 0.0009823715289454752, "loss": 0.1874, "step": 52470 }, { "epoch": 0.09303719306831067, "grad_norm": 0.3515625, "learning_rate": 0.000982312521094221, "loss": 0.2003, "step": 52472 }, { "epoch": 0.09304073923362048, "grad_norm": 0.765625, "learning_rate": 0.0009822535137576638, "loss": 0.2379, "step": 52474 }, { "epoch": 0.0930442853989303, "grad_norm": 0.66015625, "learning_rate": 0.0009821945069360613, "loss": 0.2257, "step": 52476 }, { "epoch": 0.09304783156424011, "grad_norm": 1.2265625, "learning_rate": 0.000982135500629672, "loss": 0.2832, "step": 52478 }, { "epoch": 0.09305137772954993, "grad_norm": 0.486328125, "learning_rate": 0.000982076494838753, "loss": 0.1443, "step": 52480 }, { "epoch": 0.09305492389485974, "grad_norm": 0.279296875, "learning_rate": 0.0009820174895635633, "loss": 0.1995, "step": 52482 }, { "epoch": 0.09305847006016955, "grad_norm": 0.546875, "learning_rate": 0.00098195848480436, "loss": 0.2421, "step": 52484 }, { "epoch": 0.09306201622547937, "grad_norm": 0.57421875, "learning_rate": 0.0009818994805614026, "loss": 0.1485, "step": 52486 }, { "epoch": 0.0930655623907892, "grad_norm": 0.314453125, "learning_rate": 0.0009818404768349475, "loss": 0.1722, "step": 52488 }, { "epoch": 0.09306910855609901, "grad_norm": 0.2734375, "learning_rate": 0.0009817814736252537, "loss": 0.2624, "step": 52490 }, { "epoch": 0.09307265472140883, "grad_norm": 0.486328125, "learning_rate": 0.0009817224709325792, "loss": 0.2056, "step": 52492 }, { "epoch": 0.09307620088671864, "grad_norm": 0.5078125, "learning_rate": 0.0009816634687571822, "loss": 0.2615, "step": 52494 }, { "epoch": 0.09307974705202846, "grad_norm": 0.294921875, "learning_rate": 0.0009816044670993198, "loss": 0.1595, "step": 52496 }, { "epoch": 0.09308329321733827, "grad_norm": 0.3984375, "learning_rate": 0.0009815454659592511, "loss": 0.1692, "step": 52498 }, { "epoch": 0.09308683938264808, "grad_norm": 0.36328125, "learning_rate": 0.0009814864653372335, "loss": 0.2168, "step": 52500 }, { "epoch": 0.0930903855479579, "grad_norm": 0.5, "learning_rate": 0.0009814274652335255, "loss": 0.2205, "step": 52502 }, { "epoch": 0.09309393171326771, "grad_norm": 0.484375, "learning_rate": 0.000981368465648385, "loss": 0.1675, "step": 52504 }, { "epoch": 0.09309747787857753, "grad_norm": 0.51171875, "learning_rate": 0.0009813094665820695, "loss": 0.1266, "step": 52506 }, { "epoch": 0.09310102404388734, "grad_norm": 0.546875, "learning_rate": 0.0009812504680348376, "loss": 0.1782, "step": 52508 }, { "epoch": 0.09310457020919716, "grad_norm": 0.46875, "learning_rate": 0.0009811914700069476, "loss": 0.1684, "step": 52510 }, { "epoch": 0.09310811637450697, "grad_norm": 0.3203125, "learning_rate": 0.0009811324724986565, "loss": 0.2549, "step": 52512 }, { "epoch": 0.09311166253981679, "grad_norm": 0.609375, "learning_rate": 0.000981073475510223, "loss": 0.1912, "step": 52514 }, { "epoch": 0.0931152087051266, "grad_norm": 0.4296875, "learning_rate": 0.0009810144790419052, "loss": 0.1812, "step": 52516 }, { "epoch": 0.09311875487043642, "grad_norm": 0.56640625, "learning_rate": 0.000980955483093961, "loss": 0.2096, "step": 52518 }, { "epoch": 0.09312230103574623, "grad_norm": 0.90234375, "learning_rate": 0.0009808964876666484, "loss": 0.3059, "step": 52520 }, { "epoch": 0.09312584720105604, "grad_norm": 0.3125, "learning_rate": 0.0009808374927602252, "loss": 0.3122, "step": 52522 }, { "epoch": 0.09312939336636586, "grad_norm": 0.177734375, "learning_rate": 0.0009807784983749497, "loss": 0.1656, "step": 52524 }, { "epoch": 0.09313293953167567, "grad_norm": 3.8125, "learning_rate": 0.00098071950451108, "loss": 0.2198, "step": 52526 }, { "epoch": 0.09313648569698549, "grad_norm": 0.68359375, "learning_rate": 0.0009806605111688735, "loss": 0.2202, "step": 52528 }, { "epoch": 0.0931400318622953, "grad_norm": 0.45703125, "learning_rate": 0.0009806015183485887, "loss": 0.195, "step": 52530 }, { "epoch": 0.09314357802760512, "grad_norm": 0.2294921875, "learning_rate": 0.0009805425260504834, "loss": 0.1659, "step": 52532 }, { "epoch": 0.09314712419291495, "grad_norm": 0.337890625, "learning_rate": 0.0009804835342748159, "loss": 0.1449, "step": 52534 }, { "epoch": 0.09315067035822476, "grad_norm": 0.51953125, "learning_rate": 0.0009804245430218442, "loss": 0.2093, "step": 52536 }, { "epoch": 0.09315421652353457, "grad_norm": 0.88671875, "learning_rate": 0.000980365552291826, "loss": 0.1782, "step": 52538 }, { "epoch": 0.09315776268884439, "grad_norm": 1.125, "learning_rate": 0.0009803065620850192, "loss": 0.1435, "step": 52540 }, { "epoch": 0.0931613088541542, "grad_norm": 0.87890625, "learning_rate": 0.0009802475724016817, "loss": 0.1841, "step": 52542 }, { "epoch": 0.09316485501946402, "grad_norm": 0.46484375, "learning_rate": 0.0009801885832420726, "loss": 0.1579, "step": 52544 }, { "epoch": 0.09316840118477383, "grad_norm": 0.4296875, "learning_rate": 0.0009801295946064484, "loss": 0.2072, "step": 52546 }, { "epoch": 0.09317194735008365, "grad_norm": 0.2353515625, "learning_rate": 0.0009800706064950675, "loss": 0.1445, "step": 52548 }, { "epoch": 0.09317549351539346, "grad_norm": 0.26953125, "learning_rate": 0.0009800116189081884, "loss": 0.1745, "step": 52550 }, { "epoch": 0.09317903968070328, "grad_norm": 0.404296875, "learning_rate": 0.0009799526318460693, "loss": 0.2179, "step": 52552 }, { "epoch": 0.09318258584601309, "grad_norm": 0.31640625, "learning_rate": 0.0009798936453089668, "loss": 0.1325, "step": 52554 }, { "epoch": 0.0931861320113229, "grad_norm": 0.56640625, "learning_rate": 0.0009798346592971403, "loss": 0.2335, "step": 52556 }, { "epoch": 0.09318967817663272, "grad_norm": 1.0234375, "learning_rate": 0.000979775673810847, "loss": 0.1696, "step": 52558 }, { "epoch": 0.09319322434194253, "grad_norm": 0.3515625, "learning_rate": 0.0009797166888503453, "loss": 0.1512, "step": 52560 }, { "epoch": 0.09319677050725235, "grad_norm": 0.58984375, "learning_rate": 0.0009796577044158927, "loss": 0.1633, "step": 52562 }, { "epoch": 0.09320031667256216, "grad_norm": 0.361328125, "learning_rate": 0.0009795987205077471, "loss": 0.1611, "step": 52564 }, { "epoch": 0.09320386283787198, "grad_norm": 0.2890625, "learning_rate": 0.000979539737126167, "loss": 0.1889, "step": 52566 }, { "epoch": 0.0932074090031818, "grad_norm": 0.26953125, "learning_rate": 0.0009794807542714105, "loss": 0.1287, "step": 52568 }, { "epoch": 0.09321095516849161, "grad_norm": 0.65625, "learning_rate": 0.000979421771943735, "loss": 0.1919, "step": 52570 }, { "epoch": 0.09321450133380142, "grad_norm": 0.275390625, "learning_rate": 0.000979362790143398, "loss": 0.1922, "step": 52572 }, { "epoch": 0.09321804749911124, "grad_norm": 0.380859375, "learning_rate": 0.000979303808870659, "loss": 0.1905, "step": 52574 }, { "epoch": 0.09322159366442105, "grad_norm": 0.2099609375, "learning_rate": 0.0009792448281257745, "loss": 0.1239, "step": 52576 }, { "epoch": 0.09322513982973087, "grad_norm": 0.6875, "learning_rate": 0.0009791858479090032, "loss": 0.22, "step": 52578 }, { "epoch": 0.0932286859950407, "grad_norm": 0.53515625, "learning_rate": 0.0009791268682206027, "loss": 0.2066, "step": 52580 }, { "epoch": 0.09323223216035051, "grad_norm": 0.8671875, "learning_rate": 0.000979067889060831, "loss": 0.1607, "step": 52582 }, { "epoch": 0.09323577832566032, "grad_norm": 0.4609375, "learning_rate": 0.0009790089104299465, "loss": 0.1626, "step": 52584 }, { "epoch": 0.09323932449097014, "grad_norm": 0.765625, "learning_rate": 0.0009789499323282062, "loss": 0.2573, "step": 52586 }, { "epoch": 0.09324287065627995, "grad_norm": 0.203125, "learning_rate": 0.0009788909547558686, "loss": 0.1667, "step": 52588 }, { "epoch": 0.09324641682158977, "grad_norm": 1.6484375, "learning_rate": 0.0009788319777131918, "loss": 0.2491, "step": 52590 }, { "epoch": 0.09324996298689958, "grad_norm": 0.435546875, "learning_rate": 0.000978773001200434, "loss": 0.1841, "step": 52592 }, { "epoch": 0.0932535091522094, "grad_norm": 0.21484375, "learning_rate": 0.0009787140252178522, "loss": 0.1623, "step": 52594 }, { "epoch": 0.09325705531751921, "grad_norm": 0.431640625, "learning_rate": 0.0009786550497657047, "loss": 0.1982, "step": 52596 }, { "epoch": 0.09326060148282903, "grad_norm": 0.74609375, "learning_rate": 0.0009785960748442497, "loss": 0.2024, "step": 52598 }, { "epoch": 0.09326414764813884, "grad_norm": 1.515625, "learning_rate": 0.0009785371004537452, "loss": 0.2383, "step": 52600 }, { "epoch": 0.09326769381344865, "grad_norm": 0.62109375, "learning_rate": 0.0009784781265944485, "loss": 0.1686, "step": 52602 }, { "epoch": 0.09327123997875847, "grad_norm": 0.66015625, "learning_rate": 0.0009784191532666178, "loss": 0.1345, "step": 52604 }, { "epoch": 0.09327478614406828, "grad_norm": 0.73046875, "learning_rate": 0.000978360180470511, "loss": 0.2652, "step": 52606 }, { "epoch": 0.0932783323093781, "grad_norm": 0.2080078125, "learning_rate": 0.0009783012082063868, "loss": 0.3063, "step": 52608 }, { "epoch": 0.09328187847468791, "grad_norm": 3.0625, "learning_rate": 0.000978242236474502, "loss": 0.2605, "step": 52610 }, { "epoch": 0.09328542463999773, "grad_norm": 2.578125, "learning_rate": 0.0009781832652751151, "loss": 0.1936, "step": 52612 }, { "epoch": 0.09328897080530754, "grad_norm": 0.30859375, "learning_rate": 0.0009781242946084834, "loss": 0.1604, "step": 52614 }, { "epoch": 0.09329251697061736, "grad_norm": 0.56640625, "learning_rate": 0.0009780653244748656, "loss": 0.2611, "step": 52616 }, { "epoch": 0.09329606313592717, "grad_norm": 0.25, "learning_rate": 0.0009780063548745193, "loss": 0.1783, "step": 52618 }, { "epoch": 0.09329960930123699, "grad_norm": 0.375, "learning_rate": 0.0009779473858077023, "loss": 0.1395, "step": 52620 }, { "epoch": 0.0933031554665468, "grad_norm": 0.390625, "learning_rate": 0.0009778884172746722, "loss": 0.1944, "step": 52622 }, { "epoch": 0.09330670163185663, "grad_norm": 0.3046875, "learning_rate": 0.0009778294492756873, "loss": 0.2296, "step": 52624 }, { "epoch": 0.09331024779716644, "grad_norm": 0.412109375, "learning_rate": 0.000977770481811006, "loss": 0.2104, "step": 52626 }, { "epoch": 0.09331379396247626, "grad_norm": 1.4609375, "learning_rate": 0.0009777115148808851, "loss": 0.1965, "step": 52628 }, { "epoch": 0.09331734012778607, "grad_norm": 0.2041015625, "learning_rate": 0.0009776525484855832, "loss": 0.1625, "step": 52630 }, { "epoch": 0.09332088629309589, "grad_norm": 0.29296875, "learning_rate": 0.0009775935826253578, "loss": 0.1518, "step": 52632 }, { "epoch": 0.0933244324584057, "grad_norm": 0.58984375, "learning_rate": 0.0009775346173004676, "loss": 0.2392, "step": 52634 }, { "epoch": 0.09332797862371552, "grad_norm": 0.275390625, "learning_rate": 0.000977475652511169, "loss": 0.1816, "step": 52636 }, { "epoch": 0.09333152478902533, "grad_norm": 0.51171875, "learning_rate": 0.0009774166882577208, "loss": 0.2131, "step": 52638 }, { "epoch": 0.09333507095433514, "grad_norm": 0.41015625, "learning_rate": 0.0009773577245403811, "loss": 0.2329, "step": 52640 }, { "epoch": 0.09333861711964496, "grad_norm": 0.357421875, "learning_rate": 0.0009772987613594077, "loss": 0.2116, "step": 52642 }, { "epoch": 0.09334216328495477, "grad_norm": 0.498046875, "learning_rate": 0.0009772397987150578, "loss": 0.2233, "step": 52644 }, { "epoch": 0.09334570945026459, "grad_norm": 0.5, "learning_rate": 0.0009771808366075899, "loss": 0.23, "step": 52646 }, { "epoch": 0.0933492556155744, "grad_norm": 1.1796875, "learning_rate": 0.0009771218750372617, "loss": 0.1952, "step": 52648 }, { "epoch": 0.09335280178088422, "grad_norm": 0.39453125, "learning_rate": 0.000977062914004331, "loss": 0.1914, "step": 52650 }, { "epoch": 0.09335634794619403, "grad_norm": 0.640625, "learning_rate": 0.0009770039535090557, "loss": 0.2228, "step": 52652 }, { "epoch": 0.09335989411150385, "grad_norm": 0.38671875, "learning_rate": 0.0009769449935516938, "loss": 0.2217, "step": 52654 }, { "epoch": 0.09336344027681366, "grad_norm": 0.474609375, "learning_rate": 0.0009768860341325028, "loss": 0.1958, "step": 52656 }, { "epoch": 0.09336698644212348, "grad_norm": 0.47265625, "learning_rate": 0.0009768270752517412, "loss": 0.2265, "step": 52658 }, { "epoch": 0.09337053260743329, "grad_norm": 0.423828125, "learning_rate": 0.0009767681169096658, "loss": 0.1619, "step": 52660 }, { "epoch": 0.0933740787727431, "grad_norm": 0.26171875, "learning_rate": 0.0009767091591065353, "loss": 0.2215, "step": 52662 }, { "epoch": 0.09337762493805292, "grad_norm": 0.68359375, "learning_rate": 0.0009766502018426074, "loss": 0.1744, "step": 52664 }, { "epoch": 0.09338117110336273, "grad_norm": 0.216796875, "learning_rate": 0.00097659124511814, "loss": 0.1307, "step": 52666 }, { "epoch": 0.09338471726867255, "grad_norm": 0.54296875, "learning_rate": 0.0009765322889333906, "loss": 0.3505, "step": 52668 }, { "epoch": 0.09338826343398238, "grad_norm": 0.82421875, "learning_rate": 0.0009764733332886174, "loss": 0.1625, "step": 52670 }, { "epoch": 0.09339180959929219, "grad_norm": 0.51171875, "learning_rate": 0.0009764143781840778, "loss": 0.1766, "step": 52672 }, { "epoch": 0.093395355764602, "grad_norm": 0.287109375, "learning_rate": 0.0009763554236200305, "loss": 0.1906, "step": 52674 }, { "epoch": 0.09339890192991182, "grad_norm": 3.046875, "learning_rate": 0.0009762964695967322, "loss": 0.2484, "step": 52676 }, { "epoch": 0.09340244809522164, "grad_norm": 0.546875, "learning_rate": 0.0009762375161144415, "loss": 0.1791, "step": 52678 }, { "epoch": 0.09340599426053145, "grad_norm": 0.376953125, "learning_rate": 0.0009761785631734157, "loss": 0.2776, "step": 52680 }, { "epoch": 0.09340954042584126, "grad_norm": 0.3671875, "learning_rate": 0.0009761196107739137, "loss": 0.2084, "step": 52682 }, { "epoch": 0.09341308659115108, "grad_norm": 0.359375, "learning_rate": 0.0009760606589161917, "loss": 0.219, "step": 52684 }, { "epoch": 0.0934166327564609, "grad_norm": 0.66015625, "learning_rate": 0.0009760017076005088, "loss": 0.1389, "step": 52686 }, { "epoch": 0.09342017892177071, "grad_norm": 1.6640625, "learning_rate": 0.0009759427568271224, "loss": 0.344, "step": 52688 }, { "epoch": 0.09342372508708052, "grad_norm": 0.498046875, "learning_rate": 0.0009758838065962905, "loss": 0.2536, "step": 52690 }, { "epoch": 0.09342727125239034, "grad_norm": 0.89453125, "learning_rate": 0.0009758248569082706, "loss": 0.2178, "step": 52692 }, { "epoch": 0.09343081741770015, "grad_norm": 0.59375, "learning_rate": 0.0009757659077633205, "loss": 0.2505, "step": 52694 }, { "epoch": 0.09343436358300997, "grad_norm": 0.412109375, "learning_rate": 0.000975706959161698, "loss": 0.2014, "step": 52696 }, { "epoch": 0.09343790974831978, "grad_norm": 0.31640625, "learning_rate": 0.0009756480111036617, "loss": 0.1479, "step": 52698 }, { "epoch": 0.0934414559136296, "grad_norm": 0.59765625, "learning_rate": 0.0009755890635894682, "loss": 0.1736, "step": 52700 }, { "epoch": 0.09344500207893941, "grad_norm": 0.8359375, "learning_rate": 0.000975530116619376, "loss": 0.2379, "step": 52702 }, { "epoch": 0.09344854824424922, "grad_norm": 1.0703125, "learning_rate": 0.0009754711701936427, "loss": 0.2135, "step": 52704 }, { "epoch": 0.09345209440955904, "grad_norm": 0.2353515625, "learning_rate": 0.0009754122243125263, "loss": 0.1504, "step": 52706 }, { "epoch": 0.09345564057486885, "grad_norm": 0.63671875, "learning_rate": 0.0009753532789762847, "loss": 0.172, "step": 52708 }, { "epoch": 0.09345918674017867, "grad_norm": 0.396484375, "learning_rate": 0.000975294334185175, "loss": 0.241, "step": 52710 }, { "epoch": 0.09346273290548848, "grad_norm": 0.84375, "learning_rate": 0.0009752353899394555, "loss": 0.3639, "step": 52712 }, { "epoch": 0.0934662790707983, "grad_norm": 0.41015625, "learning_rate": 0.000975176446239384, "loss": 0.2037, "step": 52714 }, { "epoch": 0.09346982523610813, "grad_norm": 0.625, "learning_rate": 0.0009751175030852186, "loss": 0.194, "step": 52716 }, { "epoch": 0.09347337140141794, "grad_norm": 0.50390625, "learning_rate": 0.0009750585604772162, "loss": 0.1549, "step": 52718 }, { "epoch": 0.09347691756672775, "grad_norm": 1.3203125, "learning_rate": 0.0009749996184156352, "loss": 0.1471, "step": 52720 }, { "epoch": 0.09348046373203757, "grad_norm": 0.3984375, "learning_rate": 0.0009749406769007334, "loss": 0.1925, "step": 52722 }, { "epoch": 0.09348400989734738, "grad_norm": 0.57421875, "learning_rate": 0.0009748817359327684, "loss": 0.142, "step": 52724 }, { "epoch": 0.0934875560626572, "grad_norm": 0.55859375, "learning_rate": 0.0009748227955119982, "loss": 0.2635, "step": 52726 }, { "epoch": 0.09349110222796701, "grad_norm": 0.466796875, "learning_rate": 0.0009747638556386799, "loss": 0.1837, "step": 52728 }, { "epoch": 0.09349464839327683, "grad_norm": 0.49609375, "learning_rate": 0.0009747049163130719, "loss": 0.3385, "step": 52730 }, { "epoch": 0.09349819455858664, "grad_norm": 0.384765625, "learning_rate": 0.0009746459775354322, "loss": 0.1761, "step": 52732 }, { "epoch": 0.09350174072389646, "grad_norm": 0.41796875, "learning_rate": 0.0009745870393060179, "loss": 0.1256, "step": 52734 }, { "epoch": 0.09350528688920627, "grad_norm": 0.3828125, "learning_rate": 0.0009745281016250868, "loss": 0.1341, "step": 52736 }, { "epoch": 0.09350883305451609, "grad_norm": 0.5546875, "learning_rate": 0.0009744691644928969, "loss": 0.2315, "step": 52738 }, { "epoch": 0.0935123792198259, "grad_norm": 0.251953125, "learning_rate": 0.0009744102279097067, "loss": 0.1484, "step": 52740 }, { "epoch": 0.09351592538513571, "grad_norm": 0.78515625, "learning_rate": 0.0009743512918757724, "loss": 0.2437, "step": 52742 }, { "epoch": 0.09351947155044553, "grad_norm": 0.291015625, "learning_rate": 0.000974292356391353, "loss": 0.1907, "step": 52744 }, { "epoch": 0.09352301771575534, "grad_norm": 1.1640625, "learning_rate": 0.0009742334214567057, "loss": 0.2899, "step": 52746 }, { "epoch": 0.09352656388106516, "grad_norm": 1.890625, "learning_rate": 0.0009741744870720886, "loss": 0.2313, "step": 52748 }, { "epoch": 0.09353011004637497, "grad_norm": 0.8046875, "learning_rate": 0.0009741155532377588, "loss": 0.213, "step": 52750 }, { "epoch": 0.09353365621168479, "grad_norm": 0.359375, "learning_rate": 0.0009740566199539744, "loss": 0.1632, "step": 52752 }, { "epoch": 0.0935372023769946, "grad_norm": 1.0390625, "learning_rate": 0.0009739976872209932, "loss": 0.1801, "step": 52754 }, { "epoch": 0.09354074854230442, "grad_norm": 0.296875, "learning_rate": 0.0009739387550390737, "loss": 0.1835, "step": 52756 }, { "epoch": 0.09354429470761423, "grad_norm": 1.3125, "learning_rate": 0.000973879823408472, "loss": 0.4264, "step": 52758 }, { "epoch": 0.09354784087292406, "grad_norm": 0.66796875, "learning_rate": 0.0009738208923294469, "loss": 0.2583, "step": 52760 }, { "epoch": 0.09355138703823387, "grad_norm": 0.1865234375, "learning_rate": 0.000973761961802256, "loss": 0.169, "step": 52762 }, { "epoch": 0.09355493320354369, "grad_norm": 2.140625, "learning_rate": 0.0009737030318271571, "loss": 0.207, "step": 52764 }, { "epoch": 0.0935584793688535, "grad_norm": 2.046875, "learning_rate": 0.0009736441024044076, "loss": 0.197, "step": 52766 }, { "epoch": 0.09356202553416332, "grad_norm": 0.49609375, "learning_rate": 0.0009735851735342653, "loss": 0.1218, "step": 52768 }, { "epoch": 0.09356557169947313, "grad_norm": 0.2421875, "learning_rate": 0.0009735262452169882, "loss": 0.1397, "step": 52770 }, { "epoch": 0.09356911786478295, "grad_norm": 0.4765625, "learning_rate": 0.0009734673174528339, "loss": 0.1417, "step": 52772 }, { "epoch": 0.09357266403009276, "grad_norm": 2.75, "learning_rate": 0.0009734083902420598, "loss": 0.2473, "step": 52774 }, { "epoch": 0.09357621019540258, "grad_norm": 0.8125, "learning_rate": 0.0009733494635849238, "loss": 0.1865, "step": 52776 }, { "epoch": 0.09357975636071239, "grad_norm": 0.419921875, "learning_rate": 0.0009732905374816839, "loss": 0.1471, "step": 52778 }, { "epoch": 0.0935833025260222, "grad_norm": 0.345703125, "learning_rate": 0.0009732316119325979, "loss": 0.151, "step": 52780 }, { "epoch": 0.09358684869133202, "grad_norm": 0.61328125, "learning_rate": 0.0009731726869379227, "loss": 0.1835, "step": 52782 }, { "epoch": 0.09359039485664183, "grad_norm": 0.228515625, "learning_rate": 0.0009731137624979168, "loss": 0.2074, "step": 52784 }, { "epoch": 0.09359394102195165, "grad_norm": 0.29296875, "learning_rate": 0.0009730548386128373, "loss": 0.1478, "step": 52786 }, { "epoch": 0.09359748718726146, "grad_norm": 0.412109375, "learning_rate": 0.0009729959152829421, "loss": 0.2643, "step": 52788 }, { "epoch": 0.09360103335257128, "grad_norm": 0.5546875, "learning_rate": 0.0009729369925084897, "loss": 0.1798, "step": 52790 }, { "epoch": 0.09360457951788109, "grad_norm": 0.46875, "learning_rate": 0.0009728780702897364, "loss": 0.1716, "step": 52792 }, { "epoch": 0.09360812568319091, "grad_norm": 0.71875, "learning_rate": 0.0009728191486269406, "loss": 0.2156, "step": 52794 }, { "epoch": 0.09361167184850072, "grad_norm": 0.2890625, "learning_rate": 0.0009727602275203604, "loss": 0.1539, "step": 52796 }, { "epoch": 0.09361521801381054, "grad_norm": 0.51171875, "learning_rate": 0.0009727013069702528, "loss": 0.1992, "step": 52798 }, { "epoch": 0.09361876417912035, "grad_norm": 0.796875, "learning_rate": 0.0009726423869768759, "loss": 0.2355, "step": 52800 }, { "epoch": 0.09362231034443017, "grad_norm": 0.2197265625, "learning_rate": 0.000972583467540487, "loss": 0.1508, "step": 52802 }, { "epoch": 0.09362585650973998, "grad_norm": 0.62890625, "learning_rate": 0.0009725245486613441, "loss": 0.2025, "step": 52804 }, { "epoch": 0.09362940267504981, "grad_norm": 0.24609375, "learning_rate": 0.0009724656303397051, "loss": 0.2127, "step": 52806 }, { "epoch": 0.09363294884035962, "grad_norm": 0.875, "learning_rate": 0.0009724067125758269, "loss": 0.1654, "step": 52808 }, { "epoch": 0.09363649500566944, "grad_norm": 0.41015625, "learning_rate": 0.0009723477953699675, "loss": 0.2249, "step": 52810 }, { "epoch": 0.09364004117097925, "grad_norm": 0.189453125, "learning_rate": 0.0009722888787223849, "loss": 0.1586, "step": 52812 }, { "epoch": 0.09364358733628907, "grad_norm": 0.9453125, "learning_rate": 0.0009722299626333371, "loss": 0.3103, "step": 52814 }, { "epoch": 0.09364713350159888, "grad_norm": 0.28515625, "learning_rate": 0.0009721710471030804, "loss": 0.2166, "step": 52816 }, { "epoch": 0.0936506796669087, "grad_norm": 0.53125, "learning_rate": 0.0009721121321318738, "loss": 0.2705, "step": 52818 }, { "epoch": 0.09365422583221851, "grad_norm": 0.296875, "learning_rate": 0.0009720532177199742, "loss": 0.1749, "step": 52820 }, { "epoch": 0.09365777199752832, "grad_norm": 0.490234375, "learning_rate": 0.0009719943038676397, "loss": 0.2943, "step": 52822 }, { "epoch": 0.09366131816283814, "grad_norm": 0.9140625, "learning_rate": 0.0009719353905751276, "loss": 0.2154, "step": 52824 }, { "epoch": 0.09366486432814795, "grad_norm": 0.5234375, "learning_rate": 0.0009718764778426956, "loss": 0.1878, "step": 52826 }, { "epoch": 0.09366841049345777, "grad_norm": 0.38671875, "learning_rate": 0.0009718175656706014, "loss": 0.1787, "step": 52828 }, { "epoch": 0.09367195665876758, "grad_norm": 0.2412109375, "learning_rate": 0.0009717586540591033, "loss": 0.1592, "step": 52830 }, { "epoch": 0.0936755028240774, "grad_norm": 0.1796875, "learning_rate": 0.0009716997430084577, "loss": 0.2519, "step": 52832 }, { "epoch": 0.09367904898938721, "grad_norm": 0.6328125, "learning_rate": 0.0009716408325189232, "loss": 0.1821, "step": 52834 }, { "epoch": 0.09368259515469703, "grad_norm": 0.373046875, "learning_rate": 0.0009715819225907572, "loss": 0.1543, "step": 52836 }, { "epoch": 0.09368614132000684, "grad_norm": 0.267578125, "learning_rate": 0.0009715230132242173, "loss": 0.2136, "step": 52838 }, { "epoch": 0.09368968748531666, "grad_norm": 0.4453125, "learning_rate": 0.0009714641044195611, "loss": 0.1385, "step": 52840 }, { "epoch": 0.09369323365062647, "grad_norm": 0.421875, "learning_rate": 0.000971405196177046, "loss": 0.2044, "step": 52842 }, { "epoch": 0.09369677981593628, "grad_norm": 0.5703125, "learning_rate": 0.00097134628849693, "loss": 0.1885, "step": 52844 }, { "epoch": 0.0937003259812461, "grad_norm": 0.66796875, "learning_rate": 0.0009712873813794709, "loss": 0.133, "step": 52846 }, { "epoch": 0.09370387214655591, "grad_norm": 0.2109375, "learning_rate": 0.0009712284748249256, "loss": 0.1578, "step": 52848 }, { "epoch": 0.09370741831186573, "grad_norm": 0.546875, "learning_rate": 0.0009711695688335522, "loss": 0.2152, "step": 52850 }, { "epoch": 0.09371096447717556, "grad_norm": 1.1015625, "learning_rate": 0.0009711106634056082, "loss": 0.267, "step": 52852 }, { "epoch": 0.09371451064248537, "grad_norm": 0.49609375, "learning_rate": 0.0009710517585413516, "loss": 0.2191, "step": 52854 }, { "epoch": 0.09371805680779519, "grad_norm": 0.423828125, "learning_rate": 0.0009709928542410395, "loss": 0.1692, "step": 52856 }, { "epoch": 0.093721602973105, "grad_norm": 2.625, "learning_rate": 0.00097093395050493, "loss": 0.4009, "step": 52858 }, { "epoch": 0.09372514913841481, "grad_norm": 0.353515625, "learning_rate": 0.0009708750473332801, "loss": 0.1882, "step": 52860 }, { "epoch": 0.09372869530372463, "grad_norm": 1.0546875, "learning_rate": 0.0009708161447263482, "loss": 0.4668, "step": 52862 }, { "epoch": 0.09373224146903444, "grad_norm": 0.5859375, "learning_rate": 0.000970757242684391, "loss": 0.2639, "step": 52864 }, { "epoch": 0.09373578763434426, "grad_norm": 0.4609375, "learning_rate": 0.0009706983412076664, "loss": 0.2279, "step": 52866 }, { "epoch": 0.09373933379965407, "grad_norm": 0.796875, "learning_rate": 0.0009706394402964325, "loss": 0.1746, "step": 52868 }, { "epoch": 0.09374287996496389, "grad_norm": 2.65625, "learning_rate": 0.0009705805399509462, "loss": 0.2625, "step": 52870 }, { "epoch": 0.0937464261302737, "grad_norm": 0.302734375, "learning_rate": 0.0009705216401714661, "loss": 0.1723, "step": 52872 }, { "epoch": 0.09374997229558352, "grad_norm": 1.3515625, "learning_rate": 0.0009704627409582485, "loss": 0.1842, "step": 52874 }, { "epoch": 0.09375351846089333, "grad_norm": 2.171875, "learning_rate": 0.000970403842311552, "loss": 0.1493, "step": 52876 }, { "epoch": 0.09375706462620315, "grad_norm": 0.455078125, "learning_rate": 0.0009703449442316335, "loss": 0.1467, "step": 52878 }, { "epoch": 0.09376061079151296, "grad_norm": 0.4921875, "learning_rate": 0.0009702860467187513, "loss": 0.1832, "step": 52880 }, { "epoch": 0.09376415695682278, "grad_norm": 0.416015625, "learning_rate": 0.0009702271497731622, "loss": 0.1411, "step": 52882 }, { "epoch": 0.09376770312213259, "grad_norm": 0.66796875, "learning_rate": 0.0009701682533951244, "loss": 0.1815, "step": 52884 }, { "epoch": 0.0937712492874424, "grad_norm": 1.046875, "learning_rate": 0.0009701093575848949, "loss": 0.204, "step": 52886 }, { "epoch": 0.09377479545275222, "grad_norm": 1.15625, "learning_rate": 0.0009700504623427322, "loss": 0.3752, "step": 52888 }, { "epoch": 0.09377834161806203, "grad_norm": 0.30078125, "learning_rate": 0.0009699915676688928, "loss": 0.1559, "step": 52890 }, { "epoch": 0.09378188778337185, "grad_norm": 1.125, "learning_rate": 0.0009699326735636352, "loss": 0.2366, "step": 52892 }, { "epoch": 0.09378543394868166, "grad_norm": 0.45703125, "learning_rate": 0.0009698737800272161, "loss": 0.1935, "step": 52894 }, { "epoch": 0.09378898011399149, "grad_norm": 0.341796875, "learning_rate": 0.0009698148870598939, "loss": 0.1927, "step": 52896 }, { "epoch": 0.0937925262793013, "grad_norm": 0.466796875, "learning_rate": 0.0009697559946619254, "loss": 0.2048, "step": 52898 }, { "epoch": 0.09379607244461112, "grad_norm": 0.240234375, "learning_rate": 0.0009696971028335686, "loss": 0.1615, "step": 52900 }, { "epoch": 0.09379961860992093, "grad_norm": 0.359375, "learning_rate": 0.000969638211575081, "loss": 0.1876, "step": 52902 }, { "epoch": 0.09380316477523075, "grad_norm": 0.44140625, "learning_rate": 0.0009695793208867206, "loss": 0.2142, "step": 52904 }, { "epoch": 0.09380671094054056, "grad_norm": 0.66796875, "learning_rate": 0.0009695204307687438, "loss": 0.152, "step": 52906 }, { "epoch": 0.09381025710585038, "grad_norm": 0.30859375, "learning_rate": 0.000969461541221409, "loss": 0.1521, "step": 52908 }, { "epoch": 0.09381380327116019, "grad_norm": 0.490234375, "learning_rate": 0.0009694026522449739, "loss": 0.19, "step": 52910 }, { "epoch": 0.09381734943647001, "grad_norm": 0.408203125, "learning_rate": 0.0009693437638396956, "loss": 0.2007, "step": 52912 }, { "epoch": 0.09382089560177982, "grad_norm": 0.36328125, "learning_rate": 0.0009692848760058317, "loss": 0.182, "step": 52914 }, { "epoch": 0.09382444176708964, "grad_norm": 0.66015625, "learning_rate": 0.0009692259887436399, "loss": 0.156, "step": 52916 }, { "epoch": 0.09382798793239945, "grad_norm": 0.3671875, "learning_rate": 0.0009691671020533774, "loss": 0.1487, "step": 52918 }, { "epoch": 0.09383153409770927, "grad_norm": 0.3203125, "learning_rate": 0.0009691082159353024, "loss": 0.1889, "step": 52920 }, { "epoch": 0.09383508026301908, "grad_norm": 1.375, "learning_rate": 0.0009690493303896719, "loss": 0.2277, "step": 52922 }, { "epoch": 0.0938386264283289, "grad_norm": 0.56640625, "learning_rate": 0.0009689904454167433, "loss": 0.1817, "step": 52924 }, { "epoch": 0.09384217259363871, "grad_norm": 0.26953125, "learning_rate": 0.0009689315610167746, "loss": 0.1971, "step": 52926 }, { "epoch": 0.09384571875894852, "grad_norm": 1.3984375, "learning_rate": 0.0009688726771900233, "loss": 0.2518, "step": 52928 }, { "epoch": 0.09384926492425834, "grad_norm": 0.56640625, "learning_rate": 0.0009688137939367465, "loss": 0.1925, "step": 52930 }, { "epoch": 0.09385281108956815, "grad_norm": 0.359375, "learning_rate": 0.0009687549112572022, "loss": 0.1849, "step": 52932 }, { "epoch": 0.09385635725487797, "grad_norm": 0.4296875, "learning_rate": 0.0009686960291516476, "loss": 0.1748, "step": 52934 }, { "epoch": 0.09385990342018778, "grad_norm": 0.37109375, "learning_rate": 0.0009686371476203404, "loss": 0.1662, "step": 52936 }, { "epoch": 0.0938634495854976, "grad_norm": 0.6484375, "learning_rate": 0.0009685782666635376, "loss": 0.1687, "step": 52938 }, { "epoch": 0.09386699575080741, "grad_norm": 0.318359375, "learning_rate": 0.0009685193862814974, "loss": 0.1486, "step": 52940 }, { "epoch": 0.09387054191611724, "grad_norm": 0.41015625, "learning_rate": 0.0009684605064744767, "loss": 0.1678, "step": 52942 }, { "epoch": 0.09387408808142705, "grad_norm": 1.75, "learning_rate": 0.000968401627242734, "loss": 0.2104, "step": 52944 }, { "epoch": 0.09387763424673687, "grad_norm": 0.515625, "learning_rate": 0.0009683427485865257, "loss": 0.1599, "step": 52946 }, { "epoch": 0.09388118041204668, "grad_norm": 0.40234375, "learning_rate": 0.0009682838705061095, "loss": 0.1515, "step": 52948 }, { "epoch": 0.0938847265773565, "grad_norm": 0.46875, "learning_rate": 0.0009682249930017435, "loss": 0.1697, "step": 52950 }, { "epoch": 0.09388827274266631, "grad_norm": 0.51171875, "learning_rate": 0.0009681661160736847, "loss": 0.159, "step": 52952 }, { "epoch": 0.09389181890797613, "grad_norm": 1.1953125, "learning_rate": 0.0009681072397221913, "loss": 0.1479, "step": 52954 }, { "epoch": 0.09389536507328594, "grad_norm": 0.37109375, "learning_rate": 0.0009680483639475195, "loss": 0.1308, "step": 52956 }, { "epoch": 0.09389891123859576, "grad_norm": 0.421875, "learning_rate": 0.0009679894887499276, "loss": 0.1689, "step": 52958 }, { "epoch": 0.09390245740390557, "grad_norm": 0.3828125, "learning_rate": 0.0009679306141296731, "loss": 0.209, "step": 52960 }, { "epoch": 0.09390600356921538, "grad_norm": 0.240234375, "learning_rate": 0.0009678717400870136, "loss": 0.191, "step": 52962 }, { "epoch": 0.0939095497345252, "grad_norm": 0.140625, "learning_rate": 0.0009678128666222061, "loss": 0.1553, "step": 52964 }, { "epoch": 0.09391309589983501, "grad_norm": 0.3671875, "learning_rate": 0.0009677539937355084, "loss": 0.1817, "step": 52966 }, { "epoch": 0.09391664206514483, "grad_norm": 0.125, "learning_rate": 0.0009676951214271779, "loss": 0.1777, "step": 52968 }, { "epoch": 0.09392018823045464, "grad_norm": 0.173828125, "learning_rate": 0.0009676362496974725, "loss": 0.1303, "step": 52970 }, { "epoch": 0.09392373439576446, "grad_norm": 0.84375, "learning_rate": 0.0009675773785466486, "loss": 0.1757, "step": 52972 }, { "epoch": 0.09392728056107427, "grad_norm": 0.56640625, "learning_rate": 0.0009675185079749644, "loss": 0.1689, "step": 52974 }, { "epoch": 0.09393082672638409, "grad_norm": 0.275390625, "learning_rate": 0.0009674596379826775, "loss": 0.1361, "step": 52976 }, { "epoch": 0.0939343728916939, "grad_norm": 1.515625, "learning_rate": 0.0009674007685700455, "loss": 0.1763, "step": 52978 }, { "epoch": 0.09393791905700372, "grad_norm": 0.87890625, "learning_rate": 0.0009673418997373252, "loss": 0.2118, "step": 52980 }, { "epoch": 0.09394146522231353, "grad_norm": 1.984375, "learning_rate": 0.0009672830314847741, "loss": 0.2167, "step": 52982 }, { "epoch": 0.09394501138762335, "grad_norm": 0.1875, "learning_rate": 0.0009672241638126502, "loss": 0.1609, "step": 52984 }, { "epoch": 0.09394855755293316, "grad_norm": 0.35546875, "learning_rate": 0.0009671652967212108, "loss": 0.1815, "step": 52986 }, { "epoch": 0.09395210371824299, "grad_norm": 0.34765625, "learning_rate": 0.0009671064302107131, "loss": 0.1578, "step": 52988 }, { "epoch": 0.0939556498835528, "grad_norm": 0.2333984375, "learning_rate": 0.0009670475642814145, "loss": 0.2046, "step": 52990 }, { "epoch": 0.09395919604886262, "grad_norm": 0.4140625, "learning_rate": 0.0009669886989335728, "loss": 0.1637, "step": 52992 }, { "epoch": 0.09396274221417243, "grad_norm": 0.8046875, "learning_rate": 0.0009669298341674452, "loss": 0.1769, "step": 52994 }, { "epoch": 0.09396628837948225, "grad_norm": 1.921875, "learning_rate": 0.0009668709699832894, "loss": 0.2567, "step": 52996 }, { "epoch": 0.09396983454479206, "grad_norm": 0.80859375, "learning_rate": 0.0009668121063813624, "loss": 0.1605, "step": 52998 }, { "epoch": 0.09397338071010188, "grad_norm": 0.28515625, "learning_rate": 0.000966753243361922, "loss": 0.1642, "step": 53000 }, { "epoch": 0.09397692687541169, "grad_norm": 0.388671875, "learning_rate": 0.0009666943809252255, "loss": 0.2031, "step": 53002 }, { "epoch": 0.0939804730407215, "grad_norm": 0.65234375, "learning_rate": 0.0009666355190715303, "loss": 0.2146, "step": 53004 }, { "epoch": 0.09398401920603132, "grad_norm": 1.3125, "learning_rate": 0.0009665766578010941, "loss": 0.2329, "step": 53006 }, { "epoch": 0.09398756537134113, "grad_norm": 0.259765625, "learning_rate": 0.0009665177971141737, "loss": 0.174, "step": 53008 }, { "epoch": 0.09399111153665095, "grad_norm": 0.2216796875, "learning_rate": 0.0009664589370110275, "loss": 0.2221, "step": 53010 }, { "epoch": 0.09399465770196076, "grad_norm": 0.70703125, "learning_rate": 0.0009664000774919119, "loss": 0.1919, "step": 53012 }, { "epoch": 0.09399820386727058, "grad_norm": 5.84375, "learning_rate": 0.0009663412185570848, "loss": 0.3898, "step": 53014 }, { "epoch": 0.09400175003258039, "grad_norm": 0.78515625, "learning_rate": 0.0009662823602068034, "loss": 0.1847, "step": 53016 }, { "epoch": 0.0940052961978902, "grad_norm": 0.4140625, "learning_rate": 0.0009662235024413257, "loss": 0.3082, "step": 53018 }, { "epoch": 0.09400884236320002, "grad_norm": 0.2041015625, "learning_rate": 0.0009661646452609085, "loss": 0.1332, "step": 53020 }, { "epoch": 0.09401238852850984, "grad_norm": 1.71875, "learning_rate": 0.0009661057886658093, "loss": 0.2189, "step": 53022 }, { "epoch": 0.09401593469381965, "grad_norm": 5.09375, "learning_rate": 0.0009660469326562858, "loss": 0.3927, "step": 53024 }, { "epoch": 0.09401948085912946, "grad_norm": 1.046875, "learning_rate": 0.0009659880772325951, "loss": 0.1909, "step": 53026 }, { "epoch": 0.09402302702443928, "grad_norm": 0.33203125, "learning_rate": 0.0009659292223949947, "loss": 0.1378, "step": 53028 }, { "epoch": 0.0940265731897491, "grad_norm": 0.326171875, "learning_rate": 0.0009658703681437419, "loss": 0.1864, "step": 53030 }, { "epoch": 0.09403011935505891, "grad_norm": 13.3125, "learning_rate": 0.0009658115144790942, "loss": 0.2011, "step": 53032 }, { "epoch": 0.09403366552036874, "grad_norm": 0.2890625, "learning_rate": 0.0009657526614013094, "loss": 0.1799, "step": 53034 }, { "epoch": 0.09403721168567855, "grad_norm": 0.4375, "learning_rate": 0.0009656938089106442, "loss": 0.2288, "step": 53036 }, { "epoch": 0.09404075785098837, "grad_norm": 0.51171875, "learning_rate": 0.0009656349570073561, "loss": 0.2186, "step": 53038 }, { "epoch": 0.09404430401629818, "grad_norm": 3.0, "learning_rate": 0.0009655761056917025, "loss": 0.2074, "step": 53040 }, { "epoch": 0.094047850181608, "grad_norm": 1.0859375, "learning_rate": 0.0009655172549639413, "loss": 0.247, "step": 53042 }, { "epoch": 0.09405139634691781, "grad_norm": 0.279296875, "learning_rate": 0.0009654584048243295, "loss": 0.1623, "step": 53044 }, { "epoch": 0.09405494251222762, "grad_norm": 0.40625, "learning_rate": 0.0009653995552731247, "loss": 0.1698, "step": 53046 }, { "epoch": 0.09405848867753744, "grad_norm": 0.263671875, "learning_rate": 0.0009653407063105837, "loss": 0.1759, "step": 53048 }, { "epoch": 0.09406203484284725, "grad_norm": 0.48046875, "learning_rate": 0.000965281857936964, "loss": 0.1662, "step": 53050 }, { "epoch": 0.09406558100815707, "grad_norm": 0.396484375, "learning_rate": 0.0009652230101525239, "loss": 0.1901, "step": 53052 }, { "epoch": 0.09406912717346688, "grad_norm": 0.177734375, "learning_rate": 0.0009651641629575197, "loss": 0.1955, "step": 53054 }, { "epoch": 0.0940726733387767, "grad_norm": 0.2109375, "learning_rate": 0.0009651053163522088, "loss": 0.1712, "step": 53056 }, { "epoch": 0.09407621950408651, "grad_norm": 0.52734375, "learning_rate": 0.0009650464703368493, "loss": 0.1919, "step": 53058 }, { "epoch": 0.09407976566939633, "grad_norm": 0.376953125, "learning_rate": 0.0009649876249116982, "loss": 0.1624, "step": 53060 }, { "epoch": 0.09408331183470614, "grad_norm": 1.109375, "learning_rate": 0.0009649287800770125, "loss": 0.1807, "step": 53062 }, { "epoch": 0.09408685800001595, "grad_norm": 0.443359375, "learning_rate": 0.0009648699358330502, "loss": 0.1798, "step": 53064 }, { "epoch": 0.09409040416532577, "grad_norm": 0.390625, "learning_rate": 0.0009648110921800679, "loss": 0.1978, "step": 53066 }, { "epoch": 0.09409395033063558, "grad_norm": 0.302734375, "learning_rate": 0.0009647522491183238, "loss": 0.2217, "step": 53068 }, { "epoch": 0.0940974964959454, "grad_norm": 0.54296875, "learning_rate": 0.0009646934066480746, "loss": 0.1346, "step": 53070 }, { "epoch": 0.09410104266125521, "grad_norm": 1.109375, "learning_rate": 0.0009646345647695776, "loss": 0.168, "step": 53072 }, { "epoch": 0.09410458882656503, "grad_norm": 0.177734375, "learning_rate": 0.0009645757234830906, "loss": 0.1227, "step": 53074 }, { "epoch": 0.09410813499187484, "grad_norm": 0.1953125, "learning_rate": 0.0009645168827888709, "loss": 0.1565, "step": 53076 }, { "epoch": 0.09411168115718467, "grad_norm": 0.6015625, "learning_rate": 0.0009644580426871754, "loss": 0.2403, "step": 53078 }, { "epoch": 0.09411522732249449, "grad_norm": 0.47265625, "learning_rate": 0.0009643992031782618, "loss": 0.2412, "step": 53080 }, { "epoch": 0.0941187734878043, "grad_norm": 1.4140625, "learning_rate": 0.0009643403642623871, "loss": 0.129, "step": 53082 }, { "epoch": 0.09412231965311411, "grad_norm": 0.435546875, "learning_rate": 0.0009642815259398093, "loss": 0.1811, "step": 53084 }, { "epoch": 0.09412586581842393, "grad_norm": 0.82421875, "learning_rate": 0.000964222688210785, "loss": 0.2821, "step": 53086 }, { "epoch": 0.09412941198373374, "grad_norm": 0.98046875, "learning_rate": 0.0009641638510755716, "loss": 0.2447, "step": 53088 }, { "epoch": 0.09413295814904356, "grad_norm": 0.3984375, "learning_rate": 0.0009641050145344268, "loss": 0.2098, "step": 53090 }, { "epoch": 0.09413650431435337, "grad_norm": 0.5859375, "learning_rate": 0.000964046178587608, "loss": 0.19, "step": 53092 }, { "epoch": 0.09414005047966319, "grad_norm": 0.94140625, "learning_rate": 0.0009639873432353719, "loss": 0.19, "step": 53094 }, { "epoch": 0.094143596644973, "grad_norm": 0.189453125, "learning_rate": 0.000963928508477976, "loss": 0.213, "step": 53096 }, { "epoch": 0.09414714281028282, "grad_norm": 0.49609375, "learning_rate": 0.0009638696743156781, "loss": 0.1817, "step": 53098 }, { "epoch": 0.09415068897559263, "grad_norm": 2.5625, "learning_rate": 0.0009638108407487351, "loss": 0.2933, "step": 53100 }, { "epoch": 0.09415423514090245, "grad_norm": 0.87109375, "learning_rate": 0.0009637520077774045, "loss": 0.2266, "step": 53102 }, { "epoch": 0.09415778130621226, "grad_norm": 0.83203125, "learning_rate": 0.0009636931754019431, "loss": 0.1824, "step": 53104 }, { "epoch": 0.09416132747152207, "grad_norm": 0.38671875, "learning_rate": 0.0009636343436226087, "loss": 0.1566, "step": 53106 }, { "epoch": 0.09416487363683189, "grad_norm": 0.74609375, "learning_rate": 0.0009635755124396588, "loss": 0.1583, "step": 53108 }, { "epoch": 0.0941684198021417, "grad_norm": 0.32421875, "learning_rate": 0.0009635166818533502, "loss": 0.1773, "step": 53110 }, { "epoch": 0.09417196596745152, "grad_norm": 2.484375, "learning_rate": 0.0009634578518639401, "loss": 0.2434, "step": 53112 }, { "epoch": 0.09417551213276133, "grad_norm": 0.80859375, "learning_rate": 0.0009633990224716862, "loss": 0.1736, "step": 53114 }, { "epoch": 0.09417905829807115, "grad_norm": 0.203125, "learning_rate": 0.0009633401936768457, "loss": 0.1874, "step": 53116 }, { "epoch": 0.09418260446338096, "grad_norm": 0.71875, "learning_rate": 0.0009632813654796757, "loss": 0.1775, "step": 53118 }, { "epoch": 0.09418615062869078, "grad_norm": 0.369140625, "learning_rate": 0.0009632225378804338, "loss": 0.1369, "step": 53120 }, { "epoch": 0.09418969679400059, "grad_norm": 0.6796875, "learning_rate": 0.0009631637108793769, "loss": 0.2189, "step": 53122 }, { "epoch": 0.09419324295931042, "grad_norm": 0.609375, "learning_rate": 0.0009631048844767622, "loss": 0.1709, "step": 53124 }, { "epoch": 0.09419678912462023, "grad_norm": 1.0078125, "learning_rate": 0.000963046058672848, "loss": 0.1331, "step": 53126 }, { "epoch": 0.09420033528993005, "grad_norm": 3.875, "learning_rate": 0.00096298723346789, "loss": 0.2386, "step": 53128 }, { "epoch": 0.09420388145523986, "grad_norm": 0.3359375, "learning_rate": 0.0009629284088621463, "loss": 0.2949, "step": 53130 }, { "epoch": 0.09420742762054968, "grad_norm": 0.26953125, "learning_rate": 0.0009628695848558744, "loss": 0.1639, "step": 53132 }, { "epoch": 0.09421097378585949, "grad_norm": 0.330078125, "learning_rate": 0.0009628107614493316, "loss": 0.1797, "step": 53134 }, { "epoch": 0.0942145199511693, "grad_norm": 0.375, "learning_rate": 0.0009627519386427744, "loss": 0.1892, "step": 53136 }, { "epoch": 0.09421806611647912, "grad_norm": 0.31640625, "learning_rate": 0.0009626931164364606, "loss": 0.2827, "step": 53138 }, { "epoch": 0.09422161228178894, "grad_norm": 0.333984375, "learning_rate": 0.0009626342948306471, "loss": 0.1579, "step": 53140 }, { "epoch": 0.09422515844709875, "grad_norm": 0.87109375, "learning_rate": 0.0009625754738255918, "loss": 0.1593, "step": 53142 }, { "epoch": 0.09422870461240856, "grad_norm": 1.1484375, "learning_rate": 0.0009625166534215515, "loss": 0.2297, "step": 53144 }, { "epoch": 0.09423225077771838, "grad_norm": 0.310546875, "learning_rate": 0.0009624578336187833, "loss": 0.2194, "step": 53146 }, { "epoch": 0.0942357969430282, "grad_norm": 0.3203125, "learning_rate": 0.0009623990144175445, "loss": 0.1804, "step": 53148 }, { "epoch": 0.09423934310833801, "grad_norm": 0.75390625, "learning_rate": 0.0009623401958180929, "loss": 0.3056, "step": 53150 }, { "epoch": 0.09424288927364782, "grad_norm": 0.455078125, "learning_rate": 0.0009622813778206852, "loss": 0.2344, "step": 53152 }, { "epoch": 0.09424643543895764, "grad_norm": 0.2216796875, "learning_rate": 0.0009622225604255785, "loss": 0.1436, "step": 53154 }, { "epoch": 0.09424998160426745, "grad_norm": 0.26953125, "learning_rate": 0.0009621637436330304, "loss": 0.2116, "step": 53156 }, { "epoch": 0.09425352776957727, "grad_norm": 0.6640625, "learning_rate": 0.0009621049274432985, "loss": 0.1795, "step": 53158 }, { "epoch": 0.09425707393488708, "grad_norm": 0.51953125, "learning_rate": 0.0009620461118566388, "loss": 0.1793, "step": 53160 }, { "epoch": 0.0942606201001969, "grad_norm": 0.251953125, "learning_rate": 0.0009619872968733097, "loss": 0.2134, "step": 53162 }, { "epoch": 0.09426416626550671, "grad_norm": 0.38671875, "learning_rate": 0.0009619284824935677, "loss": 0.1913, "step": 53164 }, { "epoch": 0.09426771243081652, "grad_norm": 0.45703125, "learning_rate": 0.0009618696687176709, "loss": 0.2605, "step": 53166 }, { "epoch": 0.09427125859612634, "grad_norm": 0.9453125, "learning_rate": 0.0009618108555458754, "loss": 0.3632, "step": 53168 }, { "epoch": 0.09427480476143617, "grad_norm": 0.56640625, "learning_rate": 0.000961752042978439, "loss": 0.1595, "step": 53170 }, { "epoch": 0.09427835092674598, "grad_norm": 0.70703125, "learning_rate": 0.000961693231015619, "loss": 0.2034, "step": 53172 }, { "epoch": 0.0942818970920558, "grad_norm": 0.9453125, "learning_rate": 0.0009616344196576725, "loss": 0.1892, "step": 53174 }, { "epoch": 0.09428544325736561, "grad_norm": 0.349609375, "learning_rate": 0.0009615756089048567, "loss": 0.2132, "step": 53176 }, { "epoch": 0.09428898942267543, "grad_norm": 0.392578125, "learning_rate": 0.0009615167987574285, "loss": 0.2036, "step": 53178 }, { "epoch": 0.09429253558798524, "grad_norm": 0.67578125, "learning_rate": 0.0009614579892156455, "loss": 0.2508, "step": 53180 }, { "epoch": 0.09429608175329506, "grad_norm": 0.3125, "learning_rate": 0.000961399180279765, "loss": 0.3308, "step": 53182 }, { "epoch": 0.09429962791860487, "grad_norm": 0.78125, "learning_rate": 0.000961340371950044, "loss": 0.1785, "step": 53184 }, { "epoch": 0.09430317408391468, "grad_norm": 0.419921875, "learning_rate": 0.0009612815642267391, "loss": 0.2303, "step": 53186 }, { "epoch": 0.0943067202492245, "grad_norm": 0.349609375, "learning_rate": 0.0009612227571101084, "loss": 0.1411, "step": 53188 }, { "epoch": 0.09431026641453431, "grad_norm": 0.66796875, "learning_rate": 0.0009611639506004091, "loss": 0.2009, "step": 53190 }, { "epoch": 0.09431381257984413, "grad_norm": 0.3125, "learning_rate": 0.0009611051446978975, "loss": 0.1739, "step": 53192 }, { "epoch": 0.09431735874515394, "grad_norm": 0.84765625, "learning_rate": 0.0009610463394028319, "loss": 0.2171, "step": 53194 }, { "epoch": 0.09432090491046376, "grad_norm": 0.357421875, "learning_rate": 0.0009609875347154684, "loss": 0.1892, "step": 53196 }, { "epoch": 0.09432445107577357, "grad_norm": 0.484375, "learning_rate": 0.000960928730636065, "loss": 0.1769, "step": 53198 }, { "epoch": 0.09432799724108339, "grad_norm": 0.53515625, "learning_rate": 0.0009608699271648784, "loss": 0.2165, "step": 53200 }, { "epoch": 0.0943315434063932, "grad_norm": 0.94140625, "learning_rate": 0.0009608111243021658, "loss": 0.1643, "step": 53202 }, { "epoch": 0.09433508957170302, "grad_norm": 2.421875, "learning_rate": 0.0009607523220481844, "loss": 0.2882, "step": 53204 }, { "epoch": 0.09433863573701283, "grad_norm": 0.32421875, "learning_rate": 0.0009606935204031914, "loss": 0.2136, "step": 53206 }, { "epoch": 0.09434218190232264, "grad_norm": 0.48828125, "learning_rate": 0.0009606347193674448, "loss": 0.1525, "step": 53208 }, { "epoch": 0.09434572806763246, "grad_norm": 0.83984375, "learning_rate": 0.0009605759189412002, "loss": 0.3567, "step": 53210 }, { "epoch": 0.09434927423294227, "grad_norm": 1.109375, "learning_rate": 0.000960517119124716, "loss": 0.2837, "step": 53212 }, { "epoch": 0.0943528203982521, "grad_norm": 5.3125, "learning_rate": 0.0009604583199182487, "loss": 0.2649, "step": 53214 }, { "epoch": 0.09435636656356192, "grad_norm": 0.19921875, "learning_rate": 0.0009603995213220557, "loss": 0.2217, "step": 53216 }, { "epoch": 0.09435991272887173, "grad_norm": 2.15625, "learning_rate": 0.0009603407233363939, "loss": 0.1683, "step": 53218 }, { "epoch": 0.09436345889418155, "grad_norm": 0.8046875, "learning_rate": 0.0009602819259615208, "loss": 0.2162, "step": 53220 }, { "epoch": 0.09436700505949136, "grad_norm": 1.6015625, "learning_rate": 0.0009602231291976933, "loss": 0.2365, "step": 53222 }, { "epoch": 0.09437055122480117, "grad_norm": 0.298828125, "learning_rate": 0.000960164333045169, "loss": 0.2083, "step": 53224 }, { "epoch": 0.09437409739011099, "grad_norm": 0.279296875, "learning_rate": 0.0009601055375042045, "loss": 0.1671, "step": 53226 }, { "epoch": 0.0943776435554208, "grad_norm": 0.263671875, "learning_rate": 0.000960046742575057, "loss": 0.2433, "step": 53228 }, { "epoch": 0.09438118972073062, "grad_norm": 0.443359375, "learning_rate": 0.0009599879482579837, "loss": 0.1995, "step": 53230 }, { "epoch": 0.09438473588604043, "grad_norm": 0.2119140625, "learning_rate": 0.0009599291545532418, "loss": 0.2309, "step": 53232 }, { "epoch": 0.09438828205135025, "grad_norm": 1.109375, "learning_rate": 0.0009598703614610889, "loss": 0.2478, "step": 53234 }, { "epoch": 0.09439182821666006, "grad_norm": 0.1728515625, "learning_rate": 0.000959811568981781, "loss": 0.1257, "step": 53236 }, { "epoch": 0.09439537438196988, "grad_norm": 1.7890625, "learning_rate": 0.0009597527771155761, "loss": 0.2903, "step": 53238 }, { "epoch": 0.09439892054727969, "grad_norm": 0.2392578125, "learning_rate": 0.0009596939858627311, "loss": 0.1821, "step": 53240 }, { "epoch": 0.0944024667125895, "grad_norm": 0.37109375, "learning_rate": 0.0009596351952235032, "loss": 0.1637, "step": 53242 }, { "epoch": 0.09440601287789932, "grad_norm": 0.53125, "learning_rate": 0.0009595764051981493, "loss": 0.1686, "step": 53244 }, { "epoch": 0.09440955904320913, "grad_norm": 0.404296875, "learning_rate": 0.0009595176157869266, "loss": 0.1463, "step": 53246 }, { "epoch": 0.09441310520851895, "grad_norm": 1.234375, "learning_rate": 0.0009594588269900923, "loss": 0.5276, "step": 53248 }, { "epoch": 0.09441665137382876, "grad_norm": 0.640625, "learning_rate": 0.0009594000388079036, "loss": 0.1523, "step": 53250 }, { "epoch": 0.09442019753913858, "grad_norm": 1.25, "learning_rate": 0.0009593412512406172, "loss": 0.2109, "step": 53252 }, { "epoch": 0.09442374370444839, "grad_norm": 2.046875, "learning_rate": 0.0009592824642884906, "loss": 0.2416, "step": 53254 }, { "epoch": 0.09442728986975821, "grad_norm": 0.357421875, "learning_rate": 0.0009592236779517809, "loss": 0.1864, "step": 53256 }, { "epoch": 0.09443083603506802, "grad_norm": 3.515625, "learning_rate": 0.0009591648922307448, "loss": 0.2596, "step": 53258 }, { "epoch": 0.09443438220037785, "grad_norm": 0.1875, "learning_rate": 0.0009591061071256397, "loss": 0.1806, "step": 53260 }, { "epoch": 0.09443792836568767, "grad_norm": 0.50390625, "learning_rate": 0.0009590473226367227, "loss": 0.1897, "step": 53262 }, { "epoch": 0.09444147453099748, "grad_norm": 1.0703125, "learning_rate": 0.000958988538764251, "loss": 0.2313, "step": 53264 }, { "epoch": 0.0944450206963073, "grad_norm": 0.263671875, "learning_rate": 0.0009589297555084813, "loss": 0.1626, "step": 53266 }, { "epoch": 0.09444856686161711, "grad_norm": 0.3828125, "learning_rate": 0.0009588709728696709, "loss": 0.1283, "step": 53268 }, { "epoch": 0.09445211302692692, "grad_norm": 0.251953125, "learning_rate": 0.0009588121908480769, "loss": 0.1276, "step": 53270 }, { "epoch": 0.09445565919223674, "grad_norm": 0.166015625, "learning_rate": 0.0009587534094439567, "loss": 0.1792, "step": 53272 }, { "epoch": 0.09445920535754655, "grad_norm": 0.2236328125, "learning_rate": 0.0009586946286575668, "loss": 0.1649, "step": 53274 }, { "epoch": 0.09446275152285637, "grad_norm": 0.271484375, "learning_rate": 0.0009586358484891645, "loss": 0.1672, "step": 53276 }, { "epoch": 0.09446629768816618, "grad_norm": 0.35546875, "learning_rate": 0.0009585770689390066, "loss": 0.1749, "step": 53278 }, { "epoch": 0.094469843853476, "grad_norm": 3.71875, "learning_rate": 0.000958518290007351, "loss": 0.36, "step": 53280 }, { "epoch": 0.09447339001878581, "grad_norm": 0.279296875, "learning_rate": 0.0009584595116944539, "loss": 0.1403, "step": 53282 }, { "epoch": 0.09447693618409563, "grad_norm": 0.447265625, "learning_rate": 0.0009584007340005724, "loss": 0.1804, "step": 53284 }, { "epoch": 0.09448048234940544, "grad_norm": 0.34765625, "learning_rate": 0.0009583419569259644, "loss": 0.1981, "step": 53286 }, { "epoch": 0.09448402851471525, "grad_norm": 0.3828125, "learning_rate": 0.000958283180470886, "loss": 0.2221, "step": 53288 }, { "epoch": 0.09448757468002507, "grad_norm": 0.48828125, "learning_rate": 0.0009582244046355952, "loss": 0.2057, "step": 53290 }, { "epoch": 0.09449112084533488, "grad_norm": 0.302734375, "learning_rate": 0.0009581656294203481, "loss": 0.1468, "step": 53292 }, { "epoch": 0.0944946670106447, "grad_norm": 0.52734375, "learning_rate": 0.0009581068548254017, "loss": 0.1901, "step": 53294 }, { "epoch": 0.09449821317595451, "grad_norm": 0.318359375, "learning_rate": 0.0009580480808510141, "loss": 0.4163, "step": 53296 }, { "epoch": 0.09450175934126433, "grad_norm": 0.609375, "learning_rate": 0.0009579893074974417, "loss": 0.1959, "step": 53298 }, { "epoch": 0.09450530550657414, "grad_norm": 0.142578125, "learning_rate": 0.0009579305347649414, "loss": 0.1491, "step": 53300 }, { "epoch": 0.09450885167188396, "grad_norm": 1.265625, "learning_rate": 0.0009578717626537703, "loss": 0.2432, "step": 53302 }, { "epoch": 0.09451239783719377, "grad_norm": 0.181640625, "learning_rate": 0.0009578129911641859, "loss": 0.1736, "step": 53304 }, { "epoch": 0.0945159440025036, "grad_norm": 0.90625, "learning_rate": 0.0009577542202964448, "loss": 0.1099, "step": 53306 }, { "epoch": 0.09451949016781341, "grad_norm": 0.65234375, "learning_rate": 0.0009576954500508042, "loss": 0.1713, "step": 53308 }, { "epoch": 0.09452303633312323, "grad_norm": 0.2451171875, "learning_rate": 0.0009576366804275207, "loss": 0.1487, "step": 53310 }, { "epoch": 0.09452658249843304, "grad_norm": 0.6015625, "learning_rate": 0.0009575779114268517, "loss": 0.2229, "step": 53312 }, { "epoch": 0.09453012866374286, "grad_norm": 0.26171875, "learning_rate": 0.0009575191430490546, "loss": 0.2391, "step": 53314 }, { "epoch": 0.09453367482905267, "grad_norm": 0.3203125, "learning_rate": 0.0009574603752943856, "loss": 0.2283, "step": 53316 }, { "epoch": 0.09453722099436249, "grad_norm": 0.2255859375, "learning_rate": 0.0009574016081631023, "loss": 0.1648, "step": 53318 }, { "epoch": 0.0945407671596723, "grad_norm": 0.66015625, "learning_rate": 0.0009573428416554615, "loss": 0.167, "step": 53320 }, { "epoch": 0.09454431332498212, "grad_norm": 0.62109375, "learning_rate": 0.0009572840757717203, "loss": 0.136, "step": 53322 }, { "epoch": 0.09454785949029193, "grad_norm": 0.71875, "learning_rate": 0.0009572253105121356, "loss": 0.1676, "step": 53324 }, { "epoch": 0.09455140565560174, "grad_norm": 1.359375, "learning_rate": 0.0009571665458769645, "loss": 0.1737, "step": 53326 }, { "epoch": 0.09455495182091156, "grad_norm": 0.1748046875, "learning_rate": 0.0009571077818664637, "loss": 0.1359, "step": 53328 }, { "epoch": 0.09455849798622137, "grad_norm": 0.6328125, "learning_rate": 0.0009570490184808912, "loss": 0.1473, "step": 53330 }, { "epoch": 0.09456204415153119, "grad_norm": 0.453125, "learning_rate": 0.0009569902557205024, "loss": 0.2031, "step": 53332 }, { "epoch": 0.094565590316841, "grad_norm": 1.2578125, "learning_rate": 0.0009569314935855553, "loss": 0.2849, "step": 53334 }, { "epoch": 0.09456913648215082, "grad_norm": 1.4375, "learning_rate": 0.0009568727320763069, "loss": 0.3065, "step": 53336 }, { "epoch": 0.09457268264746063, "grad_norm": 0.390625, "learning_rate": 0.0009568139711930142, "loss": 0.1472, "step": 53338 }, { "epoch": 0.09457622881277045, "grad_norm": 1.1328125, "learning_rate": 0.0009567552109359339, "loss": 0.2274, "step": 53340 }, { "epoch": 0.09457977497808026, "grad_norm": 0.396484375, "learning_rate": 0.0009566964513053231, "loss": 0.1537, "step": 53342 }, { "epoch": 0.09458332114339008, "grad_norm": 0.51953125, "learning_rate": 0.0009566376923014386, "loss": 0.1569, "step": 53344 }, { "epoch": 0.09458686730869989, "grad_norm": 2.265625, "learning_rate": 0.0009565789339245381, "loss": 0.3639, "step": 53346 }, { "epoch": 0.0945904134740097, "grad_norm": 0.65234375, "learning_rate": 0.0009565201761748774, "loss": 0.2726, "step": 53348 }, { "epoch": 0.09459395963931953, "grad_norm": 0.23046875, "learning_rate": 0.0009564614190527143, "loss": 0.1703, "step": 53350 }, { "epoch": 0.09459750580462935, "grad_norm": 0.28515625, "learning_rate": 0.0009564026625583055, "loss": 0.1691, "step": 53352 }, { "epoch": 0.09460105196993916, "grad_norm": 0.65234375, "learning_rate": 0.0009563439066919086, "loss": 0.1597, "step": 53354 }, { "epoch": 0.09460459813524898, "grad_norm": 0.60546875, "learning_rate": 0.0009562851514537794, "loss": 0.1948, "step": 53356 }, { "epoch": 0.09460814430055879, "grad_norm": 0.6015625, "learning_rate": 0.0009562263968441756, "loss": 0.2508, "step": 53358 }, { "epoch": 0.0946116904658686, "grad_norm": 0.404296875, "learning_rate": 0.0009561676428633539, "loss": 0.1676, "step": 53360 }, { "epoch": 0.09461523663117842, "grad_norm": 0.64453125, "learning_rate": 0.0009561088895115717, "loss": 0.2336, "step": 53362 }, { "epoch": 0.09461878279648823, "grad_norm": 4.25, "learning_rate": 0.0009560501367890857, "loss": 0.2461, "step": 53364 }, { "epoch": 0.09462232896179805, "grad_norm": 0.2490234375, "learning_rate": 0.0009559913846961523, "loss": 0.2066, "step": 53366 }, { "epoch": 0.09462587512710786, "grad_norm": 0.78515625, "learning_rate": 0.0009559326332330291, "loss": 0.1498, "step": 53368 }, { "epoch": 0.09462942129241768, "grad_norm": 0.671875, "learning_rate": 0.0009558738823999731, "loss": 0.1963, "step": 53370 }, { "epoch": 0.0946329674577275, "grad_norm": 0.2158203125, "learning_rate": 0.000955815132197241, "loss": 0.3632, "step": 53372 }, { "epoch": 0.09463651362303731, "grad_norm": 0.296875, "learning_rate": 0.0009557563826250897, "loss": 0.1827, "step": 53374 }, { "epoch": 0.09464005978834712, "grad_norm": 0.365234375, "learning_rate": 0.000955697633683776, "loss": 0.1244, "step": 53376 }, { "epoch": 0.09464360595365694, "grad_norm": 5.40625, "learning_rate": 0.0009556388853735571, "loss": 0.3194, "step": 53378 }, { "epoch": 0.09464715211896675, "grad_norm": 0.25, "learning_rate": 0.0009555801376946902, "loss": 0.1377, "step": 53380 }, { "epoch": 0.09465069828427657, "grad_norm": 0.55859375, "learning_rate": 0.0009555213906474316, "loss": 0.2493, "step": 53382 }, { "epoch": 0.09465424444958638, "grad_norm": 0.34375, "learning_rate": 0.0009554626442320384, "loss": 0.1711, "step": 53384 }, { "epoch": 0.0946577906148962, "grad_norm": 0.42578125, "learning_rate": 0.0009554038984487678, "loss": 0.1707, "step": 53386 }, { "epoch": 0.09466133678020601, "grad_norm": 0.466796875, "learning_rate": 0.0009553451532978767, "loss": 0.1844, "step": 53388 }, { "epoch": 0.09466488294551582, "grad_norm": 0.3046875, "learning_rate": 0.0009552864087796218, "loss": 0.2408, "step": 53390 }, { "epoch": 0.09466842911082564, "grad_norm": 0.375, "learning_rate": 0.00095522766489426, "loss": 0.1401, "step": 53392 }, { "epoch": 0.09467197527613545, "grad_norm": 0.67578125, "learning_rate": 0.000955168921642048, "loss": 0.2569, "step": 53394 }, { "epoch": 0.09467552144144528, "grad_norm": 0.369140625, "learning_rate": 0.0009551101790232437, "loss": 0.178, "step": 53396 }, { "epoch": 0.0946790676067551, "grad_norm": 1.2109375, "learning_rate": 0.0009550514370381027, "loss": 0.2423, "step": 53398 }, { "epoch": 0.09468261377206491, "grad_norm": 1.0546875, "learning_rate": 0.0009549926956868829, "loss": 0.1862, "step": 53400 }, { "epoch": 0.09468615993737473, "grad_norm": 0.828125, "learning_rate": 0.0009549339549698406, "loss": 0.1652, "step": 53402 }, { "epoch": 0.09468970610268454, "grad_norm": 0.173828125, "learning_rate": 0.0009548752148872333, "loss": 0.1559, "step": 53404 }, { "epoch": 0.09469325226799435, "grad_norm": 0.31640625, "learning_rate": 0.0009548164754393172, "loss": 0.1811, "step": 53406 }, { "epoch": 0.09469679843330417, "grad_norm": 0.34765625, "learning_rate": 0.0009547577366263491, "loss": 0.1278, "step": 53408 }, { "epoch": 0.09470034459861398, "grad_norm": 0.3046875, "learning_rate": 0.0009546989984485867, "loss": 0.1817, "step": 53410 }, { "epoch": 0.0947038907639238, "grad_norm": 0.2119140625, "learning_rate": 0.0009546402609062868, "loss": 0.1594, "step": 53412 }, { "epoch": 0.09470743692923361, "grad_norm": 0.431640625, "learning_rate": 0.0009545815239997055, "loss": 0.1433, "step": 53414 }, { "epoch": 0.09471098309454343, "grad_norm": 0.396484375, "learning_rate": 0.0009545227877291002, "loss": 0.1602, "step": 53416 }, { "epoch": 0.09471452925985324, "grad_norm": 0.28125, "learning_rate": 0.0009544640520947279, "loss": 0.1597, "step": 53418 }, { "epoch": 0.09471807542516306, "grad_norm": 0.49609375, "learning_rate": 0.0009544053170968454, "loss": 0.2079, "step": 53420 }, { "epoch": 0.09472162159047287, "grad_norm": 0.3203125, "learning_rate": 0.0009543465827357092, "loss": 0.1467, "step": 53422 }, { "epoch": 0.09472516775578269, "grad_norm": 0.2197265625, "learning_rate": 0.0009542878490115765, "loss": 0.1757, "step": 53424 }, { "epoch": 0.0947287139210925, "grad_norm": 0.373046875, "learning_rate": 0.0009542291159247039, "loss": 0.1765, "step": 53426 }, { "epoch": 0.09473226008640231, "grad_norm": 0.50390625, "learning_rate": 0.000954170383475349, "loss": 0.1702, "step": 53428 }, { "epoch": 0.09473580625171213, "grad_norm": 0.359375, "learning_rate": 0.0009541116516637678, "loss": 0.2153, "step": 53430 }, { "epoch": 0.09473935241702194, "grad_norm": 2.203125, "learning_rate": 0.0009540529204902173, "loss": 0.1861, "step": 53432 }, { "epoch": 0.09474289858233176, "grad_norm": 0.421875, "learning_rate": 0.0009539941899549549, "loss": 0.2443, "step": 53434 }, { "epoch": 0.09474644474764157, "grad_norm": 0.36328125, "learning_rate": 0.0009539354600582371, "loss": 0.1841, "step": 53436 }, { "epoch": 0.09474999091295139, "grad_norm": 0.3671875, "learning_rate": 0.0009538767308003206, "loss": 0.1698, "step": 53438 }, { "epoch": 0.0947535370782612, "grad_norm": 0.3515625, "learning_rate": 0.0009538180021814624, "loss": 0.2018, "step": 53440 }, { "epoch": 0.09475708324357103, "grad_norm": 0.51953125, "learning_rate": 0.0009537592742019192, "loss": 0.2232, "step": 53442 }, { "epoch": 0.09476062940888084, "grad_norm": 3.90625, "learning_rate": 0.0009537005468619487, "loss": 0.2551, "step": 53444 }, { "epoch": 0.09476417557419066, "grad_norm": 0.28125, "learning_rate": 0.0009536418201618063, "loss": 0.1726, "step": 53446 }, { "epoch": 0.09476772173950047, "grad_norm": 1.3984375, "learning_rate": 0.0009535830941017495, "loss": 0.1796, "step": 53448 }, { "epoch": 0.09477126790481029, "grad_norm": 3.15625, "learning_rate": 0.0009535243686820354, "loss": 0.212, "step": 53450 }, { "epoch": 0.0947748140701201, "grad_norm": 0.75390625, "learning_rate": 0.000953465643902921, "loss": 0.1748, "step": 53452 }, { "epoch": 0.09477836023542992, "grad_norm": 0.1943359375, "learning_rate": 0.0009534069197646624, "loss": 0.1928, "step": 53454 }, { "epoch": 0.09478190640073973, "grad_norm": 2.40625, "learning_rate": 0.0009533481962675168, "loss": 0.1849, "step": 53456 }, { "epoch": 0.09478545256604955, "grad_norm": 0.384765625, "learning_rate": 0.000953289473411741, "loss": 0.1471, "step": 53458 }, { "epoch": 0.09478899873135936, "grad_norm": 0.3046875, "learning_rate": 0.0009532307511975918, "loss": 0.1466, "step": 53460 }, { "epoch": 0.09479254489666918, "grad_norm": 0.546875, "learning_rate": 0.0009531720296253263, "loss": 0.2351, "step": 53462 }, { "epoch": 0.09479609106197899, "grad_norm": 0.2451171875, "learning_rate": 0.0009531133086952008, "loss": 0.1522, "step": 53464 }, { "epoch": 0.0947996372272888, "grad_norm": 0.341796875, "learning_rate": 0.0009530545884074722, "loss": 0.2193, "step": 53466 }, { "epoch": 0.09480318339259862, "grad_norm": 0.2158203125, "learning_rate": 0.0009529958687623976, "loss": 0.1533, "step": 53468 }, { "epoch": 0.09480672955790843, "grad_norm": 0.228515625, "learning_rate": 0.0009529371497602341, "loss": 0.1926, "step": 53470 }, { "epoch": 0.09481027572321825, "grad_norm": 0.91796875, "learning_rate": 0.0009528784314012375, "loss": 0.2299, "step": 53472 }, { "epoch": 0.09481382188852806, "grad_norm": 0.546875, "learning_rate": 0.0009528197136856655, "loss": 0.1913, "step": 53474 }, { "epoch": 0.09481736805383788, "grad_norm": 0.3046875, "learning_rate": 0.0009527609966137744, "loss": 0.2963, "step": 53476 }, { "epoch": 0.09482091421914769, "grad_norm": 0.38671875, "learning_rate": 0.0009527022801858216, "loss": 0.1693, "step": 53478 }, { "epoch": 0.0948244603844575, "grad_norm": 0.2734375, "learning_rate": 0.000952643564402063, "loss": 0.1968, "step": 53480 }, { "epoch": 0.09482800654976732, "grad_norm": 1.046875, "learning_rate": 0.0009525848492627561, "loss": 0.2451, "step": 53482 }, { "epoch": 0.09483155271507714, "grad_norm": 3.0, "learning_rate": 0.0009525261347681571, "loss": 0.4232, "step": 53484 }, { "epoch": 0.09483509888038696, "grad_norm": 0.1650390625, "learning_rate": 0.0009524674209185238, "loss": 0.2036, "step": 53486 }, { "epoch": 0.09483864504569678, "grad_norm": 0.244140625, "learning_rate": 0.0009524087077141117, "loss": 0.1713, "step": 53488 }, { "epoch": 0.0948421912110066, "grad_norm": 0.337890625, "learning_rate": 0.0009523499951551782, "loss": 0.1914, "step": 53490 }, { "epoch": 0.09484573737631641, "grad_norm": 0.2177734375, "learning_rate": 0.0009522912832419803, "loss": 0.1186, "step": 53492 }, { "epoch": 0.09484928354162622, "grad_norm": 0.337890625, "learning_rate": 0.0009522325719747746, "loss": 0.1788, "step": 53494 }, { "epoch": 0.09485282970693604, "grad_norm": 1.625, "learning_rate": 0.0009521738613538178, "loss": 0.1883, "step": 53496 }, { "epoch": 0.09485637587224585, "grad_norm": 0.279296875, "learning_rate": 0.0009521151513793662, "loss": 0.408, "step": 53498 }, { "epoch": 0.09485992203755567, "grad_norm": 0.361328125, "learning_rate": 0.0009520564420516773, "loss": 0.1759, "step": 53500 }, { "epoch": 0.09486346820286548, "grad_norm": 1.0234375, "learning_rate": 0.0009519977333710079, "loss": 0.2159, "step": 53502 }, { "epoch": 0.0948670143681753, "grad_norm": 0.64453125, "learning_rate": 0.0009519390253376139, "loss": 0.1613, "step": 53504 }, { "epoch": 0.09487056053348511, "grad_norm": 0.5, "learning_rate": 0.0009518803179517528, "loss": 0.1356, "step": 53506 }, { "epoch": 0.09487410669879492, "grad_norm": 0.486328125, "learning_rate": 0.0009518216112136815, "loss": 0.1763, "step": 53508 }, { "epoch": 0.09487765286410474, "grad_norm": 0.376953125, "learning_rate": 0.000951762905123656, "loss": 0.1694, "step": 53510 }, { "epoch": 0.09488119902941455, "grad_norm": 1.0, "learning_rate": 0.0009517041996819336, "loss": 0.1651, "step": 53512 }, { "epoch": 0.09488474519472437, "grad_norm": 1.015625, "learning_rate": 0.0009516454948887707, "loss": 0.496, "step": 53514 }, { "epoch": 0.09488829136003418, "grad_norm": 3.046875, "learning_rate": 0.0009515867907444242, "loss": 0.3141, "step": 53516 }, { "epoch": 0.094891837525344, "grad_norm": 2.96875, "learning_rate": 0.0009515280872491514, "loss": 0.2559, "step": 53518 }, { "epoch": 0.09489538369065381, "grad_norm": 0.33984375, "learning_rate": 0.0009514693844032081, "loss": 0.1913, "step": 53520 }, { "epoch": 0.09489892985596363, "grad_norm": 0.34765625, "learning_rate": 0.0009514106822068513, "loss": 0.1575, "step": 53522 }, { "epoch": 0.09490247602127344, "grad_norm": 0.490234375, "learning_rate": 0.0009513519806603379, "loss": 0.3122, "step": 53524 }, { "epoch": 0.09490602218658326, "grad_norm": 0.2314453125, "learning_rate": 0.0009512932797639251, "loss": 0.1168, "step": 53526 }, { "epoch": 0.09490956835189307, "grad_norm": 0.376953125, "learning_rate": 0.0009512345795178687, "loss": 0.2392, "step": 53528 }, { "epoch": 0.09491311451720288, "grad_norm": 0.48828125, "learning_rate": 0.0009511758799224259, "loss": 0.2013, "step": 53530 }, { "epoch": 0.09491666068251271, "grad_norm": 0.953125, "learning_rate": 0.0009511171809778534, "loss": 0.2509, "step": 53532 }, { "epoch": 0.09492020684782253, "grad_norm": 0.52734375, "learning_rate": 0.000951058482684408, "loss": 0.1417, "step": 53534 }, { "epoch": 0.09492375301313234, "grad_norm": 1.5703125, "learning_rate": 0.0009509997850423459, "loss": 0.2697, "step": 53536 }, { "epoch": 0.09492729917844216, "grad_norm": 0.29296875, "learning_rate": 0.0009509410880519242, "loss": 0.1905, "step": 53538 }, { "epoch": 0.09493084534375197, "grad_norm": 0.58984375, "learning_rate": 0.0009508823917134, "loss": 0.1797, "step": 53540 }, { "epoch": 0.09493439150906179, "grad_norm": 2.1875, "learning_rate": 0.0009508236960270291, "loss": 0.4071, "step": 53542 }, { "epoch": 0.0949379376743716, "grad_norm": 0.43359375, "learning_rate": 0.0009507650009930692, "loss": 0.2231, "step": 53544 }, { "epoch": 0.09494148383968141, "grad_norm": 0.3203125, "learning_rate": 0.0009507063066117763, "loss": 0.1535, "step": 53546 }, { "epoch": 0.09494503000499123, "grad_norm": 0.267578125, "learning_rate": 0.0009506476128834072, "loss": 0.1764, "step": 53548 }, { "epoch": 0.09494857617030104, "grad_norm": 0.2353515625, "learning_rate": 0.0009505889198082187, "loss": 0.1849, "step": 53550 }, { "epoch": 0.09495212233561086, "grad_norm": 0.3828125, "learning_rate": 0.000950530227386468, "loss": 0.1443, "step": 53552 }, { "epoch": 0.09495566850092067, "grad_norm": 0.63671875, "learning_rate": 0.0009504715356184106, "loss": 0.2078, "step": 53554 }, { "epoch": 0.09495921466623049, "grad_norm": 0.232421875, "learning_rate": 0.0009504128445043039, "loss": 0.2088, "step": 53556 }, { "epoch": 0.0949627608315403, "grad_norm": 1.4140625, "learning_rate": 0.0009503541540444046, "loss": 0.3859, "step": 53558 }, { "epoch": 0.09496630699685012, "grad_norm": 1.1015625, "learning_rate": 0.0009502954642389696, "loss": 0.2139, "step": 53560 }, { "epoch": 0.09496985316215993, "grad_norm": 1.1875, "learning_rate": 0.000950236775088255, "loss": 0.1758, "step": 53562 }, { "epoch": 0.09497339932746975, "grad_norm": 3.03125, "learning_rate": 0.0009501780865925179, "loss": 0.3793, "step": 53564 }, { "epoch": 0.09497694549277956, "grad_norm": 0.94921875, "learning_rate": 0.0009501193987520147, "loss": 0.2128, "step": 53566 }, { "epoch": 0.09498049165808937, "grad_norm": 0.443359375, "learning_rate": 0.0009500607115670023, "loss": 0.1454, "step": 53568 }, { "epoch": 0.09498403782339919, "grad_norm": 0.7265625, "learning_rate": 0.0009500020250377372, "loss": 0.2671, "step": 53570 }, { "epoch": 0.094987583988709, "grad_norm": 1.5, "learning_rate": 0.0009499433391644761, "loss": 0.2144, "step": 53572 }, { "epoch": 0.09499113015401882, "grad_norm": 0.84765625, "learning_rate": 0.0009498846539474755, "loss": 0.3476, "step": 53574 }, { "epoch": 0.09499467631932863, "grad_norm": 1.828125, "learning_rate": 0.0009498259693869926, "loss": 0.2014, "step": 53576 }, { "epoch": 0.09499822248463846, "grad_norm": 0.3203125, "learning_rate": 0.0009497672854832833, "loss": 0.2293, "step": 53578 }, { "epoch": 0.09500176864994828, "grad_norm": 0.2265625, "learning_rate": 0.0009497086022366048, "loss": 0.1474, "step": 53580 }, { "epoch": 0.09500531481525809, "grad_norm": 0.2470703125, "learning_rate": 0.0009496499196472137, "loss": 0.177, "step": 53582 }, { "epoch": 0.0950088609805679, "grad_norm": 0.2236328125, "learning_rate": 0.0009495912377153663, "loss": 0.164, "step": 53584 }, { "epoch": 0.09501240714587772, "grad_norm": 0.296875, "learning_rate": 0.0009495325564413198, "loss": 0.2386, "step": 53586 }, { "epoch": 0.09501595331118753, "grad_norm": 0.85546875, "learning_rate": 0.0009494738758253301, "loss": 0.1831, "step": 53588 }, { "epoch": 0.09501949947649735, "grad_norm": 0.5234375, "learning_rate": 0.0009494151958676542, "loss": 0.2373, "step": 53590 }, { "epoch": 0.09502304564180716, "grad_norm": 0.337890625, "learning_rate": 0.0009493565165685494, "loss": 0.1606, "step": 53592 }, { "epoch": 0.09502659180711698, "grad_norm": 0.50390625, "learning_rate": 0.0009492978379282708, "loss": 0.2065, "step": 53594 }, { "epoch": 0.09503013797242679, "grad_norm": 0.1689453125, "learning_rate": 0.0009492391599470765, "loss": 0.1668, "step": 53596 }, { "epoch": 0.09503368413773661, "grad_norm": 0.2294921875, "learning_rate": 0.0009491804826252221, "loss": 0.1827, "step": 53598 }, { "epoch": 0.09503723030304642, "grad_norm": 1.2421875, "learning_rate": 0.0009491218059629653, "loss": 0.253, "step": 53600 }, { "epoch": 0.09504077646835624, "grad_norm": 1.1484375, "learning_rate": 0.0009490631299605616, "loss": 0.3384, "step": 53602 }, { "epoch": 0.09504432263366605, "grad_norm": 0.2236328125, "learning_rate": 0.0009490044546182683, "loss": 0.1129, "step": 53604 }, { "epoch": 0.09504786879897587, "grad_norm": 0.494140625, "learning_rate": 0.0009489457799363417, "loss": 0.1967, "step": 53606 }, { "epoch": 0.09505141496428568, "grad_norm": 0.54296875, "learning_rate": 0.0009488871059150388, "loss": 0.1623, "step": 53608 }, { "epoch": 0.0950549611295955, "grad_norm": 1.09375, "learning_rate": 0.0009488284325546157, "loss": 0.1863, "step": 53610 }, { "epoch": 0.09505850729490531, "grad_norm": 0.671875, "learning_rate": 0.0009487697598553289, "loss": 0.1692, "step": 53612 }, { "epoch": 0.09506205346021512, "grad_norm": 0.2265625, "learning_rate": 0.0009487110878174357, "loss": 0.1842, "step": 53614 }, { "epoch": 0.09506559962552494, "grad_norm": 0.55859375, "learning_rate": 0.0009486524164411925, "loss": 0.1835, "step": 53616 }, { "epoch": 0.09506914579083475, "grad_norm": 0.396484375, "learning_rate": 0.0009485937457268554, "loss": 0.1628, "step": 53618 }, { "epoch": 0.09507269195614457, "grad_norm": 0.73828125, "learning_rate": 0.0009485350756746811, "loss": 0.1539, "step": 53620 }, { "epoch": 0.0950762381214544, "grad_norm": 2.234375, "learning_rate": 0.0009484764062849269, "loss": 0.2332, "step": 53622 }, { "epoch": 0.09507978428676421, "grad_norm": 0.8125, "learning_rate": 0.0009484177375578485, "loss": 0.2611, "step": 53624 }, { "epoch": 0.09508333045207402, "grad_norm": 1.1484375, "learning_rate": 0.0009483590694937032, "loss": 0.1893, "step": 53626 }, { "epoch": 0.09508687661738384, "grad_norm": 0.380859375, "learning_rate": 0.000948300402092747, "loss": 0.2451, "step": 53628 }, { "epoch": 0.09509042278269365, "grad_norm": 0.1826171875, "learning_rate": 0.0009482417353552368, "loss": 0.1541, "step": 53630 }, { "epoch": 0.09509396894800347, "grad_norm": 0.44921875, "learning_rate": 0.0009481830692814288, "loss": 0.1995, "step": 53632 }, { "epoch": 0.09509751511331328, "grad_norm": 0.35546875, "learning_rate": 0.0009481244038715807, "loss": 0.1413, "step": 53634 }, { "epoch": 0.0951010612786231, "grad_norm": 0.490234375, "learning_rate": 0.0009480657391259475, "loss": 0.1635, "step": 53636 }, { "epoch": 0.09510460744393291, "grad_norm": 1.9375, "learning_rate": 0.0009480070750447864, "loss": 0.2122, "step": 53638 }, { "epoch": 0.09510815360924273, "grad_norm": 0.53515625, "learning_rate": 0.0009479484116283546, "loss": 0.2577, "step": 53640 }, { "epoch": 0.09511169977455254, "grad_norm": 1.359375, "learning_rate": 0.000947889748876908, "loss": 0.171, "step": 53642 }, { "epoch": 0.09511524593986236, "grad_norm": 0.2578125, "learning_rate": 0.0009478310867907032, "loss": 0.1797, "step": 53644 }, { "epoch": 0.09511879210517217, "grad_norm": 0.427734375, "learning_rate": 0.0009477724253699967, "loss": 0.1478, "step": 53646 }, { "epoch": 0.09512233827048198, "grad_norm": 0.39453125, "learning_rate": 0.0009477137646150453, "loss": 0.1446, "step": 53648 }, { "epoch": 0.0951258844357918, "grad_norm": 1.609375, "learning_rate": 0.0009476551045261055, "loss": 0.1977, "step": 53650 }, { "epoch": 0.09512943060110161, "grad_norm": 1.5625, "learning_rate": 0.0009475964451034338, "loss": 0.2504, "step": 53652 }, { "epoch": 0.09513297676641143, "grad_norm": 6.34375, "learning_rate": 0.0009475377863472865, "loss": 0.2192, "step": 53654 }, { "epoch": 0.09513652293172124, "grad_norm": 1.6171875, "learning_rate": 0.0009474791282579202, "loss": 0.1597, "step": 53656 }, { "epoch": 0.09514006909703106, "grad_norm": 0.36328125, "learning_rate": 0.0009474204708355924, "loss": 0.1704, "step": 53658 }, { "epoch": 0.09514361526234087, "grad_norm": 0.58984375, "learning_rate": 0.0009473618140805583, "loss": 0.1359, "step": 53660 }, { "epoch": 0.09514716142765069, "grad_norm": 0.3125, "learning_rate": 0.0009473031579930748, "loss": 0.2349, "step": 53662 }, { "epoch": 0.0951507075929605, "grad_norm": 0.2333984375, "learning_rate": 0.0009472445025733989, "loss": 0.1702, "step": 53664 }, { "epoch": 0.09515425375827032, "grad_norm": 0.275390625, "learning_rate": 0.000947185847821787, "loss": 0.1971, "step": 53666 }, { "epoch": 0.09515779992358014, "grad_norm": 0.2431640625, "learning_rate": 0.0009471271937384948, "loss": 0.2173, "step": 53668 }, { "epoch": 0.09516134608888996, "grad_norm": 1.8984375, "learning_rate": 0.0009470685403237798, "loss": 0.4322, "step": 53670 }, { "epoch": 0.09516489225419977, "grad_norm": 0.59765625, "learning_rate": 0.0009470098875778979, "loss": 0.4916, "step": 53672 }, { "epoch": 0.09516843841950959, "grad_norm": 0.4609375, "learning_rate": 0.0009469512355011065, "loss": 0.3984, "step": 53674 }, { "epoch": 0.0951719845848194, "grad_norm": 0.54296875, "learning_rate": 0.0009468925840936611, "loss": 0.2071, "step": 53676 }, { "epoch": 0.09517553075012922, "grad_norm": 1.6640625, "learning_rate": 0.0009468339333558186, "loss": 0.1595, "step": 53678 }, { "epoch": 0.09517907691543903, "grad_norm": 0.79296875, "learning_rate": 0.0009467752832878355, "loss": 0.2137, "step": 53680 }, { "epoch": 0.09518262308074885, "grad_norm": 0.5, "learning_rate": 0.0009467166338899686, "loss": 0.2366, "step": 53682 }, { "epoch": 0.09518616924605866, "grad_norm": 0.96875, "learning_rate": 0.0009466579851624739, "loss": 0.2679, "step": 53684 }, { "epoch": 0.09518971541136848, "grad_norm": 0.30078125, "learning_rate": 0.0009465993371056079, "loss": 0.1683, "step": 53686 }, { "epoch": 0.09519326157667829, "grad_norm": 0.5546875, "learning_rate": 0.0009465406897196274, "loss": 0.2054, "step": 53688 }, { "epoch": 0.0951968077419881, "grad_norm": 0.1962890625, "learning_rate": 0.0009464820430047891, "loss": 0.135, "step": 53690 }, { "epoch": 0.09520035390729792, "grad_norm": 0.365234375, "learning_rate": 0.0009464233969613487, "loss": 0.1348, "step": 53692 }, { "epoch": 0.09520390007260773, "grad_norm": 0.83203125, "learning_rate": 0.0009463647515895633, "loss": 0.2388, "step": 53694 }, { "epoch": 0.09520744623791755, "grad_norm": 0.5859375, "learning_rate": 0.0009463061068896894, "loss": 0.159, "step": 53696 }, { "epoch": 0.09521099240322736, "grad_norm": 0.357421875, "learning_rate": 0.0009462474628619832, "loss": 0.1617, "step": 53698 }, { "epoch": 0.09521453856853718, "grad_norm": 1.453125, "learning_rate": 0.0009461888195067015, "loss": 0.176, "step": 53700 }, { "epoch": 0.09521808473384699, "grad_norm": 0.388671875, "learning_rate": 0.0009461301768241002, "loss": 0.2082, "step": 53702 }, { "epoch": 0.0952216308991568, "grad_norm": 0.86328125, "learning_rate": 0.0009460715348144362, "loss": 0.2326, "step": 53704 }, { "epoch": 0.09522517706446662, "grad_norm": 1.2734375, "learning_rate": 0.000946012893477966, "loss": 0.3357, "step": 53706 }, { "epoch": 0.09522872322977644, "grad_norm": 0.3828125, "learning_rate": 0.0009459542528149461, "loss": 0.1986, "step": 53708 }, { "epoch": 0.09523226939508625, "grad_norm": 0.3984375, "learning_rate": 0.0009458956128256325, "loss": 0.1835, "step": 53710 }, { "epoch": 0.09523581556039606, "grad_norm": 0.71484375, "learning_rate": 0.0009458369735102819, "loss": 0.1829, "step": 53712 }, { "epoch": 0.09523936172570589, "grad_norm": 0.22265625, "learning_rate": 0.0009457783348691511, "loss": 0.1827, "step": 53714 }, { "epoch": 0.09524290789101571, "grad_norm": 0.451171875, "learning_rate": 0.0009457196969024964, "loss": 0.2034, "step": 53716 }, { "epoch": 0.09524645405632552, "grad_norm": 4.09375, "learning_rate": 0.0009456610596105742, "loss": 0.1577, "step": 53718 }, { "epoch": 0.09525000022163534, "grad_norm": 0.59375, "learning_rate": 0.0009456024229936404, "loss": 0.2295, "step": 53720 }, { "epoch": 0.09525354638694515, "grad_norm": 0.34375, "learning_rate": 0.0009455437870519521, "loss": 0.1708, "step": 53722 }, { "epoch": 0.09525709255225497, "grad_norm": 0.42578125, "learning_rate": 0.0009454851517857658, "loss": 0.1738, "step": 53724 }, { "epoch": 0.09526063871756478, "grad_norm": 0.4296875, "learning_rate": 0.0009454265171953373, "loss": 0.2398, "step": 53726 }, { "epoch": 0.0952641848828746, "grad_norm": 0.326171875, "learning_rate": 0.0009453678832809236, "loss": 0.1497, "step": 53728 }, { "epoch": 0.09526773104818441, "grad_norm": 0.1611328125, "learning_rate": 0.0009453092500427809, "loss": 0.1471, "step": 53730 }, { "epoch": 0.09527127721349422, "grad_norm": 0.875, "learning_rate": 0.0009452506174811662, "loss": 0.1733, "step": 53732 }, { "epoch": 0.09527482337880404, "grad_norm": 0.5078125, "learning_rate": 0.0009451919855963349, "loss": 0.2243, "step": 53734 }, { "epoch": 0.09527836954411385, "grad_norm": 0.369140625, "learning_rate": 0.0009451333543885441, "loss": 0.1945, "step": 53736 }, { "epoch": 0.09528191570942367, "grad_norm": 0.376953125, "learning_rate": 0.0009450747238580497, "loss": 0.1748, "step": 53738 }, { "epoch": 0.09528546187473348, "grad_norm": 0.40625, "learning_rate": 0.0009450160940051092, "loss": 0.206, "step": 53740 }, { "epoch": 0.0952890080400433, "grad_norm": 2.71875, "learning_rate": 0.0009449574648299776, "loss": 0.2187, "step": 53742 }, { "epoch": 0.09529255420535311, "grad_norm": 0.25390625, "learning_rate": 0.0009448988363329121, "loss": 0.1526, "step": 53744 }, { "epoch": 0.09529610037066293, "grad_norm": 0.255859375, "learning_rate": 0.000944840208514169, "loss": 0.1327, "step": 53746 }, { "epoch": 0.09529964653597274, "grad_norm": 0.2265625, "learning_rate": 0.000944781581374005, "loss": 0.1888, "step": 53748 }, { "epoch": 0.09530319270128255, "grad_norm": 0.515625, "learning_rate": 0.0009447229549126759, "loss": 0.173, "step": 53750 }, { "epoch": 0.09530673886659237, "grad_norm": 0.1923828125, "learning_rate": 0.0009446643291304384, "loss": 0.1704, "step": 53752 }, { "epoch": 0.09531028503190218, "grad_norm": 0.283203125, "learning_rate": 0.0009446057040275491, "loss": 0.2039, "step": 53754 }, { "epoch": 0.095313831197212, "grad_norm": 0.65625, "learning_rate": 0.0009445470796042642, "loss": 0.204, "step": 53756 }, { "epoch": 0.09531737736252183, "grad_norm": 0.2099609375, "learning_rate": 0.0009444884558608402, "loss": 0.1606, "step": 53758 }, { "epoch": 0.09532092352783164, "grad_norm": 0.296875, "learning_rate": 0.000944429832797533, "loss": 0.1671, "step": 53760 }, { "epoch": 0.09532446969314146, "grad_norm": 0.2890625, "learning_rate": 0.0009443712104145992, "loss": 0.1538, "step": 53762 }, { "epoch": 0.09532801585845127, "grad_norm": 1.578125, "learning_rate": 0.0009443125887122958, "loss": 0.3138, "step": 53764 }, { "epoch": 0.09533156202376109, "grad_norm": 0.83984375, "learning_rate": 0.0009442539676908785, "loss": 0.2052, "step": 53766 }, { "epoch": 0.0953351081890709, "grad_norm": 0.265625, "learning_rate": 0.0009441953473506038, "loss": 0.1857, "step": 53768 }, { "epoch": 0.09533865435438071, "grad_norm": 0.466796875, "learning_rate": 0.0009441367276917283, "loss": 0.1795, "step": 53770 }, { "epoch": 0.09534220051969053, "grad_norm": 0.228515625, "learning_rate": 0.0009440781087145081, "loss": 0.2191, "step": 53772 }, { "epoch": 0.09534574668500034, "grad_norm": 0.486328125, "learning_rate": 0.0009440194904191999, "loss": 0.204, "step": 53774 }, { "epoch": 0.09534929285031016, "grad_norm": 0.8671875, "learning_rate": 0.0009439608728060596, "loss": 0.1528, "step": 53776 }, { "epoch": 0.09535283901561997, "grad_norm": 1.0859375, "learning_rate": 0.0009439022558753436, "loss": 0.6424, "step": 53778 }, { "epoch": 0.09535638518092979, "grad_norm": 0.2451171875, "learning_rate": 0.0009438436396273091, "loss": 0.1547, "step": 53780 }, { "epoch": 0.0953599313462396, "grad_norm": 0.263671875, "learning_rate": 0.0009437850240622115, "loss": 0.1676, "step": 53782 }, { "epoch": 0.09536347751154942, "grad_norm": 0.2890625, "learning_rate": 0.0009437264091803072, "loss": 0.1851, "step": 53784 }, { "epoch": 0.09536702367685923, "grad_norm": 0.1611328125, "learning_rate": 0.0009436677949818528, "loss": 0.1698, "step": 53786 }, { "epoch": 0.09537056984216905, "grad_norm": 0.5078125, "learning_rate": 0.0009436091814671053, "loss": 0.1907, "step": 53788 }, { "epoch": 0.09537411600747886, "grad_norm": 1.7109375, "learning_rate": 0.0009435505686363197, "loss": 0.2651, "step": 53790 }, { "epoch": 0.09537766217278867, "grad_norm": 0.63671875, "learning_rate": 0.0009434919564897535, "loss": 0.2671, "step": 53792 }, { "epoch": 0.09538120833809849, "grad_norm": 0.357421875, "learning_rate": 0.0009434333450276622, "loss": 0.2245, "step": 53794 }, { "epoch": 0.0953847545034083, "grad_norm": 0.388671875, "learning_rate": 0.0009433747342503028, "loss": 0.207, "step": 53796 }, { "epoch": 0.09538830066871812, "grad_norm": 0.28515625, "learning_rate": 0.0009433161241579314, "loss": 0.195, "step": 53798 }, { "epoch": 0.09539184683402793, "grad_norm": 1.1484375, "learning_rate": 0.000943257514750804, "loss": 0.1788, "step": 53800 }, { "epoch": 0.09539539299933775, "grad_norm": 0.71484375, "learning_rate": 0.000943198906029177, "loss": 0.1602, "step": 53802 }, { "epoch": 0.09539893916464758, "grad_norm": 0.76953125, "learning_rate": 0.000943140297993307, "loss": 0.2443, "step": 53804 }, { "epoch": 0.09540248532995739, "grad_norm": 7.53125, "learning_rate": 0.000943081690643451, "loss": 0.1763, "step": 53806 }, { "epoch": 0.0954060314952672, "grad_norm": 0.43359375, "learning_rate": 0.0009430230839798637, "loss": 0.2295, "step": 53808 }, { "epoch": 0.09540957766057702, "grad_norm": 0.291015625, "learning_rate": 0.0009429644780028024, "loss": 0.183, "step": 53810 }, { "epoch": 0.09541312382588683, "grad_norm": 0.5625, "learning_rate": 0.0009429058727125233, "loss": 0.2235, "step": 53812 }, { "epoch": 0.09541666999119665, "grad_norm": 0.25390625, "learning_rate": 0.0009428472681092831, "loss": 0.1748, "step": 53814 }, { "epoch": 0.09542021615650646, "grad_norm": 0.416015625, "learning_rate": 0.000942788664193337, "loss": 0.1672, "step": 53816 }, { "epoch": 0.09542376232181628, "grad_norm": 0.8984375, "learning_rate": 0.0009427300609649421, "loss": 0.2099, "step": 53818 }, { "epoch": 0.09542730848712609, "grad_norm": 0.412109375, "learning_rate": 0.0009426714584243545, "loss": 0.1808, "step": 53820 }, { "epoch": 0.0954308546524359, "grad_norm": 1.9765625, "learning_rate": 0.000942612856571831, "loss": 0.2795, "step": 53822 }, { "epoch": 0.09543440081774572, "grad_norm": 0.5390625, "learning_rate": 0.0009425542554076272, "loss": 0.1976, "step": 53824 }, { "epoch": 0.09543794698305554, "grad_norm": 0.4921875, "learning_rate": 0.0009424956549319994, "loss": 0.1817, "step": 53826 }, { "epoch": 0.09544149314836535, "grad_norm": 0.71484375, "learning_rate": 0.0009424370551452044, "loss": 0.2092, "step": 53828 }, { "epoch": 0.09544503931367516, "grad_norm": 0.3359375, "learning_rate": 0.0009423784560474984, "loss": 0.1982, "step": 53830 }, { "epoch": 0.09544858547898498, "grad_norm": 0.181640625, "learning_rate": 0.0009423198576391372, "loss": 0.1904, "step": 53832 }, { "epoch": 0.0954521316442948, "grad_norm": 0.42578125, "learning_rate": 0.0009422612599203772, "loss": 0.3371, "step": 53834 }, { "epoch": 0.09545567780960461, "grad_norm": 1.046875, "learning_rate": 0.0009422026628914748, "loss": 0.2482, "step": 53836 }, { "epoch": 0.09545922397491442, "grad_norm": 0.291015625, "learning_rate": 0.0009421440665526868, "loss": 0.2443, "step": 53838 }, { "epoch": 0.09546277014022424, "grad_norm": 0.224609375, "learning_rate": 0.0009420854709042685, "loss": 0.1869, "step": 53840 }, { "epoch": 0.09546631630553405, "grad_norm": 0.255859375, "learning_rate": 0.0009420268759464767, "loss": 0.1774, "step": 53842 }, { "epoch": 0.09546986247084387, "grad_norm": 0.2373046875, "learning_rate": 0.0009419682816795676, "loss": 0.1816, "step": 53844 }, { "epoch": 0.09547340863615368, "grad_norm": 0.4921875, "learning_rate": 0.0009419096881037976, "loss": 0.1812, "step": 53846 }, { "epoch": 0.0954769548014635, "grad_norm": 0.28515625, "learning_rate": 0.0009418510952194226, "loss": 0.1623, "step": 53848 }, { "epoch": 0.09548050096677332, "grad_norm": 0.57421875, "learning_rate": 0.0009417925030266992, "loss": 0.2533, "step": 53850 }, { "epoch": 0.09548404713208314, "grad_norm": 0.5, "learning_rate": 0.0009417339115258834, "loss": 0.1697, "step": 53852 }, { "epoch": 0.09548759329739295, "grad_norm": 0.66015625, "learning_rate": 0.0009416753207172317, "loss": 0.1678, "step": 53854 }, { "epoch": 0.09549113946270277, "grad_norm": 0.4296875, "learning_rate": 0.000941616730601, "loss": 0.1493, "step": 53856 }, { "epoch": 0.09549468562801258, "grad_norm": 0.2255859375, "learning_rate": 0.0009415581411774445, "loss": 0.172, "step": 53858 }, { "epoch": 0.0954982317933224, "grad_norm": 0.8125, "learning_rate": 0.000941499552446822, "loss": 0.2835, "step": 53860 }, { "epoch": 0.09550177795863221, "grad_norm": 0.326171875, "learning_rate": 0.0009414409644093884, "loss": 0.159, "step": 53862 }, { "epoch": 0.09550532412394203, "grad_norm": 0.40625, "learning_rate": 0.0009413823770653998, "loss": 0.1822, "step": 53864 }, { "epoch": 0.09550887028925184, "grad_norm": 1.75, "learning_rate": 0.0009413237904151126, "loss": 0.2256, "step": 53866 }, { "epoch": 0.09551241645456165, "grad_norm": 0.30078125, "learning_rate": 0.0009412652044587829, "loss": 0.2437, "step": 53868 }, { "epoch": 0.09551596261987147, "grad_norm": 0.578125, "learning_rate": 0.0009412066191966672, "loss": 0.2488, "step": 53870 }, { "epoch": 0.09551950878518128, "grad_norm": 0.57421875, "learning_rate": 0.0009411480346290211, "loss": 0.3263, "step": 53872 }, { "epoch": 0.0955230549504911, "grad_norm": 0.466796875, "learning_rate": 0.0009410894507561014, "loss": 0.1435, "step": 53874 }, { "epoch": 0.09552660111580091, "grad_norm": 0.49609375, "learning_rate": 0.0009410308675781643, "loss": 0.1787, "step": 53876 }, { "epoch": 0.09553014728111073, "grad_norm": 0.291015625, "learning_rate": 0.0009409722850954653, "loss": 0.2532, "step": 53878 }, { "epoch": 0.09553369344642054, "grad_norm": 0.232421875, "learning_rate": 0.000940913703308262, "loss": 0.1757, "step": 53880 }, { "epoch": 0.09553723961173036, "grad_norm": 1.328125, "learning_rate": 0.0009408551222168094, "loss": 0.223, "step": 53882 }, { "epoch": 0.09554078577704017, "grad_norm": 0.5625, "learning_rate": 0.000940796541821364, "loss": 0.1612, "step": 53884 }, { "epoch": 0.09554433194234999, "grad_norm": 0.435546875, "learning_rate": 0.0009407379621221819, "loss": 0.1932, "step": 53886 }, { "epoch": 0.0955478781076598, "grad_norm": 0.470703125, "learning_rate": 0.00094067938311952, "loss": 0.1511, "step": 53888 }, { "epoch": 0.09555142427296962, "grad_norm": 0.29296875, "learning_rate": 0.0009406208048136334, "loss": 0.1843, "step": 53890 }, { "epoch": 0.09555497043827943, "grad_norm": 0.1875, "learning_rate": 0.0009405622272047787, "loss": 0.129, "step": 53892 }, { "epoch": 0.09555851660358926, "grad_norm": 0.89453125, "learning_rate": 0.0009405036502932123, "loss": 0.1575, "step": 53894 }, { "epoch": 0.09556206276889907, "grad_norm": 0.3203125, "learning_rate": 0.0009404450740791907, "loss": 0.2182, "step": 53896 }, { "epoch": 0.09556560893420889, "grad_norm": 0.46484375, "learning_rate": 0.0009403864985629693, "loss": 0.2682, "step": 53898 }, { "epoch": 0.0955691550995187, "grad_norm": 0.22265625, "learning_rate": 0.0009403279237448046, "loss": 0.1733, "step": 53900 }, { "epoch": 0.09557270126482852, "grad_norm": 0.26171875, "learning_rate": 0.000940269349624953, "loss": 0.1458, "step": 53902 }, { "epoch": 0.09557624743013833, "grad_norm": 0.423828125, "learning_rate": 0.0009402107762036707, "loss": 0.1725, "step": 53904 }, { "epoch": 0.09557979359544815, "grad_norm": 0.3828125, "learning_rate": 0.0009401522034812133, "loss": 0.2225, "step": 53906 }, { "epoch": 0.09558333976075796, "grad_norm": 0.38671875, "learning_rate": 0.0009400936314578374, "loss": 0.1617, "step": 53908 }, { "epoch": 0.09558688592606777, "grad_norm": 0.2177734375, "learning_rate": 0.0009400350601337989, "loss": 0.1801, "step": 53910 }, { "epoch": 0.09559043209137759, "grad_norm": 0.2578125, "learning_rate": 0.0009399764895093545, "loss": 0.1364, "step": 53912 }, { "epoch": 0.0955939782566874, "grad_norm": 0.400390625, "learning_rate": 0.0009399179195847598, "loss": 0.251, "step": 53914 }, { "epoch": 0.09559752442199722, "grad_norm": 1.5078125, "learning_rate": 0.000939859350360271, "loss": 0.2121, "step": 53916 }, { "epoch": 0.09560107058730703, "grad_norm": 0.4296875, "learning_rate": 0.0009398007818361443, "loss": 0.1406, "step": 53918 }, { "epoch": 0.09560461675261685, "grad_norm": 4.90625, "learning_rate": 0.0009397422140126364, "loss": 0.2042, "step": 53920 }, { "epoch": 0.09560816291792666, "grad_norm": 0.66015625, "learning_rate": 0.0009396836468900025, "loss": 0.2006, "step": 53922 }, { "epoch": 0.09561170908323648, "grad_norm": 0.255859375, "learning_rate": 0.0009396250804684996, "loss": 0.1417, "step": 53924 }, { "epoch": 0.09561525524854629, "grad_norm": 0.35546875, "learning_rate": 0.0009395665147483832, "loss": 0.1756, "step": 53926 }, { "epoch": 0.0956188014138561, "grad_norm": 0.2578125, "learning_rate": 0.00093950794972991, "loss": 0.1702, "step": 53928 }, { "epoch": 0.09562234757916592, "grad_norm": 0.390625, "learning_rate": 0.0009394493854133352, "loss": 0.1667, "step": 53930 }, { "epoch": 0.09562589374447573, "grad_norm": 0.984375, "learning_rate": 0.0009393908217989158, "loss": 0.2076, "step": 53932 }, { "epoch": 0.09562943990978555, "grad_norm": 0.82421875, "learning_rate": 0.0009393322588869075, "loss": 0.1822, "step": 53934 }, { "epoch": 0.09563298607509536, "grad_norm": 1.0390625, "learning_rate": 0.000939273696677567, "loss": 0.1824, "step": 53936 }, { "epoch": 0.09563653224040518, "grad_norm": 0.283203125, "learning_rate": 0.0009392151351711497, "loss": 0.1601, "step": 53938 }, { "epoch": 0.095640078405715, "grad_norm": 0.30078125, "learning_rate": 0.0009391565743679122, "loss": 0.1601, "step": 53940 }, { "epoch": 0.09564362457102482, "grad_norm": 0.2373046875, "learning_rate": 0.0009390980142681102, "loss": 0.2001, "step": 53942 }, { "epoch": 0.09564717073633464, "grad_norm": 1.5859375, "learning_rate": 0.0009390394548720004, "loss": 0.359, "step": 53944 }, { "epoch": 0.09565071690164445, "grad_norm": 2.890625, "learning_rate": 0.0009389808961798381, "loss": 0.1623, "step": 53946 }, { "epoch": 0.09565426306695426, "grad_norm": 0.23828125, "learning_rate": 0.0009389223381918797, "loss": 0.1528, "step": 53948 }, { "epoch": 0.09565780923226408, "grad_norm": 2.75, "learning_rate": 0.0009388637809083815, "loss": 0.2219, "step": 53950 }, { "epoch": 0.0956613553975739, "grad_norm": 0.392578125, "learning_rate": 0.0009388052243296002, "loss": 0.2096, "step": 53952 }, { "epoch": 0.09566490156288371, "grad_norm": 0.40625, "learning_rate": 0.0009387466684557906, "loss": 0.1883, "step": 53954 }, { "epoch": 0.09566844772819352, "grad_norm": 0.322265625, "learning_rate": 0.0009386881132872094, "loss": 0.1542, "step": 53956 }, { "epoch": 0.09567199389350334, "grad_norm": 2.015625, "learning_rate": 0.0009386295588241127, "loss": 0.2324, "step": 53958 }, { "epoch": 0.09567554005881315, "grad_norm": 0.3671875, "learning_rate": 0.0009385710050667566, "loss": 0.1771, "step": 53960 }, { "epoch": 0.09567908622412297, "grad_norm": 0.431640625, "learning_rate": 0.0009385124520153976, "loss": 0.2122, "step": 53962 }, { "epoch": 0.09568263238943278, "grad_norm": 0.27734375, "learning_rate": 0.000938453899670291, "loss": 0.147, "step": 53964 }, { "epoch": 0.0956861785547426, "grad_norm": 0.55859375, "learning_rate": 0.000938395348031693, "loss": 0.168, "step": 53966 }, { "epoch": 0.09568972472005241, "grad_norm": 0.380859375, "learning_rate": 0.00093833679709986, "loss": 0.2393, "step": 53968 }, { "epoch": 0.09569327088536222, "grad_norm": 0.2578125, "learning_rate": 0.0009382782468750481, "loss": 0.1711, "step": 53970 }, { "epoch": 0.09569681705067204, "grad_norm": 0.81640625, "learning_rate": 0.000938219697357513, "loss": 0.1875, "step": 53972 }, { "epoch": 0.09570036321598185, "grad_norm": 0.6640625, "learning_rate": 0.000938161148547511, "loss": 0.2747, "step": 53974 }, { "epoch": 0.09570390938129167, "grad_norm": 0.380859375, "learning_rate": 0.0009381026004452984, "loss": 0.168, "step": 53976 }, { "epoch": 0.09570745554660148, "grad_norm": 0.341796875, "learning_rate": 0.0009380440530511307, "loss": 0.1725, "step": 53978 }, { "epoch": 0.0957110017119113, "grad_norm": 0.455078125, "learning_rate": 0.0009379855063652645, "loss": 0.1854, "step": 53980 }, { "epoch": 0.09571454787722111, "grad_norm": 0.48046875, "learning_rate": 0.0009379269603879554, "loss": 0.2726, "step": 53982 }, { "epoch": 0.09571809404253093, "grad_norm": 0.80078125, "learning_rate": 0.0009378684151194594, "loss": 0.2142, "step": 53984 }, { "epoch": 0.09572164020784076, "grad_norm": 0.84765625, "learning_rate": 0.0009378098705600333, "loss": 0.2886, "step": 53986 }, { "epoch": 0.09572518637315057, "grad_norm": 0.55078125, "learning_rate": 0.0009377513267099322, "loss": 0.2287, "step": 53988 }, { "epoch": 0.09572873253846038, "grad_norm": 0.302734375, "learning_rate": 0.0009376927835694126, "loss": 0.2012, "step": 53990 }, { "epoch": 0.0957322787037702, "grad_norm": 0.2451171875, "learning_rate": 0.0009376342411387304, "loss": 0.1811, "step": 53992 }, { "epoch": 0.09573582486908001, "grad_norm": 2.375, "learning_rate": 0.0009375756994181424, "loss": 0.2225, "step": 53994 }, { "epoch": 0.09573937103438983, "grad_norm": 0.490234375, "learning_rate": 0.0009375171584079032, "loss": 0.129, "step": 53996 }, { "epoch": 0.09574291719969964, "grad_norm": 0.28125, "learning_rate": 0.0009374586181082698, "loss": 0.1874, "step": 53998 }, { "epoch": 0.09574646336500946, "grad_norm": 0.40234375, "learning_rate": 0.000937400078519498, "loss": 0.1996, "step": 54000 }, { "epoch": 0.09575000953031927, "grad_norm": 0.408203125, "learning_rate": 0.0009373415396418441, "loss": 0.1947, "step": 54002 }, { "epoch": 0.09575355569562909, "grad_norm": 0.78125, "learning_rate": 0.0009372830014755633, "loss": 0.158, "step": 54004 }, { "epoch": 0.0957571018609389, "grad_norm": 0.7109375, "learning_rate": 0.0009372244640209123, "loss": 0.2838, "step": 54006 }, { "epoch": 0.09576064802624872, "grad_norm": 1.390625, "learning_rate": 0.000937165927278147, "loss": 0.1952, "step": 54008 }, { "epoch": 0.09576419419155853, "grad_norm": 0.396484375, "learning_rate": 0.0009371073912475234, "loss": 0.1623, "step": 54010 }, { "epoch": 0.09576774035686834, "grad_norm": 0.380859375, "learning_rate": 0.0009370488559292974, "loss": 0.1847, "step": 54012 }, { "epoch": 0.09577128652217816, "grad_norm": 0.94140625, "learning_rate": 0.000936990321323725, "loss": 0.2231, "step": 54014 }, { "epoch": 0.09577483268748797, "grad_norm": 0.53515625, "learning_rate": 0.0009369317874310623, "loss": 0.3733, "step": 54016 }, { "epoch": 0.09577837885279779, "grad_norm": 2.328125, "learning_rate": 0.0009368732542515652, "loss": 0.2471, "step": 54018 }, { "epoch": 0.0957819250181076, "grad_norm": 1.3515625, "learning_rate": 0.0009368147217854901, "loss": 0.1766, "step": 54020 }, { "epoch": 0.09578547118341742, "grad_norm": 0.97265625, "learning_rate": 0.0009367561900330921, "loss": 0.1929, "step": 54022 }, { "epoch": 0.09578901734872723, "grad_norm": 0.62890625, "learning_rate": 0.0009366976589946276, "loss": 0.1867, "step": 54024 }, { "epoch": 0.09579256351403705, "grad_norm": 0.31640625, "learning_rate": 0.0009366391286703533, "loss": 0.2236, "step": 54026 }, { "epoch": 0.09579610967934686, "grad_norm": 0.43359375, "learning_rate": 0.0009365805990605243, "loss": 0.2002, "step": 54028 }, { "epoch": 0.09579965584465669, "grad_norm": 0.466796875, "learning_rate": 0.0009365220701653965, "loss": 0.1505, "step": 54030 }, { "epoch": 0.0958032020099665, "grad_norm": 0.4375, "learning_rate": 0.0009364635419852266, "loss": 0.2512, "step": 54032 }, { "epoch": 0.09580674817527632, "grad_norm": 0.296875, "learning_rate": 0.00093640501452027, "loss": 0.1998, "step": 54034 }, { "epoch": 0.09581029434058613, "grad_norm": 2.421875, "learning_rate": 0.000936346487770783, "loss": 0.1873, "step": 54036 }, { "epoch": 0.09581384050589595, "grad_norm": 0.3828125, "learning_rate": 0.0009362879617370212, "loss": 0.1268, "step": 54038 }, { "epoch": 0.09581738667120576, "grad_norm": 0.52734375, "learning_rate": 0.0009362294364192407, "loss": 0.1874, "step": 54040 }, { "epoch": 0.09582093283651558, "grad_norm": 0.251953125, "learning_rate": 0.0009361709118176978, "loss": 0.2035, "step": 54042 }, { "epoch": 0.09582447900182539, "grad_norm": 0.421875, "learning_rate": 0.000936112387932648, "loss": 0.1549, "step": 54044 }, { "epoch": 0.0958280251671352, "grad_norm": 0.60546875, "learning_rate": 0.0009360538647643475, "loss": 0.1394, "step": 54046 }, { "epoch": 0.09583157133244502, "grad_norm": 0.7109375, "learning_rate": 0.000935995342313052, "loss": 0.2147, "step": 54048 }, { "epoch": 0.09583511749775483, "grad_norm": 0.60546875, "learning_rate": 0.0009359368205790178, "loss": 0.1953, "step": 54050 }, { "epoch": 0.09583866366306465, "grad_norm": 0.34375, "learning_rate": 0.0009358782995625006, "loss": 0.1495, "step": 54052 }, { "epoch": 0.09584220982837446, "grad_norm": 0.291015625, "learning_rate": 0.0009358197792637566, "loss": 0.1767, "step": 54054 }, { "epoch": 0.09584575599368428, "grad_norm": 0.53515625, "learning_rate": 0.0009357612596830411, "loss": 0.2031, "step": 54056 }, { "epoch": 0.09584930215899409, "grad_norm": 0.34375, "learning_rate": 0.0009357027408206104, "loss": 0.1467, "step": 54058 }, { "epoch": 0.09585284832430391, "grad_norm": 0.5078125, "learning_rate": 0.000935644222676721, "loss": 0.1902, "step": 54060 }, { "epoch": 0.09585639448961372, "grad_norm": 0.20703125, "learning_rate": 0.0009355857052516279, "loss": 0.1987, "step": 54062 }, { "epoch": 0.09585994065492354, "grad_norm": 0.4921875, "learning_rate": 0.0009355271885455874, "loss": 0.2463, "step": 54064 }, { "epoch": 0.09586348682023335, "grad_norm": 0.77734375, "learning_rate": 0.0009354686725588555, "loss": 0.3129, "step": 54066 }, { "epoch": 0.09586703298554317, "grad_norm": 0.369140625, "learning_rate": 0.0009354101572916885, "loss": 0.2187, "step": 54068 }, { "epoch": 0.09587057915085298, "grad_norm": 0.255859375, "learning_rate": 0.0009353516427443411, "loss": 0.1411, "step": 54070 }, { "epoch": 0.0958741253161628, "grad_norm": 0.44921875, "learning_rate": 0.0009352931289170705, "loss": 0.184, "step": 54072 }, { "epoch": 0.09587767148147261, "grad_norm": 0.28515625, "learning_rate": 0.0009352346158101319, "loss": 0.1465, "step": 54074 }, { "epoch": 0.09588121764678244, "grad_norm": 0.37890625, "learning_rate": 0.0009351761034237818, "loss": 0.1934, "step": 54076 }, { "epoch": 0.09588476381209225, "grad_norm": 0.4765625, "learning_rate": 0.000935117591758275, "loss": 0.1725, "step": 54078 }, { "epoch": 0.09588830997740207, "grad_norm": 0.5859375, "learning_rate": 0.0009350590808138684, "loss": 0.1418, "step": 54080 }, { "epoch": 0.09589185614271188, "grad_norm": 0.46875, "learning_rate": 0.0009350005705908173, "loss": 0.162, "step": 54082 }, { "epoch": 0.0958954023080217, "grad_norm": 0.58984375, "learning_rate": 0.0009349420610893784, "loss": 0.1457, "step": 54084 }, { "epoch": 0.09589894847333151, "grad_norm": 0.478515625, "learning_rate": 0.0009348835523098067, "loss": 0.1674, "step": 54086 }, { "epoch": 0.09590249463864133, "grad_norm": 0.287109375, "learning_rate": 0.0009348250442523583, "loss": 0.2087, "step": 54088 }, { "epoch": 0.09590604080395114, "grad_norm": 0.359375, "learning_rate": 0.0009347665369172896, "loss": 0.1637, "step": 54090 }, { "epoch": 0.09590958696926095, "grad_norm": 0.228515625, "learning_rate": 0.0009347080303048559, "loss": 0.226, "step": 54092 }, { "epoch": 0.09591313313457077, "grad_norm": 0.326171875, "learning_rate": 0.0009346495244153134, "loss": 0.2274, "step": 54094 }, { "epoch": 0.09591667929988058, "grad_norm": 0.330078125, "learning_rate": 0.0009345910192489176, "loss": 0.1526, "step": 54096 }, { "epoch": 0.0959202254651904, "grad_norm": 1.328125, "learning_rate": 0.0009345325148059245, "loss": 0.1612, "step": 54098 }, { "epoch": 0.09592377163050021, "grad_norm": 0.48046875, "learning_rate": 0.0009344740110865905, "loss": 0.1642, "step": 54100 }, { "epoch": 0.09592731779581003, "grad_norm": 1.890625, "learning_rate": 0.0009344155080911707, "loss": 0.219, "step": 54102 }, { "epoch": 0.09593086396111984, "grad_norm": 0.87109375, "learning_rate": 0.0009343570058199213, "loss": 0.1852, "step": 54104 }, { "epoch": 0.09593441012642966, "grad_norm": 0.6171875, "learning_rate": 0.0009342985042730981, "loss": 0.1639, "step": 54106 }, { "epoch": 0.09593795629173947, "grad_norm": 0.220703125, "learning_rate": 0.0009342400034509573, "loss": 0.3001, "step": 54108 }, { "epoch": 0.09594150245704929, "grad_norm": 0.458984375, "learning_rate": 0.0009341815033537542, "loss": 0.1382, "step": 54110 }, { "epoch": 0.0959450486223591, "grad_norm": 0.39453125, "learning_rate": 0.000934123003981745, "loss": 0.1817, "step": 54112 }, { "epoch": 0.09594859478766891, "grad_norm": 0.384765625, "learning_rate": 0.0009340645053351852, "loss": 0.1714, "step": 54114 }, { "epoch": 0.09595214095297873, "grad_norm": 0.490234375, "learning_rate": 0.0009340060074143313, "loss": 0.1775, "step": 54116 }, { "epoch": 0.09595568711828854, "grad_norm": 0.384765625, "learning_rate": 0.0009339475102194384, "loss": 0.1448, "step": 54118 }, { "epoch": 0.09595923328359836, "grad_norm": 0.34375, "learning_rate": 0.0009338890137507625, "loss": 0.1903, "step": 54120 }, { "epoch": 0.09596277944890819, "grad_norm": 0.25, "learning_rate": 0.0009338305180085595, "loss": 0.3956, "step": 54122 }, { "epoch": 0.095966325614218, "grad_norm": 1.078125, "learning_rate": 0.0009337720229930858, "loss": 0.271, "step": 54124 }, { "epoch": 0.09596987177952782, "grad_norm": 0.8125, "learning_rate": 0.0009337135287045963, "loss": 0.1788, "step": 54126 }, { "epoch": 0.09597341794483763, "grad_norm": 2.421875, "learning_rate": 0.0009336550351433475, "loss": 0.2894, "step": 54128 }, { "epoch": 0.09597696411014744, "grad_norm": 0.3671875, "learning_rate": 0.0009335965423095947, "loss": 0.2139, "step": 54130 }, { "epoch": 0.09598051027545726, "grad_norm": 0.85546875, "learning_rate": 0.0009335380502035939, "loss": 0.1706, "step": 54132 }, { "epoch": 0.09598405644076707, "grad_norm": 0.59375, "learning_rate": 0.0009334795588256015, "loss": 0.1627, "step": 54134 }, { "epoch": 0.09598760260607689, "grad_norm": 4.375, "learning_rate": 0.0009334210681758723, "loss": 0.1774, "step": 54136 }, { "epoch": 0.0959911487713867, "grad_norm": 1.2734375, "learning_rate": 0.0009333625782546627, "loss": 0.1391, "step": 54138 }, { "epoch": 0.09599469493669652, "grad_norm": 0.4921875, "learning_rate": 0.0009333040890622282, "loss": 0.1862, "step": 54140 }, { "epoch": 0.09599824110200633, "grad_norm": 0.353515625, "learning_rate": 0.0009332456005988252, "loss": 0.1956, "step": 54142 }, { "epoch": 0.09600178726731615, "grad_norm": 0.38671875, "learning_rate": 0.0009331871128647089, "loss": 0.1438, "step": 54144 }, { "epoch": 0.09600533343262596, "grad_norm": 1.65625, "learning_rate": 0.0009331286258601353, "loss": 0.2117, "step": 54146 }, { "epoch": 0.09600887959793578, "grad_norm": 0.72265625, "learning_rate": 0.00093307013958536, "loss": 0.1748, "step": 54148 }, { "epoch": 0.09601242576324559, "grad_norm": 1.71875, "learning_rate": 0.0009330116540406392, "loss": 0.3535, "step": 54150 }, { "epoch": 0.0960159719285554, "grad_norm": 1.4140625, "learning_rate": 0.0009329531692262283, "loss": 0.2384, "step": 54152 }, { "epoch": 0.09601951809386522, "grad_norm": 0.1953125, "learning_rate": 0.0009328946851423829, "loss": 0.3151, "step": 54154 }, { "epoch": 0.09602306425917503, "grad_norm": 0.7109375, "learning_rate": 0.0009328362017893595, "loss": 0.2151, "step": 54156 }, { "epoch": 0.09602661042448485, "grad_norm": 1.609375, "learning_rate": 0.0009327777191674134, "loss": 0.1837, "step": 54158 }, { "epoch": 0.09603015658979466, "grad_norm": 0.470703125, "learning_rate": 0.0009327192372768004, "loss": 0.1897, "step": 54160 }, { "epoch": 0.09603370275510448, "grad_norm": 0.42578125, "learning_rate": 0.000932660756117776, "loss": 0.1609, "step": 54162 }, { "epoch": 0.09603724892041429, "grad_norm": 0.30078125, "learning_rate": 0.0009326022756905967, "loss": 0.1453, "step": 54164 }, { "epoch": 0.09604079508572412, "grad_norm": 0.49609375, "learning_rate": 0.0009325437959955176, "loss": 0.1478, "step": 54166 }, { "epoch": 0.09604434125103394, "grad_norm": 0.92578125, "learning_rate": 0.0009324853170327947, "loss": 0.1897, "step": 54168 }, { "epoch": 0.09604788741634375, "grad_norm": 0.53125, "learning_rate": 0.0009324268388026838, "loss": 0.1598, "step": 54170 }, { "epoch": 0.09605143358165356, "grad_norm": 1.1484375, "learning_rate": 0.0009323683613054401, "loss": 0.1883, "step": 54172 }, { "epoch": 0.09605497974696338, "grad_norm": 0.39453125, "learning_rate": 0.0009323098845413205, "loss": 0.1434, "step": 54174 }, { "epoch": 0.0960585259122732, "grad_norm": 0.76171875, "learning_rate": 0.0009322514085105796, "loss": 0.1966, "step": 54176 }, { "epoch": 0.09606207207758301, "grad_norm": 1.25, "learning_rate": 0.0009321929332134735, "loss": 0.271, "step": 54178 }, { "epoch": 0.09606561824289282, "grad_norm": 0.4140625, "learning_rate": 0.0009321344586502582, "loss": 0.2117, "step": 54180 }, { "epoch": 0.09606916440820264, "grad_norm": 0.6796875, "learning_rate": 0.0009320759848211897, "loss": 0.1592, "step": 54182 }, { "epoch": 0.09607271057351245, "grad_norm": 0.609375, "learning_rate": 0.0009320175117265229, "loss": 0.2941, "step": 54184 }, { "epoch": 0.09607625673882227, "grad_norm": 0.1923828125, "learning_rate": 0.0009319590393665141, "loss": 0.1575, "step": 54186 }, { "epoch": 0.09607980290413208, "grad_norm": 0.609375, "learning_rate": 0.0009319005677414183, "loss": 0.2649, "step": 54188 }, { "epoch": 0.0960833490694419, "grad_norm": 0.423828125, "learning_rate": 0.0009318420968514924, "loss": 0.1887, "step": 54190 }, { "epoch": 0.09608689523475171, "grad_norm": 4.59375, "learning_rate": 0.0009317836266969912, "loss": 0.2511, "step": 54192 }, { "epoch": 0.09609044140006152, "grad_norm": 0.59375, "learning_rate": 0.0009317251572781708, "loss": 0.1863, "step": 54194 }, { "epoch": 0.09609398756537134, "grad_norm": 0.50390625, "learning_rate": 0.0009316666885952867, "loss": 0.2326, "step": 54196 }, { "epoch": 0.09609753373068115, "grad_norm": 0.498046875, "learning_rate": 0.0009316082206485952, "loss": 0.387, "step": 54198 }, { "epoch": 0.09610107989599097, "grad_norm": 0.5078125, "learning_rate": 0.000931549753438351, "loss": 0.2068, "step": 54200 }, { "epoch": 0.09610462606130078, "grad_norm": 0.376953125, "learning_rate": 0.0009314912869648104, "loss": 0.1503, "step": 54202 }, { "epoch": 0.0961081722266106, "grad_norm": 0.259765625, "learning_rate": 0.000931432821228229, "loss": 0.1654, "step": 54204 }, { "epoch": 0.09611171839192041, "grad_norm": 0.16015625, "learning_rate": 0.0009313743562288628, "loss": 0.1422, "step": 54206 }, { "epoch": 0.09611526455723023, "grad_norm": 0.671875, "learning_rate": 0.0009313158919669671, "loss": 0.1908, "step": 54208 }, { "epoch": 0.09611881072254004, "grad_norm": 0.333984375, "learning_rate": 0.0009312574284427975, "loss": 0.2242, "step": 54210 }, { "epoch": 0.09612235688784987, "grad_norm": 0.3515625, "learning_rate": 0.00093119896565661, "loss": 0.1929, "step": 54212 }, { "epoch": 0.09612590305315968, "grad_norm": 0.421875, "learning_rate": 0.0009311405036086602, "loss": 0.1956, "step": 54214 }, { "epoch": 0.0961294492184695, "grad_norm": 0.341796875, "learning_rate": 0.0009310820422992039, "loss": 0.2024, "step": 54216 }, { "epoch": 0.09613299538377931, "grad_norm": 0.3515625, "learning_rate": 0.0009310235817284964, "loss": 0.2286, "step": 54218 }, { "epoch": 0.09613654154908913, "grad_norm": 0.359375, "learning_rate": 0.0009309651218967938, "loss": 0.2492, "step": 54220 }, { "epoch": 0.09614008771439894, "grad_norm": 0.322265625, "learning_rate": 0.0009309066628043512, "loss": 0.1645, "step": 54222 }, { "epoch": 0.09614363387970876, "grad_norm": 0.5859375, "learning_rate": 0.0009308482044514253, "loss": 0.1764, "step": 54224 }, { "epoch": 0.09614718004501857, "grad_norm": 0.37890625, "learning_rate": 0.0009307897468382707, "loss": 0.2859, "step": 54226 }, { "epoch": 0.09615072621032839, "grad_norm": 0.73046875, "learning_rate": 0.0009307312899651432, "loss": 0.2348, "step": 54228 }, { "epoch": 0.0961542723756382, "grad_norm": 0.1923828125, "learning_rate": 0.0009306728338322989, "loss": 0.1425, "step": 54230 }, { "epoch": 0.09615781854094801, "grad_norm": 0.73828125, "learning_rate": 0.0009306143784399936, "loss": 0.1675, "step": 54232 }, { "epoch": 0.09616136470625783, "grad_norm": 0.51171875, "learning_rate": 0.0009305559237884822, "loss": 0.2388, "step": 54234 }, { "epoch": 0.09616491087156764, "grad_norm": 0.17578125, "learning_rate": 0.0009304974698780209, "loss": 0.2141, "step": 54236 }, { "epoch": 0.09616845703687746, "grad_norm": 2.03125, "learning_rate": 0.0009304390167088652, "loss": 0.191, "step": 54238 }, { "epoch": 0.09617200320218727, "grad_norm": 0.32421875, "learning_rate": 0.0009303805642812711, "loss": 0.1745, "step": 54240 }, { "epoch": 0.09617554936749709, "grad_norm": 0.298828125, "learning_rate": 0.0009303221125954935, "loss": 0.2835, "step": 54242 }, { "epoch": 0.0961790955328069, "grad_norm": 6.21875, "learning_rate": 0.0009302636616517883, "loss": 0.2682, "step": 54244 }, { "epoch": 0.09618264169811672, "grad_norm": 0.19921875, "learning_rate": 0.0009302052114504114, "loss": 0.2213, "step": 54246 }, { "epoch": 0.09618618786342653, "grad_norm": 0.734375, "learning_rate": 0.0009301467619916186, "loss": 0.1947, "step": 54248 }, { "epoch": 0.09618973402873635, "grad_norm": 0.63671875, "learning_rate": 0.0009300883132756648, "loss": 0.2101, "step": 54250 }, { "epoch": 0.09619328019404616, "grad_norm": 0.294921875, "learning_rate": 0.000930029865302806, "loss": 0.1611, "step": 54252 }, { "epoch": 0.09619682635935597, "grad_norm": 0.236328125, "learning_rate": 0.0009299714180732979, "loss": 0.1578, "step": 54254 }, { "epoch": 0.09620037252466579, "grad_norm": 1.6484375, "learning_rate": 0.0009299129715873964, "loss": 0.344, "step": 54256 }, { "epoch": 0.09620391868997562, "grad_norm": 0.271484375, "learning_rate": 0.0009298545258453563, "loss": 0.1288, "step": 54258 }, { "epoch": 0.09620746485528543, "grad_norm": 0.220703125, "learning_rate": 0.000929796080847434, "loss": 0.2208, "step": 54260 }, { "epoch": 0.09621101102059525, "grad_norm": 2.25, "learning_rate": 0.0009297376365938845, "loss": 0.3127, "step": 54262 }, { "epoch": 0.09621455718590506, "grad_norm": 0.88671875, "learning_rate": 0.0009296791930849639, "loss": 0.1559, "step": 54264 }, { "epoch": 0.09621810335121488, "grad_norm": 0.435546875, "learning_rate": 0.0009296207503209275, "loss": 0.1821, "step": 54266 }, { "epoch": 0.09622164951652469, "grad_norm": 0.337890625, "learning_rate": 0.0009295623083020308, "loss": 0.1721, "step": 54268 }, { "epoch": 0.0962251956818345, "grad_norm": 0.2421875, "learning_rate": 0.0009295038670285296, "loss": 0.1634, "step": 54270 }, { "epoch": 0.09622874184714432, "grad_norm": 0.267578125, "learning_rate": 0.00092944542650068, "loss": 0.158, "step": 54272 }, { "epoch": 0.09623228801245413, "grad_norm": 6.625, "learning_rate": 0.0009293869867187364, "loss": 0.2195, "step": 54274 }, { "epoch": 0.09623583417776395, "grad_norm": 0.3515625, "learning_rate": 0.0009293285476829552, "loss": 0.2387, "step": 54276 }, { "epoch": 0.09623938034307376, "grad_norm": 0.6796875, "learning_rate": 0.0009292701093935918, "loss": 0.1756, "step": 54278 }, { "epoch": 0.09624292650838358, "grad_norm": 0.625, "learning_rate": 0.0009292116718509018, "loss": 0.1881, "step": 54280 }, { "epoch": 0.09624647267369339, "grad_norm": 0.64453125, "learning_rate": 0.0009291532350551408, "loss": 0.1941, "step": 54282 }, { "epoch": 0.0962500188390032, "grad_norm": 0.65234375, "learning_rate": 0.0009290947990065641, "loss": 0.1688, "step": 54284 }, { "epoch": 0.09625356500431302, "grad_norm": 0.451171875, "learning_rate": 0.0009290363637054276, "loss": 0.153, "step": 54286 }, { "epoch": 0.09625711116962284, "grad_norm": 0.412109375, "learning_rate": 0.0009289779291519871, "loss": 0.2033, "step": 54288 }, { "epoch": 0.09626065733493265, "grad_norm": 0.333984375, "learning_rate": 0.0009289194953464973, "loss": 0.1794, "step": 54290 }, { "epoch": 0.09626420350024247, "grad_norm": 0.31640625, "learning_rate": 0.0009288610622892142, "loss": 0.1791, "step": 54292 }, { "epoch": 0.09626774966555228, "grad_norm": 0.216796875, "learning_rate": 0.0009288026299803938, "loss": 0.146, "step": 54294 }, { "epoch": 0.0962712958308621, "grad_norm": 0.59765625, "learning_rate": 0.000928744198420291, "loss": 0.3573, "step": 54296 }, { "epoch": 0.09627484199617191, "grad_norm": 0.94140625, "learning_rate": 0.0009286857676091618, "loss": 0.175, "step": 54298 }, { "epoch": 0.09627838816148172, "grad_norm": 0.419921875, "learning_rate": 0.0009286273375472613, "loss": 0.2028, "step": 54300 }, { "epoch": 0.09628193432679155, "grad_norm": 0.1640625, "learning_rate": 0.0009285689082348451, "loss": 0.1191, "step": 54302 }, { "epoch": 0.09628548049210137, "grad_norm": 0.71875, "learning_rate": 0.0009285104796721693, "loss": 0.2002, "step": 54304 }, { "epoch": 0.09628902665741118, "grad_norm": 0.50390625, "learning_rate": 0.0009284520518594892, "loss": 0.1478, "step": 54306 }, { "epoch": 0.096292572822721, "grad_norm": 0.375, "learning_rate": 0.0009283936247970599, "loss": 0.4496, "step": 54308 }, { "epoch": 0.09629611898803081, "grad_norm": 0.458984375, "learning_rate": 0.0009283351984851369, "loss": 0.1655, "step": 54310 }, { "epoch": 0.09629966515334062, "grad_norm": 0.55859375, "learning_rate": 0.0009282767729239765, "loss": 0.1743, "step": 54312 }, { "epoch": 0.09630321131865044, "grad_norm": 4.375, "learning_rate": 0.0009282183481138337, "loss": 0.3051, "step": 54314 }, { "epoch": 0.09630675748396025, "grad_norm": 1.3359375, "learning_rate": 0.0009281599240549641, "loss": 0.1868, "step": 54316 }, { "epoch": 0.09631030364927007, "grad_norm": 0.423828125, "learning_rate": 0.0009281015007476228, "loss": 0.1757, "step": 54318 }, { "epoch": 0.09631384981457988, "grad_norm": 0.1630859375, "learning_rate": 0.0009280430781920659, "loss": 0.3379, "step": 54320 }, { "epoch": 0.0963173959798897, "grad_norm": 0.294921875, "learning_rate": 0.0009279846563885491, "loss": 0.2065, "step": 54322 }, { "epoch": 0.09632094214519951, "grad_norm": 0.431640625, "learning_rate": 0.0009279262353373268, "loss": 0.1665, "step": 54324 }, { "epoch": 0.09632448831050933, "grad_norm": 0.85546875, "learning_rate": 0.0009278678150386555, "loss": 0.3096, "step": 54326 }, { "epoch": 0.09632803447581914, "grad_norm": 0.57421875, "learning_rate": 0.0009278093954927902, "loss": 0.1917, "step": 54328 }, { "epoch": 0.09633158064112896, "grad_norm": 1.5859375, "learning_rate": 0.000927750976699987, "loss": 0.2616, "step": 54330 }, { "epoch": 0.09633512680643877, "grad_norm": 0.302734375, "learning_rate": 0.0009276925586605007, "loss": 0.1604, "step": 54332 }, { "epoch": 0.09633867297174858, "grad_norm": 0.8515625, "learning_rate": 0.0009276341413745873, "loss": 0.1947, "step": 54334 }, { "epoch": 0.0963422191370584, "grad_norm": 0.34765625, "learning_rate": 0.0009275757248425016, "loss": 0.226, "step": 54336 }, { "epoch": 0.09634576530236821, "grad_norm": 0.82421875, "learning_rate": 0.0009275173090645001, "loss": 0.2102, "step": 54338 }, { "epoch": 0.09634931146767803, "grad_norm": 0.3203125, "learning_rate": 0.0009274588940408373, "loss": 0.1813, "step": 54340 }, { "epoch": 0.09635285763298784, "grad_norm": 0.48046875, "learning_rate": 0.000927400479771769, "loss": 0.2155, "step": 54342 }, { "epoch": 0.09635640379829766, "grad_norm": 0.4921875, "learning_rate": 0.0009273420662575507, "loss": 0.2893, "step": 54344 }, { "epoch": 0.09635994996360747, "grad_norm": 2.046875, "learning_rate": 0.0009272836534984386, "loss": 0.2173, "step": 54346 }, { "epoch": 0.0963634961289173, "grad_norm": 0.40234375, "learning_rate": 0.0009272252414946867, "loss": 0.2026, "step": 54348 }, { "epoch": 0.09636704229422711, "grad_norm": 0.431640625, "learning_rate": 0.0009271668302465515, "loss": 0.2055, "step": 54350 }, { "epoch": 0.09637058845953693, "grad_norm": 3.5625, "learning_rate": 0.0009271084197542882, "loss": 0.2428, "step": 54352 }, { "epoch": 0.09637413462484674, "grad_norm": 0.58984375, "learning_rate": 0.0009270500100181524, "loss": 0.1566, "step": 54354 }, { "epoch": 0.09637768079015656, "grad_norm": 0.3984375, "learning_rate": 0.0009269916010383992, "loss": 0.3635, "step": 54356 }, { "epoch": 0.09638122695546637, "grad_norm": 1.9140625, "learning_rate": 0.0009269331928152843, "loss": 0.4302, "step": 54358 }, { "epoch": 0.09638477312077619, "grad_norm": 0.2041015625, "learning_rate": 0.0009268747853490629, "loss": 0.1663, "step": 54360 }, { "epoch": 0.096388319286086, "grad_norm": 4.3125, "learning_rate": 0.0009268163786399912, "loss": 0.2744, "step": 54362 }, { "epoch": 0.09639186545139582, "grad_norm": 2.4375, "learning_rate": 0.0009267579726883235, "loss": 0.1811, "step": 54364 }, { "epoch": 0.09639541161670563, "grad_norm": 0.5625, "learning_rate": 0.0009266995674943158, "loss": 0.172, "step": 54366 }, { "epoch": 0.09639895778201545, "grad_norm": 0.38671875, "learning_rate": 0.0009266411630582237, "loss": 0.2079, "step": 54368 }, { "epoch": 0.09640250394732526, "grad_norm": 0.76171875, "learning_rate": 0.0009265827593803027, "loss": 0.2423, "step": 54370 }, { "epoch": 0.09640605011263507, "grad_norm": 0.296875, "learning_rate": 0.0009265243564608075, "loss": 0.2395, "step": 54372 }, { "epoch": 0.09640959627794489, "grad_norm": 0.66015625, "learning_rate": 0.0009264659542999943, "loss": 0.1717, "step": 54374 }, { "epoch": 0.0964131424432547, "grad_norm": 0.3984375, "learning_rate": 0.000926407552898118, "loss": 0.1787, "step": 54376 }, { "epoch": 0.09641668860856452, "grad_norm": 0.33984375, "learning_rate": 0.0009263491522554341, "loss": 0.234, "step": 54378 }, { "epoch": 0.09642023477387433, "grad_norm": 0.349609375, "learning_rate": 0.0009262907523721988, "loss": 0.1542, "step": 54380 }, { "epoch": 0.09642378093918415, "grad_norm": 0.36328125, "learning_rate": 0.0009262323532486662, "loss": 0.1923, "step": 54382 }, { "epoch": 0.09642732710449396, "grad_norm": 0.55859375, "learning_rate": 0.0009261739548850925, "loss": 0.1565, "step": 54384 }, { "epoch": 0.09643087326980378, "grad_norm": 3.234375, "learning_rate": 0.0009261155572817333, "loss": 0.279, "step": 54386 }, { "epoch": 0.09643441943511359, "grad_norm": 0.361328125, "learning_rate": 0.0009260571604388435, "loss": 0.1809, "step": 54388 }, { "epoch": 0.0964379656004234, "grad_norm": 0.482421875, "learning_rate": 0.0009259987643566785, "loss": 0.3592, "step": 54390 }, { "epoch": 0.09644151176573322, "grad_norm": 0.79296875, "learning_rate": 0.0009259403690354938, "loss": 0.381, "step": 54392 }, { "epoch": 0.09644505793104305, "grad_norm": 0.4921875, "learning_rate": 0.0009258819744755449, "loss": 0.1708, "step": 54394 }, { "epoch": 0.09644860409635286, "grad_norm": 0.365234375, "learning_rate": 0.0009258235806770873, "loss": 0.1575, "step": 54396 }, { "epoch": 0.09645215026166268, "grad_norm": 0.56640625, "learning_rate": 0.0009257651876403758, "loss": 0.1765, "step": 54398 }, { "epoch": 0.09645569642697249, "grad_norm": 0.232421875, "learning_rate": 0.0009257067953656661, "loss": 0.2044, "step": 54400 }, { "epoch": 0.09645924259228231, "grad_norm": 0.259765625, "learning_rate": 0.0009256484038532138, "loss": 0.2069, "step": 54402 }, { "epoch": 0.09646278875759212, "grad_norm": 1.7578125, "learning_rate": 0.0009255900131032747, "loss": 0.3355, "step": 54404 }, { "epoch": 0.09646633492290194, "grad_norm": 0.396484375, "learning_rate": 0.0009255316231161028, "loss": 0.1681, "step": 54406 }, { "epoch": 0.09646988108821175, "grad_norm": 0.24609375, "learning_rate": 0.0009254732338919545, "loss": 0.2071, "step": 54408 }, { "epoch": 0.09647342725352157, "grad_norm": 0.337890625, "learning_rate": 0.0009254148454310849, "loss": 0.1501, "step": 54410 }, { "epoch": 0.09647697341883138, "grad_norm": 0.34375, "learning_rate": 0.0009253564577337495, "loss": 0.3319, "step": 54412 }, { "epoch": 0.0964805195841412, "grad_norm": 0.53515625, "learning_rate": 0.0009252980708002032, "loss": 0.1433, "step": 54414 }, { "epoch": 0.09648406574945101, "grad_norm": 0.5234375, "learning_rate": 0.0009252396846307019, "loss": 0.1884, "step": 54416 }, { "epoch": 0.09648761191476082, "grad_norm": 2.734375, "learning_rate": 0.0009251812992255005, "loss": 0.2848, "step": 54418 }, { "epoch": 0.09649115808007064, "grad_norm": 0.228515625, "learning_rate": 0.000925122914584855, "loss": 0.2251, "step": 54420 }, { "epoch": 0.09649470424538045, "grad_norm": 0.44140625, "learning_rate": 0.0009250645307090198, "loss": 0.1448, "step": 54422 }, { "epoch": 0.09649825041069027, "grad_norm": 0.875, "learning_rate": 0.0009250061475982507, "loss": 0.1798, "step": 54424 }, { "epoch": 0.09650179657600008, "grad_norm": 0.44921875, "learning_rate": 0.0009249477652528034, "loss": 0.1625, "step": 54426 }, { "epoch": 0.0965053427413099, "grad_norm": 0.197265625, "learning_rate": 0.0009248893836729329, "loss": 0.1995, "step": 54428 }, { "epoch": 0.09650888890661971, "grad_norm": 2.546875, "learning_rate": 0.0009248310028588942, "loss": 0.1846, "step": 54430 }, { "epoch": 0.09651243507192953, "grad_norm": 0.2734375, "learning_rate": 0.000924772622810943, "loss": 0.1474, "step": 54432 }, { "epoch": 0.09651598123723934, "grad_norm": 1.03125, "learning_rate": 0.0009247142435293345, "loss": 0.2639, "step": 54434 }, { "epoch": 0.09651952740254915, "grad_norm": 0.423828125, "learning_rate": 0.0009246558650143246, "loss": 0.1569, "step": 54436 }, { "epoch": 0.09652307356785898, "grad_norm": 0.1796875, "learning_rate": 0.0009245974872661675, "loss": 0.1718, "step": 54438 }, { "epoch": 0.0965266197331688, "grad_norm": 0.33984375, "learning_rate": 0.0009245391102851195, "loss": 0.1923, "step": 54440 }, { "epoch": 0.09653016589847861, "grad_norm": 0.458984375, "learning_rate": 0.0009244807340714351, "loss": 0.1843, "step": 54442 }, { "epoch": 0.09653371206378843, "grad_norm": 0.208984375, "learning_rate": 0.0009244223586253707, "loss": 0.1849, "step": 54444 }, { "epoch": 0.09653725822909824, "grad_norm": 0.314453125, "learning_rate": 0.0009243639839471802, "loss": 0.1403, "step": 54446 }, { "epoch": 0.09654080439440806, "grad_norm": 0.50390625, "learning_rate": 0.0009243056100371202, "loss": 0.2051, "step": 54448 }, { "epoch": 0.09654435055971787, "grad_norm": 0.404296875, "learning_rate": 0.0009242472368954449, "loss": 0.1395, "step": 54450 }, { "epoch": 0.09654789672502768, "grad_norm": 0.671875, "learning_rate": 0.0009241888645224105, "loss": 0.2077, "step": 54452 }, { "epoch": 0.0965514428903375, "grad_norm": 1.0390625, "learning_rate": 0.0009241304929182716, "loss": 0.2372, "step": 54454 }, { "epoch": 0.09655498905564731, "grad_norm": 0.99609375, "learning_rate": 0.0009240721220832838, "loss": 0.2428, "step": 54456 }, { "epoch": 0.09655853522095713, "grad_norm": 0.69921875, "learning_rate": 0.0009240137520177023, "loss": 0.292, "step": 54458 }, { "epoch": 0.09656208138626694, "grad_norm": 0.263671875, "learning_rate": 0.0009239553827217826, "loss": 0.1765, "step": 54460 }, { "epoch": 0.09656562755157676, "grad_norm": 0.291015625, "learning_rate": 0.0009238970141957799, "loss": 0.2441, "step": 54462 }, { "epoch": 0.09656917371688657, "grad_norm": 0.298828125, "learning_rate": 0.0009238386464399491, "loss": 0.1769, "step": 54464 }, { "epoch": 0.09657271988219639, "grad_norm": 0.7109375, "learning_rate": 0.0009237802794545458, "loss": 0.174, "step": 54466 }, { "epoch": 0.0965762660475062, "grad_norm": 5.625, "learning_rate": 0.0009237219132398251, "loss": 0.2701, "step": 54468 }, { "epoch": 0.09657981221281602, "grad_norm": 0.2021484375, "learning_rate": 0.0009236635477960427, "loss": 0.1782, "step": 54470 }, { "epoch": 0.09658335837812583, "grad_norm": 0.1953125, "learning_rate": 0.0009236051831234533, "loss": 0.1236, "step": 54472 }, { "epoch": 0.09658690454343564, "grad_norm": 0.84765625, "learning_rate": 0.0009235468192223122, "loss": 0.2677, "step": 54474 }, { "epoch": 0.09659045070874546, "grad_norm": 0.337890625, "learning_rate": 0.0009234884560928749, "loss": 0.1306, "step": 54476 }, { "epoch": 0.09659399687405527, "grad_norm": 0.224609375, "learning_rate": 0.0009234300937353968, "loss": 0.1525, "step": 54478 }, { "epoch": 0.09659754303936509, "grad_norm": 0.96484375, "learning_rate": 0.0009233717321501327, "loss": 0.2247, "step": 54480 }, { "epoch": 0.0966010892046749, "grad_norm": 0.70703125, "learning_rate": 0.0009233133713373383, "loss": 0.1684, "step": 54482 }, { "epoch": 0.09660463536998473, "grad_norm": 0.25390625, "learning_rate": 0.0009232550112972682, "loss": 0.19, "step": 54484 }, { "epoch": 0.09660818153529455, "grad_norm": 0.361328125, "learning_rate": 0.0009231966520301785, "loss": 0.1857, "step": 54486 }, { "epoch": 0.09661172770060436, "grad_norm": 0.373046875, "learning_rate": 0.0009231382935363235, "loss": 0.2081, "step": 54488 }, { "epoch": 0.09661527386591418, "grad_norm": 0.322265625, "learning_rate": 0.0009230799358159587, "loss": 0.1877, "step": 54490 }, { "epoch": 0.09661882003122399, "grad_norm": 1.390625, "learning_rate": 0.0009230215788693397, "loss": 0.4418, "step": 54492 }, { "epoch": 0.0966223661965338, "grad_norm": 0.23046875, "learning_rate": 0.0009229632226967219, "loss": 0.2194, "step": 54494 }, { "epoch": 0.09662591236184362, "grad_norm": 0.625, "learning_rate": 0.0009229048672983598, "loss": 0.2698, "step": 54496 }, { "epoch": 0.09662945852715343, "grad_norm": 0.2275390625, "learning_rate": 0.0009228465126745089, "loss": 0.2087, "step": 54498 }, { "epoch": 0.09663300469246325, "grad_norm": 0.330078125, "learning_rate": 0.0009227881588254245, "loss": 0.1332, "step": 54500 }, { "epoch": 0.09663655085777306, "grad_norm": 0.9140625, "learning_rate": 0.000922729805751362, "loss": 0.1783, "step": 54502 }, { "epoch": 0.09664009702308288, "grad_norm": 0.50390625, "learning_rate": 0.0009226714534525762, "loss": 0.2059, "step": 54504 }, { "epoch": 0.09664364318839269, "grad_norm": 1.90625, "learning_rate": 0.0009226131019293223, "loss": 0.2286, "step": 54506 }, { "epoch": 0.0966471893537025, "grad_norm": 1.5078125, "learning_rate": 0.0009225547511818555, "loss": 0.2272, "step": 54508 }, { "epoch": 0.09665073551901232, "grad_norm": 0.392578125, "learning_rate": 0.0009224964012104318, "loss": 0.1606, "step": 54510 }, { "epoch": 0.09665428168432214, "grad_norm": 0.2333984375, "learning_rate": 0.0009224380520153053, "loss": 0.167, "step": 54512 }, { "epoch": 0.09665782784963195, "grad_norm": 0.58984375, "learning_rate": 0.0009223797035967314, "loss": 0.193, "step": 54514 }, { "epoch": 0.09666137401494176, "grad_norm": 0.78515625, "learning_rate": 0.0009223213559549657, "loss": 0.1678, "step": 54516 }, { "epoch": 0.09666492018025158, "grad_norm": 0.33203125, "learning_rate": 0.0009222630090902635, "loss": 0.1699, "step": 54518 }, { "epoch": 0.0966684663455614, "grad_norm": 0.3828125, "learning_rate": 0.0009222046630028793, "loss": 0.2126, "step": 54520 }, { "epoch": 0.09667201251087121, "grad_norm": 0.2392578125, "learning_rate": 0.0009221463176930689, "loss": 0.1721, "step": 54522 }, { "epoch": 0.09667555867618102, "grad_norm": 0.234375, "learning_rate": 0.0009220879731610869, "loss": 0.1286, "step": 54524 }, { "epoch": 0.09667910484149084, "grad_norm": 0.19921875, "learning_rate": 0.000922029629407189, "loss": 0.1208, "step": 54526 }, { "epoch": 0.09668265100680065, "grad_norm": 0.33984375, "learning_rate": 0.0009219712864316301, "loss": 0.4982, "step": 54528 }, { "epoch": 0.09668619717211048, "grad_norm": 0.1474609375, "learning_rate": 0.000921912944234665, "loss": 0.119, "step": 54530 }, { "epoch": 0.0966897433374203, "grad_norm": 0.390625, "learning_rate": 0.0009218546028165496, "loss": 0.1713, "step": 54532 }, { "epoch": 0.09669328950273011, "grad_norm": 0.427734375, "learning_rate": 0.0009217962621775387, "loss": 0.1836, "step": 54534 }, { "epoch": 0.09669683566803992, "grad_norm": 0.1826171875, "learning_rate": 0.0009217379223178875, "loss": 0.1439, "step": 54536 }, { "epoch": 0.09670038183334974, "grad_norm": 0.828125, "learning_rate": 0.0009216795832378506, "loss": 0.1646, "step": 54538 }, { "epoch": 0.09670392799865955, "grad_norm": 0.283203125, "learning_rate": 0.0009216212449376839, "loss": 0.1535, "step": 54540 }, { "epoch": 0.09670747416396937, "grad_norm": 0.333984375, "learning_rate": 0.0009215629074176426, "loss": 0.1393, "step": 54542 }, { "epoch": 0.09671102032927918, "grad_norm": 2.78125, "learning_rate": 0.0009215045706779812, "loss": 0.2029, "step": 54544 }, { "epoch": 0.096714566494589, "grad_norm": 0.279296875, "learning_rate": 0.0009214462347189549, "loss": 0.1728, "step": 54546 }, { "epoch": 0.09671811265989881, "grad_norm": 1.1328125, "learning_rate": 0.0009213878995408194, "loss": 0.2619, "step": 54548 }, { "epoch": 0.09672165882520863, "grad_norm": 0.2333984375, "learning_rate": 0.0009213295651438291, "loss": 0.1388, "step": 54550 }, { "epoch": 0.09672520499051844, "grad_norm": 0.48046875, "learning_rate": 0.0009212712315282401, "loss": 0.1648, "step": 54552 }, { "epoch": 0.09672875115582825, "grad_norm": 0.43359375, "learning_rate": 0.0009212128986943065, "loss": 0.2198, "step": 54554 }, { "epoch": 0.09673229732113807, "grad_norm": 0.466796875, "learning_rate": 0.0009211545666422841, "loss": 0.1779, "step": 54556 }, { "epoch": 0.09673584348644788, "grad_norm": 0.71875, "learning_rate": 0.0009210962353724272, "loss": 0.492, "step": 54558 }, { "epoch": 0.0967393896517577, "grad_norm": 0.6640625, "learning_rate": 0.0009210379048849922, "loss": 0.3664, "step": 54560 }, { "epoch": 0.09674293581706751, "grad_norm": 0.45703125, "learning_rate": 0.000920979575180233, "loss": 0.2013, "step": 54562 }, { "epoch": 0.09674648198237733, "grad_norm": 0.28125, "learning_rate": 0.0009209212462584049, "loss": 0.1864, "step": 54564 }, { "epoch": 0.09675002814768714, "grad_norm": 0.2216796875, "learning_rate": 0.0009208629181197636, "loss": 0.131, "step": 54566 }, { "epoch": 0.09675357431299696, "grad_norm": 0.84375, "learning_rate": 0.0009208045907645641, "loss": 0.1389, "step": 54568 }, { "epoch": 0.09675712047830677, "grad_norm": 0.310546875, "learning_rate": 0.0009207462641930606, "loss": 0.2339, "step": 54570 }, { "epoch": 0.09676066664361659, "grad_norm": 0.578125, "learning_rate": 0.0009206879384055091, "loss": 0.1674, "step": 54572 }, { "epoch": 0.09676421280892641, "grad_norm": 0.291015625, "learning_rate": 0.0009206296134021645, "loss": 0.1244, "step": 54574 }, { "epoch": 0.09676775897423623, "grad_norm": 2.015625, "learning_rate": 0.0009205712891832818, "loss": 0.349, "step": 54576 }, { "epoch": 0.09677130513954604, "grad_norm": 1.671875, "learning_rate": 0.000920512965749116, "loss": 0.2712, "step": 54578 }, { "epoch": 0.09677485130485586, "grad_norm": 0.51953125, "learning_rate": 0.0009204546430999221, "loss": 0.2488, "step": 54580 }, { "epoch": 0.09677839747016567, "grad_norm": 0.66015625, "learning_rate": 0.0009203963212359555, "loss": 0.1564, "step": 54582 }, { "epoch": 0.09678194363547549, "grad_norm": 0.369140625, "learning_rate": 0.0009203380001574711, "loss": 0.2041, "step": 54584 }, { "epoch": 0.0967854898007853, "grad_norm": 0.55078125, "learning_rate": 0.0009202796798647237, "loss": 0.2216, "step": 54586 }, { "epoch": 0.09678903596609512, "grad_norm": 0.361328125, "learning_rate": 0.0009202213603579685, "loss": 0.1708, "step": 54588 }, { "epoch": 0.09679258213140493, "grad_norm": 0.22265625, "learning_rate": 0.0009201630416374607, "loss": 0.1583, "step": 54590 }, { "epoch": 0.09679612829671475, "grad_norm": 0.9375, "learning_rate": 0.0009201047237034557, "loss": 0.2158, "step": 54592 }, { "epoch": 0.09679967446202456, "grad_norm": 0.29296875, "learning_rate": 0.0009200464065562077, "loss": 0.2223, "step": 54594 }, { "epoch": 0.09680322062733437, "grad_norm": 1.890625, "learning_rate": 0.0009199880901959723, "loss": 0.4148, "step": 54596 }, { "epoch": 0.09680676679264419, "grad_norm": 0.322265625, "learning_rate": 0.0009199297746230044, "loss": 0.1762, "step": 54598 }, { "epoch": 0.096810312957954, "grad_norm": 0.384765625, "learning_rate": 0.0009198714598375595, "loss": 0.2302, "step": 54600 }, { "epoch": 0.09681385912326382, "grad_norm": 0.5078125, "learning_rate": 0.0009198131458398917, "loss": 0.2053, "step": 54602 }, { "epoch": 0.09681740528857363, "grad_norm": 0.22265625, "learning_rate": 0.0009197548326302567, "loss": 0.2135, "step": 54604 }, { "epoch": 0.09682095145388345, "grad_norm": 0.3203125, "learning_rate": 0.0009196965202089092, "loss": 0.2115, "step": 54606 }, { "epoch": 0.09682449761919326, "grad_norm": 0.59375, "learning_rate": 0.0009196382085761048, "loss": 0.1746, "step": 54608 }, { "epoch": 0.09682804378450308, "grad_norm": 0.671875, "learning_rate": 0.0009195798977320978, "loss": 0.1697, "step": 54610 }, { "epoch": 0.09683158994981289, "grad_norm": 1.125, "learning_rate": 0.0009195215876771432, "loss": 0.2543, "step": 54612 }, { "epoch": 0.0968351361151227, "grad_norm": 0.61328125, "learning_rate": 0.0009194632784114969, "loss": 0.18, "step": 54614 }, { "epoch": 0.09683868228043252, "grad_norm": 0.90234375, "learning_rate": 0.0009194049699354132, "loss": 0.1732, "step": 54616 }, { "epoch": 0.09684222844574233, "grad_norm": 0.53125, "learning_rate": 0.0009193466622491473, "loss": 0.2428, "step": 54618 }, { "epoch": 0.09684577461105216, "grad_norm": 0.328125, "learning_rate": 0.0009192883553529541, "loss": 0.1661, "step": 54620 }, { "epoch": 0.09684932077636198, "grad_norm": 2.984375, "learning_rate": 0.0009192300492470884, "loss": 0.1857, "step": 54622 }, { "epoch": 0.09685286694167179, "grad_norm": 0.36328125, "learning_rate": 0.0009191717439318059, "loss": 0.1842, "step": 54624 }, { "epoch": 0.0968564131069816, "grad_norm": 0.287109375, "learning_rate": 0.000919113439407361, "loss": 0.3385, "step": 54626 }, { "epoch": 0.09685995927229142, "grad_norm": 2.203125, "learning_rate": 0.0009190551356740087, "loss": 0.2075, "step": 54628 }, { "epoch": 0.09686350543760124, "grad_norm": 0.5390625, "learning_rate": 0.000918996832732004, "loss": 0.1891, "step": 54630 }, { "epoch": 0.09686705160291105, "grad_norm": 0.240234375, "learning_rate": 0.0009189385305816025, "loss": 0.1396, "step": 54632 }, { "epoch": 0.09687059776822086, "grad_norm": 0.94921875, "learning_rate": 0.0009188802292230585, "loss": 0.2014, "step": 54634 }, { "epoch": 0.09687414393353068, "grad_norm": 0.296875, "learning_rate": 0.0009188219286566271, "loss": 0.1807, "step": 54636 }, { "epoch": 0.0968776900988405, "grad_norm": 2.03125, "learning_rate": 0.0009187636288825634, "loss": 0.1849, "step": 54638 }, { "epoch": 0.09688123626415031, "grad_norm": 0.32421875, "learning_rate": 0.0009187053299011221, "loss": 0.1987, "step": 54640 }, { "epoch": 0.09688478242946012, "grad_norm": 0.234375, "learning_rate": 0.0009186470317125588, "loss": 0.18, "step": 54642 }, { "epoch": 0.09688832859476994, "grad_norm": 0.3671875, "learning_rate": 0.0009185887343171275, "loss": 0.1879, "step": 54644 }, { "epoch": 0.09689187476007975, "grad_norm": 0.443359375, "learning_rate": 0.0009185304377150841, "loss": 0.2028, "step": 54646 }, { "epoch": 0.09689542092538957, "grad_norm": 0.734375, "learning_rate": 0.0009184721419066831, "loss": 0.3262, "step": 54648 }, { "epoch": 0.09689896709069938, "grad_norm": 0.546875, "learning_rate": 0.0009184138468921794, "loss": 0.211, "step": 54650 }, { "epoch": 0.0969025132560092, "grad_norm": 0.58984375, "learning_rate": 0.0009183555526718284, "loss": 0.2103, "step": 54652 }, { "epoch": 0.09690605942131901, "grad_norm": 0.72265625, "learning_rate": 0.0009182972592458843, "loss": 0.182, "step": 54654 }, { "epoch": 0.09690960558662882, "grad_norm": 0.1845703125, "learning_rate": 0.0009182389666146023, "loss": 0.131, "step": 54656 }, { "epoch": 0.09691315175193864, "grad_norm": 0.7265625, "learning_rate": 0.000918180674778238, "loss": 0.2427, "step": 54658 }, { "epoch": 0.09691669791724845, "grad_norm": 1.1015625, "learning_rate": 0.0009181223837370456, "loss": 0.2615, "step": 54660 }, { "epoch": 0.09692024408255827, "grad_norm": 0.40234375, "learning_rate": 0.0009180640934912798, "loss": 0.203, "step": 54662 }, { "epoch": 0.09692379024786808, "grad_norm": 0.72265625, "learning_rate": 0.0009180058040411965, "loss": 0.226, "step": 54664 }, { "epoch": 0.09692733641317791, "grad_norm": 0.703125, "learning_rate": 0.0009179475153870502, "loss": 0.2138, "step": 54666 }, { "epoch": 0.09693088257848773, "grad_norm": 0.48828125, "learning_rate": 0.0009178892275290954, "loss": 0.1459, "step": 54668 }, { "epoch": 0.09693442874379754, "grad_norm": 0.263671875, "learning_rate": 0.0009178309404675875, "loss": 0.1956, "step": 54670 }, { "epoch": 0.09693797490910736, "grad_norm": 0.921875, "learning_rate": 0.0009177726542027811, "loss": 0.2435, "step": 54672 }, { "epoch": 0.09694152107441717, "grad_norm": 0.625, "learning_rate": 0.0009177143687349314, "loss": 0.1859, "step": 54674 }, { "epoch": 0.09694506723972698, "grad_norm": 0.859375, "learning_rate": 0.0009176560840642932, "loss": 0.1318, "step": 54676 }, { "epoch": 0.0969486134050368, "grad_norm": 0.263671875, "learning_rate": 0.0009175978001911212, "loss": 0.2396, "step": 54678 }, { "epoch": 0.09695215957034661, "grad_norm": 0.482421875, "learning_rate": 0.0009175395171156701, "loss": 0.2663, "step": 54680 }, { "epoch": 0.09695570573565643, "grad_norm": 1.0625, "learning_rate": 0.000917481234838196, "loss": 0.1624, "step": 54682 }, { "epoch": 0.09695925190096624, "grad_norm": 8.4375, "learning_rate": 0.0009174229533589526, "loss": 0.4396, "step": 54684 }, { "epoch": 0.09696279806627606, "grad_norm": 0.29296875, "learning_rate": 0.000917364672678195, "loss": 0.189, "step": 54686 }, { "epoch": 0.09696634423158587, "grad_norm": 0.36328125, "learning_rate": 0.0009173063927961784, "loss": 0.1763, "step": 54688 }, { "epoch": 0.09696989039689569, "grad_norm": 0.349609375, "learning_rate": 0.0009172481137131577, "loss": 0.2121, "step": 54690 }, { "epoch": 0.0969734365622055, "grad_norm": 0.2041015625, "learning_rate": 0.0009171898354293872, "loss": 0.2556, "step": 54692 }, { "epoch": 0.09697698272751532, "grad_norm": 0.94921875, "learning_rate": 0.0009171315579451222, "loss": 0.194, "step": 54694 }, { "epoch": 0.09698052889282513, "grad_norm": 1.1484375, "learning_rate": 0.0009170732812606177, "loss": 0.2028, "step": 54696 }, { "epoch": 0.09698407505813494, "grad_norm": 0.353515625, "learning_rate": 0.0009170150053761286, "loss": 0.2263, "step": 54698 }, { "epoch": 0.09698762122344476, "grad_norm": 0.486328125, "learning_rate": 0.0009169567302919093, "loss": 0.1582, "step": 54700 }, { "epoch": 0.09699116738875457, "grad_norm": 0.279296875, "learning_rate": 0.0009168984560082148, "loss": 0.1919, "step": 54702 }, { "epoch": 0.09699471355406439, "grad_norm": 0.333984375, "learning_rate": 0.0009168401825253004, "loss": 0.2045, "step": 54704 }, { "epoch": 0.0969982597193742, "grad_norm": 0.63671875, "learning_rate": 0.0009167819098434207, "loss": 0.199, "step": 54706 }, { "epoch": 0.09700180588468402, "grad_norm": 0.1865234375, "learning_rate": 0.0009167236379628301, "loss": 0.2224, "step": 54708 }, { "epoch": 0.09700535204999385, "grad_norm": 0.75, "learning_rate": 0.0009166653668837843, "loss": 0.2583, "step": 54710 }, { "epoch": 0.09700889821530366, "grad_norm": 0.68359375, "learning_rate": 0.0009166070966065374, "loss": 0.1935, "step": 54712 }, { "epoch": 0.09701244438061347, "grad_norm": 3.53125, "learning_rate": 0.0009165488271313444, "loss": 0.2847, "step": 54714 }, { "epoch": 0.09701599054592329, "grad_norm": 0.71875, "learning_rate": 0.0009164905584584608, "loss": 0.1849, "step": 54716 }, { "epoch": 0.0970195367112331, "grad_norm": 0.322265625, "learning_rate": 0.0009164322905881404, "loss": 0.1702, "step": 54718 }, { "epoch": 0.09702308287654292, "grad_norm": 0.390625, "learning_rate": 0.0009163740235206389, "loss": 0.2127, "step": 54720 }, { "epoch": 0.09702662904185273, "grad_norm": 0.68359375, "learning_rate": 0.0009163157572562102, "loss": 0.1879, "step": 54722 }, { "epoch": 0.09703017520716255, "grad_norm": 0.27734375, "learning_rate": 0.0009162574917951104, "loss": 0.192, "step": 54724 }, { "epoch": 0.09703372137247236, "grad_norm": 0.20703125, "learning_rate": 0.0009161992271375932, "loss": 0.1365, "step": 54726 }, { "epoch": 0.09703726753778218, "grad_norm": 0.29296875, "learning_rate": 0.000916140963283914, "loss": 0.1882, "step": 54728 }, { "epoch": 0.09704081370309199, "grad_norm": 0.27734375, "learning_rate": 0.000916082700234327, "loss": 0.1523, "step": 54730 }, { "epoch": 0.0970443598684018, "grad_norm": 0.27734375, "learning_rate": 0.0009160244379890881, "loss": 0.215, "step": 54732 }, { "epoch": 0.09704790603371162, "grad_norm": 1.1640625, "learning_rate": 0.000915966176548451, "loss": 0.2075, "step": 54734 }, { "epoch": 0.09705145219902143, "grad_norm": 0.380859375, "learning_rate": 0.0009159079159126709, "loss": 0.1741, "step": 54736 }, { "epoch": 0.09705499836433125, "grad_norm": 1.1328125, "learning_rate": 0.0009158496560820027, "loss": 0.2474, "step": 54738 }, { "epoch": 0.09705854452964106, "grad_norm": 0.1845703125, "learning_rate": 0.0009157913970567013, "loss": 0.1117, "step": 54740 }, { "epoch": 0.09706209069495088, "grad_norm": 0.859375, "learning_rate": 0.0009157331388370214, "loss": 0.2116, "step": 54742 }, { "epoch": 0.09706563686026069, "grad_norm": 0.59375, "learning_rate": 0.0009156748814232176, "loss": 0.1941, "step": 54744 }, { "epoch": 0.09706918302557051, "grad_norm": 0.314453125, "learning_rate": 0.0009156166248155446, "loss": 0.2161, "step": 54746 }, { "epoch": 0.09707272919088032, "grad_norm": 1.3671875, "learning_rate": 0.0009155583690142578, "loss": 0.2049, "step": 54748 }, { "epoch": 0.09707627535619014, "grad_norm": 0.3359375, "learning_rate": 0.0009155001140196111, "loss": 0.189, "step": 54750 }, { "epoch": 0.09707982152149995, "grad_norm": 0.306640625, "learning_rate": 0.0009154418598318599, "loss": 0.1937, "step": 54752 }, { "epoch": 0.09708336768680977, "grad_norm": 3.9375, "learning_rate": 0.0009153836064512587, "loss": 0.2732, "step": 54754 }, { "epoch": 0.0970869138521196, "grad_norm": 0.42578125, "learning_rate": 0.0009153253538780628, "loss": 0.1838, "step": 54756 }, { "epoch": 0.09709046001742941, "grad_norm": 1.4609375, "learning_rate": 0.0009152671021125261, "loss": 0.2626, "step": 54758 }, { "epoch": 0.09709400618273922, "grad_norm": 0.609375, "learning_rate": 0.000915208851154904, "loss": 0.2583, "step": 54760 }, { "epoch": 0.09709755234804904, "grad_norm": 0.251953125, "learning_rate": 0.0009151506010054509, "loss": 0.2034, "step": 54762 }, { "epoch": 0.09710109851335885, "grad_norm": 0.84765625, "learning_rate": 0.0009150923516644216, "loss": 0.2256, "step": 54764 }, { "epoch": 0.09710464467866867, "grad_norm": 0.33984375, "learning_rate": 0.0009150341031320713, "loss": 0.3766, "step": 54766 }, { "epoch": 0.09710819084397848, "grad_norm": 0.306640625, "learning_rate": 0.0009149758554086542, "loss": 0.2108, "step": 54768 }, { "epoch": 0.0971117370092883, "grad_norm": 0.8359375, "learning_rate": 0.0009149176084944251, "loss": 0.3018, "step": 54770 }, { "epoch": 0.09711528317459811, "grad_norm": 0.392578125, "learning_rate": 0.0009148593623896391, "loss": 0.2203, "step": 54772 }, { "epoch": 0.09711882933990792, "grad_norm": 0.38671875, "learning_rate": 0.0009148011170945508, "loss": 0.2545, "step": 54774 }, { "epoch": 0.09712237550521774, "grad_norm": 0.490234375, "learning_rate": 0.0009147428726094143, "loss": 0.2738, "step": 54776 }, { "epoch": 0.09712592167052755, "grad_norm": 0.158203125, "learning_rate": 0.0009146846289344852, "loss": 0.1323, "step": 54778 }, { "epoch": 0.09712946783583737, "grad_norm": 0.8125, "learning_rate": 0.0009146263860700182, "loss": 0.2756, "step": 54780 }, { "epoch": 0.09713301400114718, "grad_norm": 0.416015625, "learning_rate": 0.0009145681440162672, "loss": 0.1533, "step": 54782 }, { "epoch": 0.097136560166457, "grad_norm": 0.1962890625, "learning_rate": 0.0009145099027734878, "loss": 0.1764, "step": 54784 }, { "epoch": 0.09714010633176681, "grad_norm": 0.65625, "learning_rate": 0.0009144516623419341, "loss": 0.191, "step": 54786 }, { "epoch": 0.09714365249707663, "grad_norm": 1.1484375, "learning_rate": 0.0009143934227218613, "loss": 0.1917, "step": 54788 }, { "epoch": 0.09714719866238644, "grad_norm": 0.35546875, "learning_rate": 0.0009143351839135237, "loss": 0.1662, "step": 54790 }, { "epoch": 0.09715074482769626, "grad_norm": 0.2578125, "learning_rate": 0.0009142769459171759, "loss": 0.1794, "step": 54792 }, { "epoch": 0.09715429099300607, "grad_norm": 0.369140625, "learning_rate": 0.0009142187087330732, "loss": 0.1826, "step": 54794 }, { "epoch": 0.09715783715831589, "grad_norm": 0.6484375, "learning_rate": 0.0009141604723614696, "loss": 0.1478, "step": 54796 }, { "epoch": 0.0971613833236257, "grad_norm": 0.1279296875, "learning_rate": 0.0009141022368026206, "loss": 0.1915, "step": 54798 }, { "epoch": 0.09716492948893551, "grad_norm": 0.388671875, "learning_rate": 0.0009140440020567801, "loss": 0.182, "step": 54800 }, { "epoch": 0.09716847565424534, "grad_norm": 0.5, "learning_rate": 0.0009139857681242032, "loss": 0.1822, "step": 54802 }, { "epoch": 0.09717202181955516, "grad_norm": 1.640625, "learning_rate": 0.0009139275350051446, "loss": 0.1632, "step": 54804 }, { "epoch": 0.09717556798486497, "grad_norm": 0.859375, "learning_rate": 0.000913869302699859, "loss": 0.2858, "step": 54806 }, { "epoch": 0.09717911415017479, "grad_norm": 0.396484375, "learning_rate": 0.0009138110712086007, "loss": 0.1863, "step": 54808 }, { "epoch": 0.0971826603154846, "grad_norm": 1.4765625, "learning_rate": 0.0009137528405316247, "loss": 0.259, "step": 54810 }, { "epoch": 0.09718620648079442, "grad_norm": 0.478515625, "learning_rate": 0.0009136946106691855, "loss": 0.1617, "step": 54812 }, { "epoch": 0.09718975264610423, "grad_norm": 0.765625, "learning_rate": 0.0009136363816215381, "loss": 0.2084, "step": 54814 }, { "epoch": 0.09719329881141404, "grad_norm": 0.302734375, "learning_rate": 0.0009135781533889367, "loss": 0.3492, "step": 54816 }, { "epoch": 0.09719684497672386, "grad_norm": 0.640625, "learning_rate": 0.0009135199259716363, "loss": 0.2249, "step": 54818 }, { "epoch": 0.09720039114203367, "grad_norm": 0.3359375, "learning_rate": 0.0009134616993698912, "loss": 0.148, "step": 54820 }, { "epoch": 0.09720393730734349, "grad_norm": 0.392578125, "learning_rate": 0.0009134034735839565, "loss": 0.2387, "step": 54822 }, { "epoch": 0.0972074834726533, "grad_norm": 0.28515625, "learning_rate": 0.0009133452486140865, "loss": 0.1658, "step": 54824 }, { "epoch": 0.09721102963796312, "grad_norm": 0.7421875, "learning_rate": 0.0009132870244605361, "loss": 0.201, "step": 54826 }, { "epoch": 0.09721457580327293, "grad_norm": 0.640625, "learning_rate": 0.0009132288011235596, "loss": 0.1536, "step": 54828 }, { "epoch": 0.09721812196858275, "grad_norm": 0.41015625, "learning_rate": 0.0009131705786034121, "loss": 0.1537, "step": 54830 }, { "epoch": 0.09722166813389256, "grad_norm": 0.2001953125, "learning_rate": 0.0009131123569003477, "loss": 0.2008, "step": 54832 }, { "epoch": 0.09722521429920238, "grad_norm": 0.30859375, "learning_rate": 0.0009130541360146214, "loss": 0.3152, "step": 54834 }, { "epoch": 0.09722876046451219, "grad_norm": 0.361328125, "learning_rate": 0.0009129959159464878, "loss": 0.2128, "step": 54836 }, { "epoch": 0.097232306629822, "grad_norm": 0.46875, "learning_rate": 0.0009129376966962013, "loss": 0.1843, "step": 54838 }, { "epoch": 0.09723585279513182, "grad_norm": 0.34765625, "learning_rate": 0.0009128794782640168, "loss": 0.1927, "step": 54840 }, { "epoch": 0.09723939896044163, "grad_norm": 0.376953125, "learning_rate": 0.0009128212606501887, "loss": 0.2591, "step": 54842 }, { "epoch": 0.09724294512575145, "grad_norm": 0.212890625, "learning_rate": 0.0009127630438549714, "loss": 0.1651, "step": 54844 }, { "epoch": 0.09724649129106128, "grad_norm": 0.400390625, "learning_rate": 0.0009127048278786203, "loss": 0.3069, "step": 54846 }, { "epoch": 0.09725003745637109, "grad_norm": 0.53515625, "learning_rate": 0.0009126466127213893, "loss": 0.1915, "step": 54848 }, { "epoch": 0.0972535836216809, "grad_norm": 1.015625, "learning_rate": 0.0009125883983835329, "loss": 0.2055, "step": 54850 }, { "epoch": 0.09725712978699072, "grad_norm": 1.140625, "learning_rate": 0.000912530184865306, "loss": 0.275, "step": 54852 }, { "epoch": 0.09726067595230053, "grad_norm": 0.55078125, "learning_rate": 0.0009124719721669637, "loss": 0.2286, "step": 54854 }, { "epoch": 0.09726422211761035, "grad_norm": 0.9296875, "learning_rate": 0.0009124137602887596, "loss": 0.1984, "step": 54856 }, { "epoch": 0.09726776828292016, "grad_norm": 0.439453125, "learning_rate": 0.000912355549230949, "loss": 0.1922, "step": 54858 }, { "epoch": 0.09727131444822998, "grad_norm": 0.306640625, "learning_rate": 0.0009122973389937861, "loss": 0.1632, "step": 54860 }, { "epoch": 0.0972748606135398, "grad_norm": 0.31640625, "learning_rate": 0.0009122391295775258, "loss": 0.1541, "step": 54862 }, { "epoch": 0.09727840677884961, "grad_norm": 0.228515625, "learning_rate": 0.0009121809209824223, "loss": 0.1605, "step": 54864 }, { "epoch": 0.09728195294415942, "grad_norm": 0.34765625, "learning_rate": 0.0009121227132087304, "loss": 0.1562, "step": 54866 }, { "epoch": 0.09728549910946924, "grad_norm": 0.3828125, "learning_rate": 0.0009120645062567044, "loss": 0.1899, "step": 54868 }, { "epoch": 0.09728904527477905, "grad_norm": 7.0625, "learning_rate": 0.0009120063001265995, "loss": 0.2573, "step": 54870 }, { "epoch": 0.09729259144008887, "grad_norm": 0.220703125, "learning_rate": 0.0009119480948186696, "loss": 0.2874, "step": 54872 }, { "epoch": 0.09729613760539868, "grad_norm": 0.2392578125, "learning_rate": 0.0009118898903331694, "loss": 0.1326, "step": 54874 }, { "epoch": 0.0972996837707085, "grad_norm": 0.6953125, "learning_rate": 0.0009118316866703538, "loss": 0.2388, "step": 54876 }, { "epoch": 0.09730322993601831, "grad_norm": 0.7265625, "learning_rate": 0.0009117734838304769, "loss": 0.2594, "step": 54878 }, { "epoch": 0.09730677610132812, "grad_norm": 0.44140625, "learning_rate": 0.0009117152818137937, "loss": 0.3769, "step": 54880 }, { "epoch": 0.09731032226663794, "grad_norm": 0.267578125, "learning_rate": 0.0009116570806205582, "loss": 0.1647, "step": 54882 }, { "epoch": 0.09731386843194775, "grad_norm": 1.359375, "learning_rate": 0.0009115988802510251, "loss": 0.2223, "step": 54884 }, { "epoch": 0.09731741459725757, "grad_norm": 0.3515625, "learning_rate": 0.0009115406807054491, "loss": 0.2147, "step": 54886 }, { "epoch": 0.09732096076256738, "grad_norm": 0.455078125, "learning_rate": 0.0009114824819840853, "loss": 0.227, "step": 54888 }, { "epoch": 0.0973245069278772, "grad_norm": 1.4765625, "learning_rate": 0.0009114242840871871, "loss": 0.2891, "step": 54890 }, { "epoch": 0.09732805309318703, "grad_norm": 0.703125, "learning_rate": 0.0009113660870150096, "loss": 0.2384, "step": 54892 }, { "epoch": 0.09733159925849684, "grad_norm": 0.5859375, "learning_rate": 0.0009113078907678074, "loss": 0.2055, "step": 54894 }, { "epoch": 0.09733514542380665, "grad_norm": 0.177734375, "learning_rate": 0.0009112496953458349, "loss": 0.1891, "step": 54896 }, { "epoch": 0.09733869158911647, "grad_norm": 0.275390625, "learning_rate": 0.0009111915007493465, "loss": 0.1676, "step": 54898 }, { "epoch": 0.09734223775442628, "grad_norm": 0.95703125, "learning_rate": 0.0009111333069785968, "loss": 0.1791, "step": 54900 }, { "epoch": 0.0973457839197361, "grad_norm": 0.36328125, "learning_rate": 0.00091107511403384, "loss": 0.1556, "step": 54902 }, { "epoch": 0.09734933008504591, "grad_norm": 0.359375, "learning_rate": 0.0009110169219153315, "loss": 0.2232, "step": 54904 }, { "epoch": 0.09735287625035573, "grad_norm": 0.376953125, "learning_rate": 0.0009109587306233249, "loss": 0.1579, "step": 54906 }, { "epoch": 0.09735642241566554, "grad_norm": 0.71484375, "learning_rate": 0.000910900540158075, "loss": 0.2913, "step": 54908 }, { "epoch": 0.09735996858097536, "grad_norm": 1.609375, "learning_rate": 0.0009108423505198362, "loss": 0.2951, "step": 54910 }, { "epoch": 0.09736351474628517, "grad_norm": 0.28515625, "learning_rate": 0.0009107841617088635, "loss": 0.1808, "step": 54912 }, { "epoch": 0.09736706091159499, "grad_norm": 1.8046875, "learning_rate": 0.0009107259737254106, "loss": 0.2265, "step": 54914 }, { "epoch": 0.0973706070769048, "grad_norm": 1.6796875, "learning_rate": 0.0009106677865697322, "loss": 0.1886, "step": 54916 }, { "epoch": 0.09737415324221461, "grad_norm": 0.2734375, "learning_rate": 0.0009106096002420832, "loss": 0.2005, "step": 54918 }, { "epoch": 0.09737769940752443, "grad_norm": 0.69140625, "learning_rate": 0.0009105514147427178, "loss": 0.1244, "step": 54920 }, { "epoch": 0.09738124557283424, "grad_norm": 0.416015625, "learning_rate": 0.0009104932300718904, "loss": 0.2176, "step": 54922 }, { "epoch": 0.09738479173814406, "grad_norm": 0.7265625, "learning_rate": 0.0009104350462298554, "loss": 0.1967, "step": 54924 }, { "epoch": 0.09738833790345387, "grad_norm": 0.68359375, "learning_rate": 0.0009103768632168674, "loss": 0.1786, "step": 54926 }, { "epoch": 0.09739188406876369, "grad_norm": 0.62890625, "learning_rate": 0.0009103186810331814, "loss": 0.1883, "step": 54928 }, { "epoch": 0.0973954302340735, "grad_norm": 0.384765625, "learning_rate": 0.0009102604996790507, "loss": 0.1839, "step": 54930 }, { "epoch": 0.09739897639938332, "grad_norm": 1.203125, "learning_rate": 0.0009102023191547306, "loss": 0.2188, "step": 54932 }, { "epoch": 0.09740252256469313, "grad_norm": 0.255859375, "learning_rate": 0.0009101441394604753, "loss": 0.1254, "step": 54934 }, { "epoch": 0.09740606873000295, "grad_norm": 0.94140625, "learning_rate": 0.0009100859605965395, "loss": 0.1998, "step": 54936 }, { "epoch": 0.09740961489531277, "grad_norm": 0.28515625, "learning_rate": 0.0009100277825631769, "loss": 0.2023, "step": 54938 }, { "epoch": 0.09741316106062259, "grad_norm": 0.5078125, "learning_rate": 0.0009099696053606425, "loss": 0.2431, "step": 54940 }, { "epoch": 0.0974167072259324, "grad_norm": 0.6640625, "learning_rate": 0.0009099114289891908, "loss": 0.2079, "step": 54942 }, { "epoch": 0.09742025339124222, "grad_norm": 0.60546875, "learning_rate": 0.0009098532534490763, "loss": 0.1653, "step": 54944 }, { "epoch": 0.09742379955655203, "grad_norm": 0.96875, "learning_rate": 0.0009097950787405528, "loss": 0.1934, "step": 54946 }, { "epoch": 0.09742734572186185, "grad_norm": 0.359375, "learning_rate": 0.0009097369048638753, "loss": 0.2035, "step": 54948 }, { "epoch": 0.09743089188717166, "grad_norm": 1.1328125, "learning_rate": 0.0009096787318192983, "loss": 0.167, "step": 54950 }, { "epoch": 0.09743443805248148, "grad_norm": 0.3984375, "learning_rate": 0.0009096205596070759, "loss": 0.2173, "step": 54952 }, { "epoch": 0.09743798421779129, "grad_norm": 0.5234375, "learning_rate": 0.0009095623882274626, "loss": 0.1687, "step": 54954 }, { "epoch": 0.0974415303831011, "grad_norm": 0.1923828125, "learning_rate": 0.0009095042176807124, "loss": 0.152, "step": 54956 }, { "epoch": 0.09744507654841092, "grad_norm": 0.2119140625, "learning_rate": 0.0009094460479670803, "loss": 0.1304, "step": 54958 }, { "epoch": 0.09744862271372073, "grad_norm": 0.48046875, "learning_rate": 0.0009093878790868208, "loss": 0.1741, "step": 54960 }, { "epoch": 0.09745216887903055, "grad_norm": 0.74609375, "learning_rate": 0.0009093297110401877, "loss": 0.2119, "step": 54962 }, { "epoch": 0.09745571504434036, "grad_norm": 0.6484375, "learning_rate": 0.0009092715438274357, "loss": 0.2512, "step": 54964 }, { "epoch": 0.09745926120965018, "grad_norm": 0.380859375, "learning_rate": 0.0009092133774488191, "loss": 0.153, "step": 54966 }, { "epoch": 0.09746280737495999, "grad_norm": 0.41796875, "learning_rate": 0.0009091552119045925, "loss": 0.1643, "step": 54968 }, { "epoch": 0.0974663535402698, "grad_norm": 0.88671875, "learning_rate": 0.0009090970471950104, "loss": 0.1559, "step": 54970 }, { "epoch": 0.09746989970557962, "grad_norm": 1.59375, "learning_rate": 0.0009090388833203266, "loss": 0.3511, "step": 54972 }, { "epoch": 0.09747344587088944, "grad_norm": 0.232421875, "learning_rate": 0.0009089807202807958, "loss": 0.1876, "step": 54974 }, { "epoch": 0.09747699203619925, "grad_norm": 0.384765625, "learning_rate": 0.0009089225580766723, "loss": 0.1804, "step": 54976 }, { "epoch": 0.09748053820150906, "grad_norm": 1.15625, "learning_rate": 0.0009088643967082111, "loss": 0.21, "step": 54978 }, { "epoch": 0.09748408436681888, "grad_norm": 0.98046875, "learning_rate": 0.0009088062361756655, "loss": 0.1922, "step": 54980 }, { "epoch": 0.09748763053212871, "grad_norm": 0.28125, "learning_rate": 0.0009087480764792904, "loss": 0.146, "step": 54982 }, { "epoch": 0.09749117669743852, "grad_norm": 0.58203125, "learning_rate": 0.00090868991761934, "loss": 0.1771, "step": 54984 }, { "epoch": 0.09749472286274834, "grad_norm": 0.53125, "learning_rate": 0.0009086317595960691, "loss": 0.1708, "step": 54986 }, { "epoch": 0.09749826902805815, "grad_norm": 0.578125, "learning_rate": 0.0009085736024097316, "loss": 0.1876, "step": 54988 }, { "epoch": 0.09750181519336797, "grad_norm": 5.6875, "learning_rate": 0.000908515446060582, "loss": 0.2581, "step": 54990 }, { "epoch": 0.09750536135867778, "grad_norm": 0.3125, "learning_rate": 0.0009084572905488743, "loss": 0.1552, "step": 54992 }, { "epoch": 0.0975089075239876, "grad_norm": 0.52734375, "learning_rate": 0.0009083991358748638, "loss": 0.1355, "step": 54994 }, { "epoch": 0.09751245368929741, "grad_norm": 0.291015625, "learning_rate": 0.0009083409820388038, "loss": 0.1966, "step": 54996 }, { "epoch": 0.09751599985460722, "grad_norm": 0.416015625, "learning_rate": 0.0009082828290409488, "loss": 0.1465, "step": 54998 }, { "epoch": 0.09751954601991704, "grad_norm": 0.61328125, "learning_rate": 0.0009082246768815534, "loss": 0.2222, "step": 55000 }, { "epoch": 0.09752309218522685, "grad_norm": 0.5390625, "learning_rate": 0.0009081665255608725, "loss": 0.1751, "step": 55002 }, { "epoch": 0.09752663835053667, "grad_norm": 0.388671875, "learning_rate": 0.000908108375079159, "loss": 0.1241, "step": 55004 }, { "epoch": 0.09753018451584648, "grad_norm": 0.443359375, "learning_rate": 0.0009080502254366682, "loss": 0.1445, "step": 55006 }, { "epoch": 0.0975337306811563, "grad_norm": 0.30078125, "learning_rate": 0.0009079920766336544, "loss": 0.2082, "step": 55008 }, { "epoch": 0.09753727684646611, "grad_norm": 0.384765625, "learning_rate": 0.000907933928670372, "loss": 0.2199, "step": 55010 }, { "epoch": 0.09754082301177593, "grad_norm": 0.333984375, "learning_rate": 0.0009078757815470744, "loss": 0.2081, "step": 55012 }, { "epoch": 0.09754436917708574, "grad_norm": 0.33203125, "learning_rate": 0.0009078176352640168, "loss": 0.1563, "step": 55014 }, { "epoch": 0.09754791534239556, "grad_norm": 0.5859375, "learning_rate": 0.0009077594898214532, "loss": 0.1813, "step": 55016 }, { "epoch": 0.09755146150770537, "grad_norm": 0.6171875, "learning_rate": 0.0009077013452196384, "loss": 0.1664, "step": 55018 }, { "epoch": 0.09755500767301518, "grad_norm": 0.44140625, "learning_rate": 0.0009076432014588254, "loss": 0.1601, "step": 55020 }, { "epoch": 0.097558553838325, "grad_norm": 0.384765625, "learning_rate": 0.0009075850585392695, "loss": 0.2306, "step": 55022 }, { "epoch": 0.09756210000363481, "grad_norm": 1.28125, "learning_rate": 0.0009075269164612252, "loss": 0.2771, "step": 55024 }, { "epoch": 0.09756564616894463, "grad_norm": 0.69140625, "learning_rate": 0.0009074687752249462, "loss": 0.3827, "step": 55026 }, { "epoch": 0.09756919233425446, "grad_norm": 0.9453125, "learning_rate": 0.000907410634830687, "loss": 0.1523, "step": 55028 }, { "epoch": 0.09757273849956427, "grad_norm": 0.8359375, "learning_rate": 0.0009073524952787016, "loss": 0.2915, "step": 55030 }, { "epoch": 0.09757628466487409, "grad_norm": 0.9609375, "learning_rate": 0.0009072943565692444, "loss": 0.1961, "step": 55032 }, { "epoch": 0.0975798308301839, "grad_norm": 0.63671875, "learning_rate": 0.0009072362187025702, "loss": 0.2202, "step": 55034 }, { "epoch": 0.09758337699549371, "grad_norm": 0.384765625, "learning_rate": 0.0009071780816789323, "loss": 0.2368, "step": 55036 }, { "epoch": 0.09758692316080353, "grad_norm": 1.7421875, "learning_rate": 0.0009071199454985856, "loss": 0.4211, "step": 55038 }, { "epoch": 0.09759046932611334, "grad_norm": 1.640625, "learning_rate": 0.0009070618101617843, "loss": 0.271, "step": 55040 }, { "epoch": 0.09759401549142316, "grad_norm": 0.58203125, "learning_rate": 0.0009070036756687829, "loss": 0.1713, "step": 55042 }, { "epoch": 0.09759756165673297, "grad_norm": 0.416015625, "learning_rate": 0.000906945542019835, "loss": 0.1687, "step": 55044 }, { "epoch": 0.09760110782204279, "grad_norm": 0.421875, "learning_rate": 0.000906887409215195, "loss": 0.1724, "step": 55046 }, { "epoch": 0.0976046539873526, "grad_norm": 0.609375, "learning_rate": 0.0009068292772551175, "loss": 0.2563, "step": 55048 }, { "epoch": 0.09760820015266242, "grad_norm": 0.400390625, "learning_rate": 0.0009067711461398564, "loss": 0.1551, "step": 55050 }, { "epoch": 0.09761174631797223, "grad_norm": 0.94921875, "learning_rate": 0.0009067130158696663, "loss": 0.2086, "step": 55052 }, { "epoch": 0.09761529248328205, "grad_norm": 0.87890625, "learning_rate": 0.0009066548864448013, "loss": 0.298, "step": 55054 }, { "epoch": 0.09761883864859186, "grad_norm": 0.62109375, "learning_rate": 0.000906596757865515, "loss": 0.128, "step": 55056 }, { "epoch": 0.09762238481390167, "grad_norm": 1.3515625, "learning_rate": 0.0009065386301320621, "loss": 0.3627, "step": 55058 }, { "epoch": 0.09762593097921149, "grad_norm": 0.80078125, "learning_rate": 0.0009064805032446976, "loss": 0.2413, "step": 55060 }, { "epoch": 0.0976294771445213, "grad_norm": 1.1796875, "learning_rate": 0.0009064223772036744, "loss": 0.2634, "step": 55062 }, { "epoch": 0.09763302330983112, "grad_norm": 0.8671875, "learning_rate": 0.0009063642520092477, "loss": 0.1903, "step": 55064 }, { "epoch": 0.09763656947514093, "grad_norm": 2.21875, "learning_rate": 0.0009063061276616707, "loss": 0.2353, "step": 55066 }, { "epoch": 0.09764011564045075, "grad_norm": 0.2451171875, "learning_rate": 0.0009062480041611989, "loss": 0.1474, "step": 55068 }, { "epoch": 0.09764366180576056, "grad_norm": 0.55859375, "learning_rate": 0.0009061898815080853, "loss": 0.1796, "step": 55070 }, { "epoch": 0.09764720797107038, "grad_norm": 0.4921875, "learning_rate": 0.0009061317597025844, "loss": 0.1995, "step": 55072 }, { "epoch": 0.0976507541363802, "grad_norm": 0.1748046875, "learning_rate": 0.0009060736387449509, "loss": 0.1533, "step": 55074 }, { "epoch": 0.09765430030169002, "grad_norm": 0.2294921875, "learning_rate": 0.0009060155186354389, "loss": 0.171, "step": 55076 }, { "epoch": 0.09765784646699983, "grad_norm": 0.35546875, "learning_rate": 0.0009059573993743017, "loss": 0.1924, "step": 55078 }, { "epoch": 0.09766139263230965, "grad_norm": 0.41015625, "learning_rate": 0.0009058992809617946, "loss": 0.1959, "step": 55080 }, { "epoch": 0.09766493879761946, "grad_norm": 0.3359375, "learning_rate": 0.0009058411633981711, "loss": 0.1461, "step": 55082 }, { "epoch": 0.09766848496292928, "grad_norm": 0.455078125, "learning_rate": 0.0009057830466836856, "loss": 0.1583, "step": 55084 }, { "epoch": 0.09767203112823909, "grad_norm": 0.240234375, "learning_rate": 0.0009057249308185925, "loss": 0.105, "step": 55086 }, { "epoch": 0.09767557729354891, "grad_norm": 0.310546875, "learning_rate": 0.0009056668158031452, "loss": 0.2064, "step": 55088 }, { "epoch": 0.09767912345885872, "grad_norm": 0.32421875, "learning_rate": 0.0009056087016375984, "loss": 0.222, "step": 55090 }, { "epoch": 0.09768266962416854, "grad_norm": 0.6171875, "learning_rate": 0.0009055505883222068, "loss": 0.3497, "step": 55092 }, { "epoch": 0.09768621578947835, "grad_norm": 0.498046875, "learning_rate": 0.0009054924758572236, "loss": 0.2059, "step": 55094 }, { "epoch": 0.09768976195478817, "grad_norm": 3.71875, "learning_rate": 0.0009054343642429033, "loss": 0.2537, "step": 55096 }, { "epoch": 0.09769330812009798, "grad_norm": 0.27734375, "learning_rate": 0.0009053762534795002, "loss": 0.1435, "step": 55098 }, { "epoch": 0.0976968542854078, "grad_norm": 1.3203125, "learning_rate": 0.0009053181435672684, "loss": 0.234, "step": 55100 }, { "epoch": 0.09770040045071761, "grad_norm": 0.3828125, "learning_rate": 0.0009052600345064617, "loss": 0.2992, "step": 55102 }, { "epoch": 0.09770394661602742, "grad_norm": 0.55078125, "learning_rate": 0.0009052019262973345, "loss": 0.251, "step": 55104 }, { "epoch": 0.09770749278133724, "grad_norm": 0.28515625, "learning_rate": 0.000905143818940141, "loss": 0.1896, "step": 55106 }, { "epoch": 0.09771103894664705, "grad_norm": 0.4375, "learning_rate": 0.0009050857124351355, "loss": 0.1558, "step": 55108 }, { "epoch": 0.09771458511195687, "grad_norm": 5.28125, "learning_rate": 0.0009050276067825715, "loss": 0.4083, "step": 55110 }, { "epoch": 0.09771813127726668, "grad_norm": 1.4921875, "learning_rate": 0.0009049695019827036, "loss": 0.1544, "step": 55112 }, { "epoch": 0.0977216774425765, "grad_norm": 1.6171875, "learning_rate": 0.0009049113980357858, "loss": 0.1406, "step": 55114 }, { "epoch": 0.09772522360788631, "grad_norm": 0.279296875, "learning_rate": 0.0009048532949420725, "loss": 0.1589, "step": 55116 }, { "epoch": 0.09772876977319614, "grad_norm": 0.51171875, "learning_rate": 0.0009047951927018172, "loss": 0.2319, "step": 55118 }, { "epoch": 0.09773231593850595, "grad_norm": 0.34765625, "learning_rate": 0.0009047370913152744, "loss": 0.1503, "step": 55120 }, { "epoch": 0.09773586210381577, "grad_norm": 0.474609375, "learning_rate": 0.0009046789907826982, "loss": 0.2038, "step": 55122 }, { "epoch": 0.09773940826912558, "grad_norm": 0.984375, "learning_rate": 0.0009046208911043427, "loss": 0.2273, "step": 55124 }, { "epoch": 0.0977429544344354, "grad_norm": 0.38671875, "learning_rate": 0.000904562792280462, "loss": 0.1798, "step": 55126 }, { "epoch": 0.09774650059974521, "grad_norm": 0.625, "learning_rate": 0.0009045046943113098, "loss": 0.2547, "step": 55128 }, { "epoch": 0.09775004676505503, "grad_norm": 0.80859375, "learning_rate": 0.0009044465971971405, "loss": 0.2905, "step": 55130 }, { "epoch": 0.09775359293036484, "grad_norm": 1.25, "learning_rate": 0.0009043885009382082, "loss": 0.2873, "step": 55132 }, { "epoch": 0.09775713909567466, "grad_norm": 0.35546875, "learning_rate": 0.0009043304055347674, "loss": 0.2043, "step": 55134 }, { "epoch": 0.09776068526098447, "grad_norm": 0.482421875, "learning_rate": 0.0009042723109870714, "loss": 0.1781, "step": 55136 }, { "epoch": 0.09776423142629428, "grad_norm": 0.77734375, "learning_rate": 0.0009042142172953749, "loss": 0.2073, "step": 55138 }, { "epoch": 0.0977677775916041, "grad_norm": 0.6015625, "learning_rate": 0.0009041561244599314, "loss": 0.1457, "step": 55140 }, { "epoch": 0.09777132375691391, "grad_norm": 0.55859375, "learning_rate": 0.0009040980324809955, "loss": 0.1797, "step": 55142 }, { "epoch": 0.09777486992222373, "grad_norm": 0.65625, "learning_rate": 0.0009040399413588209, "loss": 0.1538, "step": 55144 }, { "epoch": 0.09777841608753354, "grad_norm": 0.2578125, "learning_rate": 0.0009039818510936617, "loss": 0.2764, "step": 55146 }, { "epoch": 0.09778196225284336, "grad_norm": 0.671875, "learning_rate": 0.0009039237616857718, "loss": 0.1738, "step": 55148 }, { "epoch": 0.09778550841815317, "grad_norm": 0.3125, "learning_rate": 0.000903865673135406, "loss": 0.1277, "step": 55150 }, { "epoch": 0.09778905458346299, "grad_norm": 0.23828125, "learning_rate": 0.0009038075854428176, "loss": 0.1608, "step": 55152 }, { "epoch": 0.0977926007487728, "grad_norm": 1.6640625, "learning_rate": 0.0009037494986082606, "loss": 0.2452, "step": 55154 }, { "epoch": 0.09779614691408262, "grad_norm": 0.43359375, "learning_rate": 0.0009036914126319898, "loss": 0.1647, "step": 55156 }, { "epoch": 0.09779969307939243, "grad_norm": 0.51953125, "learning_rate": 0.0009036333275142583, "loss": 0.2201, "step": 55158 }, { "epoch": 0.09780323924470224, "grad_norm": 0.306640625, "learning_rate": 0.0009035752432553208, "loss": 0.1633, "step": 55160 }, { "epoch": 0.09780678541001206, "grad_norm": 0.267578125, "learning_rate": 0.0009035171598554308, "loss": 0.2217, "step": 55162 }, { "epoch": 0.09781033157532189, "grad_norm": 0.7109375, "learning_rate": 0.0009034590773148428, "loss": 0.1886, "step": 55164 }, { "epoch": 0.0978138777406317, "grad_norm": 0.5546875, "learning_rate": 0.0009034009956338109, "loss": 0.1878, "step": 55166 }, { "epoch": 0.09781742390594152, "grad_norm": 1.4296875, "learning_rate": 0.0009033429148125884, "loss": 0.1781, "step": 55168 }, { "epoch": 0.09782097007125133, "grad_norm": 0.310546875, "learning_rate": 0.00090328483485143, "loss": 0.1942, "step": 55170 }, { "epoch": 0.09782451623656115, "grad_norm": 0.447265625, "learning_rate": 0.0009032267557505893, "loss": 0.2204, "step": 55172 }, { "epoch": 0.09782806240187096, "grad_norm": 0.294921875, "learning_rate": 0.0009031686775103207, "loss": 0.147, "step": 55174 }, { "epoch": 0.09783160856718078, "grad_norm": 0.67578125, "learning_rate": 0.000903110600130878, "loss": 0.1725, "step": 55176 }, { "epoch": 0.09783515473249059, "grad_norm": 0.77734375, "learning_rate": 0.0009030525236125149, "loss": 0.2302, "step": 55178 }, { "epoch": 0.0978387008978004, "grad_norm": 5.09375, "learning_rate": 0.0009029944479554857, "loss": 0.1987, "step": 55180 }, { "epoch": 0.09784224706311022, "grad_norm": 4.34375, "learning_rate": 0.0009029363731600447, "loss": 0.2516, "step": 55182 }, { "epoch": 0.09784579322842003, "grad_norm": 0.453125, "learning_rate": 0.000902878299226445, "loss": 0.1473, "step": 55184 }, { "epoch": 0.09784933939372985, "grad_norm": 0.4765625, "learning_rate": 0.0009028202261549414, "loss": 0.2003, "step": 55186 }, { "epoch": 0.09785288555903966, "grad_norm": 0.23828125, "learning_rate": 0.0009027621539457873, "loss": 0.1524, "step": 55188 }, { "epoch": 0.09785643172434948, "grad_norm": 0.1572265625, "learning_rate": 0.0009027040825992376, "loss": 0.1672, "step": 55190 }, { "epoch": 0.09785997788965929, "grad_norm": 0.78125, "learning_rate": 0.0009026460121155453, "loss": 0.2967, "step": 55192 }, { "epoch": 0.0978635240549691, "grad_norm": 0.4609375, "learning_rate": 0.0009025879424949648, "loss": 0.1342, "step": 55194 }, { "epoch": 0.09786707022027892, "grad_norm": 0.291015625, "learning_rate": 0.0009025298737377497, "loss": 0.3072, "step": 55196 }, { "epoch": 0.09787061638558874, "grad_norm": 0.2490234375, "learning_rate": 0.0009024718058441546, "loss": 0.1492, "step": 55198 }, { "epoch": 0.09787416255089855, "grad_norm": 0.32421875, "learning_rate": 0.0009024137388144329, "loss": 0.1655, "step": 55200 }, { "epoch": 0.09787770871620836, "grad_norm": 2.453125, "learning_rate": 0.0009023556726488384, "loss": 0.2503, "step": 55202 }, { "epoch": 0.09788125488151818, "grad_norm": 4.0, "learning_rate": 0.0009022976073476257, "loss": 0.2738, "step": 55204 }, { "epoch": 0.097884801046828, "grad_norm": 0.81640625, "learning_rate": 0.0009022395429110486, "loss": 0.2955, "step": 55206 }, { "epoch": 0.09788834721213781, "grad_norm": 1.390625, "learning_rate": 0.0009021814793393606, "loss": 0.2012, "step": 55208 }, { "epoch": 0.09789189337744764, "grad_norm": 0.328125, "learning_rate": 0.0009021234166328158, "loss": 0.1563, "step": 55210 }, { "epoch": 0.09789543954275745, "grad_norm": 0.3125, "learning_rate": 0.0009020653547916687, "loss": 0.1569, "step": 55212 }, { "epoch": 0.09789898570806727, "grad_norm": 0.2041015625, "learning_rate": 0.0009020072938161725, "loss": 0.1109, "step": 55214 }, { "epoch": 0.09790253187337708, "grad_norm": 0.369140625, "learning_rate": 0.0009019492337065813, "loss": 0.2323, "step": 55216 }, { "epoch": 0.0979060780386869, "grad_norm": 0.66015625, "learning_rate": 0.0009018911744631493, "loss": 0.1785, "step": 55218 }, { "epoch": 0.09790962420399671, "grad_norm": 2.171875, "learning_rate": 0.00090183311608613, "loss": 0.165, "step": 55220 }, { "epoch": 0.09791317036930652, "grad_norm": 0.294921875, "learning_rate": 0.0009017750585757773, "loss": 0.2696, "step": 55222 }, { "epoch": 0.09791671653461634, "grad_norm": 0.24609375, "learning_rate": 0.0009017170019323462, "loss": 0.1797, "step": 55224 }, { "epoch": 0.09792026269992615, "grad_norm": 0.482421875, "learning_rate": 0.0009016589461560889, "loss": 0.1622, "step": 55226 }, { "epoch": 0.09792380886523597, "grad_norm": 0.462890625, "learning_rate": 0.0009016008912472607, "loss": 0.1893, "step": 55228 }, { "epoch": 0.09792735503054578, "grad_norm": 0.7421875, "learning_rate": 0.0009015428372061147, "loss": 0.2142, "step": 55230 }, { "epoch": 0.0979309011958556, "grad_norm": 0.341796875, "learning_rate": 0.0009014847840329053, "loss": 0.1559, "step": 55232 }, { "epoch": 0.09793444736116541, "grad_norm": 0.462890625, "learning_rate": 0.000901426731727886, "loss": 0.2364, "step": 55234 }, { "epoch": 0.09793799352647523, "grad_norm": 0.40234375, "learning_rate": 0.0009013686802913106, "loss": 0.1621, "step": 55236 }, { "epoch": 0.09794153969178504, "grad_norm": 0.298828125, "learning_rate": 0.0009013106297234332, "loss": 0.1883, "step": 55238 }, { "epoch": 0.09794508585709485, "grad_norm": 0.23046875, "learning_rate": 0.0009012525800245084, "loss": 0.1715, "step": 55240 }, { "epoch": 0.09794863202240467, "grad_norm": 0.1748046875, "learning_rate": 0.0009011945311947887, "loss": 0.1637, "step": 55242 }, { "epoch": 0.09795217818771448, "grad_norm": 0.46875, "learning_rate": 0.0009011364832345288, "loss": 0.169, "step": 55244 }, { "epoch": 0.0979557243530243, "grad_norm": 0.609375, "learning_rate": 0.0009010784361439823, "loss": 0.2666, "step": 55246 }, { "epoch": 0.09795927051833411, "grad_norm": 1.0625, "learning_rate": 0.0009010203899234036, "loss": 0.1584, "step": 55248 }, { "epoch": 0.09796281668364393, "grad_norm": 1.546875, "learning_rate": 0.0009009623445730455, "loss": 0.3042, "step": 55250 }, { "epoch": 0.09796636284895374, "grad_norm": 0.921875, "learning_rate": 0.0009009043000931629, "loss": 0.3266, "step": 55252 }, { "epoch": 0.09796990901426357, "grad_norm": 0.34375, "learning_rate": 0.0009008462564840092, "loss": 0.1653, "step": 55254 }, { "epoch": 0.09797345517957338, "grad_norm": 0.640625, "learning_rate": 0.0009007882137458385, "loss": 0.1878, "step": 55256 }, { "epoch": 0.0979770013448832, "grad_norm": 0.375, "learning_rate": 0.0009007301718789041, "loss": 0.1542, "step": 55258 }, { "epoch": 0.09798054751019301, "grad_norm": 0.515625, "learning_rate": 0.0009006721308834603, "loss": 0.1816, "step": 55260 }, { "epoch": 0.09798409367550283, "grad_norm": 0.40234375, "learning_rate": 0.0009006140907597604, "loss": 0.1899, "step": 55262 }, { "epoch": 0.09798763984081264, "grad_norm": 1.328125, "learning_rate": 0.0009005560515080595, "loss": 0.2776, "step": 55264 }, { "epoch": 0.09799118600612246, "grad_norm": 0.33984375, "learning_rate": 0.00090049801312861, "loss": 0.1781, "step": 55266 }, { "epoch": 0.09799473217143227, "grad_norm": 0.396484375, "learning_rate": 0.0009004399756216666, "loss": 0.1844, "step": 55268 }, { "epoch": 0.09799827833674209, "grad_norm": 0.53515625, "learning_rate": 0.0009003819389874823, "loss": 0.249, "step": 55270 }, { "epoch": 0.0980018245020519, "grad_norm": 0.318359375, "learning_rate": 0.0009003239032263122, "loss": 0.1613, "step": 55272 }, { "epoch": 0.09800537066736172, "grad_norm": 0.2431640625, "learning_rate": 0.0009002658683384088, "loss": 0.1637, "step": 55274 }, { "epoch": 0.09800891683267153, "grad_norm": 0.3828125, "learning_rate": 0.0009002078343240265, "loss": 0.1427, "step": 55276 }, { "epoch": 0.09801246299798134, "grad_norm": 0.89453125, "learning_rate": 0.000900149801183419, "loss": 0.2232, "step": 55278 }, { "epoch": 0.09801600916329116, "grad_norm": 0.365234375, "learning_rate": 0.0009000917689168409, "loss": 0.1532, "step": 55280 }, { "epoch": 0.09801955532860097, "grad_norm": 0.59765625, "learning_rate": 0.0009000337375245446, "loss": 0.1863, "step": 55282 }, { "epoch": 0.09802310149391079, "grad_norm": 0.2080078125, "learning_rate": 0.0008999757070067846, "loss": 0.2985, "step": 55284 }, { "epoch": 0.0980266476592206, "grad_norm": 0.9140625, "learning_rate": 0.000899917677363815, "loss": 0.1873, "step": 55286 }, { "epoch": 0.09803019382453042, "grad_norm": 0.51953125, "learning_rate": 0.0008998596485958892, "loss": 0.1914, "step": 55288 }, { "epoch": 0.09803373998984023, "grad_norm": 0.2451171875, "learning_rate": 0.0008998016207032609, "loss": 0.1384, "step": 55290 }, { "epoch": 0.09803728615515005, "grad_norm": 0.3828125, "learning_rate": 0.0008997435936861839, "loss": 0.2631, "step": 55292 }, { "epoch": 0.09804083232045986, "grad_norm": 0.474609375, "learning_rate": 0.0008996855675449122, "loss": 0.2287, "step": 55294 }, { "epoch": 0.09804437848576968, "grad_norm": 0.58984375, "learning_rate": 0.0008996275422796998, "loss": 0.1675, "step": 55296 }, { "epoch": 0.09804792465107949, "grad_norm": 0.26171875, "learning_rate": 0.0008995695178907999, "loss": 0.1685, "step": 55298 }, { "epoch": 0.09805147081638932, "grad_norm": 0.44921875, "learning_rate": 0.0008995114943784661, "loss": 0.2073, "step": 55300 }, { "epoch": 0.09805501698169913, "grad_norm": 2.625, "learning_rate": 0.0008994534717429528, "loss": 0.277, "step": 55302 }, { "epoch": 0.09805856314700895, "grad_norm": 0.478515625, "learning_rate": 0.0008993954499845138, "loss": 0.1931, "step": 55304 }, { "epoch": 0.09806210931231876, "grad_norm": 0.69921875, "learning_rate": 0.0008993374291034024, "loss": 0.1811, "step": 55306 }, { "epoch": 0.09806565547762858, "grad_norm": 2.296875, "learning_rate": 0.0008992794090998728, "loss": 0.1866, "step": 55308 }, { "epoch": 0.09806920164293839, "grad_norm": 0.68359375, "learning_rate": 0.0008992213899741781, "loss": 0.2452, "step": 55310 }, { "epoch": 0.0980727478082482, "grad_norm": 0.27734375, "learning_rate": 0.0008991633717265724, "loss": 0.1533, "step": 55312 }, { "epoch": 0.09807629397355802, "grad_norm": 0.435546875, "learning_rate": 0.0008991053543573097, "loss": 0.1458, "step": 55314 }, { "epoch": 0.09807984013886784, "grad_norm": 0.15625, "learning_rate": 0.0008990473378666434, "loss": 0.1511, "step": 55316 }, { "epoch": 0.09808338630417765, "grad_norm": 0.279296875, "learning_rate": 0.0008989893222548273, "loss": 0.1744, "step": 55318 }, { "epoch": 0.09808693246948746, "grad_norm": 0.3671875, "learning_rate": 0.000898931307522115, "loss": 0.1882, "step": 55320 }, { "epoch": 0.09809047863479728, "grad_norm": 0.412109375, "learning_rate": 0.0008988732936687607, "loss": 0.243, "step": 55322 }, { "epoch": 0.0980940248001071, "grad_norm": 0.55859375, "learning_rate": 0.0008988152806950177, "loss": 0.2244, "step": 55324 }, { "epoch": 0.09809757096541691, "grad_norm": 0.404296875, "learning_rate": 0.0008987572686011396, "loss": 0.1643, "step": 55326 }, { "epoch": 0.09810111713072672, "grad_norm": 0.84375, "learning_rate": 0.0008986992573873803, "loss": 0.1826, "step": 55328 }, { "epoch": 0.09810466329603654, "grad_norm": 0.40234375, "learning_rate": 0.000898641247053994, "loss": 0.1753, "step": 55330 }, { "epoch": 0.09810820946134635, "grad_norm": 0.671875, "learning_rate": 0.0008985832376012333, "loss": 0.2309, "step": 55332 }, { "epoch": 0.09811175562665617, "grad_norm": 2.265625, "learning_rate": 0.0008985252290293526, "loss": 0.1774, "step": 55334 }, { "epoch": 0.09811530179196598, "grad_norm": 0.91796875, "learning_rate": 0.0008984672213386056, "loss": 0.1611, "step": 55336 }, { "epoch": 0.0981188479572758, "grad_norm": 0.546875, "learning_rate": 0.0008984092145292464, "loss": 0.3402, "step": 55338 }, { "epoch": 0.09812239412258561, "grad_norm": 4.5625, "learning_rate": 0.0008983512086015276, "loss": 0.1419, "step": 55340 }, { "epoch": 0.09812594028789542, "grad_norm": 0.345703125, "learning_rate": 0.0008982932035557036, "loss": 0.208, "step": 55342 }, { "epoch": 0.09812948645320524, "grad_norm": 0.32421875, "learning_rate": 0.0008982351993920281, "loss": 0.2104, "step": 55344 }, { "epoch": 0.09813303261851507, "grad_norm": 0.50390625, "learning_rate": 0.0008981771961107546, "loss": 0.1911, "step": 55346 }, { "epoch": 0.09813657878382488, "grad_norm": 0.3125, "learning_rate": 0.0008981191937121369, "loss": 0.2294, "step": 55348 }, { "epoch": 0.0981401249491347, "grad_norm": 0.33203125, "learning_rate": 0.0008980611921964284, "loss": 0.2669, "step": 55350 }, { "epoch": 0.09814367111444451, "grad_norm": 1.4296875, "learning_rate": 0.0008980031915638829, "loss": 0.1671, "step": 55352 }, { "epoch": 0.09814721727975433, "grad_norm": 0.6171875, "learning_rate": 0.0008979451918147545, "loss": 0.1874, "step": 55354 }, { "epoch": 0.09815076344506414, "grad_norm": 0.287109375, "learning_rate": 0.000897887192949296, "loss": 0.1548, "step": 55356 }, { "epoch": 0.09815430961037395, "grad_norm": 0.322265625, "learning_rate": 0.0008978291949677617, "loss": 0.2502, "step": 55358 }, { "epoch": 0.09815785577568377, "grad_norm": 0.74609375, "learning_rate": 0.0008977711978704051, "loss": 0.1826, "step": 55360 }, { "epoch": 0.09816140194099358, "grad_norm": 0.6796875, "learning_rate": 0.0008977132016574798, "loss": 0.1847, "step": 55362 }, { "epoch": 0.0981649481063034, "grad_norm": 5.90625, "learning_rate": 0.0008976552063292396, "loss": 0.2391, "step": 55364 }, { "epoch": 0.09816849427161321, "grad_norm": 0.72265625, "learning_rate": 0.0008975972118859377, "loss": 0.1586, "step": 55366 }, { "epoch": 0.09817204043692303, "grad_norm": 0.5703125, "learning_rate": 0.0008975392183278282, "loss": 0.1663, "step": 55368 }, { "epoch": 0.09817558660223284, "grad_norm": 0.65625, "learning_rate": 0.0008974812256551646, "loss": 0.1942, "step": 55370 }, { "epoch": 0.09817913276754266, "grad_norm": 0.4765625, "learning_rate": 0.0008974232338682003, "loss": 0.1872, "step": 55372 }, { "epoch": 0.09818267893285247, "grad_norm": 0.5625, "learning_rate": 0.000897365242967189, "loss": 0.1456, "step": 55374 }, { "epoch": 0.09818622509816229, "grad_norm": 0.65234375, "learning_rate": 0.0008973072529523845, "loss": 0.2198, "step": 55376 }, { "epoch": 0.0981897712634721, "grad_norm": 0.3359375, "learning_rate": 0.0008972492638240408, "loss": 0.1793, "step": 55378 }, { "epoch": 0.09819331742878191, "grad_norm": 3.15625, "learning_rate": 0.0008971912755824105, "loss": 0.3532, "step": 55380 }, { "epoch": 0.09819686359409173, "grad_norm": 0.404296875, "learning_rate": 0.0008971332882277481, "loss": 0.1908, "step": 55382 }, { "epoch": 0.09820040975940154, "grad_norm": 0.2890625, "learning_rate": 0.0008970753017603066, "loss": 0.1822, "step": 55384 }, { "epoch": 0.09820395592471136, "grad_norm": 0.2890625, "learning_rate": 0.0008970173161803399, "loss": 0.1643, "step": 55386 }, { "epoch": 0.09820750209002117, "grad_norm": 0.546875, "learning_rate": 0.000896959331488102, "loss": 0.1615, "step": 55388 }, { "epoch": 0.09821104825533099, "grad_norm": 0.181640625, "learning_rate": 0.0008969013476838454, "loss": 0.1764, "step": 55390 }, { "epoch": 0.09821459442064082, "grad_norm": 1.9921875, "learning_rate": 0.0008968433647678245, "loss": 0.3105, "step": 55392 }, { "epoch": 0.09821814058595063, "grad_norm": 0.38671875, "learning_rate": 0.0008967853827402928, "loss": 0.1639, "step": 55394 }, { "epoch": 0.09822168675126045, "grad_norm": 0.2333984375, "learning_rate": 0.000896727401601504, "loss": 0.1905, "step": 55396 }, { "epoch": 0.09822523291657026, "grad_norm": 0.515625, "learning_rate": 0.000896669421351711, "loss": 0.1686, "step": 55398 }, { "epoch": 0.09822877908188007, "grad_norm": 0.65625, "learning_rate": 0.0008966114419911684, "loss": 0.1891, "step": 55400 }, { "epoch": 0.09823232524718989, "grad_norm": 0.5234375, "learning_rate": 0.0008965534635201289, "loss": 0.2163, "step": 55402 }, { "epoch": 0.0982358714124997, "grad_norm": 0.9765625, "learning_rate": 0.0008964954859388465, "loss": 0.2444, "step": 55404 }, { "epoch": 0.09823941757780952, "grad_norm": 1.0234375, "learning_rate": 0.0008964375092475745, "loss": 0.549, "step": 55406 }, { "epoch": 0.09824296374311933, "grad_norm": 0.59375, "learning_rate": 0.0008963795334465665, "loss": 0.1798, "step": 55408 }, { "epoch": 0.09824650990842915, "grad_norm": 0.2314453125, "learning_rate": 0.0008963215585360762, "loss": 0.1461, "step": 55410 }, { "epoch": 0.09825005607373896, "grad_norm": 0.44140625, "learning_rate": 0.0008962635845163574, "loss": 0.2409, "step": 55412 }, { "epoch": 0.09825360223904878, "grad_norm": 0.375, "learning_rate": 0.0008962056113876631, "loss": 0.1878, "step": 55414 }, { "epoch": 0.09825714840435859, "grad_norm": 0.69921875, "learning_rate": 0.000896147639150247, "loss": 0.1687, "step": 55416 }, { "epoch": 0.0982606945696684, "grad_norm": 0.31640625, "learning_rate": 0.0008960896678043629, "loss": 0.2105, "step": 55418 }, { "epoch": 0.09826424073497822, "grad_norm": 0.85546875, "learning_rate": 0.0008960316973502641, "loss": 0.2468, "step": 55420 }, { "epoch": 0.09826778690028803, "grad_norm": 0.453125, "learning_rate": 0.000895973727788204, "loss": 0.2081, "step": 55422 }, { "epoch": 0.09827133306559785, "grad_norm": 0.87109375, "learning_rate": 0.0008959157591184366, "loss": 0.1928, "step": 55424 }, { "epoch": 0.09827487923090766, "grad_norm": 1.4609375, "learning_rate": 0.0008958577913412147, "loss": 0.3574, "step": 55426 }, { "epoch": 0.09827842539621748, "grad_norm": 0.6015625, "learning_rate": 0.0008957998244567926, "loss": 0.1614, "step": 55428 }, { "epoch": 0.09828197156152729, "grad_norm": 0.27734375, "learning_rate": 0.0008957418584654233, "loss": 0.1682, "step": 55430 }, { "epoch": 0.09828551772683711, "grad_norm": 2.65625, "learning_rate": 0.0008956838933673605, "loss": 0.2693, "step": 55432 }, { "epoch": 0.09828906389214692, "grad_norm": 0.7734375, "learning_rate": 0.0008956259291628577, "loss": 0.1596, "step": 55434 }, { "epoch": 0.09829261005745675, "grad_norm": 0.337890625, "learning_rate": 0.0008955679658521685, "loss": 0.1278, "step": 55436 }, { "epoch": 0.09829615622276656, "grad_norm": 0.4921875, "learning_rate": 0.0008955100034355459, "loss": 0.1858, "step": 55438 }, { "epoch": 0.09829970238807638, "grad_norm": 0.515625, "learning_rate": 0.0008954520419132442, "loss": 0.1793, "step": 55440 }, { "epoch": 0.0983032485533862, "grad_norm": 0.31640625, "learning_rate": 0.0008953940812855162, "loss": 0.1899, "step": 55442 }, { "epoch": 0.09830679471869601, "grad_norm": 0.462890625, "learning_rate": 0.0008953361215526159, "loss": 0.1833, "step": 55444 }, { "epoch": 0.09831034088400582, "grad_norm": 1.0390625, "learning_rate": 0.0008952781627147961, "loss": 0.3695, "step": 55446 }, { "epoch": 0.09831388704931564, "grad_norm": 0.6328125, "learning_rate": 0.0008952202047723108, "loss": 0.1572, "step": 55448 }, { "epoch": 0.09831743321462545, "grad_norm": 0.7421875, "learning_rate": 0.0008951622477254133, "loss": 0.1999, "step": 55450 }, { "epoch": 0.09832097937993527, "grad_norm": 0.97265625, "learning_rate": 0.0008951042915743577, "loss": 0.2374, "step": 55452 }, { "epoch": 0.09832452554524508, "grad_norm": 0.248046875, "learning_rate": 0.0008950463363193963, "loss": 0.1676, "step": 55454 }, { "epoch": 0.0983280717105549, "grad_norm": 0.3828125, "learning_rate": 0.0008949883819607836, "loss": 0.1473, "step": 55456 }, { "epoch": 0.09833161787586471, "grad_norm": 1.7265625, "learning_rate": 0.0008949304284987722, "loss": 0.4879, "step": 55458 }, { "epoch": 0.09833516404117452, "grad_norm": 1.3125, "learning_rate": 0.0008948724759336166, "loss": 0.3157, "step": 55460 }, { "epoch": 0.09833871020648434, "grad_norm": 0.66015625, "learning_rate": 0.0008948145242655692, "loss": 0.1684, "step": 55462 }, { "epoch": 0.09834225637179415, "grad_norm": 0.349609375, "learning_rate": 0.0008947565734948837, "loss": 0.175, "step": 55464 }, { "epoch": 0.09834580253710397, "grad_norm": 0.375, "learning_rate": 0.000894698623621814, "loss": 0.1527, "step": 55466 }, { "epoch": 0.09834934870241378, "grad_norm": 0.54296875, "learning_rate": 0.0008946406746466129, "loss": 0.1849, "step": 55468 }, { "epoch": 0.0983528948677236, "grad_norm": 1.8515625, "learning_rate": 0.000894582726569535, "loss": 0.2726, "step": 55470 }, { "epoch": 0.09835644103303341, "grad_norm": 0.490234375, "learning_rate": 0.0008945247793908324, "loss": 0.2734, "step": 55472 }, { "epoch": 0.09835998719834323, "grad_norm": 0.51953125, "learning_rate": 0.0008944668331107591, "loss": 0.2058, "step": 55474 }, { "epoch": 0.09836353336365304, "grad_norm": 0.25390625, "learning_rate": 0.0008944088877295684, "loss": 0.1967, "step": 55476 }, { "epoch": 0.09836707952896286, "grad_norm": 1.125, "learning_rate": 0.0008943509432475143, "loss": 0.1851, "step": 55478 }, { "epoch": 0.09837062569427267, "grad_norm": 1.0390625, "learning_rate": 0.0008942929996648493, "loss": 0.2749, "step": 55480 }, { "epoch": 0.0983741718595825, "grad_norm": 0.287109375, "learning_rate": 0.000894235056981827, "loss": 0.3135, "step": 55482 }, { "epoch": 0.09837771802489231, "grad_norm": 2.453125, "learning_rate": 0.0008941771151987013, "loss": 0.1893, "step": 55484 }, { "epoch": 0.09838126419020213, "grad_norm": 0.6328125, "learning_rate": 0.0008941191743157256, "loss": 0.2324, "step": 55486 }, { "epoch": 0.09838481035551194, "grad_norm": 1.0625, "learning_rate": 0.0008940612343331526, "loss": 0.2639, "step": 55488 }, { "epoch": 0.09838835652082176, "grad_norm": 0.453125, "learning_rate": 0.0008940032952512362, "loss": 0.1694, "step": 55490 }, { "epoch": 0.09839190268613157, "grad_norm": 0.205078125, "learning_rate": 0.0008939453570702299, "loss": 0.1808, "step": 55492 }, { "epoch": 0.09839544885144139, "grad_norm": 1.8984375, "learning_rate": 0.000893887419790387, "loss": 0.4099, "step": 55494 }, { "epoch": 0.0983989950167512, "grad_norm": 0.58984375, "learning_rate": 0.0008938294834119607, "loss": 0.2053, "step": 55496 }, { "epoch": 0.09840254118206102, "grad_norm": 0.33203125, "learning_rate": 0.0008937715479352044, "loss": 0.1194, "step": 55498 }, { "epoch": 0.09840608734737083, "grad_norm": 0.373046875, "learning_rate": 0.0008937136133603716, "loss": 0.24, "step": 55500 }, { "epoch": 0.09840963351268064, "grad_norm": 0.328125, "learning_rate": 0.0008936556796877158, "loss": 0.1668, "step": 55502 }, { "epoch": 0.09841317967799046, "grad_norm": 1.3203125, "learning_rate": 0.0008935977469174901, "loss": 0.1652, "step": 55504 }, { "epoch": 0.09841672584330027, "grad_norm": 0.427734375, "learning_rate": 0.0008935398150499476, "loss": 0.1363, "step": 55506 }, { "epoch": 0.09842027200861009, "grad_norm": 0.296875, "learning_rate": 0.0008934818840853423, "loss": 0.159, "step": 55508 }, { "epoch": 0.0984238181739199, "grad_norm": 0.51953125, "learning_rate": 0.0008934239540239275, "loss": 0.1611, "step": 55510 }, { "epoch": 0.09842736433922972, "grad_norm": 0.6015625, "learning_rate": 0.0008933660248659561, "loss": 0.1932, "step": 55512 }, { "epoch": 0.09843091050453953, "grad_norm": 0.40234375, "learning_rate": 0.0008933080966116818, "loss": 0.1782, "step": 55514 }, { "epoch": 0.09843445666984935, "grad_norm": 0.275390625, "learning_rate": 0.0008932501692613577, "loss": 0.2571, "step": 55516 }, { "epoch": 0.09843800283515916, "grad_norm": 0.490234375, "learning_rate": 0.0008931922428152374, "loss": 0.1649, "step": 55518 }, { "epoch": 0.09844154900046898, "grad_norm": 0.89453125, "learning_rate": 0.000893134317273574, "loss": 0.2045, "step": 55520 }, { "epoch": 0.09844509516577879, "grad_norm": 0.28515625, "learning_rate": 0.0008930763926366207, "loss": 0.1444, "step": 55522 }, { "epoch": 0.0984486413310886, "grad_norm": 0.54296875, "learning_rate": 0.0008930184689046315, "loss": 0.1937, "step": 55524 }, { "epoch": 0.09845218749639842, "grad_norm": 1.3359375, "learning_rate": 0.0008929605460778592, "loss": 0.2769, "step": 55526 }, { "epoch": 0.09845573366170825, "grad_norm": 0.50390625, "learning_rate": 0.000892902624156557, "loss": 0.208, "step": 55528 }, { "epoch": 0.09845927982701806, "grad_norm": 0.240234375, "learning_rate": 0.0008928447031409788, "loss": 0.1203, "step": 55530 }, { "epoch": 0.09846282599232788, "grad_norm": 0.20703125, "learning_rate": 0.000892786783031377, "loss": 0.1981, "step": 55532 }, { "epoch": 0.09846637215763769, "grad_norm": 1.0078125, "learning_rate": 0.000892728863828006, "loss": 0.2152, "step": 55534 }, { "epoch": 0.0984699183229475, "grad_norm": 0.28125, "learning_rate": 0.0008926709455311182, "loss": 0.3346, "step": 55536 }, { "epoch": 0.09847346448825732, "grad_norm": 0.5, "learning_rate": 0.0008926130281409672, "loss": 0.1718, "step": 55538 }, { "epoch": 0.09847701065356713, "grad_norm": 0.466796875, "learning_rate": 0.0008925551116578063, "loss": 0.1793, "step": 55540 }, { "epoch": 0.09848055681887695, "grad_norm": 0.263671875, "learning_rate": 0.0008924971960818892, "loss": 0.1595, "step": 55542 }, { "epoch": 0.09848410298418676, "grad_norm": 0.90234375, "learning_rate": 0.0008924392814134688, "loss": 0.2215, "step": 55544 }, { "epoch": 0.09848764914949658, "grad_norm": 2.15625, "learning_rate": 0.0008923813676527981, "loss": 0.1857, "step": 55546 }, { "epoch": 0.09849119531480639, "grad_norm": 0.392578125, "learning_rate": 0.0008923234548001309, "loss": 0.2391, "step": 55548 }, { "epoch": 0.09849474148011621, "grad_norm": 0.7109375, "learning_rate": 0.0008922655428557201, "loss": 0.1462, "step": 55550 }, { "epoch": 0.09849828764542602, "grad_norm": 0.43359375, "learning_rate": 0.0008922076318198196, "loss": 0.353, "step": 55552 }, { "epoch": 0.09850183381073584, "grad_norm": 0.46875, "learning_rate": 0.0008921497216926817, "loss": 0.1872, "step": 55554 }, { "epoch": 0.09850537997604565, "grad_norm": 0.28125, "learning_rate": 0.0008920918124745603, "loss": 0.1771, "step": 55556 }, { "epoch": 0.09850892614135547, "grad_norm": 0.2041015625, "learning_rate": 0.0008920339041657086, "loss": 0.1727, "step": 55558 }, { "epoch": 0.09851247230666528, "grad_norm": 1.1875, "learning_rate": 0.0008919759967663801, "loss": 0.2382, "step": 55560 }, { "epoch": 0.0985160184719751, "grad_norm": 0.373046875, "learning_rate": 0.0008919180902768274, "loss": 0.1492, "step": 55562 }, { "epoch": 0.09851956463728491, "grad_norm": 0.3125, "learning_rate": 0.000891860184697304, "loss": 0.1647, "step": 55564 }, { "epoch": 0.09852311080259472, "grad_norm": 1.1015625, "learning_rate": 0.0008918022800280636, "loss": 0.2848, "step": 55566 }, { "epoch": 0.09852665696790454, "grad_norm": 0.2177734375, "learning_rate": 0.0008917443762693589, "loss": 0.1558, "step": 55568 }, { "epoch": 0.09853020313321435, "grad_norm": 0.19921875, "learning_rate": 0.0008916864734214436, "loss": 0.183, "step": 55570 }, { "epoch": 0.09853374929852418, "grad_norm": 0.240234375, "learning_rate": 0.0008916285714845705, "loss": 0.2311, "step": 55572 }, { "epoch": 0.098537295463834, "grad_norm": 1.5625, "learning_rate": 0.0008915706704589925, "loss": 0.2849, "step": 55574 }, { "epoch": 0.09854084162914381, "grad_norm": 0.58203125, "learning_rate": 0.0008915127703449642, "loss": 0.2983, "step": 55576 }, { "epoch": 0.09854438779445363, "grad_norm": 0.40625, "learning_rate": 0.0008914548711427372, "loss": 0.2081, "step": 55578 }, { "epoch": 0.09854793395976344, "grad_norm": 0.85546875, "learning_rate": 0.0008913969728525657, "loss": 0.1419, "step": 55580 }, { "epoch": 0.09855148012507325, "grad_norm": 0.42578125, "learning_rate": 0.0008913390754747027, "loss": 0.2074, "step": 55582 }, { "epoch": 0.09855502629038307, "grad_norm": 0.396484375, "learning_rate": 0.0008912811790094015, "loss": 0.2254, "step": 55584 }, { "epoch": 0.09855857245569288, "grad_norm": 0.5078125, "learning_rate": 0.000891223283456915, "loss": 0.2213, "step": 55586 }, { "epoch": 0.0985621186210027, "grad_norm": 0.404296875, "learning_rate": 0.0008911653888174966, "loss": 0.1252, "step": 55588 }, { "epoch": 0.09856566478631251, "grad_norm": 2.421875, "learning_rate": 0.0008911074950913995, "loss": 0.3151, "step": 55590 }, { "epoch": 0.09856921095162233, "grad_norm": 0.5625, "learning_rate": 0.0008910496022788773, "loss": 0.171, "step": 55592 }, { "epoch": 0.09857275711693214, "grad_norm": 4.03125, "learning_rate": 0.0008909917103801824, "loss": 0.2999, "step": 55594 }, { "epoch": 0.09857630328224196, "grad_norm": 0.4921875, "learning_rate": 0.0008909338193955681, "loss": 0.1789, "step": 55596 }, { "epoch": 0.09857984944755177, "grad_norm": 1.125, "learning_rate": 0.0008908759293252879, "loss": 0.1893, "step": 55598 }, { "epoch": 0.09858339561286159, "grad_norm": 0.6640625, "learning_rate": 0.0008908180401695953, "loss": 0.1443, "step": 55600 }, { "epoch": 0.0985869417781714, "grad_norm": 0.703125, "learning_rate": 0.0008907601519287427, "loss": 0.1995, "step": 55602 }, { "epoch": 0.09859048794348121, "grad_norm": 0.306640625, "learning_rate": 0.0008907022646029839, "loss": 0.1975, "step": 55604 }, { "epoch": 0.09859403410879103, "grad_norm": 0.1982421875, "learning_rate": 0.0008906443781925717, "loss": 0.1592, "step": 55606 }, { "epoch": 0.09859758027410084, "grad_norm": 0.22265625, "learning_rate": 0.0008905864926977596, "loss": 0.1628, "step": 55608 }, { "epoch": 0.09860112643941066, "grad_norm": 1.5625, "learning_rate": 0.0008905286081188005, "loss": 0.2056, "step": 55610 }, { "epoch": 0.09860467260472047, "grad_norm": 0.73046875, "learning_rate": 0.0008904707244559471, "loss": 0.2081, "step": 55612 }, { "epoch": 0.09860821877003029, "grad_norm": 0.203125, "learning_rate": 0.0008904128417094535, "loss": 0.1844, "step": 55614 }, { "epoch": 0.0986117649353401, "grad_norm": 0.423828125, "learning_rate": 0.0008903549598795726, "loss": 0.1584, "step": 55616 }, { "epoch": 0.09861531110064993, "grad_norm": 0.49609375, "learning_rate": 0.0008902970789665568, "loss": 0.2014, "step": 55618 }, { "epoch": 0.09861885726595974, "grad_norm": 0.328125, "learning_rate": 0.00089023919897066, "loss": 0.1916, "step": 55620 }, { "epoch": 0.09862240343126956, "grad_norm": 2.1875, "learning_rate": 0.0008901813198921349, "loss": 0.2027, "step": 55622 }, { "epoch": 0.09862594959657937, "grad_norm": 0.45703125, "learning_rate": 0.0008901234417312352, "loss": 0.1665, "step": 55624 }, { "epoch": 0.09862949576188919, "grad_norm": 0.2275390625, "learning_rate": 0.0008900655644882137, "loss": 0.1807, "step": 55626 }, { "epoch": 0.098633041927199, "grad_norm": 1.0625, "learning_rate": 0.000890007688163323, "loss": 0.3785, "step": 55628 }, { "epoch": 0.09863658809250882, "grad_norm": 1.2109375, "learning_rate": 0.0008899498127568168, "loss": 0.2001, "step": 55630 }, { "epoch": 0.09864013425781863, "grad_norm": 0.828125, "learning_rate": 0.0008898919382689485, "loss": 0.3361, "step": 55632 }, { "epoch": 0.09864368042312845, "grad_norm": 0.2578125, "learning_rate": 0.0008898340646999705, "loss": 0.176, "step": 55634 }, { "epoch": 0.09864722658843826, "grad_norm": 0.396484375, "learning_rate": 0.0008897761920501362, "loss": 0.1695, "step": 55636 }, { "epoch": 0.09865077275374808, "grad_norm": 0.3984375, "learning_rate": 0.0008897183203196986, "loss": 0.2162, "step": 55638 }, { "epoch": 0.09865431891905789, "grad_norm": 0.283203125, "learning_rate": 0.0008896604495089113, "loss": 0.198, "step": 55640 }, { "epoch": 0.0986578650843677, "grad_norm": 1.0078125, "learning_rate": 0.0008896025796180269, "loss": 0.2113, "step": 55642 }, { "epoch": 0.09866141124967752, "grad_norm": 0.4453125, "learning_rate": 0.0008895447106472987, "loss": 0.2474, "step": 55644 }, { "epoch": 0.09866495741498733, "grad_norm": 0.5546875, "learning_rate": 0.0008894868425969793, "loss": 0.1919, "step": 55646 }, { "epoch": 0.09866850358029715, "grad_norm": 0.60546875, "learning_rate": 0.0008894289754673222, "loss": 0.1651, "step": 55648 }, { "epoch": 0.09867204974560696, "grad_norm": 0.62109375, "learning_rate": 0.000889371109258581, "loss": 0.1807, "step": 55650 }, { "epoch": 0.09867559591091678, "grad_norm": 0.7890625, "learning_rate": 0.0008893132439710079, "loss": 0.1457, "step": 55652 }, { "epoch": 0.09867914207622659, "grad_norm": 0.7109375, "learning_rate": 0.0008892553796048562, "loss": 0.1996, "step": 55654 }, { "epoch": 0.0986826882415364, "grad_norm": 0.341796875, "learning_rate": 0.0008891975161603786, "loss": 0.1569, "step": 55656 }, { "epoch": 0.09868623440684622, "grad_norm": 1.53125, "learning_rate": 0.0008891396536378296, "loss": 0.2765, "step": 55658 }, { "epoch": 0.09868978057215604, "grad_norm": 0.625, "learning_rate": 0.0008890817920374608, "loss": 0.175, "step": 55660 }, { "epoch": 0.09869332673746585, "grad_norm": 0.6640625, "learning_rate": 0.0008890239313595256, "loss": 0.2299, "step": 55662 }, { "epoch": 0.09869687290277568, "grad_norm": 0.54296875, "learning_rate": 0.0008889660716042773, "loss": 0.1919, "step": 55664 }, { "epoch": 0.0987004190680855, "grad_norm": 0.41796875, "learning_rate": 0.000888908212771969, "loss": 0.2019, "step": 55666 }, { "epoch": 0.09870396523339531, "grad_norm": 0.5390625, "learning_rate": 0.0008888503548628532, "loss": 0.212, "step": 55668 }, { "epoch": 0.09870751139870512, "grad_norm": 0.458984375, "learning_rate": 0.0008887924978771835, "loss": 0.1845, "step": 55670 }, { "epoch": 0.09871105756401494, "grad_norm": 0.330078125, "learning_rate": 0.0008887346418152127, "loss": 0.2416, "step": 55672 }, { "epoch": 0.09871460372932475, "grad_norm": 0.333984375, "learning_rate": 0.0008886767866771941, "loss": 0.1793, "step": 55674 }, { "epoch": 0.09871814989463457, "grad_norm": 0.4609375, "learning_rate": 0.00088861893246338, "loss": 0.2015, "step": 55676 }, { "epoch": 0.09872169605994438, "grad_norm": 0.435546875, "learning_rate": 0.0008885610791740242, "loss": 0.1684, "step": 55678 }, { "epoch": 0.0987252422252542, "grad_norm": 0.24609375, "learning_rate": 0.0008885032268093796, "loss": 0.2307, "step": 55680 }, { "epoch": 0.09872878839056401, "grad_norm": 0.5234375, "learning_rate": 0.0008884453753696988, "loss": 0.1801, "step": 55682 }, { "epoch": 0.09873233455587382, "grad_norm": 1.640625, "learning_rate": 0.0008883875248552352, "loss": 0.2219, "step": 55684 }, { "epoch": 0.09873588072118364, "grad_norm": 0.26953125, "learning_rate": 0.0008883296752662413, "loss": 0.176, "step": 55686 }, { "epoch": 0.09873942688649345, "grad_norm": 0.328125, "learning_rate": 0.0008882718266029704, "loss": 0.2393, "step": 55688 }, { "epoch": 0.09874297305180327, "grad_norm": 16.5, "learning_rate": 0.0008882139788656762, "loss": 0.2603, "step": 55690 }, { "epoch": 0.09874651921711308, "grad_norm": 1.4921875, "learning_rate": 0.0008881561320546105, "loss": 0.1687, "step": 55692 }, { "epoch": 0.0987500653824229, "grad_norm": 0.2001953125, "learning_rate": 0.0008880982861700268, "loss": 0.1857, "step": 55694 }, { "epoch": 0.09875361154773271, "grad_norm": 0.890625, "learning_rate": 0.0008880404412121781, "loss": 0.1918, "step": 55696 }, { "epoch": 0.09875715771304253, "grad_norm": 0.486328125, "learning_rate": 0.0008879825971813178, "loss": 0.1955, "step": 55698 }, { "epoch": 0.09876070387835234, "grad_norm": 3.34375, "learning_rate": 0.0008879247540776982, "loss": 0.2801, "step": 55700 }, { "epoch": 0.09876425004366216, "grad_norm": 0.35546875, "learning_rate": 0.0008878669119015728, "loss": 0.2002, "step": 55702 }, { "epoch": 0.09876779620897197, "grad_norm": 3.015625, "learning_rate": 0.0008878090706531938, "loss": 0.4214, "step": 55704 }, { "epoch": 0.09877134237428178, "grad_norm": 0.84375, "learning_rate": 0.0008877512303328152, "loss": 0.2176, "step": 55706 }, { "epoch": 0.09877488853959161, "grad_norm": 0.55859375, "learning_rate": 0.0008876933909406891, "loss": 0.1511, "step": 55708 }, { "epoch": 0.09877843470490143, "grad_norm": 0.81640625, "learning_rate": 0.0008876355524770688, "loss": 0.2259, "step": 55710 }, { "epoch": 0.09878198087021124, "grad_norm": 0.15234375, "learning_rate": 0.0008875777149422071, "loss": 0.1491, "step": 55712 }, { "epoch": 0.09878552703552106, "grad_norm": 0.2119140625, "learning_rate": 0.0008875198783363573, "loss": 0.1848, "step": 55714 }, { "epoch": 0.09878907320083087, "grad_norm": 1.421875, "learning_rate": 0.0008874620426597722, "loss": 0.2072, "step": 55716 }, { "epoch": 0.09879261936614069, "grad_norm": 1.5625, "learning_rate": 0.0008874042079127047, "loss": 0.2698, "step": 55718 }, { "epoch": 0.0987961655314505, "grad_norm": 0.7578125, "learning_rate": 0.0008873463740954075, "loss": 0.2012, "step": 55720 }, { "epoch": 0.09879971169676031, "grad_norm": 0.49609375, "learning_rate": 0.0008872885412081335, "loss": 0.163, "step": 55722 }, { "epoch": 0.09880325786207013, "grad_norm": 0.4296875, "learning_rate": 0.0008872307092511365, "loss": 0.174, "step": 55724 }, { "epoch": 0.09880680402737994, "grad_norm": 0.333984375, "learning_rate": 0.0008871728782246684, "loss": 0.1968, "step": 55726 }, { "epoch": 0.09881035019268976, "grad_norm": 0.5859375, "learning_rate": 0.0008871150481289825, "loss": 0.2374, "step": 55728 }, { "epoch": 0.09881389635799957, "grad_norm": 0.53515625, "learning_rate": 0.0008870572189643316, "loss": 0.3656, "step": 55730 }, { "epoch": 0.09881744252330939, "grad_norm": 0.78125, "learning_rate": 0.0008869993907309692, "loss": 0.1445, "step": 55732 }, { "epoch": 0.0988209886886192, "grad_norm": 0.53125, "learning_rate": 0.0008869415634291473, "loss": 0.1359, "step": 55734 }, { "epoch": 0.09882453485392902, "grad_norm": 0.427734375, "learning_rate": 0.0008868837370591195, "loss": 0.1935, "step": 55736 }, { "epoch": 0.09882808101923883, "grad_norm": 0.275390625, "learning_rate": 0.0008868259116211383, "loss": 0.1431, "step": 55738 }, { "epoch": 0.09883162718454865, "grad_norm": 0.41015625, "learning_rate": 0.0008867680871154571, "loss": 0.1549, "step": 55740 }, { "epoch": 0.09883517334985846, "grad_norm": 1.2734375, "learning_rate": 0.0008867102635423279, "loss": 0.1762, "step": 55742 }, { "epoch": 0.09883871951516827, "grad_norm": 0.51953125, "learning_rate": 0.0008866524409020043, "loss": 0.1818, "step": 55744 }, { "epoch": 0.09884226568047809, "grad_norm": 0.423828125, "learning_rate": 0.0008865946191947388, "loss": 0.2001, "step": 55746 }, { "epoch": 0.0988458118457879, "grad_norm": 1.0390625, "learning_rate": 0.000886536798420785, "loss": 0.5109, "step": 55748 }, { "epoch": 0.09884935801109772, "grad_norm": 0.333984375, "learning_rate": 0.0008864789785803948, "loss": 0.2153, "step": 55750 }, { "epoch": 0.09885290417640753, "grad_norm": 0.640625, "learning_rate": 0.0008864211596738214, "loss": 0.1635, "step": 55752 }, { "epoch": 0.09885645034171736, "grad_norm": 0.31640625, "learning_rate": 0.0008863633417013184, "loss": 0.2654, "step": 55754 }, { "epoch": 0.09885999650702718, "grad_norm": 0.50390625, "learning_rate": 0.0008863055246631377, "loss": 0.1288, "step": 55756 }, { "epoch": 0.09886354267233699, "grad_norm": 0.58984375, "learning_rate": 0.0008862477085595326, "loss": 0.2132, "step": 55758 }, { "epoch": 0.0988670888376468, "grad_norm": 0.68359375, "learning_rate": 0.0008861898933907555, "loss": 0.1928, "step": 55760 }, { "epoch": 0.09887063500295662, "grad_norm": 0.2734375, "learning_rate": 0.0008861320791570597, "loss": 0.1858, "step": 55762 }, { "epoch": 0.09887418116826643, "grad_norm": 1.796875, "learning_rate": 0.0008860742658586985, "loss": 0.1883, "step": 55764 }, { "epoch": 0.09887772733357625, "grad_norm": 0.35546875, "learning_rate": 0.0008860164534959239, "loss": 0.2241, "step": 55766 }, { "epoch": 0.09888127349888606, "grad_norm": 0.283203125, "learning_rate": 0.0008859586420689886, "loss": 0.1508, "step": 55768 }, { "epoch": 0.09888481966419588, "grad_norm": 9.625, "learning_rate": 0.0008859008315781463, "loss": 0.2651, "step": 55770 }, { "epoch": 0.09888836582950569, "grad_norm": 0.7109375, "learning_rate": 0.0008858430220236495, "loss": 0.3187, "step": 55772 }, { "epoch": 0.0988919119948155, "grad_norm": 0.28125, "learning_rate": 0.0008857852134057507, "loss": 0.167, "step": 55774 }, { "epoch": 0.09889545816012532, "grad_norm": 1.8828125, "learning_rate": 0.0008857274057247031, "loss": 0.3077, "step": 55776 }, { "epoch": 0.09889900432543514, "grad_norm": 0.310546875, "learning_rate": 0.000885669598980759, "loss": 0.156, "step": 55778 }, { "epoch": 0.09890255049074495, "grad_norm": 0.6171875, "learning_rate": 0.0008856117931741723, "loss": 0.1914, "step": 55780 }, { "epoch": 0.09890609665605476, "grad_norm": 0.39453125, "learning_rate": 0.0008855539883051944, "loss": 0.1933, "step": 55782 }, { "epoch": 0.09890964282136458, "grad_norm": 0.671875, "learning_rate": 0.000885496184374079, "loss": 0.2202, "step": 55784 }, { "epoch": 0.0989131889866744, "grad_norm": 0.2734375, "learning_rate": 0.0008854383813810786, "loss": 0.2614, "step": 55786 }, { "epoch": 0.09891673515198421, "grad_norm": 0.294921875, "learning_rate": 0.0008853805793264465, "loss": 0.1668, "step": 55788 }, { "epoch": 0.09892028131729402, "grad_norm": 0.341796875, "learning_rate": 0.0008853227782104347, "loss": 0.1926, "step": 55790 }, { "epoch": 0.09892382748260384, "grad_norm": 0.66015625, "learning_rate": 0.0008852649780332965, "loss": 0.2537, "step": 55792 }, { "epoch": 0.09892737364791365, "grad_norm": 0.208984375, "learning_rate": 0.0008852071787952844, "loss": 0.1796, "step": 55794 }, { "epoch": 0.09893091981322347, "grad_norm": 2.03125, "learning_rate": 0.0008851493804966512, "loss": 0.1658, "step": 55796 }, { "epoch": 0.09893446597853328, "grad_norm": 0.26171875, "learning_rate": 0.0008850915831376504, "loss": 0.1928, "step": 55798 }, { "epoch": 0.09893801214384311, "grad_norm": 0.3125, "learning_rate": 0.0008850337867185337, "loss": 0.187, "step": 55800 }, { "epoch": 0.09894155830915292, "grad_norm": 0.380859375, "learning_rate": 0.0008849759912395543, "loss": 0.3241, "step": 55802 }, { "epoch": 0.09894510447446274, "grad_norm": 0.5546875, "learning_rate": 0.0008849181967009651, "loss": 0.2108, "step": 55804 }, { "epoch": 0.09894865063977255, "grad_norm": 0.28515625, "learning_rate": 0.000884860403103019, "loss": 0.1412, "step": 55806 }, { "epoch": 0.09895219680508237, "grad_norm": 1.3671875, "learning_rate": 0.0008848026104459684, "loss": 0.1722, "step": 55808 }, { "epoch": 0.09895574297039218, "grad_norm": 0.63671875, "learning_rate": 0.0008847448187300662, "loss": 0.1712, "step": 55810 }, { "epoch": 0.098959289135702, "grad_norm": 0.2236328125, "learning_rate": 0.0008846870279555649, "loss": 0.153, "step": 55812 }, { "epoch": 0.09896283530101181, "grad_norm": 1.140625, "learning_rate": 0.0008846292381227181, "loss": 0.1641, "step": 55814 }, { "epoch": 0.09896638146632163, "grad_norm": 0.4296875, "learning_rate": 0.0008845714492317773, "loss": 0.1753, "step": 55816 }, { "epoch": 0.09896992763163144, "grad_norm": 0.68359375, "learning_rate": 0.000884513661282996, "loss": 0.1723, "step": 55818 }, { "epoch": 0.09897347379694126, "grad_norm": 0.28515625, "learning_rate": 0.0008844558742766267, "loss": 0.1351, "step": 55820 }, { "epoch": 0.09897701996225107, "grad_norm": 0.7265625, "learning_rate": 0.0008843980882129223, "loss": 0.1686, "step": 55822 }, { "epoch": 0.09898056612756088, "grad_norm": 0.494140625, "learning_rate": 0.0008843403030921355, "loss": 0.3363, "step": 55824 }, { "epoch": 0.0989841122928707, "grad_norm": 0.341796875, "learning_rate": 0.0008842825189145187, "loss": 0.3175, "step": 55826 }, { "epoch": 0.09898765845818051, "grad_norm": 0.330078125, "learning_rate": 0.0008842247356803252, "loss": 0.209, "step": 55828 }, { "epoch": 0.09899120462349033, "grad_norm": 0.73828125, "learning_rate": 0.0008841669533898073, "loss": 0.1894, "step": 55830 }, { "epoch": 0.09899475078880014, "grad_norm": 0.240234375, "learning_rate": 0.0008841091720432177, "loss": 0.1638, "step": 55832 }, { "epoch": 0.09899829695410996, "grad_norm": 0.44921875, "learning_rate": 0.000884051391640809, "loss": 0.249, "step": 55834 }, { "epoch": 0.09900184311941977, "grad_norm": 20.5, "learning_rate": 0.0008839936121828341, "loss": 0.1764, "step": 55836 }, { "epoch": 0.09900538928472959, "grad_norm": 0.90234375, "learning_rate": 0.0008839358336695462, "loss": 0.2035, "step": 55838 }, { "epoch": 0.0990089354500394, "grad_norm": 0.3203125, "learning_rate": 0.0008838780561011968, "loss": 0.1983, "step": 55840 }, { "epoch": 0.09901248161534922, "grad_norm": 0.515625, "learning_rate": 0.0008838202794780395, "loss": 0.1682, "step": 55842 }, { "epoch": 0.09901602778065904, "grad_norm": 1.578125, "learning_rate": 0.0008837625038003265, "loss": 0.2423, "step": 55844 }, { "epoch": 0.09901957394596886, "grad_norm": 0.2021484375, "learning_rate": 0.0008837047290683114, "loss": 0.2777, "step": 55846 }, { "epoch": 0.09902312011127867, "grad_norm": 0.3671875, "learning_rate": 0.0008836469552822454, "loss": 0.1513, "step": 55848 }, { "epoch": 0.09902666627658849, "grad_norm": 0.37109375, "learning_rate": 0.0008835891824423823, "loss": 0.1702, "step": 55850 }, { "epoch": 0.0990302124418983, "grad_norm": 0.9375, "learning_rate": 0.0008835314105489743, "loss": 0.1984, "step": 55852 }, { "epoch": 0.09903375860720812, "grad_norm": 0.59375, "learning_rate": 0.0008834736396022744, "loss": 0.1673, "step": 55854 }, { "epoch": 0.09903730477251793, "grad_norm": 0.318359375, "learning_rate": 0.000883415869602535, "loss": 0.2426, "step": 55856 }, { "epoch": 0.09904085093782775, "grad_norm": 0.95703125, "learning_rate": 0.0008833581005500084, "loss": 0.3368, "step": 55858 }, { "epoch": 0.09904439710313756, "grad_norm": 0.267578125, "learning_rate": 0.0008833003324449478, "loss": 0.1819, "step": 55860 }, { "epoch": 0.09904794326844737, "grad_norm": 0.51953125, "learning_rate": 0.0008832425652876059, "loss": 0.1559, "step": 55862 }, { "epoch": 0.09905148943375719, "grad_norm": 0.6015625, "learning_rate": 0.0008831847990782349, "loss": 0.1766, "step": 55864 }, { "epoch": 0.099055035599067, "grad_norm": 1.4296875, "learning_rate": 0.0008831270338170876, "loss": 0.2154, "step": 55866 }, { "epoch": 0.09905858176437682, "grad_norm": 0.470703125, "learning_rate": 0.0008830692695044169, "loss": 0.1499, "step": 55868 }, { "epoch": 0.09906212792968663, "grad_norm": 0.1865234375, "learning_rate": 0.0008830115061404751, "loss": 0.1605, "step": 55870 }, { "epoch": 0.09906567409499645, "grad_norm": 0.150390625, "learning_rate": 0.0008829537437255151, "loss": 0.1461, "step": 55872 }, { "epoch": 0.09906922026030626, "grad_norm": 0.67578125, "learning_rate": 0.0008828959822597889, "loss": 0.2762, "step": 55874 }, { "epoch": 0.09907276642561608, "grad_norm": 0.44140625, "learning_rate": 0.0008828382217435498, "loss": 0.1631, "step": 55876 }, { "epoch": 0.09907631259092589, "grad_norm": 0.38671875, "learning_rate": 0.0008827804621770503, "loss": 0.1541, "step": 55878 }, { "epoch": 0.0990798587562357, "grad_norm": 0.34375, "learning_rate": 0.0008827227035605428, "loss": 0.1818, "step": 55880 }, { "epoch": 0.09908340492154552, "grad_norm": 0.470703125, "learning_rate": 0.0008826649458942797, "loss": 0.1713, "step": 55882 }, { "epoch": 0.09908695108685533, "grad_norm": 2.375, "learning_rate": 0.0008826071891785143, "loss": 0.2413, "step": 55884 }, { "epoch": 0.09909049725216515, "grad_norm": 0.462890625, "learning_rate": 0.0008825494334134985, "loss": 0.1843, "step": 55886 }, { "epoch": 0.09909404341747496, "grad_norm": 0.453125, "learning_rate": 0.0008824916785994856, "loss": 0.248, "step": 55888 }, { "epoch": 0.09909758958278479, "grad_norm": 0.310546875, "learning_rate": 0.0008824339247367275, "loss": 0.1934, "step": 55890 }, { "epoch": 0.09910113574809461, "grad_norm": 0.68359375, "learning_rate": 0.0008823761718254766, "loss": 0.1487, "step": 55892 }, { "epoch": 0.09910468191340442, "grad_norm": 0.197265625, "learning_rate": 0.0008823184198659864, "loss": 0.144, "step": 55894 }, { "epoch": 0.09910822807871424, "grad_norm": 0.4765625, "learning_rate": 0.0008822606688585091, "loss": 0.2042, "step": 55896 }, { "epoch": 0.09911177424402405, "grad_norm": 0.35546875, "learning_rate": 0.0008822029188032967, "loss": 0.1734, "step": 55898 }, { "epoch": 0.09911532040933387, "grad_norm": 0.44140625, "learning_rate": 0.0008821451697006024, "loss": 0.1797, "step": 55900 }, { "epoch": 0.09911886657464368, "grad_norm": 0.54296875, "learning_rate": 0.0008820874215506786, "loss": 0.1548, "step": 55902 }, { "epoch": 0.0991224127399535, "grad_norm": 1.8515625, "learning_rate": 0.0008820296743537781, "loss": 0.2061, "step": 55904 }, { "epoch": 0.09912595890526331, "grad_norm": 0.259765625, "learning_rate": 0.000881971928110153, "loss": 0.1997, "step": 55906 }, { "epoch": 0.09912950507057312, "grad_norm": 0.2216796875, "learning_rate": 0.0008819141828200559, "loss": 0.21, "step": 55908 }, { "epoch": 0.09913305123588294, "grad_norm": 0.51171875, "learning_rate": 0.0008818564384837395, "loss": 0.1663, "step": 55910 }, { "epoch": 0.09913659740119275, "grad_norm": 0.1806640625, "learning_rate": 0.0008817986951014567, "loss": 0.1688, "step": 55912 }, { "epoch": 0.09914014356650257, "grad_norm": 0.51953125, "learning_rate": 0.0008817409526734594, "loss": 0.162, "step": 55914 }, { "epoch": 0.09914368973181238, "grad_norm": 0.423828125, "learning_rate": 0.0008816832112000002, "loss": 0.1951, "step": 55916 }, { "epoch": 0.0991472358971222, "grad_norm": 0.431640625, "learning_rate": 0.0008816254706813318, "loss": 0.2108, "step": 55918 }, { "epoch": 0.09915078206243201, "grad_norm": 5.0625, "learning_rate": 0.0008815677311177073, "loss": 0.2278, "step": 55920 }, { "epoch": 0.09915432822774183, "grad_norm": 0.279296875, "learning_rate": 0.0008815099925093781, "loss": 0.1815, "step": 55922 }, { "epoch": 0.09915787439305164, "grad_norm": 0.337890625, "learning_rate": 0.0008814522548565978, "loss": 0.1915, "step": 55924 }, { "epoch": 0.09916142055836145, "grad_norm": 0.435546875, "learning_rate": 0.0008813945181596179, "loss": 0.1592, "step": 55926 }, { "epoch": 0.09916496672367127, "grad_norm": 1.2265625, "learning_rate": 0.0008813367824186917, "loss": 0.2829, "step": 55928 }, { "epoch": 0.09916851288898108, "grad_norm": 0.6875, "learning_rate": 0.0008812790476340713, "loss": 0.1958, "step": 55930 }, { "epoch": 0.0991720590542909, "grad_norm": 0.2421875, "learning_rate": 0.000881221313806009, "loss": 0.1717, "step": 55932 }, { "epoch": 0.09917560521960071, "grad_norm": 0.7734375, "learning_rate": 0.0008811635809347578, "loss": 0.186, "step": 55934 }, { "epoch": 0.09917915138491054, "grad_norm": 0.57421875, "learning_rate": 0.0008811058490205703, "loss": 0.2874, "step": 55936 }, { "epoch": 0.09918269755022036, "grad_norm": 0.6171875, "learning_rate": 0.0008810481180636984, "loss": 0.1805, "step": 55938 }, { "epoch": 0.09918624371553017, "grad_norm": 0.251953125, "learning_rate": 0.0008809903880643948, "loss": 0.1993, "step": 55940 }, { "epoch": 0.09918978988083998, "grad_norm": 0.609375, "learning_rate": 0.000880932659022912, "loss": 0.1243, "step": 55942 }, { "epoch": 0.0991933360461498, "grad_norm": 0.41015625, "learning_rate": 0.0008808749309395028, "loss": 0.1574, "step": 55944 }, { "epoch": 0.09919688221145961, "grad_norm": 0.86328125, "learning_rate": 0.0008808172038144193, "loss": 0.3392, "step": 55946 }, { "epoch": 0.09920042837676943, "grad_norm": 0.19921875, "learning_rate": 0.0008807594776479138, "loss": 0.1146, "step": 55948 }, { "epoch": 0.09920397454207924, "grad_norm": 12.3125, "learning_rate": 0.0008807017524402387, "loss": 0.2193, "step": 55950 }, { "epoch": 0.09920752070738906, "grad_norm": 0.2158203125, "learning_rate": 0.0008806440281916475, "loss": 0.2076, "step": 55952 }, { "epoch": 0.09921106687269887, "grad_norm": 0.55078125, "learning_rate": 0.0008805863049023915, "loss": 0.3185, "step": 55954 }, { "epoch": 0.09921461303800869, "grad_norm": 0.7734375, "learning_rate": 0.0008805285825727236, "loss": 0.2364, "step": 55956 }, { "epoch": 0.0992181592033185, "grad_norm": 0.1884765625, "learning_rate": 0.000880470861202896, "loss": 0.161, "step": 55958 }, { "epoch": 0.09922170536862832, "grad_norm": 0.328125, "learning_rate": 0.0008804131407931617, "loss": 0.1677, "step": 55960 }, { "epoch": 0.09922525153393813, "grad_norm": 0.71484375, "learning_rate": 0.0008803554213437726, "loss": 0.2035, "step": 55962 }, { "epoch": 0.09922879769924794, "grad_norm": 0.56640625, "learning_rate": 0.0008802977028549812, "loss": 0.1857, "step": 55964 }, { "epoch": 0.09923234386455776, "grad_norm": 0.33984375, "learning_rate": 0.0008802399853270402, "loss": 0.1755, "step": 55966 }, { "epoch": 0.09923589002986757, "grad_norm": 0.306640625, "learning_rate": 0.0008801822687602019, "loss": 0.1071, "step": 55968 }, { "epoch": 0.09923943619517739, "grad_norm": 0.396484375, "learning_rate": 0.0008801245531547184, "loss": 0.1414, "step": 55970 }, { "epoch": 0.0992429823604872, "grad_norm": 0.27734375, "learning_rate": 0.0008800668385108426, "loss": 0.1348, "step": 55972 }, { "epoch": 0.09924652852579702, "grad_norm": 0.609375, "learning_rate": 0.0008800091248288265, "loss": 0.1609, "step": 55974 }, { "epoch": 0.09925007469110683, "grad_norm": 0.271484375, "learning_rate": 0.000879951412108923, "loss": 0.1597, "step": 55976 }, { "epoch": 0.09925362085641665, "grad_norm": 0.3125, "learning_rate": 0.0008798937003513841, "loss": 0.173, "step": 55978 }, { "epoch": 0.09925716702172648, "grad_norm": 0.3828125, "learning_rate": 0.0008798359895564623, "loss": 0.2102, "step": 55980 }, { "epoch": 0.09926071318703629, "grad_norm": 0.25390625, "learning_rate": 0.0008797782797244097, "loss": 0.183, "step": 55982 }, { "epoch": 0.0992642593523461, "grad_norm": 0.515625, "learning_rate": 0.0008797205708554792, "loss": 0.2535, "step": 55984 }, { "epoch": 0.09926780551765592, "grad_norm": 0.5234375, "learning_rate": 0.0008796628629499232, "loss": 0.1582, "step": 55986 }, { "epoch": 0.09927135168296573, "grad_norm": 0.3515625, "learning_rate": 0.0008796051560079936, "loss": 0.1661, "step": 55988 }, { "epoch": 0.09927489784827555, "grad_norm": 0.83984375, "learning_rate": 0.0008795474500299429, "loss": 0.17, "step": 55990 }, { "epoch": 0.09927844401358536, "grad_norm": 0.33203125, "learning_rate": 0.0008794897450160237, "loss": 0.1517, "step": 55992 }, { "epoch": 0.09928199017889518, "grad_norm": 1.375, "learning_rate": 0.0008794320409664884, "loss": 0.1961, "step": 55994 }, { "epoch": 0.09928553634420499, "grad_norm": 0.28515625, "learning_rate": 0.0008793743378815893, "loss": 0.1526, "step": 55996 }, { "epoch": 0.0992890825095148, "grad_norm": 0.5546875, "learning_rate": 0.0008793166357615785, "loss": 0.1893, "step": 55998 }, { "epoch": 0.09929262867482462, "grad_norm": 4.09375, "learning_rate": 0.0008792589346067086, "loss": 0.4298, "step": 56000 }, { "epoch": 0.09929617484013444, "grad_norm": 0.6796875, "learning_rate": 0.0008792012344172322, "loss": 0.1929, "step": 56002 }, { "epoch": 0.09929972100544425, "grad_norm": 0.62109375, "learning_rate": 0.0008791435351934008, "loss": 0.2183, "step": 56004 }, { "epoch": 0.09930326717075406, "grad_norm": 2.046875, "learning_rate": 0.0008790858369354675, "loss": 0.398, "step": 56006 }, { "epoch": 0.09930681333606388, "grad_norm": 0.255859375, "learning_rate": 0.0008790281396436843, "loss": 0.224, "step": 56008 }, { "epoch": 0.0993103595013737, "grad_norm": 0.859375, "learning_rate": 0.0008789704433183041, "loss": 0.1713, "step": 56010 }, { "epoch": 0.09931390566668351, "grad_norm": 0.3125, "learning_rate": 0.0008789127479595784, "loss": 0.2118, "step": 56012 }, { "epoch": 0.09931745183199332, "grad_norm": 0.396484375, "learning_rate": 0.0008788550535677599, "loss": 0.2228, "step": 56014 }, { "epoch": 0.09932099799730314, "grad_norm": 0.37890625, "learning_rate": 0.0008787973601431012, "loss": 0.1917, "step": 56016 }, { "epoch": 0.09932454416261295, "grad_norm": 2.75, "learning_rate": 0.0008787396676858543, "loss": 0.4334, "step": 56018 }, { "epoch": 0.09932809032792277, "grad_norm": 1.9140625, "learning_rate": 0.0008786819761962717, "loss": 0.2177, "step": 56020 }, { "epoch": 0.09933163649323258, "grad_norm": 0.56640625, "learning_rate": 0.0008786242856746052, "loss": 0.2003, "step": 56022 }, { "epoch": 0.0993351826585424, "grad_norm": 0.408203125, "learning_rate": 0.0008785665961211076, "loss": 0.1561, "step": 56024 }, { "epoch": 0.09933872882385222, "grad_norm": 0.56640625, "learning_rate": 0.0008785089075360313, "loss": 0.2248, "step": 56026 }, { "epoch": 0.09934227498916204, "grad_norm": 0.2314453125, "learning_rate": 0.0008784512199196282, "loss": 0.1669, "step": 56028 }, { "epoch": 0.09934582115447185, "grad_norm": 0.3671875, "learning_rate": 0.0008783935332721506, "loss": 0.2741, "step": 56030 }, { "epoch": 0.09934936731978167, "grad_norm": 0.60546875, "learning_rate": 0.000878335847593851, "loss": 0.1695, "step": 56032 }, { "epoch": 0.09935291348509148, "grad_norm": 0.3984375, "learning_rate": 0.0008782781628849821, "loss": 0.1331, "step": 56034 }, { "epoch": 0.0993564596504013, "grad_norm": 0.337890625, "learning_rate": 0.0008782204791457955, "loss": 0.1788, "step": 56036 }, { "epoch": 0.09936000581571111, "grad_norm": 0.322265625, "learning_rate": 0.0008781627963765435, "loss": 0.2134, "step": 56038 }, { "epoch": 0.09936355198102093, "grad_norm": 0.6953125, "learning_rate": 0.0008781051145774788, "loss": 0.2106, "step": 56040 }, { "epoch": 0.09936709814633074, "grad_norm": 0.314453125, "learning_rate": 0.0008780474337488535, "loss": 0.1859, "step": 56042 }, { "epoch": 0.09937064431164055, "grad_norm": 0.875, "learning_rate": 0.0008779897538909197, "loss": 0.1478, "step": 56044 }, { "epoch": 0.09937419047695037, "grad_norm": 0.37109375, "learning_rate": 0.0008779320750039293, "loss": 0.1554, "step": 56046 }, { "epoch": 0.09937773664226018, "grad_norm": 1.7109375, "learning_rate": 0.0008778743970881353, "loss": 0.235, "step": 56048 }, { "epoch": 0.09938128280757, "grad_norm": 1.078125, "learning_rate": 0.0008778167201437901, "loss": 0.2927, "step": 56050 }, { "epoch": 0.09938482897287981, "grad_norm": 0.3828125, "learning_rate": 0.0008777590441711451, "loss": 0.2118, "step": 56052 }, { "epoch": 0.09938837513818963, "grad_norm": 0.70703125, "learning_rate": 0.0008777013691704529, "loss": 0.2226, "step": 56054 }, { "epoch": 0.09939192130349944, "grad_norm": 0.28515625, "learning_rate": 0.0008776436951419659, "loss": 0.2083, "step": 56056 }, { "epoch": 0.09939546746880926, "grad_norm": 0.298828125, "learning_rate": 0.0008775860220859362, "loss": 0.1828, "step": 56058 }, { "epoch": 0.09939901363411907, "grad_norm": 0.439453125, "learning_rate": 0.0008775283500026163, "loss": 0.1772, "step": 56060 }, { "epoch": 0.09940255979942889, "grad_norm": 0.2421875, "learning_rate": 0.0008774706788922579, "loss": 0.1594, "step": 56062 }, { "epoch": 0.0994061059647387, "grad_norm": 0.546875, "learning_rate": 0.0008774130087551133, "loss": 0.1896, "step": 56064 }, { "epoch": 0.09940965213004851, "grad_norm": 0.48828125, "learning_rate": 0.0008773553395914351, "loss": 0.1729, "step": 56066 }, { "epoch": 0.09941319829535833, "grad_norm": 0.220703125, "learning_rate": 0.0008772976714014755, "loss": 0.1724, "step": 56068 }, { "epoch": 0.09941674446066814, "grad_norm": 1.484375, "learning_rate": 0.0008772400041854864, "loss": 0.2276, "step": 56070 }, { "epoch": 0.09942029062597797, "grad_norm": 0.26953125, "learning_rate": 0.0008771823379437202, "loss": 0.2443, "step": 56072 }, { "epoch": 0.09942383679128779, "grad_norm": 0.46484375, "learning_rate": 0.0008771246726764288, "loss": 0.1672, "step": 56074 }, { "epoch": 0.0994273829565976, "grad_norm": 0.609375, "learning_rate": 0.000877067008383865, "loss": 0.1537, "step": 56076 }, { "epoch": 0.09943092912190742, "grad_norm": 0.36328125, "learning_rate": 0.0008770093450662801, "loss": 0.209, "step": 56078 }, { "epoch": 0.09943447528721723, "grad_norm": 0.66796875, "learning_rate": 0.0008769516827239273, "loss": 0.2023, "step": 56080 }, { "epoch": 0.09943802145252705, "grad_norm": 0.55859375, "learning_rate": 0.0008768940213570577, "loss": 0.2232, "step": 56082 }, { "epoch": 0.09944156761783686, "grad_norm": 0.7265625, "learning_rate": 0.0008768363609659247, "loss": 0.1813, "step": 56084 }, { "epoch": 0.09944511378314667, "grad_norm": 0.61328125, "learning_rate": 0.0008767787015507795, "loss": 0.168, "step": 56086 }, { "epoch": 0.09944865994845649, "grad_norm": 0.54296875, "learning_rate": 0.0008767210431118744, "loss": 0.1812, "step": 56088 }, { "epoch": 0.0994522061137663, "grad_norm": 0.3828125, "learning_rate": 0.0008766633856494621, "loss": 0.1801, "step": 56090 }, { "epoch": 0.09945575227907612, "grad_norm": 0.4609375, "learning_rate": 0.0008766057291637944, "loss": 0.1602, "step": 56092 }, { "epoch": 0.09945929844438593, "grad_norm": 0.267578125, "learning_rate": 0.0008765480736551234, "loss": 0.1648, "step": 56094 }, { "epoch": 0.09946284460969575, "grad_norm": 0.359375, "learning_rate": 0.0008764904191237013, "loss": 0.2092, "step": 56096 }, { "epoch": 0.09946639077500556, "grad_norm": 0.291015625, "learning_rate": 0.0008764327655697801, "loss": 0.162, "step": 56098 }, { "epoch": 0.09946993694031538, "grad_norm": 1.03125, "learning_rate": 0.0008763751129936126, "loss": 0.17, "step": 56100 }, { "epoch": 0.09947348310562519, "grad_norm": 0.314453125, "learning_rate": 0.0008763174613954501, "loss": 0.1771, "step": 56102 }, { "epoch": 0.099477029270935, "grad_norm": 0.435546875, "learning_rate": 0.000876259810775545, "loss": 0.2182, "step": 56104 }, { "epoch": 0.09948057543624482, "grad_norm": 0.259765625, "learning_rate": 0.0008762021611341495, "loss": 0.1518, "step": 56106 }, { "epoch": 0.09948412160155463, "grad_norm": 0.380859375, "learning_rate": 0.0008761445124715162, "loss": 0.1695, "step": 56108 }, { "epoch": 0.09948766776686445, "grad_norm": 0.66015625, "learning_rate": 0.0008760868647878966, "loss": 0.1424, "step": 56110 }, { "epoch": 0.09949121393217426, "grad_norm": 1.1328125, "learning_rate": 0.0008760292180835427, "loss": 0.234, "step": 56112 }, { "epoch": 0.09949476009748408, "grad_norm": 0.37109375, "learning_rate": 0.0008759715723587072, "loss": 0.1689, "step": 56114 }, { "epoch": 0.0994983062627939, "grad_norm": 0.259765625, "learning_rate": 0.0008759139276136421, "loss": 0.1474, "step": 56116 }, { "epoch": 0.09950185242810372, "grad_norm": 0.484375, "learning_rate": 0.0008758562838485989, "loss": 0.2394, "step": 56118 }, { "epoch": 0.09950539859341354, "grad_norm": 1.6875, "learning_rate": 0.0008757986410638301, "loss": 0.2329, "step": 56120 }, { "epoch": 0.09950894475872335, "grad_norm": 1.046875, "learning_rate": 0.0008757409992595877, "loss": 0.1865, "step": 56122 }, { "epoch": 0.09951249092403316, "grad_norm": 0.302734375, "learning_rate": 0.0008756833584361246, "loss": 0.1347, "step": 56124 }, { "epoch": 0.09951603708934298, "grad_norm": 0.134765625, "learning_rate": 0.0008756257185936916, "loss": 0.1294, "step": 56126 }, { "epoch": 0.0995195832546528, "grad_norm": 0.76953125, "learning_rate": 0.0008755680797325414, "loss": 0.1571, "step": 56128 }, { "epoch": 0.09952312941996261, "grad_norm": 0.515625, "learning_rate": 0.0008755104418529262, "loss": 0.17, "step": 56130 }, { "epoch": 0.09952667558527242, "grad_norm": 2.53125, "learning_rate": 0.000875452804955098, "loss": 0.2501, "step": 56132 }, { "epoch": 0.09953022175058224, "grad_norm": 0.94921875, "learning_rate": 0.0008753951690393088, "loss": 0.5218, "step": 56134 }, { "epoch": 0.09953376791589205, "grad_norm": 0.328125, "learning_rate": 0.0008753375341058105, "loss": 0.1631, "step": 56136 }, { "epoch": 0.09953731408120187, "grad_norm": 1.2578125, "learning_rate": 0.0008752799001548552, "loss": 0.1681, "step": 56138 }, { "epoch": 0.09954086024651168, "grad_norm": 0.3125, "learning_rate": 0.0008752222671866951, "loss": 0.1408, "step": 56140 }, { "epoch": 0.0995444064118215, "grad_norm": 0.37890625, "learning_rate": 0.0008751646352015827, "loss": 0.1731, "step": 56142 }, { "epoch": 0.09954795257713131, "grad_norm": 0.2216796875, "learning_rate": 0.0008751070041997693, "loss": 0.1507, "step": 56144 }, { "epoch": 0.09955149874244112, "grad_norm": 0.453125, "learning_rate": 0.0008750493741815073, "loss": 0.163, "step": 56146 }, { "epoch": 0.09955504490775094, "grad_norm": 0.361328125, "learning_rate": 0.0008749917451470484, "loss": 0.1741, "step": 56148 }, { "epoch": 0.09955859107306075, "grad_norm": 1.8125, "learning_rate": 0.0008749341170966454, "loss": 0.3392, "step": 56150 }, { "epoch": 0.09956213723837057, "grad_norm": 0.66796875, "learning_rate": 0.0008748764900305495, "loss": 0.1687, "step": 56152 }, { "epoch": 0.09956568340368038, "grad_norm": 0.267578125, "learning_rate": 0.0008748188639490129, "loss": 0.1388, "step": 56154 }, { "epoch": 0.0995692295689902, "grad_norm": 2.65625, "learning_rate": 0.0008747612388522878, "loss": 0.4326, "step": 56156 }, { "epoch": 0.09957277573430001, "grad_norm": 0.365234375, "learning_rate": 0.0008747036147406269, "loss": 0.2099, "step": 56158 }, { "epoch": 0.09957632189960983, "grad_norm": 0.380859375, "learning_rate": 0.0008746459916142808, "loss": 0.2317, "step": 56160 }, { "epoch": 0.09957986806491965, "grad_norm": 0.5625, "learning_rate": 0.0008745883694735024, "loss": 0.1347, "step": 56162 }, { "epoch": 0.09958341423022947, "grad_norm": 0.25390625, "learning_rate": 0.0008745307483185436, "loss": 0.1864, "step": 56164 }, { "epoch": 0.09958696039553928, "grad_norm": 2.015625, "learning_rate": 0.0008744731281496565, "loss": 0.2167, "step": 56166 }, { "epoch": 0.0995905065608491, "grad_norm": 0.369140625, "learning_rate": 0.0008744155089670929, "loss": 0.1608, "step": 56168 }, { "epoch": 0.09959405272615891, "grad_norm": 0.33203125, "learning_rate": 0.0008743578907711045, "loss": 0.1614, "step": 56170 }, { "epoch": 0.09959759889146873, "grad_norm": 0.87890625, "learning_rate": 0.0008743002735619439, "loss": 0.179, "step": 56172 }, { "epoch": 0.09960114505677854, "grad_norm": 0.341796875, "learning_rate": 0.0008742426573398628, "loss": 0.1785, "step": 56174 }, { "epoch": 0.09960469122208836, "grad_norm": 1.7578125, "learning_rate": 0.000874185042105113, "loss": 0.291, "step": 56176 }, { "epoch": 0.09960823738739817, "grad_norm": 0.1337890625, "learning_rate": 0.0008741274278579465, "loss": 0.1629, "step": 56178 }, { "epoch": 0.09961178355270799, "grad_norm": 0.349609375, "learning_rate": 0.0008740698145986157, "loss": 0.1797, "step": 56180 }, { "epoch": 0.0996153297180178, "grad_norm": 0.302734375, "learning_rate": 0.0008740122023273727, "loss": 0.2522, "step": 56182 }, { "epoch": 0.09961887588332762, "grad_norm": 0.82421875, "learning_rate": 0.0008739545910444684, "loss": 0.173, "step": 56184 }, { "epoch": 0.09962242204863743, "grad_norm": 0.267578125, "learning_rate": 0.0008738969807501558, "loss": 0.1176, "step": 56186 }, { "epoch": 0.09962596821394724, "grad_norm": 1.9765625, "learning_rate": 0.0008738393714446862, "loss": 0.2208, "step": 56188 }, { "epoch": 0.09962951437925706, "grad_norm": 0.2177734375, "learning_rate": 0.0008737817631283123, "loss": 0.22, "step": 56190 }, { "epoch": 0.09963306054456687, "grad_norm": 0.384765625, "learning_rate": 0.0008737241558012849, "loss": 0.1788, "step": 56192 }, { "epoch": 0.09963660670987669, "grad_norm": 0.318359375, "learning_rate": 0.0008736665494638567, "loss": 0.1648, "step": 56194 }, { "epoch": 0.0996401528751865, "grad_norm": 0.380859375, "learning_rate": 0.0008736089441162797, "loss": 0.2278, "step": 56196 }, { "epoch": 0.09964369904049632, "grad_norm": 0.4921875, "learning_rate": 0.0008735513397588059, "loss": 0.3371, "step": 56198 }, { "epoch": 0.09964724520580613, "grad_norm": 0.5859375, "learning_rate": 0.0008734937363916868, "loss": 0.1514, "step": 56200 }, { "epoch": 0.09965079137111595, "grad_norm": 0.50390625, "learning_rate": 0.0008734361340151746, "loss": 0.1583, "step": 56202 }, { "epoch": 0.09965433753642576, "grad_norm": 0.9375, "learning_rate": 0.0008733785326295212, "loss": 0.2317, "step": 56204 }, { "epoch": 0.09965788370173558, "grad_norm": 0.71484375, "learning_rate": 0.0008733209322349785, "loss": 0.1507, "step": 56206 }, { "epoch": 0.0996614298670454, "grad_norm": 0.2734375, "learning_rate": 0.0008732633328317982, "loss": 0.1457, "step": 56208 }, { "epoch": 0.09966497603235522, "grad_norm": 0.63671875, "learning_rate": 0.0008732057344202323, "loss": 0.1749, "step": 56210 }, { "epoch": 0.09966852219766503, "grad_norm": 0.453125, "learning_rate": 0.0008731481370005328, "loss": 0.1678, "step": 56212 }, { "epoch": 0.09967206836297485, "grad_norm": 0.337890625, "learning_rate": 0.0008730905405729519, "loss": 0.1702, "step": 56214 }, { "epoch": 0.09967561452828466, "grad_norm": 0.5078125, "learning_rate": 0.0008730329451377408, "loss": 0.1619, "step": 56216 }, { "epoch": 0.09967916069359448, "grad_norm": 1.8125, "learning_rate": 0.0008729753506951519, "loss": 0.2463, "step": 56218 }, { "epoch": 0.09968270685890429, "grad_norm": 0.62890625, "learning_rate": 0.0008729177572454368, "loss": 0.1597, "step": 56220 }, { "epoch": 0.0996862530242141, "grad_norm": 1.1875, "learning_rate": 0.0008728601647888479, "loss": 0.2464, "step": 56222 }, { "epoch": 0.09968979918952392, "grad_norm": 0.75390625, "learning_rate": 0.0008728025733256364, "loss": 0.2372, "step": 56224 }, { "epoch": 0.09969334535483373, "grad_norm": 0.408203125, "learning_rate": 0.0008727449828560545, "loss": 0.2084, "step": 56226 }, { "epoch": 0.09969689152014355, "grad_norm": 0.326171875, "learning_rate": 0.000872687393380354, "loss": 0.1461, "step": 56228 }, { "epoch": 0.09970043768545336, "grad_norm": 0.78125, "learning_rate": 0.0008726298048987868, "loss": 0.4344, "step": 56230 }, { "epoch": 0.09970398385076318, "grad_norm": 0.859375, "learning_rate": 0.0008725722174116053, "loss": 0.1481, "step": 56232 }, { "epoch": 0.09970753001607299, "grad_norm": 0.78515625, "learning_rate": 0.0008725146309190602, "loss": 0.1306, "step": 56234 }, { "epoch": 0.09971107618138281, "grad_norm": 0.298828125, "learning_rate": 0.000872457045421404, "loss": 0.1764, "step": 56236 }, { "epoch": 0.09971462234669262, "grad_norm": 0.5625, "learning_rate": 0.0008723994609188886, "loss": 0.1782, "step": 56238 }, { "epoch": 0.09971816851200244, "grad_norm": 0.1806640625, "learning_rate": 0.000872341877411766, "loss": 0.1572, "step": 56240 }, { "epoch": 0.09972171467731225, "grad_norm": 0.279296875, "learning_rate": 0.0008722842949002876, "loss": 0.2112, "step": 56242 }, { "epoch": 0.09972526084262207, "grad_norm": 0.318359375, "learning_rate": 0.0008722267133847053, "loss": 0.284, "step": 56244 }, { "epoch": 0.09972880700793188, "grad_norm": 0.2265625, "learning_rate": 0.000872169132865271, "loss": 0.1673, "step": 56246 }, { "epoch": 0.0997323531732417, "grad_norm": 0.81640625, "learning_rate": 0.000872111553342237, "loss": 0.1322, "step": 56248 }, { "epoch": 0.09973589933855151, "grad_norm": 0.6015625, "learning_rate": 0.0008720539748158545, "loss": 0.2133, "step": 56250 }, { "epoch": 0.09973944550386134, "grad_norm": 0.22265625, "learning_rate": 0.0008719963972863751, "loss": 0.1603, "step": 56252 }, { "epoch": 0.09974299166917115, "grad_norm": 0.310546875, "learning_rate": 0.0008719388207540513, "loss": 0.1981, "step": 56254 }, { "epoch": 0.09974653783448097, "grad_norm": 0.205078125, "learning_rate": 0.000871881245219135, "loss": 0.1437, "step": 56256 }, { "epoch": 0.09975008399979078, "grad_norm": 0.2080078125, "learning_rate": 0.000871823670681877, "loss": 0.1823, "step": 56258 }, { "epoch": 0.0997536301651006, "grad_norm": 0.62890625, "learning_rate": 0.0008717660971425303, "loss": 0.1583, "step": 56260 }, { "epoch": 0.09975717633041041, "grad_norm": 0.59765625, "learning_rate": 0.0008717085246013457, "loss": 0.1702, "step": 56262 }, { "epoch": 0.09976072249572022, "grad_norm": 0.57421875, "learning_rate": 0.0008716509530585757, "loss": 0.1719, "step": 56264 }, { "epoch": 0.09976426866103004, "grad_norm": 6.4375, "learning_rate": 0.0008715933825144715, "loss": 0.6289, "step": 56266 }, { "epoch": 0.09976781482633985, "grad_norm": 0.86328125, "learning_rate": 0.0008715358129692852, "loss": 0.2161, "step": 56268 }, { "epoch": 0.09977136099164967, "grad_norm": 0.201171875, "learning_rate": 0.0008714782444232685, "loss": 0.1786, "step": 56270 }, { "epoch": 0.09977490715695948, "grad_norm": 0.388671875, "learning_rate": 0.0008714206768766737, "loss": 0.425, "step": 56272 }, { "epoch": 0.0997784533222693, "grad_norm": 0.515625, "learning_rate": 0.0008713631103297517, "loss": 0.1346, "step": 56274 }, { "epoch": 0.09978199948757911, "grad_norm": 1.1796875, "learning_rate": 0.0008713055447827545, "loss": 0.2909, "step": 56276 }, { "epoch": 0.09978554565288893, "grad_norm": 0.349609375, "learning_rate": 0.0008712479802359343, "loss": 0.1783, "step": 56278 }, { "epoch": 0.09978909181819874, "grad_norm": 0.6171875, "learning_rate": 0.0008711904166895426, "loss": 0.1443, "step": 56280 }, { "epoch": 0.09979263798350856, "grad_norm": 1.28125, "learning_rate": 0.000871132854143831, "loss": 0.1729, "step": 56282 }, { "epoch": 0.09979618414881837, "grad_norm": 0.578125, "learning_rate": 0.0008710752925990512, "loss": 0.1657, "step": 56284 }, { "epoch": 0.09979973031412818, "grad_norm": 1.203125, "learning_rate": 0.0008710177320554551, "loss": 0.2381, "step": 56286 }, { "epoch": 0.099803276479438, "grad_norm": 0.24609375, "learning_rate": 0.0008709601725132947, "loss": 0.1925, "step": 56288 }, { "epoch": 0.09980682264474781, "grad_norm": 0.1806640625, "learning_rate": 0.0008709026139728214, "loss": 0.1726, "step": 56290 }, { "epoch": 0.09981036881005763, "grad_norm": 0.275390625, "learning_rate": 0.0008708450564342867, "loss": 0.151, "step": 56292 }, { "epoch": 0.09981391497536744, "grad_norm": 0.20703125, "learning_rate": 0.0008707874998979428, "loss": 0.1673, "step": 56294 }, { "epoch": 0.09981746114067726, "grad_norm": 0.59375, "learning_rate": 0.0008707299443640415, "loss": 0.2136, "step": 56296 }, { "epoch": 0.09982100730598709, "grad_norm": 13.5625, "learning_rate": 0.0008706723898328339, "loss": 0.3422, "step": 56298 }, { "epoch": 0.0998245534712969, "grad_norm": 1.0703125, "learning_rate": 0.0008706148363045723, "loss": 0.1813, "step": 56300 }, { "epoch": 0.09982809963660672, "grad_norm": 0.482421875, "learning_rate": 0.0008705572837795077, "loss": 0.21, "step": 56302 }, { "epoch": 0.09983164580191653, "grad_norm": 0.412109375, "learning_rate": 0.0008704997322578929, "loss": 0.1948, "step": 56304 }, { "epoch": 0.09983519196722634, "grad_norm": 0.97265625, "learning_rate": 0.0008704421817399786, "loss": 0.1677, "step": 56306 }, { "epoch": 0.09983873813253616, "grad_norm": 1.5859375, "learning_rate": 0.0008703846322260167, "loss": 0.1799, "step": 56308 }, { "epoch": 0.09984228429784597, "grad_norm": 0.337890625, "learning_rate": 0.0008703270837162591, "loss": 0.1638, "step": 56310 }, { "epoch": 0.09984583046315579, "grad_norm": 0.546875, "learning_rate": 0.0008702695362109576, "loss": 0.1773, "step": 56312 }, { "epoch": 0.0998493766284656, "grad_norm": 0.455078125, "learning_rate": 0.000870211989710364, "loss": 0.2343, "step": 56314 }, { "epoch": 0.09985292279377542, "grad_norm": 0.41796875, "learning_rate": 0.0008701544442147291, "loss": 0.2995, "step": 56316 }, { "epoch": 0.09985646895908523, "grad_norm": 0.318359375, "learning_rate": 0.0008700968997243055, "loss": 0.1476, "step": 56318 }, { "epoch": 0.09986001512439505, "grad_norm": 0.2470703125, "learning_rate": 0.0008700393562393443, "loss": 0.1664, "step": 56320 }, { "epoch": 0.09986356128970486, "grad_norm": 0.6796875, "learning_rate": 0.0008699818137600979, "loss": 0.1572, "step": 56322 }, { "epoch": 0.09986710745501468, "grad_norm": 0.25, "learning_rate": 0.0008699242722868168, "loss": 0.1147, "step": 56324 }, { "epoch": 0.09987065362032449, "grad_norm": 0.271484375, "learning_rate": 0.0008698667318197535, "loss": 0.2299, "step": 56326 }, { "epoch": 0.0998741997856343, "grad_norm": 0.34375, "learning_rate": 0.0008698091923591593, "loss": 0.276, "step": 56328 }, { "epoch": 0.09987774595094412, "grad_norm": 0.376953125, "learning_rate": 0.0008697516539052866, "loss": 0.1946, "step": 56330 }, { "epoch": 0.09988129211625393, "grad_norm": 0.34375, "learning_rate": 0.0008696941164583855, "loss": 0.1916, "step": 56332 }, { "epoch": 0.09988483828156375, "grad_norm": 0.294921875, "learning_rate": 0.0008696365800187094, "loss": 0.1928, "step": 56334 }, { "epoch": 0.09988838444687356, "grad_norm": 0.478515625, "learning_rate": 0.0008695790445865084, "loss": 0.1763, "step": 56336 }, { "epoch": 0.09989193061218338, "grad_norm": 0.2255859375, "learning_rate": 0.0008695215101620355, "loss": 0.1608, "step": 56338 }, { "epoch": 0.09989547677749319, "grad_norm": 1.2109375, "learning_rate": 0.0008694639767455412, "loss": 0.2782, "step": 56340 }, { "epoch": 0.099899022942803, "grad_norm": 0.5078125, "learning_rate": 0.0008694064443372773, "loss": 0.2066, "step": 56342 }, { "epoch": 0.09990256910811283, "grad_norm": 0.486328125, "learning_rate": 0.000869348912937496, "loss": 0.1933, "step": 56344 }, { "epoch": 0.09990611527342265, "grad_norm": 0.396484375, "learning_rate": 0.0008692913825464488, "loss": 0.1926, "step": 56346 }, { "epoch": 0.09990966143873246, "grad_norm": 0.6875, "learning_rate": 0.0008692338531643869, "loss": 0.229, "step": 56348 }, { "epoch": 0.09991320760404228, "grad_norm": 0.54296875, "learning_rate": 0.0008691763247915619, "loss": 0.1942, "step": 56350 }, { "epoch": 0.09991675376935209, "grad_norm": 1.015625, "learning_rate": 0.0008691187974282258, "loss": 0.1611, "step": 56352 }, { "epoch": 0.09992029993466191, "grad_norm": 0.291015625, "learning_rate": 0.0008690612710746299, "loss": 0.1905, "step": 56354 }, { "epoch": 0.09992384609997172, "grad_norm": 0.49609375, "learning_rate": 0.0008690037457310258, "loss": 0.2474, "step": 56356 }, { "epoch": 0.09992739226528154, "grad_norm": 0.99609375, "learning_rate": 0.0008689462213976651, "loss": 0.3094, "step": 56358 }, { "epoch": 0.09993093843059135, "grad_norm": 0.185546875, "learning_rate": 0.0008688886980747994, "loss": 0.168, "step": 56360 }, { "epoch": 0.09993448459590117, "grad_norm": 5.5625, "learning_rate": 0.0008688311757626806, "loss": 0.1907, "step": 56362 }, { "epoch": 0.09993803076121098, "grad_norm": 0.5703125, "learning_rate": 0.0008687736544615597, "loss": 0.1741, "step": 56364 }, { "epoch": 0.0999415769265208, "grad_norm": 0.59765625, "learning_rate": 0.0008687161341716885, "loss": 0.2228, "step": 56366 }, { "epoch": 0.09994512309183061, "grad_norm": 0.2392578125, "learning_rate": 0.0008686586148933185, "loss": 0.1869, "step": 56368 }, { "epoch": 0.09994866925714042, "grad_norm": 0.28125, "learning_rate": 0.0008686010966267017, "loss": 0.1894, "step": 56370 }, { "epoch": 0.09995221542245024, "grad_norm": 1.5, "learning_rate": 0.0008685435793720889, "loss": 0.2108, "step": 56372 }, { "epoch": 0.09995576158776005, "grad_norm": 0.8984375, "learning_rate": 0.0008684860631297323, "loss": 0.1924, "step": 56374 }, { "epoch": 0.09995930775306987, "grad_norm": 0.298828125, "learning_rate": 0.000868428547899883, "loss": 0.1675, "step": 56376 }, { "epoch": 0.09996285391837968, "grad_norm": 0.35546875, "learning_rate": 0.000868371033682793, "loss": 0.142, "step": 56378 }, { "epoch": 0.0999664000836895, "grad_norm": 0.49609375, "learning_rate": 0.0008683135204787132, "loss": 0.1773, "step": 56380 }, { "epoch": 0.09996994624899931, "grad_norm": 1.8671875, "learning_rate": 0.0008682560082878956, "loss": 0.2527, "step": 56382 }, { "epoch": 0.09997349241430913, "grad_norm": 0.263671875, "learning_rate": 0.0008681984971105917, "loss": 0.1224, "step": 56384 }, { "epoch": 0.09997703857961894, "grad_norm": 1.0546875, "learning_rate": 0.0008681409869470529, "loss": 0.1934, "step": 56386 }, { "epoch": 0.09998058474492877, "grad_norm": 0.26953125, "learning_rate": 0.0008680834777975306, "loss": 0.3522, "step": 56388 }, { "epoch": 0.09998413091023858, "grad_norm": 1.0859375, "learning_rate": 0.0008680259696622765, "loss": 0.1792, "step": 56390 }, { "epoch": 0.0999876770755484, "grad_norm": 0.1865234375, "learning_rate": 0.0008679684625415423, "loss": 0.1313, "step": 56392 }, { "epoch": 0.09999122324085821, "grad_norm": 0.458984375, "learning_rate": 0.0008679109564355789, "loss": 0.2031, "step": 56394 }, { "epoch": 0.09999476940616803, "grad_norm": 1.5234375, "learning_rate": 0.0008678534513446386, "loss": 0.1576, "step": 56396 }, { "epoch": 0.09999831557147784, "grad_norm": 0.330078125, "learning_rate": 0.000867795947268972, "loss": 0.235, "step": 56398 }, { "epoch": 0.10000186173678766, "grad_norm": 0.357421875, "learning_rate": 0.0008677384442088312, "loss": 0.2252, "step": 56400 }, { "epoch": 0.10000540790209747, "grad_norm": 0.3046875, "learning_rate": 0.0008676809421644676, "loss": 0.2226, "step": 56402 }, { "epoch": 0.10000895406740729, "grad_norm": 0.796875, "learning_rate": 0.0008676234411361328, "loss": 0.2549, "step": 56404 }, { "epoch": 0.1000125002327171, "grad_norm": 0.5546875, "learning_rate": 0.0008675659411240777, "loss": 0.1596, "step": 56406 }, { "epoch": 0.10001604639802691, "grad_norm": 0.302734375, "learning_rate": 0.0008675084421285544, "loss": 0.1758, "step": 56408 }, { "epoch": 0.10001959256333673, "grad_norm": 0.6171875, "learning_rate": 0.0008674509441498142, "loss": 0.2018, "step": 56410 }, { "epoch": 0.10002313872864654, "grad_norm": 1.0703125, "learning_rate": 0.0008673934471881084, "loss": 0.26, "step": 56412 }, { "epoch": 0.10002668489395636, "grad_norm": 0.9765625, "learning_rate": 0.0008673359512436884, "loss": 0.234, "step": 56414 }, { "epoch": 0.10003023105926617, "grad_norm": 0.2216796875, "learning_rate": 0.0008672784563168059, "loss": 0.1553, "step": 56416 }, { "epoch": 0.10003377722457599, "grad_norm": 0.486328125, "learning_rate": 0.000867220962407712, "loss": 0.1543, "step": 56418 }, { "epoch": 0.1000373233898858, "grad_norm": 1.453125, "learning_rate": 0.0008671634695166591, "loss": 0.2528, "step": 56420 }, { "epoch": 0.10004086955519562, "grad_norm": 0.5625, "learning_rate": 0.0008671059776438974, "loss": 0.1946, "step": 56422 }, { "epoch": 0.10004441572050543, "grad_norm": 0.341796875, "learning_rate": 0.0008670484867896788, "loss": 0.1639, "step": 56424 }, { "epoch": 0.10004796188581525, "grad_norm": 0.236328125, "learning_rate": 0.0008669909969542551, "loss": 0.1415, "step": 56426 }, { "epoch": 0.10005150805112506, "grad_norm": 0.462890625, "learning_rate": 0.0008669335081378775, "loss": 0.1407, "step": 56428 }, { "epoch": 0.10005505421643487, "grad_norm": 0.98046875, "learning_rate": 0.0008668760203407972, "loss": 0.1678, "step": 56430 }, { "epoch": 0.10005860038174469, "grad_norm": 2.734375, "learning_rate": 0.0008668185335632657, "loss": 0.4107, "step": 56432 }, { "epoch": 0.10006214654705452, "grad_norm": 2.84375, "learning_rate": 0.0008667610478055343, "loss": 0.5739, "step": 56434 }, { "epoch": 0.10006569271236433, "grad_norm": 0.51171875, "learning_rate": 0.0008667035630678551, "loss": 0.194, "step": 56436 }, { "epoch": 0.10006923887767415, "grad_norm": 0.328125, "learning_rate": 0.0008666460793504787, "loss": 0.2318, "step": 56438 }, { "epoch": 0.10007278504298396, "grad_norm": 1.9765625, "learning_rate": 0.0008665885966536567, "loss": 0.3028, "step": 56440 }, { "epoch": 0.10007633120829378, "grad_norm": 0.50390625, "learning_rate": 0.0008665311149776405, "loss": 0.177, "step": 56442 }, { "epoch": 0.10007987737360359, "grad_norm": 0.322265625, "learning_rate": 0.0008664736343226823, "loss": 0.2102, "step": 56444 }, { "epoch": 0.1000834235389134, "grad_norm": 0.34765625, "learning_rate": 0.0008664161546890319, "loss": 0.179, "step": 56446 }, { "epoch": 0.10008696970422322, "grad_norm": 0.21484375, "learning_rate": 0.0008663586760769422, "loss": 0.1291, "step": 56448 }, { "epoch": 0.10009051586953303, "grad_norm": 0.78515625, "learning_rate": 0.0008663011984866635, "loss": 0.4822, "step": 56450 }, { "epoch": 0.10009406203484285, "grad_norm": 0.9609375, "learning_rate": 0.000866243721918448, "loss": 0.219, "step": 56452 }, { "epoch": 0.10009760820015266, "grad_norm": 0.4296875, "learning_rate": 0.0008661862463725465, "loss": 0.1293, "step": 56454 }, { "epoch": 0.10010115436546248, "grad_norm": 0.427734375, "learning_rate": 0.0008661287718492102, "loss": 0.1897, "step": 56456 }, { "epoch": 0.10010470053077229, "grad_norm": 0.2451171875, "learning_rate": 0.0008660712983486912, "loss": 0.1933, "step": 56458 }, { "epoch": 0.1001082466960821, "grad_norm": 0.2734375, "learning_rate": 0.0008660138258712404, "loss": 0.1822, "step": 56460 }, { "epoch": 0.10011179286139192, "grad_norm": 1.046875, "learning_rate": 0.0008659563544171091, "loss": 0.233, "step": 56462 }, { "epoch": 0.10011533902670174, "grad_norm": 0.9921875, "learning_rate": 0.0008658988839865487, "loss": 0.4883, "step": 56464 }, { "epoch": 0.10011888519201155, "grad_norm": 0.482421875, "learning_rate": 0.0008658414145798108, "loss": 0.1614, "step": 56466 }, { "epoch": 0.10012243135732136, "grad_norm": 0.40625, "learning_rate": 0.0008657839461971466, "loss": 0.1661, "step": 56468 }, { "epoch": 0.10012597752263118, "grad_norm": 0.828125, "learning_rate": 0.0008657264788388075, "loss": 0.1638, "step": 56470 }, { "epoch": 0.100129523687941, "grad_norm": 0.478515625, "learning_rate": 0.0008656690125050444, "loss": 0.2554, "step": 56472 }, { "epoch": 0.10013306985325081, "grad_norm": 0.271484375, "learning_rate": 0.0008656115471961088, "loss": 0.2086, "step": 56474 }, { "epoch": 0.10013661601856062, "grad_norm": 0.640625, "learning_rate": 0.0008655540829122521, "loss": 0.1452, "step": 56476 }, { "epoch": 0.10014016218387044, "grad_norm": 0.5078125, "learning_rate": 0.0008654966196537262, "loss": 0.1209, "step": 56478 }, { "epoch": 0.10014370834918027, "grad_norm": 0.478515625, "learning_rate": 0.0008654391574207813, "loss": 0.2254, "step": 56480 }, { "epoch": 0.10014725451449008, "grad_norm": 0.349609375, "learning_rate": 0.0008653816962136695, "loss": 0.138, "step": 56482 }, { "epoch": 0.1001508006797999, "grad_norm": 0.6328125, "learning_rate": 0.0008653242360326419, "loss": 0.1629, "step": 56484 }, { "epoch": 0.10015434684510971, "grad_norm": 0.44140625, "learning_rate": 0.0008652667768779498, "loss": 0.1813, "step": 56486 }, { "epoch": 0.10015789301041952, "grad_norm": 0.7109375, "learning_rate": 0.0008652093187498447, "loss": 0.1366, "step": 56488 }, { "epoch": 0.10016143917572934, "grad_norm": 0.63671875, "learning_rate": 0.0008651518616485771, "loss": 0.2264, "step": 56490 }, { "epoch": 0.10016498534103915, "grad_norm": 1.9765625, "learning_rate": 0.0008650944055743992, "loss": 0.2413, "step": 56492 }, { "epoch": 0.10016853150634897, "grad_norm": 0.49609375, "learning_rate": 0.000865036950527562, "loss": 0.1511, "step": 56494 }, { "epoch": 0.10017207767165878, "grad_norm": 0.38671875, "learning_rate": 0.0008649794965083164, "loss": 0.2089, "step": 56496 }, { "epoch": 0.1001756238369686, "grad_norm": 0.345703125, "learning_rate": 0.0008649220435169141, "loss": 0.1456, "step": 56498 }, { "epoch": 0.10017917000227841, "grad_norm": 0.6796875, "learning_rate": 0.0008648645915536062, "loss": 0.1952, "step": 56500 }, { "epoch": 0.10018271616758823, "grad_norm": 0.6484375, "learning_rate": 0.0008648071406186442, "loss": 0.182, "step": 56502 }, { "epoch": 0.10018626233289804, "grad_norm": 0.2890625, "learning_rate": 0.0008647496907122792, "loss": 0.2069, "step": 56504 }, { "epoch": 0.10018980849820786, "grad_norm": 0.546875, "learning_rate": 0.000864692241834762, "loss": 0.195, "step": 56506 }, { "epoch": 0.10019335466351767, "grad_norm": 0.7734375, "learning_rate": 0.0008646347939863444, "loss": 0.1828, "step": 56508 }, { "epoch": 0.10019690082882748, "grad_norm": 1.1171875, "learning_rate": 0.0008645773471672777, "loss": 0.2699, "step": 56510 }, { "epoch": 0.1002004469941373, "grad_norm": 0.275390625, "learning_rate": 0.0008645199013778127, "loss": 0.1742, "step": 56512 }, { "epoch": 0.10020399315944711, "grad_norm": 0.58984375, "learning_rate": 0.0008644624566182009, "loss": 0.1747, "step": 56514 }, { "epoch": 0.10020753932475693, "grad_norm": 0.546875, "learning_rate": 0.0008644050128886933, "loss": 0.206, "step": 56516 }, { "epoch": 0.10021108549006674, "grad_norm": 0.55078125, "learning_rate": 0.000864347570189542, "loss": 0.2374, "step": 56518 }, { "epoch": 0.10021463165537656, "grad_norm": 0.2216796875, "learning_rate": 0.000864290128520997, "loss": 0.1746, "step": 56520 }, { "epoch": 0.10021817782068637, "grad_norm": 0.80078125, "learning_rate": 0.0008642326878833103, "loss": 0.2456, "step": 56522 }, { "epoch": 0.1002217239859962, "grad_norm": 0.28515625, "learning_rate": 0.0008641752482767324, "loss": 0.206, "step": 56524 }, { "epoch": 0.10022527015130601, "grad_norm": 0.439453125, "learning_rate": 0.0008641178097015157, "loss": 0.1843, "step": 56526 }, { "epoch": 0.10022881631661583, "grad_norm": 0.2431640625, "learning_rate": 0.0008640603721579101, "loss": 0.2316, "step": 56528 }, { "epoch": 0.10023236248192564, "grad_norm": 0.53515625, "learning_rate": 0.0008640029356461676, "loss": 0.1585, "step": 56530 }, { "epoch": 0.10023590864723546, "grad_norm": 1.1953125, "learning_rate": 0.000863945500166539, "loss": 0.1821, "step": 56532 }, { "epoch": 0.10023945481254527, "grad_norm": 0.47265625, "learning_rate": 0.0008638880657192762, "loss": 0.2066, "step": 56534 }, { "epoch": 0.10024300097785509, "grad_norm": 0.8046875, "learning_rate": 0.0008638306323046292, "loss": 0.1555, "step": 56536 }, { "epoch": 0.1002465471431649, "grad_norm": 0.328125, "learning_rate": 0.0008637731999228502, "loss": 0.1882, "step": 56538 }, { "epoch": 0.10025009330847472, "grad_norm": 0.5234375, "learning_rate": 0.0008637157685741899, "loss": 0.1527, "step": 56540 }, { "epoch": 0.10025363947378453, "grad_norm": 1.0546875, "learning_rate": 0.0008636583382588998, "loss": 0.2275, "step": 56542 }, { "epoch": 0.10025718563909435, "grad_norm": 0.357421875, "learning_rate": 0.0008636009089772306, "loss": 0.3643, "step": 56544 }, { "epoch": 0.10026073180440416, "grad_norm": 0.75390625, "learning_rate": 0.0008635434807294338, "loss": 0.2698, "step": 56546 }, { "epoch": 0.10026427796971397, "grad_norm": 1.296875, "learning_rate": 0.0008634860535157603, "loss": 0.2826, "step": 56548 }, { "epoch": 0.10026782413502379, "grad_norm": 1.3203125, "learning_rate": 0.0008634286273364618, "loss": 0.1845, "step": 56550 }, { "epoch": 0.1002713703003336, "grad_norm": 0.474609375, "learning_rate": 0.0008633712021917885, "loss": 0.1987, "step": 56552 }, { "epoch": 0.10027491646564342, "grad_norm": 0.19921875, "learning_rate": 0.0008633137780819924, "loss": 0.1301, "step": 56554 }, { "epoch": 0.10027846263095323, "grad_norm": 0.53515625, "learning_rate": 0.0008632563550073243, "loss": 0.2542, "step": 56556 }, { "epoch": 0.10028200879626305, "grad_norm": 0.43359375, "learning_rate": 0.0008631989329680354, "loss": 0.1321, "step": 56558 }, { "epoch": 0.10028555496157286, "grad_norm": 0.302734375, "learning_rate": 0.000863141511964377, "loss": 0.1358, "step": 56560 }, { "epoch": 0.10028910112688268, "grad_norm": 0.26953125, "learning_rate": 0.0008630840919966001, "loss": 0.1651, "step": 56562 }, { "epoch": 0.10029264729219249, "grad_norm": 1.5078125, "learning_rate": 0.0008630266730649555, "loss": 0.1959, "step": 56564 }, { "epoch": 0.1002961934575023, "grad_norm": 1.5, "learning_rate": 0.0008629692551696944, "loss": 0.2105, "step": 56566 }, { "epoch": 0.10029973962281212, "grad_norm": 0.625, "learning_rate": 0.0008629118383110685, "loss": 0.2308, "step": 56568 }, { "epoch": 0.10030328578812195, "grad_norm": 0.353515625, "learning_rate": 0.0008628544224893285, "loss": 0.1236, "step": 56570 }, { "epoch": 0.10030683195343176, "grad_norm": 0.306640625, "learning_rate": 0.0008627970077047251, "loss": 0.1756, "step": 56572 }, { "epoch": 0.10031037811874158, "grad_norm": 0.349609375, "learning_rate": 0.00086273959395751, "loss": 0.2197, "step": 56574 }, { "epoch": 0.10031392428405139, "grad_norm": 0.2353515625, "learning_rate": 0.0008626821812479344, "loss": 0.1624, "step": 56576 }, { "epoch": 0.1003174704493612, "grad_norm": 0.64453125, "learning_rate": 0.0008626247695762489, "loss": 0.1533, "step": 56578 }, { "epoch": 0.10032101661467102, "grad_norm": 0.28515625, "learning_rate": 0.0008625673589427047, "loss": 0.1372, "step": 56580 }, { "epoch": 0.10032456277998084, "grad_norm": 0.7890625, "learning_rate": 0.0008625099493475527, "loss": 0.2254, "step": 56582 }, { "epoch": 0.10032810894529065, "grad_norm": 0.77734375, "learning_rate": 0.0008624525407910449, "loss": 0.2514, "step": 56584 }, { "epoch": 0.10033165511060047, "grad_norm": 0.76171875, "learning_rate": 0.0008623951332734312, "loss": 0.2086, "step": 56586 }, { "epoch": 0.10033520127591028, "grad_norm": 0.96875, "learning_rate": 0.0008623377267949632, "loss": 0.183, "step": 56588 }, { "epoch": 0.1003387474412201, "grad_norm": 0.24609375, "learning_rate": 0.0008622803213558919, "loss": 0.1645, "step": 56590 }, { "epoch": 0.10034229360652991, "grad_norm": 0.326171875, "learning_rate": 0.0008622229169564688, "loss": 0.1935, "step": 56592 }, { "epoch": 0.10034583977183972, "grad_norm": 0.484375, "learning_rate": 0.0008621655135969442, "loss": 0.1999, "step": 56594 }, { "epoch": 0.10034938593714954, "grad_norm": 0.234375, "learning_rate": 0.0008621081112775695, "loss": 0.1339, "step": 56596 }, { "epoch": 0.10035293210245935, "grad_norm": 0.5078125, "learning_rate": 0.0008620507099985957, "loss": 0.2051, "step": 56598 }, { "epoch": 0.10035647826776917, "grad_norm": 0.23046875, "learning_rate": 0.0008619933097602741, "loss": 0.1988, "step": 56600 }, { "epoch": 0.10036002443307898, "grad_norm": 0.30078125, "learning_rate": 0.0008619359105628555, "loss": 0.1669, "step": 56602 }, { "epoch": 0.1003635705983888, "grad_norm": 0.70703125, "learning_rate": 0.0008618785124065906, "loss": 0.1923, "step": 56604 }, { "epoch": 0.10036711676369861, "grad_norm": 2.578125, "learning_rate": 0.0008618211152917308, "loss": 0.2905, "step": 56606 }, { "epoch": 0.10037066292900843, "grad_norm": 5.96875, "learning_rate": 0.0008617637192185277, "loss": 0.2853, "step": 56608 }, { "epoch": 0.10037420909431824, "grad_norm": 0.259765625, "learning_rate": 0.0008617063241872312, "loss": 0.1433, "step": 56610 }, { "epoch": 0.10037775525962805, "grad_norm": 0.40234375, "learning_rate": 0.0008616489301980927, "loss": 0.1908, "step": 56612 }, { "epoch": 0.10038130142493787, "grad_norm": 0.478515625, "learning_rate": 0.0008615915372513636, "loss": 0.2599, "step": 56614 }, { "epoch": 0.1003848475902477, "grad_norm": 0.76171875, "learning_rate": 0.0008615341453472945, "loss": 0.2391, "step": 56616 }, { "epoch": 0.10038839375555751, "grad_norm": 0.37109375, "learning_rate": 0.0008614767544861368, "loss": 0.1323, "step": 56618 }, { "epoch": 0.10039193992086733, "grad_norm": 0.57421875, "learning_rate": 0.0008614193646681409, "loss": 0.1729, "step": 56620 }, { "epoch": 0.10039548608617714, "grad_norm": 0.70703125, "learning_rate": 0.0008613619758935581, "loss": 0.1542, "step": 56622 }, { "epoch": 0.10039903225148696, "grad_norm": 0.7890625, "learning_rate": 0.0008613045881626398, "loss": 0.1846, "step": 56624 }, { "epoch": 0.10040257841679677, "grad_norm": 0.287109375, "learning_rate": 0.0008612472014756363, "loss": 0.1977, "step": 56626 }, { "epoch": 0.10040612458210658, "grad_norm": 0.453125, "learning_rate": 0.0008611898158327986, "loss": 0.1948, "step": 56628 }, { "epoch": 0.1004096707474164, "grad_norm": 0.2099609375, "learning_rate": 0.0008611324312343782, "loss": 0.159, "step": 56630 }, { "epoch": 0.10041321691272621, "grad_norm": 1.6328125, "learning_rate": 0.0008610750476806259, "loss": 0.2527, "step": 56632 }, { "epoch": 0.10041676307803603, "grad_norm": 0.953125, "learning_rate": 0.0008610176651717925, "loss": 0.2123, "step": 56634 }, { "epoch": 0.10042030924334584, "grad_norm": 0.9921875, "learning_rate": 0.000860960283708129, "loss": 0.3411, "step": 56636 }, { "epoch": 0.10042385540865566, "grad_norm": 0.345703125, "learning_rate": 0.0008609029032898863, "loss": 0.1647, "step": 56638 }, { "epoch": 0.10042740157396547, "grad_norm": 0.5546875, "learning_rate": 0.0008608455239173156, "loss": 0.2172, "step": 56640 }, { "epoch": 0.10043094773927529, "grad_norm": 0.3359375, "learning_rate": 0.0008607881455906675, "loss": 0.1728, "step": 56642 }, { "epoch": 0.1004344939045851, "grad_norm": 0.181640625, "learning_rate": 0.000860730768310193, "loss": 0.2429, "step": 56644 }, { "epoch": 0.10043804006989492, "grad_norm": 0.326171875, "learning_rate": 0.0008606733920761431, "loss": 0.2254, "step": 56646 }, { "epoch": 0.10044158623520473, "grad_norm": 0.318359375, "learning_rate": 0.0008606160168887686, "loss": 0.1925, "step": 56648 }, { "epoch": 0.10044513240051454, "grad_norm": 0.1923828125, "learning_rate": 0.0008605586427483215, "loss": 0.1811, "step": 56650 }, { "epoch": 0.10044867856582436, "grad_norm": 0.388671875, "learning_rate": 0.000860501269655051, "loss": 0.1567, "step": 56652 }, { "epoch": 0.10045222473113417, "grad_norm": 0.3125, "learning_rate": 0.0008604438976092091, "loss": 0.1722, "step": 56654 }, { "epoch": 0.10045577089644399, "grad_norm": 0.494140625, "learning_rate": 0.0008603865266110462, "loss": 0.1659, "step": 56656 }, { "epoch": 0.1004593170617538, "grad_norm": 0.369140625, "learning_rate": 0.000860329156660814, "loss": 0.1857, "step": 56658 }, { "epoch": 0.10046286322706363, "grad_norm": 0.265625, "learning_rate": 0.0008602717877587622, "loss": 0.1189, "step": 56660 }, { "epoch": 0.10046640939237345, "grad_norm": 0.91796875, "learning_rate": 0.0008602144199051428, "loss": 0.2507, "step": 56662 }, { "epoch": 0.10046995555768326, "grad_norm": 0.28515625, "learning_rate": 0.0008601570531002058, "loss": 0.1346, "step": 56664 }, { "epoch": 0.10047350172299307, "grad_norm": 0.2578125, "learning_rate": 0.0008600996873442031, "loss": 0.2187, "step": 56666 }, { "epoch": 0.10047704788830289, "grad_norm": 0.435546875, "learning_rate": 0.0008600423226373846, "loss": 0.17, "step": 56668 }, { "epoch": 0.1004805940536127, "grad_norm": 1.2265625, "learning_rate": 0.0008599849589800015, "loss": 0.1849, "step": 56670 }, { "epoch": 0.10048414021892252, "grad_norm": 0.625, "learning_rate": 0.0008599275963723052, "loss": 0.1808, "step": 56672 }, { "epoch": 0.10048768638423233, "grad_norm": 0.5703125, "learning_rate": 0.000859870234814546, "loss": 0.1943, "step": 56674 }, { "epoch": 0.10049123254954215, "grad_norm": 0.76171875, "learning_rate": 0.0008598128743069751, "loss": 0.2011, "step": 56676 }, { "epoch": 0.10049477871485196, "grad_norm": 0.76171875, "learning_rate": 0.0008597555148498426, "loss": 0.2134, "step": 56678 }, { "epoch": 0.10049832488016178, "grad_norm": 0.3828125, "learning_rate": 0.0008596981564434001, "loss": 0.1405, "step": 56680 }, { "epoch": 0.10050187104547159, "grad_norm": 1.0390625, "learning_rate": 0.0008596407990878987, "loss": 0.1642, "step": 56682 }, { "epoch": 0.1005054172107814, "grad_norm": 0.2197265625, "learning_rate": 0.0008595834427835885, "loss": 0.1989, "step": 56684 }, { "epoch": 0.10050896337609122, "grad_norm": 0.9921875, "learning_rate": 0.0008595260875307207, "loss": 0.2611, "step": 56686 }, { "epoch": 0.10051250954140103, "grad_norm": 0.291015625, "learning_rate": 0.0008594687333295461, "loss": 0.17, "step": 56688 }, { "epoch": 0.10051605570671085, "grad_norm": 0.63671875, "learning_rate": 0.0008594113801803159, "loss": 0.3155, "step": 56690 }, { "epoch": 0.10051960187202066, "grad_norm": 0.34765625, "learning_rate": 0.0008593540280832802, "loss": 0.1457, "step": 56692 }, { "epoch": 0.10052314803733048, "grad_norm": 0.4375, "learning_rate": 0.00085929667703869, "loss": 0.2109, "step": 56694 }, { "epoch": 0.1005266942026403, "grad_norm": 2.84375, "learning_rate": 0.0008592393270467967, "loss": 0.3353, "step": 56696 }, { "epoch": 0.10053024036795011, "grad_norm": 0.91015625, "learning_rate": 0.0008591819781078507, "loss": 0.1657, "step": 56698 }, { "epoch": 0.10053378653325992, "grad_norm": 0.3671875, "learning_rate": 0.0008591246302221028, "loss": 0.1774, "step": 56700 }, { "epoch": 0.10053733269856974, "grad_norm": 0.2236328125, "learning_rate": 0.0008590672833898036, "loss": 0.1359, "step": 56702 }, { "epoch": 0.10054087886387955, "grad_norm": 0.421875, "learning_rate": 0.0008590099376112042, "loss": 0.1676, "step": 56704 }, { "epoch": 0.10054442502918938, "grad_norm": 1.2734375, "learning_rate": 0.0008589525928865556, "loss": 0.3094, "step": 56706 }, { "epoch": 0.1005479711944992, "grad_norm": 0.984375, "learning_rate": 0.0008588952492161082, "loss": 0.1545, "step": 56708 }, { "epoch": 0.10055151735980901, "grad_norm": 0.546875, "learning_rate": 0.0008588379066001131, "loss": 0.1809, "step": 56710 }, { "epoch": 0.10055506352511882, "grad_norm": 0.419921875, "learning_rate": 0.0008587805650388207, "loss": 0.2164, "step": 56712 }, { "epoch": 0.10055860969042864, "grad_norm": 0.78125, "learning_rate": 0.0008587232245324823, "loss": 0.2358, "step": 56714 }, { "epoch": 0.10056215585573845, "grad_norm": 0.33984375, "learning_rate": 0.000858665885081348, "loss": 0.193, "step": 56716 }, { "epoch": 0.10056570202104827, "grad_norm": 0.26953125, "learning_rate": 0.000858608546685669, "loss": 0.1144, "step": 56718 }, { "epoch": 0.10056924818635808, "grad_norm": 0.2470703125, "learning_rate": 0.000858551209345696, "loss": 0.1544, "step": 56720 }, { "epoch": 0.1005727943516679, "grad_norm": 0.546875, "learning_rate": 0.00085849387306168, "loss": 0.193, "step": 56722 }, { "epoch": 0.10057634051697771, "grad_norm": 1.3046875, "learning_rate": 0.0008584365378338713, "loss": 0.1429, "step": 56724 }, { "epoch": 0.10057988668228753, "grad_norm": 0.408203125, "learning_rate": 0.0008583792036625209, "loss": 0.1445, "step": 56726 }, { "epoch": 0.10058343284759734, "grad_norm": 0.32421875, "learning_rate": 0.0008583218705478794, "loss": 0.193, "step": 56728 }, { "epoch": 0.10058697901290715, "grad_norm": 0.201171875, "learning_rate": 0.0008582645384901977, "loss": 0.1258, "step": 56730 }, { "epoch": 0.10059052517821697, "grad_norm": 0.33203125, "learning_rate": 0.0008582072074897269, "loss": 0.1975, "step": 56732 }, { "epoch": 0.10059407134352678, "grad_norm": 0.3984375, "learning_rate": 0.0008581498775467169, "loss": 0.1911, "step": 56734 }, { "epoch": 0.1005976175088366, "grad_norm": 0.3515625, "learning_rate": 0.0008580925486614188, "loss": 0.3237, "step": 56736 }, { "epoch": 0.10060116367414641, "grad_norm": 0.23828125, "learning_rate": 0.0008580352208340834, "loss": 0.1359, "step": 56738 }, { "epoch": 0.10060470983945623, "grad_norm": 0.388671875, "learning_rate": 0.0008579778940649618, "loss": 0.3184, "step": 56740 }, { "epoch": 0.10060825600476604, "grad_norm": 0.2265625, "learning_rate": 0.0008579205683543039, "loss": 0.2258, "step": 56742 }, { "epoch": 0.10061180217007586, "grad_norm": 0.25390625, "learning_rate": 0.0008578632437023608, "loss": 0.1652, "step": 56744 }, { "epoch": 0.10061534833538567, "grad_norm": 0.412109375, "learning_rate": 0.0008578059201093835, "loss": 0.2254, "step": 56746 }, { "epoch": 0.10061889450069549, "grad_norm": 0.298828125, "learning_rate": 0.0008577485975756225, "loss": 0.1837, "step": 56748 }, { "epoch": 0.1006224406660053, "grad_norm": 1.8828125, "learning_rate": 0.0008576912761013283, "loss": 0.1767, "step": 56750 }, { "epoch": 0.10062598683131513, "grad_norm": 0.73046875, "learning_rate": 0.0008576339556867514, "loss": 0.26, "step": 56752 }, { "epoch": 0.10062953299662494, "grad_norm": 0.2578125, "learning_rate": 0.0008575766363321429, "loss": 0.1904, "step": 56754 }, { "epoch": 0.10063307916193476, "grad_norm": 0.431640625, "learning_rate": 0.0008575193180377538, "loss": 0.1858, "step": 56756 }, { "epoch": 0.10063662532724457, "grad_norm": 0.56640625, "learning_rate": 0.0008574620008038338, "loss": 0.2827, "step": 56758 }, { "epoch": 0.10064017149255439, "grad_norm": 0.8046875, "learning_rate": 0.0008574046846306343, "loss": 0.1591, "step": 56760 }, { "epoch": 0.1006437176578642, "grad_norm": 0.5078125, "learning_rate": 0.000857347369518406, "loss": 0.2258, "step": 56762 }, { "epoch": 0.10064726382317402, "grad_norm": 0.373046875, "learning_rate": 0.0008572900554673993, "loss": 0.2177, "step": 56764 }, { "epoch": 0.10065080998848383, "grad_norm": 0.5546875, "learning_rate": 0.000857232742477865, "loss": 0.1448, "step": 56766 }, { "epoch": 0.10065435615379364, "grad_norm": 0.298828125, "learning_rate": 0.0008571754305500532, "loss": 0.2049, "step": 56768 }, { "epoch": 0.10065790231910346, "grad_norm": 0.38671875, "learning_rate": 0.0008571181196842153, "loss": 0.2061, "step": 56770 }, { "epoch": 0.10066144848441327, "grad_norm": 0.7421875, "learning_rate": 0.0008570608098806021, "loss": 0.2303, "step": 56772 }, { "epoch": 0.10066499464972309, "grad_norm": 0.45703125, "learning_rate": 0.0008570035011394632, "loss": 0.2265, "step": 56774 }, { "epoch": 0.1006685408150329, "grad_norm": 0.328125, "learning_rate": 0.0008569461934610498, "loss": 0.1493, "step": 56776 }, { "epoch": 0.10067208698034272, "grad_norm": 0.314453125, "learning_rate": 0.0008568888868456127, "loss": 0.1959, "step": 56778 }, { "epoch": 0.10067563314565253, "grad_norm": 0.56640625, "learning_rate": 0.0008568315812934027, "loss": 0.1952, "step": 56780 }, { "epoch": 0.10067917931096235, "grad_norm": 0.369140625, "learning_rate": 0.0008567742768046698, "loss": 0.1903, "step": 56782 }, { "epoch": 0.10068272547627216, "grad_norm": 0.4140625, "learning_rate": 0.0008567169733796652, "loss": 0.15, "step": 56784 }, { "epoch": 0.10068627164158198, "grad_norm": 0.52734375, "learning_rate": 0.0008566596710186389, "loss": 0.1615, "step": 56786 }, { "epoch": 0.10068981780689179, "grad_norm": 0.99609375, "learning_rate": 0.0008566023697218424, "loss": 0.2474, "step": 56788 }, { "epoch": 0.1006933639722016, "grad_norm": 0.2255859375, "learning_rate": 0.0008565450694895251, "loss": 0.1894, "step": 56790 }, { "epoch": 0.10069691013751142, "grad_norm": 0.26953125, "learning_rate": 0.0008564877703219385, "loss": 0.1523, "step": 56792 }, { "epoch": 0.10070045630282123, "grad_norm": 0.322265625, "learning_rate": 0.0008564304722193328, "loss": 0.1689, "step": 56794 }, { "epoch": 0.10070400246813106, "grad_norm": 0.54296875, "learning_rate": 0.0008563731751819592, "loss": 0.2813, "step": 56796 }, { "epoch": 0.10070754863344088, "grad_norm": 0.326171875, "learning_rate": 0.0008563158792100673, "loss": 0.189, "step": 56798 }, { "epoch": 0.10071109479875069, "grad_norm": 0.578125, "learning_rate": 0.0008562585843039084, "loss": 0.2105, "step": 56800 }, { "epoch": 0.1007146409640605, "grad_norm": 0.2412109375, "learning_rate": 0.0008562012904637328, "loss": 0.1538, "step": 56802 }, { "epoch": 0.10071818712937032, "grad_norm": 0.361328125, "learning_rate": 0.0008561439976897913, "loss": 0.1622, "step": 56804 }, { "epoch": 0.10072173329468014, "grad_norm": 0.287109375, "learning_rate": 0.0008560867059823345, "loss": 0.1357, "step": 56806 }, { "epoch": 0.10072527945998995, "grad_norm": 0.72265625, "learning_rate": 0.0008560294153416124, "loss": 0.1641, "step": 56808 }, { "epoch": 0.10072882562529976, "grad_norm": 1.2890625, "learning_rate": 0.0008559721257678759, "loss": 0.4458, "step": 56810 }, { "epoch": 0.10073237179060958, "grad_norm": 2.515625, "learning_rate": 0.0008559148372613756, "loss": 0.4552, "step": 56812 }, { "epoch": 0.1007359179559194, "grad_norm": 0.41015625, "learning_rate": 0.0008558575498223622, "loss": 0.156, "step": 56814 }, { "epoch": 0.10073946412122921, "grad_norm": 0.271484375, "learning_rate": 0.000855800263451086, "loss": 0.3558, "step": 56816 }, { "epoch": 0.10074301028653902, "grad_norm": 0.61328125, "learning_rate": 0.0008557429781477974, "loss": 0.1676, "step": 56818 }, { "epoch": 0.10074655645184884, "grad_norm": 0.490234375, "learning_rate": 0.0008556856939127473, "loss": 0.2067, "step": 56820 }, { "epoch": 0.10075010261715865, "grad_norm": 0.416015625, "learning_rate": 0.0008556284107461863, "loss": 0.1997, "step": 56822 }, { "epoch": 0.10075364878246847, "grad_norm": 0.2734375, "learning_rate": 0.0008555711286483645, "loss": 0.1409, "step": 56824 }, { "epoch": 0.10075719494777828, "grad_norm": 0.318359375, "learning_rate": 0.0008555138476195323, "loss": 0.1628, "step": 56826 }, { "epoch": 0.1007607411130881, "grad_norm": 0.3671875, "learning_rate": 0.0008554565676599409, "loss": 0.1578, "step": 56828 }, { "epoch": 0.10076428727839791, "grad_norm": 0.49609375, "learning_rate": 0.0008553992887698402, "loss": 0.165, "step": 56830 }, { "epoch": 0.10076783344370772, "grad_norm": 0.416015625, "learning_rate": 0.0008553420109494812, "loss": 0.2805, "step": 56832 }, { "epoch": 0.10077137960901754, "grad_norm": 0.36328125, "learning_rate": 0.0008552847341991137, "loss": 0.3563, "step": 56834 }, { "epoch": 0.10077492577432735, "grad_norm": 0.87109375, "learning_rate": 0.0008552274585189888, "loss": 0.1986, "step": 56836 }, { "epoch": 0.10077847193963717, "grad_norm": 0.26953125, "learning_rate": 0.0008551701839093571, "loss": 0.1341, "step": 56838 }, { "epoch": 0.10078201810494698, "grad_norm": 2.203125, "learning_rate": 0.0008551129103704685, "loss": 0.3739, "step": 56840 }, { "epoch": 0.10078556427025681, "grad_norm": 0.1533203125, "learning_rate": 0.000855055637902574, "loss": 0.1169, "step": 56842 }, { "epoch": 0.10078911043556663, "grad_norm": 1.2265625, "learning_rate": 0.0008549983665059235, "loss": 0.2814, "step": 56844 }, { "epoch": 0.10079265660087644, "grad_norm": 0.68359375, "learning_rate": 0.0008549410961807684, "loss": 0.1898, "step": 56846 }, { "epoch": 0.10079620276618625, "grad_norm": 0.486328125, "learning_rate": 0.0008548838269273583, "loss": 0.1912, "step": 56848 }, { "epoch": 0.10079974893149607, "grad_norm": 0.5390625, "learning_rate": 0.0008548265587459437, "loss": 0.164, "step": 56850 }, { "epoch": 0.10080329509680588, "grad_norm": 0.37109375, "learning_rate": 0.0008547692916367755, "loss": 0.1251, "step": 56852 }, { "epoch": 0.1008068412621157, "grad_norm": 0.55078125, "learning_rate": 0.0008547120256001042, "loss": 0.1731, "step": 56854 }, { "epoch": 0.10081038742742551, "grad_norm": 0.6328125, "learning_rate": 0.0008546547606361796, "loss": 0.1767, "step": 56856 }, { "epoch": 0.10081393359273533, "grad_norm": 0.306640625, "learning_rate": 0.000854597496745253, "loss": 0.1876, "step": 56858 }, { "epoch": 0.10081747975804514, "grad_norm": 0.9921875, "learning_rate": 0.000854540233927574, "loss": 0.1537, "step": 56860 }, { "epoch": 0.10082102592335496, "grad_norm": 0.39453125, "learning_rate": 0.000854482972183394, "loss": 0.2404, "step": 56862 }, { "epoch": 0.10082457208866477, "grad_norm": 0.93359375, "learning_rate": 0.0008544257115129623, "loss": 0.1999, "step": 56864 }, { "epoch": 0.10082811825397459, "grad_norm": 0.5703125, "learning_rate": 0.00085436845191653, "loss": 0.1459, "step": 56866 }, { "epoch": 0.1008316644192844, "grad_norm": 0.22265625, "learning_rate": 0.0008543111933943475, "loss": 0.2039, "step": 56868 }, { "epoch": 0.10083521058459421, "grad_norm": 0.255859375, "learning_rate": 0.0008542539359466655, "loss": 0.2257, "step": 56870 }, { "epoch": 0.10083875674990403, "grad_norm": 0.53125, "learning_rate": 0.0008541966795737334, "loss": 0.1454, "step": 56872 }, { "epoch": 0.10084230291521384, "grad_norm": 0.302734375, "learning_rate": 0.0008541394242758023, "loss": 0.2095, "step": 56874 }, { "epoch": 0.10084584908052366, "grad_norm": 1.7109375, "learning_rate": 0.000854082170053123, "loss": 0.276, "step": 56876 }, { "epoch": 0.10084939524583347, "grad_norm": 0.29296875, "learning_rate": 0.0008540249169059453, "loss": 0.1568, "step": 56878 }, { "epoch": 0.10085294141114329, "grad_norm": 0.60546875, "learning_rate": 0.0008539676648345196, "loss": 0.1517, "step": 56880 }, { "epoch": 0.1008564875764531, "grad_norm": 0.5078125, "learning_rate": 0.0008539104138390963, "loss": 0.1718, "step": 56882 }, { "epoch": 0.10086003374176292, "grad_norm": 0.228515625, "learning_rate": 0.000853853163919926, "loss": 0.29, "step": 56884 }, { "epoch": 0.10086357990707273, "grad_norm": 0.5546875, "learning_rate": 0.0008537959150772594, "loss": 0.1805, "step": 56886 }, { "epoch": 0.10086712607238256, "grad_norm": 0.310546875, "learning_rate": 0.0008537386673113459, "loss": 0.1076, "step": 56888 }, { "epoch": 0.10087067223769237, "grad_norm": 1.34375, "learning_rate": 0.0008536814206224366, "loss": 0.2339, "step": 56890 }, { "epoch": 0.10087421840300219, "grad_norm": 0.6328125, "learning_rate": 0.0008536241750107816, "loss": 0.1269, "step": 56892 }, { "epoch": 0.100877764568312, "grad_norm": 0.4296875, "learning_rate": 0.0008535669304766313, "loss": 0.2264, "step": 56894 }, { "epoch": 0.10088131073362182, "grad_norm": 0.53515625, "learning_rate": 0.0008535096870202364, "loss": 0.1569, "step": 56896 }, { "epoch": 0.10088485689893163, "grad_norm": 0.365234375, "learning_rate": 0.000853452444641847, "loss": 0.2159, "step": 56898 }, { "epoch": 0.10088840306424145, "grad_norm": 0.8046875, "learning_rate": 0.0008533952033417132, "loss": 0.2204, "step": 56900 }, { "epoch": 0.10089194922955126, "grad_norm": 0.71875, "learning_rate": 0.0008533379631200852, "loss": 0.1247, "step": 56902 }, { "epoch": 0.10089549539486108, "grad_norm": 0.392578125, "learning_rate": 0.0008532807239772144, "loss": 0.2163, "step": 56904 }, { "epoch": 0.10089904156017089, "grad_norm": 0.63671875, "learning_rate": 0.0008532234859133499, "loss": 0.1486, "step": 56906 }, { "epoch": 0.1009025877254807, "grad_norm": 2.828125, "learning_rate": 0.0008531662489287426, "loss": 0.2211, "step": 56908 }, { "epoch": 0.10090613389079052, "grad_norm": 0.3671875, "learning_rate": 0.0008531090130236425, "loss": 0.1504, "step": 56910 }, { "epoch": 0.10090968005610033, "grad_norm": 0.46484375, "learning_rate": 0.0008530517781983007, "loss": 0.1851, "step": 56912 }, { "epoch": 0.10091322622141015, "grad_norm": 1.25, "learning_rate": 0.0008529945444529668, "loss": 0.1585, "step": 56914 }, { "epoch": 0.10091677238671996, "grad_norm": 0.69140625, "learning_rate": 0.0008529373117878911, "loss": 0.1219, "step": 56916 }, { "epoch": 0.10092031855202978, "grad_norm": 0.1962890625, "learning_rate": 0.0008528800802033243, "loss": 0.1589, "step": 56918 }, { "epoch": 0.10092386471733959, "grad_norm": 0.443359375, "learning_rate": 0.0008528228496995165, "loss": 0.2072, "step": 56920 }, { "epoch": 0.10092741088264941, "grad_norm": 0.55078125, "learning_rate": 0.0008527656202767178, "loss": 0.1608, "step": 56922 }, { "epoch": 0.10093095704795922, "grad_norm": 0.36328125, "learning_rate": 0.0008527083919351785, "loss": 0.1408, "step": 56924 }, { "epoch": 0.10093450321326904, "grad_norm": 1.359375, "learning_rate": 0.0008526511646751491, "loss": 0.4384, "step": 56926 }, { "epoch": 0.10093804937857885, "grad_norm": 0.26953125, "learning_rate": 0.0008525939384968806, "loss": 0.2447, "step": 56928 }, { "epoch": 0.10094159554388867, "grad_norm": 0.2431640625, "learning_rate": 0.0008525367134006217, "loss": 0.162, "step": 56930 }, { "epoch": 0.1009451417091985, "grad_norm": 0.322265625, "learning_rate": 0.0008524794893866236, "loss": 0.1454, "step": 56932 }, { "epoch": 0.10094868787450831, "grad_norm": 0.859375, "learning_rate": 0.0008524222664551366, "loss": 0.1818, "step": 56934 }, { "epoch": 0.10095223403981812, "grad_norm": 0.515625, "learning_rate": 0.0008523650446064108, "loss": 0.1806, "step": 56936 }, { "epoch": 0.10095578020512794, "grad_norm": 4.09375, "learning_rate": 0.0008523078238406962, "loss": 0.3003, "step": 56938 }, { "epoch": 0.10095932637043775, "grad_norm": 0.63671875, "learning_rate": 0.0008522506041582437, "loss": 0.1688, "step": 56940 }, { "epoch": 0.10096287253574757, "grad_norm": 0.9765625, "learning_rate": 0.0008521933855593027, "loss": 0.2795, "step": 56942 }, { "epoch": 0.10096641870105738, "grad_norm": 0.55859375, "learning_rate": 0.0008521361680441244, "loss": 0.1695, "step": 56944 }, { "epoch": 0.1009699648663672, "grad_norm": 2.984375, "learning_rate": 0.0008520789516129578, "loss": 0.1624, "step": 56946 }, { "epoch": 0.10097351103167701, "grad_norm": 0.54296875, "learning_rate": 0.0008520217362660545, "loss": 0.3998, "step": 56948 }, { "epoch": 0.10097705719698682, "grad_norm": 0.85546875, "learning_rate": 0.0008519645220036638, "loss": 0.1449, "step": 56950 }, { "epoch": 0.10098060336229664, "grad_norm": 0.66015625, "learning_rate": 0.0008519073088260363, "loss": 0.1801, "step": 56952 }, { "epoch": 0.10098414952760645, "grad_norm": 0.79296875, "learning_rate": 0.0008518500967334224, "loss": 0.2255, "step": 56954 }, { "epoch": 0.10098769569291627, "grad_norm": 0.609375, "learning_rate": 0.0008517928857260715, "loss": 0.1673, "step": 56956 }, { "epoch": 0.10099124185822608, "grad_norm": 1.7890625, "learning_rate": 0.0008517356758042345, "loss": 0.2182, "step": 56958 }, { "epoch": 0.1009947880235359, "grad_norm": 0.466796875, "learning_rate": 0.0008516784669681616, "loss": 0.1402, "step": 56960 }, { "epoch": 0.10099833418884571, "grad_norm": 0.271484375, "learning_rate": 0.0008516212592181027, "loss": 0.1689, "step": 56962 }, { "epoch": 0.10100188035415553, "grad_norm": 0.3125, "learning_rate": 0.0008515640525543082, "loss": 0.1353, "step": 56964 }, { "epoch": 0.10100542651946534, "grad_norm": 0.3046875, "learning_rate": 0.0008515068469770281, "loss": 0.1717, "step": 56966 }, { "epoch": 0.10100897268477516, "grad_norm": 0.326171875, "learning_rate": 0.000851449642486513, "loss": 0.2041, "step": 56968 }, { "epoch": 0.10101251885008497, "grad_norm": 0.2177734375, "learning_rate": 0.0008513924390830125, "loss": 0.2015, "step": 56970 }, { "epoch": 0.10101606501539478, "grad_norm": 0.2197265625, "learning_rate": 0.0008513352367667772, "loss": 0.1714, "step": 56972 }, { "epoch": 0.1010196111807046, "grad_norm": 8.0625, "learning_rate": 0.0008512780355380571, "loss": 0.3065, "step": 56974 }, { "epoch": 0.10102315734601441, "grad_norm": 1.078125, "learning_rate": 0.0008512208353971023, "loss": 0.2611, "step": 56976 }, { "epoch": 0.10102670351132424, "grad_norm": 0.3671875, "learning_rate": 0.0008511636363441635, "loss": 0.2335, "step": 56978 }, { "epoch": 0.10103024967663406, "grad_norm": 0.251953125, "learning_rate": 0.00085110643837949, "loss": 0.2225, "step": 56980 }, { "epoch": 0.10103379584194387, "grad_norm": 0.671875, "learning_rate": 0.0008510492415033324, "loss": 0.2788, "step": 56982 }, { "epoch": 0.10103734200725369, "grad_norm": 0.72265625, "learning_rate": 0.0008509920457159405, "loss": 0.2118, "step": 56984 }, { "epoch": 0.1010408881725635, "grad_norm": 0.498046875, "learning_rate": 0.0008509348510175654, "loss": 0.2161, "step": 56986 }, { "epoch": 0.10104443433787332, "grad_norm": 0.4921875, "learning_rate": 0.0008508776574084564, "loss": 0.1518, "step": 56988 }, { "epoch": 0.10104798050318313, "grad_norm": 0.8125, "learning_rate": 0.0008508204648888638, "loss": 0.1542, "step": 56990 }, { "epoch": 0.10105152666849294, "grad_norm": 0.40625, "learning_rate": 0.0008507632734590374, "loss": 0.2001, "step": 56992 }, { "epoch": 0.10105507283380276, "grad_norm": 1.234375, "learning_rate": 0.0008507060831192284, "loss": 0.2356, "step": 56994 }, { "epoch": 0.10105861899911257, "grad_norm": 0.4921875, "learning_rate": 0.0008506488938696855, "loss": 0.1949, "step": 56996 }, { "epoch": 0.10106216516442239, "grad_norm": 0.2431640625, "learning_rate": 0.0008505917057106595, "loss": 0.2253, "step": 56998 }, { "epoch": 0.1010657113297322, "grad_norm": 0.4765625, "learning_rate": 0.0008505345186424007, "loss": 0.1963, "step": 57000 }, { "epoch": 0.10106925749504202, "grad_norm": 0.5078125, "learning_rate": 0.0008504773326651594, "loss": 0.1749, "step": 57002 }, { "epoch": 0.10107280366035183, "grad_norm": 0.2080078125, "learning_rate": 0.0008504201477791847, "loss": 0.1551, "step": 57004 }, { "epoch": 0.10107634982566165, "grad_norm": 3.109375, "learning_rate": 0.0008503629639847275, "loss": 0.3244, "step": 57006 }, { "epoch": 0.10107989599097146, "grad_norm": 0.26953125, "learning_rate": 0.0008503057812820376, "loss": 0.161, "step": 57008 }, { "epoch": 0.10108344215628128, "grad_norm": 1.3984375, "learning_rate": 0.0008502485996713656, "loss": 0.2115, "step": 57010 }, { "epoch": 0.10108698832159109, "grad_norm": 1.359375, "learning_rate": 0.0008501914191529607, "loss": 0.2403, "step": 57012 }, { "epoch": 0.1010905344869009, "grad_norm": 0.353515625, "learning_rate": 0.0008501342397270737, "loss": 0.183, "step": 57014 }, { "epoch": 0.10109408065221072, "grad_norm": 0.369140625, "learning_rate": 0.0008500770613939539, "loss": 0.1566, "step": 57016 }, { "epoch": 0.10109762681752053, "grad_norm": 0.44921875, "learning_rate": 0.0008500198841538527, "loss": 0.1726, "step": 57018 }, { "epoch": 0.10110117298283035, "grad_norm": 0.322265625, "learning_rate": 0.0008499627080070187, "loss": 0.1693, "step": 57020 }, { "epoch": 0.10110471914814016, "grad_norm": 2.78125, "learning_rate": 0.0008499055329537024, "loss": 0.231, "step": 57022 }, { "epoch": 0.10110826531344999, "grad_norm": 3.65625, "learning_rate": 0.0008498483589941546, "loss": 0.1994, "step": 57024 }, { "epoch": 0.1011118114787598, "grad_norm": 0.37109375, "learning_rate": 0.0008497911861286245, "loss": 0.1911, "step": 57026 }, { "epoch": 0.10111535764406962, "grad_norm": 0.423828125, "learning_rate": 0.0008497340143573624, "loss": 0.143, "step": 57028 }, { "epoch": 0.10111890380937943, "grad_norm": 0.220703125, "learning_rate": 0.0008496768436806184, "loss": 0.1931, "step": 57030 }, { "epoch": 0.10112244997468925, "grad_norm": 0.2109375, "learning_rate": 0.0008496196740986424, "loss": 0.1509, "step": 57032 }, { "epoch": 0.10112599613999906, "grad_norm": 1.03125, "learning_rate": 0.0008495625056116849, "loss": 0.1948, "step": 57034 }, { "epoch": 0.10112954230530888, "grad_norm": 0.45703125, "learning_rate": 0.000849505338219995, "loss": 0.1375, "step": 57036 }, { "epoch": 0.10113308847061869, "grad_norm": 1.140625, "learning_rate": 0.0008494481719238234, "loss": 0.2311, "step": 57038 }, { "epoch": 0.10113663463592851, "grad_norm": 0.38671875, "learning_rate": 0.0008493910067234201, "loss": 0.1797, "step": 57040 }, { "epoch": 0.10114018080123832, "grad_norm": 1.09375, "learning_rate": 0.000849333842619035, "loss": 0.3163, "step": 57042 }, { "epoch": 0.10114372696654814, "grad_norm": 0.2314453125, "learning_rate": 0.0008492766796109179, "loss": 0.1769, "step": 57044 }, { "epoch": 0.10114727313185795, "grad_norm": 0.203125, "learning_rate": 0.000849219517699319, "loss": 0.1677, "step": 57046 }, { "epoch": 0.10115081929716777, "grad_norm": 0.56640625, "learning_rate": 0.0008491623568844882, "loss": 0.1657, "step": 57048 }, { "epoch": 0.10115436546247758, "grad_norm": 0.5390625, "learning_rate": 0.0008491051971666758, "loss": 0.4428, "step": 57050 }, { "epoch": 0.1011579116277874, "grad_norm": 0.271484375, "learning_rate": 0.0008490480385461312, "loss": 0.1806, "step": 57052 }, { "epoch": 0.10116145779309721, "grad_norm": 1.265625, "learning_rate": 0.0008489908810231048, "loss": 0.1913, "step": 57054 }, { "epoch": 0.10116500395840702, "grad_norm": 0.33984375, "learning_rate": 0.0008489337245978465, "loss": 0.1693, "step": 57056 }, { "epoch": 0.10116855012371684, "grad_norm": 0.2431640625, "learning_rate": 0.0008488765692706064, "loss": 0.1628, "step": 57058 }, { "epoch": 0.10117209628902665, "grad_norm": 1.4609375, "learning_rate": 0.0008488194150416342, "loss": 0.2884, "step": 57060 }, { "epoch": 0.10117564245433647, "grad_norm": 0.8671875, "learning_rate": 0.0008487622619111799, "loss": 0.3412, "step": 57062 }, { "epoch": 0.10117918861964628, "grad_norm": 0.5625, "learning_rate": 0.0008487051098794936, "loss": 0.2011, "step": 57064 }, { "epoch": 0.1011827347849561, "grad_norm": 0.296875, "learning_rate": 0.0008486479589468253, "loss": 0.2157, "step": 57066 }, { "epoch": 0.10118628095026592, "grad_norm": 0.345703125, "learning_rate": 0.0008485908091134249, "loss": 0.1563, "step": 57068 }, { "epoch": 0.10118982711557574, "grad_norm": 1.171875, "learning_rate": 0.0008485336603795419, "loss": 0.1703, "step": 57070 }, { "epoch": 0.10119337328088555, "grad_norm": 0.61328125, "learning_rate": 0.0008484765127454265, "loss": 0.2581, "step": 57072 }, { "epoch": 0.10119691944619537, "grad_norm": 0.318359375, "learning_rate": 0.000848419366211329, "loss": 0.2088, "step": 57074 }, { "epoch": 0.10120046561150518, "grad_norm": 0.43359375, "learning_rate": 0.0008483622207774994, "loss": 0.1958, "step": 57076 }, { "epoch": 0.101204011776815, "grad_norm": 0.609375, "learning_rate": 0.0008483050764441869, "loss": 0.2036, "step": 57078 }, { "epoch": 0.10120755794212481, "grad_norm": 0.31640625, "learning_rate": 0.0008482479332116418, "loss": 0.3663, "step": 57080 }, { "epoch": 0.10121110410743463, "grad_norm": 0.4140625, "learning_rate": 0.0008481907910801141, "loss": 0.1575, "step": 57082 }, { "epoch": 0.10121465027274444, "grad_norm": 0.30859375, "learning_rate": 0.0008481336500498535, "loss": 0.1635, "step": 57084 }, { "epoch": 0.10121819643805426, "grad_norm": 1.015625, "learning_rate": 0.0008480765101211103, "loss": 0.1786, "step": 57086 }, { "epoch": 0.10122174260336407, "grad_norm": 0.283203125, "learning_rate": 0.0008480193712941338, "loss": 0.1434, "step": 57088 }, { "epoch": 0.10122528876867389, "grad_norm": 1.0078125, "learning_rate": 0.0008479622335691742, "loss": 0.1942, "step": 57090 }, { "epoch": 0.1012288349339837, "grad_norm": 0.828125, "learning_rate": 0.0008479050969464817, "loss": 0.174, "step": 57092 }, { "epoch": 0.10123238109929351, "grad_norm": 1.3984375, "learning_rate": 0.0008478479614263055, "loss": 0.2347, "step": 57094 }, { "epoch": 0.10123592726460333, "grad_norm": 0.251953125, "learning_rate": 0.0008477908270088959, "loss": 0.2716, "step": 57096 }, { "epoch": 0.10123947342991314, "grad_norm": 1.3828125, "learning_rate": 0.0008477336936945026, "loss": 0.2555, "step": 57098 }, { "epoch": 0.10124301959522296, "grad_norm": 0.6875, "learning_rate": 0.0008476765614833761, "loss": 0.179, "step": 57100 }, { "epoch": 0.10124656576053277, "grad_norm": 0.2138671875, "learning_rate": 0.0008476194303757654, "loss": 0.2244, "step": 57102 }, { "epoch": 0.10125011192584259, "grad_norm": 0.51953125, "learning_rate": 0.0008475623003719209, "loss": 0.1683, "step": 57104 }, { "epoch": 0.1012536580911524, "grad_norm": 0.390625, "learning_rate": 0.0008475051714720919, "loss": 0.2284, "step": 57106 }, { "epoch": 0.10125720425646222, "grad_norm": 0.67578125, "learning_rate": 0.0008474480436765293, "loss": 0.1529, "step": 57108 }, { "epoch": 0.10126075042177203, "grad_norm": 0.59765625, "learning_rate": 0.0008473909169854816, "loss": 0.2011, "step": 57110 }, { "epoch": 0.10126429658708185, "grad_norm": 0.359375, "learning_rate": 0.0008473337913991995, "loss": 0.2037, "step": 57112 }, { "epoch": 0.10126784275239167, "grad_norm": 0.443359375, "learning_rate": 0.0008472766669179323, "loss": 0.3115, "step": 57114 }, { "epoch": 0.10127138891770149, "grad_norm": 0.470703125, "learning_rate": 0.0008472195435419307, "loss": 0.1814, "step": 57116 }, { "epoch": 0.1012749350830113, "grad_norm": 0.212890625, "learning_rate": 0.000847162421271444, "loss": 0.1679, "step": 57118 }, { "epoch": 0.10127848124832112, "grad_norm": 0.55859375, "learning_rate": 0.0008471053001067217, "loss": 0.1848, "step": 57120 }, { "epoch": 0.10128202741363093, "grad_norm": 0.341796875, "learning_rate": 0.0008470481800480139, "loss": 0.3101, "step": 57122 }, { "epoch": 0.10128557357894075, "grad_norm": 6.34375, "learning_rate": 0.0008469910610955709, "loss": 0.2978, "step": 57124 }, { "epoch": 0.10128911974425056, "grad_norm": 0.18359375, "learning_rate": 0.0008469339432496415, "loss": 0.1469, "step": 57126 }, { "epoch": 0.10129266590956038, "grad_norm": 0.63671875, "learning_rate": 0.0008468768265104758, "loss": 0.1673, "step": 57128 }, { "epoch": 0.10129621207487019, "grad_norm": 0.27734375, "learning_rate": 0.0008468197108783243, "loss": 0.2171, "step": 57130 }, { "epoch": 0.10129975824018, "grad_norm": 0.2490234375, "learning_rate": 0.0008467625963534364, "loss": 0.1909, "step": 57132 }, { "epoch": 0.10130330440548982, "grad_norm": 0.4140625, "learning_rate": 0.0008467054829360617, "loss": 0.179, "step": 57134 }, { "epoch": 0.10130685057079963, "grad_norm": 0.4765625, "learning_rate": 0.0008466483706264499, "loss": 0.2795, "step": 57136 }, { "epoch": 0.10131039673610945, "grad_norm": 0.41015625, "learning_rate": 0.0008465912594248512, "loss": 0.2441, "step": 57138 }, { "epoch": 0.10131394290141926, "grad_norm": 0.412109375, "learning_rate": 0.0008465341493315151, "loss": 0.1404, "step": 57140 }, { "epoch": 0.10131748906672908, "grad_norm": 0.578125, "learning_rate": 0.0008464770403466915, "loss": 0.24, "step": 57142 }, { "epoch": 0.10132103523203889, "grad_norm": 0.65625, "learning_rate": 0.0008464199324706296, "loss": 0.1912, "step": 57144 }, { "epoch": 0.1013245813973487, "grad_norm": 0.52734375, "learning_rate": 0.0008463628257035798, "loss": 0.1776, "step": 57146 }, { "epoch": 0.10132812756265852, "grad_norm": 0.255859375, "learning_rate": 0.0008463057200457918, "loss": 0.1905, "step": 57148 }, { "epoch": 0.10133167372796834, "grad_norm": 0.8125, "learning_rate": 0.0008462486154975156, "loss": 0.2662, "step": 57150 }, { "epoch": 0.10133521989327815, "grad_norm": 0.44140625, "learning_rate": 0.0008461915120590001, "loss": 0.1511, "step": 57152 }, { "epoch": 0.10133876605858796, "grad_norm": 0.765625, "learning_rate": 0.0008461344097304953, "loss": 0.1509, "step": 57154 }, { "epoch": 0.10134231222389778, "grad_norm": 0.3359375, "learning_rate": 0.0008460773085122513, "loss": 0.1454, "step": 57156 }, { "epoch": 0.1013458583892076, "grad_norm": 0.63671875, "learning_rate": 0.0008460202084045179, "loss": 0.169, "step": 57158 }, { "epoch": 0.10134940455451742, "grad_norm": 0.302734375, "learning_rate": 0.0008459631094075444, "loss": 0.2946, "step": 57160 }, { "epoch": 0.10135295071982724, "grad_norm": 0.421875, "learning_rate": 0.0008459060115215809, "loss": 0.1872, "step": 57162 }, { "epoch": 0.10135649688513705, "grad_norm": 0.23828125, "learning_rate": 0.0008458489147468764, "loss": 0.1693, "step": 57164 }, { "epoch": 0.10136004305044687, "grad_norm": 0.1962890625, "learning_rate": 0.0008457918190836818, "loss": 0.1834, "step": 57166 }, { "epoch": 0.10136358921575668, "grad_norm": 0.73828125, "learning_rate": 0.0008457347245322457, "loss": 0.1741, "step": 57168 }, { "epoch": 0.1013671353810665, "grad_norm": 0.5390625, "learning_rate": 0.0008456776310928183, "loss": 0.1767, "step": 57170 }, { "epoch": 0.10137068154637631, "grad_norm": 0.322265625, "learning_rate": 0.0008456205387656488, "loss": 0.2331, "step": 57172 }, { "epoch": 0.10137422771168612, "grad_norm": 0.6640625, "learning_rate": 0.0008455634475509882, "loss": 0.22, "step": 57174 }, { "epoch": 0.10137777387699594, "grad_norm": 0.1875, "learning_rate": 0.0008455063574490846, "loss": 0.2746, "step": 57176 }, { "epoch": 0.10138132004230575, "grad_norm": 0.2578125, "learning_rate": 0.0008454492684601886, "loss": 0.1279, "step": 57178 }, { "epoch": 0.10138486620761557, "grad_norm": 0.26953125, "learning_rate": 0.0008453921805845495, "loss": 0.1846, "step": 57180 }, { "epoch": 0.10138841237292538, "grad_norm": 2.8125, "learning_rate": 0.0008453350938224176, "loss": 0.339, "step": 57182 }, { "epoch": 0.1013919585382352, "grad_norm": 0.318359375, "learning_rate": 0.0008452780081740414, "loss": 0.1693, "step": 57184 }, { "epoch": 0.10139550470354501, "grad_norm": 0.328125, "learning_rate": 0.0008452209236396715, "loss": 0.1844, "step": 57186 }, { "epoch": 0.10139905086885483, "grad_norm": 0.36328125, "learning_rate": 0.0008451638402195571, "loss": 0.157, "step": 57188 }, { "epoch": 0.10140259703416464, "grad_norm": 0.1650390625, "learning_rate": 0.0008451067579139486, "loss": 0.1786, "step": 57190 }, { "epoch": 0.10140614319947445, "grad_norm": 0.431640625, "learning_rate": 0.0008450496767230946, "loss": 0.1705, "step": 57192 }, { "epoch": 0.10140968936478427, "grad_norm": 2.28125, "learning_rate": 0.0008449925966472451, "loss": 0.1797, "step": 57194 }, { "epoch": 0.10141323553009408, "grad_norm": 4.0625, "learning_rate": 0.0008449355176866503, "loss": 0.3672, "step": 57196 }, { "epoch": 0.1014167816954039, "grad_norm": 0.21484375, "learning_rate": 0.0008448784398415592, "loss": 0.1182, "step": 57198 }, { "epoch": 0.10142032786071371, "grad_norm": 0.5625, "learning_rate": 0.0008448213631122217, "loss": 0.1878, "step": 57200 }, { "epoch": 0.10142387402602353, "grad_norm": 0.58203125, "learning_rate": 0.000844764287498887, "loss": 0.1691, "step": 57202 }, { "epoch": 0.10142742019133336, "grad_norm": 1.0859375, "learning_rate": 0.0008447072130018052, "loss": 0.2015, "step": 57204 }, { "epoch": 0.10143096635664317, "grad_norm": 0.419921875, "learning_rate": 0.0008446501396212263, "loss": 0.281, "step": 57206 }, { "epoch": 0.10143451252195299, "grad_norm": 0.765625, "learning_rate": 0.0008445930673573987, "loss": 0.2316, "step": 57208 }, { "epoch": 0.1014380586872628, "grad_norm": 0.40625, "learning_rate": 0.0008445359962105728, "loss": 0.2283, "step": 57210 }, { "epoch": 0.10144160485257261, "grad_norm": 0.3125, "learning_rate": 0.0008444789261809982, "loss": 0.1787, "step": 57212 }, { "epoch": 0.10144515101788243, "grad_norm": 0.25390625, "learning_rate": 0.0008444218572689244, "loss": 0.1469, "step": 57214 }, { "epoch": 0.10144869718319224, "grad_norm": 0.80859375, "learning_rate": 0.0008443647894746007, "loss": 0.2126, "step": 57216 }, { "epoch": 0.10145224334850206, "grad_norm": 0.5234375, "learning_rate": 0.0008443077227982769, "loss": 0.1584, "step": 57218 }, { "epoch": 0.10145578951381187, "grad_norm": 0.34765625, "learning_rate": 0.0008442506572402026, "loss": 0.1695, "step": 57220 }, { "epoch": 0.10145933567912169, "grad_norm": 0.220703125, "learning_rate": 0.000844193592800628, "loss": 0.1594, "step": 57222 }, { "epoch": 0.1014628818444315, "grad_norm": 0.3515625, "learning_rate": 0.0008441365294798014, "loss": 0.1661, "step": 57224 }, { "epoch": 0.10146642800974132, "grad_norm": 0.5859375, "learning_rate": 0.000844079467277973, "loss": 0.496, "step": 57226 }, { "epoch": 0.10146997417505113, "grad_norm": 0.79296875, "learning_rate": 0.0008440224061953923, "loss": 0.2608, "step": 57228 }, { "epoch": 0.10147352034036095, "grad_norm": 5.15625, "learning_rate": 0.0008439653462323091, "loss": 0.1934, "step": 57230 }, { "epoch": 0.10147706650567076, "grad_norm": 0.40625, "learning_rate": 0.000843908287388973, "loss": 0.1486, "step": 57232 }, { "epoch": 0.10148061267098057, "grad_norm": 0.455078125, "learning_rate": 0.0008438512296656331, "loss": 0.1807, "step": 57234 }, { "epoch": 0.10148415883629039, "grad_norm": 0.4453125, "learning_rate": 0.0008437941730625389, "loss": 0.1109, "step": 57236 }, { "epoch": 0.1014877050016002, "grad_norm": 0.353515625, "learning_rate": 0.0008437371175799403, "loss": 0.1533, "step": 57238 }, { "epoch": 0.10149125116691002, "grad_norm": 0.69921875, "learning_rate": 0.0008436800632180868, "loss": 0.1981, "step": 57240 }, { "epoch": 0.10149479733221983, "grad_norm": 0.9921875, "learning_rate": 0.0008436230099772278, "loss": 0.1954, "step": 57242 }, { "epoch": 0.10149834349752965, "grad_norm": 0.294921875, "learning_rate": 0.0008435659578576125, "loss": 0.1209, "step": 57244 }, { "epoch": 0.10150188966283946, "grad_norm": 0.53515625, "learning_rate": 0.000843508906859491, "loss": 0.2136, "step": 57246 }, { "epoch": 0.10150543582814928, "grad_norm": 0.6875, "learning_rate": 0.0008434518569831129, "loss": 0.238, "step": 57248 }, { "epoch": 0.1015089819934591, "grad_norm": 0.326171875, "learning_rate": 0.0008433948082287268, "loss": 0.2414, "step": 57250 }, { "epoch": 0.10151252815876892, "grad_norm": 0.51953125, "learning_rate": 0.0008433377605965831, "loss": 0.2065, "step": 57252 }, { "epoch": 0.10151607432407873, "grad_norm": 0.70703125, "learning_rate": 0.0008432807140869307, "loss": 0.2124, "step": 57254 }, { "epoch": 0.10151962048938855, "grad_norm": 0.5234375, "learning_rate": 0.0008432236687000199, "loss": 0.2035, "step": 57256 }, { "epoch": 0.10152316665469836, "grad_norm": 0.271484375, "learning_rate": 0.000843166624436099, "loss": 0.2161, "step": 57258 }, { "epoch": 0.10152671282000818, "grad_norm": 0.232421875, "learning_rate": 0.0008431095812954181, "loss": 0.1757, "step": 57260 }, { "epoch": 0.10153025898531799, "grad_norm": 1.2578125, "learning_rate": 0.0008430525392782268, "loss": 0.2226, "step": 57262 }, { "epoch": 0.1015338051506278, "grad_norm": 0.49609375, "learning_rate": 0.000842995498384775, "loss": 0.1669, "step": 57264 }, { "epoch": 0.10153735131593762, "grad_norm": 0.30859375, "learning_rate": 0.0008429384586153109, "loss": 0.1738, "step": 57266 }, { "epoch": 0.10154089748124744, "grad_norm": 1.03125, "learning_rate": 0.0008428814199700847, "loss": 0.195, "step": 57268 }, { "epoch": 0.10154444364655725, "grad_norm": 0.796875, "learning_rate": 0.000842824382449346, "loss": 0.3039, "step": 57270 }, { "epoch": 0.10154798981186706, "grad_norm": 0.60546875, "learning_rate": 0.0008427673460533441, "loss": 0.2302, "step": 57272 }, { "epoch": 0.10155153597717688, "grad_norm": 0.328125, "learning_rate": 0.0008427103107823284, "loss": 0.1667, "step": 57274 }, { "epoch": 0.1015550821424867, "grad_norm": 0.318359375, "learning_rate": 0.0008426532766365483, "loss": 0.1982, "step": 57276 }, { "epoch": 0.10155862830779651, "grad_norm": 1.734375, "learning_rate": 0.0008425962436162531, "loss": 0.2738, "step": 57278 }, { "epoch": 0.10156217447310632, "grad_norm": 0.59765625, "learning_rate": 0.000842539211721693, "loss": 0.1542, "step": 57280 }, { "epoch": 0.10156572063841614, "grad_norm": 0.38671875, "learning_rate": 0.0008424821809531163, "loss": 0.1758, "step": 57282 }, { "epoch": 0.10156926680372595, "grad_norm": 0.3046875, "learning_rate": 0.000842425151310773, "loss": 0.19, "step": 57284 }, { "epoch": 0.10157281296903577, "grad_norm": 1.7421875, "learning_rate": 0.0008423681227949125, "loss": 0.361, "step": 57286 }, { "epoch": 0.10157635913434558, "grad_norm": 0.279296875, "learning_rate": 0.0008423110954057844, "loss": 0.1607, "step": 57288 }, { "epoch": 0.1015799052996554, "grad_norm": 0.7109375, "learning_rate": 0.0008422540691436378, "loss": 0.1731, "step": 57290 }, { "epoch": 0.10158345146496521, "grad_norm": 0.267578125, "learning_rate": 0.0008421970440087221, "loss": 0.1657, "step": 57292 }, { "epoch": 0.10158699763027502, "grad_norm": 0.287109375, "learning_rate": 0.0008421400200012866, "loss": 0.1868, "step": 57294 }, { "epoch": 0.10159054379558485, "grad_norm": 0.65625, "learning_rate": 0.0008420829971215813, "loss": 0.2517, "step": 57296 }, { "epoch": 0.10159408996089467, "grad_norm": 0.21875, "learning_rate": 0.0008420259753698549, "loss": 0.2634, "step": 57298 }, { "epoch": 0.10159763612620448, "grad_norm": 0.490234375, "learning_rate": 0.0008419689547463572, "loss": 0.2616, "step": 57300 }, { "epoch": 0.1016011822915143, "grad_norm": 0.2412109375, "learning_rate": 0.0008419119352513369, "loss": 0.1796, "step": 57302 }, { "epoch": 0.10160472845682411, "grad_norm": 0.7109375, "learning_rate": 0.0008418549168850446, "loss": 0.1653, "step": 57304 }, { "epoch": 0.10160827462213393, "grad_norm": 0.353515625, "learning_rate": 0.0008417978996477284, "loss": 0.2108, "step": 57306 }, { "epoch": 0.10161182078744374, "grad_norm": 1.7421875, "learning_rate": 0.0008417408835396385, "loss": 0.2141, "step": 57308 }, { "epoch": 0.10161536695275356, "grad_norm": 0.25390625, "learning_rate": 0.0008416838685610237, "loss": 0.1481, "step": 57310 }, { "epoch": 0.10161891311806337, "grad_norm": 1.2109375, "learning_rate": 0.0008416268547121337, "loss": 0.2031, "step": 57312 }, { "epoch": 0.10162245928337318, "grad_norm": 0.59765625, "learning_rate": 0.0008415698419932183, "loss": 0.2889, "step": 57314 }, { "epoch": 0.101626005448683, "grad_norm": 0.474609375, "learning_rate": 0.0008415128304045254, "loss": 0.29, "step": 57316 }, { "epoch": 0.10162955161399281, "grad_norm": 0.29296875, "learning_rate": 0.0008414558199463058, "loss": 0.1468, "step": 57318 }, { "epoch": 0.10163309777930263, "grad_norm": 0.462890625, "learning_rate": 0.000841398810618808, "loss": 0.1847, "step": 57320 }, { "epoch": 0.10163664394461244, "grad_norm": 1.0078125, "learning_rate": 0.0008413418024222818, "loss": 0.1923, "step": 57322 }, { "epoch": 0.10164019010992226, "grad_norm": 1.765625, "learning_rate": 0.0008412847953569762, "loss": 0.1768, "step": 57324 }, { "epoch": 0.10164373627523207, "grad_norm": 0.55078125, "learning_rate": 0.0008412277894231406, "loss": 0.235, "step": 57326 }, { "epoch": 0.10164728244054189, "grad_norm": 0.44921875, "learning_rate": 0.0008411707846210243, "loss": 0.1781, "step": 57328 }, { "epoch": 0.1016508286058517, "grad_norm": 0.2890625, "learning_rate": 0.0008411137809508771, "loss": 0.1491, "step": 57330 }, { "epoch": 0.10165437477116152, "grad_norm": 0.96484375, "learning_rate": 0.0008410567784129475, "loss": 0.163, "step": 57332 }, { "epoch": 0.10165792093647133, "grad_norm": 0.306640625, "learning_rate": 0.0008409997770074849, "loss": 0.1971, "step": 57334 }, { "epoch": 0.10166146710178114, "grad_norm": 0.2421875, "learning_rate": 0.0008409427767347388, "loss": 0.1322, "step": 57336 }, { "epoch": 0.10166501326709096, "grad_norm": 0.3125, "learning_rate": 0.000840885777594959, "loss": 0.2467, "step": 57338 }, { "epoch": 0.10166855943240079, "grad_norm": 0.9375, "learning_rate": 0.000840828779588394, "loss": 0.1988, "step": 57340 }, { "epoch": 0.1016721055977106, "grad_norm": 0.451171875, "learning_rate": 0.0008407717827152932, "loss": 0.1839, "step": 57342 }, { "epoch": 0.10167565176302042, "grad_norm": 0.25390625, "learning_rate": 0.0008407147869759064, "loss": 0.1639, "step": 57344 }, { "epoch": 0.10167919792833023, "grad_norm": 0.216796875, "learning_rate": 0.0008406577923704826, "loss": 0.1887, "step": 57346 }, { "epoch": 0.10168274409364005, "grad_norm": 0.349609375, "learning_rate": 0.0008406007988992708, "loss": 0.1529, "step": 57348 }, { "epoch": 0.10168629025894986, "grad_norm": 0.53125, "learning_rate": 0.0008405438065625204, "loss": 0.1667, "step": 57350 }, { "epoch": 0.10168983642425967, "grad_norm": 0.478515625, "learning_rate": 0.0008404868153604803, "loss": 0.1998, "step": 57352 }, { "epoch": 0.10169338258956949, "grad_norm": 0.466796875, "learning_rate": 0.0008404298252934009, "loss": 0.1705, "step": 57354 }, { "epoch": 0.1016969287548793, "grad_norm": 0.240234375, "learning_rate": 0.00084037283636153, "loss": 0.2419, "step": 57356 }, { "epoch": 0.10170047492018912, "grad_norm": 0.26171875, "learning_rate": 0.0008403158485651177, "loss": 0.1862, "step": 57358 }, { "epoch": 0.10170402108549893, "grad_norm": 0.96484375, "learning_rate": 0.0008402588619044129, "loss": 0.1829, "step": 57360 }, { "epoch": 0.10170756725080875, "grad_norm": 0.5859375, "learning_rate": 0.0008402018763796655, "loss": 0.2265, "step": 57362 }, { "epoch": 0.10171111341611856, "grad_norm": 1.03125, "learning_rate": 0.0008401448919911239, "loss": 0.2517, "step": 57364 }, { "epoch": 0.10171465958142838, "grad_norm": 0.1630859375, "learning_rate": 0.0008400879087390376, "loss": 0.1832, "step": 57366 }, { "epoch": 0.10171820574673819, "grad_norm": 1.4140625, "learning_rate": 0.0008400309266236557, "loss": 0.2417, "step": 57368 }, { "epoch": 0.101721751912048, "grad_norm": 0.89453125, "learning_rate": 0.0008399739456452277, "loss": 0.1776, "step": 57370 }, { "epoch": 0.10172529807735782, "grad_norm": 0.205078125, "learning_rate": 0.0008399169658040024, "loss": 0.1805, "step": 57372 }, { "epoch": 0.10172884424266763, "grad_norm": 0.2041015625, "learning_rate": 0.0008398599871002292, "loss": 0.168, "step": 57374 }, { "epoch": 0.10173239040797745, "grad_norm": 0.240234375, "learning_rate": 0.0008398030095341573, "loss": 0.2312, "step": 57376 }, { "epoch": 0.10173593657328726, "grad_norm": 0.375, "learning_rate": 0.0008397460331060363, "loss": 0.144, "step": 57378 }, { "epoch": 0.10173948273859708, "grad_norm": 0.291015625, "learning_rate": 0.0008396890578161144, "loss": 0.164, "step": 57380 }, { "epoch": 0.10174302890390689, "grad_norm": 0.380859375, "learning_rate": 0.0008396320836646418, "loss": 0.2084, "step": 57382 }, { "epoch": 0.10174657506921671, "grad_norm": 0.224609375, "learning_rate": 0.000839575110651867, "loss": 0.178, "step": 57384 }, { "epoch": 0.10175012123452654, "grad_norm": 0.546875, "learning_rate": 0.0008395181387780396, "loss": 0.1778, "step": 57386 }, { "epoch": 0.10175366739983635, "grad_norm": 1.21875, "learning_rate": 0.0008394611680434083, "loss": 0.1929, "step": 57388 }, { "epoch": 0.10175721356514617, "grad_norm": 0.265625, "learning_rate": 0.0008394041984482228, "loss": 0.1718, "step": 57390 }, { "epoch": 0.10176075973045598, "grad_norm": 0.2314453125, "learning_rate": 0.0008393472299927316, "loss": 0.1584, "step": 57392 }, { "epoch": 0.1017643058957658, "grad_norm": 0.1650390625, "learning_rate": 0.0008392902626771848, "loss": 0.1538, "step": 57394 }, { "epoch": 0.10176785206107561, "grad_norm": 1.3984375, "learning_rate": 0.0008392332965018304, "loss": 0.2971, "step": 57396 }, { "epoch": 0.10177139822638542, "grad_norm": 0.5625, "learning_rate": 0.0008391763314669181, "loss": 0.1766, "step": 57398 }, { "epoch": 0.10177494439169524, "grad_norm": 0.92578125, "learning_rate": 0.0008391193675726974, "loss": 0.2267, "step": 57400 }, { "epoch": 0.10177849055700505, "grad_norm": 0.33203125, "learning_rate": 0.0008390624048194164, "loss": 0.2566, "step": 57402 }, { "epoch": 0.10178203672231487, "grad_norm": 0.78125, "learning_rate": 0.0008390054432073257, "loss": 0.2641, "step": 57404 }, { "epoch": 0.10178558288762468, "grad_norm": 0.33203125, "learning_rate": 0.0008389484827366732, "loss": 0.2278, "step": 57406 }, { "epoch": 0.1017891290529345, "grad_norm": 0.427734375, "learning_rate": 0.0008388915234077082, "loss": 0.1909, "step": 57408 }, { "epoch": 0.10179267521824431, "grad_norm": 0.87109375, "learning_rate": 0.0008388345652206799, "loss": 0.1555, "step": 57410 }, { "epoch": 0.10179622138355413, "grad_norm": 0.466796875, "learning_rate": 0.000838777608175838, "loss": 0.1831, "step": 57412 }, { "epoch": 0.10179976754886394, "grad_norm": 0.96484375, "learning_rate": 0.000838720652273431, "loss": 0.2292, "step": 57414 }, { "epoch": 0.10180331371417375, "grad_norm": 0.66796875, "learning_rate": 0.0008386636975137078, "loss": 0.1968, "step": 57416 }, { "epoch": 0.10180685987948357, "grad_norm": 0.248046875, "learning_rate": 0.000838606743896918, "loss": 0.2067, "step": 57418 }, { "epoch": 0.10181040604479338, "grad_norm": 0.51953125, "learning_rate": 0.0008385497914233106, "loss": 0.1748, "step": 57420 }, { "epoch": 0.1018139522101032, "grad_norm": 1.3203125, "learning_rate": 0.0008384928400931346, "loss": 0.2567, "step": 57422 }, { "epoch": 0.10181749837541301, "grad_norm": 0.5234375, "learning_rate": 0.0008384358899066386, "loss": 0.1733, "step": 57424 }, { "epoch": 0.10182104454072283, "grad_norm": 0.4375, "learning_rate": 0.0008383789408640723, "loss": 0.1324, "step": 57426 }, { "epoch": 0.10182459070603264, "grad_norm": 0.29296875, "learning_rate": 0.0008383219929656847, "loss": 0.2484, "step": 57428 }, { "epoch": 0.10182813687134246, "grad_norm": 0.6640625, "learning_rate": 0.0008382650462117245, "loss": 0.1669, "step": 57430 }, { "epoch": 0.10183168303665228, "grad_norm": 0.458984375, "learning_rate": 0.0008382081006024409, "loss": 0.1758, "step": 57432 }, { "epoch": 0.1018352292019621, "grad_norm": 0.396484375, "learning_rate": 0.000838151156138083, "loss": 0.1508, "step": 57434 }, { "epoch": 0.10183877536727191, "grad_norm": 0.5234375, "learning_rate": 0.0008380942128189001, "loss": 0.1423, "step": 57436 }, { "epoch": 0.10184232153258173, "grad_norm": 0.283203125, "learning_rate": 0.0008380372706451407, "loss": 0.1778, "step": 57438 }, { "epoch": 0.10184586769789154, "grad_norm": 0.490234375, "learning_rate": 0.0008379803296170542, "loss": 0.1787, "step": 57440 }, { "epoch": 0.10184941386320136, "grad_norm": 0.59765625, "learning_rate": 0.0008379233897348897, "loss": 0.1892, "step": 57442 }, { "epoch": 0.10185296002851117, "grad_norm": 0.453125, "learning_rate": 0.000837866450998896, "loss": 0.1895, "step": 57444 }, { "epoch": 0.10185650619382099, "grad_norm": 0.30859375, "learning_rate": 0.000837809513409322, "loss": 0.2046, "step": 57446 }, { "epoch": 0.1018600523591308, "grad_norm": 0.404296875, "learning_rate": 0.0008377525769664169, "loss": 0.1378, "step": 57448 }, { "epoch": 0.10186359852444062, "grad_norm": 0.4921875, "learning_rate": 0.0008376956416704297, "loss": 0.1442, "step": 57450 }, { "epoch": 0.10186714468975043, "grad_norm": 0.388671875, "learning_rate": 0.0008376387075216099, "loss": 0.1866, "step": 57452 }, { "epoch": 0.10187069085506024, "grad_norm": 0.58984375, "learning_rate": 0.0008375817745202055, "loss": 0.1715, "step": 57454 }, { "epoch": 0.10187423702037006, "grad_norm": 0.765625, "learning_rate": 0.0008375248426664659, "loss": 0.1875, "step": 57456 }, { "epoch": 0.10187778318567987, "grad_norm": 0.546875, "learning_rate": 0.0008374679119606403, "loss": 0.2008, "step": 57458 }, { "epoch": 0.10188132935098969, "grad_norm": 0.6171875, "learning_rate": 0.0008374109824029779, "loss": 0.1993, "step": 57460 }, { "epoch": 0.1018848755162995, "grad_norm": 0.7890625, "learning_rate": 0.000837354053993727, "loss": 0.2152, "step": 57462 }, { "epoch": 0.10188842168160932, "grad_norm": 0.32421875, "learning_rate": 0.0008372971267331369, "loss": 0.1464, "step": 57464 }, { "epoch": 0.10189196784691913, "grad_norm": 0.2431640625, "learning_rate": 0.0008372402006214566, "loss": 0.1511, "step": 57466 }, { "epoch": 0.10189551401222895, "grad_norm": 0.5390625, "learning_rate": 0.0008371832756589353, "loss": 0.1727, "step": 57468 }, { "epoch": 0.10189906017753876, "grad_norm": 0.22265625, "learning_rate": 0.0008371263518458212, "loss": 0.1306, "step": 57470 }, { "epoch": 0.10190260634284858, "grad_norm": 1.515625, "learning_rate": 0.000837069429182364, "loss": 0.2198, "step": 57472 }, { "epoch": 0.10190615250815839, "grad_norm": 0.9375, "learning_rate": 0.0008370125076688124, "loss": 0.2157, "step": 57474 }, { "epoch": 0.10190969867346822, "grad_norm": 0.423828125, "learning_rate": 0.0008369555873054154, "loss": 0.1998, "step": 57476 }, { "epoch": 0.10191324483877803, "grad_norm": 0.33203125, "learning_rate": 0.0008368986680924218, "loss": 0.16, "step": 57478 }, { "epoch": 0.10191679100408785, "grad_norm": 0.9453125, "learning_rate": 0.0008368417500300804, "loss": 0.1574, "step": 57480 }, { "epoch": 0.10192033716939766, "grad_norm": 0.43359375, "learning_rate": 0.0008367848331186405, "loss": 0.2167, "step": 57482 }, { "epoch": 0.10192388333470748, "grad_norm": 0.54296875, "learning_rate": 0.0008367279173583506, "loss": 0.5373, "step": 57484 }, { "epoch": 0.10192742950001729, "grad_norm": 0.380859375, "learning_rate": 0.0008366710027494604, "loss": 0.1694, "step": 57486 }, { "epoch": 0.1019309756653271, "grad_norm": 0.8046875, "learning_rate": 0.0008366140892922178, "loss": 0.1744, "step": 57488 }, { "epoch": 0.10193452183063692, "grad_norm": 0.6640625, "learning_rate": 0.0008365571769868722, "loss": 0.232, "step": 57490 }, { "epoch": 0.10193806799594674, "grad_norm": 0.5703125, "learning_rate": 0.0008365002658336729, "loss": 0.2128, "step": 57492 }, { "epoch": 0.10194161416125655, "grad_norm": 0.25, "learning_rate": 0.0008364433558328681, "loss": 0.1815, "step": 57494 }, { "epoch": 0.10194516032656636, "grad_norm": 0.5859375, "learning_rate": 0.0008363864469847071, "loss": 0.1427, "step": 57496 }, { "epoch": 0.10194870649187618, "grad_norm": 0.478515625, "learning_rate": 0.0008363295392894383, "loss": 0.2042, "step": 57498 }, { "epoch": 0.101952252657186, "grad_norm": 0.212890625, "learning_rate": 0.0008362726327473114, "loss": 0.1862, "step": 57500 }, { "epoch": 0.10195579882249581, "grad_norm": 0.2470703125, "learning_rate": 0.0008362157273585747, "loss": 0.2515, "step": 57502 }, { "epoch": 0.10195934498780562, "grad_norm": 1.0859375, "learning_rate": 0.0008361588231234769, "loss": 0.1921, "step": 57504 }, { "epoch": 0.10196289115311544, "grad_norm": 0.6015625, "learning_rate": 0.0008361019200422672, "loss": 0.183, "step": 57506 }, { "epoch": 0.10196643731842525, "grad_norm": 0.2001953125, "learning_rate": 0.0008360450181151947, "loss": 0.1597, "step": 57508 }, { "epoch": 0.10196998348373507, "grad_norm": 0.28125, "learning_rate": 0.0008359881173425079, "loss": 0.1603, "step": 57510 }, { "epoch": 0.10197352964904488, "grad_norm": 0.53515625, "learning_rate": 0.0008359312177244556, "loss": 0.1769, "step": 57512 }, { "epoch": 0.1019770758143547, "grad_norm": 0.671875, "learning_rate": 0.0008358743192612868, "loss": 0.3613, "step": 57514 }, { "epoch": 0.10198062197966451, "grad_norm": 0.515625, "learning_rate": 0.0008358174219532502, "loss": 0.1782, "step": 57516 }, { "epoch": 0.10198416814497432, "grad_norm": 0.251953125, "learning_rate": 0.0008357605258005952, "loss": 0.2014, "step": 57518 }, { "epoch": 0.10198771431028414, "grad_norm": 0.7890625, "learning_rate": 0.0008357036308035695, "loss": 0.1583, "step": 57520 }, { "epoch": 0.10199126047559397, "grad_norm": 0.25, "learning_rate": 0.000835646736962423, "loss": 0.1208, "step": 57522 }, { "epoch": 0.10199480664090378, "grad_norm": 0.5234375, "learning_rate": 0.0008355898442774039, "loss": 0.1608, "step": 57524 }, { "epoch": 0.1019983528062136, "grad_norm": 0.28125, "learning_rate": 0.0008355329527487617, "loss": 0.1701, "step": 57526 }, { "epoch": 0.10200189897152341, "grad_norm": 1.7109375, "learning_rate": 0.0008354760623767441, "loss": 0.2066, "step": 57528 }, { "epoch": 0.10200544513683323, "grad_norm": 0.2236328125, "learning_rate": 0.0008354191731616007, "loss": 0.1394, "step": 57530 }, { "epoch": 0.10200899130214304, "grad_norm": 0.53125, "learning_rate": 0.0008353622851035805, "loss": 0.1601, "step": 57532 }, { "epoch": 0.10201253746745285, "grad_norm": 1.53125, "learning_rate": 0.0008353053982029317, "loss": 0.3475, "step": 57534 }, { "epoch": 0.10201608363276267, "grad_norm": 0.369140625, "learning_rate": 0.0008352485124599036, "loss": 0.1755, "step": 57536 }, { "epoch": 0.10201962979807248, "grad_norm": 0.375, "learning_rate": 0.0008351916278747443, "loss": 0.1684, "step": 57538 }, { "epoch": 0.1020231759633823, "grad_norm": 1.5546875, "learning_rate": 0.0008351347444477031, "loss": 0.4991, "step": 57540 }, { "epoch": 0.10202672212869211, "grad_norm": 0.27734375, "learning_rate": 0.0008350778621790292, "loss": 0.1456, "step": 57542 }, { "epoch": 0.10203026829400193, "grad_norm": 0.388671875, "learning_rate": 0.0008350209810689702, "loss": 0.1525, "step": 57544 }, { "epoch": 0.10203381445931174, "grad_norm": 1.203125, "learning_rate": 0.0008349641011177755, "loss": 0.1601, "step": 57546 }, { "epoch": 0.10203736062462156, "grad_norm": 0.359375, "learning_rate": 0.000834907222325694, "loss": 0.2022, "step": 57548 }, { "epoch": 0.10204090678993137, "grad_norm": 0.2734375, "learning_rate": 0.0008348503446929747, "loss": 0.2302, "step": 57550 }, { "epoch": 0.10204445295524119, "grad_norm": 1.1640625, "learning_rate": 0.0008347934682198657, "loss": 0.22, "step": 57552 }, { "epoch": 0.102047999120551, "grad_norm": 0.318359375, "learning_rate": 0.0008347365929066159, "loss": 0.18, "step": 57554 }, { "epoch": 0.10205154528586081, "grad_norm": 0.67578125, "learning_rate": 0.0008346797187534744, "loss": 0.2149, "step": 57556 }, { "epoch": 0.10205509145117063, "grad_norm": 0.392578125, "learning_rate": 0.0008346228457606897, "loss": 0.2312, "step": 57558 }, { "epoch": 0.10205863761648044, "grad_norm": 0.298828125, "learning_rate": 0.0008345659739285103, "loss": 0.2156, "step": 57560 }, { "epoch": 0.10206218378179026, "grad_norm": 0.287109375, "learning_rate": 0.0008345091032571852, "loss": 0.2066, "step": 57562 }, { "epoch": 0.10206572994710007, "grad_norm": 0.55078125, "learning_rate": 0.0008344522337469629, "loss": 0.1665, "step": 57564 }, { "epoch": 0.10206927611240989, "grad_norm": 0.212890625, "learning_rate": 0.0008343953653980927, "loss": 0.3059, "step": 57566 }, { "epoch": 0.10207282227771972, "grad_norm": 0.828125, "learning_rate": 0.0008343384982108229, "loss": 0.2615, "step": 57568 }, { "epoch": 0.10207636844302953, "grad_norm": 0.25, "learning_rate": 0.0008342816321854019, "loss": 0.1372, "step": 57570 }, { "epoch": 0.10207991460833934, "grad_norm": 0.80859375, "learning_rate": 0.0008342247673220787, "loss": 0.1829, "step": 57572 }, { "epoch": 0.10208346077364916, "grad_norm": 0.61328125, "learning_rate": 0.000834167903621102, "loss": 0.197, "step": 57574 }, { "epoch": 0.10208700693895897, "grad_norm": 0.2578125, "learning_rate": 0.0008341110410827209, "loss": 0.2088, "step": 57576 }, { "epoch": 0.10209055310426879, "grad_norm": 0.6875, "learning_rate": 0.000834054179707183, "loss": 0.2083, "step": 57578 }, { "epoch": 0.1020940992695786, "grad_norm": 1.015625, "learning_rate": 0.0008339973194947379, "loss": 0.2228, "step": 57580 }, { "epoch": 0.10209764543488842, "grad_norm": 0.306640625, "learning_rate": 0.0008339404604456338, "loss": 0.2002, "step": 57582 }, { "epoch": 0.10210119160019823, "grad_norm": 0.27734375, "learning_rate": 0.0008338836025601202, "loss": 0.2037, "step": 57584 }, { "epoch": 0.10210473776550805, "grad_norm": 0.7890625, "learning_rate": 0.0008338267458384448, "loss": 0.1664, "step": 57586 }, { "epoch": 0.10210828393081786, "grad_norm": 1.4375, "learning_rate": 0.0008337698902808566, "loss": 0.2586, "step": 57588 }, { "epoch": 0.10211183009612768, "grad_norm": 0.4921875, "learning_rate": 0.0008337130358876039, "loss": 0.1605, "step": 57590 }, { "epoch": 0.10211537626143749, "grad_norm": 2.84375, "learning_rate": 0.0008336561826589364, "loss": 0.2229, "step": 57592 }, { "epoch": 0.1021189224267473, "grad_norm": 0.416015625, "learning_rate": 0.0008335993305951015, "loss": 0.2645, "step": 57594 }, { "epoch": 0.10212246859205712, "grad_norm": 0.2275390625, "learning_rate": 0.0008335424796963487, "loss": 0.2266, "step": 57596 }, { "epoch": 0.10212601475736693, "grad_norm": 0.494140625, "learning_rate": 0.0008334856299629259, "loss": 0.1774, "step": 57598 }, { "epoch": 0.10212956092267675, "grad_norm": 0.373046875, "learning_rate": 0.0008334287813950827, "loss": 0.1714, "step": 57600 }, { "epoch": 0.10213310708798656, "grad_norm": 0.474609375, "learning_rate": 0.0008333719339930668, "loss": 0.1599, "step": 57602 }, { "epoch": 0.10213665325329638, "grad_norm": 1.1328125, "learning_rate": 0.0008333150877571271, "loss": 0.2647, "step": 57604 }, { "epoch": 0.10214019941860619, "grad_norm": 0.384765625, "learning_rate": 0.0008332582426875126, "loss": 0.1423, "step": 57606 }, { "epoch": 0.102143745583916, "grad_norm": 0.3046875, "learning_rate": 0.0008332013987844716, "loss": 0.1657, "step": 57608 }, { "epoch": 0.10214729174922582, "grad_norm": 0.234375, "learning_rate": 0.0008331445560482527, "loss": 0.1547, "step": 57610 }, { "epoch": 0.10215083791453565, "grad_norm": 0.380859375, "learning_rate": 0.0008330877144791043, "loss": 0.2026, "step": 57612 }, { "epoch": 0.10215438407984546, "grad_norm": 0.7265625, "learning_rate": 0.000833030874077275, "loss": 0.1994, "step": 57614 }, { "epoch": 0.10215793024515528, "grad_norm": 1.65625, "learning_rate": 0.0008329740348430141, "loss": 0.4025, "step": 57616 }, { "epoch": 0.1021614764104651, "grad_norm": 0.40625, "learning_rate": 0.0008329171967765694, "loss": 0.1479, "step": 57618 }, { "epoch": 0.10216502257577491, "grad_norm": 0.189453125, "learning_rate": 0.0008328603598781897, "loss": 0.1349, "step": 57620 }, { "epoch": 0.10216856874108472, "grad_norm": 0.515625, "learning_rate": 0.0008328035241481236, "loss": 0.1395, "step": 57622 }, { "epoch": 0.10217211490639454, "grad_norm": 0.546875, "learning_rate": 0.00083274668958662, "loss": 0.2043, "step": 57624 }, { "epoch": 0.10217566107170435, "grad_norm": 0.251953125, "learning_rate": 0.0008326898561939269, "loss": 0.1942, "step": 57626 }, { "epoch": 0.10217920723701417, "grad_norm": 0.61328125, "learning_rate": 0.000832633023970293, "loss": 0.2068, "step": 57628 }, { "epoch": 0.10218275340232398, "grad_norm": 0.310546875, "learning_rate": 0.000832576192915967, "loss": 0.1529, "step": 57630 }, { "epoch": 0.1021862995676338, "grad_norm": 0.5859375, "learning_rate": 0.0008325193630311978, "loss": 0.2028, "step": 57632 }, { "epoch": 0.10218984573294361, "grad_norm": 0.31640625, "learning_rate": 0.000832462534316233, "loss": 0.1886, "step": 57634 }, { "epoch": 0.10219339189825342, "grad_norm": 0.31640625, "learning_rate": 0.0008324057067713215, "loss": 0.1645, "step": 57636 }, { "epoch": 0.10219693806356324, "grad_norm": 0.27734375, "learning_rate": 0.0008323488803967126, "loss": 0.2005, "step": 57638 }, { "epoch": 0.10220048422887305, "grad_norm": 1.546875, "learning_rate": 0.000832292055192654, "loss": 0.2216, "step": 57640 }, { "epoch": 0.10220403039418287, "grad_norm": 0.74609375, "learning_rate": 0.0008322352311593945, "loss": 0.2146, "step": 57642 }, { "epoch": 0.10220757655949268, "grad_norm": 0.83984375, "learning_rate": 0.0008321784082971823, "loss": 0.2062, "step": 57644 }, { "epoch": 0.1022111227248025, "grad_norm": 0.47265625, "learning_rate": 0.0008321215866062666, "loss": 0.1908, "step": 57646 }, { "epoch": 0.10221466889011231, "grad_norm": 0.26171875, "learning_rate": 0.000832064766086895, "loss": 0.2316, "step": 57648 }, { "epoch": 0.10221821505542213, "grad_norm": 0.25390625, "learning_rate": 0.0008320079467393169, "loss": 0.149, "step": 57650 }, { "epoch": 0.10222176122073194, "grad_norm": 0.73046875, "learning_rate": 0.0008319511285637802, "loss": 0.1818, "step": 57652 }, { "epoch": 0.10222530738604176, "grad_norm": 0.578125, "learning_rate": 0.0008318943115605334, "loss": 0.2301, "step": 57654 }, { "epoch": 0.10222885355135157, "grad_norm": 0.7734375, "learning_rate": 0.0008318374957298251, "loss": 0.2024, "step": 57656 }, { "epoch": 0.1022323997166614, "grad_norm": 0.3515625, "learning_rate": 0.0008317806810719044, "loss": 0.2357, "step": 57658 }, { "epoch": 0.10223594588197121, "grad_norm": 3.390625, "learning_rate": 0.0008317238675870186, "loss": 0.2759, "step": 57660 }, { "epoch": 0.10223949204728103, "grad_norm": 0.18359375, "learning_rate": 0.0008316670552754171, "loss": 0.1839, "step": 57662 }, { "epoch": 0.10224303821259084, "grad_norm": 0.287109375, "learning_rate": 0.0008316102441373479, "loss": 0.1698, "step": 57664 }, { "epoch": 0.10224658437790066, "grad_norm": 0.2734375, "learning_rate": 0.00083155343417306, "loss": 0.1839, "step": 57666 }, { "epoch": 0.10225013054321047, "grad_norm": 0.5234375, "learning_rate": 0.0008314966253828007, "loss": 0.3647, "step": 57668 }, { "epoch": 0.10225367670852029, "grad_norm": 0.3984375, "learning_rate": 0.0008314398177668194, "loss": 0.0993, "step": 57670 }, { "epoch": 0.1022572228738301, "grad_norm": 0.322265625, "learning_rate": 0.0008313830113253645, "loss": 0.1781, "step": 57672 }, { "epoch": 0.10226076903913991, "grad_norm": 0.28515625, "learning_rate": 0.0008313262060586846, "loss": 0.1708, "step": 57674 }, { "epoch": 0.10226431520444973, "grad_norm": 0.5859375, "learning_rate": 0.0008312694019670275, "loss": 0.2274, "step": 57676 }, { "epoch": 0.10226786136975954, "grad_norm": 0.56640625, "learning_rate": 0.0008312125990506416, "loss": 0.2001, "step": 57678 }, { "epoch": 0.10227140753506936, "grad_norm": 0.224609375, "learning_rate": 0.0008311557973097761, "loss": 0.1481, "step": 57680 }, { "epoch": 0.10227495370037917, "grad_norm": 0.49609375, "learning_rate": 0.0008310989967446789, "loss": 0.2073, "step": 57682 }, { "epoch": 0.10227849986568899, "grad_norm": 0.3203125, "learning_rate": 0.0008310421973555986, "loss": 0.158, "step": 57684 }, { "epoch": 0.1022820460309988, "grad_norm": 0.828125, "learning_rate": 0.0008309853991427834, "loss": 0.1915, "step": 57686 }, { "epoch": 0.10228559219630862, "grad_norm": 0.6875, "learning_rate": 0.0008309286021064815, "loss": 0.1363, "step": 57688 }, { "epoch": 0.10228913836161843, "grad_norm": 0.59765625, "learning_rate": 0.000830871806246942, "loss": 0.3355, "step": 57690 }, { "epoch": 0.10229268452692825, "grad_norm": 0.412109375, "learning_rate": 0.0008308150115644129, "loss": 0.1753, "step": 57692 }, { "epoch": 0.10229623069223806, "grad_norm": 0.291015625, "learning_rate": 0.0008307582180591422, "loss": 0.2052, "step": 57694 }, { "epoch": 0.10229977685754787, "grad_norm": 0.578125, "learning_rate": 0.0008307014257313787, "loss": 0.1684, "step": 57696 }, { "epoch": 0.10230332302285769, "grad_norm": 0.1904296875, "learning_rate": 0.0008306446345813711, "loss": 0.1155, "step": 57698 }, { "epoch": 0.1023068691881675, "grad_norm": 0.33203125, "learning_rate": 0.0008305878446093673, "loss": 0.1885, "step": 57700 }, { "epoch": 0.10231041535347732, "grad_norm": 0.28515625, "learning_rate": 0.0008305310558156158, "loss": 0.1416, "step": 57702 }, { "epoch": 0.10231396151878715, "grad_norm": 0.42578125, "learning_rate": 0.0008304742682003648, "loss": 0.2131, "step": 57704 }, { "epoch": 0.10231750768409696, "grad_norm": 0.279296875, "learning_rate": 0.000830417481763863, "loss": 0.1325, "step": 57706 }, { "epoch": 0.10232105384940678, "grad_norm": 0.80078125, "learning_rate": 0.0008303606965063584, "loss": 0.1978, "step": 57708 }, { "epoch": 0.10232460001471659, "grad_norm": 0.1826171875, "learning_rate": 0.0008303039124280993, "loss": 0.1564, "step": 57710 }, { "epoch": 0.1023281461800264, "grad_norm": 0.38671875, "learning_rate": 0.0008302471295293343, "loss": 0.1909, "step": 57712 }, { "epoch": 0.10233169234533622, "grad_norm": 0.57421875, "learning_rate": 0.0008301903478103121, "loss": 0.1811, "step": 57714 }, { "epoch": 0.10233523851064603, "grad_norm": 0.41015625, "learning_rate": 0.00083013356727128, "loss": 0.2176, "step": 57716 }, { "epoch": 0.10233878467595585, "grad_norm": 0.37109375, "learning_rate": 0.0008300767879124871, "loss": 0.1133, "step": 57718 }, { "epoch": 0.10234233084126566, "grad_norm": 0.36328125, "learning_rate": 0.0008300200097341817, "loss": 0.1784, "step": 57720 }, { "epoch": 0.10234587700657548, "grad_norm": 0.30859375, "learning_rate": 0.000829963232736612, "loss": 0.1727, "step": 57722 }, { "epoch": 0.10234942317188529, "grad_norm": 0.169921875, "learning_rate": 0.0008299064569200261, "loss": 0.1746, "step": 57724 }, { "epoch": 0.10235296933719511, "grad_norm": 1.2109375, "learning_rate": 0.0008298496822846724, "loss": 0.1889, "step": 57726 }, { "epoch": 0.10235651550250492, "grad_norm": 0.74609375, "learning_rate": 0.0008297929088307991, "loss": 0.3225, "step": 57728 }, { "epoch": 0.10236006166781474, "grad_norm": 0.330078125, "learning_rate": 0.0008297361365586553, "loss": 0.1617, "step": 57730 }, { "epoch": 0.10236360783312455, "grad_norm": 0.1962890625, "learning_rate": 0.0008296793654684882, "loss": 0.1351, "step": 57732 }, { "epoch": 0.10236715399843437, "grad_norm": 0.5234375, "learning_rate": 0.0008296225955605465, "loss": 0.1989, "step": 57734 }, { "epoch": 0.10237070016374418, "grad_norm": 0.59375, "learning_rate": 0.0008295658268350788, "loss": 0.1598, "step": 57736 }, { "epoch": 0.102374246329054, "grad_norm": 0.3359375, "learning_rate": 0.0008295090592923326, "loss": 0.1772, "step": 57738 }, { "epoch": 0.10237779249436381, "grad_norm": 1.5, "learning_rate": 0.0008294522929325574, "loss": 0.2372, "step": 57740 }, { "epoch": 0.10238133865967362, "grad_norm": 4.53125, "learning_rate": 0.0008293955277560002, "loss": 0.2551, "step": 57742 }, { "epoch": 0.10238488482498344, "grad_norm": 1.1171875, "learning_rate": 0.0008293387637629097, "loss": 0.2375, "step": 57744 }, { "epoch": 0.10238843099029325, "grad_norm": 0.34375, "learning_rate": 0.0008292820009535344, "loss": 0.1825, "step": 57746 }, { "epoch": 0.10239197715560308, "grad_norm": 0.57421875, "learning_rate": 0.0008292252393281226, "loss": 0.1509, "step": 57748 }, { "epoch": 0.1023955233209129, "grad_norm": 0.359375, "learning_rate": 0.0008291684788869218, "loss": 0.4121, "step": 57750 }, { "epoch": 0.10239906948622271, "grad_norm": 0.96875, "learning_rate": 0.000829111719630181, "loss": 0.2563, "step": 57752 }, { "epoch": 0.10240261565153252, "grad_norm": 0.4453125, "learning_rate": 0.0008290549615581484, "loss": 0.1633, "step": 57754 }, { "epoch": 0.10240616181684234, "grad_norm": 0.349609375, "learning_rate": 0.000828998204671072, "loss": 0.2017, "step": 57756 }, { "epoch": 0.10240970798215215, "grad_norm": 0.365234375, "learning_rate": 0.0008289414489692, "loss": 0.1514, "step": 57758 }, { "epoch": 0.10241325414746197, "grad_norm": 0.3359375, "learning_rate": 0.0008288846944527803, "loss": 0.167, "step": 57760 }, { "epoch": 0.10241680031277178, "grad_norm": 0.52734375, "learning_rate": 0.0008288279411220617, "loss": 0.1803, "step": 57762 }, { "epoch": 0.1024203464780816, "grad_norm": 0.578125, "learning_rate": 0.0008287711889772922, "loss": 0.1837, "step": 57764 }, { "epoch": 0.10242389264339141, "grad_norm": 1.546875, "learning_rate": 0.00082871443801872, "loss": 0.253, "step": 57766 }, { "epoch": 0.10242743880870123, "grad_norm": 0.640625, "learning_rate": 0.0008286576882465931, "loss": 0.1606, "step": 57768 }, { "epoch": 0.10243098497401104, "grad_norm": 0.474609375, "learning_rate": 0.0008286009396611598, "loss": 0.2396, "step": 57770 }, { "epoch": 0.10243453113932086, "grad_norm": 0.494140625, "learning_rate": 0.0008285441922626686, "loss": 0.1745, "step": 57772 }, { "epoch": 0.10243807730463067, "grad_norm": 0.2177734375, "learning_rate": 0.0008284874460513674, "loss": 0.1611, "step": 57774 }, { "epoch": 0.10244162346994048, "grad_norm": 0.51953125, "learning_rate": 0.0008284307010275043, "loss": 0.1609, "step": 57776 }, { "epoch": 0.1024451696352503, "grad_norm": 0.408203125, "learning_rate": 0.0008283739571913276, "loss": 0.1958, "step": 57778 }, { "epoch": 0.10244871580056011, "grad_norm": 0.23828125, "learning_rate": 0.0008283172145430857, "loss": 0.1685, "step": 57780 }, { "epoch": 0.10245226196586993, "grad_norm": 3.875, "learning_rate": 0.0008282604730830261, "loss": 0.218, "step": 57782 }, { "epoch": 0.10245580813117974, "grad_norm": 0.328125, "learning_rate": 0.0008282037328113974, "loss": 0.2151, "step": 57784 }, { "epoch": 0.10245935429648956, "grad_norm": 0.5234375, "learning_rate": 0.0008281469937284476, "loss": 0.1711, "step": 57786 }, { "epoch": 0.10246290046179937, "grad_norm": 0.8828125, "learning_rate": 0.0008280902558344254, "loss": 0.2326, "step": 57788 }, { "epoch": 0.10246644662710919, "grad_norm": 0.3125, "learning_rate": 0.000828033519129578, "loss": 0.1565, "step": 57790 }, { "epoch": 0.102469992792419, "grad_norm": 0.48828125, "learning_rate": 0.000827976783614154, "loss": 0.2212, "step": 57792 }, { "epoch": 0.10247353895772883, "grad_norm": 0.404296875, "learning_rate": 0.0008279200492884018, "loss": 0.1587, "step": 57794 }, { "epoch": 0.10247708512303864, "grad_norm": 0.1708984375, "learning_rate": 0.0008278633161525693, "loss": 0.1488, "step": 57796 }, { "epoch": 0.10248063128834846, "grad_norm": 0.357421875, "learning_rate": 0.0008278065842069045, "loss": 0.1584, "step": 57798 }, { "epoch": 0.10248417745365827, "grad_norm": 0.69921875, "learning_rate": 0.0008277498534516554, "loss": 0.1926, "step": 57800 }, { "epoch": 0.10248772361896809, "grad_norm": 2.3125, "learning_rate": 0.0008276931238870704, "loss": 0.2955, "step": 57802 }, { "epoch": 0.1024912697842779, "grad_norm": 0.158203125, "learning_rate": 0.0008276363955133978, "loss": 0.1881, "step": 57804 }, { "epoch": 0.10249481594958772, "grad_norm": 0.45703125, "learning_rate": 0.0008275796683308852, "loss": 0.1553, "step": 57806 }, { "epoch": 0.10249836211489753, "grad_norm": 1.2734375, "learning_rate": 0.0008275229423397806, "loss": 0.2053, "step": 57808 }, { "epoch": 0.10250190828020735, "grad_norm": 0.3359375, "learning_rate": 0.0008274662175403324, "loss": 0.2057, "step": 57810 }, { "epoch": 0.10250545444551716, "grad_norm": 0.2451171875, "learning_rate": 0.0008274094939327892, "loss": 0.2471, "step": 57812 }, { "epoch": 0.10250900061082698, "grad_norm": 0.75, "learning_rate": 0.0008273527715173983, "loss": 0.1508, "step": 57814 }, { "epoch": 0.10251254677613679, "grad_norm": 0.8046875, "learning_rate": 0.000827296050294408, "loss": 0.1717, "step": 57816 }, { "epoch": 0.1025160929414466, "grad_norm": 2.375, "learning_rate": 0.000827239330264066, "loss": 0.2601, "step": 57818 }, { "epoch": 0.10251963910675642, "grad_norm": 0.81640625, "learning_rate": 0.000827182611426621, "loss": 0.2738, "step": 57820 }, { "epoch": 0.10252318527206623, "grad_norm": 0.28125, "learning_rate": 0.0008271258937823212, "loss": 0.1753, "step": 57822 }, { "epoch": 0.10252673143737605, "grad_norm": 0.2578125, "learning_rate": 0.0008270691773314136, "loss": 0.1614, "step": 57824 }, { "epoch": 0.10253027760268586, "grad_norm": 0.298828125, "learning_rate": 0.0008270124620741471, "loss": 0.181, "step": 57826 }, { "epoch": 0.10253382376799568, "grad_norm": 0.349609375, "learning_rate": 0.0008269557480107696, "loss": 0.1852, "step": 57828 }, { "epoch": 0.10253736993330549, "grad_norm": 1.1171875, "learning_rate": 0.0008268990351415289, "loss": 0.1792, "step": 57830 }, { "epoch": 0.1025409160986153, "grad_norm": 0.39453125, "learning_rate": 0.0008268423234666735, "loss": 0.2161, "step": 57832 }, { "epoch": 0.10254446226392512, "grad_norm": 0.5546875, "learning_rate": 0.0008267856129864508, "loss": 0.1648, "step": 57834 }, { "epoch": 0.10254800842923494, "grad_norm": 0.404296875, "learning_rate": 0.0008267289037011092, "loss": 0.2317, "step": 57836 }, { "epoch": 0.10255155459454475, "grad_norm": 0.421875, "learning_rate": 0.0008266721956108967, "loss": 0.2072, "step": 57838 }, { "epoch": 0.10255510075985458, "grad_norm": 0.625, "learning_rate": 0.0008266154887160613, "loss": 0.1645, "step": 57840 }, { "epoch": 0.10255864692516439, "grad_norm": 0.5546875, "learning_rate": 0.0008265587830168506, "loss": 0.1532, "step": 57842 }, { "epoch": 0.10256219309047421, "grad_norm": 0.7109375, "learning_rate": 0.0008265020785135131, "loss": 0.1868, "step": 57844 }, { "epoch": 0.10256573925578402, "grad_norm": 1.2890625, "learning_rate": 0.000826445375206297, "loss": 0.3256, "step": 57846 }, { "epoch": 0.10256928542109384, "grad_norm": 0.28125, "learning_rate": 0.0008263886730954496, "loss": 0.1861, "step": 57848 }, { "epoch": 0.10257283158640365, "grad_norm": 0.47265625, "learning_rate": 0.0008263319721812195, "loss": 0.1954, "step": 57850 }, { "epoch": 0.10257637775171347, "grad_norm": 0.267578125, "learning_rate": 0.000826275272463854, "loss": 0.1479, "step": 57852 }, { "epoch": 0.10257992391702328, "grad_norm": 0.8203125, "learning_rate": 0.0008262185739436019, "loss": 0.1654, "step": 57854 }, { "epoch": 0.1025834700823331, "grad_norm": 0.1875, "learning_rate": 0.0008261618766207102, "loss": 0.2136, "step": 57856 }, { "epoch": 0.10258701624764291, "grad_norm": 0.5234375, "learning_rate": 0.0008261051804954276, "loss": 0.3704, "step": 57858 }, { "epoch": 0.10259056241295272, "grad_norm": 1.6484375, "learning_rate": 0.000826048485568002, "loss": 0.1942, "step": 57860 }, { "epoch": 0.10259410857826254, "grad_norm": 0.380859375, "learning_rate": 0.0008259917918386812, "loss": 0.1986, "step": 57862 }, { "epoch": 0.10259765474357235, "grad_norm": 0.267578125, "learning_rate": 0.0008259350993077128, "loss": 0.1871, "step": 57864 }, { "epoch": 0.10260120090888217, "grad_norm": 0.427734375, "learning_rate": 0.0008258784079753452, "loss": 0.2035, "step": 57866 }, { "epoch": 0.10260474707419198, "grad_norm": 3.046875, "learning_rate": 0.0008258217178418266, "loss": 0.2593, "step": 57868 }, { "epoch": 0.1026082932395018, "grad_norm": 1.59375, "learning_rate": 0.0008257650289074044, "loss": 0.2187, "step": 57870 }, { "epoch": 0.10261183940481161, "grad_norm": 0.8515625, "learning_rate": 0.0008257083411723265, "loss": 0.2193, "step": 57872 }, { "epoch": 0.10261538557012143, "grad_norm": 0.2578125, "learning_rate": 0.0008256516546368409, "loss": 0.1336, "step": 57874 }, { "epoch": 0.10261893173543124, "grad_norm": 0.51953125, "learning_rate": 0.0008255949693011958, "loss": 0.2049, "step": 57876 }, { "epoch": 0.10262247790074105, "grad_norm": 0.9375, "learning_rate": 0.000825538285165639, "loss": 0.406, "step": 57878 }, { "epoch": 0.10262602406605087, "grad_norm": 0.3828125, "learning_rate": 0.0008254816022304179, "loss": 0.2126, "step": 57880 }, { "epoch": 0.10262957023136068, "grad_norm": 0.353515625, "learning_rate": 0.0008254249204957811, "loss": 0.1997, "step": 57882 }, { "epoch": 0.1026331163966705, "grad_norm": 0.341796875, "learning_rate": 0.000825368239961976, "loss": 0.1427, "step": 57884 }, { "epoch": 0.10263666256198033, "grad_norm": 0.2890625, "learning_rate": 0.0008253115606292512, "loss": 0.2263, "step": 57886 }, { "epoch": 0.10264020872729014, "grad_norm": 1.171875, "learning_rate": 0.0008252548824978536, "loss": 0.2538, "step": 57888 }, { "epoch": 0.10264375489259996, "grad_norm": 0.384765625, "learning_rate": 0.0008251982055680319, "loss": 0.1973, "step": 57890 }, { "epoch": 0.10264730105790977, "grad_norm": 0.3515625, "learning_rate": 0.0008251415298400331, "loss": 0.1744, "step": 57892 }, { "epoch": 0.10265084722321959, "grad_norm": 1.046875, "learning_rate": 0.0008250848553141063, "loss": 0.1712, "step": 57894 }, { "epoch": 0.1026543933885294, "grad_norm": 0.35546875, "learning_rate": 0.0008250281819904981, "loss": 0.1808, "step": 57896 }, { "epoch": 0.10265793955383921, "grad_norm": 0.5234375, "learning_rate": 0.0008249715098694571, "loss": 0.2068, "step": 57898 }, { "epoch": 0.10266148571914903, "grad_norm": 0.451171875, "learning_rate": 0.0008249148389512307, "loss": 0.2823, "step": 57900 }, { "epoch": 0.10266503188445884, "grad_norm": 0.3125, "learning_rate": 0.000824858169236067, "loss": 0.2018, "step": 57902 }, { "epoch": 0.10266857804976866, "grad_norm": 0.353515625, "learning_rate": 0.0008248015007242145, "loss": 0.2281, "step": 57904 }, { "epoch": 0.10267212421507847, "grad_norm": 1.5234375, "learning_rate": 0.0008247448334159198, "loss": 0.2287, "step": 57906 }, { "epoch": 0.10267567038038829, "grad_norm": 0.31640625, "learning_rate": 0.0008246881673114315, "loss": 0.2051, "step": 57908 }, { "epoch": 0.1026792165456981, "grad_norm": 0.3984375, "learning_rate": 0.0008246315024109972, "loss": 0.1977, "step": 57910 }, { "epoch": 0.10268276271100792, "grad_norm": 0.26953125, "learning_rate": 0.000824574838714865, "loss": 0.1747, "step": 57912 }, { "epoch": 0.10268630887631773, "grad_norm": 0.326171875, "learning_rate": 0.0008245181762232823, "loss": 0.16, "step": 57914 }, { "epoch": 0.10268985504162755, "grad_norm": 0.369140625, "learning_rate": 0.0008244615149364967, "loss": 0.1365, "step": 57916 }, { "epoch": 0.10269340120693736, "grad_norm": 0.30859375, "learning_rate": 0.0008244048548547567, "loss": 0.1758, "step": 57918 }, { "epoch": 0.10269694737224717, "grad_norm": 0.52734375, "learning_rate": 0.0008243481959783101, "loss": 0.184, "step": 57920 }, { "epoch": 0.10270049353755699, "grad_norm": 0.8515625, "learning_rate": 0.0008242915383074039, "loss": 0.159, "step": 57922 }, { "epoch": 0.1027040397028668, "grad_norm": 0.296875, "learning_rate": 0.0008242348818422867, "loss": 0.1601, "step": 57924 }, { "epoch": 0.10270758586817662, "grad_norm": 0.52734375, "learning_rate": 0.0008241782265832056, "loss": 0.1614, "step": 57926 }, { "epoch": 0.10271113203348643, "grad_norm": 0.181640625, "learning_rate": 0.0008241215725304092, "loss": 0.1504, "step": 57928 }, { "epoch": 0.10271467819879626, "grad_norm": 1.0625, "learning_rate": 0.0008240649196841443, "loss": 0.1925, "step": 57930 }, { "epoch": 0.10271822436410608, "grad_norm": 0.375, "learning_rate": 0.0008240082680446593, "loss": 0.2584, "step": 57932 }, { "epoch": 0.10272177052941589, "grad_norm": 1.5, "learning_rate": 0.0008239516176122015, "loss": 0.2085, "step": 57934 }, { "epoch": 0.1027253166947257, "grad_norm": 0.466796875, "learning_rate": 0.0008238949683870198, "loss": 0.1968, "step": 57936 }, { "epoch": 0.10272886286003552, "grad_norm": 0.33984375, "learning_rate": 0.0008238383203693605, "loss": 0.1369, "step": 57938 }, { "epoch": 0.10273240902534533, "grad_norm": 0.376953125, "learning_rate": 0.0008237816735594721, "loss": 0.1589, "step": 57940 }, { "epoch": 0.10273595519065515, "grad_norm": 0.83203125, "learning_rate": 0.0008237250279576022, "loss": 0.184, "step": 57942 }, { "epoch": 0.10273950135596496, "grad_norm": 0.40625, "learning_rate": 0.0008236683835639987, "loss": 0.19, "step": 57944 }, { "epoch": 0.10274304752127478, "grad_norm": 0.5625, "learning_rate": 0.0008236117403789091, "loss": 0.2126, "step": 57946 }, { "epoch": 0.10274659368658459, "grad_norm": 0.21875, "learning_rate": 0.000823555098402581, "loss": 0.1512, "step": 57948 }, { "epoch": 0.1027501398518944, "grad_norm": 0.400390625, "learning_rate": 0.0008234984576352625, "loss": 0.2271, "step": 57950 }, { "epoch": 0.10275368601720422, "grad_norm": 0.51171875, "learning_rate": 0.0008234418180772014, "loss": 0.1482, "step": 57952 }, { "epoch": 0.10275723218251404, "grad_norm": 0.34765625, "learning_rate": 0.0008233851797286447, "loss": 0.1837, "step": 57954 }, { "epoch": 0.10276077834782385, "grad_norm": 0.271484375, "learning_rate": 0.0008233285425898407, "loss": 0.1982, "step": 57956 }, { "epoch": 0.10276432451313366, "grad_norm": 0.59375, "learning_rate": 0.0008232719066610368, "loss": 0.2214, "step": 57958 }, { "epoch": 0.10276787067844348, "grad_norm": 1.8828125, "learning_rate": 0.0008232152719424812, "loss": 0.2917, "step": 57960 }, { "epoch": 0.1027714168437533, "grad_norm": 0.69140625, "learning_rate": 0.0008231586384344209, "loss": 0.1632, "step": 57962 }, { "epoch": 0.10277496300906311, "grad_norm": 0.263671875, "learning_rate": 0.0008231020061371042, "loss": 0.2212, "step": 57964 }, { "epoch": 0.10277850917437292, "grad_norm": 0.240234375, "learning_rate": 0.000823045375050778, "loss": 0.131, "step": 57966 }, { "epoch": 0.10278205533968274, "grad_norm": 0.19140625, "learning_rate": 0.0008229887451756912, "loss": 0.1676, "step": 57968 }, { "epoch": 0.10278560150499255, "grad_norm": 0.515625, "learning_rate": 0.0008229321165120902, "loss": 0.1925, "step": 57970 }, { "epoch": 0.10278914767030237, "grad_norm": 0.296875, "learning_rate": 0.000822875489060223, "loss": 0.1612, "step": 57972 }, { "epoch": 0.10279269383561218, "grad_norm": 1.1015625, "learning_rate": 0.0008228188628203376, "loss": 0.1209, "step": 57974 }, { "epoch": 0.10279624000092201, "grad_norm": 0.17578125, "learning_rate": 0.000822762237792682, "loss": 0.1728, "step": 57976 }, { "epoch": 0.10279978616623182, "grad_norm": 0.234375, "learning_rate": 0.0008227056139775029, "loss": 0.182, "step": 57978 }, { "epoch": 0.10280333233154164, "grad_norm": 0.2138671875, "learning_rate": 0.0008226489913750483, "loss": 0.1421, "step": 57980 }, { "epoch": 0.10280687849685145, "grad_norm": 2.703125, "learning_rate": 0.000822592369985566, "loss": 0.3989, "step": 57982 }, { "epoch": 0.10281042466216127, "grad_norm": 0.478515625, "learning_rate": 0.0008225357498093036, "loss": 0.2255, "step": 57984 }, { "epoch": 0.10281397082747108, "grad_norm": 0.177734375, "learning_rate": 0.0008224791308465087, "loss": 0.1504, "step": 57986 }, { "epoch": 0.1028175169927809, "grad_norm": 0.38671875, "learning_rate": 0.0008224225130974288, "loss": 0.1839, "step": 57988 }, { "epoch": 0.10282106315809071, "grad_norm": 0.3515625, "learning_rate": 0.0008223658965623113, "loss": 0.1514, "step": 57990 }, { "epoch": 0.10282460932340053, "grad_norm": 0.396484375, "learning_rate": 0.0008223092812414043, "loss": 0.1964, "step": 57992 }, { "epoch": 0.10282815548871034, "grad_norm": 0.419921875, "learning_rate": 0.0008222526671349554, "loss": 0.1741, "step": 57994 }, { "epoch": 0.10283170165402016, "grad_norm": 0.3984375, "learning_rate": 0.0008221960542432119, "loss": 0.2492, "step": 57996 }, { "epoch": 0.10283524781932997, "grad_norm": 0.486328125, "learning_rate": 0.0008221394425664215, "loss": 0.1815, "step": 57998 }, { "epoch": 0.10283879398463978, "grad_norm": 1.09375, "learning_rate": 0.0008220828321048317, "loss": 0.1746, "step": 58000 }, { "epoch": 0.1028423401499496, "grad_norm": 0.79296875, "learning_rate": 0.0008220262228586904, "loss": 0.2257, "step": 58002 }, { "epoch": 0.10284588631525941, "grad_norm": 0.310546875, "learning_rate": 0.0008219696148282447, "loss": 0.1314, "step": 58004 }, { "epoch": 0.10284943248056923, "grad_norm": 0.38671875, "learning_rate": 0.0008219130080137423, "loss": 0.1684, "step": 58006 }, { "epoch": 0.10285297864587904, "grad_norm": 0.28125, "learning_rate": 0.0008218564024154309, "loss": 0.1863, "step": 58008 }, { "epoch": 0.10285652481118886, "grad_norm": 0.283203125, "learning_rate": 0.0008217997980335585, "loss": 0.1586, "step": 58010 }, { "epoch": 0.10286007097649867, "grad_norm": 0.267578125, "learning_rate": 0.0008217431948683718, "loss": 0.1188, "step": 58012 }, { "epoch": 0.10286361714180849, "grad_norm": 0.24609375, "learning_rate": 0.0008216865929201185, "loss": 0.1708, "step": 58014 }, { "epoch": 0.1028671633071183, "grad_norm": 0.60546875, "learning_rate": 0.0008216299921890469, "loss": 0.1745, "step": 58016 }, { "epoch": 0.10287070947242812, "grad_norm": 0.310546875, "learning_rate": 0.0008215733926754036, "loss": 0.2527, "step": 58018 }, { "epoch": 0.10287425563773793, "grad_norm": 0.318359375, "learning_rate": 0.000821516794379437, "loss": 0.1647, "step": 58020 }, { "epoch": 0.10287780180304776, "grad_norm": 0.53125, "learning_rate": 0.0008214601973013937, "loss": 0.1861, "step": 58022 }, { "epoch": 0.10288134796835757, "grad_norm": 1.7265625, "learning_rate": 0.0008214036014415219, "loss": 0.1345, "step": 58024 }, { "epoch": 0.10288489413366739, "grad_norm": 0.259765625, "learning_rate": 0.000821347006800069, "loss": 0.1725, "step": 58026 }, { "epoch": 0.1028884402989772, "grad_norm": 0.47265625, "learning_rate": 0.0008212904133772823, "loss": 0.204, "step": 58028 }, { "epoch": 0.10289198646428702, "grad_norm": 0.5, "learning_rate": 0.0008212338211734094, "loss": 0.2194, "step": 58030 }, { "epoch": 0.10289553262959683, "grad_norm": 0.27734375, "learning_rate": 0.0008211772301886979, "loss": 0.1792, "step": 58032 }, { "epoch": 0.10289907879490665, "grad_norm": 0.388671875, "learning_rate": 0.0008211206404233955, "loss": 0.1405, "step": 58034 }, { "epoch": 0.10290262496021646, "grad_norm": 0.404296875, "learning_rate": 0.000821064051877749, "loss": 0.1677, "step": 58036 }, { "epoch": 0.10290617112552627, "grad_norm": 0.38671875, "learning_rate": 0.0008210074645520067, "loss": 0.1775, "step": 58038 }, { "epoch": 0.10290971729083609, "grad_norm": 0.71484375, "learning_rate": 0.0008209508784464152, "loss": 0.2251, "step": 58040 }, { "epoch": 0.1029132634561459, "grad_norm": 0.2578125, "learning_rate": 0.0008208942935612231, "loss": 0.2063, "step": 58042 }, { "epoch": 0.10291680962145572, "grad_norm": 0.5703125, "learning_rate": 0.0008208377098966769, "loss": 0.2858, "step": 58044 }, { "epoch": 0.10292035578676553, "grad_norm": 1.7421875, "learning_rate": 0.0008207811274530242, "loss": 0.1538, "step": 58046 }, { "epoch": 0.10292390195207535, "grad_norm": 0.55078125, "learning_rate": 0.0008207245462305126, "loss": 0.1452, "step": 58048 }, { "epoch": 0.10292744811738516, "grad_norm": 0.6015625, "learning_rate": 0.0008206679662293903, "loss": 0.1892, "step": 58050 }, { "epoch": 0.10293099428269498, "grad_norm": 0.337890625, "learning_rate": 0.0008206113874499035, "loss": 0.1627, "step": 58052 }, { "epoch": 0.10293454044800479, "grad_norm": 0.267578125, "learning_rate": 0.0008205548098923002, "loss": 0.2071, "step": 58054 }, { "epoch": 0.1029380866133146, "grad_norm": 0.75390625, "learning_rate": 0.0008204982335568281, "loss": 0.173, "step": 58056 }, { "epoch": 0.10294163277862442, "grad_norm": 0.44140625, "learning_rate": 0.0008204416584437342, "loss": 0.1764, "step": 58058 }, { "epoch": 0.10294517894393423, "grad_norm": 0.26171875, "learning_rate": 0.0008203850845532664, "loss": 0.1754, "step": 58060 }, { "epoch": 0.10294872510924405, "grad_norm": 1.046875, "learning_rate": 0.0008203285118856714, "loss": 0.2039, "step": 58062 }, { "epoch": 0.10295227127455386, "grad_norm": 0.7890625, "learning_rate": 0.0008202719404411972, "loss": 0.1669, "step": 58064 }, { "epoch": 0.10295581743986369, "grad_norm": 0.61328125, "learning_rate": 0.0008202153702200907, "loss": 0.1926, "step": 58066 }, { "epoch": 0.1029593636051735, "grad_norm": 0.353515625, "learning_rate": 0.0008201588012226004, "loss": 0.1649, "step": 58068 }, { "epoch": 0.10296290977048332, "grad_norm": 0.345703125, "learning_rate": 0.0008201022334489722, "loss": 0.1688, "step": 58070 }, { "epoch": 0.10296645593579314, "grad_norm": 1.5546875, "learning_rate": 0.0008200456668994548, "loss": 0.3024, "step": 58072 }, { "epoch": 0.10297000210110295, "grad_norm": 0.447265625, "learning_rate": 0.0008199891015742949, "loss": 0.1715, "step": 58074 }, { "epoch": 0.10297354826641276, "grad_norm": 0.408203125, "learning_rate": 0.0008199325374737399, "loss": 0.1625, "step": 58076 }, { "epoch": 0.10297709443172258, "grad_norm": 0.55859375, "learning_rate": 0.0008198759745980373, "loss": 0.1892, "step": 58078 }, { "epoch": 0.1029806405970324, "grad_norm": 0.2275390625, "learning_rate": 0.0008198194129474346, "loss": 0.166, "step": 58080 }, { "epoch": 0.10298418676234221, "grad_norm": 0.87109375, "learning_rate": 0.0008197628525221787, "loss": 0.2182, "step": 58082 }, { "epoch": 0.10298773292765202, "grad_norm": 0.61328125, "learning_rate": 0.0008197062933225179, "loss": 0.1974, "step": 58084 }, { "epoch": 0.10299127909296184, "grad_norm": 0.486328125, "learning_rate": 0.0008196497353486985, "loss": 0.3346, "step": 58086 }, { "epoch": 0.10299482525827165, "grad_norm": 0.365234375, "learning_rate": 0.0008195931786009685, "loss": 0.1736, "step": 58088 }, { "epoch": 0.10299837142358147, "grad_norm": 0.400390625, "learning_rate": 0.0008195366230795749, "loss": 0.1518, "step": 58090 }, { "epoch": 0.10300191758889128, "grad_norm": 0.54296875, "learning_rate": 0.0008194800687847652, "loss": 0.1875, "step": 58092 }, { "epoch": 0.1030054637542011, "grad_norm": 0.453125, "learning_rate": 0.0008194235157167871, "loss": 0.1706, "step": 58094 }, { "epoch": 0.10300900991951091, "grad_norm": 0.5390625, "learning_rate": 0.0008193669638758872, "loss": 0.1735, "step": 58096 }, { "epoch": 0.10301255608482073, "grad_norm": 0.796875, "learning_rate": 0.000819310413262313, "loss": 0.1623, "step": 58098 }, { "epoch": 0.10301610225013054, "grad_norm": 0.4140625, "learning_rate": 0.0008192538638763128, "loss": 0.1603, "step": 58100 }, { "epoch": 0.10301964841544035, "grad_norm": 1.0078125, "learning_rate": 0.0008191973157181325, "loss": 0.2303, "step": 58102 }, { "epoch": 0.10302319458075017, "grad_norm": 0.5390625, "learning_rate": 0.0008191407687880199, "loss": 0.1911, "step": 58104 }, { "epoch": 0.10302674074605998, "grad_norm": 0.265625, "learning_rate": 0.0008190842230862225, "loss": 0.1895, "step": 58106 }, { "epoch": 0.1030302869113698, "grad_norm": 0.83984375, "learning_rate": 0.000819027678612988, "loss": 0.1532, "step": 58108 }, { "epoch": 0.10303383307667961, "grad_norm": 0.490234375, "learning_rate": 0.000818971135368563, "loss": 0.1762, "step": 58110 }, { "epoch": 0.10303737924198944, "grad_norm": 0.2216796875, "learning_rate": 0.0008189145933531949, "loss": 0.16, "step": 58112 }, { "epoch": 0.10304092540729926, "grad_norm": 0.70703125, "learning_rate": 0.000818858052567131, "loss": 0.1745, "step": 58114 }, { "epoch": 0.10304447157260907, "grad_norm": 0.494140625, "learning_rate": 0.0008188015130106192, "loss": 0.229, "step": 58116 }, { "epoch": 0.10304801773791888, "grad_norm": 0.365234375, "learning_rate": 0.0008187449746839056, "loss": 0.1911, "step": 58118 }, { "epoch": 0.1030515639032287, "grad_norm": 0.359375, "learning_rate": 0.0008186884375872383, "loss": 0.1722, "step": 58120 }, { "epoch": 0.10305511006853851, "grad_norm": 0.87890625, "learning_rate": 0.0008186319017208643, "loss": 0.1586, "step": 58122 }, { "epoch": 0.10305865623384833, "grad_norm": 9.6875, "learning_rate": 0.0008185753670850313, "loss": 0.1418, "step": 58124 }, { "epoch": 0.10306220239915814, "grad_norm": 0.73828125, "learning_rate": 0.0008185188336799858, "loss": 0.3134, "step": 58126 }, { "epoch": 0.10306574856446796, "grad_norm": 0.298828125, "learning_rate": 0.0008184623015059755, "loss": 0.1927, "step": 58128 }, { "epoch": 0.10306929472977777, "grad_norm": 0.1787109375, "learning_rate": 0.0008184057705632475, "loss": 0.1732, "step": 58130 }, { "epoch": 0.10307284089508759, "grad_norm": 0.59375, "learning_rate": 0.0008183492408520495, "loss": 0.1546, "step": 58132 }, { "epoch": 0.1030763870603974, "grad_norm": 0.220703125, "learning_rate": 0.000818292712372628, "loss": 0.1898, "step": 58134 }, { "epoch": 0.10307993322570722, "grad_norm": 0.255859375, "learning_rate": 0.0008182361851252304, "loss": 0.1913, "step": 58136 }, { "epoch": 0.10308347939101703, "grad_norm": 0.416015625, "learning_rate": 0.0008181796591101043, "loss": 0.1745, "step": 58138 }, { "epoch": 0.10308702555632684, "grad_norm": 0.373046875, "learning_rate": 0.0008181231343274966, "loss": 0.1688, "step": 58140 }, { "epoch": 0.10309057172163666, "grad_norm": 0.57421875, "learning_rate": 0.0008180666107776547, "loss": 0.2377, "step": 58142 }, { "epoch": 0.10309411788694647, "grad_norm": 0.59375, "learning_rate": 0.0008180100884608252, "loss": 0.1523, "step": 58144 }, { "epoch": 0.10309766405225629, "grad_norm": 2.875, "learning_rate": 0.000817953567377256, "loss": 0.6198, "step": 58146 }, { "epoch": 0.1031012102175661, "grad_norm": 0.21484375, "learning_rate": 0.0008178970475271945, "loss": 0.1804, "step": 58148 }, { "epoch": 0.10310475638287592, "grad_norm": 0.546875, "learning_rate": 0.000817840528910887, "loss": 0.161, "step": 58150 }, { "epoch": 0.10310830254818573, "grad_norm": 0.345703125, "learning_rate": 0.0008177840115285815, "loss": 0.2046, "step": 58152 }, { "epoch": 0.10311184871349555, "grad_norm": 0.765625, "learning_rate": 0.0008177274953805242, "loss": 0.2217, "step": 58154 }, { "epoch": 0.10311539487880536, "grad_norm": 0.41796875, "learning_rate": 0.0008176709804669631, "loss": 0.1745, "step": 58156 }, { "epoch": 0.10311894104411519, "grad_norm": 0.26953125, "learning_rate": 0.0008176144667881458, "loss": 0.1908, "step": 58158 }, { "epoch": 0.103122487209425, "grad_norm": 0.412109375, "learning_rate": 0.0008175579543443179, "loss": 0.2197, "step": 58160 }, { "epoch": 0.10312603337473482, "grad_norm": 0.39453125, "learning_rate": 0.0008175014431357278, "loss": 0.159, "step": 58162 }, { "epoch": 0.10312957954004463, "grad_norm": 0.5859375, "learning_rate": 0.0008174449331626221, "loss": 0.1315, "step": 58164 }, { "epoch": 0.10313312570535445, "grad_norm": 0.37109375, "learning_rate": 0.0008173884244252487, "loss": 0.1798, "step": 58166 }, { "epoch": 0.10313667187066426, "grad_norm": 1.03125, "learning_rate": 0.0008173319169238538, "loss": 0.3124, "step": 58168 }, { "epoch": 0.10314021803597408, "grad_norm": 0.5078125, "learning_rate": 0.0008172754106586848, "loss": 0.4006, "step": 58170 }, { "epoch": 0.10314376420128389, "grad_norm": 0.7109375, "learning_rate": 0.0008172189056299891, "loss": 0.1649, "step": 58172 }, { "epoch": 0.1031473103665937, "grad_norm": 0.4140625, "learning_rate": 0.0008171624018380137, "loss": 0.1432, "step": 58174 }, { "epoch": 0.10315085653190352, "grad_norm": 0.310546875, "learning_rate": 0.0008171058992830054, "loss": 0.1535, "step": 58176 }, { "epoch": 0.10315440269721333, "grad_norm": 0.671875, "learning_rate": 0.0008170493979652116, "loss": 0.1268, "step": 58178 }, { "epoch": 0.10315794886252315, "grad_norm": 0.388671875, "learning_rate": 0.0008169928978848793, "loss": 0.1762, "step": 58180 }, { "epoch": 0.10316149502783296, "grad_norm": 3.1875, "learning_rate": 0.0008169363990422563, "loss": 0.2337, "step": 58182 }, { "epoch": 0.10316504119314278, "grad_norm": 1.4609375, "learning_rate": 0.0008168799014375885, "loss": 0.2833, "step": 58184 }, { "epoch": 0.1031685873584526, "grad_norm": 1.828125, "learning_rate": 0.0008168234050711238, "loss": 0.4239, "step": 58186 }, { "epoch": 0.10317213352376241, "grad_norm": 0.373046875, "learning_rate": 0.0008167669099431087, "loss": 0.1893, "step": 58188 }, { "epoch": 0.10317567968907222, "grad_norm": 0.4140625, "learning_rate": 0.0008167104160537913, "loss": 0.1676, "step": 58190 }, { "epoch": 0.10317922585438204, "grad_norm": 0.443359375, "learning_rate": 0.0008166539234034172, "loss": 0.238, "step": 58192 }, { "epoch": 0.10318277201969185, "grad_norm": 0.251953125, "learning_rate": 0.0008165974319922345, "loss": 0.1512, "step": 58194 }, { "epoch": 0.10318631818500167, "grad_norm": 0.25, "learning_rate": 0.0008165409418204899, "loss": 0.1747, "step": 58196 }, { "epoch": 0.10318986435031148, "grad_norm": 1.0546875, "learning_rate": 0.0008164844528884312, "loss": 0.2172, "step": 58198 }, { "epoch": 0.1031934105156213, "grad_norm": 0.63671875, "learning_rate": 0.0008164279651963043, "loss": 0.2149, "step": 58200 }, { "epoch": 0.10319695668093112, "grad_norm": 0.91015625, "learning_rate": 0.0008163714787443565, "loss": 0.1908, "step": 58202 }, { "epoch": 0.10320050284624094, "grad_norm": 0.2734375, "learning_rate": 0.0008163149935328358, "loss": 0.141, "step": 58204 }, { "epoch": 0.10320404901155075, "grad_norm": 0.5, "learning_rate": 0.0008162585095619881, "loss": 0.1835, "step": 58206 }, { "epoch": 0.10320759517686057, "grad_norm": 1.609375, "learning_rate": 0.0008162020268320609, "loss": 0.1664, "step": 58208 }, { "epoch": 0.10321114134217038, "grad_norm": 8.1875, "learning_rate": 0.0008161455453433013, "loss": 0.2784, "step": 58210 }, { "epoch": 0.1032146875074802, "grad_norm": 0.478515625, "learning_rate": 0.0008160890650959558, "loss": 0.2561, "step": 58212 }, { "epoch": 0.10321823367279001, "grad_norm": 1.4765625, "learning_rate": 0.0008160325860902726, "loss": 0.2899, "step": 58214 }, { "epoch": 0.10322177983809983, "grad_norm": 0.578125, "learning_rate": 0.0008159761083264971, "loss": 0.1341, "step": 58216 }, { "epoch": 0.10322532600340964, "grad_norm": 0.703125, "learning_rate": 0.0008159196318048776, "loss": 0.1662, "step": 58218 }, { "epoch": 0.10322887216871945, "grad_norm": 0.1865234375, "learning_rate": 0.0008158631565256605, "loss": 0.1536, "step": 58220 }, { "epoch": 0.10323241833402927, "grad_norm": 0.5234375, "learning_rate": 0.0008158066824890931, "loss": 0.156, "step": 58222 }, { "epoch": 0.10323596449933908, "grad_norm": 0.318359375, "learning_rate": 0.0008157502096954219, "loss": 0.1709, "step": 58224 }, { "epoch": 0.1032395106646489, "grad_norm": 0.416015625, "learning_rate": 0.0008156937381448941, "loss": 0.1753, "step": 58226 }, { "epoch": 0.10324305682995871, "grad_norm": 0.61328125, "learning_rate": 0.0008156372678377571, "loss": 0.153, "step": 58228 }, { "epoch": 0.10324660299526853, "grad_norm": 1.4375, "learning_rate": 0.0008155807987742573, "loss": 0.3898, "step": 58230 }, { "epoch": 0.10325014916057834, "grad_norm": 0.80859375, "learning_rate": 0.000815524330954642, "loss": 0.1925, "step": 58232 }, { "epoch": 0.10325369532588816, "grad_norm": 0.162109375, "learning_rate": 0.0008154678643791577, "loss": 0.1536, "step": 58234 }, { "epoch": 0.10325724149119797, "grad_norm": 0.232421875, "learning_rate": 0.0008154113990480518, "loss": 0.1321, "step": 58236 }, { "epoch": 0.10326078765650779, "grad_norm": 0.37109375, "learning_rate": 0.0008153549349615712, "loss": 0.1731, "step": 58238 }, { "epoch": 0.1032643338218176, "grad_norm": 2.734375, "learning_rate": 0.0008152984721199632, "loss": 0.2859, "step": 58240 }, { "epoch": 0.10326787998712741, "grad_norm": 1.0703125, "learning_rate": 0.0008152420105234737, "loss": 0.222, "step": 58242 }, { "epoch": 0.10327142615243723, "grad_norm": 0.16015625, "learning_rate": 0.0008151855501723507, "loss": 0.1917, "step": 58244 }, { "epoch": 0.10327497231774704, "grad_norm": 0.73046875, "learning_rate": 0.0008151290910668401, "loss": 0.1791, "step": 58246 }, { "epoch": 0.10327851848305687, "grad_norm": 0.2353515625, "learning_rate": 0.00081507263320719, "loss": 0.1351, "step": 58248 }, { "epoch": 0.10328206464836669, "grad_norm": 0.6875, "learning_rate": 0.0008150161765936464, "loss": 0.201, "step": 58250 }, { "epoch": 0.1032856108136765, "grad_norm": 2.0625, "learning_rate": 0.0008149597212264564, "loss": 0.3845, "step": 58252 }, { "epoch": 0.10328915697898632, "grad_norm": 0.875, "learning_rate": 0.0008149032671058671, "loss": 0.1972, "step": 58254 }, { "epoch": 0.10329270314429613, "grad_norm": 1.8828125, "learning_rate": 0.0008148468142321254, "loss": 0.212, "step": 58256 }, { "epoch": 0.10329624930960594, "grad_norm": 1.0859375, "learning_rate": 0.000814790362605478, "loss": 0.3324, "step": 58258 }, { "epoch": 0.10329979547491576, "grad_norm": 0.498046875, "learning_rate": 0.0008147339122261719, "loss": 0.2936, "step": 58260 }, { "epoch": 0.10330334164022557, "grad_norm": 0.333984375, "learning_rate": 0.000814677463094454, "loss": 0.1449, "step": 58262 }, { "epoch": 0.10330688780553539, "grad_norm": 0.7109375, "learning_rate": 0.000814621015210571, "loss": 0.1791, "step": 58264 }, { "epoch": 0.1033104339708452, "grad_norm": 0.66015625, "learning_rate": 0.0008145645685747702, "loss": 0.3723, "step": 58266 }, { "epoch": 0.10331398013615502, "grad_norm": 0.2578125, "learning_rate": 0.000814508123187298, "loss": 0.1598, "step": 58268 }, { "epoch": 0.10331752630146483, "grad_norm": 2.140625, "learning_rate": 0.0008144516790484014, "loss": 0.2309, "step": 58270 }, { "epoch": 0.10332107246677465, "grad_norm": 0.42578125, "learning_rate": 0.0008143952361583276, "loss": 0.1823, "step": 58272 }, { "epoch": 0.10332461863208446, "grad_norm": 0.375, "learning_rate": 0.0008143387945173227, "loss": 0.1539, "step": 58274 }, { "epoch": 0.10332816479739428, "grad_norm": 0.357421875, "learning_rate": 0.0008142823541256339, "loss": 0.2604, "step": 58276 }, { "epoch": 0.10333171096270409, "grad_norm": 1.0, "learning_rate": 0.0008142259149835085, "loss": 0.3508, "step": 58278 }, { "epoch": 0.1033352571280139, "grad_norm": 0.353515625, "learning_rate": 0.0008141694770911929, "loss": 0.1884, "step": 58280 }, { "epoch": 0.10333880329332372, "grad_norm": 0.34765625, "learning_rate": 0.0008141130404489341, "loss": 0.1911, "step": 58282 }, { "epoch": 0.10334234945863353, "grad_norm": 0.353515625, "learning_rate": 0.0008140566050569784, "loss": 0.17, "step": 58284 }, { "epoch": 0.10334589562394335, "grad_norm": 0.22265625, "learning_rate": 0.0008140001709155731, "loss": 0.1674, "step": 58286 }, { "epoch": 0.10334944178925316, "grad_norm": 0.357421875, "learning_rate": 0.0008139437380249655, "loss": 0.2345, "step": 58288 }, { "epoch": 0.10335298795456298, "grad_norm": 0.296875, "learning_rate": 0.0008138873063854011, "loss": 0.1488, "step": 58290 }, { "epoch": 0.10335653411987279, "grad_norm": 1.65625, "learning_rate": 0.0008138308759971276, "loss": 0.2899, "step": 58292 }, { "epoch": 0.10336008028518262, "grad_norm": 0.90234375, "learning_rate": 0.0008137744468603916, "loss": 0.2179, "step": 58294 }, { "epoch": 0.10336362645049244, "grad_norm": 0.39453125, "learning_rate": 0.0008137180189754403, "loss": 0.1932, "step": 58296 }, { "epoch": 0.10336717261580225, "grad_norm": 0.55078125, "learning_rate": 0.0008136615923425197, "loss": 0.2216, "step": 58298 }, { "epoch": 0.10337071878111206, "grad_norm": 1.4453125, "learning_rate": 0.0008136051669618772, "loss": 0.1492, "step": 58300 }, { "epoch": 0.10337426494642188, "grad_norm": 0.30078125, "learning_rate": 0.0008135487428337591, "loss": 0.2062, "step": 58302 }, { "epoch": 0.1033778111117317, "grad_norm": 0.36328125, "learning_rate": 0.0008134923199584128, "loss": 0.1601, "step": 58304 }, { "epoch": 0.10338135727704151, "grad_norm": 0.6015625, "learning_rate": 0.0008134358983360844, "loss": 0.2893, "step": 58306 }, { "epoch": 0.10338490344235132, "grad_norm": 0.578125, "learning_rate": 0.0008133794779670207, "loss": 0.1068, "step": 58308 }, { "epoch": 0.10338844960766114, "grad_norm": 0.36328125, "learning_rate": 0.0008133230588514689, "loss": 0.2331, "step": 58310 }, { "epoch": 0.10339199577297095, "grad_norm": 0.19140625, "learning_rate": 0.0008132666409896756, "loss": 0.1413, "step": 58312 }, { "epoch": 0.10339554193828077, "grad_norm": 1.265625, "learning_rate": 0.0008132102243818873, "loss": 0.5206, "step": 58314 }, { "epoch": 0.10339908810359058, "grad_norm": 0.52734375, "learning_rate": 0.0008131538090283508, "loss": 0.2317, "step": 58316 }, { "epoch": 0.1034026342689004, "grad_norm": 0.423828125, "learning_rate": 0.0008130973949293132, "loss": 0.2074, "step": 58318 }, { "epoch": 0.10340618043421021, "grad_norm": 0.3984375, "learning_rate": 0.0008130409820850208, "loss": 0.2479, "step": 58320 }, { "epoch": 0.10340972659952002, "grad_norm": 0.4921875, "learning_rate": 0.0008129845704957206, "loss": 0.189, "step": 58322 }, { "epoch": 0.10341327276482984, "grad_norm": 0.1484375, "learning_rate": 0.0008129281601616592, "loss": 0.1708, "step": 58324 }, { "epoch": 0.10341681893013965, "grad_norm": 0.46875, "learning_rate": 0.0008128717510830829, "loss": 0.1528, "step": 58326 }, { "epoch": 0.10342036509544947, "grad_norm": 0.46484375, "learning_rate": 0.000812815343260239, "loss": 0.1553, "step": 58328 }, { "epoch": 0.10342391126075928, "grad_norm": 0.2109375, "learning_rate": 0.0008127589366933744, "loss": 0.1243, "step": 58330 }, { "epoch": 0.1034274574260691, "grad_norm": 2.546875, "learning_rate": 0.0008127025313827346, "loss": 0.4347, "step": 58332 }, { "epoch": 0.10343100359137891, "grad_norm": 0.40625, "learning_rate": 0.0008126461273285673, "loss": 0.2097, "step": 58334 }, { "epoch": 0.10343454975668873, "grad_norm": 0.30859375, "learning_rate": 0.0008125897245311191, "loss": 0.1731, "step": 58336 }, { "epoch": 0.10343809592199855, "grad_norm": 0.3203125, "learning_rate": 0.0008125333229906365, "loss": 0.1679, "step": 58338 }, { "epoch": 0.10344164208730837, "grad_norm": 2.421875, "learning_rate": 0.0008124769227073663, "loss": 0.2038, "step": 58340 }, { "epoch": 0.10344518825261818, "grad_norm": 0.41796875, "learning_rate": 0.0008124205236815545, "loss": 0.2443, "step": 58342 }, { "epoch": 0.103448734417928, "grad_norm": 0.60546875, "learning_rate": 0.0008123641259134484, "loss": 0.1253, "step": 58344 }, { "epoch": 0.10345228058323781, "grad_norm": 0.34765625, "learning_rate": 0.0008123077294032952, "loss": 0.1836, "step": 58346 }, { "epoch": 0.10345582674854763, "grad_norm": 0.423828125, "learning_rate": 0.0008122513341513402, "loss": 0.2037, "step": 58348 }, { "epoch": 0.10345937291385744, "grad_norm": 0.13671875, "learning_rate": 0.0008121949401578308, "loss": 0.1284, "step": 58350 }, { "epoch": 0.10346291907916726, "grad_norm": 1.5625, "learning_rate": 0.0008121385474230138, "loss": 0.2138, "step": 58352 }, { "epoch": 0.10346646524447707, "grad_norm": 0.392578125, "learning_rate": 0.0008120821559471356, "loss": 0.1901, "step": 58354 }, { "epoch": 0.10347001140978689, "grad_norm": 0.51953125, "learning_rate": 0.0008120257657304427, "loss": 0.1707, "step": 58356 }, { "epoch": 0.1034735575750967, "grad_norm": 1.328125, "learning_rate": 0.0008119693767731817, "loss": 0.169, "step": 58358 }, { "epoch": 0.10347710374040651, "grad_norm": 2.515625, "learning_rate": 0.0008119129890755994, "loss": 0.1978, "step": 58360 }, { "epoch": 0.10348064990571633, "grad_norm": 0.283203125, "learning_rate": 0.0008118566026379426, "loss": 0.1724, "step": 58362 }, { "epoch": 0.10348419607102614, "grad_norm": 0.38671875, "learning_rate": 0.0008118002174604573, "loss": 0.2781, "step": 58364 }, { "epoch": 0.10348774223633596, "grad_norm": 0.416015625, "learning_rate": 0.0008117438335433904, "loss": 0.202, "step": 58366 }, { "epoch": 0.10349128840164577, "grad_norm": 0.3046875, "learning_rate": 0.0008116874508869887, "loss": 0.1388, "step": 58368 }, { "epoch": 0.10349483456695559, "grad_norm": 0.859375, "learning_rate": 0.0008116310694914988, "loss": 0.2096, "step": 58370 }, { "epoch": 0.1034983807322654, "grad_norm": 0.4140625, "learning_rate": 0.000811574689357167, "loss": 0.1821, "step": 58372 }, { "epoch": 0.10350192689757522, "grad_norm": 1.484375, "learning_rate": 0.0008115183104842399, "loss": 0.1696, "step": 58374 }, { "epoch": 0.10350547306288503, "grad_norm": 1.875, "learning_rate": 0.000811461932872964, "loss": 0.2003, "step": 58376 }, { "epoch": 0.10350901922819485, "grad_norm": 1.0078125, "learning_rate": 0.0008114055565235863, "loss": 0.229, "step": 58378 }, { "epoch": 0.10351256539350466, "grad_norm": 0.392578125, "learning_rate": 0.0008113491814363528, "loss": 0.177, "step": 58380 }, { "epoch": 0.10351611155881447, "grad_norm": 0.439453125, "learning_rate": 0.0008112928076115102, "loss": 0.2308, "step": 58382 }, { "epoch": 0.1035196577241243, "grad_norm": 0.306640625, "learning_rate": 0.0008112364350493053, "loss": 0.2401, "step": 58384 }, { "epoch": 0.10352320388943412, "grad_norm": 0.294921875, "learning_rate": 0.0008111800637499849, "loss": 0.1905, "step": 58386 }, { "epoch": 0.10352675005474393, "grad_norm": 0.296875, "learning_rate": 0.0008111236937137947, "loss": 0.2226, "step": 58388 }, { "epoch": 0.10353029622005375, "grad_norm": 1.484375, "learning_rate": 0.0008110673249409816, "loss": 0.2661, "step": 58390 }, { "epoch": 0.10353384238536356, "grad_norm": 0.80078125, "learning_rate": 0.0008110109574317923, "loss": 0.2525, "step": 58392 }, { "epoch": 0.10353738855067338, "grad_norm": 0.44140625, "learning_rate": 0.0008109545911864735, "loss": 0.265, "step": 58394 }, { "epoch": 0.10354093471598319, "grad_norm": 0.5625, "learning_rate": 0.0008108982262052711, "loss": 0.1631, "step": 58396 }, { "epoch": 0.103544480881293, "grad_norm": 1.734375, "learning_rate": 0.0008108418624884322, "loss": 0.2471, "step": 58398 }, { "epoch": 0.10354802704660282, "grad_norm": 0.490234375, "learning_rate": 0.0008107855000362025, "loss": 0.1666, "step": 58400 }, { "epoch": 0.10355157321191263, "grad_norm": 1.3203125, "learning_rate": 0.0008107291388488294, "loss": 0.2679, "step": 58402 }, { "epoch": 0.10355511937722245, "grad_norm": 0.6484375, "learning_rate": 0.0008106727789265591, "loss": 0.1686, "step": 58404 }, { "epoch": 0.10355866554253226, "grad_norm": 0.58203125, "learning_rate": 0.0008106164202696378, "loss": 0.2842, "step": 58406 }, { "epoch": 0.10356221170784208, "grad_norm": 0.33203125, "learning_rate": 0.000810560062878312, "loss": 0.1929, "step": 58408 }, { "epoch": 0.10356575787315189, "grad_norm": 0.5390625, "learning_rate": 0.000810503706752829, "loss": 0.1781, "step": 58410 }, { "epoch": 0.10356930403846171, "grad_norm": 0.484375, "learning_rate": 0.0008104473518934342, "loss": 0.1388, "step": 58412 }, { "epoch": 0.10357285020377152, "grad_norm": 0.232421875, "learning_rate": 0.0008103909983003746, "loss": 0.1685, "step": 58414 }, { "epoch": 0.10357639636908134, "grad_norm": 0.3203125, "learning_rate": 0.0008103346459738965, "loss": 0.2019, "step": 58416 }, { "epoch": 0.10357994253439115, "grad_norm": 0.400390625, "learning_rate": 0.0008102782949142462, "loss": 0.1647, "step": 58418 }, { "epoch": 0.10358348869970097, "grad_norm": 0.44140625, "learning_rate": 0.0008102219451216708, "loss": 0.3138, "step": 58420 }, { "epoch": 0.10358703486501078, "grad_norm": 0.337890625, "learning_rate": 0.0008101655965964159, "loss": 0.1811, "step": 58422 }, { "epoch": 0.1035905810303206, "grad_norm": 0.6796875, "learning_rate": 0.0008101092493387283, "loss": 0.2105, "step": 58424 }, { "epoch": 0.10359412719563041, "grad_norm": 1.5390625, "learning_rate": 0.0008100529033488546, "loss": 0.4736, "step": 58426 }, { "epoch": 0.10359767336094022, "grad_norm": 0.1787109375, "learning_rate": 0.0008099965586270412, "loss": 0.1532, "step": 58428 }, { "epoch": 0.10360121952625005, "grad_norm": 0.484375, "learning_rate": 0.0008099402151735343, "loss": 0.1528, "step": 58430 }, { "epoch": 0.10360476569155987, "grad_norm": 0.33984375, "learning_rate": 0.0008098838729885805, "loss": 0.166, "step": 58432 }, { "epoch": 0.10360831185686968, "grad_norm": 0.302734375, "learning_rate": 0.0008098275320724258, "loss": 0.1201, "step": 58434 }, { "epoch": 0.1036118580221795, "grad_norm": 0.35546875, "learning_rate": 0.0008097711924253175, "loss": 0.2026, "step": 58436 }, { "epoch": 0.10361540418748931, "grad_norm": 0.435546875, "learning_rate": 0.000809714854047501, "loss": 0.2066, "step": 58438 }, { "epoch": 0.10361895035279912, "grad_norm": 0.5, "learning_rate": 0.000809658516939223, "loss": 0.1754, "step": 58440 }, { "epoch": 0.10362249651810894, "grad_norm": 0.49609375, "learning_rate": 0.0008096021811007301, "loss": 0.6041, "step": 58442 }, { "epoch": 0.10362604268341875, "grad_norm": 0.263671875, "learning_rate": 0.0008095458465322687, "loss": 0.1778, "step": 58444 }, { "epoch": 0.10362958884872857, "grad_norm": 0.474609375, "learning_rate": 0.0008094895132340852, "loss": 0.1363, "step": 58446 }, { "epoch": 0.10363313501403838, "grad_norm": 1.3203125, "learning_rate": 0.0008094331812064255, "loss": 0.187, "step": 58448 }, { "epoch": 0.1036366811793482, "grad_norm": 0.7421875, "learning_rate": 0.0008093768504495364, "loss": 0.1437, "step": 58450 }, { "epoch": 0.10364022734465801, "grad_norm": 0.65234375, "learning_rate": 0.0008093205209636642, "loss": 0.1956, "step": 58452 }, { "epoch": 0.10364377350996783, "grad_norm": 0.57421875, "learning_rate": 0.0008092641927490552, "loss": 0.2196, "step": 58454 }, { "epoch": 0.10364731967527764, "grad_norm": 0.376953125, "learning_rate": 0.0008092078658059554, "loss": 0.1671, "step": 58456 }, { "epoch": 0.10365086584058746, "grad_norm": 2.59375, "learning_rate": 0.0008091515401346117, "loss": 0.1601, "step": 58458 }, { "epoch": 0.10365441200589727, "grad_norm": 2.390625, "learning_rate": 0.0008090952157352707, "loss": 0.2204, "step": 58460 }, { "epoch": 0.10365795817120708, "grad_norm": 0.6328125, "learning_rate": 0.0008090388926081776, "loss": 0.155, "step": 58462 }, { "epoch": 0.1036615043365169, "grad_norm": 0.345703125, "learning_rate": 0.0008089825707535796, "loss": 0.1736, "step": 58464 }, { "epoch": 0.10366505050182671, "grad_norm": 0.419921875, "learning_rate": 0.0008089262501717227, "loss": 0.1291, "step": 58466 }, { "epoch": 0.10366859666713653, "grad_norm": 0.279296875, "learning_rate": 0.0008088699308628537, "loss": 0.1573, "step": 58468 }, { "epoch": 0.10367214283244634, "grad_norm": 0.32421875, "learning_rate": 0.0008088136128272181, "loss": 0.2214, "step": 58470 }, { "epoch": 0.10367568899775616, "grad_norm": 0.490234375, "learning_rate": 0.0008087572960650627, "loss": 0.2247, "step": 58472 }, { "epoch": 0.10367923516306599, "grad_norm": 0.52734375, "learning_rate": 0.0008087009805766337, "loss": 0.1844, "step": 58474 }, { "epoch": 0.1036827813283758, "grad_norm": 0.734375, "learning_rate": 0.0008086446663621776, "loss": 0.2181, "step": 58476 }, { "epoch": 0.10368632749368561, "grad_norm": 0.453125, "learning_rate": 0.0008085883534219402, "loss": 0.1512, "step": 58478 }, { "epoch": 0.10368987365899543, "grad_norm": 0.3828125, "learning_rate": 0.0008085320417561681, "loss": 0.1615, "step": 58480 }, { "epoch": 0.10369341982430524, "grad_norm": 0.419921875, "learning_rate": 0.0008084757313651077, "loss": 0.1979, "step": 58482 }, { "epoch": 0.10369696598961506, "grad_norm": 1.015625, "learning_rate": 0.0008084194222490053, "loss": 0.3434, "step": 58484 }, { "epoch": 0.10370051215492487, "grad_norm": 0.310546875, "learning_rate": 0.0008083631144081065, "loss": 0.1393, "step": 58486 }, { "epoch": 0.10370405832023469, "grad_norm": 0.484375, "learning_rate": 0.0008083068078426585, "loss": 0.2945, "step": 58488 }, { "epoch": 0.1037076044855445, "grad_norm": 0.45703125, "learning_rate": 0.0008082505025529068, "loss": 0.1405, "step": 58490 }, { "epoch": 0.10371115065085432, "grad_norm": 0.322265625, "learning_rate": 0.0008081941985390977, "loss": 0.145, "step": 58492 }, { "epoch": 0.10371469681616413, "grad_norm": 0.361328125, "learning_rate": 0.0008081378958014785, "loss": 0.1466, "step": 58494 }, { "epoch": 0.10371824298147395, "grad_norm": 0.248046875, "learning_rate": 0.000808081594340294, "loss": 0.2936, "step": 58496 }, { "epoch": 0.10372178914678376, "grad_norm": 0.2294921875, "learning_rate": 0.0008080252941557908, "loss": 0.1483, "step": 58498 }, { "epoch": 0.10372533531209358, "grad_norm": 0.291015625, "learning_rate": 0.0008079689952482156, "loss": 0.1559, "step": 58500 }, { "epoch": 0.10372888147740339, "grad_norm": 1.1796875, "learning_rate": 0.0008079126976178146, "loss": 0.2033, "step": 58502 }, { "epoch": 0.1037324276427132, "grad_norm": 0.2177734375, "learning_rate": 0.0008078564012648338, "loss": 0.1623, "step": 58504 }, { "epoch": 0.10373597380802302, "grad_norm": 0.515625, "learning_rate": 0.0008078001061895193, "loss": 0.2399, "step": 58506 }, { "epoch": 0.10373951997333283, "grad_norm": 0.9921875, "learning_rate": 0.0008077438123921172, "loss": 0.1871, "step": 58508 }, { "epoch": 0.10374306613864265, "grad_norm": 3.25, "learning_rate": 0.0008076875198728743, "loss": 0.2045, "step": 58510 }, { "epoch": 0.10374661230395246, "grad_norm": 0.47265625, "learning_rate": 0.000807631228632036, "loss": 0.1795, "step": 58512 }, { "epoch": 0.10375015846926228, "grad_norm": 0.279296875, "learning_rate": 0.000807574938669849, "loss": 0.1387, "step": 58514 }, { "epoch": 0.10375370463457209, "grad_norm": 0.330078125, "learning_rate": 0.0008075186499865595, "loss": 0.1528, "step": 58516 }, { "epoch": 0.1037572507998819, "grad_norm": 0.7734375, "learning_rate": 0.0008074623625824135, "loss": 0.1927, "step": 58518 }, { "epoch": 0.10376079696519173, "grad_norm": 0.59765625, "learning_rate": 0.0008074060764576571, "loss": 0.1986, "step": 58520 }, { "epoch": 0.10376434313050155, "grad_norm": 0.255859375, "learning_rate": 0.0008073497916125365, "loss": 0.1631, "step": 58522 }, { "epoch": 0.10376788929581136, "grad_norm": 0.61328125, "learning_rate": 0.0008072935080472981, "loss": 0.1679, "step": 58524 }, { "epoch": 0.10377143546112118, "grad_norm": 0.482421875, "learning_rate": 0.0008072372257621878, "loss": 0.1458, "step": 58526 }, { "epoch": 0.10377498162643099, "grad_norm": 0.42578125, "learning_rate": 0.0008071809447574519, "loss": 0.1944, "step": 58528 }, { "epoch": 0.10377852779174081, "grad_norm": 0.3046875, "learning_rate": 0.0008071246650333361, "loss": 0.1777, "step": 58530 }, { "epoch": 0.10378207395705062, "grad_norm": 0.44140625, "learning_rate": 0.0008070683865900872, "loss": 0.1889, "step": 58532 }, { "epoch": 0.10378562012236044, "grad_norm": 3.328125, "learning_rate": 0.0008070121094279513, "loss": 0.456, "step": 58534 }, { "epoch": 0.10378916628767025, "grad_norm": 0.365234375, "learning_rate": 0.0008069558335471738, "loss": 0.1956, "step": 58536 }, { "epoch": 0.10379271245298007, "grad_norm": 0.22265625, "learning_rate": 0.0008068995589480013, "loss": 0.1635, "step": 58538 }, { "epoch": 0.10379625861828988, "grad_norm": 0.3359375, "learning_rate": 0.00080684328563068, "loss": 0.3136, "step": 58540 }, { "epoch": 0.1037998047835997, "grad_norm": 0.228515625, "learning_rate": 0.000806787013595456, "loss": 0.1599, "step": 58542 }, { "epoch": 0.10380335094890951, "grad_norm": 0.259765625, "learning_rate": 0.0008067307428425752, "loss": 0.1647, "step": 58544 }, { "epoch": 0.10380689711421932, "grad_norm": 0.314453125, "learning_rate": 0.0008066744733722837, "loss": 0.1458, "step": 58546 }, { "epoch": 0.10381044327952914, "grad_norm": 0.357421875, "learning_rate": 0.0008066182051848274, "loss": 0.2087, "step": 58548 }, { "epoch": 0.10381398944483895, "grad_norm": 0.4375, "learning_rate": 0.0008065619382804532, "loss": 0.3061, "step": 58550 }, { "epoch": 0.10381753561014877, "grad_norm": 0.87890625, "learning_rate": 0.0008065056726594065, "loss": 0.1616, "step": 58552 }, { "epoch": 0.10382108177545858, "grad_norm": 0.609375, "learning_rate": 0.0008064494083219331, "loss": 0.1883, "step": 58554 }, { "epoch": 0.1038246279407684, "grad_norm": 0.54296875, "learning_rate": 0.0008063931452682796, "loss": 0.2266, "step": 58556 }, { "epoch": 0.10382817410607821, "grad_norm": 0.357421875, "learning_rate": 0.0008063368834986923, "loss": 0.1767, "step": 58558 }, { "epoch": 0.10383172027138803, "grad_norm": 0.78515625, "learning_rate": 0.0008062806230134167, "loss": 0.3221, "step": 58560 }, { "epoch": 0.10383526643669784, "grad_norm": 0.373046875, "learning_rate": 0.0008062243638126992, "loss": 0.1883, "step": 58562 }, { "epoch": 0.10383881260200765, "grad_norm": 0.72265625, "learning_rate": 0.0008061681058967854, "loss": 0.5023, "step": 58564 }, { "epoch": 0.10384235876731748, "grad_norm": 0.2451171875, "learning_rate": 0.0008061118492659219, "loss": 0.1698, "step": 58566 }, { "epoch": 0.1038459049326273, "grad_norm": 1.65625, "learning_rate": 0.0008060555939203542, "loss": 0.2334, "step": 58568 }, { "epoch": 0.10384945109793711, "grad_norm": 0.65625, "learning_rate": 0.0008059993398603285, "loss": 0.2094, "step": 58570 }, { "epoch": 0.10385299726324693, "grad_norm": 0.72265625, "learning_rate": 0.0008059430870860911, "loss": 0.2656, "step": 58572 }, { "epoch": 0.10385654342855674, "grad_norm": 0.427734375, "learning_rate": 0.0008058868355978877, "loss": 0.1945, "step": 58574 }, { "epoch": 0.10386008959386656, "grad_norm": 0.27734375, "learning_rate": 0.0008058305853959649, "loss": 0.1758, "step": 58576 }, { "epoch": 0.10386363575917637, "grad_norm": 0.44140625, "learning_rate": 0.0008057743364805677, "loss": 0.1735, "step": 58578 }, { "epoch": 0.10386718192448618, "grad_norm": 0.6328125, "learning_rate": 0.0008057180888519428, "loss": 0.235, "step": 58580 }, { "epoch": 0.103870728089796, "grad_norm": 1.8515625, "learning_rate": 0.0008056618425103362, "loss": 0.2829, "step": 58582 }, { "epoch": 0.10387427425510581, "grad_norm": 0.3203125, "learning_rate": 0.0008056055974559937, "loss": 0.1694, "step": 58584 }, { "epoch": 0.10387782042041563, "grad_norm": 6.53125, "learning_rate": 0.0008055493536891612, "loss": 0.3452, "step": 58586 }, { "epoch": 0.10388136658572544, "grad_norm": 0.173828125, "learning_rate": 0.0008054931112100848, "loss": 0.1386, "step": 58588 }, { "epoch": 0.10388491275103526, "grad_norm": 1.1328125, "learning_rate": 0.0008054368700190105, "loss": 0.1839, "step": 58590 }, { "epoch": 0.10388845891634507, "grad_norm": 11.5625, "learning_rate": 0.0008053806301161843, "loss": 0.2503, "step": 58592 }, { "epoch": 0.10389200508165489, "grad_norm": 0.38671875, "learning_rate": 0.000805324391501852, "loss": 0.1805, "step": 58594 }, { "epoch": 0.1038955512469647, "grad_norm": 0.640625, "learning_rate": 0.0008052681541762595, "loss": 0.2011, "step": 58596 }, { "epoch": 0.10389909741227452, "grad_norm": 1.53125, "learning_rate": 0.0008052119181396531, "loss": 0.3263, "step": 58598 }, { "epoch": 0.10390264357758433, "grad_norm": 0.51171875, "learning_rate": 0.0008051556833922784, "loss": 0.1745, "step": 58600 }, { "epoch": 0.10390618974289415, "grad_norm": 1.671875, "learning_rate": 0.0008050994499343817, "loss": 0.1651, "step": 58602 }, { "epoch": 0.10390973590820396, "grad_norm": 0.302734375, "learning_rate": 0.0008050432177662084, "loss": 0.1553, "step": 58604 }, { "epoch": 0.10391328207351377, "grad_norm": 0.44921875, "learning_rate": 0.0008049869868880049, "loss": 0.1744, "step": 58606 }, { "epoch": 0.10391682823882359, "grad_norm": 0.439453125, "learning_rate": 0.0008049307573000173, "loss": 0.1637, "step": 58608 }, { "epoch": 0.10392037440413342, "grad_norm": 0.291015625, "learning_rate": 0.0008048745290024906, "loss": 0.2139, "step": 58610 }, { "epoch": 0.10392392056944323, "grad_norm": 0.71875, "learning_rate": 0.0008048183019956714, "loss": 0.1641, "step": 58612 }, { "epoch": 0.10392746673475305, "grad_norm": 0.1962890625, "learning_rate": 0.0008047620762798058, "loss": 0.1967, "step": 58614 }, { "epoch": 0.10393101290006286, "grad_norm": 0.3046875, "learning_rate": 0.0008047058518551391, "loss": 0.1697, "step": 58616 }, { "epoch": 0.10393455906537268, "grad_norm": 0.412109375, "learning_rate": 0.0008046496287219176, "loss": 0.2045, "step": 58618 }, { "epoch": 0.10393810523068249, "grad_norm": 1.0625, "learning_rate": 0.0008045934068803871, "loss": 0.1396, "step": 58620 }, { "epoch": 0.1039416513959923, "grad_norm": 0.400390625, "learning_rate": 0.000804537186330793, "loss": 0.1483, "step": 58622 }, { "epoch": 0.10394519756130212, "grad_norm": 0.4609375, "learning_rate": 0.0008044809670733824, "loss": 0.4682, "step": 58624 }, { "epoch": 0.10394874372661193, "grad_norm": 0.73828125, "learning_rate": 0.0008044247491083997, "loss": 0.1783, "step": 58626 }, { "epoch": 0.10395228989192175, "grad_norm": 0.59375, "learning_rate": 0.0008043685324360918, "loss": 0.1825, "step": 58628 }, { "epoch": 0.10395583605723156, "grad_norm": 3.109375, "learning_rate": 0.000804312317056704, "loss": 0.4518, "step": 58630 }, { "epoch": 0.10395938222254138, "grad_norm": 0.177734375, "learning_rate": 0.0008042561029704827, "loss": 0.1369, "step": 58632 }, { "epoch": 0.10396292838785119, "grad_norm": 0.423828125, "learning_rate": 0.0008041998901776732, "loss": 0.1845, "step": 58634 }, { "epoch": 0.103966474553161, "grad_norm": 0.63671875, "learning_rate": 0.0008041436786785217, "loss": 0.2695, "step": 58636 }, { "epoch": 0.10397002071847082, "grad_norm": 0.36328125, "learning_rate": 0.0008040874684732737, "loss": 0.182, "step": 58638 }, { "epoch": 0.10397356688378064, "grad_norm": 0.24609375, "learning_rate": 0.0008040312595621754, "loss": 0.1337, "step": 58640 }, { "epoch": 0.10397711304909045, "grad_norm": 0.8125, "learning_rate": 0.0008039750519454723, "loss": 0.2416, "step": 58642 }, { "epoch": 0.10398065921440026, "grad_norm": 2.015625, "learning_rate": 0.0008039188456234103, "loss": 0.204, "step": 58644 }, { "epoch": 0.10398420537971008, "grad_norm": 0.484375, "learning_rate": 0.0008038626405962355, "loss": 0.2046, "step": 58646 }, { "epoch": 0.1039877515450199, "grad_norm": 3.015625, "learning_rate": 0.0008038064368641935, "loss": 0.3545, "step": 58648 }, { "epoch": 0.10399129771032971, "grad_norm": 1.09375, "learning_rate": 0.00080375023442753, "loss": 0.2283, "step": 58650 }, { "epoch": 0.10399484387563952, "grad_norm": 0.6015625, "learning_rate": 0.0008036940332864908, "loss": 0.1665, "step": 58652 }, { "epoch": 0.10399839004094934, "grad_norm": 0.46484375, "learning_rate": 0.0008036378334413219, "loss": 0.2514, "step": 58654 }, { "epoch": 0.10400193620625917, "grad_norm": 1.4296875, "learning_rate": 0.0008035816348922689, "loss": 0.1953, "step": 58656 }, { "epoch": 0.10400548237156898, "grad_norm": 0.48046875, "learning_rate": 0.0008035254376395779, "loss": 0.1482, "step": 58658 }, { "epoch": 0.1040090285368788, "grad_norm": 0.462890625, "learning_rate": 0.0008034692416834941, "loss": 0.1561, "step": 58660 }, { "epoch": 0.10401257470218861, "grad_norm": 0.25, "learning_rate": 0.0008034130470242637, "loss": 0.1234, "step": 58662 }, { "epoch": 0.10401612086749842, "grad_norm": 1.6796875, "learning_rate": 0.0008033568536621323, "loss": 0.2087, "step": 58664 }, { "epoch": 0.10401966703280824, "grad_norm": 1.0546875, "learning_rate": 0.0008033006615973462, "loss": 0.4398, "step": 58666 }, { "epoch": 0.10402321319811805, "grad_norm": 1.1796875, "learning_rate": 0.0008032444708301499, "loss": 0.2942, "step": 58668 }, { "epoch": 0.10402675936342787, "grad_norm": 0.578125, "learning_rate": 0.0008031882813607903, "loss": 0.1589, "step": 58670 }, { "epoch": 0.10403030552873768, "grad_norm": 0.5625, "learning_rate": 0.0008031320931895128, "loss": 0.127, "step": 58672 }, { "epoch": 0.1040338516940475, "grad_norm": 0.37890625, "learning_rate": 0.0008030759063165629, "loss": 0.168, "step": 58674 }, { "epoch": 0.10403739785935731, "grad_norm": 0.54296875, "learning_rate": 0.0008030197207421865, "loss": 0.2547, "step": 58676 }, { "epoch": 0.10404094402466713, "grad_norm": 0.52734375, "learning_rate": 0.0008029635364666294, "loss": 0.119, "step": 58678 }, { "epoch": 0.10404449018997694, "grad_norm": 0.25, "learning_rate": 0.0008029073534901374, "loss": 0.1704, "step": 58680 }, { "epoch": 0.10404803635528675, "grad_norm": 0.8515625, "learning_rate": 0.000802851171812956, "loss": 0.1333, "step": 58682 }, { "epoch": 0.10405158252059657, "grad_norm": 1.4453125, "learning_rate": 0.000802794991435331, "loss": 0.2899, "step": 58684 }, { "epoch": 0.10405512868590638, "grad_norm": 1.796875, "learning_rate": 0.0008027388123575076, "loss": 0.244, "step": 58686 }, { "epoch": 0.1040586748512162, "grad_norm": 2.625, "learning_rate": 0.0008026826345797325, "loss": 0.295, "step": 58688 }, { "epoch": 0.10406222101652601, "grad_norm": 2.4375, "learning_rate": 0.0008026264581022509, "loss": 0.123, "step": 58690 }, { "epoch": 0.10406576718183583, "grad_norm": 0.4609375, "learning_rate": 0.0008025702829253081, "loss": 0.2371, "step": 58692 }, { "epoch": 0.10406931334714564, "grad_norm": 0.33203125, "learning_rate": 0.0008025141090491503, "loss": 0.2096, "step": 58694 }, { "epoch": 0.10407285951245546, "grad_norm": 0.25390625, "learning_rate": 0.000802457936474023, "loss": 0.1955, "step": 58696 }, { "epoch": 0.10407640567776527, "grad_norm": 0.37109375, "learning_rate": 0.000802401765200172, "loss": 0.1403, "step": 58698 }, { "epoch": 0.10407995184307509, "grad_norm": 0.310546875, "learning_rate": 0.0008023455952278425, "loss": 0.1732, "step": 58700 }, { "epoch": 0.10408349800838491, "grad_norm": 1.640625, "learning_rate": 0.0008022894265572805, "loss": 0.2855, "step": 58702 }, { "epoch": 0.10408704417369473, "grad_norm": 0.265625, "learning_rate": 0.0008022332591887315, "loss": 0.1769, "step": 58704 }, { "epoch": 0.10409059033900454, "grad_norm": 2.078125, "learning_rate": 0.0008021770931224417, "loss": 0.3658, "step": 58706 }, { "epoch": 0.10409413650431436, "grad_norm": 0.30078125, "learning_rate": 0.0008021209283586559, "loss": 0.2223, "step": 58708 }, { "epoch": 0.10409768266962417, "grad_norm": 3.671875, "learning_rate": 0.0008020647648976205, "loss": 0.2032, "step": 58710 }, { "epoch": 0.10410122883493399, "grad_norm": 1.3828125, "learning_rate": 0.0008020086027395804, "loss": 0.4167, "step": 58712 }, { "epoch": 0.1041047750002438, "grad_norm": 0.4765625, "learning_rate": 0.0008019524418847819, "loss": 0.2649, "step": 58714 }, { "epoch": 0.10410832116555362, "grad_norm": 0.2080078125, "learning_rate": 0.0008018962823334702, "loss": 0.1568, "step": 58716 }, { "epoch": 0.10411186733086343, "grad_norm": 0.29296875, "learning_rate": 0.0008018401240858908, "loss": 0.1902, "step": 58718 }, { "epoch": 0.10411541349617325, "grad_norm": 0.5078125, "learning_rate": 0.0008017839671422895, "loss": 0.2665, "step": 58720 }, { "epoch": 0.10411895966148306, "grad_norm": 0.53125, "learning_rate": 0.0008017278115029123, "loss": 0.1744, "step": 58722 }, { "epoch": 0.10412250582679287, "grad_norm": 0.31640625, "learning_rate": 0.0008016716571680038, "loss": 0.2001, "step": 58724 }, { "epoch": 0.10412605199210269, "grad_norm": 0.46875, "learning_rate": 0.0008016155041378106, "loss": 0.1757, "step": 58726 }, { "epoch": 0.1041295981574125, "grad_norm": 0.60546875, "learning_rate": 0.0008015593524125777, "loss": 0.1879, "step": 58728 }, { "epoch": 0.10413314432272232, "grad_norm": 4.21875, "learning_rate": 0.000801503201992551, "loss": 0.2732, "step": 58730 }, { "epoch": 0.10413669048803213, "grad_norm": 0.64453125, "learning_rate": 0.0008014470528779758, "loss": 0.2198, "step": 58732 }, { "epoch": 0.10414023665334195, "grad_norm": 0.58203125, "learning_rate": 0.0008013909050690977, "loss": 0.1446, "step": 58734 }, { "epoch": 0.10414378281865176, "grad_norm": 0.365234375, "learning_rate": 0.0008013347585661624, "loss": 0.1451, "step": 58736 }, { "epoch": 0.10414732898396158, "grad_norm": 0.8125, "learning_rate": 0.0008012786133694153, "loss": 0.2336, "step": 58738 }, { "epoch": 0.10415087514927139, "grad_norm": 0.296875, "learning_rate": 0.0008012224694791024, "loss": 0.2281, "step": 58740 }, { "epoch": 0.1041544213145812, "grad_norm": 0.56640625, "learning_rate": 0.0008011663268954686, "loss": 0.1853, "step": 58742 }, { "epoch": 0.10415796747989102, "grad_norm": 0.384765625, "learning_rate": 0.0008011101856187592, "loss": 0.1877, "step": 58744 }, { "epoch": 0.10416151364520085, "grad_norm": 0.79296875, "learning_rate": 0.000801054045649221, "loss": 0.1302, "step": 58746 }, { "epoch": 0.10416505981051066, "grad_norm": 0.279296875, "learning_rate": 0.0008009979069870985, "loss": 0.2158, "step": 58748 }, { "epoch": 0.10416860597582048, "grad_norm": 0.322265625, "learning_rate": 0.0008009417696326376, "loss": 0.1344, "step": 58750 }, { "epoch": 0.10417215214113029, "grad_norm": 0.408203125, "learning_rate": 0.0008008856335860832, "loss": 0.2229, "step": 58752 }, { "epoch": 0.1041756983064401, "grad_norm": 0.1591796875, "learning_rate": 0.0008008294988476817, "loss": 0.1409, "step": 58754 }, { "epoch": 0.10417924447174992, "grad_norm": 0.392578125, "learning_rate": 0.0008007733654176783, "loss": 0.1602, "step": 58756 }, { "epoch": 0.10418279063705974, "grad_norm": 0.5390625, "learning_rate": 0.0008007172332963182, "loss": 0.1304, "step": 58758 }, { "epoch": 0.10418633680236955, "grad_norm": 0.45703125, "learning_rate": 0.0008006611024838471, "loss": 0.2304, "step": 58760 }, { "epoch": 0.10418988296767936, "grad_norm": 0.41015625, "learning_rate": 0.0008006049729805103, "loss": 0.1756, "step": 58762 }, { "epoch": 0.10419342913298918, "grad_norm": 2.234375, "learning_rate": 0.0008005488447865536, "loss": 0.353, "step": 58764 }, { "epoch": 0.104196975298299, "grad_norm": 0.5234375, "learning_rate": 0.0008004927179022223, "loss": 0.1582, "step": 58766 }, { "epoch": 0.10420052146360881, "grad_norm": 0.640625, "learning_rate": 0.0008004365923277621, "loss": 0.1862, "step": 58768 }, { "epoch": 0.10420406762891862, "grad_norm": 9.8125, "learning_rate": 0.0008003804680634179, "loss": 0.2806, "step": 58770 }, { "epoch": 0.10420761379422844, "grad_norm": 0.71875, "learning_rate": 0.0008003243451094358, "loss": 0.2218, "step": 58772 }, { "epoch": 0.10421115995953825, "grad_norm": 1.09375, "learning_rate": 0.0008002682234660606, "loss": 0.1816, "step": 58774 }, { "epoch": 0.10421470612484807, "grad_norm": 0.52734375, "learning_rate": 0.0008002121031335381, "loss": 0.1985, "step": 58776 }, { "epoch": 0.10421825229015788, "grad_norm": 0.3515625, "learning_rate": 0.0008001559841121137, "loss": 0.1481, "step": 58778 }, { "epoch": 0.1042217984554677, "grad_norm": 0.36328125, "learning_rate": 0.0008000998664020333, "loss": 0.183, "step": 58780 }, { "epoch": 0.10422534462077751, "grad_norm": 1.40625, "learning_rate": 0.0008000437500035415, "loss": 0.3369, "step": 58782 }, { "epoch": 0.10422889078608732, "grad_norm": 1.453125, "learning_rate": 0.0007999876349168842, "loss": 0.2469, "step": 58784 }, { "epoch": 0.10423243695139714, "grad_norm": 0.5390625, "learning_rate": 0.0007999315211423066, "loss": 0.1382, "step": 58786 }, { "epoch": 0.10423598311670695, "grad_norm": 0.1875, "learning_rate": 0.0007998754086800546, "loss": 0.1295, "step": 58788 }, { "epoch": 0.10423952928201677, "grad_norm": 1.5546875, "learning_rate": 0.0007998192975303729, "loss": 0.2344, "step": 58790 }, { "epoch": 0.1042430754473266, "grad_norm": 0.39453125, "learning_rate": 0.0007997631876935072, "loss": 0.1715, "step": 58792 }, { "epoch": 0.10424662161263641, "grad_norm": 0.322265625, "learning_rate": 0.000799707079169703, "loss": 0.1594, "step": 58794 }, { "epoch": 0.10425016777794623, "grad_norm": 0.5625, "learning_rate": 0.0007996509719592058, "loss": 0.2411, "step": 58796 }, { "epoch": 0.10425371394325604, "grad_norm": 0.361328125, "learning_rate": 0.0007995948660622605, "loss": 0.1542, "step": 58798 }, { "epoch": 0.10425726010856586, "grad_norm": 0.74609375, "learning_rate": 0.0007995387614791128, "loss": 0.2363, "step": 58800 }, { "epoch": 0.10426080627387567, "grad_norm": 0.36328125, "learning_rate": 0.0007994826582100081, "loss": 0.1915, "step": 58802 }, { "epoch": 0.10426435243918548, "grad_norm": 1.4765625, "learning_rate": 0.0007994265562551919, "loss": 0.3613, "step": 58804 }, { "epoch": 0.1042678986044953, "grad_norm": 0.259765625, "learning_rate": 0.0007993704556149091, "loss": 0.1639, "step": 58806 }, { "epoch": 0.10427144476980511, "grad_norm": 0.435546875, "learning_rate": 0.0007993143562894052, "loss": 0.171, "step": 58808 }, { "epoch": 0.10427499093511493, "grad_norm": 0.6640625, "learning_rate": 0.0007992582582789257, "loss": 0.2056, "step": 58810 }, { "epoch": 0.10427853710042474, "grad_norm": 0.2177734375, "learning_rate": 0.0007992021615837164, "loss": 0.139, "step": 58812 }, { "epoch": 0.10428208326573456, "grad_norm": 0.2470703125, "learning_rate": 0.0007991460662040213, "loss": 0.1901, "step": 58814 }, { "epoch": 0.10428562943104437, "grad_norm": 0.419921875, "learning_rate": 0.0007990899721400871, "loss": 0.154, "step": 58816 }, { "epoch": 0.10428917559635419, "grad_norm": 0.6328125, "learning_rate": 0.0007990338793921582, "loss": 0.2712, "step": 58818 }, { "epoch": 0.104292721761664, "grad_norm": 0.32421875, "learning_rate": 0.0007989777879604807, "loss": 0.2441, "step": 58820 }, { "epoch": 0.10429626792697382, "grad_norm": 0.2265625, "learning_rate": 0.0007989216978452991, "loss": 0.1687, "step": 58822 }, { "epoch": 0.10429981409228363, "grad_norm": 0.671875, "learning_rate": 0.0007988656090468593, "loss": 0.206, "step": 58824 }, { "epoch": 0.10430336025759344, "grad_norm": 0.365234375, "learning_rate": 0.0007988095215654066, "loss": 0.1723, "step": 58826 }, { "epoch": 0.10430690642290326, "grad_norm": 0.62109375, "learning_rate": 0.0007987534354011856, "loss": 0.1974, "step": 58828 }, { "epoch": 0.10431045258821307, "grad_norm": 0.267578125, "learning_rate": 0.0007986973505544428, "loss": 0.1489, "step": 58830 }, { "epoch": 0.10431399875352289, "grad_norm": 0.390625, "learning_rate": 0.0007986412670254222, "loss": 0.2381, "step": 58832 }, { "epoch": 0.1043175449188327, "grad_norm": 0.96484375, "learning_rate": 0.0007985851848143697, "loss": 0.2054, "step": 58834 }, { "epoch": 0.10432109108414252, "grad_norm": 0.41015625, "learning_rate": 0.0007985291039215305, "loss": 0.2109, "step": 58836 }, { "epoch": 0.10432463724945235, "grad_norm": 0.1923828125, "learning_rate": 0.0007984730243471503, "loss": 0.1471, "step": 58838 }, { "epoch": 0.10432818341476216, "grad_norm": 1.0859375, "learning_rate": 0.0007984169460914736, "loss": 0.4735, "step": 58840 }, { "epoch": 0.10433172958007197, "grad_norm": 0.41015625, "learning_rate": 0.0007983608691547461, "loss": 0.1838, "step": 58842 }, { "epoch": 0.10433527574538179, "grad_norm": 0.3828125, "learning_rate": 0.0007983047935372127, "loss": 0.2351, "step": 58844 }, { "epoch": 0.1043388219106916, "grad_norm": 0.259765625, "learning_rate": 0.0007982487192391194, "loss": 0.1996, "step": 58846 }, { "epoch": 0.10434236807600142, "grad_norm": 0.328125, "learning_rate": 0.0007981926462607105, "loss": 0.1607, "step": 58848 }, { "epoch": 0.10434591424131123, "grad_norm": 0.443359375, "learning_rate": 0.0007981365746022318, "loss": 0.1553, "step": 58850 }, { "epoch": 0.10434946040662105, "grad_norm": 0.373046875, "learning_rate": 0.0007980805042639283, "loss": 0.1607, "step": 58852 }, { "epoch": 0.10435300657193086, "grad_norm": 0.12451171875, "learning_rate": 0.0007980244352460455, "loss": 0.1446, "step": 58854 }, { "epoch": 0.10435655273724068, "grad_norm": 0.287109375, "learning_rate": 0.0007979683675488281, "loss": 0.1937, "step": 58856 }, { "epoch": 0.10436009890255049, "grad_norm": 0.310546875, "learning_rate": 0.0007979123011725218, "loss": 0.206, "step": 58858 }, { "epoch": 0.1043636450678603, "grad_norm": 0.4609375, "learning_rate": 0.0007978562361173717, "loss": 0.1378, "step": 58860 }, { "epoch": 0.10436719123317012, "grad_norm": 0.330078125, "learning_rate": 0.0007978001723836228, "loss": 0.2169, "step": 58862 }, { "epoch": 0.10437073739847993, "grad_norm": 0.5, "learning_rate": 0.0007977441099715206, "loss": 0.2389, "step": 58864 }, { "epoch": 0.10437428356378975, "grad_norm": 0.85546875, "learning_rate": 0.0007976880488813099, "loss": 0.2318, "step": 58866 }, { "epoch": 0.10437782972909956, "grad_norm": 0.490234375, "learning_rate": 0.000797631989113236, "loss": 0.1647, "step": 58868 }, { "epoch": 0.10438137589440938, "grad_norm": 0.47265625, "learning_rate": 0.0007975759306675446, "loss": 0.1648, "step": 58870 }, { "epoch": 0.10438492205971919, "grad_norm": 0.6015625, "learning_rate": 0.0007975198735444797, "loss": 0.1235, "step": 58872 }, { "epoch": 0.10438846822502901, "grad_norm": 1.3359375, "learning_rate": 0.0007974638177442874, "loss": 0.2564, "step": 58874 }, { "epoch": 0.10439201439033882, "grad_norm": 0.34765625, "learning_rate": 0.0007974077632672128, "loss": 0.1545, "step": 58876 }, { "epoch": 0.10439556055564864, "grad_norm": 0.7109375, "learning_rate": 0.0007973517101135011, "loss": 0.1984, "step": 58878 }, { "epoch": 0.10439910672095845, "grad_norm": 0.875, "learning_rate": 0.0007972956582833968, "loss": 0.2437, "step": 58880 }, { "epoch": 0.10440265288626828, "grad_norm": 0.314453125, "learning_rate": 0.0007972396077771455, "loss": 0.1684, "step": 58882 }, { "epoch": 0.1044061990515781, "grad_norm": 0.53125, "learning_rate": 0.0007971835585949922, "loss": 0.1948, "step": 58884 }, { "epoch": 0.10440974521688791, "grad_norm": 0.294921875, "learning_rate": 0.0007971275107371829, "loss": 0.186, "step": 58886 }, { "epoch": 0.10441329138219772, "grad_norm": 0.384765625, "learning_rate": 0.0007970714642039612, "loss": 0.1804, "step": 58888 }, { "epoch": 0.10441683754750754, "grad_norm": 0.671875, "learning_rate": 0.0007970154189955729, "loss": 0.1974, "step": 58890 }, { "epoch": 0.10442038371281735, "grad_norm": 0.59375, "learning_rate": 0.0007969593751122633, "loss": 0.1514, "step": 58892 }, { "epoch": 0.10442392987812717, "grad_norm": 0.375, "learning_rate": 0.0007969033325542778, "loss": 0.2836, "step": 58894 }, { "epoch": 0.10442747604343698, "grad_norm": 1.3984375, "learning_rate": 0.0007968472913218606, "loss": 0.2308, "step": 58896 }, { "epoch": 0.1044310222087468, "grad_norm": 0.671875, "learning_rate": 0.0007967912514152574, "loss": 0.1908, "step": 58898 }, { "epoch": 0.10443456837405661, "grad_norm": 1.390625, "learning_rate": 0.000796735212834713, "loss": 0.2846, "step": 58900 }, { "epoch": 0.10443811453936643, "grad_norm": 0.349609375, "learning_rate": 0.000796679175580473, "loss": 0.1928, "step": 58902 }, { "epoch": 0.10444166070467624, "grad_norm": 0.1572265625, "learning_rate": 0.0007966231396527814, "loss": 0.1522, "step": 58904 }, { "epoch": 0.10444520686998605, "grad_norm": 0.875, "learning_rate": 0.0007965671050518844, "loss": 0.1783, "step": 58906 }, { "epoch": 0.10444875303529587, "grad_norm": 0.353515625, "learning_rate": 0.0007965110717780266, "loss": 0.2374, "step": 58908 }, { "epoch": 0.10445229920060568, "grad_norm": 0.279296875, "learning_rate": 0.000796455039831453, "loss": 0.1374, "step": 58910 }, { "epoch": 0.1044558453659155, "grad_norm": 0.54296875, "learning_rate": 0.000796399009212409, "loss": 0.2282, "step": 58912 }, { "epoch": 0.10445939153122531, "grad_norm": 0.2734375, "learning_rate": 0.0007963429799211393, "loss": 0.147, "step": 58914 }, { "epoch": 0.10446293769653513, "grad_norm": 0.97265625, "learning_rate": 0.0007962869519578888, "loss": 0.1978, "step": 58916 }, { "epoch": 0.10446648386184494, "grad_norm": 1.171875, "learning_rate": 0.0007962309253229027, "loss": 0.1637, "step": 58918 }, { "epoch": 0.10447003002715476, "grad_norm": 2.859375, "learning_rate": 0.0007961749000164267, "loss": 0.1751, "step": 58920 }, { "epoch": 0.10447357619246457, "grad_norm": 0.240234375, "learning_rate": 0.0007961188760387045, "loss": 0.1358, "step": 58922 }, { "epoch": 0.10447712235777439, "grad_norm": 2.015625, "learning_rate": 0.0007960628533899821, "loss": 0.437, "step": 58924 }, { "epoch": 0.1044806685230842, "grad_norm": 0.58984375, "learning_rate": 0.0007960068320705041, "loss": 0.1705, "step": 58926 }, { "epoch": 0.10448421468839403, "grad_norm": 0.5234375, "learning_rate": 0.0007959508120805159, "loss": 0.1584, "step": 58928 }, { "epoch": 0.10448776085370384, "grad_norm": 0.310546875, "learning_rate": 0.000795894793420262, "loss": 0.2002, "step": 58930 }, { "epoch": 0.10449130701901366, "grad_norm": 1.4453125, "learning_rate": 0.0007958387760899876, "loss": 0.2019, "step": 58932 }, { "epoch": 0.10449485318432347, "grad_norm": 0.267578125, "learning_rate": 0.0007957827600899381, "loss": 0.1847, "step": 58934 }, { "epoch": 0.10449839934963329, "grad_norm": 0.65234375, "learning_rate": 0.0007957267454203578, "loss": 0.2098, "step": 58936 }, { "epoch": 0.1045019455149431, "grad_norm": 0.2578125, "learning_rate": 0.0007956707320814922, "loss": 0.165, "step": 58938 }, { "epoch": 0.10450549168025292, "grad_norm": 0.4375, "learning_rate": 0.0007956147200735857, "loss": 0.1496, "step": 58940 }, { "epoch": 0.10450903784556273, "grad_norm": 0.373046875, "learning_rate": 0.0007955587093968837, "loss": 0.1698, "step": 58942 }, { "epoch": 0.10451258401087254, "grad_norm": 0.57421875, "learning_rate": 0.0007955027000516313, "loss": 0.1427, "step": 58944 }, { "epoch": 0.10451613017618236, "grad_norm": 0.404296875, "learning_rate": 0.000795446692038073, "loss": 0.1888, "step": 58946 }, { "epoch": 0.10451967634149217, "grad_norm": 0.2099609375, "learning_rate": 0.0007953906853564539, "loss": 0.1891, "step": 58948 }, { "epoch": 0.10452322250680199, "grad_norm": 0.51171875, "learning_rate": 0.0007953346800070189, "loss": 0.1565, "step": 58950 }, { "epoch": 0.1045267686721118, "grad_norm": 1.1171875, "learning_rate": 0.0007952786759900135, "loss": 0.2496, "step": 58952 }, { "epoch": 0.10453031483742162, "grad_norm": 0.40625, "learning_rate": 0.000795222673305682, "loss": 0.1745, "step": 58954 }, { "epoch": 0.10453386100273143, "grad_norm": 0.359375, "learning_rate": 0.0007951666719542694, "loss": 0.2108, "step": 58956 }, { "epoch": 0.10453740716804125, "grad_norm": 0.66796875, "learning_rate": 0.0007951106719360206, "loss": 0.1732, "step": 58958 }, { "epoch": 0.10454095333335106, "grad_norm": 0.2138671875, "learning_rate": 0.0007950546732511809, "loss": 0.2146, "step": 58960 }, { "epoch": 0.10454449949866088, "grad_norm": 0.275390625, "learning_rate": 0.0007949986758999947, "loss": 0.1714, "step": 58962 }, { "epoch": 0.10454804566397069, "grad_norm": 0.79296875, "learning_rate": 0.0007949426798827071, "loss": 0.175, "step": 58964 }, { "epoch": 0.1045515918292805, "grad_norm": 0.83203125, "learning_rate": 0.0007948866851995631, "loss": 0.1644, "step": 58966 }, { "epoch": 0.10455513799459032, "grad_norm": 4.15625, "learning_rate": 0.0007948306918508077, "loss": 0.4825, "step": 58968 }, { "epoch": 0.10455868415990013, "grad_norm": 0.85546875, "learning_rate": 0.0007947746998366852, "loss": 0.166, "step": 58970 }, { "epoch": 0.10456223032520995, "grad_norm": 2.6875, "learning_rate": 0.0007947187091574412, "loss": 0.2994, "step": 58972 }, { "epoch": 0.10456577649051978, "grad_norm": 0.7109375, "learning_rate": 0.0007946627198133201, "loss": 0.2028, "step": 58974 }, { "epoch": 0.10456932265582959, "grad_norm": 2.46875, "learning_rate": 0.0007946067318045671, "loss": 0.2515, "step": 58976 }, { "epoch": 0.1045728688211394, "grad_norm": 0.62109375, "learning_rate": 0.0007945507451314265, "loss": 0.1927, "step": 58978 }, { "epoch": 0.10457641498644922, "grad_norm": 0.60546875, "learning_rate": 0.0007944947597941437, "loss": 0.2312, "step": 58980 }, { "epoch": 0.10457996115175903, "grad_norm": 0.38671875, "learning_rate": 0.0007944387757929632, "loss": 0.2115, "step": 58982 }, { "epoch": 0.10458350731706885, "grad_norm": 0.94921875, "learning_rate": 0.0007943827931281304, "loss": 0.302, "step": 58984 }, { "epoch": 0.10458705348237866, "grad_norm": 0.369140625, "learning_rate": 0.0007943268117998894, "loss": 0.2267, "step": 58986 }, { "epoch": 0.10459059964768848, "grad_norm": 1.0390625, "learning_rate": 0.0007942708318084853, "loss": 0.279, "step": 58988 }, { "epoch": 0.1045941458129983, "grad_norm": 0.33203125, "learning_rate": 0.0007942148531541633, "loss": 0.1968, "step": 58990 }, { "epoch": 0.10459769197830811, "grad_norm": 0.412109375, "learning_rate": 0.0007941588758371677, "loss": 0.192, "step": 58992 }, { "epoch": 0.10460123814361792, "grad_norm": 1.3125, "learning_rate": 0.0007941028998577437, "loss": 0.2532, "step": 58994 }, { "epoch": 0.10460478430892774, "grad_norm": 0.482421875, "learning_rate": 0.0007940469252161358, "loss": 0.4615, "step": 58996 }, { "epoch": 0.10460833047423755, "grad_norm": 0.23828125, "learning_rate": 0.000793990951912589, "loss": 0.2283, "step": 58998 }, { "epoch": 0.10461187663954737, "grad_norm": 0.49609375, "learning_rate": 0.0007939349799473476, "loss": 0.164, "step": 59000 }, { "epoch": 0.10461542280485718, "grad_norm": 0.486328125, "learning_rate": 0.0007938790093206576, "loss": 0.1698, "step": 59002 }, { "epoch": 0.104618968970167, "grad_norm": 0.271484375, "learning_rate": 0.0007938230400327626, "loss": 0.1265, "step": 59004 }, { "epoch": 0.10462251513547681, "grad_norm": 0.53515625, "learning_rate": 0.0007937670720839074, "loss": 0.1535, "step": 59006 }, { "epoch": 0.10462606130078662, "grad_norm": 1.1328125, "learning_rate": 0.0007937111054743378, "loss": 0.1983, "step": 59008 }, { "epoch": 0.10462960746609644, "grad_norm": 0.2236328125, "learning_rate": 0.0007936551402042975, "loss": 0.1657, "step": 59010 }, { "epoch": 0.10463315363140625, "grad_norm": 0.9765625, "learning_rate": 0.000793599176274032, "loss": 0.1668, "step": 59012 }, { "epoch": 0.10463669979671607, "grad_norm": 1.359375, "learning_rate": 0.0007935432136837853, "loss": 0.2667, "step": 59014 }, { "epoch": 0.10464024596202588, "grad_norm": 0.796875, "learning_rate": 0.0007934872524338026, "loss": 0.1845, "step": 59016 }, { "epoch": 0.10464379212733571, "grad_norm": 0.150390625, "learning_rate": 0.0007934312925243292, "loss": 0.3815, "step": 59018 }, { "epoch": 0.10464733829264553, "grad_norm": 0.439453125, "learning_rate": 0.0007933753339556086, "loss": 0.1917, "step": 59020 }, { "epoch": 0.10465088445795534, "grad_norm": 0.56640625, "learning_rate": 0.0007933193767278863, "loss": 0.1351, "step": 59022 }, { "epoch": 0.10465443062326515, "grad_norm": 0.78125, "learning_rate": 0.0007932634208414071, "loss": 0.2515, "step": 59024 }, { "epoch": 0.10465797678857497, "grad_norm": 0.328125, "learning_rate": 0.0007932074662964157, "loss": 0.2071, "step": 59026 }, { "epoch": 0.10466152295388478, "grad_norm": 0.28515625, "learning_rate": 0.0007931515130931564, "loss": 0.1594, "step": 59028 }, { "epoch": 0.1046650691191946, "grad_norm": 0.1474609375, "learning_rate": 0.0007930955612318742, "loss": 0.1456, "step": 59030 }, { "epoch": 0.10466861528450441, "grad_norm": 0.5625, "learning_rate": 0.0007930396107128135, "loss": 0.1298, "step": 59032 }, { "epoch": 0.10467216144981423, "grad_norm": 0.84765625, "learning_rate": 0.0007929836615362198, "loss": 0.2503, "step": 59034 }, { "epoch": 0.10467570761512404, "grad_norm": 0.419921875, "learning_rate": 0.0007929277137023365, "loss": 0.2076, "step": 59036 }, { "epoch": 0.10467925378043386, "grad_norm": 0.267578125, "learning_rate": 0.0007928717672114094, "loss": 0.128, "step": 59038 }, { "epoch": 0.10468279994574367, "grad_norm": 0.3203125, "learning_rate": 0.0007928158220636825, "loss": 0.2386, "step": 59040 }, { "epoch": 0.10468634611105349, "grad_norm": 0.3203125, "learning_rate": 0.0007927598782594015, "loss": 0.1802, "step": 59042 }, { "epoch": 0.1046898922763633, "grad_norm": 0.640625, "learning_rate": 0.0007927039357988095, "loss": 0.1856, "step": 59044 }, { "epoch": 0.10469343844167311, "grad_norm": 0.3515625, "learning_rate": 0.0007926479946821523, "loss": 0.1561, "step": 59046 }, { "epoch": 0.10469698460698293, "grad_norm": 0.6640625, "learning_rate": 0.0007925920549096743, "loss": 0.2336, "step": 59048 }, { "epoch": 0.10470053077229274, "grad_norm": 0.265625, "learning_rate": 0.0007925361164816201, "loss": 0.172, "step": 59050 }, { "epoch": 0.10470407693760256, "grad_norm": 0.251953125, "learning_rate": 0.0007924801793982346, "loss": 0.201, "step": 59052 }, { "epoch": 0.10470762310291237, "grad_norm": 0.7421875, "learning_rate": 0.0007924242436597615, "loss": 0.1733, "step": 59054 }, { "epoch": 0.10471116926822219, "grad_norm": 0.65625, "learning_rate": 0.0007923683092664464, "loss": 0.19, "step": 59056 }, { "epoch": 0.104714715433532, "grad_norm": 0.76171875, "learning_rate": 0.0007923123762185339, "loss": 0.1567, "step": 59058 }, { "epoch": 0.10471826159884182, "grad_norm": 0.859375, "learning_rate": 0.0007922564445162678, "loss": 0.2111, "step": 59060 }, { "epoch": 0.10472180776415163, "grad_norm": 0.1533203125, "learning_rate": 0.0007922005141598935, "loss": 0.1356, "step": 59062 }, { "epoch": 0.10472535392946146, "grad_norm": 0.7578125, "learning_rate": 0.0007921445851496556, "loss": 0.1753, "step": 59064 }, { "epoch": 0.10472890009477127, "grad_norm": 0.4921875, "learning_rate": 0.0007920886574857984, "loss": 0.2054, "step": 59066 }, { "epoch": 0.10473244626008109, "grad_norm": 0.36328125, "learning_rate": 0.0007920327311685667, "loss": 0.1924, "step": 59068 }, { "epoch": 0.1047359924253909, "grad_norm": 0.3203125, "learning_rate": 0.0007919768061982042, "loss": 0.1651, "step": 59070 }, { "epoch": 0.10473953859070072, "grad_norm": 0.38671875, "learning_rate": 0.0007919208825749567, "loss": 0.164, "step": 59072 }, { "epoch": 0.10474308475601053, "grad_norm": 0.337890625, "learning_rate": 0.0007918649602990682, "loss": 0.2141, "step": 59074 }, { "epoch": 0.10474663092132035, "grad_norm": 0.51171875, "learning_rate": 0.0007918090393707837, "loss": 0.3052, "step": 59076 }, { "epoch": 0.10475017708663016, "grad_norm": 2.828125, "learning_rate": 0.0007917531197903473, "loss": 0.2352, "step": 59078 }, { "epoch": 0.10475372325193998, "grad_norm": 0.62890625, "learning_rate": 0.0007916972015580035, "loss": 0.1697, "step": 59080 }, { "epoch": 0.10475726941724979, "grad_norm": 0.59375, "learning_rate": 0.0007916412846739973, "loss": 0.1684, "step": 59082 }, { "epoch": 0.1047608155825596, "grad_norm": 0.703125, "learning_rate": 0.000791585369138573, "loss": 0.1915, "step": 59084 }, { "epoch": 0.10476436174786942, "grad_norm": 0.24609375, "learning_rate": 0.0007915294549519752, "loss": 0.1494, "step": 59086 }, { "epoch": 0.10476790791317923, "grad_norm": 0.6796875, "learning_rate": 0.0007914735421144478, "loss": 0.2214, "step": 59088 }, { "epoch": 0.10477145407848905, "grad_norm": 0.46875, "learning_rate": 0.0007914176306262364, "loss": 0.2272, "step": 59090 }, { "epoch": 0.10477500024379886, "grad_norm": 0.9765625, "learning_rate": 0.0007913617204875854, "loss": 0.3095, "step": 59092 }, { "epoch": 0.10477854640910868, "grad_norm": 0.380859375, "learning_rate": 0.0007913058116987383, "loss": 0.1949, "step": 59094 }, { "epoch": 0.10478209257441849, "grad_norm": 1.890625, "learning_rate": 0.0007912499042599404, "loss": 0.2338, "step": 59096 }, { "epoch": 0.1047856387397283, "grad_norm": 0.416015625, "learning_rate": 0.000791193998171436, "loss": 0.1668, "step": 59098 }, { "epoch": 0.10478918490503812, "grad_norm": 0.494140625, "learning_rate": 0.0007911380934334702, "loss": 0.1661, "step": 59100 }, { "epoch": 0.10479273107034794, "grad_norm": 0.427734375, "learning_rate": 0.0007910821900462864, "loss": 0.1374, "step": 59102 }, { "epoch": 0.10479627723565775, "grad_norm": 0.95703125, "learning_rate": 0.00079102628801013, "loss": 0.2021, "step": 59104 }, { "epoch": 0.10479982340096756, "grad_norm": 3.5, "learning_rate": 0.000790970387325245, "loss": 0.2364, "step": 59106 }, { "epoch": 0.10480336956627738, "grad_norm": 0.36328125, "learning_rate": 0.0007909144879918762, "loss": 0.181, "step": 59108 }, { "epoch": 0.10480691573158721, "grad_norm": 0.294921875, "learning_rate": 0.0007908585900102676, "loss": 0.2007, "step": 59110 }, { "epoch": 0.10481046189689702, "grad_norm": 0.72265625, "learning_rate": 0.000790802693380664, "loss": 0.2255, "step": 59112 }, { "epoch": 0.10481400806220684, "grad_norm": 4.4375, "learning_rate": 0.0007907467981033098, "loss": 0.186, "step": 59114 }, { "epoch": 0.10481755422751665, "grad_norm": 0.62109375, "learning_rate": 0.00079069090417845, "loss": 0.3152, "step": 59116 }, { "epoch": 0.10482110039282647, "grad_norm": 0.28125, "learning_rate": 0.0007906350116063281, "loss": 0.2427, "step": 59118 }, { "epoch": 0.10482464655813628, "grad_norm": 0.453125, "learning_rate": 0.0007905791203871886, "loss": 0.1758, "step": 59120 }, { "epoch": 0.1048281927234461, "grad_norm": 0.369140625, "learning_rate": 0.0007905232305212769, "loss": 0.3389, "step": 59122 }, { "epoch": 0.10483173888875591, "grad_norm": 0.74609375, "learning_rate": 0.0007904673420088366, "loss": 0.1536, "step": 59124 }, { "epoch": 0.10483528505406572, "grad_norm": 0.5703125, "learning_rate": 0.0007904114548501124, "loss": 0.1949, "step": 59126 }, { "epoch": 0.10483883121937554, "grad_norm": 0.244140625, "learning_rate": 0.0007903555690453486, "loss": 0.16, "step": 59128 }, { "epoch": 0.10484237738468535, "grad_norm": 0.9765625, "learning_rate": 0.0007902996845947894, "loss": 0.1638, "step": 59130 }, { "epoch": 0.10484592354999517, "grad_norm": 0.515625, "learning_rate": 0.00079024380149868, "loss": 0.1553, "step": 59132 }, { "epoch": 0.10484946971530498, "grad_norm": 0.80859375, "learning_rate": 0.0007901879197572639, "loss": 0.3269, "step": 59134 }, { "epoch": 0.1048530158806148, "grad_norm": 1.8671875, "learning_rate": 0.0007901320393707861, "loss": 0.2718, "step": 59136 }, { "epoch": 0.10485656204592461, "grad_norm": 0.51171875, "learning_rate": 0.0007900761603394904, "loss": 0.1218, "step": 59138 }, { "epoch": 0.10486010821123443, "grad_norm": 0.23828125, "learning_rate": 0.0007900202826636222, "loss": 0.1379, "step": 59140 }, { "epoch": 0.10486365437654424, "grad_norm": 2.609375, "learning_rate": 0.0007899644063434248, "loss": 0.337, "step": 59142 }, { "epoch": 0.10486720054185406, "grad_norm": 0.375, "learning_rate": 0.000789908531379143, "loss": 0.2075, "step": 59144 }, { "epoch": 0.10487074670716387, "grad_norm": 0.9296875, "learning_rate": 0.0007898526577710212, "loss": 0.2143, "step": 59146 }, { "epoch": 0.10487429287247368, "grad_norm": 0.3359375, "learning_rate": 0.000789796785519304, "loss": 0.1744, "step": 59148 }, { "epoch": 0.1048778390377835, "grad_norm": 0.462890625, "learning_rate": 0.0007897409146242349, "loss": 0.1959, "step": 59150 }, { "epoch": 0.10488138520309331, "grad_norm": 0.39453125, "learning_rate": 0.0007896850450860591, "loss": 0.1456, "step": 59152 }, { "epoch": 0.10488493136840314, "grad_norm": 0.486328125, "learning_rate": 0.0007896291769050206, "loss": 0.1562, "step": 59154 }, { "epoch": 0.10488847753371296, "grad_norm": 0.447265625, "learning_rate": 0.000789573310081364, "loss": 0.1665, "step": 59156 }, { "epoch": 0.10489202369902277, "grad_norm": 0.875, "learning_rate": 0.0007895174446153333, "loss": 0.2385, "step": 59158 }, { "epoch": 0.10489556986433259, "grad_norm": 0.193359375, "learning_rate": 0.0007894615805071732, "loss": 0.1792, "step": 59160 }, { "epoch": 0.1048991160296424, "grad_norm": 0.337890625, "learning_rate": 0.0007894057177571275, "loss": 0.2088, "step": 59162 }, { "epoch": 0.10490266219495221, "grad_norm": 0.58203125, "learning_rate": 0.0007893498563654404, "loss": 0.2067, "step": 59164 }, { "epoch": 0.10490620836026203, "grad_norm": 0.8671875, "learning_rate": 0.0007892939963323574, "loss": 0.1693, "step": 59166 }, { "epoch": 0.10490975452557184, "grad_norm": 1.7421875, "learning_rate": 0.0007892381376581215, "loss": 0.249, "step": 59168 }, { "epoch": 0.10491330069088166, "grad_norm": 0.345703125, "learning_rate": 0.0007891822803429774, "loss": 0.1487, "step": 59170 }, { "epoch": 0.10491684685619147, "grad_norm": 0.63671875, "learning_rate": 0.0007891264243871694, "loss": 0.2002, "step": 59172 }, { "epoch": 0.10492039302150129, "grad_norm": 0.64453125, "learning_rate": 0.0007890705697909424, "loss": 0.2386, "step": 59174 }, { "epoch": 0.1049239391868111, "grad_norm": 0.20703125, "learning_rate": 0.0007890147165545397, "loss": 0.1939, "step": 59176 }, { "epoch": 0.10492748535212092, "grad_norm": 1.3203125, "learning_rate": 0.000788958864678206, "loss": 0.2591, "step": 59178 }, { "epoch": 0.10493103151743073, "grad_norm": 0.40234375, "learning_rate": 0.0007889030141621856, "loss": 0.2001, "step": 59180 }, { "epoch": 0.10493457768274055, "grad_norm": 0.2412109375, "learning_rate": 0.0007888471650067229, "loss": 0.1108, "step": 59182 }, { "epoch": 0.10493812384805036, "grad_norm": 0.310546875, "learning_rate": 0.0007887913172120616, "loss": 0.1648, "step": 59184 }, { "epoch": 0.10494167001336017, "grad_norm": 1.078125, "learning_rate": 0.0007887354707784464, "loss": 0.1824, "step": 59186 }, { "epoch": 0.10494521617866999, "grad_norm": 0.48828125, "learning_rate": 0.0007886796257061215, "loss": 0.1445, "step": 59188 }, { "epoch": 0.1049487623439798, "grad_norm": 0.72265625, "learning_rate": 0.0007886237819953312, "loss": 0.1556, "step": 59190 }, { "epoch": 0.10495230850928962, "grad_norm": 0.515625, "learning_rate": 0.0007885679396463194, "loss": 0.4125, "step": 59192 }, { "epoch": 0.10495585467459943, "grad_norm": 0.921875, "learning_rate": 0.0007885120986593306, "loss": 0.2077, "step": 59194 }, { "epoch": 0.10495940083990925, "grad_norm": 0.2314453125, "learning_rate": 0.000788456259034609, "loss": 0.1674, "step": 59196 }, { "epoch": 0.10496294700521906, "grad_norm": 1.0390625, "learning_rate": 0.0007884004207723989, "loss": 0.2089, "step": 59198 }, { "epoch": 0.10496649317052889, "grad_norm": 0.765625, "learning_rate": 0.0007883445838729444, "loss": 0.2828, "step": 59200 }, { "epoch": 0.1049700393358387, "grad_norm": 0.77734375, "learning_rate": 0.0007882887483364891, "loss": 0.1883, "step": 59202 }, { "epoch": 0.10497358550114852, "grad_norm": 0.5078125, "learning_rate": 0.000788232914163278, "loss": 0.1523, "step": 59204 }, { "epoch": 0.10497713166645833, "grad_norm": 0.322265625, "learning_rate": 0.0007881770813535556, "loss": 0.203, "step": 59206 }, { "epoch": 0.10498067783176815, "grad_norm": 1.015625, "learning_rate": 0.0007881212499075649, "loss": 0.2197, "step": 59208 }, { "epoch": 0.10498422399707796, "grad_norm": 2.0, "learning_rate": 0.0007880654198255509, "loss": 0.2264, "step": 59210 }, { "epoch": 0.10498777016238778, "grad_norm": 0.50390625, "learning_rate": 0.0007880095911077571, "loss": 0.3321, "step": 59212 }, { "epoch": 0.10499131632769759, "grad_norm": 0.5546875, "learning_rate": 0.0007879537637544289, "loss": 0.2354, "step": 59214 }, { "epoch": 0.10499486249300741, "grad_norm": 0.78125, "learning_rate": 0.0007878979377658093, "loss": 0.2055, "step": 59216 }, { "epoch": 0.10499840865831722, "grad_norm": 1.0078125, "learning_rate": 0.000787842113142143, "loss": 0.2077, "step": 59218 }, { "epoch": 0.10500195482362704, "grad_norm": 0.40625, "learning_rate": 0.0007877862898836737, "loss": 0.1584, "step": 59220 }, { "epoch": 0.10500550098893685, "grad_norm": 0.251953125, "learning_rate": 0.0007877304679906461, "loss": 0.1226, "step": 59222 }, { "epoch": 0.10500904715424667, "grad_norm": 0.47265625, "learning_rate": 0.0007876746474633038, "loss": 0.1764, "step": 59224 }, { "epoch": 0.10501259331955648, "grad_norm": 0.32421875, "learning_rate": 0.0007876188283018913, "loss": 0.3081, "step": 59226 }, { "epoch": 0.1050161394848663, "grad_norm": 0.640625, "learning_rate": 0.0007875630105066522, "loss": 0.1508, "step": 59228 }, { "epoch": 0.10501968565017611, "grad_norm": 1.0390625, "learning_rate": 0.0007875071940778318, "loss": 0.3866, "step": 59230 }, { "epoch": 0.10502323181548592, "grad_norm": 0.640625, "learning_rate": 0.000787451379015673, "loss": 0.1765, "step": 59232 }, { "epoch": 0.10502677798079574, "grad_norm": 0.2412109375, "learning_rate": 0.0007873955653204199, "loss": 0.1768, "step": 59234 }, { "epoch": 0.10503032414610555, "grad_norm": 0.361328125, "learning_rate": 0.0007873397529923177, "loss": 0.1715, "step": 59236 }, { "epoch": 0.10503387031141537, "grad_norm": 1.453125, "learning_rate": 0.0007872839420316094, "loss": 0.2572, "step": 59238 }, { "epoch": 0.10503741647672518, "grad_norm": 0.60546875, "learning_rate": 0.0007872281324385398, "loss": 0.1558, "step": 59240 }, { "epoch": 0.105040962642035, "grad_norm": 1.515625, "learning_rate": 0.0007871723242133524, "loss": 0.1988, "step": 59242 }, { "epoch": 0.10504450880734481, "grad_norm": 0.76171875, "learning_rate": 0.0007871165173562915, "loss": 0.1658, "step": 59244 }, { "epoch": 0.10504805497265464, "grad_norm": 0.396484375, "learning_rate": 0.0007870607118676012, "loss": 0.1924, "step": 59246 }, { "epoch": 0.10505160113796445, "grad_norm": 5.375, "learning_rate": 0.0007870049077475259, "loss": 0.2963, "step": 59248 }, { "epoch": 0.10505514730327427, "grad_norm": 1.21875, "learning_rate": 0.0007869491049963089, "loss": 0.153, "step": 59250 }, { "epoch": 0.10505869346858408, "grad_norm": 0.279296875, "learning_rate": 0.000786893303614195, "loss": 0.1689, "step": 59252 }, { "epoch": 0.1050622396338939, "grad_norm": 0.359375, "learning_rate": 0.0007868375036014277, "loss": 0.1488, "step": 59254 }, { "epoch": 0.10506578579920371, "grad_norm": 2.3125, "learning_rate": 0.0007867817049582518, "loss": 0.2285, "step": 59256 }, { "epoch": 0.10506933196451353, "grad_norm": 0.53125, "learning_rate": 0.00078672590768491, "loss": 0.154, "step": 59258 }, { "epoch": 0.10507287812982334, "grad_norm": 0.423828125, "learning_rate": 0.0007866701117816475, "loss": 0.166, "step": 59260 }, { "epoch": 0.10507642429513316, "grad_norm": 0.61328125, "learning_rate": 0.0007866143172487077, "loss": 0.2913, "step": 59262 }, { "epoch": 0.10507997046044297, "grad_norm": 1.359375, "learning_rate": 0.0007865585240863353, "loss": 0.2001, "step": 59264 }, { "epoch": 0.10508351662575278, "grad_norm": 0.70703125, "learning_rate": 0.0007865027322947736, "loss": 0.2055, "step": 59266 }, { "epoch": 0.1050870627910626, "grad_norm": 0.267578125, "learning_rate": 0.0007864469418742666, "loss": 0.1629, "step": 59268 }, { "epoch": 0.10509060895637241, "grad_norm": 0.26953125, "learning_rate": 0.0007863911528250589, "loss": 0.1561, "step": 59270 }, { "epoch": 0.10509415512168223, "grad_norm": 0.4921875, "learning_rate": 0.0007863353651473941, "loss": 0.2031, "step": 59272 }, { "epoch": 0.10509770128699204, "grad_norm": 0.388671875, "learning_rate": 0.000786279578841516, "loss": 0.1957, "step": 59274 }, { "epoch": 0.10510124745230186, "grad_norm": 0.1552734375, "learning_rate": 0.0007862237939076688, "loss": 0.1661, "step": 59276 }, { "epoch": 0.10510479361761167, "grad_norm": 2.5, "learning_rate": 0.0007861680103460964, "loss": 0.2907, "step": 59278 }, { "epoch": 0.10510833978292149, "grad_norm": 0.408203125, "learning_rate": 0.0007861122281570434, "loss": 0.1744, "step": 59280 }, { "epoch": 0.1051118859482313, "grad_norm": 0.306640625, "learning_rate": 0.0007860564473407526, "loss": 0.1508, "step": 59282 }, { "epoch": 0.10511543211354112, "grad_norm": 0.337890625, "learning_rate": 0.0007860006678974687, "loss": 0.1147, "step": 59284 }, { "epoch": 0.10511897827885093, "grad_norm": 0.234375, "learning_rate": 0.0007859448898274353, "loss": 0.2, "step": 59286 }, { "epoch": 0.10512252444416074, "grad_norm": 0.703125, "learning_rate": 0.0007858891131308969, "loss": 0.2318, "step": 59288 }, { "epoch": 0.10512607060947057, "grad_norm": 0.671875, "learning_rate": 0.0007858333378080968, "loss": 0.1616, "step": 59290 }, { "epoch": 0.10512961677478039, "grad_norm": 2.671875, "learning_rate": 0.0007857775638592795, "loss": 0.2651, "step": 59292 }, { "epoch": 0.1051331629400902, "grad_norm": 0.875, "learning_rate": 0.0007857217912846883, "loss": 0.1931, "step": 59294 }, { "epoch": 0.10513670910540002, "grad_norm": 0.85546875, "learning_rate": 0.0007856660200845678, "loss": 0.1924, "step": 59296 }, { "epoch": 0.10514025527070983, "grad_norm": 0.4921875, "learning_rate": 0.0007856102502591611, "loss": 0.2175, "step": 59298 }, { "epoch": 0.10514380143601965, "grad_norm": 0.5703125, "learning_rate": 0.0007855544818087125, "loss": 0.1052, "step": 59300 }, { "epoch": 0.10514734760132946, "grad_norm": 0.26953125, "learning_rate": 0.0007854987147334662, "loss": 0.1482, "step": 59302 }, { "epoch": 0.10515089376663928, "grad_norm": 0.33203125, "learning_rate": 0.0007854429490336659, "loss": 0.1986, "step": 59304 }, { "epoch": 0.10515443993194909, "grad_norm": 0.4140625, "learning_rate": 0.0007853871847095553, "loss": 0.2532, "step": 59306 }, { "epoch": 0.1051579860972589, "grad_norm": 0.21484375, "learning_rate": 0.0007853314217613783, "loss": 0.1561, "step": 59308 }, { "epoch": 0.10516153226256872, "grad_norm": 0.404296875, "learning_rate": 0.0007852756601893792, "loss": 0.2152, "step": 59310 }, { "epoch": 0.10516507842787853, "grad_norm": 0.248046875, "learning_rate": 0.0007852198999938014, "loss": 0.1858, "step": 59312 }, { "epoch": 0.10516862459318835, "grad_norm": 0.62109375, "learning_rate": 0.0007851641411748889, "loss": 0.1836, "step": 59314 }, { "epoch": 0.10517217075849816, "grad_norm": 0.322265625, "learning_rate": 0.0007851083837328854, "loss": 0.1838, "step": 59316 }, { "epoch": 0.10517571692380798, "grad_norm": 3.078125, "learning_rate": 0.0007850526276680348, "loss": 0.3331, "step": 59318 }, { "epoch": 0.10517926308911779, "grad_norm": 0.298828125, "learning_rate": 0.0007849968729805816, "loss": 0.3898, "step": 59320 }, { "epoch": 0.1051828092544276, "grad_norm": 1.265625, "learning_rate": 0.0007849411196707688, "loss": 0.1963, "step": 59322 }, { "epoch": 0.10518635541973742, "grad_norm": 0.5, "learning_rate": 0.0007848853677388404, "loss": 0.1611, "step": 59324 }, { "epoch": 0.10518990158504724, "grad_norm": 0.421875, "learning_rate": 0.0007848296171850406, "loss": 0.2529, "step": 59326 }, { "epoch": 0.10519344775035705, "grad_norm": 0.443359375, "learning_rate": 0.0007847738680096129, "loss": 0.2333, "step": 59328 }, { "epoch": 0.10519699391566686, "grad_norm": 0.5078125, "learning_rate": 0.0007847181202128013, "loss": 0.1586, "step": 59330 }, { "epoch": 0.10520054008097668, "grad_norm": 0.33984375, "learning_rate": 0.0007846623737948492, "loss": 0.169, "step": 59332 }, { "epoch": 0.1052040862462865, "grad_norm": 1.234375, "learning_rate": 0.0007846066287560008, "loss": 0.1915, "step": 59334 }, { "epoch": 0.10520763241159632, "grad_norm": 0.42578125, "learning_rate": 0.0007845508850965, "loss": 0.1691, "step": 59336 }, { "epoch": 0.10521117857690614, "grad_norm": 0.337890625, "learning_rate": 0.0007844951428165905, "loss": 0.1944, "step": 59338 }, { "epoch": 0.10521472474221595, "grad_norm": 0.4609375, "learning_rate": 0.0007844394019165155, "loss": 0.1793, "step": 59340 }, { "epoch": 0.10521827090752577, "grad_norm": 0.279296875, "learning_rate": 0.0007843836623965195, "loss": 0.1808, "step": 59342 }, { "epoch": 0.10522181707283558, "grad_norm": 0.3515625, "learning_rate": 0.0007843279242568462, "loss": 0.2053, "step": 59344 }, { "epoch": 0.1052253632381454, "grad_norm": 0.2265625, "learning_rate": 0.0007842721874977393, "loss": 0.1473, "step": 59346 }, { "epoch": 0.10522890940345521, "grad_norm": 0.466796875, "learning_rate": 0.0007842164521194422, "loss": 0.1475, "step": 59348 }, { "epoch": 0.10523245556876502, "grad_norm": 0.55078125, "learning_rate": 0.0007841607181221988, "loss": 0.1327, "step": 59350 }, { "epoch": 0.10523600173407484, "grad_norm": 0.72265625, "learning_rate": 0.000784104985506253, "loss": 0.2368, "step": 59352 }, { "epoch": 0.10523954789938465, "grad_norm": 1.6796875, "learning_rate": 0.0007840492542718487, "loss": 0.223, "step": 59354 }, { "epoch": 0.10524309406469447, "grad_norm": 0.28515625, "learning_rate": 0.0007839935244192292, "loss": 0.2434, "step": 59356 }, { "epoch": 0.10524664023000428, "grad_norm": 0.7578125, "learning_rate": 0.0007839377959486384, "loss": 0.1897, "step": 59358 }, { "epoch": 0.1052501863953141, "grad_norm": 0.4609375, "learning_rate": 0.0007838820688603203, "loss": 0.1719, "step": 59360 }, { "epoch": 0.10525373256062391, "grad_norm": 0.396484375, "learning_rate": 0.0007838263431545187, "loss": 0.2039, "step": 59362 }, { "epoch": 0.10525727872593373, "grad_norm": 0.462890625, "learning_rate": 0.0007837706188314766, "loss": 0.1536, "step": 59364 }, { "epoch": 0.10526082489124354, "grad_norm": 1.6171875, "learning_rate": 0.0007837148958914382, "loss": 0.1557, "step": 59366 }, { "epoch": 0.10526437105655335, "grad_norm": 0.427734375, "learning_rate": 0.0007836591743346471, "loss": 0.1767, "step": 59368 }, { "epoch": 0.10526791722186317, "grad_norm": 0.251953125, "learning_rate": 0.0007836034541613472, "loss": 0.2247, "step": 59370 }, { "epoch": 0.10527146338717298, "grad_norm": 0.6328125, "learning_rate": 0.0007835477353717817, "loss": 0.1448, "step": 59372 }, { "epoch": 0.1052750095524828, "grad_norm": 1.515625, "learning_rate": 0.0007834920179661948, "loss": 0.2311, "step": 59374 }, { "epoch": 0.10527855571779261, "grad_norm": 0.34375, "learning_rate": 0.0007834363019448298, "loss": 0.1735, "step": 59376 }, { "epoch": 0.10528210188310243, "grad_norm": 0.177734375, "learning_rate": 0.0007833805873079309, "loss": 0.1493, "step": 59378 }, { "epoch": 0.10528564804841224, "grad_norm": 0.828125, "learning_rate": 0.000783324874055741, "loss": 0.1748, "step": 59380 }, { "epoch": 0.10528919421372207, "grad_norm": 0.37109375, "learning_rate": 0.0007832691621885042, "loss": 0.2299, "step": 59382 }, { "epoch": 0.10529274037903188, "grad_norm": 0.26953125, "learning_rate": 0.0007832134517064642, "loss": 0.1776, "step": 59384 }, { "epoch": 0.1052962865443417, "grad_norm": 0.267578125, "learning_rate": 0.0007831577426098648, "loss": 0.1691, "step": 59386 }, { "epoch": 0.10529983270965151, "grad_norm": 0.314453125, "learning_rate": 0.0007831020348989492, "loss": 0.2513, "step": 59388 }, { "epoch": 0.10530337887496133, "grad_norm": 2.46875, "learning_rate": 0.000783046328573961, "loss": 0.2454, "step": 59390 }, { "epoch": 0.10530692504027114, "grad_norm": 1.859375, "learning_rate": 0.0007829906236351443, "loss": 0.3701, "step": 59392 }, { "epoch": 0.10531047120558096, "grad_norm": 0.2060546875, "learning_rate": 0.0007829349200827427, "loss": 0.178, "step": 59394 }, { "epoch": 0.10531401737089077, "grad_norm": 0.279296875, "learning_rate": 0.000782879217916999, "loss": 0.1395, "step": 59396 }, { "epoch": 0.10531756353620059, "grad_norm": 0.267578125, "learning_rate": 0.0007828235171381577, "loss": 0.3625, "step": 59398 }, { "epoch": 0.1053211097015104, "grad_norm": 0.29296875, "learning_rate": 0.0007827678177464619, "loss": 0.1436, "step": 59400 }, { "epoch": 0.10532465586682022, "grad_norm": 0.318359375, "learning_rate": 0.0007827121197421558, "loss": 0.176, "step": 59402 }, { "epoch": 0.10532820203213003, "grad_norm": 0.765625, "learning_rate": 0.0007826564231254822, "loss": 0.3046, "step": 59404 }, { "epoch": 0.10533174819743985, "grad_norm": 0.38671875, "learning_rate": 0.0007826007278966854, "loss": 0.1418, "step": 59406 }, { "epoch": 0.10533529436274966, "grad_norm": 0.48046875, "learning_rate": 0.0007825450340560083, "loss": 0.1873, "step": 59408 }, { "epoch": 0.10533884052805947, "grad_norm": 2.625, "learning_rate": 0.0007824893416036949, "loss": 0.418, "step": 59410 }, { "epoch": 0.10534238669336929, "grad_norm": 0.52734375, "learning_rate": 0.000782433650539989, "loss": 0.1807, "step": 59412 }, { "epoch": 0.1053459328586791, "grad_norm": 0.353515625, "learning_rate": 0.0007823779608651336, "loss": 0.1468, "step": 59414 }, { "epoch": 0.10534947902398892, "grad_norm": 3.484375, "learning_rate": 0.0007823222725793724, "loss": 0.211, "step": 59416 }, { "epoch": 0.10535302518929873, "grad_norm": 0.318359375, "learning_rate": 0.0007822665856829492, "loss": 0.1259, "step": 59418 }, { "epoch": 0.10535657135460855, "grad_norm": 0.275390625, "learning_rate": 0.0007822109001761076, "loss": 0.1945, "step": 59420 }, { "epoch": 0.10536011751991836, "grad_norm": 0.25390625, "learning_rate": 0.0007821552160590907, "loss": 0.1705, "step": 59422 }, { "epoch": 0.10536366368522818, "grad_norm": 0.52734375, "learning_rate": 0.0007820995333321421, "loss": 0.1585, "step": 59424 }, { "epoch": 0.105367209850538, "grad_norm": 1.796875, "learning_rate": 0.0007820438519955056, "loss": 0.2697, "step": 59426 }, { "epoch": 0.10537075601584782, "grad_norm": 0.44140625, "learning_rate": 0.0007819881720494249, "loss": 0.2182, "step": 59428 }, { "epoch": 0.10537430218115763, "grad_norm": 1.6171875, "learning_rate": 0.000781932493494143, "loss": 0.4109, "step": 59430 }, { "epoch": 0.10537784834646745, "grad_norm": 0.345703125, "learning_rate": 0.0007818768163299032, "loss": 0.1778, "step": 59432 }, { "epoch": 0.10538139451177726, "grad_norm": 0.40234375, "learning_rate": 0.0007818211405569498, "loss": 0.181, "step": 59434 }, { "epoch": 0.10538494067708708, "grad_norm": 0.2216796875, "learning_rate": 0.0007817654661755263, "loss": 0.1933, "step": 59436 }, { "epoch": 0.10538848684239689, "grad_norm": 0.234375, "learning_rate": 0.0007817097931858752, "loss": 0.2219, "step": 59438 }, { "epoch": 0.1053920330077067, "grad_norm": 1.390625, "learning_rate": 0.0007816541215882409, "loss": 0.2657, "step": 59440 }, { "epoch": 0.10539557917301652, "grad_norm": 0.4140625, "learning_rate": 0.0007815984513828664, "loss": 0.2346, "step": 59442 }, { "epoch": 0.10539912533832634, "grad_norm": 0.328125, "learning_rate": 0.0007815427825699954, "loss": 0.2179, "step": 59444 }, { "epoch": 0.10540267150363615, "grad_norm": 0.57421875, "learning_rate": 0.0007814871151498715, "loss": 0.1682, "step": 59446 }, { "epoch": 0.10540621766894596, "grad_norm": 0.6484375, "learning_rate": 0.0007814314491227374, "loss": 0.1528, "step": 59448 }, { "epoch": 0.10540976383425578, "grad_norm": 1.046875, "learning_rate": 0.0007813757844888373, "loss": 0.3794, "step": 59450 }, { "epoch": 0.1054133099995656, "grad_norm": 0.2275390625, "learning_rate": 0.0007813201212484147, "loss": 0.1693, "step": 59452 }, { "epoch": 0.10541685616487541, "grad_norm": 0.263671875, "learning_rate": 0.0007812644594017127, "loss": 0.1513, "step": 59454 }, { "epoch": 0.10542040233018522, "grad_norm": 0.61328125, "learning_rate": 0.0007812087989489745, "loss": 0.1864, "step": 59456 }, { "epoch": 0.10542394849549504, "grad_norm": 0.392578125, "learning_rate": 0.0007811531398904441, "loss": 0.3142, "step": 59458 }, { "epoch": 0.10542749466080485, "grad_norm": 0.431640625, "learning_rate": 0.0007810974822263648, "loss": 0.2266, "step": 59460 }, { "epoch": 0.10543104082611467, "grad_norm": 0.5234375, "learning_rate": 0.0007810418259569796, "loss": 0.1958, "step": 59462 }, { "epoch": 0.10543458699142448, "grad_norm": 0.328125, "learning_rate": 0.0007809861710825322, "loss": 0.1684, "step": 59464 }, { "epoch": 0.1054381331567343, "grad_norm": 0.244140625, "learning_rate": 0.0007809305176032659, "loss": 0.1918, "step": 59466 }, { "epoch": 0.10544167932204411, "grad_norm": 1.296875, "learning_rate": 0.0007808748655194247, "loss": 0.1796, "step": 59468 }, { "epoch": 0.10544522548735392, "grad_norm": 0.71875, "learning_rate": 0.000780819214831251, "loss": 0.1392, "step": 59470 }, { "epoch": 0.10544877165266375, "grad_norm": 0.3125, "learning_rate": 0.0007807635655389887, "loss": 0.18, "step": 59472 }, { "epoch": 0.10545231781797357, "grad_norm": 0.396484375, "learning_rate": 0.0007807079176428811, "loss": 0.1804, "step": 59474 }, { "epoch": 0.10545586398328338, "grad_norm": 0.244140625, "learning_rate": 0.0007806522711431719, "loss": 0.1505, "step": 59476 }, { "epoch": 0.1054594101485932, "grad_norm": 0.373046875, "learning_rate": 0.0007805966260401038, "loss": 0.1281, "step": 59478 }, { "epoch": 0.10546295631390301, "grad_norm": 0.38671875, "learning_rate": 0.0007805409823339207, "loss": 0.1778, "step": 59480 }, { "epoch": 0.10546650247921283, "grad_norm": 1.21875, "learning_rate": 0.0007804853400248657, "loss": 0.2406, "step": 59482 }, { "epoch": 0.10547004864452264, "grad_norm": 0.2734375, "learning_rate": 0.0007804296991131825, "loss": 0.1372, "step": 59484 }, { "epoch": 0.10547359480983245, "grad_norm": 2.96875, "learning_rate": 0.0007803740595991138, "loss": 0.2137, "step": 59486 }, { "epoch": 0.10547714097514227, "grad_norm": 0.43359375, "learning_rate": 0.0007803184214829036, "loss": 0.1714, "step": 59488 }, { "epoch": 0.10548068714045208, "grad_norm": 1.8828125, "learning_rate": 0.0007802627847647945, "loss": 0.2427, "step": 59490 }, { "epoch": 0.1054842333057619, "grad_norm": 0.73046875, "learning_rate": 0.0007802071494450305, "loss": 0.1873, "step": 59492 }, { "epoch": 0.10548777947107171, "grad_norm": 0.59765625, "learning_rate": 0.0007801515155238548, "loss": 0.1916, "step": 59494 }, { "epoch": 0.10549132563638153, "grad_norm": 0.73828125, "learning_rate": 0.0007800958830015103, "loss": 0.1781, "step": 59496 }, { "epoch": 0.10549487180169134, "grad_norm": 0.4765625, "learning_rate": 0.000780040251878241, "loss": 0.1797, "step": 59498 }, { "epoch": 0.10549841796700116, "grad_norm": 0.451171875, "learning_rate": 0.0007799846221542894, "loss": 0.1706, "step": 59500 }, { "epoch": 0.10550196413231097, "grad_norm": 1.6328125, "learning_rate": 0.0007799289938298997, "loss": 0.2012, "step": 59502 }, { "epoch": 0.10550551029762079, "grad_norm": 0.609375, "learning_rate": 0.0007798733669053142, "loss": 0.1788, "step": 59504 }, { "epoch": 0.1055090564629306, "grad_norm": 0.50390625, "learning_rate": 0.0007798177413807766, "loss": 0.1488, "step": 59506 }, { "epoch": 0.10551260262824042, "grad_norm": 0.87109375, "learning_rate": 0.0007797621172565301, "loss": 0.2123, "step": 59508 }, { "epoch": 0.10551614879355023, "grad_norm": 0.921875, "learning_rate": 0.0007797064945328187, "loss": 0.2242, "step": 59510 }, { "epoch": 0.10551969495886004, "grad_norm": 0.69140625, "learning_rate": 0.0007796508732098846, "loss": 0.1229, "step": 59512 }, { "epoch": 0.10552324112416986, "grad_norm": 0.361328125, "learning_rate": 0.0007795952532879715, "loss": 0.2091, "step": 59514 }, { "epoch": 0.10552678728947967, "grad_norm": 0.431640625, "learning_rate": 0.0007795396347673227, "loss": 0.2309, "step": 59516 }, { "epoch": 0.1055303334547895, "grad_norm": 1.0546875, "learning_rate": 0.0007794840176481816, "loss": 0.4786, "step": 59518 }, { "epoch": 0.10553387962009932, "grad_norm": 1.1328125, "learning_rate": 0.0007794284019307911, "loss": 0.1496, "step": 59520 }, { "epoch": 0.10553742578540913, "grad_norm": 0.419921875, "learning_rate": 0.0007793727876153944, "loss": 0.2788, "step": 59522 }, { "epoch": 0.10554097195071895, "grad_norm": 0.3671875, "learning_rate": 0.0007793171747022347, "loss": 0.1709, "step": 59524 }, { "epoch": 0.10554451811602876, "grad_norm": 0.318359375, "learning_rate": 0.0007792615631915561, "loss": 0.1437, "step": 59526 }, { "epoch": 0.10554806428133857, "grad_norm": 0.26953125, "learning_rate": 0.0007792059530836007, "loss": 0.1565, "step": 59528 }, { "epoch": 0.10555161044664839, "grad_norm": 0.380859375, "learning_rate": 0.0007791503443786118, "loss": 0.1729, "step": 59530 }, { "epoch": 0.1055551566119582, "grad_norm": 0.35546875, "learning_rate": 0.0007790947370768333, "loss": 0.1651, "step": 59532 }, { "epoch": 0.10555870277726802, "grad_norm": 0.353515625, "learning_rate": 0.0007790391311785081, "loss": 0.1938, "step": 59534 }, { "epoch": 0.10556224894257783, "grad_norm": 0.26953125, "learning_rate": 0.0007789835266838791, "loss": 0.1575, "step": 59536 }, { "epoch": 0.10556579510788765, "grad_norm": 0.322265625, "learning_rate": 0.0007789279235931899, "loss": 0.2234, "step": 59538 }, { "epoch": 0.10556934127319746, "grad_norm": 0.53515625, "learning_rate": 0.0007788723219066831, "loss": 0.1531, "step": 59540 }, { "epoch": 0.10557288743850728, "grad_norm": 0.7265625, "learning_rate": 0.0007788167216246027, "loss": 0.2121, "step": 59542 }, { "epoch": 0.10557643360381709, "grad_norm": 0.6875, "learning_rate": 0.0007787611227471911, "loss": 0.1889, "step": 59544 }, { "epoch": 0.1055799797691269, "grad_norm": 0.9140625, "learning_rate": 0.0007787055252746918, "loss": 0.178, "step": 59546 }, { "epoch": 0.10558352593443672, "grad_norm": 1.6484375, "learning_rate": 0.0007786499292073476, "loss": 0.1503, "step": 59548 }, { "epoch": 0.10558707209974653, "grad_norm": 0.240234375, "learning_rate": 0.0007785943345454025, "loss": 0.2095, "step": 59550 }, { "epoch": 0.10559061826505635, "grad_norm": 0.361328125, "learning_rate": 0.0007785387412890987, "loss": 0.1663, "step": 59552 }, { "epoch": 0.10559416443036616, "grad_norm": 0.4609375, "learning_rate": 0.0007784831494386801, "loss": 0.2229, "step": 59554 }, { "epoch": 0.10559771059567598, "grad_norm": 0.353515625, "learning_rate": 0.000778427558994389, "loss": 0.1712, "step": 59556 }, { "epoch": 0.10560125676098579, "grad_norm": 1.9375, "learning_rate": 0.0007783719699564696, "loss": 0.2266, "step": 59558 }, { "epoch": 0.10560480292629561, "grad_norm": 0.64453125, "learning_rate": 0.0007783163823251637, "loss": 0.203, "step": 59560 }, { "epoch": 0.10560834909160544, "grad_norm": 0.435546875, "learning_rate": 0.0007782607961007153, "loss": 0.1206, "step": 59562 }, { "epoch": 0.10561189525691525, "grad_norm": 0.55859375, "learning_rate": 0.0007782052112833671, "loss": 0.2074, "step": 59564 }, { "epoch": 0.10561544142222506, "grad_norm": 0.330078125, "learning_rate": 0.0007781496278733629, "loss": 0.1628, "step": 59566 }, { "epoch": 0.10561898758753488, "grad_norm": 0.52734375, "learning_rate": 0.0007780940458709449, "loss": 0.2562, "step": 59568 }, { "epoch": 0.1056225337528447, "grad_norm": 0.95703125, "learning_rate": 0.0007780384652763564, "loss": 0.3556, "step": 59570 }, { "epoch": 0.10562607991815451, "grad_norm": 0.7578125, "learning_rate": 0.000777982886089841, "loss": 0.1999, "step": 59572 }, { "epoch": 0.10562962608346432, "grad_norm": 0.2158203125, "learning_rate": 0.0007779273083116414, "loss": 0.2128, "step": 59574 }, { "epoch": 0.10563317224877414, "grad_norm": 0.546875, "learning_rate": 0.0007778717319420006, "loss": 0.2099, "step": 59576 }, { "epoch": 0.10563671841408395, "grad_norm": 0.27734375, "learning_rate": 0.0007778161569811614, "loss": 0.1765, "step": 59578 }, { "epoch": 0.10564026457939377, "grad_norm": 0.88671875, "learning_rate": 0.0007777605834293675, "loss": 0.1787, "step": 59580 }, { "epoch": 0.10564381074470358, "grad_norm": 0.6796875, "learning_rate": 0.0007777050112868616, "loss": 0.2102, "step": 59582 }, { "epoch": 0.1056473569100134, "grad_norm": 0.52734375, "learning_rate": 0.0007776494405538869, "loss": 0.1547, "step": 59584 }, { "epoch": 0.10565090307532321, "grad_norm": 0.279296875, "learning_rate": 0.000777593871230686, "loss": 0.1716, "step": 59586 }, { "epoch": 0.10565444924063302, "grad_norm": 0.447265625, "learning_rate": 0.0007775383033175026, "loss": 0.1642, "step": 59588 }, { "epoch": 0.10565799540594284, "grad_norm": 0.51953125, "learning_rate": 0.0007774827368145789, "loss": 0.1846, "step": 59590 }, { "epoch": 0.10566154157125265, "grad_norm": 0.27734375, "learning_rate": 0.0007774271717221591, "loss": 0.1666, "step": 59592 }, { "epoch": 0.10566508773656247, "grad_norm": 0.609375, "learning_rate": 0.0007773716080404849, "loss": 0.1967, "step": 59594 }, { "epoch": 0.10566863390187228, "grad_norm": 0.19921875, "learning_rate": 0.0007773160457697996, "loss": 0.119, "step": 59596 }, { "epoch": 0.1056721800671821, "grad_norm": 0.341796875, "learning_rate": 0.0007772604849103469, "loss": 0.1094, "step": 59598 }, { "epoch": 0.10567572623249191, "grad_norm": 0.255859375, "learning_rate": 0.0007772049254623697, "loss": 0.1411, "step": 59600 }, { "epoch": 0.10567927239780173, "grad_norm": 0.9453125, "learning_rate": 0.0007771493674261101, "loss": 0.1941, "step": 59602 }, { "epoch": 0.10568281856311154, "grad_norm": 0.376953125, "learning_rate": 0.0007770938108018115, "loss": 0.1834, "step": 59604 }, { "epoch": 0.10568636472842136, "grad_norm": 0.22265625, "learning_rate": 0.0007770382555897175, "loss": 0.1297, "step": 59606 }, { "epoch": 0.10568991089373118, "grad_norm": 1.2421875, "learning_rate": 0.0007769827017900706, "loss": 0.1736, "step": 59608 }, { "epoch": 0.105693457059041, "grad_norm": 0.455078125, "learning_rate": 0.0007769271494031136, "loss": 0.1509, "step": 59610 }, { "epoch": 0.10569700322435081, "grad_norm": 0.439453125, "learning_rate": 0.0007768715984290893, "loss": 0.2432, "step": 59612 }, { "epoch": 0.10570054938966063, "grad_norm": 0.859375, "learning_rate": 0.000776816048868241, "loss": 0.3025, "step": 59614 }, { "epoch": 0.10570409555497044, "grad_norm": 0.31640625, "learning_rate": 0.0007767605007208121, "loss": 0.1875, "step": 59616 }, { "epoch": 0.10570764172028026, "grad_norm": 0.376953125, "learning_rate": 0.0007767049539870443, "loss": 0.2197, "step": 59618 }, { "epoch": 0.10571118788559007, "grad_norm": 1.0234375, "learning_rate": 0.0007766494086671816, "loss": 0.1638, "step": 59620 }, { "epoch": 0.10571473405089989, "grad_norm": 0.41015625, "learning_rate": 0.0007765938647614665, "loss": 0.1643, "step": 59622 }, { "epoch": 0.1057182802162097, "grad_norm": 3.140625, "learning_rate": 0.0007765383222701423, "loss": 0.3973, "step": 59624 }, { "epoch": 0.10572182638151952, "grad_norm": 0.380859375, "learning_rate": 0.000776482781193451, "loss": 0.158, "step": 59626 }, { "epoch": 0.10572537254682933, "grad_norm": 0.546875, "learning_rate": 0.0007764272415316365, "loss": 0.1974, "step": 59628 }, { "epoch": 0.10572891871213914, "grad_norm": 0.484375, "learning_rate": 0.0007763717032849411, "loss": 0.1421, "step": 59630 }, { "epoch": 0.10573246487744896, "grad_norm": 4.40625, "learning_rate": 0.0007763161664536083, "loss": 0.2483, "step": 59632 }, { "epoch": 0.10573601104275877, "grad_norm": 2.859375, "learning_rate": 0.0007762606310378799, "loss": 0.3454, "step": 59634 }, { "epoch": 0.10573955720806859, "grad_norm": 0.62109375, "learning_rate": 0.0007762050970379999, "loss": 0.2808, "step": 59636 }, { "epoch": 0.1057431033733784, "grad_norm": 0.251953125, "learning_rate": 0.0007761495644542104, "loss": 0.2326, "step": 59638 }, { "epoch": 0.10574664953868822, "grad_norm": 0.31640625, "learning_rate": 0.0007760940332867553, "loss": 0.1874, "step": 59640 }, { "epoch": 0.10575019570399803, "grad_norm": 0.2177734375, "learning_rate": 0.0007760385035358759, "loss": 0.1439, "step": 59642 }, { "epoch": 0.10575374186930785, "grad_norm": 0.349609375, "learning_rate": 0.0007759829752018161, "loss": 0.2549, "step": 59644 }, { "epoch": 0.10575728803461766, "grad_norm": 0.53515625, "learning_rate": 0.0007759274482848189, "loss": 0.2032, "step": 59646 }, { "epoch": 0.10576083419992748, "grad_norm": 0.484375, "learning_rate": 0.0007758719227851265, "loss": 0.1942, "step": 59648 }, { "epoch": 0.10576438036523729, "grad_norm": 1.1640625, "learning_rate": 0.0007758163987029824, "loss": 0.4442, "step": 59650 }, { "epoch": 0.1057679265305471, "grad_norm": 0.98828125, "learning_rate": 0.0007757608760386286, "loss": 0.1986, "step": 59652 }, { "epoch": 0.10577147269585693, "grad_norm": 0.7265625, "learning_rate": 0.0007757053547923085, "loss": 0.2743, "step": 59654 }, { "epoch": 0.10577501886116675, "grad_norm": 0.470703125, "learning_rate": 0.0007756498349642652, "loss": 0.2461, "step": 59656 }, { "epoch": 0.10577856502647656, "grad_norm": 0.439453125, "learning_rate": 0.0007755943165547408, "loss": 0.2112, "step": 59658 }, { "epoch": 0.10578211119178638, "grad_norm": 0.859375, "learning_rate": 0.0007755387995639782, "loss": 0.2196, "step": 59660 }, { "epoch": 0.10578565735709619, "grad_norm": 0.33203125, "learning_rate": 0.0007754832839922206, "loss": 0.1547, "step": 59662 }, { "epoch": 0.105789203522406, "grad_norm": 0.328125, "learning_rate": 0.0007754277698397107, "loss": 0.1554, "step": 59664 }, { "epoch": 0.10579274968771582, "grad_norm": 1.296875, "learning_rate": 0.0007753722571066915, "loss": 0.1742, "step": 59666 }, { "epoch": 0.10579629585302563, "grad_norm": 0.345703125, "learning_rate": 0.0007753167457934052, "loss": 0.2145, "step": 59668 }, { "epoch": 0.10579984201833545, "grad_norm": 0.49609375, "learning_rate": 0.0007752612359000946, "loss": 0.5307, "step": 59670 }, { "epoch": 0.10580338818364526, "grad_norm": 0.54296875, "learning_rate": 0.0007752057274270028, "loss": 0.1858, "step": 59672 }, { "epoch": 0.10580693434895508, "grad_norm": 3.15625, "learning_rate": 0.000775150220374373, "loss": 0.2669, "step": 59674 }, { "epoch": 0.10581048051426489, "grad_norm": 0.453125, "learning_rate": 0.0007750947147424468, "loss": 0.3005, "step": 59676 }, { "epoch": 0.10581402667957471, "grad_norm": 0.28515625, "learning_rate": 0.0007750392105314676, "loss": 0.1712, "step": 59678 }, { "epoch": 0.10581757284488452, "grad_norm": 1.875, "learning_rate": 0.0007749837077416786, "loss": 0.1706, "step": 59680 }, { "epoch": 0.10582111901019434, "grad_norm": 0.3203125, "learning_rate": 0.0007749282063733221, "loss": 0.1613, "step": 59682 }, { "epoch": 0.10582466517550415, "grad_norm": 0.296875, "learning_rate": 0.0007748727064266405, "loss": 0.1871, "step": 59684 }, { "epoch": 0.10582821134081397, "grad_norm": 0.5625, "learning_rate": 0.0007748172079018767, "loss": 0.1728, "step": 59686 }, { "epoch": 0.10583175750612378, "grad_norm": 0.25, "learning_rate": 0.0007747617107992737, "loss": 0.3202, "step": 59688 }, { "epoch": 0.1058353036714336, "grad_norm": 1.453125, "learning_rate": 0.0007747062151190743, "loss": 0.194, "step": 59690 }, { "epoch": 0.10583884983674341, "grad_norm": 0.462890625, "learning_rate": 0.0007746507208615205, "loss": 0.1564, "step": 59692 }, { "epoch": 0.10584239600205322, "grad_norm": 0.27734375, "learning_rate": 0.0007745952280268557, "loss": 0.1441, "step": 59694 }, { "epoch": 0.10584594216736304, "grad_norm": 0.78125, "learning_rate": 0.0007745397366153224, "loss": 0.1916, "step": 59696 }, { "epoch": 0.10584948833267287, "grad_norm": 0.2177734375, "learning_rate": 0.0007744842466271633, "loss": 0.1491, "step": 59698 }, { "epoch": 0.10585303449798268, "grad_norm": 2.421875, "learning_rate": 0.000774428758062621, "loss": 0.3448, "step": 59700 }, { "epoch": 0.1058565806632925, "grad_norm": 0.275390625, "learning_rate": 0.000774373270921938, "loss": 0.121, "step": 59702 }, { "epoch": 0.10586012682860231, "grad_norm": 0.4296875, "learning_rate": 0.0007743177852053574, "loss": 0.1702, "step": 59704 }, { "epoch": 0.10586367299391213, "grad_norm": 0.322265625, "learning_rate": 0.0007742623009131215, "loss": 0.3386, "step": 59706 }, { "epoch": 0.10586721915922194, "grad_norm": 1.453125, "learning_rate": 0.0007742068180454731, "loss": 0.4272, "step": 59708 }, { "epoch": 0.10587076532453175, "grad_norm": 0.62890625, "learning_rate": 0.0007741513366026547, "loss": 0.2042, "step": 59710 }, { "epoch": 0.10587431148984157, "grad_norm": 0.287109375, "learning_rate": 0.0007740958565849093, "loss": 0.1442, "step": 59712 }, { "epoch": 0.10587785765515138, "grad_norm": 0.345703125, "learning_rate": 0.0007740403779924794, "loss": 0.1977, "step": 59714 }, { "epoch": 0.1058814038204612, "grad_norm": 0.88671875, "learning_rate": 0.0007739849008256073, "loss": 0.2043, "step": 59716 }, { "epoch": 0.10588494998577101, "grad_norm": 0.2216796875, "learning_rate": 0.0007739294250845356, "loss": 0.1395, "step": 59718 }, { "epoch": 0.10588849615108083, "grad_norm": 0.482421875, "learning_rate": 0.0007738739507695078, "loss": 0.1387, "step": 59720 }, { "epoch": 0.10589204231639064, "grad_norm": 0.44140625, "learning_rate": 0.0007738184778807658, "loss": 0.253, "step": 59722 }, { "epoch": 0.10589558848170046, "grad_norm": 0.63671875, "learning_rate": 0.0007737630064185521, "loss": 0.2967, "step": 59724 }, { "epoch": 0.10589913464701027, "grad_norm": 0.466796875, "learning_rate": 0.0007737075363831095, "loss": 0.1684, "step": 59726 }, { "epoch": 0.10590268081232009, "grad_norm": 0.2265625, "learning_rate": 0.0007736520677746805, "loss": 0.1695, "step": 59728 }, { "epoch": 0.1059062269776299, "grad_norm": 0.39453125, "learning_rate": 0.0007735966005935082, "loss": 0.1629, "step": 59730 }, { "epoch": 0.10590977314293971, "grad_norm": 0.6953125, "learning_rate": 0.0007735411348398345, "loss": 0.2109, "step": 59732 }, { "epoch": 0.10591331930824953, "grad_norm": 0.21484375, "learning_rate": 0.0007734856705139022, "loss": 0.1293, "step": 59734 }, { "epoch": 0.10591686547355934, "grad_norm": 0.62109375, "learning_rate": 0.0007734302076159538, "loss": 0.2298, "step": 59736 }, { "epoch": 0.10592041163886916, "grad_norm": 0.5078125, "learning_rate": 0.0007733747461462326, "loss": 0.2008, "step": 59738 }, { "epoch": 0.10592395780417897, "grad_norm": 0.96875, "learning_rate": 0.0007733192861049798, "loss": 0.2226, "step": 59740 }, { "epoch": 0.10592750396948879, "grad_norm": 0.18359375, "learning_rate": 0.0007732638274924393, "loss": 0.16, "step": 59742 }, { "epoch": 0.10593105013479862, "grad_norm": 0.28125, "learning_rate": 0.0007732083703088524, "loss": 0.1936, "step": 59744 }, { "epoch": 0.10593459630010843, "grad_norm": 0.55859375, "learning_rate": 0.0007731529145544625, "loss": 0.2172, "step": 59746 }, { "epoch": 0.10593814246541824, "grad_norm": 0.62890625, "learning_rate": 0.0007730974602295122, "loss": 0.1697, "step": 59748 }, { "epoch": 0.10594168863072806, "grad_norm": 2.15625, "learning_rate": 0.0007730420073342435, "loss": 0.3533, "step": 59750 }, { "epoch": 0.10594523479603787, "grad_norm": 0.302734375, "learning_rate": 0.0007729865558688989, "loss": 0.1796, "step": 59752 }, { "epoch": 0.10594878096134769, "grad_norm": 0.58203125, "learning_rate": 0.0007729311058337214, "loss": 0.169, "step": 59754 }, { "epoch": 0.1059523271266575, "grad_norm": 3.5625, "learning_rate": 0.0007728756572289535, "loss": 0.4283, "step": 59756 }, { "epoch": 0.10595587329196732, "grad_norm": 0.349609375, "learning_rate": 0.0007728202100548369, "loss": 0.2509, "step": 59758 }, { "epoch": 0.10595941945727713, "grad_norm": 0.5625, "learning_rate": 0.0007727647643116151, "loss": 0.172, "step": 59760 }, { "epoch": 0.10596296562258695, "grad_norm": 2.671875, "learning_rate": 0.0007727093199995299, "loss": 0.2146, "step": 59762 }, { "epoch": 0.10596651178789676, "grad_norm": 0.30859375, "learning_rate": 0.0007726538771188243, "loss": 0.2039, "step": 59764 }, { "epoch": 0.10597005795320658, "grad_norm": 0.4296875, "learning_rate": 0.0007725984356697404, "loss": 0.1788, "step": 59766 }, { "epoch": 0.10597360411851639, "grad_norm": 0.5859375, "learning_rate": 0.0007725429956525205, "loss": 0.2014, "step": 59768 }, { "epoch": 0.1059771502838262, "grad_norm": 0.279296875, "learning_rate": 0.0007724875570674073, "loss": 0.1603, "step": 59770 }, { "epoch": 0.10598069644913602, "grad_norm": 2.203125, "learning_rate": 0.0007724321199146438, "loss": 0.2132, "step": 59772 }, { "epoch": 0.10598424261444583, "grad_norm": 0.62890625, "learning_rate": 0.0007723766841944714, "loss": 0.1939, "step": 59774 }, { "epoch": 0.10598778877975565, "grad_norm": 2.25, "learning_rate": 0.0007723212499071335, "loss": 0.2982, "step": 59776 }, { "epoch": 0.10599133494506546, "grad_norm": 0.330078125, "learning_rate": 0.0007722658170528718, "loss": 0.2355, "step": 59778 }, { "epoch": 0.10599488111037528, "grad_norm": 0.21484375, "learning_rate": 0.0007722103856319296, "loss": 0.1861, "step": 59780 }, { "epoch": 0.10599842727568509, "grad_norm": 0.427734375, "learning_rate": 0.0007721549556445479, "loss": 0.18, "step": 59782 }, { "epoch": 0.1060019734409949, "grad_norm": 0.83203125, "learning_rate": 0.0007720995270909705, "loss": 0.188, "step": 59784 }, { "epoch": 0.10600551960630472, "grad_norm": 0.447265625, "learning_rate": 0.0007720440999714391, "loss": 0.3644, "step": 59786 }, { "epoch": 0.10600906577161454, "grad_norm": 0.26953125, "learning_rate": 0.0007719886742861968, "loss": 0.1572, "step": 59788 }, { "epoch": 0.10601261193692436, "grad_norm": 0.6640625, "learning_rate": 0.000771933250035485, "loss": 0.1409, "step": 59790 }, { "epoch": 0.10601615810223418, "grad_norm": 0.578125, "learning_rate": 0.0007718778272195466, "loss": 0.1837, "step": 59792 }, { "epoch": 0.106019704267544, "grad_norm": 0.6484375, "learning_rate": 0.000771822405838624, "loss": 0.2404, "step": 59794 }, { "epoch": 0.10602325043285381, "grad_norm": 0.45703125, "learning_rate": 0.0007717669858929599, "loss": 0.2151, "step": 59796 }, { "epoch": 0.10602679659816362, "grad_norm": 1.3828125, "learning_rate": 0.0007717115673827961, "loss": 0.1977, "step": 59798 }, { "epoch": 0.10603034276347344, "grad_norm": 0.361328125, "learning_rate": 0.0007716561503083752, "loss": 0.211, "step": 59800 }, { "epoch": 0.10603388892878325, "grad_norm": 0.4375, "learning_rate": 0.0007716007346699397, "loss": 0.1902, "step": 59802 }, { "epoch": 0.10603743509409307, "grad_norm": 0.61328125, "learning_rate": 0.0007715453204677319, "loss": 0.2113, "step": 59804 }, { "epoch": 0.10604098125940288, "grad_norm": 0.37109375, "learning_rate": 0.0007714899077019936, "loss": 0.204, "step": 59806 }, { "epoch": 0.1060445274247127, "grad_norm": 0.42578125, "learning_rate": 0.000771434496372968, "loss": 0.184, "step": 59808 }, { "epoch": 0.10604807359002251, "grad_norm": 0.42578125, "learning_rate": 0.000771379086480897, "loss": 0.1691, "step": 59810 }, { "epoch": 0.10605161975533232, "grad_norm": 0.3515625, "learning_rate": 0.0007713236780260233, "loss": 0.1839, "step": 59812 }, { "epoch": 0.10605516592064214, "grad_norm": 0.8359375, "learning_rate": 0.0007712682710085883, "loss": 0.2053, "step": 59814 }, { "epoch": 0.10605871208595195, "grad_norm": 0.435546875, "learning_rate": 0.0007712128654288355, "loss": 0.1673, "step": 59816 }, { "epoch": 0.10606225825126177, "grad_norm": 0.2578125, "learning_rate": 0.0007711574612870063, "loss": 0.2223, "step": 59818 }, { "epoch": 0.10606580441657158, "grad_norm": 0.26171875, "learning_rate": 0.0007711020585833437, "loss": 0.2218, "step": 59820 }, { "epoch": 0.1060693505818814, "grad_norm": 1.078125, "learning_rate": 0.0007710466573180892, "loss": 0.2429, "step": 59822 }, { "epoch": 0.10607289674719121, "grad_norm": 1.7578125, "learning_rate": 0.0007709912574914857, "loss": 0.1948, "step": 59824 }, { "epoch": 0.10607644291250103, "grad_norm": 0.58984375, "learning_rate": 0.0007709358591037751, "loss": 0.1522, "step": 59826 }, { "epoch": 0.10607998907781084, "grad_norm": 0.54296875, "learning_rate": 0.0007708804621552001, "loss": 0.1613, "step": 59828 }, { "epoch": 0.10608353524312066, "grad_norm": 0.80078125, "learning_rate": 0.000770825066646003, "loss": 0.174, "step": 59830 }, { "epoch": 0.10608708140843047, "grad_norm": 0.54296875, "learning_rate": 0.0007707696725764257, "loss": 0.1772, "step": 59832 }, { "epoch": 0.1060906275737403, "grad_norm": 2.4375, "learning_rate": 0.0007707142799467106, "loss": 0.3765, "step": 59834 }, { "epoch": 0.10609417373905011, "grad_norm": 2.484375, "learning_rate": 0.0007706588887570999, "loss": 0.3625, "step": 59836 }, { "epoch": 0.10609771990435993, "grad_norm": 0.65234375, "learning_rate": 0.0007706034990078364, "loss": 0.1057, "step": 59838 }, { "epoch": 0.10610126606966974, "grad_norm": 0.55078125, "learning_rate": 0.0007705481106991614, "loss": 0.2125, "step": 59840 }, { "epoch": 0.10610481223497956, "grad_norm": 0.37890625, "learning_rate": 0.0007704927238313175, "loss": 0.1652, "step": 59842 }, { "epoch": 0.10610835840028937, "grad_norm": 0.48828125, "learning_rate": 0.0007704373384045471, "loss": 0.1996, "step": 59844 }, { "epoch": 0.10611190456559919, "grad_norm": 1.5234375, "learning_rate": 0.0007703819544190927, "loss": 0.3383, "step": 59846 }, { "epoch": 0.106115450730909, "grad_norm": 2.078125, "learning_rate": 0.0007703265718751957, "loss": 0.4154, "step": 59848 }, { "epoch": 0.10611899689621881, "grad_norm": 0.3671875, "learning_rate": 0.0007702711907730988, "loss": 0.1765, "step": 59850 }, { "epoch": 0.10612254306152863, "grad_norm": 0.66015625, "learning_rate": 0.0007702158111130447, "loss": 0.2297, "step": 59852 }, { "epoch": 0.10612608922683844, "grad_norm": 0.359375, "learning_rate": 0.0007701604328952747, "loss": 0.2184, "step": 59854 }, { "epoch": 0.10612963539214826, "grad_norm": 0.396484375, "learning_rate": 0.0007701050561200314, "loss": 0.2211, "step": 59856 }, { "epoch": 0.10613318155745807, "grad_norm": 0.412109375, "learning_rate": 0.0007700496807875567, "loss": 0.3929, "step": 59858 }, { "epoch": 0.10613672772276789, "grad_norm": 0.400390625, "learning_rate": 0.0007699943068980935, "loss": 0.1526, "step": 59860 }, { "epoch": 0.1061402738880777, "grad_norm": 0.458984375, "learning_rate": 0.0007699389344518835, "loss": 0.2078, "step": 59862 }, { "epoch": 0.10614382005338752, "grad_norm": 0.369140625, "learning_rate": 0.0007698835634491686, "loss": 0.1432, "step": 59864 }, { "epoch": 0.10614736621869733, "grad_norm": 0.7578125, "learning_rate": 0.0007698281938901912, "loss": 0.1866, "step": 59866 }, { "epoch": 0.10615091238400715, "grad_norm": 2.578125, "learning_rate": 0.0007697728257751937, "loss": 0.195, "step": 59868 }, { "epoch": 0.10615445854931696, "grad_norm": 0.388671875, "learning_rate": 0.0007697174591044181, "loss": 0.1481, "step": 59870 }, { "epoch": 0.10615800471462677, "grad_norm": 0.396484375, "learning_rate": 0.0007696620938781065, "loss": 0.144, "step": 59872 }, { "epoch": 0.10616155087993659, "grad_norm": 0.267578125, "learning_rate": 0.0007696067300965007, "loss": 0.2425, "step": 59874 }, { "epoch": 0.1061650970452464, "grad_norm": 0.419921875, "learning_rate": 0.0007695513677598431, "loss": 0.1314, "step": 59876 }, { "epoch": 0.10616864321055622, "grad_norm": 0.6875, "learning_rate": 0.0007694960068683762, "loss": 0.2595, "step": 59878 }, { "epoch": 0.10617218937586605, "grad_norm": 0.330078125, "learning_rate": 0.0007694406474223419, "loss": 0.183, "step": 59880 }, { "epoch": 0.10617573554117586, "grad_norm": 0.322265625, "learning_rate": 0.0007693852894219817, "loss": 0.1903, "step": 59882 }, { "epoch": 0.10617928170648568, "grad_norm": 0.2060546875, "learning_rate": 0.0007693299328675384, "loss": 0.1971, "step": 59884 }, { "epoch": 0.10618282787179549, "grad_norm": 0.703125, "learning_rate": 0.0007692745777592542, "loss": 0.2144, "step": 59886 }, { "epoch": 0.1061863740371053, "grad_norm": 0.498046875, "learning_rate": 0.0007692192240973706, "loss": 0.2074, "step": 59888 }, { "epoch": 0.10618992020241512, "grad_norm": 0.50390625, "learning_rate": 0.0007691638718821301, "loss": 0.1599, "step": 59890 }, { "epoch": 0.10619346636772493, "grad_norm": 0.271484375, "learning_rate": 0.0007691085211137745, "loss": 0.1599, "step": 59892 }, { "epoch": 0.10619701253303475, "grad_norm": 0.2421875, "learning_rate": 0.0007690531717925465, "loss": 0.168, "step": 59894 }, { "epoch": 0.10620055869834456, "grad_norm": 0.625, "learning_rate": 0.000768997823918687, "loss": 0.1935, "step": 59896 }, { "epoch": 0.10620410486365438, "grad_norm": 0.51953125, "learning_rate": 0.0007689424774924389, "loss": 0.2059, "step": 59898 }, { "epoch": 0.10620765102896419, "grad_norm": 0.6953125, "learning_rate": 0.0007688871325140443, "loss": 0.1616, "step": 59900 }, { "epoch": 0.106211197194274, "grad_norm": 0.376953125, "learning_rate": 0.0007688317889837451, "loss": 0.2185, "step": 59902 }, { "epoch": 0.10621474335958382, "grad_norm": 0.2216796875, "learning_rate": 0.0007687764469017833, "loss": 0.1641, "step": 59904 }, { "epoch": 0.10621828952489364, "grad_norm": 1.6484375, "learning_rate": 0.0007687211062684005, "loss": 0.3287, "step": 59906 }, { "epoch": 0.10622183569020345, "grad_norm": 1.6484375, "learning_rate": 0.0007686657670838395, "loss": 0.1998, "step": 59908 }, { "epoch": 0.10622538185551327, "grad_norm": 0.66796875, "learning_rate": 0.000768610429348342, "loss": 0.3843, "step": 59910 }, { "epoch": 0.10622892802082308, "grad_norm": 0.58984375, "learning_rate": 0.0007685550930621501, "loss": 0.169, "step": 59912 }, { "epoch": 0.1062324741861329, "grad_norm": 0.734375, "learning_rate": 0.0007684997582255053, "loss": 0.12, "step": 59914 }, { "epoch": 0.10623602035144271, "grad_norm": 0.419921875, "learning_rate": 0.0007684444248386502, "loss": 0.2412, "step": 59916 }, { "epoch": 0.10623956651675252, "grad_norm": 0.578125, "learning_rate": 0.0007683890929018265, "loss": 0.1647, "step": 59918 }, { "epoch": 0.10624311268206234, "grad_norm": 0.267578125, "learning_rate": 0.0007683337624152766, "loss": 0.1267, "step": 59920 }, { "epoch": 0.10624665884737215, "grad_norm": 0.5390625, "learning_rate": 0.0007682784333792421, "loss": 0.1949, "step": 59922 }, { "epoch": 0.10625020501268197, "grad_norm": 1.0234375, "learning_rate": 0.0007682231057939646, "loss": 0.1765, "step": 59924 }, { "epoch": 0.1062537511779918, "grad_norm": 0.82421875, "learning_rate": 0.0007681677796596871, "loss": 0.1916, "step": 59926 }, { "epoch": 0.10625729734330161, "grad_norm": 0.2275390625, "learning_rate": 0.0007681124549766505, "loss": 0.1386, "step": 59928 }, { "epoch": 0.10626084350861142, "grad_norm": 1.1015625, "learning_rate": 0.0007680571317450977, "loss": 0.1567, "step": 59930 }, { "epoch": 0.10626438967392124, "grad_norm": 0.2373046875, "learning_rate": 0.0007680018099652697, "loss": 0.2123, "step": 59932 }, { "epoch": 0.10626793583923105, "grad_norm": 0.5078125, "learning_rate": 0.0007679464896374091, "loss": 0.1568, "step": 59934 }, { "epoch": 0.10627148200454087, "grad_norm": 0.1572265625, "learning_rate": 0.0007678911707617582, "loss": 0.1404, "step": 59936 }, { "epoch": 0.10627502816985068, "grad_norm": 0.46484375, "learning_rate": 0.0007678358533385576, "loss": 0.176, "step": 59938 }, { "epoch": 0.1062785743351605, "grad_norm": 0.361328125, "learning_rate": 0.0007677805373680505, "loss": 0.114, "step": 59940 }, { "epoch": 0.10628212050047031, "grad_norm": 0.9609375, "learning_rate": 0.0007677252228504783, "loss": 0.197, "step": 59942 }, { "epoch": 0.10628566666578013, "grad_norm": 0.357421875, "learning_rate": 0.0007676699097860831, "loss": 0.1777, "step": 59944 }, { "epoch": 0.10628921283108994, "grad_norm": 0.40625, "learning_rate": 0.0007676145981751064, "loss": 0.1743, "step": 59946 }, { "epoch": 0.10629275899639976, "grad_norm": 0.333984375, "learning_rate": 0.0007675592880177904, "loss": 0.1568, "step": 59948 }, { "epoch": 0.10629630516170957, "grad_norm": 0.3984375, "learning_rate": 0.0007675039793143768, "loss": 0.1656, "step": 59950 }, { "epoch": 0.10629985132701938, "grad_norm": 0.2431640625, "learning_rate": 0.0007674486720651081, "loss": 0.1875, "step": 59952 }, { "epoch": 0.1063033974923292, "grad_norm": 0.7890625, "learning_rate": 0.0007673933662702255, "loss": 0.1671, "step": 59954 }, { "epoch": 0.10630694365763901, "grad_norm": 0.48828125, "learning_rate": 0.000767338061929971, "loss": 0.168, "step": 59956 }, { "epoch": 0.10631048982294883, "grad_norm": 0.5, "learning_rate": 0.0007672827590445867, "loss": 0.1784, "step": 59958 }, { "epoch": 0.10631403598825864, "grad_norm": 0.24609375, "learning_rate": 0.0007672274576143148, "loss": 0.229, "step": 59960 }, { "epoch": 0.10631758215356846, "grad_norm": 0.2021484375, "learning_rate": 0.000767172157639396, "loss": 0.1496, "step": 59962 }, { "epoch": 0.10632112831887827, "grad_norm": 0.7265625, "learning_rate": 0.0007671168591200732, "loss": 0.2616, "step": 59964 }, { "epoch": 0.10632467448418809, "grad_norm": 0.1572265625, "learning_rate": 0.0007670615620565878, "loss": 0.1278, "step": 59966 }, { "epoch": 0.1063282206494979, "grad_norm": 0.9921875, "learning_rate": 0.0007670062664491819, "loss": 0.182, "step": 59968 }, { "epoch": 0.10633176681480773, "grad_norm": 0.4765625, "learning_rate": 0.0007669509722980969, "loss": 0.1731, "step": 59970 }, { "epoch": 0.10633531298011754, "grad_norm": 0.7109375, "learning_rate": 0.0007668956796035751, "loss": 0.1972, "step": 59972 }, { "epoch": 0.10633885914542736, "grad_norm": 0.33203125, "learning_rate": 0.0007668403883658577, "loss": 0.1657, "step": 59974 }, { "epoch": 0.10634240531073717, "grad_norm": 0.314453125, "learning_rate": 0.0007667850985851876, "loss": 0.1581, "step": 59976 }, { "epoch": 0.10634595147604699, "grad_norm": 0.375, "learning_rate": 0.0007667298102618054, "loss": 0.1418, "step": 59978 }, { "epoch": 0.1063494976413568, "grad_norm": 0.62890625, "learning_rate": 0.0007666745233959536, "loss": 0.1596, "step": 59980 }, { "epoch": 0.10635304380666662, "grad_norm": 0.361328125, "learning_rate": 0.0007666192379878738, "loss": 0.209, "step": 59982 }, { "epoch": 0.10635658997197643, "grad_norm": 0.44140625, "learning_rate": 0.000766563954037808, "loss": 0.3723, "step": 59984 }, { "epoch": 0.10636013613728625, "grad_norm": 0.7578125, "learning_rate": 0.0007665086715459977, "loss": 0.2542, "step": 59986 }, { "epoch": 0.10636368230259606, "grad_norm": 0.265625, "learning_rate": 0.0007664533905126845, "loss": 0.157, "step": 59988 }, { "epoch": 0.10636722846790587, "grad_norm": 0.484375, "learning_rate": 0.0007663981109381104, "loss": 0.1945, "step": 59990 }, { "epoch": 0.10637077463321569, "grad_norm": 0.337890625, "learning_rate": 0.0007663428328225175, "loss": 0.159, "step": 59992 }, { "epoch": 0.1063743207985255, "grad_norm": 0.3828125, "learning_rate": 0.0007662875561661471, "loss": 0.183, "step": 59994 }, { "epoch": 0.10637786696383532, "grad_norm": 0.18359375, "learning_rate": 0.000766232280969241, "loss": 0.1791, "step": 59996 }, { "epoch": 0.10638141312914513, "grad_norm": 0.58984375, "learning_rate": 0.0007661770072320409, "loss": 0.2909, "step": 59998 }, { "epoch": 0.10638495929445495, "grad_norm": 0.263671875, "learning_rate": 0.0007661217349547891, "loss": 0.1634, "step": 60000 }, { "epoch": 0.10638850545976476, "grad_norm": 0.63671875, "learning_rate": 0.0007660664641377267, "loss": 0.2225, "step": 60002 }, { "epoch": 0.10639205162507458, "grad_norm": 0.455078125, "learning_rate": 0.0007660111947810957, "loss": 0.2038, "step": 60004 }, { "epoch": 0.10639559779038439, "grad_norm": 2.578125, "learning_rate": 0.0007659559268851372, "loss": 0.27, "step": 60006 }, { "epoch": 0.1063991439556942, "grad_norm": 0.4375, "learning_rate": 0.0007659006604500939, "loss": 0.1783, "step": 60008 }, { "epoch": 0.10640269012100402, "grad_norm": 2.75, "learning_rate": 0.0007658453954762075, "loss": 0.2205, "step": 60010 }, { "epoch": 0.10640623628631384, "grad_norm": 0.42578125, "learning_rate": 0.0007657901319637184, "loss": 0.1632, "step": 60012 }, { "epoch": 0.10640978245162365, "grad_norm": 1.46875, "learning_rate": 0.0007657348699128694, "loss": 0.2231, "step": 60014 }, { "epoch": 0.10641332861693348, "grad_norm": 2.78125, "learning_rate": 0.0007656796093239018, "loss": 0.2531, "step": 60016 }, { "epoch": 0.10641687478224329, "grad_norm": 0.98828125, "learning_rate": 0.0007656243501970579, "loss": 0.282, "step": 60018 }, { "epoch": 0.10642042094755311, "grad_norm": 1.21875, "learning_rate": 0.0007655690925325784, "loss": 0.2236, "step": 60020 }, { "epoch": 0.10642396711286292, "grad_norm": 0.578125, "learning_rate": 0.0007655138363307058, "loss": 0.1897, "step": 60022 }, { "epoch": 0.10642751327817274, "grad_norm": 2.109375, "learning_rate": 0.0007654585815916809, "loss": 0.2606, "step": 60024 }, { "epoch": 0.10643105944348255, "grad_norm": 0.2255859375, "learning_rate": 0.0007654033283157462, "loss": 0.167, "step": 60026 }, { "epoch": 0.10643460560879237, "grad_norm": 0.388671875, "learning_rate": 0.0007653480765031429, "loss": 0.1639, "step": 60028 }, { "epoch": 0.10643815177410218, "grad_norm": 0.6875, "learning_rate": 0.0007652928261541126, "loss": 0.1832, "step": 60030 }, { "epoch": 0.106441697939412, "grad_norm": 0.35546875, "learning_rate": 0.0007652375772688968, "loss": 0.2147, "step": 60032 }, { "epoch": 0.10644524410472181, "grad_norm": 0.400390625, "learning_rate": 0.0007651823298477381, "loss": 0.2031, "step": 60034 }, { "epoch": 0.10644879027003162, "grad_norm": 0.27734375, "learning_rate": 0.000765127083890877, "loss": 0.1768, "step": 60036 }, { "epoch": 0.10645233643534144, "grad_norm": 0.34765625, "learning_rate": 0.0007650718393985556, "loss": 0.1536, "step": 60038 }, { "epoch": 0.10645588260065125, "grad_norm": 0.53515625, "learning_rate": 0.0007650165963710154, "loss": 0.1823, "step": 60040 }, { "epoch": 0.10645942876596107, "grad_norm": 0.30078125, "learning_rate": 0.0007649613548084981, "loss": 0.1519, "step": 60042 }, { "epoch": 0.10646297493127088, "grad_norm": 0.51171875, "learning_rate": 0.000764906114711245, "loss": 0.2389, "step": 60044 }, { "epoch": 0.1064665210965807, "grad_norm": 0.73046875, "learning_rate": 0.0007648508760794981, "loss": 0.1704, "step": 60046 }, { "epoch": 0.10647006726189051, "grad_norm": 0.419921875, "learning_rate": 0.0007647956389134987, "loss": 0.1751, "step": 60048 }, { "epoch": 0.10647361342720033, "grad_norm": 0.5625, "learning_rate": 0.0007647404032134886, "loss": 0.1398, "step": 60050 }, { "epoch": 0.10647715959251014, "grad_norm": 0.46484375, "learning_rate": 0.000764685168979709, "loss": 0.1551, "step": 60052 }, { "epoch": 0.10648070575781995, "grad_norm": 1.140625, "learning_rate": 0.0007646299362124018, "loss": 0.1854, "step": 60054 }, { "epoch": 0.10648425192312977, "grad_norm": 0.2021484375, "learning_rate": 0.0007645747049118086, "loss": 0.181, "step": 60056 }, { "epoch": 0.10648779808843958, "grad_norm": 0.91015625, "learning_rate": 0.0007645194750781708, "loss": 0.2371, "step": 60058 }, { "epoch": 0.1064913442537494, "grad_norm": 0.21484375, "learning_rate": 0.0007644642467117298, "loss": 0.2263, "step": 60060 }, { "epoch": 0.10649489041905923, "grad_norm": 0.34765625, "learning_rate": 0.0007644090198127272, "loss": 0.1717, "step": 60062 }, { "epoch": 0.10649843658436904, "grad_norm": 1.046875, "learning_rate": 0.0007643537943814047, "loss": 0.1826, "step": 60064 }, { "epoch": 0.10650198274967886, "grad_norm": 0.3515625, "learning_rate": 0.0007642985704180039, "loss": 0.1818, "step": 60066 }, { "epoch": 0.10650552891498867, "grad_norm": 0.88671875, "learning_rate": 0.000764243347922766, "loss": 0.2289, "step": 60068 }, { "epoch": 0.10650907508029848, "grad_norm": 0.30078125, "learning_rate": 0.0007641881268959325, "loss": 0.1388, "step": 60070 }, { "epoch": 0.1065126212456083, "grad_norm": 0.40625, "learning_rate": 0.000764132907337745, "loss": 0.1629, "step": 60072 }, { "epoch": 0.10651616741091811, "grad_norm": 1.3046875, "learning_rate": 0.0007640776892484458, "loss": 0.21, "step": 60074 }, { "epoch": 0.10651971357622793, "grad_norm": 0.22265625, "learning_rate": 0.000764022472628275, "loss": 0.1744, "step": 60076 }, { "epoch": 0.10652325974153774, "grad_norm": 0.84765625, "learning_rate": 0.0007639672574774749, "loss": 0.2037, "step": 60078 }, { "epoch": 0.10652680590684756, "grad_norm": 0.41015625, "learning_rate": 0.0007639120437962867, "loss": 0.1509, "step": 60080 }, { "epoch": 0.10653035207215737, "grad_norm": 0.51953125, "learning_rate": 0.0007638568315849521, "loss": 0.2078, "step": 60082 }, { "epoch": 0.10653389823746719, "grad_norm": 0.62890625, "learning_rate": 0.0007638016208437129, "loss": 0.1615, "step": 60084 }, { "epoch": 0.106537444402777, "grad_norm": 0.35546875, "learning_rate": 0.0007637464115728094, "loss": 0.1902, "step": 60086 }, { "epoch": 0.10654099056808682, "grad_norm": 0.333984375, "learning_rate": 0.0007636912037724842, "loss": 0.1575, "step": 60088 }, { "epoch": 0.10654453673339663, "grad_norm": 1.28125, "learning_rate": 0.0007636359974429781, "loss": 0.1918, "step": 60090 }, { "epoch": 0.10654808289870644, "grad_norm": 0.58203125, "learning_rate": 0.0007635807925845331, "loss": 0.1689, "step": 60092 }, { "epoch": 0.10655162906401626, "grad_norm": 0.69921875, "learning_rate": 0.00076352558919739, "loss": 0.158, "step": 60094 }, { "epoch": 0.10655517522932607, "grad_norm": 1.3515625, "learning_rate": 0.0007634703872817908, "loss": 0.2061, "step": 60096 }, { "epoch": 0.10655872139463589, "grad_norm": 0.4453125, "learning_rate": 0.0007634151868379764, "loss": 0.1712, "step": 60098 }, { "epoch": 0.1065622675599457, "grad_norm": 0.29296875, "learning_rate": 0.0007633599878661885, "loss": 0.187, "step": 60100 }, { "epoch": 0.10656581372525552, "grad_norm": 0.275390625, "learning_rate": 0.0007633047903666685, "loss": 0.1491, "step": 60102 }, { "epoch": 0.10656935989056533, "grad_norm": 0.5, "learning_rate": 0.0007632495943396576, "loss": 0.2142, "step": 60104 }, { "epoch": 0.10657290605587516, "grad_norm": 0.408203125, "learning_rate": 0.0007631943997853976, "loss": 0.1551, "step": 60106 }, { "epoch": 0.10657645222118498, "grad_norm": 0.84765625, "learning_rate": 0.0007631392067041299, "loss": 0.3162, "step": 60108 }, { "epoch": 0.10657999838649479, "grad_norm": 0.365234375, "learning_rate": 0.0007630840150960952, "loss": 0.1968, "step": 60110 }, { "epoch": 0.1065835445518046, "grad_norm": 0.5, "learning_rate": 0.0007630288249615352, "loss": 0.4444, "step": 60112 }, { "epoch": 0.10658709071711442, "grad_norm": 0.7578125, "learning_rate": 0.0007629736363006916, "loss": 0.1757, "step": 60114 }, { "epoch": 0.10659063688242423, "grad_norm": 0.333984375, "learning_rate": 0.0007629184491138057, "loss": 0.1652, "step": 60116 }, { "epoch": 0.10659418304773405, "grad_norm": 0.3984375, "learning_rate": 0.0007628632634011188, "loss": 0.1591, "step": 60118 }, { "epoch": 0.10659772921304386, "grad_norm": 0.345703125, "learning_rate": 0.0007628080791628718, "loss": 0.1676, "step": 60120 }, { "epoch": 0.10660127537835368, "grad_norm": 0.64453125, "learning_rate": 0.0007627528963993062, "loss": 0.1692, "step": 60122 }, { "epoch": 0.10660482154366349, "grad_norm": 1.90625, "learning_rate": 0.0007626977151106643, "loss": 0.3159, "step": 60124 }, { "epoch": 0.1066083677089733, "grad_norm": 0.392578125, "learning_rate": 0.0007626425352971861, "loss": 0.1736, "step": 60126 }, { "epoch": 0.10661191387428312, "grad_norm": 0.306640625, "learning_rate": 0.0007625873569591135, "loss": 0.1523, "step": 60128 }, { "epoch": 0.10661546003959294, "grad_norm": 0.73046875, "learning_rate": 0.000762532180096688, "loss": 0.168, "step": 60130 }, { "epoch": 0.10661900620490275, "grad_norm": 0.75, "learning_rate": 0.0007624770047101508, "loss": 0.3052, "step": 60132 }, { "epoch": 0.10662255237021256, "grad_norm": 0.265625, "learning_rate": 0.000762421830799743, "loss": 0.1332, "step": 60134 }, { "epoch": 0.10662609853552238, "grad_norm": 0.310546875, "learning_rate": 0.0007623666583657058, "loss": 0.1688, "step": 60136 }, { "epoch": 0.1066296447008322, "grad_norm": 0.42578125, "learning_rate": 0.0007623114874082808, "loss": 0.2009, "step": 60138 }, { "epoch": 0.10663319086614201, "grad_norm": 0.248046875, "learning_rate": 0.0007622563179277096, "loss": 0.1252, "step": 60140 }, { "epoch": 0.10663673703145182, "grad_norm": 2.734375, "learning_rate": 0.0007622011499242328, "loss": 0.2023, "step": 60142 }, { "epoch": 0.10664028319676164, "grad_norm": 0.46875, "learning_rate": 0.0007621459833980917, "loss": 0.1924, "step": 60144 }, { "epoch": 0.10664382936207145, "grad_norm": 0.6484375, "learning_rate": 0.0007620908183495279, "loss": 0.2468, "step": 60146 }, { "epoch": 0.10664737552738127, "grad_norm": 0.9609375, "learning_rate": 0.000762035654778783, "loss": 0.3453, "step": 60148 }, { "epoch": 0.10665092169269108, "grad_norm": 0.80078125, "learning_rate": 0.0007619804926860976, "loss": 0.2243, "step": 60150 }, { "epoch": 0.10665446785800091, "grad_norm": 0.212890625, "learning_rate": 0.0007619253320717132, "loss": 0.1866, "step": 60152 }, { "epoch": 0.10665801402331072, "grad_norm": 0.455078125, "learning_rate": 0.0007618701729358708, "loss": 0.2305, "step": 60154 }, { "epoch": 0.10666156018862054, "grad_norm": 0.2216796875, "learning_rate": 0.0007618150152788124, "loss": 0.162, "step": 60156 }, { "epoch": 0.10666510635393035, "grad_norm": 0.21484375, "learning_rate": 0.0007617598591007781, "loss": 0.1987, "step": 60158 }, { "epoch": 0.10666865251924017, "grad_norm": 0.279296875, "learning_rate": 0.0007617047044020098, "loss": 0.1897, "step": 60160 }, { "epoch": 0.10667219868454998, "grad_norm": 1.2109375, "learning_rate": 0.0007616495511827488, "loss": 0.2072, "step": 60162 }, { "epoch": 0.1066757448498598, "grad_norm": 0.60546875, "learning_rate": 0.0007615943994432357, "loss": 0.4345, "step": 60164 }, { "epoch": 0.10667929101516961, "grad_norm": 0.87109375, "learning_rate": 0.0007615392491837127, "loss": 0.1804, "step": 60166 }, { "epoch": 0.10668283718047943, "grad_norm": 1.1875, "learning_rate": 0.0007614841004044202, "loss": 0.3871, "step": 60168 }, { "epoch": 0.10668638334578924, "grad_norm": 0.337890625, "learning_rate": 0.0007614289531055995, "loss": 0.1731, "step": 60170 }, { "epoch": 0.10668992951109905, "grad_norm": 0.455078125, "learning_rate": 0.000761373807287492, "loss": 0.169, "step": 60172 }, { "epoch": 0.10669347567640887, "grad_norm": 0.5703125, "learning_rate": 0.000761318662950339, "loss": 0.1558, "step": 60174 }, { "epoch": 0.10669702184171868, "grad_norm": 0.7890625, "learning_rate": 0.0007612635200943814, "loss": 0.2173, "step": 60176 }, { "epoch": 0.1067005680070285, "grad_norm": 0.294921875, "learning_rate": 0.00076120837871986, "loss": 0.1653, "step": 60178 }, { "epoch": 0.10670411417233831, "grad_norm": 0.240234375, "learning_rate": 0.0007611532388270164, "loss": 0.1693, "step": 60180 }, { "epoch": 0.10670766033764813, "grad_norm": 0.458984375, "learning_rate": 0.0007610981004160922, "loss": 0.1895, "step": 60182 }, { "epoch": 0.10671120650295794, "grad_norm": 0.453125, "learning_rate": 0.0007610429634873278, "loss": 0.1999, "step": 60184 }, { "epoch": 0.10671475266826776, "grad_norm": 0.2197265625, "learning_rate": 0.0007609878280409643, "loss": 0.3783, "step": 60186 }, { "epoch": 0.10671829883357757, "grad_norm": 1.5, "learning_rate": 0.0007609326940772435, "loss": 0.2302, "step": 60188 }, { "epoch": 0.10672184499888739, "grad_norm": 2.03125, "learning_rate": 0.0007608775615964061, "loss": 0.2462, "step": 60190 }, { "epoch": 0.1067253911641972, "grad_norm": 0.60546875, "learning_rate": 0.0007608224305986931, "loss": 0.2619, "step": 60192 }, { "epoch": 0.10672893732950701, "grad_norm": 1.0390625, "learning_rate": 0.0007607673010843457, "loss": 0.1621, "step": 60194 }, { "epoch": 0.10673248349481683, "grad_norm": 0.298828125, "learning_rate": 0.0007607121730536051, "loss": 0.1892, "step": 60196 }, { "epoch": 0.10673602966012666, "grad_norm": 0.5, "learning_rate": 0.0007606570465067126, "loss": 0.2249, "step": 60198 }, { "epoch": 0.10673957582543647, "grad_norm": 0.2578125, "learning_rate": 0.0007606019214439091, "loss": 0.1621, "step": 60200 }, { "epoch": 0.10674312199074629, "grad_norm": 0.232421875, "learning_rate": 0.0007605467978654353, "loss": 0.1825, "step": 60202 }, { "epoch": 0.1067466681560561, "grad_norm": 0.5078125, "learning_rate": 0.0007604916757715328, "loss": 0.2266, "step": 60204 }, { "epoch": 0.10675021432136592, "grad_norm": 0.361328125, "learning_rate": 0.0007604365551624426, "loss": 0.2203, "step": 60206 }, { "epoch": 0.10675376048667573, "grad_norm": 0.54296875, "learning_rate": 0.0007603814360384055, "loss": 0.2453, "step": 60208 }, { "epoch": 0.10675730665198555, "grad_norm": 0.5703125, "learning_rate": 0.0007603263183996629, "loss": 0.2083, "step": 60210 }, { "epoch": 0.10676085281729536, "grad_norm": 0.1728515625, "learning_rate": 0.0007602712022464556, "loss": 0.1307, "step": 60212 }, { "epoch": 0.10676439898260517, "grad_norm": 0.279296875, "learning_rate": 0.0007602160875790247, "loss": 0.2168, "step": 60214 }, { "epoch": 0.10676794514791499, "grad_norm": 0.41796875, "learning_rate": 0.0007601609743976113, "loss": 0.195, "step": 60216 }, { "epoch": 0.1067714913132248, "grad_norm": 1.5390625, "learning_rate": 0.0007601058627024562, "loss": 0.3337, "step": 60218 }, { "epoch": 0.10677503747853462, "grad_norm": 0.59765625, "learning_rate": 0.0007600507524938007, "loss": 0.2675, "step": 60220 }, { "epoch": 0.10677858364384443, "grad_norm": 0.349609375, "learning_rate": 0.000759995643771886, "loss": 0.1993, "step": 60222 }, { "epoch": 0.10678212980915425, "grad_norm": 0.33984375, "learning_rate": 0.0007599405365369525, "loss": 0.182, "step": 60224 }, { "epoch": 0.10678567597446406, "grad_norm": 0.40234375, "learning_rate": 0.0007598854307892418, "loss": 0.1855, "step": 60226 }, { "epoch": 0.10678922213977388, "grad_norm": 0.671875, "learning_rate": 0.0007598303265289946, "loss": 0.1938, "step": 60228 }, { "epoch": 0.10679276830508369, "grad_norm": 0.333984375, "learning_rate": 0.0007597752237564521, "loss": 0.1673, "step": 60230 }, { "epoch": 0.1067963144703935, "grad_norm": 0.306640625, "learning_rate": 0.0007597201224718551, "loss": 0.2277, "step": 60232 }, { "epoch": 0.10679986063570332, "grad_norm": 0.318359375, "learning_rate": 0.0007596650226754445, "loss": 0.1687, "step": 60234 }, { "epoch": 0.10680340680101313, "grad_norm": 0.6640625, "learning_rate": 0.0007596099243674612, "loss": 0.193, "step": 60236 }, { "epoch": 0.10680695296632295, "grad_norm": 0.46875, "learning_rate": 0.0007595548275481467, "loss": 0.1355, "step": 60238 }, { "epoch": 0.10681049913163276, "grad_norm": 0.5234375, "learning_rate": 0.0007594997322177417, "loss": 0.1886, "step": 60240 }, { "epoch": 0.10681404529694258, "grad_norm": 0.734375, "learning_rate": 0.0007594446383764866, "loss": 0.1749, "step": 60242 }, { "epoch": 0.1068175914622524, "grad_norm": 0.61328125, "learning_rate": 0.0007593895460246234, "loss": 0.4686, "step": 60244 }, { "epoch": 0.10682113762756222, "grad_norm": 0.423828125, "learning_rate": 0.000759334455162392, "loss": 0.1625, "step": 60246 }, { "epoch": 0.10682468379287204, "grad_norm": 0.296875, "learning_rate": 0.0007592793657900343, "loss": 0.147, "step": 60248 }, { "epoch": 0.10682822995818185, "grad_norm": 0.310546875, "learning_rate": 0.0007592242779077903, "loss": 0.1625, "step": 60250 }, { "epoch": 0.10683177612349166, "grad_norm": 0.1728515625, "learning_rate": 0.0007591691915159016, "loss": 0.2041, "step": 60252 }, { "epoch": 0.10683532228880148, "grad_norm": 1.078125, "learning_rate": 0.0007591141066146086, "loss": 0.1736, "step": 60254 }, { "epoch": 0.1068388684541113, "grad_norm": 0.3671875, "learning_rate": 0.000759059023204153, "loss": 0.2123, "step": 60256 }, { "epoch": 0.10684241461942111, "grad_norm": 0.265625, "learning_rate": 0.0007590039412847749, "loss": 0.2659, "step": 60258 }, { "epoch": 0.10684596078473092, "grad_norm": 0.359375, "learning_rate": 0.0007589488608567152, "loss": 0.1614, "step": 60260 }, { "epoch": 0.10684950695004074, "grad_norm": 0.2373046875, "learning_rate": 0.0007588937819202155, "loss": 0.18, "step": 60262 }, { "epoch": 0.10685305311535055, "grad_norm": 0.4296875, "learning_rate": 0.0007588387044755163, "loss": 0.1573, "step": 60264 }, { "epoch": 0.10685659928066037, "grad_norm": 0.423828125, "learning_rate": 0.0007587836285228582, "loss": 0.1906, "step": 60266 }, { "epoch": 0.10686014544597018, "grad_norm": 0.42578125, "learning_rate": 0.0007587285540624822, "loss": 0.2452, "step": 60268 }, { "epoch": 0.10686369161128, "grad_norm": 0.73046875, "learning_rate": 0.0007586734810946294, "loss": 0.1935, "step": 60270 }, { "epoch": 0.10686723777658981, "grad_norm": 0.1650390625, "learning_rate": 0.0007586184096195409, "loss": 0.195, "step": 60272 }, { "epoch": 0.10687078394189962, "grad_norm": 0.267578125, "learning_rate": 0.0007585633396374567, "loss": 0.1527, "step": 60274 }, { "epoch": 0.10687433010720944, "grad_norm": 2.71875, "learning_rate": 0.0007585082711486182, "loss": 0.3048, "step": 60276 }, { "epoch": 0.10687787627251925, "grad_norm": 0.341796875, "learning_rate": 0.0007584532041532661, "loss": 0.4171, "step": 60278 }, { "epoch": 0.10688142243782907, "grad_norm": 0.361328125, "learning_rate": 0.0007583981386516417, "loss": 0.1565, "step": 60280 }, { "epoch": 0.10688496860313888, "grad_norm": 0.27734375, "learning_rate": 0.000758343074643985, "loss": 0.1456, "step": 60282 }, { "epoch": 0.1068885147684487, "grad_norm": 0.42578125, "learning_rate": 0.0007582880121305373, "loss": 0.1866, "step": 60284 }, { "epoch": 0.10689206093375851, "grad_norm": 0.443359375, "learning_rate": 0.0007582329511115393, "loss": 0.1883, "step": 60286 }, { "epoch": 0.10689560709906834, "grad_norm": 0.58984375, "learning_rate": 0.0007581778915872324, "loss": 0.2012, "step": 60288 }, { "epoch": 0.10689915326437816, "grad_norm": 0.333984375, "learning_rate": 0.0007581228335578563, "loss": 0.1877, "step": 60290 }, { "epoch": 0.10690269942968797, "grad_norm": 0.259765625, "learning_rate": 0.0007580677770236524, "loss": 0.1718, "step": 60292 }, { "epoch": 0.10690624559499778, "grad_norm": 0.53125, "learning_rate": 0.0007580127219848614, "loss": 0.2011, "step": 60294 }, { "epoch": 0.1069097917603076, "grad_norm": 0.294921875, "learning_rate": 0.0007579576684417242, "loss": 0.1776, "step": 60296 }, { "epoch": 0.10691333792561741, "grad_norm": 0.2060546875, "learning_rate": 0.0007579026163944815, "loss": 0.1656, "step": 60298 }, { "epoch": 0.10691688409092723, "grad_norm": 0.23046875, "learning_rate": 0.0007578475658433741, "loss": 0.1681, "step": 60300 }, { "epoch": 0.10692043025623704, "grad_norm": 0.435546875, "learning_rate": 0.0007577925167886423, "loss": 0.2031, "step": 60302 }, { "epoch": 0.10692397642154686, "grad_norm": 0.306640625, "learning_rate": 0.0007577374692305279, "loss": 0.2132, "step": 60304 }, { "epoch": 0.10692752258685667, "grad_norm": 0.49609375, "learning_rate": 0.0007576824231692705, "loss": 0.188, "step": 60306 }, { "epoch": 0.10693106875216649, "grad_norm": 0.376953125, "learning_rate": 0.0007576273786051115, "loss": 0.1746, "step": 60308 }, { "epoch": 0.1069346149174763, "grad_norm": 0.8046875, "learning_rate": 0.0007575723355382914, "loss": 0.2013, "step": 60310 }, { "epoch": 0.10693816108278612, "grad_norm": 2.21875, "learning_rate": 0.0007575172939690513, "loss": 0.3983, "step": 60312 }, { "epoch": 0.10694170724809593, "grad_norm": 0.296875, "learning_rate": 0.0007574622538976312, "loss": 0.1889, "step": 60314 }, { "epoch": 0.10694525341340574, "grad_norm": 0.349609375, "learning_rate": 0.0007574072153242724, "loss": 0.1816, "step": 60316 }, { "epoch": 0.10694879957871556, "grad_norm": 1.859375, "learning_rate": 0.0007573521782492154, "loss": 0.2101, "step": 60318 }, { "epoch": 0.10695234574402537, "grad_norm": 0.703125, "learning_rate": 0.0007572971426727011, "loss": 0.1883, "step": 60320 }, { "epoch": 0.10695589190933519, "grad_norm": 0.46484375, "learning_rate": 0.0007572421085949699, "loss": 0.1574, "step": 60322 }, { "epoch": 0.106959438074645, "grad_norm": 0.3203125, "learning_rate": 0.0007571870760162627, "loss": 0.1946, "step": 60324 }, { "epoch": 0.10696298423995482, "grad_norm": 0.224609375, "learning_rate": 0.00075713204493682, "loss": 0.1663, "step": 60326 }, { "epoch": 0.10696653040526463, "grad_norm": 1.859375, "learning_rate": 0.0007570770153568826, "loss": 0.1868, "step": 60328 }, { "epoch": 0.10697007657057445, "grad_norm": 0.283203125, "learning_rate": 0.0007570219872766913, "loss": 0.1448, "step": 60330 }, { "epoch": 0.10697362273588426, "grad_norm": 0.263671875, "learning_rate": 0.0007569669606964864, "loss": 0.1855, "step": 60332 }, { "epoch": 0.10697716890119409, "grad_norm": 0.478515625, "learning_rate": 0.0007569119356165086, "loss": 0.1862, "step": 60334 }, { "epoch": 0.1069807150665039, "grad_norm": 1.125, "learning_rate": 0.0007568569120369991, "loss": 0.239, "step": 60336 }, { "epoch": 0.10698426123181372, "grad_norm": 0.42578125, "learning_rate": 0.0007568018899581978, "loss": 0.2393, "step": 60338 }, { "epoch": 0.10698780739712353, "grad_norm": 1.796875, "learning_rate": 0.0007567468693803461, "loss": 0.2418, "step": 60340 }, { "epoch": 0.10699135356243335, "grad_norm": 0.265625, "learning_rate": 0.0007566918503036839, "loss": 0.1903, "step": 60342 }, { "epoch": 0.10699489972774316, "grad_norm": 1.0234375, "learning_rate": 0.000756636832728452, "loss": 0.2056, "step": 60344 }, { "epoch": 0.10699844589305298, "grad_norm": 0.5625, "learning_rate": 0.0007565818166548915, "loss": 0.1956, "step": 60346 }, { "epoch": 0.10700199205836279, "grad_norm": 0.478515625, "learning_rate": 0.0007565268020832424, "loss": 0.1674, "step": 60348 }, { "epoch": 0.1070055382236726, "grad_norm": 0.333984375, "learning_rate": 0.0007564717890137456, "loss": 0.22, "step": 60350 }, { "epoch": 0.10700908438898242, "grad_norm": 0.255859375, "learning_rate": 0.0007564167774466415, "loss": 0.2155, "step": 60352 }, { "epoch": 0.10701263055429223, "grad_norm": 0.609375, "learning_rate": 0.0007563617673821713, "loss": 0.178, "step": 60354 }, { "epoch": 0.10701617671960205, "grad_norm": 0.3046875, "learning_rate": 0.0007563067588205747, "loss": 0.2112, "step": 60356 }, { "epoch": 0.10701972288491186, "grad_norm": 0.341796875, "learning_rate": 0.000756251751762093, "loss": 0.1922, "step": 60358 }, { "epoch": 0.10702326905022168, "grad_norm": 0.43359375, "learning_rate": 0.0007561967462069661, "loss": 0.2128, "step": 60360 }, { "epoch": 0.10702681521553149, "grad_norm": 0.99609375, "learning_rate": 0.0007561417421554355, "loss": 0.1684, "step": 60362 }, { "epoch": 0.10703036138084131, "grad_norm": 0.34765625, "learning_rate": 0.0007560867396077406, "loss": 0.188, "step": 60364 }, { "epoch": 0.10703390754615112, "grad_norm": 0.5390625, "learning_rate": 0.0007560317385641226, "loss": 0.1188, "step": 60366 }, { "epoch": 0.10703745371146094, "grad_norm": 0.451171875, "learning_rate": 0.000755976739024822, "loss": 0.2598, "step": 60368 }, { "epoch": 0.10704099987677075, "grad_norm": 0.435546875, "learning_rate": 0.0007559217409900798, "loss": 0.1416, "step": 60370 }, { "epoch": 0.10704454604208057, "grad_norm": 0.39453125, "learning_rate": 0.0007558667444601355, "loss": 0.1696, "step": 60372 }, { "epoch": 0.10704809220739038, "grad_norm": 0.2392578125, "learning_rate": 0.0007558117494352303, "loss": 0.2014, "step": 60374 }, { "epoch": 0.1070516383727002, "grad_norm": 0.322265625, "learning_rate": 0.0007557567559156046, "loss": 0.1712, "step": 60376 }, { "epoch": 0.10705518453801001, "grad_norm": 0.1650390625, "learning_rate": 0.0007557017639014991, "loss": 0.1788, "step": 60378 }, { "epoch": 0.10705873070331984, "grad_norm": 0.484375, "learning_rate": 0.000755646773393154, "loss": 0.1896, "step": 60380 }, { "epoch": 0.10706227686862965, "grad_norm": 0.333984375, "learning_rate": 0.0007555917843908096, "loss": 0.1779, "step": 60382 }, { "epoch": 0.10706582303393947, "grad_norm": 0.291015625, "learning_rate": 0.0007555367968947069, "loss": 0.1528, "step": 60384 }, { "epoch": 0.10706936919924928, "grad_norm": 0.373046875, "learning_rate": 0.0007554818109050863, "loss": 0.1804, "step": 60386 }, { "epoch": 0.1070729153645591, "grad_norm": 0.435546875, "learning_rate": 0.000755426826422188, "loss": 0.1389, "step": 60388 }, { "epoch": 0.10707646152986891, "grad_norm": 0.384765625, "learning_rate": 0.0007553718434462524, "loss": 0.1837, "step": 60390 }, { "epoch": 0.10708000769517872, "grad_norm": 0.279296875, "learning_rate": 0.0007553168619775204, "loss": 0.2196, "step": 60392 }, { "epoch": 0.10708355386048854, "grad_norm": 0.328125, "learning_rate": 0.0007552618820162322, "loss": 0.1872, "step": 60394 }, { "epoch": 0.10708710002579835, "grad_norm": 0.388671875, "learning_rate": 0.0007552069035626284, "loss": 0.1936, "step": 60396 }, { "epoch": 0.10709064619110817, "grad_norm": 0.70703125, "learning_rate": 0.0007551519266169491, "loss": 0.3422, "step": 60398 }, { "epoch": 0.10709419235641798, "grad_norm": 0.373046875, "learning_rate": 0.000755096951179435, "loss": 0.1689, "step": 60400 }, { "epoch": 0.1070977385217278, "grad_norm": 0.333984375, "learning_rate": 0.0007550419772503268, "loss": 0.2087, "step": 60402 }, { "epoch": 0.10710128468703761, "grad_norm": 0.197265625, "learning_rate": 0.0007549870048298642, "loss": 0.1338, "step": 60404 }, { "epoch": 0.10710483085234743, "grad_norm": 1.0234375, "learning_rate": 0.0007549320339182879, "loss": 0.2411, "step": 60406 }, { "epoch": 0.10710837701765724, "grad_norm": 0.61328125, "learning_rate": 0.0007548770645158387, "loss": 0.1932, "step": 60408 }, { "epoch": 0.10711192318296706, "grad_norm": 0.58203125, "learning_rate": 0.000754822096622757, "loss": 0.1977, "step": 60410 }, { "epoch": 0.10711546934827687, "grad_norm": 0.79296875, "learning_rate": 0.0007547671302392826, "loss": 0.1664, "step": 60412 }, { "epoch": 0.10711901551358669, "grad_norm": 0.60546875, "learning_rate": 0.0007547121653656565, "loss": 0.1614, "step": 60414 }, { "epoch": 0.1071225616788965, "grad_norm": 0.6484375, "learning_rate": 0.0007546572020021187, "loss": 0.3044, "step": 60416 }, { "epoch": 0.10712610784420631, "grad_norm": 0.439453125, "learning_rate": 0.0007546022401489095, "loss": 0.1593, "step": 60418 }, { "epoch": 0.10712965400951613, "grad_norm": 0.55859375, "learning_rate": 0.00075454727980627, "loss": 0.1813, "step": 60420 }, { "epoch": 0.10713320017482594, "grad_norm": 0.451171875, "learning_rate": 0.0007544923209744397, "loss": 0.2415, "step": 60422 }, { "epoch": 0.10713674634013577, "grad_norm": 0.60546875, "learning_rate": 0.000754437363653659, "loss": 0.1828, "step": 60424 }, { "epoch": 0.10714029250544559, "grad_norm": 0.1728515625, "learning_rate": 0.0007543824078441685, "loss": 0.1906, "step": 60426 }, { "epoch": 0.1071438386707554, "grad_norm": 0.35546875, "learning_rate": 0.0007543274535462094, "loss": 0.1571, "step": 60428 }, { "epoch": 0.10714738483606522, "grad_norm": 0.671875, "learning_rate": 0.0007542725007600206, "loss": 0.198, "step": 60430 }, { "epoch": 0.10715093100137503, "grad_norm": 0.4375, "learning_rate": 0.0007542175494858434, "loss": 0.1669, "step": 60432 }, { "epoch": 0.10715447716668484, "grad_norm": 0.5703125, "learning_rate": 0.0007541625997239174, "loss": 0.1655, "step": 60434 }, { "epoch": 0.10715802333199466, "grad_norm": 0.7734375, "learning_rate": 0.0007541076514744838, "loss": 0.1838, "step": 60436 }, { "epoch": 0.10716156949730447, "grad_norm": 1.9609375, "learning_rate": 0.0007540527047377821, "loss": 0.2794, "step": 60438 }, { "epoch": 0.10716511566261429, "grad_norm": 0.306640625, "learning_rate": 0.0007539977595140528, "loss": 0.2009, "step": 60440 }, { "epoch": 0.1071686618279241, "grad_norm": 0.90234375, "learning_rate": 0.0007539428158035362, "loss": 0.1681, "step": 60442 }, { "epoch": 0.10717220799323392, "grad_norm": 0.451171875, "learning_rate": 0.0007538878736064731, "loss": 0.1391, "step": 60444 }, { "epoch": 0.10717575415854373, "grad_norm": 0.283203125, "learning_rate": 0.0007538329329231033, "loss": 0.1734, "step": 60446 }, { "epoch": 0.10717930032385355, "grad_norm": 0.322265625, "learning_rate": 0.0007537779937536671, "loss": 0.1861, "step": 60448 }, { "epoch": 0.10718284648916336, "grad_norm": 0.37109375, "learning_rate": 0.0007537230560984048, "loss": 0.1845, "step": 60450 }, { "epoch": 0.10718639265447318, "grad_norm": 0.56640625, "learning_rate": 0.000753668119957557, "loss": 0.1996, "step": 60452 }, { "epoch": 0.10718993881978299, "grad_norm": 0.99609375, "learning_rate": 0.0007536131853313635, "loss": 0.1641, "step": 60454 }, { "epoch": 0.1071934849850928, "grad_norm": 0.6640625, "learning_rate": 0.0007535582522200644, "loss": 0.1853, "step": 60456 }, { "epoch": 0.10719703115040262, "grad_norm": 0.494140625, "learning_rate": 0.0007535033206239006, "loss": 0.1449, "step": 60458 }, { "epoch": 0.10720057731571243, "grad_norm": 0.2041015625, "learning_rate": 0.0007534483905431122, "loss": 0.183, "step": 60460 }, { "epoch": 0.10720412348102225, "grad_norm": 0.455078125, "learning_rate": 0.0007533934619779387, "loss": 0.2257, "step": 60462 }, { "epoch": 0.10720766964633206, "grad_norm": 0.2314453125, "learning_rate": 0.0007533385349286212, "loss": 0.1665, "step": 60464 }, { "epoch": 0.10721121581164188, "grad_norm": 1.7890625, "learning_rate": 0.0007532836093953994, "loss": 0.3858, "step": 60466 }, { "epoch": 0.10721476197695169, "grad_norm": 0.310546875, "learning_rate": 0.0007532286853785142, "loss": 0.1699, "step": 60468 }, { "epoch": 0.10721830814226152, "grad_norm": 0.162109375, "learning_rate": 0.0007531737628782047, "loss": 0.1593, "step": 60470 }, { "epoch": 0.10722185430757133, "grad_norm": 0.365234375, "learning_rate": 0.0007531188418947121, "loss": 0.2029, "step": 60472 }, { "epoch": 0.10722540047288115, "grad_norm": 1.1484375, "learning_rate": 0.0007530639224282758, "loss": 0.1821, "step": 60474 }, { "epoch": 0.10722894663819096, "grad_norm": 0.2333984375, "learning_rate": 0.0007530090044791369, "loss": 0.2118, "step": 60476 }, { "epoch": 0.10723249280350078, "grad_norm": 0.7265625, "learning_rate": 0.0007529540880475348, "loss": 0.1921, "step": 60478 }, { "epoch": 0.10723603896881059, "grad_norm": 0.3359375, "learning_rate": 0.0007528991731337096, "loss": 0.1602, "step": 60480 }, { "epoch": 0.10723958513412041, "grad_norm": 0.27734375, "learning_rate": 0.0007528442597379019, "loss": 0.1886, "step": 60482 }, { "epoch": 0.10724313129943022, "grad_norm": 0.341796875, "learning_rate": 0.0007527893478603522, "loss": 0.1657, "step": 60484 }, { "epoch": 0.10724667746474004, "grad_norm": 0.73046875, "learning_rate": 0.0007527344375013001, "loss": 0.1823, "step": 60486 }, { "epoch": 0.10725022363004985, "grad_norm": 0.4296875, "learning_rate": 0.0007526795286609856, "loss": 0.2414, "step": 60488 }, { "epoch": 0.10725376979535967, "grad_norm": 0.421875, "learning_rate": 0.000752624621339649, "loss": 0.1857, "step": 60490 }, { "epoch": 0.10725731596066948, "grad_norm": 0.57421875, "learning_rate": 0.000752569715537531, "loss": 0.1943, "step": 60492 }, { "epoch": 0.1072608621259793, "grad_norm": 0.60546875, "learning_rate": 0.000752514811254871, "loss": 0.1109, "step": 60494 }, { "epoch": 0.10726440829128911, "grad_norm": 0.27734375, "learning_rate": 0.0007524599084919092, "loss": 0.2092, "step": 60496 }, { "epoch": 0.10726795445659892, "grad_norm": 0.330078125, "learning_rate": 0.0007524050072488859, "loss": 0.2364, "step": 60498 }, { "epoch": 0.10727150062190874, "grad_norm": 0.255859375, "learning_rate": 0.0007523501075260413, "loss": 0.1877, "step": 60500 }, { "epoch": 0.10727504678721855, "grad_norm": 0.298828125, "learning_rate": 0.0007522952093236158, "loss": 0.1895, "step": 60502 }, { "epoch": 0.10727859295252837, "grad_norm": 0.61328125, "learning_rate": 0.0007522403126418484, "loss": 0.1652, "step": 60504 }, { "epoch": 0.10728213911783818, "grad_norm": 0.328125, "learning_rate": 0.0007521854174809804, "loss": 0.1516, "step": 60506 }, { "epoch": 0.107285685283148, "grad_norm": 0.828125, "learning_rate": 0.000752130523841251, "loss": 0.3644, "step": 60508 }, { "epoch": 0.10728923144845781, "grad_norm": 0.197265625, "learning_rate": 0.0007520756317229009, "loss": 0.1453, "step": 60510 }, { "epoch": 0.10729277761376763, "grad_norm": 0.2412109375, "learning_rate": 0.0007520207411261697, "loss": 0.1867, "step": 60512 }, { "epoch": 0.10729632377907744, "grad_norm": 0.1591796875, "learning_rate": 0.0007519658520512975, "loss": 0.1907, "step": 60514 }, { "epoch": 0.10729986994438727, "grad_norm": 0.287109375, "learning_rate": 0.0007519109644985247, "loss": 0.1202, "step": 60516 }, { "epoch": 0.10730341610969708, "grad_norm": 0.1806640625, "learning_rate": 0.0007518560784680913, "loss": 0.1551, "step": 60518 }, { "epoch": 0.1073069622750069, "grad_norm": 0.6328125, "learning_rate": 0.0007518011939602369, "loss": 0.1323, "step": 60520 }, { "epoch": 0.10731050844031671, "grad_norm": 0.333984375, "learning_rate": 0.0007517463109752018, "loss": 0.2417, "step": 60522 }, { "epoch": 0.10731405460562653, "grad_norm": 0.140625, "learning_rate": 0.0007516914295132262, "loss": 0.1387, "step": 60524 }, { "epoch": 0.10731760077093634, "grad_norm": 0.400390625, "learning_rate": 0.0007516365495745501, "loss": 0.2722, "step": 60526 }, { "epoch": 0.10732114693624616, "grad_norm": 0.3515625, "learning_rate": 0.0007515816711594132, "loss": 0.1703, "step": 60528 }, { "epoch": 0.10732469310155597, "grad_norm": 0.5078125, "learning_rate": 0.0007515267942680557, "loss": 0.1614, "step": 60530 }, { "epoch": 0.10732823926686579, "grad_norm": 0.375, "learning_rate": 0.0007514719189007175, "loss": 0.1838, "step": 60532 }, { "epoch": 0.1073317854321756, "grad_norm": 0.458984375, "learning_rate": 0.0007514170450576389, "loss": 0.191, "step": 60534 }, { "epoch": 0.10733533159748541, "grad_norm": 0.419921875, "learning_rate": 0.0007513621727390594, "loss": 0.1731, "step": 60536 }, { "epoch": 0.10733887776279523, "grad_norm": 1.328125, "learning_rate": 0.0007513073019452195, "loss": 0.2031, "step": 60538 }, { "epoch": 0.10734242392810504, "grad_norm": 0.4921875, "learning_rate": 0.0007512524326763586, "loss": 0.4131, "step": 60540 }, { "epoch": 0.10734597009341486, "grad_norm": 0.443359375, "learning_rate": 0.0007511975649327174, "loss": 0.16, "step": 60542 }, { "epoch": 0.10734951625872467, "grad_norm": 0.58203125, "learning_rate": 0.0007511426987145351, "loss": 0.1986, "step": 60544 }, { "epoch": 0.10735306242403449, "grad_norm": 0.494140625, "learning_rate": 0.0007510878340220522, "loss": 0.2276, "step": 60546 }, { "epoch": 0.1073566085893443, "grad_norm": 0.427734375, "learning_rate": 0.0007510329708555082, "loss": 0.1345, "step": 60548 }, { "epoch": 0.10736015475465412, "grad_norm": 0.478515625, "learning_rate": 0.0007509781092151438, "loss": 0.1551, "step": 60550 }, { "epoch": 0.10736370091996393, "grad_norm": 2.546875, "learning_rate": 0.0007509232491011979, "loss": 0.2326, "step": 60552 }, { "epoch": 0.10736724708527375, "grad_norm": 0.2451171875, "learning_rate": 0.000750868390513911, "loss": 0.1485, "step": 60554 }, { "epoch": 0.10737079325058356, "grad_norm": 0.134765625, "learning_rate": 0.0007508135334535229, "loss": 0.2021, "step": 60556 }, { "epoch": 0.10737433941589337, "grad_norm": 0.26953125, "learning_rate": 0.000750758677920274, "loss": 0.1568, "step": 60558 }, { "epoch": 0.1073778855812032, "grad_norm": 0.97265625, "learning_rate": 0.0007507038239144035, "loss": 0.1597, "step": 60560 }, { "epoch": 0.10738143174651302, "grad_norm": 0.404296875, "learning_rate": 0.0007506489714361517, "loss": 0.2645, "step": 60562 }, { "epoch": 0.10738497791182283, "grad_norm": 0.376953125, "learning_rate": 0.000750594120485758, "loss": 0.1971, "step": 60564 }, { "epoch": 0.10738852407713265, "grad_norm": 0.40625, "learning_rate": 0.0007505392710634633, "loss": 0.2491, "step": 60566 }, { "epoch": 0.10739207024244246, "grad_norm": 0.91015625, "learning_rate": 0.0007504844231695063, "loss": 0.1674, "step": 60568 }, { "epoch": 0.10739561640775228, "grad_norm": 0.33203125, "learning_rate": 0.0007504295768041274, "loss": 0.1756, "step": 60570 }, { "epoch": 0.10739916257306209, "grad_norm": 4.71875, "learning_rate": 0.0007503747319675665, "loss": 0.2774, "step": 60572 }, { "epoch": 0.1074027087383719, "grad_norm": 2.78125, "learning_rate": 0.000750319888660064, "loss": 0.2757, "step": 60574 }, { "epoch": 0.10740625490368172, "grad_norm": 0.34375, "learning_rate": 0.0007502650468818586, "loss": 0.1269, "step": 60576 }, { "epoch": 0.10740980106899153, "grad_norm": 0.5078125, "learning_rate": 0.0007502102066331909, "loss": 0.2593, "step": 60578 }, { "epoch": 0.10741334723430135, "grad_norm": 0.94921875, "learning_rate": 0.0007501553679143006, "loss": 0.2442, "step": 60580 }, { "epoch": 0.10741689339961116, "grad_norm": 0.23046875, "learning_rate": 0.0007501005307254272, "loss": 0.1499, "step": 60582 }, { "epoch": 0.10742043956492098, "grad_norm": 0.765625, "learning_rate": 0.0007500456950668116, "loss": 0.1631, "step": 60584 }, { "epoch": 0.10742398573023079, "grad_norm": 0.259765625, "learning_rate": 0.000749990860938692, "loss": 0.1523, "step": 60586 }, { "epoch": 0.1074275318955406, "grad_norm": 0.447265625, "learning_rate": 0.0007499360283413094, "loss": 0.1793, "step": 60588 }, { "epoch": 0.10743107806085042, "grad_norm": 0.2373046875, "learning_rate": 0.0007498811972749031, "loss": 0.1889, "step": 60590 }, { "epoch": 0.10743462422616024, "grad_norm": 0.330078125, "learning_rate": 0.0007498263677397137, "loss": 0.1657, "step": 60592 }, { "epoch": 0.10743817039147005, "grad_norm": 0.353515625, "learning_rate": 0.0007497715397359796, "loss": 0.1405, "step": 60594 }, { "epoch": 0.10744171655677986, "grad_norm": 0.376953125, "learning_rate": 0.0007497167132639417, "loss": 0.1629, "step": 60596 }, { "epoch": 0.10744526272208968, "grad_norm": 0.22265625, "learning_rate": 0.0007496618883238395, "loss": 0.2273, "step": 60598 }, { "epoch": 0.1074488088873995, "grad_norm": 1.765625, "learning_rate": 0.0007496070649159128, "loss": 0.2782, "step": 60600 }, { "epoch": 0.10745235505270931, "grad_norm": 0.42578125, "learning_rate": 0.0007495522430404012, "loss": 0.3086, "step": 60602 }, { "epoch": 0.10745590121801912, "grad_norm": 0.86328125, "learning_rate": 0.0007494974226975442, "loss": 0.3726, "step": 60604 }, { "epoch": 0.10745944738332895, "grad_norm": 0.349609375, "learning_rate": 0.0007494426038875819, "loss": 0.1874, "step": 60606 }, { "epoch": 0.10746299354863877, "grad_norm": 0.61328125, "learning_rate": 0.0007493877866107543, "loss": 0.2064, "step": 60608 }, { "epoch": 0.10746653971394858, "grad_norm": 0.453125, "learning_rate": 0.0007493329708673008, "loss": 0.1472, "step": 60610 }, { "epoch": 0.1074700858792584, "grad_norm": 0.4140625, "learning_rate": 0.0007492781566574611, "loss": 0.1983, "step": 60612 }, { "epoch": 0.10747363204456821, "grad_norm": 0.8515625, "learning_rate": 0.0007492233439814748, "loss": 0.2038, "step": 60614 }, { "epoch": 0.10747717820987802, "grad_norm": 1.0390625, "learning_rate": 0.0007491685328395825, "loss": 0.1822, "step": 60616 }, { "epoch": 0.10748072437518784, "grad_norm": 0.205078125, "learning_rate": 0.0007491137232320227, "loss": 0.1723, "step": 60618 }, { "epoch": 0.10748427054049765, "grad_norm": 5.21875, "learning_rate": 0.0007490589151590359, "loss": 0.2843, "step": 60620 }, { "epoch": 0.10748781670580747, "grad_norm": 2.234375, "learning_rate": 0.0007490041086208614, "loss": 0.2317, "step": 60622 }, { "epoch": 0.10749136287111728, "grad_norm": 0.59375, "learning_rate": 0.0007489493036177393, "loss": 0.188, "step": 60624 }, { "epoch": 0.1074949090364271, "grad_norm": 0.2080078125, "learning_rate": 0.0007488945001499087, "loss": 0.1784, "step": 60626 }, { "epoch": 0.10749845520173691, "grad_norm": 0.328125, "learning_rate": 0.0007488396982176098, "loss": 0.1754, "step": 60628 }, { "epoch": 0.10750200136704673, "grad_norm": 0.64453125, "learning_rate": 0.0007487848978210819, "loss": 0.1775, "step": 60630 }, { "epoch": 0.10750554753235654, "grad_norm": 0.59765625, "learning_rate": 0.0007487300989605652, "loss": 0.2427, "step": 60632 }, { "epoch": 0.10750909369766636, "grad_norm": 0.9375, "learning_rate": 0.0007486753016362988, "loss": 0.173, "step": 60634 }, { "epoch": 0.10751263986297617, "grad_norm": 0.2109375, "learning_rate": 0.0007486205058485222, "loss": 0.1442, "step": 60636 }, { "epoch": 0.10751618602828598, "grad_norm": 0.34375, "learning_rate": 0.000748565711597476, "loss": 0.1929, "step": 60638 }, { "epoch": 0.1075197321935958, "grad_norm": 2.46875, "learning_rate": 0.0007485109188833991, "loss": 0.3644, "step": 60640 }, { "epoch": 0.10752327835890561, "grad_norm": 0.396484375, "learning_rate": 0.0007484561277065311, "loss": 0.1742, "step": 60642 }, { "epoch": 0.10752682452421543, "grad_norm": 0.62109375, "learning_rate": 0.000748401338067112, "loss": 0.1674, "step": 60644 }, { "epoch": 0.10753037068952524, "grad_norm": 0.330078125, "learning_rate": 0.000748346549965381, "loss": 0.1763, "step": 60646 }, { "epoch": 0.10753391685483506, "grad_norm": 0.59375, "learning_rate": 0.0007482917634015781, "loss": 0.1739, "step": 60648 }, { "epoch": 0.10753746302014487, "grad_norm": 0.45703125, "learning_rate": 0.0007482369783759424, "loss": 0.2283, "step": 60650 }, { "epoch": 0.1075410091854547, "grad_norm": 0.267578125, "learning_rate": 0.000748182194888714, "loss": 0.1844, "step": 60652 }, { "epoch": 0.10754455535076451, "grad_norm": 0.31640625, "learning_rate": 0.0007481274129401325, "loss": 0.1883, "step": 60654 }, { "epoch": 0.10754810151607433, "grad_norm": 0.54296875, "learning_rate": 0.0007480726325304372, "loss": 0.25, "step": 60656 }, { "epoch": 0.10755164768138414, "grad_norm": 1.828125, "learning_rate": 0.0007480178536598679, "loss": 0.2292, "step": 60658 }, { "epoch": 0.10755519384669396, "grad_norm": 0.234375, "learning_rate": 0.0007479630763286638, "loss": 0.1544, "step": 60660 }, { "epoch": 0.10755874001200377, "grad_norm": 0.90234375, "learning_rate": 0.0007479083005370646, "loss": 0.1855, "step": 60662 }, { "epoch": 0.10756228617731359, "grad_norm": 0.271484375, "learning_rate": 0.0007478535262853105, "loss": 0.2117, "step": 60664 }, { "epoch": 0.1075658323426234, "grad_norm": 0.578125, "learning_rate": 0.0007477987535736402, "loss": 0.2196, "step": 60666 }, { "epoch": 0.10756937850793322, "grad_norm": 1.7265625, "learning_rate": 0.0007477439824022936, "loss": 0.1928, "step": 60668 }, { "epoch": 0.10757292467324303, "grad_norm": 0.82421875, "learning_rate": 0.00074768921277151, "loss": 0.2245, "step": 60670 }, { "epoch": 0.10757647083855285, "grad_norm": 2.046875, "learning_rate": 0.0007476344446815293, "loss": 0.284, "step": 60672 }, { "epoch": 0.10758001700386266, "grad_norm": 0.5625, "learning_rate": 0.0007475796781325912, "loss": 0.1469, "step": 60674 }, { "epoch": 0.10758356316917247, "grad_norm": 0.3671875, "learning_rate": 0.0007475249131249345, "loss": 0.1973, "step": 60676 }, { "epoch": 0.10758710933448229, "grad_norm": 0.625, "learning_rate": 0.000747470149658799, "loss": 0.172, "step": 60678 }, { "epoch": 0.1075906554997921, "grad_norm": 0.490234375, "learning_rate": 0.0007474153877344245, "loss": 0.1991, "step": 60680 }, { "epoch": 0.10759420166510192, "grad_norm": 0.76171875, "learning_rate": 0.0007473606273520506, "loss": 0.2459, "step": 60682 }, { "epoch": 0.10759774783041173, "grad_norm": 1.765625, "learning_rate": 0.0007473058685119158, "loss": 0.3104, "step": 60684 }, { "epoch": 0.10760129399572155, "grad_norm": 0.421875, "learning_rate": 0.0007472511112142606, "loss": 0.1972, "step": 60686 }, { "epoch": 0.10760484016103136, "grad_norm": 0.333984375, "learning_rate": 0.0007471963554593242, "loss": 0.209, "step": 60688 }, { "epoch": 0.10760838632634118, "grad_norm": 2.703125, "learning_rate": 0.000747141601247346, "loss": 0.1622, "step": 60690 }, { "epoch": 0.10761193249165099, "grad_norm": 0.90625, "learning_rate": 0.0007470868485785654, "loss": 0.186, "step": 60692 }, { "epoch": 0.1076154786569608, "grad_norm": 1.0078125, "learning_rate": 0.0007470320974532218, "loss": 0.1691, "step": 60694 }, { "epoch": 0.10761902482227063, "grad_norm": 0.40234375, "learning_rate": 0.0007469773478715549, "loss": 0.1874, "step": 60696 }, { "epoch": 0.10762257098758045, "grad_norm": 0.419921875, "learning_rate": 0.0007469225998338043, "loss": 0.2056, "step": 60698 }, { "epoch": 0.10762611715289026, "grad_norm": 0.333984375, "learning_rate": 0.000746867853340209, "loss": 0.2215, "step": 60700 }, { "epoch": 0.10762966331820008, "grad_norm": 3.5, "learning_rate": 0.0007468131083910081, "loss": 0.2487, "step": 60702 }, { "epoch": 0.10763320948350989, "grad_norm": 1.7890625, "learning_rate": 0.000746758364986442, "loss": 0.2687, "step": 60704 }, { "epoch": 0.1076367556488197, "grad_norm": 4.0625, "learning_rate": 0.0007467036231267497, "loss": 0.4454, "step": 60706 }, { "epoch": 0.10764030181412952, "grad_norm": 0.83984375, "learning_rate": 0.0007466488828121703, "loss": 0.1935, "step": 60708 }, { "epoch": 0.10764384797943934, "grad_norm": 0.4921875, "learning_rate": 0.0007465941440429431, "loss": 0.1825, "step": 60710 }, { "epoch": 0.10764739414474915, "grad_norm": 0.248046875, "learning_rate": 0.0007465394068193085, "loss": 0.1815, "step": 60712 }, { "epoch": 0.10765094031005897, "grad_norm": 1.578125, "learning_rate": 0.000746484671141505, "loss": 0.2272, "step": 60714 }, { "epoch": 0.10765448647536878, "grad_norm": 1.1953125, "learning_rate": 0.0007464299370097722, "loss": 0.1946, "step": 60716 }, { "epoch": 0.1076580326406786, "grad_norm": 3.09375, "learning_rate": 0.0007463752044243493, "loss": 0.2678, "step": 60718 }, { "epoch": 0.10766157880598841, "grad_norm": 1.6640625, "learning_rate": 0.0007463204733854759, "loss": 0.3335, "step": 60720 }, { "epoch": 0.10766512497129822, "grad_norm": 0.23046875, "learning_rate": 0.0007462657438933915, "loss": 0.1773, "step": 60722 }, { "epoch": 0.10766867113660804, "grad_norm": 1.0546875, "learning_rate": 0.0007462110159483351, "loss": 0.2576, "step": 60724 }, { "epoch": 0.10767221730191785, "grad_norm": 0.53515625, "learning_rate": 0.0007461562895505461, "loss": 0.1749, "step": 60726 }, { "epoch": 0.10767576346722767, "grad_norm": 2.234375, "learning_rate": 0.000746101564700264, "loss": 0.4067, "step": 60728 }, { "epoch": 0.10767930963253748, "grad_norm": 0.498046875, "learning_rate": 0.0007460468413977284, "loss": 0.2153, "step": 60730 }, { "epoch": 0.1076828557978473, "grad_norm": 0.87109375, "learning_rate": 0.0007459921196431781, "loss": 0.2004, "step": 60732 }, { "epoch": 0.10768640196315711, "grad_norm": 2.109375, "learning_rate": 0.0007459373994368525, "loss": 0.2336, "step": 60734 }, { "epoch": 0.10768994812846693, "grad_norm": 0.7890625, "learning_rate": 0.0007458826807789911, "loss": 0.4122, "step": 60736 }, { "epoch": 0.10769349429377674, "grad_norm": 1.34375, "learning_rate": 0.0007458279636698335, "loss": 0.3463, "step": 60738 }, { "epoch": 0.10769704045908655, "grad_norm": 0.46484375, "learning_rate": 0.0007457732481096185, "loss": 0.1758, "step": 60740 }, { "epoch": 0.10770058662439638, "grad_norm": 0.31640625, "learning_rate": 0.0007457185340985853, "loss": 0.2132, "step": 60742 }, { "epoch": 0.1077041327897062, "grad_norm": 0.376953125, "learning_rate": 0.0007456638216369733, "loss": 0.2158, "step": 60744 }, { "epoch": 0.10770767895501601, "grad_norm": 0.60546875, "learning_rate": 0.0007456091107250227, "loss": 0.2458, "step": 60746 }, { "epoch": 0.10771122512032583, "grad_norm": 0.283203125, "learning_rate": 0.0007455544013629713, "loss": 0.1626, "step": 60748 }, { "epoch": 0.10771477128563564, "grad_norm": 0.265625, "learning_rate": 0.0007454996935510594, "loss": 0.3113, "step": 60750 }, { "epoch": 0.10771831745094546, "grad_norm": 0.232421875, "learning_rate": 0.0007454449872895255, "loss": 0.1791, "step": 60752 }, { "epoch": 0.10772186361625527, "grad_norm": 0.345703125, "learning_rate": 0.0007453902825786097, "loss": 0.2013, "step": 60754 }, { "epoch": 0.10772540978156508, "grad_norm": 0.6328125, "learning_rate": 0.0007453355794185508, "loss": 0.1521, "step": 60756 }, { "epoch": 0.1077289559468749, "grad_norm": 0.37890625, "learning_rate": 0.0007452808778095879, "loss": 0.1344, "step": 60758 }, { "epoch": 0.10773250211218471, "grad_norm": 0.375, "learning_rate": 0.0007452261777519605, "loss": 0.2079, "step": 60760 }, { "epoch": 0.10773604827749453, "grad_norm": 0.248046875, "learning_rate": 0.0007451714792459075, "loss": 0.152, "step": 60762 }, { "epoch": 0.10773959444280434, "grad_norm": 1.1640625, "learning_rate": 0.0007451167822916687, "loss": 0.2802, "step": 60764 }, { "epoch": 0.10774314060811416, "grad_norm": 0.3515625, "learning_rate": 0.0007450620868894826, "loss": 0.1838, "step": 60766 }, { "epoch": 0.10774668677342397, "grad_norm": 1.6875, "learning_rate": 0.000745007393039589, "loss": 0.2333, "step": 60768 }, { "epoch": 0.10775023293873379, "grad_norm": 0.265625, "learning_rate": 0.0007449527007422265, "loss": 0.2021, "step": 60770 }, { "epoch": 0.1077537791040436, "grad_norm": 0.330078125, "learning_rate": 0.0007448980099976354, "loss": 0.2656, "step": 60772 }, { "epoch": 0.10775732526935342, "grad_norm": 0.1884765625, "learning_rate": 0.0007448433208060536, "loss": 0.1829, "step": 60774 }, { "epoch": 0.10776087143466323, "grad_norm": 0.3125, "learning_rate": 0.0007447886331677208, "loss": 0.1496, "step": 60776 }, { "epoch": 0.10776441759997304, "grad_norm": 0.64453125, "learning_rate": 0.000744733947082876, "loss": 0.2108, "step": 60778 }, { "epoch": 0.10776796376528286, "grad_norm": 0.77734375, "learning_rate": 0.0007446792625517591, "loss": 0.1383, "step": 60780 }, { "epoch": 0.10777150993059267, "grad_norm": 1.1484375, "learning_rate": 0.0007446245795746083, "loss": 0.1384, "step": 60782 }, { "epoch": 0.10777505609590249, "grad_norm": 0.248046875, "learning_rate": 0.0007445698981516633, "loss": 0.1997, "step": 60784 }, { "epoch": 0.1077786022612123, "grad_norm": 0.5, "learning_rate": 0.0007445152182831632, "loss": 0.1328, "step": 60786 }, { "epoch": 0.10778214842652213, "grad_norm": 0.64453125, "learning_rate": 0.000744460539969347, "loss": 0.2623, "step": 60788 }, { "epoch": 0.10778569459183195, "grad_norm": 0.63671875, "learning_rate": 0.000744405863210454, "loss": 0.2075, "step": 60790 }, { "epoch": 0.10778924075714176, "grad_norm": 0.6875, "learning_rate": 0.0007443511880067229, "loss": 0.1748, "step": 60792 }, { "epoch": 0.10779278692245158, "grad_norm": 0.3515625, "learning_rate": 0.0007442965143583931, "loss": 0.1558, "step": 60794 }, { "epoch": 0.10779633308776139, "grad_norm": 0.66796875, "learning_rate": 0.000744241842265704, "loss": 0.2202, "step": 60796 }, { "epoch": 0.1077998792530712, "grad_norm": 0.37890625, "learning_rate": 0.0007441871717288943, "loss": 0.2083, "step": 60798 }, { "epoch": 0.10780342541838102, "grad_norm": 0.48828125, "learning_rate": 0.0007441325027482031, "loss": 0.1827, "step": 60800 }, { "epoch": 0.10780697158369083, "grad_norm": 0.65625, "learning_rate": 0.0007440778353238698, "loss": 0.1929, "step": 60802 }, { "epoch": 0.10781051774900065, "grad_norm": 0.9375, "learning_rate": 0.0007440231694561335, "loss": 0.1805, "step": 60804 }, { "epoch": 0.10781406391431046, "grad_norm": 0.3984375, "learning_rate": 0.0007439685051452325, "loss": 0.1863, "step": 60806 }, { "epoch": 0.10781761007962028, "grad_norm": 0.671875, "learning_rate": 0.0007439138423914069, "loss": 0.1798, "step": 60808 }, { "epoch": 0.10782115624493009, "grad_norm": 0.359375, "learning_rate": 0.0007438591811948952, "loss": 0.1375, "step": 60810 }, { "epoch": 0.1078247024102399, "grad_norm": 0.1943359375, "learning_rate": 0.0007438045215559368, "loss": 0.1562, "step": 60812 }, { "epoch": 0.10782824857554972, "grad_norm": 1.015625, "learning_rate": 0.0007437498634747701, "loss": 0.305, "step": 60814 }, { "epoch": 0.10783179474085954, "grad_norm": 0.349609375, "learning_rate": 0.0007436952069516347, "loss": 0.1651, "step": 60816 }, { "epoch": 0.10783534090616935, "grad_norm": 0.357421875, "learning_rate": 0.0007436405519867695, "loss": 0.1994, "step": 60818 }, { "epoch": 0.10783888707147916, "grad_norm": 0.4375, "learning_rate": 0.0007435858985804138, "loss": 0.1994, "step": 60820 }, { "epoch": 0.10784243323678898, "grad_norm": 0.314453125, "learning_rate": 0.0007435312467328061, "loss": 0.1784, "step": 60822 }, { "epoch": 0.1078459794020988, "grad_norm": 1.0625, "learning_rate": 0.0007434765964441859, "loss": 0.2589, "step": 60824 }, { "epoch": 0.10784952556740861, "grad_norm": 1.2734375, "learning_rate": 0.0007434219477147917, "loss": 0.2773, "step": 60826 }, { "epoch": 0.10785307173271842, "grad_norm": 0.625, "learning_rate": 0.0007433673005448629, "loss": 0.2091, "step": 60828 }, { "epoch": 0.10785661789802824, "grad_norm": 0.265625, "learning_rate": 0.0007433126549346383, "loss": 0.1796, "step": 60830 }, { "epoch": 0.10786016406333807, "grad_norm": 0.314453125, "learning_rate": 0.000743258010884357, "loss": 0.1859, "step": 60832 }, { "epoch": 0.10786371022864788, "grad_norm": 1.3515625, "learning_rate": 0.0007432033683942576, "loss": 0.2546, "step": 60834 }, { "epoch": 0.1078672563939577, "grad_norm": 0.330078125, "learning_rate": 0.0007431487274645798, "loss": 0.2297, "step": 60836 }, { "epoch": 0.10787080255926751, "grad_norm": 0.92578125, "learning_rate": 0.0007430940880955623, "loss": 0.1652, "step": 60838 }, { "epoch": 0.10787434872457732, "grad_norm": 0.2890625, "learning_rate": 0.0007430394502874435, "loss": 0.1315, "step": 60840 }, { "epoch": 0.10787789488988714, "grad_norm": 0.296875, "learning_rate": 0.0007429848140404633, "loss": 0.1947, "step": 60842 }, { "epoch": 0.10788144105519695, "grad_norm": 0.453125, "learning_rate": 0.0007429301793548598, "loss": 0.1576, "step": 60844 }, { "epoch": 0.10788498722050677, "grad_norm": 0.380859375, "learning_rate": 0.0007428755462308725, "loss": 0.2136, "step": 60846 }, { "epoch": 0.10788853338581658, "grad_norm": 0.353515625, "learning_rate": 0.0007428209146687399, "loss": 0.1275, "step": 60848 }, { "epoch": 0.1078920795511264, "grad_norm": 0.7265625, "learning_rate": 0.0007427662846687013, "loss": 0.201, "step": 60850 }, { "epoch": 0.10789562571643621, "grad_norm": 0.89453125, "learning_rate": 0.0007427116562309953, "loss": 0.2231, "step": 60852 }, { "epoch": 0.10789917188174603, "grad_norm": 0.2275390625, "learning_rate": 0.0007426570293558612, "loss": 0.1733, "step": 60854 }, { "epoch": 0.10790271804705584, "grad_norm": 0.34375, "learning_rate": 0.0007426024040435375, "loss": 0.1674, "step": 60856 }, { "epoch": 0.10790626421236565, "grad_norm": 0.79296875, "learning_rate": 0.0007425477802942633, "loss": 0.2054, "step": 60858 }, { "epoch": 0.10790981037767547, "grad_norm": 0.453125, "learning_rate": 0.0007424931581082777, "loss": 0.1719, "step": 60860 }, { "epoch": 0.10791335654298528, "grad_norm": 1.2109375, "learning_rate": 0.0007424385374858191, "loss": 0.2782, "step": 60862 }, { "epoch": 0.1079169027082951, "grad_norm": 0.353515625, "learning_rate": 0.0007423839184271268, "loss": 0.2516, "step": 60864 }, { "epoch": 0.10792044887360491, "grad_norm": 1.71875, "learning_rate": 0.0007423293009324395, "loss": 0.2002, "step": 60866 }, { "epoch": 0.10792399503891473, "grad_norm": 0.40625, "learning_rate": 0.0007422746850019959, "loss": 0.2014, "step": 60868 }, { "epoch": 0.10792754120422454, "grad_norm": 0.3203125, "learning_rate": 0.0007422200706360354, "loss": 0.2022, "step": 60870 }, { "epoch": 0.10793108736953436, "grad_norm": 0.318359375, "learning_rate": 0.0007421654578347963, "loss": 0.1821, "step": 60872 }, { "epoch": 0.10793463353484417, "grad_norm": 0.58203125, "learning_rate": 0.0007421108465985173, "loss": 0.1532, "step": 60874 }, { "epoch": 0.10793817970015399, "grad_norm": 0.515625, "learning_rate": 0.0007420562369274377, "loss": 0.1621, "step": 60876 }, { "epoch": 0.10794172586546381, "grad_norm": 0.201171875, "learning_rate": 0.0007420016288217968, "loss": 0.1996, "step": 60878 }, { "epoch": 0.10794527203077363, "grad_norm": 0.341796875, "learning_rate": 0.000741947022281832, "loss": 0.1368, "step": 60880 }, { "epoch": 0.10794881819608344, "grad_norm": 0.55078125, "learning_rate": 0.0007418924173077835, "loss": 0.207, "step": 60882 }, { "epoch": 0.10795236436139326, "grad_norm": 0.44140625, "learning_rate": 0.0007418378138998894, "loss": 0.1758, "step": 60884 }, { "epoch": 0.10795591052670307, "grad_norm": 0.27734375, "learning_rate": 0.000741783212058389, "loss": 0.1981, "step": 60886 }, { "epoch": 0.10795945669201289, "grad_norm": 0.29296875, "learning_rate": 0.00074172861178352, "loss": 0.1594, "step": 60888 }, { "epoch": 0.1079630028573227, "grad_norm": 1.6640625, "learning_rate": 0.0007416740130755224, "loss": 0.3521, "step": 60890 }, { "epoch": 0.10796654902263252, "grad_norm": 0.2080078125, "learning_rate": 0.0007416194159346343, "loss": 0.1855, "step": 60892 }, { "epoch": 0.10797009518794233, "grad_norm": 0.244140625, "learning_rate": 0.0007415648203610949, "loss": 0.1557, "step": 60894 }, { "epoch": 0.10797364135325214, "grad_norm": 1.015625, "learning_rate": 0.0007415102263551428, "loss": 0.184, "step": 60896 }, { "epoch": 0.10797718751856196, "grad_norm": 0.412109375, "learning_rate": 0.0007414556339170167, "loss": 0.1455, "step": 60898 }, { "epoch": 0.10798073368387177, "grad_norm": 1.6875, "learning_rate": 0.0007414010430469554, "loss": 0.3047, "step": 60900 }, { "epoch": 0.10798427984918159, "grad_norm": 0.65625, "learning_rate": 0.0007413464537451975, "loss": 0.1483, "step": 60902 }, { "epoch": 0.1079878260144914, "grad_norm": 3.609375, "learning_rate": 0.0007412918660119821, "loss": 0.2708, "step": 60904 }, { "epoch": 0.10799137217980122, "grad_norm": 0.48828125, "learning_rate": 0.0007412372798475475, "loss": 0.1762, "step": 60906 }, { "epoch": 0.10799491834511103, "grad_norm": 0.1533203125, "learning_rate": 0.0007411826952521325, "loss": 0.1554, "step": 60908 }, { "epoch": 0.10799846451042085, "grad_norm": 0.9140625, "learning_rate": 0.0007411281122259765, "loss": 0.1501, "step": 60910 }, { "epoch": 0.10800201067573066, "grad_norm": 0.5234375, "learning_rate": 0.0007410735307693172, "loss": 0.198, "step": 60912 }, { "epoch": 0.10800555684104048, "grad_norm": 0.390625, "learning_rate": 0.0007410189508823942, "loss": 0.1385, "step": 60914 }, { "epoch": 0.10800910300635029, "grad_norm": 0.357421875, "learning_rate": 0.0007409643725654454, "loss": 0.199, "step": 60916 }, { "epoch": 0.1080126491716601, "grad_norm": 0.3046875, "learning_rate": 0.0007409097958187101, "loss": 0.1916, "step": 60918 }, { "epoch": 0.10801619533696992, "grad_norm": 0.328125, "learning_rate": 0.0007408552206424269, "loss": 0.1785, "step": 60920 }, { "epoch": 0.10801974150227973, "grad_norm": 0.189453125, "learning_rate": 0.0007408006470368341, "loss": 0.1719, "step": 60922 }, { "epoch": 0.10802328766758956, "grad_norm": 0.435546875, "learning_rate": 0.0007407460750021704, "loss": 0.1865, "step": 60924 }, { "epoch": 0.10802683383289938, "grad_norm": 0.2265625, "learning_rate": 0.0007406915045386749, "loss": 0.2031, "step": 60926 }, { "epoch": 0.10803037999820919, "grad_norm": 0.515625, "learning_rate": 0.0007406369356465863, "loss": 0.2034, "step": 60928 }, { "epoch": 0.108033926163519, "grad_norm": 0.240234375, "learning_rate": 0.0007405823683261427, "loss": 0.1346, "step": 60930 }, { "epoch": 0.10803747232882882, "grad_norm": 0.40625, "learning_rate": 0.0007405278025775832, "loss": 0.2293, "step": 60932 }, { "epoch": 0.10804101849413864, "grad_norm": 0.671875, "learning_rate": 0.0007404732384011463, "loss": 0.1813, "step": 60934 }, { "epoch": 0.10804456465944845, "grad_norm": 0.46875, "learning_rate": 0.0007404186757970706, "loss": 0.1691, "step": 60936 }, { "epoch": 0.10804811082475826, "grad_norm": 0.4921875, "learning_rate": 0.0007403641147655949, "loss": 0.1819, "step": 60938 }, { "epoch": 0.10805165699006808, "grad_norm": 0.302734375, "learning_rate": 0.0007403095553069572, "loss": 0.1186, "step": 60940 }, { "epoch": 0.1080552031553779, "grad_norm": 0.49609375, "learning_rate": 0.0007402549974213965, "loss": 0.1822, "step": 60942 }, { "epoch": 0.10805874932068771, "grad_norm": 0.330078125, "learning_rate": 0.0007402004411091523, "loss": 0.3953, "step": 60944 }, { "epoch": 0.10806229548599752, "grad_norm": 0.8203125, "learning_rate": 0.0007401458863704617, "loss": 0.1975, "step": 60946 }, { "epoch": 0.10806584165130734, "grad_norm": 0.4140625, "learning_rate": 0.0007400913332055641, "loss": 0.2254, "step": 60948 }, { "epoch": 0.10806938781661715, "grad_norm": 0.314453125, "learning_rate": 0.000740036781614698, "loss": 0.1586, "step": 60950 }, { "epoch": 0.10807293398192697, "grad_norm": 0.275390625, "learning_rate": 0.0007399822315981022, "loss": 0.1526, "step": 60952 }, { "epoch": 0.10807648014723678, "grad_norm": 0.298828125, "learning_rate": 0.0007399276831560144, "loss": 0.1259, "step": 60954 }, { "epoch": 0.1080800263125466, "grad_norm": 0.53125, "learning_rate": 0.0007398731362886742, "loss": 0.1714, "step": 60956 }, { "epoch": 0.10808357247785641, "grad_norm": 0.2177734375, "learning_rate": 0.0007398185909963195, "loss": 0.1541, "step": 60958 }, { "epoch": 0.10808711864316622, "grad_norm": 1.109375, "learning_rate": 0.0007397640472791895, "loss": 0.1773, "step": 60960 }, { "epoch": 0.10809066480847604, "grad_norm": 0.703125, "learning_rate": 0.0007397095051375217, "loss": 0.1316, "step": 60962 }, { "epoch": 0.10809421097378585, "grad_norm": 1.921875, "learning_rate": 0.0007396549645715556, "loss": 0.2609, "step": 60964 }, { "epoch": 0.10809775713909567, "grad_norm": 0.18359375, "learning_rate": 0.0007396004255815292, "loss": 0.1563, "step": 60966 }, { "epoch": 0.1081013033044055, "grad_norm": 0.484375, "learning_rate": 0.0007395458881676815, "loss": 0.183, "step": 60968 }, { "epoch": 0.10810484946971531, "grad_norm": 0.54296875, "learning_rate": 0.0007394913523302505, "loss": 0.2872, "step": 60970 }, { "epoch": 0.10810839563502513, "grad_norm": 7.6875, "learning_rate": 0.0007394368180694746, "loss": 0.5279, "step": 60972 }, { "epoch": 0.10811194180033494, "grad_norm": 1.1796875, "learning_rate": 0.0007393822853855929, "loss": 0.167, "step": 60974 }, { "epoch": 0.10811548796564475, "grad_norm": 0.8359375, "learning_rate": 0.0007393277542788437, "loss": 0.1782, "step": 60976 }, { "epoch": 0.10811903413095457, "grad_norm": 0.81640625, "learning_rate": 0.0007392732247494656, "loss": 0.2367, "step": 60978 }, { "epoch": 0.10812258029626438, "grad_norm": 0.234375, "learning_rate": 0.0007392186967976964, "loss": 0.3233, "step": 60980 }, { "epoch": 0.1081261264615742, "grad_norm": 0.341796875, "learning_rate": 0.0007391641704237751, "loss": 0.1595, "step": 60982 }, { "epoch": 0.10812967262688401, "grad_norm": 0.82421875, "learning_rate": 0.0007391096456279407, "loss": 0.1541, "step": 60984 }, { "epoch": 0.10813321879219383, "grad_norm": 0.197265625, "learning_rate": 0.0007390551224104305, "loss": 0.3158, "step": 60986 }, { "epoch": 0.10813676495750364, "grad_norm": 0.51171875, "learning_rate": 0.0007390006007714835, "loss": 0.1726, "step": 60988 }, { "epoch": 0.10814031112281346, "grad_norm": 0.193359375, "learning_rate": 0.0007389460807113384, "loss": 0.3318, "step": 60990 }, { "epoch": 0.10814385728812327, "grad_norm": 0.1982421875, "learning_rate": 0.0007388915622302337, "loss": 0.1716, "step": 60992 }, { "epoch": 0.10814740345343309, "grad_norm": 0.380859375, "learning_rate": 0.000738837045328407, "loss": 0.1358, "step": 60994 }, { "epoch": 0.1081509496187429, "grad_norm": 0.34375, "learning_rate": 0.0007387825300060975, "loss": 0.2155, "step": 60996 }, { "epoch": 0.10815449578405271, "grad_norm": 0.146484375, "learning_rate": 0.0007387280162635435, "loss": 0.1292, "step": 60998 }, { "epoch": 0.10815804194936253, "grad_norm": 0.287109375, "learning_rate": 0.0007386735041009833, "loss": 0.1464, "step": 61000 }, { "epoch": 0.10816158811467234, "grad_norm": 0.3671875, "learning_rate": 0.0007386189935186552, "loss": 0.1897, "step": 61002 }, { "epoch": 0.10816513427998216, "grad_norm": 0.458984375, "learning_rate": 0.0007385644845167976, "loss": 0.1499, "step": 61004 }, { "epoch": 0.10816868044529197, "grad_norm": 0.6640625, "learning_rate": 0.0007385099770956491, "loss": 0.1227, "step": 61006 }, { "epoch": 0.10817222661060179, "grad_norm": 0.5546875, "learning_rate": 0.0007384554712554481, "loss": 0.1836, "step": 61008 }, { "epoch": 0.1081757727759116, "grad_norm": 0.365234375, "learning_rate": 0.0007384009669964326, "loss": 0.1349, "step": 61010 }, { "epoch": 0.10817931894122142, "grad_norm": 0.640625, "learning_rate": 0.0007383464643188416, "loss": 0.1668, "step": 61012 }, { "epoch": 0.10818286510653125, "grad_norm": 1.0546875, "learning_rate": 0.0007382919632229129, "loss": 0.2388, "step": 61014 }, { "epoch": 0.10818641127184106, "grad_norm": 0.474609375, "learning_rate": 0.0007382374637088849, "loss": 0.1641, "step": 61016 }, { "epoch": 0.10818995743715087, "grad_norm": 0.69140625, "learning_rate": 0.0007381829657769966, "loss": 0.3516, "step": 61018 }, { "epoch": 0.10819350360246069, "grad_norm": 0.328125, "learning_rate": 0.0007381284694274853, "loss": 0.145, "step": 61020 }, { "epoch": 0.1081970497677705, "grad_norm": 0.859375, "learning_rate": 0.00073807397466059, "loss": 0.4604, "step": 61022 }, { "epoch": 0.10820059593308032, "grad_norm": 0.259765625, "learning_rate": 0.0007380194814765489, "loss": 0.1896, "step": 61024 }, { "epoch": 0.10820414209839013, "grad_norm": 1.3125, "learning_rate": 0.0007379649898756005, "loss": 0.185, "step": 61026 }, { "epoch": 0.10820768826369995, "grad_norm": 0.2451171875, "learning_rate": 0.0007379104998579826, "loss": 0.1634, "step": 61028 }, { "epoch": 0.10821123442900976, "grad_norm": 1.0, "learning_rate": 0.0007378560114239343, "loss": 0.1615, "step": 61030 }, { "epoch": 0.10821478059431958, "grad_norm": 0.30078125, "learning_rate": 0.0007378015245736931, "loss": 0.1627, "step": 61032 }, { "epoch": 0.10821832675962939, "grad_norm": 0.67578125, "learning_rate": 0.0007377470393074981, "loss": 0.2993, "step": 61034 }, { "epoch": 0.1082218729249392, "grad_norm": 0.455078125, "learning_rate": 0.0007376925556255864, "loss": 0.192, "step": 61036 }, { "epoch": 0.10822541909024902, "grad_norm": 0.46875, "learning_rate": 0.0007376380735281973, "loss": 0.2221, "step": 61038 }, { "epoch": 0.10822896525555883, "grad_norm": 0.451171875, "learning_rate": 0.000737583593015569, "loss": 0.2259, "step": 61040 }, { "epoch": 0.10823251142086865, "grad_norm": 0.341796875, "learning_rate": 0.0007375291140879396, "loss": 0.1919, "step": 61042 }, { "epoch": 0.10823605758617846, "grad_norm": 0.359375, "learning_rate": 0.0007374746367455471, "loss": 0.2354, "step": 61044 }, { "epoch": 0.10823960375148828, "grad_norm": 0.3359375, "learning_rate": 0.0007374201609886298, "loss": 0.1831, "step": 61046 }, { "epoch": 0.10824314991679809, "grad_norm": 0.5703125, "learning_rate": 0.0007373656868174263, "loss": 0.2037, "step": 61048 }, { "epoch": 0.10824669608210791, "grad_norm": 1.859375, "learning_rate": 0.0007373112142321747, "loss": 0.2811, "step": 61050 }, { "epoch": 0.10825024224741772, "grad_norm": 0.26953125, "learning_rate": 0.0007372567432331132, "loss": 0.1414, "step": 61052 }, { "epoch": 0.10825378841272754, "grad_norm": 0.55859375, "learning_rate": 0.0007372022738204798, "loss": 0.1646, "step": 61054 }, { "epoch": 0.10825733457803735, "grad_norm": 0.63671875, "learning_rate": 0.000737147805994513, "loss": 0.1646, "step": 61056 }, { "epoch": 0.10826088074334717, "grad_norm": 0.41796875, "learning_rate": 0.0007370933397554514, "loss": 0.142, "step": 61058 }, { "epoch": 0.108264426908657, "grad_norm": 0.59375, "learning_rate": 0.0007370388751035319, "loss": 0.1635, "step": 61060 }, { "epoch": 0.10826797307396681, "grad_norm": 0.2041015625, "learning_rate": 0.000736984412038994, "loss": 0.2059, "step": 61062 }, { "epoch": 0.10827151923927662, "grad_norm": 1.03125, "learning_rate": 0.000736929950562075, "loss": 0.205, "step": 61064 }, { "epoch": 0.10827506540458644, "grad_norm": 1.5546875, "learning_rate": 0.0007368754906730142, "loss": 0.2077, "step": 61066 }, { "epoch": 0.10827861156989625, "grad_norm": 1.4921875, "learning_rate": 0.0007368210323720485, "loss": 0.2001, "step": 61068 }, { "epoch": 0.10828215773520607, "grad_norm": 1.2265625, "learning_rate": 0.0007367665756594169, "loss": 0.1675, "step": 61070 }, { "epoch": 0.10828570390051588, "grad_norm": 0.2119140625, "learning_rate": 0.0007367121205353573, "loss": 0.2152, "step": 61072 }, { "epoch": 0.1082892500658257, "grad_norm": 0.48046875, "learning_rate": 0.000736657667000108, "loss": 0.2143, "step": 61074 }, { "epoch": 0.10829279623113551, "grad_norm": 0.36328125, "learning_rate": 0.0007366032150539068, "loss": 0.1781, "step": 61076 }, { "epoch": 0.10829634239644532, "grad_norm": 0.76953125, "learning_rate": 0.0007365487646969918, "loss": 0.1422, "step": 61078 }, { "epoch": 0.10829988856175514, "grad_norm": 0.4140625, "learning_rate": 0.0007364943159296016, "loss": 0.1266, "step": 61080 }, { "epoch": 0.10830343472706495, "grad_norm": 0.75, "learning_rate": 0.0007364398687519744, "loss": 0.1292, "step": 61082 }, { "epoch": 0.10830698089237477, "grad_norm": 0.494140625, "learning_rate": 0.0007363854231643478, "loss": 0.1653, "step": 61084 }, { "epoch": 0.10831052705768458, "grad_norm": 0.39453125, "learning_rate": 0.0007363309791669599, "loss": 0.1748, "step": 61086 }, { "epoch": 0.1083140732229944, "grad_norm": 0.37109375, "learning_rate": 0.0007362765367600495, "loss": 0.2194, "step": 61088 }, { "epoch": 0.10831761938830421, "grad_norm": 0.32421875, "learning_rate": 0.0007362220959438539, "loss": 0.1874, "step": 61090 }, { "epoch": 0.10832116555361403, "grad_norm": 0.27734375, "learning_rate": 0.0007361676567186121, "loss": 0.1428, "step": 61092 }, { "epoch": 0.10832471171892384, "grad_norm": 0.1943359375, "learning_rate": 0.000736113219084561, "loss": 0.2343, "step": 61094 }, { "epoch": 0.10832825788423366, "grad_norm": 0.279296875, "learning_rate": 0.0007360587830419394, "loss": 0.1828, "step": 61096 }, { "epoch": 0.10833180404954347, "grad_norm": 0.3125, "learning_rate": 0.0007360043485909854, "loss": 0.1305, "step": 61098 }, { "epoch": 0.10833535021485328, "grad_norm": 0.341796875, "learning_rate": 0.0007359499157319372, "loss": 0.1767, "step": 61100 }, { "epoch": 0.1083388963801631, "grad_norm": 0.380859375, "learning_rate": 0.0007358954844650323, "loss": 0.1823, "step": 61102 }, { "epoch": 0.10834244254547293, "grad_norm": 0.74609375, "learning_rate": 0.0007358410547905093, "loss": 0.1615, "step": 61104 }, { "epoch": 0.10834598871078274, "grad_norm": 0.392578125, "learning_rate": 0.0007357866267086058, "loss": 0.2977, "step": 61106 }, { "epoch": 0.10834953487609256, "grad_norm": 0.359375, "learning_rate": 0.0007357322002195601, "loss": 0.1312, "step": 61108 }, { "epoch": 0.10835308104140237, "grad_norm": 1.6796875, "learning_rate": 0.0007356777753236101, "loss": 0.3896, "step": 61110 }, { "epoch": 0.10835662720671219, "grad_norm": 0.4375, "learning_rate": 0.0007356233520209939, "loss": 0.1362, "step": 61112 }, { "epoch": 0.108360173372022, "grad_norm": 0.337890625, "learning_rate": 0.0007355689303119493, "loss": 0.1762, "step": 61114 }, { "epoch": 0.10836371953733182, "grad_norm": 1.1171875, "learning_rate": 0.0007355145101967149, "loss": 0.171, "step": 61116 }, { "epoch": 0.10836726570264163, "grad_norm": 0.6328125, "learning_rate": 0.0007354600916755281, "loss": 0.2705, "step": 61118 }, { "epoch": 0.10837081186795144, "grad_norm": 1.0390625, "learning_rate": 0.000735405674748627, "loss": 0.233, "step": 61120 }, { "epoch": 0.10837435803326126, "grad_norm": 0.2333984375, "learning_rate": 0.00073535125941625, "loss": 0.1797, "step": 61122 }, { "epoch": 0.10837790419857107, "grad_norm": 0.453125, "learning_rate": 0.0007352968456786346, "loss": 0.1786, "step": 61124 }, { "epoch": 0.10838145036388089, "grad_norm": 0.34765625, "learning_rate": 0.000735242433536019, "loss": 0.1705, "step": 61126 }, { "epoch": 0.1083849965291907, "grad_norm": 0.3984375, "learning_rate": 0.0007351880229886412, "loss": 0.166, "step": 61128 }, { "epoch": 0.10838854269450052, "grad_norm": 1.75, "learning_rate": 0.0007351336140367389, "loss": 0.2229, "step": 61130 }, { "epoch": 0.10839208885981033, "grad_norm": 0.2451171875, "learning_rate": 0.0007350792066805505, "loss": 0.1785, "step": 61132 }, { "epoch": 0.10839563502512015, "grad_norm": 0.318359375, "learning_rate": 0.0007350248009203133, "loss": 0.131, "step": 61134 }, { "epoch": 0.10839918119042996, "grad_norm": 1.734375, "learning_rate": 0.0007349703967562658, "loss": 0.1642, "step": 61136 }, { "epoch": 0.10840272735573978, "grad_norm": 0.5, "learning_rate": 0.0007349159941886456, "loss": 0.2721, "step": 61138 }, { "epoch": 0.10840627352104959, "grad_norm": 0.2275390625, "learning_rate": 0.0007348615932176912, "loss": 0.1811, "step": 61140 }, { "epoch": 0.1084098196863594, "grad_norm": 0.73046875, "learning_rate": 0.0007348071938436398, "loss": 0.2061, "step": 61142 }, { "epoch": 0.10841336585166922, "grad_norm": 0.318359375, "learning_rate": 0.0007347527960667298, "loss": 0.1381, "step": 61144 }, { "epoch": 0.10841691201697903, "grad_norm": 0.2099609375, "learning_rate": 0.0007346983998871985, "loss": 0.2179, "step": 61146 }, { "epoch": 0.10842045818228885, "grad_norm": 0.2177734375, "learning_rate": 0.0007346440053052845, "loss": 0.1459, "step": 61148 }, { "epoch": 0.10842400434759868, "grad_norm": 0.4375, "learning_rate": 0.0007345896123212254, "loss": 0.2061, "step": 61150 }, { "epoch": 0.10842755051290849, "grad_norm": 0.5859375, "learning_rate": 0.0007345352209352589, "loss": 0.1913, "step": 61152 }, { "epoch": 0.1084310966782183, "grad_norm": 0.34765625, "learning_rate": 0.000734480831147623, "loss": 0.1846, "step": 61154 }, { "epoch": 0.10843464284352812, "grad_norm": 0.53125, "learning_rate": 0.0007344264429585563, "loss": 0.1481, "step": 61156 }, { "epoch": 0.10843818900883793, "grad_norm": 0.62890625, "learning_rate": 0.0007343720563682952, "loss": 0.2577, "step": 61158 }, { "epoch": 0.10844173517414775, "grad_norm": 0.74609375, "learning_rate": 0.0007343176713770785, "loss": 0.2773, "step": 61160 }, { "epoch": 0.10844528133945756, "grad_norm": 1.609375, "learning_rate": 0.0007342632879851441, "loss": 0.2481, "step": 61162 }, { "epoch": 0.10844882750476738, "grad_norm": 0.376953125, "learning_rate": 0.0007342089061927298, "loss": 0.1674, "step": 61164 }, { "epoch": 0.10845237367007719, "grad_norm": 0.44921875, "learning_rate": 0.000734154526000073, "loss": 0.1866, "step": 61166 }, { "epoch": 0.10845591983538701, "grad_norm": 0.275390625, "learning_rate": 0.0007341001474074118, "loss": 0.2178, "step": 61168 }, { "epoch": 0.10845946600069682, "grad_norm": 0.6640625, "learning_rate": 0.0007340457704149839, "loss": 0.1338, "step": 61170 }, { "epoch": 0.10846301216600664, "grad_norm": 2.078125, "learning_rate": 0.0007339913950230272, "loss": 0.1453, "step": 61172 }, { "epoch": 0.10846655833131645, "grad_norm": 0.353515625, "learning_rate": 0.0007339370212317799, "loss": 0.1982, "step": 61174 }, { "epoch": 0.10847010449662627, "grad_norm": 0.48046875, "learning_rate": 0.0007338826490414791, "loss": 0.1777, "step": 61176 }, { "epoch": 0.10847365066193608, "grad_norm": 0.419921875, "learning_rate": 0.0007338282784523633, "loss": 0.2262, "step": 61178 }, { "epoch": 0.1084771968272459, "grad_norm": 0.306640625, "learning_rate": 0.0007337739094646693, "loss": 0.1642, "step": 61180 }, { "epoch": 0.10848074299255571, "grad_norm": 0.400390625, "learning_rate": 0.0007337195420786362, "loss": 0.2032, "step": 61182 }, { "epoch": 0.10848428915786552, "grad_norm": 0.64453125, "learning_rate": 0.0007336651762945008, "loss": 0.1954, "step": 61184 }, { "epoch": 0.10848783532317534, "grad_norm": 0.5390625, "learning_rate": 0.0007336108121125009, "loss": 0.2192, "step": 61186 }, { "epoch": 0.10849138148848515, "grad_norm": 0.69140625, "learning_rate": 0.0007335564495328746, "loss": 0.1667, "step": 61188 }, { "epoch": 0.10849492765379497, "grad_norm": 1.0, "learning_rate": 0.0007335020885558598, "loss": 0.2298, "step": 61190 }, { "epoch": 0.10849847381910478, "grad_norm": 0.26953125, "learning_rate": 0.0007334477291816937, "loss": 0.1755, "step": 61192 }, { "epoch": 0.1085020199844146, "grad_norm": 0.390625, "learning_rate": 0.0007333933714106142, "loss": 0.2008, "step": 61194 }, { "epoch": 0.10850556614972443, "grad_norm": 0.35546875, "learning_rate": 0.0007333390152428594, "loss": 0.163, "step": 61196 }, { "epoch": 0.10850911231503424, "grad_norm": 0.498046875, "learning_rate": 0.0007332846606786667, "loss": 0.1693, "step": 61198 }, { "epoch": 0.10851265848034405, "grad_norm": 0.341796875, "learning_rate": 0.000733230307718274, "loss": 0.1866, "step": 61200 }, { "epoch": 0.10851620464565387, "grad_norm": 0.9453125, "learning_rate": 0.0007331759563619187, "loss": 0.2757, "step": 61202 }, { "epoch": 0.10851975081096368, "grad_norm": 0.482421875, "learning_rate": 0.0007331216066098385, "loss": 0.1617, "step": 61204 }, { "epoch": 0.1085232969762735, "grad_norm": 0.26171875, "learning_rate": 0.000733067258462272, "loss": 0.1424, "step": 61206 }, { "epoch": 0.10852684314158331, "grad_norm": 1.03125, "learning_rate": 0.0007330129119194553, "loss": 0.1844, "step": 61208 }, { "epoch": 0.10853038930689313, "grad_norm": 0.328125, "learning_rate": 0.0007329585669816273, "loss": 0.2866, "step": 61210 }, { "epoch": 0.10853393547220294, "grad_norm": 0.232421875, "learning_rate": 0.0007329042236490253, "loss": 0.1642, "step": 61212 }, { "epoch": 0.10853748163751276, "grad_norm": 0.376953125, "learning_rate": 0.0007328498819218871, "loss": 0.2163, "step": 61214 }, { "epoch": 0.10854102780282257, "grad_norm": 0.53125, "learning_rate": 0.0007327955418004504, "loss": 0.1542, "step": 61216 }, { "epoch": 0.10854457396813239, "grad_norm": 0.482421875, "learning_rate": 0.0007327412032849523, "loss": 0.195, "step": 61218 }, { "epoch": 0.1085481201334422, "grad_norm": 0.2734375, "learning_rate": 0.0007326868663756311, "loss": 0.2398, "step": 61220 }, { "epoch": 0.10855166629875201, "grad_norm": 0.412109375, "learning_rate": 0.0007326325310727244, "loss": 0.1921, "step": 61222 }, { "epoch": 0.10855521246406183, "grad_norm": 0.212890625, "learning_rate": 0.0007325781973764691, "loss": 0.1545, "step": 61224 }, { "epoch": 0.10855875862937164, "grad_norm": 1.7421875, "learning_rate": 0.0007325238652871036, "loss": 0.2612, "step": 61226 }, { "epoch": 0.10856230479468146, "grad_norm": 0.37890625, "learning_rate": 0.0007324695348048651, "loss": 0.1777, "step": 61228 }, { "epoch": 0.10856585095999127, "grad_norm": 6.84375, "learning_rate": 0.0007324152059299917, "loss": 0.2629, "step": 61230 }, { "epoch": 0.10856939712530109, "grad_norm": 0.408203125, "learning_rate": 0.0007323608786627207, "loss": 0.2405, "step": 61232 }, { "epoch": 0.1085729432906109, "grad_norm": 0.3671875, "learning_rate": 0.0007323065530032893, "loss": 0.2373, "step": 61234 }, { "epoch": 0.10857648945592072, "grad_norm": 0.306640625, "learning_rate": 0.0007322522289519355, "loss": 0.1984, "step": 61236 }, { "epoch": 0.10858003562123053, "grad_norm": 0.5546875, "learning_rate": 0.0007321979065088971, "loss": 0.1279, "step": 61238 }, { "epoch": 0.10858358178654036, "grad_norm": 1.2109375, "learning_rate": 0.0007321435856744114, "loss": 0.3258, "step": 61240 }, { "epoch": 0.10858712795185017, "grad_norm": 0.2431640625, "learning_rate": 0.0007320892664487157, "loss": 0.155, "step": 61242 }, { "epoch": 0.10859067411715999, "grad_norm": 0.318359375, "learning_rate": 0.0007320349488320479, "loss": 0.2235, "step": 61244 }, { "epoch": 0.1085942202824698, "grad_norm": 0.2421875, "learning_rate": 0.0007319806328246459, "loss": 0.1476, "step": 61246 }, { "epoch": 0.10859776644777962, "grad_norm": 0.625, "learning_rate": 0.0007319263184267464, "loss": 0.1843, "step": 61248 }, { "epoch": 0.10860131261308943, "grad_norm": 0.2412109375, "learning_rate": 0.0007318720056385874, "loss": 0.2016, "step": 61250 }, { "epoch": 0.10860485877839925, "grad_norm": 0.7265625, "learning_rate": 0.0007318176944604064, "loss": 0.3837, "step": 61252 }, { "epoch": 0.10860840494370906, "grad_norm": 0.1572265625, "learning_rate": 0.0007317633848924412, "loss": 0.1618, "step": 61254 }, { "epoch": 0.10861195110901888, "grad_norm": 0.45703125, "learning_rate": 0.0007317090769349288, "loss": 0.1568, "step": 61256 }, { "epoch": 0.10861549727432869, "grad_norm": 0.7734375, "learning_rate": 0.0007316547705881074, "loss": 0.2368, "step": 61258 }, { "epoch": 0.1086190434396385, "grad_norm": 0.365234375, "learning_rate": 0.0007316004658522135, "loss": 0.255, "step": 61260 }, { "epoch": 0.10862258960494832, "grad_norm": 0.3203125, "learning_rate": 0.0007315461627274853, "loss": 0.3742, "step": 61262 }, { "epoch": 0.10862613577025813, "grad_norm": 0.373046875, "learning_rate": 0.0007314918612141606, "loss": 0.1488, "step": 61264 }, { "epoch": 0.10862968193556795, "grad_norm": 0.12109375, "learning_rate": 0.0007314375613124759, "loss": 0.1642, "step": 61266 }, { "epoch": 0.10863322810087776, "grad_norm": 0.302734375, "learning_rate": 0.0007313832630226696, "loss": 0.1288, "step": 61268 }, { "epoch": 0.10863677426618758, "grad_norm": 0.796875, "learning_rate": 0.0007313289663449785, "loss": 0.1853, "step": 61270 }, { "epoch": 0.10864032043149739, "grad_norm": 0.30859375, "learning_rate": 0.0007312746712796406, "loss": 0.1637, "step": 61272 }, { "epoch": 0.1086438665968072, "grad_norm": 0.29296875, "learning_rate": 0.0007312203778268931, "loss": 0.1583, "step": 61274 }, { "epoch": 0.10864741276211702, "grad_norm": 0.5234375, "learning_rate": 0.0007311660859869733, "loss": 0.1732, "step": 61276 }, { "epoch": 0.10865095892742684, "grad_norm": 0.44921875, "learning_rate": 0.0007311117957601185, "loss": 0.1689, "step": 61278 }, { "epoch": 0.10865450509273665, "grad_norm": 0.271484375, "learning_rate": 0.0007310575071465673, "loss": 0.1551, "step": 61280 }, { "epoch": 0.10865805125804646, "grad_norm": 0.32421875, "learning_rate": 0.0007310032201465555, "loss": 0.1901, "step": 61282 }, { "epoch": 0.10866159742335628, "grad_norm": 0.255859375, "learning_rate": 0.0007309489347603213, "loss": 0.1522, "step": 61284 }, { "epoch": 0.10866514358866611, "grad_norm": 0.4140625, "learning_rate": 0.0007308946509881022, "loss": 0.1736, "step": 61286 }, { "epoch": 0.10866868975397592, "grad_norm": 1.9140625, "learning_rate": 0.000730840368830136, "loss": 0.2825, "step": 61288 }, { "epoch": 0.10867223591928574, "grad_norm": 0.98046875, "learning_rate": 0.000730786088286659, "loss": 0.2171, "step": 61290 }, { "epoch": 0.10867578208459555, "grad_norm": 0.388671875, "learning_rate": 0.0007307318093579093, "loss": 0.1948, "step": 61292 }, { "epoch": 0.10867932824990537, "grad_norm": 0.60546875, "learning_rate": 0.0007306775320441244, "loss": 0.1633, "step": 61294 }, { "epoch": 0.10868287441521518, "grad_norm": 0.52734375, "learning_rate": 0.0007306232563455415, "loss": 0.3467, "step": 61296 }, { "epoch": 0.108686420580525, "grad_norm": 1.0234375, "learning_rate": 0.0007305689822623975, "loss": 0.2322, "step": 61298 }, { "epoch": 0.10868996674583481, "grad_norm": 0.5078125, "learning_rate": 0.0007305147097949303, "loss": 0.1668, "step": 61300 }, { "epoch": 0.10869351291114462, "grad_norm": 0.5703125, "learning_rate": 0.0007304604389433774, "loss": 0.1922, "step": 61302 }, { "epoch": 0.10869705907645444, "grad_norm": 0.4453125, "learning_rate": 0.0007304061697079758, "loss": 0.2139, "step": 61304 }, { "epoch": 0.10870060524176425, "grad_norm": 0.73046875, "learning_rate": 0.0007303519020889626, "loss": 0.194, "step": 61306 }, { "epoch": 0.10870415140707407, "grad_norm": 0.63671875, "learning_rate": 0.0007302976360865756, "loss": 0.2146, "step": 61308 }, { "epoch": 0.10870769757238388, "grad_norm": 0.240234375, "learning_rate": 0.0007302433717010524, "loss": 0.1822, "step": 61310 }, { "epoch": 0.1087112437376937, "grad_norm": 0.8359375, "learning_rate": 0.0007301891089326295, "loss": 0.2013, "step": 61312 }, { "epoch": 0.10871478990300351, "grad_norm": 0.322265625, "learning_rate": 0.0007301348477815448, "loss": 0.1792, "step": 61314 }, { "epoch": 0.10871833606831333, "grad_norm": 0.7890625, "learning_rate": 0.0007300805882480353, "loss": 0.2456, "step": 61316 }, { "epoch": 0.10872188223362314, "grad_norm": 0.578125, "learning_rate": 0.0007300263303323385, "loss": 0.181, "step": 61318 }, { "epoch": 0.10872542839893296, "grad_norm": 0.7890625, "learning_rate": 0.0007299720740346917, "loss": 0.249, "step": 61320 }, { "epoch": 0.10872897456424277, "grad_norm": 0.609375, "learning_rate": 0.000729917819355332, "loss": 0.212, "step": 61322 }, { "epoch": 0.10873252072955258, "grad_norm": 0.4921875, "learning_rate": 0.0007298635662944966, "loss": 0.1643, "step": 61324 }, { "epoch": 0.1087360668948624, "grad_norm": 5.5, "learning_rate": 0.0007298093148524232, "loss": 0.5251, "step": 61326 }, { "epoch": 0.10873961306017221, "grad_norm": 0.25390625, "learning_rate": 0.000729755065029349, "loss": 0.2004, "step": 61328 }, { "epoch": 0.10874315922548203, "grad_norm": 0.2001953125, "learning_rate": 0.0007297008168255108, "loss": 0.13, "step": 61330 }, { "epoch": 0.10874670539079186, "grad_norm": 0.5625, "learning_rate": 0.0007296465702411463, "loss": 0.158, "step": 61332 }, { "epoch": 0.10875025155610167, "grad_norm": 1.4453125, "learning_rate": 0.0007295923252764922, "loss": 0.2204, "step": 61334 }, { "epoch": 0.10875379772141149, "grad_norm": 0.1962890625, "learning_rate": 0.0007295380819317867, "loss": 0.2549, "step": 61336 }, { "epoch": 0.1087573438867213, "grad_norm": 0.484375, "learning_rate": 0.0007294838402072661, "loss": 0.189, "step": 61338 }, { "epoch": 0.10876089005203111, "grad_norm": 0.3125, "learning_rate": 0.000729429600103168, "loss": 0.2086, "step": 61340 }, { "epoch": 0.10876443621734093, "grad_norm": 0.88671875, "learning_rate": 0.0007293753616197294, "loss": 0.1541, "step": 61342 }, { "epoch": 0.10876798238265074, "grad_norm": 0.5859375, "learning_rate": 0.0007293211247571878, "loss": 0.2065, "step": 61344 }, { "epoch": 0.10877152854796056, "grad_norm": 0.39453125, "learning_rate": 0.0007292668895157806, "loss": 0.1466, "step": 61346 }, { "epoch": 0.10877507471327037, "grad_norm": 0.314453125, "learning_rate": 0.0007292126558957443, "loss": 0.1581, "step": 61348 }, { "epoch": 0.10877862087858019, "grad_norm": 0.32421875, "learning_rate": 0.0007291584238973168, "loss": 0.1011, "step": 61350 }, { "epoch": 0.10878216704389, "grad_norm": 0.38671875, "learning_rate": 0.0007291041935207345, "loss": 0.1565, "step": 61352 }, { "epoch": 0.10878571320919982, "grad_norm": 0.4453125, "learning_rate": 0.0007290499647662354, "loss": 0.1964, "step": 61354 }, { "epoch": 0.10878925937450963, "grad_norm": 0.3359375, "learning_rate": 0.0007289957376340561, "loss": 0.1987, "step": 61356 }, { "epoch": 0.10879280553981945, "grad_norm": 0.671875, "learning_rate": 0.0007289415121244338, "loss": 0.1908, "step": 61358 }, { "epoch": 0.10879635170512926, "grad_norm": 0.87890625, "learning_rate": 0.0007288872882376059, "loss": 0.3852, "step": 61360 }, { "epoch": 0.10879989787043907, "grad_norm": 0.291015625, "learning_rate": 0.0007288330659738099, "loss": 0.1775, "step": 61362 }, { "epoch": 0.10880344403574889, "grad_norm": 0.298828125, "learning_rate": 0.0007287788453332818, "loss": 0.1803, "step": 61364 }, { "epoch": 0.1088069902010587, "grad_norm": 0.52734375, "learning_rate": 0.0007287246263162596, "loss": 0.1262, "step": 61366 }, { "epoch": 0.10881053636636852, "grad_norm": 0.392578125, "learning_rate": 0.0007286704089229803, "loss": 0.1721, "step": 61368 }, { "epoch": 0.10881408253167833, "grad_norm": 0.7421875, "learning_rate": 0.0007286161931536812, "loss": 0.151, "step": 61370 }, { "epoch": 0.10881762869698815, "grad_norm": 2.484375, "learning_rate": 0.0007285619790085987, "loss": 0.2076, "step": 61372 }, { "epoch": 0.10882117486229796, "grad_norm": 0.78125, "learning_rate": 0.0007285077664879704, "loss": 0.1667, "step": 61374 }, { "epoch": 0.10882472102760779, "grad_norm": 0.44921875, "learning_rate": 0.0007284535555920333, "loss": 0.155, "step": 61376 }, { "epoch": 0.1088282671929176, "grad_norm": 0.859375, "learning_rate": 0.0007283993463210253, "loss": 0.2676, "step": 61378 }, { "epoch": 0.10883181335822742, "grad_norm": 0.435546875, "learning_rate": 0.0007283451386751818, "loss": 0.1948, "step": 61380 }, { "epoch": 0.10883535952353723, "grad_norm": 0.384765625, "learning_rate": 0.0007282909326547409, "loss": 0.2169, "step": 61382 }, { "epoch": 0.10883890568884705, "grad_norm": 1.8984375, "learning_rate": 0.0007282367282599398, "loss": 0.2431, "step": 61384 }, { "epoch": 0.10884245185415686, "grad_norm": 0.404296875, "learning_rate": 0.0007281825254910152, "loss": 0.1811, "step": 61386 }, { "epoch": 0.10884599801946668, "grad_norm": 0.279296875, "learning_rate": 0.0007281283243482044, "loss": 0.167, "step": 61388 }, { "epoch": 0.10884954418477649, "grad_norm": 1.3359375, "learning_rate": 0.0007280741248317442, "loss": 0.2143, "step": 61390 }, { "epoch": 0.1088530903500863, "grad_norm": 0.3203125, "learning_rate": 0.0007280199269418715, "loss": 0.2022, "step": 61392 }, { "epoch": 0.10885663651539612, "grad_norm": 1.0234375, "learning_rate": 0.0007279657306788241, "loss": 0.1345, "step": 61394 }, { "epoch": 0.10886018268070594, "grad_norm": 0.466796875, "learning_rate": 0.0007279115360428379, "loss": 0.2359, "step": 61396 }, { "epoch": 0.10886372884601575, "grad_norm": 1.25, "learning_rate": 0.0007278573430341508, "loss": 0.163, "step": 61398 }, { "epoch": 0.10886727501132556, "grad_norm": 0.447265625, "learning_rate": 0.0007278031516529995, "loss": 0.254, "step": 61400 }, { "epoch": 0.10887082117663538, "grad_norm": 0.5078125, "learning_rate": 0.0007277489618996211, "loss": 0.1973, "step": 61402 }, { "epoch": 0.1088743673419452, "grad_norm": 0.251953125, "learning_rate": 0.0007276947737742523, "loss": 0.1385, "step": 61404 }, { "epoch": 0.10887791350725501, "grad_norm": 0.31640625, "learning_rate": 0.0007276405872771306, "loss": 0.168, "step": 61406 }, { "epoch": 0.10888145967256482, "grad_norm": 0.59765625, "learning_rate": 0.0007275864024084924, "loss": 0.1787, "step": 61408 }, { "epoch": 0.10888500583787464, "grad_norm": 0.65234375, "learning_rate": 0.0007275322191685754, "loss": 0.1786, "step": 61410 }, { "epoch": 0.10888855200318445, "grad_norm": 1.0625, "learning_rate": 0.0007274780375576156, "loss": 0.35, "step": 61412 }, { "epoch": 0.10889209816849427, "grad_norm": 3.203125, "learning_rate": 0.0007274238575758505, "loss": 0.1801, "step": 61414 }, { "epoch": 0.10889564433380408, "grad_norm": 0.71484375, "learning_rate": 0.0007273696792235171, "loss": 0.2093, "step": 61416 }, { "epoch": 0.1088991904991139, "grad_norm": 0.5390625, "learning_rate": 0.0007273155025008526, "loss": 0.4342, "step": 61418 }, { "epoch": 0.10890273666442371, "grad_norm": 0.482421875, "learning_rate": 0.0007272613274080932, "loss": 0.1262, "step": 61420 }, { "epoch": 0.10890628282973354, "grad_norm": 3.03125, "learning_rate": 0.0007272071539454763, "loss": 0.2986, "step": 61422 }, { "epoch": 0.10890982899504335, "grad_norm": 1.796875, "learning_rate": 0.0007271529821132389, "loss": 0.2314, "step": 61424 }, { "epoch": 0.10891337516035317, "grad_norm": 0.25390625, "learning_rate": 0.0007270988119116177, "loss": 0.144, "step": 61426 }, { "epoch": 0.10891692132566298, "grad_norm": 0.451171875, "learning_rate": 0.0007270446433408499, "loss": 0.2421, "step": 61428 }, { "epoch": 0.1089204674909728, "grad_norm": 0.53125, "learning_rate": 0.0007269904764011722, "loss": 0.1532, "step": 61430 }, { "epoch": 0.10892401365628261, "grad_norm": 0.478515625, "learning_rate": 0.0007269363110928211, "loss": 0.2149, "step": 61432 }, { "epoch": 0.10892755982159243, "grad_norm": 0.490234375, "learning_rate": 0.0007268821474160339, "loss": 0.1682, "step": 61434 }, { "epoch": 0.10893110598690224, "grad_norm": 0.25390625, "learning_rate": 0.000726827985371048, "loss": 0.1741, "step": 61436 }, { "epoch": 0.10893465215221206, "grad_norm": 0.4609375, "learning_rate": 0.0007267738249580993, "loss": 0.1605, "step": 61438 }, { "epoch": 0.10893819831752187, "grad_norm": 0.5859375, "learning_rate": 0.0007267196661774251, "loss": 0.1458, "step": 61440 }, { "epoch": 0.10894174448283168, "grad_norm": 0.341796875, "learning_rate": 0.0007266655090292622, "loss": 0.1664, "step": 61442 }, { "epoch": 0.1089452906481415, "grad_norm": 0.236328125, "learning_rate": 0.0007266113535138479, "loss": 0.1802, "step": 61444 }, { "epoch": 0.10894883681345131, "grad_norm": 0.3671875, "learning_rate": 0.0007265571996314185, "loss": 0.1675, "step": 61446 }, { "epoch": 0.10895238297876113, "grad_norm": 0.302734375, "learning_rate": 0.0007265030473822105, "loss": 0.1547, "step": 61448 }, { "epoch": 0.10895592914407094, "grad_norm": 0.232421875, "learning_rate": 0.0007264488967664615, "loss": 0.117, "step": 61450 }, { "epoch": 0.10895947530938076, "grad_norm": 0.56640625, "learning_rate": 0.0007263947477844086, "loss": 0.1868, "step": 61452 }, { "epoch": 0.10896302147469057, "grad_norm": 0.412109375, "learning_rate": 0.0007263406004362874, "loss": 0.1092, "step": 61454 }, { "epoch": 0.10896656764000039, "grad_norm": 0.3046875, "learning_rate": 0.0007262864547223353, "loss": 0.1443, "step": 61456 }, { "epoch": 0.1089701138053102, "grad_norm": 0.41796875, "learning_rate": 0.0007262323106427895, "loss": 0.1701, "step": 61458 }, { "epoch": 0.10897365997062002, "grad_norm": 0.482421875, "learning_rate": 0.0007261781681978866, "loss": 0.1563, "step": 61460 }, { "epoch": 0.10897720613592983, "grad_norm": 0.61328125, "learning_rate": 0.0007261240273878631, "loss": 0.1845, "step": 61462 }, { "epoch": 0.10898075230123964, "grad_norm": 0.58203125, "learning_rate": 0.0007260698882129557, "loss": 0.1755, "step": 61464 }, { "epoch": 0.10898429846654946, "grad_norm": 1.046875, "learning_rate": 0.0007260157506734016, "loss": 0.2298, "step": 61466 }, { "epoch": 0.10898784463185929, "grad_norm": 0.17578125, "learning_rate": 0.0007259616147694374, "loss": 0.1741, "step": 61468 }, { "epoch": 0.1089913907971691, "grad_norm": 0.31640625, "learning_rate": 0.0007259074805012997, "loss": 0.1422, "step": 61470 }, { "epoch": 0.10899493696247892, "grad_norm": 0.390625, "learning_rate": 0.0007258533478692253, "loss": 0.1844, "step": 61472 }, { "epoch": 0.10899848312778873, "grad_norm": 0.2197265625, "learning_rate": 0.0007257992168734513, "loss": 0.1639, "step": 61474 }, { "epoch": 0.10900202929309855, "grad_norm": 0.3046875, "learning_rate": 0.0007257450875142142, "loss": 0.1546, "step": 61476 }, { "epoch": 0.10900557545840836, "grad_norm": 0.65625, "learning_rate": 0.0007256909597917505, "loss": 0.1798, "step": 61478 }, { "epoch": 0.10900912162371817, "grad_norm": 0.306640625, "learning_rate": 0.000725636833706297, "loss": 0.1498, "step": 61480 }, { "epoch": 0.10901266778902799, "grad_norm": 1.46875, "learning_rate": 0.0007255827092580906, "loss": 0.217, "step": 61482 }, { "epoch": 0.1090162139543378, "grad_norm": 0.279296875, "learning_rate": 0.0007255285864473684, "loss": 0.2579, "step": 61484 }, { "epoch": 0.10901976011964762, "grad_norm": 0.33203125, "learning_rate": 0.0007254744652743661, "loss": 0.1829, "step": 61486 }, { "epoch": 0.10902330628495743, "grad_norm": 0.65625, "learning_rate": 0.0007254203457393211, "loss": 0.2555, "step": 61488 }, { "epoch": 0.10902685245026725, "grad_norm": 1.0, "learning_rate": 0.0007253662278424698, "loss": 0.2375, "step": 61490 }, { "epoch": 0.10903039861557706, "grad_norm": 1.03125, "learning_rate": 0.0007253121115840495, "loss": 0.2603, "step": 61492 }, { "epoch": 0.10903394478088688, "grad_norm": 2.40625, "learning_rate": 0.0007252579969642958, "loss": 0.1582, "step": 61494 }, { "epoch": 0.10903749094619669, "grad_norm": 0.228515625, "learning_rate": 0.000725203883983446, "loss": 0.239, "step": 61496 }, { "epoch": 0.1090410371115065, "grad_norm": 0.328125, "learning_rate": 0.0007251497726417366, "loss": 0.1525, "step": 61498 }, { "epoch": 0.10904458327681632, "grad_norm": 1.171875, "learning_rate": 0.0007250956629394052, "loss": 0.2746, "step": 61500 }, { "epoch": 0.10904812944212613, "grad_norm": 0.400390625, "learning_rate": 0.0007250415548766869, "loss": 0.1717, "step": 61502 }, { "epoch": 0.10905167560743595, "grad_norm": 0.2373046875, "learning_rate": 0.0007249874484538193, "loss": 0.1561, "step": 61504 }, { "epoch": 0.10905522177274576, "grad_norm": 0.65234375, "learning_rate": 0.0007249333436710385, "loss": 0.1533, "step": 61506 }, { "epoch": 0.10905876793805558, "grad_norm": 1.359375, "learning_rate": 0.0007248792405285813, "loss": 0.3738, "step": 61508 }, { "epoch": 0.1090623141033654, "grad_norm": 0.56640625, "learning_rate": 0.0007248251390266849, "loss": 0.2384, "step": 61510 }, { "epoch": 0.10906586026867522, "grad_norm": 0.314453125, "learning_rate": 0.0007247710391655849, "loss": 0.2039, "step": 61512 }, { "epoch": 0.10906940643398504, "grad_norm": 0.287109375, "learning_rate": 0.0007247169409455188, "loss": 0.3537, "step": 61514 }, { "epoch": 0.10907295259929485, "grad_norm": 1.9453125, "learning_rate": 0.0007246628443667222, "loss": 0.224, "step": 61516 }, { "epoch": 0.10907649876460467, "grad_norm": 0.287109375, "learning_rate": 0.0007246087494294333, "loss": 0.1682, "step": 61518 }, { "epoch": 0.10908004492991448, "grad_norm": 1.203125, "learning_rate": 0.0007245546561338869, "loss": 0.1916, "step": 61520 }, { "epoch": 0.1090835910952243, "grad_norm": 0.283203125, "learning_rate": 0.0007245005644803207, "loss": 0.1439, "step": 61522 }, { "epoch": 0.10908713726053411, "grad_norm": 0.296875, "learning_rate": 0.000724446474468971, "loss": 0.2312, "step": 61524 }, { "epoch": 0.10909068342584392, "grad_norm": 0.1630859375, "learning_rate": 0.000724392386100074, "loss": 0.1592, "step": 61526 }, { "epoch": 0.10909422959115374, "grad_norm": 1.1328125, "learning_rate": 0.0007243382993738668, "loss": 0.2323, "step": 61528 }, { "epoch": 0.10909777575646355, "grad_norm": 0.2734375, "learning_rate": 0.000724284214290585, "loss": 0.1661, "step": 61530 }, { "epoch": 0.10910132192177337, "grad_norm": 0.578125, "learning_rate": 0.0007242301308504664, "loss": 0.1893, "step": 61532 }, { "epoch": 0.10910486808708318, "grad_norm": 0.380859375, "learning_rate": 0.0007241760490537468, "loss": 0.1634, "step": 61534 }, { "epoch": 0.109108414252393, "grad_norm": 0.396484375, "learning_rate": 0.000724121968900663, "loss": 0.1731, "step": 61536 }, { "epoch": 0.10911196041770281, "grad_norm": 0.93359375, "learning_rate": 0.000724067890391451, "loss": 0.222, "step": 61538 }, { "epoch": 0.10911550658301263, "grad_norm": 0.373046875, "learning_rate": 0.0007240138135263481, "loss": 0.1818, "step": 61540 }, { "epoch": 0.10911905274832244, "grad_norm": 0.1865234375, "learning_rate": 0.0007239597383055902, "loss": 0.1394, "step": 61542 }, { "epoch": 0.10912259891363225, "grad_norm": 0.298828125, "learning_rate": 0.000723905664729414, "loss": 0.1932, "step": 61544 }, { "epoch": 0.10912614507894207, "grad_norm": 0.18359375, "learning_rate": 0.000723851592798056, "loss": 0.1279, "step": 61546 }, { "epoch": 0.10912969124425188, "grad_norm": 0.474609375, "learning_rate": 0.0007237975225117524, "loss": 0.1901, "step": 61548 }, { "epoch": 0.1091332374095617, "grad_norm": 1.3671875, "learning_rate": 0.0007237434538707404, "loss": 0.2195, "step": 61550 }, { "epoch": 0.10913678357487151, "grad_norm": 0.9765625, "learning_rate": 0.0007236893868752555, "loss": 0.1739, "step": 61552 }, { "epoch": 0.10914032974018133, "grad_norm": 1.0078125, "learning_rate": 0.0007236353215255349, "loss": 0.1624, "step": 61554 }, { "epoch": 0.10914387590549114, "grad_norm": 0.3828125, "learning_rate": 0.0007235812578218142, "loss": 0.1931, "step": 61556 }, { "epoch": 0.10914742207080097, "grad_norm": 0.34375, "learning_rate": 0.0007235271957643315, "loss": 0.1999, "step": 61558 }, { "epoch": 0.10915096823611078, "grad_norm": 2.0625, "learning_rate": 0.0007234731353533213, "loss": 0.4279, "step": 61560 }, { "epoch": 0.1091545144014206, "grad_norm": 0.5703125, "learning_rate": 0.0007234190765890211, "loss": 0.2011, "step": 61562 }, { "epoch": 0.10915806056673041, "grad_norm": 0.453125, "learning_rate": 0.0007233650194716675, "loss": 0.1591, "step": 61564 }, { "epoch": 0.10916160673204023, "grad_norm": 0.6796875, "learning_rate": 0.000723310964001496, "loss": 0.2483, "step": 61566 }, { "epoch": 0.10916515289735004, "grad_norm": 0.57421875, "learning_rate": 0.0007232569101787438, "loss": 0.2177, "step": 61568 }, { "epoch": 0.10916869906265986, "grad_norm": 0.69140625, "learning_rate": 0.0007232028580036467, "loss": 0.1617, "step": 61570 }, { "epoch": 0.10917224522796967, "grad_norm": 0.37109375, "learning_rate": 0.0007231488074764419, "loss": 0.1715, "step": 61572 }, { "epoch": 0.10917579139327949, "grad_norm": 0.28125, "learning_rate": 0.0007230947585973652, "loss": 0.1558, "step": 61574 }, { "epoch": 0.1091793375585893, "grad_norm": 0.98046875, "learning_rate": 0.0007230407113666531, "loss": 0.2761, "step": 61576 }, { "epoch": 0.10918288372389912, "grad_norm": 0.7890625, "learning_rate": 0.0007229866657845417, "loss": 0.1858, "step": 61578 }, { "epoch": 0.10918642988920893, "grad_norm": 0.326171875, "learning_rate": 0.0007229326218512678, "loss": 0.1904, "step": 61580 }, { "epoch": 0.10918997605451874, "grad_norm": 0.39453125, "learning_rate": 0.0007228785795670676, "loss": 0.1818, "step": 61582 }, { "epoch": 0.10919352221982856, "grad_norm": 0.275390625, "learning_rate": 0.0007228245389321777, "loss": 0.2084, "step": 61584 }, { "epoch": 0.10919706838513837, "grad_norm": 0.33203125, "learning_rate": 0.0007227704999468339, "loss": 0.1878, "step": 61586 }, { "epoch": 0.10920061455044819, "grad_norm": 0.322265625, "learning_rate": 0.0007227164626112725, "loss": 0.2058, "step": 61588 }, { "epoch": 0.109204160715758, "grad_norm": 0.283203125, "learning_rate": 0.0007226624269257307, "loss": 0.2337, "step": 61590 }, { "epoch": 0.10920770688106782, "grad_norm": 0.478515625, "learning_rate": 0.0007226083928904439, "loss": 0.1878, "step": 61592 }, { "epoch": 0.10921125304637763, "grad_norm": 0.2099609375, "learning_rate": 0.000722554360505649, "loss": 0.1854, "step": 61594 }, { "epoch": 0.10921479921168745, "grad_norm": 0.3671875, "learning_rate": 0.0007225003297715817, "loss": 0.1378, "step": 61596 }, { "epoch": 0.10921834537699726, "grad_norm": 0.5703125, "learning_rate": 0.000722446300688479, "loss": 0.1368, "step": 61598 }, { "epoch": 0.10922189154230708, "grad_norm": 0.25390625, "learning_rate": 0.0007223922732565771, "loss": 0.1537, "step": 61600 }, { "epoch": 0.10922543770761689, "grad_norm": 0.2177734375, "learning_rate": 0.0007223382474761118, "loss": 0.146, "step": 61602 }, { "epoch": 0.10922898387292672, "grad_norm": 0.29296875, "learning_rate": 0.0007222842233473197, "loss": 0.1582, "step": 61604 }, { "epoch": 0.10923253003823653, "grad_norm": 0.443359375, "learning_rate": 0.0007222302008704364, "loss": 0.1582, "step": 61606 }, { "epoch": 0.10923607620354635, "grad_norm": 2.046875, "learning_rate": 0.0007221761800456999, "loss": 0.1983, "step": 61608 }, { "epoch": 0.10923962236885616, "grad_norm": 1.4453125, "learning_rate": 0.0007221221608733443, "loss": 0.2384, "step": 61610 }, { "epoch": 0.10924316853416598, "grad_norm": 0.2080078125, "learning_rate": 0.0007220681433536076, "loss": 0.251, "step": 61612 }, { "epoch": 0.10924671469947579, "grad_norm": 0.21484375, "learning_rate": 0.0007220141274867247, "loss": 0.2262, "step": 61614 }, { "epoch": 0.1092502608647856, "grad_norm": 0.2333984375, "learning_rate": 0.000721960113272933, "loss": 0.2076, "step": 61616 }, { "epoch": 0.10925380703009542, "grad_norm": 1.5234375, "learning_rate": 0.0007219061007124677, "loss": 0.2956, "step": 61618 }, { "epoch": 0.10925735319540524, "grad_norm": 0.2890625, "learning_rate": 0.0007218520898055658, "loss": 0.1599, "step": 61620 }, { "epoch": 0.10926089936071505, "grad_norm": 0.400390625, "learning_rate": 0.0007217980805524631, "loss": 0.2269, "step": 61622 }, { "epoch": 0.10926444552602486, "grad_norm": 0.890625, "learning_rate": 0.0007217440729533962, "loss": 0.2389, "step": 61624 }, { "epoch": 0.10926799169133468, "grad_norm": 0.314453125, "learning_rate": 0.0007216900670086008, "loss": 0.1522, "step": 61626 }, { "epoch": 0.1092715378566445, "grad_norm": 0.97265625, "learning_rate": 0.000721636062718313, "loss": 0.2066, "step": 61628 }, { "epoch": 0.10927508402195431, "grad_norm": 1.2734375, "learning_rate": 0.0007215820600827694, "loss": 0.1443, "step": 61630 }, { "epoch": 0.10927863018726412, "grad_norm": 0.51171875, "learning_rate": 0.0007215280591022062, "loss": 0.1696, "step": 61632 }, { "epoch": 0.10928217635257394, "grad_norm": 0.74609375, "learning_rate": 0.0007214740597768594, "loss": 0.3619, "step": 61634 }, { "epoch": 0.10928572251788375, "grad_norm": 0.69921875, "learning_rate": 0.000721420062106965, "loss": 0.1456, "step": 61636 }, { "epoch": 0.10928926868319357, "grad_norm": 0.99609375, "learning_rate": 0.0007213660660927597, "loss": 0.1799, "step": 61638 }, { "epoch": 0.10929281484850338, "grad_norm": 0.4921875, "learning_rate": 0.0007213120717344792, "loss": 0.1379, "step": 61640 }, { "epoch": 0.1092963610138132, "grad_norm": 0.2177734375, "learning_rate": 0.0007212580790323598, "loss": 0.1768, "step": 61642 }, { "epoch": 0.10929990717912301, "grad_norm": 0.2470703125, "learning_rate": 0.0007212040879866373, "loss": 0.1563, "step": 61644 }, { "epoch": 0.10930345334443282, "grad_norm": 0.423828125, "learning_rate": 0.0007211500985975479, "loss": 0.1561, "step": 61646 }, { "epoch": 0.10930699950974265, "grad_norm": 0.58203125, "learning_rate": 0.0007210961108653287, "loss": 0.264, "step": 61648 }, { "epoch": 0.10931054567505247, "grad_norm": 2.03125, "learning_rate": 0.0007210421247902142, "loss": 0.2982, "step": 61650 }, { "epoch": 0.10931409184036228, "grad_norm": 0.1953125, "learning_rate": 0.0007209881403724416, "loss": 0.1857, "step": 61652 }, { "epoch": 0.1093176380056721, "grad_norm": 0.62890625, "learning_rate": 0.0007209341576122466, "loss": 0.1558, "step": 61654 }, { "epoch": 0.10932118417098191, "grad_norm": 0.72265625, "learning_rate": 0.000720880176509866, "loss": 0.2188, "step": 61656 }, { "epoch": 0.10932473033629173, "grad_norm": 0.310546875, "learning_rate": 0.0007208261970655345, "loss": 0.1929, "step": 61658 }, { "epoch": 0.10932827650160154, "grad_norm": 0.423828125, "learning_rate": 0.0007207722192794895, "loss": 0.1757, "step": 61660 }, { "epoch": 0.10933182266691135, "grad_norm": 0.2216796875, "learning_rate": 0.0007207182431519665, "loss": 0.1838, "step": 61662 }, { "epoch": 0.10933536883222117, "grad_norm": 0.72265625, "learning_rate": 0.0007206642686832014, "loss": 0.1718, "step": 61664 }, { "epoch": 0.10933891499753098, "grad_norm": 0.25390625, "learning_rate": 0.0007206102958734307, "loss": 0.1946, "step": 61666 }, { "epoch": 0.1093424611628408, "grad_norm": 2.8125, "learning_rate": 0.0007205563247228897, "loss": 0.2974, "step": 61668 }, { "epoch": 0.10934600732815061, "grad_norm": 0.53515625, "learning_rate": 0.000720502355231815, "loss": 0.3548, "step": 61670 }, { "epoch": 0.10934955349346043, "grad_norm": 0.28125, "learning_rate": 0.000720448387400443, "loss": 0.2264, "step": 61672 }, { "epoch": 0.10935309965877024, "grad_norm": 0.734375, "learning_rate": 0.0007203944212290092, "loss": 0.2211, "step": 61674 }, { "epoch": 0.10935664582408006, "grad_norm": 0.314453125, "learning_rate": 0.0007203404567177496, "loss": 0.2236, "step": 61676 }, { "epoch": 0.10936019198938987, "grad_norm": 0.40625, "learning_rate": 0.0007202864938669001, "loss": 0.2317, "step": 61678 }, { "epoch": 0.10936373815469969, "grad_norm": 2.28125, "learning_rate": 0.0007202325326766969, "loss": 0.1933, "step": 61680 }, { "epoch": 0.1093672843200095, "grad_norm": 0.2197265625, "learning_rate": 0.0007201785731473763, "loss": 0.263, "step": 61682 }, { "epoch": 0.10937083048531931, "grad_norm": 1.2734375, "learning_rate": 0.000720124615279174, "loss": 0.2145, "step": 61684 }, { "epoch": 0.10937437665062913, "grad_norm": 2.5, "learning_rate": 0.0007200706590723256, "loss": 0.2751, "step": 61686 }, { "epoch": 0.10937792281593894, "grad_norm": 0.50390625, "learning_rate": 0.0007200167045270678, "loss": 0.1505, "step": 61688 }, { "epoch": 0.10938146898124876, "grad_norm": 0.71484375, "learning_rate": 0.000719962751643636, "loss": 0.196, "step": 61690 }, { "epoch": 0.10938501514655857, "grad_norm": 0.3828125, "learning_rate": 0.0007199088004222667, "loss": 0.1827, "step": 61692 }, { "epoch": 0.1093885613118684, "grad_norm": 0.1826171875, "learning_rate": 0.0007198548508631952, "loss": 0.1586, "step": 61694 }, { "epoch": 0.10939210747717822, "grad_norm": 0.314453125, "learning_rate": 0.0007198009029666576, "loss": 0.1764, "step": 61696 }, { "epoch": 0.10939565364248803, "grad_norm": 0.26953125, "learning_rate": 0.0007197469567328909, "loss": 0.1649, "step": 61698 }, { "epoch": 0.10939919980779785, "grad_norm": 0.72265625, "learning_rate": 0.0007196930121621293, "loss": 0.1506, "step": 61700 }, { "epoch": 0.10940274597310766, "grad_norm": 0.400390625, "learning_rate": 0.0007196390692546098, "loss": 0.1669, "step": 61702 }, { "epoch": 0.10940629213841747, "grad_norm": 0.4453125, "learning_rate": 0.0007195851280105677, "loss": 0.1519, "step": 61704 }, { "epoch": 0.10940983830372729, "grad_norm": 0.419921875, "learning_rate": 0.00071953118843024, "loss": 0.1677, "step": 61706 }, { "epoch": 0.1094133844690371, "grad_norm": 1.234375, "learning_rate": 0.0007194772505138611, "loss": 0.1646, "step": 61708 }, { "epoch": 0.10941693063434692, "grad_norm": 0.419921875, "learning_rate": 0.0007194233142616684, "loss": 0.1803, "step": 61710 }, { "epoch": 0.10942047679965673, "grad_norm": 0.318359375, "learning_rate": 0.0007193693796738967, "loss": 0.1753, "step": 61712 }, { "epoch": 0.10942402296496655, "grad_norm": 0.32421875, "learning_rate": 0.0007193154467507823, "loss": 0.1323, "step": 61714 }, { "epoch": 0.10942756913027636, "grad_norm": 0.57421875, "learning_rate": 0.000719261515492561, "loss": 0.1704, "step": 61716 }, { "epoch": 0.10943111529558618, "grad_norm": 0.404296875, "learning_rate": 0.0007192075858994684, "loss": 0.1673, "step": 61718 }, { "epoch": 0.10943466146089599, "grad_norm": 0.390625, "learning_rate": 0.000719153657971741, "loss": 0.2219, "step": 61720 }, { "epoch": 0.1094382076262058, "grad_norm": 0.53125, "learning_rate": 0.0007190997317096142, "loss": 0.1758, "step": 61722 }, { "epoch": 0.10944175379151562, "grad_norm": 0.59375, "learning_rate": 0.000719045807113324, "loss": 0.1828, "step": 61724 }, { "epoch": 0.10944529995682543, "grad_norm": 0.294921875, "learning_rate": 0.0007189918841831058, "loss": 0.2221, "step": 61726 }, { "epoch": 0.10944884612213525, "grad_norm": 0.34375, "learning_rate": 0.0007189379629191961, "loss": 0.2131, "step": 61728 }, { "epoch": 0.10945239228744506, "grad_norm": 2.046875, "learning_rate": 0.0007188840433218302, "loss": 0.2124, "step": 61730 }, { "epoch": 0.10945593845275488, "grad_norm": 0.220703125, "learning_rate": 0.0007188301253912443, "loss": 0.1736, "step": 61732 }, { "epoch": 0.10945948461806469, "grad_norm": 0.74609375, "learning_rate": 0.0007187762091276741, "loss": 0.3351, "step": 61734 }, { "epoch": 0.10946303078337451, "grad_norm": 0.54296875, "learning_rate": 0.0007187222945313549, "loss": 0.1654, "step": 61736 }, { "epoch": 0.10946657694868432, "grad_norm": 3.15625, "learning_rate": 0.0007186683816025237, "loss": 0.2183, "step": 61738 }, { "epoch": 0.10947012311399415, "grad_norm": 0.453125, "learning_rate": 0.0007186144703414146, "loss": 0.1411, "step": 61740 }, { "epoch": 0.10947366927930396, "grad_norm": 0.5078125, "learning_rate": 0.0007185605607482648, "loss": 0.1647, "step": 61742 }, { "epoch": 0.10947721544461378, "grad_norm": 0.8828125, "learning_rate": 0.000718506652823309, "loss": 0.165, "step": 61744 }, { "epoch": 0.1094807616099236, "grad_norm": 0.306640625, "learning_rate": 0.0007184527465667845, "loss": 0.1849, "step": 61746 }, { "epoch": 0.10948430777523341, "grad_norm": 0.28125, "learning_rate": 0.0007183988419789251, "loss": 0.1213, "step": 61748 }, { "epoch": 0.10948785394054322, "grad_norm": 0.376953125, "learning_rate": 0.0007183449390599679, "loss": 0.1449, "step": 61750 }, { "epoch": 0.10949140010585304, "grad_norm": 0.345703125, "learning_rate": 0.0007182910378101482, "loss": 0.1411, "step": 61752 }, { "epoch": 0.10949494627116285, "grad_norm": 0.333984375, "learning_rate": 0.0007182371382297019, "loss": 0.3674, "step": 61754 }, { "epoch": 0.10949849243647267, "grad_norm": 0.232421875, "learning_rate": 0.0007181832403188644, "loss": 0.1936, "step": 61756 }, { "epoch": 0.10950203860178248, "grad_norm": 0.1826171875, "learning_rate": 0.0007181293440778715, "loss": 0.1148, "step": 61758 }, { "epoch": 0.1095055847670923, "grad_norm": 0.6796875, "learning_rate": 0.0007180754495069594, "loss": 0.1738, "step": 61760 }, { "epoch": 0.10950913093240211, "grad_norm": 0.2275390625, "learning_rate": 0.000718021556606363, "loss": 0.1332, "step": 61762 }, { "epoch": 0.10951267709771192, "grad_norm": 0.48046875, "learning_rate": 0.0007179676653763192, "loss": 0.1879, "step": 61764 }, { "epoch": 0.10951622326302174, "grad_norm": 0.59375, "learning_rate": 0.0007179137758170622, "loss": 0.1654, "step": 61766 }, { "epoch": 0.10951976942833155, "grad_norm": 0.84375, "learning_rate": 0.0007178598879288285, "loss": 0.2144, "step": 61768 }, { "epoch": 0.10952331559364137, "grad_norm": 0.439453125, "learning_rate": 0.0007178060017118539, "loss": 0.2149, "step": 61770 }, { "epoch": 0.10952686175895118, "grad_norm": 0.255859375, "learning_rate": 0.0007177521171663739, "loss": 0.1491, "step": 61772 }, { "epoch": 0.109530407924261, "grad_norm": 0.91015625, "learning_rate": 0.0007176982342926239, "loss": 0.225, "step": 61774 }, { "epoch": 0.10953395408957081, "grad_norm": 0.76953125, "learning_rate": 0.0007176443530908394, "loss": 0.2755, "step": 61776 }, { "epoch": 0.10953750025488063, "grad_norm": 0.796875, "learning_rate": 0.0007175904735612571, "loss": 0.2616, "step": 61778 }, { "epoch": 0.10954104642019044, "grad_norm": 0.416015625, "learning_rate": 0.0007175365957041118, "loss": 0.2401, "step": 61780 }, { "epoch": 0.10954459258550026, "grad_norm": 0.39453125, "learning_rate": 0.0007174827195196389, "loss": 0.3938, "step": 61782 }, { "epoch": 0.10954813875081008, "grad_norm": 0.2578125, "learning_rate": 0.0007174288450080744, "loss": 0.1804, "step": 61784 }, { "epoch": 0.1095516849161199, "grad_norm": 0.31640625, "learning_rate": 0.0007173749721696544, "loss": 0.1729, "step": 61786 }, { "epoch": 0.10955523108142971, "grad_norm": 0.66796875, "learning_rate": 0.0007173211010046137, "loss": 0.1937, "step": 61788 }, { "epoch": 0.10955877724673953, "grad_norm": 1.2890625, "learning_rate": 0.0007172672315131883, "loss": 0.2232, "step": 61790 }, { "epoch": 0.10956232341204934, "grad_norm": 0.5703125, "learning_rate": 0.0007172133636956137, "loss": 0.2125, "step": 61792 }, { "epoch": 0.10956586957735916, "grad_norm": 0.3828125, "learning_rate": 0.0007171594975521251, "loss": 0.1858, "step": 61794 }, { "epoch": 0.10956941574266897, "grad_norm": 0.53125, "learning_rate": 0.0007171056330829595, "loss": 0.1821, "step": 61796 }, { "epoch": 0.10957296190797879, "grad_norm": 0.189453125, "learning_rate": 0.0007170517702883504, "loss": 0.1808, "step": 61798 }, { "epoch": 0.1095765080732886, "grad_norm": 1.484375, "learning_rate": 0.0007169979091685348, "loss": 0.3679, "step": 61800 }, { "epoch": 0.10958005423859841, "grad_norm": 0.7578125, "learning_rate": 0.0007169440497237476, "loss": 0.1953, "step": 61802 }, { "epoch": 0.10958360040390823, "grad_norm": 0.78515625, "learning_rate": 0.0007168901919542255, "loss": 0.2002, "step": 61804 }, { "epoch": 0.10958714656921804, "grad_norm": 0.953125, "learning_rate": 0.0007168363358602022, "loss": 0.3212, "step": 61806 }, { "epoch": 0.10959069273452786, "grad_norm": 0.373046875, "learning_rate": 0.0007167824814419146, "loss": 0.353, "step": 61808 }, { "epoch": 0.10959423889983767, "grad_norm": 0.234375, "learning_rate": 0.0007167286286995979, "loss": 0.2045, "step": 61810 }, { "epoch": 0.10959778506514749, "grad_norm": 0.171875, "learning_rate": 0.0007166747776334874, "loss": 0.1605, "step": 61812 }, { "epoch": 0.1096013312304573, "grad_norm": 0.1865234375, "learning_rate": 0.000716620928243819, "loss": 0.2004, "step": 61814 }, { "epoch": 0.10960487739576712, "grad_norm": 0.40625, "learning_rate": 0.0007165670805308273, "loss": 0.1641, "step": 61816 }, { "epoch": 0.10960842356107693, "grad_norm": 0.189453125, "learning_rate": 0.0007165132344947491, "loss": 0.1173, "step": 61818 }, { "epoch": 0.10961196972638675, "grad_norm": 0.59765625, "learning_rate": 0.0007164593901358192, "loss": 0.2698, "step": 61820 }, { "epoch": 0.10961551589169656, "grad_norm": 1.171875, "learning_rate": 0.000716405547454273, "loss": 0.2138, "step": 61822 }, { "epoch": 0.10961906205700638, "grad_norm": 0.314453125, "learning_rate": 0.000716351706450346, "loss": 0.1847, "step": 61824 }, { "epoch": 0.10962260822231619, "grad_norm": 5.875, "learning_rate": 0.0007162978671242739, "loss": 0.282, "step": 61826 }, { "epoch": 0.109626154387626, "grad_norm": 0.19140625, "learning_rate": 0.0007162440294762922, "loss": 0.1571, "step": 61828 }, { "epoch": 0.10962970055293583, "grad_norm": 0.263671875, "learning_rate": 0.0007161901935066362, "loss": 0.2479, "step": 61830 }, { "epoch": 0.10963324671824565, "grad_norm": 0.875, "learning_rate": 0.0007161363592155412, "loss": 0.1483, "step": 61832 }, { "epoch": 0.10963679288355546, "grad_norm": 0.337890625, "learning_rate": 0.0007160825266032425, "loss": 0.1706, "step": 61834 }, { "epoch": 0.10964033904886528, "grad_norm": 0.34765625, "learning_rate": 0.0007160286956699767, "loss": 0.161, "step": 61836 }, { "epoch": 0.10964388521417509, "grad_norm": 0.4140625, "learning_rate": 0.0007159748664159773, "loss": 0.137, "step": 61838 }, { "epoch": 0.1096474313794849, "grad_norm": 0.41015625, "learning_rate": 0.0007159210388414814, "loss": 0.228, "step": 61840 }, { "epoch": 0.10965097754479472, "grad_norm": 1.265625, "learning_rate": 0.0007158672129467239, "loss": 0.1652, "step": 61842 }, { "epoch": 0.10965452371010453, "grad_norm": 0.431640625, "learning_rate": 0.0007158133887319394, "loss": 0.2086, "step": 61844 }, { "epoch": 0.10965806987541435, "grad_norm": 0.189453125, "learning_rate": 0.0007157595661973648, "loss": 0.1365, "step": 61846 }, { "epoch": 0.10966161604072416, "grad_norm": 0.55859375, "learning_rate": 0.000715705745343234, "loss": 0.2661, "step": 61848 }, { "epoch": 0.10966516220603398, "grad_norm": 0.2265625, "learning_rate": 0.0007156519261697833, "loss": 0.2503, "step": 61850 }, { "epoch": 0.10966870837134379, "grad_norm": 0.9453125, "learning_rate": 0.0007155981086772477, "loss": 0.2316, "step": 61852 }, { "epoch": 0.10967225453665361, "grad_norm": 0.3671875, "learning_rate": 0.0007155442928658631, "loss": 0.1277, "step": 61854 }, { "epoch": 0.10967580070196342, "grad_norm": 0.9375, "learning_rate": 0.0007154904787358638, "loss": 0.1898, "step": 61856 }, { "epoch": 0.10967934686727324, "grad_norm": 0.498046875, "learning_rate": 0.0007154366662874863, "loss": 0.1547, "step": 61858 }, { "epoch": 0.10968289303258305, "grad_norm": 0.416015625, "learning_rate": 0.0007153828555209654, "loss": 0.1344, "step": 61860 }, { "epoch": 0.10968643919789287, "grad_norm": 0.365234375, "learning_rate": 0.0007153290464365363, "loss": 0.1873, "step": 61862 }, { "epoch": 0.10968998536320268, "grad_norm": 0.2138671875, "learning_rate": 0.0007152752390344346, "loss": 0.191, "step": 61864 }, { "epoch": 0.1096935315285125, "grad_norm": 0.37109375, "learning_rate": 0.0007152214333148954, "loss": 0.1985, "step": 61866 }, { "epoch": 0.10969707769382231, "grad_norm": 0.333984375, "learning_rate": 0.0007151676292781542, "loss": 0.1797, "step": 61868 }, { "epoch": 0.10970062385913212, "grad_norm": 0.90234375, "learning_rate": 0.0007151138269244464, "loss": 0.2072, "step": 61870 }, { "epoch": 0.10970417002444194, "grad_norm": 0.3203125, "learning_rate": 0.0007150600262540072, "loss": 0.2101, "step": 61872 }, { "epoch": 0.10970771618975175, "grad_norm": 0.201171875, "learning_rate": 0.0007150062272670712, "loss": 0.1581, "step": 61874 }, { "epoch": 0.10971126235506158, "grad_norm": 0.2353515625, "learning_rate": 0.000714952429963875, "loss": 0.1885, "step": 61876 }, { "epoch": 0.1097148085203714, "grad_norm": 0.59375, "learning_rate": 0.000714898634344653, "loss": 0.173, "step": 61878 }, { "epoch": 0.10971835468568121, "grad_norm": 0.734375, "learning_rate": 0.0007148448404096408, "loss": 0.1899, "step": 61880 }, { "epoch": 0.10972190085099102, "grad_norm": 0.2734375, "learning_rate": 0.0007147910481590736, "loss": 0.2003, "step": 61882 }, { "epoch": 0.10972544701630084, "grad_norm": 0.2041015625, "learning_rate": 0.000714737257593186, "loss": 0.1871, "step": 61884 }, { "epoch": 0.10972899318161065, "grad_norm": 0.4140625, "learning_rate": 0.000714683468712215, "loss": 0.1709, "step": 61886 }, { "epoch": 0.10973253934692047, "grad_norm": 0.4296875, "learning_rate": 0.0007146296815163937, "loss": 0.1481, "step": 61888 }, { "epoch": 0.10973608551223028, "grad_norm": 0.8671875, "learning_rate": 0.0007145758960059587, "loss": 0.1747, "step": 61890 }, { "epoch": 0.1097396316775401, "grad_norm": 0.326171875, "learning_rate": 0.0007145221121811445, "loss": 0.2098, "step": 61892 }, { "epoch": 0.10974317784284991, "grad_norm": 0.578125, "learning_rate": 0.0007144683300421873, "loss": 0.1538, "step": 61894 }, { "epoch": 0.10974672400815973, "grad_norm": 0.40234375, "learning_rate": 0.000714414549589321, "loss": 0.1947, "step": 61896 }, { "epoch": 0.10975027017346954, "grad_norm": 0.78125, "learning_rate": 0.0007143607708227817, "loss": 0.1941, "step": 61898 }, { "epoch": 0.10975381633877936, "grad_norm": 0.27734375, "learning_rate": 0.0007143069937428046, "loss": 0.1717, "step": 61900 }, { "epoch": 0.10975736250408917, "grad_norm": 0.52734375, "learning_rate": 0.0007142532183496246, "loss": 0.1517, "step": 61902 }, { "epoch": 0.10976090866939898, "grad_norm": 0.24609375, "learning_rate": 0.0007141994446434772, "loss": 0.1501, "step": 61904 }, { "epoch": 0.1097644548347088, "grad_norm": 0.2080078125, "learning_rate": 0.0007141456726245968, "loss": 0.1844, "step": 61906 }, { "epoch": 0.10976800100001861, "grad_norm": 0.68359375, "learning_rate": 0.0007140919022932193, "loss": 0.1921, "step": 61908 }, { "epoch": 0.10977154716532843, "grad_norm": 0.3203125, "learning_rate": 0.0007140381336495799, "loss": 0.1821, "step": 61910 }, { "epoch": 0.10977509333063824, "grad_norm": 0.30078125, "learning_rate": 0.0007139843666939132, "loss": 0.1459, "step": 61912 }, { "epoch": 0.10977863949594806, "grad_norm": 0.51171875, "learning_rate": 0.0007139306014264546, "loss": 0.2022, "step": 61914 }, { "epoch": 0.10978218566125787, "grad_norm": 0.4921875, "learning_rate": 0.0007138768378474394, "loss": 0.2106, "step": 61916 }, { "epoch": 0.10978573182656769, "grad_norm": 0.41796875, "learning_rate": 0.0007138230759571027, "loss": 0.2238, "step": 61918 }, { "epoch": 0.10978927799187752, "grad_norm": 0.7109375, "learning_rate": 0.0007137693157556797, "loss": 0.2717, "step": 61920 }, { "epoch": 0.10979282415718733, "grad_norm": 0.71875, "learning_rate": 0.0007137155572434054, "loss": 0.23, "step": 61922 }, { "epoch": 0.10979637032249714, "grad_norm": 1.2578125, "learning_rate": 0.0007136618004205142, "loss": 0.165, "step": 61924 }, { "epoch": 0.10979991648780696, "grad_norm": 0.322265625, "learning_rate": 0.0007136080452872424, "loss": 0.1801, "step": 61926 }, { "epoch": 0.10980346265311677, "grad_norm": 0.49609375, "learning_rate": 0.0007135542918438247, "loss": 0.1749, "step": 61928 }, { "epoch": 0.10980700881842659, "grad_norm": 0.6015625, "learning_rate": 0.0007135005400904959, "loss": 0.1312, "step": 61930 }, { "epoch": 0.1098105549837364, "grad_norm": 0.224609375, "learning_rate": 0.0007134467900274908, "loss": 0.1684, "step": 61932 }, { "epoch": 0.10981410114904622, "grad_norm": 0.51171875, "learning_rate": 0.0007133930416550454, "loss": 0.2062, "step": 61934 }, { "epoch": 0.10981764731435603, "grad_norm": 0.765625, "learning_rate": 0.0007133392949733944, "loss": 0.2089, "step": 61936 }, { "epoch": 0.10982119347966585, "grad_norm": 0.375, "learning_rate": 0.0007132855499827723, "loss": 0.166, "step": 61938 }, { "epoch": 0.10982473964497566, "grad_norm": 0.6640625, "learning_rate": 0.0007132318066834149, "loss": 0.1728, "step": 61940 }, { "epoch": 0.10982828581028548, "grad_norm": 0.31640625, "learning_rate": 0.0007131780650755566, "loss": 0.1993, "step": 61942 }, { "epoch": 0.10983183197559529, "grad_norm": 0.5703125, "learning_rate": 0.0007131243251594332, "loss": 0.3349, "step": 61944 }, { "epoch": 0.1098353781409051, "grad_norm": 0.2216796875, "learning_rate": 0.0007130705869352787, "loss": 0.1462, "step": 61946 }, { "epoch": 0.10983892430621492, "grad_norm": 0.447265625, "learning_rate": 0.0007130168504033292, "loss": 0.1826, "step": 61948 }, { "epoch": 0.10984247047152473, "grad_norm": 0.66796875, "learning_rate": 0.0007129631155638188, "loss": 0.1493, "step": 61950 }, { "epoch": 0.10984601663683455, "grad_norm": 0.61328125, "learning_rate": 0.0007129093824169835, "loss": 0.1553, "step": 61952 }, { "epoch": 0.10984956280214436, "grad_norm": 0.275390625, "learning_rate": 0.0007128556509630571, "loss": 0.1618, "step": 61954 }, { "epoch": 0.10985310896745418, "grad_norm": 0.53125, "learning_rate": 0.0007128019212022754, "loss": 0.1438, "step": 61956 }, { "epoch": 0.10985665513276399, "grad_norm": 0.328125, "learning_rate": 0.0007127481931348731, "loss": 0.1802, "step": 61958 }, { "epoch": 0.1098602012980738, "grad_norm": 1.265625, "learning_rate": 0.0007126944667610858, "loss": 0.1555, "step": 61960 }, { "epoch": 0.10986374746338362, "grad_norm": 1.015625, "learning_rate": 0.0007126407420811474, "loss": 0.1825, "step": 61962 }, { "epoch": 0.10986729362869344, "grad_norm": 0.5078125, "learning_rate": 0.0007125870190952933, "loss": 0.2056, "step": 61964 }, { "epoch": 0.10987083979400326, "grad_norm": 0.21875, "learning_rate": 0.0007125332978037587, "loss": 0.2423, "step": 61966 }, { "epoch": 0.10987438595931308, "grad_norm": 0.7265625, "learning_rate": 0.0007124795782067786, "loss": 0.2025, "step": 61968 }, { "epoch": 0.10987793212462289, "grad_norm": 0.84765625, "learning_rate": 0.0007124258603045875, "loss": 0.1496, "step": 61970 }, { "epoch": 0.10988147828993271, "grad_norm": 0.828125, "learning_rate": 0.0007123721440974205, "loss": 0.1754, "step": 61972 }, { "epoch": 0.10988502445524252, "grad_norm": 0.796875, "learning_rate": 0.0007123184295855125, "loss": 0.1606, "step": 61974 }, { "epoch": 0.10988857062055234, "grad_norm": 1.3125, "learning_rate": 0.000712264716769099, "loss": 0.2801, "step": 61976 }, { "epoch": 0.10989211678586215, "grad_norm": 0.59765625, "learning_rate": 0.0007122110056484142, "loss": 0.2153, "step": 61978 }, { "epoch": 0.10989566295117197, "grad_norm": 0.2294921875, "learning_rate": 0.0007121572962236931, "loss": 0.1651, "step": 61980 }, { "epoch": 0.10989920911648178, "grad_norm": 0.267578125, "learning_rate": 0.0007121035884951703, "loss": 0.2115, "step": 61982 }, { "epoch": 0.1099027552817916, "grad_norm": 0.361328125, "learning_rate": 0.0007120498824630821, "loss": 0.1708, "step": 61984 }, { "epoch": 0.10990630144710141, "grad_norm": 0.65234375, "learning_rate": 0.0007119961781276614, "loss": 0.1666, "step": 61986 }, { "epoch": 0.10990984761241122, "grad_norm": 0.55859375, "learning_rate": 0.0007119424754891444, "loss": 0.1731, "step": 61988 }, { "epoch": 0.10991339377772104, "grad_norm": 0.28515625, "learning_rate": 0.0007118887745477654, "loss": 0.1726, "step": 61990 }, { "epoch": 0.10991693994303085, "grad_norm": 1.2109375, "learning_rate": 0.0007118350753037602, "loss": 0.3025, "step": 61992 }, { "epoch": 0.10992048610834067, "grad_norm": 0.2490234375, "learning_rate": 0.0007117813777573618, "loss": 0.2188, "step": 61994 }, { "epoch": 0.10992403227365048, "grad_norm": 0.55859375, "learning_rate": 0.0007117276819088068, "loss": 0.1749, "step": 61996 }, { "epoch": 0.1099275784389603, "grad_norm": 0.82421875, "learning_rate": 0.0007116739877583294, "loss": 0.1726, "step": 61998 }, { "epoch": 0.10993112460427011, "grad_norm": 0.318359375, "learning_rate": 0.0007116202953061643, "loss": 0.2287, "step": 62000 }, { "epoch": 0.10993467076957993, "grad_norm": 0.796875, "learning_rate": 0.0007115666045525467, "loss": 0.1813, "step": 62002 }, { "epoch": 0.10993821693488974, "grad_norm": 1.0390625, "learning_rate": 0.0007115129154977107, "loss": 0.2925, "step": 62004 }, { "epoch": 0.10994176310019955, "grad_norm": 0.197265625, "learning_rate": 0.0007114592281418917, "loss": 0.1167, "step": 62006 }, { "epoch": 0.10994530926550937, "grad_norm": 0.69921875, "learning_rate": 0.000711405542485324, "loss": 0.3405, "step": 62008 }, { "epoch": 0.10994885543081918, "grad_norm": 0.396484375, "learning_rate": 0.0007113518585282437, "loss": 0.1373, "step": 62010 }, { "epoch": 0.10995240159612901, "grad_norm": 0.26171875, "learning_rate": 0.0007112981762708837, "loss": 0.1841, "step": 62012 }, { "epoch": 0.10995594776143883, "grad_norm": 0.216796875, "learning_rate": 0.00071124449571348, "loss": 0.162, "step": 62014 }, { "epoch": 0.10995949392674864, "grad_norm": 0.326171875, "learning_rate": 0.0007111908168562671, "loss": 0.2037, "step": 62016 }, { "epoch": 0.10996304009205846, "grad_norm": 0.220703125, "learning_rate": 0.0007111371396994797, "loss": 0.1676, "step": 62018 }, { "epoch": 0.10996658625736827, "grad_norm": 0.240234375, "learning_rate": 0.0007110834642433528, "loss": 0.1744, "step": 62020 }, { "epoch": 0.10997013242267809, "grad_norm": 0.275390625, "learning_rate": 0.0007110297904881204, "loss": 0.2078, "step": 62022 }, { "epoch": 0.1099736785879879, "grad_norm": 0.359375, "learning_rate": 0.000710976118434018, "loss": 0.1703, "step": 62024 }, { "epoch": 0.10997722475329771, "grad_norm": 0.703125, "learning_rate": 0.0007109224480812801, "loss": 0.2025, "step": 62026 }, { "epoch": 0.10998077091860753, "grad_norm": 0.443359375, "learning_rate": 0.0007108687794301417, "loss": 0.1285, "step": 62028 }, { "epoch": 0.10998431708391734, "grad_norm": 0.60546875, "learning_rate": 0.0007108151124808371, "loss": 0.2528, "step": 62030 }, { "epoch": 0.10998786324922716, "grad_norm": 0.5546875, "learning_rate": 0.0007107614472336006, "loss": 0.1583, "step": 62032 }, { "epoch": 0.10999140941453697, "grad_norm": 0.4375, "learning_rate": 0.0007107077836886682, "loss": 0.1827, "step": 62034 }, { "epoch": 0.10999495557984679, "grad_norm": 2.359375, "learning_rate": 0.0007106541218462732, "loss": 0.1757, "step": 62036 }, { "epoch": 0.1099985017451566, "grad_norm": 0.40234375, "learning_rate": 0.0007106004617066512, "loss": 0.1632, "step": 62038 }, { "epoch": 0.11000204791046642, "grad_norm": 0.400390625, "learning_rate": 0.0007105468032700364, "loss": 0.2459, "step": 62040 }, { "epoch": 0.11000559407577623, "grad_norm": 0.484375, "learning_rate": 0.0007104931465366642, "loss": 0.1406, "step": 62042 }, { "epoch": 0.11000914024108605, "grad_norm": 0.5078125, "learning_rate": 0.000710439491506768, "loss": 0.185, "step": 62044 }, { "epoch": 0.11001268640639586, "grad_norm": 1.0859375, "learning_rate": 0.0007103858381805837, "loss": 0.3695, "step": 62046 }, { "epoch": 0.11001623257170567, "grad_norm": 1.265625, "learning_rate": 0.0007103321865583452, "loss": 0.1661, "step": 62048 }, { "epoch": 0.11001977873701549, "grad_norm": 0.390625, "learning_rate": 0.0007102785366402875, "loss": 0.2241, "step": 62050 }, { "epoch": 0.1100233249023253, "grad_norm": 0.244140625, "learning_rate": 0.0007102248884266452, "loss": 0.1529, "step": 62052 }, { "epoch": 0.11002687106763512, "grad_norm": 0.2333984375, "learning_rate": 0.0007101712419176523, "loss": 0.2005, "step": 62054 }, { "epoch": 0.11003041723294495, "grad_norm": 0.474609375, "learning_rate": 0.0007101175971135445, "loss": 0.1815, "step": 62056 }, { "epoch": 0.11003396339825476, "grad_norm": 0.22265625, "learning_rate": 0.0007100639540145556, "loss": 0.1827, "step": 62058 }, { "epoch": 0.11003750956356458, "grad_norm": 0.490234375, "learning_rate": 0.0007100103126209208, "loss": 0.2088, "step": 62060 }, { "epoch": 0.11004105572887439, "grad_norm": 0.296875, "learning_rate": 0.0007099566729328735, "loss": 0.1761, "step": 62062 }, { "epoch": 0.1100446018941842, "grad_norm": 0.2158203125, "learning_rate": 0.0007099030349506499, "loss": 0.2496, "step": 62064 }, { "epoch": 0.11004814805949402, "grad_norm": 0.3203125, "learning_rate": 0.0007098493986744836, "loss": 0.2186, "step": 62066 }, { "epoch": 0.11005169422480383, "grad_norm": 0.337890625, "learning_rate": 0.0007097957641046098, "loss": 0.1655, "step": 62068 }, { "epoch": 0.11005524039011365, "grad_norm": 0.2265625, "learning_rate": 0.0007097421312412622, "loss": 0.1645, "step": 62070 }, { "epoch": 0.11005878655542346, "grad_norm": 1.171875, "learning_rate": 0.0007096885000846758, "loss": 0.444, "step": 62072 }, { "epoch": 0.11006233272073328, "grad_norm": 1.3359375, "learning_rate": 0.0007096348706350857, "loss": 0.2271, "step": 62074 }, { "epoch": 0.11006587888604309, "grad_norm": 0.34765625, "learning_rate": 0.0007095812428927253, "loss": 0.1597, "step": 62076 }, { "epoch": 0.1100694250513529, "grad_norm": 0.431640625, "learning_rate": 0.0007095276168578302, "loss": 0.1606, "step": 62078 }, { "epoch": 0.11007297121666272, "grad_norm": 0.88671875, "learning_rate": 0.0007094739925306339, "loss": 0.2174, "step": 62080 }, { "epoch": 0.11007651738197254, "grad_norm": 1.03125, "learning_rate": 0.0007094203699113724, "loss": 0.1387, "step": 62082 }, { "epoch": 0.11008006354728235, "grad_norm": 0.25, "learning_rate": 0.0007093667490002787, "loss": 0.1806, "step": 62084 }, { "epoch": 0.11008360971259216, "grad_norm": 0.287109375, "learning_rate": 0.0007093131297975881, "loss": 0.1158, "step": 62086 }, { "epoch": 0.11008715587790198, "grad_norm": 0.48046875, "learning_rate": 0.000709259512303535, "loss": 0.1531, "step": 62088 }, { "epoch": 0.1100907020432118, "grad_norm": 0.31640625, "learning_rate": 0.0007092058965183539, "loss": 0.1602, "step": 62090 }, { "epoch": 0.11009424820852161, "grad_norm": 4.5, "learning_rate": 0.0007091522824422791, "loss": 0.2477, "step": 62092 }, { "epoch": 0.11009779437383142, "grad_norm": 0.2060546875, "learning_rate": 0.0007090986700755449, "loss": 0.3838, "step": 62094 }, { "epoch": 0.11010134053914124, "grad_norm": 2.390625, "learning_rate": 0.0007090450594183862, "loss": 0.2373, "step": 62096 }, { "epoch": 0.11010488670445105, "grad_norm": 0.3125, "learning_rate": 0.0007089914504710371, "loss": 0.1525, "step": 62098 }, { "epoch": 0.11010843286976087, "grad_norm": 0.484375, "learning_rate": 0.000708937843233733, "loss": 0.2123, "step": 62100 }, { "epoch": 0.1101119790350707, "grad_norm": 1.6484375, "learning_rate": 0.0007088842377067071, "loss": 0.2749, "step": 62102 }, { "epoch": 0.11011552520038051, "grad_norm": 1.234375, "learning_rate": 0.0007088306338901944, "loss": 0.2197, "step": 62104 }, { "epoch": 0.11011907136569032, "grad_norm": 0.828125, "learning_rate": 0.0007087770317844295, "loss": 0.157, "step": 62106 }, { "epoch": 0.11012261753100014, "grad_norm": 0.259765625, "learning_rate": 0.0007087234313896464, "loss": 0.1986, "step": 62108 }, { "epoch": 0.11012616369630995, "grad_norm": 0.330078125, "learning_rate": 0.0007086698327060801, "loss": 0.1575, "step": 62110 }, { "epoch": 0.11012970986161977, "grad_norm": 0.259765625, "learning_rate": 0.0007086162357339638, "loss": 0.1497, "step": 62112 }, { "epoch": 0.11013325602692958, "grad_norm": 0.81640625, "learning_rate": 0.0007085626404735335, "loss": 0.1992, "step": 62114 }, { "epoch": 0.1101368021922394, "grad_norm": 0.53125, "learning_rate": 0.0007085090469250226, "loss": 0.1629, "step": 62116 }, { "epoch": 0.11014034835754921, "grad_norm": 0.87109375, "learning_rate": 0.000708455455088666, "loss": 0.2097, "step": 62118 }, { "epoch": 0.11014389452285903, "grad_norm": 0.333984375, "learning_rate": 0.0007084018649646971, "loss": 0.2871, "step": 62120 }, { "epoch": 0.11014744068816884, "grad_norm": 0.2421875, "learning_rate": 0.0007083482765533515, "loss": 0.1746, "step": 62122 }, { "epoch": 0.11015098685347866, "grad_norm": 0.81640625, "learning_rate": 0.0007082946898548631, "loss": 0.2453, "step": 62124 }, { "epoch": 0.11015453301878847, "grad_norm": 0.32421875, "learning_rate": 0.000708241104869466, "loss": 0.2061, "step": 62126 }, { "epoch": 0.11015807918409828, "grad_norm": 0.447265625, "learning_rate": 0.0007081875215973949, "loss": 0.1821, "step": 62128 }, { "epoch": 0.1101616253494081, "grad_norm": 0.8046875, "learning_rate": 0.0007081339400388835, "loss": 0.1522, "step": 62130 }, { "epoch": 0.11016517151471791, "grad_norm": 1.46875, "learning_rate": 0.0007080803601941674, "loss": 0.2401, "step": 62132 }, { "epoch": 0.11016871768002773, "grad_norm": 0.2578125, "learning_rate": 0.0007080267820634796, "loss": 0.2207, "step": 62134 }, { "epoch": 0.11017226384533754, "grad_norm": 3.296875, "learning_rate": 0.0007079732056470551, "loss": 0.2537, "step": 62136 }, { "epoch": 0.11017581001064736, "grad_norm": 0.36328125, "learning_rate": 0.000707919630945128, "loss": 0.1869, "step": 62138 }, { "epoch": 0.11017935617595717, "grad_norm": 0.80078125, "learning_rate": 0.0007078660579579332, "loss": 0.1759, "step": 62140 }, { "epoch": 0.11018290234126699, "grad_norm": 0.91015625, "learning_rate": 0.0007078124866857038, "loss": 0.2216, "step": 62142 }, { "epoch": 0.1101864485065768, "grad_norm": 1.671875, "learning_rate": 0.0007077589171286752, "loss": 0.2492, "step": 62144 }, { "epoch": 0.11018999467188662, "grad_norm": 0.453125, "learning_rate": 0.0007077053492870811, "loss": 0.1411, "step": 62146 }, { "epoch": 0.11019354083719644, "grad_norm": 0.443359375, "learning_rate": 0.0007076517831611559, "loss": 0.1674, "step": 62148 }, { "epoch": 0.11019708700250626, "grad_norm": 0.515625, "learning_rate": 0.0007075982187511341, "loss": 0.2374, "step": 62150 }, { "epoch": 0.11020063316781607, "grad_norm": 0.1923828125, "learning_rate": 0.0007075446560572494, "loss": 0.2125, "step": 62152 }, { "epoch": 0.11020417933312589, "grad_norm": 0.375, "learning_rate": 0.0007074910950797366, "loss": 0.2402, "step": 62154 }, { "epoch": 0.1102077254984357, "grad_norm": 2.125, "learning_rate": 0.0007074375358188298, "loss": 0.3191, "step": 62156 }, { "epoch": 0.11021127166374552, "grad_norm": 0.427734375, "learning_rate": 0.0007073839782747633, "loss": 0.1775, "step": 62158 }, { "epoch": 0.11021481782905533, "grad_norm": 0.349609375, "learning_rate": 0.0007073304224477708, "loss": 0.2481, "step": 62160 }, { "epoch": 0.11021836399436515, "grad_norm": 0.953125, "learning_rate": 0.0007072768683380873, "loss": 0.1958, "step": 62162 }, { "epoch": 0.11022191015967496, "grad_norm": 0.65625, "learning_rate": 0.0007072233159459467, "loss": 0.1658, "step": 62164 }, { "epoch": 0.11022545632498477, "grad_norm": 0.16796875, "learning_rate": 0.0007071697652715832, "loss": 0.1834, "step": 62166 }, { "epoch": 0.11022900249029459, "grad_norm": 0.7890625, "learning_rate": 0.0007071162163152307, "loss": 0.2156, "step": 62168 }, { "epoch": 0.1102325486556044, "grad_norm": 0.77734375, "learning_rate": 0.0007070626690771236, "loss": 0.2163, "step": 62170 }, { "epoch": 0.11023609482091422, "grad_norm": 0.5, "learning_rate": 0.0007070091235574967, "loss": 0.178, "step": 62172 }, { "epoch": 0.11023964098622403, "grad_norm": 0.55078125, "learning_rate": 0.0007069555797565828, "loss": 0.1827, "step": 62174 }, { "epoch": 0.11024318715153385, "grad_norm": 0.40625, "learning_rate": 0.0007069020376746174, "loss": 0.2055, "step": 62176 }, { "epoch": 0.11024673331684366, "grad_norm": 0.5234375, "learning_rate": 0.0007068484973118339, "loss": 0.1893, "step": 62178 }, { "epoch": 0.11025027948215348, "grad_norm": 0.83984375, "learning_rate": 0.0007067949586684667, "loss": 0.1474, "step": 62180 }, { "epoch": 0.11025382564746329, "grad_norm": 0.306640625, "learning_rate": 0.0007067414217447503, "loss": 0.1423, "step": 62182 }, { "epoch": 0.1102573718127731, "grad_norm": 0.59765625, "learning_rate": 0.0007066878865409183, "loss": 0.188, "step": 62184 }, { "epoch": 0.11026091797808292, "grad_norm": 0.462890625, "learning_rate": 0.000706634353057205, "loss": 0.2891, "step": 62186 }, { "epoch": 0.11026446414339273, "grad_norm": 0.8203125, "learning_rate": 0.0007065808212938445, "loss": 0.1717, "step": 62188 }, { "epoch": 0.11026801030870255, "grad_norm": 0.28125, "learning_rate": 0.0007065272912510714, "loss": 0.1751, "step": 62190 }, { "epoch": 0.11027155647401238, "grad_norm": 0.6015625, "learning_rate": 0.0007064737629291186, "loss": 0.18, "step": 62192 }, { "epoch": 0.11027510263932219, "grad_norm": 0.451171875, "learning_rate": 0.0007064202363282215, "loss": 0.1944, "step": 62194 }, { "epoch": 0.110278648804632, "grad_norm": 0.392578125, "learning_rate": 0.0007063667114486136, "loss": 0.3246, "step": 62196 }, { "epoch": 0.11028219496994182, "grad_norm": 0.796875, "learning_rate": 0.0007063131882905292, "loss": 0.1918, "step": 62198 }, { "epoch": 0.11028574113525164, "grad_norm": 0.390625, "learning_rate": 0.0007062596668542021, "loss": 0.1685, "step": 62200 }, { "epoch": 0.11028928730056145, "grad_norm": 0.359375, "learning_rate": 0.000706206147139866, "loss": 0.165, "step": 62202 }, { "epoch": 0.11029283346587127, "grad_norm": 0.32421875, "learning_rate": 0.0007061526291477561, "loss": 0.2911, "step": 62204 }, { "epoch": 0.11029637963118108, "grad_norm": 0.55078125, "learning_rate": 0.0007060991128781056, "loss": 0.4218, "step": 62206 }, { "epoch": 0.1102999257964909, "grad_norm": 0.97265625, "learning_rate": 0.0007060455983311489, "loss": 0.4523, "step": 62208 }, { "epoch": 0.11030347196180071, "grad_norm": 0.171875, "learning_rate": 0.0007059920855071196, "loss": 0.1253, "step": 62210 }, { "epoch": 0.11030701812711052, "grad_norm": 0.197265625, "learning_rate": 0.0007059385744062523, "loss": 0.1704, "step": 62212 }, { "epoch": 0.11031056429242034, "grad_norm": 0.251953125, "learning_rate": 0.0007058850650287809, "loss": 0.2476, "step": 62214 }, { "epoch": 0.11031411045773015, "grad_norm": 0.546875, "learning_rate": 0.0007058315573749393, "loss": 0.1857, "step": 62216 }, { "epoch": 0.11031765662303997, "grad_norm": 0.2255859375, "learning_rate": 0.0007057780514449613, "loss": 0.1412, "step": 62218 }, { "epoch": 0.11032120278834978, "grad_norm": 0.90625, "learning_rate": 0.0007057245472390808, "loss": 0.2745, "step": 62220 }, { "epoch": 0.1103247489536596, "grad_norm": 0.3125, "learning_rate": 0.0007056710447575329, "loss": 0.1739, "step": 62222 }, { "epoch": 0.11032829511896941, "grad_norm": 0.20703125, "learning_rate": 0.0007056175440005501, "loss": 0.2196, "step": 62224 }, { "epoch": 0.11033184128427923, "grad_norm": 0.220703125, "learning_rate": 0.0007055640449683674, "loss": 0.1362, "step": 62226 }, { "epoch": 0.11033538744958904, "grad_norm": 0.2177734375, "learning_rate": 0.0007055105476612183, "loss": 0.1073, "step": 62228 }, { "epoch": 0.11033893361489885, "grad_norm": 0.171875, "learning_rate": 0.0007054570520793373, "loss": 0.1454, "step": 62230 }, { "epoch": 0.11034247978020867, "grad_norm": 0.271484375, "learning_rate": 0.0007054035582229574, "loss": 0.1583, "step": 62232 }, { "epoch": 0.11034602594551848, "grad_norm": 0.2451171875, "learning_rate": 0.0007053500660923136, "loss": 0.1435, "step": 62234 }, { "epoch": 0.1103495721108283, "grad_norm": 0.28515625, "learning_rate": 0.0007052965756876391, "loss": 0.1698, "step": 62236 }, { "epoch": 0.11035311827613813, "grad_norm": 0.478515625, "learning_rate": 0.0007052430870091684, "loss": 0.1943, "step": 62238 }, { "epoch": 0.11035666444144794, "grad_norm": 1.2109375, "learning_rate": 0.000705189600057135, "loss": 0.1665, "step": 62240 }, { "epoch": 0.11036021060675776, "grad_norm": 0.4375, "learning_rate": 0.0007051361148317725, "loss": 0.1792, "step": 62242 }, { "epoch": 0.11036375677206757, "grad_norm": 0.6796875, "learning_rate": 0.0007050826313333158, "loss": 0.1365, "step": 62244 }, { "epoch": 0.11036730293737738, "grad_norm": 2.1875, "learning_rate": 0.0007050291495619982, "loss": 0.3092, "step": 62246 }, { "epoch": 0.1103708491026872, "grad_norm": 0.232421875, "learning_rate": 0.0007049756695180536, "loss": 0.1348, "step": 62248 }, { "epoch": 0.11037439526799701, "grad_norm": 0.201171875, "learning_rate": 0.0007049221912017159, "loss": 0.1348, "step": 62250 }, { "epoch": 0.11037794143330683, "grad_norm": 0.408203125, "learning_rate": 0.0007048687146132188, "loss": 0.2047, "step": 62252 }, { "epoch": 0.11038148759861664, "grad_norm": 0.4765625, "learning_rate": 0.0007048152397527972, "loss": 0.2023, "step": 62254 }, { "epoch": 0.11038503376392646, "grad_norm": 0.25390625, "learning_rate": 0.0007047617666206836, "loss": 0.1634, "step": 62256 }, { "epoch": 0.11038857992923627, "grad_norm": 0.90625, "learning_rate": 0.0007047082952171127, "loss": 0.2883, "step": 62258 }, { "epoch": 0.11039212609454609, "grad_norm": 0.341796875, "learning_rate": 0.0007046548255423178, "loss": 0.1729, "step": 62260 }, { "epoch": 0.1103956722598559, "grad_norm": 0.484375, "learning_rate": 0.0007046013575965333, "loss": 0.1813, "step": 62262 }, { "epoch": 0.11039921842516572, "grad_norm": 0.27734375, "learning_rate": 0.0007045478913799927, "loss": 0.1793, "step": 62264 }, { "epoch": 0.11040276459047553, "grad_norm": 0.3125, "learning_rate": 0.0007044944268929301, "loss": 0.1771, "step": 62266 }, { "epoch": 0.11040631075578534, "grad_norm": 0.546875, "learning_rate": 0.0007044409641355784, "loss": 0.1708, "step": 62268 }, { "epoch": 0.11040985692109516, "grad_norm": 0.7109375, "learning_rate": 0.000704387503108173, "loss": 0.2844, "step": 62270 }, { "epoch": 0.11041340308640497, "grad_norm": 0.36328125, "learning_rate": 0.0007043340438109467, "loss": 0.1563, "step": 62272 }, { "epoch": 0.11041694925171479, "grad_norm": 0.34375, "learning_rate": 0.0007042805862441333, "loss": 0.157, "step": 62274 }, { "epoch": 0.1104204954170246, "grad_norm": 0.671875, "learning_rate": 0.0007042271304079671, "loss": 0.1994, "step": 62276 }, { "epoch": 0.11042404158233442, "grad_norm": 2.421875, "learning_rate": 0.000704173676302681, "loss": 0.204, "step": 62278 }, { "epoch": 0.11042758774764423, "grad_norm": 1.1640625, "learning_rate": 0.0007041202239285099, "loss": 0.1986, "step": 62280 }, { "epoch": 0.11043113391295405, "grad_norm": 0.63671875, "learning_rate": 0.0007040667732856865, "loss": 0.178, "step": 62282 }, { "epoch": 0.11043468007826387, "grad_norm": 1.0703125, "learning_rate": 0.0007040133243744451, "loss": 0.2342, "step": 62284 }, { "epoch": 0.11043822624357369, "grad_norm": 0.30078125, "learning_rate": 0.0007039598771950193, "loss": 0.1828, "step": 62286 }, { "epoch": 0.1104417724088835, "grad_norm": 0.302734375, "learning_rate": 0.0007039064317476436, "loss": 0.1766, "step": 62288 }, { "epoch": 0.11044531857419332, "grad_norm": 0.40234375, "learning_rate": 0.0007038529880325503, "loss": 0.2497, "step": 62290 }, { "epoch": 0.11044886473950313, "grad_norm": 0.6484375, "learning_rate": 0.0007037995460499742, "loss": 0.295, "step": 62292 }, { "epoch": 0.11045241090481295, "grad_norm": 3.84375, "learning_rate": 0.0007037461058001491, "loss": 0.1428, "step": 62294 }, { "epoch": 0.11045595707012276, "grad_norm": 0.71875, "learning_rate": 0.0007036926672833082, "loss": 0.1751, "step": 62296 }, { "epoch": 0.11045950323543258, "grad_norm": 0.275390625, "learning_rate": 0.0007036392304996852, "loss": 0.1567, "step": 62298 }, { "epoch": 0.11046304940074239, "grad_norm": 0.3359375, "learning_rate": 0.0007035857954495137, "loss": 0.1989, "step": 62300 }, { "epoch": 0.1104665955660522, "grad_norm": 0.765625, "learning_rate": 0.0007035323621330281, "loss": 0.1756, "step": 62302 }, { "epoch": 0.11047014173136202, "grad_norm": 0.7109375, "learning_rate": 0.0007034789305504617, "loss": 0.1575, "step": 62304 }, { "epoch": 0.11047368789667183, "grad_norm": 3.625, "learning_rate": 0.000703425500702048, "loss": 0.2551, "step": 62306 }, { "epoch": 0.11047723406198165, "grad_norm": 0.53515625, "learning_rate": 0.0007033720725880204, "loss": 0.2512, "step": 62308 }, { "epoch": 0.11048078022729146, "grad_norm": 0.201171875, "learning_rate": 0.0007033186462086134, "loss": 0.1677, "step": 62310 }, { "epoch": 0.11048432639260128, "grad_norm": 0.33203125, "learning_rate": 0.0007032652215640603, "loss": 0.1407, "step": 62312 }, { "epoch": 0.1104878725579111, "grad_norm": 0.34765625, "learning_rate": 0.0007032117986545945, "loss": 0.1974, "step": 62314 }, { "epoch": 0.11049141872322091, "grad_norm": 0.19140625, "learning_rate": 0.00070315837748045, "loss": 0.2338, "step": 62316 }, { "epoch": 0.11049496488853072, "grad_norm": 0.111328125, "learning_rate": 0.0007031049580418596, "loss": 0.152, "step": 62318 }, { "epoch": 0.11049851105384054, "grad_norm": 0.267578125, "learning_rate": 0.0007030515403390585, "loss": 0.288, "step": 62320 }, { "epoch": 0.11050205721915035, "grad_norm": 0.2041015625, "learning_rate": 0.0007029981243722787, "loss": 0.1132, "step": 62322 }, { "epoch": 0.11050560338446017, "grad_norm": 0.2099609375, "learning_rate": 0.0007029447101417548, "loss": 0.1712, "step": 62324 }, { "epoch": 0.11050914954976998, "grad_norm": 0.8203125, "learning_rate": 0.0007028912976477197, "loss": 0.2026, "step": 62326 }, { "epoch": 0.11051269571507981, "grad_norm": 0.3203125, "learning_rate": 0.0007028378868904082, "loss": 0.1617, "step": 62328 }, { "epoch": 0.11051624188038962, "grad_norm": 0.1884765625, "learning_rate": 0.0007027844778700525, "loss": 0.1726, "step": 62330 }, { "epoch": 0.11051978804569944, "grad_norm": 0.349609375, "learning_rate": 0.000702731070586887, "loss": 0.1634, "step": 62332 }, { "epoch": 0.11052333421100925, "grad_norm": 0.734375, "learning_rate": 0.0007026776650411448, "loss": 0.1764, "step": 62334 }, { "epoch": 0.11052688037631907, "grad_norm": 0.26171875, "learning_rate": 0.0007026242612330598, "loss": 0.1358, "step": 62336 }, { "epoch": 0.11053042654162888, "grad_norm": 0.4375, "learning_rate": 0.0007025708591628657, "loss": 0.196, "step": 62338 }, { "epoch": 0.1105339727069387, "grad_norm": 0.41796875, "learning_rate": 0.0007025174588307955, "loss": 0.2025, "step": 62340 }, { "epoch": 0.11053751887224851, "grad_norm": 0.2373046875, "learning_rate": 0.0007024640602370829, "loss": 0.2701, "step": 62342 }, { "epoch": 0.11054106503755833, "grad_norm": 0.390625, "learning_rate": 0.0007024106633819616, "loss": 0.1646, "step": 62344 }, { "epoch": 0.11054461120286814, "grad_norm": 0.375, "learning_rate": 0.0007023572682656658, "loss": 0.1572, "step": 62346 }, { "epoch": 0.11054815736817795, "grad_norm": 0.70703125, "learning_rate": 0.0007023038748884274, "loss": 0.2379, "step": 62348 }, { "epoch": 0.11055170353348777, "grad_norm": 0.291015625, "learning_rate": 0.0007022504832504815, "loss": 0.13, "step": 62350 }, { "epoch": 0.11055524969879758, "grad_norm": 0.263671875, "learning_rate": 0.0007021970933520607, "loss": 0.1151, "step": 62352 }, { "epoch": 0.1105587958641074, "grad_norm": 0.451171875, "learning_rate": 0.000702143705193399, "loss": 0.1362, "step": 62354 }, { "epoch": 0.11056234202941721, "grad_norm": 0.181640625, "learning_rate": 0.0007020903187747294, "loss": 0.1528, "step": 62356 }, { "epoch": 0.11056588819472703, "grad_norm": 0.326171875, "learning_rate": 0.0007020369340962853, "loss": 0.1846, "step": 62358 }, { "epoch": 0.11056943436003684, "grad_norm": 0.2177734375, "learning_rate": 0.0007019835511583011, "loss": 0.254, "step": 62360 }, { "epoch": 0.11057298052534666, "grad_norm": 0.53125, "learning_rate": 0.0007019301699610095, "loss": 0.1947, "step": 62362 }, { "epoch": 0.11057652669065647, "grad_norm": 0.302734375, "learning_rate": 0.0007018767905046439, "loss": 0.1515, "step": 62364 }, { "epoch": 0.11058007285596629, "grad_norm": 0.625, "learning_rate": 0.0007018234127894376, "loss": 0.2519, "step": 62366 }, { "epoch": 0.1105836190212761, "grad_norm": 0.423828125, "learning_rate": 0.000701770036815625, "loss": 0.1737, "step": 62368 }, { "epoch": 0.11058716518658591, "grad_norm": 0.1962890625, "learning_rate": 0.0007017166625834388, "loss": 0.1779, "step": 62370 }, { "epoch": 0.11059071135189573, "grad_norm": 0.39453125, "learning_rate": 0.0007016632900931128, "loss": 0.1771, "step": 62372 }, { "epoch": 0.11059425751720556, "grad_norm": 0.50390625, "learning_rate": 0.0007016099193448799, "loss": 0.1789, "step": 62374 }, { "epoch": 0.11059780368251537, "grad_norm": 1.1328125, "learning_rate": 0.0007015565503389736, "loss": 0.3887, "step": 62376 }, { "epoch": 0.11060134984782519, "grad_norm": 0.296875, "learning_rate": 0.0007015031830756282, "loss": 0.1876, "step": 62378 }, { "epoch": 0.110604896013135, "grad_norm": 0.21875, "learning_rate": 0.0007014498175550757, "loss": 0.131, "step": 62380 }, { "epoch": 0.11060844217844482, "grad_norm": 2.4375, "learning_rate": 0.0007013964537775503, "loss": 0.1871, "step": 62382 }, { "epoch": 0.11061198834375463, "grad_norm": 0.3125, "learning_rate": 0.0007013430917432857, "loss": 0.1943, "step": 62384 }, { "epoch": 0.11061553450906444, "grad_norm": 0.33203125, "learning_rate": 0.0007012897314525143, "loss": 0.1952, "step": 62386 }, { "epoch": 0.11061908067437426, "grad_norm": 0.37109375, "learning_rate": 0.0007012363729054704, "loss": 0.1903, "step": 62388 }, { "epoch": 0.11062262683968407, "grad_norm": 0.69921875, "learning_rate": 0.0007011830161023866, "loss": 0.2455, "step": 62390 }, { "epoch": 0.11062617300499389, "grad_norm": 0.71875, "learning_rate": 0.000701129661043497, "loss": 0.139, "step": 62392 }, { "epoch": 0.1106297191703037, "grad_norm": 0.9609375, "learning_rate": 0.0007010763077290345, "loss": 0.1389, "step": 62394 }, { "epoch": 0.11063326533561352, "grad_norm": 0.671875, "learning_rate": 0.0007010229561592328, "loss": 0.266, "step": 62396 }, { "epoch": 0.11063681150092333, "grad_norm": 5.1875, "learning_rate": 0.0007009696063343243, "loss": 0.2285, "step": 62398 }, { "epoch": 0.11064035766623315, "grad_norm": 0.53515625, "learning_rate": 0.0007009162582545432, "loss": 0.1854, "step": 62400 }, { "epoch": 0.11064390383154296, "grad_norm": 0.90625, "learning_rate": 0.0007008629119201227, "loss": 0.2469, "step": 62402 }, { "epoch": 0.11064744999685278, "grad_norm": 0.95703125, "learning_rate": 0.0007008095673312959, "loss": 0.2095, "step": 62404 }, { "epoch": 0.11065099616216259, "grad_norm": 1.1640625, "learning_rate": 0.0007007562244882963, "loss": 0.2356, "step": 62406 }, { "epoch": 0.1106545423274724, "grad_norm": 0.484375, "learning_rate": 0.0007007028833913567, "loss": 0.2805, "step": 62408 }, { "epoch": 0.11065808849278222, "grad_norm": 0.7734375, "learning_rate": 0.0007006495440407115, "loss": 0.2021, "step": 62410 }, { "epoch": 0.11066163465809203, "grad_norm": 0.39453125, "learning_rate": 0.0007005962064365923, "loss": 0.2188, "step": 62412 }, { "epoch": 0.11066518082340185, "grad_norm": 0.44140625, "learning_rate": 0.0007005428705792338, "loss": 0.2165, "step": 62414 }, { "epoch": 0.11066872698871166, "grad_norm": 0.2890625, "learning_rate": 0.0007004895364688683, "loss": 0.1916, "step": 62416 }, { "epoch": 0.11067227315402148, "grad_norm": 0.1748046875, "learning_rate": 0.0007004362041057304, "loss": 0.1199, "step": 62418 }, { "epoch": 0.1106758193193313, "grad_norm": 0.283203125, "learning_rate": 0.0007003828734900515, "loss": 0.216, "step": 62420 }, { "epoch": 0.11067936548464112, "grad_norm": 0.298828125, "learning_rate": 0.0007003295446220664, "loss": 0.2924, "step": 62422 }, { "epoch": 0.11068291164995094, "grad_norm": 0.859375, "learning_rate": 0.0007002762175020074, "loss": 0.2036, "step": 62424 }, { "epoch": 0.11068645781526075, "grad_norm": 0.4375, "learning_rate": 0.0007002228921301084, "loss": 0.2391, "step": 62426 }, { "epoch": 0.11069000398057056, "grad_norm": 0.203125, "learning_rate": 0.0007001695685066021, "loss": 0.1452, "step": 62428 }, { "epoch": 0.11069355014588038, "grad_norm": 0.39453125, "learning_rate": 0.0007001162466317214, "loss": 0.1946, "step": 62430 }, { "epoch": 0.1106970963111902, "grad_norm": 0.54296875, "learning_rate": 0.0007000629265057004, "loss": 0.1885, "step": 62432 }, { "epoch": 0.11070064247650001, "grad_norm": 0.53125, "learning_rate": 0.0007000096081287714, "loss": 0.1894, "step": 62434 }, { "epoch": 0.11070418864180982, "grad_norm": 2.21875, "learning_rate": 0.0006999562915011687, "loss": 0.204, "step": 62436 }, { "epoch": 0.11070773480711964, "grad_norm": 0.265625, "learning_rate": 0.0006999029766231245, "loss": 0.1907, "step": 62438 }, { "epoch": 0.11071128097242945, "grad_norm": 0.435546875, "learning_rate": 0.0006998496634948722, "loss": 0.2519, "step": 62440 }, { "epoch": 0.11071482713773927, "grad_norm": 0.19921875, "learning_rate": 0.0006997963521166451, "loss": 0.1417, "step": 62442 }, { "epoch": 0.11071837330304908, "grad_norm": 0.4453125, "learning_rate": 0.0006997430424886766, "loss": 0.1929, "step": 62444 }, { "epoch": 0.1107219194683589, "grad_norm": 0.79296875, "learning_rate": 0.0006996897346111992, "loss": 0.2144, "step": 62446 }, { "epoch": 0.11072546563366871, "grad_norm": 0.69921875, "learning_rate": 0.0006996364284844462, "loss": 0.2037, "step": 62448 }, { "epoch": 0.11072901179897852, "grad_norm": 0.490234375, "learning_rate": 0.0006995831241086511, "loss": 0.4558, "step": 62450 }, { "epoch": 0.11073255796428834, "grad_norm": 1.046875, "learning_rate": 0.000699529821484047, "loss": 0.2158, "step": 62452 }, { "epoch": 0.11073610412959815, "grad_norm": 0.58203125, "learning_rate": 0.0006994765206108667, "loss": 0.4056, "step": 62454 }, { "epoch": 0.11073965029490797, "grad_norm": 0.3984375, "learning_rate": 0.0006994232214893432, "loss": 0.1599, "step": 62456 }, { "epoch": 0.11074319646021778, "grad_norm": 0.1787109375, "learning_rate": 0.00069936992411971, "loss": 0.2182, "step": 62458 }, { "epoch": 0.1107467426255276, "grad_norm": 0.2333984375, "learning_rate": 0.0006993166285022002, "loss": 0.1777, "step": 62460 }, { "epoch": 0.11075028879083741, "grad_norm": 0.494140625, "learning_rate": 0.0006992633346370467, "loss": 0.2411, "step": 62462 }, { "epoch": 0.11075383495614724, "grad_norm": 0.70703125, "learning_rate": 0.0006992100425244828, "loss": 0.3034, "step": 62464 }, { "epoch": 0.11075738112145705, "grad_norm": 0.61328125, "learning_rate": 0.0006991567521647407, "loss": 0.157, "step": 62466 }, { "epoch": 0.11076092728676687, "grad_norm": 1.6484375, "learning_rate": 0.0006991034635580549, "loss": 0.1762, "step": 62468 }, { "epoch": 0.11076447345207668, "grad_norm": 0.7578125, "learning_rate": 0.0006990501767046571, "loss": 0.3701, "step": 62470 }, { "epoch": 0.1107680196173865, "grad_norm": 0.28515625, "learning_rate": 0.0006989968916047813, "loss": 0.159, "step": 62472 }, { "epoch": 0.11077156578269631, "grad_norm": 0.3984375, "learning_rate": 0.0006989436082586596, "loss": 0.1403, "step": 62474 }, { "epoch": 0.11077511194800613, "grad_norm": 1.40625, "learning_rate": 0.0006988903266665264, "loss": 0.2531, "step": 62476 }, { "epoch": 0.11077865811331594, "grad_norm": 0.2314453125, "learning_rate": 0.0006988370468286134, "loss": 0.1306, "step": 62478 }, { "epoch": 0.11078220427862576, "grad_norm": 0.82421875, "learning_rate": 0.0006987837687451541, "loss": 0.1763, "step": 62480 }, { "epoch": 0.11078575044393557, "grad_norm": 0.53515625, "learning_rate": 0.000698730492416382, "loss": 0.1941, "step": 62482 }, { "epoch": 0.11078929660924539, "grad_norm": 0.23046875, "learning_rate": 0.0006986772178425294, "loss": 0.2459, "step": 62484 }, { "epoch": 0.1107928427745552, "grad_norm": 0.83203125, "learning_rate": 0.0006986239450238294, "loss": 0.1918, "step": 62486 }, { "epoch": 0.11079638893986501, "grad_norm": 0.73046875, "learning_rate": 0.000698570673960515, "loss": 0.17, "step": 62488 }, { "epoch": 0.11079993510517483, "grad_norm": 0.310546875, "learning_rate": 0.0006985174046528196, "loss": 0.1212, "step": 62490 }, { "epoch": 0.11080348127048464, "grad_norm": 0.9765625, "learning_rate": 0.0006984641371009757, "loss": 0.2192, "step": 62492 }, { "epoch": 0.11080702743579446, "grad_norm": 0.73046875, "learning_rate": 0.0006984108713052167, "loss": 0.3376, "step": 62494 }, { "epoch": 0.11081057360110427, "grad_norm": 0.53125, "learning_rate": 0.0006983576072657747, "loss": 0.1683, "step": 62496 }, { "epoch": 0.11081411976641409, "grad_norm": 0.265625, "learning_rate": 0.0006983043449828837, "loss": 0.1322, "step": 62498 }, { "epoch": 0.1108176659317239, "grad_norm": 0.267578125, "learning_rate": 0.000698251084456776, "loss": 0.1189, "step": 62500 }, { "epoch": 0.11082121209703372, "grad_norm": 0.734375, "learning_rate": 0.0006981978256876849, "loss": 0.2418, "step": 62502 }, { "epoch": 0.11082475826234353, "grad_norm": 0.3359375, "learning_rate": 0.000698144568675843, "loss": 0.2266, "step": 62504 }, { "epoch": 0.11082830442765335, "grad_norm": 0.5, "learning_rate": 0.000698091313421483, "loss": 0.1631, "step": 62506 }, { "epoch": 0.11083185059296316, "grad_norm": 0.5234375, "learning_rate": 0.0006980380599248389, "loss": 0.2084, "step": 62508 }, { "epoch": 0.11083539675827299, "grad_norm": 0.322265625, "learning_rate": 0.0006979848081861419, "loss": 0.1762, "step": 62510 }, { "epoch": 0.1108389429235828, "grad_norm": 0.46484375, "learning_rate": 0.0006979315582056265, "loss": 0.2181, "step": 62512 }, { "epoch": 0.11084248908889262, "grad_norm": 0.1787109375, "learning_rate": 0.0006978783099835244, "loss": 0.143, "step": 62514 }, { "epoch": 0.11084603525420243, "grad_norm": 0.53125, "learning_rate": 0.0006978250635200693, "loss": 0.1697, "step": 62516 }, { "epoch": 0.11084958141951225, "grad_norm": 0.5, "learning_rate": 0.0006977718188154938, "loss": 0.1622, "step": 62518 }, { "epoch": 0.11085312758482206, "grad_norm": 0.27734375, "learning_rate": 0.0006977185758700308, "loss": 0.2514, "step": 62520 }, { "epoch": 0.11085667375013188, "grad_norm": 0.140625, "learning_rate": 0.0006976653346839129, "loss": 0.1258, "step": 62522 }, { "epoch": 0.11086021991544169, "grad_norm": 0.26171875, "learning_rate": 0.000697612095257373, "loss": 0.1656, "step": 62524 }, { "epoch": 0.1108637660807515, "grad_norm": 1.78125, "learning_rate": 0.0006975588575906447, "loss": 0.3702, "step": 62526 }, { "epoch": 0.11086731224606132, "grad_norm": 0.59375, "learning_rate": 0.0006975056216839595, "loss": 0.2011, "step": 62528 }, { "epoch": 0.11087085841137113, "grad_norm": 0.34375, "learning_rate": 0.0006974523875375511, "loss": 0.1319, "step": 62530 }, { "epoch": 0.11087440457668095, "grad_norm": 0.51953125, "learning_rate": 0.0006973991551516519, "loss": 0.1381, "step": 62532 }, { "epoch": 0.11087795074199076, "grad_norm": 0.419921875, "learning_rate": 0.0006973459245264955, "loss": 0.1691, "step": 62534 }, { "epoch": 0.11088149690730058, "grad_norm": 0.4375, "learning_rate": 0.0006972926956623136, "loss": 0.143, "step": 62536 }, { "epoch": 0.11088504307261039, "grad_norm": 1.4921875, "learning_rate": 0.00069723946855934, "loss": 0.1956, "step": 62538 }, { "epoch": 0.11088858923792021, "grad_norm": 0.9765625, "learning_rate": 0.0006971862432178069, "loss": 0.2289, "step": 62540 }, { "epoch": 0.11089213540323002, "grad_norm": 0.380859375, "learning_rate": 0.000697133019637947, "loss": 0.2077, "step": 62542 }, { "epoch": 0.11089568156853984, "grad_norm": 0.921875, "learning_rate": 0.0006970797978199934, "loss": 0.2337, "step": 62544 }, { "epoch": 0.11089922773384965, "grad_norm": 0.298828125, "learning_rate": 0.0006970265777641784, "loss": 0.1562, "step": 62546 }, { "epoch": 0.11090277389915947, "grad_norm": 0.384765625, "learning_rate": 0.0006969733594707353, "loss": 0.2055, "step": 62548 }, { "epoch": 0.11090632006446928, "grad_norm": 0.291015625, "learning_rate": 0.0006969201429398969, "loss": 0.1778, "step": 62550 }, { "epoch": 0.1109098662297791, "grad_norm": 0.330078125, "learning_rate": 0.0006968669281718954, "loss": 0.3654, "step": 62552 }, { "epoch": 0.11091341239508891, "grad_norm": 0.26171875, "learning_rate": 0.0006968137151669639, "loss": 0.1688, "step": 62554 }, { "epoch": 0.11091695856039874, "grad_norm": 0.423828125, "learning_rate": 0.0006967605039253347, "loss": 0.2349, "step": 62556 }, { "epoch": 0.11092050472570855, "grad_norm": 1.734375, "learning_rate": 0.0006967072944472414, "loss": 0.186, "step": 62558 }, { "epoch": 0.11092405089101837, "grad_norm": 0.330078125, "learning_rate": 0.0006966540867329154, "loss": 0.1388, "step": 62560 }, { "epoch": 0.11092759705632818, "grad_norm": 0.458984375, "learning_rate": 0.0006966008807825907, "loss": 0.1578, "step": 62562 }, { "epoch": 0.110931143221638, "grad_norm": 0.26953125, "learning_rate": 0.0006965476765964988, "loss": 0.1537, "step": 62564 }, { "epoch": 0.11093468938694781, "grad_norm": 0.7890625, "learning_rate": 0.000696494474174874, "loss": 0.1607, "step": 62566 }, { "epoch": 0.11093823555225762, "grad_norm": 1.03125, "learning_rate": 0.000696441273517947, "loss": 0.2944, "step": 62568 }, { "epoch": 0.11094178171756744, "grad_norm": 0.482421875, "learning_rate": 0.0006963880746259519, "loss": 0.162, "step": 62570 }, { "epoch": 0.11094532788287725, "grad_norm": 0.306640625, "learning_rate": 0.0006963348774991212, "loss": 0.1611, "step": 62572 }, { "epoch": 0.11094887404818707, "grad_norm": 0.306640625, "learning_rate": 0.0006962816821376869, "loss": 0.2044, "step": 62574 }, { "epoch": 0.11095242021349688, "grad_norm": 0.3828125, "learning_rate": 0.0006962284885418821, "loss": 0.1711, "step": 62576 }, { "epoch": 0.1109559663788067, "grad_norm": 0.2421875, "learning_rate": 0.0006961752967119392, "loss": 0.1408, "step": 62578 }, { "epoch": 0.11095951254411651, "grad_norm": 0.302734375, "learning_rate": 0.0006961221066480912, "loss": 0.1703, "step": 62580 }, { "epoch": 0.11096305870942633, "grad_norm": 0.275390625, "learning_rate": 0.0006960689183505704, "loss": 0.2237, "step": 62582 }, { "epoch": 0.11096660487473614, "grad_norm": 1.2421875, "learning_rate": 0.0006960157318196096, "loss": 0.1902, "step": 62584 }, { "epoch": 0.11097015104004596, "grad_norm": 1.3671875, "learning_rate": 0.000695962547055441, "loss": 0.3739, "step": 62586 }, { "epoch": 0.11097369720535577, "grad_norm": 1.0234375, "learning_rate": 0.0006959093640582979, "loss": 0.1398, "step": 62588 }, { "epoch": 0.11097724337066558, "grad_norm": 0.6015625, "learning_rate": 0.0006958561828284125, "loss": 0.1621, "step": 62590 }, { "epoch": 0.1109807895359754, "grad_norm": 0.26171875, "learning_rate": 0.0006958030033660175, "loss": 0.2058, "step": 62592 }, { "epoch": 0.11098433570128521, "grad_norm": 0.400390625, "learning_rate": 0.000695749825671345, "loss": 0.1982, "step": 62594 }, { "epoch": 0.11098788186659503, "grad_norm": 0.5, "learning_rate": 0.000695696649744628, "loss": 0.1522, "step": 62596 }, { "epoch": 0.11099142803190484, "grad_norm": 0.77734375, "learning_rate": 0.0006956434755860992, "loss": 0.1862, "step": 62598 }, { "epoch": 0.11099497419721466, "grad_norm": 0.44921875, "learning_rate": 0.0006955903031959909, "loss": 0.1648, "step": 62600 }, { "epoch": 0.11099852036252449, "grad_norm": 0.421875, "learning_rate": 0.0006955371325745359, "loss": 0.1741, "step": 62602 }, { "epoch": 0.1110020665278343, "grad_norm": 1.3046875, "learning_rate": 0.0006954839637219659, "loss": 0.1927, "step": 62604 }, { "epoch": 0.11100561269314412, "grad_norm": 0.259765625, "learning_rate": 0.0006954307966385147, "loss": 0.183, "step": 62606 }, { "epoch": 0.11100915885845393, "grad_norm": 1.25, "learning_rate": 0.0006953776313244141, "loss": 0.2099, "step": 62608 }, { "epoch": 0.11101270502376374, "grad_norm": 0.453125, "learning_rate": 0.0006953244677798966, "loss": 0.1379, "step": 62610 }, { "epoch": 0.11101625118907356, "grad_norm": 0.375, "learning_rate": 0.000695271306005195, "loss": 0.1624, "step": 62612 }, { "epoch": 0.11101979735438337, "grad_norm": 0.3125, "learning_rate": 0.0006952181460005411, "loss": 0.1638, "step": 62614 }, { "epoch": 0.11102334351969319, "grad_norm": 0.20703125, "learning_rate": 0.0006951649877661687, "loss": 0.18, "step": 62616 }, { "epoch": 0.111026889685003, "grad_norm": 0.462890625, "learning_rate": 0.0006951118313023088, "loss": 0.2122, "step": 62618 }, { "epoch": 0.11103043585031282, "grad_norm": 0.498046875, "learning_rate": 0.0006950586766091949, "loss": 0.1657, "step": 62620 }, { "epoch": 0.11103398201562263, "grad_norm": 0.9453125, "learning_rate": 0.0006950055236870587, "loss": 0.551, "step": 62622 }, { "epoch": 0.11103752818093245, "grad_norm": 0.890625, "learning_rate": 0.000694952372536134, "loss": 0.1861, "step": 62624 }, { "epoch": 0.11104107434624226, "grad_norm": 4.0625, "learning_rate": 0.0006948992231566514, "loss": 0.4305, "step": 62626 }, { "epoch": 0.11104462051155208, "grad_norm": 0.453125, "learning_rate": 0.0006948460755488446, "loss": 0.1224, "step": 62628 }, { "epoch": 0.11104816667686189, "grad_norm": 0.5, "learning_rate": 0.0006947929297129459, "loss": 0.1481, "step": 62630 }, { "epoch": 0.1110517128421717, "grad_norm": 0.28125, "learning_rate": 0.0006947397856491875, "loss": 0.1684, "step": 62632 }, { "epoch": 0.11105525900748152, "grad_norm": 0.359375, "learning_rate": 0.0006946866433578018, "loss": 0.2036, "step": 62634 }, { "epoch": 0.11105880517279133, "grad_norm": 0.26953125, "learning_rate": 0.0006946335028390211, "loss": 0.23, "step": 62636 }, { "epoch": 0.11106235133810115, "grad_norm": 0.78515625, "learning_rate": 0.0006945803640930784, "loss": 0.1933, "step": 62638 }, { "epoch": 0.11106589750341096, "grad_norm": 1.265625, "learning_rate": 0.0006945272271202055, "loss": 0.1591, "step": 62640 }, { "epoch": 0.11106944366872078, "grad_norm": 0.3828125, "learning_rate": 0.0006944740919206349, "loss": 0.1611, "step": 62642 }, { "epoch": 0.11107298983403059, "grad_norm": 0.318359375, "learning_rate": 0.000694420958494599, "loss": 0.1719, "step": 62644 }, { "epoch": 0.11107653599934042, "grad_norm": 2.234375, "learning_rate": 0.0006943678268423304, "loss": 0.314, "step": 62646 }, { "epoch": 0.11108008216465023, "grad_norm": 0.41015625, "learning_rate": 0.0006943146969640612, "loss": 0.2266, "step": 62648 }, { "epoch": 0.11108362832996005, "grad_norm": 0.6484375, "learning_rate": 0.0006942615688600241, "loss": 0.193, "step": 62650 }, { "epoch": 0.11108717449526986, "grad_norm": 0.81640625, "learning_rate": 0.0006942084425304512, "loss": 0.1671, "step": 62652 }, { "epoch": 0.11109072066057968, "grad_norm": 0.57421875, "learning_rate": 0.0006941553179755744, "loss": 0.1692, "step": 62654 }, { "epoch": 0.11109426682588949, "grad_norm": 0.38671875, "learning_rate": 0.0006941021951956272, "loss": 0.1928, "step": 62656 }, { "epoch": 0.11109781299119931, "grad_norm": 1.8046875, "learning_rate": 0.0006940490741908405, "loss": 0.1558, "step": 62658 }, { "epoch": 0.11110135915650912, "grad_norm": 0.33984375, "learning_rate": 0.0006939959549614479, "loss": 0.161, "step": 62660 }, { "epoch": 0.11110490532181894, "grad_norm": 0.66796875, "learning_rate": 0.0006939428375076806, "loss": 0.2084, "step": 62662 }, { "epoch": 0.11110845148712875, "grad_norm": 0.4296875, "learning_rate": 0.0006938897218297722, "loss": 0.2854, "step": 62664 }, { "epoch": 0.11111199765243857, "grad_norm": 0.181640625, "learning_rate": 0.0006938366079279537, "loss": 0.1529, "step": 62666 }, { "epoch": 0.11111554381774838, "grad_norm": 0.427734375, "learning_rate": 0.000693783495802458, "loss": 0.1565, "step": 62668 }, { "epoch": 0.1111190899830582, "grad_norm": 0.376953125, "learning_rate": 0.0006937303854535177, "loss": 0.1846, "step": 62670 }, { "epoch": 0.11112263614836801, "grad_norm": 2.5, "learning_rate": 0.0006936772768813644, "loss": 0.3094, "step": 62672 }, { "epoch": 0.11112618231367782, "grad_norm": 0.30078125, "learning_rate": 0.0006936241700862307, "loss": 0.1594, "step": 62674 }, { "epoch": 0.11112972847898764, "grad_norm": 0.2275390625, "learning_rate": 0.0006935710650683486, "loss": 0.174, "step": 62676 }, { "epoch": 0.11113327464429745, "grad_norm": 1.390625, "learning_rate": 0.000693517961827951, "loss": 0.2612, "step": 62678 }, { "epoch": 0.11113682080960727, "grad_norm": 0.421875, "learning_rate": 0.0006934648603652692, "loss": 0.2167, "step": 62680 }, { "epoch": 0.11114036697491708, "grad_norm": 0.4140625, "learning_rate": 0.0006934117606805368, "loss": 0.1577, "step": 62682 }, { "epoch": 0.1111439131402269, "grad_norm": 0.46875, "learning_rate": 0.0006933586627739844, "loss": 0.1728, "step": 62684 }, { "epoch": 0.11114745930553671, "grad_norm": 0.326171875, "learning_rate": 0.0006933055666458453, "loss": 0.1985, "step": 62686 }, { "epoch": 0.11115100547084653, "grad_norm": 0.498046875, "learning_rate": 0.0006932524722963514, "loss": 0.28, "step": 62688 }, { "epoch": 0.11115455163615634, "grad_norm": 0.56640625, "learning_rate": 0.0006931993797257352, "loss": 0.1664, "step": 62690 }, { "epoch": 0.11115809780146617, "grad_norm": 0.55859375, "learning_rate": 0.0006931462889342283, "loss": 0.1653, "step": 62692 }, { "epoch": 0.11116164396677598, "grad_norm": 1.3125, "learning_rate": 0.0006930931999220629, "loss": 0.193, "step": 62694 }, { "epoch": 0.1111651901320858, "grad_norm": 0.578125, "learning_rate": 0.000693040112689472, "loss": 0.1527, "step": 62696 }, { "epoch": 0.11116873629739561, "grad_norm": 0.357421875, "learning_rate": 0.0006929870272366871, "loss": 0.1684, "step": 62698 }, { "epoch": 0.11117228246270543, "grad_norm": 0.70703125, "learning_rate": 0.0006929339435639406, "loss": 0.2181, "step": 62700 }, { "epoch": 0.11117582862801524, "grad_norm": 0.671875, "learning_rate": 0.0006928808616714641, "loss": 0.2417, "step": 62702 }, { "epoch": 0.11117937479332506, "grad_norm": 0.7734375, "learning_rate": 0.0006928277815594908, "loss": 0.2446, "step": 62704 }, { "epoch": 0.11118292095863487, "grad_norm": 0.482421875, "learning_rate": 0.0006927747032282521, "loss": 0.1319, "step": 62706 }, { "epoch": 0.11118646712394469, "grad_norm": 1.65625, "learning_rate": 0.0006927216266779804, "loss": 0.1522, "step": 62708 }, { "epoch": 0.1111900132892545, "grad_norm": 0.2578125, "learning_rate": 0.0006926685519089078, "loss": 0.1598, "step": 62710 }, { "epoch": 0.11119355945456431, "grad_norm": 0.212890625, "learning_rate": 0.0006926154789212658, "loss": 0.2059, "step": 62712 }, { "epoch": 0.11119710561987413, "grad_norm": 0.734375, "learning_rate": 0.0006925624077152879, "loss": 0.2101, "step": 62714 }, { "epoch": 0.11120065178518394, "grad_norm": 0.46875, "learning_rate": 0.0006925093382912046, "loss": 0.144, "step": 62716 }, { "epoch": 0.11120419795049376, "grad_norm": 1.3828125, "learning_rate": 0.000692456270649249, "loss": 0.2211, "step": 62718 }, { "epoch": 0.11120774411580357, "grad_norm": 0.51953125, "learning_rate": 0.0006924032047896531, "loss": 0.1734, "step": 62720 }, { "epoch": 0.11121129028111339, "grad_norm": 0.439453125, "learning_rate": 0.0006923501407126489, "loss": 0.1616, "step": 62722 }, { "epoch": 0.1112148364464232, "grad_norm": 0.2490234375, "learning_rate": 0.0006922970784184681, "loss": 0.2088, "step": 62724 }, { "epoch": 0.11121838261173302, "grad_norm": 0.490234375, "learning_rate": 0.0006922440179073429, "loss": 0.2026, "step": 62726 }, { "epoch": 0.11122192877704283, "grad_norm": 0.93359375, "learning_rate": 0.0006921909591795059, "loss": 0.1222, "step": 62728 }, { "epoch": 0.11122547494235265, "grad_norm": 2.53125, "learning_rate": 0.0006921379022351887, "loss": 0.2722, "step": 62730 }, { "epoch": 0.11122902110766246, "grad_norm": 0.423828125, "learning_rate": 0.0006920848470746234, "loss": 0.4402, "step": 62732 }, { "epoch": 0.11123256727297227, "grad_norm": 0.369140625, "learning_rate": 0.0006920317936980417, "loss": 0.1946, "step": 62734 }, { "epoch": 0.11123611343828209, "grad_norm": 0.279296875, "learning_rate": 0.0006919787421056763, "loss": 0.2118, "step": 62736 }, { "epoch": 0.11123965960359192, "grad_norm": 3.09375, "learning_rate": 0.0006919256922977589, "loss": 0.4481, "step": 62738 }, { "epoch": 0.11124320576890173, "grad_norm": 0.328125, "learning_rate": 0.0006918726442745214, "loss": 0.247, "step": 62740 }, { "epoch": 0.11124675193421155, "grad_norm": 0.3046875, "learning_rate": 0.0006918195980361959, "loss": 0.1922, "step": 62742 }, { "epoch": 0.11125029809952136, "grad_norm": 0.2734375, "learning_rate": 0.0006917665535830141, "loss": 0.2544, "step": 62744 }, { "epoch": 0.11125384426483118, "grad_norm": 0.298828125, "learning_rate": 0.0006917135109152088, "loss": 0.1678, "step": 62746 }, { "epoch": 0.11125739043014099, "grad_norm": 0.51171875, "learning_rate": 0.0006916604700330109, "loss": 0.2029, "step": 62748 }, { "epoch": 0.1112609365954508, "grad_norm": 0.2353515625, "learning_rate": 0.0006916074309366533, "loss": 0.168, "step": 62750 }, { "epoch": 0.11126448276076062, "grad_norm": 0.224609375, "learning_rate": 0.0006915543936263672, "loss": 0.1318, "step": 62752 }, { "epoch": 0.11126802892607043, "grad_norm": 2.34375, "learning_rate": 0.0006915013581023855, "loss": 0.2553, "step": 62754 }, { "epoch": 0.11127157509138025, "grad_norm": 0.48046875, "learning_rate": 0.000691448324364939, "loss": 0.1903, "step": 62756 }, { "epoch": 0.11127512125669006, "grad_norm": 0.2392578125, "learning_rate": 0.0006913952924142606, "loss": 0.1266, "step": 62758 }, { "epoch": 0.11127866742199988, "grad_norm": 0.51953125, "learning_rate": 0.0006913422622505818, "loss": 0.174, "step": 62760 }, { "epoch": 0.11128221358730969, "grad_norm": 0.32421875, "learning_rate": 0.0006912892338741341, "loss": 0.2033, "step": 62762 }, { "epoch": 0.1112857597526195, "grad_norm": 0.1923828125, "learning_rate": 0.0006912362072851505, "loss": 0.3265, "step": 62764 }, { "epoch": 0.11128930591792932, "grad_norm": 0.490234375, "learning_rate": 0.000691183182483862, "loss": 0.1759, "step": 62766 }, { "epoch": 0.11129285208323914, "grad_norm": 0.361328125, "learning_rate": 0.0006911301594705008, "loss": 0.2304, "step": 62768 }, { "epoch": 0.11129639824854895, "grad_norm": 0.26171875, "learning_rate": 0.0006910771382452985, "loss": 0.2064, "step": 62770 }, { "epoch": 0.11129994441385876, "grad_norm": 2.0, "learning_rate": 0.000691024118808488, "loss": 0.2377, "step": 62772 }, { "epoch": 0.11130349057916858, "grad_norm": 0.2734375, "learning_rate": 0.0006909711011602998, "loss": 0.2443, "step": 62774 }, { "epoch": 0.1113070367444784, "grad_norm": 3.484375, "learning_rate": 0.0006909180853009668, "loss": 0.3439, "step": 62776 }, { "epoch": 0.11131058290978821, "grad_norm": 0.314453125, "learning_rate": 0.0006908650712307202, "loss": 0.1869, "step": 62778 }, { "epoch": 0.11131412907509802, "grad_norm": 0.3515625, "learning_rate": 0.0006908120589497925, "loss": 0.3004, "step": 62780 }, { "epoch": 0.11131767524040785, "grad_norm": 0.291015625, "learning_rate": 0.000690759048458415, "loss": 0.1622, "step": 62782 }, { "epoch": 0.11132122140571767, "grad_norm": 0.6796875, "learning_rate": 0.0006907060397568193, "loss": 0.1609, "step": 62784 }, { "epoch": 0.11132476757102748, "grad_norm": 0.30859375, "learning_rate": 0.0006906530328452379, "loss": 0.1649, "step": 62786 }, { "epoch": 0.1113283137363373, "grad_norm": 0.546875, "learning_rate": 0.0006906000277239025, "loss": 0.1922, "step": 62788 }, { "epoch": 0.11133185990164711, "grad_norm": 0.72265625, "learning_rate": 0.0006905470243930448, "loss": 0.2233, "step": 62790 }, { "epoch": 0.11133540606695692, "grad_norm": 0.35546875, "learning_rate": 0.0006904940228528961, "loss": 0.1861, "step": 62792 }, { "epoch": 0.11133895223226674, "grad_norm": 0.96875, "learning_rate": 0.0006904410231036892, "loss": 0.2232, "step": 62794 }, { "epoch": 0.11134249839757655, "grad_norm": 0.953125, "learning_rate": 0.0006903880251456551, "loss": 0.1984, "step": 62796 }, { "epoch": 0.11134604456288637, "grad_norm": 0.8125, "learning_rate": 0.000690335028979026, "loss": 0.2335, "step": 62798 }, { "epoch": 0.11134959072819618, "grad_norm": 0.271484375, "learning_rate": 0.0006902820346040336, "loss": 0.1357, "step": 62800 }, { "epoch": 0.111353136893506, "grad_norm": 0.55078125, "learning_rate": 0.0006902290420209092, "loss": 0.2154, "step": 62802 }, { "epoch": 0.11135668305881581, "grad_norm": 0.412109375, "learning_rate": 0.0006901760512298856, "loss": 0.2368, "step": 62804 }, { "epoch": 0.11136022922412563, "grad_norm": 0.259765625, "learning_rate": 0.0006901230622311933, "loss": 0.1381, "step": 62806 }, { "epoch": 0.11136377538943544, "grad_norm": 0.30078125, "learning_rate": 0.0006900700750250649, "loss": 0.1524, "step": 62808 }, { "epoch": 0.11136732155474525, "grad_norm": 0.5546875, "learning_rate": 0.0006900170896117315, "loss": 0.2002, "step": 62810 }, { "epoch": 0.11137086772005507, "grad_norm": 0.435546875, "learning_rate": 0.0006899641059914259, "loss": 0.2792, "step": 62812 }, { "epoch": 0.11137441388536488, "grad_norm": 0.59375, "learning_rate": 0.0006899111241643785, "loss": 0.1806, "step": 62814 }, { "epoch": 0.1113779600506747, "grad_norm": 2.421875, "learning_rate": 0.0006898581441308219, "loss": 0.1817, "step": 62816 }, { "epoch": 0.11138150621598451, "grad_norm": 0.2177734375, "learning_rate": 0.0006898051658909876, "loss": 0.1296, "step": 62818 }, { "epoch": 0.11138505238129433, "grad_norm": 0.515625, "learning_rate": 0.0006897521894451074, "loss": 0.258, "step": 62820 }, { "epoch": 0.11138859854660414, "grad_norm": 0.5234375, "learning_rate": 0.0006896992147934123, "loss": 0.1281, "step": 62822 }, { "epoch": 0.11139214471191396, "grad_norm": 2.953125, "learning_rate": 0.0006896462419361347, "loss": 0.3824, "step": 62824 }, { "epoch": 0.11139569087722377, "grad_norm": 0.421875, "learning_rate": 0.0006895932708735062, "loss": 0.1832, "step": 62826 }, { "epoch": 0.1113992370425336, "grad_norm": 0.34765625, "learning_rate": 0.0006895403016057582, "loss": 0.1474, "step": 62828 }, { "epoch": 0.11140278320784341, "grad_norm": 1.421875, "learning_rate": 0.0006894873341331227, "loss": 0.168, "step": 62830 }, { "epoch": 0.11140632937315323, "grad_norm": 0.5859375, "learning_rate": 0.0006894343684558309, "loss": 0.1676, "step": 62832 }, { "epoch": 0.11140987553846304, "grad_norm": 0.494140625, "learning_rate": 0.000689381404574115, "loss": 0.2065, "step": 62834 }, { "epoch": 0.11141342170377286, "grad_norm": 0.224609375, "learning_rate": 0.000689328442488206, "loss": 0.1799, "step": 62836 }, { "epoch": 0.11141696786908267, "grad_norm": 0.53125, "learning_rate": 0.000689275482198336, "loss": 0.1815, "step": 62838 }, { "epoch": 0.11142051403439249, "grad_norm": 0.734375, "learning_rate": 0.0006892225237047366, "loss": 0.2531, "step": 62840 }, { "epoch": 0.1114240601997023, "grad_norm": 0.34765625, "learning_rate": 0.0006891695670076389, "loss": 0.1582, "step": 62842 }, { "epoch": 0.11142760636501212, "grad_norm": 1.3515625, "learning_rate": 0.0006891166121072754, "loss": 0.1827, "step": 62844 }, { "epoch": 0.11143115253032193, "grad_norm": 0.4765625, "learning_rate": 0.0006890636590038766, "loss": 0.3033, "step": 62846 }, { "epoch": 0.11143469869563175, "grad_norm": 0.640625, "learning_rate": 0.0006890107076976751, "loss": 0.2249, "step": 62848 }, { "epoch": 0.11143824486094156, "grad_norm": 0.67578125, "learning_rate": 0.0006889577581889014, "loss": 0.1494, "step": 62850 }, { "epoch": 0.11144179102625137, "grad_norm": 0.439453125, "learning_rate": 0.0006889048104777885, "loss": 0.1558, "step": 62852 }, { "epoch": 0.11144533719156119, "grad_norm": 0.2578125, "learning_rate": 0.0006888518645645667, "loss": 0.2057, "step": 62854 }, { "epoch": 0.111448883356871, "grad_norm": 0.86328125, "learning_rate": 0.0006887989204494683, "loss": 0.1993, "step": 62856 }, { "epoch": 0.11145242952218082, "grad_norm": 0.46875, "learning_rate": 0.0006887459781327246, "loss": 0.1401, "step": 62858 }, { "epoch": 0.11145597568749063, "grad_norm": 0.8984375, "learning_rate": 0.0006886930376145665, "loss": 0.2159, "step": 62860 }, { "epoch": 0.11145952185280045, "grad_norm": 0.328125, "learning_rate": 0.0006886400988952271, "loss": 0.1618, "step": 62862 }, { "epoch": 0.11146306801811026, "grad_norm": 2.546875, "learning_rate": 0.0006885871619749361, "loss": 0.2333, "step": 62864 }, { "epoch": 0.11146661418342008, "grad_norm": 0.298828125, "learning_rate": 0.0006885342268539264, "loss": 0.2633, "step": 62866 }, { "epoch": 0.11147016034872989, "grad_norm": 0.30859375, "learning_rate": 0.0006884812935324283, "loss": 0.1818, "step": 62868 }, { "epoch": 0.1114737065140397, "grad_norm": 0.65625, "learning_rate": 0.0006884283620106748, "loss": 0.1957, "step": 62870 }, { "epoch": 0.11147725267934952, "grad_norm": 0.875, "learning_rate": 0.000688375432288896, "loss": 0.1539, "step": 62872 }, { "epoch": 0.11148079884465935, "grad_norm": 0.57421875, "learning_rate": 0.0006883225043673245, "loss": 0.1802, "step": 62874 }, { "epoch": 0.11148434500996916, "grad_norm": 0.3203125, "learning_rate": 0.0006882695782461909, "loss": 0.2087, "step": 62876 }, { "epoch": 0.11148789117527898, "grad_norm": 0.7109375, "learning_rate": 0.0006882166539257271, "loss": 0.2178, "step": 62878 }, { "epoch": 0.11149143734058879, "grad_norm": 0.388671875, "learning_rate": 0.0006881637314061645, "loss": 0.1791, "step": 62880 }, { "epoch": 0.1114949835058986, "grad_norm": 0.5546875, "learning_rate": 0.0006881108106877341, "loss": 0.1751, "step": 62882 }, { "epoch": 0.11149852967120842, "grad_norm": 0.9140625, "learning_rate": 0.0006880578917706681, "loss": 0.1727, "step": 62884 }, { "epoch": 0.11150207583651824, "grad_norm": 1.7109375, "learning_rate": 0.0006880049746551978, "loss": 0.2342, "step": 62886 }, { "epoch": 0.11150562200182805, "grad_norm": 0.359375, "learning_rate": 0.0006879520593415542, "loss": 0.1175, "step": 62888 }, { "epoch": 0.11150916816713786, "grad_norm": 0.265625, "learning_rate": 0.0006878991458299686, "loss": 0.184, "step": 62890 }, { "epoch": 0.11151271433244768, "grad_norm": 3.3125, "learning_rate": 0.0006878462341206732, "loss": 0.3343, "step": 62892 }, { "epoch": 0.1115162604977575, "grad_norm": 0.396484375, "learning_rate": 0.0006877933242138989, "loss": 0.1306, "step": 62894 }, { "epoch": 0.11151980666306731, "grad_norm": 0.208984375, "learning_rate": 0.0006877404161098773, "loss": 0.1819, "step": 62896 }, { "epoch": 0.11152335282837712, "grad_norm": 0.34765625, "learning_rate": 0.0006876875098088395, "loss": 0.2042, "step": 62898 }, { "epoch": 0.11152689899368694, "grad_norm": 0.400390625, "learning_rate": 0.0006876346053110167, "loss": 0.1927, "step": 62900 }, { "epoch": 0.11153044515899675, "grad_norm": 1.2265625, "learning_rate": 0.0006875817026166413, "loss": 0.1965, "step": 62902 }, { "epoch": 0.11153399132430657, "grad_norm": 0.380859375, "learning_rate": 0.0006875288017259433, "loss": 0.24, "step": 62904 }, { "epoch": 0.11153753748961638, "grad_norm": 2.375, "learning_rate": 0.0006874759026391552, "loss": 0.2725, "step": 62906 }, { "epoch": 0.1115410836549262, "grad_norm": 0.46484375, "learning_rate": 0.0006874230053565078, "loss": 0.2257, "step": 62908 }, { "epoch": 0.11154462982023601, "grad_norm": 0.384765625, "learning_rate": 0.0006873701098782325, "loss": 0.2005, "step": 62910 }, { "epoch": 0.11154817598554582, "grad_norm": 0.357421875, "learning_rate": 0.0006873172162045606, "loss": 0.1568, "step": 62912 }, { "epoch": 0.11155172215085564, "grad_norm": 7.59375, "learning_rate": 0.0006872643243357232, "loss": 0.4833, "step": 62914 }, { "epoch": 0.11155526831616545, "grad_norm": 0.408203125, "learning_rate": 0.0006872114342719522, "loss": 0.2643, "step": 62916 }, { "epoch": 0.11155881448147528, "grad_norm": 0.482421875, "learning_rate": 0.0006871585460134787, "loss": 0.1938, "step": 62918 }, { "epoch": 0.1115623606467851, "grad_norm": 0.23828125, "learning_rate": 0.0006871056595605338, "loss": 0.1552, "step": 62920 }, { "epoch": 0.11156590681209491, "grad_norm": 0.75, "learning_rate": 0.0006870527749133486, "loss": 0.2, "step": 62922 }, { "epoch": 0.11156945297740473, "grad_norm": 1.1328125, "learning_rate": 0.0006869998920721552, "loss": 0.229, "step": 62924 }, { "epoch": 0.11157299914271454, "grad_norm": 0.271484375, "learning_rate": 0.0006869470110371841, "loss": 0.2036, "step": 62926 }, { "epoch": 0.11157654530802436, "grad_norm": 0.6328125, "learning_rate": 0.000686894131808667, "loss": 0.1454, "step": 62928 }, { "epoch": 0.11158009147333417, "grad_norm": 0.5625, "learning_rate": 0.0006868412543868349, "loss": 0.1693, "step": 62930 }, { "epoch": 0.11158363763864398, "grad_norm": 0.46484375, "learning_rate": 0.0006867883787719189, "loss": 0.1829, "step": 62932 }, { "epoch": 0.1115871838039538, "grad_norm": 0.353515625, "learning_rate": 0.0006867355049641511, "loss": 0.1763, "step": 62934 }, { "epoch": 0.11159072996926361, "grad_norm": 0.41796875, "learning_rate": 0.0006866826329637618, "loss": 0.1845, "step": 62936 }, { "epoch": 0.11159427613457343, "grad_norm": 0.1787109375, "learning_rate": 0.0006866297627709826, "loss": 0.1707, "step": 62938 }, { "epoch": 0.11159782229988324, "grad_norm": 0.94140625, "learning_rate": 0.0006865768943860444, "loss": 0.2348, "step": 62940 }, { "epoch": 0.11160136846519306, "grad_norm": 0.421875, "learning_rate": 0.0006865240278091791, "loss": 0.1704, "step": 62942 }, { "epoch": 0.11160491463050287, "grad_norm": 0.5703125, "learning_rate": 0.0006864711630406175, "loss": 0.1321, "step": 62944 }, { "epoch": 0.11160846079581269, "grad_norm": 0.44921875, "learning_rate": 0.0006864183000805911, "loss": 0.2092, "step": 62946 }, { "epoch": 0.1116120069611225, "grad_norm": 1.1796875, "learning_rate": 0.0006863654389293305, "loss": 0.3919, "step": 62948 }, { "epoch": 0.11161555312643232, "grad_norm": 3.359375, "learning_rate": 0.0006863125795870668, "loss": 0.3886, "step": 62950 }, { "epoch": 0.11161909929174213, "grad_norm": 2.125, "learning_rate": 0.0006862597220540325, "loss": 0.2333, "step": 62952 }, { "epoch": 0.11162264545705194, "grad_norm": 0.296875, "learning_rate": 0.000686206866330457, "loss": 0.1357, "step": 62954 }, { "epoch": 0.11162619162236176, "grad_norm": 0.279296875, "learning_rate": 0.0006861540124165728, "loss": 0.1592, "step": 62956 }, { "epoch": 0.11162973778767157, "grad_norm": 0.58203125, "learning_rate": 0.0006861011603126101, "loss": 0.4347, "step": 62958 }, { "epoch": 0.11163328395298139, "grad_norm": 0.2099609375, "learning_rate": 0.0006860483100188012, "loss": 0.1916, "step": 62960 }, { "epoch": 0.1116368301182912, "grad_norm": 0.498046875, "learning_rate": 0.0006859954615353758, "loss": 0.1691, "step": 62962 }, { "epoch": 0.11164037628360103, "grad_norm": 0.71484375, "learning_rate": 0.0006859426148625663, "loss": 0.1315, "step": 62964 }, { "epoch": 0.11164392244891085, "grad_norm": 1.3359375, "learning_rate": 0.0006858897700006031, "loss": 0.1954, "step": 62966 }, { "epoch": 0.11164746861422066, "grad_norm": 2.484375, "learning_rate": 0.0006858369269497175, "loss": 0.3105, "step": 62968 }, { "epoch": 0.11165101477953047, "grad_norm": 0.2099609375, "learning_rate": 0.0006857840857101409, "loss": 0.1525, "step": 62970 }, { "epoch": 0.11165456094484029, "grad_norm": 0.4765625, "learning_rate": 0.0006857312462821035, "loss": 0.1473, "step": 62972 }, { "epoch": 0.1116581071101501, "grad_norm": 0.373046875, "learning_rate": 0.0006856784086658373, "loss": 0.2163, "step": 62974 }, { "epoch": 0.11166165327545992, "grad_norm": 0.55078125, "learning_rate": 0.0006856255728615732, "loss": 0.1511, "step": 62976 }, { "epoch": 0.11166519944076973, "grad_norm": 0.21875, "learning_rate": 0.0006855727388695421, "loss": 0.1414, "step": 62978 }, { "epoch": 0.11166874560607955, "grad_norm": 0.77734375, "learning_rate": 0.0006855199066899746, "loss": 0.3297, "step": 62980 }, { "epoch": 0.11167229177138936, "grad_norm": 0.93359375, "learning_rate": 0.0006854670763231029, "loss": 0.2958, "step": 62982 }, { "epoch": 0.11167583793669918, "grad_norm": 0.28125, "learning_rate": 0.0006854142477691574, "loss": 0.1396, "step": 62984 }, { "epoch": 0.11167938410200899, "grad_norm": 0.46875, "learning_rate": 0.000685361421028369, "loss": 0.1875, "step": 62986 }, { "epoch": 0.1116829302673188, "grad_norm": 0.376953125, "learning_rate": 0.0006853085961009689, "loss": 0.1579, "step": 62988 }, { "epoch": 0.11168647643262862, "grad_norm": 0.9609375, "learning_rate": 0.0006852557729871878, "loss": 0.2984, "step": 62990 }, { "epoch": 0.11169002259793843, "grad_norm": 1.0078125, "learning_rate": 0.0006852029516872577, "loss": 0.2509, "step": 62992 }, { "epoch": 0.11169356876324825, "grad_norm": 0.81640625, "learning_rate": 0.0006851501322014084, "loss": 0.2002, "step": 62994 }, { "epoch": 0.11169711492855806, "grad_norm": 0.921875, "learning_rate": 0.0006850973145298716, "loss": 0.181, "step": 62996 }, { "epoch": 0.11170066109386788, "grad_norm": 0.38671875, "learning_rate": 0.0006850444986728779, "loss": 0.1667, "step": 62998 }, { "epoch": 0.11170420725917769, "grad_norm": 0.455078125, "learning_rate": 0.0006849916846306594, "loss": 0.151, "step": 63000 }, { "epoch": 0.11170775342448751, "grad_norm": 0.96484375, "learning_rate": 0.0006849388724034453, "loss": 0.158, "step": 63002 }, { "epoch": 0.11171129958979732, "grad_norm": 0.2470703125, "learning_rate": 0.0006848860619914678, "loss": 0.1776, "step": 63004 }, { "epoch": 0.11171484575510714, "grad_norm": 1.2421875, "learning_rate": 0.0006848332533949576, "loss": 0.163, "step": 63006 }, { "epoch": 0.11171839192041695, "grad_norm": 0.84375, "learning_rate": 0.0006847804466141456, "loss": 0.1631, "step": 63008 }, { "epoch": 0.11172193808572678, "grad_norm": 0.25, "learning_rate": 0.0006847276416492627, "loss": 0.1593, "step": 63010 }, { "epoch": 0.1117254842510366, "grad_norm": 0.337890625, "learning_rate": 0.0006846748385005395, "loss": 0.2019, "step": 63012 }, { "epoch": 0.11172903041634641, "grad_norm": 0.33203125, "learning_rate": 0.0006846220371682077, "loss": 0.198, "step": 63014 }, { "epoch": 0.11173257658165622, "grad_norm": 0.6796875, "learning_rate": 0.0006845692376524976, "loss": 0.1437, "step": 63016 }, { "epoch": 0.11173612274696604, "grad_norm": 0.36328125, "learning_rate": 0.000684516439953641, "loss": 0.2268, "step": 63018 }, { "epoch": 0.11173966891227585, "grad_norm": 0.455078125, "learning_rate": 0.0006844636440718673, "loss": 0.1686, "step": 63020 }, { "epoch": 0.11174321507758567, "grad_norm": 0.310546875, "learning_rate": 0.0006844108500074086, "loss": 0.2165, "step": 63022 }, { "epoch": 0.11174676124289548, "grad_norm": 2.359375, "learning_rate": 0.0006843580577604958, "loss": 0.2138, "step": 63024 }, { "epoch": 0.1117503074082053, "grad_norm": 0.328125, "learning_rate": 0.000684305267331359, "loss": 0.2031, "step": 63026 }, { "epoch": 0.11175385357351511, "grad_norm": 0.40625, "learning_rate": 0.0006842524787202297, "loss": 0.2327, "step": 63028 }, { "epoch": 0.11175739973882493, "grad_norm": 0.271484375, "learning_rate": 0.0006841996919273382, "loss": 0.1492, "step": 63030 }, { "epoch": 0.11176094590413474, "grad_norm": 0.41796875, "learning_rate": 0.0006841469069529158, "loss": 0.1895, "step": 63032 }, { "epoch": 0.11176449206944455, "grad_norm": 0.34375, "learning_rate": 0.0006840941237971936, "loss": 0.1475, "step": 63034 }, { "epoch": 0.11176803823475437, "grad_norm": 0.1787109375, "learning_rate": 0.0006840413424604021, "loss": 0.1891, "step": 63036 }, { "epoch": 0.11177158440006418, "grad_norm": 0.236328125, "learning_rate": 0.0006839885629427716, "loss": 0.1859, "step": 63038 }, { "epoch": 0.111775130565374, "grad_norm": 0.345703125, "learning_rate": 0.0006839357852445339, "loss": 0.2203, "step": 63040 }, { "epoch": 0.11177867673068381, "grad_norm": 0.373046875, "learning_rate": 0.0006838830093659194, "loss": 0.1674, "step": 63042 }, { "epoch": 0.11178222289599363, "grad_norm": 0.73046875, "learning_rate": 0.000683830235307159, "loss": 0.166, "step": 63044 }, { "epoch": 0.11178576906130344, "grad_norm": 0.53515625, "learning_rate": 0.0006837774630684834, "loss": 0.1665, "step": 63046 }, { "epoch": 0.11178931522661326, "grad_norm": 1.234375, "learning_rate": 0.000683724692650123, "loss": 0.23, "step": 63048 }, { "epoch": 0.11179286139192307, "grad_norm": 0.27734375, "learning_rate": 0.0006836719240523097, "loss": 0.1913, "step": 63050 }, { "epoch": 0.11179640755723289, "grad_norm": 0.421875, "learning_rate": 0.0006836191572752728, "loss": 0.1775, "step": 63052 }, { "epoch": 0.11179995372254271, "grad_norm": 1.0, "learning_rate": 0.0006835663923192443, "loss": 0.2478, "step": 63054 }, { "epoch": 0.11180349988785253, "grad_norm": 0.365234375, "learning_rate": 0.0006835136291844542, "loss": 0.1859, "step": 63056 }, { "epoch": 0.11180704605316234, "grad_norm": 1.140625, "learning_rate": 0.0006834608678711341, "loss": 0.1833, "step": 63058 }, { "epoch": 0.11181059221847216, "grad_norm": 0.359375, "learning_rate": 0.0006834081083795136, "loss": 0.121, "step": 63060 }, { "epoch": 0.11181413838378197, "grad_norm": 0.330078125, "learning_rate": 0.0006833553507098244, "loss": 0.1947, "step": 63062 }, { "epoch": 0.11181768454909179, "grad_norm": 0.3203125, "learning_rate": 0.0006833025948622969, "loss": 0.2725, "step": 63064 }, { "epoch": 0.1118212307144016, "grad_norm": 0.2578125, "learning_rate": 0.0006832498408371616, "loss": 0.1817, "step": 63066 }, { "epoch": 0.11182477687971142, "grad_norm": 0.75390625, "learning_rate": 0.0006831970886346498, "loss": 0.1844, "step": 63068 }, { "epoch": 0.11182832304502123, "grad_norm": 0.3359375, "learning_rate": 0.0006831443382549913, "loss": 0.1765, "step": 63070 }, { "epoch": 0.11183186921033104, "grad_norm": 0.25, "learning_rate": 0.0006830915896984176, "loss": 0.2723, "step": 63072 }, { "epoch": 0.11183541537564086, "grad_norm": 0.5234375, "learning_rate": 0.000683038842965159, "loss": 0.217, "step": 63074 }, { "epoch": 0.11183896154095067, "grad_norm": 0.494140625, "learning_rate": 0.0006829860980554467, "loss": 0.225, "step": 63076 }, { "epoch": 0.11184250770626049, "grad_norm": 1.109375, "learning_rate": 0.0006829333549695108, "loss": 0.2032, "step": 63078 }, { "epoch": 0.1118460538715703, "grad_norm": 0.396484375, "learning_rate": 0.0006828806137075816, "loss": 0.1492, "step": 63080 }, { "epoch": 0.11184960003688012, "grad_norm": 3.078125, "learning_rate": 0.0006828278742698911, "loss": 0.2731, "step": 63082 }, { "epoch": 0.11185314620218993, "grad_norm": 0.68359375, "learning_rate": 0.0006827751366566686, "loss": 0.1511, "step": 63084 }, { "epoch": 0.11185669236749975, "grad_norm": 0.48828125, "learning_rate": 0.0006827224008681456, "loss": 0.1655, "step": 63086 }, { "epoch": 0.11186023853280956, "grad_norm": 0.2158203125, "learning_rate": 0.000682669666904552, "loss": 0.1488, "step": 63088 }, { "epoch": 0.11186378469811938, "grad_norm": 0.59375, "learning_rate": 0.0006826169347661195, "loss": 0.1709, "step": 63090 }, { "epoch": 0.11186733086342919, "grad_norm": 0.396484375, "learning_rate": 0.0006825642044530775, "loss": 0.1729, "step": 63092 }, { "epoch": 0.111870877028739, "grad_norm": 0.54296875, "learning_rate": 0.0006825114759656576, "loss": 0.2071, "step": 63094 }, { "epoch": 0.11187442319404882, "grad_norm": 0.40234375, "learning_rate": 0.0006824587493040898, "loss": 0.2306, "step": 63096 }, { "epoch": 0.11187796935935863, "grad_norm": 0.6328125, "learning_rate": 0.0006824060244686048, "loss": 0.1466, "step": 63098 }, { "epoch": 0.11188151552466846, "grad_norm": 1.8203125, "learning_rate": 0.0006823533014594337, "loss": 0.1919, "step": 63100 }, { "epoch": 0.11188506168997828, "grad_norm": 0.2177734375, "learning_rate": 0.0006823005802768061, "loss": 0.1927, "step": 63102 }, { "epoch": 0.11188860785528809, "grad_norm": 0.41015625, "learning_rate": 0.0006822478609209536, "loss": 0.2436, "step": 63104 }, { "epoch": 0.1118921540205979, "grad_norm": 3.5, "learning_rate": 0.0006821951433921058, "loss": 0.1796, "step": 63106 }, { "epoch": 0.11189570018590772, "grad_norm": 0.3671875, "learning_rate": 0.0006821424276904946, "loss": 0.174, "step": 63108 }, { "epoch": 0.11189924635121754, "grad_norm": 0.462890625, "learning_rate": 0.0006820897138163489, "loss": 0.1698, "step": 63110 }, { "epoch": 0.11190279251652735, "grad_norm": 0.400390625, "learning_rate": 0.0006820370017699002, "loss": 0.1828, "step": 63112 }, { "epoch": 0.11190633868183716, "grad_norm": 0.67578125, "learning_rate": 0.0006819842915513792, "loss": 0.1931, "step": 63114 }, { "epoch": 0.11190988484714698, "grad_norm": 1.7578125, "learning_rate": 0.0006819315831610159, "loss": 0.1997, "step": 63116 }, { "epoch": 0.1119134310124568, "grad_norm": 0.32421875, "learning_rate": 0.000681878876599041, "loss": 0.1706, "step": 63118 }, { "epoch": 0.11191697717776661, "grad_norm": 0.462890625, "learning_rate": 0.0006818261718656848, "loss": 0.1619, "step": 63120 }, { "epoch": 0.11192052334307642, "grad_norm": 0.40234375, "learning_rate": 0.0006817734689611783, "loss": 0.1369, "step": 63122 }, { "epoch": 0.11192406950838624, "grad_norm": 0.50390625, "learning_rate": 0.0006817207678857518, "loss": 0.1347, "step": 63124 }, { "epoch": 0.11192761567369605, "grad_norm": 0.259765625, "learning_rate": 0.0006816680686396355, "loss": 0.1598, "step": 63126 }, { "epoch": 0.11193116183900587, "grad_norm": 0.158203125, "learning_rate": 0.0006816153712230599, "loss": 0.1647, "step": 63128 }, { "epoch": 0.11193470800431568, "grad_norm": 0.6875, "learning_rate": 0.0006815626756362558, "loss": 0.2923, "step": 63130 }, { "epoch": 0.1119382541696255, "grad_norm": 0.2001953125, "learning_rate": 0.0006815099818794537, "loss": 0.4023, "step": 63132 }, { "epoch": 0.11194180033493531, "grad_norm": 0.23828125, "learning_rate": 0.0006814572899528838, "loss": 0.168, "step": 63134 }, { "epoch": 0.11194534650024512, "grad_norm": 0.369140625, "learning_rate": 0.0006814045998567766, "loss": 0.192, "step": 63136 }, { "epoch": 0.11194889266555494, "grad_norm": 0.5859375, "learning_rate": 0.000681351911591362, "loss": 0.323, "step": 63138 }, { "epoch": 0.11195243883086475, "grad_norm": 0.56640625, "learning_rate": 0.0006812992251568719, "loss": 0.1513, "step": 63140 }, { "epoch": 0.11195598499617457, "grad_norm": 0.267578125, "learning_rate": 0.000681246540553535, "loss": 0.1744, "step": 63142 }, { "epoch": 0.11195953116148438, "grad_norm": 0.6171875, "learning_rate": 0.0006811938577815828, "loss": 0.1681, "step": 63144 }, { "epoch": 0.11196307732679421, "grad_norm": 1.109375, "learning_rate": 0.000681141176841245, "loss": 0.243, "step": 63146 }, { "epoch": 0.11196662349210403, "grad_norm": 0.51171875, "learning_rate": 0.0006810884977327533, "loss": 0.2079, "step": 63148 }, { "epoch": 0.11197016965741384, "grad_norm": 0.4765625, "learning_rate": 0.0006810358204563363, "loss": 0.2026, "step": 63150 }, { "epoch": 0.11197371582272365, "grad_norm": 0.53125, "learning_rate": 0.0006809831450122256, "loss": 0.1301, "step": 63152 }, { "epoch": 0.11197726198803347, "grad_norm": 0.63671875, "learning_rate": 0.0006809304714006513, "loss": 0.1644, "step": 63154 }, { "epoch": 0.11198080815334328, "grad_norm": 0.2080078125, "learning_rate": 0.0006808777996218435, "loss": 0.2005, "step": 63156 }, { "epoch": 0.1119843543186531, "grad_norm": 0.201171875, "learning_rate": 0.0006808251296760331, "loss": 0.1697, "step": 63158 }, { "epoch": 0.11198790048396291, "grad_norm": 0.48828125, "learning_rate": 0.0006807724615634496, "loss": 0.2095, "step": 63160 }, { "epoch": 0.11199144664927273, "grad_norm": 0.53515625, "learning_rate": 0.0006807197952843241, "loss": 0.1938, "step": 63162 }, { "epoch": 0.11199499281458254, "grad_norm": 1.3984375, "learning_rate": 0.0006806671308388867, "loss": 0.2392, "step": 63164 }, { "epoch": 0.11199853897989236, "grad_norm": 0.478515625, "learning_rate": 0.0006806144682273678, "loss": 0.135, "step": 63166 }, { "epoch": 0.11200208514520217, "grad_norm": 0.80859375, "learning_rate": 0.000680561807449997, "loss": 0.1453, "step": 63168 }, { "epoch": 0.11200563131051199, "grad_norm": 0.470703125, "learning_rate": 0.0006805091485070059, "loss": 0.1877, "step": 63170 }, { "epoch": 0.1120091774758218, "grad_norm": 0.345703125, "learning_rate": 0.000680456491398624, "loss": 0.1324, "step": 63172 }, { "epoch": 0.11201272364113161, "grad_norm": 0.2265625, "learning_rate": 0.0006804038361250815, "loss": 0.1711, "step": 63174 }, { "epoch": 0.11201626980644143, "grad_norm": 3.296875, "learning_rate": 0.0006803511826866092, "loss": 0.2491, "step": 63176 }, { "epoch": 0.11201981597175124, "grad_norm": 0.578125, "learning_rate": 0.0006802985310834365, "loss": 0.1926, "step": 63178 }, { "epoch": 0.11202336213706106, "grad_norm": 0.2490234375, "learning_rate": 0.000680245881315795, "loss": 0.2052, "step": 63180 }, { "epoch": 0.11202690830237087, "grad_norm": 0.255859375, "learning_rate": 0.0006801932333839135, "loss": 0.1528, "step": 63182 }, { "epoch": 0.11203045446768069, "grad_norm": 0.15234375, "learning_rate": 0.0006801405872880233, "loss": 0.1578, "step": 63184 }, { "epoch": 0.1120340006329905, "grad_norm": 0.3515625, "learning_rate": 0.000680087943028354, "loss": 0.1714, "step": 63186 }, { "epoch": 0.11203754679830032, "grad_norm": 0.423828125, "learning_rate": 0.0006800353006051363, "loss": 0.182, "step": 63188 }, { "epoch": 0.11204109296361014, "grad_norm": 0.25390625, "learning_rate": 0.0006799826600186003, "loss": 0.1452, "step": 63190 }, { "epoch": 0.11204463912891996, "grad_norm": 0.1923828125, "learning_rate": 0.0006799300212689763, "loss": 0.1397, "step": 63192 }, { "epoch": 0.11204818529422977, "grad_norm": 0.27734375, "learning_rate": 0.0006798773843564943, "loss": 0.1531, "step": 63194 }, { "epoch": 0.11205173145953959, "grad_norm": 1.0390625, "learning_rate": 0.0006798247492813843, "loss": 0.5079, "step": 63196 }, { "epoch": 0.1120552776248494, "grad_norm": 0.380859375, "learning_rate": 0.0006797721160438773, "loss": 0.2264, "step": 63198 }, { "epoch": 0.11205882379015922, "grad_norm": 0.50390625, "learning_rate": 0.0006797194846442026, "loss": 0.1549, "step": 63200 }, { "epoch": 0.11206236995546903, "grad_norm": 0.43359375, "learning_rate": 0.0006796668550825907, "loss": 0.1048, "step": 63202 }, { "epoch": 0.11206591612077885, "grad_norm": 0.7109375, "learning_rate": 0.0006796142273592715, "loss": 0.235, "step": 63204 }, { "epoch": 0.11206946228608866, "grad_norm": 0.3125, "learning_rate": 0.0006795616014744765, "loss": 0.1699, "step": 63206 }, { "epoch": 0.11207300845139848, "grad_norm": 0.1875, "learning_rate": 0.0006795089774284339, "loss": 0.1681, "step": 63208 }, { "epoch": 0.11207655461670829, "grad_norm": 0.59765625, "learning_rate": 0.0006794563552213751, "loss": 0.3953, "step": 63210 }, { "epoch": 0.1120801007820181, "grad_norm": 0.5078125, "learning_rate": 0.00067940373485353, "loss": 0.2901, "step": 63212 }, { "epoch": 0.11208364694732792, "grad_norm": 0.18359375, "learning_rate": 0.0006793511163251288, "loss": 0.1638, "step": 63214 }, { "epoch": 0.11208719311263773, "grad_norm": 0.50390625, "learning_rate": 0.0006792984996364013, "loss": 0.2014, "step": 63216 }, { "epoch": 0.11209073927794755, "grad_norm": 0.40625, "learning_rate": 0.0006792458847875775, "loss": 0.3111, "step": 63218 }, { "epoch": 0.11209428544325736, "grad_norm": 0.21875, "learning_rate": 0.0006791932717788882, "loss": 0.1932, "step": 63220 }, { "epoch": 0.11209783160856718, "grad_norm": 0.267578125, "learning_rate": 0.0006791406606105631, "loss": 0.179, "step": 63222 }, { "epoch": 0.11210137777387699, "grad_norm": 0.380859375, "learning_rate": 0.0006790880512828321, "loss": 0.1675, "step": 63224 }, { "epoch": 0.1121049239391868, "grad_norm": 0.48828125, "learning_rate": 0.0006790354437959252, "loss": 0.2787, "step": 63226 }, { "epoch": 0.11210847010449662, "grad_norm": 0.408203125, "learning_rate": 0.0006789828381500731, "loss": 0.1732, "step": 63228 }, { "epoch": 0.11211201626980644, "grad_norm": 0.28125, "learning_rate": 0.0006789302343455055, "loss": 0.1928, "step": 63230 }, { "epoch": 0.11211556243511625, "grad_norm": 0.353515625, "learning_rate": 0.0006788776323824525, "loss": 0.2219, "step": 63232 }, { "epoch": 0.11211910860042607, "grad_norm": 0.9296875, "learning_rate": 0.0006788250322611443, "loss": 0.2214, "step": 63234 }, { "epoch": 0.1121226547657359, "grad_norm": 0.349609375, "learning_rate": 0.0006787724339818102, "loss": 0.2405, "step": 63236 }, { "epoch": 0.11212620093104571, "grad_norm": 0.4765625, "learning_rate": 0.0006787198375446814, "loss": 0.2149, "step": 63238 }, { "epoch": 0.11212974709635552, "grad_norm": 0.33203125, "learning_rate": 0.0006786672429499868, "loss": 0.1599, "step": 63240 }, { "epoch": 0.11213329326166534, "grad_norm": 0.392578125, "learning_rate": 0.0006786146501979572, "loss": 0.2294, "step": 63242 }, { "epoch": 0.11213683942697515, "grad_norm": 0.298828125, "learning_rate": 0.000678562059288822, "loss": 0.1623, "step": 63244 }, { "epoch": 0.11214038559228497, "grad_norm": 0.486328125, "learning_rate": 0.0006785094702228125, "loss": 0.1725, "step": 63246 }, { "epoch": 0.11214393175759478, "grad_norm": 1.9375, "learning_rate": 0.0006784568830001567, "loss": 0.1884, "step": 63248 }, { "epoch": 0.1121474779229046, "grad_norm": 0.4765625, "learning_rate": 0.0006784042976210862, "loss": 0.1339, "step": 63250 }, { "epoch": 0.11215102408821441, "grad_norm": 0.62109375, "learning_rate": 0.0006783517140858303, "loss": 0.1928, "step": 63252 }, { "epoch": 0.11215457025352422, "grad_norm": 0.341796875, "learning_rate": 0.0006782991323946191, "loss": 0.1476, "step": 63254 }, { "epoch": 0.11215811641883404, "grad_norm": 0.42578125, "learning_rate": 0.0006782465525476825, "loss": 0.1655, "step": 63256 }, { "epoch": 0.11216166258414385, "grad_norm": 0.55078125, "learning_rate": 0.0006781939745452502, "loss": 0.4395, "step": 63258 }, { "epoch": 0.11216520874945367, "grad_norm": 2.953125, "learning_rate": 0.0006781413983875528, "loss": 0.2215, "step": 63260 }, { "epoch": 0.11216875491476348, "grad_norm": 0.392578125, "learning_rate": 0.0006780888240748199, "loss": 0.1985, "step": 63262 }, { "epoch": 0.1121723010800733, "grad_norm": 0.66796875, "learning_rate": 0.0006780362516072814, "loss": 0.282, "step": 63264 }, { "epoch": 0.11217584724538311, "grad_norm": 0.38671875, "learning_rate": 0.0006779836809851672, "loss": 0.1842, "step": 63266 }, { "epoch": 0.11217939341069293, "grad_norm": 0.94140625, "learning_rate": 0.0006779311122087068, "loss": 0.3825, "step": 63268 }, { "epoch": 0.11218293957600274, "grad_norm": 1.671875, "learning_rate": 0.0006778785452781311, "loss": 0.1418, "step": 63270 }, { "epoch": 0.11218648574131256, "grad_norm": 1.2109375, "learning_rate": 0.0006778259801936692, "loss": 0.2428, "step": 63272 }, { "epoch": 0.11219003190662237, "grad_norm": 0.404296875, "learning_rate": 0.0006777734169555515, "loss": 0.1779, "step": 63274 }, { "epoch": 0.11219357807193218, "grad_norm": 4.9375, "learning_rate": 0.0006777208555640072, "loss": 0.4282, "step": 63276 }, { "epoch": 0.112197124237242, "grad_norm": 1.34375, "learning_rate": 0.0006776682960192668, "loss": 0.1826, "step": 63278 }, { "epoch": 0.11220067040255181, "grad_norm": 0.40625, "learning_rate": 0.00067761573832156, "loss": 0.2005, "step": 63280 }, { "epoch": 0.11220421656786164, "grad_norm": 0.5703125, "learning_rate": 0.0006775631824711168, "loss": 0.1176, "step": 63282 }, { "epoch": 0.11220776273317146, "grad_norm": 0.51171875, "learning_rate": 0.0006775106284681667, "loss": 0.1572, "step": 63284 }, { "epoch": 0.11221130889848127, "grad_norm": 0.296875, "learning_rate": 0.0006774580763129392, "loss": 0.1707, "step": 63286 }, { "epoch": 0.11221485506379109, "grad_norm": 0.1875, "learning_rate": 0.0006774055260056655, "loss": 0.1603, "step": 63288 }, { "epoch": 0.1122184012291009, "grad_norm": 0.71484375, "learning_rate": 0.0006773529775465738, "loss": 0.2016, "step": 63290 }, { "epoch": 0.11222194739441071, "grad_norm": 0.8359375, "learning_rate": 0.0006773004309358952, "loss": 0.167, "step": 63292 }, { "epoch": 0.11222549355972053, "grad_norm": 0.466796875, "learning_rate": 0.0006772478861738585, "loss": 0.1554, "step": 63294 }, { "epoch": 0.11222903972503034, "grad_norm": 0.6640625, "learning_rate": 0.0006771953432606947, "loss": 0.2452, "step": 63296 }, { "epoch": 0.11223258589034016, "grad_norm": 0.3671875, "learning_rate": 0.0006771428021966322, "loss": 0.1815, "step": 63298 }, { "epoch": 0.11223613205564997, "grad_norm": 1.0546875, "learning_rate": 0.0006770902629819016, "loss": 0.2485, "step": 63300 }, { "epoch": 0.11223967822095979, "grad_norm": 0.3828125, "learning_rate": 0.0006770377256167328, "loss": 0.2494, "step": 63302 }, { "epoch": 0.1122432243862696, "grad_norm": 0.4765625, "learning_rate": 0.0006769851901013553, "loss": 0.1858, "step": 63304 }, { "epoch": 0.11224677055157942, "grad_norm": 0.291015625, "learning_rate": 0.0006769326564359988, "loss": 0.179, "step": 63306 }, { "epoch": 0.11225031671688923, "grad_norm": 0.259765625, "learning_rate": 0.0006768801246208926, "loss": 0.2051, "step": 63308 }, { "epoch": 0.11225386288219905, "grad_norm": 0.46875, "learning_rate": 0.0006768275946562675, "loss": 0.2015, "step": 63310 }, { "epoch": 0.11225740904750886, "grad_norm": 0.484375, "learning_rate": 0.0006767750665423527, "loss": 0.1955, "step": 63312 }, { "epoch": 0.11226095521281867, "grad_norm": 1.6796875, "learning_rate": 0.0006767225402793777, "loss": 0.2128, "step": 63314 }, { "epoch": 0.11226450137812849, "grad_norm": 0.228515625, "learning_rate": 0.0006766700158675723, "loss": 0.209, "step": 63316 }, { "epoch": 0.1122680475434383, "grad_norm": 0.34375, "learning_rate": 0.0006766174933071667, "loss": 0.1948, "step": 63318 }, { "epoch": 0.11227159370874812, "grad_norm": 0.6328125, "learning_rate": 0.0006765649725983902, "loss": 0.1548, "step": 63320 }, { "epoch": 0.11227513987405793, "grad_norm": 0.55078125, "learning_rate": 0.0006765124537414724, "loss": 0.2479, "step": 63322 }, { "epoch": 0.11227868603936775, "grad_norm": 0.55078125, "learning_rate": 0.0006764599367366432, "loss": 0.198, "step": 63324 }, { "epoch": 0.11228223220467758, "grad_norm": 1.8125, "learning_rate": 0.0006764074215841322, "loss": 0.1566, "step": 63326 }, { "epoch": 0.11228577836998739, "grad_norm": 0.2412109375, "learning_rate": 0.0006763549082841696, "loss": 0.284, "step": 63328 }, { "epoch": 0.1122893245352972, "grad_norm": 0.89453125, "learning_rate": 0.0006763023968369837, "loss": 0.1879, "step": 63330 }, { "epoch": 0.11229287070060702, "grad_norm": 0.28125, "learning_rate": 0.0006762498872428053, "loss": 0.1825, "step": 63332 }, { "epoch": 0.11229641686591683, "grad_norm": 0.2490234375, "learning_rate": 0.0006761973795018637, "loss": 0.195, "step": 63334 }, { "epoch": 0.11229996303122665, "grad_norm": 0.3203125, "learning_rate": 0.0006761448736143891, "loss": 0.1057, "step": 63336 }, { "epoch": 0.11230350919653646, "grad_norm": 0.9296875, "learning_rate": 0.0006760923695806099, "loss": 0.2237, "step": 63338 }, { "epoch": 0.11230705536184628, "grad_norm": 0.373046875, "learning_rate": 0.0006760398674007568, "loss": 0.1943, "step": 63340 }, { "epoch": 0.11231060152715609, "grad_norm": 0.185546875, "learning_rate": 0.0006759873670750591, "loss": 0.1499, "step": 63342 }, { "epoch": 0.11231414769246591, "grad_norm": 0.1435546875, "learning_rate": 0.0006759348686037464, "loss": 0.1442, "step": 63344 }, { "epoch": 0.11231769385777572, "grad_norm": 0.447265625, "learning_rate": 0.0006758823719870482, "loss": 0.1653, "step": 63346 }, { "epoch": 0.11232124002308554, "grad_norm": 0.412109375, "learning_rate": 0.0006758298772251938, "loss": 0.1789, "step": 63348 }, { "epoch": 0.11232478618839535, "grad_norm": 0.6796875, "learning_rate": 0.0006757773843184135, "loss": 0.1675, "step": 63350 }, { "epoch": 0.11232833235370517, "grad_norm": 0.283203125, "learning_rate": 0.0006757248932669361, "loss": 0.1491, "step": 63352 }, { "epoch": 0.11233187851901498, "grad_norm": 1.046875, "learning_rate": 0.0006756724040709924, "loss": 0.2408, "step": 63354 }, { "epoch": 0.1123354246843248, "grad_norm": 0.294921875, "learning_rate": 0.0006756199167308102, "loss": 0.158, "step": 63356 }, { "epoch": 0.11233897084963461, "grad_norm": 0.55859375, "learning_rate": 0.0006755674312466205, "loss": 0.1511, "step": 63358 }, { "epoch": 0.11234251701494442, "grad_norm": 0.36328125, "learning_rate": 0.0006755149476186523, "loss": 0.3279, "step": 63360 }, { "epoch": 0.11234606318025424, "grad_norm": 0.515625, "learning_rate": 0.0006754624658471351, "loss": 0.1623, "step": 63362 }, { "epoch": 0.11234960934556405, "grad_norm": 0.84375, "learning_rate": 0.0006754099859322987, "loss": 0.3326, "step": 63364 }, { "epoch": 0.11235315551087387, "grad_norm": 0.44140625, "learning_rate": 0.0006753575078743717, "loss": 0.2096, "step": 63366 }, { "epoch": 0.11235670167618368, "grad_norm": 0.431640625, "learning_rate": 0.0006753050316735848, "loss": 0.2048, "step": 63368 }, { "epoch": 0.1123602478414935, "grad_norm": 0.2177734375, "learning_rate": 0.000675252557330167, "loss": 0.1525, "step": 63370 }, { "epoch": 0.11236379400680332, "grad_norm": 0.16015625, "learning_rate": 0.0006752000848443478, "loss": 0.1242, "step": 63372 }, { "epoch": 0.11236734017211314, "grad_norm": 0.228515625, "learning_rate": 0.0006751476142163562, "loss": 0.1668, "step": 63374 }, { "epoch": 0.11237088633742295, "grad_norm": 0.83984375, "learning_rate": 0.0006750951454464227, "loss": 0.164, "step": 63376 }, { "epoch": 0.11237443250273277, "grad_norm": 0.8125, "learning_rate": 0.000675042678534776, "loss": 0.1467, "step": 63378 }, { "epoch": 0.11237797866804258, "grad_norm": 0.75390625, "learning_rate": 0.000674990213481646, "loss": 0.1907, "step": 63380 }, { "epoch": 0.1123815248333524, "grad_norm": 0.3515625, "learning_rate": 0.0006749377502872618, "loss": 0.1738, "step": 63382 }, { "epoch": 0.11238507099866221, "grad_norm": 0.328125, "learning_rate": 0.0006748852889518526, "loss": 0.1197, "step": 63384 }, { "epoch": 0.11238861716397203, "grad_norm": 0.10107421875, "learning_rate": 0.000674832829475649, "loss": 0.1314, "step": 63386 }, { "epoch": 0.11239216332928184, "grad_norm": 1.140625, "learning_rate": 0.000674780371858879, "loss": 0.4781, "step": 63388 }, { "epoch": 0.11239570949459166, "grad_norm": 0.66015625, "learning_rate": 0.000674727916101773, "loss": 0.2614, "step": 63390 }, { "epoch": 0.11239925565990147, "grad_norm": 0.69921875, "learning_rate": 0.0006746754622045596, "loss": 0.2574, "step": 63392 }, { "epoch": 0.11240280182521128, "grad_norm": 0.462890625, "learning_rate": 0.0006746230101674693, "loss": 0.1289, "step": 63394 }, { "epoch": 0.1124063479905211, "grad_norm": 0.337890625, "learning_rate": 0.0006745705599907303, "loss": 0.1813, "step": 63396 }, { "epoch": 0.11240989415583091, "grad_norm": 0.33203125, "learning_rate": 0.0006745181116745728, "loss": 0.178, "step": 63398 }, { "epoch": 0.11241344032114073, "grad_norm": 0.349609375, "learning_rate": 0.0006744656652192261, "loss": 0.1487, "step": 63400 }, { "epoch": 0.11241698648645054, "grad_norm": 0.279296875, "learning_rate": 0.0006744132206249192, "loss": 0.188, "step": 63402 }, { "epoch": 0.11242053265176036, "grad_norm": 0.48046875, "learning_rate": 0.0006743607778918819, "loss": 0.1602, "step": 63404 }, { "epoch": 0.11242407881707017, "grad_norm": 0.14453125, "learning_rate": 0.0006743083370203427, "loss": 0.126, "step": 63406 }, { "epoch": 0.11242762498237999, "grad_norm": 0.90625, "learning_rate": 0.0006742558980105322, "loss": 0.2096, "step": 63408 }, { "epoch": 0.1124311711476898, "grad_norm": 0.5546875, "learning_rate": 0.0006742034608626788, "loss": 0.1602, "step": 63410 }, { "epoch": 0.11243471731299962, "grad_norm": 1.0234375, "learning_rate": 0.0006741510255770122, "loss": 0.2321, "step": 63412 }, { "epoch": 0.11243826347830943, "grad_norm": 0.208984375, "learning_rate": 0.0006740985921537615, "loss": 0.1783, "step": 63414 }, { "epoch": 0.11244180964361924, "grad_norm": 0.53515625, "learning_rate": 0.0006740461605931565, "loss": 0.2083, "step": 63416 }, { "epoch": 0.11244535580892907, "grad_norm": 0.53515625, "learning_rate": 0.0006739937308954259, "loss": 0.1896, "step": 63418 }, { "epoch": 0.11244890197423889, "grad_norm": 0.19140625, "learning_rate": 0.0006739413030607995, "loss": 0.1915, "step": 63420 }, { "epoch": 0.1124524481395487, "grad_norm": 0.474609375, "learning_rate": 0.0006738888770895063, "loss": 0.1753, "step": 63422 }, { "epoch": 0.11245599430485852, "grad_norm": 1.40625, "learning_rate": 0.0006738364529817752, "loss": 0.2393, "step": 63424 }, { "epoch": 0.11245954047016833, "grad_norm": 0.294921875, "learning_rate": 0.0006737840307378367, "loss": 0.1561, "step": 63426 }, { "epoch": 0.11246308663547815, "grad_norm": 0.267578125, "learning_rate": 0.0006737316103579186, "loss": 0.2179, "step": 63428 }, { "epoch": 0.11246663280078796, "grad_norm": 2.84375, "learning_rate": 0.0006736791918422511, "loss": 0.2568, "step": 63430 }, { "epoch": 0.11247017896609778, "grad_norm": 1.0, "learning_rate": 0.0006736267751910633, "loss": 0.176, "step": 63432 }, { "epoch": 0.11247372513140759, "grad_norm": 0.51171875, "learning_rate": 0.0006735743604045837, "loss": 0.1479, "step": 63434 }, { "epoch": 0.1124772712967174, "grad_norm": 0.5390625, "learning_rate": 0.000673521947483043, "loss": 0.189, "step": 63436 }, { "epoch": 0.11248081746202722, "grad_norm": 0.390625, "learning_rate": 0.0006734695364266689, "loss": 0.1469, "step": 63438 }, { "epoch": 0.11248436362733703, "grad_norm": 0.66796875, "learning_rate": 0.0006734171272356918, "loss": 0.1937, "step": 63440 }, { "epoch": 0.11248790979264685, "grad_norm": 0.341796875, "learning_rate": 0.0006733647199103399, "loss": 0.169, "step": 63442 }, { "epoch": 0.11249145595795666, "grad_norm": 0.5234375, "learning_rate": 0.0006733123144508435, "loss": 0.2451, "step": 63444 }, { "epoch": 0.11249500212326648, "grad_norm": 0.4921875, "learning_rate": 0.0006732599108574307, "loss": 0.1141, "step": 63446 }, { "epoch": 0.11249854828857629, "grad_norm": 0.4140625, "learning_rate": 0.0006732075091303313, "loss": 0.2102, "step": 63448 }, { "epoch": 0.1125020944538861, "grad_norm": 0.2197265625, "learning_rate": 0.0006731551092697745, "loss": 0.1622, "step": 63450 }, { "epoch": 0.11250564061919592, "grad_norm": 0.361328125, "learning_rate": 0.0006731027112759892, "loss": 0.2526, "step": 63452 }, { "epoch": 0.11250918678450574, "grad_norm": 0.5390625, "learning_rate": 0.0006730503151492047, "loss": 0.1665, "step": 63454 }, { "epoch": 0.11251273294981555, "grad_norm": 0.66015625, "learning_rate": 0.0006729979208896498, "loss": 0.3648, "step": 63456 }, { "epoch": 0.11251627911512536, "grad_norm": 1.3359375, "learning_rate": 0.0006729455284975544, "loss": 0.2527, "step": 63458 }, { "epoch": 0.11251982528043518, "grad_norm": 0.52734375, "learning_rate": 0.0006728931379731472, "loss": 0.1293, "step": 63460 }, { "epoch": 0.11252337144574501, "grad_norm": 0.400390625, "learning_rate": 0.0006728407493166571, "loss": 0.1584, "step": 63462 }, { "epoch": 0.11252691761105482, "grad_norm": 0.384765625, "learning_rate": 0.0006727883625283133, "loss": 0.215, "step": 63464 }, { "epoch": 0.11253046377636464, "grad_norm": 0.390625, "learning_rate": 0.0006727359776083451, "loss": 0.1603, "step": 63466 }, { "epoch": 0.11253400994167445, "grad_norm": 3.1875, "learning_rate": 0.0006726835945569818, "loss": 0.2621, "step": 63468 }, { "epoch": 0.11253755610698427, "grad_norm": 0.52734375, "learning_rate": 0.0006726312133744523, "loss": 0.185, "step": 63470 }, { "epoch": 0.11254110227229408, "grad_norm": 1.8046875, "learning_rate": 0.0006725788340609855, "loss": 0.4803, "step": 63472 }, { "epoch": 0.1125446484376039, "grad_norm": 1.1875, "learning_rate": 0.0006725264566168101, "loss": 0.2339, "step": 63474 }, { "epoch": 0.11254819460291371, "grad_norm": 0.59765625, "learning_rate": 0.0006724740810421567, "loss": 0.176, "step": 63476 }, { "epoch": 0.11255174076822352, "grad_norm": 0.5078125, "learning_rate": 0.0006724217073372523, "loss": 0.15, "step": 63478 }, { "epoch": 0.11255528693353334, "grad_norm": 0.40625, "learning_rate": 0.0006723693355023276, "loss": 0.157, "step": 63480 }, { "epoch": 0.11255883309884315, "grad_norm": 1.9140625, "learning_rate": 0.0006723169655376106, "loss": 0.2122, "step": 63482 }, { "epoch": 0.11256237926415297, "grad_norm": 0.546875, "learning_rate": 0.0006722645974433315, "loss": 0.1591, "step": 63484 }, { "epoch": 0.11256592542946278, "grad_norm": 0.380859375, "learning_rate": 0.0006722122312197179, "loss": 0.1932, "step": 63486 }, { "epoch": 0.1125694715947726, "grad_norm": 1.1484375, "learning_rate": 0.0006721598668669998, "loss": 0.2579, "step": 63488 }, { "epoch": 0.11257301776008241, "grad_norm": 0.232421875, "learning_rate": 0.0006721075043854059, "loss": 0.1859, "step": 63490 }, { "epoch": 0.11257656392539223, "grad_norm": 0.333984375, "learning_rate": 0.0006720551437751653, "loss": 0.2155, "step": 63492 }, { "epoch": 0.11258011009070204, "grad_norm": 0.33203125, "learning_rate": 0.0006720027850365069, "loss": 0.3474, "step": 63494 }, { "epoch": 0.11258365625601185, "grad_norm": 0.2177734375, "learning_rate": 0.0006719504281696594, "loss": 0.1114, "step": 63496 }, { "epoch": 0.11258720242132167, "grad_norm": 0.7734375, "learning_rate": 0.0006718980731748523, "loss": 0.1701, "step": 63498 }, { "epoch": 0.11259074858663148, "grad_norm": 0.79296875, "learning_rate": 0.0006718457200523144, "loss": 0.2237, "step": 63500 }, { "epoch": 0.1125942947519413, "grad_norm": 0.333984375, "learning_rate": 0.0006717933688022746, "loss": 0.1564, "step": 63502 }, { "epoch": 0.11259784091725111, "grad_norm": 0.2001953125, "learning_rate": 0.0006717410194249615, "loss": 0.1628, "step": 63504 }, { "epoch": 0.11260138708256093, "grad_norm": 0.5390625, "learning_rate": 0.000671688671920605, "loss": 0.1855, "step": 63506 }, { "epoch": 0.11260493324787076, "grad_norm": 0.46484375, "learning_rate": 0.0006716363262894334, "loss": 0.1677, "step": 63508 }, { "epoch": 0.11260847941318057, "grad_norm": 7.09375, "learning_rate": 0.0006715839825316756, "loss": 0.3004, "step": 63510 }, { "epoch": 0.11261202557849039, "grad_norm": 0.59375, "learning_rate": 0.0006715316406475605, "loss": 0.1707, "step": 63512 }, { "epoch": 0.1126155717438002, "grad_norm": 0.28125, "learning_rate": 0.0006714793006373169, "loss": 0.163, "step": 63514 }, { "epoch": 0.11261911790911001, "grad_norm": 0.478515625, "learning_rate": 0.0006714269625011746, "loss": 0.1763, "step": 63516 }, { "epoch": 0.11262266407441983, "grad_norm": 0.333984375, "learning_rate": 0.0006713746262393612, "loss": 0.1891, "step": 63518 }, { "epoch": 0.11262621023972964, "grad_norm": 0.333984375, "learning_rate": 0.0006713222918521062, "loss": 0.1962, "step": 63520 }, { "epoch": 0.11262975640503946, "grad_norm": 0.150390625, "learning_rate": 0.0006712699593396386, "loss": 0.1522, "step": 63522 }, { "epoch": 0.11263330257034927, "grad_norm": 2.609375, "learning_rate": 0.0006712176287021873, "loss": 0.1843, "step": 63524 }, { "epoch": 0.11263684873565909, "grad_norm": 1.0546875, "learning_rate": 0.0006711652999399811, "loss": 0.1903, "step": 63526 }, { "epoch": 0.1126403949009689, "grad_norm": 0.48046875, "learning_rate": 0.0006711129730532486, "loss": 0.1489, "step": 63528 }, { "epoch": 0.11264394106627872, "grad_norm": 0.2275390625, "learning_rate": 0.000671060648042219, "loss": 0.1543, "step": 63530 }, { "epoch": 0.11264748723158853, "grad_norm": 0.5234375, "learning_rate": 0.0006710083249071206, "loss": 0.2205, "step": 63532 }, { "epoch": 0.11265103339689835, "grad_norm": 0.359375, "learning_rate": 0.0006709560036481832, "loss": 0.1587, "step": 63534 }, { "epoch": 0.11265457956220816, "grad_norm": 0.67578125, "learning_rate": 0.0006709036842656344, "loss": 0.1549, "step": 63536 }, { "epoch": 0.11265812572751797, "grad_norm": 0.28515625, "learning_rate": 0.0006708513667597041, "loss": 0.1634, "step": 63538 }, { "epoch": 0.11266167189282779, "grad_norm": 0.68359375, "learning_rate": 0.0006707990511306202, "loss": 0.1485, "step": 63540 }, { "epoch": 0.1126652180581376, "grad_norm": 0.1748046875, "learning_rate": 0.0006707467373786127, "loss": 0.1357, "step": 63542 }, { "epoch": 0.11266876422344742, "grad_norm": 0.232421875, "learning_rate": 0.0006706944255039087, "loss": 0.3306, "step": 63544 }, { "epoch": 0.11267231038875723, "grad_norm": 0.482421875, "learning_rate": 0.0006706421155067383, "loss": 0.1616, "step": 63546 }, { "epoch": 0.11267585655406705, "grad_norm": 0.416015625, "learning_rate": 0.0006705898073873301, "loss": 0.2617, "step": 63548 }, { "epoch": 0.11267940271937686, "grad_norm": 5.21875, "learning_rate": 0.0006705375011459125, "loss": 0.3077, "step": 63550 }, { "epoch": 0.11268294888468668, "grad_norm": 0.6953125, "learning_rate": 0.0006704851967827144, "loss": 0.1861, "step": 63552 }, { "epoch": 0.1126864950499965, "grad_norm": 0.265625, "learning_rate": 0.0006704328942979643, "loss": 0.1958, "step": 63554 }, { "epoch": 0.11269004121530632, "grad_norm": 2.25, "learning_rate": 0.0006703805936918916, "loss": 0.4085, "step": 63556 }, { "epoch": 0.11269358738061613, "grad_norm": 0.34765625, "learning_rate": 0.0006703282949647246, "loss": 0.184, "step": 63558 }, { "epoch": 0.11269713354592595, "grad_norm": 0.384765625, "learning_rate": 0.000670275998116692, "loss": 0.139, "step": 63560 }, { "epoch": 0.11270067971123576, "grad_norm": 0.291015625, "learning_rate": 0.0006702237031480221, "loss": 0.2268, "step": 63562 }, { "epoch": 0.11270422587654558, "grad_norm": 0.203125, "learning_rate": 0.0006701714100589447, "loss": 0.1481, "step": 63564 }, { "epoch": 0.11270777204185539, "grad_norm": 0.83984375, "learning_rate": 0.0006701191188496875, "loss": 0.1653, "step": 63566 }, { "epoch": 0.1127113182071652, "grad_norm": 0.439453125, "learning_rate": 0.00067006682952048, "loss": 0.184, "step": 63568 }, { "epoch": 0.11271486437247502, "grad_norm": 0.53125, "learning_rate": 0.0006700145420715501, "loss": 0.1536, "step": 63570 }, { "epoch": 0.11271841053778484, "grad_norm": 0.82421875, "learning_rate": 0.0006699622565031266, "loss": 0.1545, "step": 63572 }, { "epoch": 0.11272195670309465, "grad_norm": 0.859375, "learning_rate": 0.0006699099728154392, "loss": 0.1986, "step": 63574 }, { "epoch": 0.11272550286840446, "grad_norm": 0.5234375, "learning_rate": 0.0006698576910087149, "loss": 0.2835, "step": 63576 }, { "epoch": 0.11272904903371428, "grad_norm": 0.353515625, "learning_rate": 0.0006698054110831838, "loss": 0.2473, "step": 63578 }, { "epoch": 0.1127325951990241, "grad_norm": 0.2412109375, "learning_rate": 0.0006697531330390734, "loss": 0.1497, "step": 63580 }, { "epoch": 0.11273614136433391, "grad_norm": 0.6015625, "learning_rate": 0.0006697008568766135, "loss": 0.2439, "step": 63582 }, { "epoch": 0.11273968752964372, "grad_norm": 1.1796875, "learning_rate": 0.0006696485825960314, "loss": 0.1755, "step": 63584 }, { "epoch": 0.11274323369495354, "grad_norm": 0.515625, "learning_rate": 0.0006695963101975569, "loss": 0.1913, "step": 63586 }, { "epoch": 0.11274677986026335, "grad_norm": 0.59765625, "learning_rate": 0.0006695440396814183, "loss": 0.2275, "step": 63588 }, { "epoch": 0.11275032602557317, "grad_norm": 0.41796875, "learning_rate": 0.0006694917710478439, "loss": 0.2092, "step": 63590 }, { "epoch": 0.11275387219088298, "grad_norm": 0.515625, "learning_rate": 0.0006694395042970622, "loss": 0.2014, "step": 63592 }, { "epoch": 0.1127574183561928, "grad_norm": 0.3359375, "learning_rate": 0.0006693872394293019, "loss": 0.1777, "step": 63594 }, { "epoch": 0.11276096452150261, "grad_norm": 0.54296875, "learning_rate": 0.0006693349764447919, "loss": 0.2219, "step": 63596 }, { "epoch": 0.11276451068681244, "grad_norm": 0.314453125, "learning_rate": 0.0006692827153437606, "loss": 0.1288, "step": 63598 }, { "epoch": 0.11276805685212225, "grad_norm": 0.47265625, "learning_rate": 0.0006692304561264365, "loss": 0.1479, "step": 63600 }, { "epoch": 0.11277160301743207, "grad_norm": 2.4375, "learning_rate": 0.0006691781987930479, "loss": 0.1646, "step": 63602 }, { "epoch": 0.11277514918274188, "grad_norm": 0.55078125, "learning_rate": 0.000669125943343824, "loss": 0.1152, "step": 63604 }, { "epoch": 0.1127786953480517, "grad_norm": 0.328125, "learning_rate": 0.0006690736897789927, "loss": 0.1368, "step": 63606 }, { "epoch": 0.11278224151336151, "grad_norm": 0.248046875, "learning_rate": 0.000669021438098783, "loss": 0.1271, "step": 63608 }, { "epoch": 0.11278578767867133, "grad_norm": 0.427734375, "learning_rate": 0.000668969188303423, "loss": 0.1939, "step": 63610 }, { "epoch": 0.11278933384398114, "grad_norm": 0.5703125, "learning_rate": 0.0006689169403931413, "loss": 0.1779, "step": 63612 }, { "epoch": 0.11279288000929096, "grad_norm": 0.40625, "learning_rate": 0.0006688646943681664, "loss": 0.1499, "step": 63614 }, { "epoch": 0.11279642617460077, "grad_norm": 1.1484375, "learning_rate": 0.0006688124502287273, "loss": 0.2692, "step": 63616 }, { "epoch": 0.11279997233991058, "grad_norm": 0.62109375, "learning_rate": 0.000668760207975052, "loss": 0.1698, "step": 63618 }, { "epoch": 0.1128035185052204, "grad_norm": 0.96484375, "learning_rate": 0.0006687079676073691, "loss": 0.2442, "step": 63620 }, { "epoch": 0.11280706467053021, "grad_norm": 0.79296875, "learning_rate": 0.0006686557291259066, "loss": 0.31, "step": 63622 }, { "epoch": 0.11281061083584003, "grad_norm": 0.412109375, "learning_rate": 0.0006686034925308939, "loss": 0.182, "step": 63624 }, { "epoch": 0.11281415700114984, "grad_norm": 0.326171875, "learning_rate": 0.0006685512578225585, "loss": 0.1683, "step": 63626 }, { "epoch": 0.11281770316645966, "grad_norm": 1.203125, "learning_rate": 0.0006684990250011294, "loss": 0.2173, "step": 63628 }, { "epoch": 0.11282124933176947, "grad_norm": 0.50390625, "learning_rate": 0.0006684467940668348, "loss": 0.1542, "step": 63630 }, { "epoch": 0.11282479549707929, "grad_norm": 0.294921875, "learning_rate": 0.0006683945650199039, "loss": 0.2095, "step": 63632 }, { "epoch": 0.1128283416623891, "grad_norm": 0.5390625, "learning_rate": 0.0006683423378605638, "loss": 0.1613, "step": 63634 }, { "epoch": 0.11283188782769892, "grad_norm": 0.59375, "learning_rate": 0.0006682901125890439, "loss": 0.212, "step": 63636 }, { "epoch": 0.11283543399300873, "grad_norm": 0.55859375, "learning_rate": 0.0006682378892055721, "loss": 0.2603, "step": 63638 }, { "epoch": 0.11283898015831854, "grad_norm": 0.36328125, "learning_rate": 0.0006681856677103773, "loss": 0.1506, "step": 63640 }, { "epoch": 0.11284252632362836, "grad_norm": 0.283203125, "learning_rate": 0.0006681334481036873, "loss": 0.1471, "step": 63642 }, { "epoch": 0.11284607248893819, "grad_norm": 0.384765625, "learning_rate": 0.0006680812303857304, "loss": 0.1805, "step": 63644 }, { "epoch": 0.112849618654248, "grad_norm": 0.322265625, "learning_rate": 0.0006680290145567358, "loss": 0.1656, "step": 63646 }, { "epoch": 0.11285316481955782, "grad_norm": 0.20703125, "learning_rate": 0.0006679768006169312, "loss": 0.1542, "step": 63648 }, { "epoch": 0.11285671098486763, "grad_norm": 0.8046875, "learning_rate": 0.0006679245885665454, "loss": 0.187, "step": 63650 }, { "epoch": 0.11286025715017745, "grad_norm": 0.33203125, "learning_rate": 0.0006678723784058058, "loss": 0.1583, "step": 63652 }, { "epoch": 0.11286380331548726, "grad_norm": 0.298828125, "learning_rate": 0.000667820170134942, "loss": 0.3209, "step": 63654 }, { "epoch": 0.11286734948079707, "grad_norm": 0.484375, "learning_rate": 0.0006677679637541816, "loss": 0.2332, "step": 63656 }, { "epoch": 0.11287089564610689, "grad_norm": 0.15625, "learning_rate": 0.0006677157592637531, "loss": 0.1975, "step": 63658 }, { "epoch": 0.1128744418114167, "grad_norm": 0.4609375, "learning_rate": 0.0006676635566638849, "loss": 0.1909, "step": 63660 }, { "epoch": 0.11287798797672652, "grad_norm": 0.6796875, "learning_rate": 0.0006676113559548045, "loss": 0.191, "step": 63662 }, { "epoch": 0.11288153414203633, "grad_norm": 0.236328125, "learning_rate": 0.0006675591571367417, "loss": 0.2058, "step": 63664 }, { "epoch": 0.11288508030734615, "grad_norm": 0.2333984375, "learning_rate": 0.0006675069602099235, "loss": 0.1935, "step": 63666 }, { "epoch": 0.11288862647265596, "grad_norm": 0.375, "learning_rate": 0.0006674547651745788, "loss": 0.2055, "step": 63668 }, { "epoch": 0.11289217263796578, "grad_norm": 0.30859375, "learning_rate": 0.0006674025720309353, "loss": 0.1919, "step": 63670 }, { "epoch": 0.11289571880327559, "grad_norm": 0.197265625, "learning_rate": 0.0006673503807792225, "loss": 0.1555, "step": 63672 }, { "epoch": 0.1128992649685854, "grad_norm": 0.349609375, "learning_rate": 0.000667298191419667, "loss": 0.2005, "step": 63674 }, { "epoch": 0.11290281113389522, "grad_norm": 0.50390625, "learning_rate": 0.0006672460039524982, "loss": 0.1343, "step": 63676 }, { "epoch": 0.11290635729920503, "grad_norm": 0.45703125, "learning_rate": 0.000667193818377944, "loss": 0.1699, "step": 63678 }, { "epoch": 0.11290990346451485, "grad_norm": 0.84375, "learning_rate": 0.0006671416346962327, "loss": 0.1992, "step": 63680 }, { "epoch": 0.11291344962982466, "grad_norm": 0.220703125, "learning_rate": 0.0006670894529075924, "loss": 0.1787, "step": 63682 }, { "epoch": 0.11291699579513448, "grad_norm": 0.408203125, "learning_rate": 0.0006670372730122513, "loss": 0.1423, "step": 63684 }, { "epoch": 0.11292054196044429, "grad_norm": 0.73046875, "learning_rate": 0.0006669850950104377, "loss": 0.2299, "step": 63686 }, { "epoch": 0.11292408812575411, "grad_norm": 0.1611328125, "learning_rate": 0.0006669329189023796, "loss": 0.1517, "step": 63688 }, { "epoch": 0.11292763429106394, "grad_norm": 1.6875, "learning_rate": 0.0006668807446883059, "loss": 0.2521, "step": 63690 }, { "epoch": 0.11293118045637375, "grad_norm": 0.515625, "learning_rate": 0.0006668285723684436, "loss": 0.1341, "step": 63692 }, { "epoch": 0.11293472662168356, "grad_norm": 1.0703125, "learning_rate": 0.0006667764019430219, "loss": 0.1814, "step": 63694 }, { "epoch": 0.11293827278699338, "grad_norm": 0.55078125, "learning_rate": 0.0006667242334122685, "loss": 0.3402, "step": 63696 }, { "epoch": 0.1129418189523032, "grad_norm": 0.490234375, "learning_rate": 0.0006666720667764116, "loss": 0.1874, "step": 63698 }, { "epoch": 0.11294536511761301, "grad_norm": 0.64453125, "learning_rate": 0.0006666199020356796, "loss": 0.2578, "step": 63700 }, { "epoch": 0.11294891128292282, "grad_norm": 2.21875, "learning_rate": 0.0006665677391902998, "loss": 0.239, "step": 63702 }, { "epoch": 0.11295245744823264, "grad_norm": 0.29296875, "learning_rate": 0.0006665155782405015, "loss": 0.1587, "step": 63704 }, { "epoch": 0.11295600361354245, "grad_norm": 1.265625, "learning_rate": 0.0006664634191865121, "loss": 0.1781, "step": 63706 }, { "epoch": 0.11295954977885227, "grad_norm": 1.765625, "learning_rate": 0.0006664112620285599, "loss": 0.2161, "step": 63708 }, { "epoch": 0.11296309594416208, "grad_norm": 0.77734375, "learning_rate": 0.0006663591067668726, "loss": 0.162, "step": 63710 }, { "epoch": 0.1129666421094719, "grad_norm": 0.337890625, "learning_rate": 0.0006663069534016791, "loss": 0.1723, "step": 63712 }, { "epoch": 0.11297018827478171, "grad_norm": 1.1875, "learning_rate": 0.000666254801933207, "loss": 0.1794, "step": 63714 }, { "epoch": 0.11297373444009152, "grad_norm": 0.5078125, "learning_rate": 0.0006662026523616846, "loss": 0.1962, "step": 63716 }, { "epoch": 0.11297728060540134, "grad_norm": 1.59375, "learning_rate": 0.0006661505046873398, "loss": 0.2449, "step": 63718 }, { "epoch": 0.11298082677071115, "grad_norm": 0.6171875, "learning_rate": 0.0006660983589104002, "loss": 0.16, "step": 63720 }, { "epoch": 0.11298437293602097, "grad_norm": 0.26171875, "learning_rate": 0.0006660462150310951, "loss": 0.2038, "step": 63722 }, { "epoch": 0.11298791910133078, "grad_norm": 0.515625, "learning_rate": 0.000665994073049651, "loss": 0.1602, "step": 63724 }, { "epoch": 0.1129914652666406, "grad_norm": 0.29296875, "learning_rate": 0.0006659419329662973, "loss": 0.189, "step": 63726 }, { "epoch": 0.11299501143195041, "grad_norm": 0.66015625, "learning_rate": 0.000665889794781261, "loss": 0.1873, "step": 63728 }, { "epoch": 0.11299855759726023, "grad_norm": 0.80859375, "learning_rate": 0.0006658376584947712, "loss": 0.2041, "step": 63730 }, { "epoch": 0.11300210376257004, "grad_norm": 0.40625, "learning_rate": 0.0006657855241070545, "loss": 0.1098, "step": 63732 }, { "epoch": 0.11300564992787987, "grad_norm": 0.421875, "learning_rate": 0.0006657333916183402, "loss": 0.2048, "step": 63734 }, { "epoch": 0.11300919609318968, "grad_norm": 0.419921875, "learning_rate": 0.0006656812610288559, "loss": 0.1757, "step": 63736 }, { "epoch": 0.1130127422584995, "grad_norm": 0.83203125, "learning_rate": 0.0006656291323388293, "loss": 0.1777, "step": 63738 }, { "epoch": 0.11301628842380931, "grad_norm": 0.80078125, "learning_rate": 0.0006655770055484886, "loss": 0.1709, "step": 63740 }, { "epoch": 0.11301983458911913, "grad_norm": 0.4453125, "learning_rate": 0.0006655248806580613, "loss": 0.1891, "step": 63742 }, { "epoch": 0.11302338075442894, "grad_norm": 0.314453125, "learning_rate": 0.0006654727576677764, "loss": 0.2396, "step": 63744 }, { "epoch": 0.11302692691973876, "grad_norm": 0.88671875, "learning_rate": 0.000665420636577861, "loss": 0.1691, "step": 63746 }, { "epoch": 0.11303047308504857, "grad_norm": 0.34765625, "learning_rate": 0.0006653685173885434, "loss": 0.1442, "step": 63748 }, { "epoch": 0.11303401925035839, "grad_norm": 26.75, "learning_rate": 0.0006653164001000511, "loss": 0.2073, "step": 63750 }, { "epoch": 0.1130375654156682, "grad_norm": 1.109375, "learning_rate": 0.0006652642847126127, "loss": 0.1082, "step": 63752 }, { "epoch": 0.11304111158097802, "grad_norm": 1.125, "learning_rate": 0.0006652121712264559, "loss": 0.2028, "step": 63754 }, { "epoch": 0.11304465774628783, "grad_norm": 0.734375, "learning_rate": 0.0006651600596418083, "loss": 0.1478, "step": 63756 }, { "epoch": 0.11304820391159764, "grad_norm": 0.6328125, "learning_rate": 0.0006651079499588981, "loss": 0.1915, "step": 63758 }, { "epoch": 0.11305175007690746, "grad_norm": 1.90625, "learning_rate": 0.0006650558421779529, "loss": 0.2441, "step": 63760 }, { "epoch": 0.11305529624221727, "grad_norm": 0.333984375, "learning_rate": 0.0006650037362992014, "loss": 0.1889, "step": 63762 }, { "epoch": 0.11305884240752709, "grad_norm": 0.203125, "learning_rate": 0.0006649516323228702, "loss": 0.1721, "step": 63764 }, { "epoch": 0.1130623885728369, "grad_norm": 0.6796875, "learning_rate": 0.000664899530249188, "loss": 0.2031, "step": 63766 }, { "epoch": 0.11306593473814672, "grad_norm": 0.8046875, "learning_rate": 0.0006648474300783824, "loss": 0.1592, "step": 63768 }, { "epoch": 0.11306948090345653, "grad_norm": 0.2890625, "learning_rate": 0.0006647953318106816, "loss": 0.177, "step": 63770 }, { "epoch": 0.11307302706876635, "grad_norm": 0.6875, "learning_rate": 0.0006647432354463134, "loss": 0.3237, "step": 63772 }, { "epoch": 0.11307657323407616, "grad_norm": 0.78125, "learning_rate": 0.0006646911409855053, "loss": 0.2236, "step": 63774 }, { "epoch": 0.11308011939938598, "grad_norm": 1.4140625, "learning_rate": 0.0006646390484284855, "loss": 0.2392, "step": 63776 }, { "epoch": 0.11308366556469579, "grad_norm": 0.17578125, "learning_rate": 0.0006645869577754811, "loss": 0.1929, "step": 63778 }, { "epoch": 0.11308721173000562, "grad_norm": 1.46875, "learning_rate": 0.000664534869026721, "loss": 0.1525, "step": 63780 }, { "epoch": 0.11309075789531543, "grad_norm": 0.5078125, "learning_rate": 0.000664482782182432, "loss": 0.168, "step": 63782 }, { "epoch": 0.11309430406062525, "grad_norm": 0.470703125, "learning_rate": 0.0006644306972428425, "loss": 0.2099, "step": 63784 }, { "epoch": 0.11309785022593506, "grad_norm": 0.361328125, "learning_rate": 0.0006643786142081802, "loss": 0.1446, "step": 63786 }, { "epoch": 0.11310139639124488, "grad_norm": 0.23828125, "learning_rate": 0.0006643265330786727, "loss": 0.184, "step": 63788 }, { "epoch": 0.11310494255655469, "grad_norm": 0.462890625, "learning_rate": 0.000664274453854548, "loss": 0.1394, "step": 63790 }, { "epoch": 0.1131084887218645, "grad_norm": 0.458984375, "learning_rate": 0.0006642223765360333, "loss": 0.3675, "step": 63792 }, { "epoch": 0.11311203488717432, "grad_norm": 0.326171875, "learning_rate": 0.0006641703011233572, "loss": 0.1746, "step": 63794 }, { "epoch": 0.11311558105248413, "grad_norm": 0.515625, "learning_rate": 0.000664118227616747, "loss": 0.1437, "step": 63796 }, { "epoch": 0.11311912721779395, "grad_norm": 1.7578125, "learning_rate": 0.0006640661560164305, "loss": 0.1818, "step": 63798 }, { "epoch": 0.11312267338310376, "grad_norm": 0.29296875, "learning_rate": 0.0006640140863226351, "loss": 0.2547, "step": 63800 }, { "epoch": 0.11312621954841358, "grad_norm": 0.294921875, "learning_rate": 0.0006639620185355891, "loss": 0.1507, "step": 63802 }, { "epoch": 0.1131297657137234, "grad_norm": 0.7890625, "learning_rate": 0.0006639099526555201, "loss": 0.1721, "step": 63804 }, { "epoch": 0.11313331187903321, "grad_norm": 0.484375, "learning_rate": 0.0006638578886826556, "loss": 0.1405, "step": 63806 }, { "epoch": 0.11313685804434302, "grad_norm": 0.375, "learning_rate": 0.0006638058266172232, "loss": 0.1627, "step": 63808 }, { "epoch": 0.11314040420965284, "grad_norm": 0.39453125, "learning_rate": 0.0006637537664594505, "loss": 0.1942, "step": 63810 }, { "epoch": 0.11314395037496265, "grad_norm": 1.7109375, "learning_rate": 0.0006637017082095662, "loss": 0.1444, "step": 63812 }, { "epoch": 0.11314749654027247, "grad_norm": 0.2412109375, "learning_rate": 0.0006636496518677965, "loss": 0.2555, "step": 63814 }, { "epoch": 0.11315104270558228, "grad_norm": 1.1484375, "learning_rate": 0.00066359759743437, "loss": 0.1878, "step": 63816 }, { "epoch": 0.1131545888708921, "grad_norm": 0.48828125, "learning_rate": 0.0006635455449095138, "loss": 0.1525, "step": 63818 }, { "epoch": 0.11315813503620191, "grad_norm": 1.6171875, "learning_rate": 0.0006634934942934566, "loss": 0.3372, "step": 63820 }, { "epoch": 0.11316168120151172, "grad_norm": 0.265625, "learning_rate": 0.0006634414455864245, "loss": 0.1955, "step": 63822 }, { "epoch": 0.11316522736682154, "grad_norm": 0.52734375, "learning_rate": 0.0006633893987886463, "loss": 0.1497, "step": 63824 }, { "epoch": 0.11316877353213137, "grad_norm": 0.306640625, "learning_rate": 0.0006633373539003493, "loss": 0.1903, "step": 63826 }, { "epoch": 0.11317231969744118, "grad_norm": 0.671875, "learning_rate": 0.0006632853109217611, "loss": 0.1846, "step": 63828 }, { "epoch": 0.113175865862751, "grad_norm": 0.2314453125, "learning_rate": 0.0006632332698531094, "loss": 0.2278, "step": 63830 }, { "epoch": 0.11317941202806081, "grad_norm": 0.1943359375, "learning_rate": 0.000663181230694621, "loss": 0.1561, "step": 63832 }, { "epoch": 0.11318295819337063, "grad_norm": 0.408203125, "learning_rate": 0.0006631291934465249, "loss": 0.1726, "step": 63834 }, { "epoch": 0.11318650435868044, "grad_norm": 0.396484375, "learning_rate": 0.0006630771581090475, "loss": 0.2344, "step": 63836 }, { "epoch": 0.11319005052399025, "grad_norm": 0.90234375, "learning_rate": 0.0006630251246824173, "loss": 0.2266, "step": 63838 }, { "epoch": 0.11319359668930007, "grad_norm": 0.373046875, "learning_rate": 0.0006629730931668607, "loss": 0.1537, "step": 63840 }, { "epoch": 0.11319714285460988, "grad_norm": 0.234375, "learning_rate": 0.0006629210635626064, "loss": 0.1804, "step": 63842 }, { "epoch": 0.1132006890199197, "grad_norm": 0.365234375, "learning_rate": 0.0006628690358698815, "loss": 0.1684, "step": 63844 }, { "epoch": 0.11320423518522951, "grad_norm": 0.32421875, "learning_rate": 0.0006628170100889133, "loss": 0.1805, "step": 63846 }, { "epoch": 0.11320778135053933, "grad_norm": 0.41015625, "learning_rate": 0.0006627649862199299, "loss": 0.1478, "step": 63848 }, { "epoch": 0.11321132751584914, "grad_norm": 0.212890625, "learning_rate": 0.000662712964263158, "loss": 0.1618, "step": 63850 }, { "epoch": 0.11321487368115896, "grad_norm": 1.2109375, "learning_rate": 0.0006626609442188259, "loss": 0.1153, "step": 63852 }, { "epoch": 0.11321841984646877, "grad_norm": 0.38671875, "learning_rate": 0.0006626089260871606, "loss": 0.13, "step": 63854 }, { "epoch": 0.11322196601177859, "grad_norm": 0.82421875, "learning_rate": 0.00066255690986839, "loss": 0.1736, "step": 63856 }, { "epoch": 0.1132255121770884, "grad_norm": 1.4140625, "learning_rate": 0.000662504895562741, "loss": 0.2879, "step": 63858 }, { "epoch": 0.11322905834239821, "grad_norm": 0.21875, "learning_rate": 0.0006624528831704418, "loss": 0.2239, "step": 63860 }, { "epoch": 0.11323260450770803, "grad_norm": 0.65234375, "learning_rate": 0.0006624008726917196, "loss": 0.1592, "step": 63862 }, { "epoch": 0.11323615067301784, "grad_norm": 0.345703125, "learning_rate": 0.0006623488641268017, "loss": 0.2928, "step": 63864 }, { "epoch": 0.11323969683832766, "grad_norm": 0.466796875, "learning_rate": 0.0006622968574759157, "loss": 0.1852, "step": 63866 }, { "epoch": 0.11324324300363747, "grad_norm": 0.4609375, "learning_rate": 0.0006622448527392886, "loss": 0.1753, "step": 63868 }, { "epoch": 0.1132467891689473, "grad_norm": 1.953125, "learning_rate": 0.0006621928499171491, "loss": 0.187, "step": 63870 }, { "epoch": 0.11325033533425712, "grad_norm": 0.1669921875, "learning_rate": 0.0006621408490097227, "loss": 0.2105, "step": 63872 }, { "epoch": 0.11325388149956693, "grad_norm": 0.5, "learning_rate": 0.0006620888500172386, "loss": 0.1768, "step": 63874 }, { "epoch": 0.11325742766487674, "grad_norm": 0.98828125, "learning_rate": 0.000662036852939923, "loss": 0.2523, "step": 63876 }, { "epoch": 0.11326097383018656, "grad_norm": 0.384765625, "learning_rate": 0.0006619848577780046, "loss": 0.1739, "step": 63878 }, { "epoch": 0.11326451999549637, "grad_norm": 0.546875, "learning_rate": 0.0006619328645317091, "loss": 0.2181, "step": 63880 }, { "epoch": 0.11326806616080619, "grad_norm": 0.1953125, "learning_rate": 0.0006618808732012653, "loss": 0.2595, "step": 63882 }, { "epoch": 0.113271612326116, "grad_norm": 0.2333984375, "learning_rate": 0.0006618288837868999, "loss": 0.4418, "step": 63884 }, { "epoch": 0.11327515849142582, "grad_norm": 0.330078125, "learning_rate": 0.0006617768962888409, "loss": 0.1791, "step": 63886 }, { "epoch": 0.11327870465673563, "grad_norm": 0.62890625, "learning_rate": 0.0006617249107073148, "loss": 0.2371, "step": 63888 }, { "epoch": 0.11328225082204545, "grad_norm": 5.5625, "learning_rate": 0.0006616729270425491, "loss": 0.2865, "step": 63890 }, { "epoch": 0.11328579698735526, "grad_norm": 0.26171875, "learning_rate": 0.0006616209452947716, "loss": 0.1499, "step": 63892 }, { "epoch": 0.11328934315266508, "grad_norm": 0.58984375, "learning_rate": 0.0006615689654642096, "loss": 0.1609, "step": 63894 }, { "epoch": 0.11329288931797489, "grad_norm": 0.36328125, "learning_rate": 0.0006615169875510905, "loss": 0.169, "step": 63896 }, { "epoch": 0.1132964354832847, "grad_norm": 0.859375, "learning_rate": 0.0006614650115556406, "loss": 0.1742, "step": 63898 }, { "epoch": 0.11329998164859452, "grad_norm": 0.2353515625, "learning_rate": 0.0006614130374780887, "loss": 0.1606, "step": 63900 }, { "epoch": 0.11330352781390433, "grad_norm": 0.4921875, "learning_rate": 0.0006613610653186612, "loss": 0.1769, "step": 63902 }, { "epoch": 0.11330707397921415, "grad_norm": 0.2578125, "learning_rate": 0.0006613090950775858, "loss": 0.1945, "step": 63904 }, { "epoch": 0.11331062014452396, "grad_norm": 0.96875, "learning_rate": 0.0006612571267550896, "loss": 0.2203, "step": 63906 }, { "epoch": 0.11331416630983378, "grad_norm": 0.1826171875, "learning_rate": 0.0006612051603513994, "loss": 0.1282, "step": 63908 }, { "epoch": 0.11331771247514359, "grad_norm": 0.796875, "learning_rate": 0.0006611531958667437, "loss": 0.1727, "step": 63910 }, { "epoch": 0.1133212586404534, "grad_norm": 0.2060546875, "learning_rate": 0.0006611012333013482, "loss": 0.1587, "step": 63912 }, { "epoch": 0.11332480480576322, "grad_norm": 0.640625, "learning_rate": 0.0006610492726554413, "loss": 0.4733, "step": 63914 }, { "epoch": 0.11332835097107305, "grad_norm": 0.421875, "learning_rate": 0.0006609973139292498, "loss": 0.1896, "step": 63916 }, { "epoch": 0.11333189713638286, "grad_norm": 0.6640625, "learning_rate": 0.0006609453571230015, "loss": 0.1483, "step": 63918 }, { "epoch": 0.11333544330169268, "grad_norm": 1.484375, "learning_rate": 0.0006608934022369226, "loss": 0.1477, "step": 63920 }, { "epoch": 0.1133389894670025, "grad_norm": 0.373046875, "learning_rate": 0.0006608414492712411, "loss": 0.1882, "step": 63922 }, { "epoch": 0.11334253563231231, "grad_norm": 0.30859375, "learning_rate": 0.0006607894982261842, "loss": 0.1506, "step": 63924 }, { "epoch": 0.11334608179762212, "grad_norm": 2.125, "learning_rate": 0.0006607375491019788, "loss": 0.2345, "step": 63926 }, { "epoch": 0.11334962796293194, "grad_norm": 0.2216796875, "learning_rate": 0.0006606856018988523, "loss": 0.1772, "step": 63928 }, { "epoch": 0.11335317412824175, "grad_norm": 0.28515625, "learning_rate": 0.0006606336566170312, "loss": 0.1467, "step": 63930 }, { "epoch": 0.11335672029355157, "grad_norm": 0.244140625, "learning_rate": 0.0006605817132567439, "loss": 0.16, "step": 63932 }, { "epoch": 0.11336026645886138, "grad_norm": 0.380859375, "learning_rate": 0.0006605297718182168, "loss": 0.1741, "step": 63934 }, { "epoch": 0.1133638126241712, "grad_norm": 0.69921875, "learning_rate": 0.0006604778323016774, "loss": 0.1689, "step": 63936 }, { "epoch": 0.11336735878948101, "grad_norm": 0.408203125, "learning_rate": 0.000660425894707352, "loss": 0.1849, "step": 63938 }, { "epoch": 0.11337090495479082, "grad_norm": 0.2265625, "learning_rate": 0.0006603739590354689, "loss": 0.2129, "step": 63940 }, { "epoch": 0.11337445112010064, "grad_norm": 0.4921875, "learning_rate": 0.0006603220252862547, "loss": 0.1974, "step": 63942 }, { "epoch": 0.11337799728541045, "grad_norm": 0.2734375, "learning_rate": 0.0006602700934599369, "loss": 0.1795, "step": 63944 }, { "epoch": 0.11338154345072027, "grad_norm": 1.3359375, "learning_rate": 0.0006602181635567418, "loss": 0.255, "step": 63946 }, { "epoch": 0.11338508961603008, "grad_norm": 0.396484375, "learning_rate": 0.000660166235576897, "loss": 0.2048, "step": 63948 }, { "epoch": 0.1133886357813399, "grad_norm": 1.734375, "learning_rate": 0.0006601143095206297, "loss": 0.2067, "step": 63950 }, { "epoch": 0.11339218194664971, "grad_norm": 1.515625, "learning_rate": 0.000660062385388167, "loss": 0.2074, "step": 63952 }, { "epoch": 0.11339572811195953, "grad_norm": 0.248046875, "learning_rate": 0.0006600104631797359, "loss": 0.1455, "step": 63954 }, { "epoch": 0.11339927427726934, "grad_norm": 0.54296875, "learning_rate": 0.0006599585428955633, "loss": 0.2277, "step": 63956 }, { "epoch": 0.11340282044257916, "grad_norm": 0.58984375, "learning_rate": 0.0006599066245358763, "loss": 0.2174, "step": 63958 }, { "epoch": 0.11340636660788897, "grad_norm": 0.265625, "learning_rate": 0.0006598547081009023, "loss": 0.1364, "step": 63960 }, { "epoch": 0.1134099127731988, "grad_norm": 0.70703125, "learning_rate": 0.0006598027935908682, "loss": 0.1367, "step": 63962 }, { "epoch": 0.11341345893850861, "grad_norm": 0.6875, "learning_rate": 0.0006597508810060009, "loss": 0.1348, "step": 63964 }, { "epoch": 0.11341700510381843, "grad_norm": 1.140625, "learning_rate": 0.0006596989703465272, "loss": 0.1789, "step": 63966 }, { "epoch": 0.11342055126912824, "grad_norm": 0.9296875, "learning_rate": 0.0006596470616126753, "loss": 0.2106, "step": 63968 }, { "epoch": 0.11342409743443806, "grad_norm": 0.48046875, "learning_rate": 0.0006595951548046704, "loss": 0.1983, "step": 63970 }, { "epoch": 0.11342764359974787, "grad_norm": 0.7109375, "learning_rate": 0.000659543249922741, "loss": 0.1678, "step": 63972 }, { "epoch": 0.11343118976505769, "grad_norm": 1.046875, "learning_rate": 0.0006594913469671136, "loss": 0.1874, "step": 63974 }, { "epoch": 0.1134347359303675, "grad_norm": 0.66015625, "learning_rate": 0.0006594394459380151, "loss": 0.1983, "step": 63976 }, { "epoch": 0.11343828209567731, "grad_norm": 0.130859375, "learning_rate": 0.0006593875468356724, "loss": 0.1536, "step": 63978 }, { "epoch": 0.11344182826098713, "grad_norm": 0.212890625, "learning_rate": 0.0006593356496603126, "loss": 0.1622, "step": 63980 }, { "epoch": 0.11344537442629694, "grad_norm": 1.0625, "learning_rate": 0.0006592837544121629, "loss": 0.2311, "step": 63982 }, { "epoch": 0.11344892059160676, "grad_norm": 0.46484375, "learning_rate": 0.00065923186109145, "loss": 0.2403, "step": 63984 }, { "epoch": 0.11345246675691657, "grad_norm": 3.046875, "learning_rate": 0.000659179969698401, "loss": 0.222, "step": 63986 }, { "epoch": 0.11345601292222639, "grad_norm": 0.72265625, "learning_rate": 0.0006591280802332424, "loss": 0.2022, "step": 63988 }, { "epoch": 0.1134595590875362, "grad_norm": 0.51171875, "learning_rate": 0.0006590761926962019, "loss": 0.2038, "step": 63990 }, { "epoch": 0.11346310525284602, "grad_norm": 0.396484375, "learning_rate": 0.0006590243070875058, "loss": 0.2245, "step": 63992 }, { "epoch": 0.11346665141815583, "grad_norm": 0.33203125, "learning_rate": 0.0006589724234073814, "loss": 0.1993, "step": 63994 }, { "epoch": 0.11347019758346565, "grad_norm": 0.63671875, "learning_rate": 0.0006589205416560554, "loss": 0.3858, "step": 63996 }, { "epoch": 0.11347374374877546, "grad_norm": 0.6640625, "learning_rate": 0.0006588686618337544, "loss": 0.1799, "step": 63998 }, { "epoch": 0.11347728991408527, "grad_norm": 7.6875, "learning_rate": 0.0006588167839407064, "loss": 0.2839, "step": 64000 }, { "epoch": 0.11348083607939509, "grad_norm": 0.90625, "learning_rate": 0.0006587649079771368, "loss": 0.2308, "step": 64002 }, { "epoch": 0.1134843822447049, "grad_norm": 0.3671875, "learning_rate": 0.0006587130339432734, "loss": 0.307, "step": 64004 }, { "epoch": 0.11348792841001473, "grad_norm": 0.44921875, "learning_rate": 0.0006586611618393425, "loss": 0.1523, "step": 64006 }, { "epoch": 0.11349147457532455, "grad_norm": 0.30859375, "learning_rate": 0.0006586092916655721, "loss": 0.1817, "step": 64008 }, { "epoch": 0.11349502074063436, "grad_norm": 2.109375, "learning_rate": 0.0006585574234221877, "loss": 0.2758, "step": 64010 }, { "epoch": 0.11349856690594418, "grad_norm": 0.84375, "learning_rate": 0.000658505557109417, "loss": 0.2038, "step": 64012 }, { "epoch": 0.11350211307125399, "grad_norm": 0.7734375, "learning_rate": 0.0006584536927274864, "loss": 0.1707, "step": 64014 }, { "epoch": 0.1135056592365638, "grad_norm": 0.73046875, "learning_rate": 0.000658401830276623, "loss": 0.1535, "step": 64016 }, { "epoch": 0.11350920540187362, "grad_norm": 0.20703125, "learning_rate": 0.0006583499697570534, "loss": 0.1929, "step": 64018 }, { "epoch": 0.11351275156718343, "grad_norm": 1.28125, "learning_rate": 0.0006582981111690043, "loss": 0.1564, "step": 64020 }, { "epoch": 0.11351629773249325, "grad_norm": 0.353515625, "learning_rate": 0.0006582462545127028, "loss": 0.1414, "step": 64022 }, { "epoch": 0.11351984389780306, "grad_norm": 0.302734375, "learning_rate": 0.0006581943997883753, "loss": 0.1839, "step": 64024 }, { "epoch": 0.11352339006311288, "grad_norm": 0.330078125, "learning_rate": 0.0006581425469962496, "loss": 0.1741, "step": 64026 }, { "epoch": 0.11352693622842269, "grad_norm": 0.2275390625, "learning_rate": 0.0006580906961365511, "loss": 0.1912, "step": 64028 }, { "epoch": 0.11353048239373251, "grad_norm": 0.42578125, "learning_rate": 0.0006580388472095074, "loss": 0.1624, "step": 64030 }, { "epoch": 0.11353402855904232, "grad_norm": 0.353515625, "learning_rate": 0.0006579870002153454, "loss": 0.1393, "step": 64032 }, { "epoch": 0.11353757472435214, "grad_norm": 0.447265625, "learning_rate": 0.0006579351551542911, "loss": 0.1723, "step": 64034 }, { "epoch": 0.11354112088966195, "grad_norm": 0.60546875, "learning_rate": 0.000657883312026572, "loss": 0.1647, "step": 64036 }, { "epoch": 0.11354466705497177, "grad_norm": 1.7578125, "learning_rate": 0.0006578314708324137, "loss": 0.2493, "step": 64038 }, { "epoch": 0.11354821322028158, "grad_norm": 1.421875, "learning_rate": 0.0006577796315720446, "loss": 0.1869, "step": 64040 }, { "epoch": 0.1135517593855914, "grad_norm": 0.462890625, "learning_rate": 0.00065772779424569, "loss": 0.1442, "step": 64042 }, { "epoch": 0.11355530555090121, "grad_norm": 0.1298828125, "learning_rate": 0.0006576759588535774, "loss": 0.1468, "step": 64044 }, { "epoch": 0.11355885171621102, "grad_norm": 0.279296875, "learning_rate": 0.0006576241253959329, "loss": 0.1276, "step": 64046 }, { "epoch": 0.11356239788152084, "grad_norm": 0.302734375, "learning_rate": 0.0006575722938729838, "loss": 0.1854, "step": 64048 }, { "epoch": 0.11356594404683065, "grad_norm": 0.66796875, "learning_rate": 0.0006575204642849564, "loss": 0.183, "step": 64050 }, { "epoch": 0.11356949021214048, "grad_norm": 0.2255859375, "learning_rate": 0.0006574686366320777, "loss": 0.17, "step": 64052 }, { "epoch": 0.1135730363774503, "grad_norm": 0.380859375, "learning_rate": 0.000657416810914574, "loss": 0.1703, "step": 64054 }, { "epoch": 0.11357658254276011, "grad_norm": 1.4453125, "learning_rate": 0.0006573649871326716, "loss": 0.2413, "step": 64056 }, { "epoch": 0.11358012870806992, "grad_norm": 0.8046875, "learning_rate": 0.0006573131652865985, "loss": 0.1882, "step": 64058 }, { "epoch": 0.11358367487337974, "grad_norm": 1.8359375, "learning_rate": 0.0006572613453765798, "loss": 0.2331, "step": 64060 }, { "epoch": 0.11358722103868955, "grad_norm": 0.2197265625, "learning_rate": 0.000657209527402843, "loss": 0.1158, "step": 64062 }, { "epoch": 0.11359076720399937, "grad_norm": 0.62890625, "learning_rate": 0.0006571577113656142, "loss": 0.2152, "step": 64064 }, { "epoch": 0.11359431336930918, "grad_norm": 0.5546875, "learning_rate": 0.0006571058972651212, "loss": 0.2081, "step": 64066 }, { "epoch": 0.113597859534619, "grad_norm": 0.28515625, "learning_rate": 0.0006570540851015889, "loss": 0.1834, "step": 64068 }, { "epoch": 0.11360140569992881, "grad_norm": 0.291015625, "learning_rate": 0.0006570022748752451, "loss": 0.1971, "step": 64070 }, { "epoch": 0.11360495186523863, "grad_norm": 0.27734375, "learning_rate": 0.0006569504665863159, "loss": 0.2211, "step": 64072 }, { "epoch": 0.11360849803054844, "grad_norm": 0.474609375, "learning_rate": 0.0006568986602350283, "loss": 0.1442, "step": 64074 }, { "epoch": 0.11361204419585826, "grad_norm": 0.392578125, "learning_rate": 0.0006568468558216085, "loss": 0.1857, "step": 64076 }, { "epoch": 0.11361559036116807, "grad_norm": 0.76953125, "learning_rate": 0.0006567950533462825, "loss": 0.1487, "step": 64078 }, { "epoch": 0.11361913652647788, "grad_norm": 0.51171875, "learning_rate": 0.0006567432528092782, "loss": 0.2993, "step": 64080 }, { "epoch": 0.1136226826917877, "grad_norm": 0.287109375, "learning_rate": 0.0006566914542108213, "loss": 0.2171, "step": 64082 }, { "epoch": 0.11362622885709751, "grad_norm": 0.5625, "learning_rate": 0.0006566396575511385, "loss": 0.1408, "step": 64084 }, { "epoch": 0.11362977502240733, "grad_norm": 0.5546875, "learning_rate": 0.0006565878628304559, "loss": 0.2018, "step": 64086 }, { "epoch": 0.11363332118771714, "grad_norm": 0.27734375, "learning_rate": 0.0006565360700490007, "loss": 0.1691, "step": 64088 }, { "epoch": 0.11363686735302696, "grad_norm": 0.6484375, "learning_rate": 0.0006564842792069992, "loss": 0.2152, "step": 64090 }, { "epoch": 0.11364041351833677, "grad_norm": 0.39453125, "learning_rate": 0.0006564324903046779, "loss": 0.1744, "step": 64092 }, { "epoch": 0.11364395968364659, "grad_norm": 0.2890625, "learning_rate": 0.0006563807033422633, "loss": 0.429, "step": 64094 }, { "epoch": 0.1136475058489564, "grad_norm": 0.4453125, "learning_rate": 0.0006563289183199814, "loss": 0.2083, "step": 64096 }, { "epoch": 0.11365105201426623, "grad_norm": 0.244140625, "learning_rate": 0.0006562771352380597, "loss": 0.16, "step": 64098 }, { "epoch": 0.11365459817957604, "grad_norm": 0.44140625, "learning_rate": 0.0006562253540967234, "loss": 0.1626, "step": 64100 }, { "epoch": 0.11365814434488586, "grad_norm": 4.09375, "learning_rate": 0.0006561735748962001, "loss": 0.5206, "step": 64102 }, { "epoch": 0.11366169051019567, "grad_norm": 0.20703125, "learning_rate": 0.0006561217976367153, "loss": 0.1684, "step": 64104 }, { "epoch": 0.11366523667550549, "grad_norm": 1.46875, "learning_rate": 0.0006560700223184964, "loss": 0.2782, "step": 64106 }, { "epoch": 0.1136687828408153, "grad_norm": 0.6640625, "learning_rate": 0.0006560182489417692, "loss": 0.2284, "step": 64108 }, { "epoch": 0.11367232900612512, "grad_norm": 1.578125, "learning_rate": 0.0006559664775067603, "loss": 0.3153, "step": 64110 }, { "epoch": 0.11367587517143493, "grad_norm": 0.59765625, "learning_rate": 0.0006559147080136962, "loss": 0.1609, "step": 64112 }, { "epoch": 0.11367942133674475, "grad_norm": 0.232421875, "learning_rate": 0.0006558629404628028, "loss": 0.1698, "step": 64114 }, { "epoch": 0.11368296750205456, "grad_norm": 0.25, "learning_rate": 0.0006558111748543076, "loss": 0.202, "step": 64116 }, { "epoch": 0.11368651366736438, "grad_norm": 0.251953125, "learning_rate": 0.0006557594111884357, "loss": 0.172, "step": 64118 }, { "epoch": 0.11369005983267419, "grad_norm": 0.7109375, "learning_rate": 0.0006557076494654142, "loss": 0.1511, "step": 64120 }, { "epoch": 0.113693605997984, "grad_norm": 1.2890625, "learning_rate": 0.0006556558896854693, "loss": 0.2655, "step": 64122 }, { "epoch": 0.11369715216329382, "grad_norm": 0.65234375, "learning_rate": 0.000655604131848828, "loss": 0.2301, "step": 64124 }, { "epoch": 0.11370069832860363, "grad_norm": 7.375, "learning_rate": 0.0006555523759557153, "loss": 0.3932, "step": 64126 }, { "epoch": 0.11370424449391345, "grad_norm": 0.5078125, "learning_rate": 0.0006555006220063588, "loss": 0.1693, "step": 64128 }, { "epoch": 0.11370779065922326, "grad_norm": 0.255859375, "learning_rate": 0.0006554488700009847, "loss": 0.27, "step": 64130 }, { "epoch": 0.11371133682453308, "grad_norm": 2.53125, "learning_rate": 0.0006553971199398185, "loss": 0.2041, "step": 64132 }, { "epoch": 0.11371488298984289, "grad_norm": 0.498046875, "learning_rate": 0.0006553453718230873, "loss": 0.16, "step": 64134 }, { "epoch": 0.1137184291551527, "grad_norm": 2.875, "learning_rate": 0.0006552936256510167, "loss": 0.1991, "step": 64136 }, { "epoch": 0.11372197532046252, "grad_norm": 1.109375, "learning_rate": 0.0006552418814238339, "loss": 0.2025, "step": 64138 }, { "epoch": 0.11372552148577234, "grad_norm": 0.197265625, "learning_rate": 0.0006551901391417648, "loss": 0.1778, "step": 64140 }, { "epoch": 0.11372906765108216, "grad_norm": 0.6875, "learning_rate": 0.0006551383988050354, "loss": 0.28, "step": 64142 }, { "epoch": 0.11373261381639198, "grad_norm": 0.490234375, "learning_rate": 0.0006550866604138725, "loss": 0.1852, "step": 64144 }, { "epoch": 0.11373615998170179, "grad_norm": 0.318359375, "learning_rate": 0.0006550349239685017, "loss": 0.2354, "step": 64146 }, { "epoch": 0.11373970614701161, "grad_norm": 0.74609375, "learning_rate": 0.0006549831894691505, "loss": 0.1655, "step": 64148 }, { "epoch": 0.11374325231232142, "grad_norm": 0.423828125, "learning_rate": 0.0006549314569160434, "loss": 0.174, "step": 64150 }, { "epoch": 0.11374679847763124, "grad_norm": 0.41796875, "learning_rate": 0.000654879726309408, "loss": 0.1812, "step": 64152 }, { "epoch": 0.11375034464294105, "grad_norm": 2.828125, "learning_rate": 0.00065482799764947, "loss": 0.2552, "step": 64154 }, { "epoch": 0.11375389080825087, "grad_norm": 0.671875, "learning_rate": 0.0006547762709364562, "loss": 0.2073, "step": 64156 }, { "epoch": 0.11375743697356068, "grad_norm": 0.2353515625, "learning_rate": 0.0006547245461705917, "loss": 0.1131, "step": 64158 }, { "epoch": 0.1137609831388705, "grad_norm": 0.318359375, "learning_rate": 0.0006546728233521037, "loss": 0.2711, "step": 64160 }, { "epoch": 0.11376452930418031, "grad_norm": 0.5078125, "learning_rate": 0.000654621102481218, "loss": 0.1628, "step": 64162 }, { "epoch": 0.11376807546949012, "grad_norm": 0.78515625, "learning_rate": 0.0006545693835581612, "loss": 0.3171, "step": 64164 }, { "epoch": 0.11377162163479994, "grad_norm": 0.3671875, "learning_rate": 0.0006545176665831591, "loss": 0.238, "step": 64166 }, { "epoch": 0.11377516780010975, "grad_norm": 0.4296875, "learning_rate": 0.0006544659515564375, "loss": 0.1399, "step": 64168 }, { "epoch": 0.11377871396541957, "grad_norm": 0.373046875, "learning_rate": 0.0006544142384782234, "loss": 0.1598, "step": 64170 }, { "epoch": 0.11378226013072938, "grad_norm": 0.4375, "learning_rate": 0.0006543625273487426, "loss": 0.1889, "step": 64172 }, { "epoch": 0.1137858062960392, "grad_norm": 0.296875, "learning_rate": 0.0006543108181682213, "loss": 0.1613, "step": 64174 }, { "epoch": 0.11378935246134901, "grad_norm": 0.482421875, "learning_rate": 0.0006542591109368851, "loss": 0.168, "step": 64176 }, { "epoch": 0.11379289862665883, "grad_norm": 2.515625, "learning_rate": 0.0006542074056549612, "loss": 0.2437, "step": 64178 }, { "epoch": 0.11379644479196864, "grad_norm": 0.234375, "learning_rate": 0.000654155702322675, "loss": 0.1341, "step": 64180 }, { "epoch": 0.11379999095727845, "grad_norm": 0.76171875, "learning_rate": 0.0006541040009402528, "loss": 0.1735, "step": 64182 }, { "epoch": 0.11380353712258827, "grad_norm": 0.59375, "learning_rate": 0.0006540523015079208, "loss": 0.1454, "step": 64184 }, { "epoch": 0.11380708328789808, "grad_norm": 2.375, "learning_rate": 0.0006540006040259046, "loss": 0.3567, "step": 64186 }, { "epoch": 0.11381062945320791, "grad_norm": 0.40625, "learning_rate": 0.0006539489084944308, "loss": 0.2164, "step": 64188 }, { "epoch": 0.11381417561851773, "grad_norm": 0.7265625, "learning_rate": 0.0006538972149137257, "loss": 0.1454, "step": 64190 }, { "epoch": 0.11381772178382754, "grad_norm": 0.251953125, "learning_rate": 0.000653845523284015, "loss": 0.2524, "step": 64192 }, { "epoch": 0.11382126794913736, "grad_norm": 0.55078125, "learning_rate": 0.0006537938336055243, "loss": 0.2004, "step": 64194 }, { "epoch": 0.11382481411444717, "grad_norm": 0.255859375, "learning_rate": 0.0006537421458784806, "loss": 0.2305, "step": 64196 }, { "epoch": 0.11382836027975698, "grad_norm": 0.57421875, "learning_rate": 0.0006536904601031096, "loss": 0.1784, "step": 64198 }, { "epoch": 0.1138319064450668, "grad_norm": 0.9765625, "learning_rate": 0.0006536387762796371, "loss": 0.1898, "step": 64200 }, { "epoch": 0.11383545261037661, "grad_norm": 0.384765625, "learning_rate": 0.0006535870944082897, "loss": 0.1909, "step": 64202 }, { "epoch": 0.11383899877568643, "grad_norm": 0.22265625, "learning_rate": 0.0006535354144892924, "loss": 0.156, "step": 64204 }, { "epoch": 0.11384254494099624, "grad_norm": 1.3984375, "learning_rate": 0.0006534837365228727, "loss": 0.2846, "step": 64206 }, { "epoch": 0.11384609110630606, "grad_norm": 3.6875, "learning_rate": 0.000653432060509255, "loss": 0.1858, "step": 64208 }, { "epoch": 0.11384963727161587, "grad_norm": 0.546875, "learning_rate": 0.0006533803864486665, "loss": 0.2379, "step": 64210 }, { "epoch": 0.11385318343692569, "grad_norm": 1.046875, "learning_rate": 0.0006533287143413322, "loss": 0.1801, "step": 64212 }, { "epoch": 0.1138567296022355, "grad_norm": 0.70703125, "learning_rate": 0.0006532770441874796, "loss": 0.2538, "step": 64214 }, { "epoch": 0.11386027576754532, "grad_norm": 1.6484375, "learning_rate": 0.0006532253759873327, "loss": 0.1843, "step": 64216 }, { "epoch": 0.11386382193285513, "grad_norm": 0.5234375, "learning_rate": 0.0006531737097411191, "loss": 0.178, "step": 64218 }, { "epoch": 0.11386736809816494, "grad_norm": 0.255859375, "learning_rate": 0.000653122045449064, "loss": 0.2779, "step": 64220 }, { "epoch": 0.11387091426347476, "grad_norm": 0.482421875, "learning_rate": 0.0006530703831113937, "loss": 0.1856, "step": 64222 }, { "epoch": 0.11387446042878457, "grad_norm": 0.361328125, "learning_rate": 0.0006530187227283338, "loss": 0.176, "step": 64224 }, { "epoch": 0.11387800659409439, "grad_norm": 0.296875, "learning_rate": 0.00065296706430011, "loss": 0.2053, "step": 64226 }, { "epoch": 0.1138815527594042, "grad_norm": 0.58203125, "learning_rate": 0.000652915407826949, "loss": 0.1585, "step": 64228 }, { "epoch": 0.11388509892471402, "grad_norm": 0.55078125, "learning_rate": 0.0006528637533090763, "loss": 0.2179, "step": 64230 }, { "epoch": 0.11388864509002383, "grad_norm": 0.408203125, "learning_rate": 0.0006528121007467178, "loss": 0.1336, "step": 64232 }, { "epoch": 0.11389219125533366, "grad_norm": 0.5625, "learning_rate": 0.0006527604501400991, "loss": 0.1504, "step": 64234 }, { "epoch": 0.11389573742064348, "grad_norm": 0.404296875, "learning_rate": 0.0006527088014894466, "loss": 0.1955, "step": 64236 }, { "epoch": 0.11389928358595329, "grad_norm": 0.51953125, "learning_rate": 0.0006526571547949862, "loss": 0.1596, "step": 64238 }, { "epoch": 0.1139028297512631, "grad_norm": 0.26953125, "learning_rate": 0.0006526055100569435, "loss": 0.1763, "step": 64240 }, { "epoch": 0.11390637591657292, "grad_norm": 0.2216796875, "learning_rate": 0.0006525538672755443, "loss": 0.1378, "step": 64242 }, { "epoch": 0.11390992208188273, "grad_norm": 1.5546875, "learning_rate": 0.0006525022264510143, "loss": 0.2533, "step": 64244 }, { "epoch": 0.11391346824719255, "grad_norm": 1.125, "learning_rate": 0.0006524505875835803, "loss": 0.1934, "step": 64246 }, { "epoch": 0.11391701441250236, "grad_norm": 0.451171875, "learning_rate": 0.0006523989506734669, "loss": 0.3317, "step": 64248 }, { "epoch": 0.11392056057781218, "grad_norm": 0.8125, "learning_rate": 0.0006523473157209007, "loss": 0.198, "step": 64250 }, { "epoch": 0.11392410674312199, "grad_norm": 0.55859375, "learning_rate": 0.000652295682726107, "loss": 0.1538, "step": 64252 }, { "epoch": 0.1139276529084318, "grad_norm": 0.283203125, "learning_rate": 0.0006522440516893125, "loss": 0.1767, "step": 64254 }, { "epoch": 0.11393119907374162, "grad_norm": 0.392578125, "learning_rate": 0.0006521924226107421, "loss": 0.1565, "step": 64256 }, { "epoch": 0.11393474523905144, "grad_norm": 0.265625, "learning_rate": 0.0006521407954906218, "loss": 0.1425, "step": 64258 }, { "epoch": 0.11393829140436125, "grad_norm": 0.345703125, "learning_rate": 0.0006520891703291779, "loss": 0.1478, "step": 64260 }, { "epoch": 0.11394183756967106, "grad_norm": 0.30078125, "learning_rate": 0.0006520375471266355, "loss": 0.2014, "step": 64262 }, { "epoch": 0.11394538373498088, "grad_norm": 0.7890625, "learning_rate": 0.0006519859258832208, "loss": 0.2337, "step": 64264 }, { "epoch": 0.1139489299002907, "grad_norm": 0.61328125, "learning_rate": 0.0006519343065991592, "loss": 0.1793, "step": 64266 }, { "epoch": 0.11395247606560051, "grad_norm": 0.48828125, "learning_rate": 0.0006518826892746767, "loss": 0.5925, "step": 64268 }, { "epoch": 0.11395602223091032, "grad_norm": 6.71875, "learning_rate": 0.0006518310739099993, "loss": 0.3747, "step": 64270 }, { "epoch": 0.11395956839622014, "grad_norm": 0.4765625, "learning_rate": 0.0006517794605053524, "loss": 0.1756, "step": 64272 }, { "epoch": 0.11396311456152995, "grad_norm": 0.298828125, "learning_rate": 0.0006517278490609614, "loss": 0.1785, "step": 64274 }, { "epoch": 0.11396666072683977, "grad_norm": 1.71875, "learning_rate": 0.0006516762395770528, "loss": 0.1951, "step": 64276 }, { "epoch": 0.1139702068921496, "grad_norm": 0.359375, "learning_rate": 0.0006516246320538517, "loss": 0.3256, "step": 64278 }, { "epoch": 0.11397375305745941, "grad_norm": 0.333984375, "learning_rate": 0.0006515730264915842, "loss": 0.1741, "step": 64280 }, { "epoch": 0.11397729922276922, "grad_norm": 0.19921875, "learning_rate": 0.0006515214228904757, "loss": 0.2066, "step": 64282 }, { "epoch": 0.11398084538807904, "grad_norm": 0.189453125, "learning_rate": 0.000651469821250752, "loss": 0.163, "step": 64284 }, { "epoch": 0.11398439155338885, "grad_norm": 0.26171875, "learning_rate": 0.0006514182215726388, "loss": 0.2261, "step": 64286 }, { "epoch": 0.11398793771869867, "grad_norm": 0.48828125, "learning_rate": 0.0006513666238563615, "loss": 0.4611, "step": 64288 }, { "epoch": 0.11399148388400848, "grad_norm": 0.40234375, "learning_rate": 0.0006513150281021464, "loss": 0.1615, "step": 64290 }, { "epoch": 0.1139950300493183, "grad_norm": 0.267578125, "learning_rate": 0.0006512634343102182, "loss": 0.141, "step": 64292 }, { "epoch": 0.11399857621462811, "grad_norm": 0.61328125, "learning_rate": 0.0006512118424808036, "loss": 0.1487, "step": 64294 }, { "epoch": 0.11400212237993793, "grad_norm": 1.75, "learning_rate": 0.0006511602526141274, "loss": 0.284, "step": 64296 }, { "epoch": 0.11400566854524774, "grad_norm": 0.46875, "learning_rate": 0.0006511086647104159, "loss": 0.2126, "step": 64298 }, { "epoch": 0.11400921471055755, "grad_norm": 0.484375, "learning_rate": 0.0006510570787698942, "loss": 0.1664, "step": 64300 }, { "epoch": 0.11401276087586737, "grad_norm": 1.53125, "learning_rate": 0.0006510054947927875, "loss": 0.255, "step": 64302 }, { "epoch": 0.11401630704117718, "grad_norm": 0.3203125, "learning_rate": 0.0006509539127793231, "loss": 0.1904, "step": 64304 }, { "epoch": 0.114019853206487, "grad_norm": 0.1865234375, "learning_rate": 0.0006509023327297243, "loss": 0.1608, "step": 64306 }, { "epoch": 0.11402339937179681, "grad_norm": 0.453125, "learning_rate": 0.0006508507546442186, "loss": 0.1457, "step": 64308 }, { "epoch": 0.11402694553710663, "grad_norm": 0.58203125, "learning_rate": 0.0006507991785230305, "loss": 0.1926, "step": 64310 }, { "epoch": 0.11403049170241644, "grad_norm": 0.416015625, "learning_rate": 0.0006507476043663858, "loss": 0.1903, "step": 64312 }, { "epoch": 0.11403403786772626, "grad_norm": 0.546875, "learning_rate": 0.0006506960321745103, "loss": 0.2173, "step": 64314 }, { "epoch": 0.11403758403303607, "grad_norm": 1.0234375, "learning_rate": 0.000650644461947629, "loss": 0.1712, "step": 64316 }, { "epoch": 0.11404113019834589, "grad_norm": 0.31640625, "learning_rate": 0.0006505928936859682, "loss": 0.1608, "step": 64318 }, { "epoch": 0.1140446763636557, "grad_norm": 0.2431640625, "learning_rate": 0.0006505413273897529, "loss": 0.1544, "step": 64320 }, { "epoch": 0.11404822252896551, "grad_norm": 0.53125, "learning_rate": 0.0006504897630592088, "loss": 0.1998, "step": 64322 }, { "epoch": 0.11405176869427534, "grad_norm": 0.2216796875, "learning_rate": 0.000650438200694561, "loss": 0.1713, "step": 64324 }, { "epoch": 0.11405531485958516, "grad_norm": 0.1953125, "learning_rate": 0.0006503866402960359, "loss": 0.1723, "step": 64326 }, { "epoch": 0.11405886102489497, "grad_norm": 0.46484375, "learning_rate": 0.0006503350818638583, "loss": 0.1789, "step": 64328 }, { "epoch": 0.11406240719020479, "grad_norm": 0.2333984375, "learning_rate": 0.000650283525398254, "loss": 0.1731, "step": 64330 }, { "epoch": 0.1140659533555146, "grad_norm": 0.349609375, "learning_rate": 0.0006502319708994482, "loss": 0.1576, "step": 64332 }, { "epoch": 0.11406949952082442, "grad_norm": 0.35546875, "learning_rate": 0.0006501804183676662, "loss": 0.2063, "step": 64334 }, { "epoch": 0.11407304568613423, "grad_norm": 0.1455078125, "learning_rate": 0.0006501288678031344, "loss": 0.1242, "step": 64336 }, { "epoch": 0.11407659185144405, "grad_norm": 0.408203125, "learning_rate": 0.0006500773192060769, "loss": 0.2192, "step": 64338 }, { "epoch": 0.11408013801675386, "grad_norm": 0.265625, "learning_rate": 0.0006500257725767205, "loss": 0.203, "step": 64340 }, { "epoch": 0.11408368418206367, "grad_norm": 0.193359375, "learning_rate": 0.0006499742279152894, "loss": 0.1738, "step": 64342 }, { "epoch": 0.11408723034737349, "grad_norm": 0.58203125, "learning_rate": 0.0006499226852220103, "loss": 0.21, "step": 64344 }, { "epoch": 0.1140907765126833, "grad_norm": 0.2255859375, "learning_rate": 0.0006498711444971073, "loss": 0.1647, "step": 64346 }, { "epoch": 0.11409432267799312, "grad_norm": 0.671875, "learning_rate": 0.0006498196057408067, "loss": 0.1769, "step": 64348 }, { "epoch": 0.11409786884330293, "grad_norm": 0.87109375, "learning_rate": 0.0006497680689533337, "loss": 0.1325, "step": 64350 }, { "epoch": 0.11410141500861275, "grad_norm": 0.400390625, "learning_rate": 0.0006497165341349139, "loss": 0.2332, "step": 64352 }, { "epoch": 0.11410496117392256, "grad_norm": 0.337890625, "learning_rate": 0.0006496650012857721, "loss": 0.1779, "step": 64354 }, { "epoch": 0.11410850733923238, "grad_norm": 0.5078125, "learning_rate": 0.0006496134704061336, "loss": 0.1749, "step": 64356 }, { "epoch": 0.11411205350454219, "grad_norm": 0.609375, "learning_rate": 0.0006495619414962247, "loss": 0.1774, "step": 64358 }, { "epoch": 0.114115599669852, "grad_norm": 0.35546875, "learning_rate": 0.0006495104145562698, "loss": 0.1568, "step": 64360 }, { "epoch": 0.11411914583516182, "grad_norm": 0.22265625, "learning_rate": 0.0006494588895864955, "loss": 0.1842, "step": 64362 }, { "epoch": 0.11412269200047163, "grad_norm": 1.703125, "learning_rate": 0.0006494073665871255, "loss": 0.3847, "step": 64364 }, { "epoch": 0.11412623816578145, "grad_norm": 0.58984375, "learning_rate": 0.0006493558455583863, "loss": 0.1758, "step": 64366 }, { "epoch": 0.11412978433109126, "grad_norm": 2.296875, "learning_rate": 0.0006493043265005029, "loss": 0.275, "step": 64368 }, { "epoch": 0.11413333049640109, "grad_norm": 0.24609375, "learning_rate": 0.0006492528094137005, "loss": 0.1884, "step": 64370 }, { "epoch": 0.1141368766617109, "grad_norm": 0.26953125, "learning_rate": 0.0006492012942982046, "loss": 0.4317, "step": 64372 }, { "epoch": 0.11414042282702072, "grad_norm": 0.283203125, "learning_rate": 0.0006491497811542399, "loss": 0.2014, "step": 64374 }, { "epoch": 0.11414396899233054, "grad_norm": 1.421875, "learning_rate": 0.0006490982699820327, "loss": 0.2081, "step": 64376 }, { "epoch": 0.11414751515764035, "grad_norm": 0.94140625, "learning_rate": 0.0006490467607818076, "loss": 0.1993, "step": 64378 }, { "epoch": 0.11415106132295016, "grad_norm": 0.5625, "learning_rate": 0.0006489952535537899, "loss": 0.1606, "step": 64380 }, { "epoch": 0.11415460748825998, "grad_norm": 0.376953125, "learning_rate": 0.000648943748298205, "loss": 0.1805, "step": 64382 }, { "epoch": 0.1141581536535698, "grad_norm": 0.380859375, "learning_rate": 0.0006488922450152782, "loss": 0.1668, "step": 64384 }, { "epoch": 0.11416169981887961, "grad_norm": 1.0546875, "learning_rate": 0.0006488407437052348, "loss": 0.1413, "step": 64386 }, { "epoch": 0.11416524598418942, "grad_norm": 4.03125, "learning_rate": 0.0006487892443683, "loss": 0.3525, "step": 64388 }, { "epoch": 0.11416879214949924, "grad_norm": 0.75390625, "learning_rate": 0.0006487377470046989, "loss": 0.2131, "step": 64390 }, { "epoch": 0.11417233831480905, "grad_norm": 0.35546875, "learning_rate": 0.0006486862516146563, "loss": 0.2062, "step": 64392 }, { "epoch": 0.11417588448011887, "grad_norm": 0.703125, "learning_rate": 0.0006486347581983986, "loss": 0.194, "step": 64394 }, { "epoch": 0.11417943064542868, "grad_norm": 0.322265625, "learning_rate": 0.0006485832667561497, "loss": 0.155, "step": 64396 }, { "epoch": 0.1141829768107385, "grad_norm": 0.9296875, "learning_rate": 0.0006485317772881356, "loss": 0.4839, "step": 64398 }, { "epoch": 0.11418652297604831, "grad_norm": 0.609375, "learning_rate": 0.0006484802897945809, "loss": 0.2154, "step": 64400 }, { "epoch": 0.11419006914135812, "grad_norm": 0.462890625, "learning_rate": 0.000648428804275712, "loss": 0.1983, "step": 64402 }, { "epoch": 0.11419361530666794, "grad_norm": 0.357421875, "learning_rate": 0.0006483773207317525, "loss": 0.1805, "step": 64404 }, { "epoch": 0.11419716147197775, "grad_norm": 0.373046875, "learning_rate": 0.0006483258391629285, "loss": 0.192, "step": 64406 }, { "epoch": 0.11420070763728757, "grad_norm": 0.78515625, "learning_rate": 0.000648274359569465, "loss": 0.2244, "step": 64408 }, { "epoch": 0.11420425380259738, "grad_norm": 0.41796875, "learning_rate": 0.000648222881951587, "loss": 0.1772, "step": 64410 }, { "epoch": 0.1142077999679072, "grad_norm": 0.318359375, "learning_rate": 0.0006481714063095197, "loss": 0.1726, "step": 64412 }, { "epoch": 0.11421134613321703, "grad_norm": 1.1171875, "learning_rate": 0.0006481199326434879, "loss": 0.2531, "step": 64414 }, { "epoch": 0.11421489229852684, "grad_norm": 0.42578125, "learning_rate": 0.0006480684609537173, "loss": 0.1562, "step": 64416 }, { "epoch": 0.11421843846383666, "grad_norm": 0.34765625, "learning_rate": 0.0006480169912404328, "loss": 0.1645, "step": 64418 }, { "epoch": 0.11422198462914647, "grad_norm": 0.330078125, "learning_rate": 0.0006479655235038595, "loss": 0.1785, "step": 64420 }, { "epoch": 0.11422553079445628, "grad_norm": 0.259765625, "learning_rate": 0.0006479140577442219, "loss": 0.1571, "step": 64422 }, { "epoch": 0.1142290769597661, "grad_norm": 0.921875, "learning_rate": 0.0006478625939617461, "loss": 0.2167, "step": 64424 }, { "epoch": 0.11423262312507591, "grad_norm": 0.404296875, "learning_rate": 0.0006478111321566567, "loss": 0.3328, "step": 64426 }, { "epoch": 0.11423616929038573, "grad_norm": 0.34375, "learning_rate": 0.0006477596723291787, "loss": 0.211, "step": 64428 }, { "epoch": 0.11423971545569554, "grad_norm": 0.28515625, "learning_rate": 0.0006477082144795372, "loss": 0.1736, "step": 64430 }, { "epoch": 0.11424326162100536, "grad_norm": 0.408203125, "learning_rate": 0.0006476567586079569, "loss": 0.1928, "step": 64432 }, { "epoch": 0.11424680778631517, "grad_norm": 0.6796875, "learning_rate": 0.0006476053047146639, "loss": 0.1645, "step": 64434 }, { "epoch": 0.11425035395162499, "grad_norm": 0.2421875, "learning_rate": 0.0006475538527998816, "loss": 0.1793, "step": 64436 }, { "epoch": 0.1142539001169348, "grad_norm": 0.3203125, "learning_rate": 0.0006475024028638367, "loss": 0.1457, "step": 64438 }, { "epoch": 0.11425744628224462, "grad_norm": 0.28125, "learning_rate": 0.0006474509549067528, "loss": 0.1483, "step": 64440 }, { "epoch": 0.11426099244755443, "grad_norm": 1.171875, "learning_rate": 0.000647399508928856, "loss": 0.2034, "step": 64442 }, { "epoch": 0.11426453861286424, "grad_norm": 2.234375, "learning_rate": 0.0006473480649303709, "loss": 0.3978, "step": 64444 }, { "epoch": 0.11426808477817406, "grad_norm": 0.734375, "learning_rate": 0.0006472966229115223, "loss": 0.2037, "step": 64446 }, { "epoch": 0.11427163094348387, "grad_norm": 0.275390625, "learning_rate": 0.0006472451828725353, "loss": 0.1156, "step": 64448 }, { "epoch": 0.11427517710879369, "grad_norm": 0.68359375, "learning_rate": 0.0006471937448136346, "loss": 0.2555, "step": 64450 }, { "epoch": 0.1142787232741035, "grad_norm": 0.86328125, "learning_rate": 0.0006471423087350461, "loss": 0.2138, "step": 64452 }, { "epoch": 0.11428226943941332, "grad_norm": 0.2373046875, "learning_rate": 0.0006470908746369935, "loss": 0.1678, "step": 64454 }, { "epoch": 0.11428581560472313, "grad_norm": 1.640625, "learning_rate": 0.0006470394425197026, "loss": 0.1355, "step": 64456 }, { "epoch": 0.11428936177003295, "grad_norm": 0.53515625, "learning_rate": 0.0006469880123833977, "loss": 0.1403, "step": 64458 }, { "epoch": 0.11429290793534277, "grad_norm": 1.453125, "learning_rate": 0.0006469365842283048, "loss": 0.1962, "step": 64460 }, { "epoch": 0.11429645410065259, "grad_norm": 0.6015625, "learning_rate": 0.0006468851580546475, "loss": 0.5154, "step": 64462 }, { "epoch": 0.1143000002659624, "grad_norm": 0.51171875, "learning_rate": 0.0006468337338626515, "loss": 0.224, "step": 64464 }, { "epoch": 0.11430354643127222, "grad_norm": 0.4453125, "learning_rate": 0.0006467823116525417, "loss": 0.1835, "step": 64466 }, { "epoch": 0.11430709259658203, "grad_norm": 0.203125, "learning_rate": 0.0006467308914245427, "loss": 0.1581, "step": 64468 }, { "epoch": 0.11431063876189185, "grad_norm": 1.3203125, "learning_rate": 0.0006466794731788796, "loss": 0.2145, "step": 64470 }, { "epoch": 0.11431418492720166, "grad_norm": 0.21875, "learning_rate": 0.0006466280569157767, "loss": 0.1947, "step": 64472 }, { "epoch": 0.11431773109251148, "grad_norm": 0.30859375, "learning_rate": 0.0006465766426354595, "loss": 0.1315, "step": 64474 }, { "epoch": 0.11432127725782129, "grad_norm": 1.4609375, "learning_rate": 0.0006465252303381531, "loss": 0.2793, "step": 64476 }, { "epoch": 0.1143248234231311, "grad_norm": 0.3046875, "learning_rate": 0.0006464738200240816, "loss": 0.1795, "step": 64478 }, { "epoch": 0.11432836958844092, "grad_norm": 0.55859375, "learning_rate": 0.00064642241169347, "loss": 0.2796, "step": 64480 }, { "epoch": 0.11433191575375073, "grad_norm": 0.38671875, "learning_rate": 0.0006463710053465437, "loss": 0.1428, "step": 64482 }, { "epoch": 0.11433546191906055, "grad_norm": 0.302734375, "learning_rate": 0.000646319600983527, "loss": 0.1345, "step": 64484 }, { "epoch": 0.11433900808437036, "grad_norm": 0.6328125, "learning_rate": 0.0006462681986046449, "loss": 0.2052, "step": 64486 }, { "epoch": 0.11434255424968018, "grad_norm": 0.3828125, "learning_rate": 0.000646216798210122, "loss": 0.2865, "step": 64488 }, { "epoch": 0.11434610041498999, "grad_norm": 0.5703125, "learning_rate": 0.000646165399800183, "loss": 0.1653, "step": 64490 }, { "epoch": 0.11434964658029981, "grad_norm": 0.267578125, "learning_rate": 0.0006461140033750536, "loss": 0.1569, "step": 64492 }, { "epoch": 0.11435319274560962, "grad_norm": 0.4453125, "learning_rate": 0.0006460626089349571, "loss": 0.3044, "step": 64494 }, { "epoch": 0.11435673891091944, "grad_norm": 0.5625, "learning_rate": 0.0006460112164801196, "loss": 0.1598, "step": 64496 }, { "epoch": 0.11436028507622925, "grad_norm": 0.28125, "learning_rate": 0.0006459598260107653, "loss": 0.1669, "step": 64498 }, { "epoch": 0.11436383124153907, "grad_norm": 1.171875, "learning_rate": 0.000645908437527119, "loss": 0.1534, "step": 64500 }, { "epoch": 0.11436737740684888, "grad_norm": 0.408203125, "learning_rate": 0.0006458570510294053, "loss": 0.1846, "step": 64502 }, { "epoch": 0.1143709235721587, "grad_norm": 0.30859375, "learning_rate": 0.0006458056665178489, "loss": 0.147, "step": 64504 }, { "epoch": 0.11437446973746852, "grad_norm": 0.2216796875, "learning_rate": 0.0006457542839926748, "loss": 0.197, "step": 64506 }, { "epoch": 0.11437801590277834, "grad_norm": 0.333984375, "learning_rate": 0.0006457029034541078, "loss": 0.2446, "step": 64508 }, { "epoch": 0.11438156206808815, "grad_norm": 0.1767578125, "learning_rate": 0.0006456515249023722, "loss": 0.1302, "step": 64510 }, { "epoch": 0.11438510823339797, "grad_norm": 0.337890625, "learning_rate": 0.0006456001483376928, "loss": 0.1656, "step": 64512 }, { "epoch": 0.11438865439870778, "grad_norm": 0.93359375, "learning_rate": 0.0006455487737602948, "loss": 0.2181, "step": 64514 }, { "epoch": 0.1143922005640176, "grad_norm": 0.251953125, "learning_rate": 0.0006454974011704022, "loss": 0.1595, "step": 64516 }, { "epoch": 0.11439574672932741, "grad_norm": 0.375, "learning_rate": 0.0006454460305682401, "loss": 0.1356, "step": 64518 }, { "epoch": 0.11439929289463723, "grad_norm": 0.392578125, "learning_rate": 0.0006453946619540329, "loss": 0.1861, "step": 64520 }, { "epoch": 0.11440283905994704, "grad_norm": 0.2001953125, "learning_rate": 0.0006453432953280052, "loss": 0.1792, "step": 64522 }, { "epoch": 0.11440638522525685, "grad_norm": 0.294921875, "learning_rate": 0.0006452919306903822, "loss": 0.1526, "step": 64524 }, { "epoch": 0.11440993139056667, "grad_norm": 0.3203125, "learning_rate": 0.0006452405680413882, "loss": 0.3094, "step": 64526 }, { "epoch": 0.11441347755587648, "grad_norm": 0.48046875, "learning_rate": 0.0006451892073812476, "loss": 0.1585, "step": 64528 }, { "epoch": 0.1144170237211863, "grad_norm": 0.58203125, "learning_rate": 0.000645137848710185, "loss": 0.2307, "step": 64530 }, { "epoch": 0.11442056988649611, "grad_norm": 0.1875, "learning_rate": 0.0006450864920284256, "loss": 0.1696, "step": 64532 }, { "epoch": 0.11442411605180593, "grad_norm": 0.482421875, "learning_rate": 0.0006450351373361936, "loss": 0.1972, "step": 64534 }, { "epoch": 0.11442766221711574, "grad_norm": 0.72265625, "learning_rate": 0.0006449837846337137, "loss": 0.1756, "step": 64536 }, { "epoch": 0.11443120838242556, "grad_norm": 0.298828125, "learning_rate": 0.0006449324339212102, "loss": 0.196, "step": 64538 }, { "epoch": 0.11443475454773537, "grad_norm": 0.21484375, "learning_rate": 0.0006448810851989078, "loss": 0.1656, "step": 64540 }, { "epoch": 0.11443830071304519, "grad_norm": 0.49609375, "learning_rate": 0.0006448297384670317, "loss": 0.238, "step": 64542 }, { "epoch": 0.114441846878355, "grad_norm": 2.671875, "learning_rate": 0.0006447783937258054, "loss": 0.2135, "step": 64544 }, { "epoch": 0.11444539304366481, "grad_norm": 0.326171875, "learning_rate": 0.0006447270509754541, "loss": 0.2389, "step": 64546 }, { "epoch": 0.11444893920897463, "grad_norm": 0.1923828125, "learning_rate": 0.0006446757102162022, "loss": 0.1952, "step": 64548 }, { "epoch": 0.11445248537428446, "grad_norm": 0.2333984375, "learning_rate": 0.0006446243714482747, "loss": 0.1734, "step": 64550 }, { "epoch": 0.11445603153959427, "grad_norm": 0.64453125, "learning_rate": 0.0006445730346718951, "loss": 0.142, "step": 64552 }, { "epoch": 0.11445957770490409, "grad_norm": 0.54296875, "learning_rate": 0.0006445216998872888, "loss": 0.1782, "step": 64554 }, { "epoch": 0.1144631238702139, "grad_norm": 0.359375, "learning_rate": 0.0006444703670946801, "loss": 0.1478, "step": 64556 }, { "epoch": 0.11446667003552372, "grad_norm": 4.09375, "learning_rate": 0.0006444190362942932, "loss": 0.2035, "step": 64558 }, { "epoch": 0.11447021620083353, "grad_norm": 0.73046875, "learning_rate": 0.000644367707486353, "loss": 0.2298, "step": 64560 }, { "epoch": 0.11447376236614334, "grad_norm": 2.03125, "learning_rate": 0.0006443163806710833, "loss": 0.2603, "step": 64562 }, { "epoch": 0.11447730853145316, "grad_norm": 0.48828125, "learning_rate": 0.0006442650558487095, "loss": 0.1862, "step": 64564 }, { "epoch": 0.11448085469676297, "grad_norm": 0.2138671875, "learning_rate": 0.0006442137330194556, "loss": 0.1533, "step": 64566 }, { "epoch": 0.11448440086207279, "grad_norm": 0.88671875, "learning_rate": 0.0006441624121835461, "loss": 0.2169, "step": 64568 }, { "epoch": 0.1144879470273826, "grad_norm": 0.68359375, "learning_rate": 0.0006441110933412052, "loss": 0.1854, "step": 64570 }, { "epoch": 0.11449149319269242, "grad_norm": 0.228515625, "learning_rate": 0.0006440597764926577, "loss": 0.1657, "step": 64572 }, { "epoch": 0.11449503935800223, "grad_norm": 0.48828125, "learning_rate": 0.000644008461638128, "loss": 0.1421, "step": 64574 }, { "epoch": 0.11449858552331205, "grad_norm": 5.6875, "learning_rate": 0.0006439571487778404, "loss": 0.2898, "step": 64576 }, { "epoch": 0.11450213168862186, "grad_norm": 0.421875, "learning_rate": 0.0006439058379120192, "loss": 0.2022, "step": 64578 }, { "epoch": 0.11450567785393168, "grad_norm": 0.2734375, "learning_rate": 0.0006438545290408888, "loss": 0.1429, "step": 64580 }, { "epoch": 0.11450922401924149, "grad_norm": 0.302734375, "learning_rate": 0.0006438032221646742, "loss": 0.1795, "step": 64582 }, { "epoch": 0.1145127701845513, "grad_norm": 0.953125, "learning_rate": 0.0006437519172835988, "loss": 0.2114, "step": 64584 }, { "epoch": 0.11451631634986112, "grad_norm": 1.2109375, "learning_rate": 0.0006437006143978878, "loss": 0.2488, "step": 64586 }, { "epoch": 0.11451986251517093, "grad_norm": 0.470703125, "learning_rate": 0.000643649313507765, "loss": 0.1816, "step": 64588 }, { "epoch": 0.11452340868048075, "grad_norm": 0.2490234375, "learning_rate": 0.0006435980146134554, "loss": 0.2228, "step": 64590 }, { "epoch": 0.11452695484579056, "grad_norm": 0.3125, "learning_rate": 0.0006435467177151827, "loss": 0.1942, "step": 64592 }, { "epoch": 0.11453050101110038, "grad_norm": 0.53515625, "learning_rate": 0.0006434954228131716, "loss": 0.1569, "step": 64594 }, { "epoch": 0.1145340471764102, "grad_norm": 0.482421875, "learning_rate": 0.0006434441299076466, "loss": 0.1796, "step": 64596 }, { "epoch": 0.11453759334172002, "grad_norm": 2.59375, "learning_rate": 0.0006433928389988315, "loss": 0.4351, "step": 64598 }, { "epoch": 0.11454113950702983, "grad_norm": 1.3359375, "learning_rate": 0.0006433415500869509, "loss": 0.2393, "step": 64600 }, { "epoch": 0.11454468567233965, "grad_norm": 0.2470703125, "learning_rate": 0.0006432902631722288, "loss": 0.161, "step": 64602 }, { "epoch": 0.11454823183764946, "grad_norm": 0.435546875, "learning_rate": 0.0006432389782548903, "loss": 0.2255, "step": 64604 }, { "epoch": 0.11455177800295928, "grad_norm": 0.439453125, "learning_rate": 0.000643187695335159, "loss": 0.1477, "step": 64606 }, { "epoch": 0.1145553241682691, "grad_norm": 0.6640625, "learning_rate": 0.0006431364144132596, "loss": 0.1744, "step": 64608 }, { "epoch": 0.11455887033357891, "grad_norm": 1.4296875, "learning_rate": 0.0006430851354894156, "loss": 0.2238, "step": 64610 }, { "epoch": 0.11456241649888872, "grad_norm": 0.875, "learning_rate": 0.0006430338585638522, "loss": 0.2412, "step": 64612 }, { "epoch": 0.11456596266419854, "grad_norm": 0.19140625, "learning_rate": 0.0006429825836367932, "loss": 0.1614, "step": 64614 }, { "epoch": 0.11456950882950835, "grad_norm": 0.2890625, "learning_rate": 0.0006429313107084631, "loss": 0.1808, "step": 64616 }, { "epoch": 0.11457305499481817, "grad_norm": 0.32421875, "learning_rate": 0.0006428800397790857, "loss": 0.2034, "step": 64618 }, { "epoch": 0.11457660116012798, "grad_norm": 0.2333984375, "learning_rate": 0.0006428287708488854, "loss": 0.1633, "step": 64620 }, { "epoch": 0.1145801473254378, "grad_norm": 0.625, "learning_rate": 0.0006427775039180866, "loss": 0.2123, "step": 64622 }, { "epoch": 0.11458369349074761, "grad_norm": 0.3203125, "learning_rate": 0.0006427262389869136, "loss": 0.2051, "step": 64624 }, { "epoch": 0.11458723965605742, "grad_norm": 0.984375, "learning_rate": 0.0006426749760555903, "loss": 0.1772, "step": 64626 }, { "epoch": 0.11459078582136724, "grad_norm": 0.1513671875, "learning_rate": 0.0006426237151243406, "loss": 0.1432, "step": 64628 }, { "epoch": 0.11459433198667705, "grad_norm": 0.29296875, "learning_rate": 0.0006425724561933897, "loss": 0.1503, "step": 64630 }, { "epoch": 0.11459787815198687, "grad_norm": 0.2333984375, "learning_rate": 0.0006425211992629609, "loss": 0.1312, "step": 64632 }, { "epoch": 0.11460142431729668, "grad_norm": 1.296875, "learning_rate": 0.0006424699443332789, "loss": 0.2014, "step": 64634 }, { "epoch": 0.1146049704826065, "grad_norm": 1.1953125, "learning_rate": 0.0006424186914045675, "loss": 0.16, "step": 64636 }, { "epoch": 0.11460851664791631, "grad_norm": 0.53515625, "learning_rate": 0.0006423674404770505, "loss": 0.1363, "step": 64638 }, { "epoch": 0.11461206281322613, "grad_norm": 0.259765625, "learning_rate": 0.0006423161915509532, "loss": 0.2579, "step": 64640 }, { "epoch": 0.11461560897853595, "grad_norm": 0.703125, "learning_rate": 0.0006422649446264984, "loss": 0.1984, "step": 64642 }, { "epoch": 0.11461915514384577, "grad_norm": 0.267578125, "learning_rate": 0.0006422136997039111, "loss": 0.1728, "step": 64644 }, { "epoch": 0.11462270130915558, "grad_norm": 0.56640625, "learning_rate": 0.0006421624567834147, "loss": 0.1644, "step": 64646 }, { "epoch": 0.1146262474744654, "grad_norm": 0.302734375, "learning_rate": 0.0006421112158652348, "loss": 0.1736, "step": 64648 }, { "epoch": 0.11462979363977521, "grad_norm": 0.3828125, "learning_rate": 0.0006420599769495937, "loss": 0.1577, "step": 64650 }, { "epoch": 0.11463333980508503, "grad_norm": 7.40625, "learning_rate": 0.0006420087400367165, "loss": 0.2437, "step": 64652 }, { "epoch": 0.11463688597039484, "grad_norm": 0.427734375, "learning_rate": 0.000641957505126827, "loss": 0.1765, "step": 64654 }, { "epoch": 0.11464043213570466, "grad_norm": 0.75390625, "learning_rate": 0.0006419062722201494, "loss": 0.2012, "step": 64656 }, { "epoch": 0.11464397830101447, "grad_norm": 0.25, "learning_rate": 0.0006418550413169077, "loss": 0.2394, "step": 64658 }, { "epoch": 0.11464752446632429, "grad_norm": 0.291015625, "learning_rate": 0.0006418038124173255, "loss": 0.1719, "step": 64660 }, { "epoch": 0.1146510706316341, "grad_norm": 0.171875, "learning_rate": 0.0006417525855216276, "loss": 0.2029, "step": 64662 }, { "epoch": 0.11465461679694391, "grad_norm": 0.1826171875, "learning_rate": 0.0006417013606300377, "loss": 0.1405, "step": 64664 }, { "epoch": 0.11465816296225373, "grad_norm": 0.234375, "learning_rate": 0.0006416501377427799, "loss": 0.1443, "step": 64666 }, { "epoch": 0.11466170912756354, "grad_norm": 0.625, "learning_rate": 0.0006415989168600782, "loss": 0.2174, "step": 64668 }, { "epoch": 0.11466525529287336, "grad_norm": 0.66796875, "learning_rate": 0.0006415476979821561, "loss": 0.2184, "step": 64670 }, { "epoch": 0.11466880145818317, "grad_norm": 0.171875, "learning_rate": 0.000641496481109239, "loss": 0.1799, "step": 64672 }, { "epoch": 0.11467234762349299, "grad_norm": 0.470703125, "learning_rate": 0.000641445266241549, "loss": 0.1781, "step": 64674 }, { "epoch": 0.1146758937888028, "grad_norm": 0.5078125, "learning_rate": 0.0006413940533793115, "loss": 0.1521, "step": 64676 }, { "epoch": 0.11467943995411262, "grad_norm": 0.302734375, "learning_rate": 0.0006413428425227496, "loss": 0.1768, "step": 64678 }, { "epoch": 0.11468298611942243, "grad_norm": 0.921875, "learning_rate": 0.0006412916336720885, "loss": 0.2241, "step": 64680 }, { "epoch": 0.11468653228473225, "grad_norm": 0.7421875, "learning_rate": 0.0006412404268275507, "loss": 0.169, "step": 64682 }, { "epoch": 0.11469007845004206, "grad_norm": 0.458984375, "learning_rate": 0.0006411892219893609, "loss": 0.1447, "step": 64684 }, { "epoch": 0.11469362461535189, "grad_norm": 0.703125, "learning_rate": 0.0006411380191577432, "loss": 0.2612, "step": 64686 }, { "epoch": 0.1146971707806617, "grad_norm": 2.125, "learning_rate": 0.0006410868183329211, "loss": 0.2603, "step": 64688 }, { "epoch": 0.11470071694597152, "grad_norm": 0.2021484375, "learning_rate": 0.0006410356195151187, "loss": 0.2164, "step": 64690 }, { "epoch": 0.11470426311128133, "grad_norm": 0.48046875, "learning_rate": 0.0006409844227045594, "loss": 0.1317, "step": 64692 }, { "epoch": 0.11470780927659115, "grad_norm": 0.349609375, "learning_rate": 0.0006409332279014683, "loss": 0.1882, "step": 64694 }, { "epoch": 0.11471135544190096, "grad_norm": 1.6171875, "learning_rate": 0.0006408820351060681, "loss": 0.2768, "step": 64696 }, { "epoch": 0.11471490160721078, "grad_norm": 0.59765625, "learning_rate": 0.0006408308443185837, "loss": 0.1659, "step": 64698 }, { "epoch": 0.11471844777252059, "grad_norm": 0.5, "learning_rate": 0.000640779655539238, "loss": 0.1824, "step": 64700 }, { "epoch": 0.1147219939378304, "grad_norm": 0.216796875, "learning_rate": 0.0006407284687682557, "loss": 0.133, "step": 64702 }, { "epoch": 0.11472554010314022, "grad_norm": 1.2421875, "learning_rate": 0.0006406772840058603, "loss": 0.2909, "step": 64704 }, { "epoch": 0.11472908626845003, "grad_norm": 0.59375, "learning_rate": 0.0006406261012522756, "loss": 0.2224, "step": 64706 }, { "epoch": 0.11473263243375985, "grad_norm": 2.1875, "learning_rate": 0.0006405749205077254, "loss": 0.1931, "step": 64708 }, { "epoch": 0.11473617859906966, "grad_norm": 0.255859375, "learning_rate": 0.0006405237417724335, "loss": 0.1802, "step": 64710 }, { "epoch": 0.11473972476437948, "grad_norm": 0.53125, "learning_rate": 0.0006404725650466239, "loss": 0.1413, "step": 64712 }, { "epoch": 0.11474327092968929, "grad_norm": 1.3828125, "learning_rate": 0.0006404213903305207, "loss": 0.1875, "step": 64714 }, { "epoch": 0.1147468170949991, "grad_norm": 1.453125, "learning_rate": 0.0006403702176243471, "loss": 0.2228, "step": 64716 }, { "epoch": 0.11475036326030892, "grad_norm": 0.26171875, "learning_rate": 0.000640319046928327, "loss": 0.1417, "step": 64718 }, { "epoch": 0.11475390942561874, "grad_norm": 0.412109375, "learning_rate": 0.0006402678782426848, "loss": 0.2244, "step": 64720 }, { "epoch": 0.11475745559092855, "grad_norm": 0.357421875, "learning_rate": 0.0006402167115676437, "loss": 0.1451, "step": 64722 }, { "epoch": 0.11476100175623836, "grad_norm": 3.1875, "learning_rate": 0.0006401655469034275, "loss": 0.2971, "step": 64724 }, { "epoch": 0.11476454792154818, "grad_norm": 0.64453125, "learning_rate": 0.0006401143842502604, "loss": 0.1944, "step": 64726 }, { "epoch": 0.114768094086858, "grad_norm": 0.5234375, "learning_rate": 0.0006400632236083654, "loss": 0.2099, "step": 64728 }, { "epoch": 0.11477164025216781, "grad_norm": 0.451171875, "learning_rate": 0.0006400120649779674, "loss": 0.2299, "step": 64730 }, { "epoch": 0.11477518641747764, "grad_norm": 2.0, "learning_rate": 0.0006399609083592886, "loss": 0.3642, "step": 64732 }, { "epoch": 0.11477873258278745, "grad_norm": 1.46875, "learning_rate": 0.000639909753752554, "loss": 0.1955, "step": 64734 }, { "epoch": 0.11478227874809727, "grad_norm": 0.65234375, "learning_rate": 0.0006398586011579865, "loss": 0.2081, "step": 64736 }, { "epoch": 0.11478582491340708, "grad_norm": 0.57421875, "learning_rate": 0.0006398074505758111, "loss": 0.1624, "step": 64738 }, { "epoch": 0.1147893710787169, "grad_norm": 0.5625, "learning_rate": 0.0006397563020062497, "loss": 0.1986, "step": 64740 }, { "epoch": 0.11479291724402671, "grad_norm": 0.2265625, "learning_rate": 0.0006397051554495272, "loss": 0.1191, "step": 64742 }, { "epoch": 0.11479646340933652, "grad_norm": 0.5390625, "learning_rate": 0.0006396540109058673, "loss": 0.3271, "step": 64744 }, { "epoch": 0.11480000957464634, "grad_norm": 0.23046875, "learning_rate": 0.000639602868375493, "loss": 0.1868, "step": 64746 }, { "epoch": 0.11480355573995615, "grad_norm": 0.359375, "learning_rate": 0.0006395517278586284, "loss": 0.1308, "step": 64748 }, { "epoch": 0.11480710190526597, "grad_norm": 0.59765625, "learning_rate": 0.0006395005893554968, "loss": 0.286, "step": 64750 }, { "epoch": 0.11481064807057578, "grad_norm": 0.1220703125, "learning_rate": 0.0006394494528663225, "loss": 0.1252, "step": 64752 }, { "epoch": 0.1148141942358856, "grad_norm": 0.45703125, "learning_rate": 0.0006393983183913288, "loss": 0.2069, "step": 64754 }, { "epoch": 0.11481774040119541, "grad_norm": 0.4765625, "learning_rate": 0.0006393471859307392, "loss": 0.305, "step": 64756 }, { "epoch": 0.11482128656650523, "grad_norm": 0.486328125, "learning_rate": 0.0006392960554847771, "loss": 0.1725, "step": 64758 }, { "epoch": 0.11482483273181504, "grad_norm": 0.953125, "learning_rate": 0.0006392449270536669, "loss": 0.1911, "step": 64760 }, { "epoch": 0.11482837889712486, "grad_norm": 0.3125, "learning_rate": 0.0006391938006376315, "loss": 0.1862, "step": 64762 }, { "epoch": 0.11483192506243467, "grad_norm": 0.640625, "learning_rate": 0.000639142676236895, "loss": 0.2303, "step": 64764 }, { "epoch": 0.11483547122774448, "grad_norm": 0.21875, "learning_rate": 0.0006390915538516807, "loss": 0.2061, "step": 64766 }, { "epoch": 0.1148390173930543, "grad_norm": 0.62890625, "learning_rate": 0.0006390404334822117, "loss": 0.1808, "step": 64768 }, { "epoch": 0.11484256355836411, "grad_norm": 0.349609375, "learning_rate": 0.0006389893151287129, "loss": 0.1661, "step": 64770 }, { "epoch": 0.11484610972367393, "grad_norm": 0.671875, "learning_rate": 0.0006389381987914064, "loss": 0.2978, "step": 64772 }, { "epoch": 0.11484965588898374, "grad_norm": 0.5703125, "learning_rate": 0.0006388870844705168, "loss": 0.1528, "step": 64774 }, { "epoch": 0.11485320205429356, "grad_norm": 0.240234375, "learning_rate": 0.0006388359721662669, "loss": 0.1266, "step": 64776 }, { "epoch": 0.11485674821960339, "grad_norm": 0.6171875, "learning_rate": 0.0006387848618788808, "loss": 0.1523, "step": 64778 }, { "epoch": 0.1148602943849132, "grad_norm": 0.6171875, "learning_rate": 0.0006387337536085817, "loss": 0.2078, "step": 64780 }, { "epoch": 0.11486384055022301, "grad_norm": 0.7890625, "learning_rate": 0.0006386826473555934, "loss": 0.2098, "step": 64782 }, { "epoch": 0.11486738671553283, "grad_norm": 0.51953125, "learning_rate": 0.0006386315431201393, "loss": 0.2135, "step": 64784 }, { "epoch": 0.11487093288084264, "grad_norm": 0.33203125, "learning_rate": 0.0006385804409024423, "loss": 0.1801, "step": 64786 }, { "epoch": 0.11487447904615246, "grad_norm": 0.353515625, "learning_rate": 0.0006385293407027273, "loss": 0.1974, "step": 64788 }, { "epoch": 0.11487802521146227, "grad_norm": 0.298828125, "learning_rate": 0.0006384782425212162, "loss": 0.2193, "step": 64790 }, { "epoch": 0.11488157137677209, "grad_norm": 0.5703125, "learning_rate": 0.0006384271463581334, "loss": 0.1699, "step": 64792 }, { "epoch": 0.1148851175420819, "grad_norm": 0.310546875, "learning_rate": 0.0006383760522137017, "loss": 0.1857, "step": 64794 }, { "epoch": 0.11488866370739172, "grad_norm": 0.83203125, "learning_rate": 0.000638324960088146, "loss": 0.2057, "step": 64796 }, { "epoch": 0.11489220987270153, "grad_norm": 0.412109375, "learning_rate": 0.0006382738699816879, "loss": 0.1584, "step": 64798 }, { "epoch": 0.11489575603801135, "grad_norm": 1.0390625, "learning_rate": 0.0006382227818945521, "loss": 0.2039, "step": 64800 }, { "epoch": 0.11489930220332116, "grad_norm": 0.4140625, "learning_rate": 0.0006381716958269616, "loss": 0.1901, "step": 64802 }, { "epoch": 0.11490284836863097, "grad_norm": 0.578125, "learning_rate": 0.0006381206117791399, "loss": 0.2009, "step": 64804 }, { "epoch": 0.11490639453394079, "grad_norm": 0.224609375, "learning_rate": 0.0006380695297513102, "loss": 0.1502, "step": 64806 }, { "epoch": 0.1149099406992506, "grad_norm": 0.35546875, "learning_rate": 0.0006380184497436956, "loss": 0.2074, "step": 64808 }, { "epoch": 0.11491348686456042, "grad_norm": 1.109375, "learning_rate": 0.0006379673717565207, "loss": 0.2233, "step": 64810 }, { "epoch": 0.11491703302987023, "grad_norm": 0.65234375, "learning_rate": 0.0006379162957900078, "loss": 0.185, "step": 64812 }, { "epoch": 0.11492057919518005, "grad_norm": 0.9296875, "learning_rate": 0.0006378652218443808, "loss": 0.2009, "step": 64814 }, { "epoch": 0.11492412536048986, "grad_norm": 1.96875, "learning_rate": 0.0006378141499198624, "loss": 0.1987, "step": 64816 }, { "epoch": 0.11492767152579968, "grad_norm": 0.283203125, "learning_rate": 0.0006377630800166767, "loss": 0.1581, "step": 64818 }, { "epoch": 0.11493121769110949, "grad_norm": 0.5, "learning_rate": 0.000637712012135047, "loss": 0.1282, "step": 64820 }, { "epoch": 0.11493476385641932, "grad_norm": 3.671875, "learning_rate": 0.0006376609462751964, "loss": 0.2011, "step": 64822 }, { "epoch": 0.11493831002172913, "grad_norm": 0.6015625, "learning_rate": 0.0006376098824373482, "loss": 0.2015, "step": 64824 }, { "epoch": 0.11494185618703895, "grad_norm": 0.478515625, "learning_rate": 0.0006375588206217255, "loss": 0.1888, "step": 64826 }, { "epoch": 0.11494540235234876, "grad_norm": 0.51953125, "learning_rate": 0.0006375077608285527, "loss": 0.2068, "step": 64828 }, { "epoch": 0.11494894851765858, "grad_norm": 0.66796875, "learning_rate": 0.0006374567030580513, "loss": 0.1988, "step": 64830 }, { "epoch": 0.11495249468296839, "grad_norm": 0.2080078125, "learning_rate": 0.0006374056473104462, "loss": 0.165, "step": 64832 }, { "epoch": 0.11495604084827821, "grad_norm": 1.15625, "learning_rate": 0.0006373545935859596, "loss": 0.1535, "step": 64834 }, { "epoch": 0.11495958701358802, "grad_norm": 0.55859375, "learning_rate": 0.000637303541884816, "loss": 0.1458, "step": 64836 }, { "epoch": 0.11496313317889784, "grad_norm": 0.22265625, "learning_rate": 0.0006372524922072374, "loss": 0.1761, "step": 64838 }, { "epoch": 0.11496667934420765, "grad_norm": 0.44921875, "learning_rate": 0.0006372014445534478, "loss": 0.2572, "step": 64840 }, { "epoch": 0.11497022550951747, "grad_norm": 0.40234375, "learning_rate": 0.0006371503989236702, "loss": 0.1594, "step": 64842 }, { "epoch": 0.11497377167482728, "grad_norm": 1.359375, "learning_rate": 0.0006370993553181279, "loss": 0.2996, "step": 64844 }, { "epoch": 0.1149773178401371, "grad_norm": 0.17578125, "learning_rate": 0.0006370483137370445, "loss": 0.1911, "step": 64846 }, { "epoch": 0.11498086400544691, "grad_norm": 0.162109375, "learning_rate": 0.000636997274180642, "loss": 0.2421, "step": 64848 }, { "epoch": 0.11498441017075672, "grad_norm": 0.4453125, "learning_rate": 0.0006369462366491452, "loss": 0.1609, "step": 64850 }, { "epoch": 0.11498795633606654, "grad_norm": 0.3046875, "learning_rate": 0.0006368952011427763, "loss": 0.2142, "step": 64852 }, { "epoch": 0.11499150250137635, "grad_norm": 0.349609375, "learning_rate": 0.0006368441676617592, "loss": 0.2288, "step": 64854 }, { "epoch": 0.11499504866668617, "grad_norm": 0.6171875, "learning_rate": 0.0006367931362063163, "loss": 0.2263, "step": 64856 }, { "epoch": 0.11499859483199598, "grad_norm": 1.2890625, "learning_rate": 0.0006367421067766708, "loss": 0.1841, "step": 64858 }, { "epoch": 0.1150021409973058, "grad_norm": 0.3515625, "learning_rate": 0.0006366910793730468, "loss": 0.1928, "step": 64860 }, { "epoch": 0.11500568716261561, "grad_norm": 0.36328125, "learning_rate": 0.0006366400539956667, "loss": 0.1593, "step": 64862 }, { "epoch": 0.11500923332792543, "grad_norm": 1.703125, "learning_rate": 0.000636589030644754, "loss": 0.2986, "step": 64864 }, { "epoch": 0.11501277949323524, "grad_norm": 0.29296875, "learning_rate": 0.0006365380093205313, "loss": 0.1711, "step": 64866 }, { "epoch": 0.11501632565854507, "grad_norm": 0.4921875, "learning_rate": 0.0006364869900232225, "loss": 0.5591, "step": 64868 }, { "epoch": 0.11501987182385488, "grad_norm": 0.62109375, "learning_rate": 0.0006364359727530504, "loss": 0.1426, "step": 64870 }, { "epoch": 0.1150234179891647, "grad_norm": 0.4609375, "learning_rate": 0.0006363849575102379, "loss": 0.1898, "step": 64872 }, { "epoch": 0.11502696415447451, "grad_norm": 0.322265625, "learning_rate": 0.0006363339442950085, "loss": 0.2195, "step": 64874 }, { "epoch": 0.11503051031978433, "grad_norm": 0.314453125, "learning_rate": 0.0006362829331075846, "loss": 0.1537, "step": 64876 }, { "epoch": 0.11503405648509414, "grad_norm": 0.50390625, "learning_rate": 0.0006362319239481908, "loss": 0.3677, "step": 64878 }, { "epoch": 0.11503760265040396, "grad_norm": 0.478515625, "learning_rate": 0.0006361809168170482, "loss": 0.1801, "step": 64880 }, { "epoch": 0.11504114881571377, "grad_norm": 0.349609375, "learning_rate": 0.0006361299117143812, "loss": 0.1999, "step": 64882 }, { "epoch": 0.11504469498102358, "grad_norm": 0.30859375, "learning_rate": 0.0006360789086404123, "loss": 0.1875, "step": 64884 }, { "epoch": 0.1150482411463334, "grad_norm": 0.27734375, "learning_rate": 0.0006360279075953652, "loss": 0.157, "step": 64886 }, { "epoch": 0.11505178731164321, "grad_norm": 0.259765625, "learning_rate": 0.0006359769085794622, "loss": 0.1939, "step": 64888 }, { "epoch": 0.11505533347695303, "grad_norm": 0.349609375, "learning_rate": 0.0006359259115929268, "loss": 0.174, "step": 64890 }, { "epoch": 0.11505887964226284, "grad_norm": 0.314453125, "learning_rate": 0.000635874916635982, "loss": 0.1613, "step": 64892 }, { "epoch": 0.11506242580757266, "grad_norm": 0.2080078125, "learning_rate": 0.0006358239237088506, "loss": 0.1848, "step": 64894 }, { "epoch": 0.11506597197288247, "grad_norm": 0.2578125, "learning_rate": 0.0006357729328117558, "loss": 0.1383, "step": 64896 }, { "epoch": 0.11506951813819229, "grad_norm": 0.232421875, "learning_rate": 0.0006357219439449202, "loss": 0.1629, "step": 64898 }, { "epoch": 0.1150730643035021, "grad_norm": 0.37890625, "learning_rate": 0.0006356709571085674, "loss": 0.1581, "step": 64900 }, { "epoch": 0.11507661046881192, "grad_norm": 0.28125, "learning_rate": 0.0006356199723029203, "loss": 0.1265, "step": 64902 }, { "epoch": 0.11508015663412173, "grad_norm": 1.140625, "learning_rate": 0.0006355689895282015, "loss": 0.1963, "step": 64904 }, { "epoch": 0.11508370279943154, "grad_norm": 0.5, "learning_rate": 0.0006355180087846337, "loss": 0.1989, "step": 64906 }, { "epoch": 0.11508724896474136, "grad_norm": 0.61328125, "learning_rate": 0.000635467030072441, "loss": 0.1926, "step": 64908 }, { "epoch": 0.11509079513005117, "grad_norm": 1.3125, "learning_rate": 0.0006354160533918456, "loss": 0.2343, "step": 64910 }, { "epoch": 0.11509434129536099, "grad_norm": 0.77734375, "learning_rate": 0.0006353650787430705, "loss": 0.1715, "step": 64912 }, { "epoch": 0.11509788746067082, "grad_norm": 0.546875, "learning_rate": 0.0006353141061263385, "loss": 0.2006, "step": 64914 }, { "epoch": 0.11510143362598063, "grad_norm": 0.455078125, "learning_rate": 0.0006352631355418724, "loss": 0.1558, "step": 64916 }, { "epoch": 0.11510497979129045, "grad_norm": 0.462890625, "learning_rate": 0.0006352121669898962, "loss": 0.1747, "step": 64918 }, { "epoch": 0.11510852595660026, "grad_norm": 1.9921875, "learning_rate": 0.0006351612004706313, "loss": 0.2321, "step": 64920 }, { "epoch": 0.11511207212191008, "grad_norm": 0.291015625, "learning_rate": 0.0006351102359843015, "loss": 0.154, "step": 64922 }, { "epoch": 0.11511561828721989, "grad_norm": 1.046875, "learning_rate": 0.0006350592735311293, "loss": 0.1868, "step": 64924 }, { "epoch": 0.1151191644525297, "grad_norm": 0.3515625, "learning_rate": 0.0006350083131113385, "loss": 0.1722, "step": 64926 }, { "epoch": 0.11512271061783952, "grad_norm": 1.0625, "learning_rate": 0.0006349573547251503, "loss": 0.2022, "step": 64928 }, { "epoch": 0.11512625678314933, "grad_norm": 0.369140625, "learning_rate": 0.0006349063983727889, "loss": 0.1787, "step": 64930 }, { "epoch": 0.11512980294845915, "grad_norm": 0.353515625, "learning_rate": 0.0006348554440544769, "loss": 0.1457, "step": 64932 }, { "epoch": 0.11513334911376896, "grad_norm": 0.2236328125, "learning_rate": 0.000634804491770437, "loss": 0.1454, "step": 64934 }, { "epoch": 0.11513689527907878, "grad_norm": 0.25390625, "learning_rate": 0.0006347535415208918, "loss": 0.2066, "step": 64936 }, { "epoch": 0.11514044144438859, "grad_norm": 0.58203125, "learning_rate": 0.0006347025933060644, "loss": 0.1834, "step": 64938 }, { "epoch": 0.1151439876096984, "grad_norm": 2.0625, "learning_rate": 0.0006346516471261776, "loss": 0.2299, "step": 64940 }, { "epoch": 0.11514753377500822, "grad_norm": 0.17578125, "learning_rate": 0.000634600702981454, "loss": 0.1613, "step": 64942 }, { "epoch": 0.11515107994031804, "grad_norm": 0.5546875, "learning_rate": 0.0006345497608721172, "loss": 0.1398, "step": 64944 }, { "epoch": 0.11515462610562785, "grad_norm": 0.27734375, "learning_rate": 0.0006344988207983888, "loss": 0.1886, "step": 64946 }, { "epoch": 0.11515817227093766, "grad_norm": 0.412109375, "learning_rate": 0.0006344478827604923, "loss": 0.1915, "step": 64948 }, { "epoch": 0.11516171843624748, "grad_norm": 0.328125, "learning_rate": 0.0006343969467586506, "loss": 0.2121, "step": 64950 }, { "epoch": 0.1151652646015573, "grad_norm": 0.57421875, "learning_rate": 0.000634346012793086, "loss": 0.1574, "step": 64952 }, { "epoch": 0.11516881076686711, "grad_norm": 0.50390625, "learning_rate": 0.0006342950808640215, "loss": 0.1971, "step": 64954 }, { "epoch": 0.11517235693217692, "grad_norm": 0.23828125, "learning_rate": 0.0006342441509716797, "loss": 0.1426, "step": 64956 }, { "epoch": 0.11517590309748675, "grad_norm": 0.3359375, "learning_rate": 0.0006341932231162835, "loss": 0.1844, "step": 64958 }, { "epoch": 0.11517944926279657, "grad_norm": 0.41015625, "learning_rate": 0.0006341422972980555, "loss": 0.2569, "step": 64960 }, { "epoch": 0.11518299542810638, "grad_norm": 1.2734375, "learning_rate": 0.0006340913735172187, "loss": 0.1818, "step": 64962 }, { "epoch": 0.1151865415934162, "grad_norm": 0.56640625, "learning_rate": 0.0006340404517739953, "loss": 0.2474, "step": 64964 }, { "epoch": 0.11519008775872601, "grad_norm": 0.36328125, "learning_rate": 0.0006339895320686084, "loss": 0.3018, "step": 64966 }, { "epoch": 0.11519363392403582, "grad_norm": 0.5625, "learning_rate": 0.0006339386144012807, "loss": 0.1517, "step": 64968 }, { "epoch": 0.11519718008934564, "grad_norm": 0.400390625, "learning_rate": 0.0006338876987722348, "loss": 0.1711, "step": 64970 }, { "epoch": 0.11520072625465545, "grad_norm": 0.5078125, "learning_rate": 0.0006338367851816935, "loss": 0.215, "step": 64972 }, { "epoch": 0.11520427241996527, "grad_norm": 0.28125, "learning_rate": 0.0006337858736298787, "loss": 0.2138, "step": 64974 }, { "epoch": 0.11520781858527508, "grad_norm": 0.29296875, "learning_rate": 0.0006337349641170145, "loss": 0.1624, "step": 64976 }, { "epoch": 0.1152113647505849, "grad_norm": 0.470703125, "learning_rate": 0.0006336840566433221, "loss": 0.1871, "step": 64978 }, { "epoch": 0.11521491091589471, "grad_norm": 0.24609375, "learning_rate": 0.000633633151209025, "loss": 0.595, "step": 64980 }, { "epoch": 0.11521845708120453, "grad_norm": 0.373046875, "learning_rate": 0.0006335822478143454, "loss": 0.1831, "step": 64982 }, { "epoch": 0.11522200324651434, "grad_norm": 0.423828125, "learning_rate": 0.0006335313464595067, "loss": 0.1785, "step": 64984 }, { "epoch": 0.11522554941182415, "grad_norm": 0.875, "learning_rate": 0.0006334804471447302, "loss": 0.2014, "step": 64986 }, { "epoch": 0.11522909557713397, "grad_norm": 1.3125, "learning_rate": 0.0006334295498702398, "loss": 0.2262, "step": 64988 }, { "epoch": 0.11523264174244378, "grad_norm": 0.2578125, "learning_rate": 0.0006333786546362572, "loss": 0.2509, "step": 64990 }, { "epoch": 0.1152361879077536, "grad_norm": 1.0078125, "learning_rate": 0.0006333277614430055, "loss": 0.2956, "step": 64992 }, { "epoch": 0.11523973407306341, "grad_norm": 0.392578125, "learning_rate": 0.0006332768702907072, "loss": 0.2454, "step": 64994 }, { "epoch": 0.11524328023837323, "grad_norm": 0.7421875, "learning_rate": 0.0006332259811795844, "loss": 0.1684, "step": 64996 }, { "epoch": 0.11524682640368304, "grad_norm": 0.3046875, "learning_rate": 0.0006331750941098601, "loss": 0.204, "step": 64998 }, { "epoch": 0.11525037256899286, "grad_norm": 1.1171875, "learning_rate": 0.000633124209081757, "loss": 0.2613, "step": 65000 }, { "epoch": 0.11525391873430267, "grad_norm": 0.4765625, "learning_rate": 0.0006330733260954976, "loss": 0.1892, "step": 65002 }, { "epoch": 0.1152574648996125, "grad_norm": 0.40625, "learning_rate": 0.0006330224451513036, "loss": 0.165, "step": 65004 }, { "epoch": 0.11526101106492231, "grad_norm": 0.234375, "learning_rate": 0.0006329715662493985, "loss": 0.1436, "step": 65006 }, { "epoch": 0.11526455723023213, "grad_norm": 1.46875, "learning_rate": 0.0006329206893900045, "loss": 0.2027, "step": 65008 }, { "epoch": 0.11526810339554194, "grad_norm": 3.796875, "learning_rate": 0.0006328698145733442, "loss": 0.2567, "step": 65010 }, { "epoch": 0.11527164956085176, "grad_norm": 0.421875, "learning_rate": 0.0006328189417996401, "loss": 0.204, "step": 65012 }, { "epoch": 0.11527519572616157, "grad_norm": 0.26171875, "learning_rate": 0.000632768071069114, "loss": 0.1533, "step": 65014 }, { "epoch": 0.11527874189147139, "grad_norm": 0.357421875, "learning_rate": 0.0006327172023819898, "loss": 0.174, "step": 65016 }, { "epoch": 0.1152822880567812, "grad_norm": 0.83984375, "learning_rate": 0.0006326663357384883, "loss": 0.1611, "step": 65018 }, { "epoch": 0.11528583422209102, "grad_norm": 103.5, "learning_rate": 0.0006326154711388332, "loss": 0.4111, "step": 65020 }, { "epoch": 0.11528938038740083, "grad_norm": 0.251953125, "learning_rate": 0.0006325646085832466, "loss": 0.1964, "step": 65022 }, { "epoch": 0.11529292655271065, "grad_norm": 0.67578125, "learning_rate": 0.000632513748071951, "loss": 0.2145, "step": 65024 }, { "epoch": 0.11529647271802046, "grad_norm": 0.3984375, "learning_rate": 0.0006324628896051684, "loss": 0.1622, "step": 65026 }, { "epoch": 0.11530001888333027, "grad_norm": 0.5703125, "learning_rate": 0.0006324120331831213, "loss": 0.2036, "step": 65028 }, { "epoch": 0.11530356504864009, "grad_norm": 0.306640625, "learning_rate": 0.0006323611788060328, "loss": 0.1746, "step": 65030 }, { "epoch": 0.1153071112139499, "grad_norm": 0.318359375, "learning_rate": 0.0006323103264741246, "loss": 0.1297, "step": 65032 }, { "epoch": 0.11531065737925972, "grad_norm": 0.345703125, "learning_rate": 0.00063225947618762, "loss": 0.2164, "step": 65034 }, { "epoch": 0.11531420354456953, "grad_norm": 0.53125, "learning_rate": 0.0006322086279467398, "loss": 0.1449, "step": 65036 }, { "epoch": 0.11531774970987935, "grad_norm": 0.59765625, "learning_rate": 0.0006321577817517079, "loss": 0.1438, "step": 65038 }, { "epoch": 0.11532129587518916, "grad_norm": 1.015625, "learning_rate": 0.0006321069376027461, "loss": 0.235, "step": 65040 }, { "epoch": 0.11532484204049898, "grad_norm": 0.396484375, "learning_rate": 0.0006320560955000767, "loss": 0.2548, "step": 65042 }, { "epoch": 0.11532838820580879, "grad_norm": 0.439453125, "learning_rate": 0.0006320052554439222, "loss": 0.2803, "step": 65044 }, { "epoch": 0.1153319343711186, "grad_norm": 0.5625, "learning_rate": 0.0006319544174345046, "loss": 0.1661, "step": 65046 }, { "epoch": 0.11533548053642842, "grad_norm": 0.462890625, "learning_rate": 0.0006319035814720466, "loss": 0.2614, "step": 65048 }, { "epoch": 0.11533902670173825, "grad_norm": 0.41796875, "learning_rate": 0.0006318527475567706, "loss": 0.1782, "step": 65050 }, { "epoch": 0.11534257286704806, "grad_norm": 0.1884765625, "learning_rate": 0.0006318019156888988, "loss": 0.2338, "step": 65052 }, { "epoch": 0.11534611903235788, "grad_norm": 0.546875, "learning_rate": 0.0006317510858686531, "loss": 0.1602, "step": 65054 }, { "epoch": 0.11534966519766769, "grad_norm": 0.33984375, "learning_rate": 0.0006317002580962564, "loss": 0.4512, "step": 65056 }, { "epoch": 0.1153532113629775, "grad_norm": 0.5546875, "learning_rate": 0.0006316494323719309, "loss": 0.2182, "step": 65058 }, { "epoch": 0.11535675752828732, "grad_norm": 0.2734375, "learning_rate": 0.0006315986086958986, "loss": 0.1646, "step": 65060 }, { "epoch": 0.11536030369359714, "grad_norm": 0.703125, "learning_rate": 0.0006315477870683819, "loss": 0.2083, "step": 65062 }, { "epoch": 0.11536384985890695, "grad_norm": 0.2255859375, "learning_rate": 0.0006314969674896027, "loss": 0.1759, "step": 65064 }, { "epoch": 0.11536739602421676, "grad_norm": 0.2451171875, "learning_rate": 0.0006314461499597845, "loss": 0.1675, "step": 65066 }, { "epoch": 0.11537094218952658, "grad_norm": 1.09375, "learning_rate": 0.0006313953344791479, "loss": 0.2, "step": 65068 }, { "epoch": 0.1153744883548364, "grad_norm": 0.1337890625, "learning_rate": 0.000631344521047916, "loss": 0.1131, "step": 65070 }, { "epoch": 0.11537803452014621, "grad_norm": 0.255859375, "learning_rate": 0.000631293709666311, "loss": 0.1205, "step": 65072 }, { "epoch": 0.11538158068545602, "grad_norm": 1.2578125, "learning_rate": 0.0006312429003345556, "loss": 0.1924, "step": 65074 }, { "epoch": 0.11538512685076584, "grad_norm": 0.31640625, "learning_rate": 0.0006311920930528706, "loss": 0.1994, "step": 65076 }, { "epoch": 0.11538867301607565, "grad_norm": 0.55078125, "learning_rate": 0.0006311412878214796, "loss": 0.1469, "step": 65078 }, { "epoch": 0.11539221918138547, "grad_norm": 2.40625, "learning_rate": 0.0006310904846406043, "loss": 0.2627, "step": 65080 }, { "epoch": 0.11539576534669528, "grad_norm": 0.423828125, "learning_rate": 0.0006310396835104666, "loss": 0.2043, "step": 65082 }, { "epoch": 0.1153993115120051, "grad_norm": 0.2734375, "learning_rate": 0.0006309888844312892, "loss": 0.1588, "step": 65084 }, { "epoch": 0.11540285767731491, "grad_norm": 0.451171875, "learning_rate": 0.0006309380874032935, "loss": 0.3368, "step": 65086 }, { "epoch": 0.11540640384262472, "grad_norm": 0.546875, "learning_rate": 0.0006308872924267024, "loss": 0.1828, "step": 65088 }, { "epoch": 0.11540995000793454, "grad_norm": 0.310546875, "learning_rate": 0.0006308364995017379, "loss": 0.1821, "step": 65090 }, { "epoch": 0.11541349617324435, "grad_norm": 0.48046875, "learning_rate": 0.0006307857086286222, "loss": 0.1639, "step": 65092 }, { "epoch": 0.11541704233855417, "grad_norm": 0.298828125, "learning_rate": 0.0006307349198075767, "loss": 0.1315, "step": 65094 }, { "epoch": 0.115420588503864, "grad_norm": 0.59765625, "learning_rate": 0.0006306841330388244, "loss": 0.1652, "step": 65096 }, { "epoch": 0.11542413466917381, "grad_norm": 0.306640625, "learning_rate": 0.0006306333483225872, "loss": 0.1633, "step": 65098 }, { "epoch": 0.11542768083448363, "grad_norm": 0.21875, "learning_rate": 0.000630582565659087, "loss": 0.1495, "step": 65100 }, { "epoch": 0.11543122699979344, "grad_norm": 0.236328125, "learning_rate": 0.0006305317850485462, "loss": 0.1744, "step": 65102 }, { "epoch": 0.11543477316510325, "grad_norm": 0.263671875, "learning_rate": 0.0006304810064911859, "loss": 0.185, "step": 65104 }, { "epoch": 0.11543831933041307, "grad_norm": 0.2314453125, "learning_rate": 0.00063043022998723, "loss": 0.2888, "step": 65106 }, { "epoch": 0.11544186549572288, "grad_norm": 0.267578125, "learning_rate": 0.0006303794555368987, "loss": 0.1944, "step": 65108 }, { "epoch": 0.1154454116610327, "grad_norm": 0.333984375, "learning_rate": 0.0006303286831404151, "loss": 0.1753, "step": 65110 }, { "epoch": 0.11544895782634251, "grad_norm": 0.412109375, "learning_rate": 0.000630277912798001, "loss": 0.1792, "step": 65112 }, { "epoch": 0.11545250399165233, "grad_norm": 0.4765625, "learning_rate": 0.0006302271445098786, "loss": 0.2225, "step": 65114 }, { "epoch": 0.11545605015696214, "grad_norm": 0.2412109375, "learning_rate": 0.0006301763782762698, "loss": 0.189, "step": 65116 }, { "epoch": 0.11545959632227196, "grad_norm": 0.482421875, "learning_rate": 0.0006301256140973967, "loss": 0.1589, "step": 65118 }, { "epoch": 0.11546314248758177, "grad_norm": 0.25, "learning_rate": 0.0006300748519734811, "loss": 0.1419, "step": 65120 }, { "epoch": 0.11546668865289159, "grad_norm": 0.388671875, "learning_rate": 0.0006300240919047449, "loss": 0.198, "step": 65122 }, { "epoch": 0.1154702348182014, "grad_norm": 0.294921875, "learning_rate": 0.0006299733338914109, "loss": 0.2042, "step": 65124 }, { "epoch": 0.11547378098351122, "grad_norm": 0.283203125, "learning_rate": 0.0006299225779336999, "loss": 0.1635, "step": 65126 }, { "epoch": 0.11547732714882103, "grad_norm": 0.56640625, "learning_rate": 0.0006298718240318349, "loss": 0.1738, "step": 65128 }, { "epoch": 0.11548087331413084, "grad_norm": 0.322265625, "learning_rate": 0.0006298210721860369, "loss": 0.1601, "step": 65130 }, { "epoch": 0.11548441947944066, "grad_norm": 0.3828125, "learning_rate": 0.0006297703223965294, "loss": 0.1472, "step": 65132 }, { "epoch": 0.11548796564475047, "grad_norm": 0.419921875, "learning_rate": 0.0006297195746635325, "loss": 0.1349, "step": 65134 }, { "epoch": 0.11549151181006029, "grad_norm": 1.1953125, "learning_rate": 0.0006296688289872695, "loss": 0.1794, "step": 65136 }, { "epoch": 0.1154950579753701, "grad_norm": 1.4140625, "learning_rate": 0.0006296180853679616, "loss": 0.3218, "step": 65138 }, { "epoch": 0.11549860414067993, "grad_norm": 0.30859375, "learning_rate": 0.0006295673438058312, "loss": 0.1756, "step": 65140 }, { "epoch": 0.11550215030598975, "grad_norm": 0.263671875, "learning_rate": 0.0006295166043010997, "loss": 0.1158, "step": 65142 }, { "epoch": 0.11550569647129956, "grad_norm": 4.3125, "learning_rate": 0.0006294658668539893, "loss": 0.3023, "step": 65144 }, { "epoch": 0.11550924263660937, "grad_norm": 0.353515625, "learning_rate": 0.0006294151314647218, "loss": 0.1873, "step": 65146 }, { "epoch": 0.11551278880191919, "grad_norm": 0.44140625, "learning_rate": 0.0006293643981335195, "loss": 0.1969, "step": 65148 }, { "epoch": 0.115516334967229, "grad_norm": 1.7578125, "learning_rate": 0.0006293136668606039, "loss": 0.1209, "step": 65150 }, { "epoch": 0.11551988113253882, "grad_norm": 0.400390625, "learning_rate": 0.0006292629376461965, "loss": 0.2053, "step": 65152 }, { "epoch": 0.11552342729784863, "grad_norm": 0.5625, "learning_rate": 0.0006292122104905198, "loss": 0.2173, "step": 65154 }, { "epoch": 0.11552697346315845, "grad_norm": 1.5625, "learning_rate": 0.0006291614853937956, "loss": 0.6187, "step": 65156 }, { "epoch": 0.11553051962846826, "grad_norm": 0.2451171875, "learning_rate": 0.0006291107623562455, "loss": 0.1586, "step": 65158 }, { "epoch": 0.11553406579377808, "grad_norm": 1.234375, "learning_rate": 0.0006290600413780915, "loss": 0.1866, "step": 65160 }, { "epoch": 0.11553761195908789, "grad_norm": 0.58203125, "learning_rate": 0.0006290093224595549, "loss": 0.4168, "step": 65162 }, { "epoch": 0.1155411581243977, "grad_norm": 0.30078125, "learning_rate": 0.0006289586056008585, "loss": 0.1626, "step": 65164 }, { "epoch": 0.11554470428970752, "grad_norm": 0.427734375, "learning_rate": 0.000628907890802223, "loss": 0.1345, "step": 65166 }, { "epoch": 0.11554825045501733, "grad_norm": 0.1748046875, "learning_rate": 0.0006288571780638712, "loss": 0.1587, "step": 65168 }, { "epoch": 0.11555179662032715, "grad_norm": 0.3515625, "learning_rate": 0.0006288064673860239, "loss": 0.1753, "step": 65170 }, { "epoch": 0.11555534278563696, "grad_norm": 0.28515625, "learning_rate": 0.0006287557587689041, "loss": 0.1741, "step": 65172 }, { "epoch": 0.11555888895094678, "grad_norm": 0.357421875, "learning_rate": 0.0006287050522127323, "loss": 0.132, "step": 65174 }, { "epoch": 0.11556243511625659, "grad_norm": 0.609375, "learning_rate": 0.0006286543477177311, "loss": 0.1184, "step": 65176 }, { "epoch": 0.11556598128156641, "grad_norm": 0.6328125, "learning_rate": 0.0006286036452841221, "loss": 0.1706, "step": 65178 }, { "epoch": 0.11556952744687622, "grad_norm": 0.59375, "learning_rate": 0.0006285529449121267, "loss": 0.1941, "step": 65180 }, { "epoch": 0.11557307361218604, "grad_norm": 0.25390625, "learning_rate": 0.0006285022466019672, "loss": 0.1666, "step": 65182 }, { "epoch": 0.11557661977749585, "grad_norm": 0.59375, "learning_rate": 0.0006284515503538644, "loss": 0.169, "step": 65184 }, { "epoch": 0.11558016594280568, "grad_norm": 0.384765625, "learning_rate": 0.0006284008561680411, "loss": 0.163, "step": 65186 }, { "epoch": 0.1155837121081155, "grad_norm": 0.40625, "learning_rate": 0.0006283501640447184, "loss": 0.1587, "step": 65188 }, { "epoch": 0.11558725827342531, "grad_norm": 0.3125, "learning_rate": 0.0006282994739841181, "loss": 0.2023, "step": 65190 }, { "epoch": 0.11559080443873512, "grad_norm": 1.9609375, "learning_rate": 0.0006282487859864618, "loss": 0.2872, "step": 65192 }, { "epoch": 0.11559435060404494, "grad_norm": 0.369140625, "learning_rate": 0.0006281981000519714, "loss": 0.1759, "step": 65194 }, { "epoch": 0.11559789676935475, "grad_norm": 0.3515625, "learning_rate": 0.0006281474161808684, "loss": 0.1835, "step": 65196 }, { "epoch": 0.11560144293466457, "grad_norm": 0.83203125, "learning_rate": 0.0006280967343733748, "loss": 0.2014, "step": 65198 }, { "epoch": 0.11560498909997438, "grad_norm": 0.4765625, "learning_rate": 0.0006280460546297118, "loss": 0.1621, "step": 65200 }, { "epoch": 0.1156085352652842, "grad_norm": 0.310546875, "learning_rate": 0.0006279953769501009, "loss": 0.1865, "step": 65202 }, { "epoch": 0.11561208143059401, "grad_norm": 1.2421875, "learning_rate": 0.0006279447013347643, "loss": 0.1844, "step": 65204 }, { "epoch": 0.11561562759590382, "grad_norm": 0.330078125, "learning_rate": 0.0006278940277839234, "loss": 0.3805, "step": 65206 }, { "epoch": 0.11561917376121364, "grad_norm": 0.6953125, "learning_rate": 0.0006278433562977999, "loss": 0.1822, "step": 65208 }, { "epoch": 0.11562271992652345, "grad_norm": 0.263671875, "learning_rate": 0.0006277926868766153, "loss": 0.2349, "step": 65210 }, { "epoch": 0.11562626609183327, "grad_norm": 0.33984375, "learning_rate": 0.0006277420195205907, "loss": 0.2049, "step": 65212 }, { "epoch": 0.11562981225714308, "grad_norm": 0.49609375, "learning_rate": 0.000627691354229949, "loss": 0.2301, "step": 65214 }, { "epoch": 0.1156333584224529, "grad_norm": 0.30859375, "learning_rate": 0.0006276406910049101, "loss": 0.2187, "step": 65216 }, { "epoch": 0.11563690458776271, "grad_norm": 2.265625, "learning_rate": 0.000627590029845697, "loss": 0.2443, "step": 65218 }, { "epoch": 0.11564045075307253, "grad_norm": 0.41796875, "learning_rate": 0.0006275393707525303, "loss": 0.176, "step": 65220 }, { "epoch": 0.11564399691838234, "grad_norm": 0.17578125, "learning_rate": 0.0006274887137256327, "loss": 0.1518, "step": 65222 }, { "epoch": 0.11564754308369216, "grad_norm": 0.287109375, "learning_rate": 0.0006274380587652242, "loss": 0.209, "step": 65224 }, { "epoch": 0.11565108924900197, "grad_norm": 0.412109375, "learning_rate": 0.0006273874058715274, "loss": 0.1849, "step": 65226 }, { "epoch": 0.11565463541431178, "grad_norm": 0.6015625, "learning_rate": 0.0006273367550447637, "loss": 0.1518, "step": 65228 }, { "epoch": 0.1156581815796216, "grad_norm": 1.6171875, "learning_rate": 0.0006272861062851545, "loss": 0.2536, "step": 65230 }, { "epoch": 0.11566172774493143, "grad_norm": 0.416015625, "learning_rate": 0.0006272354595929213, "loss": 0.2543, "step": 65232 }, { "epoch": 0.11566527391024124, "grad_norm": 1.4296875, "learning_rate": 0.000627184814968285, "loss": 0.1996, "step": 65234 }, { "epoch": 0.11566882007555106, "grad_norm": 0.2431640625, "learning_rate": 0.0006271341724114684, "loss": 0.1224, "step": 65236 }, { "epoch": 0.11567236624086087, "grad_norm": 0.50390625, "learning_rate": 0.0006270835319226919, "loss": 0.2423, "step": 65238 }, { "epoch": 0.11567591240617069, "grad_norm": 0.38671875, "learning_rate": 0.0006270328935021775, "loss": 0.2069, "step": 65240 }, { "epoch": 0.1156794585714805, "grad_norm": 2.578125, "learning_rate": 0.0006269822571501462, "loss": 0.2197, "step": 65242 }, { "epoch": 0.11568300473679032, "grad_norm": 0.35546875, "learning_rate": 0.00062693162286682, "loss": 0.1388, "step": 65244 }, { "epoch": 0.11568655090210013, "grad_norm": 0.56640625, "learning_rate": 0.00062688099065242, "loss": 0.2003, "step": 65246 }, { "epoch": 0.11569009706740994, "grad_norm": 0.419921875, "learning_rate": 0.0006268303605071679, "loss": 0.1368, "step": 65248 }, { "epoch": 0.11569364323271976, "grad_norm": 0.98828125, "learning_rate": 0.0006267797324312849, "loss": 0.2074, "step": 65250 }, { "epoch": 0.11569718939802957, "grad_norm": 0.3125, "learning_rate": 0.000626729106424992, "loss": 0.1794, "step": 65252 }, { "epoch": 0.11570073556333939, "grad_norm": 1.03125, "learning_rate": 0.0006266784824885119, "loss": 0.1865, "step": 65254 }, { "epoch": 0.1157042817286492, "grad_norm": 0.302734375, "learning_rate": 0.0006266278606220645, "loss": 0.1759, "step": 65256 }, { "epoch": 0.11570782789395902, "grad_norm": 0.3671875, "learning_rate": 0.0006265772408258723, "loss": 0.1301, "step": 65258 }, { "epoch": 0.11571137405926883, "grad_norm": 0.515625, "learning_rate": 0.0006265266231001559, "loss": 0.1581, "step": 65260 }, { "epoch": 0.11571492022457865, "grad_norm": 1.265625, "learning_rate": 0.0006264760074451374, "loss": 0.2188, "step": 65262 }, { "epoch": 0.11571846638988846, "grad_norm": 0.337890625, "learning_rate": 0.0006264253938610373, "loss": 0.1383, "step": 65264 }, { "epoch": 0.11572201255519828, "grad_norm": 0.2294921875, "learning_rate": 0.0006263747823480779, "loss": 0.1821, "step": 65266 }, { "epoch": 0.11572555872050809, "grad_norm": 0.609375, "learning_rate": 0.0006263241729064799, "loss": 0.2287, "step": 65268 }, { "epoch": 0.1157291048858179, "grad_norm": 0.50390625, "learning_rate": 0.0006262735655364649, "loss": 0.1608, "step": 65270 }, { "epoch": 0.11573265105112772, "grad_norm": 0.42578125, "learning_rate": 0.0006262229602382543, "loss": 0.1944, "step": 65272 }, { "epoch": 0.11573619721643753, "grad_norm": 1.0859375, "learning_rate": 0.0006261723570120688, "loss": 0.1948, "step": 65274 }, { "epoch": 0.11573974338174736, "grad_norm": 0.52734375, "learning_rate": 0.0006261217558581304, "loss": 0.1723, "step": 65276 }, { "epoch": 0.11574328954705718, "grad_norm": 1.3984375, "learning_rate": 0.00062607115677666, "loss": 0.1988, "step": 65278 }, { "epoch": 0.11574683571236699, "grad_norm": 1.328125, "learning_rate": 0.0006260205597678796, "loss": 0.2654, "step": 65280 }, { "epoch": 0.1157503818776768, "grad_norm": 0.369140625, "learning_rate": 0.0006259699648320094, "loss": 0.1821, "step": 65282 }, { "epoch": 0.11575392804298662, "grad_norm": 1.0546875, "learning_rate": 0.0006259193719692715, "loss": 0.1971, "step": 65284 }, { "epoch": 0.11575747420829643, "grad_norm": 1.4609375, "learning_rate": 0.000625868781179887, "loss": 0.182, "step": 65286 }, { "epoch": 0.11576102037360625, "grad_norm": 1.5546875, "learning_rate": 0.0006258181924640769, "loss": 0.2151, "step": 65288 }, { "epoch": 0.11576456653891606, "grad_norm": 1.0078125, "learning_rate": 0.0006257676058220627, "loss": 0.1872, "step": 65290 }, { "epoch": 0.11576811270422588, "grad_norm": 1.40625, "learning_rate": 0.0006257170212540652, "loss": 0.1816, "step": 65292 }, { "epoch": 0.11577165886953569, "grad_norm": 1.5078125, "learning_rate": 0.0006256664387603062, "loss": 0.2388, "step": 65294 }, { "epoch": 0.11577520503484551, "grad_norm": 0.291015625, "learning_rate": 0.0006256158583410066, "loss": 0.1467, "step": 65296 }, { "epoch": 0.11577875120015532, "grad_norm": 0.2138671875, "learning_rate": 0.0006255652799963877, "loss": 0.2359, "step": 65298 }, { "epoch": 0.11578229736546514, "grad_norm": 0.28125, "learning_rate": 0.0006255147037266706, "loss": 0.1454, "step": 65300 }, { "epoch": 0.11578584353077495, "grad_norm": 0.6328125, "learning_rate": 0.0006254641295320766, "loss": 0.1826, "step": 65302 }, { "epoch": 0.11578938969608477, "grad_norm": 0.36328125, "learning_rate": 0.0006254135574128272, "loss": 0.1867, "step": 65304 }, { "epoch": 0.11579293586139458, "grad_norm": 0.59765625, "learning_rate": 0.0006253629873691429, "loss": 0.2127, "step": 65306 }, { "epoch": 0.1157964820267044, "grad_norm": 0.2734375, "learning_rate": 0.0006253124194012453, "loss": 0.2229, "step": 65308 }, { "epoch": 0.11580002819201421, "grad_norm": 2.546875, "learning_rate": 0.0006252618535093551, "loss": 0.2494, "step": 65310 }, { "epoch": 0.11580357435732402, "grad_norm": 0.359375, "learning_rate": 0.0006252112896936946, "loss": 0.1743, "step": 65312 }, { "epoch": 0.11580712052263384, "grad_norm": 0.3984375, "learning_rate": 0.0006251607279544833, "loss": 0.1583, "step": 65314 }, { "epoch": 0.11581066668794365, "grad_norm": 0.478515625, "learning_rate": 0.0006251101682919434, "loss": 0.1271, "step": 65316 }, { "epoch": 0.11581421285325347, "grad_norm": 0.431640625, "learning_rate": 0.0006250596107062956, "loss": 0.1387, "step": 65318 }, { "epoch": 0.11581775901856328, "grad_norm": 0.828125, "learning_rate": 0.0006250090551977619, "loss": 0.1792, "step": 65320 }, { "epoch": 0.11582130518387311, "grad_norm": 0.625, "learning_rate": 0.000624958501766562, "loss": 0.1746, "step": 65322 }, { "epoch": 0.11582485134918293, "grad_norm": 1.265625, "learning_rate": 0.0006249079504129178, "loss": 0.3301, "step": 65324 }, { "epoch": 0.11582839751449274, "grad_norm": 1.1328125, "learning_rate": 0.0006248574011370505, "loss": 0.1715, "step": 65326 }, { "epoch": 0.11583194367980255, "grad_norm": 1.1484375, "learning_rate": 0.0006248068539391808, "loss": 0.1923, "step": 65328 }, { "epoch": 0.11583548984511237, "grad_norm": 0.59375, "learning_rate": 0.00062475630881953, "loss": 0.2695, "step": 65330 }, { "epoch": 0.11583903601042218, "grad_norm": 1.578125, "learning_rate": 0.0006247057657783186, "loss": 0.208, "step": 65332 }, { "epoch": 0.115842582175732, "grad_norm": 0.1875, "learning_rate": 0.0006246552248157684, "loss": 0.1567, "step": 65334 }, { "epoch": 0.11584612834104181, "grad_norm": 0.63671875, "learning_rate": 0.0006246046859321001, "loss": 0.229, "step": 65336 }, { "epoch": 0.11584967450635163, "grad_norm": 0.435546875, "learning_rate": 0.000624554149127535, "loss": 0.1828, "step": 65338 }, { "epoch": 0.11585322067166144, "grad_norm": 0.703125, "learning_rate": 0.0006245036144022936, "loss": 0.1965, "step": 65340 }, { "epoch": 0.11585676683697126, "grad_norm": 0.8046875, "learning_rate": 0.0006244530817565972, "loss": 0.1692, "step": 65342 }, { "epoch": 0.11586031300228107, "grad_norm": 0.8046875, "learning_rate": 0.000624402551190667, "loss": 0.1807, "step": 65344 }, { "epoch": 0.11586385916759089, "grad_norm": 0.384765625, "learning_rate": 0.0006243520227047237, "loss": 0.2456, "step": 65346 }, { "epoch": 0.1158674053329007, "grad_norm": 0.265625, "learning_rate": 0.0006243014962989884, "loss": 0.202, "step": 65348 }, { "epoch": 0.11587095149821051, "grad_norm": 0.376953125, "learning_rate": 0.0006242509719736817, "loss": 0.3169, "step": 65350 }, { "epoch": 0.11587449766352033, "grad_norm": 0.203125, "learning_rate": 0.0006242004497290255, "loss": 0.3041, "step": 65352 }, { "epoch": 0.11587804382883014, "grad_norm": 0.462890625, "learning_rate": 0.0006241499295652395, "loss": 0.1883, "step": 65354 }, { "epoch": 0.11588158999413996, "grad_norm": 2.375, "learning_rate": 0.0006240994114825458, "loss": 0.2748, "step": 65356 }, { "epoch": 0.11588513615944977, "grad_norm": 1.484375, "learning_rate": 0.0006240488954811642, "loss": 0.3497, "step": 65358 }, { "epoch": 0.11588868232475959, "grad_norm": 0.470703125, "learning_rate": 0.0006239983815613172, "loss": 0.168, "step": 65360 }, { "epoch": 0.1158922284900694, "grad_norm": 0.408203125, "learning_rate": 0.000623947869723224, "loss": 0.1946, "step": 65362 }, { "epoch": 0.11589577465537922, "grad_norm": 0.5078125, "learning_rate": 0.0006238973599671067, "loss": 0.2035, "step": 65364 }, { "epoch": 0.11589932082068903, "grad_norm": 0.2734375, "learning_rate": 0.0006238468522931857, "loss": 0.2416, "step": 65366 }, { "epoch": 0.11590286698599886, "grad_norm": 0.1708984375, "learning_rate": 0.0006237963467016818, "loss": 0.1596, "step": 65368 }, { "epoch": 0.11590641315130867, "grad_norm": 0.40625, "learning_rate": 0.0006237458431928167, "loss": 0.1614, "step": 65370 }, { "epoch": 0.11590995931661849, "grad_norm": 0.15625, "learning_rate": 0.0006236953417668099, "loss": 0.1734, "step": 65372 }, { "epoch": 0.1159135054819283, "grad_norm": 0.3515625, "learning_rate": 0.0006236448424238833, "loss": 0.2113, "step": 65374 }, { "epoch": 0.11591705164723812, "grad_norm": 0.27734375, "learning_rate": 0.0006235943451642575, "loss": 0.1626, "step": 65376 }, { "epoch": 0.11592059781254793, "grad_norm": 3.03125, "learning_rate": 0.0006235438499881534, "loss": 0.2433, "step": 65378 }, { "epoch": 0.11592414397785775, "grad_norm": 1.359375, "learning_rate": 0.0006234933568957916, "loss": 0.2119, "step": 65380 }, { "epoch": 0.11592769014316756, "grad_norm": 0.447265625, "learning_rate": 0.0006234428658873927, "loss": 0.1619, "step": 65382 }, { "epoch": 0.11593123630847738, "grad_norm": 0.50390625, "learning_rate": 0.0006233923769631784, "loss": 0.2435, "step": 65384 }, { "epoch": 0.11593478247378719, "grad_norm": 0.302734375, "learning_rate": 0.0006233418901233688, "loss": 0.183, "step": 65386 }, { "epoch": 0.115938328639097, "grad_norm": 0.6015625, "learning_rate": 0.0006232914053681851, "loss": 0.2734, "step": 65388 }, { "epoch": 0.11594187480440682, "grad_norm": 0.546875, "learning_rate": 0.0006232409226978474, "loss": 0.1496, "step": 65390 }, { "epoch": 0.11594542096971663, "grad_norm": 0.984375, "learning_rate": 0.0006231904421125774, "loss": 0.1853, "step": 65392 }, { "epoch": 0.11594896713502645, "grad_norm": 0.63671875, "learning_rate": 0.0006231399636125954, "loss": 0.1586, "step": 65394 }, { "epoch": 0.11595251330033626, "grad_norm": 0.578125, "learning_rate": 0.0006230894871981223, "loss": 0.1574, "step": 65396 }, { "epoch": 0.11595605946564608, "grad_norm": 0.251953125, "learning_rate": 0.0006230390128693786, "loss": 0.1749, "step": 65398 }, { "epoch": 0.11595960563095589, "grad_norm": 1.6484375, "learning_rate": 0.0006229885406265849, "loss": 0.2251, "step": 65400 }, { "epoch": 0.1159631517962657, "grad_norm": 4.71875, "learning_rate": 0.0006229380704699629, "loss": 0.2797, "step": 65402 }, { "epoch": 0.11596669796157552, "grad_norm": 0.373046875, "learning_rate": 0.0006228876023997322, "loss": 0.1597, "step": 65404 }, { "epoch": 0.11597024412688534, "grad_norm": 0.7421875, "learning_rate": 0.0006228371364161142, "loss": 0.1433, "step": 65406 }, { "epoch": 0.11597379029219515, "grad_norm": 0.396484375, "learning_rate": 0.0006227866725193289, "loss": 0.1589, "step": 65408 }, { "epoch": 0.11597733645750496, "grad_norm": 1.1015625, "learning_rate": 0.0006227362107095984, "loss": 0.1562, "step": 65410 }, { "epoch": 0.1159808826228148, "grad_norm": 0.435546875, "learning_rate": 0.0006226857509871417, "loss": 0.1639, "step": 65412 }, { "epoch": 0.11598442878812461, "grad_norm": 0.48828125, "learning_rate": 0.0006226352933521806, "loss": 0.1858, "step": 65414 }, { "epoch": 0.11598797495343442, "grad_norm": 0.330078125, "learning_rate": 0.0006225848378049355, "loss": 0.1867, "step": 65416 }, { "epoch": 0.11599152111874424, "grad_norm": 0.29296875, "learning_rate": 0.000622534384345627, "loss": 0.151, "step": 65418 }, { "epoch": 0.11599506728405405, "grad_norm": 0.2734375, "learning_rate": 0.0006224839329744757, "loss": 0.166, "step": 65420 }, { "epoch": 0.11599861344936387, "grad_norm": 1.0546875, "learning_rate": 0.0006224334836917021, "loss": 0.1824, "step": 65422 }, { "epoch": 0.11600215961467368, "grad_norm": 0.494140625, "learning_rate": 0.0006223830364975271, "loss": 0.1309, "step": 65424 }, { "epoch": 0.1160057057799835, "grad_norm": 0.79296875, "learning_rate": 0.0006223325913921714, "loss": 0.1754, "step": 65426 }, { "epoch": 0.11600925194529331, "grad_norm": 0.361328125, "learning_rate": 0.0006222821483758556, "loss": 0.1211, "step": 65428 }, { "epoch": 0.11601279811060312, "grad_norm": 0.75, "learning_rate": 0.0006222317074487997, "loss": 0.2123, "step": 65430 }, { "epoch": 0.11601634427591294, "grad_norm": 0.400390625, "learning_rate": 0.0006221812686112251, "loss": 0.2002, "step": 65432 }, { "epoch": 0.11601989044122275, "grad_norm": 0.4921875, "learning_rate": 0.0006221308318633523, "loss": 0.1455, "step": 65434 }, { "epoch": 0.11602343660653257, "grad_norm": 0.34375, "learning_rate": 0.0006220803972054015, "loss": 0.1645, "step": 65436 }, { "epoch": 0.11602698277184238, "grad_norm": 0.25, "learning_rate": 0.0006220299646375935, "loss": 0.1287, "step": 65438 }, { "epoch": 0.1160305289371522, "grad_norm": 0.267578125, "learning_rate": 0.0006219795341601484, "loss": 0.1437, "step": 65440 }, { "epoch": 0.11603407510246201, "grad_norm": 1.03125, "learning_rate": 0.0006219291057732878, "loss": 0.184, "step": 65442 }, { "epoch": 0.11603762126777183, "grad_norm": 0.3125, "learning_rate": 0.0006218786794772311, "loss": 0.1765, "step": 65444 }, { "epoch": 0.11604116743308164, "grad_norm": 0.443359375, "learning_rate": 0.0006218282552721995, "loss": 0.1818, "step": 65446 }, { "epoch": 0.11604471359839146, "grad_norm": 0.51171875, "learning_rate": 0.0006217778331584131, "loss": 0.1727, "step": 65448 }, { "epoch": 0.11604825976370127, "grad_norm": 0.36328125, "learning_rate": 0.000621727413136093, "loss": 0.1641, "step": 65450 }, { "epoch": 0.11605180592901108, "grad_norm": 1.78125, "learning_rate": 0.0006216769952054594, "loss": 0.1592, "step": 65452 }, { "epoch": 0.1160553520943209, "grad_norm": 1.0390625, "learning_rate": 0.0006216265793667327, "loss": 0.4002, "step": 65454 }, { "epoch": 0.11605889825963071, "grad_norm": 1.9921875, "learning_rate": 0.0006215761656201335, "loss": 0.2796, "step": 65456 }, { "epoch": 0.11606244442494054, "grad_norm": 0.53125, "learning_rate": 0.0006215257539658819, "loss": 0.201, "step": 65458 }, { "epoch": 0.11606599059025036, "grad_norm": 1.2421875, "learning_rate": 0.0006214753444041994, "loss": 0.1699, "step": 65460 }, { "epoch": 0.11606953675556017, "grad_norm": 0.41015625, "learning_rate": 0.0006214249369353051, "loss": 0.1476, "step": 65462 }, { "epoch": 0.11607308292086999, "grad_norm": 0.70703125, "learning_rate": 0.0006213745315594206, "loss": 0.1648, "step": 65464 }, { "epoch": 0.1160766290861798, "grad_norm": 1.6875, "learning_rate": 0.0006213241282767653, "loss": 0.2368, "step": 65466 }, { "epoch": 0.11608017525148961, "grad_norm": 0.49609375, "learning_rate": 0.0006212737270875612, "loss": 0.1606, "step": 65468 }, { "epoch": 0.11608372141679943, "grad_norm": 0.400390625, "learning_rate": 0.0006212233279920267, "loss": 0.1328, "step": 65470 }, { "epoch": 0.11608726758210924, "grad_norm": 1.515625, "learning_rate": 0.000621172930990384, "loss": 0.2956, "step": 65472 }, { "epoch": 0.11609081374741906, "grad_norm": 0.4375, "learning_rate": 0.0006211225360828524, "loss": 0.1833, "step": 65474 }, { "epoch": 0.11609435991272887, "grad_norm": 1.125, "learning_rate": 0.0006210721432696529, "loss": 0.1941, "step": 65476 }, { "epoch": 0.11609790607803869, "grad_norm": 0.328125, "learning_rate": 0.0006210217525510056, "loss": 0.1713, "step": 65478 }, { "epoch": 0.1161014522433485, "grad_norm": 0.296875, "learning_rate": 0.0006209713639271304, "loss": 0.1342, "step": 65480 }, { "epoch": 0.11610499840865832, "grad_norm": 0.5234375, "learning_rate": 0.0006209209773982488, "loss": 0.1645, "step": 65482 }, { "epoch": 0.11610854457396813, "grad_norm": 2.875, "learning_rate": 0.0006208705929645805, "loss": 0.608, "step": 65484 }, { "epoch": 0.11611209073927795, "grad_norm": 0.26171875, "learning_rate": 0.0006208202106263459, "loss": 0.1332, "step": 65486 }, { "epoch": 0.11611563690458776, "grad_norm": 0.267578125, "learning_rate": 0.000620769830383765, "loss": 0.2271, "step": 65488 }, { "epoch": 0.11611918306989757, "grad_norm": 0.63671875, "learning_rate": 0.0006207194522370587, "loss": 0.1568, "step": 65490 }, { "epoch": 0.11612272923520739, "grad_norm": 0.2734375, "learning_rate": 0.0006206690761864473, "loss": 0.179, "step": 65492 }, { "epoch": 0.1161262754005172, "grad_norm": 0.373046875, "learning_rate": 0.0006206187022321508, "loss": 0.2059, "step": 65494 }, { "epoch": 0.11612982156582702, "grad_norm": 2.546875, "learning_rate": 0.0006205683303743899, "loss": 0.3227, "step": 65496 }, { "epoch": 0.11613336773113683, "grad_norm": 0.337890625, "learning_rate": 0.000620517960613384, "loss": 0.126, "step": 65498 }, { "epoch": 0.11613691389644665, "grad_norm": 0.220703125, "learning_rate": 0.0006204675929493549, "loss": 0.1592, "step": 65500 }, { "epoch": 0.11614046006175646, "grad_norm": 1.7421875, "learning_rate": 0.0006204172273825213, "loss": 0.1762, "step": 65502 }, { "epoch": 0.11614400622706629, "grad_norm": 0.2294921875, "learning_rate": 0.0006203668639131044, "loss": 0.1988, "step": 65504 }, { "epoch": 0.1161475523923761, "grad_norm": 0.470703125, "learning_rate": 0.0006203165025413239, "loss": 0.2208, "step": 65506 }, { "epoch": 0.11615109855768592, "grad_norm": 0.2275390625, "learning_rate": 0.000620266143267401, "loss": 0.1953, "step": 65508 }, { "epoch": 0.11615464472299573, "grad_norm": 0.224609375, "learning_rate": 0.0006202157860915548, "loss": 0.1779, "step": 65510 }, { "epoch": 0.11615819088830555, "grad_norm": 0.396484375, "learning_rate": 0.0006201654310140064, "loss": 0.1758, "step": 65512 }, { "epoch": 0.11616173705361536, "grad_norm": 0.53515625, "learning_rate": 0.0006201150780349756, "loss": 0.2058, "step": 65514 }, { "epoch": 0.11616528321892518, "grad_norm": 0.4140625, "learning_rate": 0.0006200647271546826, "loss": 0.15, "step": 65516 }, { "epoch": 0.11616882938423499, "grad_norm": 0.87890625, "learning_rate": 0.0006200143783733481, "loss": 0.1637, "step": 65518 }, { "epoch": 0.1161723755495448, "grad_norm": 0.8671875, "learning_rate": 0.0006199640316911912, "loss": 0.2037, "step": 65520 }, { "epoch": 0.11617592171485462, "grad_norm": 0.6875, "learning_rate": 0.0006199136871084332, "loss": 0.154, "step": 65522 }, { "epoch": 0.11617946788016444, "grad_norm": 0.625, "learning_rate": 0.0006198633446252939, "loss": 0.166, "step": 65524 }, { "epoch": 0.11618301404547425, "grad_norm": 0.380859375, "learning_rate": 0.0006198130042419933, "loss": 0.195, "step": 65526 }, { "epoch": 0.11618656021078407, "grad_norm": 0.349609375, "learning_rate": 0.0006197626659587516, "loss": 0.2118, "step": 65528 }, { "epoch": 0.11619010637609388, "grad_norm": 0.82421875, "learning_rate": 0.0006197123297757892, "loss": 0.1901, "step": 65530 }, { "epoch": 0.1161936525414037, "grad_norm": 0.421875, "learning_rate": 0.0006196619956933263, "loss": 0.1455, "step": 65532 }, { "epoch": 0.11619719870671351, "grad_norm": 0.16796875, "learning_rate": 0.0006196116637115825, "loss": 0.2672, "step": 65534 }, { "epoch": 0.11620074487202332, "grad_norm": 0.232421875, "learning_rate": 0.0006195613338307785, "loss": 0.1974, "step": 65536 }, { "epoch": 0.11620429103733314, "grad_norm": 0.39453125, "learning_rate": 0.0006195110060511338, "loss": 0.1547, "step": 65538 }, { "epoch": 0.11620783720264295, "grad_norm": 0.388671875, "learning_rate": 0.0006194606803728691, "loss": 0.1672, "step": 65540 }, { "epoch": 0.11621138336795277, "grad_norm": 0.484375, "learning_rate": 0.0006194103567962042, "loss": 0.2162, "step": 65542 }, { "epoch": 0.11621492953326258, "grad_norm": 0.91796875, "learning_rate": 0.0006193600353213593, "loss": 0.3442, "step": 65544 }, { "epoch": 0.1162184756985724, "grad_norm": 0.57421875, "learning_rate": 0.0006193097159485544, "loss": 0.2073, "step": 65546 }, { "epoch": 0.11622202186388222, "grad_norm": 0.765625, "learning_rate": 0.0006192593986780093, "loss": 0.4319, "step": 65548 }, { "epoch": 0.11622556802919204, "grad_norm": 0.287109375, "learning_rate": 0.0006192090835099446, "loss": 0.213, "step": 65550 }, { "epoch": 0.11622911419450185, "grad_norm": 1.375, "learning_rate": 0.0006191587704445802, "loss": 0.1807, "step": 65552 }, { "epoch": 0.11623266035981167, "grad_norm": 0.2314453125, "learning_rate": 0.0006191084594821359, "loss": 0.2088, "step": 65554 }, { "epoch": 0.11623620652512148, "grad_norm": 0.470703125, "learning_rate": 0.0006190581506228316, "loss": 0.1934, "step": 65556 }, { "epoch": 0.1162397526904313, "grad_norm": 0.322265625, "learning_rate": 0.0006190078438668885, "loss": 0.19, "step": 65558 }, { "epoch": 0.11624329885574111, "grad_norm": 0.259765625, "learning_rate": 0.0006189575392145245, "loss": 0.1549, "step": 65560 }, { "epoch": 0.11624684502105093, "grad_norm": 0.267578125, "learning_rate": 0.0006189072366659616, "loss": 0.1532, "step": 65562 }, { "epoch": 0.11625039118636074, "grad_norm": 1.03125, "learning_rate": 0.0006188569362214188, "loss": 0.207, "step": 65564 }, { "epoch": 0.11625393735167056, "grad_norm": 0.58984375, "learning_rate": 0.0006188066378811163, "loss": 0.1651, "step": 65566 }, { "epoch": 0.11625748351698037, "grad_norm": 0.96484375, "learning_rate": 0.0006187563416452741, "loss": 0.2254, "step": 65568 }, { "epoch": 0.11626102968229018, "grad_norm": 0.65625, "learning_rate": 0.0006187060475141118, "loss": 0.2517, "step": 65570 }, { "epoch": 0.1162645758476, "grad_norm": 0.40234375, "learning_rate": 0.0006186557554878498, "loss": 0.1944, "step": 65572 }, { "epoch": 0.11626812201290981, "grad_norm": 0.1787109375, "learning_rate": 0.0006186054655667082, "loss": 0.1609, "step": 65574 }, { "epoch": 0.11627166817821963, "grad_norm": 0.47265625, "learning_rate": 0.0006185551777509065, "loss": 0.1956, "step": 65576 }, { "epoch": 0.11627521434352944, "grad_norm": 0.83203125, "learning_rate": 0.0006185048920406644, "loss": 0.1799, "step": 65578 }, { "epoch": 0.11627876050883926, "grad_norm": 0.42578125, "learning_rate": 0.0006184546084362027, "loss": 0.1751, "step": 65580 }, { "epoch": 0.11628230667414907, "grad_norm": 0.236328125, "learning_rate": 0.0006184043269377408, "loss": 0.1916, "step": 65582 }, { "epoch": 0.11628585283945889, "grad_norm": 0.349609375, "learning_rate": 0.0006183540475454988, "loss": 0.1502, "step": 65584 }, { "epoch": 0.1162893990047687, "grad_norm": 0.515625, "learning_rate": 0.000618303770259696, "loss": 0.1482, "step": 65586 }, { "epoch": 0.11629294517007852, "grad_norm": 0.421875, "learning_rate": 0.0006182534950805528, "loss": 0.2343, "step": 65588 }, { "epoch": 0.11629649133538833, "grad_norm": 0.40234375, "learning_rate": 0.0006182032220082894, "loss": 0.1618, "step": 65590 }, { "epoch": 0.11630003750069814, "grad_norm": 1.75, "learning_rate": 0.0006181529510431247, "loss": 0.2319, "step": 65592 }, { "epoch": 0.11630358366600797, "grad_norm": 0.345703125, "learning_rate": 0.0006181026821852793, "loss": 0.1745, "step": 65594 }, { "epoch": 0.11630712983131779, "grad_norm": 0.318359375, "learning_rate": 0.0006180524154349726, "loss": 0.1606, "step": 65596 }, { "epoch": 0.1163106759966276, "grad_norm": 0.271484375, "learning_rate": 0.0006180021507924254, "loss": 0.1464, "step": 65598 }, { "epoch": 0.11631422216193742, "grad_norm": 0.3515625, "learning_rate": 0.0006179518882578561, "loss": 0.2138, "step": 65600 }, { "epoch": 0.11631776832724723, "grad_norm": 0.56640625, "learning_rate": 0.0006179016278314855, "loss": 0.2216, "step": 65602 }, { "epoch": 0.11632131449255705, "grad_norm": 0.32421875, "learning_rate": 0.0006178513695135333, "loss": 0.18, "step": 65604 }, { "epoch": 0.11632486065786686, "grad_norm": 0.28125, "learning_rate": 0.0006178011133042189, "loss": 0.1291, "step": 65606 }, { "epoch": 0.11632840682317667, "grad_norm": 0.30859375, "learning_rate": 0.0006177508592037625, "loss": 0.1778, "step": 65608 }, { "epoch": 0.11633195298848649, "grad_norm": 0.21484375, "learning_rate": 0.0006177006072123835, "loss": 0.1364, "step": 65610 }, { "epoch": 0.1163354991537963, "grad_norm": 0.2392578125, "learning_rate": 0.000617650357330302, "loss": 0.1767, "step": 65612 }, { "epoch": 0.11633904531910612, "grad_norm": 0.31640625, "learning_rate": 0.0006176001095577375, "loss": 0.2158, "step": 65614 }, { "epoch": 0.11634259148441593, "grad_norm": 0.6953125, "learning_rate": 0.0006175498638949104, "loss": 0.2231, "step": 65616 }, { "epoch": 0.11634613764972575, "grad_norm": 0.640625, "learning_rate": 0.0006174996203420395, "loss": 0.3931, "step": 65618 }, { "epoch": 0.11634968381503556, "grad_norm": 0.3125, "learning_rate": 0.0006174493788993451, "loss": 0.1964, "step": 65620 }, { "epoch": 0.11635322998034538, "grad_norm": 0.474609375, "learning_rate": 0.0006173991395670471, "loss": 0.1641, "step": 65622 }, { "epoch": 0.11635677614565519, "grad_norm": 0.283203125, "learning_rate": 0.0006173489023453645, "loss": 0.1992, "step": 65624 }, { "epoch": 0.116360322310965, "grad_norm": 1.0390625, "learning_rate": 0.0006172986672345179, "loss": 0.2311, "step": 65626 }, { "epoch": 0.11636386847627482, "grad_norm": 0.486328125, "learning_rate": 0.000617248434234726, "loss": 0.2348, "step": 65628 }, { "epoch": 0.11636741464158463, "grad_norm": 1.1171875, "learning_rate": 0.0006171982033462093, "loss": 0.2292, "step": 65630 }, { "epoch": 0.11637096080689445, "grad_norm": 0.455078125, "learning_rate": 0.0006171479745691873, "loss": 0.2589, "step": 65632 }, { "epoch": 0.11637450697220426, "grad_norm": 0.2275390625, "learning_rate": 0.0006170977479038796, "loss": 0.2328, "step": 65634 }, { "epoch": 0.11637805313751408, "grad_norm": 0.52734375, "learning_rate": 0.0006170475233505055, "loss": 0.1735, "step": 65636 }, { "epoch": 0.1163815993028239, "grad_norm": 0.2734375, "learning_rate": 0.0006169973009092853, "loss": 0.1606, "step": 65638 }, { "epoch": 0.11638514546813372, "grad_norm": 0.375, "learning_rate": 0.0006169470805804384, "loss": 0.1847, "step": 65640 }, { "epoch": 0.11638869163344354, "grad_norm": 0.75, "learning_rate": 0.0006168968623641844, "loss": 0.1735, "step": 65642 }, { "epoch": 0.11639223779875335, "grad_norm": 0.359375, "learning_rate": 0.0006168466462607427, "loss": 0.1722, "step": 65644 }, { "epoch": 0.11639578396406317, "grad_norm": 0.50390625, "learning_rate": 0.000616796432270333, "loss": 0.1905, "step": 65646 }, { "epoch": 0.11639933012937298, "grad_norm": 0.296875, "learning_rate": 0.0006167462203931758, "loss": 0.2204, "step": 65648 }, { "epoch": 0.1164028762946828, "grad_norm": 0.4765625, "learning_rate": 0.000616696010629489, "loss": 0.165, "step": 65650 }, { "epoch": 0.11640642245999261, "grad_norm": 0.5390625, "learning_rate": 0.0006166458029794934, "loss": 0.1571, "step": 65652 }, { "epoch": 0.11640996862530242, "grad_norm": 0.515625, "learning_rate": 0.0006165955974434083, "loss": 0.148, "step": 65654 }, { "epoch": 0.11641351479061224, "grad_norm": 0.6875, "learning_rate": 0.0006165453940214536, "loss": 0.1699, "step": 65656 }, { "epoch": 0.11641706095592205, "grad_norm": 1.0078125, "learning_rate": 0.000616495192713848, "loss": 0.2786, "step": 65658 }, { "epoch": 0.11642060712123187, "grad_norm": 0.26171875, "learning_rate": 0.0006164449935208119, "loss": 0.131, "step": 65660 }, { "epoch": 0.11642415328654168, "grad_norm": 0.337890625, "learning_rate": 0.0006163947964425646, "loss": 0.198, "step": 65662 }, { "epoch": 0.1164276994518515, "grad_norm": 0.9453125, "learning_rate": 0.0006163446014793254, "loss": 0.1742, "step": 65664 }, { "epoch": 0.11643124561716131, "grad_norm": 0.51953125, "learning_rate": 0.000616294408631314, "loss": 0.1591, "step": 65666 }, { "epoch": 0.11643479178247113, "grad_norm": 0.396484375, "learning_rate": 0.0006162442178987498, "loss": 0.1946, "step": 65668 }, { "epoch": 0.11643833794778094, "grad_norm": 0.388671875, "learning_rate": 0.0006161940292818523, "loss": 0.1624, "step": 65670 }, { "epoch": 0.11644188411309075, "grad_norm": 0.2373046875, "learning_rate": 0.0006161438427808413, "loss": 0.1512, "step": 65672 }, { "epoch": 0.11644543027840057, "grad_norm": 0.6875, "learning_rate": 0.0006160936583959361, "loss": 0.286, "step": 65674 }, { "epoch": 0.11644897644371038, "grad_norm": 0.54296875, "learning_rate": 0.0006160434761273559, "loss": 0.1731, "step": 65676 }, { "epoch": 0.1164525226090202, "grad_norm": 0.3203125, "learning_rate": 0.0006159932959753205, "loss": 0.1778, "step": 65678 }, { "epoch": 0.11645606877433001, "grad_norm": 0.490234375, "learning_rate": 0.0006159431179400494, "loss": 0.1674, "step": 65680 }, { "epoch": 0.11645961493963983, "grad_norm": 2.234375, "learning_rate": 0.0006158929420217621, "loss": 0.2438, "step": 65682 }, { "epoch": 0.11646316110494966, "grad_norm": 0.451171875, "learning_rate": 0.0006158427682206776, "loss": 0.1894, "step": 65684 }, { "epoch": 0.11646670727025947, "grad_norm": 0.2265625, "learning_rate": 0.0006157925965370155, "loss": 0.1708, "step": 65686 }, { "epoch": 0.11647025343556928, "grad_norm": 0.27734375, "learning_rate": 0.0006157424269709958, "loss": 0.1516, "step": 65688 }, { "epoch": 0.1164737996008791, "grad_norm": 0.44921875, "learning_rate": 0.000615692259522837, "loss": 0.1532, "step": 65690 }, { "epoch": 0.11647734576618891, "grad_norm": 0.365234375, "learning_rate": 0.000615642094192759, "loss": 0.1894, "step": 65692 }, { "epoch": 0.11648089193149873, "grad_norm": 0.3828125, "learning_rate": 0.000615591930980981, "loss": 0.1486, "step": 65694 }, { "epoch": 0.11648443809680854, "grad_norm": 0.365234375, "learning_rate": 0.0006155417698877231, "loss": 0.1765, "step": 65696 }, { "epoch": 0.11648798426211836, "grad_norm": 0.5546875, "learning_rate": 0.0006154916109132034, "loss": 0.1547, "step": 65698 }, { "epoch": 0.11649153042742817, "grad_norm": 0.333984375, "learning_rate": 0.0006154414540576423, "loss": 0.2201, "step": 65700 }, { "epoch": 0.11649507659273799, "grad_norm": 0.57421875, "learning_rate": 0.0006153912993212588, "loss": 0.2078, "step": 65702 }, { "epoch": 0.1164986227580478, "grad_norm": 0.384765625, "learning_rate": 0.000615341146704272, "loss": 0.2009, "step": 65704 }, { "epoch": 0.11650216892335762, "grad_norm": 0.734375, "learning_rate": 0.000615290996206902, "loss": 0.1417, "step": 65706 }, { "epoch": 0.11650571508866743, "grad_norm": 0.404296875, "learning_rate": 0.0006152408478293672, "loss": 0.1781, "step": 65708 }, { "epoch": 0.11650926125397724, "grad_norm": 0.28515625, "learning_rate": 0.0006151907015718875, "loss": 0.2071, "step": 65710 }, { "epoch": 0.11651280741928706, "grad_norm": 0.5234375, "learning_rate": 0.0006151405574346818, "loss": 0.3059, "step": 65712 }, { "epoch": 0.11651635358459687, "grad_norm": 0.8203125, "learning_rate": 0.0006150904154179703, "loss": 0.2246, "step": 65714 }, { "epoch": 0.11651989974990669, "grad_norm": 0.22265625, "learning_rate": 0.0006150402755219711, "loss": 0.1394, "step": 65716 }, { "epoch": 0.1165234459152165, "grad_norm": 0.3046875, "learning_rate": 0.0006149901377469042, "loss": 0.1802, "step": 65718 }, { "epoch": 0.11652699208052632, "grad_norm": 0.26953125, "learning_rate": 0.0006149400020929889, "loss": 0.1477, "step": 65720 }, { "epoch": 0.11653053824583613, "grad_norm": 0.21484375, "learning_rate": 0.000614889868560444, "loss": 0.1446, "step": 65722 }, { "epoch": 0.11653408441114595, "grad_norm": 0.404296875, "learning_rate": 0.0006148397371494893, "loss": 0.1571, "step": 65724 }, { "epoch": 0.11653763057645576, "grad_norm": 1.15625, "learning_rate": 0.0006147896078603434, "loss": 0.3228, "step": 65726 }, { "epoch": 0.11654117674176558, "grad_norm": 0.314453125, "learning_rate": 0.000614739480693226, "loss": 0.2077, "step": 65728 }, { "epoch": 0.1165447229070754, "grad_norm": 0.32421875, "learning_rate": 0.0006146893556483564, "loss": 0.1906, "step": 65730 }, { "epoch": 0.11654826907238522, "grad_norm": 0.609375, "learning_rate": 0.0006146392327259538, "loss": 0.2093, "step": 65732 }, { "epoch": 0.11655181523769503, "grad_norm": 1.3125, "learning_rate": 0.0006145891119262371, "loss": 0.1847, "step": 65734 }, { "epoch": 0.11655536140300485, "grad_norm": 0.470703125, "learning_rate": 0.0006145389932494254, "loss": 0.1766, "step": 65736 }, { "epoch": 0.11655890756831466, "grad_norm": 0.396484375, "learning_rate": 0.0006144888766957388, "loss": 0.1929, "step": 65738 }, { "epoch": 0.11656245373362448, "grad_norm": 0.4140625, "learning_rate": 0.0006144387622653951, "loss": 0.1698, "step": 65740 }, { "epoch": 0.11656599989893429, "grad_norm": 0.345703125, "learning_rate": 0.0006143886499586147, "loss": 0.1371, "step": 65742 }, { "epoch": 0.1165695460642441, "grad_norm": 0.373046875, "learning_rate": 0.0006143385397756158, "loss": 0.1921, "step": 65744 }, { "epoch": 0.11657309222955392, "grad_norm": 0.2255859375, "learning_rate": 0.0006142884317166188, "loss": 0.1636, "step": 65746 }, { "epoch": 0.11657663839486374, "grad_norm": 0.2578125, "learning_rate": 0.0006142383257818412, "loss": 0.1477, "step": 65748 }, { "epoch": 0.11658018456017355, "grad_norm": 0.34375, "learning_rate": 0.0006141882219715036, "loss": 0.1591, "step": 65750 }, { "epoch": 0.11658373072548336, "grad_norm": 0.2060546875, "learning_rate": 0.0006141381202858242, "loss": 0.209, "step": 65752 }, { "epoch": 0.11658727689079318, "grad_norm": 0.23828125, "learning_rate": 0.0006140880207250228, "loss": 0.1573, "step": 65754 }, { "epoch": 0.116590823056103, "grad_norm": 0.2158203125, "learning_rate": 0.0006140379232893179, "loss": 0.1779, "step": 65756 }, { "epoch": 0.11659436922141281, "grad_norm": 1.8359375, "learning_rate": 0.0006139878279789286, "loss": 0.2349, "step": 65758 }, { "epoch": 0.11659791538672262, "grad_norm": 0.79296875, "learning_rate": 0.0006139377347940745, "loss": 0.1938, "step": 65760 }, { "epoch": 0.11660146155203244, "grad_norm": 0.59765625, "learning_rate": 0.0006138876437349745, "loss": 0.1993, "step": 65762 }, { "epoch": 0.11660500771734225, "grad_norm": 0.3046875, "learning_rate": 0.0006138375548018476, "loss": 0.2048, "step": 65764 }, { "epoch": 0.11660855388265207, "grad_norm": 0.6953125, "learning_rate": 0.0006137874679949122, "loss": 0.1643, "step": 65766 }, { "epoch": 0.11661210004796188, "grad_norm": 0.30078125, "learning_rate": 0.0006137373833143886, "loss": 0.2082, "step": 65768 }, { "epoch": 0.1166156462132717, "grad_norm": 0.73046875, "learning_rate": 0.0006136873007604953, "loss": 0.1749, "step": 65770 }, { "epoch": 0.11661919237858151, "grad_norm": 1.125, "learning_rate": 0.000613637220333451, "loss": 0.2367, "step": 65772 }, { "epoch": 0.11662273854389132, "grad_norm": 0.6171875, "learning_rate": 0.0006135871420334753, "loss": 0.1747, "step": 65774 }, { "epoch": 0.11662628470920115, "grad_norm": 0.67578125, "learning_rate": 0.0006135370658607864, "loss": 0.1773, "step": 65776 }, { "epoch": 0.11662983087451097, "grad_norm": 0.1611328125, "learning_rate": 0.0006134869918156045, "loss": 0.1854, "step": 65778 }, { "epoch": 0.11663337703982078, "grad_norm": 0.35546875, "learning_rate": 0.0006134369198981471, "loss": 0.2329, "step": 65780 }, { "epoch": 0.1166369232051306, "grad_norm": 0.27734375, "learning_rate": 0.0006133868501086345, "loss": 0.205, "step": 65782 }, { "epoch": 0.11664046937044041, "grad_norm": 0.42578125, "learning_rate": 0.0006133367824472846, "loss": 0.2084, "step": 65784 }, { "epoch": 0.11664401553575023, "grad_norm": 0.78125, "learning_rate": 0.0006132867169143175, "loss": 0.1146, "step": 65786 }, { "epoch": 0.11664756170106004, "grad_norm": 0.41796875, "learning_rate": 0.0006132366535099514, "loss": 0.1414, "step": 65788 }, { "epoch": 0.11665110786636985, "grad_norm": 0.2734375, "learning_rate": 0.0006131865922344056, "loss": 0.1565, "step": 65790 }, { "epoch": 0.11665465403167967, "grad_norm": 0.625, "learning_rate": 0.0006131365330878987, "loss": 0.2039, "step": 65792 }, { "epoch": 0.11665820019698948, "grad_norm": 0.60546875, "learning_rate": 0.0006130864760706496, "loss": 0.1887, "step": 65794 }, { "epoch": 0.1166617463622993, "grad_norm": 0.484375, "learning_rate": 0.000613036421182878, "loss": 0.2562, "step": 65796 }, { "epoch": 0.11666529252760911, "grad_norm": 0.259765625, "learning_rate": 0.0006129863684248017, "loss": 0.2871, "step": 65798 }, { "epoch": 0.11666883869291893, "grad_norm": 0.91015625, "learning_rate": 0.0006129363177966403, "loss": 0.4318, "step": 65800 }, { "epoch": 0.11667238485822874, "grad_norm": 0.16796875, "learning_rate": 0.0006128862692986124, "loss": 0.1511, "step": 65802 }, { "epoch": 0.11667593102353856, "grad_norm": 0.466796875, "learning_rate": 0.0006128362229309374, "loss": 0.1713, "step": 65804 }, { "epoch": 0.11667947718884837, "grad_norm": 0.51953125, "learning_rate": 0.0006127861786938333, "loss": 0.171, "step": 65806 }, { "epoch": 0.11668302335415819, "grad_norm": 0.30859375, "learning_rate": 0.0006127361365875198, "loss": 0.1837, "step": 65808 }, { "epoch": 0.116686569519468, "grad_norm": 0.193359375, "learning_rate": 0.0006126860966122153, "loss": 0.2092, "step": 65810 }, { "epoch": 0.11669011568477781, "grad_norm": 0.1865234375, "learning_rate": 0.0006126360587681387, "loss": 0.1311, "step": 65812 }, { "epoch": 0.11669366185008763, "grad_norm": 0.408203125, "learning_rate": 0.000612586023055509, "loss": 0.2355, "step": 65814 }, { "epoch": 0.11669720801539744, "grad_norm": 0.59375, "learning_rate": 0.0006125359894745446, "loss": 0.3376, "step": 65816 }, { "epoch": 0.11670075418070726, "grad_norm": 3.4375, "learning_rate": 0.0006124859580254649, "loss": 0.2802, "step": 65818 }, { "epoch": 0.11670430034601709, "grad_norm": 0.212890625, "learning_rate": 0.0006124359287084884, "loss": 0.1503, "step": 65820 }, { "epoch": 0.1167078465113269, "grad_norm": 0.376953125, "learning_rate": 0.0006123859015238341, "loss": 0.216, "step": 65822 }, { "epoch": 0.11671139267663672, "grad_norm": 0.1962890625, "learning_rate": 0.0006123358764717202, "loss": 0.1755, "step": 65824 }, { "epoch": 0.11671493884194653, "grad_norm": 0.337890625, "learning_rate": 0.0006122858535523661, "loss": 0.1146, "step": 65826 }, { "epoch": 0.11671848500725635, "grad_norm": 0.2158203125, "learning_rate": 0.0006122358327659906, "loss": 0.1686, "step": 65828 }, { "epoch": 0.11672203117256616, "grad_norm": 0.240234375, "learning_rate": 0.000612185814112812, "loss": 0.1382, "step": 65830 }, { "epoch": 0.11672557733787597, "grad_norm": 0.60546875, "learning_rate": 0.0006121357975930496, "loss": 0.172, "step": 65832 }, { "epoch": 0.11672912350318579, "grad_norm": 0.2333984375, "learning_rate": 0.0006120857832069214, "loss": 0.1989, "step": 65834 }, { "epoch": 0.1167326696684956, "grad_norm": 0.302734375, "learning_rate": 0.000612035770954647, "loss": 0.2522, "step": 65836 }, { "epoch": 0.11673621583380542, "grad_norm": 0.1787109375, "learning_rate": 0.0006119857608364443, "loss": 0.1451, "step": 65838 }, { "epoch": 0.11673976199911523, "grad_norm": 1.2421875, "learning_rate": 0.0006119357528525326, "loss": 0.228, "step": 65840 }, { "epoch": 0.11674330816442505, "grad_norm": 1.8359375, "learning_rate": 0.0006118857470031301, "loss": 0.4159, "step": 65842 }, { "epoch": 0.11674685432973486, "grad_norm": 0.451171875, "learning_rate": 0.0006118357432884565, "loss": 0.2037, "step": 65844 }, { "epoch": 0.11675040049504468, "grad_norm": 0.55078125, "learning_rate": 0.0006117857417087292, "loss": 0.5178, "step": 65846 }, { "epoch": 0.11675394666035449, "grad_norm": 1.03125, "learning_rate": 0.0006117357422641676, "loss": 0.1547, "step": 65848 }, { "epoch": 0.1167574928256643, "grad_norm": 1.171875, "learning_rate": 0.0006116857449549905, "loss": 0.2091, "step": 65850 }, { "epoch": 0.11676103899097412, "grad_norm": 0.26953125, "learning_rate": 0.0006116357497814162, "loss": 0.1304, "step": 65852 }, { "epoch": 0.11676458515628393, "grad_norm": 0.34375, "learning_rate": 0.0006115857567436636, "loss": 0.16, "step": 65854 }, { "epoch": 0.11676813132159375, "grad_norm": 0.359375, "learning_rate": 0.0006115357658419506, "loss": 0.2139, "step": 65856 }, { "epoch": 0.11677167748690356, "grad_norm": 0.390625, "learning_rate": 0.0006114857770764968, "loss": 0.1703, "step": 65858 }, { "epoch": 0.11677522365221338, "grad_norm": 0.484375, "learning_rate": 0.0006114357904475205, "loss": 0.1576, "step": 65860 }, { "epoch": 0.11677876981752319, "grad_norm": 0.3828125, "learning_rate": 0.0006113858059552404, "loss": 0.1867, "step": 65862 }, { "epoch": 0.11678231598283301, "grad_norm": 0.29296875, "learning_rate": 0.0006113358235998745, "loss": 0.1935, "step": 65864 }, { "epoch": 0.11678586214814284, "grad_norm": 0.376953125, "learning_rate": 0.0006112858433816421, "loss": 0.2357, "step": 65866 }, { "epoch": 0.11678940831345265, "grad_norm": 0.349609375, "learning_rate": 0.0006112358653007617, "loss": 0.1753, "step": 65868 }, { "epoch": 0.11679295447876246, "grad_norm": 0.388671875, "learning_rate": 0.0006111858893574518, "loss": 0.1764, "step": 65870 }, { "epoch": 0.11679650064407228, "grad_norm": 3.875, "learning_rate": 0.0006111359155519306, "loss": 0.2284, "step": 65872 }, { "epoch": 0.1168000468093821, "grad_norm": 0.27734375, "learning_rate": 0.0006110859438844167, "loss": 0.2016, "step": 65874 }, { "epoch": 0.11680359297469191, "grad_norm": 0.2177734375, "learning_rate": 0.0006110359743551294, "loss": 0.1894, "step": 65876 }, { "epoch": 0.11680713914000172, "grad_norm": 1.03125, "learning_rate": 0.0006109860069642866, "loss": 0.3768, "step": 65878 }, { "epoch": 0.11681068530531154, "grad_norm": 0.96484375, "learning_rate": 0.0006109360417121068, "loss": 0.3455, "step": 65880 }, { "epoch": 0.11681423147062135, "grad_norm": 4.125, "learning_rate": 0.0006108860785988086, "loss": 0.2198, "step": 65882 }, { "epoch": 0.11681777763593117, "grad_norm": 0.25390625, "learning_rate": 0.0006108361176246109, "loss": 0.1373, "step": 65884 }, { "epoch": 0.11682132380124098, "grad_norm": 0.275390625, "learning_rate": 0.0006107861587897317, "loss": 0.115, "step": 65886 }, { "epoch": 0.1168248699665508, "grad_norm": 0.416015625, "learning_rate": 0.0006107362020943897, "loss": 0.1235, "step": 65888 }, { "epoch": 0.11682841613186061, "grad_norm": 2.453125, "learning_rate": 0.0006106862475388034, "loss": 0.3617, "step": 65890 }, { "epoch": 0.11683196229717042, "grad_norm": 0.7890625, "learning_rate": 0.000610636295123191, "loss": 0.2445, "step": 65892 }, { "epoch": 0.11683550846248024, "grad_norm": 0.60546875, "learning_rate": 0.0006105863448477716, "loss": 0.1871, "step": 65894 }, { "epoch": 0.11683905462779005, "grad_norm": 0.2890625, "learning_rate": 0.0006105363967127629, "loss": 0.1619, "step": 65896 }, { "epoch": 0.11684260079309987, "grad_norm": 0.80078125, "learning_rate": 0.0006104864507183839, "loss": 0.1922, "step": 65898 }, { "epoch": 0.11684614695840968, "grad_norm": 1.859375, "learning_rate": 0.0006104365068648526, "loss": 0.2988, "step": 65900 }, { "epoch": 0.1168496931237195, "grad_norm": 0.48828125, "learning_rate": 0.0006103865651523879, "loss": 0.2107, "step": 65902 }, { "epoch": 0.11685323928902931, "grad_norm": 0.396484375, "learning_rate": 0.000610336625581208, "loss": 0.1558, "step": 65904 }, { "epoch": 0.11685678545433913, "grad_norm": 0.361328125, "learning_rate": 0.0006102866881515308, "loss": 0.1605, "step": 65906 }, { "epoch": 0.11686033161964894, "grad_norm": 0.44140625, "learning_rate": 0.0006102367528635754, "loss": 0.163, "step": 65908 }, { "epoch": 0.11686387778495876, "grad_norm": 0.82421875, "learning_rate": 0.0006101868197175602, "loss": 0.3027, "step": 65910 }, { "epoch": 0.11686742395026858, "grad_norm": 0.53515625, "learning_rate": 0.000610136888713703, "loss": 0.1535, "step": 65912 }, { "epoch": 0.1168709701155784, "grad_norm": 0.2109375, "learning_rate": 0.0006100869598522225, "loss": 0.1355, "step": 65914 }, { "epoch": 0.11687451628088821, "grad_norm": 1.171875, "learning_rate": 0.0006100370331333371, "loss": 0.3045, "step": 65916 }, { "epoch": 0.11687806244619803, "grad_norm": 0.4453125, "learning_rate": 0.0006099871085572655, "loss": 0.1646, "step": 65918 }, { "epoch": 0.11688160861150784, "grad_norm": 0.4453125, "learning_rate": 0.0006099371861242254, "loss": 0.2033, "step": 65920 }, { "epoch": 0.11688515477681766, "grad_norm": 0.2216796875, "learning_rate": 0.0006098872658344354, "loss": 0.1768, "step": 65922 }, { "epoch": 0.11688870094212747, "grad_norm": 0.189453125, "learning_rate": 0.0006098373476881134, "loss": 0.2861, "step": 65924 }, { "epoch": 0.11689224710743729, "grad_norm": 0.38671875, "learning_rate": 0.000609787431685479, "loss": 0.1747, "step": 65926 }, { "epoch": 0.1168957932727471, "grad_norm": 0.298828125, "learning_rate": 0.0006097375178267488, "loss": 0.1693, "step": 65928 }, { "epoch": 0.11689933943805692, "grad_norm": 0.9296875, "learning_rate": 0.0006096876061121424, "loss": 0.1812, "step": 65930 }, { "epoch": 0.11690288560336673, "grad_norm": 0.349609375, "learning_rate": 0.000609637696541877, "loss": 0.1669, "step": 65932 }, { "epoch": 0.11690643176867654, "grad_norm": 0.248046875, "learning_rate": 0.0006095877891161723, "loss": 0.1661, "step": 65934 }, { "epoch": 0.11690997793398636, "grad_norm": 0.66015625, "learning_rate": 0.0006095378838352451, "loss": 0.2186, "step": 65936 }, { "epoch": 0.11691352409929617, "grad_norm": 0.1787109375, "learning_rate": 0.0006094879806993146, "loss": 0.3754, "step": 65938 }, { "epoch": 0.11691707026460599, "grad_norm": 0.41796875, "learning_rate": 0.000609438079708599, "loss": 0.203, "step": 65940 }, { "epoch": 0.1169206164299158, "grad_norm": 0.1572265625, "learning_rate": 0.000609388180863316, "loss": 0.159, "step": 65942 }, { "epoch": 0.11692416259522562, "grad_norm": 0.322265625, "learning_rate": 0.0006093382841636841, "loss": 0.1917, "step": 65944 }, { "epoch": 0.11692770876053543, "grad_norm": 1.2578125, "learning_rate": 0.0006092883896099213, "loss": 0.5539, "step": 65946 }, { "epoch": 0.11693125492584525, "grad_norm": 0.384765625, "learning_rate": 0.0006092384972022463, "loss": 0.1283, "step": 65948 }, { "epoch": 0.11693480109115506, "grad_norm": 0.54296875, "learning_rate": 0.0006091886069408768, "loss": 0.2784, "step": 65950 }, { "epoch": 0.11693834725646488, "grad_norm": 0.72265625, "learning_rate": 0.000609138718826032, "loss": 0.1615, "step": 65952 }, { "epoch": 0.11694189342177469, "grad_norm": 0.1982421875, "learning_rate": 0.0006090888328579285, "loss": 0.1581, "step": 65954 }, { "epoch": 0.11694543958708452, "grad_norm": 1.015625, "learning_rate": 0.0006090389490367856, "loss": 0.2459, "step": 65956 }, { "epoch": 0.11694898575239433, "grad_norm": 0.283203125, "learning_rate": 0.0006089890673628212, "loss": 0.1885, "step": 65958 }, { "epoch": 0.11695253191770415, "grad_norm": 0.53515625, "learning_rate": 0.0006089391878362534, "loss": 0.1828, "step": 65960 }, { "epoch": 0.11695607808301396, "grad_norm": 0.42578125, "learning_rate": 0.0006088893104573003, "loss": 0.1786, "step": 65962 }, { "epoch": 0.11695962424832378, "grad_norm": 0.2734375, "learning_rate": 0.0006088394352261799, "loss": 0.1538, "step": 65964 }, { "epoch": 0.11696317041363359, "grad_norm": 0.376953125, "learning_rate": 0.0006087895621431108, "loss": 0.1608, "step": 65966 }, { "epoch": 0.1169667165789434, "grad_norm": 0.3359375, "learning_rate": 0.0006087396912083107, "loss": 0.179, "step": 65968 }, { "epoch": 0.11697026274425322, "grad_norm": 0.9609375, "learning_rate": 0.0006086898224219978, "loss": 0.2298, "step": 65970 }, { "epoch": 0.11697380890956303, "grad_norm": 0.26953125, "learning_rate": 0.00060863995578439, "loss": 0.1619, "step": 65972 }, { "epoch": 0.11697735507487285, "grad_norm": 0.267578125, "learning_rate": 0.0006085900912957061, "loss": 0.1444, "step": 65974 }, { "epoch": 0.11698090124018266, "grad_norm": 0.326171875, "learning_rate": 0.0006085402289561635, "loss": 0.1893, "step": 65976 }, { "epoch": 0.11698444740549248, "grad_norm": 0.2109375, "learning_rate": 0.0006084903687659805, "loss": 0.1696, "step": 65978 }, { "epoch": 0.11698799357080229, "grad_norm": 0.3828125, "learning_rate": 0.0006084405107253752, "loss": 0.134, "step": 65980 }, { "epoch": 0.11699153973611211, "grad_norm": 0.45703125, "learning_rate": 0.000608390654834565, "loss": 0.1485, "step": 65982 }, { "epoch": 0.11699508590142192, "grad_norm": 0.46875, "learning_rate": 0.0006083408010937694, "loss": 0.2316, "step": 65984 }, { "epoch": 0.11699863206673174, "grad_norm": 0.234375, "learning_rate": 0.000608290949503205, "loss": 0.1766, "step": 65986 }, { "epoch": 0.11700217823204155, "grad_norm": 1.3359375, "learning_rate": 0.0006082411000630906, "loss": 0.2024, "step": 65988 }, { "epoch": 0.11700572439735137, "grad_norm": 0.1376953125, "learning_rate": 0.0006081912527736435, "loss": 0.1783, "step": 65990 }, { "epoch": 0.11700927056266118, "grad_norm": 1.0625, "learning_rate": 0.0006081414076350829, "loss": 0.1699, "step": 65992 }, { "epoch": 0.117012816727971, "grad_norm": 0.400390625, "learning_rate": 0.0006080915646476255, "loss": 0.1856, "step": 65994 }, { "epoch": 0.11701636289328081, "grad_norm": 0.392578125, "learning_rate": 0.0006080417238114902, "loss": 0.173, "step": 65996 }, { "epoch": 0.11701990905859062, "grad_norm": 0.42578125, "learning_rate": 0.0006079918851268944, "loss": 0.1481, "step": 65998 }, { "epoch": 0.11702345522390044, "grad_norm": 0.7421875, "learning_rate": 0.0006079420485940565, "loss": 0.1811, "step": 66000 }, { "epoch": 0.11702700138921027, "grad_norm": 0.5078125, "learning_rate": 0.0006078922142131945, "loss": 0.1789, "step": 66002 }, { "epoch": 0.11703054755452008, "grad_norm": 0.55078125, "learning_rate": 0.0006078423819845253, "loss": 0.2165, "step": 66004 }, { "epoch": 0.1170340937198299, "grad_norm": 0.373046875, "learning_rate": 0.0006077925519082683, "loss": 0.126, "step": 66006 }, { "epoch": 0.11703763988513971, "grad_norm": 0.37109375, "learning_rate": 0.0006077427239846406, "loss": 0.1854, "step": 66008 }, { "epoch": 0.11704118605044952, "grad_norm": 0.4609375, "learning_rate": 0.0006076928982138603, "loss": 0.1456, "step": 66010 }, { "epoch": 0.11704473221575934, "grad_norm": 0.267578125, "learning_rate": 0.0006076430745961451, "loss": 0.1454, "step": 66012 }, { "epoch": 0.11704827838106915, "grad_norm": 0.24609375, "learning_rate": 0.0006075932531317132, "loss": 0.1904, "step": 66014 }, { "epoch": 0.11705182454637897, "grad_norm": 0.40234375, "learning_rate": 0.0006075434338207826, "loss": 0.1836, "step": 66016 }, { "epoch": 0.11705537071168878, "grad_norm": 0.3125, "learning_rate": 0.000607493616663571, "loss": 0.1646, "step": 66018 }, { "epoch": 0.1170589168769986, "grad_norm": 0.32421875, "learning_rate": 0.0006074438016602961, "loss": 0.1879, "step": 66020 }, { "epoch": 0.11706246304230841, "grad_norm": 0.5078125, "learning_rate": 0.0006073939888111756, "loss": 0.195, "step": 66022 }, { "epoch": 0.11706600920761823, "grad_norm": 0.44921875, "learning_rate": 0.0006073441781164284, "loss": 0.1722, "step": 66024 }, { "epoch": 0.11706955537292804, "grad_norm": 0.302734375, "learning_rate": 0.0006072943695762708, "loss": 0.3422, "step": 66026 }, { "epoch": 0.11707310153823786, "grad_norm": 1.5546875, "learning_rate": 0.0006072445631909218, "loss": 0.1926, "step": 66028 }, { "epoch": 0.11707664770354767, "grad_norm": 0.427734375, "learning_rate": 0.0006071947589605986, "loss": 0.1591, "step": 66030 }, { "epoch": 0.11708019386885749, "grad_norm": 0.2255859375, "learning_rate": 0.0006071449568855198, "loss": 0.1536, "step": 66032 }, { "epoch": 0.1170837400341673, "grad_norm": 0.42578125, "learning_rate": 0.0006070951569659021, "loss": 0.3907, "step": 66034 }, { "epoch": 0.11708728619947711, "grad_norm": 0.6015625, "learning_rate": 0.0006070453592019642, "loss": 0.1814, "step": 66036 }, { "epoch": 0.11709083236478693, "grad_norm": 3.203125, "learning_rate": 0.0006069955635939235, "loss": 0.1937, "step": 66038 }, { "epoch": 0.11709437853009674, "grad_norm": 1.5390625, "learning_rate": 0.0006069457701419976, "loss": 0.2064, "step": 66040 }, { "epoch": 0.11709792469540656, "grad_norm": 0.296875, "learning_rate": 0.0006068959788464052, "loss": 0.2276, "step": 66042 }, { "epoch": 0.11710147086071637, "grad_norm": 0.55078125, "learning_rate": 0.0006068461897073625, "loss": 0.204, "step": 66044 }, { "epoch": 0.11710501702602619, "grad_norm": 1.875, "learning_rate": 0.0006067964027250885, "loss": 0.1664, "step": 66046 }, { "epoch": 0.11710856319133602, "grad_norm": 0.69921875, "learning_rate": 0.0006067466178998002, "loss": 0.1701, "step": 66048 }, { "epoch": 0.11711210935664583, "grad_norm": 1.8984375, "learning_rate": 0.0006066968352317165, "loss": 0.2228, "step": 66050 }, { "epoch": 0.11711565552195564, "grad_norm": 2.65625, "learning_rate": 0.0006066470547210536, "loss": 0.46, "step": 66052 }, { "epoch": 0.11711920168726546, "grad_norm": 0.59375, "learning_rate": 0.0006065972763680301, "loss": 0.1657, "step": 66054 }, { "epoch": 0.11712274785257527, "grad_norm": 0.3203125, "learning_rate": 0.0006065475001728637, "loss": 0.4602, "step": 66056 }, { "epoch": 0.11712629401788509, "grad_norm": 1.09375, "learning_rate": 0.0006064977261357717, "loss": 0.1771, "step": 66058 }, { "epoch": 0.1171298401831949, "grad_norm": 0.52734375, "learning_rate": 0.000606447954256972, "loss": 0.2358, "step": 66060 }, { "epoch": 0.11713338634850472, "grad_norm": 0.37109375, "learning_rate": 0.0006063981845366821, "loss": 0.2062, "step": 66062 }, { "epoch": 0.11713693251381453, "grad_norm": 0.267578125, "learning_rate": 0.0006063484169751201, "loss": 0.1926, "step": 66064 }, { "epoch": 0.11714047867912435, "grad_norm": 0.54296875, "learning_rate": 0.0006062986515725033, "loss": 0.2047, "step": 66066 }, { "epoch": 0.11714402484443416, "grad_norm": 0.83984375, "learning_rate": 0.0006062488883290495, "loss": 0.1909, "step": 66068 }, { "epoch": 0.11714757100974398, "grad_norm": 0.57421875, "learning_rate": 0.0006061991272449758, "loss": 0.2367, "step": 66070 }, { "epoch": 0.11715111717505379, "grad_norm": 3.3125, "learning_rate": 0.0006061493683205007, "loss": 0.1844, "step": 66072 }, { "epoch": 0.1171546633403636, "grad_norm": 1.5390625, "learning_rate": 0.0006060996115558415, "loss": 0.1514, "step": 66074 }, { "epoch": 0.11715820950567342, "grad_norm": 1.4765625, "learning_rate": 0.0006060498569512156, "loss": 0.1714, "step": 66076 }, { "epoch": 0.11716175567098323, "grad_norm": 0.51171875, "learning_rate": 0.0006060001045068408, "loss": 0.2025, "step": 66078 }, { "epoch": 0.11716530183629305, "grad_norm": 0.291015625, "learning_rate": 0.0006059503542229341, "loss": 0.1824, "step": 66080 }, { "epoch": 0.11716884800160286, "grad_norm": 0.33984375, "learning_rate": 0.0006059006060997143, "loss": 0.1768, "step": 66082 }, { "epoch": 0.11717239416691268, "grad_norm": 0.77734375, "learning_rate": 0.0006058508601373975, "loss": 0.2963, "step": 66084 }, { "epoch": 0.11717594033222249, "grad_norm": 0.71484375, "learning_rate": 0.0006058011163362025, "loss": 0.2335, "step": 66086 }, { "epoch": 0.1171794864975323, "grad_norm": 0.310546875, "learning_rate": 0.0006057513746963463, "loss": 0.133, "step": 66088 }, { "epoch": 0.11718303266284212, "grad_norm": 0.341796875, "learning_rate": 0.0006057016352180464, "loss": 0.1475, "step": 66090 }, { "epoch": 0.11718657882815195, "grad_norm": 0.546875, "learning_rate": 0.0006056518979015204, "loss": 0.1342, "step": 66092 }, { "epoch": 0.11719012499346176, "grad_norm": 0.22265625, "learning_rate": 0.0006056021627469856, "loss": 0.2964, "step": 66094 }, { "epoch": 0.11719367115877158, "grad_norm": 0.345703125, "learning_rate": 0.0006055524297546603, "loss": 0.1497, "step": 66096 }, { "epoch": 0.11719721732408139, "grad_norm": 0.3125, "learning_rate": 0.0006055026989247612, "loss": 0.2666, "step": 66098 }, { "epoch": 0.11720076348939121, "grad_norm": 0.53515625, "learning_rate": 0.0006054529702575061, "loss": 0.2519, "step": 66100 }, { "epoch": 0.11720430965470102, "grad_norm": 0.83203125, "learning_rate": 0.0006054032437531121, "loss": 0.1904, "step": 66102 }, { "epoch": 0.11720785582001084, "grad_norm": 0.259765625, "learning_rate": 0.0006053535194117975, "loss": 0.1808, "step": 66104 }, { "epoch": 0.11721140198532065, "grad_norm": 0.25390625, "learning_rate": 0.0006053037972337792, "loss": 0.2076, "step": 66106 }, { "epoch": 0.11721494815063047, "grad_norm": 0.26171875, "learning_rate": 0.0006052540772192747, "loss": 0.1697, "step": 66108 }, { "epoch": 0.11721849431594028, "grad_norm": 1.6484375, "learning_rate": 0.0006052043593685013, "loss": 0.2301, "step": 66110 }, { "epoch": 0.1172220404812501, "grad_norm": 0.5, "learning_rate": 0.0006051546436816766, "loss": 0.194, "step": 66112 }, { "epoch": 0.11722558664655991, "grad_norm": 0.416015625, "learning_rate": 0.0006051049301590185, "loss": 0.202, "step": 66114 }, { "epoch": 0.11722913281186972, "grad_norm": 0.671875, "learning_rate": 0.0006050552188007434, "loss": 0.1603, "step": 66116 }, { "epoch": 0.11723267897717954, "grad_norm": 0.236328125, "learning_rate": 0.0006050055096070696, "loss": 0.1094, "step": 66118 }, { "epoch": 0.11723622514248935, "grad_norm": 0.68359375, "learning_rate": 0.0006049558025782139, "loss": 0.1781, "step": 66120 }, { "epoch": 0.11723977130779917, "grad_norm": 0.36328125, "learning_rate": 0.0006049060977143941, "loss": 0.1686, "step": 66122 }, { "epoch": 0.11724331747310898, "grad_norm": 0.5546875, "learning_rate": 0.0006048563950158276, "loss": 0.25, "step": 66124 }, { "epoch": 0.1172468636384188, "grad_norm": 0.333984375, "learning_rate": 0.0006048066944827316, "loss": 0.1838, "step": 66126 }, { "epoch": 0.11725040980372861, "grad_norm": 0.349609375, "learning_rate": 0.0006047569961153235, "loss": 0.2098, "step": 66128 }, { "epoch": 0.11725395596903843, "grad_norm": 0.53125, "learning_rate": 0.0006047072999138202, "loss": 0.161, "step": 66130 }, { "epoch": 0.11725750213434824, "grad_norm": 2.015625, "learning_rate": 0.0006046576058784403, "loss": 0.3846, "step": 66132 }, { "epoch": 0.11726104829965805, "grad_norm": 0.447265625, "learning_rate": 0.0006046079140093994, "loss": 0.2058, "step": 66134 }, { "epoch": 0.11726459446496787, "grad_norm": 2.953125, "learning_rate": 0.0006045582243069163, "loss": 0.1571, "step": 66136 }, { "epoch": 0.1172681406302777, "grad_norm": 0.6953125, "learning_rate": 0.0006045085367712074, "loss": 0.1666, "step": 66138 }, { "epoch": 0.11727168679558751, "grad_norm": 0.494140625, "learning_rate": 0.0006044588514024907, "loss": 0.192, "step": 66140 }, { "epoch": 0.11727523296089733, "grad_norm": 0.2216796875, "learning_rate": 0.0006044091682009826, "loss": 0.1378, "step": 66142 }, { "epoch": 0.11727877912620714, "grad_norm": 0.57421875, "learning_rate": 0.0006043594871669016, "loss": 0.1752, "step": 66144 }, { "epoch": 0.11728232529151696, "grad_norm": 1.203125, "learning_rate": 0.0006043098083004639, "loss": 0.1953, "step": 66146 }, { "epoch": 0.11728587145682677, "grad_norm": 0.46875, "learning_rate": 0.0006042601316018873, "loss": 0.1651, "step": 66148 }, { "epoch": 0.11728941762213659, "grad_norm": 1.359375, "learning_rate": 0.0006042104570713889, "loss": 0.1699, "step": 66150 }, { "epoch": 0.1172929637874464, "grad_norm": 0.408203125, "learning_rate": 0.0006041607847091857, "loss": 0.1692, "step": 66152 }, { "epoch": 0.11729650995275621, "grad_norm": 0.578125, "learning_rate": 0.0006041111145154956, "loss": 0.1327, "step": 66154 }, { "epoch": 0.11730005611806603, "grad_norm": 0.51171875, "learning_rate": 0.0006040614464905353, "loss": 0.1753, "step": 66156 }, { "epoch": 0.11730360228337584, "grad_norm": 0.5078125, "learning_rate": 0.0006040117806345222, "loss": 0.1705, "step": 66158 }, { "epoch": 0.11730714844868566, "grad_norm": 0.5625, "learning_rate": 0.0006039621169476731, "loss": 0.2004, "step": 66160 }, { "epoch": 0.11731069461399547, "grad_norm": 0.5859375, "learning_rate": 0.000603912455430206, "loss": 0.1907, "step": 66162 }, { "epoch": 0.11731424077930529, "grad_norm": 0.359375, "learning_rate": 0.0006038627960823376, "loss": 0.157, "step": 66164 }, { "epoch": 0.1173177869446151, "grad_norm": 0.40234375, "learning_rate": 0.0006038131389042851, "loss": 0.2051, "step": 66166 }, { "epoch": 0.11732133310992492, "grad_norm": 0.2578125, "learning_rate": 0.0006037634838962658, "loss": 0.1629, "step": 66168 }, { "epoch": 0.11732487927523473, "grad_norm": 0.326171875, "learning_rate": 0.0006037138310584964, "loss": 0.1507, "step": 66170 }, { "epoch": 0.11732842544054455, "grad_norm": 0.328125, "learning_rate": 0.0006036641803911951, "loss": 0.1969, "step": 66172 }, { "epoch": 0.11733197160585436, "grad_norm": 0.4765625, "learning_rate": 0.0006036145318945776, "loss": 0.2125, "step": 66174 }, { "epoch": 0.11733551777116417, "grad_norm": 0.31640625, "learning_rate": 0.0006035648855688623, "loss": 0.1771, "step": 66176 }, { "epoch": 0.11733906393647399, "grad_norm": 0.64453125, "learning_rate": 0.0006035152414142653, "loss": 0.2395, "step": 66178 }, { "epoch": 0.1173426101017838, "grad_norm": 0.55859375, "learning_rate": 0.000603465599431005, "loss": 0.1526, "step": 66180 }, { "epoch": 0.11734615626709362, "grad_norm": 0.33203125, "learning_rate": 0.0006034159596192972, "loss": 0.1591, "step": 66182 }, { "epoch": 0.11734970243240345, "grad_norm": 0.3671875, "learning_rate": 0.0006033663219793597, "loss": 0.1526, "step": 66184 }, { "epoch": 0.11735324859771326, "grad_norm": 0.3984375, "learning_rate": 0.0006033166865114096, "loss": 0.1643, "step": 66186 }, { "epoch": 0.11735679476302308, "grad_norm": 2.09375, "learning_rate": 0.0006032670532156636, "loss": 0.2091, "step": 66188 }, { "epoch": 0.11736034092833289, "grad_norm": 0.4375, "learning_rate": 0.0006032174220923392, "loss": 0.1829, "step": 66190 }, { "epoch": 0.1173638870936427, "grad_norm": 0.5, "learning_rate": 0.0006031677931416527, "loss": 0.204, "step": 66192 }, { "epoch": 0.11736743325895252, "grad_norm": 0.55859375, "learning_rate": 0.0006031181663638221, "loss": 0.1922, "step": 66194 }, { "epoch": 0.11737097942426233, "grad_norm": 0.22265625, "learning_rate": 0.000603068541759064, "loss": 0.1745, "step": 66196 }, { "epoch": 0.11737452558957215, "grad_norm": 0.41796875, "learning_rate": 0.0006030189193275956, "loss": 0.178, "step": 66198 }, { "epoch": 0.11737807175488196, "grad_norm": 0.80078125, "learning_rate": 0.0006029692990696332, "loss": 0.2226, "step": 66200 }, { "epoch": 0.11738161792019178, "grad_norm": 0.44140625, "learning_rate": 0.0006029196809853949, "loss": 0.2292, "step": 66202 }, { "epoch": 0.11738516408550159, "grad_norm": 0.365234375, "learning_rate": 0.0006028700650750969, "loss": 0.2042, "step": 66204 }, { "epoch": 0.1173887102508114, "grad_norm": 0.2373046875, "learning_rate": 0.0006028204513389567, "loss": 0.1959, "step": 66206 }, { "epoch": 0.11739225641612122, "grad_norm": 0.97265625, "learning_rate": 0.0006027708397771909, "loss": 0.1669, "step": 66208 }, { "epoch": 0.11739580258143104, "grad_norm": 0.298828125, "learning_rate": 0.0006027212303900165, "loss": 0.2067, "step": 66210 }, { "epoch": 0.11739934874674085, "grad_norm": 2.65625, "learning_rate": 0.0006026716231776508, "loss": 0.1477, "step": 66212 }, { "epoch": 0.11740289491205066, "grad_norm": 1.859375, "learning_rate": 0.0006026220181403106, "loss": 0.1798, "step": 66214 }, { "epoch": 0.11740644107736048, "grad_norm": 0.6953125, "learning_rate": 0.0006025724152782126, "loss": 0.192, "step": 66216 }, { "epoch": 0.1174099872426703, "grad_norm": 0.421875, "learning_rate": 0.0006025228145915737, "loss": 0.1842, "step": 66218 }, { "epoch": 0.11741353340798011, "grad_norm": 0.5546875, "learning_rate": 0.0006024732160806115, "loss": 0.1635, "step": 66220 }, { "epoch": 0.11741707957328992, "grad_norm": 2.109375, "learning_rate": 0.0006024236197455423, "loss": 0.3975, "step": 66222 }, { "epoch": 0.11742062573859974, "grad_norm": 1.9140625, "learning_rate": 0.000602374025586583, "loss": 0.2157, "step": 66224 }, { "epoch": 0.11742417190390955, "grad_norm": 0.462890625, "learning_rate": 0.000602324433603951, "loss": 0.1904, "step": 66226 }, { "epoch": 0.11742771806921938, "grad_norm": 0.283203125, "learning_rate": 0.0006022748437978623, "loss": 0.1479, "step": 66228 }, { "epoch": 0.1174312642345292, "grad_norm": 0.4921875, "learning_rate": 0.000602225256168535, "loss": 0.1627, "step": 66230 }, { "epoch": 0.11743481039983901, "grad_norm": 0.279296875, "learning_rate": 0.0006021756707161848, "loss": 0.1467, "step": 66232 }, { "epoch": 0.11743835656514882, "grad_norm": 1.71875, "learning_rate": 0.0006021260874410292, "loss": 0.3747, "step": 66234 }, { "epoch": 0.11744190273045864, "grad_norm": 0.306640625, "learning_rate": 0.0006020765063432845, "loss": 0.1499, "step": 66236 }, { "epoch": 0.11744544889576845, "grad_norm": 0.58984375, "learning_rate": 0.0006020269274231688, "loss": 0.1568, "step": 66238 }, { "epoch": 0.11744899506107827, "grad_norm": 0.451171875, "learning_rate": 0.0006019773506808974, "loss": 0.3715, "step": 66240 }, { "epoch": 0.11745254122638808, "grad_norm": 0.18359375, "learning_rate": 0.0006019277761166879, "loss": 0.1106, "step": 66242 }, { "epoch": 0.1174560873916979, "grad_norm": 0.328125, "learning_rate": 0.0006018782037307571, "loss": 0.1179, "step": 66244 }, { "epoch": 0.11745963355700771, "grad_norm": 0.62890625, "learning_rate": 0.0006018286335233216, "loss": 0.1463, "step": 66246 }, { "epoch": 0.11746317972231753, "grad_norm": 0.4140625, "learning_rate": 0.0006017790654945984, "loss": 0.1419, "step": 66248 }, { "epoch": 0.11746672588762734, "grad_norm": 0.21875, "learning_rate": 0.0006017294996448039, "loss": 0.175, "step": 66250 }, { "epoch": 0.11747027205293716, "grad_norm": 0.474609375, "learning_rate": 0.0006016799359741554, "loss": 0.4763, "step": 66252 }, { "epoch": 0.11747381821824697, "grad_norm": 0.2109375, "learning_rate": 0.0006016303744828693, "loss": 0.1152, "step": 66254 }, { "epoch": 0.11747736438355678, "grad_norm": 0.40234375, "learning_rate": 0.0006015808151711626, "loss": 0.2028, "step": 66256 }, { "epoch": 0.1174809105488666, "grad_norm": 0.203125, "learning_rate": 0.0006015312580392518, "loss": 0.1826, "step": 66258 }, { "epoch": 0.11748445671417641, "grad_norm": 0.609375, "learning_rate": 0.0006014817030873534, "loss": 0.6076, "step": 66260 }, { "epoch": 0.11748800287948623, "grad_norm": 0.48828125, "learning_rate": 0.0006014321503156852, "loss": 0.1722, "step": 66262 }, { "epoch": 0.11749154904479604, "grad_norm": 0.216796875, "learning_rate": 0.0006013825997244624, "loss": 0.3208, "step": 66264 }, { "epoch": 0.11749509521010586, "grad_norm": 0.64453125, "learning_rate": 0.000601333051313903, "loss": 0.1213, "step": 66266 }, { "epoch": 0.11749864137541567, "grad_norm": 0.365234375, "learning_rate": 0.0006012835050842227, "loss": 0.2253, "step": 66268 }, { "epoch": 0.11750218754072549, "grad_norm": 0.294921875, "learning_rate": 0.0006012339610356395, "loss": 0.1832, "step": 66270 }, { "epoch": 0.1175057337060353, "grad_norm": 0.46875, "learning_rate": 0.0006011844191683683, "loss": 0.2155, "step": 66272 }, { "epoch": 0.11750927987134513, "grad_norm": 0.443359375, "learning_rate": 0.000601134879482627, "loss": 0.1505, "step": 66274 }, { "epoch": 0.11751282603665494, "grad_norm": 0.5859375, "learning_rate": 0.0006010853419786322, "loss": 0.1361, "step": 66276 }, { "epoch": 0.11751637220196476, "grad_norm": 0.78125, "learning_rate": 0.0006010358066566002, "loss": 0.1572, "step": 66278 }, { "epoch": 0.11751991836727457, "grad_norm": 1.1328125, "learning_rate": 0.0006009862735167478, "loss": 0.2113, "step": 66280 }, { "epoch": 0.11752346453258439, "grad_norm": 1.1484375, "learning_rate": 0.0006009367425592912, "loss": 0.2369, "step": 66282 }, { "epoch": 0.1175270106978942, "grad_norm": 0.67578125, "learning_rate": 0.0006008872137844478, "loss": 0.1762, "step": 66284 }, { "epoch": 0.11753055686320402, "grad_norm": 0.82421875, "learning_rate": 0.0006008376871924335, "loss": 0.1955, "step": 66286 }, { "epoch": 0.11753410302851383, "grad_norm": 0.41796875, "learning_rate": 0.0006007881627834657, "loss": 0.176, "step": 66288 }, { "epoch": 0.11753764919382365, "grad_norm": 0.486328125, "learning_rate": 0.0006007386405577599, "loss": 0.1853, "step": 66290 }, { "epoch": 0.11754119535913346, "grad_norm": 1.421875, "learning_rate": 0.0006006891205155337, "loss": 0.2114, "step": 66292 }, { "epoch": 0.11754474152444327, "grad_norm": 0.30859375, "learning_rate": 0.0006006396026570031, "loss": 0.1477, "step": 66294 }, { "epoch": 0.11754828768975309, "grad_norm": 0.2060546875, "learning_rate": 0.0006005900869823851, "loss": 0.1802, "step": 66296 }, { "epoch": 0.1175518338550629, "grad_norm": 0.66796875, "learning_rate": 0.0006005405734918957, "loss": 0.1932, "step": 66298 }, { "epoch": 0.11755538002037272, "grad_norm": 0.58984375, "learning_rate": 0.0006004910621857515, "loss": 0.2047, "step": 66300 }, { "epoch": 0.11755892618568253, "grad_norm": 0.328125, "learning_rate": 0.0006004415530641695, "loss": 0.2239, "step": 66302 }, { "epoch": 0.11756247235099235, "grad_norm": 0.38671875, "learning_rate": 0.000600392046127366, "loss": 0.2088, "step": 66304 }, { "epoch": 0.11756601851630216, "grad_norm": 0.6953125, "learning_rate": 0.0006003425413755575, "loss": 0.1721, "step": 66306 }, { "epoch": 0.11756956468161198, "grad_norm": 0.1318359375, "learning_rate": 0.0006002930388089601, "loss": 0.146, "step": 66308 }, { "epoch": 0.11757311084692179, "grad_norm": 0.451171875, "learning_rate": 0.0006002435384277912, "loss": 0.1989, "step": 66310 }, { "epoch": 0.1175766570122316, "grad_norm": 0.76953125, "learning_rate": 0.0006001940402322665, "loss": 0.1791, "step": 66312 }, { "epoch": 0.11758020317754142, "grad_norm": 0.134765625, "learning_rate": 0.0006001445442226028, "loss": 0.1612, "step": 66314 }, { "epoch": 0.11758374934285123, "grad_norm": 0.54296875, "learning_rate": 0.0006000950503990167, "loss": 0.1966, "step": 66316 }, { "epoch": 0.11758729550816105, "grad_norm": 0.4609375, "learning_rate": 0.000600045558761724, "loss": 0.219, "step": 66318 }, { "epoch": 0.11759084167347088, "grad_norm": 0.291015625, "learning_rate": 0.0005999960693109423, "loss": 0.1413, "step": 66320 }, { "epoch": 0.11759438783878069, "grad_norm": 0.51171875, "learning_rate": 0.0005999465820468868, "loss": 0.1911, "step": 66322 }, { "epoch": 0.1175979340040905, "grad_norm": 0.498046875, "learning_rate": 0.0005998970969697746, "loss": 0.2082, "step": 66324 }, { "epoch": 0.11760148016940032, "grad_norm": 0.55859375, "learning_rate": 0.0005998476140798219, "loss": 0.153, "step": 66326 }, { "epoch": 0.11760502633471014, "grad_norm": 0.50390625, "learning_rate": 0.0005997981333772457, "loss": 0.1825, "step": 66328 }, { "epoch": 0.11760857250001995, "grad_norm": 0.77734375, "learning_rate": 0.0005997486548622613, "loss": 0.1878, "step": 66330 }, { "epoch": 0.11761211866532977, "grad_norm": 0.31640625, "learning_rate": 0.000599699178535086, "loss": 0.2023, "step": 66332 }, { "epoch": 0.11761566483063958, "grad_norm": 0.419921875, "learning_rate": 0.000599649704395936, "loss": 0.1807, "step": 66334 }, { "epoch": 0.1176192109959494, "grad_norm": 0.43359375, "learning_rate": 0.0005996002324450276, "loss": 0.1737, "step": 66336 }, { "epoch": 0.11762275716125921, "grad_norm": 0.4375, "learning_rate": 0.0005995507626825769, "loss": 0.1845, "step": 66338 }, { "epoch": 0.11762630332656902, "grad_norm": 0.44921875, "learning_rate": 0.0005995012951088002, "loss": 0.2011, "step": 66340 }, { "epoch": 0.11762984949187884, "grad_norm": 0.37109375, "learning_rate": 0.0005994518297239144, "loss": 0.1971, "step": 66342 }, { "epoch": 0.11763339565718865, "grad_norm": 0.486328125, "learning_rate": 0.0005994023665281357, "loss": 0.1809, "step": 66344 }, { "epoch": 0.11763694182249847, "grad_norm": 0.291015625, "learning_rate": 0.0005993529055216803, "loss": 0.1189, "step": 66346 }, { "epoch": 0.11764048798780828, "grad_norm": 0.31640625, "learning_rate": 0.000599303446704764, "loss": 0.1776, "step": 66348 }, { "epoch": 0.1176440341531181, "grad_norm": 2.09375, "learning_rate": 0.000599253990077604, "loss": 0.1489, "step": 66350 }, { "epoch": 0.11764758031842791, "grad_norm": 0.46875, "learning_rate": 0.0005992045356404161, "loss": 0.2373, "step": 66352 }, { "epoch": 0.11765112648373773, "grad_norm": 0.5625, "learning_rate": 0.0005991550833934166, "loss": 0.149, "step": 66354 }, { "epoch": 0.11765467264904754, "grad_norm": 0.65234375, "learning_rate": 0.0005991056333368221, "loss": 0.2219, "step": 66356 }, { "epoch": 0.11765821881435735, "grad_norm": 0.5078125, "learning_rate": 0.000599056185470848, "loss": 0.1768, "step": 66358 }, { "epoch": 0.11766176497966717, "grad_norm": 0.3671875, "learning_rate": 0.000599006739795712, "loss": 0.2404, "step": 66360 }, { "epoch": 0.11766531114497698, "grad_norm": 0.25390625, "learning_rate": 0.0005989572963116288, "loss": 0.146, "step": 66362 }, { "epoch": 0.11766885731028681, "grad_norm": 0.78515625, "learning_rate": 0.0005989078550188156, "loss": 0.1726, "step": 66364 }, { "epoch": 0.11767240347559663, "grad_norm": 0.63671875, "learning_rate": 0.0005988584159174881, "loss": 0.1306, "step": 66366 }, { "epoch": 0.11767594964090644, "grad_norm": 1.515625, "learning_rate": 0.0005988089790078632, "loss": 0.3233, "step": 66368 }, { "epoch": 0.11767949580621626, "grad_norm": 0.203125, "learning_rate": 0.0005987595442901566, "loss": 0.1499, "step": 66370 }, { "epoch": 0.11768304197152607, "grad_norm": 0.80078125, "learning_rate": 0.0005987101117645846, "loss": 0.242, "step": 66372 }, { "epoch": 0.11768658813683588, "grad_norm": 0.89453125, "learning_rate": 0.0005986606814313636, "loss": 0.1784, "step": 66374 }, { "epoch": 0.1176901343021457, "grad_norm": 1.359375, "learning_rate": 0.0005986112532907089, "loss": 0.1926, "step": 66376 }, { "epoch": 0.11769368046745551, "grad_norm": 0.302734375, "learning_rate": 0.0005985618273428382, "loss": 0.1556, "step": 66378 }, { "epoch": 0.11769722663276533, "grad_norm": 0.4765625, "learning_rate": 0.0005985124035879662, "loss": 0.1844, "step": 66380 }, { "epoch": 0.11770077279807514, "grad_norm": 0.52734375, "learning_rate": 0.0005984629820263097, "loss": 0.1968, "step": 66382 }, { "epoch": 0.11770431896338496, "grad_norm": 0.27734375, "learning_rate": 0.0005984135626580846, "loss": 0.1377, "step": 66384 }, { "epoch": 0.11770786512869477, "grad_norm": 0.5, "learning_rate": 0.0005983641454835079, "loss": 0.157, "step": 66386 }, { "epoch": 0.11771141129400459, "grad_norm": 0.357421875, "learning_rate": 0.0005983147305027944, "loss": 0.2765, "step": 66388 }, { "epoch": 0.1177149574593144, "grad_norm": 0.3359375, "learning_rate": 0.0005982653177161612, "loss": 0.1843, "step": 66390 }, { "epoch": 0.11771850362462422, "grad_norm": 1.09375, "learning_rate": 0.000598215907123824, "loss": 0.1845, "step": 66392 }, { "epoch": 0.11772204978993403, "grad_norm": 0.318359375, "learning_rate": 0.0005981664987259991, "loss": 0.1966, "step": 66394 }, { "epoch": 0.11772559595524384, "grad_norm": 0.80078125, "learning_rate": 0.0005981170925229022, "loss": 0.1742, "step": 66396 }, { "epoch": 0.11772914212055366, "grad_norm": 0.279296875, "learning_rate": 0.0005980676885147494, "loss": 0.1563, "step": 66398 }, { "epoch": 0.11773268828586347, "grad_norm": 0.2158203125, "learning_rate": 0.0005980182867017574, "loss": 0.1417, "step": 66400 }, { "epoch": 0.11773623445117329, "grad_norm": 0.2236328125, "learning_rate": 0.0005979688870841418, "loss": 0.1747, "step": 66402 }, { "epoch": 0.1177397806164831, "grad_norm": 0.1630859375, "learning_rate": 0.0005979194896621185, "loss": 0.1381, "step": 66404 }, { "epoch": 0.11774332678179292, "grad_norm": 0.5078125, "learning_rate": 0.0005978700944359035, "loss": 0.1259, "step": 66406 }, { "epoch": 0.11774687294710273, "grad_norm": 0.419921875, "learning_rate": 0.0005978207014057133, "loss": 0.1598, "step": 66408 }, { "epoch": 0.11775041911241256, "grad_norm": 0.4296875, "learning_rate": 0.0005977713105717637, "loss": 0.1726, "step": 66410 }, { "epoch": 0.11775396527772237, "grad_norm": 0.494140625, "learning_rate": 0.0005977219219342706, "loss": 0.1752, "step": 66412 }, { "epoch": 0.11775751144303219, "grad_norm": 0.69140625, "learning_rate": 0.0005976725354934502, "loss": 0.2104, "step": 66414 }, { "epoch": 0.117761057608342, "grad_norm": 0.2373046875, "learning_rate": 0.0005976231512495179, "loss": 0.1377, "step": 66416 }, { "epoch": 0.11776460377365182, "grad_norm": 0.494140625, "learning_rate": 0.0005975737692026908, "loss": 0.1539, "step": 66418 }, { "epoch": 0.11776814993896163, "grad_norm": 0.32421875, "learning_rate": 0.0005975243893531837, "loss": 0.2008, "step": 66420 }, { "epoch": 0.11777169610427145, "grad_norm": 0.91796875, "learning_rate": 0.0005974750117012131, "loss": 0.2756, "step": 66422 }, { "epoch": 0.11777524226958126, "grad_norm": 1.203125, "learning_rate": 0.0005974256362469946, "loss": 0.2036, "step": 66424 }, { "epoch": 0.11777878843489108, "grad_norm": 1.125, "learning_rate": 0.0005973762629907452, "loss": 0.2389, "step": 66426 }, { "epoch": 0.11778233460020089, "grad_norm": 0.455078125, "learning_rate": 0.0005973268919326794, "loss": 0.1942, "step": 66428 }, { "epoch": 0.1177858807655107, "grad_norm": 0.4140625, "learning_rate": 0.0005972775230730141, "loss": 0.1507, "step": 66430 }, { "epoch": 0.11778942693082052, "grad_norm": 0.275390625, "learning_rate": 0.000597228156411965, "loss": 0.1312, "step": 66432 }, { "epoch": 0.11779297309613034, "grad_norm": 0.2119140625, "learning_rate": 0.0005971787919497477, "loss": 0.1544, "step": 66434 }, { "epoch": 0.11779651926144015, "grad_norm": 0.359375, "learning_rate": 0.0005971294296865784, "loss": 0.2347, "step": 66436 }, { "epoch": 0.11780006542674996, "grad_norm": 0.7890625, "learning_rate": 0.0005970800696226726, "loss": 0.1256, "step": 66438 }, { "epoch": 0.11780361159205978, "grad_norm": 0.474609375, "learning_rate": 0.0005970307117582468, "loss": 0.1615, "step": 66440 }, { "epoch": 0.1178071577573696, "grad_norm": 0.53515625, "learning_rate": 0.0005969813560935162, "loss": 0.1647, "step": 66442 }, { "epoch": 0.11781070392267941, "grad_norm": 0.322265625, "learning_rate": 0.0005969320026286973, "loss": 0.1556, "step": 66444 }, { "epoch": 0.11781425008798922, "grad_norm": 0.1826171875, "learning_rate": 0.0005968826513640053, "loss": 0.1906, "step": 66446 }, { "epoch": 0.11781779625329904, "grad_norm": 1.1015625, "learning_rate": 0.0005968333022996564, "loss": 0.2237, "step": 66448 }, { "epoch": 0.11782134241860885, "grad_norm": 0.310546875, "learning_rate": 0.0005967839554358667, "loss": 0.1783, "step": 66450 }, { "epoch": 0.11782488858391867, "grad_norm": 0.6328125, "learning_rate": 0.0005967346107728512, "loss": 0.1904, "step": 66452 }, { "epoch": 0.11782843474922848, "grad_norm": 0.353515625, "learning_rate": 0.0005966852683108264, "loss": 0.1784, "step": 66454 }, { "epoch": 0.11783198091453831, "grad_norm": 0.39453125, "learning_rate": 0.0005966359280500076, "loss": 0.1455, "step": 66456 }, { "epoch": 0.11783552707984812, "grad_norm": 1.8671875, "learning_rate": 0.0005965865899906112, "loss": 0.2493, "step": 66458 }, { "epoch": 0.11783907324515794, "grad_norm": 0.32421875, "learning_rate": 0.0005965372541328526, "loss": 0.1584, "step": 66460 }, { "epoch": 0.11784261941046775, "grad_norm": 0.404296875, "learning_rate": 0.0005964879204769476, "loss": 0.3928, "step": 66462 }, { "epoch": 0.11784616557577757, "grad_norm": 0.376953125, "learning_rate": 0.0005964385890231119, "loss": 0.182, "step": 66464 }, { "epoch": 0.11784971174108738, "grad_norm": 0.69140625, "learning_rate": 0.0005963892597715612, "loss": 0.133, "step": 66466 }, { "epoch": 0.1178532579063972, "grad_norm": 0.7578125, "learning_rate": 0.0005963399327225118, "loss": 0.2036, "step": 66468 }, { "epoch": 0.11785680407170701, "grad_norm": 0.474609375, "learning_rate": 0.0005962906078761784, "loss": 0.1953, "step": 66470 }, { "epoch": 0.11786035023701683, "grad_norm": 0.21875, "learning_rate": 0.0005962412852327776, "loss": 0.1945, "step": 66472 }, { "epoch": 0.11786389640232664, "grad_norm": 0.48046875, "learning_rate": 0.0005961919647925244, "loss": 0.1574, "step": 66474 }, { "epoch": 0.11786744256763645, "grad_norm": 0.39453125, "learning_rate": 0.0005961426465556357, "loss": 0.2036, "step": 66476 }, { "epoch": 0.11787098873294627, "grad_norm": 0.35546875, "learning_rate": 0.0005960933305223257, "loss": 0.1768, "step": 66478 }, { "epoch": 0.11787453489825608, "grad_norm": 1.0625, "learning_rate": 0.000596044016692811, "loss": 0.1782, "step": 66480 }, { "epoch": 0.1178780810635659, "grad_norm": 0.640625, "learning_rate": 0.0005959947050673071, "loss": 0.1439, "step": 66482 }, { "epoch": 0.11788162722887571, "grad_norm": 0.39453125, "learning_rate": 0.0005959453956460296, "loss": 0.1877, "step": 66484 }, { "epoch": 0.11788517339418553, "grad_norm": 0.435546875, "learning_rate": 0.0005958960884291941, "loss": 0.1613, "step": 66486 }, { "epoch": 0.11788871955949534, "grad_norm": 0.2392578125, "learning_rate": 0.0005958467834170161, "loss": 0.1543, "step": 66488 }, { "epoch": 0.11789226572480516, "grad_norm": 0.75, "learning_rate": 0.0005957974806097115, "loss": 0.1677, "step": 66490 }, { "epoch": 0.11789581189011497, "grad_norm": 0.478515625, "learning_rate": 0.0005957481800074962, "loss": 0.2236, "step": 66492 }, { "epoch": 0.11789935805542479, "grad_norm": 0.314453125, "learning_rate": 0.0005956988816105852, "loss": 0.1705, "step": 66494 }, { "epoch": 0.1179029042207346, "grad_norm": 0.208984375, "learning_rate": 0.000595649585419194, "loss": 0.1929, "step": 66496 }, { "epoch": 0.11790645038604441, "grad_norm": 1.90625, "learning_rate": 0.000595600291433539, "loss": 0.3257, "step": 66498 }, { "epoch": 0.11790999655135424, "grad_norm": 0.578125, "learning_rate": 0.0005955509996538353, "loss": 0.1688, "step": 66500 }, { "epoch": 0.11791354271666406, "grad_norm": 0.298828125, "learning_rate": 0.0005955017100802986, "loss": 0.1748, "step": 66502 }, { "epoch": 0.11791708888197387, "grad_norm": 0.431640625, "learning_rate": 0.0005954524227131441, "loss": 0.1601, "step": 66504 }, { "epoch": 0.11792063504728369, "grad_norm": 0.26171875, "learning_rate": 0.0005954031375525876, "loss": 0.1655, "step": 66506 }, { "epoch": 0.1179241812125935, "grad_norm": 0.51953125, "learning_rate": 0.0005953538545988453, "loss": 0.1955, "step": 66508 }, { "epoch": 0.11792772737790332, "grad_norm": 1.28125, "learning_rate": 0.0005953045738521312, "loss": 0.3434, "step": 66510 }, { "epoch": 0.11793127354321313, "grad_norm": 0.734375, "learning_rate": 0.0005952552953126623, "loss": 0.1446, "step": 66512 }, { "epoch": 0.11793481970852294, "grad_norm": 0.56640625, "learning_rate": 0.0005952060189806532, "loss": 0.182, "step": 66514 }, { "epoch": 0.11793836587383276, "grad_norm": 0.984375, "learning_rate": 0.0005951567448563202, "loss": 0.2035, "step": 66516 }, { "epoch": 0.11794191203914257, "grad_norm": 0.9375, "learning_rate": 0.000595107472939878, "loss": 0.2692, "step": 66518 }, { "epoch": 0.11794545820445239, "grad_norm": 0.279296875, "learning_rate": 0.0005950582032315426, "loss": 0.1588, "step": 66520 }, { "epoch": 0.1179490043697622, "grad_norm": 0.5078125, "learning_rate": 0.0005950089357315292, "loss": 0.1564, "step": 66522 }, { "epoch": 0.11795255053507202, "grad_norm": 0.35546875, "learning_rate": 0.0005949596704400536, "loss": 0.1599, "step": 66524 }, { "epoch": 0.11795609670038183, "grad_norm": 0.1806640625, "learning_rate": 0.000594910407357331, "loss": 0.1204, "step": 66526 }, { "epoch": 0.11795964286569165, "grad_norm": 0.26953125, "learning_rate": 0.0005948611464835764, "loss": 0.1682, "step": 66528 }, { "epoch": 0.11796318903100146, "grad_norm": 0.37890625, "learning_rate": 0.0005948118878190062, "loss": 0.1851, "step": 66530 }, { "epoch": 0.11796673519631128, "grad_norm": 0.451171875, "learning_rate": 0.0005947626313638352, "loss": 0.1292, "step": 66532 }, { "epoch": 0.11797028136162109, "grad_norm": 0.3671875, "learning_rate": 0.0005947133771182791, "loss": 0.1875, "step": 66534 }, { "epoch": 0.1179738275269309, "grad_norm": 0.5390625, "learning_rate": 0.000594664125082553, "loss": 0.1981, "step": 66536 }, { "epoch": 0.11797737369224072, "grad_norm": 1.4375, "learning_rate": 0.0005946148752568725, "loss": 0.1583, "step": 66538 }, { "epoch": 0.11798091985755053, "grad_norm": 0.3515625, "learning_rate": 0.0005945656276414532, "loss": 0.1509, "step": 66540 }, { "epoch": 0.11798446602286035, "grad_norm": 2.375, "learning_rate": 0.0005945163822365103, "loss": 0.3708, "step": 66542 }, { "epoch": 0.11798801218817016, "grad_norm": 0.486328125, "learning_rate": 0.0005944671390422589, "loss": 0.2003, "step": 66544 }, { "epoch": 0.11799155835347999, "grad_norm": 0.55859375, "learning_rate": 0.0005944178980589143, "loss": 0.1764, "step": 66546 }, { "epoch": 0.1179951045187898, "grad_norm": 0.9921875, "learning_rate": 0.0005943686592866926, "loss": 0.164, "step": 66548 }, { "epoch": 0.11799865068409962, "grad_norm": 0.447265625, "learning_rate": 0.0005943194227258086, "loss": 0.1451, "step": 66550 }, { "epoch": 0.11800219684940944, "grad_norm": 0.7734375, "learning_rate": 0.0005942701883764777, "loss": 0.2645, "step": 66552 }, { "epoch": 0.11800574301471925, "grad_norm": 0.66796875, "learning_rate": 0.0005942209562389149, "loss": 0.2728, "step": 66554 }, { "epoch": 0.11800928918002906, "grad_norm": 0.2490234375, "learning_rate": 0.0005941717263133362, "loss": 0.193, "step": 66556 }, { "epoch": 0.11801283534533888, "grad_norm": 0.443359375, "learning_rate": 0.0005941224985999566, "loss": 0.1572, "step": 66558 }, { "epoch": 0.1180163815106487, "grad_norm": 0.4453125, "learning_rate": 0.0005940732730989913, "loss": 0.1749, "step": 66560 }, { "epoch": 0.11801992767595851, "grad_norm": 0.279296875, "learning_rate": 0.0005940240498106555, "loss": 0.1517, "step": 66562 }, { "epoch": 0.11802347384126832, "grad_norm": 0.28125, "learning_rate": 0.0005939748287351644, "loss": 0.1757, "step": 66564 }, { "epoch": 0.11802702000657814, "grad_norm": 0.259765625, "learning_rate": 0.0005939256098727342, "loss": 0.2109, "step": 66566 }, { "epoch": 0.11803056617188795, "grad_norm": 0.57421875, "learning_rate": 0.0005938763932235787, "loss": 0.1607, "step": 66568 }, { "epoch": 0.11803411233719777, "grad_norm": 0.26953125, "learning_rate": 0.0005938271787879142, "loss": 0.1614, "step": 66570 }, { "epoch": 0.11803765850250758, "grad_norm": 1.046875, "learning_rate": 0.0005937779665659551, "loss": 0.234, "step": 66572 }, { "epoch": 0.1180412046678174, "grad_norm": 1.9453125, "learning_rate": 0.000593728756557918, "loss": 0.1792, "step": 66574 }, { "epoch": 0.11804475083312721, "grad_norm": 0.228515625, "learning_rate": 0.0005936795487640164, "loss": 0.2104, "step": 66576 }, { "epoch": 0.11804829699843702, "grad_norm": 0.392578125, "learning_rate": 0.0005936303431844668, "loss": 0.1701, "step": 66578 }, { "epoch": 0.11805184316374684, "grad_norm": 1.5859375, "learning_rate": 0.0005935811398194839, "loss": 0.2938, "step": 66580 }, { "epoch": 0.11805538932905665, "grad_norm": 0.419921875, "learning_rate": 0.000593531938669283, "loss": 0.1278, "step": 66582 }, { "epoch": 0.11805893549436647, "grad_norm": 0.2275390625, "learning_rate": 0.0005934827397340792, "loss": 0.1576, "step": 66584 }, { "epoch": 0.11806248165967628, "grad_norm": 4.375, "learning_rate": 0.0005934335430140872, "loss": 0.2451, "step": 66586 }, { "epoch": 0.1180660278249861, "grad_norm": 0.50390625, "learning_rate": 0.0005933843485095231, "loss": 0.2438, "step": 66588 }, { "epoch": 0.11806957399029591, "grad_norm": 0.96484375, "learning_rate": 0.0005933351562206015, "loss": 0.1611, "step": 66590 }, { "epoch": 0.11807312015560574, "grad_norm": 0.357421875, "learning_rate": 0.0005932859661475379, "loss": 0.1533, "step": 66592 }, { "epoch": 0.11807666632091555, "grad_norm": 0.345703125, "learning_rate": 0.0005932367782905464, "loss": 0.1869, "step": 66594 }, { "epoch": 0.11808021248622537, "grad_norm": 0.52734375, "learning_rate": 0.0005931875926498435, "loss": 0.2174, "step": 66596 }, { "epoch": 0.11808375865153518, "grad_norm": 0.71484375, "learning_rate": 0.0005931384092256435, "loss": 0.2674, "step": 66598 }, { "epoch": 0.118087304816845, "grad_norm": 1.0703125, "learning_rate": 0.0005930892280181616, "loss": 0.2586, "step": 66600 }, { "epoch": 0.11809085098215481, "grad_norm": 0.353515625, "learning_rate": 0.0005930400490276133, "loss": 0.2237, "step": 66602 }, { "epoch": 0.11809439714746463, "grad_norm": 0.43359375, "learning_rate": 0.0005929908722542128, "loss": 0.1893, "step": 66604 }, { "epoch": 0.11809794331277444, "grad_norm": 0.5078125, "learning_rate": 0.0005929416976981763, "loss": 0.1853, "step": 66606 }, { "epoch": 0.11810148947808426, "grad_norm": 0.1650390625, "learning_rate": 0.0005928925253597178, "loss": 0.1077, "step": 66608 }, { "epoch": 0.11810503564339407, "grad_norm": 0.6015625, "learning_rate": 0.000592843355239053, "loss": 0.1379, "step": 66610 }, { "epoch": 0.11810858180870389, "grad_norm": 0.298828125, "learning_rate": 0.0005927941873363969, "loss": 0.2071, "step": 66612 }, { "epoch": 0.1181121279740137, "grad_norm": 0.8515625, "learning_rate": 0.0005927450216519645, "loss": 0.2178, "step": 66614 }, { "epoch": 0.11811567413932351, "grad_norm": 0.546875, "learning_rate": 0.0005926958581859706, "loss": 0.2703, "step": 66616 }, { "epoch": 0.11811922030463333, "grad_norm": 0.357421875, "learning_rate": 0.0005926466969386299, "loss": 0.1742, "step": 66618 }, { "epoch": 0.11812276646994314, "grad_norm": 20.375, "learning_rate": 0.0005925975379101582, "loss": 0.1748, "step": 66620 }, { "epoch": 0.11812631263525296, "grad_norm": 1.5, "learning_rate": 0.0005925483811007699, "loss": 0.2243, "step": 66622 }, { "epoch": 0.11812985880056277, "grad_norm": 0.9140625, "learning_rate": 0.000592499226510681, "loss": 0.1536, "step": 66624 }, { "epoch": 0.11813340496587259, "grad_norm": 0.73046875, "learning_rate": 0.0005924500741401049, "loss": 0.1649, "step": 66626 }, { "epoch": 0.1181369511311824, "grad_norm": 0.373046875, "learning_rate": 0.0005924009239892577, "loss": 0.2141, "step": 66628 }, { "epoch": 0.11814049729649222, "grad_norm": 0.2119140625, "learning_rate": 0.0005923517760583539, "loss": 0.1539, "step": 66630 }, { "epoch": 0.11814404346180203, "grad_norm": 1.7109375, "learning_rate": 0.0005923026303476087, "loss": 0.2215, "step": 66632 }, { "epoch": 0.11814758962711185, "grad_norm": 0.375, "learning_rate": 0.0005922534868572369, "loss": 0.1865, "step": 66634 }, { "epoch": 0.11815113579242167, "grad_norm": 0.365234375, "learning_rate": 0.000592204345587453, "loss": 0.2998, "step": 66636 }, { "epoch": 0.11815468195773149, "grad_norm": 0.404296875, "learning_rate": 0.0005921552065384725, "loss": 0.1203, "step": 66638 }, { "epoch": 0.1181582281230413, "grad_norm": 0.462890625, "learning_rate": 0.0005921060697105104, "loss": 0.2394, "step": 66640 }, { "epoch": 0.11816177428835112, "grad_norm": 0.53125, "learning_rate": 0.0005920569351037812, "loss": 0.2526, "step": 66642 }, { "epoch": 0.11816532045366093, "grad_norm": 0.2080078125, "learning_rate": 0.0005920078027184996, "loss": 0.1327, "step": 66644 }, { "epoch": 0.11816886661897075, "grad_norm": 0.6953125, "learning_rate": 0.0005919586725548813, "loss": 0.247, "step": 66646 }, { "epoch": 0.11817241278428056, "grad_norm": 0.24609375, "learning_rate": 0.0005919095446131404, "loss": 0.114, "step": 66648 }, { "epoch": 0.11817595894959038, "grad_norm": 1.4453125, "learning_rate": 0.0005918604188934922, "loss": 0.2273, "step": 66650 }, { "epoch": 0.11817950511490019, "grad_norm": 0.369140625, "learning_rate": 0.0005918112953961511, "loss": 0.1992, "step": 66652 }, { "epoch": 0.11818305128021, "grad_norm": 0.9375, "learning_rate": 0.0005917621741213323, "loss": 0.1447, "step": 66654 }, { "epoch": 0.11818659744551982, "grad_norm": 0.1796875, "learning_rate": 0.0005917130550692509, "loss": 0.217, "step": 66656 }, { "epoch": 0.11819014361082963, "grad_norm": 0.353515625, "learning_rate": 0.0005916639382401208, "loss": 0.1345, "step": 66658 }, { "epoch": 0.11819368977613945, "grad_norm": 0.43359375, "learning_rate": 0.0005916148236341577, "loss": 0.1485, "step": 66660 }, { "epoch": 0.11819723594144926, "grad_norm": 0.7734375, "learning_rate": 0.0005915657112515755, "loss": 0.1598, "step": 66662 }, { "epoch": 0.11820078210675908, "grad_norm": 0.353515625, "learning_rate": 0.0005915166010925904, "loss": 0.1812, "step": 66664 }, { "epoch": 0.11820432827206889, "grad_norm": 0.3046875, "learning_rate": 0.0005914674931574157, "loss": 0.1165, "step": 66666 }, { "epoch": 0.11820787443737871, "grad_norm": 0.3125, "learning_rate": 0.000591418387446267, "loss": 0.2417, "step": 66668 }, { "epoch": 0.11821142060268852, "grad_norm": 0.50390625, "learning_rate": 0.000591369283959359, "loss": 0.1759, "step": 66670 }, { "epoch": 0.11821496676799834, "grad_norm": 3.75, "learning_rate": 0.0005913201826969061, "loss": 0.3366, "step": 66672 }, { "epoch": 0.11821851293330815, "grad_norm": 0.78515625, "learning_rate": 0.0005912710836591234, "loss": 0.2445, "step": 66674 }, { "epoch": 0.11822205909861797, "grad_norm": 0.27734375, "learning_rate": 0.000591221986846225, "loss": 0.1943, "step": 66676 }, { "epoch": 0.11822560526392778, "grad_norm": 0.212890625, "learning_rate": 0.0005911728922584267, "loss": 0.1316, "step": 66678 }, { "epoch": 0.1182291514292376, "grad_norm": 0.64453125, "learning_rate": 0.0005911237998959425, "loss": 0.3031, "step": 66680 }, { "epoch": 0.11823269759454742, "grad_norm": 1.2890625, "learning_rate": 0.0005910747097589871, "loss": 0.1641, "step": 66682 }, { "epoch": 0.11823624375985724, "grad_norm": 0.486328125, "learning_rate": 0.000591025621847775, "loss": 0.1612, "step": 66684 }, { "epoch": 0.11823978992516705, "grad_norm": 0.3359375, "learning_rate": 0.0005909765361625215, "loss": 0.198, "step": 66686 }, { "epoch": 0.11824333609047687, "grad_norm": 0.53125, "learning_rate": 0.0005909274527034409, "loss": 0.2114, "step": 66688 }, { "epoch": 0.11824688225578668, "grad_norm": 1.5625, "learning_rate": 0.0005908783714707481, "loss": 0.2744, "step": 66690 }, { "epoch": 0.1182504284210965, "grad_norm": 0.419921875, "learning_rate": 0.0005908292924646575, "loss": 0.4152, "step": 66692 }, { "epoch": 0.11825397458640631, "grad_norm": 1.5390625, "learning_rate": 0.0005907802156853835, "loss": 0.2393, "step": 66694 }, { "epoch": 0.11825752075171612, "grad_norm": 0.609375, "learning_rate": 0.0005907311411331416, "loss": 0.1722, "step": 66696 }, { "epoch": 0.11826106691702594, "grad_norm": 0.3828125, "learning_rate": 0.0005906820688081451, "loss": 0.2021, "step": 66698 }, { "epoch": 0.11826461308233575, "grad_norm": 0.392578125, "learning_rate": 0.0005906329987106101, "loss": 0.1759, "step": 66700 }, { "epoch": 0.11826815924764557, "grad_norm": 0.90234375, "learning_rate": 0.00059058393084075, "loss": 0.1619, "step": 66702 }, { "epoch": 0.11827170541295538, "grad_norm": 0.39453125, "learning_rate": 0.00059053486519878, "loss": 0.1694, "step": 66704 }, { "epoch": 0.1182752515782652, "grad_norm": 0.361328125, "learning_rate": 0.0005904858017849146, "loss": 0.1849, "step": 66706 }, { "epoch": 0.11827879774357501, "grad_norm": 0.2265625, "learning_rate": 0.0005904367405993685, "loss": 0.1264, "step": 66708 }, { "epoch": 0.11828234390888483, "grad_norm": 0.5546875, "learning_rate": 0.0005903876816423561, "loss": 0.1793, "step": 66710 }, { "epoch": 0.11828589007419464, "grad_norm": 0.33203125, "learning_rate": 0.0005903386249140916, "loss": 0.216, "step": 66712 }, { "epoch": 0.11828943623950446, "grad_norm": 0.640625, "learning_rate": 0.0005902895704147906, "loss": 0.2211, "step": 66714 }, { "epoch": 0.11829298240481427, "grad_norm": 0.484375, "learning_rate": 0.0005902405181446663, "loss": 0.1593, "step": 66716 }, { "epoch": 0.11829652857012408, "grad_norm": 0.365234375, "learning_rate": 0.0005901914681039341, "loss": 0.1613, "step": 66718 }, { "epoch": 0.1183000747354339, "grad_norm": 0.7734375, "learning_rate": 0.000590142420292808, "loss": 0.1577, "step": 66720 }, { "epoch": 0.11830362090074371, "grad_norm": 0.291015625, "learning_rate": 0.0005900933747115034, "loss": 0.1526, "step": 66722 }, { "epoch": 0.11830716706605353, "grad_norm": 0.318359375, "learning_rate": 0.0005900443313602337, "loss": 0.1405, "step": 66724 }, { "epoch": 0.11831071323136334, "grad_norm": 0.42578125, "learning_rate": 0.000589995290239214, "loss": 0.1794, "step": 66726 }, { "epoch": 0.11831425939667317, "grad_norm": 0.169921875, "learning_rate": 0.0005899462513486588, "loss": 0.175, "step": 66728 }, { "epoch": 0.11831780556198299, "grad_norm": 0.98046875, "learning_rate": 0.0005898972146887824, "loss": 0.2916, "step": 66730 }, { "epoch": 0.1183213517272928, "grad_norm": 0.53125, "learning_rate": 0.0005898481802597994, "loss": 0.1459, "step": 66732 }, { "epoch": 0.11832489789260262, "grad_norm": 0.2138671875, "learning_rate": 0.0005897991480619236, "loss": 0.2047, "step": 66734 }, { "epoch": 0.11832844405791243, "grad_norm": 0.52734375, "learning_rate": 0.0005897501180953703, "loss": 0.1995, "step": 66736 }, { "epoch": 0.11833199022322224, "grad_norm": 0.484375, "learning_rate": 0.0005897010903603537, "loss": 0.1709, "step": 66738 }, { "epoch": 0.11833553638853206, "grad_norm": 1.2578125, "learning_rate": 0.000589652064857088, "loss": 0.1674, "step": 66740 }, { "epoch": 0.11833908255384187, "grad_norm": 0.26953125, "learning_rate": 0.0005896030415857875, "loss": 0.1754, "step": 66742 }, { "epoch": 0.11834262871915169, "grad_norm": 0.5, "learning_rate": 0.0005895540205466671, "loss": 0.1759, "step": 66744 }, { "epoch": 0.1183461748844615, "grad_norm": 0.79296875, "learning_rate": 0.0005895050017399408, "loss": 0.1296, "step": 66746 }, { "epoch": 0.11834972104977132, "grad_norm": 2.46875, "learning_rate": 0.0005894559851658232, "loss": 0.3995, "step": 66748 }, { "epoch": 0.11835326721508113, "grad_norm": 0.515625, "learning_rate": 0.0005894069708245286, "loss": 0.1653, "step": 66750 }, { "epoch": 0.11835681338039095, "grad_norm": 0.77734375, "learning_rate": 0.0005893579587162712, "loss": 0.1826, "step": 66752 }, { "epoch": 0.11836035954570076, "grad_norm": 0.439453125, "learning_rate": 0.0005893089488412657, "loss": 0.1611, "step": 66754 }, { "epoch": 0.11836390571101058, "grad_norm": 1.6484375, "learning_rate": 0.000589259941199726, "loss": 0.3359, "step": 66756 }, { "epoch": 0.11836745187632039, "grad_norm": 0.77734375, "learning_rate": 0.0005892109357918666, "loss": 0.1654, "step": 66758 }, { "epoch": 0.1183709980416302, "grad_norm": 0.42578125, "learning_rate": 0.0005891619326179017, "loss": 0.2539, "step": 66760 }, { "epoch": 0.11837454420694002, "grad_norm": 0.5625, "learning_rate": 0.0005891129316780464, "loss": 0.1701, "step": 66762 }, { "epoch": 0.11837809037224983, "grad_norm": 0.58203125, "learning_rate": 0.0005890639329725136, "loss": 0.1874, "step": 66764 }, { "epoch": 0.11838163653755965, "grad_norm": 1.5234375, "learning_rate": 0.0005890149365015188, "loss": 0.2383, "step": 66766 }, { "epoch": 0.11838518270286946, "grad_norm": 0.38671875, "learning_rate": 0.0005889659422652759, "loss": 0.4378, "step": 66768 }, { "epoch": 0.11838872886817928, "grad_norm": 0.380859375, "learning_rate": 0.0005889169502639991, "loss": 0.1754, "step": 66770 }, { "epoch": 0.1183922750334891, "grad_norm": 0.3515625, "learning_rate": 0.0005888679604979025, "loss": 0.15, "step": 66772 }, { "epoch": 0.11839582119879892, "grad_norm": 0.5546875, "learning_rate": 0.0005888189729672007, "loss": 0.1547, "step": 66774 }, { "epoch": 0.11839936736410873, "grad_norm": 0.365234375, "learning_rate": 0.0005887699876721077, "loss": 0.1671, "step": 66776 }, { "epoch": 0.11840291352941855, "grad_norm": 0.78515625, "learning_rate": 0.000588721004612838, "loss": 0.2192, "step": 66778 }, { "epoch": 0.11840645969472836, "grad_norm": 0.30078125, "learning_rate": 0.0005886720237896055, "loss": 0.151, "step": 66780 }, { "epoch": 0.11841000586003818, "grad_norm": 0.353515625, "learning_rate": 0.0005886230452026242, "loss": 0.1806, "step": 66782 }, { "epoch": 0.11841355202534799, "grad_norm": 0.4375, "learning_rate": 0.000588574068852109, "loss": 0.1512, "step": 66784 }, { "epoch": 0.11841709819065781, "grad_norm": 0.51171875, "learning_rate": 0.0005885250947382739, "loss": 0.1774, "step": 66786 }, { "epoch": 0.11842064435596762, "grad_norm": 0.380859375, "learning_rate": 0.0005884761228613329, "loss": 0.1992, "step": 66788 }, { "epoch": 0.11842419052127744, "grad_norm": 1.03125, "learning_rate": 0.0005884271532215002, "loss": 0.2189, "step": 66790 }, { "epoch": 0.11842773668658725, "grad_norm": 0.70703125, "learning_rate": 0.0005883781858189895, "loss": 0.1938, "step": 66792 }, { "epoch": 0.11843128285189707, "grad_norm": 0.84375, "learning_rate": 0.000588329220654016, "loss": 0.1301, "step": 66794 }, { "epoch": 0.11843482901720688, "grad_norm": 0.9609375, "learning_rate": 0.0005882802577267931, "loss": 0.2481, "step": 66796 }, { "epoch": 0.1184383751825167, "grad_norm": 0.29296875, "learning_rate": 0.0005882312970375352, "loss": 0.1419, "step": 66798 }, { "epoch": 0.11844192134782651, "grad_norm": 0.326171875, "learning_rate": 0.0005881823385864562, "loss": 0.192, "step": 66800 }, { "epoch": 0.11844546751313632, "grad_norm": 0.4453125, "learning_rate": 0.0005881333823737702, "loss": 0.1683, "step": 66802 }, { "epoch": 0.11844901367844614, "grad_norm": 0.8359375, "learning_rate": 0.000588084428399692, "loss": 0.1656, "step": 66804 }, { "epoch": 0.11845255984375595, "grad_norm": 1.0390625, "learning_rate": 0.0005880354766644345, "loss": 0.3063, "step": 66806 }, { "epoch": 0.11845610600906577, "grad_norm": 0.283203125, "learning_rate": 0.0005879865271682128, "loss": 0.117, "step": 66808 }, { "epoch": 0.11845965217437558, "grad_norm": 0.271484375, "learning_rate": 0.0005879375799112403, "loss": 0.2042, "step": 66810 }, { "epoch": 0.1184631983396854, "grad_norm": 0.220703125, "learning_rate": 0.000587888634893732, "loss": 0.1258, "step": 66812 }, { "epoch": 0.11846674450499521, "grad_norm": 0.5, "learning_rate": 0.0005878396921159008, "loss": 0.2693, "step": 66814 }, { "epoch": 0.11847029067030503, "grad_norm": 0.2001953125, "learning_rate": 0.0005877907515779615, "loss": 0.1768, "step": 66816 }, { "epoch": 0.11847383683561485, "grad_norm": 1.2421875, "learning_rate": 0.0005877418132801278, "loss": 0.1865, "step": 66818 }, { "epoch": 0.11847738300092467, "grad_norm": 0.66015625, "learning_rate": 0.0005876928772226142, "loss": 0.1932, "step": 66820 }, { "epoch": 0.11848092916623448, "grad_norm": 0.2373046875, "learning_rate": 0.0005876439434056341, "loss": 0.1565, "step": 66822 }, { "epoch": 0.1184844753315443, "grad_norm": 0.8125, "learning_rate": 0.0005875950118294016, "loss": 0.1848, "step": 66824 }, { "epoch": 0.11848802149685411, "grad_norm": 0.5546875, "learning_rate": 0.0005875460824941311, "loss": 0.1864, "step": 66826 }, { "epoch": 0.11849156766216393, "grad_norm": 0.9921875, "learning_rate": 0.0005874971554000363, "loss": 0.2402, "step": 66828 }, { "epoch": 0.11849511382747374, "grad_norm": 0.46875, "learning_rate": 0.0005874482305473314, "loss": 0.1551, "step": 66830 }, { "epoch": 0.11849865999278356, "grad_norm": 0.29296875, "learning_rate": 0.0005873993079362299, "loss": 0.1586, "step": 66832 }, { "epoch": 0.11850220615809337, "grad_norm": 0.388671875, "learning_rate": 0.0005873503875669464, "loss": 0.1865, "step": 66834 }, { "epoch": 0.11850575232340319, "grad_norm": 0.1767578125, "learning_rate": 0.0005873014694396943, "loss": 0.4204, "step": 66836 }, { "epoch": 0.118509298488713, "grad_norm": 0.66796875, "learning_rate": 0.000587252553554688, "loss": 0.2091, "step": 66838 }, { "epoch": 0.11851284465402281, "grad_norm": 2.453125, "learning_rate": 0.0005872036399121411, "loss": 0.2297, "step": 66840 }, { "epoch": 0.11851639081933263, "grad_norm": 0.15625, "learning_rate": 0.0005871547285122673, "loss": 0.1556, "step": 66842 }, { "epoch": 0.11851993698464244, "grad_norm": 0.34765625, "learning_rate": 0.0005871058193552815, "loss": 0.1502, "step": 66844 }, { "epoch": 0.11852348314995226, "grad_norm": 0.1806640625, "learning_rate": 0.0005870569124413963, "loss": 0.1473, "step": 66846 }, { "epoch": 0.11852702931526207, "grad_norm": 0.50390625, "learning_rate": 0.0005870080077708264, "loss": 0.1952, "step": 66848 }, { "epoch": 0.11853057548057189, "grad_norm": 0.298828125, "learning_rate": 0.0005869591053437853, "loss": 0.1851, "step": 66850 }, { "epoch": 0.1185341216458817, "grad_norm": 0.703125, "learning_rate": 0.0005869102051604875, "loss": 0.1778, "step": 66852 }, { "epoch": 0.11853766781119152, "grad_norm": 0.65625, "learning_rate": 0.0005868613072211459, "loss": 0.192, "step": 66854 }, { "epoch": 0.11854121397650133, "grad_norm": 0.470703125, "learning_rate": 0.0005868124115259754, "loss": 0.1822, "step": 66856 }, { "epoch": 0.11854476014181115, "grad_norm": 0.291015625, "learning_rate": 0.0005867635180751891, "loss": 0.1861, "step": 66858 }, { "epoch": 0.11854830630712096, "grad_norm": 0.197265625, "learning_rate": 0.0005867146268690012, "loss": 0.1222, "step": 66860 }, { "epoch": 0.11855185247243077, "grad_norm": 0.341796875, "learning_rate": 0.0005866657379076252, "loss": 0.1794, "step": 66862 }, { "epoch": 0.1185553986377406, "grad_norm": 0.7109375, "learning_rate": 0.0005866168511912749, "loss": 0.1794, "step": 66864 }, { "epoch": 0.11855894480305042, "grad_norm": 0.72265625, "learning_rate": 0.0005865679667201644, "loss": 0.1958, "step": 66866 }, { "epoch": 0.11856249096836023, "grad_norm": 0.31640625, "learning_rate": 0.0005865190844945076, "loss": 0.1829, "step": 66868 }, { "epoch": 0.11856603713367005, "grad_norm": 0.94921875, "learning_rate": 0.0005864702045145179, "loss": 0.1663, "step": 66870 }, { "epoch": 0.11856958329897986, "grad_norm": 0.3359375, "learning_rate": 0.000586421326780409, "loss": 0.2158, "step": 66872 }, { "epoch": 0.11857312946428968, "grad_norm": 0.46484375, "learning_rate": 0.0005863724512923951, "loss": 0.1572, "step": 66874 }, { "epoch": 0.11857667562959949, "grad_norm": 0.5703125, "learning_rate": 0.0005863235780506899, "loss": 0.2089, "step": 66876 }, { "epoch": 0.1185802217949093, "grad_norm": 0.328125, "learning_rate": 0.000586274707055507, "loss": 0.1811, "step": 66878 }, { "epoch": 0.11858376796021912, "grad_norm": 0.220703125, "learning_rate": 0.00058622583830706, "loss": 0.1997, "step": 66880 }, { "epoch": 0.11858731412552893, "grad_norm": 1.71875, "learning_rate": 0.0005861769718055627, "loss": 0.2642, "step": 66882 }, { "epoch": 0.11859086029083875, "grad_norm": 0.1708984375, "learning_rate": 0.0005861281075512288, "loss": 0.1582, "step": 66884 }, { "epoch": 0.11859440645614856, "grad_norm": 0.259765625, "learning_rate": 0.0005860792455442723, "loss": 0.1612, "step": 66886 }, { "epoch": 0.11859795262145838, "grad_norm": 0.23046875, "learning_rate": 0.0005860303857849065, "loss": 0.1332, "step": 66888 }, { "epoch": 0.11860149878676819, "grad_norm": 2.265625, "learning_rate": 0.0005859815282733451, "loss": 0.2627, "step": 66890 }, { "epoch": 0.118605044952078, "grad_norm": 0.201171875, "learning_rate": 0.0005859326730098022, "loss": 0.1749, "step": 66892 }, { "epoch": 0.11860859111738782, "grad_norm": 0.29296875, "learning_rate": 0.0005858838199944912, "loss": 0.147, "step": 66894 }, { "epoch": 0.11861213728269764, "grad_norm": 1.1640625, "learning_rate": 0.0005858349692276256, "loss": 0.2301, "step": 66896 }, { "epoch": 0.11861568344800745, "grad_norm": 0.66015625, "learning_rate": 0.0005857861207094193, "loss": 0.2051, "step": 66898 }, { "epoch": 0.11861922961331726, "grad_norm": 0.400390625, "learning_rate": 0.0005857372744400853, "loss": 0.1849, "step": 66900 }, { "epoch": 0.11862277577862708, "grad_norm": 0.201171875, "learning_rate": 0.0005856884304198387, "loss": 0.1506, "step": 66902 }, { "epoch": 0.1186263219439369, "grad_norm": 0.5078125, "learning_rate": 0.0005856395886488912, "loss": 0.1821, "step": 66904 }, { "epoch": 0.11862986810924671, "grad_norm": 7.4375, "learning_rate": 0.000585590749127458, "loss": 0.2638, "step": 66906 }, { "epoch": 0.11863341427455654, "grad_norm": 0.36328125, "learning_rate": 0.0005855419118557515, "loss": 0.1893, "step": 66908 }, { "epoch": 0.11863696043986635, "grad_norm": 0.58984375, "learning_rate": 0.0005854930768339865, "loss": 0.2718, "step": 66910 }, { "epoch": 0.11864050660517617, "grad_norm": 0.5703125, "learning_rate": 0.0005854442440623754, "loss": 0.2021, "step": 66912 }, { "epoch": 0.11864405277048598, "grad_norm": 0.2451171875, "learning_rate": 0.0005853954135411326, "loss": 0.1641, "step": 66914 }, { "epoch": 0.1186475989357958, "grad_norm": 0.498046875, "learning_rate": 0.0005853465852704713, "loss": 0.1989, "step": 66916 }, { "epoch": 0.11865114510110561, "grad_norm": 0.53515625, "learning_rate": 0.000585297759250605, "loss": 0.2193, "step": 66918 }, { "epoch": 0.11865469126641542, "grad_norm": 0.1982421875, "learning_rate": 0.0005852489354817474, "loss": 0.1523, "step": 66920 }, { "epoch": 0.11865823743172524, "grad_norm": 0.15625, "learning_rate": 0.0005852001139641116, "loss": 0.0892, "step": 66922 }, { "epoch": 0.11866178359703505, "grad_norm": 0.341796875, "learning_rate": 0.0005851512946979119, "loss": 0.3504, "step": 66924 }, { "epoch": 0.11866532976234487, "grad_norm": 0.6953125, "learning_rate": 0.0005851024776833613, "loss": 0.2035, "step": 66926 }, { "epoch": 0.11866887592765468, "grad_norm": 1.6328125, "learning_rate": 0.0005850536629206735, "loss": 0.1987, "step": 66928 }, { "epoch": 0.1186724220929645, "grad_norm": 1.0390625, "learning_rate": 0.0005850048504100614, "loss": 0.1393, "step": 66930 }, { "epoch": 0.11867596825827431, "grad_norm": 0.4609375, "learning_rate": 0.0005849560401517392, "loss": 0.1611, "step": 66932 }, { "epoch": 0.11867951442358413, "grad_norm": 0.259765625, "learning_rate": 0.0005849072321459202, "loss": 0.1909, "step": 66934 }, { "epoch": 0.11868306058889394, "grad_norm": 0.326171875, "learning_rate": 0.0005848584263928178, "loss": 0.1825, "step": 66936 }, { "epoch": 0.11868660675420376, "grad_norm": 0.1884765625, "learning_rate": 0.0005848096228926454, "loss": 0.1657, "step": 66938 }, { "epoch": 0.11869015291951357, "grad_norm": 0.55078125, "learning_rate": 0.0005847608216456162, "loss": 0.1502, "step": 66940 }, { "epoch": 0.11869369908482338, "grad_norm": 0.287109375, "learning_rate": 0.0005847120226519443, "loss": 0.182, "step": 66942 }, { "epoch": 0.1186972452501332, "grad_norm": 0.30859375, "learning_rate": 0.0005846632259118422, "loss": 0.1984, "step": 66944 }, { "epoch": 0.11870079141544301, "grad_norm": 0.28515625, "learning_rate": 0.0005846144314255242, "loss": 0.1997, "step": 66946 }, { "epoch": 0.11870433758075283, "grad_norm": 0.318359375, "learning_rate": 0.0005845656391932028, "loss": 0.19, "step": 66948 }, { "epoch": 0.11870788374606264, "grad_norm": 0.427734375, "learning_rate": 0.0005845168492150927, "loss": 0.2103, "step": 66950 }, { "epoch": 0.11871142991137246, "grad_norm": 0.474609375, "learning_rate": 0.0005844680614914056, "loss": 0.196, "step": 66952 }, { "epoch": 0.11871497607668229, "grad_norm": 0.44140625, "learning_rate": 0.0005844192760223563, "loss": 0.1743, "step": 66954 }, { "epoch": 0.1187185222419921, "grad_norm": 0.64453125, "learning_rate": 0.0005843704928081574, "loss": 0.2779, "step": 66956 }, { "epoch": 0.11872206840730191, "grad_norm": 1.0390625, "learning_rate": 0.0005843217118490225, "loss": 0.3165, "step": 66958 }, { "epoch": 0.11872561457261173, "grad_norm": 0.28125, "learning_rate": 0.0005842729331451652, "loss": 0.1785, "step": 66960 }, { "epoch": 0.11872916073792154, "grad_norm": 0.57421875, "learning_rate": 0.0005842241566967978, "loss": 0.1963, "step": 66962 }, { "epoch": 0.11873270690323136, "grad_norm": 0.43359375, "learning_rate": 0.0005841753825041351, "loss": 0.1868, "step": 66964 }, { "epoch": 0.11873625306854117, "grad_norm": 0.3671875, "learning_rate": 0.0005841266105673892, "loss": 0.1637, "step": 66966 }, { "epoch": 0.11873979923385099, "grad_norm": 0.81640625, "learning_rate": 0.0005840778408867742, "loss": 0.1963, "step": 66968 }, { "epoch": 0.1187433453991608, "grad_norm": 0.609375, "learning_rate": 0.0005840290734625028, "loss": 0.1991, "step": 66970 }, { "epoch": 0.11874689156447062, "grad_norm": 0.26171875, "learning_rate": 0.0005839803082947884, "loss": 0.1331, "step": 66972 }, { "epoch": 0.11875043772978043, "grad_norm": 0.6484375, "learning_rate": 0.0005839315453838445, "loss": 0.1989, "step": 66974 }, { "epoch": 0.11875398389509025, "grad_norm": 0.365234375, "learning_rate": 0.0005838827847298842, "loss": 0.1681, "step": 66976 }, { "epoch": 0.11875753006040006, "grad_norm": 1.4453125, "learning_rate": 0.000583834026333121, "loss": 0.4894, "step": 66978 }, { "epoch": 0.11876107622570987, "grad_norm": 1.8203125, "learning_rate": 0.0005837852701937676, "loss": 0.2025, "step": 66980 }, { "epoch": 0.11876462239101969, "grad_norm": 0.28125, "learning_rate": 0.0005837365163120378, "loss": 0.1517, "step": 66982 }, { "epoch": 0.1187681685563295, "grad_norm": 0.30078125, "learning_rate": 0.0005836877646881447, "loss": 0.1648, "step": 66984 }, { "epoch": 0.11877171472163932, "grad_norm": 1.6171875, "learning_rate": 0.0005836390153223013, "loss": 0.3641, "step": 66986 }, { "epoch": 0.11877526088694913, "grad_norm": 0.67578125, "learning_rate": 0.000583590268214721, "loss": 0.2589, "step": 66988 }, { "epoch": 0.11877880705225895, "grad_norm": 0.25390625, "learning_rate": 0.0005835415233656165, "loss": 0.1695, "step": 66990 }, { "epoch": 0.11878235321756876, "grad_norm": 0.3984375, "learning_rate": 0.0005834927807752022, "loss": 0.1253, "step": 66992 }, { "epoch": 0.11878589938287858, "grad_norm": 0.38671875, "learning_rate": 0.0005834440404436895, "loss": 0.1773, "step": 66994 }, { "epoch": 0.11878944554818839, "grad_norm": 0.36328125, "learning_rate": 0.0005833953023712932, "loss": 0.4878, "step": 66996 }, { "epoch": 0.1187929917134982, "grad_norm": 0.23828125, "learning_rate": 0.0005833465665582251, "loss": 0.1809, "step": 66998 }, { "epoch": 0.11879653787880803, "grad_norm": 0.37109375, "learning_rate": 0.0005832978330046998, "loss": 0.1879, "step": 67000 }, { "epoch": 0.11880008404411785, "grad_norm": 1.265625, "learning_rate": 0.0005832491017109292, "loss": 0.1858, "step": 67002 }, { "epoch": 0.11880363020942766, "grad_norm": 0.396484375, "learning_rate": 0.0005832003726771269, "loss": 0.1989, "step": 67004 }, { "epoch": 0.11880717637473748, "grad_norm": 0.25390625, "learning_rate": 0.0005831516459035061, "loss": 0.1833, "step": 67006 }, { "epoch": 0.11881072254004729, "grad_norm": 0.52734375, "learning_rate": 0.0005831029213902799, "loss": 0.2202, "step": 67008 }, { "epoch": 0.1188142687053571, "grad_norm": 0.44140625, "learning_rate": 0.000583054199137661, "loss": 0.2232, "step": 67010 }, { "epoch": 0.11881781487066692, "grad_norm": 1.4765625, "learning_rate": 0.0005830054791458626, "loss": 0.2186, "step": 67012 }, { "epoch": 0.11882136103597674, "grad_norm": 0.609375, "learning_rate": 0.0005829567614150982, "loss": 0.2082, "step": 67014 }, { "epoch": 0.11882490720128655, "grad_norm": 0.4375, "learning_rate": 0.0005829080459455807, "loss": 0.2488, "step": 67016 }, { "epoch": 0.11882845336659636, "grad_norm": 0.703125, "learning_rate": 0.0005828593327375229, "loss": 0.2202, "step": 67018 }, { "epoch": 0.11883199953190618, "grad_norm": 0.302734375, "learning_rate": 0.0005828106217911378, "loss": 0.2153, "step": 67020 }, { "epoch": 0.118835545697216, "grad_norm": 0.447265625, "learning_rate": 0.0005827619131066388, "loss": 0.1898, "step": 67022 }, { "epoch": 0.11883909186252581, "grad_norm": 0.57421875, "learning_rate": 0.0005827132066842387, "loss": 0.3393, "step": 67024 }, { "epoch": 0.11884263802783562, "grad_norm": 0.2392578125, "learning_rate": 0.0005826645025241509, "loss": 0.1317, "step": 67026 }, { "epoch": 0.11884618419314544, "grad_norm": 1.03125, "learning_rate": 0.0005826158006265876, "loss": 0.1683, "step": 67028 }, { "epoch": 0.11884973035845525, "grad_norm": 0.337890625, "learning_rate": 0.0005825671009917621, "loss": 0.1851, "step": 67030 }, { "epoch": 0.11885327652376507, "grad_norm": 3.078125, "learning_rate": 0.0005825184036198883, "loss": 0.1471, "step": 67032 }, { "epoch": 0.11885682268907488, "grad_norm": 0.271484375, "learning_rate": 0.0005824697085111779, "loss": 0.2255, "step": 67034 }, { "epoch": 0.1188603688543847, "grad_norm": 0.451171875, "learning_rate": 0.0005824210156658443, "loss": 0.1338, "step": 67036 }, { "epoch": 0.11886391501969451, "grad_norm": 0.30859375, "learning_rate": 0.0005823723250841004, "loss": 0.1794, "step": 67038 }, { "epoch": 0.11886746118500433, "grad_norm": 0.4453125, "learning_rate": 0.0005823236367661597, "loss": 0.15, "step": 67040 }, { "epoch": 0.11887100735031414, "grad_norm": 0.3984375, "learning_rate": 0.0005822749507122347, "loss": 0.1469, "step": 67042 }, { "epoch": 0.11887455351562397, "grad_norm": 0.154296875, "learning_rate": 0.0005822262669225382, "loss": 0.1604, "step": 67044 }, { "epoch": 0.11887809968093378, "grad_norm": 0.53515625, "learning_rate": 0.0005821775853972833, "loss": 0.217, "step": 67046 }, { "epoch": 0.1188816458462436, "grad_norm": 0.33984375, "learning_rate": 0.0005821289061366825, "loss": 0.1511, "step": 67048 }, { "epoch": 0.11888519201155341, "grad_norm": 0.373046875, "learning_rate": 0.0005820802291409498, "loss": 0.2132, "step": 67050 }, { "epoch": 0.11888873817686323, "grad_norm": 0.412109375, "learning_rate": 0.0005820315544102966, "loss": 0.2416, "step": 67052 }, { "epoch": 0.11889228434217304, "grad_norm": 0.2578125, "learning_rate": 0.000581982881944937, "loss": 0.1738, "step": 67054 }, { "epoch": 0.11889583050748286, "grad_norm": 0.244140625, "learning_rate": 0.0005819342117450827, "loss": 0.201, "step": 67056 }, { "epoch": 0.11889937667279267, "grad_norm": 0.376953125, "learning_rate": 0.0005818855438109483, "loss": 0.4724, "step": 67058 }, { "epoch": 0.11890292283810248, "grad_norm": 13.1875, "learning_rate": 0.0005818368781427447, "loss": 0.2487, "step": 67060 }, { "epoch": 0.1189064690034123, "grad_norm": 0.53125, "learning_rate": 0.0005817882147406859, "loss": 0.1829, "step": 67062 }, { "epoch": 0.11891001516872211, "grad_norm": 0.2578125, "learning_rate": 0.0005817395536049844, "loss": 0.1814, "step": 67064 }, { "epoch": 0.11891356133403193, "grad_norm": 0.2734375, "learning_rate": 0.0005816908947358533, "loss": 0.156, "step": 67066 }, { "epoch": 0.11891710749934174, "grad_norm": 0.3046875, "learning_rate": 0.0005816422381335049, "loss": 0.1631, "step": 67068 }, { "epoch": 0.11892065366465156, "grad_norm": 0.60546875, "learning_rate": 0.0005815935837981521, "loss": 0.2271, "step": 67070 }, { "epoch": 0.11892419982996137, "grad_norm": 0.54296875, "learning_rate": 0.0005815449317300081, "loss": 0.2804, "step": 67072 }, { "epoch": 0.11892774599527119, "grad_norm": 0.181640625, "learning_rate": 0.0005814962819292853, "loss": 0.1169, "step": 67074 }, { "epoch": 0.118931292160581, "grad_norm": 1.7421875, "learning_rate": 0.0005814476343961967, "loss": 0.2806, "step": 67076 }, { "epoch": 0.11893483832589082, "grad_norm": 0.4140625, "learning_rate": 0.0005813989891309546, "loss": 0.1782, "step": 67078 }, { "epoch": 0.11893838449120063, "grad_norm": 0.32421875, "learning_rate": 0.0005813503461337724, "loss": 0.1703, "step": 67080 }, { "epoch": 0.11894193065651044, "grad_norm": 0.494140625, "learning_rate": 0.0005813017054048626, "loss": 0.1738, "step": 67082 }, { "epoch": 0.11894547682182026, "grad_norm": 0.53515625, "learning_rate": 0.0005812530669444377, "loss": 0.2067, "step": 67084 }, { "epoch": 0.11894902298713007, "grad_norm": 0.30078125, "learning_rate": 0.0005812044307527106, "loss": 0.1855, "step": 67086 }, { "epoch": 0.11895256915243989, "grad_norm": 0.302734375, "learning_rate": 0.0005811557968298936, "loss": 0.1689, "step": 67088 }, { "epoch": 0.11895611531774972, "grad_norm": 0.1337890625, "learning_rate": 0.0005811071651762005, "loss": 0.14, "step": 67090 }, { "epoch": 0.11895966148305953, "grad_norm": 0.423828125, "learning_rate": 0.0005810585357918425, "loss": 0.1995, "step": 67092 }, { "epoch": 0.11896320764836935, "grad_norm": 0.80078125, "learning_rate": 0.0005810099086770335, "loss": 0.1973, "step": 67094 }, { "epoch": 0.11896675381367916, "grad_norm": 0.65625, "learning_rate": 0.0005809612838319853, "loss": 0.1988, "step": 67096 }, { "epoch": 0.11897029997898897, "grad_norm": 0.361328125, "learning_rate": 0.0005809126612569117, "loss": 0.1658, "step": 67098 }, { "epoch": 0.11897384614429879, "grad_norm": 0.236328125, "learning_rate": 0.0005808640409520238, "loss": 0.1378, "step": 67100 }, { "epoch": 0.1189773923096086, "grad_norm": 0.2373046875, "learning_rate": 0.0005808154229175355, "loss": 0.1905, "step": 67102 }, { "epoch": 0.11898093847491842, "grad_norm": 0.625, "learning_rate": 0.0005807668071536588, "loss": 0.1931, "step": 67104 }, { "epoch": 0.11898448464022823, "grad_norm": 0.25390625, "learning_rate": 0.0005807181936606066, "loss": 0.1729, "step": 67106 }, { "epoch": 0.11898803080553805, "grad_norm": 0.4609375, "learning_rate": 0.0005806695824385913, "loss": 0.1886, "step": 67108 }, { "epoch": 0.11899157697084786, "grad_norm": 0.828125, "learning_rate": 0.0005806209734878253, "loss": 0.2178, "step": 67110 }, { "epoch": 0.11899512313615768, "grad_norm": 0.376953125, "learning_rate": 0.0005805723668085217, "loss": 0.277, "step": 67112 }, { "epoch": 0.11899866930146749, "grad_norm": 1.4609375, "learning_rate": 0.000580523762400893, "loss": 0.1652, "step": 67114 }, { "epoch": 0.1190022154667773, "grad_norm": 0.63671875, "learning_rate": 0.0005804751602651515, "loss": 0.1622, "step": 67116 }, { "epoch": 0.11900576163208712, "grad_norm": 0.251953125, "learning_rate": 0.0005804265604015097, "loss": 0.1629, "step": 67118 }, { "epoch": 0.11900930779739693, "grad_norm": 0.5546875, "learning_rate": 0.0005803779628101805, "loss": 0.1893, "step": 67120 }, { "epoch": 0.11901285396270675, "grad_norm": 0.515625, "learning_rate": 0.0005803293674913763, "loss": 0.1578, "step": 67122 }, { "epoch": 0.11901640012801656, "grad_norm": 0.1806640625, "learning_rate": 0.0005802807744453096, "loss": 0.155, "step": 67124 }, { "epoch": 0.11901994629332638, "grad_norm": 0.98828125, "learning_rate": 0.0005802321836721929, "loss": 0.1721, "step": 67126 }, { "epoch": 0.1190234924586362, "grad_norm": 0.1708984375, "learning_rate": 0.0005801835951722384, "loss": 0.1417, "step": 67128 }, { "epoch": 0.11902703862394601, "grad_norm": 0.95703125, "learning_rate": 0.0005801350089456592, "loss": 0.3979, "step": 67130 }, { "epoch": 0.11903058478925582, "grad_norm": 0.318359375, "learning_rate": 0.0005800864249926675, "loss": 0.1542, "step": 67132 }, { "epoch": 0.11903413095456564, "grad_norm": 0.283203125, "learning_rate": 0.0005800378433134757, "loss": 0.1346, "step": 67134 }, { "epoch": 0.11903767711987547, "grad_norm": 1.453125, "learning_rate": 0.0005799892639082965, "loss": 0.2824, "step": 67136 }, { "epoch": 0.11904122328518528, "grad_norm": 0.4921875, "learning_rate": 0.000579940686777342, "loss": 0.1619, "step": 67138 }, { "epoch": 0.1190447694504951, "grad_norm": 1.671875, "learning_rate": 0.000579892111920825, "loss": 0.249, "step": 67140 }, { "epoch": 0.11904831561580491, "grad_norm": 0.671875, "learning_rate": 0.0005798435393389576, "loss": 0.1185, "step": 67142 }, { "epoch": 0.11905186178111472, "grad_norm": 0.2275390625, "learning_rate": 0.0005797949690319526, "loss": 0.1462, "step": 67144 }, { "epoch": 0.11905540794642454, "grad_norm": 0.28125, "learning_rate": 0.0005797464010000219, "loss": 0.1823, "step": 67146 }, { "epoch": 0.11905895411173435, "grad_norm": 0.482421875, "learning_rate": 0.0005796978352433788, "loss": 0.1632, "step": 67148 }, { "epoch": 0.11906250027704417, "grad_norm": 0.2470703125, "learning_rate": 0.0005796492717622347, "loss": 0.1502, "step": 67150 }, { "epoch": 0.11906604644235398, "grad_norm": 4.28125, "learning_rate": 0.0005796007105568026, "loss": 0.284, "step": 67152 }, { "epoch": 0.1190695926076638, "grad_norm": 0.9296875, "learning_rate": 0.0005795521516272948, "loss": 0.2874, "step": 67154 }, { "epoch": 0.11907313877297361, "grad_norm": 0.294921875, "learning_rate": 0.0005795035949739234, "loss": 0.2444, "step": 67156 }, { "epoch": 0.11907668493828343, "grad_norm": 0.8671875, "learning_rate": 0.0005794550405969012, "loss": 0.2214, "step": 67158 }, { "epoch": 0.11908023110359324, "grad_norm": 0.8125, "learning_rate": 0.0005794064884964399, "loss": 0.1688, "step": 67160 }, { "epoch": 0.11908377726890305, "grad_norm": 0.2294921875, "learning_rate": 0.0005793579386727523, "loss": 0.2235, "step": 67162 }, { "epoch": 0.11908732343421287, "grad_norm": 0.26953125, "learning_rate": 0.0005793093911260508, "loss": 0.2416, "step": 67164 }, { "epoch": 0.11909086959952268, "grad_norm": 0.953125, "learning_rate": 0.0005792608458565476, "loss": 0.2399, "step": 67166 }, { "epoch": 0.1190944157648325, "grad_norm": 1.0625, "learning_rate": 0.0005792123028644547, "loss": 0.172, "step": 67168 }, { "epoch": 0.11909796193014231, "grad_norm": 1.578125, "learning_rate": 0.0005791637621499849, "loss": 0.1908, "step": 67170 }, { "epoch": 0.11910150809545213, "grad_norm": 0.302734375, "learning_rate": 0.0005791152237133503, "loss": 0.1365, "step": 67172 }, { "epoch": 0.11910505426076194, "grad_norm": 1.4609375, "learning_rate": 0.0005790666875547632, "loss": 0.2252, "step": 67174 }, { "epoch": 0.11910860042607176, "grad_norm": 0.5390625, "learning_rate": 0.0005790181536744356, "loss": 0.1427, "step": 67176 }, { "epoch": 0.11911214659138157, "grad_norm": 0.75390625, "learning_rate": 0.00057896962207258, "loss": 0.2121, "step": 67178 }, { "epoch": 0.1191156927566914, "grad_norm": 0.53125, "learning_rate": 0.0005789210927494091, "loss": 0.2241, "step": 67180 }, { "epoch": 0.11911923892200121, "grad_norm": 0.828125, "learning_rate": 0.0005788725657051339, "loss": 0.2172, "step": 67182 }, { "epoch": 0.11912278508731103, "grad_norm": 0.2431640625, "learning_rate": 0.0005788240409399679, "loss": 0.1363, "step": 67184 }, { "epoch": 0.11912633125262084, "grad_norm": 1.1484375, "learning_rate": 0.0005787755184541223, "loss": 0.1914, "step": 67186 }, { "epoch": 0.11912987741793066, "grad_norm": 0.216796875, "learning_rate": 0.0005787269982478107, "loss": 0.1772, "step": 67188 }, { "epoch": 0.11913342358324047, "grad_norm": 1.3515625, "learning_rate": 0.0005786784803212436, "loss": 0.2133, "step": 67190 }, { "epoch": 0.11913696974855029, "grad_norm": 0.7421875, "learning_rate": 0.0005786299646746343, "loss": 0.171, "step": 67192 }, { "epoch": 0.1191405159138601, "grad_norm": 0.32421875, "learning_rate": 0.0005785814513081948, "loss": 0.1804, "step": 67194 }, { "epoch": 0.11914406207916992, "grad_norm": 0.404296875, "learning_rate": 0.0005785329402221371, "loss": 0.22, "step": 67196 }, { "epoch": 0.11914760824447973, "grad_norm": 0.44140625, "learning_rate": 0.0005784844314166735, "loss": 0.1557, "step": 67198 }, { "epoch": 0.11915115440978954, "grad_norm": 0.33984375, "learning_rate": 0.0005784359248920156, "loss": 0.1553, "step": 67200 }, { "epoch": 0.11915470057509936, "grad_norm": 1.6796875, "learning_rate": 0.0005783874206483765, "loss": 0.212, "step": 67202 }, { "epoch": 0.11915824674040917, "grad_norm": 0.4765625, "learning_rate": 0.0005783389186859676, "loss": 0.2101, "step": 67204 }, { "epoch": 0.11916179290571899, "grad_norm": 0.400390625, "learning_rate": 0.0005782904190050015, "loss": 0.193, "step": 67206 }, { "epoch": 0.1191653390710288, "grad_norm": 0.478515625, "learning_rate": 0.0005782419216056895, "loss": 0.1052, "step": 67208 }, { "epoch": 0.11916888523633862, "grad_norm": 0.6875, "learning_rate": 0.0005781934264882447, "loss": 0.2135, "step": 67210 }, { "epoch": 0.11917243140164843, "grad_norm": 0.40625, "learning_rate": 0.0005781449336528788, "loss": 0.227, "step": 67212 }, { "epoch": 0.11917597756695825, "grad_norm": 0.294921875, "learning_rate": 0.0005780964430998039, "loss": 0.2132, "step": 67214 }, { "epoch": 0.11917952373226806, "grad_norm": 0.400390625, "learning_rate": 0.0005780479548292318, "loss": 0.1759, "step": 67216 }, { "epoch": 0.11918306989757788, "grad_norm": 0.921875, "learning_rate": 0.0005779994688413745, "loss": 0.2458, "step": 67218 }, { "epoch": 0.11918661606288769, "grad_norm": 0.296875, "learning_rate": 0.0005779509851364446, "loss": 0.1336, "step": 67220 }, { "epoch": 0.1191901622281975, "grad_norm": 0.390625, "learning_rate": 0.000577902503714654, "loss": 0.2845, "step": 67222 }, { "epoch": 0.11919370839350732, "grad_norm": 0.37109375, "learning_rate": 0.0005778540245762145, "loss": 0.1774, "step": 67224 }, { "epoch": 0.11919725455881715, "grad_norm": 0.431640625, "learning_rate": 0.0005778055477213378, "loss": 0.1715, "step": 67226 }, { "epoch": 0.11920080072412696, "grad_norm": 0.74609375, "learning_rate": 0.0005777570731502368, "loss": 0.1814, "step": 67228 }, { "epoch": 0.11920434688943678, "grad_norm": 0.58984375, "learning_rate": 0.0005777086008631228, "loss": 0.1403, "step": 67230 }, { "epoch": 0.11920789305474659, "grad_norm": 0.33203125, "learning_rate": 0.0005776601308602081, "loss": 0.1692, "step": 67232 }, { "epoch": 0.1192114392200564, "grad_norm": 0.4453125, "learning_rate": 0.0005776116631417045, "loss": 0.16, "step": 67234 }, { "epoch": 0.11921498538536622, "grad_norm": 1.7578125, "learning_rate": 0.000577563197707824, "loss": 0.3517, "step": 67236 }, { "epoch": 0.11921853155067604, "grad_norm": 0.59375, "learning_rate": 0.000577514734558779, "loss": 0.1852, "step": 67238 }, { "epoch": 0.11922207771598585, "grad_norm": 0.341796875, "learning_rate": 0.0005774662736947805, "loss": 0.206, "step": 67240 }, { "epoch": 0.11922562388129566, "grad_norm": 0.181640625, "learning_rate": 0.0005774178151160412, "loss": 0.1945, "step": 67242 }, { "epoch": 0.11922917004660548, "grad_norm": 1.53125, "learning_rate": 0.0005773693588227725, "loss": 0.2427, "step": 67244 }, { "epoch": 0.1192327162119153, "grad_norm": 0.37890625, "learning_rate": 0.0005773209048151875, "loss": 0.1678, "step": 67246 }, { "epoch": 0.11923626237722511, "grad_norm": 0.17578125, "learning_rate": 0.0005772724530934964, "loss": 0.1321, "step": 67248 }, { "epoch": 0.11923980854253492, "grad_norm": 0.443359375, "learning_rate": 0.0005772240036579124, "loss": 0.2249, "step": 67250 }, { "epoch": 0.11924335470784474, "grad_norm": 0.66015625, "learning_rate": 0.0005771755565086469, "loss": 0.191, "step": 67252 }, { "epoch": 0.11924690087315455, "grad_norm": 0.326171875, "learning_rate": 0.0005771271116459118, "loss": 0.186, "step": 67254 }, { "epoch": 0.11925044703846437, "grad_norm": 0.3515625, "learning_rate": 0.000577078669069919, "loss": 0.198, "step": 67256 }, { "epoch": 0.11925399320377418, "grad_norm": 0.58984375, "learning_rate": 0.0005770302287808801, "loss": 0.2156, "step": 67258 }, { "epoch": 0.119257539369084, "grad_norm": 0.7109375, "learning_rate": 0.0005769817907790073, "loss": 0.2766, "step": 67260 }, { "epoch": 0.11926108553439381, "grad_norm": 2.203125, "learning_rate": 0.0005769333550645126, "loss": 0.3566, "step": 67262 }, { "epoch": 0.11926463169970362, "grad_norm": 0.58984375, "learning_rate": 0.0005768849216376075, "loss": 0.1645, "step": 67264 }, { "epoch": 0.11926817786501344, "grad_norm": 0.294921875, "learning_rate": 0.0005768364904985035, "loss": 0.1846, "step": 67266 }, { "epoch": 0.11927172403032325, "grad_norm": 0.61328125, "learning_rate": 0.0005767880616474133, "loss": 0.2243, "step": 67268 }, { "epoch": 0.11927527019563307, "grad_norm": 0.259765625, "learning_rate": 0.000576739635084548, "loss": 0.1747, "step": 67270 }, { "epoch": 0.1192788163609429, "grad_norm": 0.181640625, "learning_rate": 0.0005766912108101196, "loss": 0.1473, "step": 67272 }, { "epoch": 0.11928236252625271, "grad_norm": 0.201171875, "learning_rate": 0.00057664278882434, "loss": 0.1501, "step": 67274 }, { "epoch": 0.11928590869156253, "grad_norm": 0.33984375, "learning_rate": 0.0005765943691274205, "loss": 0.158, "step": 67276 }, { "epoch": 0.11928945485687234, "grad_norm": 0.62890625, "learning_rate": 0.0005765459517195738, "loss": 0.1839, "step": 67278 }, { "epoch": 0.11929300102218215, "grad_norm": 0.53125, "learning_rate": 0.0005764975366010103, "loss": 0.2443, "step": 67280 }, { "epoch": 0.11929654718749197, "grad_norm": 0.5078125, "learning_rate": 0.0005764491237719429, "loss": 0.2865, "step": 67282 }, { "epoch": 0.11930009335280178, "grad_norm": 0.447265625, "learning_rate": 0.0005764007132325827, "loss": 0.1487, "step": 67284 }, { "epoch": 0.1193036395181116, "grad_norm": 0.3359375, "learning_rate": 0.0005763523049831423, "loss": 0.1607, "step": 67286 }, { "epoch": 0.11930718568342141, "grad_norm": 0.515625, "learning_rate": 0.0005763038990238318, "loss": 0.168, "step": 67288 }, { "epoch": 0.11931073184873123, "grad_norm": 0.455078125, "learning_rate": 0.0005762554953548645, "loss": 0.1825, "step": 67290 }, { "epoch": 0.11931427801404104, "grad_norm": 0.37890625, "learning_rate": 0.0005762070939764512, "loss": 0.2398, "step": 67292 }, { "epoch": 0.11931782417935086, "grad_norm": 0.447265625, "learning_rate": 0.0005761586948888035, "loss": 0.1725, "step": 67294 }, { "epoch": 0.11932137034466067, "grad_norm": 0.310546875, "learning_rate": 0.0005761102980921342, "loss": 0.2231, "step": 67296 }, { "epoch": 0.11932491650997049, "grad_norm": 1.15625, "learning_rate": 0.0005760619035866534, "loss": 0.2043, "step": 67298 }, { "epoch": 0.1193284626752803, "grad_norm": 0.796875, "learning_rate": 0.000576013511372574, "loss": 0.1737, "step": 67300 }, { "epoch": 0.11933200884059011, "grad_norm": 0.0986328125, "learning_rate": 0.0005759651214501065, "loss": 0.1319, "step": 67302 }, { "epoch": 0.11933555500589993, "grad_norm": 0.27734375, "learning_rate": 0.000575916733819464, "loss": 0.2225, "step": 67304 }, { "epoch": 0.11933910117120974, "grad_norm": 0.380859375, "learning_rate": 0.0005758683484808566, "loss": 0.1408, "step": 67306 }, { "epoch": 0.11934264733651956, "grad_norm": 0.4375, "learning_rate": 0.0005758199654344969, "loss": 0.1817, "step": 67308 }, { "epoch": 0.11934619350182937, "grad_norm": 0.3125, "learning_rate": 0.0005757715846805962, "loss": 0.2486, "step": 67310 }, { "epoch": 0.11934973966713919, "grad_norm": 1.1484375, "learning_rate": 0.0005757232062193661, "loss": 0.2682, "step": 67312 }, { "epoch": 0.119353285832449, "grad_norm": 0.220703125, "learning_rate": 0.0005756748300510181, "loss": 0.1603, "step": 67314 }, { "epoch": 0.11935683199775883, "grad_norm": 0.1845703125, "learning_rate": 0.0005756264561757636, "loss": 0.3705, "step": 67316 }, { "epoch": 0.11936037816306865, "grad_norm": 0.31640625, "learning_rate": 0.0005755780845938147, "loss": 0.1358, "step": 67318 }, { "epoch": 0.11936392432837846, "grad_norm": 0.1796875, "learning_rate": 0.0005755297153053825, "loss": 0.2071, "step": 67320 }, { "epoch": 0.11936747049368827, "grad_norm": 1.046875, "learning_rate": 0.0005754813483106788, "loss": 0.2078, "step": 67322 }, { "epoch": 0.11937101665899809, "grad_norm": 0.40625, "learning_rate": 0.000575432983609915, "loss": 0.2763, "step": 67324 }, { "epoch": 0.1193745628243079, "grad_norm": 0.23828125, "learning_rate": 0.0005753846212033023, "loss": 0.1432, "step": 67326 }, { "epoch": 0.11937810898961772, "grad_norm": 1.0625, "learning_rate": 0.000575336261091053, "loss": 0.1904, "step": 67328 }, { "epoch": 0.11938165515492753, "grad_norm": 2.0625, "learning_rate": 0.0005752879032733778, "loss": 0.1486, "step": 67330 }, { "epoch": 0.11938520132023735, "grad_norm": 0.43359375, "learning_rate": 0.0005752395477504886, "loss": 0.154, "step": 67332 }, { "epoch": 0.11938874748554716, "grad_norm": 0.5390625, "learning_rate": 0.0005751911945225966, "loss": 0.1639, "step": 67334 }, { "epoch": 0.11939229365085698, "grad_norm": 0.298828125, "learning_rate": 0.0005751428435899142, "loss": 0.1872, "step": 67336 }, { "epoch": 0.11939583981616679, "grad_norm": 0.1767578125, "learning_rate": 0.0005750944949526513, "loss": 0.4226, "step": 67338 }, { "epoch": 0.1193993859814766, "grad_norm": 0.8359375, "learning_rate": 0.0005750461486110206, "loss": 0.2605, "step": 67340 }, { "epoch": 0.11940293214678642, "grad_norm": 1.375, "learning_rate": 0.0005749978045652331, "loss": 0.2303, "step": 67342 }, { "epoch": 0.11940647831209623, "grad_norm": 0.8515625, "learning_rate": 0.0005749494628155002, "loss": 0.2165, "step": 67344 }, { "epoch": 0.11941002447740605, "grad_norm": 0.84375, "learning_rate": 0.0005749011233620333, "loss": 0.1554, "step": 67346 }, { "epoch": 0.11941357064271586, "grad_norm": 0.439453125, "learning_rate": 0.0005748527862050437, "loss": 0.2293, "step": 67348 }, { "epoch": 0.11941711680802568, "grad_norm": 1.578125, "learning_rate": 0.0005748044513447432, "loss": 0.1952, "step": 67350 }, { "epoch": 0.11942066297333549, "grad_norm": 0.314453125, "learning_rate": 0.000574756118781343, "loss": 0.1844, "step": 67352 }, { "epoch": 0.11942420913864531, "grad_norm": 0.96875, "learning_rate": 0.0005747077885150545, "loss": 0.3502, "step": 67354 }, { "epoch": 0.11942775530395512, "grad_norm": 0.87109375, "learning_rate": 0.0005746594605460886, "loss": 0.2003, "step": 67356 }, { "epoch": 0.11943130146926494, "grad_norm": 2.078125, "learning_rate": 0.0005746111348746574, "loss": 0.3119, "step": 67358 }, { "epoch": 0.11943484763457475, "grad_norm": 0.248046875, "learning_rate": 0.000574562811500972, "loss": 0.135, "step": 67360 }, { "epoch": 0.11943839379988458, "grad_norm": 0.82421875, "learning_rate": 0.0005745144904252435, "loss": 0.3042, "step": 67362 }, { "epoch": 0.1194419399651944, "grad_norm": 0.283203125, "learning_rate": 0.0005744661716476834, "loss": 0.2051, "step": 67364 }, { "epoch": 0.11944548613050421, "grad_norm": 0.37109375, "learning_rate": 0.0005744178551685029, "loss": 0.1581, "step": 67366 }, { "epoch": 0.11944903229581402, "grad_norm": 0.33203125, "learning_rate": 0.0005743695409879138, "loss": 0.1582, "step": 67368 }, { "epoch": 0.11945257846112384, "grad_norm": 0.431640625, "learning_rate": 0.0005743212291061264, "loss": 0.1398, "step": 67370 }, { "epoch": 0.11945612462643365, "grad_norm": 0.439453125, "learning_rate": 0.0005742729195233529, "loss": 0.2357, "step": 67372 }, { "epoch": 0.11945967079174347, "grad_norm": 1.0078125, "learning_rate": 0.000574224612239804, "loss": 0.1467, "step": 67374 }, { "epoch": 0.11946321695705328, "grad_norm": 0.47265625, "learning_rate": 0.0005741763072556917, "loss": 0.1525, "step": 67376 }, { "epoch": 0.1194667631223631, "grad_norm": 0.244140625, "learning_rate": 0.0005741280045712267, "loss": 0.1917, "step": 67378 }, { "epoch": 0.11947030928767291, "grad_norm": 0.27734375, "learning_rate": 0.0005740797041866203, "loss": 0.1416, "step": 67380 }, { "epoch": 0.11947385545298272, "grad_norm": 1.3359375, "learning_rate": 0.0005740314061020838, "loss": 0.1464, "step": 67382 }, { "epoch": 0.11947740161829254, "grad_norm": 3.828125, "learning_rate": 0.000573983110317828, "loss": 0.2809, "step": 67384 }, { "epoch": 0.11948094778360235, "grad_norm": 1.1875, "learning_rate": 0.0005739348168340652, "loss": 0.1775, "step": 67386 }, { "epoch": 0.11948449394891217, "grad_norm": 0.1962890625, "learning_rate": 0.0005738865256510054, "loss": 0.1486, "step": 67388 }, { "epoch": 0.11948804011422198, "grad_norm": 0.76171875, "learning_rate": 0.0005738382367688607, "loss": 0.2127, "step": 67390 }, { "epoch": 0.1194915862795318, "grad_norm": 0.5078125, "learning_rate": 0.0005737899501878414, "loss": 0.2639, "step": 67392 }, { "epoch": 0.11949513244484161, "grad_norm": 0.326171875, "learning_rate": 0.00057374166590816, "loss": 0.2084, "step": 67394 }, { "epoch": 0.11949867861015143, "grad_norm": 0.359375, "learning_rate": 0.0005736933839300262, "loss": 0.2021, "step": 67396 }, { "epoch": 0.11950222477546124, "grad_norm": 0.375, "learning_rate": 0.0005736451042536522, "loss": 0.1684, "step": 67398 }, { "epoch": 0.11950577094077106, "grad_norm": 0.33203125, "learning_rate": 0.0005735968268792487, "loss": 0.1945, "step": 67400 }, { "epoch": 0.11950931710608087, "grad_norm": 0.4296875, "learning_rate": 0.0005735485518070267, "loss": 0.3794, "step": 67402 }, { "epoch": 0.11951286327139068, "grad_norm": 1.828125, "learning_rate": 0.0005735002790371978, "loss": 0.2371, "step": 67404 }, { "epoch": 0.1195164094367005, "grad_norm": 0.259765625, "learning_rate": 0.0005734520085699724, "loss": 0.1562, "step": 67406 }, { "epoch": 0.11951995560201033, "grad_norm": 0.796875, "learning_rate": 0.0005734037404055625, "loss": 0.2119, "step": 67408 }, { "epoch": 0.11952350176732014, "grad_norm": 0.66796875, "learning_rate": 0.0005733554745441787, "loss": 0.1745, "step": 67410 }, { "epoch": 0.11952704793262996, "grad_norm": 1.6953125, "learning_rate": 0.0005733072109860323, "loss": 0.3618, "step": 67412 }, { "epoch": 0.11953059409793977, "grad_norm": 1.2421875, "learning_rate": 0.0005732589497313336, "loss": 0.3485, "step": 67414 }, { "epoch": 0.11953414026324959, "grad_norm": 0.279296875, "learning_rate": 0.0005732106907802947, "loss": 0.1794, "step": 67416 }, { "epoch": 0.1195376864285594, "grad_norm": 2.484375, "learning_rate": 0.0005731624341331263, "loss": 0.2989, "step": 67418 }, { "epoch": 0.11954123259386921, "grad_norm": 0.44140625, "learning_rate": 0.0005731141797900394, "loss": 0.2048, "step": 67420 }, { "epoch": 0.11954477875917903, "grad_norm": 0.640625, "learning_rate": 0.0005730659277512451, "loss": 0.16, "step": 67422 }, { "epoch": 0.11954832492448884, "grad_norm": 0.53125, "learning_rate": 0.0005730176780169538, "loss": 0.1888, "step": 67424 }, { "epoch": 0.11955187108979866, "grad_norm": 48.75, "learning_rate": 0.0005729694305873779, "loss": 0.2662, "step": 67426 }, { "epoch": 0.11955541725510847, "grad_norm": 0.2373046875, "learning_rate": 0.0005729211854627269, "loss": 0.1751, "step": 67428 }, { "epoch": 0.11955896342041829, "grad_norm": 1.296875, "learning_rate": 0.0005728729426432126, "loss": 0.1694, "step": 67430 }, { "epoch": 0.1195625095857281, "grad_norm": 1.3515625, "learning_rate": 0.0005728247021290458, "loss": 0.5668, "step": 67432 }, { "epoch": 0.11956605575103792, "grad_norm": 0.478515625, "learning_rate": 0.0005727764639204383, "loss": 0.1969, "step": 67434 }, { "epoch": 0.11956960191634773, "grad_norm": 0.3828125, "learning_rate": 0.0005727282280175994, "loss": 0.142, "step": 67436 }, { "epoch": 0.11957314808165755, "grad_norm": 0.291015625, "learning_rate": 0.0005726799944207413, "loss": 0.1677, "step": 67438 }, { "epoch": 0.11957669424696736, "grad_norm": 0.38671875, "learning_rate": 0.0005726317631300745, "loss": 0.2098, "step": 67440 }, { "epoch": 0.11958024041227718, "grad_norm": 3.96875, "learning_rate": 0.0005725835341458101, "loss": 0.4608, "step": 67442 }, { "epoch": 0.11958378657758699, "grad_norm": 0.2197265625, "learning_rate": 0.0005725353074681591, "loss": 0.1561, "step": 67444 }, { "epoch": 0.1195873327428968, "grad_norm": 0.7421875, "learning_rate": 0.0005724870830973319, "loss": 0.2615, "step": 67446 }, { "epoch": 0.11959087890820662, "grad_norm": 0.34375, "learning_rate": 0.00057243886103354, "loss": 0.1766, "step": 67448 }, { "epoch": 0.11959442507351643, "grad_norm": 0.380859375, "learning_rate": 0.0005723906412769942, "loss": 0.2417, "step": 67450 }, { "epoch": 0.11959797123882625, "grad_norm": 0.74609375, "learning_rate": 0.0005723424238279052, "loss": 0.2038, "step": 67452 }, { "epoch": 0.11960151740413608, "grad_norm": 0.578125, "learning_rate": 0.0005722942086864836, "loss": 0.1588, "step": 67454 }, { "epoch": 0.11960506356944589, "grad_norm": 0.60546875, "learning_rate": 0.0005722459958529409, "loss": 0.137, "step": 67456 }, { "epoch": 0.1196086097347557, "grad_norm": 1.4375, "learning_rate": 0.0005721977853274878, "loss": 0.3368, "step": 67458 }, { "epoch": 0.11961215590006552, "grad_norm": 0.310546875, "learning_rate": 0.0005721495771103349, "loss": 0.2302, "step": 67460 }, { "epoch": 0.11961570206537533, "grad_norm": 0.2373046875, "learning_rate": 0.0005721013712016932, "loss": 0.1671, "step": 67462 }, { "epoch": 0.11961924823068515, "grad_norm": 0.1982421875, "learning_rate": 0.0005720531676017731, "loss": 0.1595, "step": 67464 }, { "epoch": 0.11962279439599496, "grad_norm": 0.53125, "learning_rate": 0.000572004966310786, "loss": 0.138, "step": 67466 }, { "epoch": 0.11962634056130478, "grad_norm": 0.21875, "learning_rate": 0.0005719567673289427, "loss": 0.2326, "step": 67468 }, { "epoch": 0.11962988672661459, "grad_norm": 1.1484375, "learning_rate": 0.0005719085706564535, "loss": 0.2881, "step": 67470 }, { "epoch": 0.11963343289192441, "grad_norm": 0.859375, "learning_rate": 0.0005718603762935293, "loss": 0.1546, "step": 67472 }, { "epoch": 0.11963697905723422, "grad_norm": 0.52734375, "learning_rate": 0.0005718121842403814, "loss": 0.2221, "step": 67474 }, { "epoch": 0.11964052522254404, "grad_norm": 0.404296875, "learning_rate": 0.0005717639944972201, "loss": 0.2301, "step": 67476 }, { "epoch": 0.11964407138785385, "grad_norm": 2.8125, "learning_rate": 0.0005717158070642563, "loss": 0.2731, "step": 67478 }, { "epoch": 0.11964761755316367, "grad_norm": 2.25, "learning_rate": 0.0005716676219417007, "loss": 0.1559, "step": 67480 }, { "epoch": 0.11965116371847348, "grad_norm": 1.03125, "learning_rate": 0.0005716194391297637, "loss": 0.2188, "step": 67482 }, { "epoch": 0.1196547098837833, "grad_norm": 0.2265625, "learning_rate": 0.000571571258628657, "loss": 0.1861, "step": 67484 }, { "epoch": 0.11965825604909311, "grad_norm": 0.7109375, "learning_rate": 0.00057152308043859, "loss": 0.1955, "step": 67486 }, { "epoch": 0.11966180221440292, "grad_norm": 0.2578125, "learning_rate": 0.0005714749045597745, "loss": 0.1863, "step": 67488 }, { "epoch": 0.11966534837971274, "grad_norm": 0.486328125, "learning_rate": 0.0005714267309924206, "loss": 0.1877, "step": 67490 }, { "epoch": 0.11966889454502255, "grad_norm": 0.4140625, "learning_rate": 0.0005713785597367392, "loss": 0.1264, "step": 67492 }, { "epoch": 0.11967244071033237, "grad_norm": 2.3125, "learning_rate": 0.0005713303907929411, "loss": 0.1895, "step": 67494 }, { "epoch": 0.11967598687564218, "grad_norm": 0.80859375, "learning_rate": 0.0005712822241612362, "loss": 0.1668, "step": 67496 }, { "epoch": 0.11967953304095201, "grad_norm": 0.2890625, "learning_rate": 0.000571234059841836, "loss": 0.1706, "step": 67498 }, { "epoch": 0.11968307920626182, "grad_norm": 0.83984375, "learning_rate": 0.0005711858978349511, "loss": 0.2578, "step": 67500 }, { "epoch": 0.11968662537157164, "grad_norm": 0.73046875, "learning_rate": 0.000571137738140792, "loss": 0.2214, "step": 67502 }, { "epoch": 0.11969017153688145, "grad_norm": 0.52734375, "learning_rate": 0.0005710895807595686, "loss": 0.2315, "step": 67504 }, { "epoch": 0.11969371770219127, "grad_norm": 3.03125, "learning_rate": 0.0005710414256914927, "loss": 0.3184, "step": 67506 }, { "epoch": 0.11969726386750108, "grad_norm": 0.283203125, "learning_rate": 0.0005709932729367744, "loss": 0.1795, "step": 67508 }, { "epoch": 0.1197008100328109, "grad_norm": 0.359375, "learning_rate": 0.0005709451224956239, "loss": 0.1609, "step": 67510 }, { "epoch": 0.11970435619812071, "grad_norm": 0.8125, "learning_rate": 0.0005708969743682525, "loss": 0.1291, "step": 67512 }, { "epoch": 0.11970790236343053, "grad_norm": 0.29296875, "learning_rate": 0.0005708488285548699, "loss": 0.1825, "step": 67514 }, { "epoch": 0.11971144852874034, "grad_norm": 0.271484375, "learning_rate": 0.0005708006850556877, "loss": 0.1845, "step": 67516 }, { "epoch": 0.11971499469405016, "grad_norm": 0.310546875, "learning_rate": 0.0005707525438709154, "loss": 0.1557, "step": 67518 }, { "epoch": 0.11971854085935997, "grad_norm": 0.2890625, "learning_rate": 0.0005707044050007644, "loss": 0.2386, "step": 67520 }, { "epoch": 0.11972208702466978, "grad_norm": 0.50390625, "learning_rate": 0.0005706562684454445, "loss": 0.152, "step": 67522 }, { "epoch": 0.1197256331899796, "grad_norm": 0.63671875, "learning_rate": 0.0005706081342051672, "loss": 0.2019, "step": 67524 }, { "epoch": 0.11972917935528941, "grad_norm": 0.3125, "learning_rate": 0.0005705600022801418, "loss": 0.1492, "step": 67526 }, { "epoch": 0.11973272552059923, "grad_norm": 0.203125, "learning_rate": 0.0005705118726705798, "loss": 0.1228, "step": 67528 }, { "epoch": 0.11973627168590904, "grad_norm": 0.255859375, "learning_rate": 0.0005704637453766914, "loss": 0.173, "step": 67530 }, { "epoch": 0.11973981785121886, "grad_norm": 0.60546875, "learning_rate": 0.0005704156203986867, "loss": 0.1922, "step": 67532 }, { "epoch": 0.11974336401652867, "grad_norm": 1.109375, "learning_rate": 0.0005703674977367765, "loss": 0.1831, "step": 67534 }, { "epoch": 0.11974691018183849, "grad_norm": 1.0, "learning_rate": 0.0005703193773911712, "loss": 0.1413, "step": 67536 }, { "epoch": 0.1197504563471483, "grad_norm": 0.91015625, "learning_rate": 0.0005702712593620813, "loss": 0.1833, "step": 67538 }, { "epoch": 0.11975400251245812, "grad_norm": 0.341796875, "learning_rate": 0.0005702231436497174, "loss": 0.1464, "step": 67540 }, { "epoch": 0.11975754867776793, "grad_norm": 0.68359375, "learning_rate": 0.0005701750302542897, "loss": 0.1667, "step": 67542 }, { "epoch": 0.11976109484307776, "grad_norm": 1.546875, "learning_rate": 0.0005701269191760081, "loss": 0.1694, "step": 67544 }, { "epoch": 0.11976464100838757, "grad_norm": 0.267578125, "learning_rate": 0.0005700788104150842, "loss": 0.1767, "step": 67546 }, { "epoch": 0.11976818717369739, "grad_norm": 0.5234375, "learning_rate": 0.0005700307039717276, "loss": 0.2706, "step": 67548 }, { "epoch": 0.1197717333390072, "grad_norm": 1.9140625, "learning_rate": 0.0005699825998461489, "loss": 0.2747, "step": 67550 }, { "epoch": 0.11977527950431702, "grad_norm": 0.40625, "learning_rate": 0.0005699344980385583, "loss": 0.1876, "step": 67552 }, { "epoch": 0.11977882566962683, "grad_norm": 0.298828125, "learning_rate": 0.0005698863985491662, "loss": 0.1682, "step": 67554 }, { "epoch": 0.11978237183493665, "grad_norm": 0.6875, "learning_rate": 0.0005698383013781834, "loss": 0.1203, "step": 67556 }, { "epoch": 0.11978591800024646, "grad_norm": 1.1484375, "learning_rate": 0.0005697902065258198, "loss": 0.2648, "step": 67558 }, { "epoch": 0.11978946416555628, "grad_norm": 0.2412109375, "learning_rate": 0.0005697421139922858, "loss": 0.1644, "step": 67560 }, { "epoch": 0.11979301033086609, "grad_norm": 0.2109375, "learning_rate": 0.0005696940237777915, "loss": 0.1392, "step": 67562 }, { "epoch": 0.1197965564961759, "grad_norm": 0.1884765625, "learning_rate": 0.0005696459358825479, "loss": 0.1804, "step": 67564 }, { "epoch": 0.11980010266148572, "grad_norm": 0.66015625, "learning_rate": 0.0005695978503067649, "loss": 0.1647, "step": 67566 }, { "epoch": 0.11980364882679553, "grad_norm": 0.4921875, "learning_rate": 0.0005695497670506527, "loss": 0.1403, "step": 67568 }, { "epoch": 0.11980719499210535, "grad_norm": 0.66015625, "learning_rate": 0.0005695016861144218, "loss": 0.1753, "step": 67570 }, { "epoch": 0.11981074115741516, "grad_norm": 0.70703125, "learning_rate": 0.0005694536074982821, "loss": 0.1496, "step": 67572 }, { "epoch": 0.11981428732272498, "grad_norm": 0.26953125, "learning_rate": 0.0005694055312024448, "loss": 0.1695, "step": 67574 }, { "epoch": 0.11981783348803479, "grad_norm": 0.9921875, "learning_rate": 0.0005693574572271187, "loss": 0.1686, "step": 67576 }, { "epoch": 0.1198213796533446, "grad_norm": 1.3984375, "learning_rate": 0.0005693093855725153, "loss": 0.2447, "step": 67578 }, { "epoch": 0.11982492581865442, "grad_norm": 0.73828125, "learning_rate": 0.0005692613162388438, "loss": 0.1471, "step": 67580 }, { "epoch": 0.11982847198396424, "grad_norm": 0.69921875, "learning_rate": 0.000569213249226316, "loss": 0.1983, "step": 67582 }, { "epoch": 0.11983201814927405, "grad_norm": 0.33203125, "learning_rate": 0.0005691651845351404, "loss": 0.1973, "step": 67584 }, { "epoch": 0.11983556431458386, "grad_norm": 0.341796875, "learning_rate": 0.0005691171221655282, "loss": 0.1798, "step": 67586 }, { "epoch": 0.11983911047989368, "grad_norm": 0.98046875, "learning_rate": 0.0005690690621176893, "loss": 0.2727, "step": 67588 }, { "epoch": 0.11984265664520351, "grad_norm": 0.2421875, "learning_rate": 0.000569021004391834, "loss": 0.1691, "step": 67590 }, { "epoch": 0.11984620281051332, "grad_norm": 1.9921875, "learning_rate": 0.0005689729489881723, "loss": 0.2227, "step": 67592 }, { "epoch": 0.11984974897582314, "grad_norm": 0.2578125, "learning_rate": 0.0005689248959069143, "loss": 0.1482, "step": 67594 }, { "epoch": 0.11985329514113295, "grad_norm": 0.60546875, "learning_rate": 0.0005688768451482705, "loss": 0.168, "step": 67596 }, { "epoch": 0.11985684130644277, "grad_norm": 0.55859375, "learning_rate": 0.0005688287967124509, "loss": 0.1879, "step": 67598 }, { "epoch": 0.11986038747175258, "grad_norm": 0.5625, "learning_rate": 0.0005687807505996656, "loss": 0.199, "step": 67600 }, { "epoch": 0.1198639336370624, "grad_norm": 0.33984375, "learning_rate": 0.0005687327068101245, "loss": 0.188, "step": 67602 }, { "epoch": 0.11986747980237221, "grad_norm": 0.2451171875, "learning_rate": 0.0005686846653440381, "loss": 0.21, "step": 67604 }, { "epoch": 0.11987102596768202, "grad_norm": 0.6015625, "learning_rate": 0.0005686366262016165, "loss": 0.1648, "step": 67606 }, { "epoch": 0.11987457213299184, "grad_norm": 0.5078125, "learning_rate": 0.0005685885893830696, "loss": 0.1458, "step": 67608 }, { "epoch": 0.11987811829830165, "grad_norm": 0.412109375, "learning_rate": 0.0005685405548886074, "loss": 0.2229, "step": 67610 }, { "epoch": 0.11988166446361147, "grad_norm": 0.373046875, "learning_rate": 0.00056849252271844, "loss": 0.1688, "step": 67612 }, { "epoch": 0.11988521062892128, "grad_norm": 0.240234375, "learning_rate": 0.0005684444928727779, "loss": 0.1992, "step": 67614 }, { "epoch": 0.1198887567942311, "grad_norm": 0.84765625, "learning_rate": 0.0005683964653518305, "loss": 0.2063, "step": 67616 }, { "epoch": 0.11989230295954091, "grad_norm": 0.7109375, "learning_rate": 0.0005683484401558084, "loss": 0.1998, "step": 67618 }, { "epoch": 0.11989584912485073, "grad_norm": 0.1923828125, "learning_rate": 0.000568300417284921, "loss": 0.2002, "step": 67620 }, { "epoch": 0.11989939529016054, "grad_norm": 0.78515625, "learning_rate": 0.0005682523967393795, "loss": 0.1511, "step": 67622 }, { "epoch": 0.11990294145547035, "grad_norm": 0.30859375, "learning_rate": 0.0005682043785193925, "loss": 0.1556, "step": 67624 }, { "epoch": 0.11990648762078017, "grad_norm": 0.890625, "learning_rate": 0.0005681563626251709, "loss": 0.1741, "step": 67626 }, { "epoch": 0.11991003378608998, "grad_norm": 0.765625, "learning_rate": 0.0005681083490569243, "loss": 0.149, "step": 67628 }, { "epoch": 0.1199135799513998, "grad_norm": 1.171875, "learning_rate": 0.0005680603378148627, "loss": 0.2136, "step": 67630 }, { "epoch": 0.11991712611670961, "grad_norm": 0.71875, "learning_rate": 0.0005680123288991967, "loss": 0.2253, "step": 67632 }, { "epoch": 0.11992067228201944, "grad_norm": 0.322265625, "learning_rate": 0.0005679643223101352, "loss": 0.2198, "step": 67634 }, { "epoch": 0.11992421844732926, "grad_norm": 1.8125, "learning_rate": 0.000567916318047889, "loss": 0.247, "step": 67636 }, { "epoch": 0.11992776461263907, "grad_norm": 0.298828125, "learning_rate": 0.0005678683161126674, "loss": 0.1666, "step": 67638 }, { "epoch": 0.11993131077794889, "grad_norm": 0.6484375, "learning_rate": 0.0005678203165046813, "loss": 0.3501, "step": 67640 }, { "epoch": 0.1199348569432587, "grad_norm": 0.53515625, "learning_rate": 0.0005677723192241396, "loss": 0.3719, "step": 67642 }, { "epoch": 0.11993840310856851, "grad_norm": 2.40625, "learning_rate": 0.0005677243242712528, "loss": 0.2587, "step": 67644 }, { "epoch": 0.11994194927387833, "grad_norm": 0.5625, "learning_rate": 0.0005676763316462306, "loss": 0.2078, "step": 67646 }, { "epoch": 0.11994549543918814, "grad_norm": 0.33984375, "learning_rate": 0.0005676283413492829, "loss": 0.1693, "step": 67648 }, { "epoch": 0.11994904160449796, "grad_norm": 0.298828125, "learning_rate": 0.0005675803533806196, "loss": 0.1467, "step": 67650 }, { "epoch": 0.11995258776980777, "grad_norm": 0.267578125, "learning_rate": 0.0005675323677404502, "loss": 0.1568, "step": 67652 }, { "epoch": 0.11995613393511759, "grad_norm": 0.9140625, "learning_rate": 0.0005674843844289854, "loss": 0.2649, "step": 67654 }, { "epoch": 0.1199596801004274, "grad_norm": 0.408203125, "learning_rate": 0.0005674364034464345, "loss": 0.1961, "step": 67656 }, { "epoch": 0.11996322626573722, "grad_norm": 0.5390625, "learning_rate": 0.0005673884247930074, "loss": 0.2035, "step": 67658 }, { "epoch": 0.11996677243104703, "grad_norm": 0.388671875, "learning_rate": 0.0005673404484689138, "loss": 0.2168, "step": 67660 }, { "epoch": 0.11997031859635685, "grad_norm": 0.375, "learning_rate": 0.0005672924744743637, "loss": 0.1649, "step": 67662 }, { "epoch": 0.11997386476166666, "grad_norm": 0.421875, "learning_rate": 0.0005672445028095669, "loss": 0.2036, "step": 67664 }, { "epoch": 0.11997741092697647, "grad_norm": 0.306640625, "learning_rate": 0.0005671965334747334, "loss": 0.1896, "step": 67666 }, { "epoch": 0.11998095709228629, "grad_norm": 0.51171875, "learning_rate": 0.0005671485664700728, "loss": 0.2604, "step": 67668 }, { "epoch": 0.1199845032575961, "grad_norm": 0.1923828125, "learning_rate": 0.0005671006017957943, "loss": 0.2059, "step": 67670 }, { "epoch": 0.11998804942290592, "grad_norm": 0.48828125, "learning_rate": 0.0005670526394521091, "loss": 0.2192, "step": 67672 }, { "epoch": 0.11999159558821573, "grad_norm": 0.95703125, "learning_rate": 0.0005670046794392252, "loss": 0.2124, "step": 67674 }, { "epoch": 0.11999514175352555, "grad_norm": 0.248046875, "learning_rate": 0.0005669567217573537, "loss": 0.1506, "step": 67676 }, { "epoch": 0.11999868791883536, "grad_norm": 0.59375, "learning_rate": 0.0005669087664067038, "loss": 0.2489, "step": 67678 }, { "epoch": 0.12000223408414519, "grad_norm": 0.546875, "learning_rate": 0.0005668608133874854, "loss": 0.2415, "step": 67680 }, { "epoch": 0.120005780249455, "grad_norm": 0.2099609375, "learning_rate": 0.000566812862699908, "loss": 0.1671, "step": 67682 }, { "epoch": 0.12000932641476482, "grad_norm": 0.474609375, "learning_rate": 0.0005667649143441813, "loss": 0.22, "step": 67684 }, { "epoch": 0.12001287258007463, "grad_norm": 0.5234375, "learning_rate": 0.0005667169683205153, "loss": 0.2359, "step": 67686 }, { "epoch": 0.12001641874538445, "grad_norm": 0.4296875, "learning_rate": 0.0005666690246291195, "loss": 0.1511, "step": 67688 }, { "epoch": 0.12001996491069426, "grad_norm": 0.67578125, "learning_rate": 0.0005666210832702037, "loss": 0.1596, "step": 67690 }, { "epoch": 0.12002351107600408, "grad_norm": 0.310546875, "learning_rate": 0.000566573144243977, "loss": 0.1962, "step": 67692 }, { "epoch": 0.12002705724131389, "grad_norm": 0.306640625, "learning_rate": 0.00056652520755065, "loss": 0.1775, "step": 67694 }, { "epoch": 0.1200306034066237, "grad_norm": 0.232421875, "learning_rate": 0.0005664772731904316, "loss": 0.1842, "step": 67696 }, { "epoch": 0.12003414957193352, "grad_norm": 0.984375, "learning_rate": 0.0005664293411635319, "loss": 0.1712, "step": 67698 }, { "epoch": 0.12003769573724334, "grad_norm": 0.26953125, "learning_rate": 0.0005663814114701602, "loss": 0.142, "step": 67700 }, { "epoch": 0.12004124190255315, "grad_norm": 0.248046875, "learning_rate": 0.0005663334841105261, "loss": 0.1579, "step": 67702 }, { "epoch": 0.12004478806786296, "grad_norm": 0.6953125, "learning_rate": 0.0005662855590848399, "loss": 0.3014, "step": 67704 }, { "epoch": 0.12004833423317278, "grad_norm": 0.8203125, "learning_rate": 0.0005662376363933099, "loss": 0.2816, "step": 67706 }, { "epoch": 0.1200518803984826, "grad_norm": 0.296875, "learning_rate": 0.0005661897160361469, "loss": 0.1621, "step": 67708 }, { "epoch": 0.12005542656379241, "grad_norm": 0.59375, "learning_rate": 0.0005661417980135597, "loss": 0.1792, "step": 67710 }, { "epoch": 0.12005897272910222, "grad_norm": 0.3828125, "learning_rate": 0.0005660938823257584, "loss": 0.1553, "step": 67712 }, { "epoch": 0.12006251889441204, "grad_norm": 0.369140625, "learning_rate": 0.0005660459689729524, "loss": 0.1551, "step": 67714 }, { "epoch": 0.12006606505972185, "grad_norm": 0.337890625, "learning_rate": 0.0005659980579553511, "loss": 0.2257, "step": 67716 }, { "epoch": 0.12006961122503167, "grad_norm": 1.1953125, "learning_rate": 0.0005659501492731642, "loss": 0.1854, "step": 67718 }, { "epoch": 0.12007315739034148, "grad_norm": 1.109375, "learning_rate": 0.0005659022429266008, "loss": 0.2366, "step": 67720 }, { "epoch": 0.1200767035556513, "grad_norm": 0.453125, "learning_rate": 0.0005658543389158712, "loss": 0.3181, "step": 67722 }, { "epoch": 0.12008024972096111, "grad_norm": 1.921875, "learning_rate": 0.0005658064372411841, "loss": 0.2891, "step": 67724 }, { "epoch": 0.12008379588627094, "grad_norm": 2.984375, "learning_rate": 0.0005657585379027494, "loss": 0.3348, "step": 67726 }, { "epoch": 0.12008734205158075, "grad_norm": 0.310546875, "learning_rate": 0.0005657106409007764, "loss": 0.1592, "step": 67728 }, { "epoch": 0.12009088821689057, "grad_norm": 0.302734375, "learning_rate": 0.0005656627462354753, "loss": 0.1926, "step": 67730 }, { "epoch": 0.12009443438220038, "grad_norm": 0.6015625, "learning_rate": 0.0005656148539070542, "loss": 0.1765, "step": 67732 }, { "epoch": 0.1200979805475102, "grad_norm": 0.2138671875, "learning_rate": 0.0005655669639157239, "loss": 0.1897, "step": 67734 }, { "epoch": 0.12010152671282001, "grad_norm": 0.5546875, "learning_rate": 0.000565519076261693, "loss": 0.1203, "step": 67736 }, { "epoch": 0.12010507287812983, "grad_norm": 0.35546875, "learning_rate": 0.0005654711909451714, "loss": 0.1707, "step": 67738 }, { "epoch": 0.12010861904343964, "grad_norm": 0.50390625, "learning_rate": 0.0005654233079663683, "loss": 0.1573, "step": 67740 }, { "epoch": 0.12011216520874946, "grad_norm": 2.390625, "learning_rate": 0.0005653754273254927, "loss": 0.319, "step": 67742 }, { "epoch": 0.12011571137405927, "grad_norm": 0.2109375, "learning_rate": 0.0005653275490227549, "loss": 0.1667, "step": 67744 }, { "epoch": 0.12011925753936908, "grad_norm": 0.51953125, "learning_rate": 0.0005652796730583637, "loss": 0.1613, "step": 67746 }, { "epoch": 0.1201228037046789, "grad_norm": 4.0, "learning_rate": 0.0005652317994325287, "loss": 0.1873, "step": 67748 }, { "epoch": 0.12012634986998871, "grad_norm": 0.28125, "learning_rate": 0.0005651839281454587, "loss": 0.2279, "step": 67750 }, { "epoch": 0.12012989603529853, "grad_norm": 0.4296875, "learning_rate": 0.000565136059197364, "loss": 0.1994, "step": 67752 }, { "epoch": 0.12013344220060834, "grad_norm": 0.279296875, "learning_rate": 0.0005650881925884535, "loss": 0.1927, "step": 67754 }, { "epoch": 0.12013698836591816, "grad_norm": 0.671875, "learning_rate": 0.0005650403283189362, "loss": 0.2251, "step": 67756 }, { "epoch": 0.12014053453122797, "grad_norm": 0.181640625, "learning_rate": 0.0005649924663890223, "loss": 0.1619, "step": 67758 }, { "epoch": 0.12014408069653779, "grad_norm": 0.5859375, "learning_rate": 0.0005649446067989199, "loss": 0.1702, "step": 67760 }, { "epoch": 0.1201476268618476, "grad_norm": 0.337890625, "learning_rate": 0.0005648967495488397, "loss": 0.1461, "step": 67762 }, { "epoch": 0.12015117302715742, "grad_norm": 0.37109375, "learning_rate": 0.0005648488946389898, "loss": 0.1649, "step": 67764 }, { "epoch": 0.12015471919246723, "grad_norm": 0.55859375, "learning_rate": 0.0005648010420695802, "loss": 0.186, "step": 67766 }, { "epoch": 0.12015826535777704, "grad_norm": 0.953125, "learning_rate": 0.0005647531918408196, "loss": 0.1701, "step": 67768 }, { "epoch": 0.12016181152308687, "grad_norm": 0.287109375, "learning_rate": 0.0005647053439529182, "loss": 0.2068, "step": 67770 }, { "epoch": 0.12016535768839669, "grad_norm": 0.53515625, "learning_rate": 0.0005646574984060842, "loss": 0.1508, "step": 67772 }, { "epoch": 0.1201689038537065, "grad_norm": 0.376953125, "learning_rate": 0.0005646096552005275, "loss": 0.1638, "step": 67774 }, { "epoch": 0.12017245001901632, "grad_norm": 0.734375, "learning_rate": 0.0005645618143364575, "loss": 0.1509, "step": 67776 }, { "epoch": 0.12017599618432613, "grad_norm": 0.2890625, "learning_rate": 0.000564513975814083, "loss": 0.1719, "step": 67778 }, { "epoch": 0.12017954234963595, "grad_norm": 0.357421875, "learning_rate": 0.0005644661396336131, "loss": 0.1716, "step": 67780 }, { "epoch": 0.12018308851494576, "grad_norm": 1.4765625, "learning_rate": 0.000564418305795257, "loss": 0.1775, "step": 67782 }, { "epoch": 0.12018663468025557, "grad_norm": 0.369140625, "learning_rate": 0.0005643704742992244, "loss": 0.2276, "step": 67784 }, { "epoch": 0.12019018084556539, "grad_norm": 0.87109375, "learning_rate": 0.0005643226451457244, "loss": 0.2096, "step": 67786 }, { "epoch": 0.1201937270108752, "grad_norm": 0.1611328125, "learning_rate": 0.0005642748183349658, "loss": 0.1466, "step": 67788 }, { "epoch": 0.12019727317618502, "grad_norm": 0.48046875, "learning_rate": 0.0005642269938671578, "loss": 0.13, "step": 67790 }, { "epoch": 0.12020081934149483, "grad_norm": 0.298828125, "learning_rate": 0.0005641791717425099, "loss": 0.215, "step": 67792 }, { "epoch": 0.12020436550680465, "grad_norm": 0.1728515625, "learning_rate": 0.0005641313519612312, "loss": 0.2144, "step": 67794 }, { "epoch": 0.12020791167211446, "grad_norm": 0.357421875, "learning_rate": 0.0005640835345235304, "loss": 0.2243, "step": 67796 }, { "epoch": 0.12021145783742428, "grad_norm": 0.259765625, "learning_rate": 0.0005640357194296173, "loss": 0.1691, "step": 67798 }, { "epoch": 0.12021500400273409, "grad_norm": 1.0625, "learning_rate": 0.0005639879066797001, "loss": 0.187, "step": 67800 }, { "epoch": 0.1202185501680439, "grad_norm": 0.1669921875, "learning_rate": 0.0005639400962739888, "loss": 0.1446, "step": 67802 }, { "epoch": 0.12022209633335372, "grad_norm": 0.365234375, "learning_rate": 0.0005638922882126922, "loss": 0.2111, "step": 67804 }, { "epoch": 0.12022564249866353, "grad_norm": 1.09375, "learning_rate": 0.0005638444824960191, "loss": 0.2758, "step": 67806 }, { "epoch": 0.12022918866397335, "grad_norm": 0.42578125, "learning_rate": 0.0005637966791241789, "loss": 0.2436, "step": 67808 }, { "epoch": 0.12023273482928316, "grad_norm": 0.8046875, "learning_rate": 0.0005637488780973805, "loss": 0.1782, "step": 67810 }, { "epoch": 0.12023628099459298, "grad_norm": 0.73828125, "learning_rate": 0.0005637010794158332, "loss": 0.2232, "step": 67812 }, { "epoch": 0.12023982715990279, "grad_norm": 0.283203125, "learning_rate": 0.0005636532830797457, "loss": 0.2188, "step": 67814 }, { "epoch": 0.12024337332521262, "grad_norm": 0.40234375, "learning_rate": 0.0005636054890893274, "loss": 0.1866, "step": 67816 }, { "epoch": 0.12024691949052244, "grad_norm": 0.2734375, "learning_rate": 0.0005635576974447867, "loss": 0.1514, "step": 67818 }, { "epoch": 0.12025046565583225, "grad_norm": 0.330078125, "learning_rate": 0.0005635099081463338, "loss": 0.1626, "step": 67820 }, { "epoch": 0.12025401182114206, "grad_norm": 0.466796875, "learning_rate": 0.000563462121194176, "loss": 0.1976, "step": 67822 }, { "epoch": 0.12025755798645188, "grad_norm": 0.45703125, "learning_rate": 0.0005634143365885237, "loss": 0.1668, "step": 67824 }, { "epoch": 0.1202611041517617, "grad_norm": 0.3515625, "learning_rate": 0.0005633665543295851, "loss": 0.1905, "step": 67826 }, { "epoch": 0.12026465031707151, "grad_norm": 0.4765625, "learning_rate": 0.00056331877441757, "loss": 0.1961, "step": 67828 }, { "epoch": 0.12026819648238132, "grad_norm": 0.486328125, "learning_rate": 0.0005632709968526864, "loss": 0.1534, "step": 67830 }, { "epoch": 0.12027174264769114, "grad_norm": 1.3046875, "learning_rate": 0.0005632232216351439, "loss": 0.2328, "step": 67832 }, { "epoch": 0.12027528881300095, "grad_norm": 0.25390625, "learning_rate": 0.0005631754487651513, "loss": 0.162, "step": 67834 }, { "epoch": 0.12027883497831077, "grad_norm": 0.609375, "learning_rate": 0.0005631276782429173, "loss": 0.2641, "step": 67836 }, { "epoch": 0.12028238114362058, "grad_norm": 0.55859375, "learning_rate": 0.0005630799100686513, "loss": 0.2025, "step": 67838 }, { "epoch": 0.1202859273089304, "grad_norm": 0.453125, "learning_rate": 0.0005630321442425612, "loss": 0.1862, "step": 67840 }, { "epoch": 0.12028947347424021, "grad_norm": 0.376953125, "learning_rate": 0.0005629843807648571, "loss": 0.1891, "step": 67842 }, { "epoch": 0.12029301963955003, "grad_norm": 0.453125, "learning_rate": 0.0005629366196357472, "loss": 0.182, "step": 67844 }, { "epoch": 0.12029656580485984, "grad_norm": 0.51953125, "learning_rate": 0.000562888860855441, "loss": 0.2661, "step": 67846 }, { "epoch": 0.12030011197016965, "grad_norm": 1.140625, "learning_rate": 0.0005628411044241465, "loss": 0.2781, "step": 67848 }, { "epoch": 0.12030365813547947, "grad_norm": 0.18359375, "learning_rate": 0.0005627933503420728, "loss": 0.1649, "step": 67850 }, { "epoch": 0.12030720430078928, "grad_norm": 0.6484375, "learning_rate": 0.0005627455986094296, "loss": 0.1748, "step": 67852 }, { "epoch": 0.1203107504660991, "grad_norm": 0.212890625, "learning_rate": 0.0005626978492264246, "loss": 0.1645, "step": 67854 }, { "epoch": 0.12031429663140891, "grad_norm": 0.47265625, "learning_rate": 0.0005626501021932671, "loss": 0.1734, "step": 67856 }, { "epoch": 0.12031784279671873, "grad_norm": 0.384765625, "learning_rate": 0.0005626023575101658, "loss": 0.259, "step": 67858 }, { "epoch": 0.12032138896202854, "grad_norm": 0.373046875, "learning_rate": 0.0005625546151773302, "loss": 0.2051, "step": 67860 }, { "epoch": 0.12032493512733837, "grad_norm": 2.703125, "learning_rate": 0.000562506875194968, "loss": 0.3361, "step": 67862 }, { "epoch": 0.12032848129264818, "grad_norm": 0.302734375, "learning_rate": 0.0005624591375632888, "loss": 0.1214, "step": 67864 }, { "epoch": 0.120332027457958, "grad_norm": 0.2490234375, "learning_rate": 0.000562411402282501, "loss": 0.1408, "step": 67866 }, { "epoch": 0.12033557362326781, "grad_norm": 0.1748046875, "learning_rate": 0.0005623636693528136, "loss": 0.1368, "step": 67868 }, { "epoch": 0.12033911978857763, "grad_norm": 0.2490234375, "learning_rate": 0.000562315938774435, "loss": 0.1853, "step": 67870 }, { "epoch": 0.12034266595388744, "grad_norm": 0.8125, "learning_rate": 0.000562268210547574, "loss": 0.198, "step": 67872 }, { "epoch": 0.12034621211919726, "grad_norm": 0.6953125, "learning_rate": 0.0005622204846724398, "loss": 0.1795, "step": 67874 }, { "epoch": 0.12034975828450707, "grad_norm": 0.96875, "learning_rate": 0.0005621727611492409, "loss": 0.2011, "step": 67876 }, { "epoch": 0.12035330444981689, "grad_norm": 0.375, "learning_rate": 0.0005621250399781857, "loss": 0.1704, "step": 67878 }, { "epoch": 0.1203568506151267, "grad_norm": 0.427734375, "learning_rate": 0.0005620773211594829, "loss": 0.1447, "step": 67880 }, { "epoch": 0.12036039678043652, "grad_norm": 0.4453125, "learning_rate": 0.0005620296046933419, "loss": 0.1943, "step": 67882 }, { "epoch": 0.12036394294574633, "grad_norm": 0.328125, "learning_rate": 0.0005619818905799707, "loss": 0.2199, "step": 67884 }, { "epoch": 0.12036748911105614, "grad_norm": 0.734375, "learning_rate": 0.0005619341788195784, "loss": 0.2434, "step": 67886 }, { "epoch": 0.12037103527636596, "grad_norm": 0.32421875, "learning_rate": 0.0005618864694123733, "loss": 0.1499, "step": 67888 }, { "epoch": 0.12037458144167577, "grad_norm": 1.203125, "learning_rate": 0.0005618387623585641, "loss": 0.2985, "step": 67890 }, { "epoch": 0.12037812760698559, "grad_norm": 0.2431640625, "learning_rate": 0.0005617910576583597, "loss": 0.1497, "step": 67892 }, { "epoch": 0.1203816737722954, "grad_norm": 0.373046875, "learning_rate": 0.0005617433553119686, "loss": 0.1922, "step": 67894 }, { "epoch": 0.12038521993760522, "grad_norm": 0.77734375, "learning_rate": 0.0005616956553195994, "loss": 0.1883, "step": 67896 }, { "epoch": 0.12038876610291503, "grad_norm": 0.484375, "learning_rate": 0.0005616479576814605, "loss": 0.2062, "step": 67898 }, { "epoch": 0.12039231226822485, "grad_norm": 0.466796875, "learning_rate": 0.000561600262397761, "loss": 0.2067, "step": 67900 }, { "epoch": 0.12039585843353466, "grad_norm": 0.67578125, "learning_rate": 0.0005615525694687092, "loss": 0.1805, "step": 67902 }, { "epoch": 0.12039940459884448, "grad_norm": 0.2333984375, "learning_rate": 0.0005615048788945136, "loss": 0.2145, "step": 67904 }, { "epoch": 0.1204029507641543, "grad_norm": 0.169921875, "learning_rate": 0.0005614571906753829, "loss": 0.1812, "step": 67906 }, { "epoch": 0.12040649692946412, "grad_norm": 0.796875, "learning_rate": 0.0005614095048115255, "loss": 0.1436, "step": 67908 }, { "epoch": 0.12041004309477393, "grad_norm": 0.46875, "learning_rate": 0.0005613618213031504, "loss": 0.1802, "step": 67910 }, { "epoch": 0.12041358926008375, "grad_norm": 0.5234375, "learning_rate": 0.0005613141401504654, "loss": 0.4139, "step": 67912 }, { "epoch": 0.12041713542539356, "grad_norm": 0.52734375, "learning_rate": 0.0005612664613536798, "loss": 0.1953, "step": 67914 }, { "epoch": 0.12042068159070338, "grad_norm": 0.1748046875, "learning_rate": 0.0005612187849130013, "loss": 0.1717, "step": 67916 }, { "epoch": 0.12042422775601319, "grad_norm": 0.3203125, "learning_rate": 0.0005611711108286395, "loss": 0.1673, "step": 67918 }, { "epoch": 0.120427773921323, "grad_norm": 0.53125, "learning_rate": 0.0005611234391008018, "loss": 0.1635, "step": 67920 }, { "epoch": 0.12043132008663282, "grad_norm": 0.31640625, "learning_rate": 0.0005610757697296973, "loss": 0.3438, "step": 67922 }, { "epoch": 0.12043486625194263, "grad_norm": 1.421875, "learning_rate": 0.0005610281027155344, "loss": 0.2245, "step": 67924 }, { "epoch": 0.12043841241725245, "grad_norm": 3.46875, "learning_rate": 0.0005609804380585215, "loss": 0.2515, "step": 67926 }, { "epoch": 0.12044195858256226, "grad_norm": 0.5390625, "learning_rate": 0.0005609327757588672, "loss": 0.1474, "step": 67928 }, { "epoch": 0.12044550474787208, "grad_norm": 0.123046875, "learning_rate": 0.0005608851158167794, "loss": 0.1981, "step": 67930 }, { "epoch": 0.1204490509131819, "grad_norm": 0.318359375, "learning_rate": 0.0005608374582324672, "loss": 0.1889, "step": 67932 }, { "epoch": 0.12045259707849171, "grad_norm": 0.2265625, "learning_rate": 0.0005607898030061387, "loss": 0.1544, "step": 67934 }, { "epoch": 0.12045614324380152, "grad_norm": 0.29296875, "learning_rate": 0.0005607421501380025, "loss": 0.1613, "step": 67936 }, { "epoch": 0.12045968940911134, "grad_norm": 0.48046875, "learning_rate": 0.0005606944996282666, "loss": 0.1604, "step": 67938 }, { "epoch": 0.12046323557442115, "grad_norm": 0.984375, "learning_rate": 0.0005606468514771401, "loss": 0.1834, "step": 67940 }, { "epoch": 0.12046678173973097, "grad_norm": 0.94140625, "learning_rate": 0.0005605992056848308, "loss": 0.2121, "step": 67942 }, { "epoch": 0.12047032790504078, "grad_norm": 0.283203125, "learning_rate": 0.0005605515622515474, "loss": 0.1564, "step": 67944 }, { "epoch": 0.1204738740703506, "grad_norm": 0.50390625, "learning_rate": 0.0005605039211774981, "loss": 0.2058, "step": 67946 }, { "epoch": 0.12047742023566041, "grad_norm": 0.55078125, "learning_rate": 0.0005604562824628908, "loss": 0.3486, "step": 67948 }, { "epoch": 0.12048096640097022, "grad_norm": 0.482421875, "learning_rate": 0.000560408646107935, "loss": 0.1935, "step": 67950 }, { "epoch": 0.12048451256628005, "grad_norm": 0.29296875, "learning_rate": 0.0005603610121128379, "loss": 0.1588, "step": 67952 }, { "epoch": 0.12048805873158987, "grad_norm": 0.203125, "learning_rate": 0.0005603133804778084, "loss": 0.1276, "step": 67954 }, { "epoch": 0.12049160489689968, "grad_norm": 0.248046875, "learning_rate": 0.0005602657512030543, "loss": 0.1413, "step": 67956 }, { "epoch": 0.1204951510622095, "grad_norm": 0.296875, "learning_rate": 0.0005602181242887851, "loss": 0.1769, "step": 67958 }, { "epoch": 0.12049869722751931, "grad_norm": 0.353515625, "learning_rate": 0.0005601704997352078, "loss": 0.1662, "step": 67960 }, { "epoch": 0.12050224339282913, "grad_norm": 1.140625, "learning_rate": 0.0005601228775425311, "loss": 0.203, "step": 67962 }, { "epoch": 0.12050578955813894, "grad_norm": 0.474609375, "learning_rate": 0.0005600752577109636, "loss": 0.1619, "step": 67964 }, { "epoch": 0.12050933572344875, "grad_norm": 0.29296875, "learning_rate": 0.0005600276402407129, "loss": 0.174, "step": 67966 }, { "epoch": 0.12051288188875857, "grad_norm": 0.369140625, "learning_rate": 0.0005599800251319882, "loss": 0.1556, "step": 67968 }, { "epoch": 0.12051642805406838, "grad_norm": 0.55078125, "learning_rate": 0.0005599324123849967, "loss": 0.1446, "step": 67970 }, { "epoch": 0.1205199742193782, "grad_norm": 0.42578125, "learning_rate": 0.0005598848019999474, "loss": 0.2324, "step": 67972 }, { "epoch": 0.12052352038468801, "grad_norm": 0.8671875, "learning_rate": 0.0005598371939770478, "loss": 0.2534, "step": 67974 }, { "epoch": 0.12052706654999783, "grad_norm": 0.3125, "learning_rate": 0.0005597895883165073, "loss": 0.2029, "step": 67976 }, { "epoch": 0.12053061271530764, "grad_norm": 0.23828125, "learning_rate": 0.0005597419850185327, "loss": 0.1976, "step": 67978 }, { "epoch": 0.12053415888061746, "grad_norm": 0.9375, "learning_rate": 0.0005596943840833331, "loss": 0.2207, "step": 67980 }, { "epoch": 0.12053770504592727, "grad_norm": 0.65234375, "learning_rate": 0.0005596467855111165, "loss": 0.1733, "step": 67982 }, { "epoch": 0.12054125121123709, "grad_norm": 0.9296875, "learning_rate": 0.0005595991893020909, "loss": 0.2207, "step": 67984 }, { "epoch": 0.1205447973765469, "grad_norm": 1.1015625, "learning_rate": 0.0005595515954564648, "loss": 0.2553, "step": 67986 }, { "epoch": 0.12054834354185671, "grad_norm": 0.224609375, "learning_rate": 0.0005595040039744455, "loss": 0.1338, "step": 67988 }, { "epoch": 0.12055188970716653, "grad_norm": 1.328125, "learning_rate": 0.000559456414856242, "loss": 0.2024, "step": 67990 }, { "epoch": 0.12055543587247634, "grad_norm": 0.3984375, "learning_rate": 0.0005594088281020622, "loss": 0.1424, "step": 67992 }, { "epoch": 0.12055898203778616, "grad_norm": 0.37890625, "learning_rate": 0.0005593612437121143, "loss": 0.133, "step": 67994 }, { "epoch": 0.12056252820309597, "grad_norm": 0.361328125, "learning_rate": 0.0005593136616866059, "loss": 0.2002, "step": 67996 }, { "epoch": 0.1205660743684058, "grad_norm": 0.5078125, "learning_rate": 0.0005592660820257458, "loss": 0.188, "step": 67998 }, { "epoch": 0.12056962053371562, "grad_norm": 0.23828125, "learning_rate": 0.0005592185047297418, "loss": 0.1393, "step": 68000 }, { "epoch": 0.12057316669902543, "grad_norm": 0.26171875, "learning_rate": 0.0005591709297988019, "loss": 0.1537, "step": 68002 }, { "epoch": 0.12057671286433524, "grad_norm": 0.1826171875, "learning_rate": 0.0005591233572331343, "loss": 0.1387, "step": 68004 }, { "epoch": 0.12058025902964506, "grad_norm": 0.375, "learning_rate": 0.0005590757870329463, "loss": 0.103, "step": 68006 }, { "epoch": 0.12058380519495487, "grad_norm": 0.3671875, "learning_rate": 0.0005590282191984476, "loss": 0.1825, "step": 68008 }, { "epoch": 0.12058735136026469, "grad_norm": 0.376953125, "learning_rate": 0.0005589806537298444, "loss": 0.1887, "step": 68010 }, { "epoch": 0.1205908975255745, "grad_norm": 0.435546875, "learning_rate": 0.000558933090627346, "loss": 0.1671, "step": 68012 }, { "epoch": 0.12059444369088432, "grad_norm": 0.369140625, "learning_rate": 0.0005588855298911596, "loss": 0.216, "step": 68014 }, { "epoch": 0.12059798985619413, "grad_norm": 0.5078125, "learning_rate": 0.0005588379715214943, "loss": 0.1526, "step": 68016 }, { "epoch": 0.12060153602150395, "grad_norm": 0.5859375, "learning_rate": 0.0005587904155185566, "loss": 0.173, "step": 68018 }, { "epoch": 0.12060508218681376, "grad_norm": 0.55859375, "learning_rate": 0.0005587428618825557, "loss": 0.2009, "step": 68020 }, { "epoch": 0.12060862835212358, "grad_norm": 0.703125, "learning_rate": 0.0005586953106136989, "loss": 0.4172, "step": 68022 }, { "epoch": 0.12061217451743339, "grad_norm": 0.2353515625, "learning_rate": 0.0005586477617121945, "loss": 0.1582, "step": 68024 }, { "epoch": 0.1206157206827432, "grad_norm": 0.44140625, "learning_rate": 0.0005586002151782506, "loss": 0.1897, "step": 68026 }, { "epoch": 0.12061926684805302, "grad_norm": 0.39453125, "learning_rate": 0.0005585526710120742, "loss": 0.1417, "step": 68028 }, { "epoch": 0.12062281301336283, "grad_norm": 0.62109375, "learning_rate": 0.0005585051292138743, "loss": 0.1918, "step": 68030 }, { "epoch": 0.12062635917867265, "grad_norm": 0.97265625, "learning_rate": 0.0005584575897838584, "loss": 0.1432, "step": 68032 }, { "epoch": 0.12062990534398246, "grad_norm": 0.439453125, "learning_rate": 0.0005584100527222346, "loss": 0.1531, "step": 68034 }, { "epoch": 0.12063345150929228, "grad_norm": 0.33984375, "learning_rate": 0.0005583625180292105, "loss": 0.1549, "step": 68036 }, { "epoch": 0.12063699767460209, "grad_norm": 0.8125, "learning_rate": 0.0005583149857049939, "loss": 0.1762, "step": 68038 }, { "epoch": 0.1206405438399119, "grad_norm": 0.357421875, "learning_rate": 0.0005582674557497936, "loss": 0.1587, "step": 68040 }, { "epoch": 0.12064409000522174, "grad_norm": 0.349609375, "learning_rate": 0.0005582199281638159, "loss": 0.1784, "step": 68042 }, { "epoch": 0.12064763617053155, "grad_norm": 0.27734375, "learning_rate": 0.00055817240294727, "loss": 0.149, "step": 68044 }, { "epoch": 0.12065118233584136, "grad_norm": 0.22265625, "learning_rate": 0.0005581248801003629, "loss": 0.1627, "step": 68046 }, { "epoch": 0.12065472850115118, "grad_norm": 0.6015625, "learning_rate": 0.0005580773596233032, "loss": 0.1614, "step": 68048 }, { "epoch": 0.120658274666461, "grad_norm": 0.470703125, "learning_rate": 0.0005580298415162983, "loss": 0.3847, "step": 68050 }, { "epoch": 0.12066182083177081, "grad_norm": 0.59765625, "learning_rate": 0.0005579823257795562, "loss": 0.1784, "step": 68052 }, { "epoch": 0.12066536699708062, "grad_norm": 0.220703125, "learning_rate": 0.0005579348124132844, "loss": 0.1476, "step": 68054 }, { "epoch": 0.12066891316239044, "grad_norm": 0.2392578125, "learning_rate": 0.0005578873014176905, "loss": 0.1413, "step": 68056 }, { "epoch": 0.12067245932770025, "grad_norm": 0.330078125, "learning_rate": 0.0005578397927929835, "loss": 0.1859, "step": 68058 }, { "epoch": 0.12067600549301007, "grad_norm": 0.36328125, "learning_rate": 0.0005577922865393695, "loss": 0.4091, "step": 68060 }, { "epoch": 0.12067955165831988, "grad_norm": 0.71875, "learning_rate": 0.0005577447826570574, "loss": 0.1976, "step": 68062 }, { "epoch": 0.1206830978236297, "grad_norm": 0.5234375, "learning_rate": 0.0005576972811462544, "loss": 0.1285, "step": 68064 }, { "epoch": 0.12068664398893951, "grad_norm": 2.59375, "learning_rate": 0.0005576497820071691, "loss": 0.3909, "step": 68066 }, { "epoch": 0.12069019015424932, "grad_norm": 0.8125, "learning_rate": 0.000557602285240008, "loss": 0.1685, "step": 68068 }, { "epoch": 0.12069373631955914, "grad_norm": 0.5859375, "learning_rate": 0.0005575547908449797, "loss": 0.2004, "step": 68070 }, { "epoch": 0.12069728248486895, "grad_norm": 0.46875, "learning_rate": 0.0005575072988222918, "loss": 0.1468, "step": 68072 }, { "epoch": 0.12070082865017877, "grad_norm": 0.259765625, "learning_rate": 0.0005574598091721517, "loss": 0.1683, "step": 68074 }, { "epoch": 0.12070437481548858, "grad_norm": 0.1962890625, "learning_rate": 0.0005574123218947673, "loss": 0.1583, "step": 68076 }, { "epoch": 0.1207079209807984, "grad_norm": 0.474609375, "learning_rate": 0.0005573648369903459, "loss": 0.1633, "step": 68078 }, { "epoch": 0.12071146714610821, "grad_norm": 0.365234375, "learning_rate": 0.000557317354459096, "loss": 0.1629, "step": 68080 }, { "epoch": 0.12071501331141803, "grad_norm": 0.3515625, "learning_rate": 0.0005572698743012244, "loss": 0.1645, "step": 68082 }, { "epoch": 0.12071855947672784, "grad_norm": 0.36328125, "learning_rate": 0.0005572223965169394, "loss": 0.1843, "step": 68084 }, { "epoch": 0.12072210564203766, "grad_norm": 0.55078125, "learning_rate": 0.000557174921106448, "loss": 0.1428, "step": 68086 }, { "epoch": 0.12072565180734748, "grad_norm": 0.216796875, "learning_rate": 0.0005571274480699584, "loss": 0.1965, "step": 68088 }, { "epoch": 0.1207291979726573, "grad_norm": 0.54296875, "learning_rate": 0.000557079977407678, "loss": 0.2019, "step": 68090 }, { "epoch": 0.12073274413796711, "grad_norm": 0.3515625, "learning_rate": 0.0005570325091198146, "loss": 0.1432, "step": 68092 }, { "epoch": 0.12073629030327693, "grad_norm": 0.416015625, "learning_rate": 0.0005569850432065755, "loss": 0.2567, "step": 68094 }, { "epoch": 0.12073983646858674, "grad_norm": 0.322265625, "learning_rate": 0.000556937579668168, "loss": 0.2503, "step": 68096 }, { "epoch": 0.12074338263389656, "grad_norm": 0.578125, "learning_rate": 0.0005568901185048008, "loss": 0.1782, "step": 68098 }, { "epoch": 0.12074692879920637, "grad_norm": 0.439453125, "learning_rate": 0.0005568426597166801, "loss": 0.1649, "step": 68100 }, { "epoch": 0.12075047496451619, "grad_norm": 1.28125, "learning_rate": 0.0005567952033040145, "loss": 0.2319, "step": 68102 }, { "epoch": 0.120754021129826, "grad_norm": 0.470703125, "learning_rate": 0.0005567477492670108, "loss": 0.1404, "step": 68104 }, { "epoch": 0.12075756729513581, "grad_norm": 0.380859375, "learning_rate": 0.0005567002976058775, "loss": 0.1585, "step": 68106 }, { "epoch": 0.12076111346044563, "grad_norm": 0.416015625, "learning_rate": 0.0005566528483208209, "loss": 0.1719, "step": 68108 }, { "epoch": 0.12076465962575544, "grad_norm": 0.419921875, "learning_rate": 0.0005566054014120493, "loss": 0.2027, "step": 68110 }, { "epoch": 0.12076820579106526, "grad_norm": 0.263671875, "learning_rate": 0.00055655795687977, "loss": 0.21, "step": 68112 }, { "epoch": 0.12077175195637507, "grad_norm": 0.265625, "learning_rate": 0.0005565105147241906, "loss": 0.1752, "step": 68114 }, { "epoch": 0.12077529812168489, "grad_norm": 0.46484375, "learning_rate": 0.0005564630749455187, "loss": 0.2216, "step": 68116 }, { "epoch": 0.1207788442869947, "grad_norm": 0.546875, "learning_rate": 0.0005564156375439612, "loss": 0.1951, "step": 68118 }, { "epoch": 0.12078239045230452, "grad_norm": 0.32421875, "learning_rate": 0.000556368202519726, "loss": 0.1862, "step": 68120 }, { "epoch": 0.12078593661761433, "grad_norm": 0.431640625, "learning_rate": 0.0005563207698730208, "loss": 0.1749, "step": 68122 }, { "epoch": 0.12078948278292415, "grad_norm": 0.208984375, "learning_rate": 0.0005562733396040527, "loss": 0.1384, "step": 68124 }, { "epoch": 0.12079302894823396, "grad_norm": 0.427734375, "learning_rate": 0.0005562259117130288, "loss": 0.193, "step": 68126 }, { "epoch": 0.12079657511354377, "grad_norm": 0.84765625, "learning_rate": 0.0005561784862001573, "loss": 0.2072, "step": 68128 }, { "epoch": 0.12080012127885359, "grad_norm": 0.392578125, "learning_rate": 0.0005561310630656451, "loss": 0.1555, "step": 68130 }, { "epoch": 0.1208036674441634, "grad_norm": 0.2158203125, "learning_rate": 0.0005560836423096998, "loss": 0.1662, "step": 68132 }, { "epoch": 0.12080721360947323, "grad_norm": 2.140625, "learning_rate": 0.0005560362239325288, "loss": 0.2529, "step": 68134 }, { "epoch": 0.12081075977478305, "grad_norm": 0.51171875, "learning_rate": 0.0005559888079343387, "loss": 0.1484, "step": 68136 }, { "epoch": 0.12081430594009286, "grad_norm": 0.31640625, "learning_rate": 0.0005559413943153382, "loss": 0.2642, "step": 68138 }, { "epoch": 0.12081785210540268, "grad_norm": 0.314453125, "learning_rate": 0.0005558939830757339, "loss": 0.1265, "step": 68140 }, { "epoch": 0.12082139827071249, "grad_norm": 0.4609375, "learning_rate": 0.0005558465742157333, "loss": 0.3571, "step": 68142 }, { "epoch": 0.1208249444360223, "grad_norm": 0.275390625, "learning_rate": 0.0005557991677355434, "loss": 0.1535, "step": 68144 }, { "epoch": 0.12082849060133212, "grad_norm": 0.33984375, "learning_rate": 0.0005557517636353721, "loss": 0.1888, "step": 68146 }, { "epoch": 0.12083203676664193, "grad_norm": 0.302734375, "learning_rate": 0.0005557043619154264, "loss": 0.262, "step": 68148 }, { "epoch": 0.12083558293195175, "grad_norm": 0.275390625, "learning_rate": 0.0005556569625759138, "loss": 0.1327, "step": 68150 }, { "epoch": 0.12083912909726156, "grad_norm": 0.484375, "learning_rate": 0.0005556095656170414, "loss": 0.2421, "step": 68152 }, { "epoch": 0.12084267526257138, "grad_norm": 0.388671875, "learning_rate": 0.0005555621710390162, "loss": 0.2077, "step": 68154 }, { "epoch": 0.12084622142788119, "grad_norm": 0.53125, "learning_rate": 0.0005555147788420464, "loss": 0.2131, "step": 68156 }, { "epoch": 0.12084976759319101, "grad_norm": 0.546875, "learning_rate": 0.0005554673890263381, "loss": 0.1488, "step": 68158 }, { "epoch": 0.12085331375850082, "grad_norm": 0.6875, "learning_rate": 0.0005554200015920994, "loss": 0.1813, "step": 68160 }, { "epoch": 0.12085685992381064, "grad_norm": 0.341796875, "learning_rate": 0.0005553726165395369, "loss": 0.221, "step": 68162 }, { "epoch": 0.12086040608912045, "grad_norm": 0.70703125, "learning_rate": 0.0005553252338688592, "loss": 0.2045, "step": 68164 }, { "epoch": 0.12086395225443027, "grad_norm": 1.1484375, "learning_rate": 0.0005552778535802716, "loss": 0.2027, "step": 68166 }, { "epoch": 0.12086749841974008, "grad_norm": 0.3828125, "learning_rate": 0.0005552304756739826, "loss": 0.1525, "step": 68168 }, { "epoch": 0.1208710445850499, "grad_norm": 1.2890625, "learning_rate": 0.0005551831001501992, "loss": 0.1918, "step": 68170 }, { "epoch": 0.12087459075035971, "grad_norm": 0.169921875, "learning_rate": 0.0005551357270091284, "loss": 0.1495, "step": 68172 }, { "epoch": 0.12087813691566952, "grad_norm": 0.466796875, "learning_rate": 0.0005550883562509774, "loss": 0.1953, "step": 68174 }, { "epoch": 0.12088168308097934, "grad_norm": 0.33203125, "learning_rate": 0.000555040987875953, "loss": 0.1258, "step": 68176 }, { "epoch": 0.12088522924628917, "grad_norm": 0.263671875, "learning_rate": 0.0005549936218842633, "loss": 0.2529, "step": 68178 }, { "epoch": 0.12088877541159898, "grad_norm": 0.15625, "learning_rate": 0.0005549462582761146, "loss": 0.1308, "step": 68180 }, { "epoch": 0.1208923215769088, "grad_norm": 0.875, "learning_rate": 0.0005548988970517147, "loss": 0.2531, "step": 68182 }, { "epoch": 0.12089586774221861, "grad_norm": 1.21875, "learning_rate": 0.00055485153821127, "loss": 0.3492, "step": 68184 }, { "epoch": 0.12089941390752842, "grad_norm": 0.423828125, "learning_rate": 0.0005548041817549881, "loss": 0.1828, "step": 68186 }, { "epoch": 0.12090296007283824, "grad_norm": 0.45703125, "learning_rate": 0.0005547568276830763, "loss": 0.1707, "step": 68188 }, { "epoch": 0.12090650623814805, "grad_norm": 0.427734375, "learning_rate": 0.0005547094759957413, "loss": 0.2955, "step": 68190 }, { "epoch": 0.12091005240345787, "grad_norm": 0.64453125, "learning_rate": 0.0005546621266931903, "loss": 0.171, "step": 68192 }, { "epoch": 0.12091359856876768, "grad_norm": 0.27734375, "learning_rate": 0.0005546147797756302, "loss": 0.1817, "step": 68194 }, { "epoch": 0.1209171447340775, "grad_norm": 0.46875, "learning_rate": 0.0005545674352432686, "loss": 0.1802, "step": 68196 }, { "epoch": 0.12092069089938731, "grad_norm": 0.1943359375, "learning_rate": 0.0005545200930963118, "loss": 0.1267, "step": 68198 }, { "epoch": 0.12092423706469713, "grad_norm": 0.345703125, "learning_rate": 0.0005544727533349675, "loss": 0.1673, "step": 68200 }, { "epoch": 0.12092778323000694, "grad_norm": 1.6015625, "learning_rate": 0.0005544254159594427, "loss": 0.2306, "step": 68202 }, { "epoch": 0.12093132939531676, "grad_norm": 0.302734375, "learning_rate": 0.0005543780809699439, "loss": 0.1894, "step": 68204 }, { "epoch": 0.12093487556062657, "grad_norm": 0.2431640625, "learning_rate": 0.0005543307483666786, "loss": 0.1591, "step": 68206 }, { "epoch": 0.12093842172593638, "grad_norm": 0.48046875, "learning_rate": 0.0005542834181498533, "loss": 0.1507, "step": 68208 }, { "epoch": 0.1209419678912462, "grad_norm": 0.50390625, "learning_rate": 0.0005542360903196757, "loss": 0.1947, "step": 68210 }, { "epoch": 0.12094551405655601, "grad_norm": 0.53515625, "learning_rate": 0.0005541887648763524, "loss": 0.1593, "step": 68212 }, { "epoch": 0.12094906022186583, "grad_norm": 0.349609375, "learning_rate": 0.0005541414418200902, "loss": 0.2104, "step": 68214 }, { "epoch": 0.12095260638717564, "grad_norm": 0.2177734375, "learning_rate": 0.0005540941211510964, "loss": 0.1575, "step": 68216 }, { "epoch": 0.12095615255248546, "grad_norm": 0.357421875, "learning_rate": 0.0005540468028695778, "loss": 0.1432, "step": 68218 }, { "epoch": 0.12095969871779527, "grad_norm": 0.203125, "learning_rate": 0.0005539994869757414, "loss": 0.2056, "step": 68220 }, { "epoch": 0.12096324488310509, "grad_norm": 0.2412109375, "learning_rate": 0.0005539521734697941, "loss": 0.1511, "step": 68222 }, { "epoch": 0.12096679104841492, "grad_norm": 0.33203125, "learning_rate": 0.0005539048623519428, "loss": 0.1866, "step": 68224 }, { "epoch": 0.12097033721372473, "grad_norm": 0.76953125, "learning_rate": 0.0005538575536223942, "loss": 0.1467, "step": 68226 }, { "epoch": 0.12097388337903454, "grad_norm": 0.51171875, "learning_rate": 0.0005538102472813556, "loss": 0.2444, "step": 68228 }, { "epoch": 0.12097742954434436, "grad_norm": 0.5234375, "learning_rate": 0.0005537629433290339, "loss": 0.1626, "step": 68230 }, { "epoch": 0.12098097570965417, "grad_norm": 0.263671875, "learning_rate": 0.0005537156417656357, "loss": 0.2305, "step": 68232 }, { "epoch": 0.12098452187496399, "grad_norm": 2.390625, "learning_rate": 0.0005536683425913676, "loss": 0.1929, "step": 68234 }, { "epoch": 0.1209880680402738, "grad_norm": 0.279296875, "learning_rate": 0.0005536210458064372, "loss": 0.2001, "step": 68236 }, { "epoch": 0.12099161420558362, "grad_norm": 0.52734375, "learning_rate": 0.0005535737514110508, "loss": 0.1901, "step": 68238 }, { "epoch": 0.12099516037089343, "grad_norm": 0.3125, "learning_rate": 0.0005535264594054157, "loss": 0.2324, "step": 68240 }, { "epoch": 0.12099870653620325, "grad_norm": 0.345703125, "learning_rate": 0.0005534791697897383, "loss": 0.1985, "step": 68242 }, { "epoch": 0.12100225270151306, "grad_norm": 0.27734375, "learning_rate": 0.0005534318825642252, "loss": 0.1631, "step": 68244 }, { "epoch": 0.12100579886682288, "grad_norm": 0.546875, "learning_rate": 0.0005533845977290842, "loss": 0.151, "step": 68246 }, { "epoch": 0.12100934503213269, "grad_norm": 0.46484375, "learning_rate": 0.0005533373152845207, "loss": 0.1861, "step": 68248 }, { "epoch": 0.1210128911974425, "grad_norm": 0.30859375, "learning_rate": 0.0005532900352307428, "loss": 0.1906, "step": 68250 }, { "epoch": 0.12101643736275232, "grad_norm": 0.361328125, "learning_rate": 0.0005532427575679564, "loss": 0.154, "step": 68252 }, { "epoch": 0.12101998352806213, "grad_norm": 0.427734375, "learning_rate": 0.000553195482296369, "loss": 0.1854, "step": 68254 }, { "epoch": 0.12102352969337195, "grad_norm": 0.921875, "learning_rate": 0.0005531482094161864, "loss": 0.2645, "step": 68256 }, { "epoch": 0.12102707585868176, "grad_norm": 0.267578125, "learning_rate": 0.0005531009389276163, "loss": 0.2777, "step": 68258 }, { "epoch": 0.12103062202399158, "grad_norm": 0.388671875, "learning_rate": 0.0005530536708308649, "loss": 0.2076, "step": 68260 }, { "epoch": 0.12103416818930139, "grad_norm": 0.5234375, "learning_rate": 0.0005530064051261392, "loss": 0.1992, "step": 68262 }, { "epoch": 0.1210377143546112, "grad_norm": 0.240234375, "learning_rate": 0.0005529591418136455, "loss": 0.1677, "step": 68264 }, { "epoch": 0.12104126051992102, "grad_norm": 0.9765625, "learning_rate": 0.0005529118808935907, "loss": 0.1544, "step": 68266 }, { "epoch": 0.12104480668523084, "grad_norm": 0.478515625, "learning_rate": 0.0005528646223661817, "loss": 0.1858, "step": 68268 }, { "epoch": 0.12104835285054066, "grad_norm": 0.1787109375, "learning_rate": 0.0005528173662316249, "loss": 0.1943, "step": 68270 }, { "epoch": 0.12105189901585048, "grad_norm": 1.2890625, "learning_rate": 0.0005527701124901274, "loss": 0.3501, "step": 68272 }, { "epoch": 0.12105544518116029, "grad_norm": 0.330078125, "learning_rate": 0.000552722861141895, "loss": 0.1831, "step": 68274 }, { "epoch": 0.12105899134647011, "grad_norm": 0.2021484375, "learning_rate": 0.0005526756121871355, "loss": 0.1661, "step": 68276 }, { "epoch": 0.12106253751177992, "grad_norm": 0.2431640625, "learning_rate": 0.0005526283656260547, "loss": 0.1905, "step": 68278 }, { "epoch": 0.12106608367708974, "grad_norm": 0.408203125, "learning_rate": 0.0005525811214588594, "loss": 0.1999, "step": 68280 }, { "epoch": 0.12106962984239955, "grad_norm": 0.296875, "learning_rate": 0.0005525338796857565, "loss": 0.4084, "step": 68282 }, { "epoch": 0.12107317600770937, "grad_norm": 0.8046875, "learning_rate": 0.0005524866403069521, "loss": 0.1789, "step": 68284 }, { "epoch": 0.12107672217301918, "grad_norm": 0.25, "learning_rate": 0.0005524394033226537, "loss": 0.1488, "step": 68286 }, { "epoch": 0.121080268338329, "grad_norm": 0.37890625, "learning_rate": 0.0005523921687330667, "loss": 0.1629, "step": 68288 }, { "epoch": 0.12108381450363881, "grad_norm": 0.58203125, "learning_rate": 0.0005523449365383983, "loss": 0.1857, "step": 68290 }, { "epoch": 0.12108736066894862, "grad_norm": 0.32421875, "learning_rate": 0.000552297706738855, "loss": 0.178, "step": 68292 }, { "epoch": 0.12109090683425844, "grad_norm": 1.1875, "learning_rate": 0.000552250479334644, "loss": 0.1892, "step": 68294 }, { "epoch": 0.12109445299956825, "grad_norm": 2.234375, "learning_rate": 0.0005522032543259705, "loss": 0.5568, "step": 68296 }, { "epoch": 0.12109799916487807, "grad_norm": 1.1640625, "learning_rate": 0.0005521560317130421, "loss": 0.1752, "step": 68298 }, { "epoch": 0.12110154533018788, "grad_norm": 0.310546875, "learning_rate": 0.000552108811496065, "loss": 0.1671, "step": 68300 }, { "epoch": 0.1211050914954977, "grad_norm": 2.15625, "learning_rate": 0.0005520615936752454, "loss": 0.5018, "step": 68302 }, { "epoch": 0.12110863766080751, "grad_norm": 0.443359375, "learning_rate": 0.0005520143782507909, "loss": 0.1723, "step": 68304 }, { "epoch": 0.12111218382611733, "grad_norm": 0.466796875, "learning_rate": 0.0005519671652229063, "loss": 0.2897, "step": 68306 }, { "epoch": 0.12111572999142714, "grad_norm": 0.921875, "learning_rate": 0.0005519199545917995, "loss": 0.1666, "step": 68308 }, { "epoch": 0.12111927615673695, "grad_norm": 1.2421875, "learning_rate": 0.000551872746357676, "loss": 0.2241, "step": 68310 }, { "epoch": 0.12112282232204677, "grad_norm": 0.216796875, "learning_rate": 0.0005518255405207434, "loss": 0.1626, "step": 68312 }, { "epoch": 0.1211263684873566, "grad_norm": 0.294921875, "learning_rate": 0.0005517783370812069, "loss": 0.1621, "step": 68314 }, { "epoch": 0.12112991465266641, "grad_norm": 0.322265625, "learning_rate": 0.0005517311360392738, "loss": 0.1919, "step": 68316 }, { "epoch": 0.12113346081797623, "grad_norm": 0.28515625, "learning_rate": 0.0005516839373951502, "loss": 0.1251, "step": 68318 }, { "epoch": 0.12113700698328604, "grad_norm": 0.1982421875, "learning_rate": 0.0005516367411490426, "loss": 0.1679, "step": 68320 }, { "epoch": 0.12114055314859586, "grad_norm": 0.296875, "learning_rate": 0.0005515895473011573, "loss": 0.1934, "step": 68322 }, { "epoch": 0.12114409931390567, "grad_norm": 0.859375, "learning_rate": 0.0005515423558517006, "loss": 0.2425, "step": 68324 }, { "epoch": 0.12114764547921548, "grad_norm": 3.359375, "learning_rate": 0.000551495166800879, "loss": 0.305, "step": 68326 }, { "epoch": 0.1211511916445253, "grad_norm": 0.3359375, "learning_rate": 0.0005514479801488993, "loss": 0.2153, "step": 68328 }, { "epoch": 0.12115473780983511, "grad_norm": 0.27734375, "learning_rate": 0.0005514007958959675, "loss": 0.2246, "step": 68330 }, { "epoch": 0.12115828397514493, "grad_norm": 1.7265625, "learning_rate": 0.0005513536140422894, "loss": 0.2062, "step": 68332 }, { "epoch": 0.12116183014045474, "grad_norm": 0.6015625, "learning_rate": 0.0005513064345880723, "loss": 0.1296, "step": 68334 }, { "epoch": 0.12116537630576456, "grad_norm": 0.466796875, "learning_rate": 0.0005512592575335222, "loss": 0.2096, "step": 68336 }, { "epoch": 0.12116892247107437, "grad_norm": 0.35546875, "learning_rate": 0.0005512120828788454, "loss": 0.1836, "step": 68338 }, { "epoch": 0.12117246863638419, "grad_norm": 0.2421875, "learning_rate": 0.000551164910624248, "loss": 0.1775, "step": 68340 }, { "epoch": 0.121176014801694, "grad_norm": 0.41015625, "learning_rate": 0.0005511177407699363, "loss": 0.1593, "step": 68342 }, { "epoch": 0.12117956096700382, "grad_norm": 0.2294921875, "learning_rate": 0.0005510705733161174, "loss": 0.2339, "step": 68344 }, { "epoch": 0.12118310713231363, "grad_norm": 0.306640625, "learning_rate": 0.0005510234082629963, "loss": 0.1421, "step": 68346 }, { "epoch": 0.12118665329762345, "grad_norm": 0.73046875, "learning_rate": 0.0005509762456107802, "loss": 0.2605, "step": 68348 }, { "epoch": 0.12119019946293326, "grad_norm": 0.76953125, "learning_rate": 0.0005509290853596749, "loss": 0.2079, "step": 68350 }, { "epoch": 0.12119374562824307, "grad_norm": 0.255859375, "learning_rate": 0.0005508819275098873, "loss": 0.1875, "step": 68352 }, { "epoch": 0.12119729179355289, "grad_norm": 0.267578125, "learning_rate": 0.0005508347720616227, "loss": 0.2149, "step": 68354 }, { "epoch": 0.1212008379588627, "grad_norm": 0.263671875, "learning_rate": 0.0005507876190150882, "loss": 0.1614, "step": 68356 }, { "epoch": 0.12120438412417252, "grad_norm": 0.44921875, "learning_rate": 0.0005507404683704894, "loss": 0.1852, "step": 68358 }, { "epoch": 0.12120793028948235, "grad_norm": 0.46484375, "learning_rate": 0.0005506933201280329, "loss": 0.2005, "step": 68360 }, { "epoch": 0.12121147645479216, "grad_norm": 2.25, "learning_rate": 0.0005506461742879246, "loss": 0.1927, "step": 68362 }, { "epoch": 0.12121502262010198, "grad_norm": 0.333984375, "learning_rate": 0.0005505990308503707, "loss": 0.1824, "step": 68364 }, { "epoch": 0.12121856878541179, "grad_norm": 0.369140625, "learning_rate": 0.0005505518898155777, "loss": 0.1283, "step": 68366 }, { "epoch": 0.1212221149507216, "grad_norm": 0.474609375, "learning_rate": 0.0005505047511837515, "loss": 0.1418, "step": 68368 }, { "epoch": 0.12122566111603142, "grad_norm": 0.251953125, "learning_rate": 0.0005504576149550986, "loss": 0.2067, "step": 68370 }, { "epoch": 0.12122920728134123, "grad_norm": 0.19140625, "learning_rate": 0.0005504104811298243, "loss": 0.1596, "step": 68372 }, { "epoch": 0.12123275344665105, "grad_norm": 0.45703125, "learning_rate": 0.0005503633497081358, "loss": 0.1752, "step": 68374 }, { "epoch": 0.12123629961196086, "grad_norm": 0.7265625, "learning_rate": 0.0005503162206902386, "loss": 0.2122, "step": 68376 }, { "epoch": 0.12123984577727068, "grad_norm": 1.21875, "learning_rate": 0.0005502690940763392, "loss": 0.4306, "step": 68378 }, { "epoch": 0.12124339194258049, "grad_norm": 0.208984375, "learning_rate": 0.0005502219698666433, "loss": 0.1546, "step": 68380 }, { "epoch": 0.1212469381078903, "grad_norm": 0.4296875, "learning_rate": 0.0005501748480613568, "loss": 0.2053, "step": 68382 }, { "epoch": 0.12125048427320012, "grad_norm": 0.54296875, "learning_rate": 0.0005501277286606864, "loss": 0.2052, "step": 68384 }, { "epoch": 0.12125403043850994, "grad_norm": 0.35546875, "learning_rate": 0.000550080611664838, "loss": 0.194, "step": 68386 }, { "epoch": 0.12125757660381975, "grad_norm": 0.474609375, "learning_rate": 0.0005500334970740177, "loss": 0.188, "step": 68388 }, { "epoch": 0.12126112276912956, "grad_norm": 0.265625, "learning_rate": 0.0005499863848884313, "loss": 0.1979, "step": 68390 }, { "epoch": 0.12126466893443938, "grad_norm": 0.30859375, "learning_rate": 0.0005499392751082846, "loss": 0.1581, "step": 68392 }, { "epoch": 0.1212682150997492, "grad_norm": 0.193359375, "learning_rate": 0.0005498921677337847, "loss": 0.153, "step": 68394 }, { "epoch": 0.12127176126505901, "grad_norm": 0.8671875, "learning_rate": 0.0005498450627651363, "loss": 0.1825, "step": 68396 }, { "epoch": 0.12127530743036882, "grad_norm": 0.37890625, "learning_rate": 0.0005497979602025463, "loss": 0.1893, "step": 68398 }, { "epoch": 0.12127885359567864, "grad_norm": 0.279296875, "learning_rate": 0.0005497508600462201, "loss": 0.1367, "step": 68400 }, { "epoch": 0.12128239976098845, "grad_norm": 0.3671875, "learning_rate": 0.0005497037622963646, "loss": 0.1598, "step": 68402 }, { "epoch": 0.12128594592629827, "grad_norm": 0.2158203125, "learning_rate": 0.0005496566669531848, "loss": 0.1362, "step": 68404 }, { "epoch": 0.1212894920916081, "grad_norm": 0.9296875, "learning_rate": 0.0005496095740168871, "loss": 0.2562, "step": 68406 }, { "epoch": 0.12129303825691791, "grad_norm": 0.494140625, "learning_rate": 0.0005495624834876776, "loss": 0.1884, "step": 68408 }, { "epoch": 0.12129658442222772, "grad_norm": 0.50390625, "learning_rate": 0.0005495153953657619, "loss": 0.1985, "step": 68410 }, { "epoch": 0.12130013058753754, "grad_norm": 0.4453125, "learning_rate": 0.0005494683096513462, "loss": 0.216, "step": 68412 }, { "epoch": 0.12130367675284735, "grad_norm": 0.65234375, "learning_rate": 0.0005494212263446361, "loss": 0.206, "step": 68414 }, { "epoch": 0.12130722291815717, "grad_norm": 0.283203125, "learning_rate": 0.0005493741454458378, "loss": 0.1998, "step": 68416 }, { "epoch": 0.12131076908346698, "grad_norm": 0.435546875, "learning_rate": 0.0005493270669551573, "loss": 0.2079, "step": 68418 }, { "epoch": 0.1213143152487768, "grad_norm": 0.279296875, "learning_rate": 0.0005492799908728004, "loss": 0.1749, "step": 68420 }, { "epoch": 0.12131786141408661, "grad_norm": 0.283203125, "learning_rate": 0.0005492329171989726, "loss": 0.2205, "step": 68422 }, { "epoch": 0.12132140757939643, "grad_norm": 0.5, "learning_rate": 0.0005491858459338803, "loss": 0.1691, "step": 68424 }, { "epoch": 0.12132495374470624, "grad_norm": 0.29296875, "learning_rate": 0.0005491387770777293, "loss": 0.1365, "step": 68426 }, { "epoch": 0.12132849991001605, "grad_norm": 0.314453125, "learning_rate": 0.0005490917106307253, "loss": 0.2307, "step": 68428 }, { "epoch": 0.12133204607532587, "grad_norm": 0.8359375, "learning_rate": 0.0005490446465930741, "loss": 0.1481, "step": 68430 }, { "epoch": 0.12133559224063568, "grad_norm": 0.62109375, "learning_rate": 0.000548997584964981, "loss": 0.142, "step": 68432 }, { "epoch": 0.1213391384059455, "grad_norm": 0.30859375, "learning_rate": 0.0005489505257466534, "loss": 0.2855, "step": 68434 }, { "epoch": 0.12134268457125531, "grad_norm": 0.5390625, "learning_rate": 0.0005489034689382954, "loss": 0.1296, "step": 68436 }, { "epoch": 0.12134623073656513, "grad_norm": 1.2421875, "learning_rate": 0.0005488564145401138, "loss": 0.2824, "step": 68438 }, { "epoch": 0.12134977690187494, "grad_norm": 0.40234375, "learning_rate": 0.0005488093625523138, "loss": 0.1712, "step": 68440 }, { "epoch": 0.12135332306718476, "grad_norm": 0.419921875, "learning_rate": 0.000548762312975102, "loss": 0.2049, "step": 68442 }, { "epoch": 0.12135686923249457, "grad_norm": 0.228515625, "learning_rate": 0.0005487152658086833, "loss": 0.1618, "step": 68444 }, { "epoch": 0.12136041539780439, "grad_norm": 0.244140625, "learning_rate": 0.0005486682210532639, "loss": 0.1616, "step": 68446 }, { "epoch": 0.1213639615631142, "grad_norm": 0.271484375, "learning_rate": 0.0005486211787090495, "loss": 0.2166, "step": 68448 }, { "epoch": 0.12136750772842403, "grad_norm": 0.42578125, "learning_rate": 0.0005485741387762457, "loss": 0.1695, "step": 68450 }, { "epoch": 0.12137105389373384, "grad_norm": 0.26953125, "learning_rate": 0.0005485271012550583, "loss": 0.1531, "step": 68452 }, { "epoch": 0.12137460005904366, "grad_norm": 0.376953125, "learning_rate": 0.000548480066145693, "loss": 0.2167, "step": 68454 }, { "epoch": 0.12137814622435347, "grad_norm": 0.423828125, "learning_rate": 0.0005484330334483555, "loss": 0.1626, "step": 68456 }, { "epoch": 0.12138169238966329, "grad_norm": 1.4296875, "learning_rate": 0.0005483860031632517, "loss": 0.2608, "step": 68458 }, { "epoch": 0.1213852385549731, "grad_norm": 0.197265625, "learning_rate": 0.0005483389752905871, "loss": 0.1831, "step": 68460 }, { "epoch": 0.12138878472028292, "grad_norm": 0.177734375, "learning_rate": 0.000548291949830567, "loss": 0.1125, "step": 68462 }, { "epoch": 0.12139233088559273, "grad_norm": 0.95703125, "learning_rate": 0.0005482449267833978, "loss": 0.1801, "step": 68464 }, { "epoch": 0.12139587705090255, "grad_norm": 0.490234375, "learning_rate": 0.0005481979061492848, "loss": 0.3019, "step": 68466 }, { "epoch": 0.12139942321621236, "grad_norm": 0.75390625, "learning_rate": 0.0005481508879284336, "loss": 0.1947, "step": 68468 }, { "epoch": 0.12140296938152217, "grad_norm": 0.53125, "learning_rate": 0.0005481038721210498, "loss": 0.1699, "step": 68470 }, { "epoch": 0.12140651554683199, "grad_norm": 0.302734375, "learning_rate": 0.0005480568587273389, "loss": 0.1431, "step": 68472 }, { "epoch": 0.1214100617121418, "grad_norm": 0.314453125, "learning_rate": 0.000548009847747507, "loss": 0.1667, "step": 68474 }, { "epoch": 0.12141360787745162, "grad_norm": 0.26953125, "learning_rate": 0.0005479628391817593, "loss": 0.2071, "step": 68476 }, { "epoch": 0.12141715404276143, "grad_norm": 0.345703125, "learning_rate": 0.0005479158330303015, "loss": 0.1756, "step": 68478 }, { "epoch": 0.12142070020807125, "grad_norm": 0.5390625, "learning_rate": 0.0005478688292933388, "loss": 0.1873, "step": 68480 }, { "epoch": 0.12142424637338106, "grad_norm": 0.330078125, "learning_rate": 0.0005478218279710775, "loss": 0.2185, "step": 68482 }, { "epoch": 0.12142779253869088, "grad_norm": 0.65234375, "learning_rate": 0.0005477748290637228, "loss": 0.1568, "step": 68484 }, { "epoch": 0.12143133870400069, "grad_norm": 0.287109375, "learning_rate": 0.0005477278325714803, "loss": 0.2619, "step": 68486 }, { "epoch": 0.1214348848693105, "grad_norm": 0.63671875, "learning_rate": 0.0005476808384945553, "loss": 0.1543, "step": 68488 }, { "epoch": 0.12143843103462032, "grad_norm": 0.26171875, "learning_rate": 0.0005476338468331532, "loss": 0.1762, "step": 68490 }, { "epoch": 0.12144197719993013, "grad_norm": 0.263671875, "learning_rate": 0.0005475868575874805, "loss": 0.1825, "step": 68492 }, { "epoch": 0.12144552336523995, "grad_norm": 0.46484375, "learning_rate": 0.0005475398707577414, "loss": 0.2219, "step": 68494 }, { "epoch": 0.12144906953054978, "grad_norm": 0.34375, "learning_rate": 0.0005474928863441422, "loss": 0.2188, "step": 68496 }, { "epoch": 0.12145261569585959, "grad_norm": 0.32421875, "learning_rate": 0.000547445904346888, "loss": 0.2574, "step": 68498 }, { "epoch": 0.1214561618611694, "grad_norm": 0.7109375, "learning_rate": 0.000547398924766185, "loss": 0.1932, "step": 68500 }, { "epoch": 0.12145970802647922, "grad_norm": 0.2060546875, "learning_rate": 0.0005473519476022375, "loss": 0.2634, "step": 68502 }, { "epoch": 0.12146325419178904, "grad_norm": 0.66015625, "learning_rate": 0.000547304972855252, "loss": 0.2248, "step": 68504 }, { "epoch": 0.12146680035709885, "grad_norm": 0.296875, "learning_rate": 0.0005472580005254333, "loss": 0.3714, "step": 68506 }, { "epoch": 0.12147034652240866, "grad_norm": 0.515625, "learning_rate": 0.000547211030612987, "loss": 0.1467, "step": 68508 }, { "epoch": 0.12147389268771848, "grad_norm": 1.53125, "learning_rate": 0.0005471640631181187, "loss": 0.2319, "step": 68510 }, { "epoch": 0.1214774388530283, "grad_norm": 0.6328125, "learning_rate": 0.0005471170980410334, "loss": 0.2081, "step": 68512 }, { "epoch": 0.12148098501833811, "grad_norm": 0.89453125, "learning_rate": 0.000547070135381937, "loss": 0.1982, "step": 68514 }, { "epoch": 0.12148453118364792, "grad_norm": 0.46875, "learning_rate": 0.0005470231751410346, "loss": 0.1852, "step": 68516 }, { "epoch": 0.12148807734895774, "grad_norm": 0.62890625, "learning_rate": 0.0005469762173185318, "loss": 0.1558, "step": 68518 }, { "epoch": 0.12149162351426755, "grad_norm": 0.455078125, "learning_rate": 0.0005469292619146332, "loss": 0.1759, "step": 68520 }, { "epoch": 0.12149516967957737, "grad_norm": 0.427734375, "learning_rate": 0.0005468823089295453, "loss": 0.2055, "step": 68522 }, { "epoch": 0.12149871584488718, "grad_norm": 0.58203125, "learning_rate": 0.0005468353583634728, "loss": 0.1958, "step": 68524 }, { "epoch": 0.121502262010197, "grad_norm": 0.37890625, "learning_rate": 0.0005467884102166212, "loss": 0.1461, "step": 68526 }, { "epoch": 0.12150580817550681, "grad_norm": 0.2294921875, "learning_rate": 0.0005467414644891955, "loss": 0.163, "step": 68528 }, { "epoch": 0.12150935434081662, "grad_norm": 0.361328125, "learning_rate": 0.0005466945211814012, "loss": 0.1637, "step": 68530 }, { "epoch": 0.12151290050612644, "grad_norm": 0.345703125, "learning_rate": 0.0005466475802934442, "loss": 0.1909, "step": 68532 }, { "epoch": 0.12151644667143625, "grad_norm": 0.2470703125, "learning_rate": 0.0005466006418255285, "loss": 0.1175, "step": 68534 }, { "epoch": 0.12151999283674607, "grad_norm": 0.47265625, "learning_rate": 0.0005465537057778607, "loss": 0.1951, "step": 68536 }, { "epoch": 0.12152353900205588, "grad_norm": 0.58984375, "learning_rate": 0.0005465067721506452, "loss": 0.1964, "step": 68538 }, { "epoch": 0.1215270851673657, "grad_norm": 0.443359375, "learning_rate": 0.0005464598409440879, "loss": 0.1535, "step": 68540 }, { "epoch": 0.12153063133267553, "grad_norm": 0.416015625, "learning_rate": 0.0005464129121583935, "loss": 0.198, "step": 68542 }, { "epoch": 0.12153417749798534, "grad_norm": 0.28125, "learning_rate": 0.0005463659857937675, "loss": 0.1794, "step": 68544 }, { "epoch": 0.12153772366329516, "grad_norm": 1.3359375, "learning_rate": 0.0005463190618504152, "loss": 0.25, "step": 68546 }, { "epoch": 0.12154126982860497, "grad_norm": 0.341796875, "learning_rate": 0.0005462721403285413, "loss": 0.2019, "step": 68548 }, { "epoch": 0.12154481599391478, "grad_norm": 0.515625, "learning_rate": 0.000546225221228352, "loss": 0.1961, "step": 68550 }, { "epoch": 0.1215483621592246, "grad_norm": 0.345703125, "learning_rate": 0.0005461783045500514, "loss": 0.1744, "step": 68552 }, { "epoch": 0.12155190832453441, "grad_norm": 0.73828125, "learning_rate": 0.0005461313902938456, "loss": 0.1676, "step": 68554 }, { "epoch": 0.12155545448984423, "grad_norm": 0.5390625, "learning_rate": 0.0005460844784599393, "loss": 0.1749, "step": 68556 }, { "epoch": 0.12155900065515404, "grad_norm": 0.419921875, "learning_rate": 0.0005460375690485378, "loss": 0.1358, "step": 68558 }, { "epoch": 0.12156254682046386, "grad_norm": 0.41015625, "learning_rate": 0.000545990662059846, "loss": 0.2063, "step": 68560 }, { "epoch": 0.12156609298577367, "grad_norm": 1.6328125, "learning_rate": 0.0005459437574940689, "loss": 0.1737, "step": 68562 }, { "epoch": 0.12156963915108349, "grad_norm": 0.625, "learning_rate": 0.0005458968553514126, "loss": 0.2193, "step": 68564 }, { "epoch": 0.1215731853163933, "grad_norm": 0.97265625, "learning_rate": 0.0005458499556320813, "loss": 0.1629, "step": 68566 }, { "epoch": 0.12157673148170312, "grad_norm": 0.291015625, "learning_rate": 0.0005458030583362806, "loss": 0.1702, "step": 68568 }, { "epoch": 0.12158027764701293, "grad_norm": 0.43359375, "learning_rate": 0.0005457561634642147, "loss": 0.2769, "step": 68570 }, { "epoch": 0.12158382381232274, "grad_norm": 2.375, "learning_rate": 0.0005457092710160901, "loss": 0.2972, "step": 68572 }, { "epoch": 0.12158736997763256, "grad_norm": 0.93359375, "learning_rate": 0.000545662380992111, "loss": 0.2397, "step": 68574 }, { "epoch": 0.12159091614294237, "grad_norm": 0.447265625, "learning_rate": 0.0005456154933924828, "loss": 0.139, "step": 68576 }, { "epoch": 0.12159446230825219, "grad_norm": 0.38671875, "learning_rate": 0.0005455686082174105, "loss": 0.2375, "step": 68578 }, { "epoch": 0.121598008473562, "grad_norm": 0.734375, "learning_rate": 0.0005455217254670986, "loss": 0.23, "step": 68580 }, { "epoch": 0.12160155463887182, "grad_norm": 0.28125, "learning_rate": 0.000545474845141753, "loss": 0.1158, "step": 68582 }, { "epoch": 0.12160510080418163, "grad_norm": 0.4609375, "learning_rate": 0.0005454279672415781, "loss": 0.2866, "step": 68584 }, { "epoch": 0.12160864696949146, "grad_norm": 0.73828125, "learning_rate": 0.0005453810917667792, "loss": 0.2046, "step": 68586 }, { "epoch": 0.12161219313480127, "grad_norm": 0.380859375, "learning_rate": 0.000545334218717561, "loss": 0.1612, "step": 68588 }, { "epoch": 0.12161573930011109, "grad_norm": 0.2578125, "learning_rate": 0.0005452873480941294, "loss": 0.2076, "step": 68590 }, { "epoch": 0.1216192854654209, "grad_norm": 0.578125, "learning_rate": 0.0005452404798966879, "loss": 0.2129, "step": 68592 }, { "epoch": 0.12162283163073072, "grad_norm": 0.357421875, "learning_rate": 0.0005451936141254428, "loss": 0.1563, "step": 68594 }, { "epoch": 0.12162637779604053, "grad_norm": 0.8203125, "learning_rate": 0.0005451467507805985, "loss": 0.2247, "step": 68596 }, { "epoch": 0.12162992396135035, "grad_norm": 0.380859375, "learning_rate": 0.0005450998898623601, "loss": 0.3657, "step": 68598 }, { "epoch": 0.12163347012666016, "grad_norm": 0.7265625, "learning_rate": 0.0005450530313709323, "loss": 0.1833, "step": 68600 }, { "epoch": 0.12163701629196998, "grad_norm": 0.369140625, "learning_rate": 0.00054500617530652, "loss": 0.303, "step": 68602 }, { "epoch": 0.12164056245727979, "grad_norm": 1.78125, "learning_rate": 0.0005449593216693285, "loss": 0.2638, "step": 68604 }, { "epoch": 0.1216441086225896, "grad_norm": 0.287109375, "learning_rate": 0.0005449124704595627, "loss": 0.1686, "step": 68606 }, { "epoch": 0.12164765478789942, "grad_norm": 0.287109375, "learning_rate": 0.0005448656216774272, "loss": 0.1397, "step": 68608 }, { "epoch": 0.12165120095320923, "grad_norm": 1.9453125, "learning_rate": 0.0005448187753231267, "loss": 0.1979, "step": 68610 }, { "epoch": 0.12165474711851905, "grad_norm": 0.3125, "learning_rate": 0.0005447719313968667, "loss": 0.2022, "step": 68612 }, { "epoch": 0.12165829328382886, "grad_norm": 0.7109375, "learning_rate": 0.0005447250898988518, "loss": 0.1835, "step": 68614 }, { "epoch": 0.12166183944913868, "grad_norm": 0.94140625, "learning_rate": 0.0005446782508292869, "loss": 0.1376, "step": 68616 }, { "epoch": 0.12166538561444849, "grad_norm": 0.1982421875, "learning_rate": 0.0005446314141883766, "loss": 0.2062, "step": 68618 }, { "epoch": 0.12166893177975831, "grad_norm": 0.2158203125, "learning_rate": 0.0005445845799763256, "loss": 0.1594, "step": 68620 }, { "epoch": 0.12167247794506812, "grad_norm": 0.345703125, "learning_rate": 0.0005445377481933397, "loss": 0.1884, "step": 68622 }, { "epoch": 0.12167602411037794, "grad_norm": 0.625, "learning_rate": 0.0005444909188396225, "loss": 0.2016, "step": 68624 }, { "epoch": 0.12167957027568775, "grad_norm": 0.287109375, "learning_rate": 0.0005444440919153794, "loss": 0.1637, "step": 68626 }, { "epoch": 0.12168311644099757, "grad_norm": 0.490234375, "learning_rate": 0.000544397267420815, "loss": 0.1874, "step": 68628 }, { "epoch": 0.12168666260630738, "grad_norm": 0.2578125, "learning_rate": 0.000544350445356135, "loss": 0.2119, "step": 68630 }, { "epoch": 0.12169020877161721, "grad_norm": 0.373046875, "learning_rate": 0.0005443036257215426, "loss": 0.1571, "step": 68632 }, { "epoch": 0.12169375493692702, "grad_norm": 0.2080078125, "learning_rate": 0.0005442568085172436, "loss": 0.141, "step": 68634 }, { "epoch": 0.12169730110223684, "grad_norm": 1.8984375, "learning_rate": 0.0005442099937434426, "loss": 0.2114, "step": 68636 }, { "epoch": 0.12170084726754665, "grad_norm": 0.310546875, "learning_rate": 0.0005441631814003438, "loss": 0.2009, "step": 68638 }, { "epoch": 0.12170439343285647, "grad_norm": 0.455078125, "learning_rate": 0.0005441163714881532, "loss": 0.1768, "step": 68640 }, { "epoch": 0.12170793959816628, "grad_norm": 0.7421875, "learning_rate": 0.000544069564007074, "loss": 0.178, "step": 68642 }, { "epoch": 0.1217114857634761, "grad_norm": 0.3359375, "learning_rate": 0.0005440227589573118, "loss": 0.1974, "step": 68644 }, { "epoch": 0.12171503192878591, "grad_norm": 0.439453125, "learning_rate": 0.0005439759563390709, "loss": 0.2737, "step": 68646 }, { "epoch": 0.12171857809409573, "grad_norm": 1.078125, "learning_rate": 0.0005439291561525569, "loss": 0.1659, "step": 68648 }, { "epoch": 0.12172212425940554, "grad_norm": 0.91796875, "learning_rate": 0.000543882358397973, "loss": 0.2206, "step": 68650 }, { "epoch": 0.12172567042471535, "grad_norm": 0.96484375, "learning_rate": 0.0005438355630755249, "loss": 0.2439, "step": 68652 }, { "epoch": 0.12172921659002517, "grad_norm": 0.419921875, "learning_rate": 0.0005437887701854172, "loss": 0.1906, "step": 68654 }, { "epoch": 0.12173276275533498, "grad_norm": 1.5078125, "learning_rate": 0.0005437419797278541, "loss": 0.2045, "step": 68656 }, { "epoch": 0.1217363089206448, "grad_norm": 0.2138671875, "learning_rate": 0.0005436951917030406, "loss": 0.1626, "step": 68658 }, { "epoch": 0.12173985508595461, "grad_norm": 0.4453125, "learning_rate": 0.0005436484061111808, "loss": 0.1698, "step": 68660 }, { "epoch": 0.12174340125126443, "grad_norm": 0.484375, "learning_rate": 0.0005436016229524801, "loss": 0.1809, "step": 68662 }, { "epoch": 0.12174694741657424, "grad_norm": 0.42578125, "learning_rate": 0.0005435548422271428, "loss": 0.1779, "step": 68664 }, { "epoch": 0.12175049358188406, "grad_norm": 0.5390625, "learning_rate": 0.0005435080639353732, "loss": 0.1404, "step": 68666 }, { "epoch": 0.12175403974719387, "grad_norm": 2.046875, "learning_rate": 0.0005434612880773759, "loss": 0.2702, "step": 68668 }, { "epoch": 0.12175758591250369, "grad_norm": 0.1357421875, "learning_rate": 0.0005434145146533558, "loss": 0.1643, "step": 68670 }, { "epoch": 0.1217611320778135, "grad_norm": 0.34375, "learning_rate": 0.0005433677436635175, "loss": 0.1529, "step": 68672 }, { "epoch": 0.12176467824312331, "grad_norm": 0.2578125, "learning_rate": 0.0005433209751080652, "loss": 0.1401, "step": 68674 }, { "epoch": 0.12176822440843313, "grad_norm": 1.0859375, "learning_rate": 0.0005432742089872038, "loss": 0.1814, "step": 68676 }, { "epoch": 0.12177177057374296, "grad_norm": 0.2138671875, "learning_rate": 0.000543227445301137, "loss": 0.1629, "step": 68678 }, { "epoch": 0.12177531673905277, "grad_norm": 0.55078125, "learning_rate": 0.0005431806840500708, "loss": 0.1796, "step": 68680 }, { "epoch": 0.12177886290436259, "grad_norm": 0.69921875, "learning_rate": 0.0005431339252342083, "loss": 0.1695, "step": 68682 }, { "epoch": 0.1217824090696724, "grad_norm": 0.35546875, "learning_rate": 0.0005430871688537547, "loss": 0.2437, "step": 68684 }, { "epoch": 0.12178595523498222, "grad_norm": 2.28125, "learning_rate": 0.000543040414908914, "loss": 0.2364, "step": 68686 }, { "epoch": 0.12178950140029203, "grad_norm": 0.361328125, "learning_rate": 0.0005429936633998916, "loss": 0.1929, "step": 68688 }, { "epoch": 0.12179304756560184, "grad_norm": 0.4375, "learning_rate": 0.0005429469143268908, "loss": 0.188, "step": 68690 }, { "epoch": 0.12179659373091166, "grad_norm": 0.73046875, "learning_rate": 0.000542900167690117, "loss": 0.2982, "step": 68692 }, { "epoch": 0.12180013989622147, "grad_norm": 0.29296875, "learning_rate": 0.0005428534234897743, "loss": 0.2232, "step": 68694 }, { "epoch": 0.12180368606153129, "grad_norm": 0.375, "learning_rate": 0.000542806681726067, "loss": 0.152, "step": 68696 }, { "epoch": 0.1218072322268411, "grad_norm": 0.54296875, "learning_rate": 0.0005427599423991996, "loss": 0.2114, "step": 68698 }, { "epoch": 0.12181077839215092, "grad_norm": 0.71484375, "learning_rate": 0.0005427132055093762, "loss": 0.1796, "step": 68700 }, { "epoch": 0.12181432455746073, "grad_norm": 0.498046875, "learning_rate": 0.0005426664710568019, "loss": 0.5028, "step": 68702 }, { "epoch": 0.12181787072277055, "grad_norm": 0.265625, "learning_rate": 0.0005426197390416808, "loss": 0.2146, "step": 68704 }, { "epoch": 0.12182141688808036, "grad_norm": 0.2451171875, "learning_rate": 0.0005425730094642171, "loss": 0.1481, "step": 68706 }, { "epoch": 0.12182496305339018, "grad_norm": 0.23046875, "learning_rate": 0.0005425262823246148, "loss": 0.2313, "step": 68708 }, { "epoch": 0.12182850921869999, "grad_norm": 0.4921875, "learning_rate": 0.0005424795576230792, "loss": 0.1647, "step": 68710 }, { "epoch": 0.1218320553840098, "grad_norm": 0.283203125, "learning_rate": 0.0005424328353598142, "loss": 0.2236, "step": 68712 }, { "epoch": 0.12183560154931962, "grad_norm": 3.34375, "learning_rate": 0.000542386115535024, "loss": 0.2889, "step": 68714 }, { "epoch": 0.12183914771462943, "grad_norm": 0.333984375, "learning_rate": 0.0005423393981489132, "loss": 0.1476, "step": 68716 }, { "epoch": 0.12184269387993925, "grad_norm": 0.298828125, "learning_rate": 0.0005422926832016855, "loss": 0.1861, "step": 68718 }, { "epoch": 0.12184624004524906, "grad_norm": 2.21875, "learning_rate": 0.0005422459706935461, "loss": 0.1598, "step": 68720 }, { "epoch": 0.12184978621055889, "grad_norm": 0.30078125, "learning_rate": 0.0005421992606246986, "loss": 0.1447, "step": 68722 }, { "epoch": 0.1218533323758687, "grad_norm": 0.365234375, "learning_rate": 0.0005421525529953477, "loss": 0.1977, "step": 68724 }, { "epoch": 0.12185687854117852, "grad_norm": 0.9609375, "learning_rate": 0.0005421058478056975, "loss": 0.215, "step": 68726 }, { "epoch": 0.12186042470648834, "grad_norm": 0.59765625, "learning_rate": 0.0005420591450559519, "loss": 0.195, "step": 68728 }, { "epoch": 0.12186397087179815, "grad_norm": 0.33984375, "learning_rate": 0.0005420124447463158, "loss": 0.1638, "step": 68730 }, { "epoch": 0.12186751703710796, "grad_norm": 2.71875, "learning_rate": 0.0005419657468769931, "loss": 0.15, "step": 68732 }, { "epoch": 0.12187106320241778, "grad_norm": 0.2470703125, "learning_rate": 0.0005419190514481882, "loss": 0.2019, "step": 68734 }, { "epoch": 0.1218746093677276, "grad_norm": 0.5703125, "learning_rate": 0.0005418723584601049, "loss": 0.2076, "step": 68736 }, { "epoch": 0.12187815553303741, "grad_norm": 0.2158203125, "learning_rate": 0.0005418256679129483, "loss": 0.1867, "step": 68738 }, { "epoch": 0.12188170169834722, "grad_norm": 0.47265625, "learning_rate": 0.0005417789798069213, "loss": 0.176, "step": 68740 }, { "epoch": 0.12188524786365704, "grad_norm": 0.271484375, "learning_rate": 0.0005417322941422291, "loss": 0.1793, "step": 68742 }, { "epoch": 0.12188879402896685, "grad_norm": 0.671875, "learning_rate": 0.0005416856109190755, "loss": 0.1218, "step": 68744 }, { "epoch": 0.12189234019427667, "grad_norm": 0.3515625, "learning_rate": 0.000541638930137665, "loss": 0.1518, "step": 68746 }, { "epoch": 0.12189588635958648, "grad_norm": 0.494140625, "learning_rate": 0.0005415922517982012, "loss": 0.2005, "step": 68748 }, { "epoch": 0.1218994325248963, "grad_norm": 0.1337890625, "learning_rate": 0.0005415455759008884, "loss": 0.1364, "step": 68750 }, { "epoch": 0.12190297869020611, "grad_norm": 0.64453125, "learning_rate": 0.0005414989024459311, "loss": 0.1949, "step": 68752 }, { "epoch": 0.12190652485551592, "grad_norm": 0.6171875, "learning_rate": 0.0005414522314335331, "loss": 0.1977, "step": 68754 }, { "epoch": 0.12191007102082574, "grad_norm": 0.5859375, "learning_rate": 0.0005414055628638987, "loss": 0.1821, "step": 68756 }, { "epoch": 0.12191361718613555, "grad_norm": 1.1953125, "learning_rate": 0.0005413588967372317, "loss": 0.3243, "step": 68758 }, { "epoch": 0.12191716335144537, "grad_norm": 0.578125, "learning_rate": 0.0005413122330537365, "loss": 0.279, "step": 68760 }, { "epoch": 0.12192070951675518, "grad_norm": 0.216796875, "learning_rate": 0.0005412655718136173, "loss": 0.1841, "step": 68762 }, { "epoch": 0.121924255682065, "grad_norm": 0.322265625, "learning_rate": 0.0005412189130170777, "loss": 0.1748, "step": 68764 }, { "epoch": 0.12192780184737481, "grad_norm": 0.62109375, "learning_rate": 0.0005411722566643221, "loss": 0.1305, "step": 68766 }, { "epoch": 0.12193134801268464, "grad_norm": 1.046875, "learning_rate": 0.000541125602755554, "loss": 0.2834, "step": 68768 }, { "epoch": 0.12193489417799445, "grad_norm": 0.3828125, "learning_rate": 0.0005410789512909786, "loss": 0.1759, "step": 68770 }, { "epoch": 0.12193844034330427, "grad_norm": 0.431640625, "learning_rate": 0.0005410323022707987, "loss": 0.1802, "step": 68772 }, { "epoch": 0.12194198650861408, "grad_norm": 0.2578125, "learning_rate": 0.000540985655695219, "loss": 0.192, "step": 68774 }, { "epoch": 0.1219455326739239, "grad_norm": 0.22265625, "learning_rate": 0.000540939011564443, "loss": 0.2191, "step": 68776 }, { "epoch": 0.12194907883923371, "grad_norm": 0.396484375, "learning_rate": 0.0005408923698786757, "loss": 0.1831, "step": 68778 }, { "epoch": 0.12195262500454353, "grad_norm": 0.33984375, "learning_rate": 0.0005408457306381199, "loss": 0.1725, "step": 68780 }, { "epoch": 0.12195617116985334, "grad_norm": 0.3984375, "learning_rate": 0.0005407990938429803, "loss": 0.4347, "step": 68782 }, { "epoch": 0.12195971733516316, "grad_norm": 0.337890625, "learning_rate": 0.0005407524594934606, "loss": 0.1387, "step": 68784 }, { "epoch": 0.12196326350047297, "grad_norm": 2.453125, "learning_rate": 0.0005407058275897647, "loss": 0.5825, "step": 68786 }, { "epoch": 0.12196680966578279, "grad_norm": 0.828125, "learning_rate": 0.0005406591981320967, "loss": 0.1773, "step": 68788 }, { "epoch": 0.1219703558310926, "grad_norm": 0.2177734375, "learning_rate": 0.0005406125711206602, "loss": 0.1687, "step": 68790 }, { "epoch": 0.12197390199640241, "grad_norm": 0.546875, "learning_rate": 0.0005405659465556596, "loss": 0.2726, "step": 68792 }, { "epoch": 0.12197744816171223, "grad_norm": 0.546875, "learning_rate": 0.0005405193244372988, "loss": 0.1823, "step": 68794 }, { "epoch": 0.12198099432702204, "grad_norm": 1.0234375, "learning_rate": 0.0005404727047657814, "loss": 0.1222, "step": 68796 }, { "epoch": 0.12198454049233186, "grad_norm": 0.6796875, "learning_rate": 0.000540426087541311, "loss": 0.2351, "step": 68798 }, { "epoch": 0.12198808665764167, "grad_norm": 0.65234375, "learning_rate": 0.0005403794727640922, "loss": 0.1815, "step": 68800 }, { "epoch": 0.12199163282295149, "grad_norm": 0.73046875, "learning_rate": 0.0005403328604343285, "loss": 0.2993, "step": 68802 }, { "epoch": 0.1219951789882613, "grad_norm": 0.34375, "learning_rate": 0.0005402862505522237, "loss": 0.1884, "step": 68804 }, { "epoch": 0.12199872515357112, "grad_norm": 0.474609375, "learning_rate": 0.0005402396431179819, "loss": 0.2191, "step": 68806 }, { "epoch": 0.12200227131888093, "grad_norm": 0.56640625, "learning_rate": 0.0005401930381318063, "loss": 0.1692, "step": 68808 }, { "epoch": 0.12200581748419075, "grad_norm": 0.31640625, "learning_rate": 0.0005401464355939017, "loss": 0.2068, "step": 68810 }, { "epoch": 0.12200936364950056, "grad_norm": 0.291015625, "learning_rate": 0.0005400998355044711, "loss": 0.179, "step": 68812 }, { "epoch": 0.12201290981481039, "grad_norm": 0.3984375, "learning_rate": 0.0005400532378637186, "loss": 0.1454, "step": 68814 }, { "epoch": 0.1220164559801202, "grad_norm": 0.326171875, "learning_rate": 0.0005400066426718479, "loss": 0.1716, "step": 68816 }, { "epoch": 0.12202000214543002, "grad_norm": 0.2099609375, "learning_rate": 0.0005399600499290631, "loss": 0.2119, "step": 68818 }, { "epoch": 0.12202354831073983, "grad_norm": 0.2353515625, "learning_rate": 0.0005399134596355677, "loss": 0.1664, "step": 68820 }, { "epoch": 0.12202709447604965, "grad_norm": 0.2578125, "learning_rate": 0.0005398668717915654, "loss": 0.176, "step": 68822 }, { "epoch": 0.12203064064135946, "grad_norm": 0.64453125, "learning_rate": 0.0005398202863972601, "loss": 0.2136, "step": 68824 }, { "epoch": 0.12203418680666928, "grad_norm": 0.5546875, "learning_rate": 0.0005397737034528553, "loss": 0.1826, "step": 68826 }, { "epoch": 0.12203773297197909, "grad_norm": 0.68359375, "learning_rate": 0.0005397271229585554, "loss": 0.1788, "step": 68828 }, { "epoch": 0.1220412791372889, "grad_norm": 0.609375, "learning_rate": 0.000539680544914563, "loss": 0.1778, "step": 68830 }, { "epoch": 0.12204482530259872, "grad_norm": 0.2734375, "learning_rate": 0.0005396339693210828, "loss": 0.1144, "step": 68832 }, { "epoch": 0.12204837146790853, "grad_norm": 0.283203125, "learning_rate": 0.0005395873961783177, "loss": 0.2191, "step": 68834 }, { "epoch": 0.12205191763321835, "grad_norm": 0.4140625, "learning_rate": 0.0005395408254864724, "loss": 0.161, "step": 68836 }, { "epoch": 0.12205546379852816, "grad_norm": 0.453125, "learning_rate": 0.0005394942572457495, "loss": 0.1444, "step": 68838 }, { "epoch": 0.12205900996383798, "grad_norm": 0.373046875, "learning_rate": 0.0005394476914563533, "loss": 0.1635, "step": 68840 }, { "epoch": 0.12206255612914779, "grad_norm": 0.1884765625, "learning_rate": 0.0005394011281184875, "loss": 0.1784, "step": 68842 }, { "epoch": 0.1220661022944576, "grad_norm": 1.109375, "learning_rate": 0.0005393545672323554, "loss": 0.1366, "step": 68844 }, { "epoch": 0.12206964845976742, "grad_norm": 0.38671875, "learning_rate": 0.0005393080087981606, "loss": 0.1512, "step": 68846 }, { "epoch": 0.12207319462507724, "grad_norm": 0.51953125, "learning_rate": 0.0005392614528161068, "loss": 0.2037, "step": 68848 }, { "epoch": 0.12207674079038705, "grad_norm": 1.3359375, "learning_rate": 0.0005392148992863979, "loss": 0.2109, "step": 68850 }, { "epoch": 0.12208028695569687, "grad_norm": 0.341796875, "learning_rate": 0.0005391683482092374, "loss": 0.1577, "step": 68852 }, { "epoch": 0.12208383312100668, "grad_norm": 0.2294921875, "learning_rate": 0.0005391217995848287, "loss": 0.1769, "step": 68854 }, { "epoch": 0.1220873792863165, "grad_norm": 0.373046875, "learning_rate": 0.0005390752534133751, "loss": 0.2065, "step": 68856 }, { "epoch": 0.12209092545162632, "grad_norm": 0.185546875, "learning_rate": 0.000539028709695081, "loss": 0.16, "step": 68858 }, { "epoch": 0.12209447161693614, "grad_norm": 0.2890625, "learning_rate": 0.0005389821684301492, "loss": 0.1555, "step": 68860 }, { "epoch": 0.12209801778224595, "grad_norm": 0.2294921875, "learning_rate": 0.0005389356296187837, "loss": 0.1414, "step": 68862 }, { "epoch": 0.12210156394755577, "grad_norm": 0.283203125, "learning_rate": 0.0005388890932611878, "loss": 0.194, "step": 68864 }, { "epoch": 0.12210511011286558, "grad_norm": 0.408203125, "learning_rate": 0.0005388425593575648, "loss": 0.1825, "step": 68866 }, { "epoch": 0.1221086562781754, "grad_norm": 0.32421875, "learning_rate": 0.000538796027908119, "loss": 0.1633, "step": 68868 }, { "epoch": 0.12211220244348521, "grad_norm": 0.65625, "learning_rate": 0.000538749498913053, "loss": 0.1805, "step": 68870 }, { "epoch": 0.12211574860879502, "grad_norm": 2.640625, "learning_rate": 0.0005387029723725708, "loss": 0.4031, "step": 68872 }, { "epoch": 0.12211929477410484, "grad_norm": 0.609375, "learning_rate": 0.0005386564482868754, "loss": 0.1702, "step": 68874 }, { "epoch": 0.12212284093941465, "grad_norm": 0.2890625, "learning_rate": 0.0005386099266561715, "loss": 0.1537, "step": 68876 }, { "epoch": 0.12212638710472447, "grad_norm": 0.51171875, "learning_rate": 0.0005385634074806608, "loss": 0.1693, "step": 68878 }, { "epoch": 0.12212993327003428, "grad_norm": 0.7265625, "learning_rate": 0.000538516890760548, "loss": 0.1987, "step": 68880 }, { "epoch": 0.1221334794353441, "grad_norm": 0.52734375, "learning_rate": 0.0005384703764960363, "loss": 0.1709, "step": 68882 }, { "epoch": 0.12213702560065391, "grad_norm": 0.232421875, "learning_rate": 0.0005384238646873285, "loss": 0.1941, "step": 68884 }, { "epoch": 0.12214057176596373, "grad_norm": 0.83203125, "learning_rate": 0.0005383773553346293, "loss": 0.1843, "step": 68886 }, { "epoch": 0.12214411793127354, "grad_norm": 1.125, "learning_rate": 0.0005383308484381407, "loss": 0.1811, "step": 68888 }, { "epoch": 0.12214766409658336, "grad_norm": 0.75, "learning_rate": 0.0005382843439980669, "loss": 0.1921, "step": 68890 }, { "epoch": 0.12215121026189317, "grad_norm": 0.359375, "learning_rate": 0.0005382378420146108, "loss": 0.1777, "step": 68892 }, { "epoch": 0.12215475642720298, "grad_norm": 0.291015625, "learning_rate": 0.0005381913424879767, "loss": 0.1651, "step": 68894 }, { "epoch": 0.1221583025925128, "grad_norm": 0.498046875, "learning_rate": 0.0005381448454183669, "loss": 0.1642, "step": 68896 }, { "epoch": 0.12216184875782261, "grad_norm": 1.3203125, "learning_rate": 0.0005380983508059853, "loss": 0.3913, "step": 68898 }, { "epoch": 0.12216539492313243, "grad_norm": 1.0078125, "learning_rate": 0.0005380518586510352, "loss": 0.1392, "step": 68900 }, { "epoch": 0.12216894108844224, "grad_norm": 0.421875, "learning_rate": 0.0005380053689537196, "loss": 0.1339, "step": 68902 }, { "epoch": 0.12217248725375207, "grad_norm": 0.59765625, "learning_rate": 0.0005379588817142424, "loss": 0.1363, "step": 68904 }, { "epoch": 0.12217603341906189, "grad_norm": 0.314453125, "learning_rate": 0.0005379123969328062, "loss": 0.1832, "step": 68906 }, { "epoch": 0.1221795795843717, "grad_norm": 0.5078125, "learning_rate": 0.0005378659146096148, "loss": 0.2019, "step": 68908 }, { "epoch": 0.12218312574968151, "grad_norm": 0.31640625, "learning_rate": 0.0005378194347448715, "loss": 0.1869, "step": 68910 }, { "epoch": 0.12218667191499133, "grad_norm": 0.8828125, "learning_rate": 0.0005377729573387795, "loss": 0.2159, "step": 68912 }, { "epoch": 0.12219021808030114, "grad_norm": 1.890625, "learning_rate": 0.000537726482391542, "loss": 0.1908, "step": 68914 }, { "epoch": 0.12219376424561096, "grad_norm": 1.4453125, "learning_rate": 0.0005376800099033619, "loss": 0.4446, "step": 68916 }, { "epoch": 0.12219731041092077, "grad_norm": 1.2578125, "learning_rate": 0.0005376335398744434, "loss": 0.4267, "step": 68918 }, { "epoch": 0.12220085657623059, "grad_norm": 0.2373046875, "learning_rate": 0.0005375870723049885, "loss": 0.2955, "step": 68920 }, { "epoch": 0.1222044027415404, "grad_norm": 1.9921875, "learning_rate": 0.0005375406071952013, "loss": 0.2515, "step": 68922 }, { "epoch": 0.12220794890685022, "grad_norm": 0.68359375, "learning_rate": 0.0005374941445452846, "loss": 0.1596, "step": 68924 }, { "epoch": 0.12221149507216003, "grad_norm": 0.31640625, "learning_rate": 0.0005374476843554424, "loss": 0.1748, "step": 68926 }, { "epoch": 0.12221504123746985, "grad_norm": 0.609375, "learning_rate": 0.0005374012266258766, "loss": 0.1489, "step": 68928 }, { "epoch": 0.12221858740277966, "grad_norm": 0.345703125, "learning_rate": 0.0005373547713567914, "loss": 0.2008, "step": 68930 }, { "epoch": 0.12222213356808947, "grad_norm": 0.361328125, "learning_rate": 0.0005373083185483895, "loss": 0.1577, "step": 68932 }, { "epoch": 0.12222567973339929, "grad_norm": 0.27734375, "learning_rate": 0.0005372618682008741, "loss": 0.1162, "step": 68934 }, { "epoch": 0.1222292258987091, "grad_norm": 0.38671875, "learning_rate": 0.0005372154203144486, "loss": 0.1618, "step": 68936 }, { "epoch": 0.12223277206401892, "grad_norm": 0.419921875, "learning_rate": 0.0005371689748893156, "loss": 0.2026, "step": 68938 }, { "epoch": 0.12223631822932873, "grad_norm": 0.50390625, "learning_rate": 0.0005371225319256788, "loss": 0.156, "step": 68940 }, { "epoch": 0.12223986439463855, "grad_norm": 0.64453125, "learning_rate": 0.000537076091423741, "loss": 0.2946, "step": 68942 }, { "epoch": 0.12224341055994836, "grad_norm": 0.19921875, "learning_rate": 0.0005370296533837056, "loss": 0.1752, "step": 68944 }, { "epoch": 0.12224695672525818, "grad_norm": 5.5625, "learning_rate": 0.000536983217805775, "loss": 0.3432, "step": 68946 }, { "epoch": 0.12225050289056799, "grad_norm": 0.2734375, "learning_rate": 0.0005369367846901531, "loss": 0.14, "step": 68948 }, { "epoch": 0.12225404905587782, "grad_norm": 0.2001953125, "learning_rate": 0.0005368903540370429, "loss": 0.1988, "step": 68950 }, { "epoch": 0.12225759522118763, "grad_norm": 0.2255859375, "learning_rate": 0.000536843925846647, "loss": 0.1376, "step": 68952 }, { "epoch": 0.12226114138649745, "grad_norm": 0.287109375, "learning_rate": 0.0005367975001191686, "loss": 0.1586, "step": 68954 }, { "epoch": 0.12226468755180726, "grad_norm": 0.1982421875, "learning_rate": 0.0005367510768548105, "loss": 0.1976, "step": 68956 }, { "epoch": 0.12226823371711708, "grad_norm": 0.609375, "learning_rate": 0.0005367046560537767, "loss": 0.2117, "step": 68958 }, { "epoch": 0.12227177988242689, "grad_norm": 0.287109375, "learning_rate": 0.0005366582377162688, "loss": 0.1418, "step": 68960 }, { "epoch": 0.12227532604773671, "grad_norm": 0.92578125, "learning_rate": 0.0005366118218424909, "loss": 0.2395, "step": 68962 }, { "epoch": 0.12227887221304652, "grad_norm": 0.251953125, "learning_rate": 0.0005365654084326455, "loss": 0.241, "step": 68964 }, { "epoch": 0.12228241837835634, "grad_norm": 0.458984375, "learning_rate": 0.0005365189974869362, "loss": 0.2593, "step": 68966 }, { "epoch": 0.12228596454366615, "grad_norm": 0.447265625, "learning_rate": 0.0005364725890055649, "loss": 0.1529, "step": 68968 }, { "epoch": 0.12228951070897597, "grad_norm": 0.3828125, "learning_rate": 0.0005364261829887354, "loss": 0.2016, "step": 68970 }, { "epoch": 0.12229305687428578, "grad_norm": 0.69140625, "learning_rate": 0.0005363797794366504, "loss": 0.2593, "step": 68972 }, { "epoch": 0.1222966030395956, "grad_norm": 0.3125, "learning_rate": 0.0005363333783495126, "loss": 0.2062, "step": 68974 }, { "epoch": 0.12230014920490541, "grad_norm": 1.9375, "learning_rate": 0.0005362869797275259, "loss": 0.1863, "step": 68976 }, { "epoch": 0.12230369537021522, "grad_norm": 0.62890625, "learning_rate": 0.0005362405835708917, "loss": 0.1731, "step": 68978 }, { "epoch": 0.12230724153552504, "grad_norm": 0.265625, "learning_rate": 0.0005361941898798141, "loss": 0.1873, "step": 68980 }, { "epoch": 0.12231078770083485, "grad_norm": 0.20703125, "learning_rate": 0.0005361477986544953, "loss": 0.1116, "step": 68982 }, { "epoch": 0.12231433386614467, "grad_norm": 0.55859375, "learning_rate": 0.0005361014098951391, "loss": 0.2088, "step": 68984 }, { "epoch": 0.12231788003145448, "grad_norm": 0.314453125, "learning_rate": 0.0005360550236019472, "loss": 0.1872, "step": 68986 }, { "epoch": 0.1223214261967643, "grad_norm": 0.1748046875, "learning_rate": 0.0005360086397751233, "loss": 0.2322, "step": 68988 }, { "epoch": 0.12232497236207411, "grad_norm": 0.5546875, "learning_rate": 0.0005359622584148702, "loss": 0.1226, "step": 68990 }, { "epoch": 0.12232851852738393, "grad_norm": 0.490234375, "learning_rate": 0.0005359158795213905, "loss": 0.1843, "step": 68992 }, { "epoch": 0.12233206469269375, "grad_norm": 0.322265625, "learning_rate": 0.0005358695030948871, "loss": 0.1385, "step": 68994 }, { "epoch": 0.12233561085800357, "grad_norm": 0.32421875, "learning_rate": 0.0005358231291355623, "loss": 0.1837, "step": 68996 }, { "epoch": 0.12233915702331338, "grad_norm": 1.3671875, "learning_rate": 0.0005357767576436198, "loss": 0.1784, "step": 68998 }, { "epoch": 0.1223427031886232, "grad_norm": 0.27734375, "learning_rate": 0.0005357303886192622, "loss": 0.1807, "step": 69000 }, { "epoch": 0.12234624935393301, "grad_norm": 0.484375, "learning_rate": 0.0005356840220626918, "loss": 0.2383, "step": 69002 }, { "epoch": 0.12234979551924283, "grad_norm": 0.2890625, "learning_rate": 0.0005356376579741116, "loss": 0.1504, "step": 69004 }, { "epoch": 0.12235334168455264, "grad_norm": 0.447265625, "learning_rate": 0.0005355912963537246, "loss": 0.1988, "step": 69006 }, { "epoch": 0.12235688784986246, "grad_norm": 0.337890625, "learning_rate": 0.0005355449372017335, "loss": 0.2214, "step": 69008 }, { "epoch": 0.12236043401517227, "grad_norm": 0.53125, "learning_rate": 0.0005354985805183412, "loss": 0.1445, "step": 69010 }, { "epoch": 0.12236398018048208, "grad_norm": 1.7109375, "learning_rate": 0.00053545222630375, "loss": 0.2017, "step": 69012 }, { "epoch": 0.1223675263457919, "grad_norm": 0.462890625, "learning_rate": 0.0005354058745581626, "loss": 0.1689, "step": 69014 }, { "epoch": 0.12237107251110171, "grad_norm": 0.72265625, "learning_rate": 0.0005353595252817825, "loss": 0.2263, "step": 69016 }, { "epoch": 0.12237461867641153, "grad_norm": 0.5625, "learning_rate": 0.0005353131784748113, "loss": 0.2103, "step": 69018 }, { "epoch": 0.12237816484172134, "grad_norm": 0.349609375, "learning_rate": 0.0005352668341374524, "loss": 0.2373, "step": 69020 }, { "epoch": 0.12238171100703116, "grad_norm": 0.76171875, "learning_rate": 0.0005352204922699082, "loss": 0.1957, "step": 69022 }, { "epoch": 0.12238525717234097, "grad_norm": 0.416015625, "learning_rate": 0.000535174152872382, "loss": 0.1559, "step": 69024 }, { "epoch": 0.12238880333765079, "grad_norm": 0.51171875, "learning_rate": 0.0005351278159450754, "loss": 0.2451, "step": 69026 }, { "epoch": 0.1223923495029606, "grad_norm": 0.455078125, "learning_rate": 0.000535081481488192, "loss": 0.1412, "step": 69028 }, { "epoch": 0.12239589566827042, "grad_norm": 0.58203125, "learning_rate": 0.000535035149501934, "loss": 0.1764, "step": 69030 }, { "epoch": 0.12239944183358023, "grad_norm": 1.3515625, "learning_rate": 0.0005349888199865041, "loss": 0.3311, "step": 69032 }, { "epoch": 0.12240298799889004, "grad_norm": 0.296875, "learning_rate": 0.0005349424929421047, "loss": 0.1855, "step": 69034 }, { "epoch": 0.12240653416419986, "grad_norm": 1.7109375, "learning_rate": 0.0005348961683689386, "loss": 0.2927, "step": 69036 }, { "epoch": 0.12241008032950967, "grad_norm": 0.404296875, "learning_rate": 0.0005348498462672085, "loss": 0.1952, "step": 69038 }, { "epoch": 0.1224136264948195, "grad_norm": 0.828125, "learning_rate": 0.0005348035266371171, "loss": 0.1535, "step": 69040 }, { "epoch": 0.12241717266012932, "grad_norm": 2.703125, "learning_rate": 0.0005347572094788666, "loss": 0.342, "step": 69042 }, { "epoch": 0.12242071882543913, "grad_norm": 0.26171875, "learning_rate": 0.0005347108947926595, "loss": 0.348, "step": 69044 }, { "epoch": 0.12242426499074895, "grad_norm": 0.69921875, "learning_rate": 0.0005346645825786988, "loss": 0.1762, "step": 69046 }, { "epoch": 0.12242781115605876, "grad_norm": 0.423828125, "learning_rate": 0.000534618272837187, "loss": 0.1542, "step": 69048 }, { "epoch": 0.12243135732136858, "grad_norm": 0.6640625, "learning_rate": 0.0005345719655683264, "loss": 0.1738, "step": 69050 }, { "epoch": 0.12243490348667839, "grad_norm": 0.2890625, "learning_rate": 0.0005345256607723196, "loss": 0.3216, "step": 69052 }, { "epoch": 0.1224384496519882, "grad_norm": 1.4765625, "learning_rate": 0.0005344793584493687, "loss": 0.2394, "step": 69054 }, { "epoch": 0.12244199581729802, "grad_norm": 0.447265625, "learning_rate": 0.0005344330585996769, "loss": 0.1758, "step": 69056 }, { "epoch": 0.12244554198260783, "grad_norm": 0.54296875, "learning_rate": 0.0005343867612234463, "loss": 0.214, "step": 69058 }, { "epoch": 0.12244908814791765, "grad_norm": 0.6015625, "learning_rate": 0.0005343404663208797, "loss": 0.1452, "step": 69060 }, { "epoch": 0.12245263431322746, "grad_norm": 0.30078125, "learning_rate": 0.0005342941738921789, "loss": 0.1564, "step": 69062 }, { "epoch": 0.12245618047853728, "grad_norm": 0.3671875, "learning_rate": 0.0005342478839375471, "loss": 0.1643, "step": 69064 }, { "epoch": 0.12245972664384709, "grad_norm": 0.29296875, "learning_rate": 0.0005342015964571865, "loss": 0.1893, "step": 69066 }, { "epoch": 0.1224632728091569, "grad_norm": 0.94921875, "learning_rate": 0.0005341553114512993, "loss": 0.2661, "step": 69068 }, { "epoch": 0.12246681897446672, "grad_norm": 0.55859375, "learning_rate": 0.0005341090289200881, "loss": 0.2024, "step": 69070 }, { "epoch": 0.12247036513977654, "grad_norm": 0.30078125, "learning_rate": 0.000534062748863755, "loss": 0.2357, "step": 69072 }, { "epoch": 0.12247391130508635, "grad_norm": 0.828125, "learning_rate": 0.0005340164712825035, "loss": 0.1809, "step": 69074 }, { "epoch": 0.12247745747039616, "grad_norm": 0.48828125, "learning_rate": 0.0005339701961765344, "loss": 0.1748, "step": 69076 }, { "epoch": 0.12248100363570598, "grad_norm": 1.8359375, "learning_rate": 0.0005339239235460513, "loss": 0.2521, "step": 69078 }, { "epoch": 0.1224845498010158, "grad_norm": 0.2119140625, "learning_rate": 0.0005338776533912561, "loss": 0.1264, "step": 69080 }, { "epoch": 0.12248809596632561, "grad_norm": 0.5078125, "learning_rate": 0.0005338313857123511, "loss": 0.2038, "step": 69082 }, { "epoch": 0.12249164213163542, "grad_norm": 0.58984375, "learning_rate": 0.0005337851205095387, "loss": 0.234, "step": 69084 }, { "epoch": 0.12249518829694525, "grad_norm": 2.546875, "learning_rate": 0.0005337388577830213, "loss": 0.5306, "step": 69086 }, { "epoch": 0.12249873446225507, "grad_norm": 0.33203125, "learning_rate": 0.0005336925975330013, "loss": 0.1873, "step": 69088 }, { "epoch": 0.12250228062756488, "grad_norm": 0.52734375, "learning_rate": 0.0005336463397596809, "loss": 0.1929, "step": 69090 }, { "epoch": 0.1225058267928747, "grad_norm": 0.349609375, "learning_rate": 0.0005336000844632625, "loss": 0.2444, "step": 69092 }, { "epoch": 0.12250937295818451, "grad_norm": 0.3203125, "learning_rate": 0.0005335538316439479, "loss": 0.1741, "step": 69094 }, { "epoch": 0.12251291912349432, "grad_norm": 0.6328125, "learning_rate": 0.0005335075813019403, "loss": 0.2138, "step": 69096 }, { "epoch": 0.12251646528880414, "grad_norm": 0.412109375, "learning_rate": 0.0005334613334374414, "loss": 0.2197, "step": 69098 }, { "epoch": 0.12252001145411395, "grad_norm": 0.2265625, "learning_rate": 0.0005334150880506535, "loss": 0.1743, "step": 69100 }, { "epoch": 0.12252355761942377, "grad_norm": 1.234375, "learning_rate": 0.000533368845141779, "loss": 0.2144, "step": 69102 }, { "epoch": 0.12252710378473358, "grad_norm": 0.396484375, "learning_rate": 0.0005333226047110197, "loss": 0.2013, "step": 69104 }, { "epoch": 0.1225306499500434, "grad_norm": 0.275390625, "learning_rate": 0.0005332763667585788, "loss": 0.174, "step": 69106 }, { "epoch": 0.12253419611535321, "grad_norm": 0.328125, "learning_rate": 0.0005332301312846571, "loss": 0.1149, "step": 69108 }, { "epoch": 0.12253774228066303, "grad_norm": 0.369140625, "learning_rate": 0.0005331838982894579, "loss": 0.2241, "step": 69110 }, { "epoch": 0.12254128844597284, "grad_norm": 1.7890625, "learning_rate": 0.0005331376677731829, "loss": 0.2041, "step": 69112 }, { "epoch": 0.12254483461128265, "grad_norm": 0.91015625, "learning_rate": 0.0005330914397360351, "loss": 0.2573, "step": 69114 }, { "epoch": 0.12254838077659247, "grad_norm": 0.2001953125, "learning_rate": 0.0005330452141782154, "loss": 0.1502, "step": 69116 }, { "epoch": 0.12255192694190228, "grad_norm": 0.390625, "learning_rate": 0.0005329989910999269, "loss": 0.1715, "step": 69118 }, { "epoch": 0.1225554731072121, "grad_norm": 0.376953125, "learning_rate": 0.0005329527705013715, "loss": 0.1994, "step": 69120 }, { "epoch": 0.12255901927252191, "grad_norm": 0.306640625, "learning_rate": 0.0005329065523827512, "loss": 0.3808, "step": 69122 }, { "epoch": 0.12256256543783173, "grad_norm": 0.203125, "learning_rate": 0.0005328603367442682, "loss": 0.1507, "step": 69124 }, { "epoch": 0.12256611160314154, "grad_norm": 0.373046875, "learning_rate": 0.0005328141235861246, "loss": 0.1301, "step": 69126 }, { "epoch": 0.12256965776845136, "grad_norm": 0.24609375, "learning_rate": 0.0005327679129085227, "loss": 0.1925, "step": 69128 }, { "epoch": 0.12257320393376119, "grad_norm": 2.328125, "learning_rate": 0.0005327217047116643, "loss": 0.3733, "step": 69130 }, { "epoch": 0.122576750099071, "grad_norm": 0.69140625, "learning_rate": 0.0005326754989957519, "loss": 0.2345, "step": 69132 }, { "epoch": 0.12258029626438081, "grad_norm": 0.294921875, "learning_rate": 0.0005326292957609871, "loss": 0.1883, "step": 69134 }, { "epoch": 0.12258384242969063, "grad_norm": 1.109375, "learning_rate": 0.0005325830950075722, "loss": 0.2022, "step": 69136 }, { "epoch": 0.12258738859500044, "grad_norm": 0.3828125, "learning_rate": 0.0005325368967357094, "loss": 0.1517, "step": 69138 }, { "epoch": 0.12259093476031026, "grad_norm": 0.349609375, "learning_rate": 0.0005324907009456005, "loss": 0.1917, "step": 69140 }, { "epoch": 0.12259448092562007, "grad_norm": 1.1953125, "learning_rate": 0.0005324445076374478, "loss": 0.2596, "step": 69142 }, { "epoch": 0.12259802709092989, "grad_norm": 0.271484375, "learning_rate": 0.0005323983168114527, "loss": 0.1442, "step": 69144 }, { "epoch": 0.1226015732562397, "grad_norm": 0.41796875, "learning_rate": 0.0005323521284678179, "loss": 0.18, "step": 69146 }, { "epoch": 0.12260511942154952, "grad_norm": 0.3125, "learning_rate": 0.0005323059426067453, "loss": 0.2651, "step": 69148 }, { "epoch": 0.12260866558685933, "grad_norm": 0.478515625, "learning_rate": 0.0005322597592284368, "loss": 0.1444, "step": 69150 }, { "epoch": 0.12261221175216915, "grad_norm": 0.55078125, "learning_rate": 0.0005322135783330939, "loss": 0.1731, "step": 69152 }, { "epoch": 0.12261575791747896, "grad_norm": 1.8125, "learning_rate": 0.0005321673999209194, "loss": 0.2927, "step": 69154 }, { "epoch": 0.12261930408278877, "grad_norm": 0.34765625, "learning_rate": 0.0005321212239921148, "loss": 0.1889, "step": 69156 }, { "epoch": 0.12262285024809859, "grad_norm": 0.302734375, "learning_rate": 0.0005320750505468821, "loss": 0.1379, "step": 69158 }, { "epoch": 0.1226263964134084, "grad_norm": 0.55859375, "learning_rate": 0.0005320288795854232, "loss": 0.293, "step": 69160 }, { "epoch": 0.12262994257871822, "grad_norm": 0.76171875, "learning_rate": 0.0005319827111079399, "loss": 0.1757, "step": 69162 }, { "epoch": 0.12263348874402803, "grad_norm": 0.271484375, "learning_rate": 0.0005319365451146347, "loss": 0.1653, "step": 69164 }, { "epoch": 0.12263703490933785, "grad_norm": 0.2353515625, "learning_rate": 0.0005318903816057085, "loss": 0.1641, "step": 69166 }, { "epoch": 0.12264058107464766, "grad_norm": 3.546875, "learning_rate": 0.0005318442205813643, "loss": 0.3773, "step": 69168 }, { "epoch": 0.12264412723995748, "grad_norm": 0.98046875, "learning_rate": 0.0005317980620418029, "loss": 0.2309, "step": 69170 }, { "epoch": 0.12264767340526729, "grad_norm": 0.859375, "learning_rate": 0.0005317519059872275, "loss": 0.1937, "step": 69172 }, { "epoch": 0.1226512195705771, "grad_norm": 1.140625, "learning_rate": 0.0005317057524178385, "loss": 0.2458, "step": 69174 }, { "epoch": 0.12265476573588693, "grad_norm": 0.6875, "learning_rate": 0.0005316596013338388, "loss": 0.1701, "step": 69176 }, { "epoch": 0.12265831190119675, "grad_norm": 0.318359375, "learning_rate": 0.0005316134527354297, "loss": 0.1952, "step": 69178 }, { "epoch": 0.12266185806650656, "grad_norm": 0.1513671875, "learning_rate": 0.0005315673066228133, "loss": 0.1587, "step": 69180 }, { "epoch": 0.12266540423181638, "grad_norm": 0.5, "learning_rate": 0.0005315211629961914, "loss": 0.1715, "step": 69182 }, { "epoch": 0.12266895039712619, "grad_norm": 0.2080078125, "learning_rate": 0.0005314750218557654, "loss": 0.1467, "step": 69184 }, { "epoch": 0.122672496562436, "grad_norm": 1.4140625, "learning_rate": 0.0005314288832017376, "loss": 0.1626, "step": 69186 }, { "epoch": 0.12267604272774582, "grad_norm": 0.439453125, "learning_rate": 0.0005313827470343096, "loss": 0.1809, "step": 69188 }, { "epoch": 0.12267958889305564, "grad_norm": 4.375, "learning_rate": 0.0005313366133536833, "loss": 0.3183, "step": 69190 }, { "epoch": 0.12268313505836545, "grad_norm": 0.39453125, "learning_rate": 0.0005312904821600599, "loss": 0.184, "step": 69192 }, { "epoch": 0.12268668122367526, "grad_norm": 0.455078125, "learning_rate": 0.0005312443534536421, "loss": 0.243, "step": 69194 }, { "epoch": 0.12269022738898508, "grad_norm": 0.3828125, "learning_rate": 0.0005311982272346309, "loss": 0.2303, "step": 69196 }, { "epoch": 0.1226937735542949, "grad_norm": 0.6640625, "learning_rate": 0.0005311521035032284, "loss": 0.1596, "step": 69198 }, { "epoch": 0.12269731971960471, "grad_norm": 1.0859375, "learning_rate": 0.0005311059822596361, "loss": 0.1759, "step": 69200 }, { "epoch": 0.12270086588491452, "grad_norm": 0.62109375, "learning_rate": 0.0005310598635040554, "loss": 0.1988, "step": 69202 }, { "epoch": 0.12270441205022434, "grad_norm": 0.4609375, "learning_rate": 0.0005310137472366892, "loss": 0.1683, "step": 69204 }, { "epoch": 0.12270795821553415, "grad_norm": 0.212890625, "learning_rate": 0.0005309676334577376, "loss": 0.2183, "step": 69206 }, { "epoch": 0.12271150438084397, "grad_norm": 0.482421875, "learning_rate": 0.0005309215221674033, "loss": 0.1731, "step": 69208 }, { "epoch": 0.12271505054615378, "grad_norm": 0.41015625, "learning_rate": 0.0005308754133658875, "loss": 0.2202, "step": 69210 }, { "epoch": 0.1227185967114636, "grad_norm": 0.51953125, "learning_rate": 0.0005308293070533926, "loss": 0.1455, "step": 69212 }, { "epoch": 0.12272214287677341, "grad_norm": 1.046875, "learning_rate": 0.0005307832032301192, "loss": 0.2225, "step": 69214 }, { "epoch": 0.12272568904208322, "grad_norm": 0.984375, "learning_rate": 0.0005307371018962696, "loss": 0.2653, "step": 69216 }, { "epoch": 0.12272923520739304, "grad_norm": 0.392578125, "learning_rate": 0.0005306910030520453, "loss": 0.265, "step": 69218 }, { "epoch": 0.12273278137270285, "grad_norm": 3.65625, "learning_rate": 0.0005306449066976476, "loss": 0.2722, "step": 69220 }, { "epoch": 0.12273632753801268, "grad_norm": 0.302734375, "learning_rate": 0.0005305988128332788, "loss": 0.1632, "step": 69222 }, { "epoch": 0.1227398737033225, "grad_norm": 0.392578125, "learning_rate": 0.0005305527214591397, "loss": 0.1485, "step": 69224 }, { "epoch": 0.12274341986863231, "grad_norm": 0.322265625, "learning_rate": 0.0005305066325754323, "loss": 0.1964, "step": 69226 }, { "epoch": 0.12274696603394213, "grad_norm": 0.373046875, "learning_rate": 0.000530460546182358, "loss": 0.2108, "step": 69228 }, { "epoch": 0.12275051219925194, "grad_norm": 1.046875, "learning_rate": 0.0005304144622801188, "loss": 0.1828, "step": 69230 }, { "epoch": 0.12275405836456176, "grad_norm": 0.64453125, "learning_rate": 0.0005303683808689154, "loss": 0.1862, "step": 69232 }, { "epoch": 0.12275760452987157, "grad_norm": 0.42578125, "learning_rate": 0.0005303223019489501, "loss": 0.2021, "step": 69234 }, { "epoch": 0.12276115069518138, "grad_norm": 0.361328125, "learning_rate": 0.0005302762255204242, "loss": 0.2362, "step": 69236 }, { "epoch": 0.1227646968604912, "grad_norm": 0.5546875, "learning_rate": 0.0005302301515835393, "loss": 0.2198, "step": 69238 }, { "epoch": 0.12276824302580101, "grad_norm": 0.3671875, "learning_rate": 0.0005301840801384964, "loss": 0.2215, "step": 69240 }, { "epoch": 0.12277178919111083, "grad_norm": 0.447265625, "learning_rate": 0.0005301380111854973, "loss": 0.2, "step": 69242 }, { "epoch": 0.12277533535642064, "grad_norm": 0.50390625, "learning_rate": 0.0005300919447247436, "loss": 0.183, "step": 69244 }, { "epoch": 0.12277888152173046, "grad_norm": 0.51953125, "learning_rate": 0.000530045880756437, "loss": 0.2177, "step": 69246 }, { "epoch": 0.12278242768704027, "grad_norm": 0.7109375, "learning_rate": 0.0005299998192807785, "loss": 0.2366, "step": 69248 }, { "epoch": 0.12278597385235009, "grad_norm": 0.20703125, "learning_rate": 0.0005299537602979693, "loss": 0.183, "step": 69250 }, { "epoch": 0.1227895200176599, "grad_norm": 0.1630859375, "learning_rate": 0.0005299077038082116, "loss": 0.1912, "step": 69252 }, { "epoch": 0.12279306618296972, "grad_norm": 2.8125, "learning_rate": 0.0005298616498117065, "loss": 0.3969, "step": 69254 }, { "epoch": 0.12279661234827953, "grad_norm": 0.1728515625, "learning_rate": 0.0005298155983086554, "loss": 0.1311, "step": 69256 }, { "epoch": 0.12280015851358934, "grad_norm": 0.59375, "learning_rate": 0.0005297695492992595, "loss": 0.1533, "step": 69258 }, { "epoch": 0.12280370467889916, "grad_norm": 0.412109375, "learning_rate": 0.0005297235027837202, "loss": 0.2347, "step": 69260 }, { "epoch": 0.12280725084420897, "grad_norm": 0.69921875, "learning_rate": 0.0005296774587622397, "loss": 0.1681, "step": 69262 }, { "epoch": 0.12281079700951879, "grad_norm": 0.3828125, "learning_rate": 0.0005296314172350178, "loss": 0.1736, "step": 69264 }, { "epoch": 0.12281434317482862, "grad_norm": 0.3125, "learning_rate": 0.0005295853782022575, "loss": 0.1709, "step": 69266 }, { "epoch": 0.12281788934013843, "grad_norm": 0.169921875, "learning_rate": 0.0005295393416641592, "loss": 0.2003, "step": 69268 }, { "epoch": 0.12282143550544825, "grad_norm": 0.1826171875, "learning_rate": 0.0005294933076209243, "loss": 0.1449, "step": 69270 }, { "epoch": 0.12282498167075806, "grad_norm": 0.361328125, "learning_rate": 0.0005294472760727547, "loss": 0.189, "step": 69272 }, { "epoch": 0.12282852783606787, "grad_norm": 0.51171875, "learning_rate": 0.0005294012470198506, "loss": 0.2882, "step": 69274 }, { "epoch": 0.12283207400137769, "grad_norm": 0.462890625, "learning_rate": 0.0005293552204624145, "loss": 0.2181, "step": 69276 }, { "epoch": 0.1228356201666875, "grad_norm": 0.431640625, "learning_rate": 0.0005293091964006471, "loss": 0.1838, "step": 69278 }, { "epoch": 0.12283916633199732, "grad_norm": 0.46875, "learning_rate": 0.0005292631748347498, "loss": 0.2121, "step": 69280 }, { "epoch": 0.12284271249730713, "grad_norm": 0.3671875, "learning_rate": 0.0005292171557649235, "loss": 0.2079, "step": 69282 }, { "epoch": 0.12284625866261695, "grad_norm": 0.40625, "learning_rate": 0.0005291711391913703, "loss": 0.2053, "step": 69284 }, { "epoch": 0.12284980482792676, "grad_norm": 0.353515625, "learning_rate": 0.0005291251251142908, "loss": 0.1489, "step": 69286 }, { "epoch": 0.12285335099323658, "grad_norm": 0.244140625, "learning_rate": 0.0005290791135338865, "loss": 0.1996, "step": 69288 }, { "epoch": 0.12285689715854639, "grad_norm": 0.388671875, "learning_rate": 0.0005290331044503585, "loss": 0.1768, "step": 69290 }, { "epoch": 0.1228604433238562, "grad_norm": 0.609375, "learning_rate": 0.0005289870978639077, "loss": 0.187, "step": 69292 }, { "epoch": 0.12286398948916602, "grad_norm": 0.392578125, "learning_rate": 0.0005289410937747364, "loss": 0.2137, "step": 69294 }, { "epoch": 0.12286753565447583, "grad_norm": 0.76171875, "learning_rate": 0.0005288950921830442, "loss": 0.2189, "step": 69296 }, { "epoch": 0.12287108181978565, "grad_norm": 2.53125, "learning_rate": 0.0005288490930890335, "loss": 0.1609, "step": 69298 }, { "epoch": 0.12287462798509546, "grad_norm": 0.98046875, "learning_rate": 0.0005288030964929049, "loss": 0.2816, "step": 69300 }, { "epoch": 0.12287817415040528, "grad_norm": 0.53125, "learning_rate": 0.0005287571023948603, "loss": 0.2391, "step": 69302 }, { "epoch": 0.12288172031571509, "grad_norm": 0.55078125, "learning_rate": 0.0005287111107950997, "loss": 0.2544, "step": 69304 }, { "epoch": 0.12288526648102491, "grad_norm": 0.9140625, "learning_rate": 0.0005286651216938252, "loss": 0.2452, "step": 69306 }, { "epoch": 0.12288881264633472, "grad_norm": 1.109375, "learning_rate": 0.0005286191350912376, "loss": 0.1614, "step": 69308 }, { "epoch": 0.12289235881164454, "grad_norm": 0.68359375, "learning_rate": 0.0005285731509875376, "loss": 0.1798, "step": 69310 }, { "epoch": 0.12289590497695436, "grad_norm": 1.0390625, "learning_rate": 0.0005285271693829275, "loss": 0.341, "step": 69312 }, { "epoch": 0.12289945114226418, "grad_norm": 0.38671875, "learning_rate": 0.0005284811902776068, "loss": 0.1151, "step": 69314 }, { "epoch": 0.122902997307574, "grad_norm": 0.380859375, "learning_rate": 0.0005284352136717778, "loss": 0.1212, "step": 69316 }, { "epoch": 0.12290654347288381, "grad_norm": 0.30859375, "learning_rate": 0.0005283892395656409, "loss": 0.1697, "step": 69318 }, { "epoch": 0.12291008963819362, "grad_norm": 0.53125, "learning_rate": 0.0005283432679593981, "loss": 0.1082, "step": 69320 }, { "epoch": 0.12291363580350344, "grad_norm": 0.255859375, "learning_rate": 0.0005282972988532491, "loss": 0.2482, "step": 69322 }, { "epoch": 0.12291718196881325, "grad_norm": 1.1484375, "learning_rate": 0.0005282513322473958, "loss": 0.1513, "step": 69324 }, { "epoch": 0.12292072813412307, "grad_norm": 0.40234375, "learning_rate": 0.0005282053681420392, "loss": 0.1528, "step": 69326 }, { "epoch": 0.12292427429943288, "grad_norm": 0.376953125, "learning_rate": 0.0005281594065373802, "loss": 0.2075, "step": 69328 }, { "epoch": 0.1229278204647427, "grad_norm": 0.79296875, "learning_rate": 0.0005281134474336198, "loss": 0.1579, "step": 69330 }, { "epoch": 0.12293136663005251, "grad_norm": 0.376953125, "learning_rate": 0.0005280674908309586, "loss": 0.1858, "step": 69332 }, { "epoch": 0.12293491279536232, "grad_norm": 0.2412109375, "learning_rate": 0.0005280215367295984, "loss": 0.1626, "step": 69334 }, { "epoch": 0.12293845896067214, "grad_norm": 0.203125, "learning_rate": 0.0005279755851297397, "loss": 0.1418, "step": 69336 }, { "epoch": 0.12294200512598195, "grad_norm": 0.51171875, "learning_rate": 0.0005279296360315834, "loss": 0.1485, "step": 69338 }, { "epoch": 0.12294555129129177, "grad_norm": 2.8125, "learning_rate": 0.0005278836894353304, "loss": 0.2676, "step": 69340 }, { "epoch": 0.12294909745660158, "grad_norm": 0.83984375, "learning_rate": 0.000527837745341182, "loss": 0.2038, "step": 69342 }, { "epoch": 0.1229526436219114, "grad_norm": 0.337890625, "learning_rate": 0.000527791803749339, "loss": 0.1609, "step": 69344 }, { "epoch": 0.12295618978722121, "grad_norm": 0.29296875, "learning_rate": 0.0005277458646600024, "loss": 0.2094, "step": 69346 }, { "epoch": 0.12295973595253103, "grad_norm": 1.3203125, "learning_rate": 0.000527699928073373, "loss": 0.2405, "step": 69348 }, { "epoch": 0.12296328211784084, "grad_norm": 0.64453125, "learning_rate": 0.0005276539939896512, "loss": 0.1322, "step": 69350 }, { "epoch": 0.12296682828315066, "grad_norm": 0.333984375, "learning_rate": 0.000527608062409039, "loss": 0.1592, "step": 69352 }, { "epoch": 0.12297037444846047, "grad_norm": 1.4453125, "learning_rate": 0.0005275621333317361, "loss": 0.4421, "step": 69354 }, { "epoch": 0.12297392061377029, "grad_norm": 1.03125, "learning_rate": 0.0005275162067579442, "loss": 0.2241, "step": 69356 }, { "epoch": 0.12297746677908011, "grad_norm": 1.921875, "learning_rate": 0.0005274702826878637, "loss": 0.2137, "step": 69358 }, { "epoch": 0.12298101294438993, "grad_norm": 1.0, "learning_rate": 0.000527424361121696, "loss": 0.1583, "step": 69360 }, { "epoch": 0.12298455910969974, "grad_norm": 0.1875, "learning_rate": 0.0005273784420596412, "loss": 0.1523, "step": 69362 }, { "epoch": 0.12298810527500956, "grad_norm": 1.03125, "learning_rate": 0.0005273325255019006, "loss": 0.212, "step": 69364 }, { "epoch": 0.12299165144031937, "grad_norm": 0.423828125, "learning_rate": 0.0005272866114486749, "loss": 0.1953, "step": 69366 }, { "epoch": 0.12299519760562919, "grad_norm": 0.6328125, "learning_rate": 0.0005272406999001648, "loss": 0.2026, "step": 69368 }, { "epoch": 0.122998743770939, "grad_norm": 0.31640625, "learning_rate": 0.0005271947908565715, "loss": 0.1536, "step": 69370 }, { "epoch": 0.12300228993624882, "grad_norm": 0.1708984375, "learning_rate": 0.000527148884318095, "loss": 0.1082, "step": 69372 }, { "epoch": 0.12300583610155863, "grad_norm": 1.0234375, "learning_rate": 0.0005271029802849368, "loss": 0.3152, "step": 69374 }, { "epoch": 0.12300938226686844, "grad_norm": 0.28125, "learning_rate": 0.0005270570787572973, "loss": 0.1986, "step": 69376 }, { "epoch": 0.12301292843217826, "grad_norm": 0.859375, "learning_rate": 0.0005270111797353774, "loss": 0.1469, "step": 69378 }, { "epoch": 0.12301647459748807, "grad_norm": 0.21484375, "learning_rate": 0.0005269652832193774, "loss": 0.3039, "step": 69380 }, { "epoch": 0.12302002076279789, "grad_norm": 2.328125, "learning_rate": 0.0005269193892094988, "loss": 0.3475, "step": 69382 }, { "epoch": 0.1230235669281077, "grad_norm": 0.20703125, "learning_rate": 0.0005268734977059419, "loss": 0.1566, "step": 69384 }, { "epoch": 0.12302711309341752, "grad_norm": 0.66015625, "learning_rate": 0.0005268276087089073, "loss": 0.342, "step": 69386 }, { "epoch": 0.12303065925872733, "grad_norm": 0.2734375, "learning_rate": 0.0005267817222185958, "loss": 0.2703, "step": 69388 }, { "epoch": 0.12303420542403715, "grad_norm": 0.306640625, "learning_rate": 0.000526735838235208, "loss": 0.2065, "step": 69390 }, { "epoch": 0.12303775158934696, "grad_norm": 0.3828125, "learning_rate": 0.0005266899567589449, "loss": 0.2632, "step": 69392 }, { "epoch": 0.12304129775465678, "grad_norm": 0.58203125, "learning_rate": 0.0005266440777900067, "loss": 0.1893, "step": 69394 }, { "epoch": 0.12304484391996659, "grad_norm": 0.404296875, "learning_rate": 0.0005265982013285944, "loss": 0.1924, "step": 69396 }, { "epoch": 0.1230483900852764, "grad_norm": 0.263671875, "learning_rate": 0.0005265523273749082, "loss": 0.1649, "step": 69398 }, { "epoch": 0.12305193625058622, "grad_norm": 0.39453125, "learning_rate": 0.0005265064559291493, "loss": 0.1742, "step": 69400 }, { "epoch": 0.12305548241589605, "grad_norm": 0.546875, "learning_rate": 0.000526460586991518, "loss": 0.2621, "step": 69402 }, { "epoch": 0.12305902858120586, "grad_norm": 1.7578125, "learning_rate": 0.000526414720562215, "loss": 0.2761, "step": 69404 }, { "epoch": 0.12306257474651568, "grad_norm": 0.310546875, "learning_rate": 0.0005263688566414409, "loss": 0.148, "step": 69406 }, { "epoch": 0.12306612091182549, "grad_norm": 1.4765625, "learning_rate": 0.0005263229952293958, "loss": 0.1997, "step": 69408 }, { "epoch": 0.1230696670771353, "grad_norm": 0.330078125, "learning_rate": 0.0005262771363262812, "loss": 0.1354, "step": 69410 }, { "epoch": 0.12307321324244512, "grad_norm": 0.5390625, "learning_rate": 0.0005262312799322968, "loss": 0.1905, "step": 69412 }, { "epoch": 0.12307675940775493, "grad_norm": 0.55859375, "learning_rate": 0.0005261854260476438, "loss": 0.207, "step": 69414 }, { "epoch": 0.12308030557306475, "grad_norm": 0.65625, "learning_rate": 0.0005261395746725219, "loss": 0.2016, "step": 69416 }, { "epoch": 0.12308385173837456, "grad_norm": 0.5390625, "learning_rate": 0.0005260937258071328, "loss": 0.1732, "step": 69418 }, { "epoch": 0.12308739790368438, "grad_norm": 1.9921875, "learning_rate": 0.0005260478794516758, "loss": 0.1552, "step": 69420 }, { "epoch": 0.12309094406899419, "grad_norm": 0.26171875, "learning_rate": 0.0005260020356063524, "loss": 0.1932, "step": 69422 }, { "epoch": 0.12309449023430401, "grad_norm": 0.2890625, "learning_rate": 0.0005259561942713626, "loss": 0.2123, "step": 69424 }, { "epoch": 0.12309803639961382, "grad_norm": 0.412109375, "learning_rate": 0.0005259103554469071, "loss": 0.1981, "step": 69426 }, { "epoch": 0.12310158256492364, "grad_norm": 0.52734375, "learning_rate": 0.000525864519133186, "loss": 0.2056, "step": 69428 }, { "epoch": 0.12310512873023345, "grad_norm": 0.234375, "learning_rate": 0.0005258186853303998, "loss": 0.1639, "step": 69430 }, { "epoch": 0.12310867489554327, "grad_norm": 0.4921875, "learning_rate": 0.0005257728540387494, "loss": 0.146, "step": 69432 }, { "epoch": 0.12311222106085308, "grad_norm": 0.431640625, "learning_rate": 0.0005257270252584351, "loss": 0.2056, "step": 69434 }, { "epoch": 0.1231157672261629, "grad_norm": 0.203125, "learning_rate": 0.0005256811989896571, "loss": 0.1493, "step": 69436 }, { "epoch": 0.12311931339147271, "grad_norm": 0.193359375, "learning_rate": 0.0005256353752326159, "loss": 0.1476, "step": 69438 }, { "epoch": 0.12312285955678252, "grad_norm": 0.375, "learning_rate": 0.0005255895539875117, "loss": 0.1469, "step": 69440 }, { "epoch": 0.12312640572209234, "grad_norm": 2.09375, "learning_rate": 0.0005255437352545459, "loss": 0.2187, "step": 69442 }, { "epoch": 0.12312995188740215, "grad_norm": 0.42578125, "learning_rate": 0.0005254979190339173, "loss": 0.1555, "step": 69444 }, { "epoch": 0.12313349805271197, "grad_norm": 0.380859375, "learning_rate": 0.0005254521053258274, "loss": 0.2113, "step": 69446 }, { "epoch": 0.1231370442180218, "grad_norm": 0.6171875, "learning_rate": 0.0005254062941304761, "loss": 0.2544, "step": 69448 }, { "epoch": 0.12314059038333161, "grad_norm": 0.5546875, "learning_rate": 0.0005253604854480644, "loss": 0.2371, "step": 69450 }, { "epoch": 0.12314413654864143, "grad_norm": 0.45703125, "learning_rate": 0.0005253146792787918, "loss": 0.1789, "step": 69452 }, { "epoch": 0.12314768271395124, "grad_norm": 0.388671875, "learning_rate": 0.0005252688756228589, "loss": 0.1975, "step": 69454 }, { "epoch": 0.12315122887926105, "grad_norm": 4.34375, "learning_rate": 0.0005252230744804663, "loss": 0.2509, "step": 69456 }, { "epoch": 0.12315477504457087, "grad_norm": 0.6484375, "learning_rate": 0.0005251772758518139, "loss": 0.2829, "step": 69458 }, { "epoch": 0.12315832120988068, "grad_norm": 0.3515625, "learning_rate": 0.0005251314797371024, "loss": 0.1672, "step": 69460 }, { "epoch": 0.1231618673751905, "grad_norm": 0.1982421875, "learning_rate": 0.0005250856861365315, "loss": 0.15, "step": 69462 }, { "epoch": 0.12316541354050031, "grad_norm": 0.76953125, "learning_rate": 0.0005250398950503021, "loss": 0.1639, "step": 69464 }, { "epoch": 0.12316895970581013, "grad_norm": 0.53125, "learning_rate": 0.0005249941064786143, "loss": 0.1695, "step": 69466 }, { "epoch": 0.12317250587111994, "grad_norm": 0.671875, "learning_rate": 0.0005249483204216683, "loss": 0.2212, "step": 69468 }, { "epoch": 0.12317605203642976, "grad_norm": 0.2373046875, "learning_rate": 0.000524902536879664, "loss": 0.2994, "step": 69470 }, { "epoch": 0.12317959820173957, "grad_norm": 0.263671875, "learning_rate": 0.0005248567558528023, "loss": 0.1426, "step": 69472 }, { "epoch": 0.12318314436704939, "grad_norm": 0.2119140625, "learning_rate": 0.000524810977341283, "loss": 0.2157, "step": 69474 }, { "epoch": 0.1231866905323592, "grad_norm": 3.375, "learning_rate": 0.0005247652013453064, "loss": 0.4373, "step": 69476 }, { "epoch": 0.12319023669766901, "grad_norm": 0.271484375, "learning_rate": 0.0005247194278650727, "loss": 0.2591, "step": 69478 }, { "epoch": 0.12319378286297883, "grad_norm": 0.322265625, "learning_rate": 0.0005246736569007816, "loss": 0.184, "step": 69480 }, { "epoch": 0.12319732902828864, "grad_norm": 0.5234375, "learning_rate": 0.0005246278884526342, "loss": 0.1965, "step": 69482 }, { "epoch": 0.12320087519359846, "grad_norm": 0.271484375, "learning_rate": 0.0005245821225208301, "loss": 0.1677, "step": 69484 }, { "epoch": 0.12320442135890827, "grad_norm": 0.25390625, "learning_rate": 0.0005245363591055696, "loss": 0.1323, "step": 69486 }, { "epoch": 0.12320796752421809, "grad_norm": 0.87109375, "learning_rate": 0.0005244905982070524, "loss": 0.1586, "step": 69488 }, { "epoch": 0.1232115136895279, "grad_norm": 0.302734375, "learning_rate": 0.0005244448398254796, "loss": 0.1502, "step": 69490 }, { "epoch": 0.12321505985483772, "grad_norm": 3.296875, "learning_rate": 0.0005243990839610506, "loss": 0.3292, "step": 69492 }, { "epoch": 0.12321860602014754, "grad_norm": 0.2431640625, "learning_rate": 0.0005243533306139656, "loss": 0.3189, "step": 69494 }, { "epoch": 0.12322215218545736, "grad_norm": 1.90625, "learning_rate": 0.0005243075797844248, "loss": 0.1731, "step": 69496 }, { "epoch": 0.12322569835076717, "grad_norm": 0.734375, "learning_rate": 0.000524261831472628, "loss": 0.1759, "step": 69498 }, { "epoch": 0.12322924451607699, "grad_norm": 1.359375, "learning_rate": 0.000524216085678776, "loss": 0.268, "step": 69500 }, { "epoch": 0.1232327906813868, "grad_norm": 2.46875, "learning_rate": 0.0005241703424030678, "loss": 0.2244, "step": 69502 }, { "epoch": 0.12323633684669662, "grad_norm": 0.8125, "learning_rate": 0.0005241246016457044, "loss": 0.187, "step": 69504 }, { "epoch": 0.12323988301200643, "grad_norm": 0.337890625, "learning_rate": 0.0005240788634068851, "loss": 0.1714, "step": 69506 }, { "epoch": 0.12324342917731625, "grad_norm": 0.50390625, "learning_rate": 0.0005240331276868111, "loss": 0.1936, "step": 69508 }, { "epoch": 0.12324697534262606, "grad_norm": 0.416015625, "learning_rate": 0.0005239873944856807, "loss": 0.1531, "step": 69510 }, { "epoch": 0.12325052150793588, "grad_norm": 0.734375, "learning_rate": 0.0005239416638036956, "loss": 0.1968, "step": 69512 }, { "epoch": 0.12325406767324569, "grad_norm": 1.8515625, "learning_rate": 0.0005238959356410548, "loss": 0.3637, "step": 69514 }, { "epoch": 0.1232576138385555, "grad_norm": 0.1552734375, "learning_rate": 0.0005238502099979586, "loss": 0.16, "step": 69516 }, { "epoch": 0.12326116000386532, "grad_norm": 0.482421875, "learning_rate": 0.0005238044868746068, "loss": 0.1431, "step": 69518 }, { "epoch": 0.12326470616917513, "grad_norm": 0.68359375, "learning_rate": 0.0005237587662711992, "loss": 0.2089, "step": 69520 }, { "epoch": 0.12326825233448495, "grad_norm": 0.59375, "learning_rate": 0.0005237130481879364, "loss": 0.1429, "step": 69522 }, { "epoch": 0.12327179849979476, "grad_norm": 0.310546875, "learning_rate": 0.000523667332625018, "loss": 0.2683, "step": 69524 }, { "epoch": 0.12327534466510458, "grad_norm": 0.69140625, "learning_rate": 0.000523621619582644, "loss": 0.2005, "step": 69526 }, { "epoch": 0.12327889083041439, "grad_norm": 0.1982421875, "learning_rate": 0.0005235759090610137, "loss": 0.1754, "step": 69528 }, { "epoch": 0.1232824369957242, "grad_norm": 0.498046875, "learning_rate": 0.000523530201060328, "loss": 0.1511, "step": 69530 }, { "epoch": 0.12328598316103402, "grad_norm": 0.3671875, "learning_rate": 0.0005234844955807862, "loss": 0.2488, "step": 69532 }, { "epoch": 0.12328952932634384, "grad_norm": 0.443359375, "learning_rate": 0.0005234387926225885, "loss": 0.1507, "step": 69534 }, { "epoch": 0.12329307549165365, "grad_norm": 0.62109375, "learning_rate": 0.0005233930921859347, "loss": 0.2658, "step": 69536 }, { "epoch": 0.12329662165696348, "grad_norm": 0.30859375, "learning_rate": 0.0005233473942710241, "loss": 0.1566, "step": 69538 }, { "epoch": 0.1233001678222733, "grad_norm": 0.3046875, "learning_rate": 0.0005233016988780576, "loss": 0.1333, "step": 69540 }, { "epoch": 0.12330371398758311, "grad_norm": 0.765625, "learning_rate": 0.000523256006007234, "loss": 0.1654, "step": 69542 }, { "epoch": 0.12330726015289292, "grad_norm": 0.86328125, "learning_rate": 0.0005232103156587539, "loss": 0.1972, "step": 69544 }, { "epoch": 0.12331080631820274, "grad_norm": 0.466796875, "learning_rate": 0.0005231646278328165, "loss": 0.2073, "step": 69546 }, { "epoch": 0.12331435248351255, "grad_norm": 0.2373046875, "learning_rate": 0.0005231189425296225, "loss": 0.1797, "step": 69548 }, { "epoch": 0.12331789864882237, "grad_norm": 1.140625, "learning_rate": 0.0005230732597493705, "loss": 0.1782, "step": 69550 }, { "epoch": 0.12332144481413218, "grad_norm": 0.53515625, "learning_rate": 0.0005230275794922615, "loss": 0.1432, "step": 69552 }, { "epoch": 0.123324990979442, "grad_norm": 0.51171875, "learning_rate": 0.0005229819017584946, "loss": 0.1604, "step": 69554 }, { "epoch": 0.12332853714475181, "grad_norm": 0.341796875, "learning_rate": 0.0005229362265482693, "loss": 0.1868, "step": 69556 }, { "epoch": 0.12333208331006162, "grad_norm": 0.39453125, "learning_rate": 0.0005228905538617865, "loss": 0.1609, "step": 69558 }, { "epoch": 0.12333562947537144, "grad_norm": 0.2470703125, "learning_rate": 0.0005228448836992446, "loss": 0.1962, "step": 69560 }, { "epoch": 0.12333917564068125, "grad_norm": 0.75, "learning_rate": 0.0005227992160608441, "loss": 0.3216, "step": 69562 }, { "epoch": 0.12334272180599107, "grad_norm": 3.0, "learning_rate": 0.0005227535509467842, "loss": 0.2399, "step": 69564 }, { "epoch": 0.12334626797130088, "grad_norm": 0.87890625, "learning_rate": 0.0005227078883572658, "loss": 0.2086, "step": 69566 }, { "epoch": 0.1233498141366107, "grad_norm": 0.6875, "learning_rate": 0.0005226622282924869, "loss": 0.2291, "step": 69568 }, { "epoch": 0.12335336030192051, "grad_norm": 0.369140625, "learning_rate": 0.0005226165707526485, "loss": 0.1638, "step": 69570 }, { "epoch": 0.12335690646723033, "grad_norm": 1.09375, "learning_rate": 0.0005225709157379499, "loss": 0.1585, "step": 69572 }, { "epoch": 0.12336045263254014, "grad_norm": 0.37890625, "learning_rate": 0.0005225252632485906, "loss": 0.3071, "step": 69574 }, { "epoch": 0.12336399879784996, "grad_norm": 1.3359375, "learning_rate": 0.0005224796132847705, "loss": 0.1659, "step": 69576 }, { "epoch": 0.12336754496315977, "grad_norm": 0.4765625, "learning_rate": 0.0005224339658466885, "loss": 0.1846, "step": 69578 }, { "epoch": 0.12337109112846958, "grad_norm": 0.431640625, "learning_rate": 0.0005223883209345455, "loss": 0.2015, "step": 69580 }, { "epoch": 0.1233746372937794, "grad_norm": 0.1806640625, "learning_rate": 0.0005223426785485402, "loss": 0.2045, "step": 69582 }, { "epoch": 0.12337818345908923, "grad_norm": 0.42578125, "learning_rate": 0.0005222970386888725, "loss": 0.1986, "step": 69584 }, { "epoch": 0.12338172962439904, "grad_norm": 0.2578125, "learning_rate": 0.0005222514013557418, "loss": 0.1838, "step": 69586 }, { "epoch": 0.12338527578970886, "grad_norm": 0.3984375, "learning_rate": 0.0005222057665493482, "loss": 0.1656, "step": 69588 }, { "epoch": 0.12338882195501867, "grad_norm": 1.25, "learning_rate": 0.0005221601342698908, "loss": 0.2192, "step": 69590 }, { "epoch": 0.12339236812032849, "grad_norm": 0.466796875, "learning_rate": 0.0005221145045175695, "loss": 0.1561, "step": 69592 }, { "epoch": 0.1233959142856383, "grad_norm": 1.078125, "learning_rate": 0.0005220688772925834, "loss": 0.1642, "step": 69594 }, { "epoch": 0.12339946045094811, "grad_norm": 0.275390625, "learning_rate": 0.0005220232525951324, "loss": 0.1693, "step": 69596 }, { "epoch": 0.12340300661625793, "grad_norm": 0.365234375, "learning_rate": 0.0005219776304254165, "loss": 0.1784, "step": 69598 }, { "epoch": 0.12340655278156774, "grad_norm": 2.59375, "learning_rate": 0.0005219320107836339, "loss": 0.1762, "step": 69600 }, { "epoch": 0.12341009894687756, "grad_norm": 0.322265625, "learning_rate": 0.0005218863936699852, "loss": 0.1927, "step": 69602 }, { "epoch": 0.12341364511218737, "grad_norm": 0.25390625, "learning_rate": 0.0005218407790846695, "loss": 0.252, "step": 69604 }, { "epoch": 0.12341719127749719, "grad_norm": 0.2080078125, "learning_rate": 0.0005217951670278868, "loss": 0.1833, "step": 69606 }, { "epoch": 0.123420737442807, "grad_norm": 0.244140625, "learning_rate": 0.0005217495574998356, "loss": 0.1917, "step": 69608 }, { "epoch": 0.12342428360811682, "grad_norm": 1.265625, "learning_rate": 0.0005217039505007164, "loss": 0.2087, "step": 69610 }, { "epoch": 0.12342782977342663, "grad_norm": 0.2314453125, "learning_rate": 0.0005216583460307282, "loss": 0.1872, "step": 69612 }, { "epoch": 0.12343137593873645, "grad_norm": 0.419921875, "learning_rate": 0.0005216127440900702, "loss": 0.1628, "step": 69614 }, { "epoch": 0.12343492210404626, "grad_norm": 0.353515625, "learning_rate": 0.0005215671446789422, "loss": 0.1541, "step": 69616 }, { "epoch": 0.12343846826935607, "grad_norm": 0.451171875, "learning_rate": 0.0005215215477975435, "loss": 0.1726, "step": 69618 }, { "epoch": 0.12344201443466589, "grad_norm": 0.322265625, "learning_rate": 0.0005214759534460735, "loss": 0.2101, "step": 69620 }, { "epoch": 0.1234455605999757, "grad_norm": 0.34375, "learning_rate": 0.0005214303616247318, "loss": 0.1009, "step": 69622 }, { "epoch": 0.12344910676528552, "grad_norm": 0.4609375, "learning_rate": 0.0005213847723337175, "loss": 0.1222, "step": 69624 }, { "epoch": 0.12345265293059533, "grad_norm": 0.3125, "learning_rate": 0.0005213391855732301, "loss": 0.1853, "step": 69626 }, { "epoch": 0.12345619909590515, "grad_norm": 0.193359375, "learning_rate": 0.0005212936013434688, "loss": 0.1645, "step": 69628 }, { "epoch": 0.12345974526121498, "grad_norm": 0.46875, "learning_rate": 0.0005212480196446337, "loss": 0.1922, "step": 69630 }, { "epoch": 0.12346329142652479, "grad_norm": 0.1826171875, "learning_rate": 0.0005212024404769229, "loss": 0.1815, "step": 69632 }, { "epoch": 0.1234668375918346, "grad_norm": 0.337890625, "learning_rate": 0.0005211568638405368, "loss": 0.1429, "step": 69634 }, { "epoch": 0.12347038375714442, "grad_norm": 0.2490234375, "learning_rate": 0.000521111289735674, "loss": 0.2071, "step": 69636 }, { "epoch": 0.12347392992245423, "grad_norm": 0.298828125, "learning_rate": 0.0005210657181625345, "loss": 0.1602, "step": 69638 }, { "epoch": 0.12347747608776405, "grad_norm": 0.62890625, "learning_rate": 0.0005210201491213174, "loss": 0.1662, "step": 69640 }, { "epoch": 0.12348102225307386, "grad_norm": 0.2578125, "learning_rate": 0.0005209745826122216, "loss": 0.2946, "step": 69642 }, { "epoch": 0.12348456841838368, "grad_norm": 0.8203125, "learning_rate": 0.0005209290186354468, "loss": 0.1736, "step": 69644 }, { "epoch": 0.12348811458369349, "grad_norm": 0.6640625, "learning_rate": 0.0005208834571911917, "loss": 0.2249, "step": 69646 }, { "epoch": 0.1234916607490033, "grad_norm": 0.8046875, "learning_rate": 0.0005208378982796566, "loss": 0.2399, "step": 69648 }, { "epoch": 0.12349520691431312, "grad_norm": 1.59375, "learning_rate": 0.0005207923419010397, "loss": 0.2231, "step": 69650 }, { "epoch": 0.12349875307962294, "grad_norm": 0.4453125, "learning_rate": 0.0005207467880555407, "loss": 0.202, "step": 69652 }, { "epoch": 0.12350229924493275, "grad_norm": 0.404296875, "learning_rate": 0.0005207012367433586, "loss": 0.1537, "step": 69654 }, { "epoch": 0.12350584541024257, "grad_norm": 3.34375, "learning_rate": 0.0005206556879646933, "loss": 0.2343, "step": 69656 }, { "epoch": 0.12350939157555238, "grad_norm": 0.80859375, "learning_rate": 0.0005206101417197428, "loss": 0.1833, "step": 69658 }, { "epoch": 0.1235129377408622, "grad_norm": 0.55859375, "learning_rate": 0.0005205645980087077, "loss": 0.1821, "step": 69660 }, { "epoch": 0.12351648390617201, "grad_norm": 0.55859375, "learning_rate": 0.0005205190568317861, "loss": 0.312, "step": 69662 }, { "epoch": 0.12352003007148182, "grad_norm": 0.25390625, "learning_rate": 0.0005204735181891777, "loss": 0.2184, "step": 69664 }, { "epoch": 0.12352357623679164, "grad_norm": 0.2314453125, "learning_rate": 0.0005204279820810815, "loss": 0.168, "step": 69666 }, { "epoch": 0.12352712240210145, "grad_norm": 0.31640625, "learning_rate": 0.0005203824485076963, "loss": 0.1559, "step": 69668 }, { "epoch": 0.12353066856741127, "grad_norm": 0.43359375, "learning_rate": 0.0005203369174692218, "loss": 0.1533, "step": 69670 }, { "epoch": 0.12353421473272108, "grad_norm": 0.28125, "learning_rate": 0.0005202913889658571, "loss": 0.1686, "step": 69672 }, { "epoch": 0.12353776089803091, "grad_norm": 4.0, "learning_rate": 0.0005202458629978011, "loss": 0.2964, "step": 69674 }, { "epoch": 0.12354130706334072, "grad_norm": 0.578125, "learning_rate": 0.0005202003395652526, "loss": 0.1979, "step": 69676 }, { "epoch": 0.12354485322865054, "grad_norm": 1.0234375, "learning_rate": 0.0005201548186684113, "loss": 0.2993, "step": 69678 }, { "epoch": 0.12354839939396035, "grad_norm": 0.255859375, "learning_rate": 0.000520109300307476, "loss": 0.2264, "step": 69680 }, { "epoch": 0.12355194555927017, "grad_norm": 0.85546875, "learning_rate": 0.0005200637844826457, "loss": 0.1633, "step": 69682 }, { "epoch": 0.12355549172457998, "grad_norm": 0.369140625, "learning_rate": 0.0005200182711941197, "loss": 0.2149, "step": 69684 }, { "epoch": 0.1235590378898898, "grad_norm": 0.412109375, "learning_rate": 0.0005199727604420965, "loss": 0.1384, "step": 69686 }, { "epoch": 0.12356258405519961, "grad_norm": 0.6015625, "learning_rate": 0.0005199272522267761, "loss": 0.1632, "step": 69688 }, { "epoch": 0.12356613022050943, "grad_norm": 1.953125, "learning_rate": 0.0005198817465483565, "loss": 0.1766, "step": 69690 }, { "epoch": 0.12356967638581924, "grad_norm": 0.345703125, "learning_rate": 0.0005198362434070373, "loss": 0.1678, "step": 69692 }, { "epoch": 0.12357322255112906, "grad_norm": 0.287109375, "learning_rate": 0.0005197907428030172, "loss": 0.1978, "step": 69694 }, { "epoch": 0.12357676871643887, "grad_norm": 0.72265625, "learning_rate": 0.0005197452447364959, "loss": 0.1808, "step": 69696 }, { "epoch": 0.12358031488174868, "grad_norm": 0.390625, "learning_rate": 0.0005196997492076714, "loss": 0.13, "step": 69698 }, { "epoch": 0.1235838610470585, "grad_norm": 0.27734375, "learning_rate": 0.0005196542562167433, "loss": 0.1916, "step": 69700 }, { "epoch": 0.12358740721236831, "grad_norm": 0.404296875, "learning_rate": 0.0005196087657639101, "loss": 0.2171, "step": 69702 }, { "epoch": 0.12359095337767813, "grad_norm": 0.376953125, "learning_rate": 0.0005195632778493716, "loss": 0.1599, "step": 69704 }, { "epoch": 0.12359449954298794, "grad_norm": 0.72265625, "learning_rate": 0.0005195177924733256, "loss": 0.165, "step": 69706 }, { "epoch": 0.12359804570829776, "grad_norm": 0.2138671875, "learning_rate": 0.0005194723096359716, "loss": 0.1511, "step": 69708 }, { "epoch": 0.12360159187360757, "grad_norm": 1.5078125, "learning_rate": 0.0005194268293375086, "loss": 0.2528, "step": 69710 }, { "epoch": 0.12360513803891739, "grad_norm": 0.46484375, "learning_rate": 0.0005193813515781358, "loss": 0.1643, "step": 69712 }, { "epoch": 0.1236086842042272, "grad_norm": 0.380859375, "learning_rate": 0.0005193358763580513, "loss": 0.1639, "step": 69714 }, { "epoch": 0.12361223036953702, "grad_norm": 0.9765625, "learning_rate": 0.0005192904036774543, "loss": 0.1469, "step": 69716 }, { "epoch": 0.12361577653484683, "grad_norm": 0.2158203125, "learning_rate": 0.000519244933536544, "loss": 0.208, "step": 69718 }, { "epoch": 0.12361932270015666, "grad_norm": 0.306640625, "learning_rate": 0.000519199465935519, "loss": 0.1611, "step": 69720 }, { "epoch": 0.12362286886546647, "grad_norm": 0.3125, "learning_rate": 0.0005191540008745783, "loss": 0.2094, "step": 69722 }, { "epoch": 0.12362641503077629, "grad_norm": 0.208984375, "learning_rate": 0.0005191085383539205, "loss": 0.1682, "step": 69724 }, { "epoch": 0.1236299611960861, "grad_norm": 0.27734375, "learning_rate": 0.0005190630783737442, "loss": 0.1417, "step": 69726 }, { "epoch": 0.12363350736139592, "grad_norm": 0.482421875, "learning_rate": 0.0005190176209342489, "loss": 0.2721, "step": 69728 }, { "epoch": 0.12363705352670573, "grad_norm": 0.365234375, "learning_rate": 0.000518972166035633, "loss": 0.1498, "step": 69730 }, { "epoch": 0.12364059969201555, "grad_norm": 0.69140625, "learning_rate": 0.0005189267136780953, "loss": 0.2361, "step": 69732 }, { "epoch": 0.12364414585732536, "grad_norm": 0.52734375, "learning_rate": 0.0005188812638618344, "loss": 0.1673, "step": 69734 }, { "epoch": 0.12364769202263518, "grad_norm": 0.408203125, "learning_rate": 0.0005188358165870496, "loss": 0.2082, "step": 69736 }, { "epoch": 0.12365123818794499, "grad_norm": 0.38671875, "learning_rate": 0.0005187903718539394, "loss": 0.1078, "step": 69738 }, { "epoch": 0.1236547843532548, "grad_norm": 0.212890625, "learning_rate": 0.0005187449296627025, "loss": 0.1528, "step": 69740 }, { "epoch": 0.12365833051856462, "grad_norm": 0.7421875, "learning_rate": 0.0005186994900135375, "loss": 0.1389, "step": 69742 }, { "epoch": 0.12366187668387443, "grad_norm": 0.2080078125, "learning_rate": 0.0005186540529066432, "loss": 0.134, "step": 69744 }, { "epoch": 0.12366542284918425, "grad_norm": 0.353515625, "learning_rate": 0.0005186086183422187, "loss": 0.1918, "step": 69746 }, { "epoch": 0.12366896901449406, "grad_norm": 0.86328125, "learning_rate": 0.0005185631863204621, "loss": 0.1481, "step": 69748 }, { "epoch": 0.12367251517980388, "grad_norm": 0.28515625, "learning_rate": 0.0005185177568415724, "loss": 0.1765, "step": 69750 }, { "epoch": 0.12367606134511369, "grad_norm": 3.78125, "learning_rate": 0.0005184723299057482, "loss": 0.1906, "step": 69752 }, { "epoch": 0.1236796075104235, "grad_norm": 0.24609375, "learning_rate": 0.0005184269055131888, "loss": 0.163, "step": 69754 }, { "epoch": 0.12368315367573332, "grad_norm": 0.431640625, "learning_rate": 0.0005183814836640916, "loss": 0.2061, "step": 69756 }, { "epoch": 0.12368669984104314, "grad_norm": 0.33203125, "learning_rate": 0.0005183360643586563, "loss": 0.1623, "step": 69758 }, { "epoch": 0.12369024600635295, "grad_norm": 1.046875, "learning_rate": 0.0005182906475970812, "loss": 0.2704, "step": 69760 }, { "epoch": 0.12369379217166276, "grad_norm": 0.26953125, "learning_rate": 0.0005182452333795649, "loss": 0.1355, "step": 69762 }, { "epoch": 0.12369733833697258, "grad_norm": 0.337890625, "learning_rate": 0.000518199821706306, "loss": 0.1799, "step": 69764 }, { "epoch": 0.12370088450228241, "grad_norm": 0.5234375, "learning_rate": 0.0005181544125775027, "loss": 0.2004, "step": 69766 }, { "epoch": 0.12370443066759222, "grad_norm": 0.296875, "learning_rate": 0.0005181090059933546, "loss": 0.1412, "step": 69768 }, { "epoch": 0.12370797683290204, "grad_norm": 0.8046875, "learning_rate": 0.0005180636019540594, "loss": 0.1488, "step": 69770 }, { "epoch": 0.12371152299821185, "grad_norm": 0.453125, "learning_rate": 0.0005180182004598161, "loss": 0.1451, "step": 69772 }, { "epoch": 0.12371506916352167, "grad_norm": 0.1787109375, "learning_rate": 0.0005179728015108229, "loss": 0.2414, "step": 69774 }, { "epoch": 0.12371861532883148, "grad_norm": 0.416015625, "learning_rate": 0.000517927405107279, "loss": 0.1597, "step": 69776 }, { "epoch": 0.1237221614941413, "grad_norm": 0.287109375, "learning_rate": 0.0005178820112493823, "loss": 0.269, "step": 69778 }, { "epoch": 0.12372570765945111, "grad_norm": 0.439453125, "learning_rate": 0.0005178366199373316, "loss": 0.1986, "step": 69780 }, { "epoch": 0.12372925382476092, "grad_norm": 0.369140625, "learning_rate": 0.0005177912311713255, "loss": 0.1755, "step": 69782 }, { "epoch": 0.12373279999007074, "grad_norm": 0.29296875, "learning_rate": 0.000517745844951562, "loss": 0.1934, "step": 69784 }, { "epoch": 0.12373634615538055, "grad_norm": 4.875, "learning_rate": 0.0005177004612782406, "loss": 0.5657, "step": 69786 }, { "epoch": 0.12373989232069037, "grad_norm": 4.65625, "learning_rate": 0.0005176550801515585, "loss": 0.3475, "step": 69788 }, { "epoch": 0.12374343848600018, "grad_norm": 1.71875, "learning_rate": 0.0005176097015717149, "loss": 0.2036, "step": 69790 }, { "epoch": 0.12374698465131, "grad_norm": 0.6640625, "learning_rate": 0.0005175643255389084, "loss": 0.1629, "step": 69792 }, { "epoch": 0.12375053081661981, "grad_norm": 0.255859375, "learning_rate": 0.0005175189520533372, "loss": 0.1701, "step": 69794 }, { "epoch": 0.12375407698192963, "grad_norm": 0.486328125, "learning_rate": 0.0005174735811151998, "loss": 0.2048, "step": 69796 }, { "epoch": 0.12375762314723944, "grad_norm": 0.421875, "learning_rate": 0.0005174282127246941, "loss": 0.1843, "step": 69798 }, { "epoch": 0.12376116931254925, "grad_norm": 0.3515625, "learning_rate": 0.0005173828468820194, "loss": 0.1267, "step": 69800 }, { "epoch": 0.12376471547785907, "grad_norm": 0.98046875, "learning_rate": 0.0005173374835873738, "loss": 0.2679, "step": 69802 }, { "epoch": 0.12376826164316888, "grad_norm": 0.2890625, "learning_rate": 0.0005172921228409555, "loss": 0.1681, "step": 69804 }, { "epoch": 0.1237718078084787, "grad_norm": 0.416015625, "learning_rate": 0.0005172467646429626, "loss": 0.327, "step": 69806 }, { "epoch": 0.12377535397378851, "grad_norm": 0.404296875, "learning_rate": 0.0005172014089935943, "loss": 0.162, "step": 69808 }, { "epoch": 0.12377890013909834, "grad_norm": 0.314453125, "learning_rate": 0.0005171560558930484, "loss": 0.166, "step": 69810 }, { "epoch": 0.12378244630440816, "grad_norm": 0.61328125, "learning_rate": 0.0005171107053415233, "loss": 0.2266, "step": 69812 }, { "epoch": 0.12378599246971797, "grad_norm": 0.2001953125, "learning_rate": 0.0005170653573392177, "loss": 0.1313, "step": 69814 }, { "epoch": 0.12378953863502778, "grad_norm": 1.6953125, "learning_rate": 0.000517020011886329, "loss": 0.1885, "step": 69816 }, { "epoch": 0.1237930848003376, "grad_norm": 0.283203125, "learning_rate": 0.0005169746689830566, "loss": 0.3819, "step": 69818 }, { "epoch": 0.12379663096564741, "grad_norm": 2.171875, "learning_rate": 0.0005169293286295983, "loss": 0.2101, "step": 69820 }, { "epoch": 0.12380017713095723, "grad_norm": 0.2890625, "learning_rate": 0.0005168839908261525, "loss": 0.1411, "step": 69822 }, { "epoch": 0.12380372329626704, "grad_norm": 0.419921875, "learning_rate": 0.000516838655572917, "loss": 0.1752, "step": 69824 }, { "epoch": 0.12380726946157686, "grad_norm": 0.6484375, "learning_rate": 0.0005167933228700908, "loss": 0.2092, "step": 69826 }, { "epoch": 0.12381081562688667, "grad_norm": 0.333984375, "learning_rate": 0.0005167479927178718, "loss": 0.132, "step": 69828 }, { "epoch": 0.12381436179219649, "grad_norm": 0.2578125, "learning_rate": 0.0005167026651164584, "loss": 0.1523, "step": 69830 }, { "epoch": 0.1238179079575063, "grad_norm": 0.66796875, "learning_rate": 0.0005166573400660486, "loss": 0.189, "step": 69832 }, { "epoch": 0.12382145412281612, "grad_norm": 0.458984375, "learning_rate": 0.0005166120175668405, "loss": 0.1386, "step": 69834 }, { "epoch": 0.12382500028812593, "grad_norm": 0.33203125, "learning_rate": 0.0005165666976190333, "loss": 0.1756, "step": 69836 }, { "epoch": 0.12382854645343574, "grad_norm": 0.216796875, "learning_rate": 0.0005165213802228236, "loss": 0.1663, "step": 69838 }, { "epoch": 0.12383209261874556, "grad_norm": 0.60546875, "learning_rate": 0.0005164760653784109, "loss": 0.1668, "step": 69840 }, { "epoch": 0.12383563878405537, "grad_norm": 0.58203125, "learning_rate": 0.0005164307530859927, "loss": 0.1918, "step": 69842 }, { "epoch": 0.12383918494936519, "grad_norm": 0.59765625, "learning_rate": 0.0005163854433457678, "loss": 0.1763, "step": 69844 }, { "epoch": 0.123842731114675, "grad_norm": 0.287109375, "learning_rate": 0.0005163401361579336, "loss": 0.1365, "step": 69846 }, { "epoch": 0.12384627727998482, "grad_norm": 0.1533203125, "learning_rate": 0.0005162948315226887, "loss": 0.1309, "step": 69848 }, { "epoch": 0.12384982344529463, "grad_norm": 0.37890625, "learning_rate": 0.0005162495294402314, "loss": 0.3463, "step": 69850 }, { "epoch": 0.12385336961060445, "grad_norm": 0.4140625, "learning_rate": 0.0005162042299107594, "loss": 0.1682, "step": 69852 }, { "epoch": 0.12385691577591426, "grad_norm": 0.43359375, "learning_rate": 0.0005161589329344712, "loss": 0.171, "step": 69854 }, { "epoch": 0.12386046194122409, "grad_norm": 0.439453125, "learning_rate": 0.000516113638511564, "loss": 0.1633, "step": 69856 }, { "epoch": 0.1238640081065339, "grad_norm": 0.232421875, "learning_rate": 0.0005160683466422372, "loss": 0.1633, "step": 69858 }, { "epoch": 0.12386755427184372, "grad_norm": 0.64453125, "learning_rate": 0.0005160230573266882, "loss": 0.1498, "step": 69860 }, { "epoch": 0.12387110043715353, "grad_norm": 0.65234375, "learning_rate": 0.000515977770565115, "loss": 0.1396, "step": 69862 }, { "epoch": 0.12387464660246335, "grad_norm": 0.37109375, "learning_rate": 0.0005159324863577156, "loss": 0.1994, "step": 69864 }, { "epoch": 0.12387819276777316, "grad_norm": 0.703125, "learning_rate": 0.0005158872047046887, "loss": 0.1629, "step": 69866 }, { "epoch": 0.12388173893308298, "grad_norm": 0.5, "learning_rate": 0.0005158419256062317, "loss": 0.2232, "step": 69868 }, { "epoch": 0.12388528509839279, "grad_norm": 0.68359375, "learning_rate": 0.0005157966490625428, "loss": 0.1465, "step": 69870 }, { "epoch": 0.1238888312637026, "grad_norm": 0.416015625, "learning_rate": 0.0005157513750738198, "loss": 0.141, "step": 69872 }, { "epoch": 0.12389237742901242, "grad_norm": 0.291015625, "learning_rate": 0.0005157061036402611, "loss": 0.2082, "step": 69874 }, { "epoch": 0.12389592359432224, "grad_norm": 0.298828125, "learning_rate": 0.0005156608347620647, "loss": 0.191, "step": 69876 }, { "epoch": 0.12389946975963205, "grad_norm": 0.333984375, "learning_rate": 0.0005156155684394279, "loss": 0.1988, "step": 69878 }, { "epoch": 0.12390301592494186, "grad_norm": 0.2578125, "learning_rate": 0.0005155703046725495, "loss": 0.1692, "step": 69880 }, { "epoch": 0.12390656209025168, "grad_norm": 0.451171875, "learning_rate": 0.0005155250434616267, "loss": 0.1827, "step": 69882 }, { "epoch": 0.1239101082555615, "grad_norm": 0.45703125, "learning_rate": 0.0005154797848068586, "loss": 0.1769, "step": 69884 }, { "epoch": 0.12391365442087131, "grad_norm": 0.345703125, "learning_rate": 0.0005154345287084418, "loss": 0.1778, "step": 69886 }, { "epoch": 0.12391720058618112, "grad_norm": 0.53515625, "learning_rate": 0.000515389275166575, "loss": 0.1658, "step": 69888 }, { "epoch": 0.12392074675149094, "grad_norm": 1.0078125, "learning_rate": 0.000515344024181456, "loss": 0.1982, "step": 69890 }, { "epoch": 0.12392429291680075, "grad_norm": 0.283203125, "learning_rate": 0.0005152987757532823, "loss": 0.2951, "step": 69892 }, { "epoch": 0.12392783908211057, "grad_norm": 0.69140625, "learning_rate": 0.0005152535298822527, "loss": 0.1916, "step": 69894 }, { "epoch": 0.12393138524742038, "grad_norm": 0.2373046875, "learning_rate": 0.0005152082865685639, "loss": 0.17, "step": 69896 }, { "epoch": 0.1239349314127302, "grad_norm": 0.703125, "learning_rate": 0.0005151630458124147, "loss": 0.1569, "step": 69898 }, { "epoch": 0.12393847757804001, "grad_norm": 0.373046875, "learning_rate": 0.0005151178076140024, "loss": 0.1743, "step": 69900 }, { "epoch": 0.12394202374334984, "grad_norm": 0.416015625, "learning_rate": 0.0005150725719735255, "loss": 0.1641, "step": 69902 }, { "epoch": 0.12394556990865965, "grad_norm": 0.439453125, "learning_rate": 0.0005150273388911809, "loss": 0.2044, "step": 69904 }, { "epoch": 0.12394911607396947, "grad_norm": 0.345703125, "learning_rate": 0.0005149821083671673, "loss": 0.2018, "step": 69906 }, { "epoch": 0.12395266223927928, "grad_norm": 0.6484375, "learning_rate": 0.000514936880401682, "loss": 0.1661, "step": 69908 }, { "epoch": 0.1239562084045891, "grad_norm": 2.03125, "learning_rate": 0.000514891654994923, "loss": 0.3958, "step": 69910 }, { "epoch": 0.12395975456989891, "grad_norm": 0.62890625, "learning_rate": 0.0005148464321470881, "loss": 0.2603, "step": 69912 }, { "epoch": 0.12396330073520873, "grad_norm": 0.44140625, "learning_rate": 0.0005148012118583744, "loss": 0.2532, "step": 69914 }, { "epoch": 0.12396684690051854, "grad_norm": 1.8125, "learning_rate": 0.0005147559941289809, "loss": 0.2095, "step": 69916 }, { "epoch": 0.12397039306582835, "grad_norm": 0.60546875, "learning_rate": 0.0005147107789591048, "loss": 0.2757, "step": 69918 }, { "epoch": 0.12397393923113817, "grad_norm": 0.373046875, "learning_rate": 0.0005146655663489435, "loss": 0.1634, "step": 69920 }, { "epoch": 0.12397748539644798, "grad_norm": 2.0625, "learning_rate": 0.0005146203562986949, "loss": 0.1665, "step": 69922 }, { "epoch": 0.1239810315617578, "grad_norm": 0.27734375, "learning_rate": 0.000514575148808557, "loss": 0.1612, "step": 69924 }, { "epoch": 0.12398457772706761, "grad_norm": 0.61328125, "learning_rate": 0.0005145299438787275, "loss": 0.1863, "step": 69926 }, { "epoch": 0.12398812389237743, "grad_norm": 0.302734375, "learning_rate": 0.0005144847415094038, "loss": 0.1967, "step": 69928 }, { "epoch": 0.12399167005768724, "grad_norm": 0.498046875, "learning_rate": 0.0005144395417007837, "loss": 0.2238, "step": 69930 }, { "epoch": 0.12399521622299706, "grad_norm": 0.7421875, "learning_rate": 0.0005143943444530648, "loss": 0.1906, "step": 69932 }, { "epoch": 0.12399876238830687, "grad_norm": 0.318359375, "learning_rate": 0.0005143491497664453, "loss": 0.1701, "step": 69934 }, { "epoch": 0.12400230855361669, "grad_norm": 0.353515625, "learning_rate": 0.0005143039576411219, "loss": 0.1492, "step": 69936 }, { "epoch": 0.1240058547189265, "grad_norm": 1.6640625, "learning_rate": 0.0005142587680772931, "loss": 0.2272, "step": 69938 }, { "epoch": 0.12400940088423631, "grad_norm": 4.78125, "learning_rate": 0.0005142135810751558, "loss": 0.3211, "step": 69940 }, { "epoch": 0.12401294704954613, "grad_norm": 0.5078125, "learning_rate": 0.0005141683966349088, "loss": 0.1761, "step": 69942 }, { "epoch": 0.12401649321485594, "grad_norm": 0.18359375, "learning_rate": 0.0005141232147567483, "loss": 0.1679, "step": 69944 }, { "epoch": 0.12402003938016576, "grad_norm": 0.349609375, "learning_rate": 0.0005140780354408727, "loss": 0.1214, "step": 69946 }, { "epoch": 0.12402358554547559, "grad_norm": 0.64453125, "learning_rate": 0.0005140328586874796, "loss": 0.1968, "step": 69948 }, { "epoch": 0.1240271317107854, "grad_norm": 0.375, "learning_rate": 0.0005139876844967664, "loss": 0.1921, "step": 69950 }, { "epoch": 0.12403067787609522, "grad_norm": 0.51171875, "learning_rate": 0.0005139425128689306, "loss": 0.2016, "step": 69952 }, { "epoch": 0.12403422404140503, "grad_norm": 0.40234375, "learning_rate": 0.0005138973438041695, "loss": 0.1328, "step": 69954 }, { "epoch": 0.12403777020671485, "grad_norm": 0.314453125, "learning_rate": 0.0005138521773026812, "loss": 0.1506, "step": 69956 }, { "epoch": 0.12404131637202466, "grad_norm": 0.28125, "learning_rate": 0.0005138070133646631, "loss": 0.1528, "step": 69958 }, { "epoch": 0.12404486253733447, "grad_norm": 0.1689453125, "learning_rate": 0.0005137618519903127, "loss": 0.1858, "step": 69960 }, { "epoch": 0.12404840870264429, "grad_norm": 0.37109375, "learning_rate": 0.000513716693179827, "loss": 0.1545, "step": 69962 }, { "epoch": 0.1240519548679541, "grad_norm": 0.33984375, "learning_rate": 0.0005136715369334044, "loss": 0.209, "step": 69964 }, { "epoch": 0.12405550103326392, "grad_norm": 0.330078125, "learning_rate": 0.0005136263832512418, "loss": 0.1604, "step": 69966 }, { "epoch": 0.12405904719857373, "grad_norm": 0.419921875, "learning_rate": 0.0005135812321335367, "loss": 0.1826, "step": 69968 }, { "epoch": 0.12406259336388355, "grad_norm": 1.109375, "learning_rate": 0.0005135360835804869, "loss": 0.1845, "step": 69970 }, { "epoch": 0.12406613952919336, "grad_norm": 1.390625, "learning_rate": 0.0005134909375922892, "loss": 0.2474, "step": 69972 }, { "epoch": 0.12406968569450318, "grad_norm": 0.39453125, "learning_rate": 0.0005134457941691415, "loss": 0.1593, "step": 69974 }, { "epoch": 0.12407323185981299, "grad_norm": 1.6640625, "learning_rate": 0.0005134006533112415, "loss": 0.2436, "step": 69976 }, { "epoch": 0.1240767780251228, "grad_norm": 0.546875, "learning_rate": 0.0005133555150187863, "loss": 0.1629, "step": 69978 }, { "epoch": 0.12408032419043262, "grad_norm": 0.16796875, "learning_rate": 0.0005133103792919732, "loss": 0.1451, "step": 69980 }, { "epoch": 0.12408387035574243, "grad_norm": 0.474609375, "learning_rate": 0.0005132652461309994, "loss": 0.1804, "step": 69982 }, { "epoch": 0.12408741652105225, "grad_norm": 0.359375, "learning_rate": 0.0005132201155360631, "loss": 0.1495, "step": 69984 }, { "epoch": 0.12409096268636206, "grad_norm": 0.2001953125, "learning_rate": 0.0005131749875073609, "loss": 0.1795, "step": 69986 }, { "epoch": 0.12409450885167188, "grad_norm": 0.451171875, "learning_rate": 0.0005131298620450903, "loss": 0.2157, "step": 69988 }, { "epoch": 0.12409805501698169, "grad_norm": 0.89453125, "learning_rate": 0.0005130847391494489, "loss": 0.1677, "step": 69990 }, { "epoch": 0.12410160118229152, "grad_norm": 1.0234375, "learning_rate": 0.0005130396188206343, "loss": 0.2208, "step": 69992 }, { "epoch": 0.12410514734760134, "grad_norm": 0.9453125, "learning_rate": 0.0005129945010588431, "loss": 0.2987, "step": 69994 }, { "epoch": 0.12410869351291115, "grad_norm": 0.1767578125, "learning_rate": 0.000512949385864273, "loss": 0.1594, "step": 69996 }, { "epoch": 0.12411223967822096, "grad_norm": 0.50390625, "learning_rate": 0.0005129042732371214, "loss": 0.2044, "step": 69998 }, { "epoch": 0.12411578584353078, "grad_norm": 0.5234375, "learning_rate": 0.0005128591631775854, "loss": 0.1403, "step": 70000 }, { "epoch": 0.1241193320088406, "grad_norm": 0.53515625, "learning_rate": 0.0005128140556858625, "loss": 0.1604, "step": 70002 }, { "epoch": 0.12412287817415041, "grad_norm": 0.275390625, "learning_rate": 0.0005127689507621495, "loss": 0.1813, "step": 70004 }, { "epoch": 0.12412642433946022, "grad_norm": 0.28125, "learning_rate": 0.0005127238484066442, "loss": 0.1935, "step": 70006 }, { "epoch": 0.12412997050477004, "grad_norm": 0.484375, "learning_rate": 0.0005126787486195437, "loss": 0.1895, "step": 70008 }, { "epoch": 0.12413351667007985, "grad_norm": 1.7265625, "learning_rate": 0.0005126336514010453, "loss": 0.3208, "step": 70010 }, { "epoch": 0.12413706283538967, "grad_norm": 0.625, "learning_rate": 0.0005125885567513458, "loss": 0.1997, "step": 70012 }, { "epoch": 0.12414060900069948, "grad_norm": 0.330078125, "learning_rate": 0.000512543464670643, "loss": 0.1564, "step": 70014 }, { "epoch": 0.1241441551660093, "grad_norm": 0.62109375, "learning_rate": 0.0005124983751591339, "loss": 0.1748, "step": 70016 }, { "epoch": 0.12414770133131911, "grad_norm": 0.228515625, "learning_rate": 0.0005124532882170156, "loss": 0.1913, "step": 70018 }, { "epoch": 0.12415124749662892, "grad_norm": 2.734375, "learning_rate": 0.0005124082038444852, "loss": 0.2984, "step": 70020 }, { "epoch": 0.12415479366193874, "grad_norm": 0.55859375, "learning_rate": 0.0005123631220417399, "loss": 0.1818, "step": 70022 }, { "epoch": 0.12415833982724855, "grad_norm": 0.388671875, "learning_rate": 0.0005123180428089775, "loss": 0.1625, "step": 70024 }, { "epoch": 0.12416188599255837, "grad_norm": 0.23046875, "learning_rate": 0.000512272966146394, "loss": 0.1533, "step": 70026 }, { "epoch": 0.12416543215786818, "grad_norm": 0.97265625, "learning_rate": 0.0005122278920541877, "loss": 0.1515, "step": 70028 }, { "epoch": 0.124168978323178, "grad_norm": 1.546875, "learning_rate": 0.0005121828205325545, "loss": 0.4474, "step": 70030 }, { "epoch": 0.12417252448848781, "grad_norm": 0.64453125, "learning_rate": 0.000512137751581693, "loss": 0.1999, "step": 70032 }, { "epoch": 0.12417607065379763, "grad_norm": 0.359375, "learning_rate": 0.0005120926852017987, "loss": 0.1645, "step": 70034 }, { "epoch": 0.12417961681910744, "grad_norm": 0.51171875, "learning_rate": 0.00051204762139307, "loss": 0.1908, "step": 70036 }, { "epoch": 0.12418316298441727, "grad_norm": 0.203125, "learning_rate": 0.0005120025601557034, "loss": 0.1785, "step": 70038 }, { "epoch": 0.12418670914972708, "grad_norm": 2.03125, "learning_rate": 0.0005119575014898962, "loss": 0.4532, "step": 70040 }, { "epoch": 0.1241902553150369, "grad_norm": 0.318359375, "learning_rate": 0.0005119124453958452, "loss": 0.2188, "step": 70042 }, { "epoch": 0.12419380148034671, "grad_norm": 0.1611328125, "learning_rate": 0.0005118673918737472, "loss": 0.172, "step": 70044 }, { "epoch": 0.12419734764565653, "grad_norm": 0.298828125, "learning_rate": 0.0005118223409237999, "loss": 0.184, "step": 70046 }, { "epoch": 0.12420089381096634, "grad_norm": 0.234375, "learning_rate": 0.0005117772925462001, "loss": 0.1567, "step": 70048 }, { "epoch": 0.12420443997627616, "grad_norm": 0.330078125, "learning_rate": 0.0005117322467411448, "loss": 0.2203, "step": 70050 }, { "epoch": 0.12420798614158597, "grad_norm": 0.859375, "learning_rate": 0.0005116872035088305, "loss": 0.1991, "step": 70052 }, { "epoch": 0.12421153230689579, "grad_norm": 1.8125, "learning_rate": 0.0005116421628494549, "loss": 0.2923, "step": 70054 }, { "epoch": 0.1242150784722056, "grad_norm": 0.5234375, "learning_rate": 0.000511597124763215, "loss": 0.2856, "step": 70056 }, { "epoch": 0.12421862463751542, "grad_norm": 0.2353515625, "learning_rate": 0.0005115520892503073, "loss": 0.2103, "step": 70058 }, { "epoch": 0.12422217080282523, "grad_norm": 1.5234375, "learning_rate": 0.0005115070563109289, "loss": 0.2146, "step": 70060 }, { "epoch": 0.12422571696813504, "grad_norm": 0.50390625, "learning_rate": 0.0005114620259452767, "loss": 0.1565, "step": 70062 }, { "epoch": 0.12422926313344486, "grad_norm": 0.271484375, "learning_rate": 0.0005114169981535478, "loss": 0.1553, "step": 70064 }, { "epoch": 0.12423280929875467, "grad_norm": 0.234375, "learning_rate": 0.0005113719729359392, "loss": 0.2242, "step": 70066 }, { "epoch": 0.12423635546406449, "grad_norm": 0.90234375, "learning_rate": 0.0005113269502926476, "loss": 0.2057, "step": 70068 }, { "epoch": 0.1242399016293743, "grad_norm": 0.859375, "learning_rate": 0.0005112819302238699, "loss": 0.2212, "step": 70070 }, { "epoch": 0.12424344779468412, "grad_norm": 0.6640625, "learning_rate": 0.0005112369127298031, "loss": 0.2563, "step": 70072 }, { "epoch": 0.12424699395999393, "grad_norm": 0.6875, "learning_rate": 0.000511191897810644, "loss": 0.1799, "step": 70074 }, { "epoch": 0.12425054012530375, "grad_norm": 0.279296875, "learning_rate": 0.0005111468854665897, "loss": 0.3252, "step": 70076 }, { "epoch": 0.12425408629061356, "grad_norm": 0.515625, "learning_rate": 0.000511101875697837, "loss": 0.1981, "step": 70078 }, { "epoch": 0.12425763245592338, "grad_norm": 1.53125, "learning_rate": 0.000511056868504582, "loss": 0.2127, "step": 70080 }, { "epoch": 0.12426117862123319, "grad_norm": 0.310546875, "learning_rate": 0.0005110118638870229, "loss": 0.1708, "step": 70082 }, { "epoch": 0.12426472478654302, "grad_norm": 0.427734375, "learning_rate": 0.0005109668618453552, "loss": 0.2982, "step": 70084 }, { "epoch": 0.12426827095185283, "grad_norm": 1.6796875, "learning_rate": 0.0005109218623797766, "loss": 0.1326, "step": 70086 }, { "epoch": 0.12427181711716265, "grad_norm": 0.1357421875, "learning_rate": 0.0005108768654904834, "loss": 0.1385, "step": 70088 }, { "epoch": 0.12427536328247246, "grad_norm": 0.48046875, "learning_rate": 0.000510831871177673, "loss": 0.2493, "step": 70090 }, { "epoch": 0.12427890944778228, "grad_norm": 0.47265625, "learning_rate": 0.0005107868794415413, "loss": 0.1467, "step": 70092 }, { "epoch": 0.12428245561309209, "grad_norm": 0.267578125, "learning_rate": 0.0005107418902822857, "loss": 0.1997, "step": 70094 }, { "epoch": 0.1242860017784019, "grad_norm": 0.28515625, "learning_rate": 0.0005106969037001028, "loss": 0.1421, "step": 70096 }, { "epoch": 0.12428954794371172, "grad_norm": 0.2890625, "learning_rate": 0.0005106519196951896, "loss": 0.1819, "step": 70098 }, { "epoch": 0.12429309410902153, "grad_norm": 0.392578125, "learning_rate": 0.0005106069382677422, "loss": 0.1755, "step": 70100 }, { "epoch": 0.12429664027433135, "grad_norm": 0.35546875, "learning_rate": 0.0005105619594179576, "loss": 0.2134, "step": 70102 }, { "epoch": 0.12430018643964116, "grad_norm": 1.234375, "learning_rate": 0.0005105169831460329, "loss": 0.1892, "step": 70104 }, { "epoch": 0.12430373260495098, "grad_norm": 0.1796875, "learning_rate": 0.0005104720094521644, "loss": 0.1559, "step": 70106 }, { "epoch": 0.12430727877026079, "grad_norm": 1.0234375, "learning_rate": 0.000510427038336549, "loss": 0.1711, "step": 70108 }, { "epoch": 0.12431082493557061, "grad_norm": 0.3125, "learning_rate": 0.0005103820697993828, "loss": 0.1584, "step": 70110 }, { "epoch": 0.12431437110088042, "grad_norm": 0.287109375, "learning_rate": 0.0005103371038408636, "loss": 0.2242, "step": 70112 }, { "epoch": 0.12431791726619024, "grad_norm": 0.3046875, "learning_rate": 0.0005102921404611871, "loss": 0.1523, "step": 70114 }, { "epoch": 0.12432146343150005, "grad_norm": 0.2373046875, "learning_rate": 0.0005102471796605501, "loss": 0.1628, "step": 70116 }, { "epoch": 0.12432500959680987, "grad_norm": 0.2421875, "learning_rate": 0.0005102022214391498, "loss": 0.1633, "step": 70118 }, { "epoch": 0.12432855576211968, "grad_norm": 1.203125, "learning_rate": 0.0005101572657971818, "loss": 0.2406, "step": 70120 }, { "epoch": 0.1243321019274295, "grad_norm": 0.78125, "learning_rate": 0.000510112312734844, "loss": 0.152, "step": 70122 }, { "epoch": 0.12433564809273931, "grad_norm": 0.8671875, "learning_rate": 0.0005100673622523315, "loss": 0.1464, "step": 70124 }, { "epoch": 0.12433919425804912, "grad_norm": 0.431640625, "learning_rate": 0.000510022414349842, "loss": 0.1691, "step": 70126 }, { "epoch": 0.12434274042335895, "grad_norm": 1.1875, "learning_rate": 0.0005099774690275718, "loss": 0.1667, "step": 70128 }, { "epoch": 0.12434628658866877, "grad_norm": 0.2421875, "learning_rate": 0.0005099325262857177, "loss": 0.1828, "step": 70130 }, { "epoch": 0.12434983275397858, "grad_norm": 0.7890625, "learning_rate": 0.0005098875861244757, "loss": 0.157, "step": 70132 }, { "epoch": 0.1243533789192884, "grad_norm": 0.40625, "learning_rate": 0.0005098426485440426, "loss": 0.192, "step": 70134 }, { "epoch": 0.12435692508459821, "grad_norm": 0.1552734375, "learning_rate": 0.000509797713544615, "loss": 0.2432, "step": 70136 }, { "epoch": 0.12436047124990803, "grad_norm": 0.455078125, "learning_rate": 0.0005097527811263893, "loss": 0.1777, "step": 70138 }, { "epoch": 0.12436401741521784, "grad_norm": 0.28515625, "learning_rate": 0.0005097078512895624, "loss": 0.1599, "step": 70140 }, { "epoch": 0.12436756358052765, "grad_norm": 0.376953125, "learning_rate": 0.0005096629240343298, "loss": 0.1372, "step": 70142 }, { "epoch": 0.12437110974583747, "grad_norm": 0.28515625, "learning_rate": 0.0005096179993608893, "loss": 0.1593, "step": 70144 }, { "epoch": 0.12437465591114728, "grad_norm": 0.384765625, "learning_rate": 0.0005095730772694364, "loss": 0.1959, "step": 70146 }, { "epoch": 0.1243782020764571, "grad_norm": 0.67578125, "learning_rate": 0.0005095281577601682, "loss": 0.1785, "step": 70148 }, { "epoch": 0.12438174824176691, "grad_norm": 0.3671875, "learning_rate": 0.0005094832408332808, "loss": 0.2142, "step": 70150 }, { "epoch": 0.12438529440707673, "grad_norm": 0.3359375, "learning_rate": 0.0005094383264889704, "loss": 0.3195, "step": 70152 }, { "epoch": 0.12438884057238654, "grad_norm": 0.349609375, "learning_rate": 0.0005093934147274338, "loss": 0.1662, "step": 70154 }, { "epoch": 0.12439238673769636, "grad_norm": 0.74609375, "learning_rate": 0.0005093485055488676, "loss": 0.1799, "step": 70156 }, { "epoch": 0.12439593290300617, "grad_norm": 0.349609375, "learning_rate": 0.0005093035989534677, "loss": 0.2233, "step": 70158 }, { "epoch": 0.12439947906831599, "grad_norm": 0.375, "learning_rate": 0.0005092586949414306, "loss": 0.138, "step": 70160 }, { "epoch": 0.1244030252336258, "grad_norm": 0.419921875, "learning_rate": 0.000509213793512953, "loss": 0.2633, "step": 70162 }, { "epoch": 0.12440657139893561, "grad_norm": 0.48046875, "learning_rate": 0.0005091688946682314, "loss": 0.1609, "step": 70164 }, { "epoch": 0.12441011756424543, "grad_norm": 0.43359375, "learning_rate": 0.0005091239984074615, "loss": 0.2224, "step": 70166 }, { "epoch": 0.12441366372955524, "grad_norm": 0.1611328125, "learning_rate": 0.0005090791047308402, "loss": 0.1755, "step": 70168 }, { "epoch": 0.12441720989486506, "grad_norm": 0.6171875, "learning_rate": 0.0005090342136385633, "loss": 0.3689, "step": 70170 }, { "epoch": 0.12442075606017487, "grad_norm": 3.953125, "learning_rate": 0.000508989325130828, "loss": 0.2808, "step": 70172 }, { "epoch": 0.1244243022254847, "grad_norm": 0.2294921875, "learning_rate": 0.0005089444392078296, "loss": 0.1318, "step": 70174 }, { "epoch": 0.12442784839079452, "grad_norm": 1.65625, "learning_rate": 0.0005088995558697652, "loss": 0.2522, "step": 70176 }, { "epoch": 0.12443139455610433, "grad_norm": 0.32421875, "learning_rate": 0.0005088546751168305, "loss": 0.1441, "step": 70178 }, { "epoch": 0.12443494072141414, "grad_norm": 0.8359375, "learning_rate": 0.0005088097969492227, "loss": 0.1974, "step": 70180 }, { "epoch": 0.12443848688672396, "grad_norm": 0.359375, "learning_rate": 0.0005087649213671366, "loss": 0.3198, "step": 70182 }, { "epoch": 0.12444203305203377, "grad_norm": 0.32421875, "learning_rate": 0.0005087200483707699, "loss": 0.1993, "step": 70184 }, { "epoch": 0.12444557921734359, "grad_norm": 0.400390625, "learning_rate": 0.0005086751779603181, "loss": 0.1665, "step": 70186 }, { "epoch": 0.1244491253826534, "grad_norm": 0.2197265625, "learning_rate": 0.0005086303101359777, "loss": 0.1461, "step": 70188 }, { "epoch": 0.12445267154796322, "grad_norm": 1.515625, "learning_rate": 0.0005085854448979445, "loss": 0.3279, "step": 70190 }, { "epoch": 0.12445621771327303, "grad_norm": 0.388671875, "learning_rate": 0.0005085405822464151, "loss": 0.1847, "step": 70192 }, { "epoch": 0.12445976387858285, "grad_norm": 0.38671875, "learning_rate": 0.0005084957221815859, "loss": 0.195, "step": 70194 }, { "epoch": 0.12446331004389266, "grad_norm": 0.322265625, "learning_rate": 0.0005084508647036527, "loss": 0.2044, "step": 70196 }, { "epoch": 0.12446685620920248, "grad_norm": 0.5, "learning_rate": 0.0005084060098128118, "loss": 0.1747, "step": 70198 }, { "epoch": 0.12447040237451229, "grad_norm": 0.26171875, "learning_rate": 0.000508361157509259, "loss": 0.1872, "step": 70200 }, { "epoch": 0.1244739485398221, "grad_norm": 0.34375, "learning_rate": 0.0005083163077931914, "loss": 0.1417, "step": 70202 }, { "epoch": 0.12447749470513192, "grad_norm": 0.349609375, "learning_rate": 0.0005082714606648045, "loss": 0.1739, "step": 70204 }, { "epoch": 0.12448104087044173, "grad_norm": 0.7265625, "learning_rate": 0.0005082266161242943, "loss": 0.2574, "step": 70206 }, { "epoch": 0.12448458703575155, "grad_norm": 0.51953125, "learning_rate": 0.0005081817741718573, "loss": 0.1843, "step": 70208 }, { "epoch": 0.12448813320106136, "grad_norm": 0.546875, "learning_rate": 0.0005081369348076891, "loss": 0.2502, "step": 70210 }, { "epoch": 0.12449167936637118, "grad_norm": 0.59765625, "learning_rate": 0.0005080920980319867, "loss": 0.1674, "step": 70212 }, { "epoch": 0.12449522553168099, "grad_norm": 0.32421875, "learning_rate": 0.0005080472638449451, "loss": 0.2309, "step": 70214 }, { "epoch": 0.1244987716969908, "grad_norm": 0.248046875, "learning_rate": 0.0005080024322467613, "loss": 0.129, "step": 70216 }, { "epoch": 0.12450231786230062, "grad_norm": 0.44921875, "learning_rate": 0.0005079576032376307, "loss": 0.1763, "step": 70218 }, { "epoch": 0.12450586402761045, "grad_norm": 0.2353515625, "learning_rate": 0.0005079127768177502, "loss": 0.1487, "step": 70220 }, { "epoch": 0.12450941019292026, "grad_norm": 0.86328125, "learning_rate": 0.0005078679529873146, "loss": 0.2293, "step": 70222 }, { "epoch": 0.12451295635823008, "grad_norm": 0.337890625, "learning_rate": 0.0005078231317465209, "loss": 0.193, "step": 70224 }, { "epoch": 0.1245165025235399, "grad_norm": 0.48828125, "learning_rate": 0.0005077783130955651, "loss": 0.1466, "step": 70226 }, { "epoch": 0.12452004868884971, "grad_norm": 0.412109375, "learning_rate": 0.0005077334970346423, "loss": 0.1592, "step": 70228 }, { "epoch": 0.12452359485415952, "grad_norm": 1.546875, "learning_rate": 0.0005076886835639501, "loss": 0.2278, "step": 70230 }, { "epoch": 0.12452714101946934, "grad_norm": 0.59765625, "learning_rate": 0.0005076438726836827, "loss": 0.2308, "step": 70232 }, { "epoch": 0.12453068718477915, "grad_norm": 0.34765625, "learning_rate": 0.0005075990643940374, "loss": 0.1901, "step": 70234 }, { "epoch": 0.12453423335008897, "grad_norm": 0.275390625, "learning_rate": 0.0005075542586952091, "loss": 0.1344, "step": 70236 }, { "epoch": 0.12453777951539878, "grad_norm": 0.279296875, "learning_rate": 0.0005075094555873951, "loss": 0.1635, "step": 70238 }, { "epoch": 0.1245413256807086, "grad_norm": 0.39453125, "learning_rate": 0.00050746465507079, "loss": 0.1912, "step": 70240 }, { "epoch": 0.12454487184601841, "grad_norm": 0.26171875, "learning_rate": 0.0005074198571455907, "loss": 0.1947, "step": 70242 }, { "epoch": 0.12454841801132822, "grad_norm": 1.1953125, "learning_rate": 0.0005073750618119927, "loss": 0.3279, "step": 70244 }, { "epoch": 0.12455196417663804, "grad_norm": 0.54296875, "learning_rate": 0.0005073302690701919, "loss": 0.189, "step": 70246 }, { "epoch": 0.12455551034194785, "grad_norm": 0.2431640625, "learning_rate": 0.0005072854789203843, "loss": 0.1301, "step": 70248 }, { "epoch": 0.12455905650725767, "grad_norm": 0.462890625, "learning_rate": 0.0005072406913627653, "loss": 0.2542, "step": 70250 }, { "epoch": 0.12456260267256748, "grad_norm": 0.4375, "learning_rate": 0.0005071959063975316, "loss": 0.1843, "step": 70252 }, { "epoch": 0.1245661488378773, "grad_norm": 0.451171875, "learning_rate": 0.0005071511240248785, "loss": 0.166, "step": 70254 }, { "epoch": 0.12456969500318711, "grad_norm": 0.69921875, "learning_rate": 0.0005071063442450021, "loss": 0.1858, "step": 70256 }, { "epoch": 0.12457324116849693, "grad_norm": 0.36328125, "learning_rate": 0.0005070615670580979, "loss": 0.1348, "step": 70258 }, { "epoch": 0.12457678733380674, "grad_norm": 0.82421875, "learning_rate": 0.0005070167924643623, "loss": 0.1557, "step": 70260 }, { "epoch": 0.12458033349911656, "grad_norm": 0.55859375, "learning_rate": 0.0005069720204639907, "loss": 0.1751, "step": 70262 }, { "epoch": 0.12458387966442638, "grad_norm": 0.423828125, "learning_rate": 0.0005069272510571792, "loss": 0.1549, "step": 70264 }, { "epoch": 0.1245874258297362, "grad_norm": 0.419921875, "learning_rate": 0.0005068824842441233, "loss": 0.1596, "step": 70266 }, { "epoch": 0.12459097199504601, "grad_norm": 0.25, "learning_rate": 0.0005068377200250186, "loss": 0.1791, "step": 70268 }, { "epoch": 0.12459451816035583, "grad_norm": 0.40625, "learning_rate": 0.0005067929584000618, "loss": 0.157, "step": 70270 }, { "epoch": 0.12459806432566564, "grad_norm": 0.486328125, "learning_rate": 0.0005067481993694474, "loss": 0.1607, "step": 70272 }, { "epoch": 0.12460161049097546, "grad_norm": 0.6796875, "learning_rate": 0.000506703442933372, "loss": 0.1442, "step": 70274 }, { "epoch": 0.12460515665628527, "grad_norm": 0.1669921875, "learning_rate": 0.0005066586890920308, "loss": 0.1567, "step": 70276 }, { "epoch": 0.12460870282159509, "grad_norm": 0.8515625, "learning_rate": 0.0005066139378456205, "loss": 0.1779, "step": 70278 }, { "epoch": 0.1246122489869049, "grad_norm": 0.69140625, "learning_rate": 0.0005065691891943356, "loss": 0.178, "step": 70280 }, { "epoch": 0.12461579515221471, "grad_norm": 1.2265625, "learning_rate": 0.0005065244431383726, "loss": 0.1409, "step": 70282 }, { "epoch": 0.12461934131752453, "grad_norm": 0.248046875, "learning_rate": 0.000506479699677927, "loss": 0.1348, "step": 70284 }, { "epoch": 0.12462288748283434, "grad_norm": 1.28125, "learning_rate": 0.0005064349588131945, "loss": 0.2363, "step": 70286 }, { "epoch": 0.12462643364814416, "grad_norm": 0.89453125, "learning_rate": 0.0005063902205443704, "loss": 0.232, "step": 70288 }, { "epoch": 0.12462997981345397, "grad_norm": 0.7109375, "learning_rate": 0.0005063454848716508, "loss": 0.1675, "step": 70290 }, { "epoch": 0.12463352597876379, "grad_norm": 0.345703125, "learning_rate": 0.0005063007517952312, "loss": 0.1683, "step": 70292 }, { "epoch": 0.1246370721440736, "grad_norm": 0.4609375, "learning_rate": 0.0005062560213153073, "loss": 0.1821, "step": 70294 }, { "epoch": 0.12464061830938342, "grad_norm": 0.515625, "learning_rate": 0.0005062112934320749, "loss": 0.1857, "step": 70296 }, { "epoch": 0.12464416447469323, "grad_norm": 0.69921875, "learning_rate": 0.0005061665681457288, "loss": 0.1432, "step": 70298 }, { "epoch": 0.12464771064000305, "grad_norm": 0.52734375, "learning_rate": 0.0005061218454564656, "loss": 0.1711, "step": 70300 }, { "epoch": 0.12465125680531286, "grad_norm": 0.3125, "learning_rate": 0.0005060771253644805, "loss": 0.1225, "step": 70302 }, { "epoch": 0.12465480297062267, "grad_norm": 0.7890625, "learning_rate": 0.0005060324078699691, "loss": 0.1638, "step": 70304 }, { "epoch": 0.12465834913593249, "grad_norm": 1.0234375, "learning_rate": 0.0005059876929731269, "loss": 0.2236, "step": 70306 }, { "epoch": 0.1246618953012423, "grad_norm": 0.287109375, "learning_rate": 0.0005059429806741491, "loss": 0.1998, "step": 70308 }, { "epoch": 0.12466544146655213, "grad_norm": 0.2119140625, "learning_rate": 0.0005058982709732321, "loss": 0.1651, "step": 70310 }, { "epoch": 0.12466898763186195, "grad_norm": 0.37890625, "learning_rate": 0.0005058535638705708, "loss": 0.2009, "step": 70312 }, { "epoch": 0.12467253379717176, "grad_norm": 0.330078125, "learning_rate": 0.000505808859366361, "loss": 0.1513, "step": 70314 }, { "epoch": 0.12467607996248158, "grad_norm": 0.1884765625, "learning_rate": 0.0005057641574607981, "loss": 0.1665, "step": 70316 }, { "epoch": 0.12467962612779139, "grad_norm": 0.609375, "learning_rate": 0.0005057194581540771, "loss": 0.1788, "step": 70318 }, { "epoch": 0.1246831722931012, "grad_norm": 1.0, "learning_rate": 0.0005056747614463948, "loss": 0.363, "step": 70320 }, { "epoch": 0.12468671845841102, "grad_norm": 0.28125, "learning_rate": 0.0005056300673379453, "loss": 0.3185, "step": 70322 }, { "epoch": 0.12469026462372083, "grad_norm": 0.318359375, "learning_rate": 0.0005055853758289249, "loss": 0.1819, "step": 70324 }, { "epoch": 0.12469381078903065, "grad_norm": 0.412109375, "learning_rate": 0.0005055406869195284, "loss": 0.3757, "step": 70326 }, { "epoch": 0.12469735695434046, "grad_norm": 0.49609375, "learning_rate": 0.0005054960006099522, "loss": 0.1711, "step": 70328 }, { "epoch": 0.12470090311965028, "grad_norm": 0.32421875, "learning_rate": 0.0005054513169003907, "loss": 0.1299, "step": 70330 }, { "epoch": 0.12470444928496009, "grad_norm": 0.62109375, "learning_rate": 0.0005054066357910401, "loss": 0.1764, "step": 70332 }, { "epoch": 0.1247079954502699, "grad_norm": 0.71875, "learning_rate": 0.0005053619572820954, "loss": 0.2162, "step": 70334 }, { "epoch": 0.12471154161557972, "grad_norm": 0.208984375, "learning_rate": 0.0005053172813737521, "loss": 0.1877, "step": 70336 }, { "epoch": 0.12471508778088954, "grad_norm": 0.4140625, "learning_rate": 0.0005052726080662055, "loss": 0.1689, "step": 70338 }, { "epoch": 0.12471863394619935, "grad_norm": 0.458984375, "learning_rate": 0.0005052279373596509, "loss": 0.2016, "step": 70340 }, { "epoch": 0.12472218011150916, "grad_norm": 1.84375, "learning_rate": 0.000505183269254284, "loss": 0.1525, "step": 70342 }, { "epoch": 0.12472572627681898, "grad_norm": 0.25390625, "learning_rate": 0.0005051386037503001, "loss": 0.1671, "step": 70344 }, { "epoch": 0.1247292724421288, "grad_norm": 1.75, "learning_rate": 0.0005050939408478942, "loss": 0.1577, "step": 70346 }, { "epoch": 0.12473281860743861, "grad_norm": 0.81640625, "learning_rate": 0.0005050492805472618, "loss": 0.1743, "step": 70348 }, { "epoch": 0.12473636477274842, "grad_norm": 0.5625, "learning_rate": 0.0005050046228485985, "loss": 0.226, "step": 70350 }, { "epoch": 0.12473991093805824, "grad_norm": 1.8125, "learning_rate": 0.0005049599677520993, "loss": 0.2372, "step": 70352 }, { "epoch": 0.12474345710336805, "grad_norm": 0.365234375, "learning_rate": 0.0005049153152579595, "loss": 0.1312, "step": 70354 }, { "epoch": 0.12474700326867788, "grad_norm": 0.23046875, "learning_rate": 0.0005048706653663744, "loss": 0.1466, "step": 70356 }, { "epoch": 0.1247505494339877, "grad_norm": 0.396484375, "learning_rate": 0.0005048260180775392, "loss": 0.1688, "step": 70358 }, { "epoch": 0.12475409559929751, "grad_norm": 0.384765625, "learning_rate": 0.00050478137339165, "loss": 0.2208, "step": 70360 }, { "epoch": 0.12475764176460732, "grad_norm": 0.310546875, "learning_rate": 0.0005047367313089004, "loss": 0.1624, "step": 70362 }, { "epoch": 0.12476118792991714, "grad_norm": 0.400390625, "learning_rate": 0.0005046920918294871, "loss": 0.1909, "step": 70364 }, { "epoch": 0.12476473409522695, "grad_norm": 0.29296875, "learning_rate": 0.0005046474549536043, "loss": 0.1781, "step": 70366 }, { "epoch": 0.12476828026053677, "grad_norm": 0.3984375, "learning_rate": 0.0005046028206814484, "loss": 0.1746, "step": 70368 }, { "epoch": 0.12477182642584658, "grad_norm": 0.271484375, "learning_rate": 0.0005045581890132134, "loss": 0.1781, "step": 70370 }, { "epoch": 0.1247753725911564, "grad_norm": 0.91796875, "learning_rate": 0.0005045135599490951, "loss": 0.2092, "step": 70372 }, { "epoch": 0.12477891875646621, "grad_norm": 0.56640625, "learning_rate": 0.0005044689334892889, "loss": 0.1521, "step": 70374 }, { "epoch": 0.12478246492177603, "grad_norm": 0.65234375, "learning_rate": 0.0005044243096339894, "loss": 0.201, "step": 70376 }, { "epoch": 0.12478601108708584, "grad_norm": 0.470703125, "learning_rate": 0.000504379688383392, "loss": 0.1557, "step": 70378 }, { "epoch": 0.12478955725239566, "grad_norm": 0.267578125, "learning_rate": 0.0005043350697376917, "loss": 0.1308, "step": 70380 }, { "epoch": 0.12479310341770547, "grad_norm": 0.2490234375, "learning_rate": 0.000504290453697084, "loss": 0.2152, "step": 70382 }, { "epoch": 0.12479664958301528, "grad_norm": 0.98046875, "learning_rate": 0.000504245840261764, "loss": 0.1899, "step": 70384 }, { "epoch": 0.1248001957483251, "grad_norm": 0.7109375, "learning_rate": 0.0005042012294319266, "loss": 0.1621, "step": 70386 }, { "epoch": 0.12480374191363491, "grad_norm": 0.384765625, "learning_rate": 0.0005041566212077666, "loss": 0.152, "step": 70388 }, { "epoch": 0.12480728807894473, "grad_norm": 0.326171875, "learning_rate": 0.0005041120155894796, "loss": 0.1744, "step": 70390 }, { "epoch": 0.12481083424425454, "grad_norm": 1.640625, "learning_rate": 0.0005040674125772606, "loss": 0.17, "step": 70392 }, { "epoch": 0.12481438040956436, "grad_norm": 0.298828125, "learning_rate": 0.0005040228121713046, "loss": 0.1894, "step": 70394 }, { "epoch": 0.12481792657487417, "grad_norm": 1.3125, "learning_rate": 0.0005039782143718065, "loss": 0.1334, "step": 70396 }, { "epoch": 0.12482147274018399, "grad_norm": 0.2099609375, "learning_rate": 0.0005039336191789614, "loss": 0.2042, "step": 70398 }, { "epoch": 0.12482501890549381, "grad_norm": 0.5546875, "learning_rate": 0.0005038890265929648, "loss": 0.1782, "step": 70400 }, { "epoch": 0.12482856507080363, "grad_norm": 0.51171875, "learning_rate": 0.000503844436614011, "loss": 0.1348, "step": 70402 }, { "epoch": 0.12483211123611344, "grad_norm": 0.32421875, "learning_rate": 0.0005037998492422955, "loss": 0.1986, "step": 70404 }, { "epoch": 0.12483565740142326, "grad_norm": 2.9375, "learning_rate": 0.0005037552644780129, "loss": 0.2029, "step": 70406 }, { "epoch": 0.12483920356673307, "grad_norm": 0.84765625, "learning_rate": 0.0005037106823213587, "loss": 0.1794, "step": 70408 }, { "epoch": 0.12484274973204289, "grad_norm": 1.4921875, "learning_rate": 0.0005036661027725275, "loss": 0.1675, "step": 70410 }, { "epoch": 0.1248462958973527, "grad_norm": 0.8671875, "learning_rate": 0.0005036215258317145, "loss": 0.1748, "step": 70412 }, { "epoch": 0.12484984206266252, "grad_norm": 0.37890625, "learning_rate": 0.0005035769514991146, "loss": 0.1718, "step": 70414 }, { "epoch": 0.12485338822797233, "grad_norm": 0.240234375, "learning_rate": 0.000503532379774922, "loss": 0.1737, "step": 70416 }, { "epoch": 0.12485693439328215, "grad_norm": 0.388671875, "learning_rate": 0.0005034878106593332, "loss": 0.1829, "step": 70418 }, { "epoch": 0.12486048055859196, "grad_norm": 0.390625, "learning_rate": 0.0005034432441525417, "loss": 0.1801, "step": 70420 }, { "epoch": 0.12486402672390177, "grad_norm": 0.36328125, "learning_rate": 0.000503398680254743, "loss": 0.1949, "step": 70422 }, { "epoch": 0.12486757288921159, "grad_norm": 1.0234375, "learning_rate": 0.0005033541189661318, "loss": 0.1537, "step": 70424 }, { "epoch": 0.1248711190545214, "grad_norm": 0.53125, "learning_rate": 0.0005033095602869037, "loss": 0.1754, "step": 70426 }, { "epoch": 0.12487466521983122, "grad_norm": 0.7421875, "learning_rate": 0.0005032650042172523, "loss": 0.1601, "step": 70428 }, { "epoch": 0.12487821138514103, "grad_norm": 0.67578125, "learning_rate": 0.0005032204507573735, "loss": 0.1907, "step": 70430 }, { "epoch": 0.12488175755045085, "grad_norm": 1.375, "learning_rate": 0.0005031758999074618, "loss": 0.3629, "step": 70432 }, { "epoch": 0.12488530371576066, "grad_norm": 0.375, "learning_rate": 0.0005031313516677121, "loss": 0.1931, "step": 70434 }, { "epoch": 0.12488884988107048, "grad_norm": 0.271484375, "learning_rate": 0.0005030868060383189, "loss": 0.1668, "step": 70436 }, { "epoch": 0.12489239604638029, "grad_norm": 0.87890625, "learning_rate": 0.0005030422630194772, "loss": 0.201, "step": 70438 }, { "epoch": 0.1248959422116901, "grad_norm": 0.5859375, "learning_rate": 0.0005029977226113822, "loss": 0.1501, "step": 70440 }, { "epoch": 0.12489948837699992, "grad_norm": 0.94921875, "learning_rate": 0.0005029531848142284, "loss": 0.1817, "step": 70442 }, { "epoch": 0.12490303454230973, "grad_norm": 0.40625, "learning_rate": 0.0005029086496282104, "loss": 0.1812, "step": 70444 }, { "epoch": 0.12490658070761956, "grad_norm": 0.28515625, "learning_rate": 0.0005028641170535229, "loss": 0.1492, "step": 70446 }, { "epoch": 0.12491012687292938, "grad_norm": 0.48046875, "learning_rate": 0.0005028195870903614, "loss": 0.1913, "step": 70448 }, { "epoch": 0.12491367303823919, "grad_norm": 0.8203125, "learning_rate": 0.0005027750597389198, "loss": 0.2401, "step": 70450 }, { "epoch": 0.12491721920354901, "grad_norm": 0.51953125, "learning_rate": 0.0005027305349993934, "loss": 0.2068, "step": 70452 }, { "epoch": 0.12492076536885882, "grad_norm": 0.74609375, "learning_rate": 0.0005026860128719767, "loss": 0.1936, "step": 70454 }, { "epoch": 0.12492431153416864, "grad_norm": 1.8359375, "learning_rate": 0.0005026414933568639, "loss": 0.1718, "step": 70456 }, { "epoch": 0.12492785769947845, "grad_norm": 1.203125, "learning_rate": 0.000502596976454251, "loss": 0.51, "step": 70458 }, { "epoch": 0.12493140386478827, "grad_norm": 0.337890625, "learning_rate": 0.0005025524621643314, "loss": 0.2308, "step": 70460 }, { "epoch": 0.12493495003009808, "grad_norm": 0.40625, "learning_rate": 0.0005025079504873004, "loss": 0.3436, "step": 70462 }, { "epoch": 0.1249384961954079, "grad_norm": 0.515625, "learning_rate": 0.0005024634414233524, "loss": 0.2043, "step": 70464 }, { "epoch": 0.12494204236071771, "grad_norm": 0.890625, "learning_rate": 0.0005024189349726829, "loss": 0.2095, "step": 70466 }, { "epoch": 0.12494558852602752, "grad_norm": 0.33984375, "learning_rate": 0.0005023744311354849, "loss": 0.1882, "step": 70468 }, { "epoch": 0.12494913469133734, "grad_norm": 0.2197265625, "learning_rate": 0.0005023299299119547, "loss": 0.282, "step": 70470 }, { "epoch": 0.12495268085664715, "grad_norm": 0.341796875, "learning_rate": 0.000502285431302286, "loss": 0.1838, "step": 70472 }, { "epoch": 0.12495622702195697, "grad_norm": 1.5234375, "learning_rate": 0.0005022409353066737, "loss": 0.166, "step": 70474 }, { "epoch": 0.12495977318726678, "grad_norm": 0.263671875, "learning_rate": 0.0005021964419253123, "loss": 0.1727, "step": 70476 }, { "epoch": 0.1249633193525766, "grad_norm": 0.74609375, "learning_rate": 0.000502151951158396, "loss": 0.1221, "step": 70478 }, { "epoch": 0.12496686551788641, "grad_norm": 0.345703125, "learning_rate": 0.0005021074630061203, "loss": 0.2529, "step": 70480 }, { "epoch": 0.12497041168319623, "grad_norm": 0.271484375, "learning_rate": 0.0005020629774686789, "loss": 0.1568, "step": 70482 }, { "epoch": 0.12497395784850604, "grad_norm": 0.58203125, "learning_rate": 0.0005020184945462672, "loss": 0.4513, "step": 70484 }, { "epoch": 0.12497750401381585, "grad_norm": 1.546875, "learning_rate": 0.0005019740142390789, "loss": 0.3189, "step": 70486 }, { "epoch": 0.12498105017912567, "grad_norm": 0.388671875, "learning_rate": 0.0005019295365473089, "loss": 0.284, "step": 70488 }, { "epoch": 0.12498459634443548, "grad_norm": 0.33203125, "learning_rate": 0.0005018850614711518, "loss": 0.2008, "step": 70490 }, { "epoch": 0.12498814250974531, "grad_norm": 0.275390625, "learning_rate": 0.0005018405890108022, "loss": 0.1788, "step": 70492 }, { "epoch": 0.12499168867505513, "grad_norm": 0.1962890625, "learning_rate": 0.0005017961191664541, "loss": 0.1591, "step": 70494 }, { "epoch": 0.12499523484036494, "grad_norm": 0.58203125, "learning_rate": 0.0005017516519383023, "loss": 0.221, "step": 70496 }, { "epoch": 0.12499878100567476, "grad_norm": 0.875, "learning_rate": 0.0005017071873265413, "loss": 0.2101, "step": 70498 }, { "epoch": 0.12500232717098456, "grad_norm": 0.53125, "learning_rate": 0.0005016627253313657, "loss": 0.1482, "step": 70500 }, { "epoch": 0.12500587333629437, "grad_norm": 1.3984375, "learning_rate": 0.0005016182659529697, "loss": 0.1912, "step": 70502 }, { "epoch": 0.12500941950160419, "grad_norm": 0.98046875, "learning_rate": 0.0005015738091915479, "loss": 0.1752, "step": 70504 }, { "epoch": 0.125012965666914, "grad_norm": 0.59765625, "learning_rate": 0.0005015293550472943, "loss": 0.1893, "step": 70506 }, { "epoch": 0.12501651183222381, "grad_norm": 0.7578125, "learning_rate": 0.0005014849035204043, "loss": 0.1977, "step": 70508 }, { "epoch": 0.12502005799753363, "grad_norm": 1.0, "learning_rate": 0.0005014404546110711, "loss": 0.1934, "step": 70510 }, { "epoch": 0.12502360416284344, "grad_norm": 0.296875, "learning_rate": 0.0005013960083194899, "loss": 0.1695, "step": 70512 }, { "epoch": 0.12502715032815329, "grad_norm": 0.255859375, "learning_rate": 0.0005013515646458546, "loss": 0.1345, "step": 70514 }, { "epoch": 0.1250306964934631, "grad_norm": 0.28515625, "learning_rate": 0.0005013071235903605, "loss": 0.1589, "step": 70516 }, { "epoch": 0.12503424265877291, "grad_norm": 0.3359375, "learning_rate": 0.0005012626851532005, "loss": 0.145, "step": 70518 }, { "epoch": 0.12503778882408273, "grad_norm": 0.439453125, "learning_rate": 0.0005012182493345701, "loss": 0.1707, "step": 70520 }, { "epoch": 0.12504133498939254, "grad_norm": 0.57421875, "learning_rate": 0.0005011738161346633, "loss": 0.3407, "step": 70522 }, { "epoch": 0.12504488115470236, "grad_norm": 0.59375, "learning_rate": 0.0005011293855536744, "loss": 0.1665, "step": 70524 }, { "epoch": 0.12504842732001217, "grad_norm": 1.3515625, "learning_rate": 0.0005010849575917976, "loss": 0.209, "step": 70526 }, { "epoch": 0.125051973485322, "grad_norm": 0.365234375, "learning_rate": 0.0005010405322492271, "loss": 0.1999, "step": 70528 }, { "epoch": 0.1250555196506318, "grad_norm": 0.3046875, "learning_rate": 0.0005009961095261578, "loss": 0.1816, "step": 70530 }, { "epoch": 0.12505906581594162, "grad_norm": 0.2080078125, "learning_rate": 0.0005009516894227832, "loss": 0.1785, "step": 70532 }, { "epoch": 0.12506261198125143, "grad_norm": 0.4453125, "learning_rate": 0.0005009072719392982, "loss": 0.2358, "step": 70534 }, { "epoch": 0.12506615814656125, "grad_norm": 0.359375, "learning_rate": 0.0005008628570758964, "loss": 0.1849, "step": 70536 }, { "epoch": 0.12506970431187106, "grad_norm": 1.2734375, "learning_rate": 0.0005008184448327729, "loss": 0.1855, "step": 70538 }, { "epoch": 0.12507325047718088, "grad_norm": 0.80078125, "learning_rate": 0.0005007740352101213, "loss": 0.1457, "step": 70540 }, { "epoch": 0.1250767966424907, "grad_norm": 0.326171875, "learning_rate": 0.0005007296282081361, "loss": 0.1386, "step": 70542 }, { "epoch": 0.1250803428078005, "grad_norm": 0.265625, "learning_rate": 0.0005006852238270115, "loss": 0.1694, "step": 70544 }, { "epoch": 0.12508388897311032, "grad_norm": 0.18359375, "learning_rate": 0.0005006408220669412, "loss": 0.1496, "step": 70546 }, { "epoch": 0.12508743513842013, "grad_norm": 0.384765625, "learning_rate": 0.0005005964229281203, "loss": 0.216, "step": 70548 }, { "epoch": 0.12509098130372995, "grad_norm": 1.8671875, "learning_rate": 0.0005005520264107421, "loss": 0.5086, "step": 70550 }, { "epoch": 0.12509452746903976, "grad_norm": 0.52734375, "learning_rate": 0.0005005076325150013, "loss": 0.1817, "step": 70552 }, { "epoch": 0.12509807363434958, "grad_norm": 0.392578125, "learning_rate": 0.0005004632412410917, "loss": 0.1825, "step": 70554 }, { "epoch": 0.1251016197996594, "grad_norm": 1.0, "learning_rate": 0.000500418852589208, "loss": 0.1785, "step": 70556 }, { "epoch": 0.1251051659649692, "grad_norm": 1.28125, "learning_rate": 0.0005003744665595435, "loss": 0.1964, "step": 70558 }, { "epoch": 0.12510871213027902, "grad_norm": 0.84375, "learning_rate": 0.000500330083152293, "loss": 0.176, "step": 70560 }, { "epoch": 0.12511225829558884, "grad_norm": 0.6171875, "learning_rate": 0.0005002857023676504, "loss": 0.1576, "step": 70562 }, { "epoch": 0.12511580446089865, "grad_norm": 0.390625, "learning_rate": 0.0005002413242058097, "loss": 0.1562, "step": 70564 }, { "epoch": 0.12511935062620846, "grad_norm": 0.3828125, "learning_rate": 0.0005001969486669655, "loss": 0.1916, "step": 70566 }, { "epoch": 0.12512289679151828, "grad_norm": 0.7109375, "learning_rate": 0.0005001525757513108, "loss": 0.2513, "step": 70568 }, { "epoch": 0.1251264429568281, "grad_norm": 0.2412109375, "learning_rate": 0.0005001082054590407, "loss": 0.1674, "step": 70570 }, { "epoch": 0.1251299891221379, "grad_norm": 2.328125, "learning_rate": 0.0005000638377903484, "loss": 0.2902, "step": 70572 }, { "epoch": 0.12513353528744772, "grad_norm": 0.25390625, "learning_rate": 0.000500019472745429, "loss": 0.1903, "step": 70574 }, { "epoch": 0.12513708145275754, "grad_norm": 0.37109375, "learning_rate": 0.0004999751103244754, "loss": 0.1911, "step": 70576 }, { "epoch": 0.12514062761806735, "grad_norm": 1.015625, "learning_rate": 0.0004999307505276825, "loss": 0.1895, "step": 70578 }, { "epoch": 0.12514417378337717, "grad_norm": 0.42578125, "learning_rate": 0.0004998863933552438, "loss": 0.1431, "step": 70580 }, { "epoch": 0.12514771994868698, "grad_norm": 0.384765625, "learning_rate": 0.0004998420388073536, "loss": 0.1606, "step": 70582 }, { "epoch": 0.1251512661139968, "grad_norm": 0.498046875, "learning_rate": 0.0004997976868842056, "loss": 0.1503, "step": 70584 }, { "epoch": 0.1251548122793066, "grad_norm": 0.224609375, "learning_rate": 0.0004997533375859935, "loss": 0.1635, "step": 70586 }, { "epoch": 0.12515835844461642, "grad_norm": 0.2080078125, "learning_rate": 0.000499708990912912, "loss": 0.1659, "step": 70588 }, { "epoch": 0.12516190460992624, "grad_norm": 0.51171875, "learning_rate": 0.0004996646468651548, "loss": 0.1622, "step": 70590 }, { "epoch": 0.12516545077523605, "grad_norm": 1.6796875, "learning_rate": 0.0004996203054429156, "loss": 0.1871, "step": 70592 }, { "epoch": 0.12516899694054587, "grad_norm": 0.314453125, "learning_rate": 0.0004995759666463881, "loss": 0.1099, "step": 70594 }, { "epoch": 0.12517254310585568, "grad_norm": 0.390625, "learning_rate": 0.000499531630475767, "loss": 0.2098, "step": 70596 }, { "epoch": 0.1251760892711655, "grad_norm": 0.59375, "learning_rate": 0.0004994872969312456, "loss": 0.1581, "step": 70598 }, { "epoch": 0.1251796354364753, "grad_norm": 0.314453125, "learning_rate": 0.0004994429660130181, "loss": 0.1756, "step": 70600 }, { "epoch": 0.12518318160178513, "grad_norm": 0.63671875, "learning_rate": 0.0004993986377212781, "loss": 0.1848, "step": 70602 }, { "epoch": 0.12518672776709497, "grad_norm": 0.37109375, "learning_rate": 0.0004993543120562193, "loss": 0.1959, "step": 70604 }, { "epoch": 0.12519027393240478, "grad_norm": 0.56640625, "learning_rate": 0.0004993099890180365, "loss": 0.1689, "step": 70606 }, { "epoch": 0.1251938200977146, "grad_norm": 0.37109375, "learning_rate": 0.0004992656686069221, "loss": 0.1444, "step": 70608 }, { "epoch": 0.1251973662630244, "grad_norm": 0.2470703125, "learning_rate": 0.0004992213508230712, "loss": 0.1235, "step": 70610 }, { "epoch": 0.12520091242833423, "grad_norm": 0.65625, "learning_rate": 0.0004991770356666769, "loss": 0.2906, "step": 70612 }, { "epoch": 0.12520445859364404, "grad_norm": 0.470703125, "learning_rate": 0.0004991327231379338, "loss": 0.1905, "step": 70614 }, { "epoch": 0.12520800475895386, "grad_norm": 0.1650390625, "learning_rate": 0.0004990884132370345, "loss": 0.1298, "step": 70616 }, { "epoch": 0.12521155092426367, "grad_norm": 0.361328125, "learning_rate": 0.0004990441059641737, "loss": 0.2116, "step": 70618 }, { "epoch": 0.12521509708957348, "grad_norm": 0.4765625, "learning_rate": 0.0004989998013195449, "loss": 0.1815, "step": 70620 }, { "epoch": 0.1252186432548833, "grad_norm": 0.2060546875, "learning_rate": 0.0004989554993033419, "loss": 0.128, "step": 70622 }, { "epoch": 0.12522218942019311, "grad_norm": 0.2421875, "learning_rate": 0.0004989111999157584, "loss": 0.1866, "step": 70624 }, { "epoch": 0.12522573558550293, "grad_norm": 0.51171875, "learning_rate": 0.0004988669031569879, "loss": 0.191, "step": 70626 }, { "epoch": 0.12522928175081274, "grad_norm": 0.208984375, "learning_rate": 0.0004988226090272246, "loss": 0.1612, "step": 70628 }, { "epoch": 0.12523282791612256, "grad_norm": 0.447265625, "learning_rate": 0.0004987783175266622, "loss": 0.1593, "step": 70630 }, { "epoch": 0.12523637408143237, "grad_norm": 0.5390625, "learning_rate": 0.0004987340286554939, "loss": 0.2095, "step": 70632 }, { "epoch": 0.1252399202467422, "grad_norm": 0.490234375, "learning_rate": 0.0004986897424139136, "loss": 0.2056, "step": 70634 }, { "epoch": 0.125243466412052, "grad_norm": 4.71875, "learning_rate": 0.0004986454588021154, "loss": 0.2737, "step": 70636 }, { "epoch": 0.12524701257736182, "grad_norm": 0.62109375, "learning_rate": 0.0004986011778202926, "loss": 0.1869, "step": 70638 }, { "epoch": 0.12525055874267163, "grad_norm": 0.99609375, "learning_rate": 0.0004985568994686389, "loss": 0.3766, "step": 70640 }, { "epoch": 0.12525410490798145, "grad_norm": 1.7421875, "learning_rate": 0.0004985126237473479, "loss": 0.1838, "step": 70642 }, { "epoch": 0.12525765107329126, "grad_norm": 0.2412109375, "learning_rate": 0.0004984683506566132, "loss": 0.1944, "step": 70644 }, { "epoch": 0.12526119723860107, "grad_norm": 0.32421875, "learning_rate": 0.0004984240801966287, "loss": 0.1724, "step": 70646 }, { "epoch": 0.1252647434039109, "grad_norm": 0.37890625, "learning_rate": 0.0004983798123675879, "loss": 0.1797, "step": 70648 }, { "epoch": 0.1252682895692207, "grad_norm": 1.1484375, "learning_rate": 0.0004983355471696842, "loss": 0.1676, "step": 70650 }, { "epoch": 0.12527183573453052, "grad_norm": 0.25, "learning_rate": 0.0004982912846031111, "loss": 0.2406, "step": 70652 }, { "epoch": 0.12527538189984033, "grad_norm": 0.2421875, "learning_rate": 0.0004982470246680628, "loss": 0.1879, "step": 70654 }, { "epoch": 0.12527892806515015, "grad_norm": 14.625, "learning_rate": 0.0004982027673647324, "loss": 0.2013, "step": 70656 }, { "epoch": 0.12528247423045996, "grad_norm": 0.546875, "learning_rate": 0.0004981585126933134, "loss": 0.1687, "step": 70658 }, { "epoch": 0.12528602039576978, "grad_norm": 0.2734375, "learning_rate": 0.0004981142606539997, "loss": 0.1997, "step": 70660 }, { "epoch": 0.1252895665610796, "grad_norm": 0.193359375, "learning_rate": 0.0004980700112469841, "loss": 0.1283, "step": 70662 }, { "epoch": 0.1252931127263894, "grad_norm": 0.302734375, "learning_rate": 0.0004980257644724614, "loss": 0.22, "step": 70664 }, { "epoch": 0.12529665889169922, "grad_norm": 0.25390625, "learning_rate": 0.0004979815203306237, "loss": 0.1553, "step": 70666 }, { "epoch": 0.12530020505700903, "grad_norm": 0.431640625, "learning_rate": 0.0004979372788216654, "loss": 0.1795, "step": 70668 }, { "epoch": 0.12530375122231885, "grad_norm": 0.271484375, "learning_rate": 0.0004978930399457798, "loss": 0.1979, "step": 70670 }, { "epoch": 0.12530729738762866, "grad_norm": 19.5, "learning_rate": 0.0004978488037031601, "loss": 0.2542, "step": 70672 }, { "epoch": 0.12531084355293848, "grad_norm": 0.5, "learning_rate": 0.0004978045700940001, "loss": 0.3752, "step": 70674 }, { "epoch": 0.1253143897182483, "grad_norm": 0.94921875, "learning_rate": 0.0004977603391184929, "loss": 0.2494, "step": 70676 }, { "epoch": 0.1253179358835581, "grad_norm": 0.65625, "learning_rate": 0.0004977161107768323, "loss": 0.2149, "step": 70678 }, { "epoch": 0.12532148204886792, "grad_norm": 2.4375, "learning_rate": 0.0004976718850692116, "loss": 0.254, "step": 70680 }, { "epoch": 0.12532502821417774, "grad_norm": 0.31640625, "learning_rate": 0.0004976276619958243, "loss": 0.1157, "step": 70682 }, { "epoch": 0.12532857437948755, "grad_norm": 0.287109375, "learning_rate": 0.0004975834415568633, "loss": 0.1504, "step": 70684 }, { "epoch": 0.12533212054479737, "grad_norm": 0.80078125, "learning_rate": 0.0004975392237525228, "loss": 0.1952, "step": 70686 }, { "epoch": 0.12533566671010718, "grad_norm": 0.54296875, "learning_rate": 0.0004974950085829957, "loss": 0.1807, "step": 70688 }, { "epoch": 0.125339212875417, "grad_norm": 0.2734375, "learning_rate": 0.0004974507960484755, "loss": 0.1743, "step": 70690 }, { "epoch": 0.1253427590407268, "grad_norm": 0.33984375, "learning_rate": 0.0004974065861491555, "loss": 0.1532, "step": 70692 }, { "epoch": 0.12534630520603662, "grad_norm": 0.80078125, "learning_rate": 0.0004973623788852287, "loss": 0.1133, "step": 70694 }, { "epoch": 0.12534985137134647, "grad_norm": 0.5390625, "learning_rate": 0.0004973181742568895, "loss": 0.1994, "step": 70696 }, { "epoch": 0.12535339753665628, "grad_norm": 0.345703125, "learning_rate": 0.0004972739722643299, "loss": 0.1432, "step": 70698 }, { "epoch": 0.1253569437019661, "grad_norm": 0.2734375, "learning_rate": 0.0004972297729077442, "loss": 0.167, "step": 70700 }, { "epoch": 0.1253604898672759, "grad_norm": 0.33984375, "learning_rate": 0.000497185576187325, "loss": 0.2135, "step": 70702 }, { "epoch": 0.12536403603258572, "grad_norm": 1.21875, "learning_rate": 0.0004971413821032664, "loss": 0.1899, "step": 70704 }, { "epoch": 0.12536758219789554, "grad_norm": 0.72265625, "learning_rate": 0.0004970971906557608, "loss": 0.1525, "step": 70706 }, { "epoch": 0.12537112836320535, "grad_norm": 1.796875, "learning_rate": 0.000497053001845002, "loss": 0.3424, "step": 70708 }, { "epoch": 0.12537467452851517, "grad_norm": 0.5, "learning_rate": 0.0004970088156711833, "loss": 0.1924, "step": 70710 }, { "epoch": 0.12537822069382498, "grad_norm": 0.22265625, "learning_rate": 0.0004969646321344978, "loss": 0.1862, "step": 70712 }, { "epoch": 0.1253817668591348, "grad_norm": 0.31640625, "learning_rate": 0.0004969204512351385, "loss": 0.1805, "step": 70714 }, { "epoch": 0.1253853130244446, "grad_norm": 0.4140625, "learning_rate": 0.0004968762729732987, "loss": 0.1563, "step": 70716 }, { "epoch": 0.12538885918975443, "grad_norm": 0.400390625, "learning_rate": 0.0004968320973491721, "loss": 0.1414, "step": 70718 }, { "epoch": 0.12539240535506424, "grad_norm": 0.447265625, "learning_rate": 0.0004967879243629515, "loss": 0.1836, "step": 70720 }, { "epoch": 0.12539595152037405, "grad_norm": 0.43359375, "learning_rate": 0.00049674375401483, "loss": 0.2655, "step": 70722 }, { "epoch": 0.12539949768568387, "grad_norm": 0.265625, "learning_rate": 0.0004966995863050008, "loss": 0.1525, "step": 70724 }, { "epoch": 0.12540304385099368, "grad_norm": 0.25390625, "learning_rate": 0.0004966554212336574, "loss": 0.1447, "step": 70726 }, { "epoch": 0.1254065900163035, "grad_norm": 0.6328125, "learning_rate": 0.0004966112588009926, "loss": 0.1984, "step": 70728 }, { "epoch": 0.1254101361816133, "grad_norm": 0.4375, "learning_rate": 0.0004965670990071998, "loss": 0.1986, "step": 70730 }, { "epoch": 0.12541368234692313, "grad_norm": 0.515625, "learning_rate": 0.000496522941852472, "loss": 0.1715, "step": 70732 }, { "epoch": 0.12541722851223294, "grad_norm": 5.6875, "learning_rate": 0.0004964787873370019, "loss": 0.2386, "step": 70734 }, { "epoch": 0.12542077467754276, "grad_norm": 0.60546875, "learning_rate": 0.0004964346354609834, "loss": 0.1856, "step": 70736 }, { "epoch": 0.12542432084285257, "grad_norm": 0.212890625, "learning_rate": 0.0004963904862246092, "loss": 0.187, "step": 70738 }, { "epoch": 0.12542786700816239, "grad_norm": 8.125, "learning_rate": 0.0004963463396280724, "loss": 0.1999, "step": 70740 }, { "epoch": 0.1254314131734722, "grad_norm": 0.53515625, "learning_rate": 0.0004963021956715658, "loss": 0.2082, "step": 70742 }, { "epoch": 0.12543495933878201, "grad_norm": 0.23828125, "learning_rate": 0.0004962580543552829, "loss": 0.2723, "step": 70744 }, { "epoch": 0.12543850550409183, "grad_norm": 0.25, "learning_rate": 0.0004962139156794166, "loss": 0.1544, "step": 70746 }, { "epoch": 0.12544205166940164, "grad_norm": 0.380859375, "learning_rate": 0.0004961697796441601, "loss": 0.1984, "step": 70748 }, { "epoch": 0.12544559783471146, "grad_norm": 0.38671875, "learning_rate": 0.000496125646249706, "loss": 0.1849, "step": 70750 }, { "epoch": 0.12544914400002127, "grad_norm": 0.48046875, "learning_rate": 0.0004960815154962475, "loss": 0.1276, "step": 70752 }, { "epoch": 0.1254526901653311, "grad_norm": 0.1806640625, "learning_rate": 0.0004960373873839779, "loss": 0.1809, "step": 70754 }, { "epoch": 0.1254562363306409, "grad_norm": 0.57421875, "learning_rate": 0.0004959932619130897, "loss": 0.1636, "step": 70756 }, { "epoch": 0.12545978249595072, "grad_norm": 0.306640625, "learning_rate": 0.0004959491390837763, "loss": 0.1764, "step": 70758 }, { "epoch": 0.12546332866126053, "grad_norm": 0.51953125, "learning_rate": 0.0004959050188962301, "loss": 0.1635, "step": 70760 }, { "epoch": 0.12546687482657035, "grad_norm": 3.796875, "learning_rate": 0.0004958609013506451, "loss": 0.2941, "step": 70762 }, { "epoch": 0.12547042099188016, "grad_norm": 0.427734375, "learning_rate": 0.0004958167864472131, "loss": 0.1773, "step": 70764 }, { "epoch": 0.12547396715718998, "grad_norm": 0.5, "learning_rate": 0.0004957726741861276, "loss": 0.2199, "step": 70766 }, { "epoch": 0.1254775133224998, "grad_norm": 0.3671875, "learning_rate": 0.0004957285645675818, "loss": 0.1704, "step": 70768 }, { "epoch": 0.1254810594878096, "grad_norm": 0.1640625, "learning_rate": 0.0004956844575917679, "loss": 0.1359, "step": 70770 }, { "epoch": 0.12548460565311942, "grad_norm": 0.33984375, "learning_rate": 0.0004956403532588794, "loss": 0.1531, "step": 70772 }, { "epoch": 0.12548815181842923, "grad_norm": 0.30078125, "learning_rate": 0.0004955962515691084, "loss": 0.3227, "step": 70774 }, { "epoch": 0.12549169798373905, "grad_norm": 3.5625, "learning_rate": 0.0004955521525226488, "loss": 0.4194, "step": 70776 }, { "epoch": 0.12549524414904886, "grad_norm": 0.279296875, "learning_rate": 0.000495508056119693, "loss": 0.1396, "step": 70778 }, { "epoch": 0.12549879031435868, "grad_norm": 0.54296875, "learning_rate": 0.0004954639623604337, "loss": 0.1672, "step": 70780 }, { "epoch": 0.1255023364796685, "grad_norm": 0.44140625, "learning_rate": 0.0004954198712450636, "loss": 0.2026, "step": 70782 }, { "epoch": 0.1255058826449783, "grad_norm": 0.408203125, "learning_rate": 0.0004953757827737762, "loss": 0.1633, "step": 70784 }, { "epoch": 0.12550942881028815, "grad_norm": 0.53125, "learning_rate": 0.0004953316969467639, "loss": 0.2904, "step": 70786 }, { "epoch": 0.12551297497559796, "grad_norm": 0.359375, "learning_rate": 0.0004952876137642194, "loss": 0.164, "step": 70788 }, { "epoch": 0.12551652114090778, "grad_norm": 0.44140625, "learning_rate": 0.0004952435332263357, "loss": 0.1971, "step": 70790 }, { "epoch": 0.1255200673062176, "grad_norm": 0.34765625, "learning_rate": 0.0004951994553333052, "loss": 0.1712, "step": 70792 }, { "epoch": 0.1255236134715274, "grad_norm": 1.296875, "learning_rate": 0.0004951553800853217, "loss": 0.1221, "step": 70794 }, { "epoch": 0.12552715963683722, "grad_norm": 0.353515625, "learning_rate": 0.0004951113074825764, "loss": 0.1874, "step": 70796 }, { "epoch": 0.12553070580214704, "grad_norm": 0.59375, "learning_rate": 0.0004950672375252633, "loss": 0.1977, "step": 70798 }, { "epoch": 0.12553425196745685, "grad_norm": 0.3359375, "learning_rate": 0.0004950231702135744, "loss": 0.1704, "step": 70800 }, { "epoch": 0.12553779813276666, "grad_norm": 0.42578125, "learning_rate": 0.0004949791055477033, "loss": 0.1648, "step": 70802 }, { "epoch": 0.12554134429807648, "grad_norm": 0.427734375, "learning_rate": 0.0004949350435278415, "loss": 0.1697, "step": 70804 }, { "epoch": 0.1255448904633863, "grad_norm": 0.5234375, "learning_rate": 0.0004948909841541828, "loss": 0.2566, "step": 70806 }, { "epoch": 0.1255484366286961, "grad_norm": 0.87109375, "learning_rate": 0.0004948469274269194, "loss": 0.1782, "step": 70808 }, { "epoch": 0.12555198279400592, "grad_norm": 0.72265625, "learning_rate": 0.0004948028733462441, "loss": 0.18, "step": 70810 }, { "epoch": 0.12555552895931574, "grad_norm": 0.6875, "learning_rate": 0.0004947588219123494, "loss": 0.1709, "step": 70812 }, { "epoch": 0.12555907512462555, "grad_norm": 0.279296875, "learning_rate": 0.0004947147731254278, "loss": 0.1782, "step": 70814 }, { "epoch": 0.12556262128993537, "grad_norm": 0.138671875, "learning_rate": 0.0004946707269856724, "loss": 0.1549, "step": 70816 }, { "epoch": 0.12556616745524518, "grad_norm": 0.271484375, "learning_rate": 0.0004946266834932754, "loss": 0.1531, "step": 70818 }, { "epoch": 0.125569713620555, "grad_norm": 0.703125, "learning_rate": 0.0004945826426484303, "loss": 0.1726, "step": 70820 }, { "epoch": 0.1255732597858648, "grad_norm": 0.52734375, "learning_rate": 0.0004945386044513283, "loss": 0.2129, "step": 70822 }, { "epoch": 0.12557680595117462, "grad_norm": 1.0234375, "learning_rate": 0.0004944945689021631, "loss": 0.1892, "step": 70824 }, { "epoch": 0.12558035211648444, "grad_norm": 0.453125, "learning_rate": 0.000494450536001127, "loss": 0.1963, "step": 70826 }, { "epoch": 0.12558389828179425, "grad_norm": 0.271484375, "learning_rate": 0.0004944065057484124, "loss": 0.1448, "step": 70828 }, { "epoch": 0.12558744444710407, "grad_norm": 0.79296875, "learning_rate": 0.0004943624781442121, "loss": 0.1441, "step": 70830 }, { "epoch": 0.12559099061241388, "grad_norm": 0.3046875, "learning_rate": 0.0004943184531887182, "loss": 0.1955, "step": 70832 }, { "epoch": 0.1255945367777237, "grad_norm": 1.3984375, "learning_rate": 0.0004942744308821238, "loss": 0.211, "step": 70834 }, { "epoch": 0.1255980829430335, "grad_norm": 0.474609375, "learning_rate": 0.0004942304112246212, "loss": 0.2358, "step": 70836 }, { "epoch": 0.12560162910834333, "grad_norm": 0.609375, "learning_rate": 0.0004941863942164029, "loss": 0.1445, "step": 70838 }, { "epoch": 0.12560517527365314, "grad_norm": 0.396484375, "learning_rate": 0.0004941423798576612, "loss": 0.1821, "step": 70840 }, { "epoch": 0.12560872143896296, "grad_norm": 0.287109375, "learning_rate": 0.000494098368148589, "loss": 0.1936, "step": 70842 }, { "epoch": 0.12561226760427277, "grad_norm": 0.83203125, "learning_rate": 0.0004940543590893786, "loss": 0.1673, "step": 70844 }, { "epoch": 0.12561581376958258, "grad_norm": 0.609375, "learning_rate": 0.0004940103526802225, "loss": 0.1815, "step": 70846 }, { "epoch": 0.1256193599348924, "grad_norm": 2.078125, "learning_rate": 0.0004939663489213132, "loss": 0.177, "step": 70848 }, { "epoch": 0.12562290610020221, "grad_norm": 0.458984375, "learning_rate": 0.0004939223478128426, "loss": 0.1661, "step": 70850 }, { "epoch": 0.12562645226551203, "grad_norm": 0.1396484375, "learning_rate": 0.0004938783493550043, "loss": 0.1784, "step": 70852 }, { "epoch": 0.12562999843082184, "grad_norm": 0.2373046875, "learning_rate": 0.0004938343535479895, "loss": 0.2088, "step": 70854 }, { "epoch": 0.12563354459613166, "grad_norm": 0.875, "learning_rate": 0.0004937903603919912, "loss": 0.2392, "step": 70856 }, { "epoch": 0.12563709076144147, "grad_norm": 0.25390625, "learning_rate": 0.0004937463698872019, "loss": 0.1191, "step": 70858 }, { "epoch": 0.1256406369267513, "grad_norm": 0.2431640625, "learning_rate": 0.0004937023820338137, "loss": 0.2262, "step": 70860 }, { "epoch": 0.1256441830920611, "grad_norm": 1.8359375, "learning_rate": 0.0004936583968320194, "loss": 0.1937, "step": 70862 }, { "epoch": 0.12564772925737092, "grad_norm": 0.275390625, "learning_rate": 0.0004936144142820105, "loss": 0.1622, "step": 70864 }, { "epoch": 0.12565127542268073, "grad_norm": 0.30078125, "learning_rate": 0.0004935704343839802, "loss": 0.1974, "step": 70866 }, { "epoch": 0.12565482158799055, "grad_norm": 0.435546875, "learning_rate": 0.0004935264571381206, "loss": 0.1347, "step": 70868 }, { "epoch": 0.12565836775330036, "grad_norm": 0.490234375, "learning_rate": 0.0004934824825446241, "loss": 0.1603, "step": 70870 }, { "epoch": 0.12566191391861017, "grad_norm": 0.2890625, "learning_rate": 0.0004934385106036825, "loss": 0.1965, "step": 70872 }, { "epoch": 0.12566546008392, "grad_norm": 0.80078125, "learning_rate": 0.000493394541315489, "loss": 0.2247, "step": 70874 }, { "epoch": 0.12566900624922983, "grad_norm": 0.51953125, "learning_rate": 0.0004933505746802353, "loss": 0.1976, "step": 70876 }, { "epoch": 0.12567255241453965, "grad_norm": 0.361328125, "learning_rate": 0.0004933066106981138, "loss": 0.1779, "step": 70878 }, { "epoch": 0.12567609857984946, "grad_norm": 0.515625, "learning_rate": 0.0004932626493693167, "loss": 0.1759, "step": 70880 }, { "epoch": 0.12567964474515927, "grad_norm": 0.96484375, "learning_rate": 0.0004932186906940362, "loss": 0.1877, "step": 70882 }, { "epoch": 0.1256831909104691, "grad_norm": 0.3671875, "learning_rate": 0.0004931747346724652, "loss": 0.1642, "step": 70884 }, { "epoch": 0.1256867370757789, "grad_norm": 0.59765625, "learning_rate": 0.0004931307813047948, "loss": 0.1675, "step": 70886 }, { "epoch": 0.12569028324108872, "grad_norm": 0.73828125, "learning_rate": 0.0004930868305912181, "loss": 0.19, "step": 70888 }, { "epoch": 0.12569382940639853, "grad_norm": 0.39453125, "learning_rate": 0.0004930428825319268, "loss": 0.2325, "step": 70890 }, { "epoch": 0.12569737557170835, "grad_norm": 0.2392578125, "learning_rate": 0.0004929989371271141, "loss": 0.168, "step": 70892 }, { "epoch": 0.12570092173701816, "grad_norm": 0.263671875, "learning_rate": 0.0004929549943769707, "loss": 0.1706, "step": 70894 }, { "epoch": 0.12570446790232798, "grad_norm": 1.796875, "learning_rate": 0.0004929110542816899, "loss": 0.2728, "step": 70896 }, { "epoch": 0.1257080140676378, "grad_norm": 0.8125, "learning_rate": 0.0004928671168414636, "loss": 0.1947, "step": 70898 }, { "epoch": 0.1257115602329476, "grad_norm": 0.3828125, "learning_rate": 0.0004928231820564834, "loss": 0.2006, "step": 70900 }, { "epoch": 0.12571510639825742, "grad_norm": 0.4453125, "learning_rate": 0.0004927792499269427, "loss": 0.2951, "step": 70902 }, { "epoch": 0.12571865256356723, "grad_norm": 0.50390625, "learning_rate": 0.0004927353204530321, "loss": 0.1457, "step": 70904 }, { "epoch": 0.12572219872887705, "grad_norm": 0.498046875, "learning_rate": 0.0004926913936349448, "loss": 0.2133, "step": 70906 }, { "epoch": 0.12572574489418686, "grad_norm": 1.1796875, "learning_rate": 0.0004926474694728723, "loss": 0.2023, "step": 70908 }, { "epoch": 0.12572929105949668, "grad_norm": 0.36328125, "learning_rate": 0.0004926035479670075, "loss": 0.2028, "step": 70910 }, { "epoch": 0.1257328372248065, "grad_norm": 0.25390625, "learning_rate": 0.0004925596291175414, "loss": 0.1785, "step": 70912 }, { "epoch": 0.1257363833901163, "grad_norm": 0.34765625, "learning_rate": 0.000492515712924667, "loss": 0.1759, "step": 70914 }, { "epoch": 0.12573992955542612, "grad_norm": 0.2578125, "learning_rate": 0.0004924717993885759, "loss": 0.2248, "step": 70916 }, { "epoch": 0.12574347572073594, "grad_norm": 0.3671875, "learning_rate": 0.0004924278885094604, "loss": 0.198, "step": 70918 }, { "epoch": 0.12574702188604575, "grad_norm": 0.2353515625, "learning_rate": 0.0004923839802875123, "loss": 0.1448, "step": 70920 }, { "epoch": 0.12575056805135557, "grad_norm": 0.29296875, "learning_rate": 0.0004923400747229235, "loss": 0.2671, "step": 70922 }, { "epoch": 0.12575411421666538, "grad_norm": 0.2236328125, "learning_rate": 0.0004922961718158865, "loss": 0.152, "step": 70924 }, { "epoch": 0.1257576603819752, "grad_norm": 0.45703125, "learning_rate": 0.000492252271566593, "loss": 0.1868, "step": 70926 }, { "epoch": 0.125761206547285, "grad_norm": 0.64453125, "learning_rate": 0.0004922083739752351, "loss": 0.1595, "step": 70928 }, { "epoch": 0.12576475271259482, "grad_norm": 0.5234375, "learning_rate": 0.0004921644790420043, "loss": 0.1896, "step": 70930 }, { "epoch": 0.12576829887790464, "grad_norm": 0.46484375, "learning_rate": 0.0004921205867670934, "loss": 0.204, "step": 70932 }, { "epoch": 0.12577184504321445, "grad_norm": 0.390625, "learning_rate": 0.000492076697150694, "loss": 0.1843, "step": 70934 }, { "epoch": 0.12577539120852427, "grad_norm": 0.55859375, "learning_rate": 0.0004920328101929979, "loss": 0.2658, "step": 70936 }, { "epoch": 0.12577893737383408, "grad_norm": 0.609375, "learning_rate": 0.0004919889258941972, "loss": 0.2635, "step": 70938 }, { "epoch": 0.1257824835391439, "grad_norm": 0.263671875, "learning_rate": 0.0004919450442544836, "loss": 0.1775, "step": 70940 }, { "epoch": 0.1257860297044537, "grad_norm": 0.5703125, "learning_rate": 0.0004919011652740495, "loss": 0.1744, "step": 70942 }, { "epoch": 0.12578957586976353, "grad_norm": 0.375, "learning_rate": 0.000491857288953086, "loss": 0.1475, "step": 70944 }, { "epoch": 0.12579312203507334, "grad_norm": 0.310546875, "learning_rate": 0.0004918134152917859, "loss": 0.1863, "step": 70946 }, { "epoch": 0.12579666820038315, "grad_norm": 0.236328125, "learning_rate": 0.0004917695442903401, "loss": 0.1868, "step": 70948 }, { "epoch": 0.12580021436569297, "grad_norm": 0.302734375, "learning_rate": 0.0004917256759489417, "loss": 0.1898, "step": 70950 }, { "epoch": 0.12580376053100278, "grad_norm": 0.52734375, "learning_rate": 0.0004916818102677814, "loss": 0.1592, "step": 70952 }, { "epoch": 0.1258073066963126, "grad_norm": 0.58984375, "learning_rate": 0.0004916379472470516, "loss": 0.1857, "step": 70954 }, { "epoch": 0.1258108528616224, "grad_norm": 1.7265625, "learning_rate": 0.0004915940868869442, "loss": 0.1602, "step": 70956 }, { "epoch": 0.12581439902693223, "grad_norm": 0.6171875, "learning_rate": 0.0004915502291876508, "loss": 0.184, "step": 70958 }, { "epoch": 0.12581794519224204, "grad_norm": 0.380859375, "learning_rate": 0.0004915063741493632, "loss": 0.154, "step": 70960 }, { "epoch": 0.12582149135755186, "grad_norm": 0.400390625, "learning_rate": 0.0004914625217722731, "loss": 0.2049, "step": 70962 }, { "epoch": 0.12582503752286167, "grad_norm": 0.8828125, "learning_rate": 0.0004914186720565727, "loss": 0.2981, "step": 70964 }, { "epoch": 0.12582858368817149, "grad_norm": 1.421875, "learning_rate": 0.0004913748250024534, "loss": 0.3061, "step": 70966 }, { "epoch": 0.12583212985348133, "grad_norm": 0.62890625, "learning_rate": 0.0004913309806101073, "loss": 0.1773, "step": 70968 }, { "epoch": 0.12583567601879114, "grad_norm": 0.859375, "learning_rate": 0.0004912871388797255, "loss": 0.1848, "step": 70970 }, { "epoch": 0.12583922218410096, "grad_norm": 1.7421875, "learning_rate": 0.0004912432998115005, "loss": 0.2052, "step": 70972 }, { "epoch": 0.12584276834941077, "grad_norm": 0.2216796875, "learning_rate": 0.0004911994634056238, "loss": 0.2302, "step": 70974 }, { "epoch": 0.1258463145147206, "grad_norm": 0.3515625, "learning_rate": 0.0004911556296622869, "loss": 0.369, "step": 70976 }, { "epoch": 0.1258498606800304, "grad_norm": 0.201171875, "learning_rate": 0.0004911117985816816, "loss": 0.163, "step": 70978 }, { "epoch": 0.12585340684534022, "grad_norm": 0.41796875, "learning_rate": 0.0004910679701639994, "loss": 0.1509, "step": 70980 }, { "epoch": 0.12585695301065003, "grad_norm": 0.353515625, "learning_rate": 0.0004910241444094323, "loss": 0.1619, "step": 70982 }, { "epoch": 0.12586049917595984, "grad_norm": 0.6640625, "learning_rate": 0.000490980321318172, "loss": 0.1855, "step": 70984 }, { "epoch": 0.12586404534126966, "grad_norm": 0.283203125, "learning_rate": 0.00049093650089041, "loss": 0.1917, "step": 70986 }, { "epoch": 0.12586759150657947, "grad_norm": 1.1015625, "learning_rate": 0.0004908926831263377, "loss": 0.1568, "step": 70988 }, { "epoch": 0.1258711376718893, "grad_norm": 1.34375, "learning_rate": 0.0004908488680261472, "loss": 0.4611, "step": 70990 }, { "epoch": 0.1258746838371991, "grad_norm": 0.53125, "learning_rate": 0.00049080505559003, "loss": 0.141, "step": 70992 }, { "epoch": 0.12587823000250892, "grad_norm": 0.2236328125, "learning_rate": 0.0004907612458181775, "loss": 0.1492, "step": 70994 }, { "epoch": 0.12588177616781873, "grad_norm": 0.1884765625, "learning_rate": 0.0004907174387107816, "loss": 0.1189, "step": 70996 }, { "epoch": 0.12588532233312855, "grad_norm": 0.56640625, "learning_rate": 0.0004906736342680333, "loss": 0.1651, "step": 70998 }, { "epoch": 0.12588886849843836, "grad_norm": 0.53515625, "learning_rate": 0.0004906298324901251, "loss": 0.1649, "step": 71000 }, { "epoch": 0.12589241466374818, "grad_norm": 0.1513671875, "learning_rate": 0.0004905860333772476, "loss": 0.1395, "step": 71002 }, { "epoch": 0.125895960829058, "grad_norm": 0.58203125, "learning_rate": 0.0004905422369295931, "loss": 0.2439, "step": 71004 }, { "epoch": 0.1258995069943678, "grad_norm": 0.58203125, "learning_rate": 0.0004904984431473525, "loss": 0.1684, "step": 71006 }, { "epoch": 0.12590305315967762, "grad_norm": 1.3046875, "learning_rate": 0.0004904546520307182, "loss": 0.1444, "step": 71008 }, { "epoch": 0.12590659932498743, "grad_norm": 0.8515625, "learning_rate": 0.0004904108635798807, "loss": 0.1942, "step": 71010 }, { "epoch": 0.12591014549029725, "grad_norm": 0.279296875, "learning_rate": 0.0004903670777950322, "loss": 0.2014, "step": 71012 }, { "epoch": 0.12591369165560706, "grad_norm": 0.265625, "learning_rate": 0.000490323294676364, "loss": 0.1713, "step": 71014 }, { "epoch": 0.12591723782091688, "grad_norm": 0.490234375, "learning_rate": 0.0004902795142240677, "loss": 0.2247, "step": 71016 }, { "epoch": 0.1259207839862267, "grad_norm": 0.6875, "learning_rate": 0.0004902357364383345, "loss": 0.1713, "step": 71018 }, { "epoch": 0.1259243301515365, "grad_norm": 0.375, "learning_rate": 0.0004901919613193557, "loss": 0.1823, "step": 71020 }, { "epoch": 0.12592787631684632, "grad_norm": 0.43359375, "learning_rate": 0.0004901481888673234, "loss": 0.173, "step": 71022 }, { "epoch": 0.12593142248215614, "grad_norm": 0.29296875, "learning_rate": 0.0004901044190824286, "loss": 0.1692, "step": 71024 }, { "epoch": 0.12593496864746595, "grad_norm": 0.375, "learning_rate": 0.0004900606519648628, "loss": 0.2019, "step": 71026 }, { "epoch": 0.12593851481277576, "grad_norm": 0.416015625, "learning_rate": 0.0004900168875148174, "loss": 0.1647, "step": 71028 }, { "epoch": 0.12594206097808558, "grad_norm": 1.59375, "learning_rate": 0.0004899731257324837, "loss": 0.1984, "step": 71030 }, { "epoch": 0.1259456071433954, "grad_norm": 1.1484375, "learning_rate": 0.0004899293666180537, "loss": 0.2219, "step": 71032 }, { "epoch": 0.1259491533087052, "grad_norm": 0.96875, "learning_rate": 0.0004898856101717176, "loss": 0.2055, "step": 71034 }, { "epoch": 0.12595269947401502, "grad_norm": 0.66796875, "learning_rate": 0.0004898418563936678, "loss": 0.2307, "step": 71036 }, { "epoch": 0.12595624563932484, "grad_norm": 0.2392578125, "learning_rate": 0.000489798105284095, "loss": 0.1687, "step": 71038 }, { "epoch": 0.12595979180463465, "grad_norm": 0.6328125, "learning_rate": 0.0004897543568431914, "loss": 0.1683, "step": 71040 }, { "epoch": 0.12596333796994447, "grad_norm": 0.5, "learning_rate": 0.0004897106110711472, "loss": 0.1729, "step": 71042 }, { "epoch": 0.12596688413525428, "grad_norm": 0.390625, "learning_rate": 0.0004896668679681546, "loss": 0.2254, "step": 71044 }, { "epoch": 0.1259704303005641, "grad_norm": 0.236328125, "learning_rate": 0.0004896231275344046, "loss": 0.1878, "step": 71046 }, { "epoch": 0.1259739764658739, "grad_norm": 0.3984375, "learning_rate": 0.0004895793897700885, "loss": 0.1448, "step": 71048 }, { "epoch": 0.12597752263118372, "grad_norm": 0.4765625, "learning_rate": 0.0004895356546753976, "loss": 0.1477, "step": 71050 }, { "epoch": 0.12598106879649354, "grad_norm": 0.3515625, "learning_rate": 0.0004894919222505227, "loss": 0.2033, "step": 71052 }, { "epoch": 0.12598461496180335, "grad_norm": 0.71875, "learning_rate": 0.000489448192495656, "loss": 0.1547, "step": 71054 }, { "epoch": 0.12598816112711317, "grad_norm": 0.97265625, "learning_rate": 0.0004894044654109881, "loss": 0.1815, "step": 71056 }, { "epoch": 0.125991707292423, "grad_norm": 0.279296875, "learning_rate": 0.0004893607409967105, "loss": 0.1248, "step": 71058 }, { "epoch": 0.12599525345773283, "grad_norm": 0.236328125, "learning_rate": 0.000489317019253014, "loss": 0.3829, "step": 71060 }, { "epoch": 0.12599879962304264, "grad_norm": 1.359375, "learning_rate": 0.0004892733001800903, "loss": 0.1908, "step": 71062 }, { "epoch": 0.12600234578835245, "grad_norm": 0.58984375, "learning_rate": 0.0004892295837781306, "loss": 0.1839, "step": 71064 }, { "epoch": 0.12600589195366227, "grad_norm": 0.3203125, "learning_rate": 0.0004891858700473258, "loss": 0.2277, "step": 71066 }, { "epoch": 0.12600943811897208, "grad_norm": 0.9765625, "learning_rate": 0.0004891421589878674, "loss": 0.1684, "step": 71068 }, { "epoch": 0.1260129842842819, "grad_norm": 0.51171875, "learning_rate": 0.0004890984505999458, "loss": 0.1194, "step": 71070 }, { "epoch": 0.1260165304495917, "grad_norm": 2.203125, "learning_rate": 0.0004890547448837531, "loss": 0.2496, "step": 71072 }, { "epoch": 0.12602007661490153, "grad_norm": 0.3125, "learning_rate": 0.0004890110418394801, "loss": 0.1704, "step": 71074 }, { "epoch": 0.12602362278021134, "grad_norm": 0.259765625, "learning_rate": 0.000488967341467318, "loss": 0.245, "step": 71076 }, { "epoch": 0.12602716894552116, "grad_norm": 0.71484375, "learning_rate": 0.0004889236437674575, "loss": 0.1816, "step": 71078 }, { "epoch": 0.12603071511083097, "grad_norm": 0.828125, "learning_rate": 0.0004888799487400902, "loss": 0.2125, "step": 71080 }, { "epoch": 0.12603426127614079, "grad_norm": 1.546875, "learning_rate": 0.0004888362563854071, "loss": 0.4848, "step": 71082 }, { "epoch": 0.1260378074414506, "grad_norm": 0.390625, "learning_rate": 0.0004887925667035992, "loss": 0.1609, "step": 71084 }, { "epoch": 0.12604135360676041, "grad_norm": 0.46484375, "learning_rate": 0.0004887488796948575, "loss": 0.1973, "step": 71086 }, { "epoch": 0.12604489977207023, "grad_norm": 0.80078125, "learning_rate": 0.000488705195359373, "loss": 0.2543, "step": 71088 }, { "epoch": 0.12604844593738004, "grad_norm": 0.318359375, "learning_rate": 0.0004886615136973374, "loss": 0.1628, "step": 71090 }, { "epoch": 0.12605199210268986, "grad_norm": 0.37890625, "learning_rate": 0.0004886178347089407, "loss": 0.1465, "step": 71092 }, { "epoch": 0.12605553826799967, "grad_norm": 0.51953125, "learning_rate": 0.0004885741583943747, "loss": 0.1821, "step": 71094 }, { "epoch": 0.1260590844333095, "grad_norm": 0.6015625, "learning_rate": 0.0004885304847538298, "loss": 0.2267, "step": 71096 }, { "epoch": 0.1260626305986193, "grad_norm": 0.255859375, "learning_rate": 0.000488486813787498, "loss": 0.2336, "step": 71098 }, { "epoch": 0.12606617676392912, "grad_norm": 0.38671875, "learning_rate": 0.0004884431454955692, "loss": 0.1808, "step": 71100 }, { "epoch": 0.12606972292923893, "grad_norm": 0.83203125, "learning_rate": 0.0004883994798782351, "loss": 0.2289, "step": 71102 }, { "epoch": 0.12607326909454875, "grad_norm": 0.28515625, "learning_rate": 0.0004883558169356864, "loss": 0.1823, "step": 71104 }, { "epoch": 0.12607681525985856, "grad_norm": 0.2578125, "learning_rate": 0.000488312156668114, "loss": 0.1467, "step": 71106 }, { "epoch": 0.12608036142516837, "grad_norm": 1.1328125, "learning_rate": 0.000488268499075709, "loss": 0.1513, "step": 71108 }, { "epoch": 0.1260839075904782, "grad_norm": 0.318359375, "learning_rate": 0.0004882248441586619, "loss": 0.1889, "step": 71110 }, { "epoch": 0.126087453755788, "grad_norm": 0.70703125, "learning_rate": 0.0004881811919171643, "loss": 0.4851, "step": 71112 }, { "epoch": 0.12609099992109782, "grad_norm": 0.56640625, "learning_rate": 0.00048813754235140676, "loss": 0.2199, "step": 71114 }, { "epoch": 0.12609454608640763, "grad_norm": 1.7109375, "learning_rate": 0.00048809389546158015, "loss": 0.2567, "step": 71116 }, { "epoch": 0.12609809225171745, "grad_norm": 0.60546875, "learning_rate": 0.0004880502512478752, "loss": 0.1885, "step": 71118 }, { "epoch": 0.12610163841702726, "grad_norm": 0.2412109375, "learning_rate": 0.0004880066097104833, "loss": 0.1646, "step": 71120 }, { "epoch": 0.12610518458233708, "grad_norm": 0.83984375, "learning_rate": 0.00048796297084959493, "loss": 0.1703, "step": 71122 }, { "epoch": 0.1261087307476469, "grad_norm": 0.271484375, "learning_rate": 0.00048791933466540104, "loss": 0.3439, "step": 71124 }, { "epoch": 0.1261122769129567, "grad_norm": 0.357421875, "learning_rate": 0.00048787570115809244, "loss": 0.1958, "step": 71126 }, { "epoch": 0.12611582307826652, "grad_norm": 0.30078125, "learning_rate": 0.00048783207032785964, "loss": 0.1999, "step": 71128 }, { "epoch": 0.12611936924357633, "grad_norm": 0.4921875, "learning_rate": 0.00048778844217489437, "loss": 0.1317, "step": 71130 }, { "epoch": 0.12612291540888615, "grad_norm": 0.578125, "learning_rate": 0.0004877448166993863, "loss": 0.2015, "step": 71132 }, { "epoch": 0.12612646157419596, "grad_norm": 0.220703125, "learning_rate": 0.00048770119390152694, "loss": 0.1352, "step": 71134 }, { "epoch": 0.12613000773950578, "grad_norm": 6.09375, "learning_rate": 0.00048765757378150675, "loss": 0.2059, "step": 71136 }, { "epoch": 0.1261335539048156, "grad_norm": 0.29296875, "learning_rate": 0.0004876139563395171, "loss": 0.1617, "step": 71138 }, { "epoch": 0.1261371000701254, "grad_norm": 0.37890625, "learning_rate": 0.00048757034157574777, "loss": 0.1429, "step": 71140 }, { "epoch": 0.12614064623543522, "grad_norm": 1.109375, "learning_rate": 0.0004875267294903904, "loss": 0.1982, "step": 71142 }, { "epoch": 0.12614419240074504, "grad_norm": 0.39453125, "learning_rate": 0.0004874831200836352, "loss": 0.1204, "step": 71144 }, { "epoch": 0.12614773856605485, "grad_norm": 0.46484375, "learning_rate": 0.0004874395133556731, "loss": 0.1829, "step": 71146 }, { "epoch": 0.1261512847313647, "grad_norm": 1.453125, "learning_rate": 0.0004873959093066947, "loss": 0.1842, "step": 71148 }, { "epoch": 0.1261548308966745, "grad_norm": 0.1689453125, "learning_rate": 0.0004873523079368905, "loss": 0.1807, "step": 71150 }, { "epoch": 0.12615837706198432, "grad_norm": 0.451171875, "learning_rate": 0.0004873087092464517, "loss": 0.3386, "step": 71152 }, { "epoch": 0.12616192322729414, "grad_norm": 0.36328125, "learning_rate": 0.0004872651132355685, "loss": 0.1619, "step": 71154 }, { "epoch": 0.12616546939260395, "grad_norm": 0.34765625, "learning_rate": 0.0004872215199044323, "loss": 0.2218, "step": 71156 }, { "epoch": 0.12616901555791377, "grad_norm": 0.390625, "learning_rate": 0.00048717792925323267, "loss": 0.1567, "step": 71158 }, { "epoch": 0.12617256172322358, "grad_norm": 0.220703125, "learning_rate": 0.0004871343412821611, "loss": 0.1968, "step": 71160 }, { "epoch": 0.1261761078885334, "grad_norm": 1.2421875, "learning_rate": 0.0004870907559914079, "loss": 0.4381, "step": 71162 }, { "epoch": 0.1261796540538432, "grad_norm": 0.373046875, "learning_rate": 0.00048704717338116376, "loss": 0.1788, "step": 71164 }, { "epoch": 0.12618320021915302, "grad_norm": 0.384765625, "learning_rate": 0.0004870035934516192, "loss": 0.2037, "step": 71166 }, { "epoch": 0.12618674638446284, "grad_norm": 0.46875, "learning_rate": 0.0004869600162029645, "loss": 0.2172, "step": 71168 }, { "epoch": 0.12619029254977265, "grad_norm": 1.2890625, "learning_rate": 0.00048691644163539096, "loss": 0.2249, "step": 71170 }, { "epoch": 0.12619383871508247, "grad_norm": 0.423828125, "learning_rate": 0.00048687286974908873, "loss": 0.1624, "step": 71172 }, { "epoch": 0.12619738488039228, "grad_norm": 0.400390625, "learning_rate": 0.0004868293005442485, "loss": 0.2058, "step": 71174 }, { "epoch": 0.1262009310457021, "grad_norm": 0.40625, "learning_rate": 0.00048678573402106045, "loss": 0.1878, "step": 71176 }, { "epoch": 0.1262044772110119, "grad_norm": 0.59375, "learning_rate": 0.00048674217017971567, "loss": 0.2372, "step": 71178 }, { "epoch": 0.12620802337632173, "grad_norm": 0.16015625, "learning_rate": 0.00048669860902040435, "loss": 0.1546, "step": 71180 }, { "epoch": 0.12621156954163154, "grad_norm": 0.255859375, "learning_rate": 0.0004866550505433171, "loss": 0.203, "step": 71182 }, { "epoch": 0.12621511570694136, "grad_norm": 0.6015625, "learning_rate": 0.0004866114947486444, "loss": 0.1979, "step": 71184 }, { "epoch": 0.12621866187225117, "grad_norm": 1.3828125, "learning_rate": 0.0004865679416365764, "loss": 0.3838, "step": 71186 }, { "epoch": 0.12622220803756098, "grad_norm": 0.96484375, "learning_rate": 0.00048652439120730457, "loss": 0.1712, "step": 71188 }, { "epoch": 0.1262257542028708, "grad_norm": 0.5390625, "learning_rate": 0.00048648084346101806, "loss": 0.2037, "step": 71190 }, { "epoch": 0.1262293003681806, "grad_norm": 0.275390625, "learning_rate": 0.0004864372983979082, "loss": 0.3841, "step": 71192 }, { "epoch": 0.12623284653349043, "grad_norm": 0.380859375, "learning_rate": 0.00048639375601816507, "loss": 0.1413, "step": 71194 }, { "epoch": 0.12623639269880024, "grad_norm": 0.478515625, "learning_rate": 0.0004863502163219797, "loss": 0.1769, "step": 71196 }, { "epoch": 0.12623993886411006, "grad_norm": 1.09375, "learning_rate": 0.00048630667930954144, "loss": 0.1674, "step": 71198 }, { "epoch": 0.12624348502941987, "grad_norm": 0.234375, "learning_rate": 0.00048626314498104157, "loss": 0.1645, "step": 71200 }, { "epoch": 0.1262470311947297, "grad_norm": 0.64453125, "learning_rate": 0.00048621961333667027, "loss": 0.1725, "step": 71202 }, { "epoch": 0.1262505773600395, "grad_norm": 0.8125, "learning_rate": 0.00048617608437661784, "loss": 0.2492, "step": 71204 }, { "epoch": 0.12625412352534932, "grad_norm": 0.259765625, "learning_rate": 0.0004861325581010746, "loss": 0.1769, "step": 71206 }, { "epoch": 0.12625766969065913, "grad_norm": 0.3125, "learning_rate": 0.0004860890345102307, "loss": 0.251, "step": 71208 }, { "epoch": 0.12626121585596894, "grad_norm": 0.421875, "learning_rate": 0.00048604551360427705, "loss": 0.16, "step": 71210 }, { "epoch": 0.12626476202127876, "grad_norm": 0.296875, "learning_rate": 0.0004860019953834038, "loss": 0.1384, "step": 71212 }, { "epoch": 0.12626830818658857, "grad_norm": 0.423828125, "learning_rate": 0.0004859584798478011, "loss": 0.194, "step": 71214 }, { "epoch": 0.1262718543518984, "grad_norm": 0.32421875, "learning_rate": 0.00048591496699765924, "loss": 0.2204, "step": 71216 }, { "epoch": 0.1262754005172082, "grad_norm": 0.361328125, "learning_rate": 0.00048587145683316843, "loss": 0.1917, "step": 71218 }, { "epoch": 0.12627894668251802, "grad_norm": 0.55078125, "learning_rate": 0.00048582794935451973, "loss": 0.2016, "step": 71220 }, { "epoch": 0.12628249284782783, "grad_norm": 0.259765625, "learning_rate": 0.0004857844445619021, "loss": 0.1857, "step": 71222 }, { "epoch": 0.12628603901313765, "grad_norm": 0.359375, "learning_rate": 0.000485740942455507, "loss": 0.1756, "step": 71224 }, { "epoch": 0.12628958517844746, "grad_norm": 0.5546875, "learning_rate": 0.0004856974430355239, "loss": 0.1708, "step": 71226 }, { "epoch": 0.12629313134375728, "grad_norm": 0.84765625, "learning_rate": 0.0004856539463021438, "loss": 0.2125, "step": 71228 }, { "epoch": 0.1262966775090671, "grad_norm": 0.2080078125, "learning_rate": 0.000485610452255556, "loss": 0.1551, "step": 71230 }, { "epoch": 0.1263002236743769, "grad_norm": 0.9140625, "learning_rate": 0.00048556696089595156, "loss": 0.3229, "step": 71232 }, { "epoch": 0.12630376983968672, "grad_norm": 0.6171875, "learning_rate": 0.00048552347222352024, "loss": 0.191, "step": 71234 }, { "epoch": 0.12630731600499653, "grad_norm": 0.384765625, "learning_rate": 0.0004854799862384521, "loss": 0.1671, "step": 71236 }, { "epoch": 0.12631086217030635, "grad_norm": 0.1611328125, "learning_rate": 0.000485436502940938, "loss": 0.1333, "step": 71238 }, { "epoch": 0.1263144083356162, "grad_norm": 0.23046875, "learning_rate": 0.0004853930223311671, "loss": 0.1641, "step": 71240 }, { "epoch": 0.126317954500926, "grad_norm": 0.6328125, "learning_rate": 0.00048534954440933054, "loss": 0.2395, "step": 71242 }, { "epoch": 0.12632150066623582, "grad_norm": 0.294921875, "learning_rate": 0.00048530606917561767, "loss": 0.1823, "step": 71244 }, { "epoch": 0.12632504683154563, "grad_norm": 0.5234375, "learning_rate": 0.00048526259663021954, "loss": 0.1969, "step": 71246 }, { "epoch": 0.12632859299685545, "grad_norm": 0.6171875, "learning_rate": 0.0004852191267733252, "loss": 0.1669, "step": 71248 }, { "epoch": 0.12633213916216526, "grad_norm": 0.7890625, "learning_rate": 0.00048517565960512547, "loss": 0.1565, "step": 71250 }, { "epoch": 0.12633568532747508, "grad_norm": 0.80078125, "learning_rate": 0.0004851321951258104, "loss": 0.5296, "step": 71252 }, { "epoch": 0.1263392314927849, "grad_norm": 0.56640625, "learning_rate": 0.00048508873333556995, "loss": 0.1955, "step": 71254 }, { "epoch": 0.1263427776580947, "grad_norm": 0.27734375, "learning_rate": 0.00048504527423459416, "loss": 0.1854, "step": 71256 }, { "epoch": 0.12634632382340452, "grad_norm": 0.6015625, "learning_rate": 0.0004850018178230728, "loss": 0.1367, "step": 71258 }, { "epoch": 0.12634986998871434, "grad_norm": 0.478515625, "learning_rate": 0.00048495836410119664, "loss": 0.1415, "step": 71260 }, { "epoch": 0.12635341615402415, "grad_norm": 0.96875, "learning_rate": 0.00048491491306915525, "loss": 0.1692, "step": 71262 }, { "epoch": 0.12635696231933397, "grad_norm": 0.361328125, "learning_rate": 0.0004848714647271388, "loss": 0.1428, "step": 71264 }, { "epoch": 0.12636050848464378, "grad_norm": 0.828125, "learning_rate": 0.00048482801907533693, "loss": 0.1519, "step": 71266 }, { "epoch": 0.1263640546499536, "grad_norm": 1.0390625, "learning_rate": 0.0004847845761139403, "loss": 0.1799, "step": 71268 }, { "epoch": 0.1263676008152634, "grad_norm": 0.58203125, "learning_rate": 0.00048474113584313856, "loss": 0.1379, "step": 71270 }, { "epoch": 0.12637114698057322, "grad_norm": 0.421875, "learning_rate": 0.00048469769826312166, "loss": 0.1995, "step": 71272 }, { "epoch": 0.12637469314588304, "grad_norm": 0.625, "learning_rate": 0.0004846542633740797, "loss": 0.1779, "step": 71274 }, { "epoch": 0.12637823931119285, "grad_norm": 1.84375, "learning_rate": 0.00048461083117620226, "loss": 0.3811, "step": 71276 }, { "epoch": 0.12638178547650267, "grad_norm": 0.796875, "learning_rate": 0.0004845674016696801, "loss": 0.1878, "step": 71278 }, { "epoch": 0.12638533164181248, "grad_norm": 0.8828125, "learning_rate": 0.00048452397485470206, "loss": 0.2141, "step": 71280 }, { "epoch": 0.1263888778071223, "grad_norm": 0.333984375, "learning_rate": 0.000484480550731459, "loss": 0.2397, "step": 71282 }, { "epoch": 0.1263924239724321, "grad_norm": 0.228515625, "learning_rate": 0.0004844371293001401, "loss": 0.3164, "step": 71284 }, { "epoch": 0.12639597013774193, "grad_norm": 0.451171875, "learning_rate": 0.0004843937105609362, "loss": 0.172, "step": 71286 }, { "epoch": 0.12639951630305174, "grad_norm": 0.349609375, "learning_rate": 0.00048435029451403613, "loss": 0.1727, "step": 71288 }, { "epoch": 0.12640306246836155, "grad_norm": 0.23828125, "learning_rate": 0.0004843068811596305, "loss": 0.1376, "step": 71290 }, { "epoch": 0.12640660863367137, "grad_norm": 0.443359375, "learning_rate": 0.00048426347049790905, "loss": 0.1301, "step": 71292 }, { "epoch": 0.12641015479898118, "grad_norm": 0.310546875, "learning_rate": 0.0004842200625290613, "loss": 0.1003, "step": 71294 }, { "epoch": 0.126413700964291, "grad_norm": 0.48828125, "learning_rate": 0.0004841766572532775, "loss": 0.1392, "step": 71296 }, { "epoch": 0.1264172471296008, "grad_norm": 0.34375, "learning_rate": 0.00048413325467074693, "loss": 0.1741, "step": 71298 }, { "epoch": 0.12642079329491063, "grad_norm": 0.8671875, "learning_rate": 0.0004840898547816601, "loss": 0.1551, "step": 71300 }, { "epoch": 0.12642433946022044, "grad_norm": 1.4921875, "learning_rate": 0.0004840464575862064, "loss": 0.4949, "step": 71302 }, { "epoch": 0.12642788562553026, "grad_norm": 0.61328125, "learning_rate": 0.00048400306308457575, "loss": 0.2144, "step": 71304 }, { "epoch": 0.12643143179084007, "grad_norm": 1.609375, "learning_rate": 0.00048395967127695764, "loss": 0.2956, "step": 71306 }, { "epoch": 0.12643497795614989, "grad_norm": 0.326171875, "learning_rate": 0.00048391628216354224, "loss": 0.21, "step": 71308 }, { "epoch": 0.1264385241214597, "grad_norm": 0.357421875, "learning_rate": 0.00048387289574451924, "loss": 0.1179, "step": 71310 }, { "epoch": 0.12644207028676951, "grad_norm": 1.0703125, "learning_rate": 0.0004838295120200784, "loss": 0.2697, "step": 71312 }, { "epoch": 0.12644561645207933, "grad_norm": 0.435546875, "learning_rate": 0.0004837861309904093, "loss": 0.1503, "step": 71314 }, { "epoch": 0.12644916261738914, "grad_norm": 0.16796875, "learning_rate": 0.00048374275265570133, "loss": 0.137, "step": 71316 }, { "epoch": 0.12645270878269896, "grad_norm": 1.3203125, "learning_rate": 0.00048369937701614495, "loss": 0.3947, "step": 71318 }, { "epoch": 0.12645625494800877, "grad_norm": 0.41796875, "learning_rate": 0.0004836560040719296, "loss": 0.1491, "step": 71320 }, { "epoch": 0.1264598011133186, "grad_norm": 0.453125, "learning_rate": 0.0004836126338232447, "loss": 0.1655, "step": 71322 }, { "epoch": 0.1264633472786284, "grad_norm": 0.69140625, "learning_rate": 0.00048356926627027996, "loss": 0.2022, "step": 71324 }, { "epoch": 0.12646689344393822, "grad_norm": 0.33203125, "learning_rate": 0.00048352590141322537, "loss": 0.1414, "step": 71326 }, { "epoch": 0.12647043960924803, "grad_norm": 0.302734375, "learning_rate": 0.00048348253925227047, "loss": 0.2299, "step": 71328 }, { "epoch": 0.12647398577455787, "grad_norm": 0.48828125, "learning_rate": 0.0004834391797876048, "loss": 0.1815, "step": 71330 }, { "epoch": 0.1264775319398677, "grad_norm": 0.671875, "learning_rate": 0.00048339582301941804, "loss": 0.2265, "step": 71332 }, { "epoch": 0.1264810781051775, "grad_norm": 0.2421875, "learning_rate": 0.00048335246894789944, "loss": 0.181, "step": 71334 }, { "epoch": 0.12648462427048732, "grad_norm": 0.322265625, "learning_rate": 0.00048330911757323957, "loss": 0.1795, "step": 71336 }, { "epoch": 0.12648817043579713, "grad_norm": 0.333984375, "learning_rate": 0.0004832657688956268, "loss": 0.1787, "step": 71338 }, { "epoch": 0.12649171660110695, "grad_norm": 0.578125, "learning_rate": 0.0004832224229152516, "loss": 0.1857, "step": 71340 }, { "epoch": 0.12649526276641676, "grad_norm": 0.2099609375, "learning_rate": 0.000483179079632303, "loss": 0.1806, "step": 71342 }, { "epoch": 0.12649880893172658, "grad_norm": 0.46875, "learning_rate": 0.0004831357390469714, "loss": 0.1618, "step": 71344 }, { "epoch": 0.1265023550970364, "grad_norm": 0.4453125, "learning_rate": 0.0004830924011594452, "loss": 0.1572, "step": 71346 }, { "epoch": 0.1265059012623462, "grad_norm": 0.61328125, "learning_rate": 0.00048304906596991463, "loss": 0.1316, "step": 71348 }, { "epoch": 0.12650944742765602, "grad_norm": 0.23828125, "learning_rate": 0.0004830057334785692, "loss": 0.1354, "step": 71350 }, { "epoch": 0.12651299359296583, "grad_norm": 0.5859375, "learning_rate": 0.00048296240368559827, "loss": 0.2376, "step": 71352 }, { "epoch": 0.12651653975827565, "grad_norm": 0.423828125, "learning_rate": 0.00048291907659119145, "loss": 0.1428, "step": 71354 }, { "epoch": 0.12652008592358546, "grad_norm": 0.380859375, "learning_rate": 0.00048287575219553775, "loss": 0.1556, "step": 71356 }, { "epoch": 0.12652363208889528, "grad_norm": 0.22265625, "learning_rate": 0.00048283243049882744, "loss": 0.1496, "step": 71358 }, { "epoch": 0.1265271782542051, "grad_norm": 0.28125, "learning_rate": 0.00048278911150124944, "loss": 0.2459, "step": 71360 }, { "epoch": 0.1265307244195149, "grad_norm": 0.455078125, "learning_rate": 0.0004827457952029935, "loss": 0.2401, "step": 71362 }, { "epoch": 0.12653427058482472, "grad_norm": 0.296875, "learning_rate": 0.0004827024816042485, "loss": 0.2386, "step": 71364 }, { "epoch": 0.12653781675013454, "grad_norm": 0.32421875, "learning_rate": 0.00048265917070520467, "loss": 0.3965, "step": 71366 }, { "epoch": 0.12654136291544435, "grad_norm": 0.62109375, "learning_rate": 0.00048261586250605094, "loss": 0.1895, "step": 71368 }, { "epoch": 0.12654490908075416, "grad_norm": 0.328125, "learning_rate": 0.00048257255700697685, "loss": 0.1718, "step": 71370 }, { "epoch": 0.12654845524606398, "grad_norm": 0.337890625, "learning_rate": 0.00048252925420817175, "loss": 0.1328, "step": 71372 }, { "epoch": 0.1265520014113738, "grad_norm": 0.83203125, "learning_rate": 0.0004824859541098247, "loss": 0.2657, "step": 71374 }, { "epoch": 0.1265555475766836, "grad_norm": 0.345703125, "learning_rate": 0.000482442656712126, "loss": 0.1681, "step": 71376 }, { "epoch": 0.12655909374199342, "grad_norm": 1.09375, "learning_rate": 0.00048239936201526385, "loss": 0.2498, "step": 71378 }, { "epoch": 0.12656263990730324, "grad_norm": 1.2578125, "learning_rate": 0.0004823560700194283, "loss": 0.1405, "step": 71380 }, { "epoch": 0.12656618607261305, "grad_norm": 1.90625, "learning_rate": 0.00048231278072480874, "loss": 0.2602, "step": 71382 }, { "epoch": 0.12656973223792287, "grad_norm": 0.404296875, "learning_rate": 0.00048226949413159405, "loss": 0.1355, "step": 71384 }, { "epoch": 0.12657327840323268, "grad_norm": 0.2490234375, "learning_rate": 0.00048222621023997396, "loss": 0.1578, "step": 71386 }, { "epoch": 0.1265768245685425, "grad_norm": 1.8125, "learning_rate": 0.00048218292905013717, "loss": 0.238, "step": 71388 }, { "epoch": 0.1265803707338523, "grad_norm": 0.4765625, "learning_rate": 0.00048213965056227384, "loss": 0.1553, "step": 71390 }, { "epoch": 0.12658391689916212, "grad_norm": 0.310546875, "learning_rate": 0.0004820963747765725, "loss": 0.2008, "step": 71392 }, { "epoch": 0.12658746306447194, "grad_norm": 0.232421875, "learning_rate": 0.00048205310169322294, "loss": 0.1749, "step": 71394 }, { "epoch": 0.12659100922978175, "grad_norm": 0.392578125, "learning_rate": 0.0004820098313124138, "loss": 0.1595, "step": 71396 }, { "epoch": 0.12659455539509157, "grad_norm": 1.3828125, "learning_rate": 0.00048196656363433496, "loss": 0.2117, "step": 71398 }, { "epoch": 0.12659810156040138, "grad_norm": 0.32421875, "learning_rate": 0.00048192329865917545, "loss": 0.1559, "step": 71400 }, { "epoch": 0.1266016477257112, "grad_norm": 0.9765625, "learning_rate": 0.0004818800363871243, "loss": 0.1749, "step": 71402 }, { "epoch": 0.126605193891021, "grad_norm": 0.35546875, "learning_rate": 0.0004818367768183709, "loss": 0.1801, "step": 71404 }, { "epoch": 0.12660874005633083, "grad_norm": 0.33984375, "learning_rate": 0.00048179351995310406, "loss": 0.156, "step": 71406 }, { "epoch": 0.12661228622164064, "grad_norm": 0.83203125, "learning_rate": 0.0004817502657915135, "loss": 0.1814, "step": 71408 }, { "epoch": 0.12661583238695046, "grad_norm": 0.439453125, "learning_rate": 0.00048170701433378817, "loss": 0.2134, "step": 71410 }, { "epoch": 0.12661937855226027, "grad_norm": 0.1845703125, "learning_rate": 0.00048166376558011727, "loss": 0.1477, "step": 71412 }, { "epoch": 0.12662292471757008, "grad_norm": 0.3515625, "learning_rate": 0.0004816205195306896, "loss": 0.1729, "step": 71414 }, { "epoch": 0.1266264708828799, "grad_norm": 0.25390625, "learning_rate": 0.0004815772761856948, "loss": 0.1878, "step": 71416 }, { "epoch": 0.1266300170481897, "grad_norm": 0.322265625, "learning_rate": 0.0004815340355453218, "loss": 0.1715, "step": 71418 }, { "epoch": 0.12663356321349956, "grad_norm": 0.47265625, "learning_rate": 0.0004814907976097597, "loss": 0.2296, "step": 71420 }, { "epoch": 0.12663710937880937, "grad_norm": 0.435546875, "learning_rate": 0.0004814475623791976, "loss": 0.1922, "step": 71422 }, { "epoch": 0.12664065554411919, "grad_norm": 0.365234375, "learning_rate": 0.0004814043298538242, "loss": 0.1971, "step": 71424 }, { "epoch": 0.126644201709429, "grad_norm": 0.57421875, "learning_rate": 0.0004813611000338296, "loss": 0.1623, "step": 71426 }, { "epoch": 0.12664774787473881, "grad_norm": 0.263671875, "learning_rate": 0.00048131787291940157, "loss": 0.1822, "step": 71428 }, { "epoch": 0.12665129404004863, "grad_norm": 0.60546875, "learning_rate": 0.0004812746485107301, "loss": 0.175, "step": 71430 }, { "epoch": 0.12665484020535844, "grad_norm": 1.0625, "learning_rate": 0.00048123142680800364, "loss": 0.2724, "step": 71432 }, { "epoch": 0.12665838637066826, "grad_norm": 0.3359375, "learning_rate": 0.000481188207811412, "loss": 0.1758, "step": 71434 }, { "epoch": 0.12666193253597807, "grad_norm": 0.71484375, "learning_rate": 0.0004811449915211432, "loss": 0.2049, "step": 71436 }, { "epoch": 0.1266654787012879, "grad_norm": 2.765625, "learning_rate": 0.000481101777937387, "loss": 0.2281, "step": 71438 }, { "epoch": 0.1266690248665977, "grad_norm": 0.64453125, "learning_rate": 0.0004810585670603321, "loss": 0.2189, "step": 71440 }, { "epoch": 0.12667257103190752, "grad_norm": 0.427734375, "learning_rate": 0.0004810153588901676, "loss": 0.1607, "step": 71442 }, { "epoch": 0.12667611719721733, "grad_norm": 0.23828125, "learning_rate": 0.0004809721534270822, "loss": 0.1336, "step": 71444 }, { "epoch": 0.12667966336252715, "grad_norm": 0.609375, "learning_rate": 0.00048092895067126484, "loss": 0.1607, "step": 71446 }, { "epoch": 0.12668320952783696, "grad_norm": 0.40234375, "learning_rate": 0.00048088575062290494, "loss": 0.1975, "step": 71448 }, { "epoch": 0.12668675569314677, "grad_norm": 0.47265625, "learning_rate": 0.0004808425532821911, "loss": 0.1584, "step": 71450 }, { "epoch": 0.1266903018584566, "grad_norm": 1.5, "learning_rate": 0.00048079935864931236, "loss": 0.1883, "step": 71452 }, { "epoch": 0.1266938480237664, "grad_norm": 2.125, "learning_rate": 0.0004807561667244572, "loss": 0.42, "step": 71454 }, { "epoch": 0.12669739418907622, "grad_norm": 0.220703125, "learning_rate": 0.0004807129775078152, "loss": 0.1928, "step": 71456 }, { "epoch": 0.12670094035438603, "grad_norm": 0.3984375, "learning_rate": 0.00048066979099957484, "loss": 0.1889, "step": 71458 }, { "epoch": 0.12670448651969585, "grad_norm": 0.62890625, "learning_rate": 0.00048062660719992505, "loss": 0.2097, "step": 71460 }, { "epoch": 0.12670803268500566, "grad_norm": 0.2578125, "learning_rate": 0.0004805834261090548, "loss": 0.2314, "step": 71462 }, { "epoch": 0.12671157885031548, "grad_norm": 0.267578125, "learning_rate": 0.00048054024772715254, "loss": 0.2274, "step": 71464 }, { "epoch": 0.1267151250156253, "grad_norm": 0.94921875, "learning_rate": 0.00048049707205440794, "loss": 0.2842, "step": 71466 }, { "epoch": 0.1267186711809351, "grad_norm": 0.2216796875, "learning_rate": 0.0004804538990910087, "loss": 0.1889, "step": 71468 }, { "epoch": 0.12672221734624492, "grad_norm": 0.361328125, "learning_rate": 0.0004804107288371446, "loss": 0.1787, "step": 71470 }, { "epoch": 0.12672576351155473, "grad_norm": 0.21484375, "learning_rate": 0.0004803675612930038, "loss": 0.1456, "step": 71472 }, { "epoch": 0.12672930967686455, "grad_norm": 0.90234375, "learning_rate": 0.0004803243964587759, "loss": 0.1922, "step": 71474 }, { "epoch": 0.12673285584217436, "grad_norm": 0.81640625, "learning_rate": 0.00048028123433464847, "loss": 0.2163, "step": 71476 }, { "epoch": 0.12673640200748418, "grad_norm": 0.41015625, "learning_rate": 0.00048023807492081133, "loss": 0.2161, "step": 71478 }, { "epoch": 0.126739948172794, "grad_norm": 0.17578125, "learning_rate": 0.0004801949182174529, "loss": 0.1971, "step": 71480 }, { "epoch": 0.1267434943381038, "grad_norm": 0.78515625, "learning_rate": 0.0004801517642247618, "loss": 0.1447, "step": 71482 }, { "epoch": 0.12674704050341362, "grad_norm": 0.58984375, "learning_rate": 0.00048010861294292675, "loss": 0.2381, "step": 71484 }, { "epoch": 0.12675058666872344, "grad_norm": 0.3046875, "learning_rate": 0.0004800654643721364, "loss": 0.165, "step": 71486 }, { "epoch": 0.12675413283403325, "grad_norm": 1.3828125, "learning_rate": 0.0004800223185125798, "loss": 0.2136, "step": 71488 }, { "epoch": 0.12675767899934307, "grad_norm": 0.81640625, "learning_rate": 0.00047997917536444525, "loss": 0.1763, "step": 71490 }, { "epoch": 0.12676122516465288, "grad_norm": 0.94140625, "learning_rate": 0.00047993603492792213, "loss": 0.2042, "step": 71492 }, { "epoch": 0.1267647713299627, "grad_norm": 0.39453125, "learning_rate": 0.0004798928972031982, "loss": 0.1897, "step": 71494 }, { "epoch": 0.1267683174952725, "grad_norm": 0.384765625, "learning_rate": 0.0004798497621904626, "loss": 0.1278, "step": 71496 }, { "epoch": 0.12677186366058232, "grad_norm": 0.2451171875, "learning_rate": 0.00047980662988990396, "loss": 0.1539, "step": 71498 }, { "epoch": 0.12677540982589214, "grad_norm": 0.46875, "learning_rate": 0.00047976350030171105, "loss": 0.4209, "step": 71500 }, { "epoch": 0.12677895599120195, "grad_norm": 0.337890625, "learning_rate": 0.0004797203734260722, "loss": 0.1899, "step": 71502 }, { "epoch": 0.12678250215651177, "grad_norm": 0.314453125, "learning_rate": 0.0004796772492631758, "loss": 0.1772, "step": 71504 }, { "epoch": 0.12678604832182158, "grad_norm": 0.6640625, "learning_rate": 0.00047963412781321113, "loss": 0.1645, "step": 71506 }, { "epoch": 0.1267895944871314, "grad_norm": 0.306640625, "learning_rate": 0.0004795910090763664, "loss": 0.1815, "step": 71508 }, { "epoch": 0.1267931406524412, "grad_norm": 0.5703125, "learning_rate": 0.00047954789305283024, "loss": 0.1659, "step": 71510 }, { "epoch": 0.12679668681775105, "grad_norm": 0.54296875, "learning_rate": 0.00047950477974279096, "loss": 0.1658, "step": 71512 }, { "epoch": 0.12680023298306087, "grad_norm": 0.404296875, "learning_rate": 0.00047946166914643757, "loss": 0.2187, "step": 71514 }, { "epoch": 0.12680377914837068, "grad_norm": 0.75390625, "learning_rate": 0.00047941856126395833, "loss": 0.1739, "step": 71516 }, { "epoch": 0.1268073253136805, "grad_norm": 0.5078125, "learning_rate": 0.00047937545609554196, "loss": 0.1952, "step": 71518 }, { "epoch": 0.1268108714789903, "grad_norm": 0.376953125, "learning_rate": 0.0004793323536413767, "loss": 0.1695, "step": 71520 }, { "epoch": 0.12681441764430013, "grad_norm": 1.859375, "learning_rate": 0.00047928925390165104, "loss": 0.1425, "step": 71522 }, { "epoch": 0.12681796380960994, "grad_norm": 0.2578125, "learning_rate": 0.00047924615687655413, "loss": 0.1647, "step": 71524 }, { "epoch": 0.12682150997491975, "grad_norm": 1.0703125, "learning_rate": 0.0004792030625662735, "loss": 0.3936, "step": 71526 }, { "epoch": 0.12682505614022957, "grad_norm": 0.216796875, "learning_rate": 0.0004791599709709982, "loss": 0.1175, "step": 71528 }, { "epoch": 0.12682860230553938, "grad_norm": 0.2294921875, "learning_rate": 0.00047911688209091634, "loss": 0.1627, "step": 71530 }, { "epoch": 0.1268321484708492, "grad_norm": 0.4375, "learning_rate": 0.0004790737959262172, "loss": 0.1793, "step": 71532 }, { "epoch": 0.126835694636159, "grad_norm": 1.4609375, "learning_rate": 0.0004790307124770879, "loss": 0.4609, "step": 71534 }, { "epoch": 0.12683924080146883, "grad_norm": 0.3828125, "learning_rate": 0.00047898763174371794, "loss": 0.2047, "step": 71536 }, { "epoch": 0.12684278696677864, "grad_norm": 0.251953125, "learning_rate": 0.0004789445537262953, "loss": 0.1412, "step": 71538 }, { "epoch": 0.12684633313208846, "grad_norm": 0.93359375, "learning_rate": 0.00047890147842500854, "loss": 0.2298, "step": 71540 }, { "epoch": 0.12684987929739827, "grad_norm": 0.35546875, "learning_rate": 0.0004788584058400459, "loss": 0.1396, "step": 71542 }, { "epoch": 0.12685342546270809, "grad_norm": 0.72265625, "learning_rate": 0.00047881533597159544, "loss": 0.1986, "step": 71544 }, { "epoch": 0.1268569716280179, "grad_norm": 0.1650390625, "learning_rate": 0.00047877226881984613, "loss": 0.3333, "step": 71546 }, { "epoch": 0.12686051779332772, "grad_norm": 0.310546875, "learning_rate": 0.00047872920438498626, "loss": 0.1733, "step": 71548 }, { "epoch": 0.12686406395863753, "grad_norm": 0.267578125, "learning_rate": 0.00047868614266720377, "loss": 0.2086, "step": 71550 }, { "epoch": 0.12686761012394734, "grad_norm": 0.375, "learning_rate": 0.0004786430836666873, "loss": 0.1589, "step": 71552 }, { "epoch": 0.12687115628925716, "grad_norm": 1.4375, "learning_rate": 0.00047860002738362475, "loss": 0.2324, "step": 71554 }, { "epoch": 0.12687470245456697, "grad_norm": 1.140625, "learning_rate": 0.00047855697381820494, "loss": 0.1561, "step": 71556 }, { "epoch": 0.1268782486198768, "grad_norm": 1.671875, "learning_rate": 0.000478513922970616, "loss": 0.2537, "step": 71558 }, { "epoch": 0.1268817947851866, "grad_norm": 0.58984375, "learning_rate": 0.0004784708748410462, "loss": 0.2767, "step": 71560 }, { "epoch": 0.12688534095049642, "grad_norm": 0.166015625, "learning_rate": 0.00047842782942968343, "loss": 0.1734, "step": 71562 }, { "epoch": 0.12688888711580623, "grad_norm": 0.90234375, "learning_rate": 0.00047838478673671684, "loss": 0.1901, "step": 71564 }, { "epoch": 0.12689243328111605, "grad_norm": 0.484375, "learning_rate": 0.00047834174676233363, "loss": 0.1588, "step": 71566 }, { "epoch": 0.12689597944642586, "grad_norm": 0.6171875, "learning_rate": 0.0004782987095067227, "loss": 0.1844, "step": 71568 }, { "epoch": 0.12689952561173568, "grad_norm": 0.228515625, "learning_rate": 0.0004782556749700721, "loss": 0.2384, "step": 71570 }, { "epoch": 0.1269030717770455, "grad_norm": 0.23046875, "learning_rate": 0.00047821264315256977, "loss": 0.1846, "step": 71572 }, { "epoch": 0.1269066179423553, "grad_norm": 0.2451171875, "learning_rate": 0.00047816961405440473, "loss": 0.1839, "step": 71574 }, { "epoch": 0.12691016410766512, "grad_norm": 0.240234375, "learning_rate": 0.0004781265876757641, "loss": 0.155, "step": 71576 }, { "epoch": 0.12691371027297493, "grad_norm": 0.361328125, "learning_rate": 0.00047808356401683673, "loss": 0.1692, "step": 71578 }, { "epoch": 0.12691725643828475, "grad_norm": 0.58984375, "learning_rate": 0.0004780405430778103, "loss": 0.2076, "step": 71580 }, { "epoch": 0.12692080260359456, "grad_norm": 1.0859375, "learning_rate": 0.000477997524858874, "loss": 0.1813, "step": 71582 }, { "epoch": 0.12692434876890438, "grad_norm": 0.625, "learning_rate": 0.00047795450936021443, "loss": 0.2164, "step": 71584 }, { "epoch": 0.1269278949342142, "grad_norm": 0.27734375, "learning_rate": 0.0004779114965820209, "loss": 0.1907, "step": 71586 }, { "epoch": 0.126931441099524, "grad_norm": 1.484375, "learning_rate": 0.0004778684865244812, "loss": 0.3638, "step": 71588 }, { "epoch": 0.12693498726483382, "grad_norm": 2.953125, "learning_rate": 0.00047782547918778323, "loss": 0.5459, "step": 71590 }, { "epoch": 0.12693853343014364, "grad_norm": 0.376953125, "learning_rate": 0.00047778247457211533, "loss": 0.1909, "step": 71592 }, { "epoch": 0.12694207959545345, "grad_norm": 0.4296875, "learning_rate": 0.00047773947267766505, "loss": 0.2796, "step": 71594 }, { "epoch": 0.12694562576076326, "grad_norm": 0.318359375, "learning_rate": 0.00047769647350462126, "loss": 0.1445, "step": 71596 }, { "epoch": 0.12694917192607308, "grad_norm": 0.3359375, "learning_rate": 0.0004776534770531715, "loss": 0.1513, "step": 71598 }, { "epoch": 0.1269527180913829, "grad_norm": 0.43359375, "learning_rate": 0.000477610483323504, "loss": 0.1859, "step": 71600 }, { "epoch": 0.12695626425669274, "grad_norm": 0.2099609375, "learning_rate": 0.0004775674923158065, "loss": 0.1992, "step": 71602 }, { "epoch": 0.12695981042200255, "grad_norm": 0.142578125, "learning_rate": 0.00047752450403026745, "loss": 0.16, "step": 71604 }, { "epoch": 0.12696335658731236, "grad_norm": 0.462890625, "learning_rate": 0.0004774815184670746, "loss": 0.1825, "step": 71606 }, { "epoch": 0.12696690275262218, "grad_norm": 0.55078125, "learning_rate": 0.00047743853562641606, "loss": 0.2627, "step": 71608 }, { "epoch": 0.126970448917932, "grad_norm": 0.89453125, "learning_rate": 0.0004773955555084797, "loss": 0.1988, "step": 71610 }, { "epoch": 0.1269739950832418, "grad_norm": 0.26953125, "learning_rate": 0.0004773525781134533, "loss": 0.1476, "step": 71612 }, { "epoch": 0.12697754124855162, "grad_norm": 0.41015625, "learning_rate": 0.0004773096034415255, "loss": 0.1658, "step": 71614 }, { "epoch": 0.12698108741386144, "grad_norm": 0.328125, "learning_rate": 0.0004772666314928834, "loss": 0.1673, "step": 71616 }, { "epoch": 0.12698463357917125, "grad_norm": 0.251953125, "learning_rate": 0.00047722366226771553, "loss": 0.1452, "step": 71618 }, { "epoch": 0.12698817974448107, "grad_norm": 0.5390625, "learning_rate": 0.0004771806957662092, "loss": 0.5041, "step": 71620 }, { "epoch": 0.12699172590979088, "grad_norm": 0.8828125, "learning_rate": 0.0004771377319885534, "loss": 0.1624, "step": 71622 }, { "epoch": 0.1269952720751007, "grad_norm": 0.265625, "learning_rate": 0.0004770947709349349, "loss": 0.134, "step": 71624 }, { "epoch": 0.1269988182404105, "grad_norm": 0.34375, "learning_rate": 0.00047705181260554214, "loss": 0.1664, "step": 71626 }, { "epoch": 0.12700236440572032, "grad_norm": 5.40625, "learning_rate": 0.00047700885700056303, "loss": 0.3942, "step": 71628 }, { "epoch": 0.12700591057103014, "grad_norm": 0.99609375, "learning_rate": 0.0004769659041201853, "loss": 0.4398, "step": 71630 }, { "epoch": 0.12700945673633995, "grad_norm": 0.53125, "learning_rate": 0.0004769229539645967, "loss": 0.1975, "step": 71632 }, { "epoch": 0.12701300290164977, "grad_norm": 0.58984375, "learning_rate": 0.00047688000653398494, "loss": 0.2266, "step": 71634 }, { "epoch": 0.12701654906695958, "grad_norm": 9.0625, "learning_rate": 0.00047683706182853836, "loss": 0.3486, "step": 71636 }, { "epoch": 0.1270200952322694, "grad_norm": 0.447265625, "learning_rate": 0.00047679411984844454, "loss": 0.1751, "step": 71638 }, { "epoch": 0.1270236413975792, "grad_norm": 0.337890625, "learning_rate": 0.00047675118059389116, "loss": 0.2486, "step": 71640 }, { "epoch": 0.12702718756288903, "grad_norm": 0.4453125, "learning_rate": 0.0004767082440650659, "loss": 0.2132, "step": 71642 }, { "epoch": 0.12703073372819884, "grad_norm": 0.1884765625, "learning_rate": 0.0004766653102621569, "loss": 0.1486, "step": 71644 }, { "epoch": 0.12703427989350866, "grad_norm": 0.33984375, "learning_rate": 0.0004766223791853519, "loss": 0.2282, "step": 71646 }, { "epoch": 0.12703782605881847, "grad_norm": 1.7109375, "learning_rate": 0.00047657945083483846, "loss": 0.2349, "step": 71648 }, { "epoch": 0.12704137222412829, "grad_norm": 0.4140625, "learning_rate": 0.0004765365252108044, "loss": 0.1697, "step": 71650 }, { "epoch": 0.1270449183894381, "grad_norm": 1.1015625, "learning_rate": 0.00047649360231343706, "loss": 0.334, "step": 71652 }, { "epoch": 0.12704846455474791, "grad_norm": 0.75, "learning_rate": 0.0004764506821429248, "loss": 0.2055, "step": 71654 }, { "epoch": 0.12705201072005773, "grad_norm": 0.291015625, "learning_rate": 0.0004764077646994551, "loss": 0.1828, "step": 71656 }, { "epoch": 0.12705555688536754, "grad_norm": 3.203125, "learning_rate": 0.0004763648499832156, "loss": 0.2404, "step": 71658 }, { "epoch": 0.12705910305067736, "grad_norm": 0.267578125, "learning_rate": 0.0004763219379943938, "loss": 0.1748, "step": 71660 }, { "epoch": 0.12706264921598717, "grad_norm": 0.6328125, "learning_rate": 0.0004762790287331778, "loss": 0.2435, "step": 71662 }, { "epoch": 0.127066195381297, "grad_norm": 0.7734375, "learning_rate": 0.000476236122199755, "loss": 0.1941, "step": 71664 }, { "epoch": 0.1270697415466068, "grad_norm": 0.2890625, "learning_rate": 0.000476193218394313, "loss": 0.1476, "step": 71666 }, { "epoch": 0.12707328771191662, "grad_norm": 0.87890625, "learning_rate": 0.0004761503173170396, "loss": 0.1959, "step": 71668 }, { "epoch": 0.12707683387722643, "grad_norm": 0.2470703125, "learning_rate": 0.0004761074189681221, "loss": 0.1773, "step": 71670 }, { "epoch": 0.12708038004253625, "grad_norm": 0.298828125, "learning_rate": 0.0004760645233477489, "loss": 0.2185, "step": 71672 }, { "epoch": 0.12708392620784606, "grad_norm": 0.455078125, "learning_rate": 0.00047602163045610646, "loss": 0.17, "step": 71674 }, { "epoch": 0.12708747237315587, "grad_norm": 0.279296875, "learning_rate": 0.0004759787402933832, "loss": 0.165, "step": 71676 }, { "epoch": 0.1270910185384657, "grad_norm": 0.65625, "learning_rate": 0.0004759358528597662, "loss": 0.1871, "step": 71678 }, { "epoch": 0.1270945647037755, "grad_norm": 0.349609375, "learning_rate": 0.0004758929681554439, "loss": 0.1799, "step": 71680 }, { "epoch": 0.12709811086908532, "grad_norm": 0.294921875, "learning_rate": 0.0004758500861806026, "loss": 0.2279, "step": 71682 }, { "epoch": 0.12710165703439513, "grad_norm": 0.361328125, "learning_rate": 0.0004758072069354307, "loss": 0.1217, "step": 71684 }, { "epoch": 0.12710520319970495, "grad_norm": 0.61328125, "learning_rate": 0.00047576433042011563, "loss": 0.2159, "step": 71686 }, { "epoch": 0.12710874936501476, "grad_norm": 0.400390625, "learning_rate": 0.0004757214566348447, "loss": 0.1915, "step": 71688 }, { "epoch": 0.12711229553032458, "grad_norm": 0.3046875, "learning_rate": 0.00047567858557980556, "loss": 0.1623, "step": 71690 }, { "epoch": 0.12711584169563442, "grad_norm": 1.1328125, "learning_rate": 0.00047563571725518534, "loss": 0.1631, "step": 71692 }, { "epoch": 0.12711938786094423, "grad_norm": 0.734375, "learning_rate": 0.000475592851661172, "loss": 0.1922, "step": 71694 }, { "epoch": 0.12712293402625405, "grad_norm": 0.193359375, "learning_rate": 0.000475549988797953, "loss": 0.1388, "step": 71696 }, { "epoch": 0.12712648019156386, "grad_norm": 0.263671875, "learning_rate": 0.00047550712866571546, "loss": 0.1941, "step": 71698 }, { "epoch": 0.12713002635687368, "grad_norm": 0.361328125, "learning_rate": 0.00047546427126464685, "loss": 0.4966, "step": 71700 }, { "epoch": 0.1271335725221835, "grad_norm": 0.404296875, "learning_rate": 0.000475421416594935, "loss": 0.1204, "step": 71702 }, { "epoch": 0.1271371186874933, "grad_norm": 1.265625, "learning_rate": 0.00047537856465676697, "loss": 0.1925, "step": 71704 }, { "epoch": 0.12714066485280312, "grad_norm": 0.6171875, "learning_rate": 0.00047533571545033034, "loss": 0.2419, "step": 71706 }, { "epoch": 0.12714421101811293, "grad_norm": 1.2265625, "learning_rate": 0.00047529286897581247, "loss": 0.5053, "step": 71708 }, { "epoch": 0.12714775718342275, "grad_norm": 1.4453125, "learning_rate": 0.00047525002523340036, "loss": 0.2196, "step": 71710 }, { "epoch": 0.12715130334873256, "grad_norm": 0.78515625, "learning_rate": 0.00047520718422328236, "loss": 0.1746, "step": 71712 }, { "epoch": 0.12715484951404238, "grad_norm": 0.51171875, "learning_rate": 0.00047516434594564467, "loss": 0.1835, "step": 71714 }, { "epoch": 0.1271583956793522, "grad_norm": 0.24609375, "learning_rate": 0.00047512151040067534, "loss": 0.1357, "step": 71716 }, { "epoch": 0.127161941844662, "grad_norm": 1.2578125, "learning_rate": 0.0004750786775885614, "loss": 0.2085, "step": 71718 }, { "epoch": 0.12716548800997182, "grad_norm": 0.33984375, "learning_rate": 0.00047503584750949087, "loss": 0.164, "step": 71720 }, { "epoch": 0.12716903417528164, "grad_norm": 0.3515625, "learning_rate": 0.00047499302016365, "loss": 0.1583, "step": 71722 }, { "epoch": 0.12717258034059145, "grad_norm": 0.2451171875, "learning_rate": 0.0004749501955512268, "loss": 0.1696, "step": 71724 }, { "epoch": 0.12717612650590127, "grad_norm": 0.2177734375, "learning_rate": 0.0004749073736724084, "loss": 0.1635, "step": 71726 }, { "epoch": 0.12717967267121108, "grad_norm": 0.59765625, "learning_rate": 0.0004748645545273821, "loss": 0.204, "step": 71728 }, { "epoch": 0.1271832188365209, "grad_norm": 0.4609375, "learning_rate": 0.00047482173811633503, "loss": 0.2482, "step": 71730 }, { "epoch": 0.1271867650018307, "grad_norm": 0.94921875, "learning_rate": 0.0004747789244394543, "loss": 0.1712, "step": 71732 }, { "epoch": 0.12719031116714052, "grad_norm": 0.298828125, "learning_rate": 0.0004747361134969277, "loss": 0.1758, "step": 71734 }, { "epoch": 0.12719385733245034, "grad_norm": 0.2890625, "learning_rate": 0.00047469330528894207, "loss": 0.1742, "step": 71736 }, { "epoch": 0.12719740349776015, "grad_norm": 0.640625, "learning_rate": 0.0004746504998156847, "loss": 0.2076, "step": 71738 }, { "epoch": 0.12720094966306997, "grad_norm": 0.28515625, "learning_rate": 0.0004746076970773428, "loss": 0.1814, "step": 71740 }, { "epoch": 0.12720449582837978, "grad_norm": 0.375, "learning_rate": 0.0004745648970741034, "loss": 0.1922, "step": 71742 }, { "epoch": 0.1272080419936896, "grad_norm": 0.20703125, "learning_rate": 0.00047452209980615407, "loss": 0.1089, "step": 71744 }, { "epoch": 0.1272115881589994, "grad_norm": 0.302734375, "learning_rate": 0.00047447930527368164, "loss": 0.1245, "step": 71746 }, { "epoch": 0.12721513432430923, "grad_norm": 0.2470703125, "learning_rate": 0.0004744365134768735, "loss": 0.1846, "step": 71748 }, { "epoch": 0.12721868048961904, "grad_norm": 0.34375, "learning_rate": 0.00047439372441591634, "loss": 0.2001, "step": 71750 }, { "epoch": 0.12722222665492885, "grad_norm": 0.291015625, "learning_rate": 0.000474350938090998, "loss": 0.1572, "step": 71752 }, { "epoch": 0.12722577282023867, "grad_norm": 0.375, "learning_rate": 0.0004743081545023052, "loss": 0.1649, "step": 71754 }, { "epoch": 0.12722931898554848, "grad_norm": 0.1513671875, "learning_rate": 0.00047426537365002496, "loss": 0.1414, "step": 71756 }, { "epoch": 0.1272328651508583, "grad_norm": 1.1171875, "learning_rate": 0.0004742225955343447, "loss": 0.1562, "step": 71758 }, { "epoch": 0.1272364113161681, "grad_norm": 0.1767578125, "learning_rate": 0.0004741798201554509, "loss": 0.1218, "step": 71760 }, { "epoch": 0.12723995748147793, "grad_norm": 0.345703125, "learning_rate": 0.0004741370475135316, "loss": 0.1714, "step": 71762 }, { "epoch": 0.12724350364678774, "grad_norm": 0.46875, "learning_rate": 0.00047409427760877277, "loss": 0.1719, "step": 71764 }, { "epoch": 0.12724704981209756, "grad_norm": 0.21875, "learning_rate": 0.00047405151044136243, "loss": 0.179, "step": 71766 }, { "epoch": 0.12725059597740737, "grad_norm": 1.296875, "learning_rate": 0.00047400874601148674, "loss": 0.3621, "step": 71768 }, { "epoch": 0.12725414214271719, "grad_norm": 3.578125, "learning_rate": 0.0004739659843193338, "loss": 0.2476, "step": 71770 }, { "epoch": 0.127257688308027, "grad_norm": 0.310546875, "learning_rate": 0.00047392322536508934, "loss": 0.1515, "step": 71772 }, { "epoch": 0.12726123447333682, "grad_norm": 0.58203125, "learning_rate": 0.0004738804691489414, "loss": 0.2563, "step": 71774 }, { "epoch": 0.12726478063864663, "grad_norm": 0.84375, "learning_rate": 0.00047383771567107666, "loss": 0.2221, "step": 71776 }, { "epoch": 0.12726832680395644, "grad_norm": 1.21875, "learning_rate": 0.0004737949649316819, "loss": 0.2143, "step": 71778 }, { "epoch": 0.12727187296926626, "grad_norm": 0.412109375, "learning_rate": 0.00047375221693094426, "loss": 0.203, "step": 71780 }, { "epoch": 0.12727541913457607, "grad_norm": 0.5703125, "learning_rate": 0.00047370947166905044, "loss": 0.2116, "step": 71782 }, { "epoch": 0.12727896529988592, "grad_norm": 0.9296875, "learning_rate": 0.00047366672914618785, "loss": 0.2211, "step": 71784 }, { "epoch": 0.12728251146519573, "grad_norm": 0.412109375, "learning_rate": 0.0004736239893625431, "loss": 0.1923, "step": 71786 }, { "epoch": 0.12728605763050554, "grad_norm": 0.2578125, "learning_rate": 0.0004735812523183032, "loss": 0.1669, "step": 71788 }, { "epoch": 0.12728960379581536, "grad_norm": 0.26953125, "learning_rate": 0.00047353851801365483, "loss": 0.1825, "step": 71790 }, { "epoch": 0.12729314996112517, "grad_norm": 0.35546875, "learning_rate": 0.00047349578644878526, "loss": 0.1782, "step": 71792 }, { "epoch": 0.127296696126435, "grad_norm": 0.443359375, "learning_rate": 0.0004734530576238813, "loss": 0.1571, "step": 71794 }, { "epoch": 0.1273002422917448, "grad_norm": 0.392578125, "learning_rate": 0.00047341033153912966, "loss": 0.1447, "step": 71796 }, { "epoch": 0.12730378845705462, "grad_norm": 0.28515625, "learning_rate": 0.00047336760819471734, "loss": 0.1976, "step": 71798 }, { "epoch": 0.12730733462236443, "grad_norm": 0.5703125, "learning_rate": 0.0004733248875908308, "loss": 0.2073, "step": 71800 }, { "epoch": 0.12731088078767425, "grad_norm": 0.486328125, "learning_rate": 0.0004732821697276577, "loss": 0.1311, "step": 71802 }, { "epoch": 0.12731442695298406, "grad_norm": 0.283203125, "learning_rate": 0.00047323945460538385, "loss": 0.1679, "step": 71804 }, { "epoch": 0.12731797311829388, "grad_norm": 0.3828125, "learning_rate": 0.0004731967422241968, "loss": 0.1694, "step": 71806 }, { "epoch": 0.1273215192836037, "grad_norm": 0.267578125, "learning_rate": 0.0004731540325842828, "loss": 0.164, "step": 71808 }, { "epoch": 0.1273250654489135, "grad_norm": 0.5703125, "learning_rate": 0.0004731113256858296, "loss": 0.1928, "step": 71810 }, { "epoch": 0.12732861161422332, "grad_norm": 0.341796875, "learning_rate": 0.00047306862152902263, "loss": 0.2163, "step": 71812 }, { "epoch": 0.12733215777953313, "grad_norm": 0.326171875, "learning_rate": 0.00047302592011404975, "loss": 0.1945, "step": 71814 }, { "epoch": 0.12733570394484295, "grad_norm": 0.9765625, "learning_rate": 0.0004729832214410972, "loss": 0.2366, "step": 71816 }, { "epoch": 0.12733925011015276, "grad_norm": 0.34375, "learning_rate": 0.0004729405255103519, "loss": 0.18, "step": 71818 }, { "epoch": 0.12734279627546258, "grad_norm": 0.2294921875, "learning_rate": 0.0004728978323220005, "loss": 0.1493, "step": 71820 }, { "epoch": 0.1273463424407724, "grad_norm": 0.49609375, "learning_rate": 0.00047285514187622935, "loss": 0.1572, "step": 71822 }, { "epoch": 0.1273498886060822, "grad_norm": 0.84375, "learning_rate": 0.00047281245417322596, "loss": 0.2274, "step": 71824 }, { "epoch": 0.12735343477139202, "grad_norm": 0.2353515625, "learning_rate": 0.0004727697692131762, "loss": 0.17, "step": 71826 }, { "epoch": 0.12735698093670184, "grad_norm": 0.369140625, "learning_rate": 0.00047272708699626765, "loss": 0.2104, "step": 71828 }, { "epoch": 0.12736052710201165, "grad_norm": 2.203125, "learning_rate": 0.00047268440752268597, "loss": 0.301, "step": 71830 }, { "epoch": 0.12736407326732146, "grad_norm": 0.65234375, "learning_rate": 0.0004726417307926185, "loss": 0.1909, "step": 71832 }, { "epoch": 0.12736761943263128, "grad_norm": 0.24609375, "learning_rate": 0.0004725990568062518, "loss": 0.1579, "step": 71834 }, { "epoch": 0.1273711655979411, "grad_norm": 0.470703125, "learning_rate": 0.0004725563855637723, "loss": 0.2795, "step": 71836 }, { "epoch": 0.1273747117632509, "grad_norm": 0.6640625, "learning_rate": 0.0004725137170653667, "loss": 0.279, "step": 71838 }, { "epoch": 0.12737825792856072, "grad_norm": 0.76171875, "learning_rate": 0.0004724710513112214, "loss": 0.1882, "step": 71840 }, { "epoch": 0.12738180409387054, "grad_norm": 0.328125, "learning_rate": 0.00047242838830152353, "loss": 0.1431, "step": 71842 }, { "epoch": 0.12738535025918035, "grad_norm": 0.43359375, "learning_rate": 0.0004723857280364592, "loss": 0.1564, "step": 71844 }, { "epoch": 0.12738889642449017, "grad_norm": 0.259765625, "learning_rate": 0.0004723430705162153, "loss": 0.1757, "step": 71846 }, { "epoch": 0.12739244258979998, "grad_norm": 0.404296875, "learning_rate": 0.00047230041574097787, "loss": 0.1823, "step": 71848 }, { "epoch": 0.1273959887551098, "grad_norm": 0.578125, "learning_rate": 0.000472257763710934, "loss": 0.1831, "step": 71850 }, { "epoch": 0.1273995349204196, "grad_norm": 0.68359375, "learning_rate": 0.0004722151144262701, "loss": 0.2104, "step": 71852 }, { "epoch": 0.12740308108572942, "grad_norm": 0.3515625, "learning_rate": 0.0004721724678871727, "loss": 0.1468, "step": 71854 }, { "epoch": 0.12740662725103924, "grad_norm": 0.462890625, "learning_rate": 0.00047212982409382823, "loss": 0.1682, "step": 71856 }, { "epoch": 0.12741017341634905, "grad_norm": 0.75, "learning_rate": 0.0004720871830464229, "loss": 0.1584, "step": 71858 }, { "epoch": 0.12741371958165887, "grad_norm": 0.185546875, "learning_rate": 0.000472044544745144, "loss": 0.1852, "step": 71860 }, { "epoch": 0.12741726574696868, "grad_norm": 0.458984375, "learning_rate": 0.0004720019091901769, "loss": 0.2349, "step": 71862 }, { "epoch": 0.1274208119122785, "grad_norm": 0.61328125, "learning_rate": 0.00047195927638170917, "loss": 0.2126, "step": 71864 }, { "epoch": 0.1274243580775883, "grad_norm": 0.470703125, "learning_rate": 0.0004719166463199262, "loss": 0.2032, "step": 71866 }, { "epoch": 0.12742790424289813, "grad_norm": 0.6015625, "learning_rate": 0.0004718740190050157, "loss": 0.2745, "step": 71868 }, { "epoch": 0.12743145040820794, "grad_norm": 0.474609375, "learning_rate": 0.0004718313944371628, "loss": 0.2164, "step": 71870 }, { "epoch": 0.12743499657351776, "grad_norm": 0.86328125, "learning_rate": 0.0004717887726165547, "loss": 0.1861, "step": 71872 }, { "epoch": 0.1274385427388276, "grad_norm": 0.890625, "learning_rate": 0.00047174615354337777, "loss": 0.2016, "step": 71874 }, { "epoch": 0.1274420889041374, "grad_norm": 0.81640625, "learning_rate": 0.00047170353721781805, "loss": 0.1969, "step": 71876 }, { "epoch": 0.12744563506944723, "grad_norm": 0.87890625, "learning_rate": 0.0004716609236400623, "loss": 0.2098, "step": 71878 }, { "epoch": 0.12744918123475704, "grad_norm": 1.9609375, "learning_rate": 0.00047161831281029626, "loss": 0.6401, "step": 71880 }, { "epoch": 0.12745272740006686, "grad_norm": 0.25, "learning_rate": 0.000471575704728707, "loss": 0.1628, "step": 71882 }, { "epoch": 0.12745627356537667, "grad_norm": 1.3515625, "learning_rate": 0.00047153309939548056, "loss": 0.3446, "step": 71884 }, { "epoch": 0.12745981973068649, "grad_norm": 1.453125, "learning_rate": 0.00047149049681080335, "loss": 0.2131, "step": 71886 }, { "epoch": 0.1274633658959963, "grad_norm": 0.21875, "learning_rate": 0.00047144789697486136, "loss": 0.1851, "step": 71888 }, { "epoch": 0.12746691206130611, "grad_norm": 0.875, "learning_rate": 0.0004714052998878414, "loss": 0.1483, "step": 71890 }, { "epoch": 0.12747045822661593, "grad_norm": 0.5703125, "learning_rate": 0.0004713627055499295, "loss": 0.414, "step": 71892 }, { "epoch": 0.12747400439192574, "grad_norm": 0.5, "learning_rate": 0.000471320113961312, "loss": 0.1492, "step": 71894 }, { "epoch": 0.12747755055723556, "grad_norm": 0.4765625, "learning_rate": 0.0004712775251221751, "loss": 0.2297, "step": 71896 }, { "epoch": 0.12748109672254537, "grad_norm": 1.328125, "learning_rate": 0.00047123493903270485, "loss": 0.22, "step": 71898 }, { "epoch": 0.1274846428878552, "grad_norm": 0.578125, "learning_rate": 0.00047119235569308837, "loss": 0.1628, "step": 71900 }, { "epoch": 0.127488189053165, "grad_norm": 1.8515625, "learning_rate": 0.0004711497751035106, "loss": 0.5648, "step": 71902 }, { "epoch": 0.12749173521847482, "grad_norm": 3.125, "learning_rate": 0.00047110719726415874, "loss": 0.219, "step": 71904 }, { "epoch": 0.12749528138378463, "grad_norm": 0.6640625, "learning_rate": 0.0004710646221752188, "loss": 0.1844, "step": 71906 }, { "epoch": 0.12749882754909445, "grad_norm": 0.373046875, "learning_rate": 0.0004710220498368765, "loss": 0.2087, "step": 71908 }, { "epoch": 0.12750237371440426, "grad_norm": 0.1845703125, "learning_rate": 0.00047097948024931894, "loss": 0.1633, "step": 71910 }, { "epoch": 0.12750591987971407, "grad_norm": 1.1328125, "learning_rate": 0.0004709369134127313, "loss": 0.2084, "step": 71912 }, { "epoch": 0.1275094660450239, "grad_norm": 0.52734375, "learning_rate": 0.0004708943493273003, "loss": 0.1726, "step": 71914 }, { "epoch": 0.1275130122103337, "grad_norm": 1.0703125, "learning_rate": 0.0004708517879932118, "loss": 0.1475, "step": 71916 }, { "epoch": 0.12751655837564352, "grad_norm": 1.640625, "learning_rate": 0.00047080922941065273, "loss": 0.2906, "step": 71918 }, { "epoch": 0.12752010454095333, "grad_norm": 0.287109375, "learning_rate": 0.000470766673579808, "loss": 0.1824, "step": 71920 }, { "epoch": 0.12752365070626315, "grad_norm": 0.6015625, "learning_rate": 0.0004707241205008647, "loss": 0.1777, "step": 71922 }, { "epoch": 0.12752719687157296, "grad_norm": 2.484375, "learning_rate": 0.0004706815701740085, "loss": 0.2184, "step": 71924 }, { "epoch": 0.12753074303688278, "grad_norm": 0.31640625, "learning_rate": 0.0004706390225994256, "loss": 0.1876, "step": 71926 }, { "epoch": 0.1275342892021926, "grad_norm": 0.314453125, "learning_rate": 0.00047059647777730203, "loss": 0.1757, "step": 71928 }, { "epoch": 0.1275378353675024, "grad_norm": 0.2041015625, "learning_rate": 0.0004705539357078235, "loss": 0.1656, "step": 71930 }, { "epoch": 0.12754138153281222, "grad_norm": 0.328125, "learning_rate": 0.0004705113963911768, "loss": 0.1934, "step": 71932 }, { "epoch": 0.12754492769812203, "grad_norm": 0.4609375, "learning_rate": 0.00047046885982754763, "loss": 0.211, "step": 71934 }, { "epoch": 0.12754847386343185, "grad_norm": 0.396484375, "learning_rate": 0.00047042632601712185, "loss": 0.2314, "step": 71936 }, { "epoch": 0.12755202002874166, "grad_norm": 0.291015625, "learning_rate": 0.00047038379496008544, "loss": 0.1749, "step": 71938 }, { "epoch": 0.12755556619405148, "grad_norm": 1.2421875, "learning_rate": 0.00047034126665662494, "loss": 0.224, "step": 71940 }, { "epoch": 0.1275591123593613, "grad_norm": 0.69140625, "learning_rate": 0.00047029874110692584, "loss": 0.1855, "step": 71942 }, { "epoch": 0.1275626585246711, "grad_norm": 0.42578125, "learning_rate": 0.0004702562183111744, "loss": 0.1568, "step": 71944 }, { "epoch": 0.12756620468998092, "grad_norm": 0.56640625, "learning_rate": 0.00047021369826955636, "loss": 0.1805, "step": 71946 }, { "epoch": 0.12756975085529074, "grad_norm": 1.6015625, "learning_rate": 0.0004701711809822576, "loss": 0.2427, "step": 71948 }, { "epoch": 0.12757329702060055, "grad_norm": 0.33203125, "learning_rate": 0.00047012866644946473, "loss": 0.1717, "step": 71950 }, { "epoch": 0.12757684318591037, "grad_norm": 0.99609375, "learning_rate": 0.00047008615467136267, "loss": 0.1653, "step": 71952 }, { "epoch": 0.12758038935122018, "grad_norm": 0.6171875, "learning_rate": 0.0004700436456481381, "loss": 0.1699, "step": 71954 }, { "epoch": 0.12758393551653, "grad_norm": 0.1962890625, "learning_rate": 0.0004700011393799765, "loss": 0.2857, "step": 71956 }, { "epoch": 0.1275874816818398, "grad_norm": 2.765625, "learning_rate": 0.0004699586358670645, "loss": 0.2477, "step": 71958 }, { "epoch": 0.12759102784714962, "grad_norm": 0.1484375, "learning_rate": 0.00046991613510958684, "loss": 0.1809, "step": 71960 }, { "epoch": 0.12759457401245944, "grad_norm": 0.98828125, "learning_rate": 0.0004698736371077303, "loss": 0.2924, "step": 71962 }, { "epoch": 0.12759812017776928, "grad_norm": 1.234375, "learning_rate": 0.0004698311418616806, "loss": 0.2046, "step": 71964 }, { "epoch": 0.1276016663430791, "grad_norm": 1.0703125, "learning_rate": 0.00046978864937162327, "loss": 0.2239, "step": 71966 }, { "epoch": 0.1276052125083889, "grad_norm": 0.16015625, "learning_rate": 0.0004697461596377443, "loss": 0.1537, "step": 71968 }, { "epoch": 0.12760875867369872, "grad_norm": 0.89453125, "learning_rate": 0.00046970367266022936, "loss": 0.1629, "step": 71970 }, { "epoch": 0.12761230483900854, "grad_norm": 0.279296875, "learning_rate": 0.00046966118843926475, "loss": 0.1356, "step": 71972 }, { "epoch": 0.12761585100431835, "grad_norm": 0.703125, "learning_rate": 0.0004696187069750359, "loss": 0.2098, "step": 71974 }, { "epoch": 0.12761939716962817, "grad_norm": 1.1796875, "learning_rate": 0.0004695762282677286, "loss": 0.1942, "step": 71976 }, { "epoch": 0.12762294333493798, "grad_norm": 0.3671875, "learning_rate": 0.0004695337523175285, "loss": 0.1642, "step": 71978 }, { "epoch": 0.1276264895002478, "grad_norm": 0.55078125, "learning_rate": 0.00046949127912462174, "loss": 0.1924, "step": 71980 }, { "epoch": 0.1276300356655576, "grad_norm": 0.64453125, "learning_rate": 0.00046944880868919387, "loss": 0.1745, "step": 71982 }, { "epoch": 0.12763358183086743, "grad_norm": 0.59765625, "learning_rate": 0.00046940634101143075, "loss": 0.2288, "step": 71984 }, { "epoch": 0.12763712799617724, "grad_norm": 1.234375, "learning_rate": 0.00046936387609151795, "loss": 0.1773, "step": 71986 }, { "epoch": 0.12764067416148706, "grad_norm": 0.37109375, "learning_rate": 0.0004693214139296409, "loss": 0.1692, "step": 71988 }, { "epoch": 0.12764422032679687, "grad_norm": 0.625, "learning_rate": 0.00046927895452598603, "loss": 0.2418, "step": 71990 }, { "epoch": 0.12764776649210668, "grad_norm": 0.7421875, "learning_rate": 0.0004692364978807386, "loss": 0.1891, "step": 71992 }, { "epoch": 0.1276513126574165, "grad_norm": 0.2431640625, "learning_rate": 0.0004691940439940843, "loss": 0.1313, "step": 71994 }, { "epoch": 0.1276548588227263, "grad_norm": 0.26953125, "learning_rate": 0.0004691515928662086, "loss": 0.237, "step": 71996 }, { "epoch": 0.12765840498803613, "grad_norm": 0.3515625, "learning_rate": 0.00046910914449729764, "loss": 0.1933, "step": 71998 }, { "epoch": 0.12766195115334594, "grad_norm": 0.30078125, "learning_rate": 0.0004690666988875369, "loss": 0.1775, "step": 72000 }, { "epoch": 0.12766549731865576, "grad_norm": 0.625, "learning_rate": 0.0004690242560371118, "loss": 0.1737, "step": 72002 }, { "epoch": 0.12766904348396557, "grad_norm": 0.33203125, "learning_rate": 0.0004689818159462082, "loss": 0.1663, "step": 72004 }, { "epoch": 0.1276725896492754, "grad_norm": 0.80859375, "learning_rate": 0.00046893937861501123, "loss": 0.2136, "step": 72006 }, { "epoch": 0.1276761358145852, "grad_norm": 0.58984375, "learning_rate": 0.0004688969440437074, "loss": 0.1609, "step": 72008 }, { "epoch": 0.12767968197989502, "grad_norm": 1.09375, "learning_rate": 0.00046885451223248125, "loss": 0.1326, "step": 72010 }, { "epoch": 0.12768322814520483, "grad_norm": 0.26171875, "learning_rate": 0.0004688120831815192, "loss": 0.1525, "step": 72012 }, { "epoch": 0.12768677431051464, "grad_norm": 0.212890625, "learning_rate": 0.00046876965689100606, "loss": 0.1327, "step": 72014 }, { "epoch": 0.12769032047582446, "grad_norm": 0.2578125, "learning_rate": 0.00046872723336112844, "loss": 0.1656, "step": 72016 }, { "epoch": 0.12769386664113427, "grad_norm": 0.296875, "learning_rate": 0.0004686848125920706, "loss": 0.1574, "step": 72018 }, { "epoch": 0.1276974128064441, "grad_norm": 0.251953125, "learning_rate": 0.0004686423945840189, "loss": 0.1301, "step": 72020 }, { "epoch": 0.1277009589717539, "grad_norm": 0.333984375, "learning_rate": 0.0004685999793371588, "loss": 0.2113, "step": 72022 }, { "epoch": 0.12770450513706372, "grad_norm": 0.4296875, "learning_rate": 0.0004685575668516755, "loss": 0.1489, "step": 72024 }, { "epoch": 0.12770805130237353, "grad_norm": 0.83203125, "learning_rate": 0.0004685151571277547, "loss": 0.1901, "step": 72026 }, { "epoch": 0.12771159746768335, "grad_norm": 2.828125, "learning_rate": 0.0004684727501655814, "loss": 0.2956, "step": 72028 }, { "epoch": 0.12771514363299316, "grad_norm": 0.75, "learning_rate": 0.00046843034596534176, "loss": 0.1438, "step": 72030 }, { "epoch": 0.12771868979830298, "grad_norm": 0.2216796875, "learning_rate": 0.00046838794452722094, "loss": 0.1997, "step": 72032 }, { "epoch": 0.1277222359636128, "grad_norm": 0.57421875, "learning_rate": 0.0004683455458514044, "loss": 0.2108, "step": 72034 }, { "epoch": 0.1277257821289226, "grad_norm": 0.296875, "learning_rate": 0.0004683031499380772, "loss": 0.173, "step": 72036 }, { "epoch": 0.12772932829423242, "grad_norm": 0.2333984375, "learning_rate": 0.0004682607567874254, "loss": 0.1521, "step": 72038 }, { "epoch": 0.12773287445954223, "grad_norm": 0.37890625, "learning_rate": 0.0004682183663996341, "loss": 0.1366, "step": 72040 }, { "epoch": 0.12773642062485205, "grad_norm": 0.384765625, "learning_rate": 0.00046817597877488876, "loss": 0.2248, "step": 72042 }, { "epoch": 0.12773996679016186, "grad_norm": 0.2353515625, "learning_rate": 0.0004681335939133745, "loss": 0.2077, "step": 72044 }, { "epoch": 0.12774351295547168, "grad_norm": 0.52734375, "learning_rate": 0.00046809121181527676, "loss": 0.1825, "step": 72046 }, { "epoch": 0.1277470591207815, "grad_norm": 0.1796875, "learning_rate": 0.0004680488324807815, "loss": 0.1622, "step": 72048 }, { "epoch": 0.1277506052860913, "grad_norm": 0.57421875, "learning_rate": 0.000468006455910073, "loss": 0.1798, "step": 72050 }, { "epoch": 0.12775415145140112, "grad_norm": 0.296875, "learning_rate": 0.00046796408210333733, "loss": 0.1714, "step": 72052 }, { "epoch": 0.12775769761671094, "grad_norm": 1.265625, "learning_rate": 0.00046792171106075953, "loss": 0.1466, "step": 72054 }, { "epoch": 0.12776124378202078, "grad_norm": 0.3125, "learning_rate": 0.00046787934278252555, "loss": 0.2269, "step": 72056 }, { "epoch": 0.1277647899473306, "grad_norm": 0.75, "learning_rate": 0.0004678369772688195, "loss": 0.1467, "step": 72058 }, { "epoch": 0.1277683361126404, "grad_norm": 0.8359375, "learning_rate": 0.00046779461451982766, "loss": 0.2507, "step": 72060 }, { "epoch": 0.12777188227795022, "grad_norm": 0.357421875, "learning_rate": 0.0004677522545357348, "loss": 0.1642, "step": 72062 }, { "epoch": 0.12777542844326004, "grad_norm": 1.171875, "learning_rate": 0.0004677098973167265, "loss": 0.1886, "step": 72064 }, { "epoch": 0.12777897460856985, "grad_norm": 0.51953125, "learning_rate": 0.00046766754286298775, "loss": 0.2129, "step": 72066 }, { "epoch": 0.12778252077387967, "grad_norm": 0.27734375, "learning_rate": 0.0004676251911747036, "loss": 0.1416, "step": 72068 }, { "epoch": 0.12778606693918948, "grad_norm": 0.5078125, "learning_rate": 0.00046758284225205984, "loss": 0.1549, "step": 72070 }, { "epoch": 0.1277896131044993, "grad_norm": 0.296875, "learning_rate": 0.000467540496095241, "loss": 0.1708, "step": 72072 }, { "epoch": 0.1277931592698091, "grad_norm": 1.1171875, "learning_rate": 0.00046749815270443336, "loss": 0.1993, "step": 72074 }, { "epoch": 0.12779670543511892, "grad_norm": 0.287109375, "learning_rate": 0.0004674558120798208, "loss": 0.2045, "step": 72076 }, { "epoch": 0.12780025160042874, "grad_norm": 2.90625, "learning_rate": 0.0004674134742215893, "loss": 0.3275, "step": 72078 }, { "epoch": 0.12780379776573855, "grad_norm": 0.1962890625, "learning_rate": 0.0004673711391299238, "loss": 0.1593, "step": 72080 }, { "epoch": 0.12780734393104837, "grad_norm": 0.265625, "learning_rate": 0.00046732880680500956, "loss": 0.1456, "step": 72082 }, { "epoch": 0.12781089009635818, "grad_norm": 0.36328125, "learning_rate": 0.0004672864772470315, "loss": 0.2561, "step": 72084 }, { "epoch": 0.127814436261668, "grad_norm": 0.33984375, "learning_rate": 0.00046724415045617477, "loss": 0.2061, "step": 72086 }, { "epoch": 0.1278179824269778, "grad_norm": 0.7734375, "learning_rate": 0.00046720182643262474, "loss": 0.1673, "step": 72088 }, { "epoch": 0.12782152859228763, "grad_norm": 0.498046875, "learning_rate": 0.0004671595051765663, "loss": 0.1258, "step": 72090 }, { "epoch": 0.12782507475759744, "grad_norm": 0.26171875, "learning_rate": 0.0004671171866881847, "loss": 0.1413, "step": 72092 }, { "epoch": 0.12782862092290725, "grad_norm": 0.5859375, "learning_rate": 0.0004670748709676649, "loss": 0.1694, "step": 72094 }, { "epoch": 0.12783216708821707, "grad_norm": 0.8984375, "learning_rate": 0.00046703255801519155, "loss": 0.2253, "step": 72096 }, { "epoch": 0.12783571325352688, "grad_norm": 0.224609375, "learning_rate": 0.0004669902478309508, "loss": 0.2575, "step": 72098 }, { "epoch": 0.1278392594188367, "grad_norm": 0.431640625, "learning_rate": 0.00046694794041512647, "loss": 0.2046, "step": 72100 }, { "epoch": 0.1278428055841465, "grad_norm": 0.478515625, "learning_rate": 0.0004669056357679043, "loss": 0.1681, "step": 72102 }, { "epoch": 0.12784635174945633, "grad_norm": 0.349609375, "learning_rate": 0.0004668633338894689, "loss": 0.1797, "step": 72104 }, { "epoch": 0.12784989791476614, "grad_norm": 0.15625, "learning_rate": 0.0004668210347800061, "loss": 0.13, "step": 72106 }, { "epoch": 0.12785344408007596, "grad_norm": 0.578125, "learning_rate": 0.00046677873843969974, "loss": 0.235, "step": 72108 }, { "epoch": 0.12785699024538577, "grad_norm": 0.47265625, "learning_rate": 0.00046673644486873564, "loss": 0.2067, "step": 72110 }, { "epoch": 0.12786053641069559, "grad_norm": 0.96484375, "learning_rate": 0.00046669415406729844, "loss": 0.2, "step": 72112 }, { "epoch": 0.1278640825760054, "grad_norm": 0.546875, "learning_rate": 0.00046665186603557317, "loss": 0.1898, "step": 72114 }, { "epoch": 0.12786762874131521, "grad_norm": 0.50390625, "learning_rate": 0.00046660958077374473, "loss": 0.1846, "step": 72116 }, { "epoch": 0.12787117490662503, "grad_norm": 1.8671875, "learning_rate": 0.00046656729828199777, "loss": 0.2008, "step": 72118 }, { "epoch": 0.12787472107193484, "grad_norm": 0.263671875, "learning_rate": 0.00046652501856051784, "loss": 0.164, "step": 72120 }, { "epoch": 0.12787826723724466, "grad_norm": 0.298828125, "learning_rate": 0.0004664827416094895, "loss": 0.3535, "step": 72122 }, { "epoch": 0.12788181340255447, "grad_norm": 6.09375, "learning_rate": 0.0004664404674290976, "loss": 0.2318, "step": 72124 }, { "epoch": 0.1278853595678643, "grad_norm": 0.275390625, "learning_rate": 0.00046639819601952687, "loss": 0.1986, "step": 72126 }, { "epoch": 0.1278889057331741, "grad_norm": 4.59375, "learning_rate": 0.0004663559273809626, "loss": 0.2263, "step": 72128 }, { "epoch": 0.12789245189848392, "grad_norm": 0.88671875, "learning_rate": 0.0004663136615135895, "loss": 0.1846, "step": 72130 }, { "epoch": 0.12789599806379373, "grad_norm": 0.31640625, "learning_rate": 0.00046627139841759236, "loss": 0.1718, "step": 72132 }, { "epoch": 0.12789954422910355, "grad_norm": 0.1669921875, "learning_rate": 0.00046622913809315597, "loss": 0.0991, "step": 72134 }, { "epoch": 0.12790309039441336, "grad_norm": 0.59375, "learning_rate": 0.0004661868805404649, "loss": 0.1947, "step": 72136 }, { "epoch": 0.12790663655972317, "grad_norm": 0.388671875, "learning_rate": 0.00046614462575970484, "loss": 0.1542, "step": 72138 }, { "epoch": 0.127910182725033, "grad_norm": 0.349609375, "learning_rate": 0.00046610237375105954, "loss": 0.1244, "step": 72140 }, { "epoch": 0.1279137288903428, "grad_norm": 0.61328125, "learning_rate": 0.00046606012451471437, "loss": 0.2198, "step": 72142 }, { "epoch": 0.12791727505565262, "grad_norm": 0.330078125, "learning_rate": 0.00046601787805085375, "loss": 0.2504, "step": 72144 }, { "epoch": 0.12792082122096246, "grad_norm": 0.279296875, "learning_rate": 0.0004659756343596633, "loss": 0.1689, "step": 72146 }, { "epoch": 0.12792436738627228, "grad_norm": 0.33203125, "learning_rate": 0.0004659333934413264, "loss": 0.1731, "step": 72148 }, { "epoch": 0.1279279135515821, "grad_norm": 0.96484375, "learning_rate": 0.000465891155296029, "loss": 0.2111, "step": 72150 }, { "epoch": 0.1279314597168919, "grad_norm": 0.55078125, "learning_rate": 0.0004658489199239553, "loss": 0.1837, "step": 72152 }, { "epoch": 0.12793500588220172, "grad_norm": 1.1875, "learning_rate": 0.00046580668732528976, "loss": 0.2035, "step": 72154 }, { "epoch": 0.12793855204751153, "grad_norm": 0.416015625, "learning_rate": 0.0004657644575002179, "loss": 0.1681, "step": 72156 }, { "epoch": 0.12794209821282135, "grad_norm": 0.396484375, "learning_rate": 0.00046572223044892336, "loss": 0.2667, "step": 72158 }, { "epoch": 0.12794564437813116, "grad_norm": 1.2109375, "learning_rate": 0.00046568000617159167, "loss": 0.1904, "step": 72160 }, { "epoch": 0.12794919054344098, "grad_norm": 2.6875, "learning_rate": 0.0004656377846684069, "loss": 0.309, "step": 72162 }, { "epoch": 0.1279527367087508, "grad_norm": 0.375, "learning_rate": 0.0004655955659395545, "loss": 0.1669, "step": 72164 }, { "epoch": 0.1279562828740606, "grad_norm": 0.435546875, "learning_rate": 0.00046555334998521817, "loss": 0.204, "step": 72166 }, { "epoch": 0.12795982903937042, "grad_norm": 0.345703125, "learning_rate": 0.00046551113680558297, "loss": 0.1503, "step": 72168 }, { "epoch": 0.12796337520468024, "grad_norm": 1.1796875, "learning_rate": 0.0004654689264008337, "loss": 0.2914, "step": 72170 }, { "epoch": 0.12796692136999005, "grad_norm": 0.275390625, "learning_rate": 0.0004654267187711548, "loss": 0.1505, "step": 72172 }, { "epoch": 0.12797046753529986, "grad_norm": 0.39453125, "learning_rate": 0.0004653845139167307, "loss": 0.1678, "step": 72174 }, { "epoch": 0.12797401370060968, "grad_norm": 0.59375, "learning_rate": 0.00046534231183774583, "loss": 0.2048, "step": 72176 }, { "epoch": 0.1279775598659195, "grad_norm": 0.416015625, "learning_rate": 0.0004653001125343854, "loss": 0.2177, "step": 72178 }, { "epoch": 0.1279811060312293, "grad_norm": 2.59375, "learning_rate": 0.00046525791600683354, "loss": 0.255, "step": 72180 }, { "epoch": 0.12798465219653912, "grad_norm": 0.333984375, "learning_rate": 0.00046521572225527484, "loss": 0.2191, "step": 72182 }, { "epoch": 0.12798819836184894, "grad_norm": 0.265625, "learning_rate": 0.0004651735312798936, "loss": 0.1061, "step": 72184 }, { "epoch": 0.12799174452715875, "grad_norm": 1.265625, "learning_rate": 0.00046513134308087483, "loss": 0.1943, "step": 72186 }, { "epoch": 0.12799529069246857, "grad_norm": 0.65625, "learning_rate": 0.00046508915765840295, "loss": 0.1219, "step": 72188 }, { "epoch": 0.12799883685777838, "grad_norm": 1.1796875, "learning_rate": 0.00046504697501266207, "loss": 0.1704, "step": 72190 }, { "epoch": 0.1280023830230882, "grad_norm": 0.59765625, "learning_rate": 0.000465004795143837, "loss": 0.1864, "step": 72192 }, { "epoch": 0.128005929188398, "grad_norm": 17.75, "learning_rate": 0.0004649626180521118, "loss": 0.3325, "step": 72194 }, { "epoch": 0.12800947535370782, "grad_norm": 0.515625, "learning_rate": 0.0004649204437376719, "loss": 0.2173, "step": 72196 }, { "epoch": 0.12801302151901764, "grad_norm": 0.4609375, "learning_rate": 0.0004648782722007004, "loss": 0.4204, "step": 72198 }, { "epoch": 0.12801656768432745, "grad_norm": 0.2255859375, "learning_rate": 0.0004648361034413827, "loss": 0.1083, "step": 72200 }, { "epoch": 0.12802011384963727, "grad_norm": 0.37890625, "learning_rate": 0.00046479393745990264, "loss": 0.1867, "step": 72202 }, { "epoch": 0.12802366001494708, "grad_norm": 0.66796875, "learning_rate": 0.0004647517742564456, "loss": 0.2648, "step": 72204 }, { "epoch": 0.1280272061802569, "grad_norm": 0.1318359375, "learning_rate": 0.00046470961383119464, "loss": 0.1284, "step": 72206 }, { "epoch": 0.1280307523455667, "grad_norm": 0.6171875, "learning_rate": 0.0004646674561843351, "loss": 0.1718, "step": 72208 }, { "epoch": 0.12803429851087653, "grad_norm": 0.341796875, "learning_rate": 0.00046462530131605113, "loss": 0.1617, "step": 72210 }, { "epoch": 0.12803784467618634, "grad_norm": 0.310546875, "learning_rate": 0.000464583149226527, "loss": 0.1718, "step": 72212 }, { "epoch": 0.12804139084149616, "grad_norm": 0.73046875, "learning_rate": 0.000464540999915947, "loss": 0.2244, "step": 72214 }, { "epoch": 0.12804493700680597, "grad_norm": 0.443359375, "learning_rate": 0.0004644988533844954, "loss": 0.1542, "step": 72216 }, { "epoch": 0.12804848317211578, "grad_norm": 0.34765625, "learning_rate": 0.0004644567096323568, "loss": 0.17, "step": 72218 }, { "epoch": 0.1280520293374256, "grad_norm": 0.60546875, "learning_rate": 0.00046441456865971547, "loss": 0.145, "step": 72220 }, { "epoch": 0.1280555755027354, "grad_norm": 0.2080078125, "learning_rate": 0.0004643724304667556, "loss": 0.1644, "step": 72222 }, { "epoch": 0.12805912166804523, "grad_norm": 0.37890625, "learning_rate": 0.0004643302950536613, "loss": 0.174, "step": 72224 }, { "epoch": 0.12806266783335504, "grad_norm": 0.419921875, "learning_rate": 0.0004642881624206173, "loss": 0.1829, "step": 72226 }, { "epoch": 0.12806621399866486, "grad_norm": 2.28125, "learning_rate": 0.00046424603256780753, "loss": 0.3299, "step": 72228 }, { "epoch": 0.12806976016397467, "grad_norm": 0.283203125, "learning_rate": 0.0004642039054954163, "loss": 0.1844, "step": 72230 }, { "epoch": 0.1280733063292845, "grad_norm": 0.291015625, "learning_rate": 0.0004641617812036279, "loss": 0.1392, "step": 72232 }, { "epoch": 0.1280768524945943, "grad_norm": 0.275390625, "learning_rate": 0.0004641196596926264, "loss": 0.2021, "step": 72234 }, { "epoch": 0.12808039865990412, "grad_norm": 0.45703125, "learning_rate": 0.00046407754096259657, "loss": 0.1756, "step": 72236 }, { "epoch": 0.12808394482521396, "grad_norm": 0.353515625, "learning_rate": 0.0004640354250137217, "loss": 0.1742, "step": 72238 }, { "epoch": 0.12808749099052377, "grad_norm": 1.5, "learning_rate": 0.0004639933118461866, "loss": 0.3512, "step": 72240 }, { "epoch": 0.1280910371558336, "grad_norm": 0.33984375, "learning_rate": 0.0004639512014601753, "loss": 0.1757, "step": 72242 }, { "epoch": 0.1280945833211434, "grad_norm": 0.447265625, "learning_rate": 0.000463909093855872, "loss": 0.3428, "step": 72244 }, { "epoch": 0.12809812948645322, "grad_norm": 0.32421875, "learning_rate": 0.000463866989033461, "loss": 0.1839, "step": 72246 }, { "epoch": 0.12810167565176303, "grad_norm": 0.23046875, "learning_rate": 0.00046382488699312624, "loss": 0.1713, "step": 72248 }, { "epoch": 0.12810522181707285, "grad_norm": 0.412109375, "learning_rate": 0.00046378278773505187, "loss": 0.1301, "step": 72250 }, { "epoch": 0.12810876798238266, "grad_norm": 0.5390625, "learning_rate": 0.00046374069125942175, "loss": 0.1835, "step": 72252 }, { "epoch": 0.12811231414769247, "grad_norm": 1.3125, "learning_rate": 0.0004636985975664208, "loss": 0.3197, "step": 72254 }, { "epoch": 0.1281158603130023, "grad_norm": 0.27734375, "learning_rate": 0.00046365650665623206, "loss": 0.1587, "step": 72256 }, { "epoch": 0.1281194064783121, "grad_norm": 0.41796875, "learning_rate": 0.0004636144185290404, "loss": 0.2143, "step": 72258 }, { "epoch": 0.12812295264362192, "grad_norm": 0.5234375, "learning_rate": 0.00046357233318502963, "loss": 0.1619, "step": 72260 }, { "epoch": 0.12812649880893173, "grad_norm": 0.96875, "learning_rate": 0.0004635302506243838, "loss": 0.1639, "step": 72262 }, { "epoch": 0.12813004497424155, "grad_norm": 0.298828125, "learning_rate": 0.0004634881708472869, "loss": 0.1539, "step": 72264 }, { "epoch": 0.12813359113955136, "grad_norm": 0.396484375, "learning_rate": 0.00046344609385392274, "loss": 0.1712, "step": 72266 }, { "epoch": 0.12813713730486118, "grad_norm": 0.1845703125, "learning_rate": 0.0004634040196444759, "loss": 0.1316, "step": 72268 }, { "epoch": 0.128140683470171, "grad_norm": 0.4921875, "learning_rate": 0.0004633619482191301, "loss": 0.1986, "step": 72270 }, { "epoch": 0.1281442296354808, "grad_norm": 0.2890625, "learning_rate": 0.00046331987957806936, "loss": 0.1383, "step": 72272 }, { "epoch": 0.12814777580079062, "grad_norm": 0.32421875, "learning_rate": 0.0004632778137214773, "loss": 0.1313, "step": 72274 }, { "epoch": 0.12815132196610043, "grad_norm": 0.6953125, "learning_rate": 0.00046323575064953856, "loss": 0.2303, "step": 72276 }, { "epoch": 0.12815486813141025, "grad_norm": 0.287109375, "learning_rate": 0.00046319369036243674, "loss": 0.182, "step": 72278 }, { "epoch": 0.12815841429672006, "grad_norm": 0.6015625, "learning_rate": 0.0004631516328603559, "loss": 0.1506, "step": 72280 }, { "epoch": 0.12816196046202988, "grad_norm": 1.5703125, "learning_rate": 0.0004631095781434798, "loss": 0.2186, "step": 72282 }, { "epoch": 0.1281655066273397, "grad_norm": 0.244140625, "learning_rate": 0.0004630675262119922, "loss": 0.2179, "step": 72284 }, { "epoch": 0.1281690527926495, "grad_norm": 0.6796875, "learning_rate": 0.00046302547706607777, "loss": 0.2981, "step": 72286 }, { "epoch": 0.12817259895795932, "grad_norm": 0.447265625, "learning_rate": 0.0004629834307059194, "loss": 0.1918, "step": 72288 }, { "epoch": 0.12817614512326914, "grad_norm": 0.27734375, "learning_rate": 0.00046294138713170183, "loss": 0.1576, "step": 72290 }, { "epoch": 0.12817969128857895, "grad_norm": 0.92578125, "learning_rate": 0.0004628993463436082, "loss": 0.1958, "step": 72292 }, { "epoch": 0.12818323745388877, "grad_norm": 0.2470703125, "learning_rate": 0.0004628573083418233, "loss": 0.1451, "step": 72294 }, { "epoch": 0.12818678361919858, "grad_norm": 0.73046875, "learning_rate": 0.00046281527312652996, "loss": 0.2046, "step": 72296 }, { "epoch": 0.1281903297845084, "grad_norm": 0.46875, "learning_rate": 0.00046277324069791276, "loss": 0.1399, "step": 72298 }, { "epoch": 0.1281938759498182, "grad_norm": 0.419921875, "learning_rate": 0.00046273121105615527, "loss": 0.1724, "step": 72300 }, { "epoch": 0.12819742211512802, "grad_norm": 0.35546875, "learning_rate": 0.0004626891842014412, "loss": 0.1958, "step": 72302 }, { "epoch": 0.12820096828043784, "grad_norm": 0.8984375, "learning_rate": 0.0004626471601339546, "loss": 0.3648, "step": 72304 }, { "epoch": 0.12820451444574765, "grad_norm": 0.5078125, "learning_rate": 0.0004626051388538787, "loss": 0.1757, "step": 72306 }, { "epoch": 0.12820806061105747, "grad_norm": 0.306640625, "learning_rate": 0.00046256312036139797, "loss": 0.337, "step": 72308 }, { "epoch": 0.12821160677636728, "grad_norm": 1.703125, "learning_rate": 0.00046252110465669593, "loss": 0.229, "step": 72310 }, { "epoch": 0.1282151529416771, "grad_norm": 0.2294921875, "learning_rate": 0.0004624790917399563, "loss": 0.1419, "step": 72312 }, { "epoch": 0.1282186991069869, "grad_norm": 0.337890625, "learning_rate": 0.0004624370816113624, "loss": 0.1745, "step": 72314 }, { "epoch": 0.12822224527229673, "grad_norm": 1.6875, "learning_rate": 0.00046239507427109876, "loss": 0.2318, "step": 72316 }, { "epoch": 0.12822579143760654, "grad_norm": 0.2265625, "learning_rate": 0.0004623530697193487, "loss": 0.153, "step": 72318 }, { "epoch": 0.12822933760291635, "grad_norm": 0.369140625, "learning_rate": 0.00046231106795629594, "loss": 0.2452, "step": 72320 }, { "epoch": 0.12823288376822617, "grad_norm": 0.92578125, "learning_rate": 0.0004622690689821241, "loss": 0.2509, "step": 72322 }, { "epoch": 0.12823642993353598, "grad_norm": 0.1357421875, "learning_rate": 0.00046222707279701663, "loss": 0.146, "step": 72324 }, { "epoch": 0.1282399760988458, "grad_norm": 0.82421875, "learning_rate": 0.0004621850794011578, "loss": 0.1408, "step": 72326 }, { "epoch": 0.12824352226415564, "grad_norm": 0.1748046875, "learning_rate": 0.00046214308879473094, "loss": 0.1339, "step": 72328 }, { "epoch": 0.12824706842946546, "grad_norm": 0.51171875, "learning_rate": 0.0004621011009779197, "loss": 0.191, "step": 72330 }, { "epoch": 0.12825061459477527, "grad_norm": 0.63671875, "learning_rate": 0.00046205911595090737, "loss": 0.3501, "step": 72332 }, { "epoch": 0.12825416076008508, "grad_norm": 0.30078125, "learning_rate": 0.0004620171337138782, "loss": 0.1635, "step": 72334 }, { "epoch": 0.1282577069253949, "grad_norm": 0.2421875, "learning_rate": 0.00046197515426701555, "loss": 0.1783, "step": 72336 }, { "epoch": 0.1282612530907047, "grad_norm": 0.5234375, "learning_rate": 0.00046193317761050295, "loss": 0.2018, "step": 72338 }, { "epoch": 0.12826479925601453, "grad_norm": 0.328125, "learning_rate": 0.00046189120374452405, "loss": 0.1128, "step": 72340 }, { "epoch": 0.12826834542132434, "grad_norm": 0.326171875, "learning_rate": 0.00046184923266926197, "loss": 0.1374, "step": 72342 }, { "epoch": 0.12827189158663416, "grad_norm": 0.1962890625, "learning_rate": 0.00046180726438490135, "loss": 0.2081, "step": 72344 }, { "epoch": 0.12827543775194397, "grad_norm": 0.404296875, "learning_rate": 0.00046176529889162444, "loss": 0.2371, "step": 72346 }, { "epoch": 0.12827898391725379, "grad_norm": 1.3125, "learning_rate": 0.0004617233361896155, "loss": 0.2531, "step": 72348 }, { "epoch": 0.1282825300825636, "grad_norm": 0.314453125, "learning_rate": 0.0004616813762790579, "loss": 0.1651, "step": 72350 }, { "epoch": 0.12828607624787342, "grad_norm": 0.95703125, "learning_rate": 0.00046163941916013565, "loss": 0.127, "step": 72352 }, { "epoch": 0.12828962241318323, "grad_norm": 0.68359375, "learning_rate": 0.00046159746483303123, "loss": 0.1667, "step": 72354 }, { "epoch": 0.12829316857849304, "grad_norm": 0.73046875, "learning_rate": 0.0004615555132979289, "loss": 0.1669, "step": 72356 }, { "epoch": 0.12829671474380286, "grad_norm": 0.5234375, "learning_rate": 0.000461513564555012, "loss": 0.1838, "step": 72358 }, { "epoch": 0.12830026090911267, "grad_norm": 0.2578125, "learning_rate": 0.0004614716186044637, "loss": 0.4004, "step": 72360 }, { "epoch": 0.1283038070744225, "grad_norm": 0.5078125, "learning_rate": 0.00046142967544646787, "loss": 0.1616, "step": 72362 }, { "epoch": 0.1283073532397323, "grad_norm": 0.205078125, "learning_rate": 0.0004613877350812073, "loss": 0.2295, "step": 72364 }, { "epoch": 0.12831089940504212, "grad_norm": 0.361328125, "learning_rate": 0.00046134579750886607, "loss": 0.2203, "step": 72366 }, { "epoch": 0.12831444557035193, "grad_norm": 1.6015625, "learning_rate": 0.00046130386272962736, "loss": 0.355, "step": 72368 }, { "epoch": 0.12831799173566175, "grad_norm": 0.259765625, "learning_rate": 0.0004612619307436747, "loss": 0.1363, "step": 72370 }, { "epoch": 0.12832153790097156, "grad_norm": 2.234375, "learning_rate": 0.00046122000155119096, "loss": 0.2131, "step": 72372 }, { "epoch": 0.12832508406628138, "grad_norm": 0.50390625, "learning_rate": 0.00046117807515236016, "loss": 0.1747, "step": 72374 }, { "epoch": 0.1283286302315912, "grad_norm": 0.6015625, "learning_rate": 0.00046113615154736543, "loss": 0.1872, "step": 72376 }, { "epoch": 0.128332176396901, "grad_norm": 0.349609375, "learning_rate": 0.00046109423073638996, "loss": 0.2356, "step": 72378 }, { "epoch": 0.12833572256221082, "grad_norm": 1.671875, "learning_rate": 0.0004610523127196173, "loss": 0.1297, "step": 72380 }, { "epoch": 0.12833926872752063, "grad_norm": 0.427734375, "learning_rate": 0.0004610103974972303, "loss": 0.1685, "step": 72382 }, { "epoch": 0.12834281489283045, "grad_norm": 1.0390625, "learning_rate": 0.0004609684850694134, "loss": 0.1622, "step": 72384 }, { "epoch": 0.12834636105814026, "grad_norm": 0.408203125, "learning_rate": 0.0004609265754363485, "loss": 0.1718, "step": 72386 }, { "epoch": 0.12834990722345008, "grad_norm": 0.3125, "learning_rate": 0.0004608846685982198, "loss": 0.1767, "step": 72388 }, { "epoch": 0.1283534533887599, "grad_norm": 0.208984375, "learning_rate": 0.00046084276455521, "loss": 0.1333, "step": 72390 }, { "epoch": 0.1283569995540697, "grad_norm": 2.25, "learning_rate": 0.00046080086330750326, "loss": 0.3095, "step": 72392 }, { "epoch": 0.12836054571937952, "grad_norm": 0.474609375, "learning_rate": 0.00046075896485528174, "loss": 0.1601, "step": 72394 }, { "epoch": 0.12836409188468934, "grad_norm": 0.380859375, "learning_rate": 0.00046071706919872946, "loss": 0.166, "step": 72396 }, { "epoch": 0.12836763804999915, "grad_norm": 0.40625, "learning_rate": 0.0004606751763380293, "loss": 0.1685, "step": 72398 }, { "epoch": 0.12837118421530896, "grad_norm": 0.39453125, "learning_rate": 0.00046063328627336467, "loss": 0.1471, "step": 72400 }, { "epoch": 0.12837473038061878, "grad_norm": 0.609375, "learning_rate": 0.00046059139900491856, "loss": 0.1589, "step": 72402 }, { "epoch": 0.1283782765459286, "grad_norm": 0.9453125, "learning_rate": 0.00046054951453287414, "loss": 0.3479, "step": 72404 }, { "epoch": 0.1283818227112384, "grad_norm": 0.4375, "learning_rate": 0.00046050763285741483, "loss": 0.1499, "step": 72406 }, { "epoch": 0.12838536887654822, "grad_norm": 0.3828125, "learning_rate": 0.00046046575397872344, "loss": 0.2465, "step": 72408 }, { "epoch": 0.12838891504185804, "grad_norm": 0.76953125, "learning_rate": 0.00046042387789698385, "loss": 0.316, "step": 72410 }, { "epoch": 0.12839246120716785, "grad_norm": 1.171875, "learning_rate": 0.00046038200461237825, "loss": 0.1042, "step": 72412 }, { "epoch": 0.12839600737247767, "grad_norm": 0.26171875, "learning_rate": 0.0004603401341250905, "loss": 0.1829, "step": 72414 }, { "epoch": 0.12839955353778748, "grad_norm": 0.53515625, "learning_rate": 0.0004602982664353034, "loss": 0.177, "step": 72416 }, { "epoch": 0.12840309970309732, "grad_norm": 1.5703125, "learning_rate": 0.0004602564015432001, "loss": 0.246, "step": 72418 }, { "epoch": 0.12840664586840714, "grad_norm": 3.296875, "learning_rate": 0.0004602145394489637, "loss": 0.3841, "step": 72420 }, { "epoch": 0.12841019203371695, "grad_norm": 0.453125, "learning_rate": 0.00046017268015277705, "loss": 0.1891, "step": 72422 }, { "epoch": 0.12841373819902677, "grad_norm": 1.6484375, "learning_rate": 0.0004601308236548236, "loss": 0.2923, "step": 72424 }, { "epoch": 0.12841728436433658, "grad_norm": 0.69140625, "learning_rate": 0.0004600889699552864, "loss": 0.166, "step": 72426 }, { "epoch": 0.1284208305296464, "grad_norm": 0.484375, "learning_rate": 0.0004600471190543482, "loss": 0.233, "step": 72428 }, { "epoch": 0.1284243766949562, "grad_norm": 0.2431640625, "learning_rate": 0.00046000527095219193, "loss": 0.1437, "step": 72430 }, { "epoch": 0.12842792286026602, "grad_norm": 1.25, "learning_rate": 0.0004599634256490012, "loss": 0.2702, "step": 72432 }, { "epoch": 0.12843146902557584, "grad_norm": 0.3984375, "learning_rate": 0.0004599215831449586, "loss": 0.1421, "step": 72434 }, { "epoch": 0.12843501519088565, "grad_norm": 0.5234375, "learning_rate": 0.0004598797434402472, "loss": 0.1295, "step": 72436 }, { "epoch": 0.12843856135619547, "grad_norm": 0.60546875, "learning_rate": 0.00045983790653505, "loss": 0.1926, "step": 72438 }, { "epoch": 0.12844210752150528, "grad_norm": 0.50390625, "learning_rate": 0.0004597960724295496, "loss": 0.2206, "step": 72440 }, { "epoch": 0.1284456536868151, "grad_norm": 0.54296875, "learning_rate": 0.00045975424112393, "loss": 0.156, "step": 72442 }, { "epoch": 0.1284491998521249, "grad_norm": 0.7890625, "learning_rate": 0.00045971241261837277, "loss": 0.1714, "step": 72444 }, { "epoch": 0.12845274601743473, "grad_norm": 1.5078125, "learning_rate": 0.0004596705869130619, "loss": 0.2397, "step": 72446 }, { "epoch": 0.12845629218274454, "grad_norm": 0.458984375, "learning_rate": 0.00045962876400817987, "loss": 0.1877, "step": 72448 }, { "epoch": 0.12845983834805436, "grad_norm": 0.318359375, "learning_rate": 0.0004595869439039096, "loss": 0.144, "step": 72450 }, { "epoch": 0.12846338451336417, "grad_norm": 0.279296875, "learning_rate": 0.0004595451266004342, "loss": 0.195, "step": 72452 }, { "epoch": 0.12846693067867399, "grad_norm": 0.37890625, "learning_rate": 0.0004595033120979359, "loss": 0.1777, "step": 72454 }, { "epoch": 0.1284704768439838, "grad_norm": 0.41015625, "learning_rate": 0.0004594615003965985, "loss": 0.2827, "step": 72456 }, { "epoch": 0.12847402300929361, "grad_norm": 0.35546875, "learning_rate": 0.00045941969149660447, "loss": 0.1573, "step": 72458 }, { "epoch": 0.12847756917460343, "grad_norm": 0.255859375, "learning_rate": 0.0004593778853981365, "loss": 0.1339, "step": 72460 }, { "epoch": 0.12848111533991324, "grad_norm": 0.25390625, "learning_rate": 0.0004593360821013773, "loss": 0.1426, "step": 72462 }, { "epoch": 0.12848466150522306, "grad_norm": 0.58203125, "learning_rate": 0.00045929428160651026, "loss": 0.2233, "step": 72464 }, { "epoch": 0.12848820767053287, "grad_norm": 0.2197265625, "learning_rate": 0.0004592524839137178, "loss": 0.1535, "step": 72466 }, { "epoch": 0.1284917538358427, "grad_norm": 0.310546875, "learning_rate": 0.00045921068902318285, "loss": 0.176, "step": 72468 }, { "epoch": 0.1284953000011525, "grad_norm": 3.796875, "learning_rate": 0.0004591688969350881, "loss": 0.2596, "step": 72470 }, { "epoch": 0.12849884616646232, "grad_norm": 0.345703125, "learning_rate": 0.00045912710764961604, "loss": 0.1613, "step": 72472 }, { "epoch": 0.12850239233177213, "grad_norm": 0.388671875, "learning_rate": 0.0004590853211669503, "loss": 0.1343, "step": 72474 }, { "epoch": 0.12850593849708195, "grad_norm": 0.52734375, "learning_rate": 0.00045904353748727264, "loss": 0.2157, "step": 72476 }, { "epoch": 0.12850948466239176, "grad_norm": 0.2275390625, "learning_rate": 0.0004590017566107664, "loss": 0.3143, "step": 72478 }, { "epoch": 0.12851303082770157, "grad_norm": 2.640625, "learning_rate": 0.00045895997853761383, "loss": 0.3353, "step": 72480 }, { "epoch": 0.1285165769930114, "grad_norm": 0.54296875, "learning_rate": 0.0004589182032679987, "loss": 0.1758, "step": 72482 }, { "epoch": 0.1285201231583212, "grad_norm": 1.1015625, "learning_rate": 0.0004588764308021022, "loss": 0.2305, "step": 72484 }, { "epoch": 0.12852366932363102, "grad_norm": 0.388671875, "learning_rate": 0.00045883466114010813, "loss": 0.1564, "step": 72486 }, { "epoch": 0.12852721548894083, "grad_norm": 5.5625, "learning_rate": 0.0004587928942821989, "loss": 0.2475, "step": 72488 }, { "epoch": 0.12853076165425065, "grad_norm": 0.5625, "learning_rate": 0.00045875113022855664, "loss": 0.135, "step": 72490 }, { "epoch": 0.12853430781956046, "grad_norm": 0.376953125, "learning_rate": 0.0004587093689793652, "loss": 0.1625, "step": 72492 }, { "epoch": 0.12853785398487028, "grad_norm": 0.796875, "learning_rate": 0.00045866761053480577, "loss": 0.2078, "step": 72494 }, { "epoch": 0.1285414001501801, "grad_norm": 0.62109375, "learning_rate": 0.000458625854895062, "loss": 0.153, "step": 72496 }, { "epoch": 0.1285449463154899, "grad_norm": 0.1943359375, "learning_rate": 0.0004585841020603159, "loss": 0.1972, "step": 72498 }, { "epoch": 0.12854849248079972, "grad_norm": 0.263671875, "learning_rate": 0.000458542352030751, "loss": 0.1526, "step": 72500 }, { "epoch": 0.12855203864610953, "grad_norm": 0.49609375, "learning_rate": 0.00045850060480654857, "loss": 0.1962, "step": 72502 }, { "epoch": 0.12855558481141935, "grad_norm": 0.29296875, "learning_rate": 0.0004584588603878923, "loss": 0.1888, "step": 72504 }, { "epoch": 0.12855913097672916, "grad_norm": 0.33203125, "learning_rate": 0.0004584171187749643, "loss": 0.1843, "step": 72506 }, { "epoch": 0.12856267714203898, "grad_norm": 0.3828125, "learning_rate": 0.00045837537996794704, "loss": 0.2734, "step": 72508 }, { "epoch": 0.12856622330734882, "grad_norm": 0.189453125, "learning_rate": 0.0004583336439670232, "loss": 0.2493, "step": 72510 }, { "epoch": 0.12856976947265863, "grad_norm": 0.609375, "learning_rate": 0.00045829191077237505, "loss": 0.2086, "step": 72512 }, { "epoch": 0.12857331563796845, "grad_norm": 1.390625, "learning_rate": 0.00045825018038418556, "loss": 0.1882, "step": 72514 }, { "epoch": 0.12857686180327826, "grad_norm": 0.341796875, "learning_rate": 0.00045820845280263706, "loss": 0.182, "step": 72516 }, { "epoch": 0.12858040796858808, "grad_norm": 0.3671875, "learning_rate": 0.0004581667280279119, "loss": 0.1635, "step": 72518 }, { "epoch": 0.1285839541338979, "grad_norm": 1.109375, "learning_rate": 0.0004581250060601925, "loss": 0.2374, "step": 72520 }, { "epoch": 0.1285875002992077, "grad_norm": 0.322265625, "learning_rate": 0.00045808328689966155, "loss": 0.1838, "step": 72522 }, { "epoch": 0.12859104646451752, "grad_norm": 0.201171875, "learning_rate": 0.0004580415705465016, "loss": 0.1805, "step": 72524 }, { "epoch": 0.12859459262982734, "grad_norm": 0.376953125, "learning_rate": 0.0004579998570008949, "loss": 0.1976, "step": 72526 }, { "epoch": 0.12859813879513715, "grad_norm": 1.40625, "learning_rate": 0.0004579581462630239, "loss": 0.2431, "step": 72528 }, { "epoch": 0.12860168496044697, "grad_norm": 0.25390625, "learning_rate": 0.0004579164383330708, "loss": 0.2025, "step": 72530 }, { "epoch": 0.12860523112575678, "grad_norm": 0.3828125, "learning_rate": 0.0004578747332112187, "loss": 0.132, "step": 72532 }, { "epoch": 0.1286087772910666, "grad_norm": 0.52734375, "learning_rate": 0.00045783303089764907, "loss": 0.1462, "step": 72534 }, { "epoch": 0.1286123234563764, "grad_norm": 0.66796875, "learning_rate": 0.0004577913313925449, "loss": 0.226, "step": 72536 }, { "epoch": 0.12861586962168622, "grad_norm": 1.5859375, "learning_rate": 0.00045774963469608814, "loss": 0.1539, "step": 72538 }, { "epoch": 0.12861941578699604, "grad_norm": 2.5625, "learning_rate": 0.00045770794080846213, "loss": 0.21, "step": 72540 }, { "epoch": 0.12862296195230585, "grad_norm": 1.8828125, "learning_rate": 0.0004576662497298478, "loss": 0.2257, "step": 72542 }, { "epoch": 0.12862650811761567, "grad_norm": 1.8359375, "learning_rate": 0.00045762456146042845, "loss": 0.2115, "step": 72544 }, { "epoch": 0.12863005428292548, "grad_norm": 0.625, "learning_rate": 0.00045758287600038616, "loss": 0.2425, "step": 72546 }, { "epoch": 0.1286336004482353, "grad_norm": 0.298828125, "learning_rate": 0.0004575411933499033, "loss": 0.1833, "step": 72548 }, { "epoch": 0.1286371466135451, "grad_norm": 0.3046875, "learning_rate": 0.00045749951350916205, "loss": 0.2473, "step": 72550 }, { "epoch": 0.12864069277885493, "grad_norm": 0.515625, "learning_rate": 0.0004574578364783443, "loss": 0.1964, "step": 72552 }, { "epoch": 0.12864423894416474, "grad_norm": 0.41796875, "learning_rate": 0.0004574161622576331, "loss": 0.2343, "step": 72554 }, { "epoch": 0.12864778510947456, "grad_norm": 1.484375, "learning_rate": 0.0004573744908472104, "loss": 0.3852, "step": 72556 }, { "epoch": 0.12865133127478437, "grad_norm": 0.25390625, "learning_rate": 0.0004573328222472583, "loss": 0.1646, "step": 72558 }, { "epoch": 0.12865487744009418, "grad_norm": 1.765625, "learning_rate": 0.0004572911564579589, "loss": 0.2014, "step": 72560 }, { "epoch": 0.128658423605404, "grad_norm": 7.125, "learning_rate": 0.00045724949347949496, "loss": 0.1577, "step": 72562 }, { "epoch": 0.1286619697707138, "grad_norm": 0.3515625, "learning_rate": 0.00045720783331204833, "loss": 0.2283, "step": 72564 }, { "epoch": 0.12866551593602363, "grad_norm": 0.91796875, "learning_rate": 0.00045716617595580137, "loss": 0.1673, "step": 72566 }, { "epoch": 0.12866906210133344, "grad_norm": 0.26953125, "learning_rate": 0.00045712452141093607, "loss": 0.2284, "step": 72568 }, { "epoch": 0.12867260826664326, "grad_norm": 0.46484375, "learning_rate": 0.0004570828696776344, "loss": 0.1968, "step": 72570 }, { "epoch": 0.12867615443195307, "grad_norm": 0.65625, "learning_rate": 0.00045704122075607937, "loss": 0.1498, "step": 72572 }, { "epoch": 0.12867970059726289, "grad_norm": 0.412109375, "learning_rate": 0.000456999574646452, "loss": 0.192, "step": 72574 }, { "epoch": 0.1286832467625727, "grad_norm": 0.7265625, "learning_rate": 0.00045695793134893533, "loss": 0.1566, "step": 72576 }, { "epoch": 0.12868679292788252, "grad_norm": 0.3203125, "learning_rate": 0.0004569162908637108, "loss": 0.1955, "step": 72578 }, { "epoch": 0.12869033909319233, "grad_norm": 1.5625, "learning_rate": 0.00045687465319096113, "loss": 0.2425, "step": 72580 }, { "epoch": 0.12869388525850214, "grad_norm": 0.80078125, "learning_rate": 0.00045683301833086815, "loss": 0.162, "step": 72582 }, { "epoch": 0.12869743142381196, "grad_norm": 0.40625, "learning_rate": 0.00045679138628361405, "loss": 0.1301, "step": 72584 }, { "epoch": 0.12870097758912177, "grad_norm": 1.0625, "learning_rate": 0.00045674975704938076, "loss": 0.2534, "step": 72586 }, { "epoch": 0.1287045237544316, "grad_norm": 0.1591796875, "learning_rate": 0.00045670813062835005, "loss": 0.1317, "step": 72588 }, { "epoch": 0.1287080699197414, "grad_norm": 0.72265625, "learning_rate": 0.00045666650702070493, "loss": 0.1694, "step": 72590 }, { "epoch": 0.12871161608505122, "grad_norm": 0.296875, "learning_rate": 0.00045662488622662625, "loss": 0.1632, "step": 72592 }, { "epoch": 0.12871516225036103, "grad_norm": 0.515625, "learning_rate": 0.00045658326824629685, "loss": 0.2013, "step": 72594 }, { "epoch": 0.12871870841567085, "grad_norm": 0.58203125, "learning_rate": 0.0004565416530798983, "loss": 0.1718, "step": 72596 }, { "epoch": 0.12872225458098066, "grad_norm": 0.76171875, "learning_rate": 0.00045650004072761327, "loss": 0.1702, "step": 72598 }, { "epoch": 0.1287258007462905, "grad_norm": 0.5, "learning_rate": 0.0004564584311896227, "loss": 0.1995, "step": 72600 }, { "epoch": 0.12872934691160032, "grad_norm": 0.306640625, "learning_rate": 0.0004564168244661095, "loss": 0.2253, "step": 72602 }, { "epoch": 0.12873289307691013, "grad_norm": 1.2890625, "learning_rate": 0.0004563752205572552, "loss": 0.3481, "step": 72604 }, { "epoch": 0.12873643924221995, "grad_norm": 0.3125, "learning_rate": 0.0004563336194632419, "loss": 0.2127, "step": 72606 }, { "epoch": 0.12873998540752976, "grad_norm": 0.52734375, "learning_rate": 0.00045629202118425136, "loss": 0.1853, "step": 72608 }, { "epoch": 0.12874353157283958, "grad_norm": 0.33203125, "learning_rate": 0.0004562504257204654, "loss": 0.18, "step": 72610 }, { "epoch": 0.1287470777381494, "grad_norm": 0.2080078125, "learning_rate": 0.0004562088330720664, "loss": 0.2107, "step": 72612 }, { "epoch": 0.1287506239034592, "grad_norm": 0.6953125, "learning_rate": 0.00045616724323923604, "loss": 0.1545, "step": 72614 }, { "epoch": 0.12875417006876902, "grad_norm": 0.42578125, "learning_rate": 0.00045612565622215614, "loss": 0.242, "step": 72616 }, { "epoch": 0.12875771623407883, "grad_norm": 0.255859375, "learning_rate": 0.0004560840720210086, "loss": 0.1544, "step": 72618 }, { "epoch": 0.12876126239938865, "grad_norm": 0.412109375, "learning_rate": 0.000456042490635975, "loss": 0.1711, "step": 72620 }, { "epoch": 0.12876480856469846, "grad_norm": 0.2080078125, "learning_rate": 0.0004560009120672381, "loss": 0.1727, "step": 72622 }, { "epoch": 0.12876835473000828, "grad_norm": 0.365234375, "learning_rate": 0.0004559593363149786, "loss": 0.1533, "step": 72624 }, { "epoch": 0.1287719008953181, "grad_norm": 0.390625, "learning_rate": 0.00045591776337937904, "loss": 0.174, "step": 72626 }, { "epoch": 0.1287754470606279, "grad_norm": 0.45703125, "learning_rate": 0.00045587619326062086, "loss": 0.1677, "step": 72628 }, { "epoch": 0.12877899322593772, "grad_norm": 0.185546875, "learning_rate": 0.0004558346259588866, "loss": 0.1499, "step": 72630 }, { "epoch": 0.12878253939124754, "grad_norm": 1.2265625, "learning_rate": 0.00045579306147435693, "loss": 0.1703, "step": 72632 }, { "epoch": 0.12878608555655735, "grad_norm": 0.578125, "learning_rate": 0.00045575149980721445, "loss": 0.2049, "step": 72634 }, { "epoch": 0.12878963172186716, "grad_norm": 1.421875, "learning_rate": 0.0004557099409576408, "loss": 0.2261, "step": 72636 }, { "epoch": 0.12879317788717698, "grad_norm": 0.427734375, "learning_rate": 0.0004556683849258175, "loss": 0.1747, "step": 72638 }, { "epoch": 0.1287967240524868, "grad_norm": 0.6875, "learning_rate": 0.00045562683171192653, "loss": 0.3291, "step": 72640 }, { "epoch": 0.1288002702177966, "grad_norm": 0.29296875, "learning_rate": 0.00045558528131614914, "loss": 0.2565, "step": 72642 }, { "epoch": 0.12880381638310642, "grad_norm": 0.267578125, "learning_rate": 0.0004555437337386677, "loss": 0.212, "step": 72644 }, { "epoch": 0.12880736254841624, "grad_norm": 0.9921875, "learning_rate": 0.0004555021889796636, "loss": 0.1799, "step": 72646 }, { "epoch": 0.12881090871372605, "grad_norm": 0.447265625, "learning_rate": 0.00045546064703931864, "loss": 0.1962, "step": 72648 }, { "epoch": 0.12881445487903587, "grad_norm": 0.498046875, "learning_rate": 0.00045541910791781405, "loss": 0.234, "step": 72650 }, { "epoch": 0.12881800104434568, "grad_norm": 0.427734375, "learning_rate": 0.0004553775716153323, "loss": 0.1775, "step": 72652 }, { "epoch": 0.1288215472096555, "grad_norm": 0.2578125, "learning_rate": 0.0004553360381320544, "loss": 0.1414, "step": 72654 }, { "epoch": 0.1288250933749653, "grad_norm": 0.53125, "learning_rate": 0.00045529450746816235, "loss": 0.1582, "step": 72656 }, { "epoch": 0.12882863954027512, "grad_norm": 0.51171875, "learning_rate": 0.00045525297962383757, "loss": 0.1691, "step": 72658 }, { "epoch": 0.12883218570558494, "grad_norm": 0.4609375, "learning_rate": 0.00045521145459926146, "loss": 0.2564, "step": 72660 }, { "epoch": 0.12883573187089475, "grad_norm": 0.333984375, "learning_rate": 0.0004551699323946162, "loss": 0.2565, "step": 72662 }, { "epoch": 0.12883927803620457, "grad_norm": 2.40625, "learning_rate": 0.0004551284130100831, "loss": 0.2392, "step": 72664 }, { "epoch": 0.12884282420151438, "grad_norm": 0.38671875, "learning_rate": 0.0004550868964458437, "loss": 0.1663, "step": 72666 }, { "epoch": 0.1288463703668242, "grad_norm": 6.40625, "learning_rate": 0.00045504538270207945, "loss": 0.2671, "step": 72668 }, { "epoch": 0.128849916532134, "grad_norm": 0.486328125, "learning_rate": 0.0004550038717789722, "loss": 0.1706, "step": 72670 }, { "epoch": 0.12885346269744383, "grad_norm": 1.9296875, "learning_rate": 0.0004549623636767034, "loss": 0.2573, "step": 72672 }, { "epoch": 0.12885700886275364, "grad_norm": 0.271484375, "learning_rate": 0.0004549208583954545, "loss": 0.1982, "step": 72674 }, { "epoch": 0.12886055502806346, "grad_norm": 0.294921875, "learning_rate": 0.00045487935593540717, "loss": 0.1648, "step": 72676 }, { "epoch": 0.12886410119337327, "grad_norm": 0.353515625, "learning_rate": 0.0004548378562967425, "loss": 0.1871, "step": 72678 }, { "epoch": 0.12886764735868309, "grad_norm": 2.609375, "learning_rate": 0.00045479635947964276, "loss": 0.227, "step": 72680 }, { "epoch": 0.1288711935239929, "grad_norm": 0.306640625, "learning_rate": 0.00045475486548428843, "loss": 0.28, "step": 72682 }, { "epoch": 0.12887473968930271, "grad_norm": 0.294921875, "learning_rate": 0.00045471337431086186, "loss": 0.1844, "step": 72684 }, { "epoch": 0.12887828585461253, "grad_norm": 0.95703125, "learning_rate": 0.00045467188595954387, "loss": 0.1977, "step": 72686 }, { "epoch": 0.12888183201992234, "grad_norm": 0.322265625, "learning_rate": 0.00045463040043051674, "loss": 0.1451, "step": 72688 }, { "epoch": 0.12888537818523219, "grad_norm": 1.4765625, "learning_rate": 0.00045458891772396075, "loss": 0.3513, "step": 72690 }, { "epoch": 0.128888924350542, "grad_norm": 0.494140625, "learning_rate": 0.00045454743784005823, "loss": 0.2124, "step": 72692 }, { "epoch": 0.12889247051585181, "grad_norm": 0.39453125, "learning_rate": 0.00045450596077899034, "loss": 0.2006, "step": 72694 }, { "epoch": 0.12889601668116163, "grad_norm": 0.279296875, "learning_rate": 0.00045446448654093847, "loss": 0.2114, "step": 72696 }, { "epoch": 0.12889956284647144, "grad_norm": 3.9375, "learning_rate": 0.0004544230151260839, "loss": 0.1891, "step": 72698 }, { "epoch": 0.12890310901178126, "grad_norm": 0.6328125, "learning_rate": 0.00045438154653460784, "loss": 0.2225, "step": 72700 }, { "epoch": 0.12890665517709107, "grad_norm": 0.375, "learning_rate": 0.0004543400807666921, "loss": 0.1741, "step": 72702 }, { "epoch": 0.1289102013424009, "grad_norm": 0.314453125, "learning_rate": 0.00045429861782251776, "loss": 0.1872, "step": 72704 }, { "epoch": 0.1289137475077107, "grad_norm": 0.2578125, "learning_rate": 0.0004542571577022663, "loss": 0.1485, "step": 72706 }, { "epoch": 0.12891729367302052, "grad_norm": 3.46875, "learning_rate": 0.0004542157004061186, "loss": 0.4931, "step": 72708 }, { "epoch": 0.12892083983833033, "grad_norm": 0.4765625, "learning_rate": 0.00045417424593425664, "loss": 0.2652, "step": 72710 }, { "epoch": 0.12892438600364015, "grad_norm": 0.3671875, "learning_rate": 0.0004541327942868613, "loss": 0.195, "step": 72712 }, { "epoch": 0.12892793216894996, "grad_norm": 0.65234375, "learning_rate": 0.0004540913454641141, "loss": 0.1402, "step": 72714 }, { "epoch": 0.12893147833425977, "grad_norm": 0.365234375, "learning_rate": 0.000454049899466196, "loss": 0.2398, "step": 72716 }, { "epoch": 0.1289350244995696, "grad_norm": 0.306640625, "learning_rate": 0.0004540084562932883, "loss": 0.1518, "step": 72718 }, { "epoch": 0.1289385706648794, "grad_norm": 0.310546875, "learning_rate": 0.0004539670159455728, "loss": 0.1415, "step": 72720 }, { "epoch": 0.12894211683018922, "grad_norm": 0.478515625, "learning_rate": 0.00045392557842322977, "loss": 0.2021, "step": 72722 }, { "epoch": 0.12894566299549903, "grad_norm": 0.1845703125, "learning_rate": 0.0004538841437264414, "loss": 0.1928, "step": 72724 }, { "epoch": 0.12894920916080885, "grad_norm": 0.47265625, "learning_rate": 0.00045384271185538793, "loss": 0.2646, "step": 72726 }, { "epoch": 0.12895275532611866, "grad_norm": 0.353515625, "learning_rate": 0.00045380128281025193, "loss": 0.1712, "step": 72728 }, { "epoch": 0.12895630149142848, "grad_norm": 0.6328125, "learning_rate": 0.00045375985659121303, "loss": 0.1643, "step": 72730 }, { "epoch": 0.1289598476567383, "grad_norm": 0.7265625, "learning_rate": 0.00045371843319845354, "loss": 0.1575, "step": 72732 }, { "epoch": 0.1289633938220481, "grad_norm": 0.2314453125, "learning_rate": 0.00045367701263215415, "loss": 0.186, "step": 72734 }, { "epoch": 0.12896693998735792, "grad_norm": 0.369140625, "learning_rate": 0.00045363559489249613, "loss": 0.1319, "step": 72736 }, { "epoch": 0.12897048615266773, "grad_norm": 0.60546875, "learning_rate": 0.00045359417997966044, "loss": 0.1574, "step": 72738 }, { "epoch": 0.12897403231797755, "grad_norm": 0.76171875, "learning_rate": 0.00045355276789382814, "loss": 0.2207, "step": 72740 }, { "epoch": 0.12897757848328736, "grad_norm": 2.25, "learning_rate": 0.0004535113586351808, "loss": 0.6088, "step": 72742 }, { "epoch": 0.12898112464859718, "grad_norm": 0.52734375, "learning_rate": 0.000453469952203899, "loss": 0.2667, "step": 72744 }, { "epoch": 0.128984670813907, "grad_norm": 1.140625, "learning_rate": 0.0004534285486001646, "loss": 0.1765, "step": 72746 }, { "epoch": 0.1289882169792168, "grad_norm": 0.65234375, "learning_rate": 0.0004533871478241577, "loss": 0.1029, "step": 72748 }, { "epoch": 0.12899176314452662, "grad_norm": 0.69140625, "learning_rate": 0.0004533457498760599, "loss": 0.2361, "step": 72750 }, { "epoch": 0.12899530930983644, "grad_norm": 0.431640625, "learning_rate": 0.00045330435475605236, "loss": 0.1854, "step": 72752 }, { "epoch": 0.12899885547514625, "grad_norm": 0.330078125, "learning_rate": 0.0004532629624643159, "loss": 0.1889, "step": 72754 }, { "epoch": 0.12900240164045607, "grad_norm": 0.255859375, "learning_rate": 0.0004532215730010315, "loss": 0.1643, "step": 72756 }, { "epoch": 0.12900594780576588, "grad_norm": 0.55859375, "learning_rate": 0.0004531801863663799, "loss": 0.1512, "step": 72758 }, { "epoch": 0.1290094939710757, "grad_norm": 0.2060546875, "learning_rate": 0.00045313880256054283, "loss": 0.1441, "step": 72760 }, { "epoch": 0.1290130401363855, "grad_norm": 1.328125, "learning_rate": 0.00045309742158370085, "loss": 0.2871, "step": 72762 }, { "epoch": 0.12901658630169532, "grad_norm": 0.181640625, "learning_rate": 0.00045305604343603506, "loss": 0.1445, "step": 72764 }, { "epoch": 0.12902013246700514, "grad_norm": 0.404296875, "learning_rate": 0.0004530146681177261, "loss": 0.1885, "step": 72766 }, { "epoch": 0.12902367863231495, "grad_norm": 0.54296875, "learning_rate": 0.00045297329562895525, "loss": 0.1687, "step": 72768 }, { "epoch": 0.12902722479762477, "grad_norm": 1.7578125, "learning_rate": 0.0004529319259699035, "loss": 0.1618, "step": 72770 }, { "epoch": 0.12903077096293458, "grad_norm": 0.5703125, "learning_rate": 0.0004528905591407516, "loss": 0.1345, "step": 72772 }, { "epoch": 0.1290343171282444, "grad_norm": 0.21484375, "learning_rate": 0.00045284919514168055, "loss": 0.1839, "step": 72774 }, { "epoch": 0.1290378632935542, "grad_norm": 0.92578125, "learning_rate": 0.0004528078339728709, "loss": 0.1853, "step": 72776 }, { "epoch": 0.12904140945886403, "grad_norm": 0.404296875, "learning_rate": 0.00045276647563450445, "loss": 0.1589, "step": 72778 }, { "epoch": 0.12904495562417384, "grad_norm": 0.27734375, "learning_rate": 0.00045272512012676093, "loss": 0.1874, "step": 72780 }, { "epoch": 0.12904850178948368, "grad_norm": 0.1923828125, "learning_rate": 0.0004526837674498219, "loss": 0.1119, "step": 72782 }, { "epoch": 0.1290520479547935, "grad_norm": 1.6328125, "learning_rate": 0.00045264241760386827, "loss": 0.3411, "step": 72784 }, { "epoch": 0.1290555941201033, "grad_norm": 0.298828125, "learning_rate": 0.00045260107058908044, "loss": 0.1958, "step": 72786 }, { "epoch": 0.12905914028541313, "grad_norm": 0.875, "learning_rate": 0.00045255972640563936, "loss": 0.2303, "step": 72788 }, { "epoch": 0.12906268645072294, "grad_norm": 0.23828125, "learning_rate": 0.00045251838505372627, "loss": 0.1741, "step": 72790 }, { "epoch": 0.12906623261603276, "grad_norm": 0.41796875, "learning_rate": 0.0004524770465335215, "loss": 0.1607, "step": 72792 }, { "epoch": 0.12906977878134257, "grad_norm": 0.265625, "learning_rate": 0.0004524357108452061, "loss": 0.1519, "step": 72794 }, { "epoch": 0.12907332494665238, "grad_norm": 0.212890625, "learning_rate": 0.0004523943779889608, "loss": 0.218, "step": 72796 }, { "epoch": 0.1290768711119622, "grad_norm": 0.462890625, "learning_rate": 0.00045235304796496594, "loss": 0.2396, "step": 72798 }, { "epoch": 0.129080417277272, "grad_norm": 0.5546875, "learning_rate": 0.00045231172077340293, "loss": 0.1961, "step": 72800 }, { "epoch": 0.12908396344258183, "grad_norm": 0.455078125, "learning_rate": 0.00045227039641445227, "loss": 0.1993, "step": 72802 }, { "epoch": 0.12908750960789164, "grad_norm": 3.1875, "learning_rate": 0.00045222907488829474, "loss": 0.172, "step": 72804 }, { "epoch": 0.12909105577320146, "grad_norm": 0.2421875, "learning_rate": 0.0004521877561951108, "loss": 0.1545, "step": 72806 }, { "epoch": 0.12909460193851127, "grad_norm": 0.8828125, "learning_rate": 0.00045214644033508123, "loss": 0.1917, "step": 72808 }, { "epoch": 0.1290981481038211, "grad_norm": 0.25, "learning_rate": 0.00045210512730838715, "loss": 0.1514, "step": 72810 }, { "epoch": 0.1291016942691309, "grad_norm": 0.263671875, "learning_rate": 0.00045206381711520845, "loss": 0.1423, "step": 72812 }, { "epoch": 0.12910524043444072, "grad_norm": 0.431640625, "learning_rate": 0.0004520225097557265, "loss": 0.1735, "step": 72814 }, { "epoch": 0.12910878659975053, "grad_norm": 0.76953125, "learning_rate": 0.00045198120523012153, "loss": 0.2333, "step": 72816 }, { "epoch": 0.12911233276506034, "grad_norm": 0.76171875, "learning_rate": 0.00045193990353857485, "loss": 0.2348, "step": 72818 }, { "epoch": 0.12911587893037016, "grad_norm": 0.6640625, "learning_rate": 0.00045189860468126605, "loss": 0.1451, "step": 72820 }, { "epoch": 0.12911942509567997, "grad_norm": 0.6328125, "learning_rate": 0.0004518573086583765, "loss": 0.1991, "step": 72822 }, { "epoch": 0.1291229712609898, "grad_norm": 0.365234375, "learning_rate": 0.00045181601547008665, "loss": 0.1326, "step": 72824 }, { "epoch": 0.1291265174262996, "grad_norm": 0.66015625, "learning_rate": 0.00045177472511657676, "loss": 0.18, "step": 72826 }, { "epoch": 0.12913006359160942, "grad_norm": 0.287109375, "learning_rate": 0.0004517334375980282, "loss": 0.1589, "step": 72828 }, { "epoch": 0.12913360975691923, "grad_norm": 0.36328125, "learning_rate": 0.00045169215291462054, "loss": 0.1513, "step": 72830 }, { "epoch": 0.12913715592222905, "grad_norm": 0.55859375, "learning_rate": 0.000451650871066535, "loss": 0.1814, "step": 72832 }, { "epoch": 0.12914070208753886, "grad_norm": 0.67578125, "learning_rate": 0.00045160959205395177, "loss": 0.1612, "step": 72834 }, { "epoch": 0.12914424825284868, "grad_norm": 0.35546875, "learning_rate": 0.00045156831587705206, "loss": 0.1844, "step": 72836 }, { "epoch": 0.1291477944181585, "grad_norm": 0.69140625, "learning_rate": 0.0004515270425360153, "loss": 0.1871, "step": 72838 }, { "epoch": 0.1291513405834683, "grad_norm": 0.4609375, "learning_rate": 0.0004514857720310229, "loss": 0.2364, "step": 72840 }, { "epoch": 0.12915488674877812, "grad_norm": 0.859375, "learning_rate": 0.000451444504362255, "loss": 0.1579, "step": 72842 }, { "epoch": 0.12915843291408793, "grad_norm": 0.6953125, "learning_rate": 0.00045140323952989214, "loss": 0.1717, "step": 72844 }, { "epoch": 0.12916197907939775, "grad_norm": 0.255859375, "learning_rate": 0.00045136197753411475, "loss": 0.2149, "step": 72846 }, { "epoch": 0.12916552524470756, "grad_norm": 0.546875, "learning_rate": 0.000451320718375103, "loss": 0.1934, "step": 72848 }, { "epoch": 0.12916907141001738, "grad_norm": 1.21875, "learning_rate": 0.00045127946205303793, "loss": 0.2662, "step": 72850 }, { "epoch": 0.1291726175753272, "grad_norm": 0.640625, "learning_rate": 0.00045123820856809966, "loss": 0.1942, "step": 72852 }, { "epoch": 0.129176163740637, "grad_norm": 0.6015625, "learning_rate": 0.0004511969579204686, "loss": 0.2498, "step": 72854 }, { "epoch": 0.12917970990594682, "grad_norm": 0.48828125, "learning_rate": 0.000451155710110325, "loss": 0.1912, "step": 72856 }, { "epoch": 0.12918325607125664, "grad_norm": 0.62109375, "learning_rate": 0.0004511144651378496, "loss": 0.1751, "step": 72858 }, { "epoch": 0.12918680223656645, "grad_norm": 0.333984375, "learning_rate": 0.0004510732230032227, "loss": 0.2098, "step": 72860 }, { "epoch": 0.12919034840187626, "grad_norm": 0.37890625, "learning_rate": 0.00045103198370662447, "loss": 0.1409, "step": 72862 }, { "epoch": 0.12919389456718608, "grad_norm": 0.48828125, "learning_rate": 0.0004509907472482354, "loss": 0.1366, "step": 72864 }, { "epoch": 0.1291974407324959, "grad_norm": 0.419921875, "learning_rate": 0.0004509495136282356, "loss": 0.2032, "step": 72866 }, { "epoch": 0.1292009868978057, "grad_norm": 0.296875, "learning_rate": 0.00045090828284680617, "loss": 0.138, "step": 72868 }, { "epoch": 0.12920453306311552, "grad_norm": 0.404296875, "learning_rate": 0.00045086705490412625, "loss": 0.2991, "step": 72870 }, { "epoch": 0.12920807922842537, "grad_norm": 0.490234375, "learning_rate": 0.0004508258298003771, "loss": 0.1801, "step": 72872 }, { "epoch": 0.12921162539373518, "grad_norm": 0.37109375, "learning_rate": 0.00045078460753573843, "loss": 0.1843, "step": 72874 }, { "epoch": 0.129215171559045, "grad_norm": 0.51953125, "learning_rate": 0.00045074338811039124, "loss": 0.2825, "step": 72876 }, { "epoch": 0.1292187177243548, "grad_norm": 0.2373046875, "learning_rate": 0.0004507021715245148, "loss": 0.1506, "step": 72878 }, { "epoch": 0.12922226388966462, "grad_norm": 0.6640625, "learning_rate": 0.00045066095777829024, "loss": 0.1736, "step": 72880 }, { "epoch": 0.12922581005497444, "grad_norm": 0.4609375, "learning_rate": 0.00045061974687189753, "loss": 0.1703, "step": 72882 }, { "epoch": 0.12922935622028425, "grad_norm": 0.41015625, "learning_rate": 0.00045057853880551673, "loss": 0.1873, "step": 72884 }, { "epoch": 0.12923290238559407, "grad_norm": 2.578125, "learning_rate": 0.00045053733357932826, "loss": 0.268, "step": 72886 }, { "epoch": 0.12923644855090388, "grad_norm": 0.2041015625, "learning_rate": 0.00045049613119351183, "loss": 0.1459, "step": 72888 }, { "epoch": 0.1292399947162137, "grad_norm": 0.1669921875, "learning_rate": 0.0004504549316482484, "loss": 0.1466, "step": 72890 }, { "epoch": 0.1292435408815235, "grad_norm": 0.490234375, "learning_rate": 0.00045041373494371785, "loss": 0.2697, "step": 72892 }, { "epoch": 0.12924708704683333, "grad_norm": 1.4140625, "learning_rate": 0.0004503725410801002, "loss": 0.196, "step": 72894 }, { "epoch": 0.12925063321214314, "grad_norm": 0.2001953125, "learning_rate": 0.0004503313500575754, "loss": 0.1393, "step": 72896 }, { "epoch": 0.12925417937745295, "grad_norm": 0.388671875, "learning_rate": 0.00045029016187632436, "loss": 0.1782, "step": 72898 }, { "epoch": 0.12925772554276277, "grad_norm": 0.3515625, "learning_rate": 0.00045024897653652665, "loss": 0.1815, "step": 72900 }, { "epoch": 0.12926127170807258, "grad_norm": 0.4296875, "learning_rate": 0.0004502077940383625, "loss": 0.2039, "step": 72902 }, { "epoch": 0.1292648178733824, "grad_norm": 0.5546875, "learning_rate": 0.000450166614382012, "loss": 0.1896, "step": 72904 }, { "epoch": 0.1292683640386922, "grad_norm": 0.197265625, "learning_rate": 0.00045012543756765497, "loss": 0.1594, "step": 72906 }, { "epoch": 0.12927191020400203, "grad_norm": 1.515625, "learning_rate": 0.00045008426359547234, "loss": 0.2278, "step": 72908 }, { "epoch": 0.12927545636931184, "grad_norm": 0.333984375, "learning_rate": 0.0004500430924656432, "loss": 0.1638, "step": 72910 }, { "epoch": 0.12927900253462166, "grad_norm": 0.38671875, "learning_rate": 0.00045000192417834816, "loss": 0.2398, "step": 72912 }, { "epoch": 0.12928254869993147, "grad_norm": 0.380859375, "learning_rate": 0.00044996075873376685, "loss": 0.1898, "step": 72914 }, { "epoch": 0.12928609486524129, "grad_norm": 1.3359375, "learning_rate": 0.00044991959613208, "loss": 0.1831, "step": 72916 }, { "epoch": 0.1292896410305511, "grad_norm": 0.291015625, "learning_rate": 0.0004498784363734671, "loss": 0.2448, "step": 72918 }, { "epoch": 0.12929318719586091, "grad_norm": 0.94140625, "learning_rate": 0.0004498372794581083, "loss": 0.2123, "step": 72920 }, { "epoch": 0.12929673336117073, "grad_norm": 3.78125, "learning_rate": 0.0004497961253861838, "loss": 0.212, "step": 72922 }, { "epoch": 0.12930027952648054, "grad_norm": 0.291015625, "learning_rate": 0.0004497549741578728, "loss": 0.1757, "step": 72924 }, { "epoch": 0.12930382569179036, "grad_norm": 1.1015625, "learning_rate": 0.00044971382577335646, "loss": 0.2182, "step": 72926 }, { "epoch": 0.12930737185710017, "grad_norm": 0.40625, "learning_rate": 0.0004496726802328137, "loss": 0.1476, "step": 72928 }, { "epoch": 0.12931091802241, "grad_norm": 0.455078125, "learning_rate": 0.000449631537536425, "loss": 0.1813, "step": 72930 }, { "epoch": 0.1293144641877198, "grad_norm": 0.33203125, "learning_rate": 0.00044959039768437, "loss": 0.2096, "step": 72932 }, { "epoch": 0.12931801035302962, "grad_norm": 0.44921875, "learning_rate": 0.00044954926067682923, "loss": 0.3841, "step": 72934 }, { "epoch": 0.12932155651833943, "grad_norm": 0.53125, "learning_rate": 0.0004495081265139817, "loss": 0.1744, "step": 72936 }, { "epoch": 0.12932510268364925, "grad_norm": 0.39453125, "learning_rate": 0.00044946699519600805, "loss": 0.1293, "step": 72938 }, { "epoch": 0.12932864884895906, "grad_norm": 0.4375, "learning_rate": 0.0004494258667230878, "loss": 0.1226, "step": 72940 }, { "epoch": 0.12933219501426887, "grad_norm": 0.2275390625, "learning_rate": 0.0004493847410954011, "loss": 0.2338, "step": 72942 }, { "epoch": 0.1293357411795787, "grad_norm": 0.625, "learning_rate": 0.0004493436183131274, "loss": 0.1977, "step": 72944 }, { "epoch": 0.1293392873448885, "grad_norm": 0.2265625, "learning_rate": 0.00044930249837644667, "loss": 0.2259, "step": 72946 }, { "epoch": 0.12934283351019832, "grad_norm": 0.49609375, "learning_rate": 0.00044926138128553906, "loss": 0.1699, "step": 72948 }, { "epoch": 0.12934637967550813, "grad_norm": 0.435546875, "learning_rate": 0.00044922026704058416, "loss": 0.1461, "step": 72950 }, { "epoch": 0.12934992584081795, "grad_norm": 0.30859375, "learning_rate": 0.0004491791556417617, "loss": 0.2025, "step": 72952 }, { "epoch": 0.12935347200612776, "grad_norm": 0.26953125, "learning_rate": 0.00044913804708925147, "loss": 0.2304, "step": 72954 }, { "epoch": 0.12935701817143758, "grad_norm": 0.283203125, "learning_rate": 0.00044909694138323353, "loss": 0.1512, "step": 72956 }, { "epoch": 0.1293605643367474, "grad_norm": 0.19921875, "learning_rate": 0.00044905583852388755, "loss": 0.1275, "step": 72958 }, { "epoch": 0.1293641105020572, "grad_norm": 0.18359375, "learning_rate": 0.00044901473851139307, "loss": 0.155, "step": 72960 }, { "epoch": 0.12936765666736705, "grad_norm": 0.5703125, "learning_rate": 0.00044897364134593017, "loss": 0.2045, "step": 72962 }, { "epoch": 0.12937120283267686, "grad_norm": 0.50390625, "learning_rate": 0.00044893254702767807, "loss": 0.2497, "step": 72964 }, { "epoch": 0.12937474899798668, "grad_norm": 0.8515625, "learning_rate": 0.0004488914555568173, "loss": 0.2134, "step": 72966 }, { "epoch": 0.1293782951632965, "grad_norm": 0.302734375, "learning_rate": 0.0004488503669335267, "loss": 0.2062, "step": 72968 }, { "epoch": 0.1293818413286063, "grad_norm": 0.375, "learning_rate": 0.0004488092811579864, "loss": 0.172, "step": 72970 }, { "epoch": 0.12938538749391612, "grad_norm": 0.625, "learning_rate": 0.0004487681982303763, "loss": 0.1924, "step": 72972 }, { "epoch": 0.12938893365922594, "grad_norm": 0.1787109375, "learning_rate": 0.00044872711815087564, "loss": 0.1592, "step": 72974 }, { "epoch": 0.12939247982453575, "grad_norm": 0.8828125, "learning_rate": 0.00044868604091966426, "loss": 0.1709, "step": 72976 }, { "epoch": 0.12939602598984556, "grad_norm": 0.52734375, "learning_rate": 0.0004486449665369217, "loss": 0.2473, "step": 72978 }, { "epoch": 0.12939957215515538, "grad_norm": 0.7265625, "learning_rate": 0.0004486038950028278, "loss": 0.2231, "step": 72980 }, { "epoch": 0.1294031183204652, "grad_norm": 0.40625, "learning_rate": 0.0004485628263175622, "loss": 0.1966, "step": 72982 }, { "epoch": 0.129406664485775, "grad_norm": 0.416015625, "learning_rate": 0.00044852176048130435, "loss": 0.1716, "step": 72984 }, { "epoch": 0.12941021065108482, "grad_norm": 0.1865234375, "learning_rate": 0.00044848069749423366, "loss": 0.1633, "step": 72986 }, { "epoch": 0.12941375681639464, "grad_norm": 0.578125, "learning_rate": 0.00044843963735653015, "loss": 0.2303, "step": 72988 }, { "epoch": 0.12941730298170445, "grad_norm": 0.4375, "learning_rate": 0.0004483985800683733, "loss": 0.1419, "step": 72990 }, { "epoch": 0.12942084914701427, "grad_norm": 0.53125, "learning_rate": 0.0004483575256299425, "loss": 0.138, "step": 72992 }, { "epoch": 0.12942439531232408, "grad_norm": 0.306640625, "learning_rate": 0.00044831647404141727, "loss": 0.1666, "step": 72994 }, { "epoch": 0.1294279414776339, "grad_norm": 0.419921875, "learning_rate": 0.0004482754253029772, "loss": 0.1649, "step": 72996 }, { "epoch": 0.1294314876429437, "grad_norm": 0.40625, "learning_rate": 0.0004482343794148019, "loss": 0.1648, "step": 72998 }, { "epoch": 0.12943503380825352, "grad_norm": 0.24609375, "learning_rate": 0.000448193336377071, "loss": 0.1901, "step": 73000 }, { "epoch": 0.12943857997356334, "grad_norm": 1.4296875, "learning_rate": 0.00044815229618996367, "loss": 0.2126, "step": 73002 }, { "epoch": 0.12944212613887315, "grad_norm": 0.333984375, "learning_rate": 0.00044811125885365943, "loss": 0.2303, "step": 73004 }, { "epoch": 0.12944567230418297, "grad_norm": 0.412109375, "learning_rate": 0.0004480702243683381, "loss": 0.2002, "step": 73006 }, { "epoch": 0.12944921846949278, "grad_norm": 0.443359375, "learning_rate": 0.00044802919273417895, "loss": 0.1914, "step": 73008 }, { "epoch": 0.1294527646348026, "grad_norm": 0.31640625, "learning_rate": 0.00044798816395136135, "loss": 0.1556, "step": 73010 }, { "epoch": 0.1294563108001124, "grad_norm": 0.34765625, "learning_rate": 0.00044794713802006475, "loss": 0.2758, "step": 73012 }, { "epoch": 0.12945985696542223, "grad_norm": 0.259765625, "learning_rate": 0.00044790611494046847, "loss": 0.1574, "step": 73014 }, { "epoch": 0.12946340313073204, "grad_norm": 0.9609375, "learning_rate": 0.0004478650947127525, "loss": 0.215, "step": 73016 }, { "epoch": 0.12946694929604186, "grad_norm": 0.5859375, "learning_rate": 0.0004478240773370953, "loss": 0.162, "step": 73018 }, { "epoch": 0.12947049546135167, "grad_norm": 0.298828125, "learning_rate": 0.0004477830628136771, "loss": 0.1854, "step": 73020 }, { "epoch": 0.12947404162666148, "grad_norm": 0.4453125, "learning_rate": 0.0004477420511426765, "loss": 0.254, "step": 73022 }, { "epoch": 0.1294775877919713, "grad_norm": 0.71875, "learning_rate": 0.00044770104232427397, "loss": 0.1854, "step": 73024 }, { "epoch": 0.1294811339572811, "grad_norm": 0.390625, "learning_rate": 0.0004476600363586475, "loss": 0.2001, "step": 73026 }, { "epoch": 0.12948468012259093, "grad_norm": 1.25, "learning_rate": 0.00044761903324597754, "loss": 0.1499, "step": 73028 }, { "epoch": 0.12948822628790074, "grad_norm": 0.2080078125, "learning_rate": 0.00044757803298644286, "loss": 0.1961, "step": 73030 }, { "epoch": 0.12949177245321056, "grad_norm": 0.369140625, "learning_rate": 0.000447537035580223, "loss": 0.1481, "step": 73032 }, { "epoch": 0.12949531861852037, "grad_norm": 0.30859375, "learning_rate": 0.000447496041027497, "loss": 0.1598, "step": 73034 }, { "epoch": 0.1294988647838302, "grad_norm": 0.2265625, "learning_rate": 0.00044745504932844403, "loss": 0.1464, "step": 73036 }, { "epoch": 0.12950241094914, "grad_norm": 0.255859375, "learning_rate": 0.00044741406048324405, "loss": 0.1673, "step": 73038 }, { "epoch": 0.12950595711444982, "grad_norm": 0.298828125, "learning_rate": 0.0004473730744920758, "loss": 0.1899, "step": 73040 }, { "epoch": 0.12950950327975963, "grad_norm": 0.2041015625, "learning_rate": 0.0004473320913551187, "loss": 0.1758, "step": 73042 }, { "epoch": 0.12951304944506944, "grad_norm": 0.466796875, "learning_rate": 0.0004472911110725516, "loss": 0.1778, "step": 73044 }, { "epoch": 0.12951659561037926, "grad_norm": 2.1875, "learning_rate": 0.00044725013364455436, "loss": 0.2972, "step": 73046 }, { "epoch": 0.12952014177568907, "grad_norm": 0.5546875, "learning_rate": 0.0004472091590713059, "loss": 0.1517, "step": 73048 }, { "epoch": 0.1295236879409989, "grad_norm": 0.435546875, "learning_rate": 0.00044716818735298524, "loss": 0.158, "step": 73050 }, { "epoch": 0.1295272341063087, "grad_norm": 0.2265625, "learning_rate": 0.00044712721848977186, "loss": 0.1611, "step": 73052 }, { "epoch": 0.12953078027161855, "grad_norm": 0.375, "learning_rate": 0.00044708625248184443, "loss": 0.2059, "step": 73054 }, { "epoch": 0.12953432643692836, "grad_norm": 0.296875, "learning_rate": 0.0004470452893293832, "loss": 0.1713, "step": 73056 }, { "epoch": 0.12953787260223817, "grad_norm": 0.333984375, "learning_rate": 0.0004470043290325658, "loss": 0.1659, "step": 73058 }, { "epoch": 0.129541418767548, "grad_norm": 0.5625, "learning_rate": 0.00044696337159157254, "loss": 0.1809, "step": 73060 }, { "epoch": 0.1295449649328578, "grad_norm": 0.47265625, "learning_rate": 0.0004469224170065819, "loss": 0.1626, "step": 73062 }, { "epoch": 0.12954851109816762, "grad_norm": 1.5078125, "learning_rate": 0.00044688146527777377, "loss": 0.2087, "step": 73064 }, { "epoch": 0.12955205726347743, "grad_norm": 1.3359375, "learning_rate": 0.0004468405164053262, "loss": 0.184, "step": 73066 }, { "epoch": 0.12955560342878725, "grad_norm": 0.76953125, "learning_rate": 0.00044679957038941913, "loss": 0.2175, "step": 73068 }, { "epoch": 0.12955914959409706, "grad_norm": 0.322265625, "learning_rate": 0.0004467586272302312, "loss": 0.3681, "step": 73070 }, { "epoch": 0.12956269575940688, "grad_norm": 0.259765625, "learning_rate": 0.00044671768692794173, "loss": 0.1665, "step": 73072 }, { "epoch": 0.1295662419247167, "grad_norm": 1.515625, "learning_rate": 0.00044667674948272964, "loss": 0.1839, "step": 73074 }, { "epoch": 0.1295697880900265, "grad_norm": 7.5, "learning_rate": 0.00044663581489477354, "loss": 0.2284, "step": 73076 }, { "epoch": 0.12957333425533632, "grad_norm": 0.2734375, "learning_rate": 0.00044659488316425324, "loss": 0.1829, "step": 73078 }, { "epoch": 0.12957688042064613, "grad_norm": 0.58203125, "learning_rate": 0.000446553954291347, "loss": 0.1775, "step": 73080 }, { "epoch": 0.12958042658595595, "grad_norm": 0.93359375, "learning_rate": 0.00044651302827623487, "loss": 0.1801, "step": 73082 }, { "epoch": 0.12958397275126576, "grad_norm": 0.59765625, "learning_rate": 0.00044647210511909453, "loss": 0.1758, "step": 73084 }, { "epoch": 0.12958751891657558, "grad_norm": 0.1982421875, "learning_rate": 0.00044643118482010584, "loss": 0.2135, "step": 73086 }, { "epoch": 0.1295910650818854, "grad_norm": 0.412109375, "learning_rate": 0.00044639026737944764, "loss": 0.1667, "step": 73088 }, { "epoch": 0.1295946112471952, "grad_norm": 1.453125, "learning_rate": 0.0004463493527972987, "loss": 0.175, "step": 73090 }, { "epoch": 0.12959815741250502, "grad_norm": 0.59765625, "learning_rate": 0.0004463084410738379, "loss": 0.1777, "step": 73092 }, { "epoch": 0.12960170357781484, "grad_norm": 0.3671875, "learning_rate": 0.00044626753220924404, "loss": 0.1686, "step": 73094 }, { "epoch": 0.12960524974312465, "grad_norm": 0.6328125, "learning_rate": 0.00044622662620369657, "loss": 0.1881, "step": 73096 }, { "epoch": 0.12960879590843447, "grad_norm": 5.59375, "learning_rate": 0.0004461857230573741, "loss": 0.2456, "step": 73098 }, { "epoch": 0.12961234207374428, "grad_norm": 1.65625, "learning_rate": 0.00044614482277045554, "loss": 0.2187, "step": 73100 }, { "epoch": 0.1296158882390541, "grad_norm": 0.89453125, "learning_rate": 0.0004461039253431193, "loss": 0.1783, "step": 73102 }, { "epoch": 0.1296194344043639, "grad_norm": 0.4140625, "learning_rate": 0.000446063030775545, "loss": 0.1305, "step": 73104 }, { "epoch": 0.12962298056967372, "grad_norm": 0.287109375, "learning_rate": 0.00044602213906791123, "loss": 0.1734, "step": 73106 }, { "epoch": 0.12962652673498354, "grad_norm": 0.376953125, "learning_rate": 0.00044598125022039673, "loss": 0.1745, "step": 73108 }, { "epoch": 0.12963007290029335, "grad_norm": 0.59375, "learning_rate": 0.0004459403642331802, "loss": 0.1935, "step": 73110 }, { "epoch": 0.12963361906560317, "grad_norm": 0.384765625, "learning_rate": 0.0004458994811064404, "loss": 0.1742, "step": 73112 }, { "epoch": 0.12963716523091298, "grad_norm": 0.93359375, "learning_rate": 0.00044585860084035695, "loss": 0.2111, "step": 73114 }, { "epoch": 0.1296407113962228, "grad_norm": 0.6484375, "learning_rate": 0.0004458177234351073, "loss": 0.1944, "step": 73116 }, { "epoch": 0.1296442575615326, "grad_norm": 0.373046875, "learning_rate": 0.00044577684889087133, "loss": 0.1736, "step": 73118 }, { "epoch": 0.12964780372684243, "grad_norm": 0.2373046875, "learning_rate": 0.0004457359772078271, "loss": 0.1731, "step": 73120 }, { "epoch": 0.12965134989215224, "grad_norm": 0.255859375, "learning_rate": 0.00044569510838615424, "loss": 0.1618, "step": 73122 }, { "epoch": 0.12965489605746205, "grad_norm": 0.2099609375, "learning_rate": 0.00044565424242603034, "loss": 0.1325, "step": 73124 }, { "epoch": 0.12965844222277187, "grad_norm": 0.2412109375, "learning_rate": 0.00044561337932763503, "loss": 0.2147, "step": 73126 }, { "epoch": 0.12966198838808168, "grad_norm": 0.439453125, "learning_rate": 0.0004455725190911466, "loss": 0.158, "step": 73128 }, { "epoch": 0.1296655345533915, "grad_norm": 0.5859375, "learning_rate": 0.0004455316617167439, "loss": 0.1531, "step": 73130 }, { "epoch": 0.1296690807187013, "grad_norm": 0.3984375, "learning_rate": 0.00044549080720460545, "loss": 0.1781, "step": 73132 }, { "epoch": 0.12967262688401113, "grad_norm": 0.306640625, "learning_rate": 0.0004454499555549098, "loss": 0.1765, "step": 73134 }, { "epoch": 0.12967617304932094, "grad_norm": 0.4609375, "learning_rate": 0.0004454091067678362, "loss": 0.192, "step": 73136 }, { "epoch": 0.12967971921463076, "grad_norm": 0.421875, "learning_rate": 0.00044536826084356286, "loss": 0.1703, "step": 73138 }, { "epoch": 0.12968326537994057, "grad_norm": 0.30078125, "learning_rate": 0.0004453274177822685, "loss": 0.1871, "step": 73140 }, { "epoch": 0.12968681154525039, "grad_norm": 0.24609375, "learning_rate": 0.00044528657758413165, "loss": 0.2772, "step": 73142 }, { "epoch": 0.12969035771056023, "grad_norm": 0.322265625, "learning_rate": 0.0004452457402493308, "loss": 0.2052, "step": 73144 }, { "epoch": 0.12969390387587004, "grad_norm": 0.60546875, "learning_rate": 0.00044520490577804506, "loss": 0.137, "step": 73146 }, { "epoch": 0.12969745004117986, "grad_norm": 1.0, "learning_rate": 0.00044516407417045267, "loss": 0.3463, "step": 73148 }, { "epoch": 0.12970099620648967, "grad_norm": 0.349609375, "learning_rate": 0.00044512324542673234, "loss": 0.1561, "step": 73150 }, { "epoch": 0.12970454237179949, "grad_norm": 0.90234375, "learning_rate": 0.0004450824195470622, "loss": 0.2345, "step": 73152 }, { "epoch": 0.1297080885371093, "grad_norm": 0.224609375, "learning_rate": 0.00044504159653162165, "loss": 0.1301, "step": 73154 }, { "epoch": 0.12971163470241912, "grad_norm": 0.236328125, "learning_rate": 0.00044500077638058816, "loss": 0.1983, "step": 73156 }, { "epoch": 0.12971518086772893, "grad_norm": 0.33984375, "learning_rate": 0.0004449599590941411, "loss": 0.162, "step": 73158 }, { "epoch": 0.12971872703303874, "grad_norm": 1.390625, "learning_rate": 0.00044491914467245875, "loss": 0.2596, "step": 73160 }, { "epoch": 0.12972227319834856, "grad_norm": 0.3203125, "learning_rate": 0.00044487833311571915, "loss": 0.1565, "step": 73162 }, { "epoch": 0.12972581936365837, "grad_norm": 0.458984375, "learning_rate": 0.00044483752442410186, "loss": 0.2122, "step": 73164 }, { "epoch": 0.1297293655289682, "grad_norm": 0.265625, "learning_rate": 0.00044479671859778405, "loss": 0.4211, "step": 73166 }, { "epoch": 0.129732911694278, "grad_norm": 0.4453125, "learning_rate": 0.00044475591563694503, "loss": 0.2227, "step": 73168 }, { "epoch": 0.12973645785958782, "grad_norm": 2.328125, "learning_rate": 0.00044471511554176283, "loss": 0.2258, "step": 73170 }, { "epoch": 0.12974000402489763, "grad_norm": 0.357421875, "learning_rate": 0.00044467431831241656, "loss": 0.2114, "step": 73172 }, { "epoch": 0.12974355019020745, "grad_norm": 0.494140625, "learning_rate": 0.0004446335239490836, "loss": 0.5647, "step": 73174 }, { "epoch": 0.12974709635551726, "grad_norm": 0.267578125, "learning_rate": 0.0004445927324519434, "loss": 0.2264, "step": 73176 }, { "epoch": 0.12975064252082708, "grad_norm": 0.66796875, "learning_rate": 0.0004445519438211737, "loss": 0.2512, "step": 73178 }, { "epoch": 0.1297541886861369, "grad_norm": 0.369140625, "learning_rate": 0.0004445111580569531, "loss": 0.1819, "step": 73180 }, { "epoch": 0.1297577348514467, "grad_norm": 2.546875, "learning_rate": 0.00044447037515946, "loss": 0.1937, "step": 73182 }, { "epoch": 0.12976128101675652, "grad_norm": 0.35546875, "learning_rate": 0.00044442959512887227, "loss": 0.1879, "step": 73184 }, { "epoch": 0.12976482718206633, "grad_norm": 0.361328125, "learning_rate": 0.0004443888179653692, "loss": 0.1505, "step": 73186 }, { "epoch": 0.12976837334737615, "grad_norm": 0.29296875, "learning_rate": 0.0004443480436691284, "loss": 0.1941, "step": 73188 }, { "epoch": 0.12977191951268596, "grad_norm": 0.7421875, "learning_rate": 0.0004443072722403285, "loss": 0.1679, "step": 73190 }, { "epoch": 0.12977546567799578, "grad_norm": 0.37890625, "learning_rate": 0.00044426650367914745, "loss": 0.1718, "step": 73192 }, { "epoch": 0.1297790118433056, "grad_norm": 0.31640625, "learning_rate": 0.00044422573798576405, "loss": 0.1832, "step": 73194 }, { "epoch": 0.1297825580086154, "grad_norm": 0.37109375, "learning_rate": 0.0004441849751603565, "loss": 0.1361, "step": 73196 }, { "epoch": 0.12978610417392522, "grad_norm": 2.328125, "learning_rate": 0.0004441442152031028, "loss": 0.3755, "step": 73198 }, { "epoch": 0.12978965033923504, "grad_norm": 0.455078125, "learning_rate": 0.0004441034581141813, "loss": 0.198, "step": 73200 }, { "epoch": 0.12979319650454485, "grad_norm": 0.466796875, "learning_rate": 0.00044406270389377004, "loss": 0.1754, "step": 73202 }, { "epoch": 0.12979674266985466, "grad_norm": 0.5078125, "learning_rate": 0.00044402195254204807, "loss": 0.1585, "step": 73204 }, { "epoch": 0.12980028883516448, "grad_norm": 1.1328125, "learning_rate": 0.00044398120405919253, "loss": 0.193, "step": 73206 }, { "epoch": 0.1298038350004743, "grad_norm": 0.4140625, "learning_rate": 0.00044394045844538233, "loss": 0.1894, "step": 73208 }, { "epoch": 0.1298073811657841, "grad_norm": 0.337890625, "learning_rate": 0.0004438997157007952, "loss": 0.2302, "step": 73210 }, { "epoch": 0.12981092733109392, "grad_norm": 0.310546875, "learning_rate": 0.0004438589758256103, "loss": 0.1645, "step": 73212 }, { "epoch": 0.12981447349640374, "grad_norm": 0.287109375, "learning_rate": 0.0004438182388200045, "loss": 0.1647, "step": 73214 }, { "epoch": 0.12981801966171355, "grad_norm": 0.248046875, "learning_rate": 0.0004437775046841568, "loss": 0.1838, "step": 73216 }, { "epoch": 0.12982156582702337, "grad_norm": 0.208984375, "learning_rate": 0.00044373677341824516, "loss": 0.1494, "step": 73218 }, { "epoch": 0.12982511199233318, "grad_norm": 0.59375, "learning_rate": 0.0004436960450224475, "loss": 0.2125, "step": 73220 }, { "epoch": 0.129828658157643, "grad_norm": 0.6640625, "learning_rate": 0.00044365531949694235, "loss": 0.136, "step": 73222 }, { "epoch": 0.1298322043229528, "grad_norm": 1.5390625, "learning_rate": 0.00044361459684190717, "loss": 0.1825, "step": 73224 }, { "epoch": 0.12983575048826262, "grad_norm": 0.283203125, "learning_rate": 0.00044357387705752085, "loss": 0.1476, "step": 73226 }, { "epoch": 0.12983929665357244, "grad_norm": 0.28125, "learning_rate": 0.00044353316014396094, "loss": 0.1891, "step": 73228 }, { "epoch": 0.12984284281888225, "grad_norm": 0.25, "learning_rate": 0.00044349244610140584, "loss": 0.1821, "step": 73230 }, { "epoch": 0.12984638898419207, "grad_norm": 0.353515625, "learning_rate": 0.000443451734930033, "loss": 0.3179, "step": 73232 }, { "epoch": 0.1298499351495019, "grad_norm": 1.6484375, "learning_rate": 0.0004434110266300212, "loss": 0.2602, "step": 73234 }, { "epoch": 0.12985348131481173, "grad_norm": 1.59375, "learning_rate": 0.00044337032120154817, "loss": 0.1823, "step": 73236 }, { "epoch": 0.12985702748012154, "grad_norm": 0.478515625, "learning_rate": 0.0004433296186447921, "loss": 0.1804, "step": 73238 }, { "epoch": 0.12986057364543135, "grad_norm": 1.1015625, "learning_rate": 0.0004432889189599308, "loss": 0.1849, "step": 73240 }, { "epoch": 0.12986411981074117, "grad_norm": 2.265625, "learning_rate": 0.00044324822214714186, "loss": 0.207, "step": 73242 }, { "epoch": 0.12986766597605098, "grad_norm": 0.46875, "learning_rate": 0.00044320752820660415, "loss": 0.1346, "step": 73244 }, { "epoch": 0.1298712121413608, "grad_norm": 0.2265625, "learning_rate": 0.0004431668371384951, "loss": 0.1279, "step": 73246 }, { "epoch": 0.1298747583066706, "grad_norm": 0.400390625, "learning_rate": 0.0004431261489429928, "loss": 0.3887, "step": 73248 }, { "epoch": 0.12987830447198043, "grad_norm": 0.63671875, "learning_rate": 0.00044308546362027495, "loss": 0.1613, "step": 73250 }, { "epoch": 0.12988185063729024, "grad_norm": 0.31640625, "learning_rate": 0.00044304478117051995, "loss": 0.1376, "step": 73252 }, { "epoch": 0.12988539680260006, "grad_norm": 0.26953125, "learning_rate": 0.00044300410159390537, "loss": 0.1327, "step": 73254 }, { "epoch": 0.12988894296790987, "grad_norm": 0.396484375, "learning_rate": 0.00044296342489060924, "loss": 0.1613, "step": 73256 }, { "epoch": 0.12989248913321969, "grad_norm": 0.65234375, "learning_rate": 0.00044292275106080957, "loss": 0.1483, "step": 73258 }, { "epoch": 0.1298960352985295, "grad_norm": 0.35546875, "learning_rate": 0.00044288208010468376, "loss": 0.1688, "step": 73260 }, { "epoch": 0.12989958146383931, "grad_norm": 0.240234375, "learning_rate": 0.00044284141202241057, "loss": 0.2076, "step": 73262 }, { "epoch": 0.12990312762914913, "grad_norm": 1.3828125, "learning_rate": 0.0004428007468141668, "loss": 0.2092, "step": 73264 }, { "epoch": 0.12990667379445894, "grad_norm": 0.248046875, "learning_rate": 0.00044276008448013106, "loss": 0.2074, "step": 73266 }, { "epoch": 0.12991021995976876, "grad_norm": 0.93359375, "learning_rate": 0.00044271942502048067, "loss": 0.2461, "step": 73268 }, { "epoch": 0.12991376612507857, "grad_norm": 0.46484375, "learning_rate": 0.0004426787684353943, "loss": 0.1777, "step": 73270 }, { "epoch": 0.1299173122903884, "grad_norm": 0.52734375, "learning_rate": 0.00044263811472504866, "loss": 0.1717, "step": 73272 }, { "epoch": 0.1299208584556982, "grad_norm": 3.09375, "learning_rate": 0.0004425974638896223, "loss": 0.1672, "step": 73274 }, { "epoch": 0.12992440462100802, "grad_norm": 0.4296875, "learning_rate": 0.00044255681592929275, "loss": 0.2145, "step": 73276 }, { "epoch": 0.12992795078631783, "grad_norm": 0.306640625, "learning_rate": 0.0004425161708442378, "loss": 0.1346, "step": 73278 }, { "epoch": 0.12993149695162765, "grad_norm": 0.1318359375, "learning_rate": 0.00044247552863463515, "loss": 0.1145, "step": 73280 }, { "epoch": 0.12993504311693746, "grad_norm": 0.54296875, "learning_rate": 0.0004424348893006624, "loss": 0.2236, "step": 73282 }, { "epoch": 0.12993858928224727, "grad_norm": 0.326171875, "learning_rate": 0.0004423942528424978, "loss": 0.1636, "step": 73284 }, { "epoch": 0.1299421354475571, "grad_norm": 0.62890625, "learning_rate": 0.0004423536192603187, "loss": 0.279, "step": 73286 }, { "epoch": 0.1299456816128669, "grad_norm": 0.447265625, "learning_rate": 0.00044231298855430286, "loss": 0.1738, "step": 73288 }, { "epoch": 0.12994922777817672, "grad_norm": 3.4375, "learning_rate": 0.00044227236072462775, "loss": 0.3391, "step": 73290 }, { "epoch": 0.12995277394348653, "grad_norm": 0.255859375, "learning_rate": 0.0004422317357714716, "loss": 0.142, "step": 73292 }, { "epoch": 0.12995632010879635, "grad_norm": 0.326171875, "learning_rate": 0.0004421911136950116, "loss": 0.1782, "step": 73294 }, { "epoch": 0.12995986627410616, "grad_norm": 0.1962890625, "learning_rate": 0.00044215049449542566, "loss": 0.3255, "step": 73296 }, { "epoch": 0.12996341243941598, "grad_norm": 0.6328125, "learning_rate": 0.0004421098781728913, "loss": 0.2236, "step": 73298 }, { "epoch": 0.1299669586047258, "grad_norm": 0.32421875, "learning_rate": 0.0004420692647275859, "loss": 0.19, "step": 73300 }, { "epoch": 0.1299705047700356, "grad_norm": 1.2265625, "learning_rate": 0.00044202865415968797, "loss": 0.2246, "step": 73302 }, { "epoch": 0.12997405093534542, "grad_norm": 0.1884765625, "learning_rate": 0.00044198804646937396, "loss": 0.1693, "step": 73304 }, { "epoch": 0.12997759710065523, "grad_norm": 0.9609375, "learning_rate": 0.0004419474416568222, "loss": 0.2621, "step": 73306 }, { "epoch": 0.12998114326596505, "grad_norm": 0.328125, "learning_rate": 0.0004419068397222099, "loss": 0.208, "step": 73308 }, { "epoch": 0.12998468943127486, "grad_norm": 0.75, "learning_rate": 0.00044186624066571534, "loss": 0.1938, "step": 73310 }, { "epoch": 0.12998823559658468, "grad_norm": 1.046875, "learning_rate": 0.0004418256444875151, "loss": 0.1791, "step": 73312 }, { "epoch": 0.1299917817618945, "grad_norm": 2.375, "learning_rate": 0.0004417850511877873, "loss": 0.2396, "step": 73314 }, { "epoch": 0.1299953279272043, "grad_norm": 1.328125, "learning_rate": 0.0004417444607667094, "loss": 0.3685, "step": 73316 }, { "epoch": 0.12999887409251412, "grad_norm": 0.37109375, "learning_rate": 0.0004417038732244589, "loss": 0.174, "step": 73318 }, { "epoch": 0.13000242025782394, "grad_norm": 0.3125, "learning_rate": 0.0004416632885612133, "loss": 0.193, "step": 73320 }, { "epoch": 0.13000596642313375, "grad_norm": 0.39453125, "learning_rate": 0.00044162270677714974, "loss": 0.2214, "step": 73322 }, { "epoch": 0.13000951258844357, "grad_norm": 0.62109375, "learning_rate": 0.0004415821278724463, "loss": 0.1442, "step": 73324 }, { "epoch": 0.1300130587537534, "grad_norm": 1.28125, "learning_rate": 0.00044154155184728016, "loss": 0.3011, "step": 73326 }, { "epoch": 0.13001660491906322, "grad_norm": 0.890625, "learning_rate": 0.00044150097870182894, "loss": 0.1754, "step": 73328 }, { "epoch": 0.13002015108437304, "grad_norm": 0.251953125, "learning_rate": 0.0004414604084362698, "loss": 0.2049, "step": 73330 }, { "epoch": 0.13002369724968285, "grad_norm": 0.19921875, "learning_rate": 0.00044141984105078004, "loss": 0.1938, "step": 73332 }, { "epoch": 0.13002724341499267, "grad_norm": 0.1748046875, "learning_rate": 0.00044137927654553754, "loss": 0.1504, "step": 73334 }, { "epoch": 0.13003078958030248, "grad_norm": 8.375, "learning_rate": 0.0004413387149207196, "loss": 0.3611, "step": 73336 }, { "epoch": 0.1300343357456123, "grad_norm": 0.455078125, "learning_rate": 0.0004412981561765036, "loss": 0.1523, "step": 73338 }, { "epoch": 0.1300378819109221, "grad_norm": 0.32421875, "learning_rate": 0.0004412576003130665, "loss": 0.1937, "step": 73340 }, { "epoch": 0.13004142807623192, "grad_norm": 0.6015625, "learning_rate": 0.0004412170473305863, "loss": 0.368, "step": 73342 }, { "epoch": 0.13004497424154174, "grad_norm": 0.462890625, "learning_rate": 0.00044117649722924003, "loss": 0.2358, "step": 73344 }, { "epoch": 0.13004852040685155, "grad_norm": 0.67578125, "learning_rate": 0.00044113595000920514, "loss": 0.3284, "step": 73346 }, { "epoch": 0.13005206657216137, "grad_norm": 0.42578125, "learning_rate": 0.0004410954056706588, "loss": 0.238, "step": 73348 }, { "epoch": 0.13005561273747118, "grad_norm": 0.31640625, "learning_rate": 0.0004410548642137782, "loss": 0.1891, "step": 73350 }, { "epoch": 0.130059158902781, "grad_norm": 0.2109375, "learning_rate": 0.0004410143256387415, "loss": 0.1774, "step": 73352 }, { "epoch": 0.1300627050680908, "grad_norm": 1.4609375, "learning_rate": 0.00044097378994572466, "loss": 0.2075, "step": 73354 }, { "epoch": 0.13006625123340063, "grad_norm": 0.5078125, "learning_rate": 0.00044093325713490594, "loss": 0.2319, "step": 73356 }, { "epoch": 0.13006979739871044, "grad_norm": 0.83203125, "learning_rate": 0.00044089272720646217, "loss": 0.1445, "step": 73358 }, { "epoch": 0.13007334356402026, "grad_norm": 0.296875, "learning_rate": 0.00044085220016057116, "loss": 0.1448, "step": 73360 }, { "epoch": 0.13007688972933007, "grad_norm": 0.2734375, "learning_rate": 0.00044081167599740935, "loss": 0.2042, "step": 73362 }, { "epoch": 0.13008043589463988, "grad_norm": 0.3359375, "learning_rate": 0.0004407711547171545, "loss": 0.1456, "step": 73364 }, { "epoch": 0.1300839820599497, "grad_norm": 0.62109375, "learning_rate": 0.00044073063631998376, "loss": 0.1813, "step": 73366 }, { "epoch": 0.1300875282252595, "grad_norm": 0.392578125, "learning_rate": 0.0004406901208060742, "loss": 0.2242, "step": 73368 }, { "epoch": 0.13009107439056933, "grad_norm": 0.29296875, "learning_rate": 0.0004406496081756032, "loss": 0.2132, "step": 73370 }, { "epoch": 0.13009462055587914, "grad_norm": 0.3125, "learning_rate": 0.00044060909842874753, "loss": 0.1618, "step": 73372 }, { "epoch": 0.13009816672118896, "grad_norm": 0.77734375, "learning_rate": 0.00044056859156568485, "loss": 0.2259, "step": 73374 }, { "epoch": 0.13010171288649877, "grad_norm": 0.40234375, "learning_rate": 0.00044052808758659215, "loss": 0.1577, "step": 73376 }, { "epoch": 0.13010525905180859, "grad_norm": 2.6875, "learning_rate": 0.00044048758649164647, "loss": 0.2695, "step": 73378 }, { "epoch": 0.1301088052171184, "grad_norm": 0.3515625, "learning_rate": 0.00044044708828102475, "loss": 0.1595, "step": 73380 }, { "epoch": 0.13011235138242822, "grad_norm": 0.53515625, "learning_rate": 0.0004404065929549046, "loss": 0.1704, "step": 73382 }, { "epoch": 0.13011589754773803, "grad_norm": 1.5234375, "learning_rate": 0.00044036610051346293, "loss": 0.2184, "step": 73384 }, { "epoch": 0.13011944371304784, "grad_norm": 0.15625, "learning_rate": 0.00044032561095687676, "loss": 0.1335, "step": 73386 }, { "epoch": 0.13012298987835766, "grad_norm": 0.400390625, "learning_rate": 0.0004402851242853232, "loss": 0.1145, "step": 73388 }, { "epoch": 0.13012653604366747, "grad_norm": 0.458984375, "learning_rate": 0.00044024464049897906, "loss": 0.1669, "step": 73390 }, { "epoch": 0.1301300822089773, "grad_norm": 0.5546875, "learning_rate": 0.000440204159598022, "loss": 0.145, "step": 73392 }, { "epoch": 0.1301336283742871, "grad_norm": 0.361328125, "learning_rate": 0.0004401636815826283, "loss": 0.1637, "step": 73394 }, { "epoch": 0.13013717453959692, "grad_norm": 0.30078125, "learning_rate": 0.0004401232064529756, "loss": 0.1591, "step": 73396 }, { "epoch": 0.13014072070490673, "grad_norm": 0.6328125, "learning_rate": 0.0004400827342092404, "loss": 0.2329, "step": 73398 }, { "epoch": 0.13014426687021655, "grad_norm": 1.359375, "learning_rate": 0.0004400422648516006, "loss": 0.3376, "step": 73400 }, { "epoch": 0.13014781303552636, "grad_norm": 0.68359375, "learning_rate": 0.000440001798380232, "loss": 0.1961, "step": 73402 }, { "epoch": 0.13015135920083618, "grad_norm": 0.1337890625, "learning_rate": 0.0004399613347953125, "loss": 0.1581, "step": 73404 }, { "epoch": 0.130154905366146, "grad_norm": 1.921875, "learning_rate": 0.0004399208740970186, "loss": 0.1993, "step": 73406 }, { "epoch": 0.1301584515314558, "grad_norm": 0.47265625, "learning_rate": 0.00043988041628552756, "loss": 0.2161, "step": 73408 }, { "epoch": 0.13016199769676562, "grad_norm": 3.0625, "learning_rate": 0.000439839961361016, "loss": 0.247, "step": 73410 }, { "epoch": 0.13016554386207543, "grad_norm": 0.5078125, "learning_rate": 0.0004397995093236607, "loss": 0.1995, "step": 73412 }, { "epoch": 0.13016909002738525, "grad_norm": 0.73828125, "learning_rate": 0.0004397590601736392, "loss": 0.1518, "step": 73414 }, { "epoch": 0.1301726361926951, "grad_norm": 0.48046875, "learning_rate": 0.0004397186139111278, "loss": 0.2143, "step": 73416 }, { "epoch": 0.1301761823580049, "grad_norm": 0.28125, "learning_rate": 0.00043967817053630403, "loss": 0.2235, "step": 73418 }, { "epoch": 0.13017972852331472, "grad_norm": 0.29296875, "learning_rate": 0.00043963773004934394, "loss": 0.1461, "step": 73420 }, { "epoch": 0.13018327468862453, "grad_norm": 0.68359375, "learning_rate": 0.0004395972924504252, "loss": 0.1658, "step": 73422 }, { "epoch": 0.13018682085393435, "grad_norm": 0.5, "learning_rate": 0.0004395568577397242, "loss": 0.2015, "step": 73424 }, { "epoch": 0.13019036701924416, "grad_norm": 0.158203125, "learning_rate": 0.0004395164259174178, "loss": 0.1166, "step": 73426 }, { "epoch": 0.13019391318455398, "grad_norm": 1.640625, "learning_rate": 0.000439475996983683, "loss": 0.1745, "step": 73428 }, { "epoch": 0.1301974593498638, "grad_norm": 0.263671875, "learning_rate": 0.0004394355709386963, "loss": 0.1236, "step": 73430 }, { "epoch": 0.1302010055151736, "grad_norm": 0.287109375, "learning_rate": 0.00043939514778263485, "loss": 0.1781, "step": 73432 }, { "epoch": 0.13020455168048342, "grad_norm": 0.703125, "learning_rate": 0.00043935472751567534, "loss": 0.1771, "step": 73434 }, { "epoch": 0.13020809784579324, "grad_norm": 4.4375, "learning_rate": 0.00043931431013799455, "loss": 0.3115, "step": 73436 }, { "epoch": 0.13021164401110305, "grad_norm": 0.408203125, "learning_rate": 0.0004392738956497688, "loss": 0.1636, "step": 73438 }, { "epoch": 0.13021519017641286, "grad_norm": 0.3984375, "learning_rate": 0.0004392334840511757, "loss": 0.152, "step": 73440 }, { "epoch": 0.13021873634172268, "grad_norm": 0.2421875, "learning_rate": 0.0004391930753423914, "loss": 0.1507, "step": 73442 }, { "epoch": 0.1302222825070325, "grad_norm": 0.38671875, "learning_rate": 0.00043915266952359276, "loss": 0.1668, "step": 73444 }, { "epoch": 0.1302258286723423, "grad_norm": 0.458984375, "learning_rate": 0.00043911226659495647, "loss": 0.2116, "step": 73446 }, { "epoch": 0.13022937483765212, "grad_norm": 0.33984375, "learning_rate": 0.000439071866556659, "loss": 0.1568, "step": 73448 }, { "epoch": 0.13023292100296194, "grad_norm": 0.390625, "learning_rate": 0.0004390314694088778, "loss": 0.1739, "step": 73450 }, { "epoch": 0.13023646716827175, "grad_norm": 0.427734375, "learning_rate": 0.0004389910751517885, "loss": 0.1773, "step": 73452 }, { "epoch": 0.13024001333358157, "grad_norm": 0.52734375, "learning_rate": 0.00043895068378556844, "loss": 0.1604, "step": 73454 }, { "epoch": 0.13024355949889138, "grad_norm": 0.9453125, "learning_rate": 0.00043891029531039396, "loss": 0.1559, "step": 73456 }, { "epoch": 0.1302471056642012, "grad_norm": 0.328125, "learning_rate": 0.0004388699097264424, "loss": 0.1608, "step": 73458 }, { "epoch": 0.130250651829511, "grad_norm": 0.2421875, "learning_rate": 0.00043882952703388914, "loss": 0.2025, "step": 73460 }, { "epoch": 0.13025419799482083, "grad_norm": 0.275390625, "learning_rate": 0.0004387891472329118, "loss": 0.1693, "step": 73462 }, { "epoch": 0.13025774416013064, "grad_norm": 0.5234375, "learning_rate": 0.0004387487703236867, "loss": 0.1516, "step": 73464 }, { "epoch": 0.13026129032544045, "grad_norm": 0.70703125, "learning_rate": 0.0004387083963063903, "loss": 0.1962, "step": 73466 }, { "epoch": 0.13026483649075027, "grad_norm": 0.25, "learning_rate": 0.0004386680251811994, "loss": 0.1773, "step": 73468 }, { "epoch": 0.13026838265606008, "grad_norm": 0.498046875, "learning_rate": 0.00043862765694829007, "loss": 0.3068, "step": 73470 }, { "epoch": 0.1302719288213699, "grad_norm": 0.55859375, "learning_rate": 0.0004385872916078394, "loss": 0.2175, "step": 73472 }, { "epoch": 0.1302754749866797, "grad_norm": 0.328125, "learning_rate": 0.00043854692916002377, "loss": 0.119, "step": 73474 }, { "epoch": 0.13027902115198953, "grad_norm": 0.43359375, "learning_rate": 0.0004385065696050197, "loss": 0.3114, "step": 73476 }, { "epoch": 0.13028256731729934, "grad_norm": 0.51953125, "learning_rate": 0.0004384662129430033, "loss": 0.2155, "step": 73478 }, { "epoch": 0.13028611348260916, "grad_norm": 0.6015625, "learning_rate": 0.0004384258591741517, "loss": 0.2088, "step": 73480 }, { "epoch": 0.13028965964791897, "grad_norm": 1.4765625, "learning_rate": 0.0004383855082986411, "loss": 0.263, "step": 73482 }, { "epoch": 0.13029320581322879, "grad_norm": 1.4296875, "learning_rate": 0.00043834516031664796, "loss": 0.2848, "step": 73484 }, { "epoch": 0.1302967519785386, "grad_norm": 0.328125, "learning_rate": 0.00043830481522834884, "loss": 0.1638, "step": 73486 }, { "epoch": 0.13030029814384841, "grad_norm": 0.515625, "learning_rate": 0.0004382644730339198, "loss": 0.212, "step": 73488 }, { "epoch": 0.13030384430915823, "grad_norm": 0.515625, "learning_rate": 0.0004382241337335381, "loss": 0.1294, "step": 73490 }, { "epoch": 0.13030739047446804, "grad_norm": 0.40625, "learning_rate": 0.0004381837973273791, "loss": 0.134, "step": 73492 }, { "epoch": 0.13031093663977786, "grad_norm": 0.57421875, "learning_rate": 0.00043814346381561997, "loss": 0.1784, "step": 73494 }, { "epoch": 0.13031448280508767, "grad_norm": 0.5625, "learning_rate": 0.00043810313319843694, "loss": 0.2975, "step": 73496 }, { "epoch": 0.1303180289703975, "grad_norm": 0.59375, "learning_rate": 0.00043806280547600607, "loss": 0.1547, "step": 73498 }, { "epoch": 0.1303215751357073, "grad_norm": 2.234375, "learning_rate": 0.0004380224806485044, "loss": 0.2901, "step": 73500 }, { "epoch": 0.13032512130101712, "grad_norm": 0.263671875, "learning_rate": 0.0004379821587161075, "loss": 0.2432, "step": 73502 }, { "epoch": 0.13032866746632693, "grad_norm": 0.279296875, "learning_rate": 0.0004379418396789923, "loss": 0.173, "step": 73504 }, { "epoch": 0.13033221363163677, "grad_norm": 0.400390625, "learning_rate": 0.0004379015235373346, "loss": 0.1852, "step": 73506 }, { "epoch": 0.1303357597969466, "grad_norm": 0.3828125, "learning_rate": 0.00043786121029131165, "loss": 0.2659, "step": 73508 }, { "epoch": 0.1303393059622564, "grad_norm": 0.34765625, "learning_rate": 0.00043782089994109864, "loss": 0.226, "step": 73510 }, { "epoch": 0.13034285212756622, "grad_norm": 0.341796875, "learning_rate": 0.0004377805924868726, "loss": 0.3588, "step": 73512 }, { "epoch": 0.13034639829287603, "grad_norm": 0.29296875, "learning_rate": 0.00043774028792880953, "loss": 0.1283, "step": 73514 }, { "epoch": 0.13034994445818585, "grad_norm": 0.236328125, "learning_rate": 0.0004376999862670858, "loss": 0.168, "step": 73516 }, { "epoch": 0.13035349062349566, "grad_norm": 0.4453125, "learning_rate": 0.00043765968750187766, "loss": 0.2073, "step": 73518 }, { "epoch": 0.13035703678880547, "grad_norm": 0.28515625, "learning_rate": 0.00043761939163336103, "loss": 0.1913, "step": 73520 }, { "epoch": 0.1303605829541153, "grad_norm": 0.3828125, "learning_rate": 0.00043757909866171265, "loss": 0.218, "step": 73522 }, { "epoch": 0.1303641291194251, "grad_norm": 0.91015625, "learning_rate": 0.00043753880858710847, "loss": 0.2766, "step": 73524 }, { "epoch": 0.13036767528473492, "grad_norm": 0.62109375, "learning_rate": 0.0004374985214097249, "loss": 0.1656, "step": 73526 }, { "epoch": 0.13037122145004473, "grad_norm": 0.283203125, "learning_rate": 0.00043745823712973756, "loss": 0.1278, "step": 73528 }, { "epoch": 0.13037476761535455, "grad_norm": 0.287109375, "learning_rate": 0.00043741795574732335, "loss": 0.1579, "step": 73530 }, { "epoch": 0.13037831378066436, "grad_norm": 1.015625, "learning_rate": 0.0004373776772626581, "loss": 0.1862, "step": 73532 }, { "epoch": 0.13038185994597418, "grad_norm": 2.671875, "learning_rate": 0.00043733740167591805, "loss": 0.5558, "step": 73534 }, { "epoch": 0.130385406111284, "grad_norm": 0.392578125, "learning_rate": 0.00043729712898727926, "loss": 0.1493, "step": 73536 }, { "epoch": 0.1303889522765938, "grad_norm": 0.3359375, "learning_rate": 0.0004372568591969176, "loss": 0.1816, "step": 73538 }, { "epoch": 0.13039249844190362, "grad_norm": 0.4296875, "learning_rate": 0.0004372165923050101, "loss": 0.195, "step": 73540 }, { "epoch": 0.13039604460721343, "grad_norm": 0.490234375, "learning_rate": 0.0004371763283117316, "loss": 0.1838, "step": 73542 }, { "epoch": 0.13039959077252325, "grad_norm": 1.53125, "learning_rate": 0.00043713606721725907, "loss": 0.1829, "step": 73544 }, { "epoch": 0.13040313693783306, "grad_norm": 0.404296875, "learning_rate": 0.0004370958090217681, "loss": 0.4379, "step": 73546 }, { "epoch": 0.13040668310314288, "grad_norm": 0.48828125, "learning_rate": 0.0004370555537254356, "loss": 0.1434, "step": 73548 }, { "epoch": 0.1304102292684527, "grad_norm": 0.4140625, "learning_rate": 0.00043701530132843634, "loss": 0.2293, "step": 73550 }, { "epoch": 0.1304137754337625, "grad_norm": 0.369140625, "learning_rate": 0.00043697505183094745, "loss": 0.1942, "step": 73552 }, { "epoch": 0.13041732159907232, "grad_norm": 0.5078125, "learning_rate": 0.00043693480523314464, "loss": 0.1555, "step": 73554 }, { "epoch": 0.13042086776438214, "grad_norm": 0.375, "learning_rate": 0.0004368945615352037, "loss": 0.127, "step": 73556 }, { "epoch": 0.13042441392969195, "grad_norm": 0.2890625, "learning_rate": 0.0004368543207373008, "loss": 0.1797, "step": 73558 }, { "epoch": 0.13042796009500177, "grad_norm": 0.26953125, "learning_rate": 0.0004368140828396115, "loss": 0.2032, "step": 73560 }, { "epoch": 0.13043150626031158, "grad_norm": 0.392578125, "learning_rate": 0.0004367738478423127, "loss": 0.192, "step": 73562 }, { "epoch": 0.1304350524256214, "grad_norm": 0.65234375, "learning_rate": 0.0004367336157455797, "loss": 0.3139, "step": 73564 }, { "epoch": 0.1304385985909312, "grad_norm": 0.515625, "learning_rate": 0.00043669338654958855, "loss": 0.1715, "step": 73566 }, { "epoch": 0.13044214475624102, "grad_norm": 0.46484375, "learning_rate": 0.00043665316025451507, "loss": 0.2238, "step": 73568 }, { "epoch": 0.13044569092155084, "grad_norm": 0.2333984375, "learning_rate": 0.0004366129368605355, "loss": 0.2342, "step": 73570 }, { "epoch": 0.13044923708686065, "grad_norm": 0.3359375, "learning_rate": 0.00043657271636782577, "loss": 0.1592, "step": 73572 }, { "epoch": 0.13045278325217047, "grad_norm": 0.271484375, "learning_rate": 0.0004365324987765615, "loss": 0.2, "step": 73574 }, { "epoch": 0.13045632941748028, "grad_norm": 0.84765625, "learning_rate": 0.00043649228408691867, "loss": 0.1512, "step": 73576 }, { "epoch": 0.1304598755827901, "grad_norm": 0.361328125, "learning_rate": 0.0004364520722990729, "loss": 0.1435, "step": 73578 }, { "epoch": 0.1304634217480999, "grad_norm": 0.5546875, "learning_rate": 0.00043641186341320074, "loss": 0.2634, "step": 73580 }, { "epoch": 0.13046696791340973, "grad_norm": 0.703125, "learning_rate": 0.0004363716574294774, "loss": 0.1711, "step": 73582 }, { "epoch": 0.13047051407871954, "grad_norm": 0.54296875, "learning_rate": 0.0004363314543480792, "loss": 0.1694, "step": 73584 }, { "epoch": 0.13047406024402936, "grad_norm": 0.435546875, "learning_rate": 0.00043629125416918125, "loss": 0.1938, "step": 73586 }, { "epoch": 0.13047760640933917, "grad_norm": 0.384765625, "learning_rate": 0.00043625105689296025, "loss": 0.1457, "step": 73588 }, { "epoch": 0.13048115257464898, "grad_norm": 0.43359375, "learning_rate": 0.0004362108625195915, "loss": 0.1652, "step": 73590 }, { "epoch": 0.1304846987399588, "grad_norm": 0.373046875, "learning_rate": 0.0004361706710492508, "loss": 0.1414, "step": 73592 }, { "epoch": 0.1304882449052686, "grad_norm": 0.328125, "learning_rate": 0.00043613048248211406, "loss": 0.2093, "step": 73594 }, { "epoch": 0.13049179107057843, "grad_norm": 0.400390625, "learning_rate": 0.00043609029681835677, "loss": 0.206, "step": 73596 }, { "epoch": 0.13049533723588827, "grad_norm": 0.220703125, "learning_rate": 0.00043605011405815534, "loss": 0.1828, "step": 73598 }, { "epoch": 0.13049888340119808, "grad_norm": 0.2470703125, "learning_rate": 0.00043600993420168455, "loss": 0.1718, "step": 73600 }, { "epoch": 0.1305024295665079, "grad_norm": 0.16796875, "learning_rate": 0.00043596975724912075, "loss": 0.1317, "step": 73602 }, { "epoch": 0.1305059757318177, "grad_norm": 0.384765625, "learning_rate": 0.00043592958320063936, "loss": 0.2674, "step": 73604 }, { "epoch": 0.13050952189712753, "grad_norm": 0.44921875, "learning_rate": 0.0004358894120564167, "loss": 0.2725, "step": 73606 }, { "epoch": 0.13051306806243734, "grad_norm": 0.41796875, "learning_rate": 0.00043584924381662757, "loss": 0.146, "step": 73608 }, { "epoch": 0.13051661422774716, "grad_norm": 0.439453125, "learning_rate": 0.00043580907848144823, "loss": 0.1419, "step": 73610 }, { "epoch": 0.13052016039305697, "grad_norm": 0.66015625, "learning_rate": 0.00043576891605105425, "loss": 0.2265, "step": 73612 }, { "epoch": 0.1305237065583668, "grad_norm": 0.20703125, "learning_rate": 0.0004357287565256211, "loss": 0.1468, "step": 73614 }, { "epoch": 0.1305272527236766, "grad_norm": 0.271484375, "learning_rate": 0.00043568859990532463, "loss": 0.2817, "step": 73616 }, { "epoch": 0.13053079888898642, "grad_norm": 0.41796875, "learning_rate": 0.00043564844619034006, "loss": 0.1981, "step": 73618 }, { "epoch": 0.13053434505429623, "grad_norm": 0.54296875, "learning_rate": 0.00043560829538084346, "loss": 0.1581, "step": 73620 }, { "epoch": 0.13053789121960604, "grad_norm": 0.67578125, "learning_rate": 0.00043556814747701037, "loss": 0.1509, "step": 73622 }, { "epoch": 0.13054143738491586, "grad_norm": 0.208984375, "learning_rate": 0.00043552800247901615, "loss": 0.2034, "step": 73624 }, { "epoch": 0.13054498355022567, "grad_norm": 0.287109375, "learning_rate": 0.0004354878603870362, "loss": 0.1564, "step": 73626 }, { "epoch": 0.1305485297155355, "grad_norm": 0.28125, "learning_rate": 0.00043544772120124667, "loss": 0.1614, "step": 73628 }, { "epoch": 0.1305520758808453, "grad_norm": 0.265625, "learning_rate": 0.00043540758492182274, "loss": 0.1955, "step": 73630 }, { "epoch": 0.13055562204615512, "grad_norm": 0.88671875, "learning_rate": 0.00043536745154894004, "loss": 0.2364, "step": 73632 }, { "epoch": 0.13055916821146493, "grad_norm": 0.443359375, "learning_rate": 0.0004353273210827741, "loss": 0.2099, "step": 73634 }, { "epoch": 0.13056271437677475, "grad_norm": 0.326171875, "learning_rate": 0.0004352871935235, "loss": 0.1468, "step": 73636 }, { "epoch": 0.13056626054208456, "grad_norm": 0.49609375, "learning_rate": 0.0004352470688712942, "loss": 0.1745, "step": 73638 }, { "epoch": 0.13056980670739438, "grad_norm": 0.30078125, "learning_rate": 0.000435206947126331, "loss": 0.1848, "step": 73640 }, { "epoch": 0.1305733528727042, "grad_norm": 3.109375, "learning_rate": 0.00043516682828878686, "loss": 0.2537, "step": 73642 }, { "epoch": 0.130576899038014, "grad_norm": 0.7109375, "learning_rate": 0.00043512671235883645, "loss": 0.2296, "step": 73644 }, { "epoch": 0.13058044520332382, "grad_norm": 0.38671875, "learning_rate": 0.00043508659933665604, "loss": 0.1331, "step": 73646 }, { "epoch": 0.13058399136863363, "grad_norm": 0.32421875, "learning_rate": 0.0004350464892224203, "loss": 0.2017, "step": 73648 }, { "epoch": 0.13058753753394345, "grad_norm": 0.53125, "learning_rate": 0.00043500638201630513, "loss": 0.2553, "step": 73650 }, { "epoch": 0.13059108369925326, "grad_norm": 1.2421875, "learning_rate": 0.0004349662777184857, "loss": 0.2107, "step": 73652 }, { "epoch": 0.13059462986456308, "grad_norm": 1.2890625, "learning_rate": 0.0004349261763291375, "loss": 0.2642, "step": 73654 }, { "epoch": 0.1305981760298729, "grad_norm": 0.6640625, "learning_rate": 0.0004348860778484359, "loss": 0.1884, "step": 73656 }, { "epoch": 0.1306017221951827, "grad_norm": 0.2216796875, "learning_rate": 0.00043484598227655605, "loss": 0.1128, "step": 73658 }, { "epoch": 0.13060526836049252, "grad_norm": 0.322265625, "learning_rate": 0.0004348058896136737, "loss": 0.2016, "step": 73660 }, { "epoch": 0.13060881452580234, "grad_norm": 0.51171875, "learning_rate": 0.000434765799859964, "loss": 0.2484, "step": 73662 }, { "epoch": 0.13061236069111215, "grad_norm": 0.36328125, "learning_rate": 0.00043472571301560235, "loss": 0.1825, "step": 73664 }, { "epoch": 0.13061590685642196, "grad_norm": 0.458984375, "learning_rate": 0.0004346856290807637, "loss": 0.1873, "step": 73666 }, { "epoch": 0.13061945302173178, "grad_norm": 0.24609375, "learning_rate": 0.00043464554805562383, "loss": 0.1676, "step": 73668 }, { "epoch": 0.1306229991870416, "grad_norm": 3.40625, "learning_rate": 0.0004346054699403579, "loss": 0.4342, "step": 73670 }, { "epoch": 0.1306265453523514, "grad_norm": 0.373046875, "learning_rate": 0.0004345653947351412, "loss": 0.1452, "step": 73672 }, { "epoch": 0.13063009151766122, "grad_norm": 0.85546875, "learning_rate": 0.00043452532244014886, "loss": 0.2062, "step": 73674 }, { "epoch": 0.13063363768297104, "grad_norm": 0.357421875, "learning_rate": 0.00043448525305555603, "loss": 0.1499, "step": 73676 }, { "epoch": 0.13063718384828085, "grad_norm": 0.359375, "learning_rate": 0.0004344451865815383, "loss": 0.1827, "step": 73678 }, { "epoch": 0.13064073001359067, "grad_norm": 0.2451171875, "learning_rate": 0.0004344051230182707, "loss": 0.2081, "step": 73680 }, { "epoch": 0.13064427617890048, "grad_norm": 1.890625, "learning_rate": 0.00043436506236592854, "loss": 0.3846, "step": 73682 }, { "epoch": 0.1306478223442103, "grad_norm": 0.240234375, "learning_rate": 0.0004343250046246869, "loss": 0.1498, "step": 73684 }, { "epoch": 0.1306513685095201, "grad_norm": 0.71484375, "learning_rate": 0.0004342849497947208, "loss": 0.2455, "step": 73686 }, { "epoch": 0.13065491467482995, "grad_norm": 0.51953125, "learning_rate": 0.00043424489787620614, "loss": 0.1614, "step": 73688 }, { "epoch": 0.13065846084013977, "grad_norm": 0.263671875, "learning_rate": 0.0004342048488693171, "loss": 0.1843, "step": 73690 }, { "epoch": 0.13066200700544958, "grad_norm": 0.283203125, "learning_rate": 0.00043416480277422954, "loss": 0.1712, "step": 73692 }, { "epoch": 0.1306655531707594, "grad_norm": 0.578125, "learning_rate": 0.00043412475959111807, "loss": 0.1662, "step": 73694 }, { "epoch": 0.1306690993360692, "grad_norm": 0.384765625, "learning_rate": 0.00043408471932015856, "loss": 0.2024, "step": 73696 }, { "epoch": 0.13067264550137903, "grad_norm": 0.462890625, "learning_rate": 0.00043404468196152536, "loss": 0.1807, "step": 73698 }, { "epoch": 0.13067619166668884, "grad_norm": 0.298828125, "learning_rate": 0.00043400464751539405, "loss": 0.2033, "step": 73700 }, { "epoch": 0.13067973783199865, "grad_norm": 0.34765625, "learning_rate": 0.0004339646159819396, "loss": 0.2393, "step": 73702 }, { "epoch": 0.13068328399730847, "grad_norm": 0.453125, "learning_rate": 0.000433924587361337, "loss": 0.2687, "step": 73704 }, { "epoch": 0.13068683016261828, "grad_norm": 0.50390625, "learning_rate": 0.00043388456165376125, "loss": 0.1595, "step": 73706 }, { "epoch": 0.1306903763279281, "grad_norm": 0.9296875, "learning_rate": 0.00043384453885938747, "loss": 0.1578, "step": 73708 }, { "epoch": 0.1306939224932379, "grad_norm": 0.302734375, "learning_rate": 0.0004338045189783908, "loss": 0.1836, "step": 73710 }, { "epoch": 0.13069746865854773, "grad_norm": 0.74609375, "learning_rate": 0.0004337645020109462, "loss": 0.2011, "step": 73712 }, { "epoch": 0.13070101482385754, "grad_norm": 0.2099609375, "learning_rate": 0.00043372448795722874, "loss": 0.2053, "step": 73714 }, { "epoch": 0.13070456098916736, "grad_norm": 0.86328125, "learning_rate": 0.00043368447681741307, "loss": 0.1461, "step": 73716 }, { "epoch": 0.13070810715447717, "grad_norm": 0.416015625, "learning_rate": 0.0004336444685916747, "loss": 0.152, "step": 73718 }, { "epoch": 0.13071165331978699, "grad_norm": 0.482421875, "learning_rate": 0.00043360446328018844, "loss": 0.1984, "step": 73720 }, { "epoch": 0.1307151994850968, "grad_norm": 1.2578125, "learning_rate": 0.0004335644608831292, "loss": 0.2358, "step": 73722 }, { "epoch": 0.13071874565040661, "grad_norm": 0.478515625, "learning_rate": 0.0004335244614006717, "loss": 0.1725, "step": 73724 }, { "epoch": 0.13072229181571643, "grad_norm": 0.390625, "learning_rate": 0.000433484464832991, "loss": 0.1572, "step": 73726 }, { "epoch": 0.13072583798102624, "grad_norm": 1.375, "learning_rate": 0.0004334444711802625, "loss": 0.2028, "step": 73728 }, { "epoch": 0.13072938414633606, "grad_norm": 0.32421875, "learning_rate": 0.0004334044804426602, "loss": 0.1917, "step": 73730 }, { "epoch": 0.13073293031164587, "grad_norm": 0.220703125, "learning_rate": 0.00043336449262035975, "loss": 0.1179, "step": 73732 }, { "epoch": 0.1307364764769557, "grad_norm": 0.2421875, "learning_rate": 0.0004333245077135355, "loss": 0.2511, "step": 73734 }, { "epoch": 0.1307400226422655, "grad_norm": 0.58984375, "learning_rate": 0.0004332845257223632, "loss": 0.1999, "step": 73736 }, { "epoch": 0.13074356880757532, "grad_norm": 0.310546875, "learning_rate": 0.0004332445466470165, "loss": 0.14, "step": 73738 }, { "epoch": 0.13074711497288513, "grad_norm": 0.5703125, "learning_rate": 0.00043320457048767123, "loss": 0.163, "step": 73740 }, { "epoch": 0.13075066113819495, "grad_norm": 0.30859375, "learning_rate": 0.0004331645972445018, "loss": 0.1509, "step": 73742 }, { "epoch": 0.13075420730350476, "grad_norm": 0.26171875, "learning_rate": 0.0004331246269176831, "loss": 0.2177, "step": 73744 }, { "epoch": 0.13075775346881457, "grad_norm": 0.416015625, "learning_rate": 0.00043308465950738986, "loss": 0.1792, "step": 73746 }, { "epoch": 0.1307612996341244, "grad_norm": 0.3828125, "learning_rate": 0.0004330446950137966, "loss": 0.1516, "step": 73748 }, { "epoch": 0.1307648457994342, "grad_norm": 0.345703125, "learning_rate": 0.00043300473343707883, "loss": 0.1232, "step": 73750 }, { "epoch": 0.13076839196474402, "grad_norm": 0.224609375, "learning_rate": 0.00043296477477741056, "loss": 0.1837, "step": 73752 }, { "epoch": 0.13077193813005383, "grad_norm": 0.333984375, "learning_rate": 0.0004329248190349675, "loss": 0.1383, "step": 73754 }, { "epoch": 0.13077548429536365, "grad_norm": 0.228515625, "learning_rate": 0.0004328848662099232, "loss": 0.1657, "step": 73756 }, { "epoch": 0.13077903046067346, "grad_norm": 0.625, "learning_rate": 0.0004328449163024534, "loss": 0.1978, "step": 73758 }, { "epoch": 0.13078257662598328, "grad_norm": 0.49609375, "learning_rate": 0.00043280496931273225, "loss": 0.2093, "step": 73760 }, { "epoch": 0.1307861227912931, "grad_norm": 0.4375, "learning_rate": 0.00043276502524093466, "loss": 0.1167, "step": 73762 }, { "epoch": 0.1307896689566029, "grad_norm": 1.796875, "learning_rate": 0.00043272508408723524, "loss": 0.2717, "step": 73764 }, { "epoch": 0.13079321512191272, "grad_norm": 0.55078125, "learning_rate": 0.00043268514585180853, "loss": 0.1713, "step": 73766 }, { "epoch": 0.13079676128722253, "grad_norm": 0.28515625, "learning_rate": 0.00043264521053482954, "loss": 0.1728, "step": 73768 }, { "epoch": 0.13080030745253235, "grad_norm": 0.28515625, "learning_rate": 0.00043260527813647286, "loss": 0.2016, "step": 73770 }, { "epoch": 0.13080385361784216, "grad_norm": 1.3203125, "learning_rate": 0.0004325653486569129, "loss": 0.2293, "step": 73772 }, { "epoch": 0.13080739978315198, "grad_norm": 0.263671875, "learning_rate": 0.00043252542209632427, "loss": 0.1334, "step": 73774 }, { "epoch": 0.1308109459484618, "grad_norm": 0.1767578125, "learning_rate": 0.00043248549845488184, "loss": 0.1878, "step": 73776 }, { "epoch": 0.13081449211377164, "grad_norm": 0.30078125, "learning_rate": 0.0004324455777327603, "loss": 0.1791, "step": 73778 }, { "epoch": 0.13081803827908145, "grad_norm": 0.28125, "learning_rate": 0.00043240565993013394, "loss": 0.1893, "step": 73780 }, { "epoch": 0.13082158444439126, "grad_norm": 0.498046875, "learning_rate": 0.0004323657450471774, "loss": 0.1888, "step": 73782 }, { "epoch": 0.13082513060970108, "grad_norm": 0.5234375, "learning_rate": 0.00043232583308406495, "loss": 0.1882, "step": 73784 }, { "epoch": 0.1308286767750109, "grad_norm": 0.60546875, "learning_rate": 0.00043228592404097213, "loss": 0.1962, "step": 73786 }, { "epoch": 0.1308322229403207, "grad_norm": 0.875, "learning_rate": 0.00043224601791807225, "loss": 0.23, "step": 73788 }, { "epoch": 0.13083576910563052, "grad_norm": 0.34765625, "learning_rate": 0.0004322061147155407, "loss": 0.1755, "step": 73790 }, { "epoch": 0.13083931527094034, "grad_norm": 0.244140625, "learning_rate": 0.0004321662144335514, "loss": 0.1393, "step": 73792 }, { "epoch": 0.13084286143625015, "grad_norm": 1.484375, "learning_rate": 0.0004321263170722798, "loss": 0.147, "step": 73794 }, { "epoch": 0.13084640760155997, "grad_norm": 0.4765625, "learning_rate": 0.00043208642263189913, "loss": 0.1515, "step": 73796 }, { "epoch": 0.13084995376686978, "grad_norm": 0.8203125, "learning_rate": 0.00043204653111258474, "loss": 0.1765, "step": 73798 }, { "epoch": 0.1308534999321796, "grad_norm": 0.365234375, "learning_rate": 0.0004320066425145108, "loss": 0.1606, "step": 73800 }, { "epoch": 0.1308570460974894, "grad_norm": 0.298828125, "learning_rate": 0.0004319667568378518, "loss": 0.147, "step": 73802 }, { "epoch": 0.13086059226279922, "grad_norm": 0.68359375, "learning_rate": 0.0004319268740827822, "loss": 0.1644, "step": 73804 }, { "epoch": 0.13086413842810904, "grad_norm": 11.0625, "learning_rate": 0.0004318869942494761, "loss": 0.2922, "step": 73806 }, { "epoch": 0.13086768459341885, "grad_norm": 0.54296875, "learning_rate": 0.00043184711733810854, "loss": 0.1358, "step": 73808 }, { "epoch": 0.13087123075872867, "grad_norm": 0.482421875, "learning_rate": 0.0004318072433488535, "loss": 0.2011, "step": 73810 }, { "epoch": 0.13087477692403848, "grad_norm": 0.90625, "learning_rate": 0.00043176737228188544, "loss": 0.2085, "step": 73812 }, { "epoch": 0.1308783230893483, "grad_norm": 0.384765625, "learning_rate": 0.0004317275041373785, "loss": 0.1284, "step": 73814 }, { "epoch": 0.1308818692546581, "grad_norm": 0.33984375, "learning_rate": 0.00043168763891550746, "loss": 0.1701, "step": 73816 }, { "epoch": 0.13088541541996793, "grad_norm": 0.59375, "learning_rate": 0.00043164777661644667, "loss": 0.1639, "step": 73818 }, { "epoch": 0.13088896158527774, "grad_norm": 0.2314453125, "learning_rate": 0.00043160791724037006, "loss": 0.1936, "step": 73820 }, { "epoch": 0.13089250775058756, "grad_norm": 0.29296875, "learning_rate": 0.00043156806078745234, "loss": 0.1937, "step": 73822 }, { "epoch": 0.13089605391589737, "grad_norm": 0.2197265625, "learning_rate": 0.0004315282072578673, "loss": 0.1149, "step": 73824 }, { "epoch": 0.13089960008120718, "grad_norm": 0.71484375, "learning_rate": 0.0004314883566517902, "loss": 0.2436, "step": 73826 }, { "epoch": 0.130903146246517, "grad_norm": 0.2373046875, "learning_rate": 0.00043144850896939416, "loss": 0.1221, "step": 73828 }, { "epoch": 0.1309066924118268, "grad_norm": 0.416015625, "learning_rate": 0.0004314086642108541, "loss": 0.1412, "step": 73830 }, { "epoch": 0.13091023857713663, "grad_norm": 0.9140625, "learning_rate": 0.0004313688223763441, "loss": 0.1958, "step": 73832 }, { "epoch": 0.13091378474244644, "grad_norm": 0.45703125, "learning_rate": 0.0004313289834660387, "loss": 0.2226, "step": 73834 }, { "epoch": 0.13091733090775626, "grad_norm": 1.7109375, "learning_rate": 0.0004312891474801119, "loss": 0.1648, "step": 73836 }, { "epoch": 0.13092087707306607, "grad_norm": 0.263671875, "learning_rate": 0.00043124931441873793, "loss": 0.1248, "step": 73838 }, { "epoch": 0.1309244232383759, "grad_norm": 0.70703125, "learning_rate": 0.00043120948428209104, "loss": 0.1555, "step": 73840 }, { "epoch": 0.1309279694036857, "grad_norm": 0.26953125, "learning_rate": 0.0004311696570703451, "loss": 0.1375, "step": 73842 }, { "epoch": 0.13093151556899552, "grad_norm": 0.33203125, "learning_rate": 0.0004311298327836751, "loss": 0.1886, "step": 73844 }, { "epoch": 0.13093506173430533, "grad_norm": 0.375, "learning_rate": 0.0004310900114222542, "loss": 0.2154, "step": 73846 }, { "epoch": 0.13093860789961514, "grad_norm": 0.314453125, "learning_rate": 0.0004310501929862572, "loss": 0.2016, "step": 73848 }, { "epoch": 0.13094215406492496, "grad_norm": 1.5859375, "learning_rate": 0.0004310103774758582, "loss": 0.2326, "step": 73850 }, { "epoch": 0.13094570023023477, "grad_norm": 0.234375, "learning_rate": 0.00043097056489123115, "loss": 0.126, "step": 73852 }, { "epoch": 0.1309492463955446, "grad_norm": 0.28515625, "learning_rate": 0.0004309307552325502, "loss": 0.1803, "step": 73854 }, { "epoch": 0.1309527925608544, "grad_norm": 0.50390625, "learning_rate": 0.00043089094849998934, "loss": 0.1547, "step": 73856 }, { "epoch": 0.13095633872616422, "grad_norm": 0.3046875, "learning_rate": 0.000430851144693723, "loss": 0.1631, "step": 73858 }, { "epoch": 0.13095988489147403, "grad_norm": 0.232421875, "learning_rate": 0.00043081134381392505, "loss": 0.1932, "step": 73860 }, { "epoch": 0.13096343105678385, "grad_norm": 0.40625, "learning_rate": 0.00043077154586076955, "loss": 0.1638, "step": 73862 }, { "epoch": 0.13096697722209366, "grad_norm": 0.35546875, "learning_rate": 0.00043073175083443034, "loss": 0.1929, "step": 73864 }, { "epoch": 0.13097052338740348, "grad_norm": 0.67578125, "learning_rate": 0.0004306919587350819, "loss": 0.1834, "step": 73866 }, { "epoch": 0.1309740695527133, "grad_norm": 1.34375, "learning_rate": 0.00043065216956289824, "loss": 0.2118, "step": 73868 }, { "epoch": 0.13097761571802313, "grad_norm": 0.5078125, "learning_rate": 0.000430612383318053, "loss": 0.1472, "step": 73870 }, { "epoch": 0.13098116188333295, "grad_norm": 0.51953125, "learning_rate": 0.0004305726000007204, "loss": 0.2139, "step": 73872 }, { "epoch": 0.13098470804864276, "grad_norm": 0.2470703125, "learning_rate": 0.00043053281961107423, "loss": 0.1696, "step": 73874 }, { "epoch": 0.13098825421395258, "grad_norm": 0.76953125, "learning_rate": 0.000430493042149289, "loss": 0.1739, "step": 73876 }, { "epoch": 0.1309918003792624, "grad_norm": 0.5703125, "learning_rate": 0.00043045326761553793, "loss": 0.2277, "step": 73878 }, { "epoch": 0.1309953465445722, "grad_norm": 0.359375, "learning_rate": 0.0004304134960099955, "loss": 0.1569, "step": 73880 }, { "epoch": 0.13099889270988202, "grad_norm": 0.1884765625, "learning_rate": 0.0004303737273328353, "loss": 0.2127, "step": 73882 }, { "epoch": 0.13100243887519183, "grad_norm": 1.09375, "learning_rate": 0.000430333961584232, "loss": 0.2424, "step": 73884 }, { "epoch": 0.13100598504050165, "grad_norm": 0.17578125, "learning_rate": 0.0004302941987643584, "loss": 0.1174, "step": 73886 }, { "epoch": 0.13100953120581146, "grad_norm": 0.2177734375, "learning_rate": 0.0004302544388733892, "loss": 0.3358, "step": 73888 }, { "epoch": 0.13101307737112128, "grad_norm": 0.76953125, "learning_rate": 0.000430214681911498, "loss": 0.2201, "step": 73890 }, { "epoch": 0.1310166235364311, "grad_norm": 1.3515625, "learning_rate": 0.00043017492787885894, "loss": 0.1765, "step": 73892 }, { "epoch": 0.1310201697017409, "grad_norm": 0.94921875, "learning_rate": 0.0004301351767756455, "loss": 0.2826, "step": 73894 }, { "epoch": 0.13102371586705072, "grad_norm": 10.125, "learning_rate": 0.00043009542860203144, "loss": 0.2532, "step": 73896 }, { "epoch": 0.13102726203236054, "grad_norm": 0.53515625, "learning_rate": 0.0004300556833581911, "loss": 0.1531, "step": 73898 }, { "epoch": 0.13103080819767035, "grad_norm": 0.326171875, "learning_rate": 0.0004300159410442981, "loss": 0.1622, "step": 73900 }, { "epoch": 0.13103435436298017, "grad_norm": 0.294921875, "learning_rate": 0.0004299762016605262, "loss": 0.1644, "step": 73902 }, { "epoch": 0.13103790052828998, "grad_norm": 0.2412109375, "learning_rate": 0.0004299364652070489, "loss": 0.1591, "step": 73904 }, { "epoch": 0.1310414466935998, "grad_norm": 0.65625, "learning_rate": 0.0004298967316840406, "loss": 0.1522, "step": 73906 }, { "epoch": 0.1310449928589096, "grad_norm": 0.5546875, "learning_rate": 0.00042985700109167467, "loss": 0.1578, "step": 73908 }, { "epoch": 0.13104853902421942, "grad_norm": 1.0078125, "learning_rate": 0.00042981727343012504, "loss": 0.3388, "step": 73910 }, { "epoch": 0.13105208518952924, "grad_norm": 0.5859375, "learning_rate": 0.00042977754869956533, "loss": 0.1337, "step": 73912 }, { "epoch": 0.13105563135483905, "grad_norm": 0.8046875, "learning_rate": 0.0004297378269001691, "loss": 0.1592, "step": 73914 }, { "epoch": 0.13105917752014887, "grad_norm": 0.2431640625, "learning_rate": 0.0004296981080321104, "loss": 0.2126, "step": 73916 }, { "epoch": 0.13106272368545868, "grad_norm": 0.4453125, "learning_rate": 0.0004296583920955629, "loss": 0.1631, "step": 73918 }, { "epoch": 0.1310662698507685, "grad_norm": 0.62109375, "learning_rate": 0.00042961867909070017, "loss": 0.297, "step": 73920 }, { "epoch": 0.1310698160160783, "grad_norm": 1.0390625, "learning_rate": 0.00042957896901769574, "loss": 0.2324, "step": 73922 }, { "epoch": 0.13107336218138813, "grad_norm": 0.2001953125, "learning_rate": 0.00042953926187672364, "loss": 0.1448, "step": 73924 }, { "epoch": 0.13107690834669794, "grad_norm": 0.365234375, "learning_rate": 0.00042949955766795756, "loss": 0.1414, "step": 73926 }, { "epoch": 0.13108045451200775, "grad_norm": 3.015625, "learning_rate": 0.0004294598563915708, "loss": 0.2928, "step": 73928 }, { "epoch": 0.13108400067731757, "grad_norm": 0.6796875, "learning_rate": 0.0004294201580477371, "loss": 0.4058, "step": 73930 }, { "epoch": 0.13108754684262738, "grad_norm": 0.318359375, "learning_rate": 0.00042938046263662995, "loss": 0.126, "step": 73932 }, { "epoch": 0.1310910930079372, "grad_norm": 0.9609375, "learning_rate": 0.00042934077015842353, "loss": 0.2398, "step": 73934 }, { "epoch": 0.131094639173247, "grad_norm": 0.71484375, "learning_rate": 0.00042930108061329055, "loss": 0.2698, "step": 73936 }, { "epoch": 0.13109818533855683, "grad_norm": 1.3203125, "learning_rate": 0.0004292613940014054, "loss": 0.2238, "step": 73938 }, { "epoch": 0.13110173150386664, "grad_norm": 0.7890625, "learning_rate": 0.0004292217103229409, "loss": 0.1813, "step": 73940 }, { "epoch": 0.13110527766917646, "grad_norm": 0.33984375, "learning_rate": 0.0004291820295780715, "loss": 0.1744, "step": 73942 }, { "epoch": 0.13110882383448627, "grad_norm": 0.9375, "learning_rate": 0.0004291423517669698, "loss": 0.1773, "step": 73944 }, { "epoch": 0.13111236999979609, "grad_norm": 0.3828125, "learning_rate": 0.00042910267688981004, "loss": 0.1895, "step": 73946 }, { "epoch": 0.1311159161651059, "grad_norm": 0.50390625, "learning_rate": 0.0004290630049467654, "loss": 0.1721, "step": 73948 }, { "epoch": 0.13111946233041571, "grad_norm": 0.349609375, "learning_rate": 0.00042902333593800956, "loss": 0.1849, "step": 73950 }, { "epoch": 0.13112300849572553, "grad_norm": 1.3203125, "learning_rate": 0.00042898366986371595, "loss": 0.1954, "step": 73952 }, { "epoch": 0.13112655466103534, "grad_norm": 0.34375, "learning_rate": 0.00042894400672405757, "loss": 0.1611, "step": 73954 }, { "epoch": 0.13113010082634516, "grad_norm": 0.1552734375, "learning_rate": 0.00042890434651920864, "loss": 0.1528, "step": 73956 }, { "epoch": 0.13113364699165497, "grad_norm": 0.3828125, "learning_rate": 0.0004288646892493423, "loss": 0.1611, "step": 73958 }, { "epoch": 0.13113719315696482, "grad_norm": 0.46484375, "learning_rate": 0.00042882503491463187, "loss": 0.1559, "step": 73960 }, { "epoch": 0.13114073932227463, "grad_norm": 0.333984375, "learning_rate": 0.0004287853835152507, "loss": 0.223, "step": 73962 }, { "epoch": 0.13114428548758444, "grad_norm": 0.400390625, "learning_rate": 0.00042874573505137265, "loss": 0.2262, "step": 73964 }, { "epoch": 0.13114783165289426, "grad_norm": 0.1669921875, "learning_rate": 0.00042870608952317083, "loss": 0.134, "step": 73966 }, { "epoch": 0.13115137781820407, "grad_norm": 0.197265625, "learning_rate": 0.0004286664469308186, "loss": 0.1476, "step": 73968 }, { "epoch": 0.1311549239835139, "grad_norm": 0.294921875, "learning_rate": 0.0004286268072744893, "loss": 0.154, "step": 73970 }, { "epoch": 0.1311584701488237, "grad_norm": 0.2890625, "learning_rate": 0.00042858717055435625, "loss": 0.1949, "step": 73972 }, { "epoch": 0.13116201631413352, "grad_norm": 1.15625, "learning_rate": 0.00042854753677059346, "loss": 0.1504, "step": 73974 }, { "epoch": 0.13116556247944333, "grad_norm": 0.314453125, "learning_rate": 0.0004285079059233731, "loss": 0.1623, "step": 73976 }, { "epoch": 0.13116910864475315, "grad_norm": 0.333984375, "learning_rate": 0.00042846827801286934, "loss": 0.2171, "step": 73978 }, { "epoch": 0.13117265481006296, "grad_norm": 0.291015625, "learning_rate": 0.00042842865303925506, "loss": 0.1566, "step": 73980 }, { "epoch": 0.13117620097537278, "grad_norm": 0.421875, "learning_rate": 0.0004283890310027043, "loss": 0.2507, "step": 73982 }, { "epoch": 0.1311797471406826, "grad_norm": 0.333984375, "learning_rate": 0.0004283494119033893, "loss": 0.1383, "step": 73984 }, { "epoch": 0.1311832933059924, "grad_norm": 1.875, "learning_rate": 0.00042830979574148397, "loss": 0.3, "step": 73986 }, { "epoch": 0.13118683947130222, "grad_norm": 0.3828125, "learning_rate": 0.00042827018251716156, "loss": 0.3438, "step": 73988 }, { "epoch": 0.13119038563661203, "grad_norm": 0.2578125, "learning_rate": 0.00042823057223059514, "loss": 0.2693, "step": 73990 }, { "epoch": 0.13119393180192185, "grad_norm": 0.259765625, "learning_rate": 0.000428190964881958, "loss": 0.1995, "step": 73992 }, { "epoch": 0.13119747796723166, "grad_norm": 0.35546875, "learning_rate": 0.000428151360471423, "loss": 0.1577, "step": 73994 }, { "epoch": 0.13120102413254148, "grad_norm": 0.65625, "learning_rate": 0.00042811175899916404, "loss": 0.2353, "step": 73996 }, { "epoch": 0.1312045702978513, "grad_norm": 0.279296875, "learning_rate": 0.0004280721604653539, "loss": 0.1373, "step": 73998 }, { "epoch": 0.1312081164631611, "grad_norm": 0.482421875, "learning_rate": 0.00042803256487016584, "loss": 0.1646, "step": 74000 }, { "epoch": 0.13121166262847092, "grad_norm": 0.181640625, "learning_rate": 0.00042799297221377284, "loss": 0.1819, "step": 74002 }, { "epoch": 0.13121520879378074, "grad_norm": 0.29296875, "learning_rate": 0.0004279533824963484, "loss": 0.186, "step": 74004 }, { "epoch": 0.13121875495909055, "grad_norm": 0.2021484375, "learning_rate": 0.00042791379571806553, "loss": 0.1838, "step": 74006 }, { "epoch": 0.13122230112440036, "grad_norm": 0.353515625, "learning_rate": 0.00042787421187909735, "loss": 0.1466, "step": 74008 }, { "epoch": 0.13122584728971018, "grad_norm": 0.953125, "learning_rate": 0.00042783463097961685, "loss": 0.6519, "step": 74010 }, { "epoch": 0.13122939345502, "grad_norm": 0.419921875, "learning_rate": 0.0004277950530197971, "loss": 0.2224, "step": 74012 }, { "epoch": 0.1312329396203298, "grad_norm": 1.703125, "learning_rate": 0.00042775547799981154, "loss": 0.2199, "step": 74014 }, { "epoch": 0.13123648578563962, "grad_norm": 0.82421875, "learning_rate": 0.000427715905919833, "loss": 0.2405, "step": 74016 }, { "epoch": 0.13124003195094944, "grad_norm": 0.490234375, "learning_rate": 0.00042767633678003457, "loss": 0.2264, "step": 74018 }, { "epoch": 0.13124357811625925, "grad_norm": 0.90625, "learning_rate": 0.00042763677058058915, "loss": 0.3527, "step": 74020 }, { "epoch": 0.13124712428156907, "grad_norm": 0.6875, "learning_rate": 0.0004275972073216701, "loss": 0.1909, "step": 74022 }, { "epoch": 0.13125067044687888, "grad_norm": 0.5703125, "learning_rate": 0.00042755764700345034, "loss": 0.1733, "step": 74024 }, { "epoch": 0.1312542166121887, "grad_norm": 2.953125, "learning_rate": 0.00042751808962610276, "loss": 0.3723, "step": 74026 }, { "epoch": 0.1312577627774985, "grad_norm": 0.3828125, "learning_rate": 0.00042747853518980053, "loss": 0.1506, "step": 74028 }, { "epoch": 0.13126130894280832, "grad_norm": 0.56640625, "learning_rate": 0.0004274389836947162, "loss": 0.1447, "step": 74030 }, { "epoch": 0.13126485510811814, "grad_norm": 0.515625, "learning_rate": 0.00042739943514102366, "loss": 0.2392, "step": 74032 }, { "epoch": 0.13126840127342795, "grad_norm": 0.640625, "learning_rate": 0.0004273598895288947, "loss": 0.3397, "step": 74034 }, { "epoch": 0.13127194743873777, "grad_norm": 0.359375, "learning_rate": 0.0004273203468585033, "loss": 0.1236, "step": 74036 }, { "epoch": 0.13127549360404758, "grad_norm": 0.5546875, "learning_rate": 0.0004272808071300218, "loss": 0.1634, "step": 74038 }, { "epoch": 0.1312790397693574, "grad_norm": 0.46484375, "learning_rate": 0.0004272412703436233, "loss": 0.1697, "step": 74040 }, { "epoch": 0.1312825859346672, "grad_norm": 0.859375, "learning_rate": 0.0004272017364994808, "loss": 0.1937, "step": 74042 }, { "epoch": 0.13128613209997703, "grad_norm": 0.396484375, "learning_rate": 0.00042716220559776676, "loss": 0.2102, "step": 74044 }, { "epoch": 0.13128967826528684, "grad_norm": 0.94921875, "learning_rate": 0.00042712267763865466, "loss": 0.1947, "step": 74046 }, { "epoch": 0.13129322443059666, "grad_norm": 1.328125, "learning_rate": 0.00042708315262231716, "loss": 0.1865, "step": 74048 }, { "epoch": 0.1312967705959065, "grad_norm": 0.2265625, "learning_rate": 0.00042704363054892703, "loss": 0.2277, "step": 74050 }, { "epoch": 0.1313003167612163, "grad_norm": 0.44140625, "learning_rate": 0.0004270041114186569, "loss": 0.1365, "step": 74052 }, { "epoch": 0.13130386292652613, "grad_norm": 0.71875, "learning_rate": 0.0004269645952316802, "loss": 0.1623, "step": 74054 }, { "epoch": 0.13130740909183594, "grad_norm": 0.42578125, "learning_rate": 0.0004269250819881693, "loss": 0.161, "step": 74056 }, { "epoch": 0.13131095525714576, "grad_norm": 0.4140625, "learning_rate": 0.00042688557168829724, "loss": 0.183, "step": 74058 }, { "epoch": 0.13131450142245557, "grad_norm": 1.203125, "learning_rate": 0.0004268460643322366, "loss": 0.302, "step": 74060 }, { "epoch": 0.13131804758776539, "grad_norm": 0.455078125, "learning_rate": 0.00042680655992016005, "loss": 0.2646, "step": 74062 }, { "epoch": 0.1313215937530752, "grad_norm": 0.9140625, "learning_rate": 0.000426767058452241, "loss": 0.2016, "step": 74064 }, { "epoch": 0.13132513991838501, "grad_norm": 0.349609375, "learning_rate": 0.0004267275599286513, "loss": 0.1529, "step": 74066 }, { "epoch": 0.13132868608369483, "grad_norm": 0.30078125, "learning_rate": 0.00042668806434956446, "loss": 0.229, "step": 74068 }, { "epoch": 0.13133223224900464, "grad_norm": 1.625, "learning_rate": 0.00042664857171515266, "loss": 0.2015, "step": 74070 }, { "epoch": 0.13133577841431446, "grad_norm": 0.51953125, "learning_rate": 0.0004266090820255894, "loss": 0.1879, "step": 74072 }, { "epoch": 0.13133932457962427, "grad_norm": 0.39453125, "learning_rate": 0.0004265695952810463, "loss": 0.178, "step": 74074 }, { "epoch": 0.1313428707449341, "grad_norm": 0.7890625, "learning_rate": 0.00042653011148169697, "loss": 0.2479, "step": 74076 }, { "epoch": 0.1313464169102439, "grad_norm": 0.27734375, "learning_rate": 0.00042649063062771364, "loss": 0.1584, "step": 74078 }, { "epoch": 0.13134996307555372, "grad_norm": 0.48828125, "learning_rate": 0.00042645115271926914, "loss": 0.2178, "step": 74080 }, { "epoch": 0.13135350924086353, "grad_norm": 0.404296875, "learning_rate": 0.00042641167775653614, "loss": 0.1893, "step": 74082 }, { "epoch": 0.13135705540617335, "grad_norm": 0.283203125, "learning_rate": 0.00042637220573968674, "loss": 0.1557, "step": 74084 }, { "epoch": 0.13136060157148316, "grad_norm": 0.447265625, "learning_rate": 0.00042633273666889437, "loss": 0.2292, "step": 74086 }, { "epoch": 0.13136414773679297, "grad_norm": 0.408203125, "learning_rate": 0.00042629327054433103, "loss": 0.2148, "step": 74088 }, { "epoch": 0.1313676939021028, "grad_norm": 0.470703125, "learning_rate": 0.00042625380736617004, "loss": 0.1756, "step": 74090 }, { "epoch": 0.1313712400674126, "grad_norm": 0.1943359375, "learning_rate": 0.0004262143471345832, "loss": 0.1684, "step": 74092 }, { "epoch": 0.13137478623272242, "grad_norm": 0.25390625, "learning_rate": 0.0004261748898497435, "loss": 0.1591, "step": 74094 }, { "epoch": 0.13137833239803223, "grad_norm": 0.5390625, "learning_rate": 0.00042613543551182345, "loss": 0.4287, "step": 74096 }, { "epoch": 0.13138187856334205, "grad_norm": 1.125, "learning_rate": 0.0004260959841209957, "loss": 0.179, "step": 74098 }, { "epoch": 0.13138542472865186, "grad_norm": 0.390625, "learning_rate": 0.00042605653567743255, "loss": 0.3154, "step": 74100 }, { "epoch": 0.13138897089396168, "grad_norm": 0.341796875, "learning_rate": 0.00042601709018130643, "loss": 0.1777, "step": 74102 }, { "epoch": 0.1313925170592715, "grad_norm": 0.400390625, "learning_rate": 0.00042597764763279035, "loss": 0.3621, "step": 74104 }, { "epoch": 0.1313960632245813, "grad_norm": 0.267578125, "learning_rate": 0.0004259382080320565, "loss": 0.1653, "step": 74106 }, { "epoch": 0.13139960938989112, "grad_norm": 0.859375, "learning_rate": 0.00042589877137927737, "loss": 0.2466, "step": 74108 }, { "epoch": 0.13140315555520093, "grad_norm": 0.91796875, "learning_rate": 0.00042585933767462514, "loss": 0.1798, "step": 74110 }, { "epoch": 0.13140670172051075, "grad_norm": 0.291015625, "learning_rate": 0.00042581990691827287, "loss": 0.1569, "step": 74112 }, { "epoch": 0.13141024788582056, "grad_norm": 0.60546875, "learning_rate": 0.0004257804791103928, "loss": 0.1503, "step": 74114 }, { "epoch": 0.13141379405113038, "grad_norm": 0.5, "learning_rate": 0.00042574105425115716, "loss": 0.1911, "step": 74116 }, { "epoch": 0.1314173402164402, "grad_norm": 0.380859375, "learning_rate": 0.00042570163234073854, "loss": 0.2012, "step": 74118 }, { "epoch": 0.13142088638175, "grad_norm": 0.5078125, "learning_rate": 0.0004256622133793091, "loss": 0.1963, "step": 74120 }, { "epoch": 0.13142443254705982, "grad_norm": 0.82421875, "learning_rate": 0.0004256227973670419, "loss": 0.2576, "step": 74122 }, { "epoch": 0.13142797871236964, "grad_norm": 1.578125, "learning_rate": 0.0004255833843041084, "loss": 0.2567, "step": 74124 }, { "epoch": 0.13143152487767945, "grad_norm": 0.921875, "learning_rate": 0.0004255439741906816, "loss": 0.3936, "step": 74126 }, { "epoch": 0.13143507104298927, "grad_norm": 0.263671875, "learning_rate": 0.00042550456702693343, "loss": 0.1491, "step": 74128 }, { "epoch": 0.13143861720829908, "grad_norm": 0.7109375, "learning_rate": 0.00042546516281303707, "loss": 0.2248, "step": 74130 }, { "epoch": 0.1314421633736089, "grad_norm": 0.58984375, "learning_rate": 0.00042542576154916374, "loss": 0.2706, "step": 74132 }, { "epoch": 0.1314457095389187, "grad_norm": 0.228515625, "learning_rate": 0.00042538636323548654, "loss": 0.1667, "step": 74134 }, { "epoch": 0.13144925570422852, "grad_norm": 0.20703125, "learning_rate": 0.00042534696787217754, "loss": 0.1565, "step": 74136 }, { "epoch": 0.13145280186953834, "grad_norm": 0.23046875, "learning_rate": 0.00042530757545940895, "loss": 0.1552, "step": 74138 }, { "epoch": 0.13145634803484815, "grad_norm": 0.3125, "learning_rate": 0.0004252681859973532, "loss": 0.1612, "step": 74140 }, { "epoch": 0.131459894200158, "grad_norm": 0.236328125, "learning_rate": 0.0004252287994861822, "loss": 0.2229, "step": 74142 }, { "epoch": 0.1314634403654678, "grad_norm": 0.53125, "learning_rate": 0.0004251894159260686, "loss": 0.1399, "step": 74144 }, { "epoch": 0.13146698653077762, "grad_norm": 0.89453125, "learning_rate": 0.0004251500353171847, "loss": 0.2011, "step": 74146 }, { "epoch": 0.13147053269608744, "grad_norm": 0.296875, "learning_rate": 0.0004251106576597025, "loss": 0.1952, "step": 74148 }, { "epoch": 0.13147407886139725, "grad_norm": 0.36328125, "learning_rate": 0.000425071282953794, "loss": 0.1956, "step": 74150 }, { "epoch": 0.13147762502670707, "grad_norm": 0.2158203125, "learning_rate": 0.0004250319111996319, "loss": 0.1865, "step": 74152 }, { "epoch": 0.13148117119201688, "grad_norm": 0.28515625, "learning_rate": 0.00042499254239738803, "loss": 0.201, "step": 74154 }, { "epoch": 0.1314847173573267, "grad_norm": 0.26953125, "learning_rate": 0.0004249531765472349, "loss": 0.1414, "step": 74156 }, { "epoch": 0.1314882635226365, "grad_norm": 0.5078125, "learning_rate": 0.0004249138136493443, "loss": 0.1959, "step": 74158 }, { "epoch": 0.13149180968794633, "grad_norm": 0.30859375, "learning_rate": 0.0004248744537038884, "loss": 0.2122, "step": 74160 }, { "epoch": 0.13149535585325614, "grad_norm": 0.31640625, "learning_rate": 0.00042483509671104004, "loss": 0.1498, "step": 74162 }, { "epoch": 0.13149890201856596, "grad_norm": 0.486328125, "learning_rate": 0.0004247957426709701, "loss": 0.1576, "step": 74164 }, { "epoch": 0.13150244818387577, "grad_norm": 0.26171875, "learning_rate": 0.00042475639158385175, "loss": 0.2018, "step": 74166 }, { "epoch": 0.13150599434918558, "grad_norm": 0.7734375, "learning_rate": 0.0004247170434498564, "loss": 0.2467, "step": 74168 }, { "epoch": 0.1315095405144954, "grad_norm": 0.6171875, "learning_rate": 0.0004246776982691567, "loss": 0.2092, "step": 74170 }, { "epoch": 0.1315130866798052, "grad_norm": 0.26171875, "learning_rate": 0.0004246383560419246, "loss": 0.1682, "step": 74172 }, { "epoch": 0.13151663284511503, "grad_norm": 0.29296875, "learning_rate": 0.0004245990167683319, "loss": 0.1476, "step": 74174 }, { "epoch": 0.13152017901042484, "grad_norm": 0.220703125, "learning_rate": 0.0004245596804485509, "loss": 0.1725, "step": 74176 }, { "epoch": 0.13152372517573466, "grad_norm": 0.318359375, "learning_rate": 0.0004245203470827531, "loss": 0.2368, "step": 74178 }, { "epoch": 0.13152727134104447, "grad_norm": 0.5625, "learning_rate": 0.0004244810166711115, "loss": 0.1303, "step": 74180 }, { "epoch": 0.1315308175063543, "grad_norm": 1.015625, "learning_rate": 0.0004244416892137971, "loss": 0.1818, "step": 74182 }, { "epoch": 0.1315343636716641, "grad_norm": 0.333984375, "learning_rate": 0.0004244023647109825, "loss": 0.1399, "step": 74184 }, { "epoch": 0.13153790983697392, "grad_norm": 0.37890625, "learning_rate": 0.0004243630431628394, "loss": 0.1533, "step": 74186 }, { "epoch": 0.13154145600228373, "grad_norm": 0.470703125, "learning_rate": 0.0004243237245695404, "loss": 0.2011, "step": 74188 }, { "epoch": 0.13154500216759354, "grad_norm": 1.0859375, "learning_rate": 0.0004242844089312563, "loss": 0.167, "step": 74190 }, { "epoch": 0.13154854833290336, "grad_norm": 3.765625, "learning_rate": 0.00042424509624816, "loss": 0.2046, "step": 74192 }, { "epoch": 0.13155209449821317, "grad_norm": 0.70703125, "learning_rate": 0.0004242057865204231, "loss": 0.162, "step": 74194 }, { "epoch": 0.131555640663523, "grad_norm": 0.498046875, "learning_rate": 0.0004241664797482176, "loss": 0.159, "step": 74196 }, { "epoch": 0.1315591868288328, "grad_norm": 3.96875, "learning_rate": 0.0004241271759317154, "loss": 0.5084, "step": 74198 }, { "epoch": 0.13156273299414262, "grad_norm": 0.46484375, "learning_rate": 0.00042408787507108793, "loss": 0.3496, "step": 74200 }, { "epoch": 0.13156627915945243, "grad_norm": 0.1806640625, "learning_rate": 0.0004240485771665079, "loss": 0.3169, "step": 74202 }, { "epoch": 0.13156982532476225, "grad_norm": 0.64453125, "learning_rate": 0.00042400928221814673, "loss": 0.3246, "step": 74204 }, { "epoch": 0.13157337149007206, "grad_norm": 0.65234375, "learning_rate": 0.00042396999022617613, "loss": 0.1526, "step": 74206 }, { "epoch": 0.13157691765538188, "grad_norm": 0.27734375, "learning_rate": 0.00042393070119076825, "loss": 0.1689, "step": 74208 }, { "epoch": 0.1315804638206917, "grad_norm": 1.265625, "learning_rate": 0.00042389141511209446, "loss": 0.1642, "step": 74210 }, { "epoch": 0.1315840099860015, "grad_norm": 0.5390625, "learning_rate": 0.0004238521319903275, "loss": 0.1803, "step": 74212 }, { "epoch": 0.13158755615131132, "grad_norm": 0.1904296875, "learning_rate": 0.000423812851825638, "loss": 0.1756, "step": 74214 }, { "epoch": 0.13159110231662113, "grad_norm": 0.6953125, "learning_rate": 0.0004237735746181985, "loss": 0.1845, "step": 74216 }, { "epoch": 0.13159464848193095, "grad_norm": 0.359375, "learning_rate": 0.0004237343003681804, "loss": 0.2332, "step": 74218 }, { "epoch": 0.13159819464724076, "grad_norm": 0.453125, "learning_rate": 0.0004236950290757561, "loss": 0.1873, "step": 74220 }, { "epoch": 0.13160174081255058, "grad_norm": 0.251953125, "learning_rate": 0.00042365576074109647, "loss": 0.1824, "step": 74222 }, { "epoch": 0.1316052869778604, "grad_norm": 0.3984375, "learning_rate": 0.00042361649536437374, "loss": 0.1306, "step": 74224 }, { "epoch": 0.1316088331431702, "grad_norm": 0.353515625, "learning_rate": 0.0004235772329457598, "loss": 0.2042, "step": 74226 }, { "epoch": 0.13161237930848002, "grad_norm": 1.078125, "learning_rate": 0.000423537973485426, "loss": 0.1749, "step": 74228 }, { "epoch": 0.13161592547378984, "grad_norm": 0.98828125, "learning_rate": 0.0004234987169835443, "loss": 0.2395, "step": 74230 }, { "epoch": 0.13161947163909968, "grad_norm": 0.78125, "learning_rate": 0.00042345946344028577, "loss": 0.1381, "step": 74232 }, { "epoch": 0.1316230178044095, "grad_norm": 0.48828125, "learning_rate": 0.000423420212855823, "loss": 0.1594, "step": 74234 }, { "epoch": 0.1316265639697193, "grad_norm": 0.98046875, "learning_rate": 0.0004233809652303272, "loss": 0.2598, "step": 74236 }, { "epoch": 0.13163011013502912, "grad_norm": 0.2041015625, "learning_rate": 0.0004233417205639699, "loss": 0.1379, "step": 74238 }, { "epoch": 0.13163365630033894, "grad_norm": 0.380859375, "learning_rate": 0.0004233024788569228, "loss": 0.1672, "step": 74240 }, { "epoch": 0.13163720246564875, "grad_norm": 0.365234375, "learning_rate": 0.00042326324010935775, "loss": 0.2119, "step": 74242 }, { "epoch": 0.13164074863095857, "grad_norm": 0.953125, "learning_rate": 0.0004232240043214461, "loss": 0.185, "step": 74244 }, { "epoch": 0.13164429479626838, "grad_norm": 3.734375, "learning_rate": 0.0004231847714933597, "loss": 0.1979, "step": 74246 }, { "epoch": 0.1316478409615782, "grad_norm": 0.8046875, "learning_rate": 0.00042314554162526985, "loss": 0.4046, "step": 74248 }, { "epoch": 0.131651387126888, "grad_norm": 0.248046875, "learning_rate": 0.00042310631471734807, "loss": 0.3244, "step": 74250 }, { "epoch": 0.13165493329219782, "grad_norm": 0.298828125, "learning_rate": 0.00042306709076976633, "loss": 0.1968, "step": 74252 }, { "epoch": 0.13165847945750764, "grad_norm": 0.26953125, "learning_rate": 0.0004230278697826959, "loss": 0.1615, "step": 74254 }, { "epoch": 0.13166202562281745, "grad_norm": 0.380859375, "learning_rate": 0.0004229886517563083, "loss": 0.1743, "step": 74256 }, { "epoch": 0.13166557178812727, "grad_norm": 0.703125, "learning_rate": 0.0004229494366907748, "loss": 0.2652, "step": 74258 }, { "epoch": 0.13166911795343708, "grad_norm": 0.296875, "learning_rate": 0.00042291022458626747, "loss": 0.2584, "step": 74260 }, { "epoch": 0.1316726641187469, "grad_norm": 1.25, "learning_rate": 0.0004228710154429576, "loss": 0.2255, "step": 74262 }, { "epoch": 0.1316762102840567, "grad_norm": 0.7421875, "learning_rate": 0.00042283180926101663, "loss": 0.2322, "step": 74264 }, { "epoch": 0.13167975644936653, "grad_norm": 0.490234375, "learning_rate": 0.00042279260604061587, "loss": 0.1601, "step": 74266 }, { "epoch": 0.13168330261467634, "grad_norm": 0.326171875, "learning_rate": 0.00042275340578192656, "loss": 0.1881, "step": 74268 }, { "epoch": 0.13168684877998615, "grad_norm": 0.515625, "learning_rate": 0.00042271420848512107, "loss": 0.1596, "step": 74270 }, { "epoch": 0.13169039494529597, "grad_norm": 0.275390625, "learning_rate": 0.0004226750141503697, "loss": 0.1499, "step": 74272 }, { "epoch": 0.13169394111060578, "grad_norm": 0.54296875, "learning_rate": 0.0004226358227778446, "loss": 0.1623, "step": 74274 }, { "epoch": 0.1316974872759156, "grad_norm": 0.6015625, "learning_rate": 0.00042259663436771674, "loss": 0.1794, "step": 74276 }, { "epoch": 0.1317010334412254, "grad_norm": 0.271484375, "learning_rate": 0.00042255744892015815, "loss": 0.1819, "step": 74278 }, { "epoch": 0.13170457960653523, "grad_norm": 1.1015625, "learning_rate": 0.0004225182664353393, "loss": 0.267, "step": 74280 }, { "epoch": 0.13170812577184504, "grad_norm": 0.423828125, "learning_rate": 0.00042247908691343236, "loss": 0.1498, "step": 74282 }, { "epoch": 0.13171167193715486, "grad_norm": 0.921875, "learning_rate": 0.00042243991035460825, "loss": 0.1906, "step": 74284 }, { "epoch": 0.13171521810246467, "grad_norm": 1.1640625, "learning_rate": 0.00042240073675903846, "loss": 0.1791, "step": 74286 }, { "epoch": 0.13171876426777449, "grad_norm": 0.2421875, "learning_rate": 0.0004223615661268942, "loss": 0.237, "step": 74288 }, { "epoch": 0.1317223104330843, "grad_norm": 0.8203125, "learning_rate": 0.0004223223984583466, "loss": 0.1917, "step": 74290 }, { "epoch": 0.13172585659839411, "grad_norm": 0.953125, "learning_rate": 0.0004222832337535674, "loss": 0.1564, "step": 74292 }, { "epoch": 0.13172940276370393, "grad_norm": 0.32421875, "learning_rate": 0.00042224407201272776, "loss": 0.1619, "step": 74294 }, { "epoch": 0.13173294892901374, "grad_norm": 1.1640625, "learning_rate": 0.00042220491323599884, "loss": 0.2071, "step": 74296 }, { "epoch": 0.13173649509432356, "grad_norm": 0.5859375, "learning_rate": 0.0004221657574235516, "loss": 0.1711, "step": 74298 }, { "epoch": 0.13174004125963337, "grad_norm": 0.2353515625, "learning_rate": 0.0004221266045755579, "loss": 0.1913, "step": 74300 }, { "epoch": 0.1317435874249432, "grad_norm": 0.361328125, "learning_rate": 0.0004220874546921886, "loss": 0.2164, "step": 74302 }, { "epoch": 0.131747133590253, "grad_norm": 0.1796875, "learning_rate": 0.00042204830777361514, "loss": 0.1435, "step": 74304 }, { "epoch": 0.13175067975556282, "grad_norm": 0.373046875, "learning_rate": 0.0004220091638200085, "loss": 0.1601, "step": 74306 }, { "epoch": 0.13175422592087263, "grad_norm": 0.66015625, "learning_rate": 0.00042197002283153975, "loss": 0.1858, "step": 74308 }, { "epoch": 0.13175777208618245, "grad_norm": 0.2119140625, "learning_rate": 0.00042193088480838064, "loss": 0.1228, "step": 74310 }, { "epoch": 0.13176131825149226, "grad_norm": 0.76953125, "learning_rate": 0.00042189174975070166, "loss": 0.161, "step": 74312 }, { "epoch": 0.13176486441680207, "grad_norm": 0.609375, "learning_rate": 0.0004218526176586744, "loss": 0.2265, "step": 74314 }, { "epoch": 0.1317684105821119, "grad_norm": 0.279296875, "learning_rate": 0.00042181348853246957, "loss": 0.144, "step": 74316 }, { "epoch": 0.1317719567474217, "grad_norm": 0.31640625, "learning_rate": 0.0004217743623722593, "loss": 0.1819, "step": 74318 }, { "epoch": 0.13177550291273152, "grad_norm": 0.4609375, "learning_rate": 0.0004217352391782133, "loss": 0.1541, "step": 74320 }, { "epoch": 0.13177904907804136, "grad_norm": 0.7109375, "learning_rate": 0.0004216961189505038, "loss": 0.1849, "step": 74322 }, { "epoch": 0.13178259524335117, "grad_norm": 0.412109375, "learning_rate": 0.00042165700168930145, "loss": 0.1874, "step": 74324 }, { "epoch": 0.131786141408661, "grad_norm": 0.435546875, "learning_rate": 0.00042161788739477723, "loss": 0.1629, "step": 74326 }, { "epoch": 0.1317896875739708, "grad_norm": 0.57421875, "learning_rate": 0.0004215787760671024, "loss": 0.2072, "step": 74328 }, { "epoch": 0.13179323373928062, "grad_norm": 0.5390625, "learning_rate": 0.0004215396677064476, "loss": 0.1492, "step": 74330 }, { "epoch": 0.13179677990459043, "grad_norm": 0.412109375, "learning_rate": 0.00042150056231298453, "loss": 0.1661, "step": 74332 }, { "epoch": 0.13180032606990025, "grad_norm": 0.609375, "learning_rate": 0.00042146145988688355, "loss": 0.1923, "step": 74334 }, { "epoch": 0.13180387223521006, "grad_norm": 0.62890625, "learning_rate": 0.00042142236042831666, "loss": 0.1587, "step": 74336 }, { "epoch": 0.13180741840051988, "grad_norm": 0.54296875, "learning_rate": 0.00042138326393745355, "loss": 0.2134, "step": 74338 }, { "epoch": 0.1318109645658297, "grad_norm": 0.291015625, "learning_rate": 0.00042134417041446616, "loss": 0.254, "step": 74340 }, { "epoch": 0.1318145107311395, "grad_norm": 0.1953125, "learning_rate": 0.00042130507985952515, "loss": 0.167, "step": 74342 }, { "epoch": 0.13181805689644932, "grad_norm": 0.2431640625, "learning_rate": 0.0004212659922728015, "loss": 0.2037, "step": 74344 }, { "epoch": 0.13182160306175914, "grad_norm": 2.453125, "learning_rate": 0.00042122690765446615, "loss": 0.2526, "step": 74346 }, { "epoch": 0.13182514922706895, "grad_norm": 0.79296875, "learning_rate": 0.00042118782600468973, "loss": 0.2269, "step": 74348 }, { "epoch": 0.13182869539237876, "grad_norm": 0.6171875, "learning_rate": 0.0004211487473236437, "loss": 0.1944, "step": 74350 }, { "epoch": 0.13183224155768858, "grad_norm": 0.2216796875, "learning_rate": 0.0004211096716114988, "loss": 0.3273, "step": 74352 }, { "epoch": 0.1318357877229984, "grad_norm": 1.2734375, "learning_rate": 0.00042107059886842583, "loss": 0.1781, "step": 74354 }, { "epoch": 0.1318393338883082, "grad_norm": 0.296875, "learning_rate": 0.00042103152909459544, "loss": 0.1735, "step": 74356 }, { "epoch": 0.13184288005361802, "grad_norm": 0.31640625, "learning_rate": 0.000420992462290179, "loss": 0.1521, "step": 74358 }, { "epoch": 0.13184642621892784, "grad_norm": 1.4609375, "learning_rate": 0.00042095339845534707, "loss": 0.2618, "step": 74360 }, { "epoch": 0.13184997238423765, "grad_norm": 0.162109375, "learning_rate": 0.00042091433759027056, "loss": 0.1488, "step": 74362 }, { "epoch": 0.13185351854954747, "grad_norm": 0.39453125, "learning_rate": 0.00042087527969512033, "loss": 0.2507, "step": 74364 }, { "epoch": 0.13185706471485728, "grad_norm": 0.447265625, "learning_rate": 0.0004208362247700669, "loss": 0.1471, "step": 74366 }, { "epoch": 0.1318606108801671, "grad_norm": 0.33203125, "learning_rate": 0.0004207971728152817, "loss": 0.1952, "step": 74368 }, { "epoch": 0.1318641570454769, "grad_norm": 0.47265625, "learning_rate": 0.0004207581238309347, "loss": 0.2106, "step": 74370 }, { "epoch": 0.13186770321078672, "grad_norm": 0.298828125, "learning_rate": 0.0004207190778171974, "loss": 0.1637, "step": 74372 }, { "epoch": 0.13187124937609654, "grad_norm": 0.458984375, "learning_rate": 0.00042068003477424034, "loss": 0.1467, "step": 74374 }, { "epoch": 0.13187479554140635, "grad_norm": 0.421875, "learning_rate": 0.00042064099470223417, "loss": 0.1614, "step": 74376 }, { "epoch": 0.13187834170671617, "grad_norm": 0.1865234375, "learning_rate": 0.00042060195760134934, "loss": 0.2278, "step": 74378 }, { "epoch": 0.13188188787202598, "grad_norm": 0.3515625, "learning_rate": 0.00042056292347175705, "loss": 0.1725, "step": 74380 }, { "epoch": 0.1318854340373358, "grad_norm": 0.4765625, "learning_rate": 0.0004205238923136281, "loss": 0.1816, "step": 74382 }, { "epoch": 0.1318889802026456, "grad_norm": 0.921875, "learning_rate": 0.00042048486412713285, "loss": 0.2016, "step": 74384 }, { "epoch": 0.13189252636795543, "grad_norm": 0.75390625, "learning_rate": 0.00042044583891244215, "loss": 0.1958, "step": 74386 }, { "epoch": 0.13189607253326524, "grad_norm": 1.8828125, "learning_rate": 0.0004204068166697264, "loss": 0.4712, "step": 74388 }, { "epoch": 0.13189961869857506, "grad_norm": 0.3515625, "learning_rate": 0.0004203677973991565, "loss": 0.34, "step": 74390 }, { "epoch": 0.13190316486388487, "grad_norm": 0.26953125, "learning_rate": 0.00042032878110090334, "loss": 0.1503, "step": 74392 }, { "epoch": 0.13190671102919468, "grad_norm": 0.353515625, "learning_rate": 0.00042028976777513716, "loss": 0.1423, "step": 74394 }, { "epoch": 0.1319102571945045, "grad_norm": 0.224609375, "learning_rate": 0.0004202507574220288, "loss": 0.1229, "step": 74396 }, { "epoch": 0.1319138033598143, "grad_norm": 0.89453125, "learning_rate": 0.0004202117500417484, "loss": 0.1498, "step": 74398 }, { "epoch": 0.13191734952512413, "grad_norm": 0.53515625, "learning_rate": 0.0004201727456344675, "loss": 0.191, "step": 74400 }, { "epoch": 0.13192089569043394, "grad_norm": 3.421875, "learning_rate": 0.0004201337442003558, "loss": 0.1433, "step": 74402 }, { "epoch": 0.13192444185574376, "grad_norm": 1.6171875, "learning_rate": 0.00042009474573958416, "loss": 0.2636, "step": 74404 }, { "epoch": 0.13192798802105357, "grad_norm": 0.6875, "learning_rate": 0.00042005575025232307, "loss": 0.1864, "step": 74406 }, { "epoch": 0.1319315341863634, "grad_norm": 0.1591796875, "learning_rate": 0.0004200167577387436, "loss": 0.1713, "step": 74408 }, { "epoch": 0.1319350803516732, "grad_norm": 0.1767578125, "learning_rate": 0.0004199777681990154, "loss": 0.1537, "step": 74410 }, { "epoch": 0.13193862651698302, "grad_norm": 0.55078125, "learning_rate": 0.0004199387816333097, "loss": 0.1512, "step": 74412 }, { "epoch": 0.13194217268229286, "grad_norm": 0.2138671875, "learning_rate": 0.0004198997980417967, "loss": 0.1651, "step": 74414 }, { "epoch": 0.13194571884760267, "grad_norm": 0.255859375, "learning_rate": 0.0004198608174246469, "loss": 0.1836, "step": 74416 }, { "epoch": 0.1319492650129125, "grad_norm": 0.314453125, "learning_rate": 0.00041982183978203093, "loss": 0.1929, "step": 74418 }, { "epoch": 0.1319528111782223, "grad_norm": 0.2060546875, "learning_rate": 0.0004197828651141188, "loss": 0.1839, "step": 74420 }, { "epoch": 0.13195635734353212, "grad_norm": 0.265625, "learning_rate": 0.00041974389342108156, "loss": 0.1766, "step": 74422 }, { "epoch": 0.13195990350884193, "grad_norm": 0.287109375, "learning_rate": 0.0004197049247030891, "loss": 0.1477, "step": 74424 }, { "epoch": 0.13196344967415174, "grad_norm": 0.279296875, "learning_rate": 0.0004196659589603127, "loss": 0.163, "step": 74426 }, { "epoch": 0.13196699583946156, "grad_norm": 0.4609375, "learning_rate": 0.00041962699619292176, "loss": 0.1469, "step": 74428 }, { "epoch": 0.13197054200477137, "grad_norm": 0.486328125, "learning_rate": 0.00041958803640108726, "loss": 0.1391, "step": 74430 }, { "epoch": 0.1319740881700812, "grad_norm": 0.22265625, "learning_rate": 0.00041954907958497947, "loss": 0.1481, "step": 74432 }, { "epoch": 0.131977634335391, "grad_norm": 0.5625, "learning_rate": 0.0004195101257447688, "loss": 0.1672, "step": 74434 }, { "epoch": 0.13198118050070082, "grad_norm": 0.21875, "learning_rate": 0.0004194711748806257, "loss": 0.1128, "step": 74436 }, { "epoch": 0.13198472666601063, "grad_norm": 0.90234375, "learning_rate": 0.00041943222699271994, "loss": 0.209, "step": 74438 }, { "epoch": 0.13198827283132045, "grad_norm": 0.89453125, "learning_rate": 0.00041939328208122253, "loss": 0.1669, "step": 74440 }, { "epoch": 0.13199181899663026, "grad_norm": 0.388671875, "learning_rate": 0.00041935434014630363, "loss": 0.1456, "step": 74442 }, { "epoch": 0.13199536516194008, "grad_norm": 1.328125, "learning_rate": 0.0004193154011881336, "loss": 0.254, "step": 74444 }, { "epoch": 0.1319989113272499, "grad_norm": 0.88671875, "learning_rate": 0.00041927646520688224, "loss": 0.2274, "step": 74446 }, { "epoch": 0.1320024574925597, "grad_norm": 1.9140625, "learning_rate": 0.0004192375322027204, "loss": 0.3845, "step": 74448 }, { "epoch": 0.13200600365786952, "grad_norm": 0.2197265625, "learning_rate": 0.0004191986021758182, "loss": 0.1208, "step": 74450 }, { "epoch": 0.13200954982317933, "grad_norm": 0.1650390625, "learning_rate": 0.00041915967512634594, "loss": 0.1663, "step": 74452 }, { "epoch": 0.13201309598848915, "grad_norm": 0.39453125, "learning_rate": 0.00041912075105447364, "loss": 0.1671, "step": 74454 }, { "epoch": 0.13201664215379896, "grad_norm": 0.41015625, "learning_rate": 0.0004190818299603716, "loss": 0.1778, "step": 74456 }, { "epoch": 0.13202018831910878, "grad_norm": 0.5703125, "learning_rate": 0.0004190429118442105, "loss": 0.2243, "step": 74458 }, { "epoch": 0.1320237344844186, "grad_norm": 0.78125, "learning_rate": 0.00041900399670615976, "loss": 0.1727, "step": 74460 }, { "epoch": 0.1320272806497284, "grad_norm": 0.5390625, "learning_rate": 0.00041896508454639006, "loss": 0.1294, "step": 74462 }, { "epoch": 0.13203082681503822, "grad_norm": 0.419921875, "learning_rate": 0.0004189261753650714, "loss": 0.1881, "step": 74464 }, { "epoch": 0.13203437298034804, "grad_norm": 0.5, "learning_rate": 0.0004188872691623744, "loss": 0.1763, "step": 74466 }, { "epoch": 0.13203791914565785, "grad_norm": 0.259765625, "learning_rate": 0.0004188483659384684, "loss": 0.1513, "step": 74468 }, { "epoch": 0.13204146531096767, "grad_norm": 0.291015625, "learning_rate": 0.0004188094656935243, "loss": 0.2094, "step": 74470 }, { "epoch": 0.13204501147627748, "grad_norm": 0.4609375, "learning_rate": 0.00041877056842771175, "loss": 0.2166, "step": 74472 }, { "epoch": 0.1320485576415873, "grad_norm": 6.21875, "learning_rate": 0.0004187316741412012, "loss": 0.1985, "step": 74474 }, { "epoch": 0.1320521038068971, "grad_norm": 0.306640625, "learning_rate": 0.0004186927828341625, "loss": 0.1932, "step": 74476 }, { "epoch": 0.13205564997220692, "grad_norm": 0.263671875, "learning_rate": 0.0004186538945067656, "loss": 0.2419, "step": 74478 }, { "epoch": 0.13205919613751674, "grad_norm": 0.25390625, "learning_rate": 0.00041861500915918096, "loss": 0.1829, "step": 74480 }, { "epoch": 0.13206274230282655, "grad_norm": 0.51171875, "learning_rate": 0.0004185761267915785, "loss": 0.1962, "step": 74482 }, { "epoch": 0.13206628846813637, "grad_norm": 0.87890625, "learning_rate": 0.00041853724740412835, "loss": 0.145, "step": 74484 }, { "epoch": 0.13206983463344618, "grad_norm": 0.6875, "learning_rate": 0.0004184983709970001, "loss": 0.1941, "step": 74486 }, { "epoch": 0.132073380798756, "grad_norm": 0.25, "learning_rate": 0.0004184594975703644, "loss": 0.1361, "step": 74488 }, { "epoch": 0.1320769269640658, "grad_norm": 0.443359375, "learning_rate": 0.0004184206271243909, "loss": 0.1658, "step": 74490 }, { "epoch": 0.13208047312937563, "grad_norm": 0.3671875, "learning_rate": 0.00041838175965924973, "loss": 0.2356, "step": 74492 }, { "epoch": 0.13208401929468544, "grad_norm": 1.765625, "learning_rate": 0.00041834289517511084, "loss": 0.1742, "step": 74494 }, { "epoch": 0.13208756545999525, "grad_norm": 0.578125, "learning_rate": 0.0004183040336721439, "loss": 0.2187, "step": 74496 }, { "epoch": 0.13209111162530507, "grad_norm": 0.640625, "learning_rate": 0.00041826517515051965, "loss": 0.4071, "step": 74498 }, { "epoch": 0.13209465779061488, "grad_norm": 0.29296875, "learning_rate": 0.00041822631961040694, "loss": 0.1837, "step": 74500 }, { "epoch": 0.1320982039559247, "grad_norm": 0.357421875, "learning_rate": 0.0004181874670519766, "loss": 0.1336, "step": 74502 }, { "epoch": 0.13210175012123454, "grad_norm": 0.189453125, "learning_rate": 0.000418148617475398, "loss": 0.1928, "step": 74504 }, { "epoch": 0.13210529628654435, "grad_norm": 0.287109375, "learning_rate": 0.00041810977088084153, "loss": 0.2232, "step": 74506 }, { "epoch": 0.13210884245185417, "grad_norm": 0.310546875, "learning_rate": 0.0004180709272684768, "loss": 0.1752, "step": 74508 }, { "epoch": 0.13211238861716398, "grad_norm": 0.44140625, "learning_rate": 0.00041803208663847367, "loss": 0.2039, "step": 74510 }, { "epoch": 0.1321159347824738, "grad_norm": 0.53515625, "learning_rate": 0.0004179932489910022, "loss": 0.2011, "step": 74512 }, { "epoch": 0.1321194809477836, "grad_norm": 0.4921875, "learning_rate": 0.0004179544143262317, "loss": 0.1676, "step": 74514 }, { "epoch": 0.13212302711309343, "grad_norm": 0.38671875, "learning_rate": 0.00041791558264433305, "loss": 0.1238, "step": 74516 }, { "epoch": 0.13212657327840324, "grad_norm": 0.8203125, "learning_rate": 0.00041787675394547487, "loss": 0.2972, "step": 74518 }, { "epoch": 0.13213011944371306, "grad_norm": 0.33203125, "learning_rate": 0.00041783792822982786, "loss": 0.1944, "step": 74520 }, { "epoch": 0.13213366560902287, "grad_norm": 0.333984375, "learning_rate": 0.00041779910549756116, "loss": 0.1502, "step": 74522 }, { "epoch": 0.13213721177433269, "grad_norm": 0.58203125, "learning_rate": 0.00041776028574884547, "loss": 0.2352, "step": 74524 }, { "epoch": 0.1321407579396425, "grad_norm": 0.23828125, "learning_rate": 0.00041772146898384944, "loss": 0.1552, "step": 74526 }, { "epoch": 0.13214430410495231, "grad_norm": 0.357421875, "learning_rate": 0.00041768265520274373, "loss": 0.1801, "step": 74528 }, { "epoch": 0.13214785027026213, "grad_norm": 0.2236328125, "learning_rate": 0.0004176438444056977, "loss": 0.1976, "step": 74530 }, { "epoch": 0.13215139643557194, "grad_norm": 0.4609375, "learning_rate": 0.00041760503659288105, "loss": 0.1622, "step": 74532 }, { "epoch": 0.13215494260088176, "grad_norm": 0.451171875, "learning_rate": 0.0004175662317644637, "loss": 0.1553, "step": 74534 }, { "epoch": 0.13215848876619157, "grad_norm": 1.0703125, "learning_rate": 0.00041752742992061495, "loss": 0.3061, "step": 74536 }, { "epoch": 0.1321620349315014, "grad_norm": 0.52734375, "learning_rate": 0.0004174886310615051, "loss": 0.1804, "step": 74538 }, { "epoch": 0.1321655810968112, "grad_norm": 0.2216796875, "learning_rate": 0.0004174498351873034, "loss": 0.2372, "step": 74540 }, { "epoch": 0.13216912726212102, "grad_norm": 0.81640625, "learning_rate": 0.00041741104229817984, "loss": 0.1493, "step": 74542 }, { "epoch": 0.13217267342743083, "grad_norm": 0.3515625, "learning_rate": 0.00041737225239430343, "loss": 0.2051, "step": 74544 }, { "epoch": 0.13217621959274065, "grad_norm": 0.8671875, "learning_rate": 0.0004173334654758446, "loss": 0.2173, "step": 74546 }, { "epoch": 0.13217976575805046, "grad_norm": 0.35546875, "learning_rate": 0.0004172946815429728, "loss": 0.1485, "step": 74548 }, { "epoch": 0.13218331192336027, "grad_norm": 0.380859375, "learning_rate": 0.00041725590059585733, "loss": 0.1971, "step": 74550 }, { "epoch": 0.1321868580886701, "grad_norm": 2.015625, "learning_rate": 0.00041721712263466805, "loss": 0.2686, "step": 74552 }, { "epoch": 0.1321904042539799, "grad_norm": 5.6875, "learning_rate": 0.0004171783476595742, "loss": 0.2886, "step": 74554 }, { "epoch": 0.13219395041928972, "grad_norm": 0.384765625, "learning_rate": 0.00041713957567074606, "loss": 0.1792, "step": 74556 }, { "epoch": 0.13219749658459953, "grad_norm": 0.41015625, "learning_rate": 0.0004171008066683523, "loss": 0.1614, "step": 74558 }, { "epoch": 0.13220104274990935, "grad_norm": 4.875, "learning_rate": 0.0004170620406525632, "loss": 0.2516, "step": 74560 }, { "epoch": 0.13220458891521916, "grad_norm": 0.2578125, "learning_rate": 0.000417023277623548, "loss": 0.174, "step": 74562 }, { "epoch": 0.13220813508052898, "grad_norm": 0.39453125, "learning_rate": 0.0004169845175814764, "loss": 0.1322, "step": 74564 }, { "epoch": 0.1322116812458388, "grad_norm": 0.396484375, "learning_rate": 0.0004169457605265176, "loss": 0.1532, "step": 74566 }, { "epoch": 0.1322152274111486, "grad_norm": 0.96484375, "learning_rate": 0.00041690700645884116, "loss": 0.1453, "step": 74568 }, { "epoch": 0.13221877357645842, "grad_norm": 0.69921875, "learning_rate": 0.0004168682553786167, "loss": 0.1667, "step": 74570 }, { "epoch": 0.13222231974176823, "grad_norm": 0.31640625, "learning_rate": 0.000416829507286014, "loss": 0.2653, "step": 74572 }, { "epoch": 0.13222586590707805, "grad_norm": 0.197265625, "learning_rate": 0.00041679076218120205, "loss": 0.1524, "step": 74574 }, { "epoch": 0.13222941207238786, "grad_norm": 0.419921875, "learning_rate": 0.0004167520200643501, "loss": 0.1891, "step": 74576 }, { "epoch": 0.13223295823769768, "grad_norm": 0.19140625, "learning_rate": 0.0004167132809356283, "loss": 0.1594, "step": 74578 }, { "epoch": 0.1322365044030075, "grad_norm": 0.427734375, "learning_rate": 0.00041667454479520576, "loss": 0.1646, "step": 74580 }, { "epoch": 0.1322400505683173, "grad_norm": 0.154296875, "learning_rate": 0.0004166358116432518, "loss": 0.1586, "step": 74582 }, { "epoch": 0.13224359673362712, "grad_norm": 0.38671875, "learning_rate": 0.0004165970814799358, "loss": 0.2161, "step": 74584 }, { "epoch": 0.13224714289893694, "grad_norm": 0.330078125, "learning_rate": 0.0004165583543054271, "loss": 0.1291, "step": 74586 }, { "epoch": 0.13225068906424675, "grad_norm": 0.421875, "learning_rate": 0.00041651963011989524, "loss": 0.2416, "step": 74588 }, { "epoch": 0.13225423522955657, "grad_norm": 0.4140625, "learning_rate": 0.00041648090892350953, "loss": 0.1948, "step": 74590 }, { "epoch": 0.13225778139486638, "grad_norm": 1.2890625, "learning_rate": 0.0004164421907164393, "loss": 0.3082, "step": 74592 }, { "epoch": 0.13226132756017622, "grad_norm": 0.58984375, "learning_rate": 0.0004164034754988537, "loss": 0.1803, "step": 74594 }, { "epoch": 0.13226487372548604, "grad_norm": 0.291015625, "learning_rate": 0.00041636476327092245, "loss": 0.1866, "step": 74596 }, { "epoch": 0.13226841989079585, "grad_norm": 0.58984375, "learning_rate": 0.00041632605403281453, "loss": 0.1599, "step": 74598 }, { "epoch": 0.13227196605610567, "grad_norm": 0.4375, "learning_rate": 0.00041628734778469947, "loss": 0.2012, "step": 74600 }, { "epoch": 0.13227551222141548, "grad_norm": 0.26171875, "learning_rate": 0.00041624864452674635, "loss": 0.1523, "step": 74602 }, { "epoch": 0.1322790583867253, "grad_norm": 1.6953125, "learning_rate": 0.0004162099442591242, "loss": 0.2303, "step": 74604 }, { "epoch": 0.1322826045520351, "grad_norm": 3.96875, "learning_rate": 0.000416171246982003, "loss": 0.252, "step": 74606 }, { "epoch": 0.13228615071734492, "grad_norm": 0.27734375, "learning_rate": 0.0004161325526955511, "loss": 0.2074, "step": 74608 }, { "epoch": 0.13228969688265474, "grad_norm": 0.470703125, "learning_rate": 0.00041609386139993855, "loss": 0.1475, "step": 74610 }, { "epoch": 0.13229324304796455, "grad_norm": 0.267578125, "learning_rate": 0.0004160551730953338, "loss": 0.1332, "step": 74612 }, { "epoch": 0.13229678921327437, "grad_norm": 0.2275390625, "learning_rate": 0.000416016487781907, "loss": 0.1426, "step": 74614 }, { "epoch": 0.13230033537858418, "grad_norm": 0.302734375, "learning_rate": 0.0004159778054598262, "loss": 0.2162, "step": 74616 }, { "epoch": 0.132303881543894, "grad_norm": 0.22265625, "learning_rate": 0.00041593912612926143, "loss": 0.2578, "step": 74618 }, { "epoch": 0.1323074277092038, "grad_norm": 0.55078125, "learning_rate": 0.0004159004497903816, "loss": 0.2322, "step": 74620 }, { "epoch": 0.13231097387451363, "grad_norm": 1.671875, "learning_rate": 0.00041586177644335585, "loss": 0.3281, "step": 74622 }, { "epoch": 0.13231452003982344, "grad_norm": 0.2001953125, "learning_rate": 0.0004158231060883533, "loss": 0.182, "step": 74624 }, { "epoch": 0.13231806620513326, "grad_norm": 0.361328125, "learning_rate": 0.0004157844387255428, "loss": 0.1974, "step": 74626 }, { "epoch": 0.13232161237044307, "grad_norm": 0.359375, "learning_rate": 0.00041574577435509396, "loss": 0.2053, "step": 74628 }, { "epoch": 0.13232515853575288, "grad_norm": 0.66015625, "learning_rate": 0.0004157071129771756, "loss": 0.1801, "step": 74630 }, { "epoch": 0.1323287047010627, "grad_norm": 0.46875, "learning_rate": 0.0004156684545919569, "loss": 0.1568, "step": 74632 }, { "epoch": 0.13233225086637251, "grad_norm": 0.1640625, "learning_rate": 0.00041562979919960665, "loss": 0.1678, "step": 74634 }, { "epoch": 0.13233579703168233, "grad_norm": 0.251953125, "learning_rate": 0.00041559114680029435, "loss": 0.2099, "step": 74636 }, { "epoch": 0.13233934319699214, "grad_norm": 0.400390625, "learning_rate": 0.00041555249739418885, "loss": 0.2336, "step": 74638 }, { "epoch": 0.13234288936230196, "grad_norm": 0.32421875, "learning_rate": 0.00041551385098145897, "loss": 0.2069, "step": 74640 }, { "epoch": 0.13234643552761177, "grad_norm": 0.349609375, "learning_rate": 0.0004154752075622741, "loss": 0.201, "step": 74642 }, { "epoch": 0.1323499816929216, "grad_norm": 0.33203125, "learning_rate": 0.00041543656713680284, "loss": 0.1457, "step": 74644 }, { "epoch": 0.1323535278582314, "grad_norm": 0.2734375, "learning_rate": 0.0004153979297052148, "loss": 0.1441, "step": 74646 }, { "epoch": 0.13235707402354122, "grad_norm": 0.291015625, "learning_rate": 0.000415359295267678, "loss": 0.1882, "step": 74648 }, { "epoch": 0.13236062018885103, "grad_norm": 2.328125, "learning_rate": 0.00041532066382436234, "loss": 0.341, "step": 74650 }, { "epoch": 0.13236416635416084, "grad_norm": 1.1171875, "learning_rate": 0.000415282035375436, "loss": 0.1515, "step": 74652 }, { "epoch": 0.13236771251947066, "grad_norm": 1.6484375, "learning_rate": 0.0004152434099210689, "loss": 0.2337, "step": 74654 }, { "epoch": 0.13237125868478047, "grad_norm": 0.30859375, "learning_rate": 0.00041520478746142887, "loss": 0.1525, "step": 74656 }, { "epoch": 0.1323748048500903, "grad_norm": 0.65234375, "learning_rate": 0.0004151661679966855, "loss": 0.1715, "step": 74658 }, { "epoch": 0.1323783510154001, "grad_norm": 0.6953125, "learning_rate": 0.00041512755152700755, "loss": 0.2121, "step": 74660 }, { "epoch": 0.13238189718070992, "grad_norm": 1.65625, "learning_rate": 0.000415088938052564, "loss": 0.2033, "step": 74662 }, { "epoch": 0.13238544334601973, "grad_norm": 0.47265625, "learning_rate": 0.0004150503275735234, "loss": 0.1809, "step": 74664 }, { "epoch": 0.13238898951132955, "grad_norm": 0.408203125, "learning_rate": 0.0004150117200900546, "loss": 0.2231, "step": 74666 }, { "epoch": 0.13239253567663936, "grad_norm": 0.51953125, "learning_rate": 0.0004149731156023269, "loss": 0.2731, "step": 74668 }, { "epoch": 0.13239608184194918, "grad_norm": 0.349609375, "learning_rate": 0.0004149345141105087, "loss": 0.1771, "step": 74670 }, { "epoch": 0.132399628007259, "grad_norm": 0.62890625, "learning_rate": 0.00041489591561476936, "loss": 0.2218, "step": 74672 }, { "epoch": 0.1324031741725688, "grad_norm": 0.259765625, "learning_rate": 0.000414857320115277, "loss": 0.1161, "step": 74674 }, { "epoch": 0.13240672033787862, "grad_norm": 0.609375, "learning_rate": 0.00041481872761220086, "loss": 0.1815, "step": 74676 }, { "epoch": 0.13241026650318843, "grad_norm": 0.294921875, "learning_rate": 0.0004147801381057097, "loss": 0.1477, "step": 74678 }, { "epoch": 0.13241381266849825, "grad_norm": 0.466796875, "learning_rate": 0.0004147415515959722, "loss": 0.1901, "step": 74680 }, { "epoch": 0.13241735883380806, "grad_norm": 0.37109375, "learning_rate": 0.00041470296808315707, "loss": 0.1717, "step": 74682 }, { "epoch": 0.13242090499911788, "grad_norm": 0.310546875, "learning_rate": 0.0004146643875674328, "loss": 0.2032, "step": 74684 }, { "epoch": 0.13242445116442772, "grad_norm": 0.294921875, "learning_rate": 0.0004146258100489686, "loss": 0.209, "step": 74686 }, { "epoch": 0.13242799732973753, "grad_norm": 0.41015625, "learning_rate": 0.0004145872355279331, "loss": 0.1584, "step": 74688 }, { "epoch": 0.13243154349504735, "grad_norm": 0.140625, "learning_rate": 0.0004145486640044949, "loss": 0.2061, "step": 74690 }, { "epoch": 0.13243508966035716, "grad_norm": 0.69921875, "learning_rate": 0.0004145100954788224, "loss": 0.2408, "step": 74692 }, { "epoch": 0.13243863582566698, "grad_norm": 0.16015625, "learning_rate": 0.00041447152995108483, "loss": 0.1775, "step": 74694 }, { "epoch": 0.1324421819909768, "grad_norm": 0.93359375, "learning_rate": 0.00041443296742145057, "loss": 0.1804, "step": 74696 }, { "epoch": 0.1324457281562866, "grad_norm": 0.40234375, "learning_rate": 0.0004143944078900883, "loss": 0.1721, "step": 74698 }, { "epoch": 0.13244927432159642, "grad_norm": 1.65625, "learning_rate": 0.0004143558513571667, "loss": 0.3154, "step": 74700 }, { "epoch": 0.13245282048690624, "grad_norm": 1.2421875, "learning_rate": 0.000414317297822854, "loss": 0.1416, "step": 74702 }, { "epoch": 0.13245636665221605, "grad_norm": 0.310546875, "learning_rate": 0.0004142787472873196, "loss": 0.1901, "step": 74704 }, { "epoch": 0.13245991281752587, "grad_norm": 0.2060546875, "learning_rate": 0.00041424019975073133, "loss": 0.1577, "step": 74706 }, { "epoch": 0.13246345898283568, "grad_norm": 0.4296875, "learning_rate": 0.00041420165521325816, "loss": 0.1902, "step": 74708 }, { "epoch": 0.1324670051481455, "grad_norm": 0.56640625, "learning_rate": 0.00041416311367506843, "loss": 0.2204, "step": 74710 }, { "epoch": 0.1324705513134553, "grad_norm": 0.255859375, "learning_rate": 0.0004141245751363314, "loss": 0.1237, "step": 74712 }, { "epoch": 0.13247409747876512, "grad_norm": 0.4609375, "learning_rate": 0.0004140860395972145, "loss": 0.2237, "step": 74714 }, { "epoch": 0.13247764364407494, "grad_norm": 0.50390625, "learning_rate": 0.00041404750705788707, "loss": 0.2043, "step": 74716 }, { "epoch": 0.13248118980938475, "grad_norm": 1.8046875, "learning_rate": 0.0004140089775185174, "loss": 0.2717, "step": 74718 }, { "epoch": 0.13248473597469457, "grad_norm": 0.4296875, "learning_rate": 0.000413970450979274, "loss": 0.2211, "step": 74720 }, { "epoch": 0.13248828214000438, "grad_norm": 0.439453125, "learning_rate": 0.00041393192744032536, "loss": 0.152, "step": 74722 }, { "epoch": 0.1324918283053142, "grad_norm": 0.61328125, "learning_rate": 0.0004138934069018397, "loss": 0.186, "step": 74724 }, { "epoch": 0.132495374470624, "grad_norm": 0.64453125, "learning_rate": 0.00041385488936398606, "loss": 0.1443, "step": 74726 }, { "epoch": 0.13249892063593383, "grad_norm": 0.466796875, "learning_rate": 0.00041381637482693247, "loss": 0.2463, "step": 74728 }, { "epoch": 0.13250246680124364, "grad_norm": 0.337890625, "learning_rate": 0.00041377786329084755, "loss": 0.1949, "step": 74730 }, { "epoch": 0.13250601296655345, "grad_norm": 0.380859375, "learning_rate": 0.0004137393547558996, "loss": 0.2479, "step": 74732 }, { "epoch": 0.13250955913186327, "grad_norm": 0.470703125, "learning_rate": 0.00041370084922225694, "loss": 0.1348, "step": 74734 }, { "epoch": 0.13251310529717308, "grad_norm": 0.8046875, "learning_rate": 0.00041366234669008823, "loss": 0.2025, "step": 74736 }, { "epoch": 0.1325166514624829, "grad_norm": 0.169921875, "learning_rate": 0.0004136238471595618, "loss": 0.1769, "step": 74738 }, { "epoch": 0.1325201976277927, "grad_norm": 1.1015625, "learning_rate": 0.00041358535063084603, "loss": 0.2338, "step": 74740 }, { "epoch": 0.13252374379310253, "grad_norm": 0.375, "learning_rate": 0.00041354685710410887, "loss": 0.1807, "step": 74742 }, { "epoch": 0.13252728995841234, "grad_norm": 1.28125, "learning_rate": 0.0004135083665795195, "loss": 0.1829, "step": 74744 }, { "epoch": 0.13253083612372216, "grad_norm": 0.5, "learning_rate": 0.0004134698790572454, "loss": 0.2303, "step": 74746 }, { "epoch": 0.13253438228903197, "grad_norm": 2.109375, "learning_rate": 0.00041343139453745535, "loss": 0.2373, "step": 74748 }, { "epoch": 0.13253792845434179, "grad_norm": 0.71875, "learning_rate": 0.00041339291302031777, "loss": 0.1983, "step": 74750 }, { "epoch": 0.1325414746196516, "grad_norm": 0.59765625, "learning_rate": 0.0004133544345060006, "loss": 0.3112, "step": 74752 }, { "epoch": 0.13254502078496141, "grad_norm": 0.51171875, "learning_rate": 0.0004133159589946724, "loss": 0.1946, "step": 74754 }, { "epoch": 0.13254856695027123, "grad_norm": 0.7734375, "learning_rate": 0.00041327748648650097, "loss": 0.1718, "step": 74756 }, { "epoch": 0.13255211311558104, "grad_norm": 0.412109375, "learning_rate": 0.0004132390169816552, "loss": 0.1822, "step": 74758 }, { "epoch": 0.13255565928089086, "grad_norm": 0.482421875, "learning_rate": 0.0004132005504803028, "loss": 0.1787, "step": 74760 }, { "epoch": 0.13255920544620067, "grad_norm": 0.2451171875, "learning_rate": 0.0004131620869826127, "loss": 0.1313, "step": 74762 }, { "epoch": 0.1325627516115105, "grad_norm": 0.333984375, "learning_rate": 0.00041312362648875226, "loss": 0.1634, "step": 74764 }, { "epoch": 0.1325662977768203, "grad_norm": 1.15625, "learning_rate": 0.00041308516899889023, "loss": 0.3007, "step": 74766 }, { "epoch": 0.13256984394213012, "grad_norm": 0.8828125, "learning_rate": 0.00041304671451319484, "loss": 0.1629, "step": 74768 }, { "epoch": 0.13257339010743993, "grad_norm": 1.71875, "learning_rate": 0.000413008263031834, "loss": 0.2027, "step": 74770 }, { "epoch": 0.13257693627274975, "grad_norm": 0.1435546875, "learning_rate": 0.00041296981455497594, "loss": 0.1367, "step": 74772 }, { "epoch": 0.13258048243805956, "grad_norm": 0.328125, "learning_rate": 0.0004129313690827886, "loss": 0.191, "step": 74774 }, { "epoch": 0.1325840286033694, "grad_norm": 0.63671875, "learning_rate": 0.00041289292661544067, "loss": 0.289, "step": 74776 }, { "epoch": 0.13258757476867922, "grad_norm": 0.22265625, "learning_rate": 0.0004128544871530998, "loss": 0.1544, "step": 74778 }, { "epoch": 0.13259112093398903, "grad_norm": 0.42578125, "learning_rate": 0.0004128160506959344, "loss": 0.2345, "step": 74780 }, { "epoch": 0.13259466709929885, "grad_norm": 0.47265625, "learning_rate": 0.0004127776172441121, "loss": 0.3138, "step": 74782 }, { "epoch": 0.13259821326460866, "grad_norm": 0.30078125, "learning_rate": 0.00041273918679780164, "loss": 0.2051, "step": 74784 }, { "epoch": 0.13260175942991848, "grad_norm": 0.54296875, "learning_rate": 0.00041270075935717073, "loss": 0.1468, "step": 74786 }, { "epoch": 0.1326053055952283, "grad_norm": 0.609375, "learning_rate": 0.00041266233492238757, "loss": 0.1757, "step": 74788 }, { "epoch": 0.1326088517605381, "grad_norm": 0.33203125, "learning_rate": 0.0004126239134936199, "loss": 0.1498, "step": 74790 }, { "epoch": 0.13261239792584792, "grad_norm": 0.1484375, "learning_rate": 0.00041258549507103596, "loss": 0.1333, "step": 74792 }, { "epoch": 0.13261594409115773, "grad_norm": 0.9921875, "learning_rate": 0.00041254707965480405, "loss": 0.2375, "step": 74794 }, { "epoch": 0.13261949025646755, "grad_norm": 0.609375, "learning_rate": 0.0004125086672450914, "loss": 0.2622, "step": 74796 }, { "epoch": 0.13262303642177736, "grad_norm": 1.015625, "learning_rate": 0.0004124702578420669, "loss": 0.1817, "step": 74798 }, { "epoch": 0.13262658258708718, "grad_norm": 1.125, "learning_rate": 0.00041243185144589767, "loss": 0.2672, "step": 74800 }, { "epoch": 0.132630128752397, "grad_norm": 0.330078125, "learning_rate": 0.0004123934480567528, "loss": 0.1252, "step": 74802 }, { "epoch": 0.1326336749177068, "grad_norm": 0.2314453125, "learning_rate": 0.00041235504767479905, "loss": 0.1473, "step": 74804 }, { "epoch": 0.13263722108301662, "grad_norm": 0.60546875, "learning_rate": 0.00041231665030020515, "loss": 0.193, "step": 74806 }, { "epoch": 0.13264076724832644, "grad_norm": 0.24609375, "learning_rate": 0.00041227825593313875, "loss": 0.1373, "step": 74808 }, { "epoch": 0.13264431341363625, "grad_norm": 1.3203125, "learning_rate": 0.00041223986457376773, "loss": 0.2378, "step": 74810 }, { "epoch": 0.13264785957894606, "grad_norm": 0.49609375, "learning_rate": 0.00041220147622226015, "loss": 0.2201, "step": 74812 }, { "epoch": 0.13265140574425588, "grad_norm": 0.2734375, "learning_rate": 0.00041216309087878345, "loss": 0.1362, "step": 74814 }, { "epoch": 0.1326549519095657, "grad_norm": 0.65234375, "learning_rate": 0.000412124708543506, "loss": 0.1589, "step": 74816 }, { "epoch": 0.1326584980748755, "grad_norm": 0.33203125, "learning_rate": 0.0004120863292165957, "loss": 0.1854, "step": 74818 }, { "epoch": 0.13266204424018532, "grad_norm": 0.54296875, "learning_rate": 0.00041204795289822015, "loss": 0.1395, "step": 74820 }, { "epoch": 0.13266559040549514, "grad_norm": 0.88671875, "learning_rate": 0.000412009579588547, "loss": 0.1764, "step": 74822 }, { "epoch": 0.13266913657080495, "grad_norm": 0.51953125, "learning_rate": 0.00041197120928774454, "loss": 0.3634, "step": 74824 }, { "epoch": 0.13267268273611477, "grad_norm": 5.46875, "learning_rate": 0.00041193284199598037, "loss": 0.2251, "step": 74826 }, { "epoch": 0.13267622890142458, "grad_norm": 0.396484375, "learning_rate": 0.0004118944777134223, "loss": 0.1408, "step": 74828 }, { "epoch": 0.1326797750667344, "grad_norm": 0.451171875, "learning_rate": 0.00041185611644023803, "loss": 0.162, "step": 74830 }, { "epoch": 0.1326833212320442, "grad_norm": 0.349609375, "learning_rate": 0.00041181775817659516, "loss": 0.1649, "step": 74832 }, { "epoch": 0.13268686739735402, "grad_norm": 0.48828125, "learning_rate": 0.0004117794029226621, "loss": 0.212, "step": 74834 }, { "epoch": 0.13269041356266384, "grad_norm": 0.69921875, "learning_rate": 0.00041174105067860576, "loss": 0.3504, "step": 74836 }, { "epoch": 0.13269395972797365, "grad_norm": 1.7109375, "learning_rate": 0.00041170270144459453, "loss": 0.3307, "step": 74838 }, { "epoch": 0.13269750589328347, "grad_norm": 0.50390625, "learning_rate": 0.0004116643552207957, "loss": 0.1926, "step": 74840 }, { "epoch": 0.13270105205859328, "grad_norm": 0.2890625, "learning_rate": 0.00041162601200737725, "loss": 0.1539, "step": 74842 }, { "epoch": 0.1327045982239031, "grad_norm": 0.263671875, "learning_rate": 0.00041158767180450687, "loss": 0.1886, "step": 74844 }, { "epoch": 0.1327081443892129, "grad_norm": 0.455078125, "learning_rate": 0.000411549334612352, "loss": 0.1863, "step": 74846 }, { "epoch": 0.13271169055452273, "grad_norm": 0.453125, "learning_rate": 0.0004115110004310806, "loss": 0.1427, "step": 74848 }, { "epoch": 0.13271523671983254, "grad_norm": 0.224609375, "learning_rate": 0.0004114726692608598, "loss": 0.1709, "step": 74850 }, { "epoch": 0.13271878288514236, "grad_norm": 0.265625, "learning_rate": 0.00041143434110185804, "loss": 0.1495, "step": 74852 }, { "epoch": 0.13272232905045217, "grad_norm": 0.349609375, "learning_rate": 0.0004113960159542421, "loss": 0.1814, "step": 74854 }, { "epoch": 0.13272587521576198, "grad_norm": 0.296875, "learning_rate": 0.00041135769381818026, "loss": 0.2079, "step": 74856 }, { "epoch": 0.1327294213810718, "grad_norm": 0.306640625, "learning_rate": 0.00041131937469383965, "loss": 0.162, "step": 74858 }, { "epoch": 0.1327329675463816, "grad_norm": 0.482421875, "learning_rate": 0.00041128105858138854, "loss": 0.2249, "step": 74860 }, { "epoch": 0.13273651371169143, "grad_norm": 0.6015625, "learning_rate": 0.0004112427454809935, "loss": 0.2257, "step": 74862 }, { "epoch": 0.13274005987700124, "grad_norm": 0.28515625, "learning_rate": 0.00041120443539282274, "loss": 0.2539, "step": 74864 }, { "epoch": 0.13274360604231106, "grad_norm": 0.7734375, "learning_rate": 0.0004111661283170438, "loss": 0.1791, "step": 74866 }, { "epoch": 0.1327471522076209, "grad_norm": 0.369140625, "learning_rate": 0.00041112782425382406, "loss": 0.1739, "step": 74868 }, { "epoch": 0.13275069837293071, "grad_norm": 0.341796875, "learning_rate": 0.00041108952320333114, "loss": 0.1616, "step": 74870 }, { "epoch": 0.13275424453824053, "grad_norm": 0.8671875, "learning_rate": 0.00041105122516573215, "loss": 0.2204, "step": 74872 }, { "epoch": 0.13275779070355034, "grad_norm": 0.2431640625, "learning_rate": 0.0004110129301411952, "loss": 0.1539, "step": 74874 }, { "epoch": 0.13276133686886016, "grad_norm": 0.78125, "learning_rate": 0.0004109746381298875, "loss": 0.2237, "step": 74876 }, { "epoch": 0.13276488303416997, "grad_norm": 1.625, "learning_rate": 0.0004109363491319764, "loss": 0.2392, "step": 74878 }, { "epoch": 0.1327684291994798, "grad_norm": 0.181640625, "learning_rate": 0.0004108980631476294, "loss": 0.1448, "step": 74880 }, { "epoch": 0.1327719753647896, "grad_norm": 0.279296875, "learning_rate": 0.0004108597801770141, "loss": 0.1744, "step": 74882 }, { "epoch": 0.13277552153009942, "grad_norm": 0.400390625, "learning_rate": 0.00041082150022029787, "loss": 0.1753, "step": 74884 }, { "epoch": 0.13277906769540923, "grad_norm": 0.51171875, "learning_rate": 0.00041078322327764796, "loss": 0.2418, "step": 74886 }, { "epoch": 0.13278261386071905, "grad_norm": 0.330078125, "learning_rate": 0.00041074494934923214, "loss": 0.1889, "step": 74888 }, { "epoch": 0.13278616002602886, "grad_norm": 0.369140625, "learning_rate": 0.000410706678435217, "loss": 0.2311, "step": 74890 }, { "epoch": 0.13278970619133867, "grad_norm": 0.59765625, "learning_rate": 0.00041066841053577117, "loss": 0.165, "step": 74892 }, { "epoch": 0.1327932523566485, "grad_norm": 0.19921875, "learning_rate": 0.0004106301456510606, "loss": 0.2034, "step": 74894 }, { "epoch": 0.1327967985219583, "grad_norm": 0.279296875, "learning_rate": 0.0004105918837812537, "loss": 0.1704, "step": 74896 }, { "epoch": 0.13280034468726812, "grad_norm": 0.69921875, "learning_rate": 0.00041055362492651724, "loss": 0.1674, "step": 74898 }, { "epoch": 0.13280389085257793, "grad_norm": 1.078125, "learning_rate": 0.0004105153690870193, "loss": 0.2192, "step": 74900 }, { "epoch": 0.13280743701788775, "grad_norm": 0.4765625, "learning_rate": 0.000410477116262926, "loss": 0.1665, "step": 74902 }, { "epoch": 0.13281098318319756, "grad_norm": 0.474609375, "learning_rate": 0.0004104388664544055, "loss": 0.1832, "step": 74904 }, { "epoch": 0.13281452934850738, "grad_norm": 0.53125, "learning_rate": 0.000410400619661625, "loss": 0.1549, "step": 74906 }, { "epoch": 0.1328180755138172, "grad_norm": 0.3046875, "learning_rate": 0.00041036237588475155, "loss": 0.174, "step": 74908 }, { "epoch": 0.132821621679127, "grad_norm": 0.373046875, "learning_rate": 0.0004103241351239524, "loss": 0.15, "step": 74910 }, { "epoch": 0.13282516784443682, "grad_norm": 0.4609375, "learning_rate": 0.0004102858973793947, "loss": 0.2086, "step": 74912 }, { "epoch": 0.13282871400974663, "grad_norm": 0.3203125, "learning_rate": 0.00041024766265124614, "loss": 0.1916, "step": 74914 }, { "epoch": 0.13283226017505645, "grad_norm": 0.625, "learning_rate": 0.00041020943093967357, "loss": 0.1655, "step": 74916 }, { "epoch": 0.13283580634036626, "grad_norm": 0.25390625, "learning_rate": 0.0004101712022448444, "loss": 0.1447, "step": 74918 }, { "epoch": 0.13283935250567608, "grad_norm": 0.55859375, "learning_rate": 0.0004101329765669256, "loss": 0.2698, "step": 74920 }, { "epoch": 0.1328428986709859, "grad_norm": 0.421875, "learning_rate": 0.0004100947539060842, "loss": 0.1789, "step": 74922 }, { "epoch": 0.1328464448362957, "grad_norm": 0.208984375, "learning_rate": 0.00041005653426248777, "loss": 0.1522, "step": 74924 }, { "epoch": 0.13284999100160552, "grad_norm": 0.58984375, "learning_rate": 0.0004100183176363034, "loss": 0.1932, "step": 74926 }, { "epoch": 0.13285353716691534, "grad_norm": 0.8046875, "learning_rate": 0.000409980104027698, "loss": 0.1859, "step": 74928 }, { "epoch": 0.13285708333222515, "grad_norm": 0.62890625, "learning_rate": 0.00040994189343683876, "loss": 0.2139, "step": 74930 }, { "epoch": 0.13286062949753497, "grad_norm": 1.4375, "learning_rate": 0.00040990368586389295, "loss": 0.4912, "step": 74932 }, { "epoch": 0.13286417566284478, "grad_norm": 0.2138671875, "learning_rate": 0.00040986548130902755, "loss": 0.1526, "step": 74934 }, { "epoch": 0.1328677218281546, "grad_norm": 0.26171875, "learning_rate": 0.0004098272797724097, "loss": 0.143, "step": 74936 }, { "epoch": 0.1328712679934644, "grad_norm": 0.447265625, "learning_rate": 0.00040978908125420646, "loss": 0.1706, "step": 74938 }, { "epoch": 0.13287481415877422, "grad_norm": 0.3203125, "learning_rate": 0.00040975088575458455, "loss": 0.1464, "step": 74940 }, { "epoch": 0.13287836032408404, "grad_norm": 0.498046875, "learning_rate": 0.00040971269327371183, "loss": 0.1845, "step": 74942 }, { "epoch": 0.13288190648939385, "grad_norm": 0.2099609375, "learning_rate": 0.00040967450381175437, "loss": 0.1793, "step": 74944 }, { "epoch": 0.13288545265470367, "grad_norm": 0.86328125, "learning_rate": 0.0004096363173688797, "loss": 0.1832, "step": 74946 }, { "epoch": 0.13288899882001348, "grad_norm": 0.43359375, "learning_rate": 0.0004095981339452547, "loss": 0.1644, "step": 74948 }, { "epoch": 0.1328925449853233, "grad_norm": 1.6875, "learning_rate": 0.00040955995354104705, "loss": 0.201, "step": 74950 }, { "epoch": 0.1328960911506331, "grad_norm": 0.38671875, "learning_rate": 0.0004095217761564224, "loss": 0.241, "step": 74952 }, { "epoch": 0.13289963731594293, "grad_norm": 0.44140625, "learning_rate": 0.00040948360179154865, "loss": 0.151, "step": 74954 }, { "epoch": 0.13290318348125274, "grad_norm": 0.189453125, "learning_rate": 0.00040944543044659274, "loss": 0.1838, "step": 74956 }, { "epoch": 0.13290672964656258, "grad_norm": 2.546875, "learning_rate": 0.00040940726212172117, "loss": 0.5733, "step": 74958 }, { "epoch": 0.1329102758118724, "grad_norm": 0.29296875, "learning_rate": 0.0004093690968171013, "loss": 0.146, "step": 74960 }, { "epoch": 0.1329138219771822, "grad_norm": 2.546875, "learning_rate": 0.0004093309345328994, "loss": 0.2878, "step": 74962 }, { "epoch": 0.13291736814249203, "grad_norm": 0.421875, "learning_rate": 0.0004092927752692831, "loss": 0.1109, "step": 74964 }, { "epoch": 0.13292091430780184, "grad_norm": 1.09375, "learning_rate": 0.00040925461902641905, "loss": 0.2441, "step": 74966 }, { "epoch": 0.13292446047311166, "grad_norm": 0.671875, "learning_rate": 0.00040921646580447405, "loss": 0.1419, "step": 74968 }, { "epoch": 0.13292800663842147, "grad_norm": 1.5703125, "learning_rate": 0.00040917831560361464, "loss": 0.1877, "step": 74970 }, { "epoch": 0.13293155280373128, "grad_norm": 0.26953125, "learning_rate": 0.00040914016842400836, "loss": 0.2167, "step": 74972 }, { "epoch": 0.1329350989690411, "grad_norm": 1.1640625, "learning_rate": 0.00040910202426582167, "loss": 0.3661, "step": 74974 }, { "epoch": 0.1329386451343509, "grad_norm": 0.57421875, "learning_rate": 0.00040906388312922126, "loss": 0.1752, "step": 74976 }, { "epoch": 0.13294219129966073, "grad_norm": 0.396484375, "learning_rate": 0.0004090257450143743, "loss": 0.1762, "step": 74978 }, { "epoch": 0.13294573746497054, "grad_norm": 0.421875, "learning_rate": 0.000408987609921447, "loss": 0.2806, "step": 74980 }, { "epoch": 0.13294928363028036, "grad_norm": 0.380859375, "learning_rate": 0.000408949477850607, "loss": 0.3191, "step": 74982 }, { "epoch": 0.13295282979559017, "grad_norm": 0.890625, "learning_rate": 0.00040891134880202007, "loss": 0.1612, "step": 74984 }, { "epoch": 0.1329563759609, "grad_norm": 1.7265625, "learning_rate": 0.0004088732227758538, "loss": 0.2186, "step": 74986 }, { "epoch": 0.1329599221262098, "grad_norm": 0.2099609375, "learning_rate": 0.0004088350997722742, "loss": 0.1329, "step": 74988 }, { "epoch": 0.13296346829151962, "grad_norm": 0.29296875, "learning_rate": 0.00040879697979144904, "loss": 0.145, "step": 74990 }, { "epoch": 0.13296701445682943, "grad_norm": 0.3515625, "learning_rate": 0.0004087588628335439, "loss": 0.1592, "step": 74992 }, { "epoch": 0.13297056062213924, "grad_norm": 0.99609375, "learning_rate": 0.00040872074889872613, "loss": 0.2679, "step": 74994 }, { "epoch": 0.13297410678744906, "grad_norm": 0.4921875, "learning_rate": 0.00040868263798716223, "loss": 0.2555, "step": 74996 }, { "epoch": 0.13297765295275887, "grad_norm": 0.396484375, "learning_rate": 0.000408644530099019, "loss": 0.1891, "step": 74998 }, { "epoch": 0.1329811991180687, "grad_norm": 0.3125, "learning_rate": 0.000408606425234463, "loss": 0.1706, "step": 75000 }, { "epoch": 0.1329847452833785, "grad_norm": 2.15625, "learning_rate": 0.0004085683233936606, "loss": 0.4386, "step": 75002 }, { "epoch": 0.13298829144868832, "grad_norm": 0.43359375, "learning_rate": 0.00040853022457677893, "loss": 0.1564, "step": 75004 }, { "epoch": 0.13299183761399813, "grad_norm": 0.29296875, "learning_rate": 0.00040849212878398423, "loss": 0.1714, "step": 75006 }, { "epoch": 0.13299538377930795, "grad_norm": 0.478515625, "learning_rate": 0.0004084540360154437, "loss": 0.1857, "step": 75008 }, { "epoch": 0.13299892994461776, "grad_norm": 0.2578125, "learning_rate": 0.00040841594627132315, "loss": 0.1314, "step": 75010 }, { "epoch": 0.13300247610992758, "grad_norm": 0.578125, "learning_rate": 0.0004083778595517897, "loss": 0.285, "step": 75012 }, { "epoch": 0.1330060222752374, "grad_norm": 0.5859375, "learning_rate": 0.0004083397758570098, "loss": 0.1905, "step": 75014 }, { "epoch": 0.1330095684405472, "grad_norm": 0.33203125, "learning_rate": 0.0004083016951871499, "loss": 0.2297, "step": 75016 }, { "epoch": 0.13301311460585702, "grad_norm": 0.44140625, "learning_rate": 0.00040826361754237663, "loss": 0.2277, "step": 75018 }, { "epoch": 0.13301666077116683, "grad_norm": 0.98046875, "learning_rate": 0.0004082255429228562, "loss": 0.2107, "step": 75020 }, { "epoch": 0.13302020693647665, "grad_norm": 0.322265625, "learning_rate": 0.0004081874713287557, "loss": 0.2138, "step": 75022 }, { "epoch": 0.13302375310178646, "grad_norm": 0.8125, "learning_rate": 0.00040814940276024124, "loss": 0.173, "step": 75024 }, { "epoch": 0.13302729926709628, "grad_norm": 0.2119140625, "learning_rate": 0.0004081113372174796, "loss": 0.2203, "step": 75026 }, { "epoch": 0.1330308454324061, "grad_norm": 0.287109375, "learning_rate": 0.0004080732747006368, "loss": 0.1665, "step": 75028 }, { "epoch": 0.1330343915977159, "grad_norm": 0.326171875, "learning_rate": 0.00040803521520987976, "loss": 0.221, "step": 75030 }, { "epoch": 0.13303793776302572, "grad_norm": 0.298828125, "learning_rate": 0.0004079971587453747, "loss": 0.1891, "step": 75032 }, { "epoch": 0.13304148392833554, "grad_norm": 0.44140625, "learning_rate": 0.0004079591053072882, "loss": 0.1268, "step": 75034 }, { "epoch": 0.13304503009364535, "grad_norm": 0.201171875, "learning_rate": 0.0004079210548957866, "loss": 0.1982, "step": 75036 }, { "epoch": 0.13304857625895516, "grad_norm": 0.3046875, "learning_rate": 0.000407883007511036, "loss": 0.1537, "step": 75038 }, { "epoch": 0.13305212242426498, "grad_norm": 2.6875, "learning_rate": 0.0004078449631532035, "loss": 0.2323, "step": 75040 }, { "epoch": 0.1330556685895748, "grad_norm": 0.484375, "learning_rate": 0.00040780692182245477, "loss": 0.2366, "step": 75042 }, { "epoch": 0.1330592147548846, "grad_norm": 0.6875, "learning_rate": 0.00040776888351895665, "loss": 0.193, "step": 75044 }, { "epoch": 0.13306276092019442, "grad_norm": 0.453125, "learning_rate": 0.00040773084824287506, "loss": 0.1608, "step": 75046 }, { "epoch": 0.13306630708550427, "grad_norm": 0.431640625, "learning_rate": 0.00040769281599437713, "loss": 0.2008, "step": 75048 }, { "epoch": 0.13306985325081408, "grad_norm": 0.51953125, "learning_rate": 0.0004076547867736281, "loss": 0.1816, "step": 75050 }, { "epoch": 0.1330733994161239, "grad_norm": 0.52734375, "learning_rate": 0.0004076167605807953, "loss": 0.1898, "step": 75052 }, { "epoch": 0.1330769455814337, "grad_norm": 0.228515625, "learning_rate": 0.0004075787374160446, "loss": 0.2538, "step": 75054 }, { "epoch": 0.13308049174674352, "grad_norm": 0.76953125, "learning_rate": 0.0004075407172795421, "loss": 0.265, "step": 75056 }, { "epoch": 0.13308403791205334, "grad_norm": 0.25, "learning_rate": 0.0004075027001714544, "loss": 0.1551, "step": 75058 }, { "epoch": 0.13308758407736315, "grad_norm": 0.2001953125, "learning_rate": 0.00040746468609194743, "loss": 0.1613, "step": 75060 }, { "epoch": 0.13309113024267297, "grad_norm": 0.73046875, "learning_rate": 0.00040742667504118784, "loss": 0.1498, "step": 75062 }, { "epoch": 0.13309467640798278, "grad_norm": 0.361328125, "learning_rate": 0.00040738866701934173, "loss": 0.1419, "step": 75064 }, { "epoch": 0.1330982225732926, "grad_norm": 0.69140625, "learning_rate": 0.00040735066202657517, "loss": 0.2054, "step": 75066 }, { "epoch": 0.1331017687386024, "grad_norm": 0.462890625, "learning_rate": 0.00040731266006305435, "loss": 0.1438, "step": 75068 }, { "epoch": 0.13310531490391223, "grad_norm": 0.359375, "learning_rate": 0.0004072746611289458, "loss": 0.1837, "step": 75070 }, { "epoch": 0.13310886106922204, "grad_norm": 0.267578125, "learning_rate": 0.0004072366652244154, "loss": 0.1946, "step": 75072 }, { "epoch": 0.13311240723453185, "grad_norm": 0.357421875, "learning_rate": 0.0004071986723496296, "loss": 0.1697, "step": 75074 }, { "epoch": 0.13311595339984167, "grad_norm": 0.55859375, "learning_rate": 0.00040716068250475426, "loss": 0.1926, "step": 75076 }, { "epoch": 0.13311949956515148, "grad_norm": 0.4765625, "learning_rate": 0.0004071226956899554, "loss": 0.3478, "step": 75078 }, { "epoch": 0.1331230457304613, "grad_norm": 4.3125, "learning_rate": 0.00040708471190539986, "loss": 0.224, "step": 75080 }, { "epoch": 0.1331265918957711, "grad_norm": 1.4296875, "learning_rate": 0.000407046731151253, "loss": 0.2432, "step": 75082 }, { "epoch": 0.13313013806108093, "grad_norm": 0.34765625, "learning_rate": 0.0004070087534276813, "loss": 0.1388, "step": 75084 }, { "epoch": 0.13313368422639074, "grad_norm": 0.44140625, "learning_rate": 0.0004069707787348508, "loss": 0.2213, "step": 75086 }, { "epoch": 0.13313723039170056, "grad_norm": 0.89453125, "learning_rate": 0.00040693280707292757, "loss": 0.2016, "step": 75088 }, { "epoch": 0.13314077655701037, "grad_norm": 0.326171875, "learning_rate": 0.0004068948384420776, "loss": 0.16, "step": 75090 }, { "epoch": 0.13314432272232019, "grad_norm": 0.2099609375, "learning_rate": 0.00040685687284246674, "loss": 0.1666, "step": 75092 }, { "epoch": 0.13314786888763, "grad_norm": 0.66015625, "learning_rate": 0.00040681891027426165, "loss": 0.1778, "step": 75094 }, { "epoch": 0.13315141505293981, "grad_norm": 0.2060546875, "learning_rate": 0.00040678095073762765, "loss": 0.1725, "step": 75096 }, { "epoch": 0.13315496121824963, "grad_norm": 0.234375, "learning_rate": 0.00040674299423273176, "loss": 0.225, "step": 75098 }, { "epoch": 0.13315850738355944, "grad_norm": 0.388671875, "learning_rate": 0.0004067050407597388, "loss": 0.1696, "step": 75100 }, { "epoch": 0.13316205354886926, "grad_norm": 0.322265625, "learning_rate": 0.00040666709031881535, "loss": 0.1792, "step": 75102 }, { "epoch": 0.13316559971417907, "grad_norm": 0.5390625, "learning_rate": 0.0004066291429101274, "loss": 0.2913, "step": 75104 }, { "epoch": 0.1331691458794889, "grad_norm": 0.357421875, "learning_rate": 0.0004065911985338409, "loss": 0.2423, "step": 75106 }, { "epoch": 0.1331726920447987, "grad_norm": 0.3125, "learning_rate": 0.0004065532571901216, "loss": 0.2112, "step": 75108 }, { "epoch": 0.13317623821010852, "grad_norm": 1.03125, "learning_rate": 0.00040651531887913526, "loss": 0.3793, "step": 75110 }, { "epoch": 0.13317978437541833, "grad_norm": 0.310546875, "learning_rate": 0.0004064773836010485, "loss": 0.1647, "step": 75112 }, { "epoch": 0.13318333054072815, "grad_norm": 0.28125, "learning_rate": 0.0004064394513560268, "loss": 0.1806, "step": 75114 }, { "epoch": 0.13318687670603796, "grad_norm": 0.28515625, "learning_rate": 0.000406401522144236, "loss": 0.1535, "step": 75116 }, { "epoch": 0.13319042287134777, "grad_norm": 0.380859375, "learning_rate": 0.0004063635959658418, "loss": 0.1814, "step": 75118 }, { "epoch": 0.1331939690366576, "grad_norm": 1.375, "learning_rate": 0.0004063256728210106, "loss": 0.2139, "step": 75120 }, { "epoch": 0.1331975152019674, "grad_norm": 0.37890625, "learning_rate": 0.00040628775270990793, "loss": 0.1809, "step": 75122 }, { "epoch": 0.13320106136727722, "grad_norm": 0.380859375, "learning_rate": 0.00040624983563269967, "loss": 0.1657, "step": 75124 }, { "epoch": 0.13320460753258703, "grad_norm": 0.52734375, "learning_rate": 0.00040621192158955176, "loss": 0.2387, "step": 75126 }, { "epoch": 0.13320815369789685, "grad_norm": 0.306640625, "learning_rate": 0.0004061740105806295, "loss": 0.1923, "step": 75128 }, { "epoch": 0.13321169986320666, "grad_norm": 0.3203125, "learning_rate": 0.0004061361026060998, "loss": 0.1393, "step": 75130 }, { "epoch": 0.13321524602851648, "grad_norm": 0.5234375, "learning_rate": 0.00040609819766612715, "loss": 0.1895, "step": 75132 }, { "epoch": 0.1332187921938263, "grad_norm": 0.37890625, "learning_rate": 0.0004060602957608781, "loss": 0.1018, "step": 75134 }, { "epoch": 0.1332223383591361, "grad_norm": 0.58984375, "learning_rate": 0.00040602239689051803, "loss": 0.2303, "step": 75136 }, { "epoch": 0.13322588452444592, "grad_norm": 0.1826171875, "learning_rate": 0.00040598450105521335, "loss": 0.1728, "step": 75138 }, { "epoch": 0.13322943068975576, "grad_norm": 0.3828125, "learning_rate": 0.00040594660825512885, "loss": 0.2026, "step": 75140 }, { "epoch": 0.13323297685506558, "grad_norm": 0.21875, "learning_rate": 0.00040590871849043096, "loss": 0.1506, "step": 75142 }, { "epoch": 0.1332365230203754, "grad_norm": 0.5859375, "learning_rate": 0.00040587083176128524, "loss": 0.1805, "step": 75144 }, { "epoch": 0.1332400691856852, "grad_norm": 1.0390625, "learning_rate": 0.0004058329480678573, "loss": 0.3351, "step": 75146 }, { "epoch": 0.13324361535099502, "grad_norm": 0.412109375, "learning_rate": 0.00040579506741031276, "loss": 0.3728, "step": 75148 }, { "epoch": 0.13324716151630484, "grad_norm": 0.220703125, "learning_rate": 0.0004057571897888172, "loss": 0.1926, "step": 75150 }, { "epoch": 0.13325070768161465, "grad_norm": 0.8359375, "learning_rate": 0.0004057193152035364, "loss": 0.1455, "step": 75152 }, { "epoch": 0.13325425384692446, "grad_norm": 0.26953125, "learning_rate": 0.0004056814436546363, "loss": 0.1951, "step": 75154 }, { "epoch": 0.13325780001223428, "grad_norm": 0.328125, "learning_rate": 0.00040564357514228223, "loss": 0.2029, "step": 75156 }, { "epoch": 0.1332613461775441, "grad_norm": 0.62890625, "learning_rate": 0.0004056057096666395, "loss": 0.1315, "step": 75158 }, { "epoch": 0.1332648923428539, "grad_norm": 0.431640625, "learning_rate": 0.00040556784722787435, "loss": 0.1573, "step": 75160 }, { "epoch": 0.13326843850816372, "grad_norm": 0.53125, "learning_rate": 0.000405529987826152, "loss": 0.1625, "step": 75162 }, { "epoch": 0.13327198467347354, "grad_norm": 0.515625, "learning_rate": 0.0004054921314616381, "loss": 0.1244, "step": 75164 }, { "epoch": 0.13327553083878335, "grad_norm": 0.6953125, "learning_rate": 0.0004054542781344983, "loss": 0.1936, "step": 75166 }, { "epoch": 0.13327907700409317, "grad_norm": 0.396484375, "learning_rate": 0.0004054164278448977, "loss": 0.2253, "step": 75168 }, { "epoch": 0.13328262316940298, "grad_norm": 0.2255859375, "learning_rate": 0.00040537858059300273, "loss": 0.1451, "step": 75170 }, { "epoch": 0.1332861693347128, "grad_norm": 0.275390625, "learning_rate": 0.00040534073637897783, "loss": 0.1738, "step": 75172 }, { "epoch": 0.1332897155000226, "grad_norm": 2.515625, "learning_rate": 0.00040530289520298934, "loss": 0.2203, "step": 75174 }, { "epoch": 0.13329326166533242, "grad_norm": 0.33984375, "learning_rate": 0.0004052650570652021, "loss": 0.1789, "step": 75176 }, { "epoch": 0.13329680783064224, "grad_norm": 0.29296875, "learning_rate": 0.0004052272219657822, "loss": 0.1685, "step": 75178 }, { "epoch": 0.13330035399595205, "grad_norm": 0.51953125, "learning_rate": 0.000405189389904895, "loss": 0.162, "step": 75180 }, { "epoch": 0.13330390016126187, "grad_norm": 1.15625, "learning_rate": 0.00040515156088270576, "loss": 0.2957, "step": 75182 }, { "epoch": 0.13330744632657168, "grad_norm": 0.3203125, "learning_rate": 0.00040511373489938, "loss": 0.1345, "step": 75184 }, { "epoch": 0.1333109924918815, "grad_norm": 0.56640625, "learning_rate": 0.00040507591195508287, "loss": 0.1685, "step": 75186 }, { "epoch": 0.1333145386571913, "grad_norm": 0.455078125, "learning_rate": 0.00040503809204998047, "loss": 0.1899, "step": 75188 }, { "epoch": 0.13331808482250113, "grad_norm": 0.326171875, "learning_rate": 0.0004050002751842373, "loss": 0.1893, "step": 75190 }, { "epoch": 0.13332163098781094, "grad_norm": 0.482421875, "learning_rate": 0.0004049624613580194, "loss": 0.1593, "step": 75192 }, { "epoch": 0.13332517715312076, "grad_norm": 0.357421875, "learning_rate": 0.00040492465057149183, "loss": 0.1595, "step": 75194 }, { "epoch": 0.13332872331843057, "grad_norm": 0.494140625, "learning_rate": 0.0004048868428248206, "loss": 0.1482, "step": 75196 }, { "epoch": 0.13333226948374038, "grad_norm": 0.5546875, "learning_rate": 0.00040484903811817, "loss": 0.1604, "step": 75198 }, { "epoch": 0.1333358156490502, "grad_norm": 0.310546875, "learning_rate": 0.0004048112364517061, "loss": 0.1638, "step": 75200 }, { "epoch": 0.13333936181436, "grad_norm": 0.44921875, "learning_rate": 0.0004047734378255942, "loss": 0.1823, "step": 75202 }, { "epoch": 0.13334290797966983, "grad_norm": 0.279296875, "learning_rate": 0.0004047356422399994, "loss": 0.1704, "step": 75204 }, { "epoch": 0.13334645414497964, "grad_norm": 0.314453125, "learning_rate": 0.000404697849695087, "loss": 0.1751, "step": 75206 }, { "epoch": 0.13335000031028946, "grad_norm": 0.2158203125, "learning_rate": 0.000404660060191022, "loss": 0.166, "step": 75208 }, { "epoch": 0.13335354647559927, "grad_norm": 0.69921875, "learning_rate": 0.00040462227372797046, "loss": 0.1654, "step": 75210 }, { "epoch": 0.1333570926409091, "grad_norm": 0.48828125, "learning_rate": 0.00040458449030609703, "loss": 0.1696, "step": 75212 }, { "epoch": 0.1333606388062189, "grad_norm": 0.294921875, "learning_rate": 0.0004045467099255671, "loss": 0.1714, "step": 75214 }, { "epoch": 0.13336418497152872, "grad_norm": 1.6796875, "learning_rate": 0.0004045089325865455, "loss": 0.19, "step": 75216 }, { "epoch": 0.13336773113683853, "grad_norm": 1.25, "learning_rate": 0.0004044711582891983, "loss": 0.1995, "step": 75218 }, { "epoch": 0.13337127730214834, "grad_norm": 0.275390625, "learning_rate": 0.00040443338703369014, "loss": 0.1282, "step": 75220 }, { "epoch": 0.13337482346745816, "grad_norm": 0.5859375, "learning_rate": 0.0004043956188201864, "loss": 0.1736, "step": 75222 }, { "epoch": 0.13337836963276797, "grad_norm": 0.515625, "learning_rate": 0.00040435785364885217, "loss": 0.1785, "step": 75224 }, { "epoch": 0.1333819157980778, "grad_norm": 8.8125, "learning_rate": 0.0004043200915198523, "loss": 0.1862, "step": 75226 }, { "epoch": 0.1333854619633876, "grad_norm": 0.490234375, "learning_rate": 0.00040428233243335277, "loss": 0.1701, "step": 75228 }, { "epoch": 0.13338900812869744, "grad_norm": 0.32421875, "learning_rate": 0.0004042445763895176, "loss": 0.1533, "step": 75230 }, { "epoch": 0.13339255429400726, "grad_norm": 0.228515625, "learning_rate": 0.000404206823388513, "loss": 0.1565, "step": 75232 }, { "epoch": 0.13339610045931707, "grad_norm": 0.5625, "learning_rate": 0.0004041690734305031, "loss": 0.3049, "step": 75234 }, { "epoch": 0.1333996466246269, "grad_norm": 0.2578125, "learning_rate": 0.0004041313265156541, "loss": 0.2097, "step": 75236 }, { "epoch": 0.1334031927899367, "grad_norm": 0.2734375, "learning_rate": 0.0004040935826441299, "loss": 0.1477, "step": 75238 }, { "epoch": 0.13340673895524652, "grad_norm": 0.85546875, "learning_rate": 0.00040405584181609644, "loss": 0.2486, "step": 75240 }, { "epoch": 0.13341028512055633, "grad_norm": 0.2392578125, "learning_rate": 0.0004040181040317185, "loss": 0.139, "step": 75242 }, { "epoch": 0.13341383128586615, "grad_norm": 0.5, "learning_rate": 0.00040398036929116104, "loss": 0.1609, "step": 75244 }, { "epoch": 0.13341737745117596, "grad_norm": 0.2080078125, "learning_rate": 0.0004039426375945892, "loss": 0.1785, "step": 75246 }, { "epoch": 0.13342092361648578, "grad_norm": 1.1015625, "learning_rate": 0.00040390490894216764, "loss": 0.2654, "step": 75248 }, { "epoch": 0.1334244697817956, "grad_norm": 0.4765625, "learning_rate": 0.00040386718333406204, "loss": 0.1376, "step": 75250 }, { "epoch": 0.1334280159471054, "grad_norm": 0.4921875, "learning_rate": 0.0004038294607704368, "loss": 0.1679, "step": 75252 }, { "epoch": 0.13343156211241522, "grad_norm": 0.3515625, "learning_rate": 0.00040379174125145727, "loss": 0.1898, "step": 75254 }, { "epoch": 0.13343510827772503, "grad_norm": 0.45703125, "learning_rate": 0.00040375402477728806, "loss": 0.1448, "step": 75256 }, { "epoch": 0.13343865444303485, "grad_norm": 0.7734375, "learning_rate": 0.00040371631134809437, "loss": 0.2687, "step": 75258 }, { "epoch": 0.13344220060834466, "grad_norm": 0.318359375, "learning_rate": 0.0004036786009640413, "loss": 0.1784, "step": 75260 }, { "epoch": 0.13344574677365448, "grad_norm": 0.8359375, "learning_rate": 0.0004036408936252934, "loss": 0.1552, "step": 75262 }, { "epoch": 0.1334492929389643, "grad_norm": 0.4765625, "learning_rate": 0.00040360318933201577, "loss": 0.1436, "step": 75264 }, { "epoch": 0.1334528391042741, "grad_norm": 0.248046875, "learning_rate": 0.0004035654880843731, "loss": 0.1419, "step": 75266 }, { "epoch": 0.13345638526958392, "grad_norm": 0.2294921875, "learning_rate": 0.00040352778988253057, "loss": 0.1674, "step": 75268 }, { "epoch": 0.13345993143489374, "grad_norm": 3.203125, "learning_rate": 0.0004034900947266531, "loss": 0.2834, "step": 75270 }, { "epoch": 0.13346347760020355, "grad_norm": 0.57421875, "learning_rate": 0.0004034524026169053, "loss": 0.4096, "step": 75272 }, { "epoch": 0.13346702376551337, "grad_norm": 0.439453125, "learning_rate": 0.00040341471355345205, "loss": 0.203, "step": 75274 }, { "epoch": 0.13347056993082318, "grad_norm": 2.6875, "learning_rate": 0.000403377027536458, "loss": 0.3544, "step": 75276 }, { "epoch": 0.133474116096133, "grad_norm": 3.03125, "learning_rate": 0.00040333934456608867, "loss": 0.2269, "step": 75278 }, { "epoch": 0.1334776622614428, "grad_norm": 1.171875, "learning_rate": 0.00040330166464250787, "loss": 0.2272, "step": 75280 }, { "epoch": 0.13348120842675262, "grad_norm": 0.5390625, "learning_rate": 0.00040326398776588114, "loss": 0.2992, "step": 75282 }, { "epoch": 0.13348475459206244, "grad_norm": 0.2890625, "learning_rate": 0.0004032263139363728, "loss": 0.1599, "step": 75284 }, { "epoch": 0.13348830075737225, "grad_norm": 0.9140625, "learning_rate": 0.0004031886431541483, "loss": 0.1597, "step": 75286 }, { "epoch": 0.13349184692268207, "grad_norm": 1.703125, "learning_rate": 0.00040315097541937143, "loss": 0.1783, "step": 75288 }, { "epoch": 0.13349539308799188, "grad_norm": 0.62109375, "learning_rate": 0.0004031133107322077, "loss": 0.1795, "step": 75290 }, { "epoch": 0.1334989392533017, "grad_norm": 1.296875, "learning_rate": 0.00040307564909282146, "loss": 0.2719, "step": 75292 }, { "epoch": 0.1335024854186115, "grad_norm": 0.74609375, "learning_rate": 0.00040303799050137756, "loss": 0.1659, "step": 75294 }, { "epoch": 0.13350603158392133, "grad_norm": 0.322265625, "learning_rate": 0.00040300033495804067, "loss": 0.171, "step": 75296 }, { "epoch": 0.13350957774923114, "grad_norm": 0.984375, "learning_rate": 0.00040296268246297517, "loss": 0.1819, "step": 75298 }, { "epoch": 0.13351312391454095, "grad_norm": 0.20703125, "learning_rate": 0.00040292503301634627, "loss": 0.1426, "step": 75300 }, { "epoch": 0.13351667007985077, "grad_norm": 0.6640625, "learning_rate": 0.00040288738661831835, "loss": 0.2933, "step": 75302 }, { "epoch": 0.13352021624516058, "grad_norm": 0.546875, "learning_rate": 0.0004028497432690561, "loss": 0.1583, "step": 75304 }, { "epoch": 0.1335237624104704, "grad_norm": 0.55859375, "learning_rate": 0.0004028121029687239, "loss": 0.1974, "step": 75306 }, { "epoch": 0.1335273085757802, "grad_norm": 0.25390625, "learning_rate": 0.00040277446571748677, "loss": 0.1458, "step": 75308 }, { "epoch": 0.13353085474109003, "grad_norm": 0.34765625, "learning_rate": 0.0004027368315155091, "loss": 0.1638, "step": 75310 }, { "epoch": 0.13353440090639984, "grad_norm": 0.279296875, "learning_rate": 0.00040269920036295557, "loss": 0.1533, "step": 75312 }, { "epoch": 0.13353794707170966, "grad_norm": 0.2470703125, "learning_rate": 0.00040266157225999076, "loss": 0.1529, "step": 75314 }, { "epoch": 0.13354149323701947, "grad_norm": 0.2578125, "learning_rate": 0.00040262394720677884, "loss": 0.138, "step": 75316 }, { "epoch": 0.13354503940232929, "grad_norm": 1.0078125, "learning_rate": 0.0004025863252034852, "loss": 0.1795, "step": 75318 }, { "epoch": 0.13354858556763913, "grad_norm": 1.7265625, "learning_rate": 0.0004025487062502734, "loss": 0.1765, "step": 75320 }, { "epoch": 0.13355213173294894, "grad_norm": 1.171875, "learning_rate": 0.0004025110903473086, "loss": 0.2106, "step": 75322 }, { "epoch": 0.13355567789825876, "grad_norm": 0.4453125, "learning_rate": 0.0004024734774947549, "loss": 0.1837, "step": 75324 }, { "epoch": 0.13355922406356857, "grad_norm": 0.5703125, "learning_rate": 0.00040243586769277763, "loss": 0.191, "step": 75326 }, { "epoch": 0.13356277022887839, "grad_norm": 0.45703125, "learning_rate": 0.00040239826094154007, "loss": 0.2691, "step": 75328 }, { "epoch": 0.1335663163941882, "grad_norm": 0.369140625, "learning_rate": 0.00040236065724120763, "loss": 0.1668, "step": 75330 }, { "epoch": 0.13356986255949801, "grad_norm": 0.67578125, "learning_rate": 0.00040232305659194446, "loss": 0.2061, "step": 75332 }, { "epoch": 0.13357340872480783, "grad_norm": 1.1796875, "learning_rate": 0.00040228545899391487, "loss": 0.1884, "step": 75334 }, { "epoch": 0.13357695489011764, "grad_norm": 0.6796875, "learning_rate": 0.0004022478644472835, "loss": 0.1891, "step": 75336 }, { "epoch": 0.13358050105542746, "grad_norm": 0.7421875, "learning_rate": 0.00040221027295221445, "loss": 0.1884, "step": 75338 }, { "epoch": 0.13358404722073727, "grad_norm": 0.67578125, "learning_rate": 0.00040217268450887245, "loss": 0.1896, "step": 75340 }, { "epoch": 0.1335875933860471, "grad_norm": 0.26953125, "learning_rate": 0.0004021350991174217, "loss": 0.2038, "step": 75342 }, { "epoch": 0.1335911395513569, "grad_norm": 0.44921875, "learning_rate": 0.000402097516778027, "loss": 0.2006, "step": 75344 }, { "epoch": 0.13359468571666672, "grad_norm": 0.20703125, "learning_rate": 0.00040205993749085195, "loss": 0.1363, "step": 75346 }, { "epoch": 0.13359823188197653, "grad_norm": 0.275390625, "learning_rate": 0.0004020223612560615, "loss": 0.2027, "step": 75348 }, { "epoch": 0.13360177804728635, "grad_norm": 1.71875, "learning_rate": 0.0004019847880738199, "loss": 0.2838, "step": 75350 }, { "epoch": 0.13360532421259616, "grad_norm": 0.2294921875, "learning_rate": 0.0004019472179442914, "loss": 0.153, "step": 75352 }, { "epoch": 0.13360887037790597, "grad_norm": 0.392578125, "learning_rate": 0.0004019096508676402, "loss": 0.1901, "step": 75354 }, { "epoch": 0.1336124165432158, "grad_norm": 0.87890625, "learning_rate": 0.00040187208684403056, "loss": 0.187, "step": 75356 }, { "epoch": 0.1336159627085256, "grad_norm": 0.95703125, "learning_rate": 0.00040183452587362714, "loss": 0.1429, "step": 75358 }, { "epoch": 0.13361950887383542, "grad_norm": 0.306640625, "learning_rate": 0.0004017969679565938, "loss": 0.2491, "step": 75360 }, { "epoch": 0.13362305503914523, "grad_norm": 0.337890625, "learning_rate": 0.00040175941309309506, "loss": 0.1522, "step": 75362 }, { "epoch": 0.13362660120445505, "grad_norm": 0.62890625, "learning_rate": 0.0004017218612832948, "loss": 0.1444, "step": 75364 }, { "epoch": 0.13363014736976486, "grad_norm": 0.15625, "learning_rate": 0.0004016843125273577, "loss": 0.1298, "step": 75366 }, { "epoch": 0.13363369353507468, "grad_norm": 0.2080078125, "learning_rate": 0.0004016467668254479, "loss": 0.1638, "step": 75368 }, { "epoch": 0.1336372397003845, "grad_norm": 0.5703125, "learning_rate": 0.00040160922417772937, "loss": 0.1433, "step": 75370 }, { "epoch": 0.1336407858656943, "grad_norm": 0.2275390625, "learning_rate": 0.00040157168458436655, "loss": 0.1288, "step": 75372 }, { "epoch": 0.13364433203100412, "grad_norm": 0.353515625, "learning_rate": 0.00040153414804552316, "loss": 0.189, "step": 75374 }, { "epoch": 0.13364787819631394, "grad_norm": 0.5, "learning_rate": 0.0004014966145613641, "loss": 0.1562, "step": 75376 }, { "epoch": 0.13365142436162375, "grad_norm": 0.7890625, "learning_rate": 0.0004014590841320528, "loss": 0.2206, "step": 75378 }, { "epoch": 0.13365497052693356, "grad_norm": 0.52734375, "learning_rate": 0.00040142155675775384, "loss": 0.145, "step": 75380 }, { "epoch": 0.13365851669224338, "grad_norm": 0.37109375, "learning_rate": 0.00040138403243863096, "loss": 0.215, "step": 75382 }, { "epoch": 0.1336620628575532, "grad_norm": 0.458984375, "learning_rate": 0.0004013465111748491, "loss": 0.1897, "step": 75384 }, { "epoch": 0.133665609022863, "grad_norm": 0.50390625, "learning_rate": 0.0004013089929665712, "loss": 0.1701, "step": 75386 }, { "epoch": 0.13366915518817282, "grad_norm": 0.296875, "learning_rate": 0.0004012714778139621, "loss": 0.1948, "step": 75388 }, { "epoch": 0.13367270135348264, "grad_norm": 3.140625, "learning_rate": 0.0004012339657171858, "loss": 0.1944, "step": 75390 }, { "epoch": 0.13367624751879245, "grad_norm": 0.33203125, "learning_rate": 0.00040119645667640626, "loss": 0.1494, "step": 75392 }, { "epoch": 0.13367979368410227, "grad_norm": 0.53125, "learning_rate": 0.00040115895069178743, "loss": 0.2255, "step": 75394 }, { "epoch": 0.13368333984941208, "grad_norm": 2.71875, "learning_rate": 0.0004011214477634931, "loss": 0.3517, "step": 75396 }, { "epoch": 0.1336868860147219, "grad_norm": 0.419921875, "learning_rate": 0.00040108394789168793, "loss": 0.2021, "step": 75398 }, { "epoch": 0.1336904321800317, "grad_norm": 0.50390625, "learning_rate": 0.0004010464510765357, "loss": 0.1813, "step": 75400 }, { "epoch": 0.13369397834534152, "grad_norm": 0.419921875, "learning_rate": 0.00040100895731820014, "loss": 0.1914, "step": 75402 }, { "epoch": 0.13369752451065134, "grad_norm": 0.50390625, "learning_rate": 0.0004009714666168451, "loss": 0.2019, "step": 75404 }, { "epoch": 0.13370107067596115, "grad_norm": 0.435546875, "learning_rate": 0.0004009339789726352, "loss": 0.3123, "step": 75406 }, { "epoch": 0.13370461684127097, "grad_norm": 18.25, "learning_rate": 0.00040089649438573404, "loss": 0.3501, "step": 75408 }, { "epoch": 0.13370816300658078, "grad_norm": 0.373046875, "learning_rate": 0.0004008590128563053, "loss": 0.156, "step": 75410 }, { "epoch": 0.13371170917189062, "grad_norm": 1.015625, "learning_rate": 0.0004008215343845133, "loss": 0.1676, "step": 75412 }, { "epoch": 0.13371525533720044, "grad_norm": 0.216796875, "learning_rate": 0.0004007840589705216, "loss": 0.1831, "step": 75414 }, { "epoch": 0.13371880150251025, "grad_norm": 0.384765625, "learning_rate": 0.0004007465866144947, "loss": 0.1509, "step": 75416 }, { "epoch": 0.13372234766782007, "grad_norm": 0.26953125, "learning_rate": 0.0004007091173165955, "loss": 0.1846, "step": 75418 }, { "epoch": 0.13372589383312988, "grad_norm": 0.52734375, "learning_rate": 0.0004006716510769888, "loss": 0.1589, "step": 75420 }, { "epoch": 0.1337294399984397, "grad_norm": 0.66796875, "learning_rate": 0.00040063418789583764, "loss": 0.1403, "step": 75422 }, { "epoch": 0.1337329861637495, "grad_norm": 0.6875, "learning_rate": 0.0004005967277733068, "loss": 0.1712, "step": 75424 }, { "epoch": 0.13373653232905933, "grad_norm": 0.68359375, "learning_rate": 0.00040055927070955925, "loss": 0.191, "step": 75426 }, { "epoch": 0.13374007849436914, "grad_norm": 0.40234375, "learning_rate": 0.00040052181670475947, "loss": 0.2123, "step": 75428 }, { "epoch": 0.13374362465967896, "grad_norm": 0.416015625, "learning_rate": 0.0004004843657590708, "loss": 0.1515, "step": 75430 }, { "epoch": 0.13374717082498877, "grad_norm": 0.384765625, "learning_rate": 0.000400446917872657, "loss": 0.2049, "step": 75432 }, { "epoch": 0.13375071699029858, "grad_norm": 1.234375, "learning_rate": 0.00040040947304568255, "loss": 0.1512, "step": 75434 }, { "epoch": 0.1337542631556084, "grad_norm": 0.177734375, "learning_rate": 0.00040037203127831026, "loss": 0.1848, "step": 75436 }, { "epoch": 0.13375780932091821, "grad_norm": 0.2578125, "learning_rate": 0.0004003345925707044, "loss": 0.1927, "step": 75438 }, { "epoch": 0.13376135548622803, "grad_norm": 0.173828125, "learning_rate": 0.0004002971569230287, "loss": 0.4195, "step": 75440 }, { "epoch": 0.13376490165153784, "grad_norm": 4.15625, "learning_rate": 0.00040025972433544685, "loss": 0.1992, "step": 75442 }, { "epoch": 0.13376844781684766, "grad_norm": 0.57421875, "learning_rate": 0.0004002222948081224, "loss": 0.1738, "step": 75444 }, { "epoch": 0.13377199398215747, "grad_norm": 0.416015625, "learning_rate": 0.0004001848683412189, "loss": 0.305, "step": 75446 }, { "epoch": 0.1337755401474673, "grad_norm": 0.4140625, "learning_rate": 0.0004001474449349007, "loss": 0.159, "step": 75448 }, { "epoch": 0.1337790863127771, "grad_norm": 0.298828125, "learning_rate": 0.00040011002458933086, "loss": 0.1768, "step": 75450 }, { "epoch": 0.13378263247808692, "grad_norm": 0.4453125, "learning_rate": 0.0004000726073046733, "loss": 0.1799, "step": 75452 }, { "epoch": 0.13378617864339673, "grad_norm": 0.2265625, "learning_rate": 0.0004000351930810915, "loss": 0.2416, "step": 75454 }, { "epoch": 0.13378972480870654, "grad_norm": 0.255859375, "learning_rate": 0.00039999778191874923, "loss": 0.1458, "step": 75456 }, { "epoch": 0.13379327097401636, "grad_norm": 0.490234375, "learning_rate": 0.00039996037381781, "loss": 0.2073, "step": 75458 }, { "epoch": 0.13379681713932617, "grad_norm": 0.2236328125, "learning_rate": 0.0003999229687784377, "loss": 0.1535, "step": 75460 }, { "epoch": 0.133800363304636, "grad_norm": 1.546875, "learning_rate": 0.0003998855668007955, "loss": 0.2389, "step": 75462 }, { "epoch": 0.1338039094699458, "grad_norm": 1.0859375, "learning_rate": 0.00039984816788504697, "loss": 0.5251, "step": 75464 }, { "epoch": 0.13380745563525562, "grad_norm": 0.265625, "learning_rate": 0.0003998107720313563, "loss": 0.1717, "step": 75466 }, { "epoch": 0.13381100180056543, "grad_norm": 0.69921875, "learning_rate": 0.0003997733792398861, "loss": 0.1369, "step": 75468 }, { "epoch": 0.13381454796587525, "grad_norm": 0.322265625, "learning_rate": 0.0003997359895108008, "loss": 0.1663, "step": 75470 }, { "epoch": 0.13381809413118506, "grad_norm": 0.453125, "learning_rate": 0.0003996986028442632, "loss": 0.1739, "step": 75472 }, { "epoch": 0.13382164029649488, "grad_norm": 0.27734375, "learning_rate": 0.00039966121924043754, "loss": 0.1822, "step": 75474 }, { "epoch": 0.1338251864618047, "grad_norm": 6.5625, "learning_rate": 0.00039962383869948637, "loss": 0.3419, "step": 75476 }, { "epoch": 0.1338287326271145, "grad_norm": 1.640625, "learning_rate": 0.00039958646122157407, "loss": 0.2953, "step": 75478 }, { "epoch": 0.13383227879242432, "grad_norm": 5.25, "learning_rate": 0.00039954908680686364, "loss": 0.349, "step": 75480 }, { "epoch": 0.13383582495773413, "grad_norm": 0.24609375, "learning_rate": 0.00039951171545551874, "loss": 0.2452, "step": 75482 }, { "epoch": 0.13383937112304395, "grad_norm": 0.2080078125, "learning_rate": 0.00039947434716770266, "loss": 0.1954, "step": 75484 }, { "epoch": 0.13384291728835376, "grad_norm": 0.64453125, "learning_rate": 0.00039943698194357867, "loss": 0.1356, "step": 75486 }, { "epoch": 0.13384646345366358, "grad_norm": 0.64453125, "learning_rate": 0.00039939961978331055, "loss": 0.2274, "step": 75488 }, { "epoch": 0.1338500096189734, "grad_norm": 0.33984375, "learning_rate": 0.0003993622606870616, "loss": 0.1751, "step": 75490 }, { "epoch": 0.1338535557842832, "grad_norm": 0.3984375, "learning_rate": 0.0003993249046549951, "loss": 0.2172, "step": 75492 }, { "epoch": 0.13385710194959302, "grad_norm": 0.2353515625, "learning_rate": 0.0003992875516872742, "loss": 0.1426, "step": 75494 }, { "epoch": 0.13386064811490284, "grad_norm": 0.5234375, "learning_rate": 0.00039925020178406285, "loss": 0.1761, "step": 75496 }, { "epoch": 0.13386419428021265, "grad_norm": 0.478515625, "learning_rate": 0.00039921285494552407, "loss": 0.1357, "step": 75498 }, { "epoch": 0.13386774044552247, "grad_norm": 0.171875, "learning_rate": 0.00039917551117182106, "loss": 0.1633, "step": 75500 }, { "epoch": 0.1338712866108323, "grad_norm": 0.27734375, "learning_rate": 0.0003991381704631174, "loss": 0.1621, "step": 75502 }, { "epoch": 0.13387483277614212, "grad_norm": 0.2236328125, "learning_rate": 0.000399100832819576, "loss": 0.178, "step": 75504 }, { "epoch": 0.13387837894145194, "grad_norm": 0.328125, "learning_rate": 0.0003990634982413609, "loss": 0.1572, "step": 75506 }, { "epoch": 0.13388192510676175, "grad_norm": 0.390625, "learning_rate": 0.00039902616672863445, "loss": 0.2336, "step": 75508 }, { "epoch": 0.13388547127207157, "grad_norm": 9.625, "learning_rate": 0.0003989888382815606, "loss": 0.2588, "step": 75510 }, { "epoch": 0.13388901743738138, "grad_norm": 0.349609375, "learning_rate": 0.0003989515129003022, "loss": 0.1618, "step": 75512 }, { "epoch": 0.1338925636026912, "grad_norm": 3.921875, "learning_rate": 0.00039891419058502275, "loss": 0.2099, "step": 75514 }, { "epoch": 0.133896109768001, "grad_norm": 0.85546875, "learning_rate": 0.00039887687133588544, "loss": 0.2894, "step": 75516 }, { "epoch": 0.13389965593331082, "grad_norm": 0.77734375, "learning_rate": 0.0003988395551530535, "loss": 0.1957, "step": 75518 }, { "epoch": 0.13390320209862064, "grad_norm": 0.4765625, "learning_rate": 0.00039880224203669003, "loss": 0.2132, "step": 75520 }, { "epoch": 0.13390674826393045, "grad_norm": 0.8984375, "learning_rate": 0.0003987649319869581, "loss": 0.1506, "step": 75522 }, { "epoch": 0.13391029442924027, "grad_norm": 0.1376953125, "learning_rate": 0.0003987276250040214, "loss": 0.1522, "step": 75524 }, { "epoch": 0.13391384059455008, "grad_norm": 0.94140625, "learning_rate": 0.00039869032108804227, "loss": 0.2106, "step": 75526 }, { "epoch": 0.1339173867598599, "grad_norm": 8.5, "learning_rate": 0.0003986530202391846, "loss": 0.4411, "step": 75528 }, { "epoch": 0.1339209329251697, "grad_norm": 0.447265625, "learning_rate": 0.0003986157224576109, "loss": 0.2023, "step": 75530 }, { "epoch": 0.13392447909047953, "grad_norm": 1.1875, "learning_rate": 0.0003985784277434851, "loss": 0.1641, "step": 75532 }, { "epoch": 0.13392802525578934, "grad_norm": 0.44921875, "learning_rate": 0.0003985411360969694, "loss": 0.1902, "step": 75534 }, { "epoch": 0.13393157142109915, "grad_norm": 0.65234375, "learning_rate": 0.0003985038475182276, "loss": 0.1599, "step": 75536 }, { "epoch": 0.13393511758640897, "grad_norm": 1.1171875, "learning_rate": 0.00039846656200742236, "loss": 0.186, "step": 75538 }, { "epoch": 0.13393866375171878, "grad_norm": 0.490234375, "learning_rate": 0.000398429279564717, "loss": 0.1781, "step": 75540 }, { "epoch": 0.1339422099170286, "grad_norm": 0.73828125, "learning_rate": 0.00039839200019027435, "loss": 0.1821, "step": 75542 }, { "epoch": 0.1339457560823384, "grad_norm": 0.45703125, "learning_rate": 0.00039835472388425735, "loss": 0.2114, "step": 75544 }, { "epoch": 0.13394930224764823, "grad_norm": 0.7578125, "learning_rate": 0.00039831745064682956, "loss": 0.2284, "step": 75546 }, { "epoch": 0.13395284841295804, "grad_norm": 0.73828125, "learning_rate": 0.00039828018047815346, "loss": 0.1633, "step": 75548 }, { "epoch": 0.13395639457826786, "grad_norm": 0.294921875, "learning_rate": 0.00039824291337839244, "loss": 0.1629, "step": 75550 }, { "epoch": 0.13395994074357767, "grad_norm": 0.76171875, "learning_rate": 0.00039820564934770885, "loss": 0.2148, "step": 75552 }, { "epoch": 0.13396348690888749, "grad_norm": 0.3359375, "learning_rate": 0.00039816838838626647, "loss": 0.1574, "step": 75554 }, { "epoch": 0.1339670330741973, "grad_norm": 0.9765625, "learning_rate": 0.00039813113049422786, "loss": 0.2148, "step": 75556 }, { "epoch": 0.13397057923950711, "grad_norm": 0.4921875, "learning_rate": 0.00039809387567175605, "loss": 0.1989, "step": 75558 }, { "epoch": 0.13397412540481693, "grad_norm": 1.0625, "learning_rate": 0.0003980566239190138, "loss": 0.2673, "step": 75560 }, { "epoch": 0.13397767157012674, "grad_norm": 0.439453125, "learning_rate": 0.0003980193752361639, "loss": 0.2123, "step": 75562 }, { "epoch": 0.13398121773543656, "grad_norm": 0.39453125, "learning_rate": 0.0003979821296233701, "loss": 0.1796, "step": 75564 }, { "epoch": 0.13398476390074637, "grad_norm": 0.291015625, "learning_rate": 0.00039794488708079414, "loss": 0.1682, "step": 75566 }, { "epoch": 0.1339883100660562, "grad_norm": 0.283203125, "learning_rate": 0.00039790764760859964, "loss": 0.2108, "step": 75568 }, { "epoch": 0.133991856231366, "grad_norm": 0.4453125, "learning_rate": 0.000397870411206949, "loss": 0.1689, "step": 75570 }, { "epoch": 0.13399540239667582, "grad_norm": 0.6484375, "learning_rate": 0.0003978331778760059, "loss": 0.3439, "step": 75572 }, { "epoch": 0.13399894856198563, "grad_norm": 0.32421875, "learning_rate": 0.00039779594761593217, "loss": 0.1595, "step": 75574 }, { "epoch": 0.13400249472729545, "grad_norm": 1.0546875, "learning_rate": 0.0003977587204268913, "loss": 0.2112, "step": 75576 }, { "epoch": 0.13400604089260526, "grad_norm": 0.328125, "learning_rate": 0.00039772149630904586, "loss": 0.1729, "step": 75578 }, { "epoch": 0.13400958705791507, "grad_norm": 1.6484375, "learning_rate": 0.00039768427526255855, "loss": 0.577, "step": 75580 }, { "epoch": 0.1340131332232249, "grad_norm": 0.39453125, "learning_rate": 0.0003976470572875923, "loss": 0.1767, "step": 75582 }, { "epoch": 0.1340166793885347, "grad_norm": 0.333984375, "learning_rate": 0.00039760984238430976, "loss": 0.176, "step": 75584 }, { "epoch": 0.13402022555384452, "grad_norm": 0.31640625, "learning_rate": 0.0003975726305528739, "loss": 0.1631, "step": 75586 }, { "epoch": 0.13402377171915433, "grad_norm": 0.263671875, "learning_rate": 0.0003975354217934473, "loss": 0.1629, "step": 75588 }, { "epoch": 0.13402731788446415, "grad_norm": 0.44921875, "learning_rate": 0.00039749821610619276, "loss": 0.1697, "step": 75590 }, { "epoch": 0.134030864049774, "grad_norm": 0.91796875, "learning_rate": 0.0003974610134912727, "loss": 0.1731, "step": 75592 }, { "epoch": 0.1340344102150838, "grad_norm": 0.5234375, "learning_rate": 0.0003974238139488504, "loss": 0.2903, "step": 75594 }, { "epoch": 0.13403795638039362, "grad_norm": 0.51953125, "learning_rate": 0.0003973866174790882, "loss": 0.1726, "step": 75596 }, { "epoch": 0.13404150254570343, "grad_norm": 0.404296875, "learning_rate": 0.0003973494240821488, "loss": 0.184, "step": 75598 }, { "epoch": 0.13404504871101325, "grad_norm": 1.046875, "learning_rate": 0.0003973122337581949, "loss": 0.188, "step": 75600 }, { "epoch": 0.13404859487632306, "grad_norm": 0.25390625, "learning_rate": 0.00039727504650738875, "loss": 0.1608, "step": 75602 }, { "epoch": 0.13405214104163288, "grad_norm": 0.388671875, "learning_rate": 0.0003972378623298938, "loss": 0.2134, "step": 75604 }, { "epoch": 0.1340556872069427, "grad_norm": 3.71875, "learning_rate": 0.00039720068122587205, "loss": 0.3995, "step": 75606 }, { "epoch": 0.1340592333722525, "grad_norm": 0.87890625, "learning_rate": 0.0003971635031954864, "loss": 0.1509, "step": 75608 }, { "epoch": 0.13406277953756232, "grad_norm": 0.57421875, "learning_rate": 0.00039712632823889904, "loss": 0.1442, "step": 75610 }, { "epoch": 0.13406632570287214, "grad_norm": 0.353515625, "learning_rate": 0.0003970891563562731, "loss": 0.1498, "step": 75612 }, { "epoch": 0.13406987186818195, "grad_norm": 1.46875, "learning_rate": 0.0003970519875477708, "loss": 0.181, "step": 75614 }, { "epoch": 0.13407341803349176, "grad_norm": 1.46875, "learning_rate": 0.000397014821813555, "loss": 0.4134, "step": 75616 }, { "epoch": 0.13407696419880158, "grad_norm": 0.89453125, "learning_rate": 0.00039697765915378783, "loss": 0.3521, "step": 75618 }, { "epoch": 0.1340805103641114, "grad_norm": 0.8671875, "learning_rate": 0.00039694049956863183, "loss": 0.2148, "step": 75620 }, { "epoch": 0.1340840565294212, "grad_norm": 1.6796875, "learning_rate": 0.0003969033430582502, "loss": 0.3247, "step": 75622 }, { "epoch": 0.13408760269473102, "grad_norm": 0.3359375, "learning_rate": 0.0003968661896228044, "loss": 0.1933, "step": 75624 }, { "epoch": 0.13409114886004084, "grad_norm": 0.23828125, "learning_rate": 0.00039682903926245783, "loss": 0.175, "step": 75626 }, { "epoch": 0.13409469502535065, "grad_norm": 0.224609375, "learning_rate": 0.0003967918919773724, "loss": 0.1701, "step": 75628 }, { "epoch": 0.13409824119066047, "grad_norm": 1.203125, "learning_rate": 0.00039675474776771095, "loss": 0.2712, "step": 75630 }, { "epoch": 0.13410178735597028, "grad_norm": 0.2373046875, "learning_rate": 0.00039671760663363573, "loss": 0.1521, "step": 75632 }, { "epoch": 0.1341053335212801, "grad_norm": 0.6875, "learning_rate": 0.0003966804685753088, "loss": 0.2069, "step": 75634 }, { "epoch": 0.1341088796865899, "grad_norm": 0.6640625, "learning_rate": 0.0003966433335928933, "loss": 0.4274, "step": 75636 }, { "epoch": 0.13411242585189972, "grad_norm": 0.1640625, "learning_rate": 0.0003966062016865513, "loss": 0.1555, "step": 75638 }, { "epoch": 0.13411597201720954, "grad_norm": 0.224609375, "learning_rate": 0.00039656907285644515, "loss": 0.1785, "step": 75640 }, { "epoch": 0.13411951818251935, "grad_norm": 0.6171875, "learning_rate": 0.000396531947102737, "loss": 0.2437, "step": 75642 }, { "epoch": 0.13412306434782917, "grad_norm": 0.3515625, "learning_rate": 0.0003964948244255898, "loss": 0.1824, "step": 75644 }, { "epoch": 0.13412661051313898, "grad_norm": 0.5703125, "learning_rate": 0.00039645770482516564, "loss": 0.2082, "step": 75646 }, { "epoch": 0.1341301566784488, "grad_norm": 0.390625, "learning_rate": 0.00039642058830162674, "loss": 0.1837, "step": 75648 }, { "epoch": 0.1341337028437586, "grad_norm": 0.21484375, "learning_rate": 0.0003963834748551355, "loss": 0.1502, "step": 75650 }, { "epoch": 0.13413724900906843, "grad_norm": 0.337890625, "learning_rate": 0.00039634636448585404, "loss": 0.1363, "step": 75652 }, { "epoch": 0.13414079517437824, "grad_norm": 0.30078125, "learning_rate": 0.0003963092571939453, "loss": 0.1496, "step": 75654 }, { "epoch": 0.13414434133968806, "grad_norm": 0.609375, "learning_rate": 0.0003962721529795706, "loss": 0.1921, "step": 75656 }, { "epoch": 0.13414788750499787, "grad_norm": 1.109375, "learning_rate": 0.000396235051842893, "loss": 0.1717, "step": 75658 }, { "epoch": 0.13415143367030768, "grad_norm": 0.30078125, "learning_rate": 0.00039619795378407433, "loss": 0.1505, "step": 75660 }, { "epoch": 0.1341549798356175, "grad_norm": 1.5, "learning_rate": 0.00039616085880327736, "loss": 0.1325, "step": 75662 }, { "epoch": 0.13415852600092731, "grad_norm": 0.30078125, "learning_rate": 0.00039612376690066343, "loss": 0.1277, "step": 75664 }, { "epoch": 0.13416207216623713, "grad_norm": 0.76953125, "learning_rate": 0.0003960866780763956, "loss": 0.1681, "step": 75666 }, { "epoch": 0.13416561833154694, "grad_norm": 0.396484375, "learning_rate": 0.00039604959233063586, "loss": 0.1558, "step": 75668 }, { "epoch": 0.13416916449685676, "grad_norm": 1.390625, "learning_rate": 0.0003960125096635461, "loss": 0.2943, "step": 75670 }, { "epoch": 0.13417271066216657, "grad_norm": 1.09375, "learning_rate": 0.00039597543007528893, "loss": 0.4213, "step": 75672 }, { "epoch": 0.1341762568274764, "grad_norm": 0.6953125, "learning_rate": 0.0003959383535660258, "loss": 0.1611, "step": 75674 }, { "epoch": 0.1341798029927862, "grad_norm": 1.1953125, "learning_rate": 0.0003959012801359197, "loss": 0.2025, "step": 75676 }, { "epoch": 0.13418334915809602, "grad_norm": 0.498046875, "learning_rate": 0.00039586420978513214, "loss": 0.1758, "step": 75678 }, { "epoch": 0.13418689532340583, "grad_norm": 0.29296875, "learning_rate": 0.0003958271425138261, "loss": 0.1709, "step": 75680 }, { "epoch": 0.13419044148871564, "grad_norm": 1.3984375, "learning_rate": 0.0003957900783221626, "loss": 0.2285, "step": 75682 }, { "epoch": 0.1341939876540255, "grad_norm": 1.5859375, "learning_rate": 0.0003957530172103043, "loss": 0.4237, "step": 75684 }, { "epoch": 0.1341975338193353, "grad_norm": 0.30859375, "learning_rate": 0.0003957159591784134, "loss": 0.1733, "step": 75686 }, { "epoch": 0.13420107998464512, "grad_norm": 0.197265625, "learning_rate": 0.0003956789042266519, "loss": 0.2088, "step": 75688 }, { "epoch": 0.13420462614995493, "grad_norm": 0.3046875, "learning_rate": 0.00039564185235518146, "loss": 0.1306, "step": 75690 }, { "epoch": 0.13420817231526475, "grad_norm": 0.69140625, "learning_rate": 0.0003956048035641644, "loss": 0.2273, "step": 75692 }, { "epoch": 0.13421171848057456, "grad_norm": 1.078125, "learning_rate": 0.00039556775785376286, "loss": 0.3261, "step": 75694 }, { "epoch": 0.13421526464588437, "grad_norm": 2.46875, "learning_rate": 0.0003955307152241389, "loss": 0.2637, "step": 75696 }, { "epoch": 0.1342188108111942, "grad_norm": 0.52734375, "learning_rate": 0.00039549367567545435, "loss": 0.1369, "step": 75698 }, { "epoch": 0.134222356976504, "grad_norm": 0.294921875, "learning_rate": 0.00039545663920787096, "loss": 0.1616, "step": 75700 }, { "epoch": 0.13422590314181382, "grad_norm": 0.26171875, "learning_rate": 0.0003954196058215512, "loss": 0.1516, "step": 75702 }, { "epoch": 0.13422944930712363, "grad_norm": 0.146484375, "learning_rate": 0.0003953825755166569, "loss": 0.1597, "step": 75704 }, { "epoch": 0.13423299547243345, "grad_norm": 0.34765625, "learning_rate": 0.0003953455482933498, "loss": 0.183, "step": 75706 }, { "epoch": 0.13423654163774326, "grad_norm": 0.26171875, "learning_rate": 0.0003953085241517921, "loss": 0.175, "step": 75708 }, { "epoch": 0.13424008780305308, "grad_norm": 0.421875, "learning_rate": 0.0003952715030921452, "loss": 0.1368, "step": 75710 }, { "epoch": 0.1342436339683629, "grad_norm": 0.1845703125, "learning_rate": 0.00039523448511457205, "loss": 0.1729, "step": 75712 }, { "epoch": 0.1342471801336727, "grad_norm": 0.796875, "learning_rate": 0.00039519747021923325, "loss": 0.125, "step": 75714 }, { "epoch": 0.13425072629898252, "grad_norm": 0.451171875, "learning_rate": 0.0003951604584062917, "loss": 0.1369, "step": 75716 }, { "epoch": 0.13425427246429233, "grad_norm": 1.2265625, "learning_rate": 0.00039512344967590853, "loss": 0.1722, "step": 75718 }, { "epoch": 0.13425781862960215, "grad_norm": 0.984375, "learning_rate": 0.00039508644402824647, "loss": 0.2485, "step": 75720 }, { "epoch": 0.13426136479491196, "grad_norm": 0.458984375, "learning_rate": 0.00039504944146346633, "loss": 0.1703, "step": 75722 }, { "epoch": 0.13426491096022178, "grad_norm": 0.251953125, "learning_rate": 0.0003950124419817307, "loss": 0.1696, "step": 75724 }, { "epoch": 0.1342684571255316, "grad_norm": 0.251953125, "learning_rate": 0.0003949754455832012, "loss": 0.1674, "step": 75726 }, { "epoch": 0.1342720032908414, "grad_norm": 0.453125, "learning_rate": 0.0003949384522680395, "loss": 0.1689, "step": 75728 }, { "epoch": 0.13427554945615122, "grad_norm": 0.3828125, "learning_rate": 0.0003949014620364075, "loss": 0.1611, "step": 75730 }, { "epoch": 0.13427909562146104, "grad_norm": 0.328125, "learning_rate": 0.00039486447488846664, "loss": 0.1774, "step": 75732 }, { "epoch": 0.13428264178677085, "grad_norm": 0.421875, "learning_rate": 0.00039482749082437923, "loss": 0.2358, "step": 75734 }, { "epoch": 0.13428618795208067, "grad_norm": 0.6796875, "learning_rate": 0.00039479050984430686, "loss": 0.1824, "step": 75736 }, { "epoch": 0.13428973411739048, "grad_norm": 0.28125, "learning_rate": 0.000394753531948411, "loss": 0.1623, "step": 75738 }, { "epoch": 0.1342932802827003, "grad_norm": 0.373046875, "learning_rate": 0.0003947165571368534, "loss": 0.1911, "step": 75740 }, { "epoch": 0.1342968264480101, "grad_norm": 0.43359375, "learning_rate": 0.0003946795854097962, "loss": 0.1655, "step": 75742 }, { "epoch": 0.13430037261331992, "grad_norm": 0.80859375, "learning_rate": 0.0003946426167674008, "loss": 0.185, "step": 75744 }, { "epoch": 0.13430391877862974, "grad_norm": 0.5234375, "learning_rate": 0.0003946056512098287, "loss": 0.1564, "step": 75746 }, { "epoch": 0.13430746494393955, "grad_norm": 0.466796875, "learning_rate": 0.0003945686887372418, "loss": 0.2214, "step": 75748 }, { "epoch": 0.13431101110924937, "grad_norm": 0.859375, "learning_rate": 0.00039453172934980163, "loss": 0.1766, "step": 75750 }, { "epoch": 0.13431455727455918, "grad_norm": 0.82421875, "learning_rate": 0.0003944947730476702, "loss": 0.2437, "step": 75752 }, { "epoch": 0.134318103439869, "grad_norm": 2.515625, "learning_rate": 0.0003944578198310084, "loss": 0.2141, "step": 75754 }, { "epoch": 0.1343216496051788, "grad_norm": 0.37890625, "learning_rate": 0.00039442086969997846, "loss": 0.2067, "step": 75756 }, { "epoch": 0.13432519577048863, "grad_norm": 0.75390625, "learning_rate": 0.0003943839226547416, "loss": 0.2208, "step": 75758 }, { "epoch": 0.13432874193579844, "grad_norm": 1.4765625, "learning_rate": 0.0003943469786954597, "loss": 0.2417, "step": 75760 }, { "epoch": 0.13433228810110825, "grad_norm": 0.373046875, "learning_rate": 0.00039431003782229443, "loss": 0.1929, "step": 75762 }, { "epoch": 0.13433583426641807, "grad_norm": 1.078125, "learning_rate": 0.00039427310003540704, "loss": 0.2686, "step": 75764 }, { "epoch": 0.13433938043172788, "grad_norm": 0.333984375, "learning_rate": 0.00039423616533495917, "loss": 0.2231, "step": 75766 }, { "epoch": 0.1343429265970377, "grad_norm": 0.77734375, "learning_rate": 0.0003941992337211121, "loss": 0.2235, "step": 75768 }, { "epoch": 0.1343464727623475, "grad_norm": 1.234375, "learning_rate": 0.0003941623051940281, "loss": 0.4582, "step": 75770 }, { "epoch": 0.13435001892765733, "grad_norm": 0.6328125, "learning_rate": 0.00039412537975386773, "loss": 0.2564, "step": 75772 }, { "epoch": 0.13435356509296717, "grad_norm": 0.36328125, "learning_rate": 0.00039408845740079314, "loss": 0.2013, "step": 75774 }, { "epoch": 0.13435711125827698, "grad_norm": 0.6328125, "learning_rate": 0.00039405153813496543, "loss": 0.1825, "step": 75776 }, { "epoch": 0.1343606574235868, "grad_norm": 1.359375, "learning_rate": 0.0003940146219565465, "loss": 0.3471, "step": 75778 }, { "epoch": 0.1343642035888966, "grad_norm": 0.369140625, "learning_rate": 0.00039397770886569726, "loss": 0.2404, "step": 75780 }, { "epoch": 0.13436774975420643, "grad_norm": 1.2578125, "learning_rate": 0.0003939407988625796, "loss": 0.2116, "step": 75782 }, { "epoch": 0.13437129591951624, "grad_norm": 2.1875, "learning_rate": 0.0003939038919473547, "loss": 0.2451, "step": 75784 }, { "epoch": 0.13437484208482606, "grad_norm": 0.578125, "learning_rate": 0.0003938669881201841, "loss": 0.2154, "step": 75786 }, { "epoch": 0.13437838825013587, "grad_norm": 0.462890625, "learning_rate": 0.0003938300873812291, "loss": 0.2079, "step": 75788 }, { "epoch": 0.1343819344154457, "grad_norm": 0.5234375, "learning_rate": 0.0003937931897306508, "loss": 0.2036, "step": 75790 }, { "epoch": 0.1343854805807555, "grad_norm": 0.76171875, "learning_rate": 0.0003937562951686112, "loss": 0.2013, "step": 75792 }, { "epoch": 0.13438902674606532, "grad_norm": 0.54296875, "learning_rate": 0.0003937194036952714, "loss": 0.1537, "step": 75794 }, { "epoch": 0.13439257291137513, "grad_norm": 0.189453125, "learning_rate": 0.0003936825153107925, "loss": 0.1269, "step": 75796 }, { "epoch": 0.13439611907668494, "grad_norm": 0.69921875, "learning_rate": 0.00039364563001533605, "loss": 0.146, "step": 75798 }, { "epoch": 0.13439966524199476, "grad_norm": 0.291015625, "learning_rate": 0.0003936087478090631, "loss": 0.1669, "step": 75800 }, { "epoch": 0.13440321140730457, "grad_norm": 1.2421875, "learning_rate": 0.0003935718686921356, "loss": 0.3283, "step": 75802 }, { "epoch": 0.1344067575726144, "grad_norm": 0.2265625, "learning_rate": 0.00039353499266471395, "loss": 0.1426, "step": 75804 }, { "epoch": 0.1344103037379242, "grad_norm": 4.25, "learning_rate": 0.00039349811972696016, "loss": 0.5302, "step": 75806 }, { "epoch": 0.13441384990323402, "grad_norm": 2.296875, "learning_rate": 0.00039346124987903495, "loss": 0.2299, "step": 75808 }, { "epoch": 0.13441739606854383, "grad_norm": 0.65234375, "learning_rate": 0.00039342438312110035, "loss": 0.2611, "step": 75810 }, { "epoch": 0.13442094223385365, "grad_norm": 0.5234375, "learning_rate": 0.00039338751945331655, "loss": 0.2509, "step": 75812 }, { "epoch": 0.13442448839916346, "grad_norm": 0.283203125, "learning_rate": 0.00039335065887584553, "loss": 0.1761, "step": 75814 }, { "epoch": 0.13442803456447328, "grad_norm": 0.53125, "learning_rate": 0.00039331380138884814, "loss": 0.1866, "step": 75816 }, { "epoch": 0.1344315807297831, "grad_norm": 0.27734375, "learning_rate": 0.00039327694699248586, "loss": 0.1513, "step": 75818 }, { "epoch": 0.1344351268950929, "grad_norm": 0.453125, "learning_rate": 0.00039324009568691965, "loss": 0.1897, "step": 75820 }, { "epoch": 0.13443867306040272, "grad_norm": 0.6640625, "learning_rate": 0.0003932032474723105, "loss": 0.1801, "step": 75822 }, { "epoch": 0.13444221922571253, "grad_norm": 1.1875, "learning_rate": 0.00039316640234882, "loss": 0.2516, "step": 75824 }, { "epoch": 0.13444576539102235, "grad_norm": 0.431640625, "learning_rate": 0.00039312956031660907, "loss": 0.1791, "step": 75826 }, { "epoch": 0.13444931155633216, "grad_norm": 0.56640625, "learning_rate": 0.00039309272137583883, "loss": 0.1587, "step": 75828 }, { "epoch": 0.13445285772164198, "grad_norm": 0.609375, "learning_rate": 0.0003930558855266702, "loss": 0.1868, "step": 75830 }, { "epoch": 0.1344564038869518, "grad_norm": 0.6796875, "learning_rate": 0.0003930190527692647, "loss": 0.4024, "step": 75832 }, { "epoch": 0.1344599500522616, "grad_norm": 0.43359375, "learning_rate": 0.0003929822231037833, "loss": 0.2238, "step": 75834 }, { "epoch": 0.13446349621757142, "grad_norm": 0.458984375, "learning_rate": 0.0003929453965303867, "loss": 0.233, "step": 75836 }, { "epoch": 0.13446704238288124, "grad_norm": 0.310546875, "learning_rate": 0.00039290857304923656, "loss": 0.1606, "step": 75838 }, { "epoch": 0.13447058854819105, "grad_norm": 0.318359375, "learning_rate": 0.0003928717526604932, "loss": 0.1234, "step": 75840 }, { "epoch": 0.13447413471350086, "grad_norm": 0.25, "learning_rate": 0.0003928349353643185, "loss": 0.2106, "step": 75842 }, { "epoch": 0.13447768087881068, "grad_norm": 0.302734375, "learning_rate": 0.00039279812116087265, "loss": 0.1909, "step": 75844 }, { "epoch": 0.1344812270441205, "grad_norm": 0.5078125, "learning_rate": 0.00039276131005031717, "loss": 0.2046, "step": 75846 }, { "epoch": 0.1344847732094303, "grad_norm": 0.341796875, "learning_rate": 0.0003927245020328126, "loss": 0.1507, "step": 75848 }, { "epoch": 0.13448831937474012, "grad_norm": 0.37890625, "learning_rate": 0.0003926876971085207, "loss": 0.1762, "step": 75850 }, { "epoch": 0.13449186554004994, "grad_norm": 0.396484375, "learning_rate": 0.00039265089527760176, "loss": 0.1368, "step": 75852 }, { "epoch": 0.13449541170535975, "grad_norm": 0.5234375, "learning_rate": 0.000392614096540217, "loss": 0.192, "step": 75854 }, { "epoch": 0.13449895787066957, "grad_norm": 0.3203125, "learning_rate": 0.0003925773008965272, "loss": 0.1689, "step": 75856 }, { "epoch": 0.13450250403597938, "grad_norm": 0.267578125, "learning_rate": 0.0003925405083466932, "loss": 0.1737, "step": 75858 }, { "epoch": 0.1345060502012892, "grad_norm": 0.5546875, "learning_rate": 0.00039250371889087656, "loss": 0.1935, "step": 75860 }, { "epoch": 0.134509596366599, "grad_norm": 0.337890625, "learning_rate": 0.00039246693252923727, "loss": 0.142, "step": 75862 }, { "epoch": 0.13451314253190885, "grad_norm": 0.4375, "learning_rate": 0.00039243014926193667, "loss": 0.2142, "step": 75864 }, { "epoch": 0.13451668869721867, "grad_norm": 0.54296875, "learning_rate": 0.0003923933690891355, "loss": 0.1589, "step": 75866 }, { "epoch": 0.13452023486252848, "grad_norm": 0.59375, "learning_rate": 0.0003923565920109952, "loss": 0.1596, "step": 75868 }, { "epoch": 0.1345237810278383, "grad_norm": 0.1787109375, "learning_rate": 0.0003923198180276757, "loss": 0.1315, "step": 75870 }, { "epoch": 0.1345273271931481, "grad_norm": 0.404296875, "learning_rate": 0.0003922830471393384, "loss": 0.1361, "step": 75872 }, { "epoch": 0.13453087335845793, "grad_norm": 0.330078125, "learning_rate": 0.0003922462793461439, "loss": 0.1294, "step": 75874 }, { "epoch": 0.13453441952376774, "grad_norm": 1.015625, "learning_rate": 0.0003922095146482531, "loss": 0.2332, "step": 75876 }, { "epoch": 0.13453796568907755, "grad_norm": 0.294921875, "learning_rate": 0.0003921727530458268, "loss": 0.1458, "step": 75878 }, { "epoch": 0.13454151185438737, "grad_norm": 0.451171875, "learning_rate": 0.00039213599453902535, "loss": 0.1632, "step": 75880 }, { "epoch": 0.13454505801969718, "grad_norm": 0.263671875, "learning_rate": 0.00039209923912801023, "loss": 0.1727, "step": 75882 }, { "epoch": 0.134548604185007, "grad_norm": 0.20703125, "learning_rate": 0.00039206248681294183, "loss": 0.1804, "step": 75884 }, { "epoch": 0.1345521503503168, "grad_norm": 0.6796875, "learning_rate": 0.0003920257375939809, "loss": 0.1646, "step": 75886 }, { "epoch": 0.13455569651562663, "grad_norm": 0.357421875, "learning_rate": 0.00039198899147128795, "loss": 0.16, "step": 75888 }, { "epoch": 0.13455924268093644, "grad_norm": 0.6015625, "learning_rate": 0.0003919522484450241, "loss": 0.1747, "step": 75890 }, { "epoch": 0.13456278884624626, "grad_norm": 0.625, "learning_rate": 0.00039191550851534973, "loss": 0.1668, "step": 75892 }, { "epoch": 0.13456633501155607, "grad_norm": 0.34375, "learning_rate": 0.0003918787716824257, "loss": 0.2054, "step": 75894 }, { "epoch": 0.13456988117686589, "grad_norm": 0.232421875, "learning_rate": 0.0003918420379464126, "loss": 0.1967, "step": 75896 }, { "epoch": 0.1345734273421757, "grad_norm": 1.2265625, "learning_rate": 0.000391805307307471, "loss": 0.2427, "step": 75898 }, { "epoch": 0.13457697350748551, "grad_norm": 0.53515625, "learning_rate": 0.00039176857976576184, "loss": 0.1858, "step": 75900 }, { "epoch": 0.13458051967279533, "grad_norm": 0.298828125, "learning_rate": 0.00039173185532144516, "loss": 0.1842, "step": 75902 }, { "epoch": 0.13458406583810514, "grad_norm": 0.6171875, "learning_rate": 0.0003916951339746822, "loss": 0.1638, "step": 75904 }, { "epoch": 0.13458761200341496, "grad_norm": 1.5546875, "learning_rate": 0.0003916584157256329, "loss": 0.3069, "step": 75906 }, { "epoch": 0.13459115816872477, "grad_norm": 0.7734375, "learning_rate": 0.0003916217005744589, "loss": 0.1371, "step": 75908 }, { "epoch": 0.1345947043340346, "grad_norm": 0.55078125, "learning_rate": 0.00039158498852131954, "loss": 0.2025, "step": 75910 }, { "epoch": 0.1345982504993444, "grad_norm": 0.6796875, "learning_rate": 0.00039154827956637626, "loss": 0.1425, "step": 75912 }, { "epoch": 0.13460179666465422, "grad_norm": 0.267578125, "learning_rate": 0.00039151157370978924, "loss": 0.1455, "step": 75914 }, { "epoch": 0.13460534282996403, "grad_norm": 0.365234375, "learning_rate": 0.00039147487095171895, "loss": 0.1597, "step": 75916 }, { "epoch": 0.13460888899527385, "grad_norm": 0.271484375, "learning_rate": 0.00039143817129232606, "loss": 0.1321, "step": 75918 }, { "epoch": 0.13461243516058366, "grad_norm": 0.42578125, "learning_rate": 0.0003914014747317709, "loss": 0.1407, "step": 75920 }, { "epoch": 0.13461598132589347, "grad_norm": 1.15625, "learning_rate": 0.0003913647812702144, "loss": 0.1996, "step": 75922 }, { "epoch": 0.1346195274912033, "grad_norm": 0.2578125, "learning_rate": 0.0003913280909078164, "loss": 0.1696, "step": 75924 }, { "epoch": 0.1346230736565131, "grad_norm": 0.2373046875, "learning_rate": 0.0003912914036447379, "loss": 0.4098, "step": 75926 }, { "epoch": 0.13462661982182292, "grad_norm": 0.28125, "learning_rate": 0.00039125471948113884, "loss": 0.1548, "step": 75928 }, { "epoch": 0.13463016598713273, "grad_norm": 0.70703125, "learning_rate": 0.0003912180384171803, "loss": 0.2018, "step": 75930 }, { "epoch": 0.13463371215244255, "grad_norm": 0.484375, "learning_rate": 0.00039118136045302224, "loss": 0.1804, "step": 75932 }, { "epoch": 0.13463725831775236, "grad_norm": 0.287109375, "learning_rate": 0.00039114468558882514, "loss": 0.1429, "step": 75934 }, { "epoch": 0.13464080448306218, "grad_norm": 1.53125, "learning_rate": 0.0003911080138247496, "loss": 0.1348, "step": 75936 }, { "epoch": 0.134644350648372, "grad_norm": 0.71875, "learning_rate": 0.00039107134516095537, "loss": 0.1575, "step": 75938 }, { "epoch": 0.1346478968136818, "grad_norm": 0.9765625, "learning_rate": 0.00039103467959760373, "loss": 0.209, "step": 75940 }, { "epoch": 0.13465144297899162, "grad_norm": 0.376953125, "learning_rate": 0.0003909980171348544, "loss": 0.2021, "step": 75942 }, { "epoch": 0.13465498914430143, "grad_norm": 0.4453125, "learning_rate": 0.00039096135777286813, "loss": 0.1745, "step": 75944 }, { "epoch": 0.13465853530961125, "grad_norm": 0.306640625, "learning_rate": 0.0003909247015118046, "loss": 0.1316, "step": 75946 }, { "epoch": 0.13466208147492106, "grad_norm": 0.32421875, "learning_rate": 0.0003908880483518248, "loss": 0.2323, "step": 75948 }, { "epoch": 0.13466562764023088, "grad_norm": 0.392578125, "learning_rate": 0.0003908513982930888, "loss": 0.179, "step": 75950 }, { "epoch": 0.1346691738055407, "grad_norm": 0.58984375, "learning_rate": 0.00039081475133575686, "loss": 0.1932, "step": 75952 }, { "epoch": 0.1346727199708505, "grad_norm": 0.392578125, "learning_rate": 0.00039077810747998926, "loss": 0.1488, "step": 75954 }, { "epoch": 0.13467626613616035, "grad_norm": 0.390625, "learning_rate": 0.00039074146672594614, "loss": 0.1755, "step": 75956 }, { "epoch": 0.13467981230147016, "grad_norm": 0.78515625, "learning_rate": 0.00039070482907378816, "loss": 0.2319, "step": 75958 }, { "epoch": 0.13468335846677998, "grad_norm": 0.306640625, "learning_rate": 0.0003906681945236748, "loss": 0.1698, "step": 75960 }, { "epoch": 0.1346869046320898, "grad_norm": 0.2578125, "learning_rate": 0.00039063156307576696, "loss": 0.1108, "step": 75962 }, { "epoch": 0.1346904507973996, "grad_norm": 0.337890625, "learning_rate": 0.00039059493473022467, "loss": 0.1758, "step": 75964 }, { "epoch": 0.13469399696270942, "grad_norm": 0.328125, "learning_rate": 0.00039055830948720805, "loss": 0.172, "step": 75966 }, { "epoch": 0.13469754312801924, "grad_norm": 0.59765625, "learning_rate": 0.00039052168734687707, "loss": 0.1757, "step": 75968 }, { "epoch": 0.13470108929332905, "grad_norm": 0.4296875, "learning_rate": 0.00039048506830939225, "loss": 0.1494, "step": 75970 }, { "epoch": 0.13470463545863887, "grad_norm": 1.2421875, "learning_rate": 0.00039044845237491373, "loss": 0.1979, "step": 75972 }, { "epoch": 0.13470818162394868, "grad_norm": 0.16796875, "learning_rate": 0.0003904118395436015, "loss": 0.1258, "step": 75974 }, { "epoch": 0.1347117277892585, "grad_norm": 0.400390625, "learning_rate": 0.0003903752298156157, "loss": 0.2588, "step": 75976 }, { "epoch": 0.1347152739545683, "grad_norm": 0.310546875, "learning_rate": 0.00039033862319111617, "loss": 0.1203, "step": 75978 }, { "epoch": 0.13471882011987812, "grad_norm": 0.310546875, "learning_rate": 0.00039030201967026355, "loss": 0.1754, "step": 75980 }, { "epoch": 0.13472236628518794, "grad_norm": 0.279296875, "learning_rate": 0.00039026541925321764, "loss": 0.152, "step": 75982 }, { "epoch": 0.13472591245049775, "grad_norm": 3.375, "learning_rate": 0.0003902288219401385, "loss": 0.2822, "step": 75984 }, { "epoch": 0.13472945861580757, "grad_norm": 1.203125, "learning_rate": 0.0003901922277311863, "loss": 0.1852, "step": 75986 }, { "epoch": 0.13473300478111738, "grad_norm": 0.306640625, "learning_rate": 0.00039015563662652065, "loss": 0.1894, "step": 75988 }, { "epoch": 0.1347365509464272, "grad_norm": 0.36328125, "learning_rate": 0.00039011904862630244, "loss": 0.1317, "step": 75990 }, { "epoch": 0.134740097111737, "grad_norm": 0.431640625, "learning_rate": 0.0003900824637306907, "loss": 0.1625, "step": 75992 }, { "epoch": 0.13474364327704683, "grad_norm": 1.8359375, "learning_rate": 0.00039004588193984613, "loss": 0.3393, "step": 75994 }, { "epoch": 0.13474718944235664, "grad_norm": 0.30859375, "learning_rate": 0.0003900093032539281, "loss": 0.1609, "step": 75996 }, { "epoch": 0.13475073560766646, "grad_norm": 0.24609375, "learning_rate": 0.00038997272767309765, "loss": 0.1607, "step": 75998 }, { "epoch": 0.13475428177297627, "grad_norm": 0.34375, "learning_rate": 0.0003899361551975136, "loss": 0.1806, "step": 76000 }, { "epoch": 0.13475782793828608, "grad_norm": 0.1875, "learning_rate": 0.00038989958582733643, "loss": 0.1961, "step": 76002 }, { "epoch": 0.1347613741035959, "grad_norm": 0.1279296875, "learning_rate": 0.00038986301956272606, "loss": 0.1391, "step": 76004 }, { "epoch": 0.1347649202689057, "grad_norm": 0.205078125, "learning_rate": 0.0003898264564038423, "loss": 0.1573, "step": 76006 }, { "epoch": 0.13476846643421553, "grad_norm": 0.2734375, "learning_rate": 0.0003897898963508452, "loss": 0.1868, "step": 76008 }, { "epoch": 0.13477201259952534, "grad_norm": 0.291015625, "learning_rate": 0.0003897533394038942, "loss": 0.1627, "step": 76010 }, { "epoch": 0.13477555876483516, "grad_norm": 1.7578125, "learning_rate": 0.0003897167855631499, "loss": 0.2169, "step": 76012 }, { "epoch": 0.13477910493014497, "grad_norm": 0.416015625, "learning_rate": 0.00038968023482877144, "loss": 0.2241, "step": 76014 }, { "epoch": 0.1347826510954548, "grad_norm": 0.498046875, "learning_rate": 0.0003896436872009196, "loss": 0.172, "step": 76016 }, { "epoch": 0.1347861972607646, "grad_norm": 0.439453125, "learning_rate": 0.00038960714267975303, "loss": 0.2052, "step": 76018 }, { "epoch": 0.13478974342607442, "grad_norm": 0.34375, "learning_rate": 0.0003895706012654325, "loss": 0.1298, "step": 76020 }, { "epoch": 0.13479328959138423, "grad_norm": 0.7265625, "learning_rate": 0.0003895340629581176, "loss": 0.2193, "step": 76022 }, { "epoch": 0.13479683575669404, "grad_norm": 2.109375, "learning_rate": 0.00038949752775796796, "loss": 0.2319, "step": 76024 }, { "epoch": 0.13480038192200386, "grad_norm": 0.2490234375, "learning_rate": 0.0003894609956651434, "loss": 0.1521, "step": 76026 }, { "epoch": 0.13480392808731367, "grad_norm": 0.70703125, "learning_rate": 0.00038942446667980345, "loss": 0.1361, "step": 76028 }, { "epoch": 0.1348074742526235, "grad_norm": 0.337890625, "learning_rate": 0.00038938794080210835, "loss": 0.1435, "step": 76030 }, { "epoch": 0.1348110204179333, "grad_norm": 0.55859375, "learning_rate": 0.00038935141803221775, "loss": 0.1911, "step": 76032 }, { "epoch": 0.13481456658324312, "grad_norm": 0.18359375, "learning_rate": 0.0003893148983702911, "loss": 0.1695, "step": 76034 }, { "epoch": 0.13481811274855293, "grad_norm": 0.7421875, "learning_rate": 0.0003892783818164881, "loss": 0.2605, "step": 76036 }, { "epoch": 0.13482165891386275, "grad_norm": 0.34375, "learning_rate": 0.00038924186837096885, "loss": 0.1685, "step": 76038 }, { "epoch": 0.13482520507917256, "grad_norm": 1.03125, "learning_rate": 0.0003892053580338929, "loss": 0.1523, "step": 76040 }, { "epoch": 0.13482875124448238, "grad_norm": 0.255859375, "learning_rate": 0.0003891688508054197, "loss": 0.1898, "step": 76042 }, { "epoch": 0.1348322974097922, "grad_norm": 0.359375, "learning_rate": 0.00038913234668570904, "loss": 0.1563, "step": 76044 }, { "epoch": 0.13483584357510203, "grad_norm": 0.578125, "learning_rate": 0.0003890958456749204, "loss": 0.1579, "step": 76046 }, { "epoch": 0.13483938974041185, "grad_norm": 0.2412109375, "learning_rate": 0.0003890593477732141, "loss": 0.1835, "step": 76048 }, { "epoch": 0.13484293590572166, "grad_norm": 0.546875, "learning_rate": 0.0003890228529807488, "loss": 0.2453, "step": 76050 }, { "epoch": 0.13484648207103148, "grad_norm": 0.349609375, "learning_rate": 0.00038898636129768466, "loss": 0.1653, "step": 76052 }, { "epoch": 0.1348500282363413, "grad_norm": 0.2265625, "learning_rate": 0.000388949872724181, "loss": 0.1552, "step": 76054 }, { "epoch": 0.1348535744016511, "grad_norm": 0.380859375, "learning_rate": 0.000388913387260398, "loss": 0.1409, "step": 76056 }, { "epoch": 0.13485712056696092, "grad_norm": 0.3203125, "learning_rate": 0.00038887690490649443, "loss": 0.1825, "step": 76058 }, { "epoch": 0.13486066673227073, "grad_norm": 0.26953125, "learning_rate": 0.0003888404256626304, "loss": 0.1684, "step": 76060 }, { "epoch": 0.13486421289758055, "grad_norm": 0.5625, "learning_rate": 0.00038880394952896523, "loss": 0.1829, "step": 76062 }, { "epoch": 0.13486775906289036, "grad_norm": 0.859375, "learning_rate": 0.0003887674765056585, "loss": 0.1855, "step": 76064 }, { "epoch": 0.13487130522820018, "grad_norm": 0.314453125, "learning_rate": 0.00038873100659286966, "loss": 0.1731, "step": 76066 }, { "epoch": 0.13487485139351, "grad_norm": 0.205078125, "learning_rate": 0.00038869453979075797, "loss": 0.1702, "step": 76068 }, { "epoch": 0.1348783975588198, "grad_norm": 0.359375, "learning_rate": 0.00038865807609948354, "loss": 0.191, "step": 76070 }, { "epoch": 0.13488194372412962, "grad_norm": 0.89453125, "learning_rate": 0.0003886216155192054, "loss": 0.2958, "step": 76072 }, { "epoch": 0.13488548988943944, "grad_norm": 0.39453125, "learning_rate": 0.0003885851580500832, "loss": 0.1809, "step": 76074 }, { "epoch": 0.13488903605474925, "grad_norm": 0.140625, "learning_rate": 0.0003885487036922759, "loss": 0.1666, "step": 76076 }, { "epoch": 0.13489258222005907, "grad_norm": 0.310546875, "learning_rate": 0.0003885122524459437, "loss": 0.188, "step": 76078 }, { "epoch": 0.13489612838536888, "grad_norm": 0.6640625, "learning_rate": 0.0003884758043112455, "loss": 0.1749, "step": 76080 }, { "epoch": 0.1348996745506787, "grad_norm": 0.234375, "learning_rate": 0.0003884393592883408, "loss": 0.1741, "step": 76082 }, { "epoch": 0.1349032207159885, "grad_norm": 0.373046875, "learning_rate": 0.0003884029173773891, "loss": 0.2275, "step": 76084 }, { "epoch": 0.13490676688129832, "grad_norm": 0.267578125, "learning_rate": 0.00038836647857854947, "loss": 0.1722, "step": 76086 }, { "epoch": 0.13491031304660814, "grad_norm": 0.30859375, "learning_rate": 0.00038833004289198195, "loss": 0.1947, "step": 76088 }, { "epoch": 0.13491385921191795, "grad_norm": 0.474609375, "learning_rate": 0.00038829361031784493, "loss": 0.2315, "step": 76090 }, { "epoch": 0.13491740537722777, "grad_norm": 0.314453125, "learning_rate": 0.00038825718085629856, "loss": 0.1558, "step": 76092 }, { "epoch": 0.13492095154253758, "grad_norm": 0.361328125, "learning_rate": 0.00038822075450750155, "loss": 0.1888, "step": 76094 }, { "epoch": 0.1349244977078474, "grad_norm": 0.326171875, "learning_rate": 0.0003881843312716138, "loss": 0.18, "step": 76096 }, { "epoch": 0.1349280438731572, "grad_norm": 0.44921875, "learning_rate": 0.0003881479111487942, "loss": 0.1532, "step": 76098 }, { "epoch": 0.13493159003846703, "grad_norm": 0.5, "learning_rate": 0.0003881114941392022, "loss": 0.1516, "step": 76100 }, { "epoch": 0.13493513620377684, "grad_norm": 0.2734375, "learning_rate": 0.0003880750802429971, "loss": 0.1667, "step": 76102 }, { "epoch": 0.13493868236908665, "grad_norm": 0.5078125, "learning_rate": 0.0003880386694603377, "loss": 0.1836, "step": 76104 }, { "epoch": 0.13494222853439647, "grad_norm": 0.392578125, "learning_rate": 0.00038800226179138423, "loss": 0.1904, "step": 76106 }, { "epoch": 0.13494577469970628, "grad_norm": 0.287109375, "learning_rate": 0.00038796585723629463, "loss": 0.1593, "step": 76108 }, { "epoch": 0.1349493208650161, "grad_norm": 0.1826171875, "learning_rate": 0.0003879294557952291, "loss": 0.1748, "step": 76110 }, { "epoch": 0.1349528670303259, "grad_norm": 0.8828125, "learning_rate": 0.0003878930574683464, "loss": 0.1692, "step": 76112 }, { "epoch": 0.13495641319563573, "grad_norm": 1.1796875, "learning_rate": 0.0003878566622558061, "loss": 0.2133, "step": 76114 }, { "epoch": 0.13495995936094554, "grad_norm": 0.58203125, "learning_rate": 0.00038782027015776675, "loss": 0.1419, "step": 76116 }, { "epoch": 0.13496350552625536, "grad_norm": 0.5703125, "learning_rate": 0.000387783881174388, "loss": 0.1521, "step": 76118 }, { "epoch": 0.13496705169156517, "grad_norm": 0.3828125, "learning_rate": 0.0003877474953058289, "loss": 0.1599, "step": 76120 }, { "epoch": 0.13497059785687499, "grad_norm": 0.609375, "learning_rate": 0.00038771111255224845, "loss": 0.2641, "step": 76122 }, { "epoch": 0.1349741440221848, "grad_norm": 0.490234375, "learning_rate": 0.00038767473291380594, "loss": 0.1945, "step": 76124 }, { "epoch": 0.13497769018749461, "grad_norm": 0.625, "learning_rate": 0.00038763835639066014, "loss": 0.2086, "step": 76126 }, { "epoch": 0.13498123635280443, "grad_norm": 0.380859375, "learning_rate": 0.0003876019829829707, "loss": 0.1807, "step": 76128 }, { "epoch": 0.13498478251811424, "grad_norm": 0.58203125, "learning_rate": 0.0003875656126908962, "loss": 0.1668, "step": 76130 }, { "epoch": 0.13498832868342406, "grad_norm": 0.8828125, "learning_rate": 0.0003875292455145959, "loss": 0.1463, "step": 76132 }, { "epoch": 0.13499187484873387, "grad_norm": 0.41796875, "learning_rate": 0.0003874928814542287, "loss": 0.1709, "step": 76134 }, { "epoch": 0.13499542101404371, "grad_norm": 0.39453125, "learning_rate": 0.00038745652050995393, "loss": 0.173, "step": 76136 }, { "epoch": 0.13499896717935353, "grad_norm": 0.734375, "learning_rate": 0.00038742016268193045, "loss": 0.1746, "step": 76138 }, { "epoch": 0.13500251334466334, "grad_norm": 0.296875, "learning_rate": 0.00038738380797031727, "loss": 0.1393, "step": 76140 }, { "epoch": 0.13500605950997316, "grad_norm": 0.337890625, "learning_rate": 0.00038734745637527347, "loss": 0.1823, "step": 76142 }, { "epoch": 0.13500960567528297, "grad_norm": 0.9609375, "learning_rate": 0.0003873111078969575, "loss": 0.2524, "step": 76144 }, { "epoch": 0.1350131518405928, "grad_norm": 0.6171875, "learning_rate": 0.0003872747625355294, "loss": 0.2596, "step": 76146 }, { "epoch": 0.1350166980059026, "grad_norm": 0.2099609375, "learning_rate": 0.0003872384202911469, "loss": 0.1689, "step": 76148 }, { "epoch": 0.13502024417121242, "grad_norm": 0.890625, "learning_rate": 0.0003872020811639698, "loss": 0.1876, "step": 76150 }, { "epoch": 0.13502379033652223, "grad_norm": 0.68359375, "learning_rate": 0.0003871657451541568, "loss": 0.2029, "step": 76152 }, { "epoch": 0.13502733650183205, "grad_norm": 0.361328125, "learning_rate": 0.0003871294122618668, "loss": 0.1822, "step": 76154 }, { "epoch": 0.13503088266714186, "grad_norm": 0.5234375, "learning_rate": 0.0003870930824872586, "loss": 0.151, "step": 76156 }, { "epoch": 0.13503442883245168, "grad_norm": 0.2578125, "learning_rate": 0.00038705675583049096, "loss": 0.1578, "step": 76158 }, { "epoch": 0.1350379749977615, "grad_norm": 0.294921875, "learning_rate": 0.0003870204322917231, "loss": 0.1748, "step": 76160 }, { "epoch": 0.1350415211630713, "grad_norm": 0.40625, "learning_rate": 0.0003869841118711138, "loss": 0.1426, "step": 76162 }, { "epoch": 0.13504506732838112, "grad_norm": 0.302734375, "learning_rate": 0.00038694779456882177, "loss": 0.2145, "step": 76164 }, { "epoch": 0.13504861349369093, "grad_norm": 0.419921875, "learning_rate": 0.0003869114803850057, "loss": 0.1809, "step": 76166 }, { "epoch": 0.13505215965900075, "grad_norm": 0.359375, "learning_rate": 0.00038687516931982483, "loss": 0.1815, "step": 76168 }, { "epoch": 0.13505570582431056, "grad_norm": 0.2890625, "learning_rate": 0.0003868388613734378, "loss": 0.1463, "step": 76170 }, { "epoch": 0.13505925198962038, "grad_norm": 0.2138671875, "learning_rate": 0.0003868025565460033, "loss": 0.1487, "step": 76172 }, { "epoch": 0.1350627981549302, "grad_norm": 0.625, "learning_rate": 0.00038676625483768, "loss": 0.2202, "step": 76174 }, { "epoch": 0.13506634432024, "grad_norm": 0.40625, "learning_rate": 0.0003867299562486267, "loss": 0.1825, "step": 76176 }, { "epoch": 0.13506989048554982, "grad_norm": 0.310546875, "learning_rate": 0.0003866936607790028, "loss": 0.2886, "step": 76178 }, { "epoch": 0.13507343665085964, "grad_norm": 0.28515625, "learning_rate": 0.0003866573684289659, "loss": 0.2033, "step": 76180 }, { "epoch": 0.13507698281616945, "grad_norm": 0.6953125, "learning_rate": 0.00038662107919867553, "loss": 0.2408, "step": 76182 }, { "epoch": 0.13508052898147926, "grad_norm": 1.3125, "learning_rate": 0.00038658479308828996, "loss": 0.1726, "step": 76184 }, { "epoch": 0.13508407514678908, "grad_norm": 0.486328125, "learning_rate": 0.0003865485100979685, "loss": 0.2137, "step": 76186 }, { "epoch": 0.1350876213120989, "grad_norm": 0.30859375, "learning_rate": 0.00038651223022786926, "loss": 0.1064, "step": 76188 }, { "epoch": 0.1350911674774087, "grad_norm": 0.68359375, "learning_rate": 0.0003864759534781511, "loss": 0.1428, "step": 76190 }, { "epoch": 0.13509471364271852, "grad_norm": 0.361328125, "learning_rate": 0.0003864396798489727, "loss": 0.2129, "step": 76192 }, { "epoch": 0.13509825980802834, "grad_norm": 0.53125, "learning_rate": 0.00038640340934049234, "loss": 0.2125, "step": 76194 }, { "epoch": 0.13510180597333815, "grad_norm": 0.294921875, "learning_rate": 0.0003863671419528696, "loss": 0.1534, "step": 76196 }, { "epoch": 0.13510535213864797, "grad_norm": 0.2177734375, "learning_rate": 0.0003863308776862618, "loss": 0.1605, "step": 76198 }, { "epoch": 0.13510889830395778, "grad_norm": 0.59375, "learning_rate": 0.00038629461654082856, "loss": 0.2129, "step": 76200 }, { "epoch": 0.1351124444692676, "grad_norm": 0.203125, "learning_rate": 0.00038625835851672786, "loss": 0.1517, "step": 76202 }, { "epoch": 0.1351159906345774, "grad_norm": 0.314453125, "learning_rate": 0.00038622210361411883, "loss": 0.1482, "step": 76204 }, { "epoch": 0.13511953679988722, "grad_norm": 0.33203125, "learning_rate": 0.00038618585183315934, "loss": 0.1491, "step": 76206 }, { "epoch": 0.13512308296519704, "grad_norm": 0.2578125, "learning_rate": 0.00038614960317400853, "loss": 0.1682, "step": 76208 }, { "epoch": 0.13512662913050685, "grad_norm": 0.2578125, "learning_rate": 0.00038611335763682477, "loss": 0.1786, "step": 76210 }, { "epoch": 0.13513017529581667, "grad_norm": 0.85546875, "learning_rate": 0.00038607711522176645, "loss": 0.1503, "step": 76212 }, { "epoch": 0.13513372146112648, "grad_norm": 0.421875, "learning_rate": 0.00038604087592899214, "loss": 0.1374, "step": 76214 }, { "epoch": 0.1351372676264363, "grad_norm": 0.51953125, "learning_rate": 0.00038600463975865996, "loss": 0.2419, "step": 76216 }, { "epoch": 0.1351408137917461, "grad_norm": 1.1171875, "learning_rate": 0.00038596840671092914, "loss": 0.2408, "step": 76218 }, { "epoch": 0.13514435995705593, "grad_norm": 0.279296875, "learning_rate": 0.0003859321767859577, "loss": 0.1655, "step": 76220 }, { "epoch": 0.13514790612236574, "grad_norm": 0.294921875, "learning_rate": 0.0003858959499839042, "loss": 0.2082, "step": 76222 }, { "epoch": 0.13515145228767556, "grad_norm": 0.33984375, "learning_rate": 0.00038585972630492674, "loss": 0.1781, "step": 76224 }, { "epoch": 0.13515499845298537, "grad_norm": 0.6796875, "learning_rate": 0.0003858235057491842, "loss": 0.2034, "step": 76226 }, { "epoch": 0.1351585446182952, "grad_norm": 0.388671875, "learning_rate": 0.00038578728831683483, "loss": 0.1416, "step": 76228 }, { "epoch": 0.13516209078360503, "grad_norm": 0.35546875, "learning_rate": 0.000385751074008037, "loss": 0.1732, "step": 76230 }, { "epoch": 0.13516563694891484, "grad_norm": 0.3984375, "learning_rate": 0.0003857148628229491, "loss": 0.2037, "step": 76232 }, { "epoch": 0.13516918311422466, "grad_norm": 2.6875, "learning_rate": 0.00038567865476172924, "loss": 0.2533, "step": 76234 }, { "epoch": 0.13517272927953447, "grad_norm": 0.416015625, "learning_rate": 0.0003856424498245365, "loss": 0.1627, "step": 76236 }, { "epoch": 0.13517627544484428, "grad_norm": 0.55078125, "learning_rate": 0.00038560624801152823, "loss": 0.1819, "step": 76238 }, { "epoch": 0.1351798216101541, "grad_norm": 0.33203125, "learning_rate": 0.0003855700493228636, "loss": 0.1616, "step": 76240 }, { "epoch": 0.13518336777546391, "grad_norm": 0.1689453125, "learning_rate": 0.0003855338537587002, "loss": 0.163, "step": 76242 }, { "epoch": 0.13518691394077373, "grad_norm": 0.330078125, "learning_rate": 0.0003854976613191972, "loss": 0.1918, "step": 76244 }, { "epoch": 0.13519046010608354, "grad_norm": 0.6015625, "learning_rate": 0.0003854614720045121, "loss": 0.1434, "step": 76246 }, { "epoch": 0.13519400627139336, "grad_norm": 0.55859375, "learning_rate": 0.0003854252858148036, "loss": 0.1342, "step": 76248 }, { "epoch": 0.13519755243670317, "grad_norm": 1.7421875, "learning_rate": 0.00038538910275022974, "loss": 0.2394, "step": 76250 }, { "epoch": 0.135201098602013, "grad_norm": 0.271484375, "learning_rate": 0.0003853529228109489, "loss": 0.1708, "step": 76252 }, { "epoch": 0.1352046447673228, "grad_norm": 0.53515625, "learning_rate": 0.0003853167459971193, "loss": 0.1721, "step": 76254 }, { "epoch": 0.13520819093263262, "grad_norm": 0.330078125, "learning_rate": 0.0003852805723088989, "loss": 0.1642, "step": 76256 }, { "epoch": 0.13521173709794243, "grad_norm": 0.4140625, "learning_rate": 0.00038524440174644625, "loss": 0.1439, "step": 76258 }, { "epoch": 0.13521528326325225, "grad_norm": 0.275390625, "learning_rate": 0.00038520823430991945, "loss": 0.1969, "step": 76260 }, { "epoch": 0.13521882942856206, "grad_norm": 0.51171875, "learning_rate": 0.00038517206999947673, "loss": 0.2237, "step": 76262 }, { "epoch": 0.13522237559387187, "grad_norm": 0.220703125, "learning_rate": 0.00038513590881527585, "loss": 0.1497, "step": 76264 }, { "epoch": 0.1352259217591817, "grad_norm": 0.259765625, "learning_rate": 0.00038509975075747536, "loss": 0.1833, "step": 76266 }, { "epoch": 0.1352294679244915, "grad_norm": 0.40625, "learning_rate": 0.0003850635958262335, "loss": 0.1325, "step": 76268 }, { "epoch": 0.13523301408980132, "grad_norm": 0.349609375, "learning_rate": 0.00038502744402170815, "loss": 0.153, "step": 76270 }, { "epoch": 0.13523656025511113, "grad_norm": 0.4375, "learning_rate": 0.0003849912953440573, "loss": 0.1739, "step": 76272 }, { "epoch": 0.13524010642042095, "grad_norm": 0.3203125, "learning_rate": 0.00038495514979343913, "loss": 0.136, "step": 76274 }, { "epoch": 0.13524365258573076, "grad_norm": 0.455078125, "learning_rate": 0.00038491900737001194, "loss": 0.218, "step": 76276 }, { "epoch": 0.13524719875104058, "grad_norm": 0.36328125, "learning_rate": 0.00038488286807393366, "loss": 0.1805, "step": 76278 }, { "epoch": 0.1352507449163504, "grad_norm": 1.046875, "learning_rate": 0.0003848467319053623, "loss": 0.558, "step": 76280 }, { "epoch": 0.1352542910816602, "grad_norm": 0.263671875, "learning_rate": 0.0003848105988644558, "loss": 0.1373, "step": 76282 }, { "epoch": 0.13525783724697002, "grad_norm": 0.326171875, "learning_rate": 0.0003847744689513725, "loss": 0.1702, "step": 76284 }, { "epoch": 0.13526138341227983, "grad_norm": 0.201171875, "learning_rate": 0.0003847383421662701, "loss": 0.1307, "step": 76286 }, { "epoch": 0.13526492957758965, "grad_norm": 0.337890625, "learning_rate": 0.00038470221850930684, "loss": 0.1768, "step": 76288 }, { "epoch": 0.13526847574289946, "grad_norm": 1.2734375, "learning_rate": 0.00038466609798064056, "loss": 0.2513, "step": 76290 }, { "epoch": 0.13527202190820928, "grad_norm": 0.91015625, "learning_rate": 0.0003846299805804291, "loss": 0.1815, "step": 76292 }, { "epoch": 0.1352755680735191, "grad_norm": 0.56640625, "learning_rate": 0.00038459386630883085, "loss": 0.1671, "step": 76294 }, { "epoch": 0.1352791142388289, "grad_norm": 0.35546875, "learning_rate": 0.00038455775516600324, "loss": 0.1688, "step": 76296 }, { "epoch": 0.13528266040413872, "grad_norm": 0.5625, "learning_rate": 0.00038452164715210445, "loss": 0.2795, "step": 76298 }, { "epoch": 0.13528620656944854, "grad_norm": 0.98046875, "learning_rate": 0.00038448554226729216, "loss": 0.3581, "step": 76300 }, { "epoch": 0.13528975273475835, "grad_norm": 1.7109375, "learning_rate": 0.00038444944051172506, "loss": 0.1939, "step": 76302 }, { "epoch": 0.13529329890006817, "grad_norm": 0.35546875, "learning_rate": 0.00038441334188555996, "loss": 0.167, "step": 76304 }, { "epoch": 0.13529684506537798, "grad_norm": 0.3203125, "learning_rate": 0.00038437724638895544, "loss": 0.1389, "step": 76306 }, { "epoch": 0.1353003912306878, "grad_norm": 0.5546875, "learning_rate": 0.00038434115402206925, "loss": 0.2508, "step": 76308 }, { "epoch": 0.1353039373959976, "grad_norm": 0.296875, "learning_rate": 0.000384305064785059, "loss": 0.1822, "step": 76310 }, { "epoch": 0.13530748356130742, "grad_norm": 0.51953125, "learning_rate": 0.00038426897867808286, "loss": 0.2024, "step": 76312 }, { "epoch": 0.13531102972661724, "grad_norm": 0.439453125, "learning_rate": 0.00038423289570129805, "loss": 0.1725, "step": 76314 }, { "epoch": 0.13531457589192705, "grad_norm": 0.1728515625, "learning_rate": 0.0003841968158548631, "loss": 0.2066, "step": 76316 }, { "epoch": 0.1353181220572369, "grad_norm": 1.140625, "learning_rate": 0.00038416073913893553, "loss": 0.2614, "step": 76318 }, { "epoch": 0.1353216682225467, "grad_norm": 0.265625, "learning_rate": 0.000384124665553673, "loss": 0.1688, "step": 76320 }, { "epoch": 0.13532521438785652, "grad_norm": 0.53125, "learning_rate": 0.0003840885950992334, "loss": 0.2149, "step": 76322 }, { "epoch": 0.13532876055316634, "grad_norm": 0.376953125, "learning_rate": 0.00038405252777577416, "loss": 0.1812, "step": 76324 }, { "epoch": 0.13533230671847615, "grad_norm": 0.35546875, "learning_rate": 0.0003840164635834539, "loss": 0.2478, "step": 76326 }, { "epoch": 0.13533585288378597, "grad_norm": 0.291015625, "learning_rate": 0.00038398040252242904, "loss": 0.1258, "step": 76328 }, { "epoch": 0.13533939904909578, "grad_norm": 0.7109375, "learning_rate": 0.0003839443445928584, "loss": 0.1449, "step": 76330 }, { "epoch": 0.1353429452144056, "grad_norm": 0.220703125, "learning_rate": 0.0003839082897948989, "loss": 0.1758, "step": 76332 }, { "epoch": 0.1353464913797154, "grad_norm": 0.59765625, "learning_rate": 0.0003838722381287091, "loss": 0.2367, "step": 76334 }, { "epoch": 0.13535003754502523, "grad_norm": 0.44140625, "learning_rate": 0.00038383618959444564, "loss": 0.1355, "step": 76336 }, { "epoch": 0.13535358371033504, "grad_norm": 0.50390625, "learning_rate": 0.00038380014419226707, "loss": 0.1571, "step": 76338 }, { "epoch": 0.13535712987564485, "grad_norm": 0.33203125, "learning_rate": 0.0003837641019223306, "loss": 0.1535, "step": 76340 }, { "epoch": 0.13536067604095467, "grad_norm": 0.298828125, "learning_rate": 0.00038372806278479376, "loss": 0.1512, "step": 76342 }, { "epoch": 0.13536422220626448, "grad_norm": 0.27734375, "learning_rate": 0.00038369202677981446, "loss": 0.1812, "step": 76344 }, { "epoch": 0.1353677683715743, "grad_norm": 0.78125, "learning_rate": 0.0003836559939075499, "loss": 0.1684, "step": 76346 }, { "epoch": 0.1353713145368841, "grad_norm": 0.3984375, "learning_rate": 0.00038361996416815816, "loss": 0.1829, "step": 76348 }, { "epoch": 0.13537486070219393, "grad_norm": 0.2197265625, "learning_rate": 0.00038358393756179627, "loss": 0.1396, "step": 76350 }, { "epoch": 0.13537840686750374, "grad_norm": 0.224609375, "learning_rate": 0.00038354791408862264, "loss": 0.1685, "step": 76352 }, { "epoch": 0.13538195303281356, "grad_norm": 0.30078125, "learning_rate": 0.0003835118937487938, "loss": 0.1671, "step": 76354 }, { "epoch": 0.13538549919812337, "grad_norm": 0.75, "learning_rate": 0.000383475876542468, "loss": 0.4365, "step": 76356 }, { "epoch": 0.13538904536343319, "grad_norm": 1.4296875, "learning_rate": 0.00038343986246980246, "loss": 0.3863, "step": 76358 }, { "epoch": 0.135392591528743, "grad_norm": 0.373046875, "learning_rate": 0.0003834038515309546, "loss": 0.3514, "step": 76360 }, { "epoch": 0.13539613769405281, "grad_norm": 0.287109375, "learning_rate": 0.00038336784372608224, "loss": 0.1241, "step": 76362 }, { "epoch": 0.13539968385936263, "grad_norm": 0.291015625, "learning_rate": 0.0003833318390553424, "loss": 0.2163, "step": 76364 }, { "epoch": 0.13540323002467244, "grad_norm": 0.72265625, "learning_rate": 0.000383295837518893, "loss": 0.1584, "step": 76366 }, { "epoch": 0.13540677618998226, "grad_norm": 0.85546875, "learning_rate": 0.0003832598391168913, "loss": 0.1296, "step": 76368 }, { "epoch": 0.13541032235529207, "grad_norm": 0.298828125, "learning_rate": 0.0003832238438494948, "loss": 0.1553, "step": 76370 }, { "epoch": 0.1354138685206019, "grad_norm": 0.2177734375, "learning_rate": 0.00038318785171686056, "loss": 0.2442, "step": 76372 }, { "epoch": 0.1354174146859117, "grad_norm": 0.1923828125, "learning_rate": 0.0003831518627191465, "loss": 0.1738, "step": 76374 }, { "epoch": 0.13542096085122152, "grad_norm": 0.61328125, "learning_rate": 0.0003831158768565098, "loss": 0.1632, "step": 76376 }, { "epoch": 0.13542450701653133, "grad_norm": 0.67578125, "learning_rate": 0.0003830798941291078, "loss": 0.1734, "step": 76378 }, { "epoch": 0.13542805318184115, "grad_norm": 0.298828125, "learning_rate": 0.000383043914537098, "loss": 0.3258, "step": 76380 }, { "epoch": 0.13543159934715096, "grad_norm": 0.384765625, "learning_rate": 0.0003830079380806374, "loss": 0.1567, "step": 76382 }, { "epoch": 0.13543514551246078, "grad_norm": 0.328125, "learning_rate": 0.00038297196475988403, "loss": 0.188, "step": 76384 }, { "epoch": 0.1354386916777706, "grad_norm": 0.32421875, "learning_rate": 0.0003829359945749944, "loss": 0.1433, "step": 76386 }, { "epoch": 0.1354422378430804, "grad_norm": 0.359375, "learning_rate": 0.0003829000275261264, "loss": 0.1756, "step": 76388 }, { "epoch": 0.13544578400839022, "grad_norm": 0.482421875, "learning_rate": 0.0003828640636134368, "loss": 0.175, "step": 76390 }, { "epoch": 0.13544933017370003, "grad_norm": 0.2333984375, "learning_rate": 0.0003828281028370838, "loss": 0.161, "step": 76392 }, { "epoch": 0.13545287633900985, "grad_norm": 0.279296875, "learning_rate": 0.0003827921451972236, "loss": 0.1968, "step": 76394 }, { "epoch": 0.13545642250431966, "grad_norm": 0.6171875, "learning_rate": 0.00038275619069401423, "loss": 0.2282, "step": 76396 }, { "epoch": 0.13545996866962948, "grad_norm": 0.45703125, "learning_rate": 0.0003827202393276127, "loss": 0.3068, "step": 76398 }, { "epoch": 0.1354635148349393, "grad_norm": 1.1171875, "learning_rate": 0.00038268429109817605, "loss": 0.2075, "step": 76400 }, { "epoch": 0.1354670610002491, "grad_norm": 0.296875, "learning_rate": 0.00038264834600586174, "loss": 0.1537, "step": 76402 }, { "epoch": 0.13547060716555892, "grad_norm": 0.828125, "learning_rate": 0.00038261240405082665, "loss": 0.2014, "step": 76404 }, { "epoch": 0.13547415333086874, "grad_norm": 0.419921875, "learning_rate": 0.00038257646523322833, "loss": 0.1865, "step": 76406 }, { "epoch": 0.13547769949617858, "grad_norm": 0.61328125, "learning_rate": 0.0003825405295532239, "loss": 0.2804, "step": 76408 }, { "epoch": 0.1354812456614884, "grad_norm": 0.2255859375, "learning_rate": 0.00038250459701097043, "loss": 0.1673, "step": 76410 }, { "epoch": 0.1354847918267982, "grad_norm": 1.078125, "learning_rate": 0.00038246866760662483, "loss": 0.1712, "step": 76412 }, { "epoch": 0.13548833799210802, "grad_norm": 0.455078125, "learning_rate": 0.0003824327413403448, "loss": 0.1127, "step": 76414 }, { "epoch": 0.13549188415741784, "grad_norm": 0.287109375, "learning_rate": 0.000382396818212287, "loss": 0.175, "step": 76416 }, { "epoch": 0.13549543032272765, "grad_norm": 1.34375, "learning_rate": 0.0003823608982226089, "loss": 0.2414, "step": 76418 }, { "epoch": 0.13549897648803746, "grad_norm": 0.2197265625, "learning_rate": 0.0003823249813714671, "loss": 0.1522, "step": 76420 }, { "epoch": 0.13550252265334728, "grad_norm": 0.58984375, "learning_rate": 0.00038228906765901886, "loss": 0.1698, "step": 76422 }, { "epoch": 0.1355060688186571, "grad_norm": 1.171875, "learning_rate": 0.00038225315708542197, "loss": 0.2224, "step": 76424 }, { "epoch": 0.1355096149839669, "grad_norm": 0.578125, "learning_rate": 0.00038221724965083214, "loss": 0.2059, "step": 76426 }, { "epoch": 0.13551316114927672, "grad_norm": 0.431640625, "learning_rate": 0.0003821813453554075, "loss": 0.4291, "step": 76428 }, { "epoch": 0.13551670731458654, "grad_norm": 0.431640625, "learning_rate": 0.0003821454441993044, "loss": 0.1745, "step": 76430 }, { "epoch": 0.13552025347989635, "grad_norm": 0.4375, "learning_rate": 0.0003821095461826804, "loss": 0.2263, "step": 76432 }, { "epoch": 0.13552379964520617, "grad_norm": 1.3046875, "learning_rate": 0.0003820736513056923, "loss": 0.2141, "step": 76434 }, { "epoch": 0.13552734581051598, "grad_norm": 0.19140625, "learning_rate": 0.00038203775956849686, "loss": 0.1504, "step": 76436 }, { "epoch": 0.1355308919758258, "grad_norm": 0.671875, "learning_rate": 0.0003820018709712513, "loss": 0.246, "step": 76438 }, { "epoch": 0.1355344381411356, "grad_norm": 2.578125, "learning_rate": 0.0003819659855141123, "loss": 0.1873, "step": 76440 }, { "epoch": 0.13553798430644542, "grad_norm": 0.30078125, "learning_rate": 0.00038193010319723754, "loss": 0.171, "step": 76442 }, { "epoch": 0.13554153047175524, "grad_norm": 0.2412109375, "learning_rate": 0.00038189422402078285, "loss": 0.2134, "step": 76444 }, { "epoch": 0.13554507663706505, "grad_norm": 0.65625, "learning_rate": 0.0003818583479849058, "loss": 0.1626, "step": 76446 }, { "epoch": 0.13554862280237487, "grad_norm": 0.44140625, "learning_rate": 0.0003818224750897632, "loss": 0.1913, "step": 76448 }, { "epoch": 0.13555216896768468, "grad_norm": 0.94140625, "learning_rate": 0.00038178660533551225, "loss": 0.3119, "step": 76450 }, { "epoch": 0.1355557151329945, "grad_norm": 0.63671875, "learning_rate": 0.0003817507387223091, "loss": 0.1145, "step": 76452 }, { "epoch": 0.1355592612983043, "grad_norm": 0.2353515625, "learning_rate": 0.0003817148752503111, "loss": 0.1664, "step": 76454 }, { "epoch": 0.13556280746361413, "grad_norm": 1.328125, "learning_rate": 0.00038167901491967514, "loss": 0.2837, "step": 76456 }, { "epoch": 0.13556635362892394, "grad_norm": 0.44921875, "learning_rate": 0.0003816431577305579, "loss": 0.1997, "step": 76458 }, { "epoch": 0.13556989979423376, "grad_norm": 0.34375, "learning_rate": 0.00038160730368311615, "loss": 0.1645, "step": 76460 }, { "epoch": 0.13557344595954357, "grad_norm": 0.71484375, "learning_rate": 0.0003815714527775067, "loss": 0.1816, "step": 76462 }, { "epoch": 0.13557699212485338, "grad_norm": 1.1015625, "learning_rate": 0.0003815356050138864, "loss": 0.2155, "step": 76464 }, { "epoch": 0.1355805382901632, "grad_norm": 2.28125, "learning_rate": 0.0003814997603924122, "loss": 0.2055, "step": 76466 }, { "epoch": 0.13558408445547301, "grad_norm": 0.30078125, "learning_rate": 0.00038146391891324066, "loss": 0.1914, "step": 76468 }, { "epoch": 0.13558763062078283, "grad_norm": 0.443359375, "learning_rate": 0.0003814280805765283, "loss": 0.2165, "step": 76470 }, { "epoch": 0.13559117678609264, "grad_norm": 0.474609375, "learning_rate": 0.0003813922453824324, "loss": 0.1719, "step": 76472 }, { "epoch": 0.13559472295140246, "grad_norm": 0.25390625, "learning_rate": 0.0003813564133311093, "loss": 0.3961, "step": 76474 }, { "epoch": 0.13559826911671227, "grad_norm": 0.26953125, "learning_rate": 0.000381320584422716, "loss": 0.1519, "step": 76476 }, { "epoch": 0.1356018152820221, "grad_norm": 0.326171875, "learning_rate": 0.0003812847586574088, "loss": 0.2027, "step": 76478 }, { "epoch": 0.1356053614473319, "grad_norm": 0.53515625, "learning_rate": 0.0003812489360353444, "loss": 0.1774, "step": 76480 }, { "epoch": 0.13560890761264172, "grad_norm": 0.609375, "learning_rate": 0.00038121311655668025, "loss": 0.1353, "step": 76482 }, { "epoch": 0.13561245377795153, "grad_norm": 1.1875, "learning_rate": 0.00038117730022157185, "loss": 0.2081, "step": 76484 }, { "epoch": 0.13561599994326134, "grad_norm": 0.279296875, "learning_rate": 0.0003811414870301766, "loss": 0.1978, "step": 76486 }, { "epoch": 0.13561954610857116, "grad_norm": 0.2373046875, "learning_rate": 0.0003811056769826506, "loss": 0.4411, "step": 76488 }, { "epoch": 0.13562309227388097, "grad_norm": 0.23046875, "learning_rate": 0.0003810698700791514, "loss": 0.1437, "step": 76490 }, { "epoch": 0.1356266384391908, "grad_norm": 0.291015625, "learning_rate": 0.0003810340663198346, "loss": 0.1412, "step": 76492 }, { "epoch": 0.1356301846045006, "grad_norm": 0.2177734375, "learning_rate": 0.0003809982657048572, "loss": 0.1865, "step": 76494 }, { "epoch": 0.13563373076981042, "grad_norm": 0.3125, "learning_rate": 0.00038096246823437585, "loss": 0.1691, "step": 76496 }, { "epoch": 0.13563727693512023, "grad_norm": 0.26171875, "learning_rate": 0.00038092667390854704, "loss": 0.1665, "step": 76498 }, { "epoch": 0.13564082310043007, "grad_norm": 0.419921875, "learning_rate": 0.0003808908827275272, "loss": 0.1509, "step": 76500 }, { "epoch": 0.1356443692657399, "grad_norm": 1.1953125, "learning_rate": 0.00038085509469147264, "loss": 0.2507, "step": 76502 }, { "epoch": 0.1356479154310497, "grad_norm": 0.400390625, "learning_rate": 0.0003808193098005405, "loss": 0.1246, "step": 76504 }, { "epoch": 0.13565146159635952, "grad_norm": 0.294921875, "learning_rate": 0.0003807835280548869, "loss": 0.1661, "step": 76506 }, { "epoch": 0.13565500776166933, "grad_norm": 0.65234375, "learning_rate": 0.0003807477494546684, "loss": 0.1871, "step": 76508 }, { "epoch": 0.13565855392697915, "grad_norm": 0.5390625, "learning_rate": 0.0003807119740000414, "loss": 0.25, "step": 76510 }, { "epoch": 0.13566210009228896, "grad_norm": 0.296875, "learning_rate": 0.0003806762016911623, "loss": 0.155, "step": 76512 }, { "epoch": 0.13566564625759878, "grad_norm": 0.234375, "learning_rate": 0.00038064043252818803, "loss": 0.1805, "step": 76514 }, { "epoch": 0.1356691924229086, "grad_norm": 0.70703125, "learning_rate": 0.0003806046665112742, "loss": 0.1838, "step": 76516 }, { "epoch": 0.1356727385882184, "grad_norm": 0.435546875, "learning_rate": 0.00038056890364057786, "loss": 0.1714, "step": 76518 }, { "epoch": 0.13567628475352822, "grad_norm": 0.390625, "learning_rate": 0.0003805331439162551, "loss": 0.1624, "step": 76520 }, { "epoch": 0.13567983091883803, "grad_norm": 2.3125, "learning_rate": 0.0003804973873384626, "loss": 0.2146, "step": 76522 }, { "epoch": 0.13568337708414785, "grad_norm": 0.42578125, "learning_rate": 0.00038046163390735674, "loss": 0.1799, "step": 76524 }, { "epoch": 0.13568692324945766, "grad_norm": 0.50390625, "learning_rate": 0.0003804258836230937, "loss": 0.1656, "step": 76526 }, { "epoch": 0.13569046941476748, "grad_norm": 0.427734375, "learning_rate": 0.0003803901364858298, "loss": 0.2268, "step": 76528 }, { "epoch": 0.1356940155800773, "grad_norm": 0.1923828125, "learning_rate": 0.0003803543924957211, "loss": 0.2399, "step": 76530 }, { "epoch": 0.1356975617453871, "grad_norm": 1.03125, "learning_rate": 0.0003803186516529249, "loss": 0.3979, "step": 76532 }, { "epoch": 0.13570110791069692, "grad_norm": 1.3046875, "learning_rate": 0.00038028291395759636, "loss": 0.2784, "step": 76534 }, { "epoch": 0.13570465407600674, "grad_norm": 0.32421875, "learning_rate": 0.0003802471794098925, "loss": 0.1932, "step": 76536 }, { "epoch": 0.13570820024131655, "grad_norm": 0.259765625, "learning_rate": 0.00038021144800996933, "loss": 0.1924, "step": 76538 }, { "epoch": 0.13571174640662637, "grad_norm": 0.404296875, "learning_rate": 0.0003801757197579835, "loss": 0.1422, "step": 76540 }, { "epoch": 0.13571529257193618, "grad_norm": 0.15234375, "learning_rate": 0.00038013999465409036, "loss": 0.1857, "step": 76542 }, { "epoch": 0.135718838737246, "grad_norm": 0.54296875, "learning_rate": 0.0003801042726984472, "loss": 0.1917, "step": 76544 }, { "epoch": 0.1357223849025558, "grad_norm": 0.65625, "learning_rate": 0.00038006855389120965, "loss": 0.175, "step": 76546 }, { "epoch": 0.13572593106786562, "grad_norm": 3.796875, "learning_rate": 0.0003800328382325342, "loss": 0.1938, "step": 76548 }, { "epoch": 0.13572947723317544, "grad_norm": 0.421875, "learning_rate": 0.00037999712572257675, "loss": 0.2146, "step": 76550 }, { "epoch": 0.13573302339848525, "grad_norm": 0.392578125, "learning_rate": 0.0003799614163614934, "loss": 0.137, "step": 76552 }, { "epoch": 0.13573656956379507, "grad_norm": 0.333984375, "learning_rate": 0.0003799257101494409, "loss": 0.155, "step": 76554 }, { "epoch": 0.13574011572910488, "grad_norm": 0.376953125, "learning_rate": 0.00037989000708657506, "loss": 0.16, "step": 76556 }, { "epoch": 0.1357436618944147, "grad_norm": 0.2890625, "learning_rate": 0.000379854307173052, "loss": 0.3527, "step": 76558 }, { "epoch": 0.1357472080597245, "grad_norm": 0.49609375, "learning_rate": 0.0003798186104090277, "loss": 0.2056, "step": 76560 }, { "epoch": 0.13575075422503433, "grad_norm": 0.2431640625, "learning_rate": 0.0003797829167946587, "loss": 0.218, "step": 76562 }, { "epoch": 0.13575430039034414, "grad_norm": 0.359375, "learning_rate": 0.00037974722633010087, "loss": 0.2013, "step": 76564 }, { "epoch": 0.13575784655565395, "grad_norm": 0.671875, "learning_rate": 0.0003797115390155103, "loss": 0.2002, "step": 76566 }, { "epoch": 0.13576139272096377, "grad_norm": 0.73828125, "learning_rate": 0.000379675854851043, "loss": 0.2088, "step": 76568 }, { "epoch": 0.13576493888627358, "grad_norm": 0.5, "learning_rate": 0.0003796401738368549, "loss": 0.1579, "step": 76570 }, { "epoch": 0.1357684850515834, "grad_norm": 0.1943359375, "learning_rate": 0.0003796044959731028, "loss": 0.1719, "step": 76572 }, { "epoch": 0.1357720312168932, "grad_norm": 0.515625, "learning_rate": 0.0003795688212599416, "loss": 0.1882, "step": 76574 }, { "epoch": 0.13577557738220303, "grad_norm": 0.369140625, "learning_rate": 0.0003795331496975282, "loss": 0.1468, "step": 76576 }, { "epoch": 0.13577912354751284, "grad_norm": 0.61328125, "learning_rate": 0.000379497481286018, "loss": 0.1916, "step": 76578 }, { "epoch": 0.13578266971282266, "grad_norm": 0.291015625, "learning_rate": 0.00037946181602556786, "loss": 0.1496, "step": 76580 }, { "epoch": 0.13578621587813247, "grad_norm": 0.5078125, "learning_rate": 0.00037942615391633273, "loss": 0.1814, "step": 76582 }, { "epoch": 0.13578976204344229, "grad_norm": 0.26953125, "learning_rate": 0.00037939049495846924, "loss": 0.1614, "step": 76584 }, { "epoch": 0.1357933082087521, "grad_norm": 0.31640625, "learning_rate": 0.000379354839152133, "loss": 0.2049, "step": 76586 }, { "epoch": 0.13579685437406191, "grad_norm": 0.232421875, "learning_rate": 0.0003793191864974803, "loss": 0.2153, "step": 76588 }, { "epoch": 0.13580040053937176, "grad_norm": 0.1923828125, "learning_rate": 0.00037928353699466673, "loss": 0.1753, "step": 76590 }, { "epoch": 0.13580394670468157, "grad_norm": 0.494140625, "learning_rate": 0.00037924789064384826, "loss": 0.2633, "step": 76592 }, { "epoch": 0.1358074928699914, "grad_norm": 5.53125, "learning_rate": 0.0003792122474451809, "loss": 0.2617, "step": 76594 }, { "epoch": 0.1358110390353012, "grad_norm": 0.4453125, "learning_rate": 0.00037917660739882053, "loss": 0.18, "step": 76596 }, { "epoch": 0.13581458520061102, "grad_norm": 1.1328125, "learning_rate": 0.0003791409705049231, "loss": 0.2789, "step": 76598 }, { "epoch": 0.13581813136592083, "grad_norm": 0.33203125, "learning_rate": 0.0003791053367636439, "loss": 0.1632, "step": 76600 }, { "epoch": 0.13582167753123064, "grad_norm": 0.28125, "learning_rate": 0.0003790697061751396, "loss": 0.1557, "step": 76602 }, { "epoch": 0.13582522369654046, "grad_norm": 0.353515625, "learning_rate": 0.00037903407873956564, "loss": 0.1675, "step": 76604 }, { "epoch": 0.13582876986185027, "grad_norm": 0.59375, "learning_rate": 0.00037899845445707784, "loss": 0.182, "step": 76606 }, { "epoch": 0.1358323160271601, "grad_norm": 0.4765625, "learning_rate": 0.0003789628333278318, "loss": 0.1691, "step": 76608 }, { "epoch": 0.1358358621924699, "grad_norm": 0.87890625, "learning_rate": 0.00037892721535198336, "loss": 0.3461, "step": 76610 }, { "epoch": 0.13583940835777972, "grad_norm": 0.80078125, "learning_rate": 0.0003788916005296888, "loss": 0.2046, "step": 76612 }, { "epoch": 0.13584295452308953, "grad_norm": 0.79296875, "learning_rate": 0.0003788559888611034, "loss": 0.2103, "step": 76614 }, { "epoch": 0.13584650068839935, "grad_norm": 0.3828125, "learning_rate": 0.000378820380346383, "loss": 0.1722, "step": 76616 }, { "epoch": 0.13585004685370916, "grad_norm": 0.765625, "learning_rate": 0.0003787847749856832, "loss": 0.1282, "step": 76618 }, { "epoch": 0.13585359301901898, "grad_norm": 0.294921875, "learning_rate": 0.0003787491727791601, "loss": 0.1695, "step": 76620 }, { "epoch": 0.1358571391843288, "grad_norm": 0.326171875, "learning_rate": 0.00037871357372696907, "loss": 0.185, "step": 76622 }, { "epoch": 0.1358606853496386, "grad_norm": 0.21875, "learning_rate": 0.0003786779778292659, "loss": 0.1576, "step": 76624 }, { "epoch": 0.13586423151494842, "grad_norm": 0.2041015625, "learning_rate": 0.0003786423850862063, "loss": 0.1466, "step": 76626 }, { "epoch": 0.13586777768025823, "grad_norm": 0.474609375, "learning_rate": 0.00037860679549794565, "loss": 0.1873, "step": 76628 }, { "epoch": 0.13587132384556805, "grad_norm": 0.75390625, "learning_rate": 0.00037857120906464035, "loss": 0.1738, "step": 76630 }, { "epoch": 0.13587487001087786, "grad_norm": 0.5, "learning_rate": 0.00037853562578644506, "loss": 0.1786, "step": 76632 }, { "epoch": 0.13587841617618768, "grad_norm": 0.28515625, "learning_rate": 0.00037850004566351603, "loss": 0.1358, "step": 76634 }, { "epoch": 0.1358819623414975, "grad_norm": 0.5390625, "learning_rate": 0.0003784644686960085, "loss": 0.1527, "step": 76636 }, { "epoch": 0.1358855085068073, "grad_norm": 0.416015625, "learning_rate": 0.0003784288948840787, "loss": 0.1666, "step": 76638 }, { "epoch": 0.13588905467211712, "grad_norm": 0.6015625, "learning_rate": 0.0003783933242278814, "loss": 0.2557, "step": 76640 }, { "epoch": 0.13589260083742694, "grad_norm": 0.486328125, "learning_rate": 0.0003783577567275727, "loss": 0.1737, "step": 76642 }, { "epoch": 0.13589614700273675, "grad_norm": 0.2353515625, "learning_rate": 0.000378322192383308, "loss": 0.1753, "step": 76644 }, { "epoch": 0.13589969316804656, "grad_norm": 1.265625, "learning_rate": 0.0003782866311952428, "loss": 0.201, "step": 76646 }, { "epoch": 0.13590323933335638, "grad_norm": 0.17578125, "learning_rate": 0.00037825107316353276, "loss": 0.1618, "step": 76648 }, { "epoch": 0.1359067854986662, "grad_norm": 1.828125, "learning_rate": 0.0003782155182883329, "loss": 0.2273, "step": 76650 }, { "epoch": 0.135910331663976, "grad_norm": 0.3203125, "learning_rate": 0.0003781799665697995, "loss": 0.1543, "step": 76652 }, { "epoch": 0.13591387782928582, "grad_norm": 1.71875, "learning_rate": 0.0003781444180080875, "loss": 0.1955, "step": 76654 }, { "epoch": 0.13591742399459564, "grad_norm": 1.8359375, "learning_rate": 0.0003781088726033527, "loss": 0.216, "step": 76656 }, { "epoch": 0.13592097015990545, "grad_norm": 0.33203125, "learning_rate": 0.00037807333035574985, "loss": 0.203, "step": 76658 }, { "epoch": 0.13592451632521527, "grad_norm": 0.64453125, "learning_rate": 0.00037803779126543535, "loss": 0.226, "step": 76660 }, { "epoch": 0.13592806249052508, "grad_norm": 0.232421875, "learning_rate": 0.0003780022553325642, "loss": 0.1393, "step": 76662 }, { "epoch": 0.1359316086558349, "grad_norm": 1.34375, "learning_rate": 0.00037796672255729176, "loss": 0.1774, "step": 76664 }, { "epoch": 0.1359351548211447, "grad_norm": 1.40625, "learning_rate": 0.0003779311929397734, "loss": 0.2455, "step": 76666 }, { "epoch": 0.13593870098645452, "grad_norm": 0.201171875, "learning_rate": 0.0003778956664801644, "loss": 0.1486, "step": 76668 }, { "epoch": 0.13594224715176434, "grad_norm": 0.455078125, "learning_rate": 0.0003778601431786207, "loss": 0.1744, "step": 76670 }, { "epoch": 0.13594579331707415, "grad_norm": 0.63671875, "learning_rate": 0.0003778246230352969, "loss": 0.1868, "step": 76672 }, { "epoch": 0.13594933948238397, "grad_norm": 0.376953125, "learning_rate": 0.0003777891060503489, "loss": 0.1806, "step": 76674 }, { "epoch": 0.13595288564769378, "grad_norm": 0.216796875, "learning_rate": 0.00037775359222393183, "loss": 0.1948, "step": 76676 }, { "epoch": 0.1359564318130036, "grad_norm": 2.765625, "learning_rate": 0.000377718081556201, "loss": 0.2507, "step": 76678 }, { "epoch": 0.13595997797831344, "grad_norm": 1.640625, "learning_rate": 0.00037768257404731175, "loss": 0.2876, "step": 76680 }, { "epoch": 0.13596352414362325, "grad_norm": 0.25390625, "learning_rate": 0.00037764706969741917, "loss": 0.1379, "step": 76682 }, { "epoch": 0.13596707030893307, "grad_norm": 0.169921875, "learning_rate": 0.0003776115685066789, "loss": 0.1668, "step": 76684 }, { "epoch": 0.13597061647424288, "grad_norm": 0.91015625, "learning_rate": 0.0003775760704752457, "loss": 0.1332, "step": 76686 }, { "epoch": 0.1359741626395527, "grad_norm": 0.349609375, "learning_rate": 0.0003775405756032758, "loss": 0.1813, "step": 76688 }, { "epoch": 0.1359777088048625, "grad_norm": 0.349609375, "learning_rate": 0.00037750508389092316, "loss": 0.2393, "step": 76690 }, { "epoch": 0.13598125497017233, "grad_norm": 0.6484375, "learning_rate": 0.0003774695953383438, "loss": 0.2008, "step": 76692 }, { "epoch": 0.13598480113548214, "grad_norm": 1.1015625, "learning_rate": 0.0003774341099456928, "loss": 0.1944, "step": 76694 }, { "epoch": 0.13598834730079196, "grad_norm": 0.25, "learning_rate": 0.0003773986277131253, "loss": 0.1456, "step": 76696 }, { "epoch": 0.13599189346610177, "grad_norm": 3.78125, "learning_rate": 0.0003773631486407965, "loss": 0.3194, "step": 76698 }, { "epoch": 0.13599543963141159, "grad_norm": 0.1875, "learning_rate": 0.0003773276727288613, "loss": 0.1487, "step": 76700 }, { "epoch": 0.1359989857967214, "grad_norm": 0.34375, "learning_rate": 0.0003772921999774752, "loss": 0.2035, "step": 76702 }, { "epoch": 0.13600253196203121, "grad_norm": 0.23046875, "learning_rate": 0.00037725673038679333, "loss": 0.1321, "step": 76704 }, { "epoch": 0.13600607812734103, "grad_norm": 0.859375, "learning_rate": 0.00037722126395697066, "loss": 0.2132, "step": 76706 }, { "epoch": 0.13600962429265084, "grad_norm": 0.3359375, "learning_rate": 0.0003771858006881621, "loss": 0.1844, "step": 76708 }, { "epoch": 0.13601317045796066, "grad_norm": 0.26171875, "learning_rate": 0.00037715034058052324, "loss": 0.146, "step": 76710 }, { "epoch": 0.13601671662327047, "grad_norm": 0.63671875, "learning_rate": 0.0003771148836342089, "loss": 0.1615, "step": 76712 }, { "epoch": 0.1360202627885803, "grad_norm": 0.6875, "learning_rate": 0.00037707942984937414, "loss": 0.1606, "step": 76714 }, { "epoch": 0.1360238089538901, "grad_norm": 2.890625, "learning_rate": 0.0003770439792261741, "loss": 0.1556, "step": 76716 }, { "epoch": 0.13602735511919992, "grad_norm": 1.265625, "learning_rate": 0.0003770085317647634, "loss": 0.1815, "step": 76718 }, { "epoch": 0.13603090128450973, "grad_norm": 0.359375, "learning_rate": 0.00037697308746529793, "loss": 0.1456, "step": 76720 }, { "epoch": 0.13603444744981955, "grad_norm": 0.423828125, "learning_rate": 0.0003769376463279318, "loss": 0.169, "step": 76722 }, { "epoch": 0.13603799361512936, "grad_norm": 0.640625, "learning_rate": 0.0003769022083528205, "loss": 0.1855, "step": 76724 }, { "epoch": 0.13604153978043917, "grad_norm": 0.328125, "learning_rate": 0.0003768667735401188, "loss": 0.1356, "step": 76726 }, { "epoch": 0.136045085945749, "grad_norm": 0.20703125, "learning_rate": 0.00037683134188998225, "loss": 0.171, "step": 76728 }, { "epoch": 0.1360486321110588, "grad_norm": 0.373046875, "learning_rate": 0.00037679591340256477, "loss": 0.1579, "step": 76730 }, { "epoch": 0.13605217827636862, "grad_norm": 0.25, "learning_rate": 0.0003767604880780221, "loss": 0.1555, "step": 76732 }, { "epoch": 0.13605572444167843, "grad_norm": 0.625, "learning_rate": 0.0003767250659165091, "loss": 0.4069, "step": 76734 }, { "epoch": 0.13605927060698825, "grad_norm": 0.302734375, "learning_rate": 0.0003766896469181805, "loss": 0.1796, "step": 76736 }, { "epoch": 0.13606281677229806, "grad_norm": 0.466796875, "learning_rate": 0.00037665423108319113, "loss": 0.1861, "step": 76738 }, { "epoch": 0.13606636293760788, "grad_norm": 0.267578125, "learning_rate": 0.00037661881841169584, "loss": 0.1633, "step": 76740 }, { "epoch": 0.1360699091029177, "grad_norm": 1.8359375, "learning_rate": 0.0003765834089038499, "loss": 0.2472, "step": 76742 }, { "epoch": 0.1360734552682275, "grad_norm": 0.435546875, "learning_rate": 0.0003765480025598078, "loss": 0.2073, "step": 76744 }, { "epoch": 0.13607700143353732, "grad_norm": 0.38671875, "learning_rate": 0.0003765125993797247, "loss": 0.2179, "step": 76746 }, { "epoch": 0.13608054759884713, "grad_norm": 0.41796875, "learning_rate": 0.0003764771993637548, "loss": 0.1601, "step": 76748 }, { "epoch": 0.13608409376415695, "grad_norm": 1.6171875, "learning_rate": 0.00037644180251205375, "loss": 0.2199, "step": 76750 }, { "epoch": 0.13608763992946676, "grad_norm": 0.66796875, "learning_rate": 0.0003764064088247758, "loss": 0.1514, "step": 76752 }, { "epoch": 0.13609118609477658, "grad_norm": 0.291015625, "learning_rate": 0.000376371018302076, "loss": 0.1669, "step": 76754 }, { "epoch": 0.1360947322600864, "grad_norm": 0.515625, "learning_rate": 0.00037633563094410913, "loss": 0.1678, "step": 76756 }, { "epoch": 0.1360982784253962, "grad_norm": 0.703125, "learning_rate": 0.00037630024675102953, "loss": 0.1591, "step": 76758 }, { "epoch": 0.13610182459070602, "grad_norm": 0.404296875, "learning_rate": 0.00037626486572299266, "loss": 0.1906, "step": 76760 }, { "epoch": 0.13610537075601584, "grad_norm": 1.4453125, "learning_rate": 0.00037622948786015256, "loss": 0.1769, "step": 76762 }, { "epoch": 0.13610891692132565, "grad_norm": 0.80859375, "learning_rate": 0.0003761941131626644, "loss": 0.2725, "step": 76764 }, { "epoch": 0.13611246308663547, "grad_norm": 0.26171875, "learning_rate": 0.0003761587416306825, "loss": 0.1555, "step": 76766 }, { "epoch": 0.13611600925194528, "grad_norm": 0.267578125, "learning_rate": 0.0003761233732643621, "loss": 0.1102, "step": 76768 }, { "epoch": 0.1361195554172551, "grad_norm": 0.2138671875, "learning_rate": 0.00037608800806385753, "loss": 0.1766, "step": 76770 }, { "epoch": 0.13612310158256494, "grad_norm": 0.6953125, "learning_rate": 0.00037605264602932366, "loss": 0.1553, "step": 76772 }, { "epoch": 0.13612664774787475, "grad_norm": 0.390625, "learning_rate": 0.0003760172871609148, "loss": 0.1929, "step": 76774 }, { "epoch": 0.13613019391318457, "grad_norm": 0.8046875, "learning_rate": 0.00037598193145878565, "loss": 0.2949, "step": 76776 }, { "epoch": 0.13613374007849438, "grad_norm": 0.71875, "learning_rate": 0.0003759465789230915, "loss": 0.4242, "step": 76778 }, { "epoch": 0.1361372862438042, "grad_norm": 1.0859375, "learning_rate": 0.00037591122955398583, "loss": 0.2117, "step": 76780 }, { "epoch": 0.136140832409114, "grad_norm": 0.54296875, "learning_rate": 0.0003758758833516241, "loss": 0.1709, "step": 76782 }, { "epoch": 0.13614437857442382, "grad_norm": 0.54296875, "learning_rate": 0.0003758405403161605, "loss": 0.1355, "step": 76784 }, { "epoch": 0.13614792473973364, "grad_norm": 0.34765625, "learning_rate": 0.0003758052004477502, "loss": 0.2039, "step": 76786 }, { "epoch": 0.13615147090504345, "grad_norm": 1.109375, "learning_rate": 0.00037576986374654675, "loss": 0.3451, "step": 76788 }, { "epoch": 0.13615501707035327, "grad_norm": 0.890625, "learning_rate": 0.0003757345302127054, "loss": 0.1668, "step": 76790 }, { "epoch": 0.13615856323566308, "grad_norm": 0.375, "learning_rate": 0.0003756991998463806, "loss": 0.1388, "step": 76792 }, { "epoch": 0.1361621094009729, "grad_norm": 0.4375, "learning_rate": 0.00037566387264772685, "loss": 0.1677, "step": 76794 }, { "epoch": 0.1361656555662827, "grad_norm": 0.19921875, "learning_rate": 0.0003756285486168984, "loss": 0.1442, "step": 76796 }, { "epoch": 0.13616920173159253, "grad_norm": 0.67578125, "learning_rate": 0.0003755932277540498, "loss": 0.1623, "step": 76798 }, { "epoch": 0.13617274789690234, "grad_norm": 0.40234375, "learning_rate": 0.00037555791005933594, "loss": 0.1855, "step": 76800 }, { "epoch": 0.13617629406221216, "grad_norm": 0.412109375, "learning_rate": 0.0003755225955329108, "loss": 0.1551, "step": 76802 }, { "epoch": 0.13617984022752197, "grad_norm": 0.4296875, "learning_rate": 0.00037548728417492917, "loss": 0.2359, "step": 76804 }, { "epoch": 0.13618338639283178, "grad_norm": 0.8203125, "learning_rate": 0.00037545197598554494, "loss": 0.1842, "step": 76806 }, { "epoch": 0.1361869325581416, "grad_norm": 0.5234375, "learning_rate": 0.0003754166709649133, "loss": 0.1548, "step": 76808 }, { "epoch": 0.1361904787234514, "grad_norm": 0.298828125, "learning_rate": 0.000375381369113188, "loss": 0.149, "step": 76810 }, { "epoch": 0.13619402488876123, "grad_norm": 0.181640625, "learning_rate": 0.00037534607043052386, "loss": 0.1173, "step": 76812 }, { "epoch": 0.13619757105407104, "grad_norm": 0.75, "learning_rate": 0.00037531077491707507, "loss": 0.169, "step": 76814 }, { "epoch": 0.13620111721938086, "grad_norm": 0.279296875, "learning_rate": 0.0003752754825729958, "loss": 0.1517, "step": 76816 }, { "epoch": 0.13620466338469067, "grad_norm": 0.19140625, "learning_rate": 0.00037524019339844097, "loss": 0.1424, "step": 76818 }, { "epoch": 0.1362082095500005, "grad_norm": 0.341796875, "learning_rate": 0.00037520490739356417, "loss": 0.168, "step": 76820 }, { "epoch": 0.1362117557153103, "grad_norm": 0.369140625, "learning_rate": 0.0003751696245585202, "loss": 0.1489, "step": 76822 }, { "epoch": 0.13621530188062012, "grad_norm": 0.27734375, "learning_rate": 0.0003751343448934631, "loss": 0.1717, "step": 76824 }, { "epoch": 0.13621884804592993, "grad_norm": 0.2099609375, "learning_rate": 0.0003750990683985478, "loss": 0.1452, "step": 76826 }, { "epoch": 0.13622239421123974, "grad_norm": 0.55859375, "learning_rate": 0.0003750637950739276, "loss": 0.1914, "step": 76828 }, { "epoch": 0.13622594037654956, "grad_norm": 0.4375, "learning_rate": 0.0003750285249197576, "loss": 0.1756, "step": 76830 }, { "epoch": 0.13622948654185937, "grad_norm": 0.357421875, "learning_rate": 0.00037499325793619156, "loss": 0.1625, "step": 76832 }, { "epoch": 0.1362330327071692, "grad_norm": 0.3828125, "learning_rate": 0.00037495799412338396, "loss": 0.1998, "step": 76834 }, { "epoch": 0.136236578872479, "grad_norm": 0.53125, "learning_rate": 0.00037492273348148895, "loss": 0.2117, "step": 76836 }, { "epoch": 0.13624012503778882, "grad_norm": 0.515625, "learning_rate": 0.00037488747601066043, "loss": 0.1649, "step": 76838 }, { "epoch": 0.13624367120309863, "grad_norm": 1.5078125, "learning_rate": 0.0003748522217110533, "loss": 0.337, "step": 76840 }, { "epoch": 0.13624721736840845, "grad_norm": 0.9609375, "learning_rate": 0.00037481697058282113, "loss": 0.2556, "step": 76842 }, { "epoch": 0.13625076353371826, "grad_norm": 0.63671875, "learning_rate": 0.0003747817226261184, "loss": 0.2093, "step": 76844 }, { "epoch": 0.13625430969902808, "grad_norm": 0.546875, "learning_rate": 0.00037474647784109896, "loss": 0.1659, "step": 76846 }, { "epoch": 0.1362578558643379, "grad_norm": 0.353515625, "learning_rate": 0.00037471123622791723, "loss": 0.1559, "step": 76848 }, { "epoch": 0.1362614020296477, "grad_norm": 0.59375, "learning_rate": 0.00037467599778672747, "loss": 0.2367, "step": 76850 }, { "epoch": 0.13626494819495752, "grad_norm": 0.58984375, "learning_rate": 0.0003746407625176833, "loss": 0.1746, "step": 76852 }, { "epoch": 0.13626849436026733, "grad_norm": 0.72265625, "learning_rate": 0.00037460553042093924, "loss": 0.1385, "step": 76854 }, { "epoch": 0.13627204052557715, "grad_norm": 0.306640625, "learning_rate": 0.00037457030149664917, "loss": 0.1847, "step": 76856 }, { "epoch": 0.13627558669088696, "grad_norm": 0.3359375, "learning_rate": 0.00037453507574496727, "loss": 0.1774, "step": 76858 }, { "epoch": 0.13627913285619678, "grad_norm": 0.384765625, "learning_rate": 0.0003744998531660475, "loss": 0.1631, "step": 76860 }, { "epoch": 0.13628267902150662, "grad_norm": 0.515625, "learning_rate": 0.00037446463376004404, "loss": 0.1766, "step": 76862 }, { "epoch": 0.13628622518681643, "grad_norm": 0.474609375, "learning_rate": 0.00037442941752711083, "loss": 0.1732, "step": 76864 }, { "epoch": 0.13628977135212625, "grad_norm": 0.408203125, "learning_rate": 0.0003743942044674016, "loss": 0.1684, "step": 76866 }, { "epoch": 0.13629331751743606, "grad_norm": 0.2041015625, "learning_rate": 0.0003743589945810712, "loss": 0.1617, "step": 76868 }, { "epoch": 0.13629686368274588, "grad_norm": 0.162109375, "learning_rate": 0.0003743237878682725, "loss": 0.1577, "step": 76870 }, { "epoch": 0.1363004098480557, "grad_norm": 0.490234375, "learning_rate": 0.00037428858432916027, "loss": 0.3215, "step": 76872 }, { "epoch": 0.1363039560133655, "grad_norm": 0.474609375, "learning_rate": 0.0003742533839638881, "loss": 0.167, "step": 76874 }, { "epoch": 0.13630750217867532, "grad_norm": 0.2421875, "learning_rate": 0.00037421818677261045, "loss": 0.3856, "step": 76876 }, { "epoch": 0.13631104834398514, "grad_norm": 0.546875, "learning_rate": 0.0003741829927554804, "loss": 0.1657, "step": 76878 }, { "epoch": 0.13631459450929495, "grad_norm": 0.31640625, "learning_rate": 0.0003741478019126526, "loss": 0.1643, "step": 76880 }, { "epoch": 0.13631814067460477, "grad_norm": 1.796875, "learning_rate": 0.0003741126142442806, "loss": 0.2475, "step": 76882 }, { "epoch": 0.13632168683991458, "grad_norm": 0.462890625, "learning_rate": 0.0003740774297505184, "loss": 0.1999, "step": 76884 }, { "epoch": 0.1363252330052244, "grad_norm": 0.48828125, "learning_rate": 0.00037404224843152003, "loss": 0.412, "step": 76886 }, { "epoch": 0.1363287791705342, "grad_norm": 0.4453125, "learning_rate": 0.0003740070702874387, "loss": 0.1735, "step": 76888 }, { "epoch": 0.13633232533584402, "grad_norm": 0.40625, "learning_rate": 0.0003739718953184291, "loss": 0.1258, "step": 76890 }, { "epoch": 0.13633587150115384, "grad_norm": 0.71484375, "learning_rate": 0.0003739367235246447, "loss": 0.2268, "step": 76892 }, { "epoch": 0.13633941766646365, "grad_norm": 0.466796875, "learning_rate": 0.00037390155490623934, "loss": 0.2057, "step": 76894 }, { "epoch": 0.13634296383177347, "grad_norm": 0.51953125, "learning_rate": 0.0003738663894633665, "loss": 0.1669, "step": 76896 }, { "epoch": 0.13634650999708328, "grad_norm": 0.53515625, "learning_rate": 0.0003738312271961806, "loss": 0.1737, "step": 76898 }, { "epoch": 0.1363500561623931, "grad_norm": 0.1943359375, "learning_rate": 0.00037379606810483506, "loss": 0.1924, "step": 76900 }, { "epoch": 0.1363536023277029, "grad_norm": 0.2421875, "learning_rate": 0.0003737609121894837, "loss": 0.2586, "step": 76902 }, { "epoch": 0.13635714849301273, "grad_norm": 0.189453125, "learning_rate": 0.0003737257594502803, "loss": 0.196, "step": 76904 }, { "epoch": 0.13636069465832254, "grad_norm": 0.41015625, "learning_rate": 0.00037369060988737827, "loss": 0.1666, "step": 76906 }, { "epoch": 0.13636424082363235, "grad_norm": 3.921875, "learning_rate": 0.00037365546350093216, "loss": 0.1678, "step": 76908 }, { "epoch": 0.13636778698894217, "grad_norm": 0.193359375, "learning_rate": 0.0003736203202910945, "loss": 0.1661, "step": 76910 }, { "epoch": 0.13637133315425198, "grad_norm": 0.35546875, "learning_rate": 0.00037358518025802, "loss": 0.1922, "step": 76912 }, { "epoch": 0.1363748793195618, "grad_norm": 0.291015625, "learning_rate": 0.0003735500434018617, "loss": 0.214, "step": 76914 }, { "epoch": 0.1363784254848716, "grad_norm": 0.392578125, "learning_rate": 0.0003735149097227741, "loss": 0.256, "step": 76916 }, { "epoch": 0.13638197165018143, "grad_norm": 0.6953125, "learning_rate": 0.0003734797792209097, "loss": 0.2043, "step": 76918 }, { "epoch": 0.13638551781549124, "grad_norm": 1.0625, "learning_rate": 0.0003734446518964231, "loss": 0.2483, "step": 76920 }, { "epoch": 0.13638906398080106, "grad_norm": 0.244140625, "learning_rate": 0.0003734095277494674, "loss": 0.1743, "step": 76922 }, { "epoch": 0.13639261014611087, "grad_norm": 0.50390625, "learning_rate": 0.00037337440678019643, "loss": 0.2286, "step": 76924 }, { "epoch": 0.13639615631142069, "grad_norm": 0.27734375, "learning_rate": 0.00037333928898876373, "loss": 0.1392, "step": 76926 }, { "epoch": 0.1363997024767305, "grad_norm": 1.0859375, "learning_rate": 0.00037330417437532264, "loss": 0.1578, "step": 76928 }, { "epoch": 0.13640324864204031, "grad_norm": 1.0078125, "learning_rate": 0.0003732690629400273, "loss": 0.1616, "step": 76930 }, { "epoch": 0.13640679480735013, "grad_norm": 2.296875, "learning_rate": 0.00037323395468303087, "loss": 0.4198, "step": 76932 }, { "epoch": 0.13641034097265994, "grad_norm": 0.5703125, "learning_rate": 0.0003731988496044869, "loss": 0.163, "step": 76934 }, { "epoch": 0.13641388713796976, "grad_norm": 0.44921875, "learning_rate": 0.00037316374770454886, "loss": 0.1634, "step": 76936 }, { "epoch": 0.13641743330327957, "grad_norm": 0.271484375, "learning_rate": 0.0003731286489833706, "loss": 0.1184, "step": 76938 }, { "epoch": 0.1364209794685894, "grad_norm": 0.796875, "learning_rate": 0.00037309355344110535, "loss": 0.1559, "step": 76940 }, { "epoch": 0.1364245256338992, "grad_norm": 0.53125, "learning_rate": 0.00037305846107790676, "loss": 0.1269, "step": 76942 }, { "epoch": 0.13642807179920902, "grad_norm": 0.53515625, "learning_rate": 0.0003730233718939281, "loss": 0.189, "step": 76944 }, { "epoch": 0.13643161796451883, "grad_norm": 0.1904296875, "learning_rate": 0.0003729882858893228, "loss": 0.1557, "step": 76946 }, { "epoch": 0.13643516412982865, "grad_norm": 0.248046875, "learning_rate": 0.00037295320306424465, "loss": 0.156, "step": 76948 }, { "epoch": 0.13643871029513846, "grad_norm": 0.53125, "learning_rate": 0.0003729181234188468, "loss": 0.1828, "step": 76950 }, { "epoch": 0.1364422564604483, "grad_norm": 0.2490234375, "learning_rate": 0.0003728830469532828, "loss": 0.1851, "step": 76952 }, { "epoch": 0.13644580262575812, "grad_norm": 0.421875, "learning_rate": 0.00037284797366770575, "loss": 0.1793, "step": 76954 }, { "epoch": 0.13644934879106793, "grad_norm": 0.275390625, "learning_rate": 0.0003728129035622695, "loss": 0.1415, "step": 76956 }, { "epoch": 0.13645289495637775, "grad_norm": 0.2099609375, "learning_rate": 0.00037277783663712717, "loss": 0.1515, "step": 76958 }, { "epoch": 0.13645644112168756, "grad_norm": 0.1689453125, "learning_rate": 0.00037274277289243225, "loss": 0.1705, "step": 76960 }, { "epoch": 0.13645998728699738, "grad_norm": 0.9453125, "learning_rate": 0.00037270771232833787, "loss": 0.2847, "step": 76962 }, { "epoch": 0.1364635334523072, "grad_norm": 0.59375, "learning_rate": 0.0003726726549449974, "loss": 0.1978, "step": 76964 }, { "epoch": 0.136467079617617, "grad_norm": 0.328125, "learning_rate": 0.0003726376007425647, "loss": 0.1466, "step": 76966 }, { "epoch": 0.13647062578292682, "grad_norm": 0.48046875, "learning_rate": 0.0003726025497211921, "loss": 0.2451, "step": 76968 }, { "epoch": 0.13647417194823663, "grad_norm": 0.3515625, "learning_rate": 0.0003725675018810337, "loss": 0.1268, "step": 76970 }, { "epoch": 0.13647771811354645, "grad_norm": 0.25390625, "learning_rate": 0.0003725324572222423, "loss": 0.1902, "step": 76972 }, { "epoch": 0.13648126427885626, "grad_norm": 1.109375, "learning_rate": 0.0003724974157449719, "loss": 0.1848, "step": 76974 }, { "epoch": 0.13648481044416608, "grad_norm": 0.25390625, "learning_rate": 0.0003724623774493745, "loss": 0.1516, "step": 76976 }, { "epoch": 0.1364883566094759, "grad_norm": 0.29296875, "learning_rate": 0.0003724273423356045, "loss": 0.1775, "step": 76978 }, { "epoch": 0.1364919027747857, "grad_norm": 0.287109375, "learning_rate": 0.00037239231040381467, "loss": 0.1257, "step": 76980 }, { "epoch": 0.13649544894009552, "grad_norm": 0.609375, "learning_rate": 0.00037235728165415814, "loss": 0.1887, "step": 76982 }, { "epoch": 0.13649899510540534, "grad_norm": 1.6875, "learning_rate": 0.00037232225608678824, "loss": 0.2416, "step": 76984 }, { "epoch": 0.13650254127071515, "grad_norm": 0.392578125, "learning_rate": 0.0003722872337018579, "loss": 0.213, "step": 76986 }, { "epoch": 0.13650608743602496, "grad_norm": 0.177734375, "learning_rate": 0.00037225221449952063, "loss": 0.2074, "step": 76988 }, { "epoch": 0.13650963360133478, "grad_norm": 0.71875, "learning_rate": 0.00037221719847992953, "loss": 0.1702, "step": 76990 }, { "epoch": 0.1365131797666446, "grad_norm": 1.0390625, "learning_rate": 0.0003721821856432377, "loss": 0.3947, "step": 76992 }, { "epoch": 0.1365167259319544, "grad_norm": 0.2890625, "learning_rate": 0.0003721471759895979, "loss": 0.1664, "step": 76994 }, { "epoch": 0.13652027209726422, "grad_norm": 0.94140625, "learning_rate": 0.00037211216951916386, "loss": 0.6435, "step": 76996 }, { "epoch": 0.13652381826257404, "grad_norm": 0.65234375, "learning_rate": 0.0003720771662320883, "loss": 0.2411, "step": 76998 }, { "epoch": 0.13652736442788385, "grad_norm": 0.41015625, "learning_rate": 0.0003720421661285245, "loss": 0.291, "step": 77000 }, { "epoch": 0.13653091059319367, "grad_norm": 0.59765625, "learning_rate": 0.00037200716920862545, "loss": 0.1779, "step": 77002 }, { "epoch": 0.13653445675850348, "grad_norm": 0.74609375, "learning_rate": 0.00037197217547254376, "loss": 0.1634, "step": 77004 }, { "epoch": 0.1365380029238133, "grad_norm": 1.1796875, "learning_rate": 0.0003719371849204334, "loss": 0.2154, "step": 77006 }, { "epoch": 0.1365415490891231, "grad_norm": 0.70703125, "learning_rate": 0.0003719021975524465, "loss": 0.1842, "step": 77008 }, { "epoch": 0.13654509525443292, "grad_norm": 0.494140625, "learning_rate": 0.00037186721336873664, "loss": 0.1846, "step": 77010 }, { "epoch": 0.13654864141974274, "grad_norm": 0.171875, "learning_rate": 0.0003718322323694564, "loss": 0.154, "step": 77012 }, { "epoch": 0.13655218758505255, "grad_norm": 0.3828125, "learning_rate": 0.0003717972545547594, "loss": 0.1451, "step": 77014 }, { "epoch": 0.13655573375036237, "grad_norm": 0.6640625, "learning_rate": 0.00037176227992479795, "loss": 0.1967, "step": 77016 }, { "epoch": 0.13655927991567218, "grad_norm": 0.3984375, "learning_rate": 0.0003717273084797254, "loss": 0.183, "step": 77018 }, { "epoch": 0.136562826080982, "grad_norm": 0.30078125, "learning_rate": 0.00037169234021969456, "loss": 0.1586, "step": 77020 }, { "epoch": 0.1365663722462918, "grad_norm": 0.43359375, "learning_rate": 0.0003716573751448582, "loss": 0.1672, "step": 77022 }, { "epoch": 0.13656991841160163, "grad_norm": 0.3984375, "learning_rate": 0.00037162241325536976, "loss": 0.1358, "step": 77024 }, { "epoch": 0.13657346457691144, "grad_norm": 1.8125, "learning_rate": 0.00037158745455138144, "loss": 0.2642, "step": 77026 }, { "epoch": 0.13657701074222126, "grad_norm": 0.251953125, "learning_rate": 0.00037155249903304664, "loss": 0.168, "step": 77028 }, { "epoch": 0.13658055690753107, "grad_norm": 1.0390625, "learning_rate": 0.0003715175467005181, "loss": 0.1877, "step": 77030 }, { "epoch": 0.13658410307284088, "grad_norm": 0.66015625, "learning_rate": 0.0003714825975539486, "loss": 0.169, "step": 77032 }, { "epoch": 0.1365876492381507, "grad_norm": 0.3125, "learning_rate": 0.00037144765159349114, "loss": 0.1488, "step": 77034 }, { "epoch": 0.1365911954034605, "grad_norm": 1.1640625, "learning_rate": 0.00037141270881929823, "loss": 0.1908, "step": 77036 }, { "epoch": 0.13659474156877033, "grad_norm": 1.0, "learning_rate": 0.0003713777692315231, "loss": 0.3013, "step": 77038 }, { "epoch": 0.13659828773408014, "grad_norm": 0.47265625, "learning_rate": 0.0003713428328303184, "loss": 0.168, "step": 77040 }, { "epoch": 0.13660183389938996, "grad_norm": 0.3203125, "learning_rate": 0.0003713078996158369, "loss": 0.2365, "step": 77042 }, { "epoch": 0.1366053800646998, "grad_norm": 0.60546875, "learning_rate": 0.00037127296958823123, "loss": 0.2177, "step": 77044 }, { "epoch": 0.13660892623000961, "grad_norm": 0.5390625, "learning_rate": 0.0003712380427476544, "loss": 0.1634, "step": 77046 }, { "epoch": 0.13661247239531943, "grad_norm": 0.7109375, "learning_rate": 0.000371203119094259, "loss": 0.2038, "step": 77048 }, { "epoch": 0.13661601856062924, "grad_norm": 2.265625, "learning_rate": 0.00037116819862819783, "loss": 0.3163, "step": 77050 }, { "epoch": 0.13661956472593906, "grad_norm": 0.2392578125, "learning_rate": 0.0003711332813496238, "loss": 0.13, "step": 77052 }, { "epoch": 0.13662311089124887, "grad_norm": 0.6875, "learning_rate": 0.00037109836725868894, "loss": 0.1188, "step": 77054 }, { "epoch": 0.1366266570565587, "grad_norm": 0.484375, "learning_rate": 0.0003710634563555471, "loss": 0.2002, "step": 77056 }, { "epoch": 0.1366302032218685, "grad_norm": 0.373046875, "learning_rate": 0.00037102854864034976, "loss": 0.2229, "step": 77058 }, { "epoch": 0.13663374938717832, "grad_norm": 0.232421875, "learning_rate": 0.0003709936441132503, "loss": 0.1757, "step": 77060 }, { "epoch": 0.13663729555248813, "grad_norm": 0.296875, "learning_rate": 0.0003709587427744009, "loss": 0.1937, "step": 77062 }, { "epoch": 0.13664084171779795, "grad_norm": 0.52734375, "learning_rate": 0.00037092384462395503, "loss": 0.2176, "step": 77064 }, { "epoch": 0.13664438788310776, "grad_norm": 0.298828125, "learning_rate": 0.0003708889496620642, "loss": 0.184, "step": 77066 }, { "epoch": 0.13664793404841757, "grad_norm": 0.373046875, "learning_rate": 0.00037085405788888205, "loss": 0.1792, "step": 77068 }, { "epoch": 0.1366514802137274, "grad_norm": 0.384765625, "learning_rate": 0.0003708191693045605, "loss": 0.1573, "step": 77070 }, { "epoch": 0.1366550263790372, "grad_norm": 0.3671875, "learning_rate": 0.0003707842839092523, "loss": 0.1258, "step": 77072 }, { "epoch": 0.13665857254434702, "grad_norm": 0.7109375, "learning_rate": 0.0003707494017031103, "loss": 0.2437, "step": 77074 }, { "epoch": 0.13666211870965683, "grad_norm": 1.0078125, "learning_rate": 0.00037071452268628647, "loss": 0.1745, "step": 77076 }, { "epoch": 0.13666566487496665, "grad_norm": 0.33203125, "learning_rate": 0.00037067964685893383, "loss": 0.2667, "step": 77078 }, { "epoch": 0.13666921104027646, "grad_norm": 0.1708984375, "learning_rate": 0.0003706447742212049, "loss": 0.2591, "step": 77080 }, { "epoch": 0.13667275720558628, "grad_norm": 0.357421875, "learning_rate": 0.0003706099047732521, "loss": 0.1687, "step": 77082 }, { "epoch": 0.1366763033708961, "grad_norm": 0.275390625, "learning_rate": 0.0003705750385152275, "loss": 0.2159, "step": 77084 }, { "epoch": 0.1366798495362059, "grad_norm": 0.54296875, "learning_rate": 0.00037054017544728435, "loss": 0.3252, "step": 77086 }, { "epoch": 0.13668339570151572, "grad_norm": 0.345703125, "learning_rate": 0.0003705053155695747, "loss": 0.1681, "step": 77088 }, { "epoch": 0.13668694186682553, "grad_norm": 2.875, "learning_rate": 0.00037047045888225107, "loss": 0.4698, "step": 77090 }, { "epoch": 0.13669048803213535, "grad_norm": 0.73828125, "learning_rate": 0.00037043560538546584, "loss": 0.1953, "step": 77092 }, { "epoch": 0.13669403419744516, "grad_norm": 0.5859375, "learning_rate": 0.0003704007550793714, "loss": 0.143, "step": 77094 }, { "epoch": 0.13669758036275498, "grad_norm": 1.4453125, "learning_rate": 0.0003703659079641206, "loss": 0.2521, "step": 77096 }, { "epoch": 0.1367011265280648, "grad_norm": 0.58203125, "learning_rate": 0.000370331064039865, "loss": 0.2046, "step": 77098 }, { "epoch": 0.1367046726933746, "grad_norm": 0.18359375, "learning_rate": 0.00037029622330675784, "loss": 0.1412, "step": 77100 }, { "epoch": 0.13670821885868442, "grad_norm": 0.279296875, "learning_rate": 0.00037026138576495084, "loss": 0.2367, "step": 77102 }, { "epoch": 0.13671176502399424, "grad_norm": 0.275390625, "learning_rate": 0.0003702265514145969, "loss": 0.1763, "step": 77104 }, { "epoch": 0.13671531118930405, "grad_norm": 0.322265625, "learning_rate": 0.00037019172025584804, "loss": 0.2061, "step": 77106 }, { "epoch": 0.13671885735461387, "grad_norm": 0.337890625, "learning_rate": 0.00037015689228885675, "loss": 0.2083, "step": 77108 }, { "epoch": 0.13672240351992368, "grad_norm": 0.294921875, "learning_rate": 0.0003701220675137753, "loss": 0.1625, "step": 77110 }, { "epoch": 0.1367259496852335, "grad_norm": 0.494140625, "learning_rate": 0.00037008724593075564, "loss": 0.1483, "step": 77112 }, { "epoch": 0.1367294958505433, "grad_norm": 0.8984375, "learning_rate": 0.0003700524275399509, "loss": 0.1996, "step": 77114 }, { "epoch": 0.13673304201585312, "grad_norm": 1.703125, "learning_rate": 0.00037001761234151245, "loss": 0.2391, "step": 77116 }, { "epoch": 0.13673658818116294, "grad_norm": 0.55859375, "learning_rate": 0.0003699828003355931, "loss": 0.1529, "step": 77118 }, { "epoch": 0.13674013434647275, "grad_norm": 0.6484375, "learning_rate": 0.0003699479915223447, "loss": 0.1971, "step": 77120 }, { "epoch": 0.13674368051178257, "grad_norm": 1.4453125, "learning_rate": 0.00036991318590192014, "loss": 0.2115, "step": 77122 }, { "epoch": 0.13674722667709238, "grad_norm": 1.1015625, "learning_rate": 0.0003698783834744708, "loss": 0.2268, "step": 77124 }, { "epoch": 0.1367507728424022, "grad_norm": 0.43359375, "learning_rate": 0.00036984358424014955, "loss": 0.2193, "step": 77126 }, { "epoch": 0.136754319007712, "grad_norm": 0.439453125, "learning_rate": 0.00036980878819910837, "loss": 0.1428, "step": 77128 }, { "epoch": 0.13675786517302183, "grad_norm": 0.455078125, "learning_rate": 0.00036977399535149935, "loss": 0.1672, "step": 77130 }, { "epoch": 0.13676141133833164, "grad_norm": 1.5390625, "learning_rate": 0.0003697392056974747, "loss": 0.1931, "step": 77132 }, { "epoch": 0.13676495750364148, "grad_norm": 0.34375, "learning_rate": 0.00036970441923718636, "loss": 0.2051, "step": 77134 }, { "epoch": 0.1367685036689513, "grad_norm": 1.9765625, "learning_rate": 0.00036966963597078687, "loss": 0.2328, "step": 77136 }, { "epoch": 0.1367720498342611, "grad_norm": 0.376953125, "learning_rate": 0.0003696348558984283, "loss": 0.1518, "step": 77138 }, { "epoch": 0.13677559599957093, "grad_norm": 0.484375, "learning_rate": 0.0003696000790202625, "loss": 0.2468, "step": 77140 }, { "epoch": 0.13677914216488074, "grad_norm": 0.373046875, "learning_rate": 0.0003695653053364415, "loss": 0.1589, "step": 77142 }, { "epoch": 0.13678268833019055, "grad_norm": 0.431640625, "learning_rate": 0.00036953053484711784, "loss": 0.1681, "step": 77144 }, { "epoch": 0.13678623449550037, "grad_norm": 0.27734375, "learning_rate": 0.00036949576755244317, "loss": 0.2011, "step": 77146 }, { "epoch": 0.13678978066081018, "grad_norm": 0.2021484375, "learning_rate": 0.00036946100345256985, "loss": 0.1792, "step": 77148 }, { "epoch": 0.13679332682612, "grad_norm": 0.41015625, "learning_rate": 0.0003694262425476497, "loss": 0.1524, "step": 77150 }, { "epoch": 0.1367968729914298, "grad_norm": 0.23046875, "learning_rate": 0.0003693914848378345, "loss": 0.1484, "step": 77152 }, { "epoch": 0.13680041915673963, "grad_norm": 0.83203125, "learning_rate": 0.00036935673032327716, "loss": 0.196, "step": 77154 }, { "epoch": 0.13680396532204944, "grad_norm": 0.2138671875, "learning_rate": 0.00036932197900412845, "loss": 0.2004, "step": 77156 }, { "epoch": 0.13680751148735926, "grad_norm": 0.361328125, "learning_rate": 0.00036928723088054135, "loss": 0.1767, "step": 77158 }, { "epoch": 0.13681105765266907, "grad_norm": 0.2158203125, "learning_rate": 0.00036925248595266714, "loss": 0.156, "step": 77160 }, { "epoch": 0.13681460381797889, "grad_norm": 0.36328125, "learning_rate": 0.00036921774422065843, "loss": 0.1684, "step": 77162 }, { "epoch": 0.1368181499832887, "grad_norm": 0.337890625, "learning_rate": 0.0003691830056846666, "loss": 0.1623, "step": 77164 }, { "epoch": 0.13682169614859852, "grad_norm": 0.578125, "learning_rate": 0.0003691482703448439, "loss": 0.1994, "step": 77166 }, { "epoch": 0.13682524231390833, "grad_norm": 0.51171875, "learning_rate": 0.0003691135382013422, "loss": 0.2146, "step": 77168 }, { "epoch": 0.13682878847921814, "grad_norm": 0.78125, "learning_rate": 0.00036907880925431325, "loss": 0.1821, "step": 77170 }, { "epoch": 0.13683233464452796, "grad_norm": 0.490234375, "learning_rate": 0.00036904408350390904, "loss": 0.1802, "step": 77172 }, { "epoch": 0.13683588080983777, "grad_norm": 0.6171875, "learning_rate": 0.00036900936095028117, "loss": 0.1723, "step": 77174 }, { "epoch": 0.1368394269751476, "grad_norm": 2.3125, "learning_rate": 0.0003689746415935821, "loss": 0.1815, "step": 77176 }, { "epoch": 0.1368429731404574, "grad_norm": 0.318359375, "learning_rate": 0.0003689399254339633, "loss": 0.2316, "step": 77178 }, { "epoch": 0.13684651930576722, "grad_norm": 0.53515625, "learning_rate": 0.0003689052124715765, "loss": 0.1727, "step": 77180 }, { "epoch": 0.13685006547107703, "grad_norm": 0.451171875, "learning_rate": 0.0003688705027065734, "loss": 0.2099, "step": 77182 }, { "epoch": 0.13685361163638685, "grad_norm": 0.23046875, "learning_rate": 0.00036883579613910633, "loss": 0.168, "step": 77184 }, { "epoch": 0.13685715780169666, "grad_norm": 0.322265625, "learning_rate": 0.0003688010927693267, "loss": 0.1743, "step": 77186 }, { "epoch": 0.13686070396700648, "grad_norm": 0.953125, "learning_rate": 0.00036876639259738643, "loss": 0.1676, "step": 77188 }, { "epoch": 0.1368642501323163, "grad_norm": 0.427734375, "learning_rate": 0.0003687316956234371, "loss": 0.2366, "step": 77190 }, { "epoch": 0.1368677962976261, "grad_norm": 0.28125, "learning_rate": 0.0003686970018476306, "loss": 0.1829, "step": 77192 }, { "epoch": 0.13687134246293592, "grad_norm": 0.470703125, "learning_rate": 0.00036866231127011853, "loss": 0.1657, "step": 77194 }, { "epoch": 0.13687488862824573, "grad_norm": 0.6640625, "learning_rate": 0.00036862762389105287, "loss": 0.1902, "step": 77196 }, { "epoch": 0.13687843479355555, "grad_norm": 0.279296875, "learning_rate": 0.00036859293971058517, "loss": 0.1784, "step": 77198 }, { "epoch": 0.13688198095886536, "grad_norm": 0.392578125, "learning_rate": 0.00036855825872886684, "loss": 0.1955, "step": 77200 }, { "epoch": 0.13688552712417518, "grad_norm": 0.373046875, "learning_rate": 0.00036852358094605, "loss": 0.1897, "step": 77202 }, { "epoch": 0.136889073289485, "grad_norm": 0.392578125, "learning_rate": 0.00036848890636228615, "loss": 0.1441, "step": 77204 }, { "epoch": 0.1368926194547948, "grad_norm": 0.337890625, "learning_rate": 0.0003684542349777269, "loss": 0.1782, "step": 77206 }, { "epoch": 0.13689616562010462, "grad_norm": 0.6484375, "learning_rate": 0.00036841956679252387, "loss": 0.2204, "step": 77208 }, { "epoch": 0.13689971178541444, "grad_norm": 0.408203125, "learning_rate": 0.00036838490180682857, "loss": 0.1773, "step": 77210 }, { "epoch": 0.13690325795072425, "grad_norm": 0.171875, "learning_rate": 0.00036835024002079315, "loss": 0.1625, "step": 77212 }, { "epoch": 0.13690680411603406, "grad_norm": 0.34375, "learning_rate": 0.0003683155814345682, "loss": 0.1729, "step": 77214 }, { "epoch": 0.13691035028134388, "grad_norm": 0.91015625, "learning_rate": 0.0003682809260483063, "loss": 0.1952, "step": 77216 }, { "epoch": 0.1369138964466537, "grad_norm": 0.5625, "learning_rate": 0.00036824627386215845, "loss": 0.2814, "step": 77218 }, { "epoch": 0.1369174426119635, "grad_norm": 0.8125, "learning_rate": 0.0003682116248762765, "loss": 0.127, "step": 77220 }, { "epoch": 0.13692098877727332, "grad_norm": 0.30859375, "learning_rate": 0.0003681769790908117, "loss": 0.2019, "step": 77222 }, { "epoch": 0.13692453494258314, "grad_norm": 0.96875, "learning_rate": 0.00036814233650591554, "loss": 0.1633, "step": 77224 }, { "epoch": 0.13692808110789298, "grad_norm": 0.21875, "learning_rate": 0.00036810769712173995, "loss": 0.16, "step": 77226 }, { "epoch": 0.1369316272732028, "grad_norm": 0.35546875, "learning_rate": 0.00036807306093843606, "loss": 0.2131, "step": 77228 }, { "epoch": 0.1369351734385126, "grad_norm": 0.265625, "learning_rate": 0.0003680384279561555, "loss": 0.1735, "step": 77230 }, { "epoch": 0.13693871960382242, "grad_norm": 0.65234375, "learning_rate": 0.0003680037981750495, "loss": 0.2425, "step": 77232 }, { "epoch": 0.13694226576913224, "grad_norm": 0.9453125, "learning_rate": 0.0003679691715952698, "loss": 0.1434, "step": 77234 }, { "epoch": 0.13694581193444205, "grad_norm": 1.453125, "learning_rate": 0.0003679345482169678, "loss": 0.3071, "step": 77236 }, { "epoch": 0.13694935809975187, "grad_norm": 0.5234375, "learning_rate": 0.0003678999280402948, "loss": 0.2609, "step": 77238 }, { "epoch": 0.13695290426506168, "grad_norm": 1.796875, "learning_rate": 0.0003678653110654023, "loss": 0.2482, "step": 77240 }, { "epoch": 0.1369564504303715, "grad_norm": 0.53125, "learning_rate": 0.00036783069729244143, "loss": 0.1401, "step": 77242 }, { "epoch": 0.1369599965956813, "grad_norm": 0.3984375, "learning_rate": 0.00036779608672156427, "loss": 0.1843, "step": 77244 }, { "epoch": 0.13696354276099112, "grad_norm": 0.498046875, "learning_rate": 0.00036776147935292127, "loss": 0.1919, "step": 77246 }, { "epoch": 0.13696708892630094, "grad_norm": 0.80859375, "learning_rate": 0.0003677268751866645, "loss": 0.1408, "step": 77248 }, { "epoch": 0.13697063509161075, "grad_norm": 0.37109375, "learning_rate": 0.0003676922742229446, "loss": 0.1406, "step": 77250 }, { "epoch": 0.13697418125692057, "grad_norm": 0.169921875, "learning_rate": 0.00036765767646191396, "loss": 0.1666, "step": 77252 }, { "epoch": 0.13697772742223038, "grad_norm": 0.53515625, "learning_rate": 0.0003676230819037227, "loss": 0.179, "step": 77254 }, { "epoch": 0.1369812735875402, "grad_norm": 0.271484375, "learning_rate": 0.000367588490548523, "loss": 0.1497, "step": 77256 }, { "epoch": 0.13698481975285, "grad_norm": 1.4609375, "learning_rate": 0.00036755390239646567, "loss": 0.1916, "step": 77258 }, { "epoch": 0.13698836591815983, "grad_norm": 0.265625, "learning_rate": 0.00036751931744770215, "loss": 0.3254, "step": 77260 }, { "epoch": 0.13699191208346964, "grad_norm": 0.28125, "learning_rate": 0.00036748473570238373, "loss": 0.1938, "step": 77262 }, { "epoch": 0.13699545824877946, "grad_norm": 0.73046875, "learning_rate": 0.00036745015716066134, "loss": 0.1596, "step": 77264 }, { "epoch": 0.13699900441408927, "grad_norm": 0.39453125, "learning_rate": 0.00036741558182268666, "loss": 0.1427, "step": 77266 }, { "epoch": 0.13700255057939908, "grad_norm": 0.412109375, "learning_rate": 0.00036738100968861066, "loss": 0.1768, "step": 77268 }, { "epoch": 0.1370060967447089, "grad_norm": 1.203125, "learning_rate": 0.0003673464407585846, "loss": 0.23, "step": 77270 }, { "epoch": 0.13700964291001871, "grad_norm": 0.392578125, "learning_rate": 0.0003673118750327593, "loss": 0.1718, "step": 77272 }, { "epoch": 0.13701318907532853, "grad_norm": 0.26953125, "learning_rate": 0.0003672773125112866, "loss": 0.1814, "step": 77274 }, { "epoch": 0.13701673524063834, "grad_norm": 0.396484375, "learning_rate": 0.00036724275319431724, "loss": 0.1245, "step": 77276 }, { "epoch": 0.13702028140594816, "grad_norm": 0.236328125, "learning_rate": 0.0003672081970820025, "loss": 0.1571, "step": 77278 }, { "epoch": 0.13702382757125797, "grad_norm": 0.36328125, "learning_rate": 0.0003671736441744934, "loss": 0.4174, "step": 77280 }, { "epoch": 0.1370273737365678, "grad_norm": 0.1357421875, "learning_rate": 0.00036713909447194106, "loss": 0.248, "step": 77282 }, { "epoch": 0.1370309199018776, "grad_norm": 0.58984375, "learning_rate": 0.0003671045479744966, "loss": 0.1577, "step": 77284 }, { "epoch": 0.13703446606718742, "grad_norm": 1.0, "learning_rate": 0.00036707000468231123, "loss": 0.1705, "step": 77286 }, { "epoch": 0.13703801223249723, "grad_norm": 0.208984375, "learning_rate": 0.00036703546459553607, "loss": 0.16, "step": 77288 }, { "epoch": 0.13704155839780705, "grad_norm": 0.470703125, "learning_rate": 0.00036700092771432157, "loss": 0.1935, "step": 77290 }, { "epoch": 0.13704510456311686, "grad_norm": 6.125, "learning_rate": 0.0003669663940388196, "loss": 0.2629, "step": 77292 }, { "epoch": 0.13704865072842667, "grad_norm": 0.3515625, "learning_rate": 0.0003669318635691807, "loss": 0.181, "step": 77294 }, { "epoch": 0.1370521968937365, "grad_norm": 0.51953125, "learning_rate": 0.0003668973363055562, "loss": 0.1663, "step": 77296 }, { "epoch": 0.1370557430590463, "grad_norm": 0.1865234375, "learning_rate": 0.0003668628122480968, "loss": 0.1496, "step": 77298 }, { "epoch": 0.13705928922435612, "grad_norm": 1.3359375, "learning_rate": 0.0003668282913969534, "loss": 0.1993, "step": 77300 }, { "epoch": 0.13706283538966593, "grad_norm": 0.64453125, "learning_rate": 0.0003667937737522776, "loss": 0.1876, "step": 77302 }, { "epoch": 0.13706638155497575, "grad_norm": 0.2060546875, "learning_rate": 0.0003667592593142195, "loss": 0.1546, "step": 77304 }, { "epoch": 0.13706992772028556, "grad_norm": 0.203125, "learning_rate": 0.00036672474808293075, "loss": 0.119, "step": 77306 }, { "epoch": 0.13707347388559538, "grad_norm": 0.44921875, "learning_rate": 0.00036669024005856176, "loss": 0.1709, "step": 77308 }, { "epoch": 0.1370770200509052, "grad_norm": 0.56640625, "learning_rate": 0.00036665573524126416, "loss": 0.1826, "step": 77310 }, { "epoch": 0.137080566216215, "grad_norm": 0.50390625, "learning_rate": 0.0003666212336311878, "loss": 0.2111, "step": 77312 }, { "epoch": 0.13708411238152482, "grad_norm": 0.2470703125, "learning_rate": 0.0003665867352284846, "loss": 0.2012, "step": 77314 }, { "epoch": 0.13708765854683466, "grad_norm": 0.423828125, "learning_rate": 0.00036655224003330494, "loss": 0.1477, "step": 77316 }, { "epoch": 0.13709120471214448, "grad_norm": 0.3046875, "learning_rate": 0.0003665177480457997, "loss": 0.1565, "step": 77318 }, { "epoch": 0.1370947508774543, "grad_norm": 0.4609375, "learning_rate": 0.0003664832592661198, "loss": 0.1802, "step": 77320 }, { "epoch": 0.1370982970427641, "grad_norm": 0.328125, "learning_rate": 0.0003664487736944159, "loss": 0.2133, "step": 77322 }, { "epoch": 0.13710184320807392, "grad_norm": 0.275390625, "learning_rate": 0.00036641429133083903, "loss": 0.1401, "step": 77324 }, { "epoch": 0.13710538937338373, "grad_norm": 0.734375, "learning_rate": 0.00036637981217554005, "loss": 0.1829, "step": 77326 }, { "epoch": 0.13710893553869355, "grad_norm": 0.3515625, "learning_rate": 0.00036634533622866965, "loss": 0.1601, "step": 77328 }, { "epoch": 0.13711248170400336, "grad_norm": 0.55078125, "learning_rate": 0.00036631086349037833, "loss": 0.1935, "step": 77330 }, { "epoch": 0.13711602786931318, "grad_norm": 0.275390625, "learning_rate": 0.00036627639396081727, "loss": 0.1483, "step": 77332 }, { "epoch": 0.137119574034623, "grad_norm": 1.9609375, "learning_rate": 0.0003662419276401372, "loss": 0.2872, "step": 77334 }, { "epoch": 0.1371231201999328, "grad_norm": 0.353515625, "learning_rate": 0.00036620746452848866, "loss": 0.1961, "step": 77336 }, { "epoch": 0.13712666636524262, "grad_norm": 0.419921875, "learning_rate": 0.00036617300462602246, "loss": 0.1792, "step": 77338 }, { "epoch": 0.13713021253055244, "grad_norm": 3.15625, "learning_rate": 0.0003661385479328889, "loss": 0.356, "step": 77340 }, { "epoch": 0.13713375869586225, "grad_norm": 0.287109375, "learning_rate": 0.0003661040944492397, "loss": 0.1702, "step": 77342 }, { "epoch": 0.13713730486117207, "grad_norm": 0.3125, "learning_rate": 0.0003660696441752243, "loss": 0.2165, "step": 77344 }, { "epoch": 0.13714085102648188, "grad_norm": 0.447265625, "learning_rate": 0.00036603519711099426, "loss": 0.1186, "step": 77346 }, { "epoch": 0.1371443971917917, "grad_norm": 0.470703125, "learning_rate": 0.0003660007532566997, "loss": 0.2298, "step": 77348 }, { "epoch": 0.1371479433571015, "grad_norm": 2.203125, "learning_rate": 0.00036596631261249195, "loss": 0.1561, "step": 77350 }, { "epoch": 0.13715148952241132, "grad_norm": 0.3359375, "learning_rate": 0.0003659318751785207, "loss": 0.1851, "step": 77352 }, { "epoch": 0.13715503568772114, "grad_norm": 0.345703125, "learning_rate": 0.0003658974409549373, "loss": 0.1834, "step": 77354 }, { "epoch": 0.13715858185303095, "grad_norm": 0.271484375, "learning_rate": 0.00036586300994189205, "loss": 0.1557, "step": 77356 }, { "epoch": 0.13716212801834077, "grad_norm": 0.494140625, "learning_rate": 0.00036582858213953526, "loss": 0.1926, "step": 77358 }, { "epoch": 0.13716567418365058, "grad_norm": 1.890625, "learning_rate": 0.00036579415754801827, "loss": 0.1865, "step": 77360 }, { "epoch": 0.1371692203489604, "grad_norm": 0.3125, "learning_rate": 0.0003657597361674908, "loss": 0.2156, "step": 77362 }, { "epoch": 0.1371727665142702, "grad_norm": 0.5703125, "learning_rate": 0.00036572531799810383, "loss": 0.1792, "step": 77364 }, { "epoch": 0.13717631267958003, "grad_norm": 0.259765625, "learning_rate": 0.00036569090304000774, "loss": 0.1559, "step": 77366 }, { "epoch": 0.13717985884488984, "grad_norm": 1.65625, "learning_rate": 0.00036565649129335345, "loss": 0.1869, "step": 77368 }, { "epoch": 0.13718340501019965, "grad_norm": 0.40625, "learning_rate": 0.0003656220827582907, "loss": 0.2619, "step": 77370 }, { "epoch": 0.13718695117550947, "grad_norm": 1.84375, "learning_rate": 0.0003655876774349705, "loss": 0.1936, "step": 77372 }, { "epoch": 0.13719049734081928, "grad_norm": 0.48828125, "learning_rate": 0.00036555327532354333, "loss": 0.1318, "step": 77374 }, { "epoch": 0.1371940435061291, "grad_norm": 0.33203125, "learning_rate": 0.0003655188764241595, "loss": 0.1356, "step": 77376 }, { "epoch": 0.1371975896714389, "grad_norm": 0.259765625, "learning_rate": 0.00036548448073696945, "loss": 0.1303, "step": 77378 }, { "epoch": 0.13720113583674873, "grad_norm": 0.291015625, "learning_rate": 0.0003654500882621234, "loss": 0.1771, "step": 77380 }, { "epoch": 0.13720468200205854, "grad_norm": 3.8125, "learning_rate": 0.00036541569899977214, "loss": 0.3209, "step": 77382 }, { "epoch": 0.13720822816736836, "grad_norm": 0.77734375, "learning_rate": 0.000365381312950066, "loss": 0.2326, "step": 77384 }, { "epoch": 0.13721177433267817, "grad_norm": 0.8203125, "learning_rate": 0.00036534693011315534, "loss": 0.2475, "step": 77386 }, { "epoch": 0.13721532049798799, "grad_norm": 0.294921875, "learning_rate": 0.0003653125504891903, "loss": 0.2544, "step": 77388 }, { "epoch": 0.1372188666632978, "grad_norm": 1.65625, "learning_rate": 0.00036527817407832133, "loss": 0.2749, "step": 77390 }, { "epoch": 0.13722241282860762, "grad_norm": 0.2275390625, "learning_rate": 0.0003652438008806992, "loss": 0.1624, "step": 77392 }, { "epoch": 0.13722595899391743, "grad_norm": 0.2294921875, "learning_rate": 0.0003652094308964735, "loss": 0.1588, "step": 77394 }, { "epoch": 0.13722950515922724, "grad_norm": 0.57421875, "learning_rate": 0.00036517506412579503, "loss": 0.19, "step": 77396 }, { "epoch": 0.13723305132453706, "grad_norm": 0.42578125, "learning_rate": 0.00036514070056881386, "loss": 0.1679, "step": 77398 }, { "epoch": 0.13723659748984687, "grad_norm": 1.3125, "learning_rate": 0.00036510634022568093, "loss": 0.1872, "step": 77400 }, { "epoch": 0.1372401436551567, "grad_norm": 0.322265625, "learning_rate": 0.00036507198309654543, "loss": 0.1277, "step": 77402 }, { "epoch": 0.1372436898204665, "grad_norm": 0.2255859375, "learning_rate": 0.00036503762918155855, "loss": 0.2583, "step": 77404 }, { "epoch": 0.13724723598577634, "grad_norm": 0.443359375, "learning_rate": 0.00036500327848087, "loss": 0.2041, "step": 77406 }, { "epoch": 0.13725078215108616, "grad_norm": 0.25390625, "learning_rate": 0.00036496893099463037, "loss": 0.2757, "step": 77408 }, { "epoch": 0.13725432831639597, "grad_norm": 0.390625, "learning_rate": 0.0003649345867229895, "loss": 0.1377, "step": 77410 }, { "epoch": 0.1372578744817058, "grad_norm": 0.87890625, "learning_rate": 0.0003649002456660976, "loss": 0.1818, "step": 77412 }, { "epoch": 0.1372614206470156, "grad_norm": 0.267578125, "learning_rate": 0.00036486590782410525, "loss": 0.1516, "step": 77414 }, { "epoch": 0.13726496681232542, "grad_norm": 0.486328125, "learning_rate": 0.00036483157319716235, "loss": 0.1875, "step": 77416 }, { "epoch": 0.13726851297763523, "grad_norm": 0.2216796875, "learning_rate": 0.0003647972417854192, "loss": 0.2191, "step": 77418 }, { "epoch": 0.13727205914294505, "grad_norm": 2.234375, "learning_rate": 0.0003647629135890256, "loss": 0.3467, "step": 77420 }, { "epoch": 0.13727560530825486, "grad_norm": 0.2138671875, "learning_rate": 0.00036472858860813205, "loss": 0.1487, "step": 77422 }, { "epoch": 0.13727915147356468, "grad_norm": 3.375, "learning_rate": 0.00036469426684288876, "loss": 0.3727, "step": 77424 }, { "epoch": 0.1372826976388745, "grad_norm": 1.515625, "learning_rate": 0.00036465994829344554, "loss": 0.3813, "step": 77426 }, { "epoch": 0.1372862438041843, "grad_norm": 1.6796875, "learning_rate": 0.00036462563295995256, "loss": 0.2701, "step": 77428 }, { "epoch": 0.13728978996949412, "grad_norm": 0.267578125, "learning_rate": 0.00036459132084255965, "loss": 0.1861, "step": 77430 }, { "epoch": 0.13729333613480393, "grad_norm": 0.33984375, "learning_rate": 0.00036455701194141764, "loss": 0.1704, "step": 77432 }, { "epoch": 0.13729688230011375, "grad_norm": 0.435546875, "learning_rate": 0.0003645227062566756, "loss": 0.1879, "step": 77434 }, { "epoch": 0.13730042846542356, "grad_norm": 0.453125, "learning_rate": 0.00036448840378848425, "loss": 0.1228, "step": 77436 }, { "epoch": 0.13730397463073338, "grad_norm": 0.765625, "learning_rate": 0.0003644541045369931, "loss": 0.1915, "step": 77438 }, { "epoch": 0.1373075207960432, "grad_norm": 0.33203125, "learning_rate": 0.0003644198085023528, "loss": 0.1628, "step": 77440 }, { "epoch": 0.137311066961353, "grad_norm": 0.38671875, "learning_rate": 0.0003643855156847128, "loss": 0.2371, "step": 77442 }, { "epoch": 0.13731461312666282, "grad_norm": 0.330078125, "learning_rate": 0.00036435122608422345, "loss": 0.1309, "step": 77444 }, { "epoch": 0.13731815929197264, "grad_norm": 0.5078125, "learning_rate": 0.00036431693970103443, "loss": 0.1844, "step": 77446 }, { "epoch": 0.13732170545728245, "grad_norm": 0.326171875, "learning_rate": 0.0003642826565352956, "loss": 0.134, "step": 77448 }, { "epoch": 0.13732525162259226, "grad_norm": 0.34765625, "learning_rate": 0.00036424837658715736, "loss": 0.2216, "step": 77450 }, { "epoch": 0.13732879778790208, "grad_norm": 0.271484375, "learning_rate": 0.00036421409985676903, "loss": 0.1582, "step": 77452 }, { "epoch": 0.1373323439532119, "grad_norm": 0.1806640625, "learning_rate": 0.00036417982634428104, "loss": 0.1399, "step": 77454 }, { "epoch": 0.1373358901185217, "grad_norm": 0.3828125, "learning_rate": 0.0003641455560498429, "loss": 0.174, "step": 77456 }, { "epoch": 0.13733943628383152, "grad_norm": 0.55078125, "learning_rate": 0.000364111288973605, "loss": 0.1537, "step": 77458 }, { "epoch": 0.13734298244914134, "grad_norm": 0.5859375, "learning_rate": 0.0003640770251157165, "loss": 0.1813, "step": 77460 }, { "epoch": 0.13734652861445115, "grad_norm": 0.390625, "learning_rate": 0.0003640427644763278, "loss": 0.4415, "step": 77462 }, { "epoch": 0.13735007477976097, "grad_norm": 0.271484375, "learning_rate": 0.0003640085070555886, "loss": 0.1842, "step": 77464 }, { "epoch": 0.13735362094507078, "grad_norm": 0.376953125, "learning_rate": 0.0003639742528536486, "loss": 0.2052, "step": 77466 }, { "epoch": 0.1373571671103806, "grad_norm": 0.734375, "learning_rate": 0.0003639400018706578, "loss": 0.184, "step": 77468 }, { "epoch": 0.1373607132756904, "grad_norm": 0.9609375, "learning_rate": 0.0003639057541067656, "loss": 0.2862, "step": 77470 }, { "epoch": 0.13736425944100022, "grad_norm": 0.2080078125, "learning_rate": 0.00036387150956212225, "loss": 0.1908, "step": 77472 }, { "epoch": 0.13736780560631004, "grad_norm": 8.375, "learning_rate": 0.00036383726823687736, "loss": 0.2129, "step": 77474 }, { "epoch": 0.13737135177161985, "grad_norm": 0.353515625, "learning_rate": 0.00036380303013118064, "loss": 0.1225, "step": 77476 }, { "epoch": 0.13737489793692967, "grad_norm": 0.341796875, "learning_rate": 0.0003637687952451816, "loss": 0.1775, "step": 77478 }, { "epoch": 0.13737844410223948, "grad_norm": 0.38671875, "learning_rate": 0.00036373456357903034, "loss": 0.2047, "step": 77480 }, { "epoch": 0.1373819902675493, "grad_norm": 0.51171875, "learning_rate": 0.0003637003351328766, "loss": 0.1912, "step": 77482 }, { "epoch": 0.1373855364328591, "grad_norm": 0.9765625, "learning_rate": 0.00036366610990686975, "loss": 0.2339, "step": 77484 }, { "epoch": 0.13738908259816893, "grad_norm": 0.27734375, "learning_rate": 0.00036363188790115966, "loss": 0.213, "step": 77486 }, { "epoch": 0.13739262876347874, "grad_norm": 1.546875, "learning_rate": 0.0003635976691158958, "loss": 0.4799, "step": 77488 }, { "epoch": 0.13739617492878856, "grad_norm": 0.60546875, "learning_rate": 0.00036356345355122833, "loss": 0.1969, "step": 77490 }, { "epoch": 0.13739972109409837, "grad_norm": 1.5546875, "learning_rate": 0.0003635292412073061, "loss": 0.3377, "step": 77492 }, { "epoch": 0.13740326725940818, "grad_norm": 0.447265625, "learning_rate": 0.0003634950320842795, "loss": 0.1919, "step": 77494 }, { "epoch": 0.137406813424718, "grad_norm": 0.703125, "learning_rate": 0.00036346082618229745, "loss": 0.1916, "step": 77496 }, { "epoch": 0.13741035959002784, "grad_norm": 0.359375, "learning_rate": 0.0003634266235015106, "loss": 0.1862, "step": 77498 }, { "epoch": 0.13741390575533766, "grad_norm": 0.5078125, "learning_rate": 0.00036339242404206717, "loss": 0.1909, "step": 77500 }, { "epoch": 0.13741745192064747, "grad_norm": 0.5546875, "learning_rate": 0.0003633582278041176, "loss": 0.2368, "step": 77502 }, { "epoch": 0.13742099808595729, "grad_norm": 1.5390625, "learning_rate": 0.0003633240347878114, "loss": 0.1737, "step": 77504 }, { "epoch": 0.1374245442512671, "grad_norm": 0.408203125, "learning_rate": 0.00036328984499329797, "loss": 0.2234, "step": 77506 }, { "epoch": 0.13742809041657691, "grad_norm": 0.83203125, "learning_rate": 0.00036325565842072673, "loss": 0.1737, "step": 77508 }, { "epoch": 0.13743163658188673, "grad_norm": 0.4765625, "learning_rate": 0.00036322147507024714, "loss": 0.214, "step": 77510 }, { "epoch": 0.13743518274719654, "grad_norm": 0.3828125, "learning_rate": 0.00036318729494200897, "loss": 0.1895, "step": 77512 }, { "epoch": 0.13743872891250636, "grad_norm": 0.52734375, "learning_rate": 0.0003631531180361617, "loss": 0.2292, "step": 77514 }, { "epoch": 0.13744227507781617, "grad_norm": 0.361328125, "learning_rate": 0.00036311894435285465, "loss": 0.1423, "step": 77516 }, { "epoch": 0.137445821243126, "grad_norm": 0.7265625, "learning_rate": 0.0003630847738922369, "loss": 0.1762, "step": 77518 }, { "epoch": 0.1374493674084358, "grad_norm": 0.240234375, "learning_rate": 0.0003630506066544587, "loss": 0.1637, "step": 77520 }, { "epoch": 0.13745291357374562, "grad_norm": 0.44140625, "learning_rate": 0.000363016442639669, "loss": 0.2167, "step": 77522 }, { "epoch": 0.13745645973905543, "grad_norm": 0.244140625, "learning_rate": 0.0003629822818480174, "loss": 0.3162, "step": 77524 }, { "epoch": 0.13746000590436525, "grad_norm": 0.244140625, "learning_rate": 0.0003629481242796532, "loss": 0.1293, "step": 77526 }, { "epoch": 0.13746355206967506, "grad_norm": 1.28125, "learning_rate": 0.00036291396993472534, "loss": 0.2172, "step": 77528 }, { "epoch": 0.13746709823498487, "grad_norm": 0.255859375, "learning_rate": 0.00036287981881338405, "loss": 0.1463, "step": 77530 }, { "epoch": 0.1374706444002947, "grad_norm": 0.26171875, "learning_rate": 0.00036284567091577827, "loss": 0.224, "step": 77532 }, { "epoch": 0.1374741905656045, "grad_norm": 0.35546875, "learning_rate": 0.0003628115262420572, "loss": 0.1859, "step": 77534 }, { "epoch": 0.13747773673091432, "grad_norm": 0.5234375, "learning_rate": 0.0003627773847923701, "loss": 0.2386, "step": 77536 }, { "epoch": 0.13748128289622413, "grad_norm": 0.2578125, "learning_rate": 0.00036274324656686676, "loss": 0.1307, "step": 77538 }, { "epoch": 0.13748482906153395, "grad_norm": 0.439453125, "learning_rate": 0.0003627091115656963, "loss": 0.1791, "step": 77540 }, { "epoch": 0.13748837522684376, "grad_norm": 0.9296875, "learning_rate": 0.0003626749797890078, "loss": 0.1804, "step": 77542 }, { "epoch": 0.13749192139215358, "grad_norm": 0.310546875, "learning_rate": 0.0003626408512369507, "loss": 0.2458, "step": 77544 }, { "epoch": 0.1374954675574634, "grad_norm": 0.1845703125, "learning_rate": 0.0003626067259096739, "loss": 0.1423, "step": 77546 }, { "epoch": 0.1374990137227732, "grad_norm": 0.4765625, "learning_rate": 0.00036257260380732746, "loss": 0.1723, "step": 77548 }, { "epoch": 0.13750255988808302, "grad_norm": 0.80078125, "learning_rate": 0.00036253848493005957, "loss": 0.1767, "step": 77550 }, { "epoch": 0.13750610605339283, "grad_norm": 0.21484375, "learning_rate": 0.0003625043692780203, "loss": 0.1548, "step": 77552 }, { "epoch": 0.13750965221870265, "grad_norm": 0.375, "learning_rate": 0.00036247025685135853, "loss": 0.2155, "step": 77554 }, { "epoch": 0.13751319838401246, "grad_norm": 0.4609375, "learning_rate": 0.0003624361476502235, "loss": 0.1725, "step": 77556 }, { "epoch": 0.13751674454932228, "grad_norm": 0.375, "learning_rate": 0.00036240204167476423, "loss": 0.1946, "step": 77558 }, { "epoch": 0.1375202907146321, "grad_norm": 0.3828125, "learning_rate": 0.0003623679389251298, "loss": 0.1864, "step": 77560 }, { "epoch": 0.1375238368799419, "grad_norm": 0.298828125, "learning_rate": 0.00036233383940146967, "loss": 0.1712, "step": 77562 }, { "epoch": 0.13752738304525172, "grad_norm": 0.57421875, "learning_rate": 0.000362299743103933, "loss": 0.1629, "step": 77564 }, { "epoch": 0.13753092921056154, "grad_norm": 0.287109375, "learning_rate": 0.0003622656500326686, "loss": 0.175, "step": 77566 }, { "epoch": 0.13753447537587135, "grad_norm": 0.4609375, "learning_rate": 0.0003622315601878256, "loss": 0.1913, "step": 77568 }, { "epoch": 0.13753802154118117, "grad_norm": 0.5703125, "learning_rate": 0.00036219747356955345, "loss": 0.138, "step": 77570 }, { "epoch": 0.13754156770649098, "grad_norm": 0.3515625, "learning_rate": 0.00036216339017800084, "loss": 0.201, "step": 77572 }, { "epoch": 0.1375451138718008, "grad_norm": 0.25, "learning_rate": 0.00036212931001331714, "loss": 0.1786, "step": 77574 }, { "epoch": 0.1375486600371106, "grad_norm": 0.189453125, "learning_rate": 0.00036209523307565106, "loss": 0.1562, "step": 77576 }, { "epoch": 0.13755220620242042, "grad_norm": 1.1875, "learning_rate": 0.0003620611593651517, "loss": 0.1992, "step": 77578 }, { "epoch": 0.13755575236773024, "grad_norm": 0.357421875, "learning_rate": 0.0003620270888819687, "loss": 0.1484, "step": 77580 }, { "epoch": 0.13755929853304005, "grad_norm": 0.796875, "learning_rate": 0.0003619930216262499, "loss": 0.1839, "step": 77582 }, { "epoch": 0.13756284469834987, "grad_norm": 0.392578125, "learning_rate": 0.0003619589575981452, "loss": 0.1321, "step": 77584 }, { "epoch": 0.13756639086365968, "grad_norm": 1.109375, "learning_rate": 0.0003619248967978033, "loss": 0.2656, "step": 77586 }, { "epoch": 0.13756993702896952, "grad_norm": 0.30859375, "learning_rate": 0.00036189083922537346, "loss": 0.2078, "step": 77588 }, { "epoch": 0.13757348319427934, "grad_norm": 0.37890625, "learning_rate": 0.0003618567848810039, "loss": 0.195, "step": 77590 }, { "epoch": 0.13757702935958915, "grad_norm": 0.416015625, "learning_rate": 0.00036182273376484433, "loss": 0.1461, "step": 77592 }, { "epoch": 0.13758057552489897, "grad_norm": 0.4140625, "learning_rate": 0.0003617886858770432, "loss": 0.1777, "step": 77594 }, { "epoch": 0.13758412169020878, "grad_norm": 0.33203125, "learning_rate": 0.00036175464121774957, "loss": 0.1534, "step": 77596 }, { "epoch": 0.1375876678555186, "grad_norm": 0.671875, "learning_rate": 0.0003617205997871123, "loss": 0.3432, "step": 77598 }, { "epoch": 0.1375912140208284, "grad_norm": 5.46875, "learning_rate": 0.0003616865615852802, "loss": 0.1611, "step": 77600 }, { "epoch": 0.13759476018613823, "grad_norm": 1.109375, "learning_rate": 0.0003616525266124023, "loss": 0.2413, "step": 77602 }, { "epoch": 0.13759830635144804, "grad_norm": 0.60546875, "learning_rate": 0.00036161849486862743, "loss": 0.1815, "step": 77604 }, { "epoch": 0.13760185251675786, "grad_norm": 0.2451171875, "learning_rate": 0.00036158446635410434, "loss": 0.1625, "step": 77606 }, { "epoch": 0.13760539868206767, "grad_norm": 0.55078125, "learning_rate": 0.00036155044106898166, "loss": 0.1698, "step": 77608 }, { "epoch": 0.13760894484737748, "grad_norm": 0.353515625, "learning_rate": 0.0003615164190134086, "loss": 0.1914, "step": 77610 }, { "epoch": 0.1376124910126873, "grad_norm": 0.333984375, "learning_rate": 0.00036148240018753377, "loss": 0.1685, "step": 77612 }, { "epoch": 0.1376160371779971, "grad_norm": 1.5390625, "learning_rate": 0.000361448384591506, "loss": 0.4062, "step": 77614 }, { "epoch": 0.13761958334330693, "grad_norm": 1.25, "learning_rate": 0.0003614143722254739, "loss": 0.1664, "step": 77616 }, { "epoch": 0.13762312950861674, "grad_norm": 1.6796875, "learning_rate": 0.0003613803630895861, "loss": 0.3341, "step": 77618 }, { "epoch": 0.13762667567392656, "grad_norm": 1.2109375, "learning_rate": 0.0003613463571839919, "loss": 0.2785, "step": 77620 }, { "epoch": 0.13763022183923637, "grad_norm": 0.30859375, "learning_rate": 0.0003613123545088395, "loss": 0.1216, "step": 77622 }, { "epoch": 0.1376337680045462, "grad_norm": 0.263671875, "learning_rate": 0.0003612783550642779, "loss": 0.1742, "step": 77624 }, { "epoch": 0.137637314169856, "grad_norm": 0.298828125, "learning_rate": 0.00036124435885045547, "loss": 0.1559, "step": 77626 }, { "epoch": 0.13764086033516582, "grad_norm": 1.6015625, "learning_rate": 0.0003612103658675212, "loss": 0.3805, "step": 77628 }, { "epoch": 0.13764440650047563, "grad_norm": 0.19140625, "learning_rate": 0.0003611763761156237, "loss": 0.1692, "step": 77630 }, { "epoch": 0.13764795266578544, "grad_norm": 1.25, "learning_rate": 0.00036114238959491157, "loss": 0.189, "step": 77632 }, { "epoch": 0.13765149883109526, "grad_norm": 0.58984375, "learning_rate": 0.00036110840630553356, "loss": 0.1582, "step": 77634 }, { "epoch": 0.13765504499640507, "grad_norm": 10.75, "learning_rate": 0.00036107442624763774, "loss": 0.3075, "step": 77636 }, { "epoch": 0.1376585911617149, "grad_norm": 0.5390625, "learning_rate": 0.00036104044942137384, "loss": 0.2203, "step": 77638 }, { "epoch": 0.1376621373270247, "grad_norm": 0.2177734375, "learning_rate": 0.00036100647582688915, "loss": 0.1486, "step": 77640 }, { "epoch": 0.13766568349233452, "grad_norm": 0.390625, "learning_rate": 0.00036097250546433324, "loss": 0.2441, "step": 77642 }, { "epoch": 0.13766922965764433, "grad_norm": 4.0, "learning_rate": 0.00036093853833385405, "loss": 0.2639, "step": 77644 }, { "epoch": 0.13767277582295415, "grad_norm": 0.59765625, "learning_rate": 0.00036090457443560076, "loss": 0.1642, "step": 77646 }, { "epoch": 0.13767632198826396, "grad_norm": 0.353515625, "learning_rate": 0.0003608706137697211, "loss": 0.2027, "step": 77648 }, { "epoch": 0.13767986815357378, "grad_norm": 0.84765625, "learning_rate": 0.0003608366563363643, "loss": 0.3099, "step": 77650 }, { "epoch": 0.1376834143188836, "grad_norm": 0.82421875, "learning_rate": 0.0003608027021356786, "loss": 0.1966, "step": 77652 }, { "epoch": 0.1376869604841934, "grad_norm": 0.470703125, "learning_rate": 0.00036076875116781255, "loss": 0.2, "step": 77654 }, { "epoch": 0.13769050664950322, "grad_norm": 0.169921875, "learning_rate": 0.0003607348034329145, "loss": 0.1481, "step": 77656 }, { "epoch": 0.13769405281481303, "grad_norm": 1.5390625, "learning_rate": 0.00036070085893113286, "loss": 0.191, "step": 77658 }, { "epoch": 0.13769759898012285, "grad_norm": 0.38671875, "learning_rate": 0.0003606669176626164, "loss": 0.1515, "step": 77660 }, { "epoch": 0.13770114514543266, "grad_norm": 0.78125, "learning_rate": 0.0003606329796275134, "loss": 0.1377, "step": 77662 }, { "epoch": 0.13770469131074248, "grad_norm": 0.373046875, "learning_rate": 0.0003605990448259723, "loss": 0.1684, "step": 77664 }, { "epoch": 0.1377082374760523, "grad_norm": 0.2412109375, "learning_rate": 0.00036056511325814123, "loss": 0.2804, "step": 77666 }, { "epoch": 0.1377117836413621, "grad_norm": 1.5546875, "learning_rate": 0.00036053118492416917, "loss": 0.1958, "step": 77668 }, { "epoch": 0.13771532980667192, "grad_norm": 0.306640625, "learning_rate": 0.0003604972598242041, "loss": 0.1742, "step": 77670 }, { "epoch": 0.13771887597198174, "grad_norm": 0.51953125, "learning_rate": 0.0003604633379583944, "loss": 0.2688, "step": 77672 }, { "epoch": 0.13772242213729155, "grad_norm": 0.3515625, "learning_rate": 0.0003604294193268884, "loss": 0.1386, "step": 77674 }, { "epoch": 0.13772596830260136, "grad_norm": 0.640625, "learning_rate": 0.0003603955039298345, "loss": 0.2282, "step": 77676 }, { "epoch": 0.1377295144679112, "grad_norm": 1.9609375, "learning_rate": 0.0003603615917673814, "loss": 0.2188, "step": 77678 }, { "epoch": 0.13773306063322102, "grad_norm": 0.4453125, "learning_rate": 0.0003603276828396767, "loss": 0.1537, "step": 77680 }, { "epoch": 0.13773660679853084, "grad_norm": 0.42578125, "learning_rate": 0.00036029377714686924, "loss": 0.1738, "step": 77682 }, { "epoch": 0.13774015296384065, "grad_norm": 0.2314453125, "learning_rate": 0.00036025987468910693, "loss": 0.2107, "step": 77684 }, { "epoch": 0.13774369912915047, "grad_norm": 0.328125, "learning_rate": 0.0003602259754665386, "loss": 0.1995, "step": 77686 }, { "epoch": 0.13774724529446028, "grad_norm": 0.7578125, "learning_rate": 0.00036019207947931176, "loss": 0.2575, "step": 77688 }, { "epoch": 0.1377507914597701, "grad_norm": 0.265625, "learning_rate": 0.00036015818672757515, "loss": 0.1377, "step": 77690 }, { "epoch": 0.1377543376250799, "grad_norm": 0.1884765625, "learning_rate": 0.00036012429721147697, "loss": 0.3812, "step": 77692 }, { "epoch": 0.13775788379038972, "grad_norm": 1.0625, "learning_rate": 0.0003600904109311651, "loss": 0.1926, "step": 77694 }, { "epoch": 0.13776142995569954, "grad_norm": 0.8125, "learning_rate": 0.0003600565278867885, "loss": 0.1998, "step": 77696 }, { "epoch": 0.13776497612100935, "grad_norm": 0.29296875, "learning_rate": 0.0003600226480784944, "loss": 0.1521, "step": 77698 }, { "epoch": 0.13776852228631917, "grad_norm": 0.4765625, "learning_rate": 0.0003599887715064315, "loss": 0.232, "step": 77700 }, { "epoch": 0.13777206845162898, "grad_norm": 0.7421875, "learning_rate": 0.00035995489817074774, "loss": 0.1926, "step": 77702 }, { "epoch": 0.1377756146169388, "grad_norm": 0.6875, "learning_rate": 0.0003599210280715918, "loss": 0.2429, "step": 77704 }, { "epoch": 0.1377791607822486, "grad_norm": 0.72265625, "learning_rate": 0.0003598871612091109, "loss": 0.1847, "step": 77706 }, { "epoch": 0.13778270694755843, "grad_norm": 0.14453125, "learning_rate": 0.00035985329758345396, "loss": 0.1341, "step": 77708 }, { "epoch": 0.13778625311286824, "grad_norm": 0.859375, "learning_rate": 0.0003598194371947686, "loss": 0.1584, "step": 77710 }, { "epoch": 0.13778979927817805, "grad_norm": 0.5234375, "learning_rate": 0.0003597855800432033, "loss": 0.1354, "step": 77712 }, { "epoch": 0.13779334544348787, "grad_norm": 0.49609375, "learning_rate": 0.0003597517261289058, "loss": 0.159, "step": 77714 }, { "epoch": 0.13779689160879768, "grad_norm": 0.67578125, "learning_rate": 0.00035971787545202404, "loss": 0.1969, "step": 77716 }, { "epoch": 0.1378004377741075, "grad_norm": 0.2275390625, "learning_rate": 0.0003596840280127065, "loss": 0.1521, "step": 77718 }, { "epoch": 0.1378039839394173, "grad_norm": 0.314453125, "learning_rate": 0.00035965018381110104, "loss": 0.2392, "step": 77720 }, { "epoch": 0.13780753010472713, "grad_norm": 0.2333984375, "learning_rate": 0.00035961634284735553, "loss": 0.1803, "step": 77722 }, { "epoch": 0.13781107627003694, "grad_norm": 0.51171875, "learning_rate": 0.00035958250512161793, "loss": 0.1944, "step": 77724 }, { "epoch": 0.13781462243534676, "grad_norm": 0.41796875, "learning_rate": 0.00035954867063403665, "loss": 0.2178, "step": 77726 }, { "epoch": 0.13781816860065657, "grad_norm": 3.46875, "learning_rate": 0.00035951483938475925, "loss": 0.1483, "step": 77728 }, { "epoch": 0.13782171476596639, "grad_norm": 0.26953125, "learning_rate": 0.00035948101137393384, "loss": 0.2175, "step": 77730 }, { "epoch": 0.1378252609312762, "grad_norm": 0.98828125, "learning_rate": 0.0003594471866017084, "loss": 0.4493, "step": 77732 }, { "epoch": 0.13782880709658601, "grad_norm": 0.326171875, "learning_rate": 0.0003594133650682305, "loss": 0.1353, "step": 77734 }, { "epoch": 0.13783235326189583, "grad_norm": 0.205078125, "learning_rate": 0.00035937954677364893, "loss": 0.1598, "step": 77736 }, { "epoch": 0.13783589942720564, "grad_norm": 0.2060546875, "learning_rate": 0.0003593457317181105, "loss": 0.1341, "step": 77738 }, { "epoch": 0.13783944559251546, "grad_norm": 0.296875, "learning_rate": 0.00035931191990176383, "loss": 0.1638, "step": 77740 }, { "epoch": 0.13784299175782527, "grad_norm": 0.224609375, "learning_rate": 0.0003592781113247565, "loss": 0.2152, "step": 77742 }, { "epoch": 0.1378465379231351, "grad_norm": 0.2021484375, "learning_rate": 0.0003592443059872367, "loss": 0.2335, "step": 77744 }, { "epoch": 0.1378500840884449, "grad_norm": 0.453125, "learning_rate": 0.00035921050388935185, "loss": 0.1743, "step": 77746 }, { "epoch": 0.13785363025375472, "grad_norm": 0.474609375, "learning_rate": 0.00035917670503124964, "loss": 0.124, "step": 77748 }, { "epoch": 0.13785717641906453, "grad_norm": 0.177734375, "learning_rate": 0.0003591429094130786, "loss": 0.2122, "step": 77750 }, { "epoch": 0.13786072258437435, "grad_norm": 0.25, "learning_rate": 0.00035910911703498607, "loss": 0.1852, "step": 77752 }, { "epoch": 0.13786426874968416, "grad_norm": 0.357421875, "learning_rate": 0.0003590753278971198, "loss": 0.1824, "step": 77754 }, { "epoch": 0.13786781491499397, "grad_norm": 0.3671875, "learning_rate": 0.00035904154199962757, "loss": 0.1773, "step": 77756 }, { "epoch": 0.1378713610803038, "grad_norm": 0.466796875, "learning_rate": 0.0003590077593426574, "loss": 0.1431, "step": 77758 }, { "epoch": 0.1378749072456136, "grad_norm": 0.2197265625, "learning_rate": 0.00035897397992635686, "loss": 0.1525, "step": 77760 }, { "epoch": 0.13787845341092342, "grad_norm": 0.27734375, "learning_rate": 0.0003589402037508738, "loss": 0.1608, "step": 77762 }, { "epoch": 0.13788199957623323, "grad_norm": 0.404296875, "learning_rate": 0.00035890643081635566, "loss": 0.1921, "step": 77764 }, { "epoch": 0.13788554574154305, "grad_norm": 0.298828125, "learning_rate": 0.00035887266112295026, "loss": 0.1577, "step": 77766 }, { "epoch": 0.13788909190685286, "grad_norm": 0.404296875, "learning_rate": 0.00035883889467080564, "loss": 0.1168, "step": 77768 }, { "epoch": 0.1378926380721627, "grad_norm": 0.298828125, "learning_rate": 0.00035880513146006887, "loss": 0.198, "step": 77770 }, { "epoch": 0.13789618423747252, "grad_norm": 0.39453125, "learning_rate": 0.000358771371490888, "loss": 0.2595, "step": 77772 }, { "epoch": 0.13789973040278233, "grad_norm": 1.5625, "learning_rate": 0.0003587376147634105, "loss": 0.1701, "step": 77774 }, { "epoch": 0.13790327656809215, "grad_norm": 0.796875, "learning_rate": 0.0003587038612777842, "loss": 0.2072, "step": 77776 }, { "epoch": 0.13790682273340196, "grad_norm": 0.1923828125, "learning_rate": 0.0003586701110341567, "loss": 0.1595, "step": 77778 }, { "epoch": 0.13791036889871178, "grad_norm": 0.5234375, "learning_rate": 0.00035863636403267555, "loss": 0.2225, "step": 77780 }, { "epoch": 0.1379139150640216, "grad_norm": 0.3984375, "learning_rate": 0.00035860262027348825, "loss": 0.1941, "step": 77782 }, { "epoch": 0.1379174612293314, "grad_norm": 0.74609375, "learning_rate": 0.00035856887975674225, "loss": 0.1985, "step": 77784 }, { "epoch": 0.13792100739464122, "grad_norm": 0.1630859375, "learning_rate": 0.0003585351424825857, "loss": 0.1417, "step": 77786 }, { "epoch": 0.13792455355995104, "grad_norm": 0.53515625, "learning_rate": 0.0003585014084511653, "loss": 0.222, "step": 77788 }, { "epoch": 0.13792809972526085, "grad_norm": 0.482421875, "learning_rate": 0.0003584676776626292, "loss": 0.2312, "step": 77790 }, { "epoch": 0.13793164589057066, "grad_norm": 0.6484375, "learning_rate": 0.00035843395011712453, "loss": 0.1429, "step": 77792 }, { "epoch": 0.13793519205588048, "grad_norm": 0.1689453125, "learning_rate": 0.00035840022581479957, "loss": 0.1759, "step": 77794 }, { "epoch": 0.1379387382211903, "grad_norm": 1.015625, "learning_rate": 0.0003583665047558007, "loss": 0.3259, "step": 77796 }, { "epoch": 0.1379422843865001, "grad_norm": 0.421875, "learning_rate": 0.0003583327869402763, "loss": 0.1851, "step": 77798 }, { "epoch": 0.13794583055180992, "grad_norm": 1.625, "learning_rate": 0.00035829907236837336, "loss": 0.198, "step": 77800 }, { "epoch": 0.13794937671711974, "grad_norm": 0.37109375, "learning_rate": 0.0003582653610402395, "loss": 0.1671, "step": 77802 }, { "epoch": 0.13795292288242955, "grad_norm": 0.5390625, "learning_rate": 0.000358231652956022, "loss": 0.1813, "step": 77804 }, { "epoch": 0.13795646904773937, "grad_norm": 0.326171875, "learning_rate": 0.00035819794811586825, "loss": 0.2003, "step": 77806 }, { "epoch": 0.13796001521304918, "grad_norm": 0.330078125, "learning_rate": 0.00035816424651992595, "loss": 0.2188, "step": 77808 }, { "epoch": 0.137963561378359, "grad_norm": 0.5, "learning_rate": 0.00035813054816834244, "loss": 0.175, "step": 77810 }, { "epoch": 0.1379671075436688, "grad_norm": 0.359375, "learning_rate": 0.0003580968530612648, "loss": 0.17, "step": 77812 }, { "epoch": 0.13797065370897862, "grad_norm": 0.53125, "learning_rate": 0.0003580631611988405, "loss": 0.1233, "step": 77814 }, { "epoch": 0.13797419987428844, "grad_norm": 3.609375, "learning_rate": 0.0003580294725812171, "loss": 0.177, "step": 77816 }, { "epoch": 0.13797774603959825, "grad_norm": 0.53515625, "learning_rate": 0.00035799578720854185, "loss": 0.2129, "step": 77818 }, { "epoch": 0.13798129220490807, "grad_norm": 0.5546875, "learning_rate": 0.0003579621050809621, "loss": 0.225, "step": 77820 }, { "epoch": 0.13798483837021788, "grad_norm": 0.265625, "learning_rate": 0.000357928426198625, "loss": 0.1939, "step": 77822 }, { "epoch": 0.1379883845355277, "grad_norm": 0.3671875, "learning_rate": 0.00035789475056167763, "loss": 0.1937, "step": 77824 }, { "epoch": 0.1379919307008375, "grad_norm": 1.2578125, "learning_rate": 0.00035786107817026806, "loss": 0.1898, "step": 77826 }, { "epoch": 0.13799547686614733, "grad_norm": 0.369140625, "learning_rate": 0.0003578274090245426, "loss": 0.2034, "step": 77828 }, { "epoch": 0.13799902303145714, "grad_norm": 0.5859375, "learning_rate": 0.0003577937431246491, "loss": 0.1881, "step": 77830 }, { "epoch": 0.13800256919676696, "grad_norm": 1.5390625, "learning_rate": 0.00035776008047073456, "loss": 0.2892, "step": 77832 }, { "epoch": 0.13800611536207677, "grad_norm": 0.236328125, "learning_rate": 0.0003577264210629466, "loss": 0.1772, "step": 77834 }, { "epoch": 0.13800966152738658, "grad_norm": 1.3515625, "learning_rate": 0.0003576927649014317, "loss": 0.2733, "step": 77836 }, { "epoch": 0.1380132076926964, "grad_norm": 0.439453125, "learning_rate": 0.0003576591119863377, "loss": 0.1731, "step": 77838 }, { "epoch": 0.1380167538580062, "grad_norm": 0.296875, "learning_rate": 0.0003576254623178116, "loss": 0.2481, "step": 77840 }, { "epoch": 0.13802030002331603, "grad_norm": 0.53515625, "learning_rate": 0.00035759181589600043, "loss": 0.259, "step": 77842 }, { "epoch": 0.13802384618862584, "grad_norm": 0.333984375, "learning_rate": 0.0003575581727210515, "loss": 0.1713, "step": 77844 }, { "epoch": 0.13802739235393566, "grad_norm": 1.7890625, "learning_rate": 0.00035752453279311156, "loss": 0.2371, "step": 77846 }, { "epoch": 0.13803093851924547, "grad_norm": 0.474609375, "learning_rate": 0.0003574908961123282, "loss": 0.2213, "step": 77848 }, { "epoch": 0.1380344846845553, "grad_norm": 0.294921875, "learning_rate": 0.0003574572626788485, "loss": 0.1532, "step": 77850 }, { "epoch": 0.1380380308498651, "grad_norm": 1.5703125, "learning_rate": 0.0003574236324928194, "loss": 0.2632, "step": 77852 }, { "epoch": 0.13804157701517492, "grad_norm": 0.34765625, "learning_rate": 0.0003573900055543877, "loss": 0.1749, "step": 77854 }, { "epoch": 0.13804512318048473, "grad_norm": 0.57421875, "learning_rate": 0.000357356381863701, "loss": 0.1578, "step": 77856 }, { "epoch": 0.13804866934579454, "grad_norm": 0.2080078125, "learning_rate": 0.000357322761420906, "loss": 0.1159, "step": 77858 }, { "epoch": 0.1380522155111044, "grad_norm": 0.7109375, "learning_rate": 0.00035728914422615, "loss": 0.1493, "step": 77860 }, { "epoch": 0.1380557616764142, "grad_norm": 0.1923828125, "learning_rate": 0.0003572555302795798, "loss": 0.1256, "step": 77862 }, { "epoch": 0.13805930784172402, "grad_norm": 0.390625, "learning_rate": 0.0003572219195813422, "loss": 0.1689, "step": 77864 }, { "epoch": 0.13806285400703383, "grad_norm": 0.3828125, "learning_rate": 0.0003571883121315848, "loss": 0.1846, "step": 77866 }, { "epoch": 0.13806640017234365, "grad_norm": 0.31640625, "learning_rate": 0.0003571547079304541, "loss": 0.1634, "step": 77868 }, { "epoch": 0.13806994633765346, "grad_norm": 1.9140625, "learning_rate": 0.00035712110697809735, "loss": 0.2407, "step": 77870 }, { "epoch": 0.13807349250296327, "grad_norm": 0.53125, "learning_rate": 0.00035708750927466114, "loss": 0.2599, "step": 77872 }, { "epoch": 0.1380770386682731, "grad_norm": 0.455078125, "learning_rate": 0.0003570539148202928, "loss": 0.2978, "step": 77874 }, { "epoch": 0.1380805848335829, "grad_norm": 0.361328125, "learning_rate": 0.00035702032361513913, "loss": 0.1665, "step": 77876 }, { "epoch": 0.13808413099889272, "grad_norm": 0.76953125, "learning_rate": 0.0003569867356593469, "loss": 0.1992, "step": 77878 }, { "epoch": 0.13808767716420253, "grad_norm": 0.36328125, "learning_rate": 0.0003569531509530632, "loss": 0.1315, "step": 77880 }, { "epoch": 0.13809122332951235, "grad_norm": 0.56640625, "learning_rate": 0.00035691956949643447, "loss": 0.1706, "step": 77882 }, { "epoch": 0.13809476949482216, "grad_norm": 0.64453125, "learning_rate": 0.00035688599128960843, "loss": 0.1764, "step": 77884 }, { "epoch": 0.13809831566013198, "grad_norm": 0.291015625, "learning_rate": 0.00035685241633273104, "loss": 0.187, "step": 77886 }, { "epoch": 0.1381018618254418, "grad_norm": 0.9921875, "learning_rate": 0.0003568188446259496, "loss": 0.3943, "step": 77888 }, { "epoch": 0.1381054079907516, "grad_norm": 0.23046875, "learning_rate": 0.00035678527616941056, "loss": 0.1594, "step": 77890 }, { "epoch": 0.13810895415606142, "grad_norm": 0.671875, "learning_rate": 0.00035675171096326167, "loss": 0.1579, "step": 77892 }, { "epoch": 0.13811250032137123, "grad_norm": 0.3359375, "learning_rate": 0.0003567181490076484, "loss": 0.1747, "step": 77894 }, { "epoch": 0.13811604648668105, "grad_norm": 0.431640625, "learning_rate": 0.0003566845903027186, "loss": 0.2408, "step": 77896 }, { "epoch": 0.13811959265199086, "grad_norm": 0.5625, "learning_rate": 0.0003566510348486186, "loss": 0.1869, "step": 77898 }, { "epoch": 0.13812313881730068, "grad_norm": 1.203125, "learning_rate": 0.0003566174826454952, "loss": 0.2497, "step": 77900 }, { "epoch": 0.1381266849826105, "grad_norm": 1.34375, "learning_rate": 0.00035658393369349497, "loss": 0.2796, "step": 77902 }, { "epoch": 0.1381302311479203, "grad_norm": 0.470703125, "learning_rate": 0.0003565503879927647, "loss": 0.1937, "step": 77904 }, { "epoch": 0.13813377731323012, "grad_norm": 1.0703125, "learning_rate": 0.00035651684554345136, "loss": 0.1978, "step": 77906 }, { "epoch": 0.13813732347853994, "grad_norm": 0.390625, "learning_rate": 0.00035648330634570146, "loss": 0.2511, "step": 77908 }, { "epoch": 0.13814086964384975, "grad_norm": 0.3515625, "learning_rate": 0.00035644977039966176, "loss": 0.1972, "step": 77910 }, { "epoch": 0.13814441580915957, "grad_norm": 0.98828125, "learning_rate": 0.0003564162377054788, "loss": 0.2188, "step": 77912 }, { "epoch": 0.13814796197446938, "grad_norm": 0.2734375, "learning_rate": 0.0003563827082632991, "loss": 0.1944, "step": 77914 }, { "epoch": 0.1381515081397792, "grad_norm": 0.53515625, "learning_rate": 0.0003563491820732699, "loss": 0.3012, "step": 77916 }, { "epoch": 0.138155054305089, "grad_norm": 0.6796875, "learning_rate": 0.00035631565913553706, "loss": 0.1906, "step": 77918 }, { "epoch": 0.13815860047039882, "grad_norm": 0.5703125, "learning_rate": 0.0003562821394502476, "loss": 0.2212, "step": 77920 }, { "epoch": 0.13816214663570864, "grad_norm": 0.26953125, "learning_rate": 0.000356248623017548, "loss": 0.1841, "step": 77922 }, { "epoch": 0.13816569280101845, "grad_norm": 0.55859375, "learning_rate": 0.00035621510983758527, "loss": 0.1867, "step": 77924 }, { "epoch": 0.13816923896632827, "grad_norm": 0.46484375, "learning_rate": 0.0003561815999105052, "loss": 0.1576, "step": 77926 }, { "epoch": 0.13817278513163808, "grad_norm": 0.3359375, "learning_rate": 0.00035614809323645494, "loss": 0.1697, "step": 77928 }, { "epoch": 0.1381763312969479, "grad_norm": 0.578125, "learning_rate": 0.00035611458981558086, "loss": 0.3207, "step": 77930 }, { "epoch": 0.1381798774622577, "grad_norm": 0.29296875, "learning_rate": 0.0003560810896480293, "loss": 0.2216, "step": 77932 }, { "epoch": 0.13818342362756753, "grad_norm": 0.4140625, "learning_rate": 0.00035604759273394717, "loss": 0.1545, "step": 77934 }, { "epoch": 0.13818696979287734, "grad_norm": 0.2109375, "learning_rate": 0.00035601409907348047, "loss": 0.2529, "step": 77936 }, { "epoch": 0.13819051595818715, "grad_norm": 0.4140625, "learning_rate": 0.00035598060866677604, "loss": 0.314, "step": 77938 }, { "epoch": 0.13819406212349697, "grad_norm": 0.30078125, "learning_rate": 0.0003559471215139801, "loss": 0.2242, "step": 77940 }, { "epoch": 0.13819760828880678, "grad_norm": 0.49609375, "learning_rate": 0.00035591363761523976, "loss": 0.1678, "step": 77942 }, { "epoch": 0.1382011544541166, "grad_norm": 0.31640625, "learning_rate": 0.0003558801569707005, "loss": 0.1364, "step": 77944 }, { "epoch": 0.1382047006194264, "grad_norm": 0.287109375, "learning_rate": 0.0003558466795805093, "loss": 0.1853, "step": 77946 }, { "epoch": 0.13820824678473623, "grad_norm": 0.416015625, "learning_rate": 0.00035581320544481255, "loss": 0.1933, "step": 77948 }, { "epoch": 0.13821179295004607, "grad_norm": 0.341796875, "learning_rate": 0.0003557797345637565, "loss": 0.1882, "step": 77950 }, { "epoch": 0.13821533911535588, "grad_norm": 1.796875, "learning_rate": 0.0003557462669374877, "loss": 0.2308, "step": 77952 }, { "epoch": 0.1382188852806657, "grad_norm": 2.234375, "learning_rate": 0.0003557128025661521, "loss": 0.2356, "step": 77954 }, { "epoch": 0.1382224314459755, "grad_norm": 0.41015625, "learning_rate": 0.00035567934144989664, "loss": 0.1835, "step": 77956 }, { "epoch": 0.13822597761128533, "grad_norm": 0.609375, "learning_rate": 0.0003556458835888674, "loss": 0.2129, "step": 77958 }, { "epoch": 0.13822952377659514, "grad_norm": 0.328125, "learning_rate": 0.00035561242898321066, "loss": 0.2078, "step": 77960 }, { "epoch": 0.13823306994190496, "grad_norm": 0.455078125, "learning_rate": 0.0003555789776330726, "loss": 0.1666, "step": 77962 }, { "epoch": 0.13823661610721477, "grad_norm": 0.216796875, "learning_rate": 0.0003555455295385998, "loss": 0.1393, "step": 77964 }, { "epoch": 0.13824016227252459, "grad_norm": 0.369140625, "learning_rate": 0.00035551208469993846, "loss": 0.1906, "step": 77966 }, { "epoch": 0.1382437084378344, "grad_norm": 0.359375, "learning_rate": 0.00035547864311723487, "loss": 0.2064, "step": 77968 }, { "epoch": 0.13824725460314422, "grad_norm": 0.48828125, "learning_rate": 0.00035544520479063516, "loss": 0.1424, "step": 77970 }, { "epoch": 0.13825080076845403, "grad_norm": 1.0546875, "learning_rate": 0.0003554117697202854, "loss": 0.2986, "step": 77972 }, { "epoch": 0.13825434693376384, "grad_norm": 0.267578125, "learning_rate": 0.0003553783379063326, "loss": 0.1406, "step": 77974 }, { "epoch": 0.13825789309907366, "grad_norm": 0.38671875, "learning_rate": 0.0003553449093489219, "loss": 0.1771, "step": 77976 }, { "epoch": 0.13826143926438347, "grad_norm": 0.326171875, "learning_rate": 0.00035531148404820026, "loss": 0.1835, "step": 77978 }, { "epoch": 0.1382649854296933, "grad_norm": 0.984375, "learning_rate": 0.00035527806200431335, "loss": 0.452, "step": 77980 }, { "epoch": 0.1382685315950031, "grad_norm": 0.337890625, "learning_rate": 0.0003552446432174081, "loss": 0.1846, "step": 77982 }, { "epoch": 0.13827207776031292, "grad_norm": 0.57421875, "learning_rate": 0.0003552112276876297, "loss": 0.1691, "step": 77984 }, { "epoch": 0.13827562392562273, "grad_norm": 0.35546875, "learning_rate": 0.00035517781541512505, "loss": 0.1999, "step": 77986 }, { "epoch": 0.13827917009093255, "grad_norm": 0.52734375, "learning_rate": 0.00035514440640004, "loss": 0.1498, "step": 77988 }, { "epoch": 0.13828271625624236, "grad_norm": 5.65625, "learning_rate": 0.00035511100064252056, "loss": 0.3978, "step": 77990 }, { "epoch": 0.13828626242155218, "grad_norm": 0.294921875, "learning_rate": 0.0003550775981427129, "loss": 0.159, "step": 77992 }, { "epoch": 0.138289808586862, "grad_norm": 0.59765625, "learning_rate": 0.000355044198900763, "loss": 0.1689, "step": 77994 }, { "epoch": 0.1382933547521718, "grad_norm": 0.63671875, "learning_rate": 0.0003550108029168173, "loss": 0.2339, "step": 77996 }, { "epoch": 0.13829690091748162, "grad_norm": 1.171875, "learning_rate": 0.0003549774101910216, "loss": 0.4819, "step": 77998 }, { "epoch": 0.13830044708279143, "grad_norm": 0.21484375, "learning_rate": 0.00035494402072352185, "loss": 0.2399, "step": 78000 }, { "epoch": 0.13830399324810125, "grad_norm": 2.390625, "learning_rate": 0.0003549106345144639, "loss": 0.2043, "step": 78002 }, { "epoch": 0.13830753941341106, "grad_norm": 0.79296875, "learning_rate": 0.00035487725156399454, "loss": 0.1753, "step": 78004 }, { "epoch": 0.13831108557872088, "grad_norm": 0.36328125, "learning_rate": 0.00035484387187225905, "loss": 0.2054, "step": 78006 }, { "epoch": 0.1383146317440307, "grad_norm": 1.078125, "learning_rate": 0.00035481049543940364, "loss": 0.1702, "step": 78008 }, { "epoch": 0.1383181779093405, "grad_norm": 0.890625, "learning_rate": 0.00035477712226557426, "loss": 0.1792, "step": 78010 }, { "epoch": 0.13832172407465032, "grad_norm": 1.9296875, "learning_rate": 0.0003547437523509167, "loss": 0.2384, "step": 78012 }, { "epoch": 0.13832527023996014, "grad_norm": 0.73046875, "learning_rate": 0.00035471038569557747, "loss": 0.2065, "step": 78014 }, { "epoch": 0.13832881640526995, "grad_norm": 1.8515625, "learning_rate": 0.00035467702229970166, "loss": 0.1982, "step": 78016 }, { "epoch": 0.13833236257057976, "grad_norm": 0.447265625, "learning_rate": 0.0003546436621634358, "loss": 0.2043, "step": 78018 }, { "epoch": 0.13833590873588958, "grad_norm": 0.42578125, "learning_rate": 0.00035461030528692545, "loss": 0.2187, "step": 78020 }, { "epoch": 0.1383394549011994, "grad_norm": 0.279296875, "learning_rate": 0.000354576951670317, "loss": 0.2373, "step": 78022 }, { "epoch": 0.1383430010665092, "grad_norm": 0.50390625, "learning_rate": 0.00035454360131375565, "loss": 0.2003, "step": 78024 }, { "epoch": 0.13834654723181902, "grad_norm": 0.41796875, "learning_rate": 0.0003545102542173878, "loss": 0.1929, "step": 78026 }, { "epoch": 0.13835009339712884, "grad_norm": 1.2578125, "learning_rate": 0.00035447691038135906, "loss": 0.1776, "step": 78028 }, { "epoch": 0.13835363956243865, "grad_norm": 0.1826171875, "learning_rate": 0.0003544435698058149, "loss": 0.1634, "step": 78030 }, { "epoch": 0.13835718572774847, "grad_norm": 0.76953125, "learning_rate": 0.0003544102324909021, "loss": 0.1769, "step": 78032 }, { "epoch": 0.13836073189305828, "grad_norm": 1.140625, "learning_rate": 0.00035437689843676536, "loss": 0.2962, "step": 78034 }, { "epoch": 0.1383642780583681, "grad_norm": 1.09375, "learning_rate": 0.00035434356764355116, "loss": 0.4063, "step": 78036 }, { "epoch": 0.1383678242236779, "grad_norm": 0.44140625, "learning_rate": 0.00035431024011140476, "loss": 0.1505, "step": 78038 }, { "epoch": 0.13837137038898772, "grad_norm": 0.53515625, "learning_rate": 0.0003542769158404729, "loss": 0.1949, "step": 78040 }, { "epoch": 0.13837491655429757, "grad_norm": 0.3046875, "learning_rate": 0.00035424359483090005, "loss": 0.1696, "step": 78042 }, { "epoch": 0.13837846271960738, "grad_norm": 0.279296875, "learning_rate": 0.00035421027708283274, "loss": 0.1529, "step": 78044 }, { "epoch": 0.1383820088849172, "grad_norm": 0.5234375, "learning_rate": 0.0003541769625964164, "loss": 0.1925, "step": 78046 }, { "epoch": 0.138385555050227, "grad_norm": 0.345703125, "learning_rate": 0.00035414365137179696, "loss": 0.1496, "step": 78048 }, { "epoch": 0.13838910121553682, "grad_norm": 0.3984375, "learning_rate": 0.00035411034340911986, "loss": 0.1142, "step": 78050 }, { "epoch": 0.13839264738084664, "grad_norm": 1.2265625, "learning_rate": 0.0003540770387085306, "loss": 0.1573, "step": 78052 }, { "epoch": 0.13839619354615645, "grad_norm": 0.87109375, "learning_rate": 0.0003540437372701754, "loss": 0.2084, "step": 78054 }, { "epoch": 0.13839973971146627, "grad_norm": 0.50390625, "learning_rate": 0.0003540104390941996, "loss": 0.4049, "step": 78056 }, { "epoch": 0.13840328587677608, "grad_norm": 0.298828125, "learning_rate": 0.00035397714418074867, "loss": 0.1795, "step": 78058 }, { "epoch": 0.1384068320420859, "grad_norm": 0.5859375, "learning_rate": 0.0003539438525299683, "loss": 0.1868, "step": 78060 }, { "epoch": 0.1384103782073957, "grad_norm": 0.259765625, "learning_rate": 0.00035391056414200436, "loss": 0.1927, "step": 78062 }, { "epoch": 0.13841392437270553, "grad_norm": 0.4296875, "learning_rate": 0.0003538772790170021, "loss": 0.1542, "step": 78064 }, { "epoch": 0.13841747053801534, "grad_norm": 0.2080078125, "learning_rate": 0.0003538439971551074, "loss": 0.1333, "step": 78066 }, { "epoch": 0.13842101670332516, "grad_norm": 0.796875, "learning_rate": 0.0003538107185564655, "loss": 0.239, "step": 78068 }, { "epoch": 0.13842456286863497, "grad_norm": 0.267578125, "learning_rate": 0.0003537774432212219, "loss": 0.2173, "step": 78070 }, { "epoch": 0.13842810903394479, "grad_norm": 0.3984375, "learning_rate": 0.0003537441711495228, "loss": 0.1975, "step": 78072 }, { "epoch": 0.1384316551992546, "grad_norm": 0.2275390625, "learning_rate": 0.00035371090234151273, "loss": 0.1734, "step": 78074 }, { "epoch": 0.13843520136456441, "grad_norm": 0.36328125, "learning_rate": 0.0003536776367973379, "loss": 0.1758, "step": 78076 }, { "epoch": 0.13843874752987423, "grad_norm": 0.9921875, "learning_rate": 0.0003536443745171433, "loss": 0.1569, "step": 78078 }, { "epoch": 0.13844229369518404, "grad_norm": 0.62890625, "learning_rate": 0.00035361111550107516, "loss": 0.2234, "step": 78080 }, { "epoch": 0.13844583986049386, "grad_norm": 0.8828125, "learning_rate": 0.00035357785974927806, "loss": 0.1595, "step": 78082 }, { "epoch": 0.13844938602580367, "grad_norm": 0.44140625, "learning_rate": 0.00035354460726189795, "loss": 0.2072, "step": 78084 }, { "epoch": 0.1384529321911135, "grad_norm": 0.306640625, "learning_rate": 0.0003535113580390802, "loss": 0.1403, "step": 78086 }, { "epoch": 0.1384564783564233, "grad_norm": 0.56640625, "learning_rate": 0.00035347811208097016, "loss": 0.192, "step": 78088 }, { "epoch": 0.13846002452173312, "grad_norm": 0.515625, "learning_rate": 0.0003534448693877132, "loss": 0.1365, "step": 78090 }, { "epoch": 0.13846357068704293, "grad_norm": 0.33984375, "learning_rate": 0.00035341162995945454, "loss": 0.2408, "step": 78092 }, { "epoch": 0.13846711685235275, "grad_norm": 0.2197265625, "learning_rate": 0.00035337839379633983, "loss": 0.139, "step": 78094 }, { "epoch": 0.13847066301766256, "grad_norm": 0.2890625, "learning_rate": 0.00035334516089851454, "loss": 0.1703, "step": 78096 }, { "epoch": 0.13847420918297237, "grad_norm": 0.6171875, "learning_rate": 0.0003533119312661237, "loss": 0.2045, "step": 78098 }, { "epoch": 0.1384777553482822, "grad_norm": 0.216796875, "learning_rate": 0.0003532787048993127, "loss": 0.1878, "step": 78100 }, { "epoch": 0.138481301513592, "grad_norm": 0.6171875, "learning_rate": 0.00035324548179822677, "loss": 0.2196, "step": 78102 }, { "epoch": 0.13848484767890182, "grad_norm": 0.4140625, "learning_rate": 0.0003532122619630117, "loss": 0.1785, "step": 78104 }, { "epoch": 0.13848839384421163, "grad_norm": 0.6484375, "learning_rate": 0.0003531790453938121, "loss": 0.2124, "step": 78106 }, { "epoch": 0.13849194000952145, "grad_norm": 0.4140625, "learning_rate": 0.00035314583209077353, "loss": 0.1954, "step": 78108 }, { "epoch": 0.13849548617483126, "grad_norm": 0.314453125, "learning_rate": 0.00035311262205404124, "loss": 0.1709, "step": 78110 }, { "epoch": 0.13849903234014108, "grad_norm": 1.484375, "learning_rate": 0.00035307941528376073, "loss": 0.4178, "step": 78112 }, { "epoch": 0.1385025785054509, "grad_norm": 0.419921875, "learning_rate": 0.0003530462117800769, "loss": 0.139, "step": 78114 }, { "epoch": 0.1385061246707607, "grad_norm": 0.6640625, "learning_rate": 0.0003530130115431352, "loss": 0.2013, "step": 78116 }, { "epoch": 0.13850967083607052, "grad_norm": 0.2021484375, "learning_rate": 0.0003529798145730805, "loss": 0.2144, "step": 78118 }, { "epoch": 0.13851321700138033, "grad_norm": 0.78125, "learning_rate": 0.0003529466208700581, "loss": 0.2077, "step": 78120 }, { "epoch": 0.13851676316669015, "grad_norm": 0.171875, "learning_rate": 0.00035291343043421355, "loss": 0.1829, "step": 78122 }, { "epoch": 0.13852030933199996, "grad_norm": 0.310546875, "learning_rate": 0.00035288024326569136, "loss": 0.15, "step": 78124 }, { "epoch": 0.13852385549730978, "grad_norm": 0.6640625, "learning_rate": 0.0003528470593646371, "loss": 0.2072, "step": 78126 }, { "epoch": 0.1385274016626196, "grad_norm": 0.263671875, "learning_rate": 0.00035281387873119577, "loss": 0.1655, "step": 78128 }, { "epoch": 0.1385309478279294, "grad_norm": 1.578125, "learning_rate": 0.0003527807013655128, "loss": 0.1899, "step": 78130 }, { "epoch": 0.13853449399323925, "grad_norm": 0.53515625, "learning_rate": 0.00035274752726773266, "loss": 0.2097, "step": 78132 }, { "epoch": 0.13853804015854906, "grad_norm": 0.4609375, "learning_rate": 0.00035271435643800087, "loss": 0.2206, "step": 78134 }, { "epoch": 0.13854158632385888, "grad_norm": 0.74609375, "learning_rate": 0.0003526811888764625, "loss": 0.1944, "step": 78136 }, { "epoch": 0.1385451324891687, "grad_norm": 0.41796875, "learning_rate": 0.0003526480245832626, "loss": 0.1588, "step": 78138 }, { "epoch": 0.1385486786544785, "grad_norm": 0.3125, "learning_rate": 0.00035261486355854597, "loss": 0.1275, "step": 78140 }, { "epoch": 0.13855222481978832, "grad_norm": 0.447265625, "learning_rate": 0.0003525817058024577, "loss": 0.1715, "step": 78142 }, { "epoch": 0.13855577098509814, "grad_norm": 0.27734375, "learning_rate": 0.00035254855131514306, "loss": 0.1746, "step": 78144 }, { "epoch": 0.13855931715040795, "grad_norm": 0.330078125, "learning_rate": 0.0003525154000967469, "loss": 0.1908, "step": 78146 }, { "epoch": 0.13856286331571777, "grad_norm": 1.2109375, "learning_rate": 0.00035248225214741427, "loss": 0.22, "step": 78148 }, { "epoch": 0.13856640948102758, "grad_norm": 0.37890625, "learning_rate": 0.00035244910746728964, "loss": 0.1994, "step": 78150 }, { "epoch": 0.1385699556463374, "grad_norm": 0.51171875, "learning_rate": 0.0003524159660565187, "loss": 0.164, "step": 78152 }, { "epoch": 0.1385735018116472, "grad_norm": 1.2265625, "learning_rate": 0.00035238282791524606, "loss": 0.2248, "step": 78154 }, { "epoch": 0.13857704797695702, "grad_norm": 0.65625, "learning_rate": 0.0003523496930436167, "loss": 0.2949, "step": 78156 }, { "epoch": 0.13858059414226684, "grad_norm": 0.38671875, "learning_rate": 0.00035231656144177554, "loss": 0.1667, "step": 78158 }, { "epoch": 0.13858414030757665, "grad_norm": 2.609375, "learning_rate": 0.0003522834331098671, "loss": 0.1763, "step": 78160 }, { "epoch": 0.13858768647288647, "grad_norm": 0.4140625, "learning_rate": 0.00035225030804803714, "loss": 0.1692, "step": 78162 }, { "epoch": 0.13859123263819628, "grad_norm": 2.078125, "learning_rate": 0.0003522171862564295, "loss": 0.1947, "step": 78164 }, { "epoch": 0.1385947788035061, "grad_norm": 0.427734375, "learning_rate": 0.00035218406773518976, "loss": 0.1962, "step": 78166 }, { "epoch": 0.1385983249688159, "grad_norm": 0.640625, "learning_rate": 0.00035215095248446225, "loss": 0.1712, "step": 78168 }, { "epoch": 0.13860187113412573, "grad_norm": 0.37890625, "learning_rate": 0.00035211784050439253, "loss": 0.1628, "step": 78170 }, { "epoch": 0.13860541729943554, "grad_norm": 0.53125, "learning_rate": 0.00035208473179512467, "loss": 0.182, "step": 78172 }, { "epoch": 0.13860896346474536, "grad_norm": 0.78515625, "learning_rate": 0.00035205162635680375, "loss": 0.2146, "step": 78174 }, { "epoch": 0.13861250963005517, "grad_norm": 0.58984375, "learning_rate": 0.00035201852418957467, "loss": 0.2493, "step": 78176 }, { "epoch": 0.13861605579536498, "grad_norm": 0.60546875, "learning_rate": 0.000351985425293582, "loss": 0.1707, "step": 78178 }, { "epoch": 0.1386196019606748, "grad_norm": 0.291015625, "learning_rate": 0.0003519523296689707, "loss": 0.1328, "step": 78180 }, { "epoch": 0.1386231481259846, "grad_norm": 0.2265625, "learning_rate": 0.00035191923731588517, "loss": 0.1638, "step": 78182 }, { "epoch": 0.13862669429129443, "grad_norm": 0.33984375, "learning_rate": 0.0003518861482344705, "loss": 0.1553, "step": 78184 }, { "epoch": 0.13863024045660424, "grad_norm": 0.91015625, "learning_rate": 0.0003518530624248713, "loss": 0.1668, "step": 78186 }, { "epoch": 0.13863378662191406, "grad_norm": 0.251953125, "learning_rate": 0.00035181997988723213, "loss": 0.1446, "step": 78188 }, { "epoch": 0.13863733278722387, "grad_norm": 0.55859375, "learning_rate": 0.0003517869006216977, "loss": 0.2117, "step": 78190 }, { "epoch": 0.13864087895253369, "grad_norm": 0.609375, "learning_rate": 0.0003517538246284129, "loss": 0.1674, "step": 78192 }, { "epoch": 0.1386444251178435, "grad_norm": 0.384765625, "learning_rate": 0.0003517207519075223, "loss": 0.1746, "step": 78194 }, { "epoch": 0.13864797128315332, "grad_norm": 0.5390625, "learning_rate": 0.00035168768245917044, "loss": 0.1397, "step": 78196 }, { "epoch": 0.13865151744846313, "grad_norm": 0.2080078125, "learning_rate": 0.00035165461628350195, "loss": 0.1466, "step": 78198 }, { "epoch": 0.13865506361377294, "grad_norm": 0.1728515625, "learning_rate": 0.0003516215533806612, "loss": 0.1198, "step": 78200 }, { "epoch": 0.13865860977908276, "grad_norm": 0.435546875, "learning_rate": 0.0003515884937507935, "loss": 0.1966, "step": 78202 }, { "epoch": 0.13866215594439257, "grad_norm": 0.3671875, "learning_rate": 0.0003515554373940428, "loss": 0.2012, "step": 78204 }, { "epoch": 0.1386657021097024, "grad_norm": 0.439453125, "learning_rate": 0.0003515223843105539, "loss": 0.1763, "step": 78206 }, { "epoch": 0.1386692482750122, "grad_norm": 0.515625, "learning_rate": 0.0003514893345004712, "loss": 0.156, "step": 78208 }, { "epoch": 0.13867279444032202, "grad_norm": 0.439453125, "learning_rate": 0.0003514562879639395, "loss": 0.1497, "step": 78210 }, { "epoch": 0.13867634060563183, "grad_norm": 0.76171875, "learning_rate": 0.0003514232447011032, "loss": 0.1744, "step": 78212 }, { "epoch": 0.13867988677094165, "grad_norm": 0.462890625, "learning_rate": 0.00035139020471210684, "loss": 0.2019, "step": 78214 }, { "epoch": 0.13868343293625146, "grad_norm": 0.78515625, "learning_rate": 0.0003513571679970948, "loss": 0.1716, "step": 78216 }, { "epoch": 0.13868697910156128, "grad_norm": 0.59765625, "learning_rate": 0.0003513241345562115, "loss": 0.1553, "step": 78218 }, { "epoch": 0.1386905252668711, "grad_norm": 0.376953125, "learning_rate": 0.000351291104389602, "loss": 0.1732, "step": 78220 }, { "epoch": 0.13869407143218093, "grad_norm": 0.59375, "learning_rate": 0.00035125807749740977, "loss": 0.1719, "step": 78222 }, { "epoch": 0.13869761759749075, "grad_norm": 0.419921875, "learning_rate": 0.0003512250538797802, "loss": 0.2277, "step": 78224 }, { "epoch": 0.13870116376280056, "grad_norm": 0.400390625, "learning_rate": 0.0003511920335368569, "loss": 0.2652, "step": 78226 }, { "epoch": 0.13870470992811038, "grad_norm": 0.578125, "learning_rate": 0.00035115901646878524, "loss": 0.1913, "step": 78228 }, { "epoch": 0.1387082560934202, "grad_norm": 0.21875, "learning_rate": 0.0003511260026757085, "loss": 0.1429, "step": 78230 }, { "epoch": 0.13871180225873, "grad_norm": 1.0859375, "learning_rate": 0.000351092992157772, "loss": 0.196, "step": 78232 }, { "epoch": 0.13871534842403982, "grad_norm": 0.96484375, "learning_rate": 0.0003510599849151196, "loss": 0.4094, "step": 78234 }, { "epoch": 0.13871889458934963, "grad_norm": 0.47265625, "learning_rate": 0.0003510269809478959, "loss": 0.1762, "step": 78236 }, { "epoch": 0.13872244075465945, "grad_norm": 0.26171875, "learning_rate": 0.00035099398025624506, "loss": 0.1728, "step": 78238 }, { "epoch": 0.13872598691996926, "grad_norm": 0.23828125, "learning_rate": 0.0003509609828403112, "loss": 0.1752, "step": 78240 }, { "epoch": 0.13872953308527908, "grad_norm": 0.345703125, "learning_rate": 0.00035092798870023926, "loss": 0.1357, "step": 78242 }, { "epoch": 0.1387330792505889, "grad_norm": 0.5390625, "learning_rate": 0.0003508949978361732, "loss": 0.2609, "step": 78244 }, { "epoch": 0.1387366254158987, "grad_norm": 0.91796875, "learning_rate": 0.00035086201024825707, "loss": 0.1448, "step": 78246 }, { "epoch": 0.13874017158120852, "grad_norm": 0.36328125, "learning_rate": 0.00035082902593663537, "loss": 0.1555, "step": 78248 }, { "epoch": 0.13874371774651834, "grad_norm": 0.2333984375, "learning_rate": 0.00035079604490145246, "loss": 0.1581, "step": 78250 }, { "epoch": 0.13874726391182815, "grad_norm": 0.66015625, "learning_rate": 0.00035076306714285245, "loss": 0.157, "step": 78252 }, { "epoch": 0.13875081007713796, "grad_norm": 0.4765625, "learning_rate": 0.00035073009266097953, "loss": 0.1321, "step": 78254 }, { "epoch": 0.13875435624244778, "grad_norm": 0.2431640625, "learning_rate": 0.00035069712145597805, "loss": 0.1368, "step": 78256 }, { "epoch": 0.1387579024077576, "grad_norm": 0.451171875, "learning_rate": 0.00035066415352799185, "loss": 0.1977, "step": 78258 }, { "epoch": 0.1387614485730674, "grad_norm": 0.447265625, "learning_rate": 0.0003506311888771658, "loss": 0.1385, "step": 78260 }, { "epoch": 0.13876499473837722, "grad_norm": 0.34375, "learning_rate": 0.00035059822750364324, "loss": 0.1661, "step": 78262 }, { "epoch": 0.13876854090368704, "grad_norm": 0.48828125, "learning_rate": 0.00035056526940756896, "loss": 0.1891, "step": 78264 }, { "epoch": 0.13877208706899685, "grad_norm": 0.26953125, "learning_rate": 0.00035053231458908685, "loss": 0.1548, "step": 78266 }, { "epoch": 0.13877563323430667, "grad_norm": 0.7578125, "learning_rate": 0.00035049936304834106, "loss": 0.1636, "step": 78268 }, { "epoch": 0.13877917939961648, "grad_norm": 0.4453125, "learning_rate": 0.0003504664147854757, "loss": 0.163, "step": 78270 }, { "epoch": 0.1387827255649263, "grad_norm": 0.2021484375, "learning_rate": 0.00035043346980063463, "loss": 0.1575, "step": 78272 }, { "epoch": 0.1387862717302361, "grad_norm": 0.5625, "learning_rate": 0.0003504005280939624, "loss": 0.1503, "step": 78274 }, { "epoch": 0.13878981789554592, "grad_norm": 0.2412109375, "learning_rate": 0.00035036758966560267, "loss": 0.1386, "step": 78276 }, { "epoch": 0.13879336406085574, "grad_norm": 0.390625, "learning_rate": 0.0003503346545156999, "loss": 0.1662, "step": 78278 }, { "epoch": 0.13879691022616555, "grad_norm": 0.515625, "learning_rate": 0.0003503017226443977, "loss": 0.2146, "step": 78280 }, { "epoch": 0.13880045639147537, "grad_norm": 1.0546875, "learning_rate": 0.00035026879405184033, "loss": 0.3662, "step": 78282 }, { "epoch": 0.13880400255678518, "grad_norm": 0.4453125, "learning_rate": 0.0003502358687381718, "loss": 0.2285, "step": 78284 }, { "epoch": 0.138807548722095, "grad_norm": 0.2021484375, "learning_rate": 0.0003502029467035361, "loss": 0.1735, "step": 78286 }, { "epoch": 0.1388110948874048, "grad_norm": 0.291015625, "learning_rate": 0.0003501700279480773, "loss": 0.2512, "step": 78288 }, { "epoch": 0.13881464105271463, "grad_norm": 0.66015625, "learning_rate": 0.00035013711247193887, "loss": 0.2021, "step": 78290 }, { "epoch": 0.13881818721802444, "grad_norm": 0.4921875, "learning_rate": 0.00035010420027526535, "loss": 0.1545, "step": 78292 }, { "epoch": 0.13882173338333426, "grad_norm": 3.3125, "learning_rate": 0.0003500712913582005, "loss": 0.3279, "step": 78294 }, { "epoch": 0.13882527954864407, "grad_norm": 0.40625, "learning_rate": 0.00035003838572088817, "loss": 0.1373, "step": 78296 }, { "epoch": 0.13882882571395389, "grad_norm": 0.298828125, "learning_rate": 0.00035000548336347215, "loss": 0.1407, "step": 78298 }, { "epoch": 0.1388323718792637, "grad_norm": 0.83984375, "learning_rate": 0.0003499725842860967, "loss": 0.1576, "step": 78300 }, { "epoch": 0.13883591804457351, "grad_norm": 0.1982421875, "learning_rate": 0.00034993968848890544, "loss": 0.1582, "step": 78302 }, { "epoch": 0.13883946420988333, "grad_norm": 0.55078125, "learning_rate": 0.00034990679597204244, "loss": 0.1751, "step": 78304 }, { "epoch": 0.13884301037519314, "grad_norm": 0.25390625, "learning_rate": 0.00034987390673565135, "loss": 0.225, "step": 78306 }, { "epoch": 0.13884655654050296, "grad_norm": 0.9296875, "learning_rate": 0.0003498410207798758, "loss": 0.1824, "step": 78308 }, { "epoch": 0.13885010270581277, "grad_norm": 0.6796875, "learning_rate": 0.0003498081381048603, "loss": 0.1312, "step": 78310 }, { "epoch": 0.1388536488711226, "grad_norm": 0.234375, "learning_rate": 0.00034977525871074784, "loss": 0.1274, "step": 78312 }, { "epoch": 0.13885719503643243, "grad_norm": 0.53125, "learning_rate": 0.00034974238259768286, "loss": 0.3854, "step": 78314 }, { "epoch": 0.13886074120174224, "grad_norm": 0.35546875, "learning_rate": 0.00034970950976580863, "loss": 0.2196, "step": 78316 }, { "epoch": 0.13886428736705206, "grad_norm": 2.4375, "learning_rate": 0.0003496766402152697, "loss": 0.2425, "step": 78318 }, { "epoch": 0.13886783353236187, "grad_norm": 1.828125, "learning_rate": 0.0003496437739462088, "loss": 0.3425, "step": 78320 }, { "epoch": 0.1388713796976717, "grad_norm": 2.046875, "learning_rate": 0.00034961091095877037, "loss": 0.3239, "step": 78322 }, { "epoch": 0.1388749258629815, "grad_norm": 0.2470703125, "learning_rate": 0.000349578051253098, "loss": 0.1805, "step": 78324 }, { "epoch": 0.13887847202829132, "grad_norm": 0.671875, "learning_rate": 0.00034954519482933527, "loss": 0.1646, "step": 78326 }, { "epoch": 0.13888201819360113, "grad_norm": 0.431640625, "learning_rate": 0.000349512341687626, "loss": 0.1692, "step": 78328 }, { "epoch": 0.13888556435891095, "grad_norm": 0.2109375, "learning_rate": 0.00034947949182811364, "loss": 0.175, "step": 78330 }, { "epoch": 0.13888911052422076, "grad_norm": 0.375, "learning_rate": 0.00034944664525094214, "loss": 0.1618, "step": 78332 }, { "epoch": 0.13889265668953057, "grad_norm": 0.474609375, "learning_rate": 0.00034941380195625506, "loss": 0.1806, "step": 78334 }, { "epoch": 0.1388962028548404, "grad_norm": 0.43359375, "learning_rate": 0.0003493809619441961, "loss": 0.1872, "step": 78336 }, { "epoch": 0.1388997490201502, "grad_norm": 0.8984375, "learning_rate": 0.00034934812521490846, "loss": 0.1785, "step": 78338 }, { "epoch": 0.13890329518546002, "grad_norm": 0.48046875, "learning_rate": 0.0003493152917685364, "loss": 0.1938, "step": 78340 }, { "epoch": 0.13890684135076983, "grad_norm": 1.390625, "learning_rate": 0.00034928246160522325, "loss": 0.2259, "step": 78342 }, { "epoch": 0.13891038751607965, "grad_norm": 0.283203125, "learning_rate": 0.0003492496347251124, "loss": 0.1521, "step": 78344 }, { "epoch": 0.13891393368138946, "grad_norm": 1.0546875, "learning_rate": 0.00034921681112834766, "loss": 0.1071, "step": 78346 }, { "epoch": 0.13891747984669928, "grad_norm": 0.5625, "learning_rate": 0.0003491839908150722, "loss": 0.3153, "step": 78348 }, { "epoch": 0.1389210260120091, "grad_norm": 2.046875, "learning_rate": 0.0003491511737854304, "loss": 0.2275, "step": 78350 }, { "epoch": 0.1389245721773189, "grad_norm": 0.5078125, "learning_rate": 0.00034911836003956466, "loss": 0.2526, "step": 78352 }, { "epoch": 0.13892811834262872, "grad_norm": 0.337890625, "learning_rate": 0.00034908554957761926, "loss": 0.1483, "step": 78354 }, { "epoch": 0.13893166450793853, "grad_norm": 0.3515625, "learning_rate": 0.00034905274239973737, "loss": 0.1519, "step": 78356 }, { "epoch": 0.13893521067324835, "grad_norm": 0.32421875, "learning_rate": 0.00034901993850606296, "loss": 0.1468, "step": 78358 }, { "epoch": 0.13893875683855816, "grad_norm": 1.1171875, "learning_rate": 0.00034898713789673866, "loss": 0.3459, "step": 78360 }, { "epoch": 0.13894230300386798, "grad_norm": 0.39453125, "learning_rate": 0.0003489543405719086, "loss": 0.1902, "step": 78362 }, { "epoch": 0.1389458491691778, "grad_norm": 0.52734375, "learning_rate": 0.000348921546531716, "loss": 0.1962, "step": 78364 }, { "epoch": 0.1389493953344876, "grad_norm": 0.44140625, "learning_rate": 0.00034888875577630406, "loss": 0.1701, "step": 78366 }, { "epoch": 0.13895294149979742, "grad_norm": 0.33203125, "learning_rate": 0.0003488559683058168, "loss": 0.1758, "step": 78368 }, { "epoch": 0.13895648766510724, "grad_norm": 0.25390625, "learning_rate": 0.00034882318412039694, "loss": 0.2109, "step": 78370 }, { "epoch": 0.13896003383041705, "grad_norm": 0.263671875, "learning_rate": 0.00034879040322018817, "loss": 0.1741, "step": 78372 }, { "epoch": 0.13896357999572687, "grad_norm": 0.33984375, "learning_rate": 0.00034875762560533365, "loss": 0.1621, "step": 78374 }, { "epoch": 0.13896712616103668, "grad_norm": 0.283203125, "learning_rate": 0.0003487248512759774, "loss": 0.2261, "step": 78376 }, { "epoch": 0.1389706723263465, "grad_norm": 0.55078125, "learning_rate": 0.00034869208023226186, "loss": 0.1312, "step": 78378 }, { "epoch": 0.1389742184916563, "grad_norm": 2.3125, "learning_rate": 0.00034865931247433094, "loss": 0.2572, "step": 78380 }, { "epoch": 0.13897776465696612, "grad_norm": 0.65625, "learning_rate": 0.00034862654800232764, "loss": 0.1338, "step": 78382 }, { "epoch": 0.13898131082227594, "grad_norm": 0.73828125, "learning_rate": 0.0003485937868163954, "loss": 0.2004, "step": 78384 }, { "epoch": 0.13898485698758575, "grad_norm": 0.2216796875, "learning_rate": 0.00034856102891667764, "loss": 0.156, "step": 78386 }, { "epoch": 0.13898840315289557, "grad_norm": 0.392578125, "learning_rate": 0.000348528274303317, "loss": 0.1779, "step": 78388 }, { "epoch": 0.13899194931820538, "grad_norm": 0.6640625, "learning_rate": 0.0003484955229764576, "loss": 0.1904, "step": 78390 }, { "epoch": 0.1389954954835152, "grad_norm": 1.546875, "learning_rate": 0.00034846277493624214, "loss": 0.3167, "step": 78392 }, { "epoch": 0.138999041648825, "grad_norm": 0.287109375, "learning_rate": 0.0003484300301828139, "loss": 0.156, "step": 78394 }, { "epoch": 0.13900258781413483, "grad_norm": 0.310546875, "learning_rate": 0.00034839728871631606, "loss": 0.1691, "step": 78396 }, { "epoch": 0.13900613397944464, "grad_norm": 3.5, "learning_rate": 0.00034836455053689205, "loss": 0.2914, "step": 78398 }, { "epoch": 0.13900968014475446, "grad_norm": 0.7890625, "learning_rate": 0.000348331815644685, "loss": 0.174, "step": 78400 }, { "epoch": 0.13901322631006427, "grad_norm": 0.47265625, "learning_rate": 0.00034829908403983775, "loss": 0.1605, "step": 78402 }, { "epoch": 0.1390167724753741, "grad_norm": 0.4921875, "learning_rate": 0.00034826635572249374, "loss": 0.2855, "step": 78404 }, { "epoch": 0.13902031864068393, "grad_norm": 0.26953125, "learning_rate": 0.0003482336306927959, "loss": 0.1392, "step": 78406 }, { "epoch": 0.13902386480599374, "grad_norm": 0.396484375, "learning_rate": 0.000348200908950888, "loss": 0.1924, "step": 78408 }, { "epoch": 0.13902741097130356, "grad_norm": 0.8125, "learning_rate": 0.00034816819049691207, "loss": 0.1594, "step": 78410 }, { "epoch": 0.13903095713661337, "grad_norm": 0.36328125, "learning_rate": 0.0003481354753310119, "loss": 0.1262, "step": 78412 }, { "epoch": 0.13903450330192318, "grad_norm": 0.1689453125, "learning_rate": 0.0003481027634533303, "loss": 0.1568, "step": 78414 }, { "epoch": 0.139038049467233, "grad_norm": 2.328125, "learning_rate": 0.00034807005486401087, "loss": 0.2676, "step": 78416 }, { "epoch": 0.1390415956325428, "grad_norm": 0.34375, "learning_rate": 0.00034803734956319576, "loss": 0.1653, "step": 78418 }, { "epoch": 0.13904514179785263, "grad_norm": 0.3125, "learning_rate": 0.00034800464755102876, "loss": 0.1514, "step": 78420 }, { "epoch": 0.13904868796316244, "grad_norm": 1.5234375, "learning_rate": 0.0003479719488276527, "loss": 0.2169, "step": 78422 }, { "epoch": 0.13905223412847226, "grad_norm": 0.53125, "learning_rate": 0.0003479392533932105, "loss": 0.1727, "step": 78424 }, { "epoch": 0.13905578029378207, "grad_norm": 0.314453125, "learning_rate": 0.00034790656124784506, "loss": 0.2021, "step": 78426 }, { "epoch": 0.1390593264590919, "grad_norm": 0.2890625, "learning_rate": 0.0003478738723916992, "loss": 0.1591, "step": 78428 }, { "epoch": 0.1390628726244017, "grad_norm": 0.87890625, "learning_rate": 0.0003478411868249165, "loss": 0.1703, "step": 78430 }, { "epoch": 0.13906641878971152, "grad_norm": 0.421875, "learning_rate": 0.00034780850454763947, "loss": 0.1736, "step": 78432 }, { "epoch": 0.13906996495502133, "grad_norm": 0.251953125, "learning_rate": 0.00034777582556001107, "loss": 0.1631, "step": 78434 }, { "epoch": 0.13907351112033114, "grad_norm": 0.58984375, "learning_rate": 0.000347743149862174, "loss": 0.1987, "step": 78436 }, { "epoch": 0.13907705728564096, "grad_norm": 0.2197265625, "learning_rate": 0.00034771047745427175, "loss": 0.1524, "step": 78438 }, { "epoch": 0.13908060345095077, "grad_norm": 0.5390625, "learning_rate": 0.0003476778083364469, "loss": 0.1312, "step": 78440 }, { "epoch": 0.1390841496162606, "grad_norm": 0.69921875, "learning_rate": 0.0003476451425088424, "loss": 0.2004, "step": 78442 }, { "epoch": 0.1390876957815704, "grad_norm": 0.59375, "learning_rate": 0.0003476124799716009, "loss": 0.1855, "step": 78444 }, { "epoch": 0.13909124194688022, "grad_norm": 0.8984375, "learning_rate": 0.00034757982072486517, "loss": 0.196, "step": 78446 }, { "epoch": 0.13909478811219003, "grad_norm": 0.71875, "learning_rate": 0.00034754716476877845, "loss": 0.1992, "step": 78448 }, { "epoch": 0.13909833427749985, "grad_norm": 0.51953125, "learning_rate": 0.0003475145121034834, "loss": 0.2026, "step": 78450 }, { "epoch": 0.13910188044280966, "grad_norm": 0.6015625, "learning_rate": 0.00034748186272912275, "loss": 0.1251, "step": 78452 }, { "epoch": 0.13910542660811948, "grad_norm": 2.671875, "learning_rate": 0.00034744921664583934, "loss": 0.2499, "step": 78454 }, { "epoch": 0.1391089727734293, "grad_norm": 0.47265625, "learning_rate": 0.00034741657385377566, "loss": 0.1766, "step": 78456 }, { "epoch": 0.1391125189387391, "grad_norm": 0.69921875, "learning_rate": 0.00034738393435307526, "loss": 0.1885, "step": 78458 }, { "epoch": 0.13911606510404892, "grad_norm": 1.3359375, "learning_rate": 0.0003473512981438798, "loss": 0.2298, "step": 78460 }, { "epoch": 0.13911961126935873, "grad_norm": 0.13671875, "learning_rate": 0.00034731866522633284, "loss": 0.1805, "step": 78462 }, { "epoch": 0.13912315743466855, "grad_norm": 1.234375, "learning_rate": 0.0003472860356005766, "loss": 0.191, "step": 78464 }, { "epoch": 0.13912670359997836, "grad_norm": 0.8046875, "learning_rate": 0.0003472534092667545, "loss": 0.1711, "step": 78466 }, { "epoch": 0.13913024976528818, "grad_norm": 0.328125, "learning_rate": 0.0003472207862250083, "loss": 0.2117, "step": 78468 }, { "epoch": 0.139133795930598, "grad_norm": 0.17578125, "learning_rate": 0.00034718816647548125, "loss": 0.1393, "step": 78470 }, { "epoch": 0.1391373420959078, "grad_norm": 0.74609375, "learning_rate": 0.00034715555001831586, "loss": 0.1764, "step": 78472 }, { "epoch": 0.13914088826121762, "grad_norm": 0.2451171875, "learning_rate": 0.000347122936853655, "loss": 0.2918, "step": 78474 }, { "epoch": 0.13914443442652744, "grad_norm": 2.5625, "learning_rate": 0.0003470903269816409, "loss": 0.3206, "step": 78476 }, { "epoch": 0.13914798059183725, "grad_norm": 0.63671875, "learning_rate": 0.0003470577204024162, "loss": 0.4584, "step": 78478 }, { "epoch": 0.13915152675714706, "grad_norm": 0.365234375, "learning_rate": 0.00034702511711612387, "loss": 0.2557, "step": 78480 }, { "epoch": 0.13915507292245688, "grad_norm": 0.283203125, "learning_rate": 0.00034699251712290636, "loss": 0.2094, "step": 78482 }, { "epoch": 0.1391586190877667, "grad_norm": 0.53515625, "learning_rate": 0.0003469599204229061, "loss": 0.1616, "step": 78484 }, { "epoch": 0.1391621652530765, "grad_norm": 0.2294921875, "learning_rate": 0.0003469273270162656, "loss": 0.1499, "step": 78486 }, { "epoch": 0.13916571141838632, "grad_norm": 0.212890625, "learning_rate": 0.00034689473690312765, "loss": 0.3678, "step": 78488 }, { "epoch": 0.13916925758369614, "grad_norm": 0.2138671875, "learning_rate": 0.00034686215008363473, "loss": 0.183, "step": 78490 }, { "epoch": 0.13917280374900595, "grad_norm": 0.78515625, "learning_rate": 0.0003468295665579293, "loss": 0.1967, "step": 78492 }, { "epoch": 0.1391763499143158, "grad_norm": 0.4453125, "learning_rate": 0.00034679698632615385, "loss": 0.1712, "step": 78494 }, { "epoch": 0.1391798960796256, "grad_norm": 0.515625, "learning_rate": 0.0003467644093884506, "loss": 0.1543, "step": 78496 }, { "epoch": 0.13918344224493542, "grad_norm": 2.09375, "learning_rate": 0.0003467318357449628, "loss": 0.175, "step": 78498 }, { "epoch": 0.13918698841024524, "grad_norm": 0.3125, "learning_rate": 0.00034669926539583195, "loss": 0.1885, "step": 78500 }, { "epoch": 0.13919053457555505, "grad_norm": 1.40625, "learning_rate": 0.00034666669834120114, "loss": 0.2577, "step": 78502 }, { "epoch": 0.13919408074086487, "grad_norm": 0.484375, "learning_rate": 0.0003466341345812124, "loss": 0.1627, "step": 78504 }, { "epoch": 0.13919762690617468, "grad_norm": 1.78125, "learning_rate": 0.00034660157411600873, "loss": 0.2463, "step": 78506 }, { "epoch": 0.1392011730714845, "grad_norm": 0.361328125, "learning_rate": 0.00034656901694573173, "loss": 0.1645, "step": 78508 }, { "epoch": 0.1392047192367943, "grad_norm": 0.35546875, "learning_rate": 0.0003465364630705245, "loss": 0.1765, "step": 78510 }, { "epoch": 0.13920826540210413, "grad_norm": 0.263671875, "learning_rate": 0.00034650391249052906, "loss": 0.1418, "step": 78512 }, { "epoch": 0.13921181156741394, "grad_norm": 0.21484375, "learning_rate": 0.00034647136520588777, "loss": 0.1632, "step": 78514 }, { "epoch": 0.13921535773272375, "grad_norm": 0.2578125, "learning_rate": 0.0003464388212167431, "loss": 0.187, "step": 78516 }, { "epoch": 0.13921890389803357, "grad_norm": 2.1875, "learning_rate": 0.0003464062805232371, "loss": 0.2601, "step": 78518 }, { "epoch": 0.13922245006334338, "grad_norm": 0.46875, "learning_rate": 0.0003463737431255124, "loss": 0.2151, "step": 78520 }, { "epoch": 0.1392259962286532, "grad_norm": 0.48828125, "learning_rate": 0.0003463412090237114, "loss": 0.1729, "step": 78522 }, { "epoch": 0.139229542393963, "grad_norm": 3.5625, "learning_rate": 0.00034630867821797595, "loss": 0.1889, "step": 78524 }, { "epoch": 0.13923308855927283, "grad_norm": 0.48828125, "learning_rate": 0.00034627615070844845, "loss": 0.1682, "step": 78526 }, { "epoch": 0.13923663472458264, "grad_norm": 1.4921875, "learning_rate": 0.0003462436264952714, "loss": 0.2028, "step": 78528 }, { "epoch": 0.13924018088989246, "grad_norm": 0.61328125, "learning_rate": 0.0003462111055785869, "loss": 0.1943, "step": 78530 }, { "epoch": 0.13924372705520227, "grad_norm": 0.314453125, "learning_rate": 0.0003461785879585373, "loss": 0.1634, "step": 78532 }, { "epoch": 0.13924727322051209, "grad_norm": 1.265625, "learning_rate": 0.00034614607363526453, "loss": 0.2239, "step": 78534 }, { "epoch": 0.1392508193858219, "grad_norm": 0.56640625, "learning_rate": 0.0003461135626089108, "loss": 0.1861, "step": 78536 }, { "epoch": 0.13925436555113171, "grad_norm": 0.2275390625, "learning_rate": 0.0003460810548796186, "loss": 0.1446, "step": 78538 }, { "epoch": 0.13925791171644153, "grad_norm": 1.9765625, "learning_rate": 0.00034604855044753, "loss": 0.2459, "step": 78540 }, { "epoch": 0.13926145788175134, "grad_norm": 0.28125, "learning_rate": 0.00034601604931278713, "loss": 0.2055, "step": 78542 }, { "epoch": 0.13926500404706116, "grad_norm": 0.408203125, "learning_rate": 0.0003459835514755318, "loss": 0.1499, "step": 78544 }, { "epoch": 0.13926855021237097, "grad_norm": 0.2578125, "learning_rate": 0.00034595105693590667, "loss": 0.2282, "step": 78546 }, { "epoch": 0.1392720963776808, "grad_norm": 1.53125, "learning_rate": 0.0003459185656940537, "loss": 0.3397, "step": 78548 }, { "epoch": 0.1392756425429906, "grad_norm": 1.453125, "learning_rate": 0.00034588607775011467, "loss": 0.1669, "step": 78550 }, { "epoch": 0.13927918870830042, "grad_norm": 1.5625, "learning_rate": 0.00034585359310423204, "loss": 0.1855, "step": 78552 }, { "epoch": 0.13928273487361023, "grad_norm": 0.6171875, "learning_rate": 0.0003458211117565475, "loss": 0.1477, "step": 78554 }, { "epoch": 0.13928628103892005, "grad_norm": 1.2890625, "learning_rate": 0.0003457886337072038, "loss": 0.3678, "step": 78556 }, { "epoch": 0.13928982720422986, "grad_norm": 0.33984375, "learning_rate": 0.00034575615895634203, "loss": 0.1726, "step": 78558 }, { "epoch": 0.13929337336953967, "grad_norm": 0.69140625, "learning_rate": 0.000345723687504105, "loss": 0.2276, "step": 78560 }, { "epoch": 0.1392969195348495, "grad_norm": 1.328125, "learning_rate": 0.0003456912193506342, "loss": 0.1713, "step": 78562 }, { "epoch": 0.1393004657001593, "grad_norm": 0.44921875, "learning_rate": 0.0003456587544960722, "loss": 0.1421, "step": 78564 }, { "epoch": 0.13930401186546912, "grad_norm": 0.703125, "learning_rate": 0.00034562629294056035, "loss": 0.1636, "step": 78566 }, { "epoch": 0.13930755803077893, "grad_norm": 1.375, "learning_rate": 0.000345593834684241, "loss": 0.3496, "step": 78568 }, { "epoch": 0.13931110419608875, "grad_norm": 0.427734375, "learning_rate": 0.00034556137972725597, "loss": 0.1603, "step": 78570 }, { "epoch": 0.13931465036139856, "grad_norm": 0.36328125, "learning_rate": 0.00034552892806974736, "loss": 0.2687, "step": 78572 }, { "epoch": 0.13931819652670838, "grad_norm": 0.41796875, "learning_rate": 0.0003454964797118569, "loss": 0.1449, "step": 78574 }, { "epoch": 0.1393217426920182, "grad_norm": 0.197265625, "learning_rate": 0.0003454640346537263, "loss": 0.1646, "step": 78576 }, { "epoch": 0.139325288857328, "grad_norm": 0.54296875, "learning_rate": 0.00034543159289549804, "loss": 0.1884, "step": 78578 }, { "epoch": 0.13932883502263782, "grad_norm": 0.412109375, "learning_rate": 0.0003453991544373136, "loss": 0.1523, "step": 78580 }, { "epoch": 0.13933238118794763, "grad_norm": 0.466796875, "learning_rate": 0.00034536671927931504, "loss": 0.135, "step": 78582 }, { "epoch": 0.13933592735325745, "grad_norm": 0.1845703125, "learning_rate": 0.00034533428742164376, "loss": 0.1418, "step": 78584 }, { "epoch": 0.1393394735185673, "grad_norm": 0.1982421875, "learning_rate": 0.0003453018588644422, "loss": 0.1822, "step": 78586 }, { "epoch": 0.1393430196838771, "grad_norm": 0.39453125, "learning_rate": 0.00034526943360785203, "loss": 0.14, "step": 78588 }, { "epoch": 0.13934656584918692, "grad_norm": 0.19140625, "learning_rate": 0.00034523701165201494, "loss": 0.1991, "step": 78590 }, { "epoch": 0.13935011201449674, "grad_norm": 0.51171875, "learning_rate": 0.0003452045929970726, "loss": 0.1791, "step": 78592 }, { "epoch": 0.13935365817980655, "grad_norm": 0.546875, "learning_rate": 0.00034517217764316676, "loss": 0.1687, "step": 78594 }, { "epoch": 0.13935720434511636, "grad_norm": 0.66796875, "learning_rate": 0.00034513976559043995, "loss": 0.2056, "step": 78596 }, { "epoch": 0.13936075051042618, "grad_norm": 0.58203125, "learning_rate": 0.0003451073568390327, "loss": 0.1789, "step": 78598 }, { "epoch": 0.139364296675736, "grad_norm": 0.412109375, "learning_rate": 0.00034507495138908775, "loss": 0.2255, "step": 78600 }, { "epoch": 0.1393678428410458, "grad_norm": 0.376953125, "learning_rate": 0.0003450425492407462, "loss": 0.1624, "step": 78602 }, { "epoch": 0.13937138900635562, "grad_norm": 0.8515625, "learning_rate": 0.0003450101503941504, "loss": 0.189, "step": 78604 }, { "epoch": 0.13937493517166544, "grad_norm": 0.3515625, "learning_rate": 0.0003449777548494412, "loss": 0.1975, "step": 78606 }, { "epoch": 0.13937848133697525, "grad_norm": 0.236328125, "learning_rate": 0.00034494536260676095, "loss": 0.1864, "step": 78608 }, { "epoch": 0.13938202750228507, "grad_norm": 0.2353515625, "learning_rate": 0.0003449129736662512, "loss": 0.1102, "step": 78610 }, { "epoch": 0.13938557366759488, "grad_norm": 0.5234375, "learning_rate": 0.0003448805880280531, "loss": 0.2151, "step": 78612 }, { "epoch": 0.1393891198329047, "grad_norm": 0.69921875, "learning_rate": 0.00034484820569230924, "loss": 0.1878, "step": 78614 }, { "epoch": 0.1393926659982145, "grad_norm": 1.078125, "learning_rate": 0.00034481582665916023, "loss": 0.2644, "step": 78616 }, { "epoch": 0.13939621216352432, "grad_norm": 0.59765625, "learning_rate": 0.0003447834509287483, "loss": 0.1824, "step": 78618 }, { "epoch": 0.13939975832883414, "grad_norm": 0.86328125, "learning_rate": 0.0003447510785012149, "loss": 0.168, "step": 78620 }, { "epoch": 0.13940330449414395, "grad_norm": 0.2412109375, "learning_rate": 0.00034471870937670163, "loss": 0.1771, "step": 78622 }, { "epoch": 0.13940685065945377, "grad_norm": 0.384765625, "learning_rate": 0.00034468634355535, "loss": 0.1448, "step": 78624 }, { "epoch": 0.13941039682476358, "grad_norm": 0.166015625, "learning_rate": 0.00034465398103730136, "loss": 0.1487, "step": 78626 }, { "epoch": 0.1394139429900734, "grad_norm": 0.341796875, "learning_rate": 0.00034462162182269753, "loss": 0.1745, "step": 78628 }, { "epoch": 0.1394174891553832, "grad_norm": 0.5859375, "learning_rate": 0.00034458926591168006, "loss": 0.1456, "step": 78630 }, { "epoch": 0.13942103532069303, "grad_norm": 0.400390625, "learning_rate": 0.0003445569133043903, "loss": 0.1829, "step": 78632 }, { "epoch": 0.13942458148600284, "grad_norm": 0.89453125, "learning_rate": 0.00034452456400096965, "loss": 0.1655, "step": 78634 }, { "epoch": 0.13942812765131266, "grad_norm": 0.30078125, "learning_rate": 0.0003444922180015599, "loss": 0.1847, "step": 78636 }, { "epoch": 0.13943167381662247, "grad_norm": 1.1640625, "learning_rate": 0.0003444598753063023, "loss": 0.1539, "step": 78638 }, { "epoch": 0.13943521998193228, "grad_norm": 0.484375, "learning_rate": 0.0003444275359153384, "loss": 0.1871, "step": 78640 }, { "epoch": 0.1394387661472421, "grad_norm": 0.42578125, "learning_rate": 0.00034439519982880953, "loss": 0.1953, "step": 78642 }, { "epoch": 0.1394423123125519, "grad_norm": 0.470703125, "learning_rate": 0.0003443628670468569, "loss": 0.1747, "step": 78644 }, { "epoch": 0.13944585847786173, "grad_norm": 0.2216796875, "learning_rate": 0.0003443305375696227, "loss": 0.1284, "step": 78646 }, { "epoch": 0.13944940464317154, "grad_norm": 0.2138671875, "learning_rate": 0.0003442982113972473, "loss": 0.3218, "step": 78648 }, { "epoch": 0.13945295080848136, "grad_norm": 0.189453125, "learning_rate": 0.00034426588852987274, "loss": 0.1791, "step": 78650 }, { "epoch": 0.13945649697379117, "grad_norm": 0.65625, "learning_rate": 0.00034423356896764003, "loss": 0.2822, "step": 78652 }, { "epoch": 0.139460043139101, "grad_norm": 1.1171875, "learning_rate": 0.000344201252710691, "loss": 0.1566, "step": 78654 }, { "epoch": 0.1394635893044108, "grad_norm": 1.671875, "learning_rate": 0.00034416893975916643, "loss": 0.1959, "step": 78656 }, { "epoch": 0.13946713546972062, "grad_norm": 0.439453125, "learning_rate": 0.00034413663011320796, "loss": 0.1874, "step": 78658 }, { "epoch": 0.13947068163503043, "grad_norm": 0.59375, "learning_rate": 0.0003441043237729569, "loss": 0.174, "step": 78660 }, { "epoch": 0.13947422780034024, "grad_norm": 0.388671875, "learning_rate": 0.0003440720207385544, "loss": 0.1284, "step": 78662 }, { "epoch": 0.13947777396565006, "grad_norm": 0.2392578125, "learning_rate": 0.0003440397210101418, "loss": 0.1348, "step": 78664 }, { "epoch": 0.13948132013095987, "grad_norm": 0.5625, "learning_rate": 0.00034400742458786006, "loss": 0.1938, "step": 78666 }, { "epoch": 0.1394848662962697, "grad_norm": 0.318359375, "learning_rate": 0.000343975131471851, "loss": 0.1839, "step": 78668 }, { "epoch": 0.1394884124615795, "grad_norm": 0.158203125, "learning_rate": 0.00034394284166225554, "loss": 0.2358, "step": 78670 }, { "epoch": 0.13949195862688932, "grad_norm": 0.419921875, "learning_rate": 0.000343910555159215, "loss": 0.2168, "step": 78672 }, { "epoch": 0.13949550479219913, "grad_norm": 0.412109375, "learning_rate": 0.00034387827196287013, "loss": 0.1748, "step": 78674 }, { "epoch": 0.13949905095750897, "grad_norm": 7.9375, "learning_rate": 0.00034384599207336276, "loss": 0.3019, "step": 78676 }, { "epoch": 0.1395025971228188, "grad_norm": 0.4140625, "learning_rate": 0.0003438137154908339, "loss": 0.1632, "step": 78678 }, { "epoch": 0.1395061432881286, "grad_norm": 0.9375, "learning_rate": 0.0003437814422154245, "loss": 0.2636, "step": 78680 }, { "epoch": 0.13950968945343842, "grad_norm": 0.5234375, "learning_rate": 0.00034374917224727587, "loss": 0.1724, "step": 78682 }, { "epoch": 0.13951323561874823, "grad_norm": 0.451171875, "learning_rate": 0.0003437169055865289, "loss": 0.153, "step": 78684 }, { "epoch": 0.13951678178405805, "grad_norm": 0.1630859375, "learning_rate": 0.0003436846422333252, "loss": 0.1574, "step": 78686 }, { "epoch": 0.13952032794936786, "grad_norm": 0.412109375, "learning_rate": 0.0003436523821878051, "loss": 0.1693, "step": 78688 }, { "epoch": 0.13952387411467768, "grad_norm": 0.4609375, "learning_rate": 0.00034362012545011047, "loss": 0.1762, "step": 78690 }, { "epoch": 0.1395274202799875, "grad_norm": 0.78125, "learning_rate": 0.00034358787202038186, "loss": 0.1683, "step": 78692 }, { "epoch": 0.1395309664452973, "grad_norm": 0.35546875, "learning_rate": 0.00034355562189876084, "loss": 0.1383, "step": 78694 }, { "epoch": 0.13953451261060712, "grad_norm": 0.271484375, "learning_rate": 0.00034352337508538775, "loss": 0.1311, "step": 78696 }, { "epoch": 0.13953805877591693, "grad_norm": 6.1875, "learning_rate": 0.0003434911315804042, "loss": 0.1375, "step": 78698 }, { "epoch": 0.13954160494122675, "grad_norm": 0.546875, "learning_rate": 0.000343458891383951, "loss": 0.2195, "step": 78700 }, { "epoch": 0.13954515110653656, "grad_norm": 0.365234375, "learning_rate": 0.00034342665449616904, "loss": 0.1378, "step": 78702 }, { "epoch": 0.13954869727184638, "grad_norm": 0.455078125, "learning_rate": 0.00034339442091719976, "loss": 0.1647, "step": 78704 }, { "epoch": 0.1395522434371562, "grad_norm": 1.859375, "learning_rate": 0.0003433621906471834, "loss": 0.2461, "step": 78706 }, { "epoch": 0.139555789602466, "grad_norm": 0.88671875, "learning_rate": 0.00034332996368626156, "loss": 0.2701, "step": 78708 }, { "epoch": 0.13955933576777582, "grad_norm": 0.67578125, "learning_rate": 0.0003432977400345747, "loss": 0.1582, "step": 78710 }, { "epoch": 0.13956288193308564, "grad_norm": 0.390625, "learning_rate": 0.00034326551969226436, "loss": 0.1952, "step": 78712 }, { "epoch": 0.13956642809839545, "grad_norm": 1.90625, "learning_rate": 0.0003432333026594706, "loss": 0.1751, "step": 78714 }, { "epoch": 0.13956997426370527, "grad_norm": 0.3671875, "learning_rate": 0.00034320108893633514, "loss": 0.1761, "step": 78716 }, { "epoch": 0.13957352042901508, "grad_norm": 0.515625, "learning_rate": 0.00034316887852299836, "loss": 0.1506, "step": 78718 }, { "epoch": 0.1395770665943249, "grad_norm": 0.1640625, "learning_rate": 0.00034313667141960136, "loss": 0.204, "step": 78720 }, { "epoch": 0.1395806127596347, "grad_norm": 0.294921875, "learning_rate": 0.000343104467626285, "loss": 0.2203, "step": 78722 }, { "epoch": 0.13958415892494452, "grad_norm": 0.37109375, "learning_rate": 0.0003430722671431897, "loss": 0.1922, "step": 78724 }, { "epoch": 0.13958770509025434, "grad_norm": 0.50390625, "learning_rate": 0.000343040069970457, "loss": 0.1533, "step": 78726 }, { "epoch": 0.13959125125556415, "grad_norm": 0.29296875, "learning_rate": 0.0003430078761082272, "loss": 0.145, "step": 78728 }, { "epoch": 0.13959479742087397, "grad_norm": 0.255859375, "learning_rate": 0.00034297568555664127, "loss": 0.1231, "step": 78730 }, { "epoch": 0.13959834358618378, "grad_norm": 0.310546875, "learning_rate": 0.0003429434983158397, "loss": 0.2068, "step": 78732 }, { "epoch": 0.1396018897514936, "grad_norm": 0.671875, "learning_rate": 0.00034291131438596393, "loss": 0.1644, "step": 78734 }, { "epoch": 0.1396054359168034, "grad_norm": 0.447265625, "learning_rate": 0.0003428791337671541, "loss": 0.1054, "step": 78736 }, { "epoch": 0.13960898208211323, "grad_norm": 0.359375, "learning_rate": 0.0003428469564595511, "loss": 0.1985, "step": 78738 }, { "epoch": 0.13961252824742304, "grad_norm": 0.1806640625, "learning_rate": 0.0003428147824632959, "loss": 0.1392, "step": 78740 }, { "epoch": 0.13961607441273285, "grad_norm": 0.9921875, "learning_rate": 0.0003427826117785287, "loss": 0.3621, "step": 78742 }, { "epoch": 0.13961962057804267, "grad_norm": 0.671875, "learning_rate": 0.0003427504444053909, "loss": 0.2231, "step": 78744 }, { "epoch": 0.13962316674335248, "grad_norm": 0.57421875, "learning_rate": 0.0003427182803440224, "loss": 0.2698, "step": 78746 }, { "epoch": 0.1396267129086623, "grad_norm": 6.6875, "learning_rate": 0.0003426861195945646, "loss": 0.2491, "step": 78748 }, { "epoch": 0.1396302590739721, "grad_norm": 0.59375, "learning_rate": 0.0003426539621571575, "loss": 0.1981, "step": 78750 }, { "epoch": 0.13963380523928193, "grad_norm": 0.330078125, "learning_rate": 0.00034262180803194245, "loss": 0.1703, "step": 78752 }, { "epoch": 0.13963735140459174, "grad_norm": 0.369140625, "learning_rate": 0.00034258965721905925, "loss": 0.3917, "step": 78754 }, { "epoch": 0.13964089756990156, "grad_norm": 0.341796875, "learning_rate": 0.0003425575097186492, "loss": 0.1576, "step": 78756 }, { "epoch": 0.13964444373521137, "grad_norm": 0.17578125, "learning_rate": 0.00034252536553085254, "loss": 0.3372, "step": 78758 }, { "epoch": 0.13964798990052119, "grad_norm": 0.546875, "learning_rate": 0.00034249322465581, "loss": 0.1838, "step": 78760 }, { "epoch": 0.139651536065831, "grad_norm": 0.34765625, "learning_rate": 0.0003424610870936621, "loss": 0.1927, "step": 78762 }, { "epoch": 0.13965508223114081, "grad_norm": 0.19921875, "learning_rate": 0.0003424289528445491, "loss": 0.1667, "step": 78764 }, { "epoch": 0.13965862839645066, "grad_norm": 0.7890625, "learning_rate": 0.0003423968219086121, "loss": 0.1668, "step": 78766 }, { "epoch": 0.13966217456176047, "grad_norm": 0.80078125, "learning_rate": 0.0003423646942859913, "loss": 0.122, "step": 78768 }, { "epoch": 0.13966572072707029, "grad_norm": 0.240234375, "learning_rate": 0.00034233256997682717, "loss": 0.1768, "step": 78770 }, { "epoch": 0.1396692668923801, "grad_norm": 0.5078125, "learning_rate": 0.0003423004489812602, "loss": 0.2722, "step": 78772 }, { "epoch": 0.13967281305768992, "grad_norm": 0.474609375, "learning_rate": 0.00034226833129943104, "loss": 0.1712, "step": 78774 }, { "epoch": 0.13967635922299973, "grad_norm": 0.216796875, "learning_rate": 0.0003422362169314801, "loss": 0.161, "step": 78776 }, { "epoch": 0.13967990538830954, "grad_norm": 0.2099609375, "learning_rate": 0.0003422041058775477, "loss": 0.1611, "step": 78778 }, { "epoch": 0.13968345155361936, "grad_norm": 0.373046875, "learning_rate": 0.0003421719981377745, "loss": 0.1207, "step": 78780 }, { "epoch": 0.13968699771892917, "grad_norm": 0.458984375, "learning_rate": 0.0003421398937123005, "loss": 0.2155, "step": 78782 }, { "epoch": 0.139690543884239, "grad_norm": 0.419921875, "learning_rate": 0.00034210779260126654, "loss": 0.1506, "step": 78784 }, { "epoch": 0.1396940900495488, "grad_norm": 0.703125, "learning_rate": 0.00034207569480481303, "loss": 0.3172, "step": 78786 }, { "epoch": 0.13969763621485862, "grad_norm": 0.25390625, "learning_rate": 0.0003420436003230801, "loss": 0.1676, "step": 78788 }, { "epoch": 0.13970118238016843, "grad_norm": 0.251953125, "learning_rate": 0.0003420115091562081, "loss": 0.152, "step": 78790 }, { "epoch": 0.13970472854547825, "grad_norm": 0.435546875, "learning_rate": 0.00034197942130433735, "loss": 0.2175, "step": 78792 }, { "epoch": 0.13970827471078806, "grad_norm": 0.490234375, "learning_rate": 0.00034194733676760837, "loss": 0.2261, "step": 78794 }, { "epoch": 0.13971182087609788, "grad_norm": 0.33984375, "learning_rate": 0.00034191525554616163, "loss": 0.4396, "step": 78796 }, { "epoch": 0.1397153670414077, "grad_norm": 0.546875, "learning_rate": 0.0003418831776401371, "loss": 0.1433, "step": 78798 }, { "epoch": 0.1397189132067175, "grad_norm": 0.234375, "learning_rate": 0.0003418511030496749, "loss": 0.1462, "step": 78800 }, { "epoch": 0.13972245937202732, "grad_norm": 0.7109375, "learning_rate": 0.00034181903177491613, "loss": 0.1654, "step": 78802 }, { "epoch": 0.13972600553733713, "grad_norm": 0.3203125, "learning_rate": 0.00034178696381600006, "loss": 0.1629, "step": 78804 }, { "epoch": 0.13972955170264695, "grad_norm": 0.189453125, "learning_rate": 0.0003417548991730676, "loss": 0.1386, "step": 78806 }, { "epoch": 0.13973309786795676, "grad_norm": 0.2451171875, "learning_rate": 0.00034172283784625866, "loss": 0.1555, "step": 78808 }, { "epoch": 0.13973664403326658, "grad_norm": 0.185546875, "learning_rate": 0.0003416907798357137, "loss": 0.1352, "step": 78810 }, { "epoch": 0.1397401901985764, "grad_norm": 0.64453125, "learning_rate": 0.0003416587251415728, "loss": 0.2251, "step": 78812 }, { "epoch": 0.1397437363638862, "grad_norm": 0.6640625, "learning_rate": 0.0003416266737639758, "loss": 0.1472, "step": 78814 }, { "epoch": 0.13974728252919602, "grad_norm": 0.64453125, "learning_rate": 0.00034159462570306355, "loss": 0.2334, "step": 78816 }, { "epoch": 0.13975082869450584, "grad_norm": 0.40234375, "learning_rate": 0.0003415625809589758, "loss": 0.1497, "step": 78818 }, { "epoch": 0.13975437485981565, "grad_norm": 0.373046875, "learning_rate": 0.0003415305395318529, "loss": 0.1778, "step": 78820 }, { "epoch": 0.13975792102512546, "grad_norm": 0.51171875, "learning_rate": 0.0003414985014218345, "loss": 0.2019, "step": 78822 }, { "epoch": 0.13976146719043528, "grad_norm": 0.181640625, "learning_rate": 0.0003414664666290612, "loss": 0.1482, "step": 78824 }, { "epoch": 0.1397650133557451, "grad_norm": 0.337890625, "learning_rate": 0.00034143443515367313, "loss": 0.1477, "step": 78826 }, { "epoch": 0.1397685595210549, "grad_norm": 0.2314453125, "learning_rate": 0.00034140240699581014, "loss": 0.1583, "step": 78828 }, { "epoch": 0.13977210568636472, "grad_norm": 0.86328125, "learning_rate": 0.00034137038215561246, "loss": 0.2487, "step": 78830 }, { "epoch": 0.13977565185167454, "grad_norm": 0.349609375, "learning_rate": 0.00034133836063321974, "loss": 0.2273, "step": 78832 }, { "epoch": 0.13977919801698435, "grad_norm": 0.56640625, "learning_rate": 0.00034130634242877277, "loss": 0.1641, "step": 78834 }, { "epoch": 0.13978274418229417, "grad_norm": 0.8125, "learning_rate": 0.0003412743275424108, "loss": 0.1797, "step": 78836 }, { "epoch": 0.13978629034760398, "grad_norm": 0.478515625, "learning_rate": 0.0003412423159742744, "loss": 0.1891, "step": 78838 }, { "epoch": 0.1397898365129138, "grad_norm": 0.73046875, "learning_rate": 0.0003412103077245032, "loss": 0.1577, "step": 78840 }, { "epoch": 0.1397933826782236, "grad_norm": 0.578125, "learning_rate": 0.00034117830279323767, "loss": 0.137, "step": 78842 }, { "epoch": 0.13979692884353342, "grad_norm": 0.52734375, "learning_rate": 0.0003411463011806172, "loss": 0.1953, "step": 78844 }, { "epoch": 0.13980047500884324, "grad_norm": 0.30859375, "learning_rate": 0.00034111430288678206, "loss": 0.2098, "step": 78846 }, { "epoch": 0.13980402117415305, "grad_norm": 0.6796875, "learning_rate": 0.0003410823079118722, "loss": 0.1639, "step": 78848 }, { "epoch": 0.13980756733946287, "grad_norm": 0.140625, "learning_rate": 0.0003410503162560276, "loss": 0.1553, "step": 78850 }, { "epoch": 0.13981111350477268, "grad_norm": 0.134765625, "learning_rate": 0.00034101832791938794, "loss": 0.1512, "step": 78852 }, { "epoch": 0.1398146596700825, "grad_norm": 0.42578125, "learning_rate": 0.00034098634290209305, "loss": 0.1895, "step": 78854 }, { "epoch": 0.1398182058353923, "grad_norm": 0.6953125, "learning_rate": 0.00034095436120428326, "loss": 0.2411, "step": 78856 }, { "epoch": 0.13982175200070215, "grad_norm": 0.455078125, "learning_rate": 0.0003409223828260982, "loss": 0.4889, "step": 78858 }, { "epoch": 0.13982529816601197, "grad_norm": 1.28125, "learning_rate": 0.00034089040776767764, "loss": 0.1645, "step": 78860 }, { "epoch": 0.13982884433132178, "grad_norm": 0.294921875, "learning_rate": 0.0003408584360291613, "loss": 0.1486, "step": 78862 }, { "epoch": 0.1398323904966316, "grad_norm": 0.48828125, "learning_rate": 0.00034082646761068945, "loss": 0.1593, "step": 78864 }, { "epoch": 0.1398359366619414, "grad_norm": 0.470703125, "learning_rate": 0.00034079450251240165, "loss": 0.1398, "step": 78866 }, { "epoch": 0.13983948282725123, "grad_norm": 0.384765625, "learning_rate": 0.0003407625407344377, "loss": 0.1943, "step": 78868 }, { "epoch": 0.13984302899256104, "grad_norm": 0.18359375, "learning_rate": 0.00034073058227693736, "loss": 0.1966, "step": 78870 }, { "epoch": 0.13984657515787086, "grad_norm": 0.2060546875, "learning_rate": 0.00034069862714004016, "loss": 0.2666, "step": 78872 }, { "epoch": 0.13985012132318067, "grad_norm": 0.546875, "learning_rate": 0.0003406666753238864, "loss": 0.1515, "step": 78874 }, { "epoch": 0.13985366748849049, "grad_norm": 0.3203125, "learning_rate": 0.0003406347268286155, "loss": 0.262, "step": 78876 }, { "epoch": 0.1398572136538003, "grad_norm": 0.9296875, "learning_rate": 0.0003406027816543672, "loss": 0.2305, "step": 78878 }, { "epoch": 0.13986075981911011, "grad_norm": 0.212890625, "learning_rate": 0.00034057083980128114, "loss": 0.1246, "step": 78880 }, { "epoch": 0.13986430598441993, "grad_norm": 0.26953125, "learning_rate": 0.00034053890126949706, "loss": 0.1576, "step": 78882 }, { "epoch": 0.13986785214972974, "grad_norm": 0.431640625, "learning_rate": 0.00034050696605915484, "loss": 0.18, "step": 78884 }, { "epoch": 0.13987139831503956, "grad_norm": 0.2890625, "learning_rate": 0.0003404750341703939, "loss": 0.15, "step": 78886 }, { "epoch": 0.13987494448034937, "grad_norm": 0.412109375, "learning_rate": 0.00034044310560335414, "loss": 0.1419, "step": 78888 }, { "epoch": 0.1398784906456592, "grad_norm": 0.275390625, "learning_rate": 0.0003404111803581746, "loss": 0.1927, "step": 78890 }, { "epoch": 0.139882036810969, "grad_norm": 1.21875, "learning_rate": 0.000340379258434996, "loss": 0.2057, "step": 78892 }, { "epoch": 0.13988558297627882, "grad_norm": 0.33203125, "learning_rate": 0.0003403473398339567, "loss": 0.4759, "step": 78894 }, { "epoch": 0.13988912914158863, "grad_norm": 0.265625, "learning_rate": 0.0003403154245551971, "loss": 0.1351, "step": 78896 }, { "epoch": 0.13989267530689845, "grad_norm": 0.51171875, "learning_rate": 0.00034028351259885635, "loss": 0.1908, "step": 78898 }, { "epoch": 0.13989622147220826, "grad_norm": 0.439453125, "learning_rate": 0.0003402516039650746, "loss": 0.1784, "step": 78900 }, { "epoch": 0.13989976763751807, "grad_norm": 0.310546875, "learning_rate": 0.00034021969865399073, "loss": 0.1999, "step": 78902 }, { "epoch": 0.1399033138028279, "grad_norm": 0.1767578125, "learning_rate": 0.00034018779666574465, "loss": 0.1466, "step": 78904 }, { "epoch": 0.1399068599681377, "grad_norm": 0.32421875, "learning_rate": 0.00034015589800047587, "loss": 0.2001, "step": 78906 }, { "epoch": 0.13991040613344752, "grad_norm": 0.220703125, "learning_rate": 0.0003401240026583238, "loss": 0.1593, "step": 78908 }, { "epoch": 0.13991395229875733, "grad_norm": 0.279296875, "learning_rate": 0.0003400921106394279, "loss": 0.1301, "step": 78910 }, { "epoch": 0.13991749846406715, "grad_norm": 0.58984375, "learning_rate": 0.00034006022194392753, "loss": 0.1702, "step": 78912 }, { "epoch": 0.13992104462937696, "grad_norm": 4.375, "learning_rate": 0.00034002833657196266, "loss": 0.2713, "step": 78914 }, { "epoch": 0.13992459079468678, "grad_norm": 2.5, "learning_rate": 0.00033999645452367224, "loss": 0.2469, "step": 78916 }, { "epoch": 0.1399281369599966, "grad_norm": 0.291015625, "learning_rate": 0.0003399645757991959, "loss": 0.1371, "step": 78918 }, { "epoch": 0.1399316831253064, "grad_norm": 0.234375, "learning_rate": 0.00033993270039867285, "loss": 0.1912, "step": 78920 }, { "epoch": 0.13993522929061622, "grad_norm": 0.279296875, "learning_rate": 0.0003399008283222428, "loss": 0.1173, "step": 78922 }, { "epoch": 0.13993877545592603, "grad_norm": 0.3046875, "learning_rate": 0.00033986895957004506, "loss": 0.1976, "step": 78924 }, { "epoch": 0.13994232162123585, "grad_norm": 0.455078125, "learning_rate": 0.000339837094142219, "loss": 0.1575, "step": 78926 }, { "epoch": 0.13994586778654566, "grad_norm": 0.197265625, "learning_rate": 0.00033980523203890387, "loss": 0.1831, "step": 78928 }, { "epoch": 0.13994941395185548, "grad_norm": 0.431640625, "learning_rate": 0.0003397733732602388, "loss": 0.2263, "step": 78930 }, { "epoch": 0.1399529601171653, "grad_norm": 1.5078125, "learning_rate": 0.000339741517806364, "loss": 0.1819, "step": 78932 }, { "epoch": 0.1399565062824751, "grad_norm": 0.3671875, "learning_rate": 0.0003397096656774177, "loss": 0.1493, "step": 78934 }, { "epoch": 0.13996005244778492, "grad_norm": 0.6328125, "learning_rate": 0.00033967781687353993, "loss": 0.2957, "step": 78936 }, { "epoch": 0.13996359861309474, "grad_norm": 0.310546875, "learning_rate": 0.0003396459713948694, "loss": 0.1954, "step": 78938 }, { "epoch": 0.13996714477840455, "grad_norm": 0.8203125, "learning_rate": 0.0003396141292415462, "loss": 0.1004, "step": 78940 }, { "epoch": 0.13997069094371437, "grad_norm": 0.1923828125, "learning_rate": 0.0003395822904137088, "loss": 0.1565, "step": 78942 }, { "epoch": 0.13997423710902418, "grad_norm": 0.7734375, "learning_rate": 0.00033955045491149697, "loss": 0.3364, "step": 78944 }, { "epoch": 0.139977783274334, "grad_norm": 0.55078125, "learning_rate": 0.00033951862273504964, "loss": 0.1755, "step": 78946 }, { "epoch": 0.13998132943964384, "grad_norm": 1.1640625, "learning_rate": 0.0003394867938845059, "loss": 0.1758, "step": 78948 }, { "epoch": 0.13998487560495365, "grad_norm": 0.1865234375, "learning_rate": 0.0003394549683600057, "loss": 0.1468, "step": 78950 }, { "epoch": 0.13998842177026347, "grad_norm": 0.42578125, "learning_rate": 0.00033942314616168713, "loss": 0.1949, "step": 78952 }, { "epoch": 0.13999196793557328, "grad_norm": 0.578125, "learning_rate": 0.00033939132728969006, "loss": 0.1412, "step": 78954 }, { "epoch": 0.1399955141008831, "grad_norm": 0.384765625, "learning_rate": 0.00033935951174415347, "loss": 0.1537, "step": 78956 }, { "epoch": 0.1399990602661929, "grad_norm": 0.73046875, "learning_rate": 0.000339327699525217, "loss": 0.1429, "step": 78958 }, { "epoch": 0.14000260643150272, "grad_norm": 1.0859375, "learning_rate": 0.00033929589063301876, "loss": 0.2263, "step": 78960 }, { "epoch": 0.14000615259681254, "grad_norm": 0.349609375, "learning_rate": 0.0003392640850676987, "loss": 0.1559, "step": 78962 }, { "epoch": 0.14000969876212235, "grad_norm": 0.244140625, "learning_rate": 0.00033923228282939556, "loss": 0.1608, "step": 78964 }, { "epoch": 0.14001324492743217, "grad_norm": 0.26953125, "learning_rate": 0.00033920048391824863, "loss": 0.1561, "step": 78966 }, { "epoch": 0.14001679109274198, "grad_norm": 2.296875, "learning_rate": 0.00033916868833439685, "loss": 0.3471, "step": 78968 }, { "epoch": 0.1400203372580518, "grad_norm": 0.4140625, "learning_rate": 0.000339136896077979, "loss": 0.2398, "step": 78970 }, { "epoch": 0.1400238834233616, "grad_norm": 0.88671875, "learning_rate": 0.00033910510714913464, "loss": 0.1533, "step": 78972 }, { "epoch": 0.14002742958867143, "grad_norm": 0.2158203125, "learning_rate": 0.0003390733215480026, "loss": 0.3987, "step": 78974 }, { "epoch": 0.14003097575398124, "grad_norm": 0.54296875, "learning_rate": 0.0003390415392747218, "loss": 0.2051, "step": 78976 }, { "epoch": 0.14003452191929106, "grad_norm": 0.404296875, "learning_rate": 0.00033900976032943134, "loss": 0.2397, "step": 78978 }, { "epoch": 0.14003806808460087, "grad_norm": 0.70703125, "learning_rate": 0.0003389779847122699, "loss": 0.1608, "step": 78980 }, { "epoch": 0.14004161424991068, "grad_norm": 0.6171875, "learning_rate": 0.0003389462124233771, "loss": 0.1965, "step": 78982 }, { "epoch": 0.1400451604152205, "grad_norm": 0.34765625, "learning_rate": 0.00033891444346289114, "loss": 0.1521, "step": 78984 }, { "epoch": 0.1400487065805303, "grad_norm": 0.82421875, "learning_rate": 0.0003388826778309515, "loss": 0.2763, "step": 78986 }, { "epoch": 0.14005225274584013, "grad_norm": 0.421875, "learning_rate": 0.00033885091552769676, "loss": 0.2428, "step": 78988 }, { "epoch": 0.14005579891114994, "grad_norm": 1.0859375, "learning_rate": 0.00033881915655326636, "loss": 0.2261, "step": 78990 }, { "epoch": 0.14005934507645976, "grad_norm": 0.5859375, "learning_rate": 0.00033878740090779834, "loss": 0.1872, "step": 78992 }, { "epoch": 0.14006289124176957, "grad_norm": 0.1982421875, "learning_rate": 0.00033875564859143234, "loss": 0.175, "step": 78994 }, { "epoch": 0.14006643740707939, "grad_norm": 0.33203125, "learning_rate": 0.00033872389960430693, "loss": 0.189, "step": 78996 }, { "epoch": 0.1400699835723892, "grad_norm": 0.4296875, "learning_rate": 0.00033869215394656093, "loss": 0.1641, "step": 78998 }, { "epoch": 0.14007352973769902, "grad_norm": 0.328125, "learning_rate": 0.0003386604116183334, "loss": 0.1777, "step": 79000 }, { "epoch": 0.14007707590300883, "grad_norm": 0.640625, "learning_rate": 0.0003386286726197626, "loss": 0.2314, "step": 79002 }, { "epoch": 0.14008062206831864, "grad_norm": 0.34375, "learning_rate": 0.000338596936950988, "loss": 0.1389, "step": 79004 }, { "epoch": 0.14008416823362846, "grad_norm": 0.27734375, "learning_rate": 0.0003385652046121481, "loss": 0.1896, "step": 79006 }, { "epoch": 0.14008771439893827, "grad_norm": 0.32421875, "learning_rate": 0.0003385334756033818, "loss": 0.1608, "step": 79008 }, { "epoch": 0.1400912605642481, "grad_norm": 0.431640625, "learning_rate": 0.0003385017499248274, "loss": 0.1768, "step": 79010 }, { "epoch": 0.1400948067295579, "grad_norm": 6.09375, "learning_rate": 0.00033847002757662436, "loss": 0.351, "step": 79012 }, { "epoch": 0.14009835289486772, "grad_norm": 0.30078125, "learning_rate": 0.000338438308558911, "loss": 0.1549, "step": 79014 }, { "epoch": 0.14010189906017753, "grad_norm": 0.314453125, "learning_rate": 0.0003384065928718261, "loss": 0.2597, "step": 79016 }, { "epoch": 0.14010544522548735, "grad_norm": 0.6015625, "learning_rate": 0.00033837488051550844, "loss": 0.1521, "step": 79018 }, { "epoch": 0.14010899139079716, "grad_norm": 0.361328125, "learning_rate": 0.00033834317149009644, "loss": 0.1615, "step": 79020 }, { "epoch": 0.14011253755610698, "grad_norm": 0.87109375, "learning_rate": 0.00033831146579572935, "loss": 0.1808, "step": 79022 }, { "epoch": 0.1401160837214168, "grad_norm": 0.333984375, "learning_rate": 0.000338279763432545, "loss": 0.2057, "step": 79024 }, { "epoch": 0.1401196298867266, "grad_norm": 0.19140625, "learning_rate": 0.0003382480644006828, "loss": 0.1748, "step": 79026 }, { "epoch": 0.14012317605203642, "grad_norm": 0.3125, "learning_rate": 0.00033821636870028093, "loss": 0.1425, "step": 79028 }, { "epoch": 0.14012672221734623, "grad_norm": 0.216796875, "learning_rate": 0.0003381846763314782, "loss": 0.485, "step": 79030 }, { "epoch": 0.14013026838265605, "grad_norm": 0.359375, "learning_rate": 0.00033815298729441336, "loss": 0.1381, "step": 79032 }, { "epoch": 0.14013381454796586, "grad_norm": 0.3046875, "learning_rate": 0.0003381213015892248, "loss": 0.1289, "step": 79034 }, { "epoch": 0.14013736071327568, "grad_norm": 0.5234375, "learning_rate": 0.000338089619216051, "loss": 0.168, "step": 79036 }, { "epoch": 0.14014090687858552, "grad_norm": 0.4609375, "learning_rate": 0.00033805794017503055, "loss": 0.1822, "step": 79038 }, { "epoch": 0.14014445304389533, "grad_norm": 0.1513671875, "learning_rate": 0.0003380262644663025, "loss": 0.1765, "step": 79040 }, { "epoch": 0.14014799920920515, "grad_norm": 1.7265625, "learning_rate": 0.0003379945920900044, "loss": 0.2155, "step": 79042 }, { "epoch": 0.14015154537451496, "grad_norm": 0.466796875, "learning_rate": 0.00033796292304627556, "loss": 0.2047, "step": 79044 }, { "epoch": 0.14015509153982478, "grad_norm": 3.625, "learning_rate": 0.0003379312573352542, "loss": 0.3161, "step": 79046 }, { "epoch": 0.1401586377051346, "grad_norm": 0.640625, "learning_rate": 0.0003378995949570791, "loss": 0.1606, "step": 79048 }, { "epoch": 0.1401621838704444, "grad_norm": 0.291015625, "learning_rate": 0.00033786793591188824, "loss": 0.2603, "step": 79050 }, { "epoch": 0.14016573003575422, "grad_norm": 0.46484375, "learning_rate": 0.0003378362801998204, "loss": 0.26, "step": 79052 }, { "epoch": 0.14016927620106404, "grad_norm": 0.296875, "learning_rate": 0.000337804627821014, "loss": 0.1608, "step": 79054 }, { "epoch": 0.14017282236637385, "grad_norm": 0.65234375, "learning_rate": 0.0003377729787756074, "loss": 0.2027, "step": 79056 }, { "epoch": 0.14017636853168366, "grad_norm": 0.6171875, "learning_rate": 0.0003377413330637391, "loss": 0.355, "step": 79058 }, { "epoch": 0.14017991469699348, "grad_norm": 0.2177734375, "learning_rate": 0.0003377096906855471, "loss": 0.1114, "step": 79060 }, { "epoch": 0.1401834608623033, "grad_norm": 0.28125, "learning_rate": 0.00033767805164117056, "loss": 0.217, "step": 79062 }, { "epoch": 0.1401870070276131, "grad_norm": 0.4296875, "learning_rate": 0.00033764641593074724, "loss": 0.1526, "step": 79064 }, { "epoch": 0.14019055319292292, "grad_norm": 0.4296875, "learning_rate": 0.00033761478355441566, "loss": 0.1406, "step": 79066 }, { "epoch": 0.14019409935823274, "grad_norm": 0.56640625, "learning_rate": 0.00033758315451231405, "loss": 0.1975, "step": 79068 }, { "epoch": 0.14019764552354255, "grad_norm": 0.91015625, "learning_rate": 0.00033755152880458097, "loss": 0.1812, "step": 79070 }, { "epoch": 0.14020119168885237, "grad_norm": 0.2451171875, "learning_rate": 0.00033751990643135477, "loss": 0.1724, "step": 79072 }, { "epoch": 0.14020473785416218, "grad_norm": 0.984375, "learning_rate": 0.0003374882873927734, "loss": 0.2043, "step": 79074 }, { "epoch": 0.140208284019472, "grad_norm": 0.9765625, "learning_rate": 0.00033745667168897547, "loss": 0.1496, "step": 79076 }, { "epoch": 0.1402118301847818, "grad_norm": 0.41015625, "learning_rate": 0.0003374250593200989, "loss": 0.202, "step": 79078 }, { "epoch": 0.14021537635009163, "grad_norm": 0.478515625, "learning_rate": 0.0003373934502862825, "loss": 0.2096, "step": 79080 }, { "epoch": 0.14021892251540144, "grad_norm": 0.2431640625, "learning_rate": 0.00033736184458766386, "loss": 0.0985, "step": 79082 }, { "epoch": 0.14022246868071125, "grad_norm": 0.267578125, "learning_rate": 0.00033733024222438166, "loss": 0.1331, "step": 79084 }, { "epoch": 0.14022601484602107, "grad_norm": 1.140625, "learning_rate": 0.0003372986431965738, "loss": 0.151, "step": 79086 }, { "epoch": 0.14022956101133088, "grad_norm": 0.8046875, "learning_rate": 0.00033726704750437905, "loss": 0.4558, "step": 79088 }, { "epoch": 0.1402331071766407, "grad_norm": 0.255859375, "learning_rate": 0.00033723545514793477, "loss": 0.1405, "step": 79090 }, { "epoch": 0.1402366533419505, "grad_norm": 0.7578125, "learning_rate": 0.0003372038661273798, "loss": 0.2117, "step": 79092 }, { "epoch": 0.14024019950726033, "grad_norm": 0.150390625, "learning_rate": 0.00033717228044285194, "loss": 0.1435, "step": 79094 }, { "epoch": 0.14024374567257014, "grad_norm": 0.78515625, "learning_rate": 0.0003371406980944895, "loss": 0.1792, "step": 79096 }, { "epoch": 0.14024729183787996, "grad_norm": 0.265625, "learning_rate": 0.0003371091190824305, "loss": 0.1749, "step": 79098 }, { "epoch": 0.14025083800318977, "grad_norm": 0.33984375, "learning_rate": 0.00033707754340681285, "loss": 0.1689, "step": 79100 }, { "epoch": 0.14025438416849959, "grad_norm": 0.3125, "learning_rate": 0.00033704597106777503, "loss": 0.207, "step": 79102 }, { "epoch": 0.1402579303338094, "grad_norm": 0.416015625, "learning_rate": 0.0003370144020654551, "loss": 0.1643, "step": 79104 }, { "epoch": 0.14026147649911921, "grad_norm": 0.8984375, "learning_rate": 0.0003369828363999908, "loss": 0.1781, "step": 79106 }, { "epoch": 0.14026502266442903, "grad_norm": 0.34765625, "learning_rate": 0.0003369512740715203, "loss": 0.216, "step": 79108 }, { "epoch": 0.14026856882973884, "grad_norm": 0.255859375, "learning_rate": 0.00033691971508018184, "loss": 0.1652, "step": 79110 }, { "epoch": 0.14027211499504866, "grad_norm": 0.330078125, "learning_rate": 0.00033688815942611335, "loss": 0.1204, "step": 79112 }, { "epoch": 0.14027566116035847, "grad_norm": 1.40625, "learning_rate": 0.0003368566071094528, "loss": 0.191, "step": 79114 }, { "epoch": 0.1402792073256683, "grad_norm": 0.57421875, "learning_rate": 0.0003368250581303379, "loss": 0.1608, "step": 79116 }, { "epoch": 0.1402827534909781, "grad_norm": 0.345703125, "learning_rate": 0.0003367935124889069, "loss": 0.183, "step": 79118 }, { "epoch": 0.14028629965628792, "grad_norm": 0.470703125, "learning_rate": 0.00033676197018529797, "loss": 0.2586, "step": 79120 }, { "epoch": 0.14028984582159773, "grad_norm": 1.828125, "learning_rate": 0.00033673043121964884, "loss": 0.2041, "step": 79122 }, { "epoch": 0.14029339198690755, "grad_norm": 0.302734375, "learning_rate": 0.0003366988955920974, "loss": 0.3084, "step": 79124 }, { "epoch": 0.14029693815221736, "grad_norm": 0.306640625, "learning_rate": 0.0003366673633027815, "loss": 0.1682, "step": 79126 }, { "epoch": 0.14030048431752717, "grad_norm": 0.5078125, "learning_rate": 0.0003366358343518393, "loss": 0.2376, "step": 79128 }, { "epoch": 0.14030403048283702, "grad_norm": 0.953125, "learning_rate": 0.0003366043087394085, "loss": 0.1766, "step": 79130 }, { "epoch": 0.14030757664814683, "grad_norm": 0.455078125, "learning_rate": 0.00033657278646562704, "loss": 0.129, "step": 79132 }, { "epoch": 0.14031112281345665, "grad_norm": 0.61328125, "learning_rate": 0.0003365412675306328, "loss": 0.1711, "step": 79134 }, { "epoch": 0.14031466897876646, "grad_norm": 0.83984375, "learning_rate": 0.00033650975193456336, "loss": 0.1558, "step": 79136 }, { "epoch": 0.14031821514407627, "grad_norm": 0.2041015625, "learning_rate": 0.0003364782396775571, "loss": 0.2683, "step": 79138 }, { "epoch": 0.1403217613093861, "grad_norm": 0.380859375, "learning_rate": 0.0003364467307597512, "loss": 0.1685, "step": 79140 }, { "epoch": 0.1403253074746959, "grad_norm": 0.26953125, "learning_rate": 0.00033641522518128406, "loss": 0.1411, "step": 79142 }, { "epoch": 0.14032885364000572, "grad_norm": 0.185546875, "learning_rate": 0.00033638372294229305, "loss": 0.1229, "step": 79144 }, { "epoch": 0.14033239980531553, "grad_norm": 0.181640625, "learning_rate": 0.0003363522240429161, "loss": 0.1509, "step": 79146 }, { "epoch": 0.14033594597062535, "grad_norm": 0.7734375, "learning_rate": 0.000336320728483291, "loss": 0.2184, "step": 79148 }, { "epoch": 0.14033949213593516, "grad_norm": 1.6484375, "learning_rate": 0.00033628923626355514, "loss": 0.184, "step": 79150 }, { "epoch": 0.14034303830124498, "grad_norm": 0.470703125, "learning_rate": 0.0003362577473838468, "loss": 0.2303, "step": 79152 }, { "epoch": 0.1403465844665548, "grad_norm": 0.4453125, "learning_rate": 0.00033622626184430336, "loss": 0.1823, "step": 79154 }, { "epoch": 0.1403501306318646, "grad_norm": 0.25390625, "learning_rate": 0.0003361947796450628, "loss": 0.1684, "step": 79156 }, { "epoch": 0.14035367679717442, "grad_norm": 2.75, "learning_rate": 0.00033616330078626216, "loss": 0.1675, "step": 79158 }, { "epoch": 0.14035722296248423, "grad_norm": 0.4765625, "learning_rate": 0.00033613182526804, "loss": 0.1373, "step": 79160 }, { "epoch": 0.14036076912779405, "grad_norm": 0.26953125, "learning_rate": 0.0003361003530905333, "loss": 0.1632, "step": 79162 }, { "epoch": 0.14036431529310386, "grad_norm": 0.60546875, "learning_rate": 0.00033606888425388005, "loss": 0.2033, "step": 79164 }, { "epoch": 0.14036786145841368, "grad_norm": 0.28515625, "learning_rate": 0.00033603741875821774, "loss": 0.1966, "step": 79166 }, { "epoch": 0.1403714076237235, "grad_norm": 0.263671875, "learning_rate": 0.0003360059566036838, "loss": 0.1554, "step": 79168 }, { "epoch": 0.1403749537890333, "grad_norm": 0.447265625, "learning_rate": 0.0003359744977904164, "loss": 0.1522, "step": 79170 }, { "epoch": 0.14037849995434312, "grad_norm": 0.31640625, "learning_rate": 0.00033594304231855237, "loss": 0.1897, "step": 79172 }, { "epoch": 0.14038204611965294, "grad_norm": 0.625, "learning_rate": 0.0003359115901882298, "loss": 0.1832, "step": 79174 }, { "epoch": 0.14038559228496275, "grad_norm": 0.423828125, "learning_rate": 0.00033588014139958593, "loss": 0.1639, "step": 79176 }, { "epoch": 0.14038913845027257, "grad_norm": 0.6015625, "learning_rate": 0.0003358486959527589, "loss": 0.1584, "step": 79178 }, { "epoch": 0.14039268461558238, "grad_norm": 0.28125, "learning_rate": 0.0003358172538478854, "loss": 0.2017, "step": 79180 }, { "epoch": 0.1403962307808922, "grad_norm": 0.43359375, "learning_rate": 0.00033578581508510364, "loss": 0.1653, "step": 79182 }, { "epoch": 0.140399776946202, "grad_norm": 0.43359375, "learning_rate": 0.0003357543796645507, "loss": 0.1384, "step": 79184 }, { "epoch": 0.14040332311151182, "grad_norm": 1.3125, "learning_rate": 0.00033572294758636413, "loss": 0.2531, "step": 79186 }, { "epoch": 0.14040686927682164, "grad_norm": 0.4453125, "learning_rate": 0.0003356915188506815, "loss": 0.1841, "step": 79188 }, { "epoch": 0.14041041544213145, "grad_norm": 0.3828125, "learning_rate": 0.00033566009345764006, "loss": 0.2155, "step": 79190 }, { "epoch": 0.14041396160744127, "grad_norm": 0.41015625, "learning_rate": 0.0003356286714073775, "loss": 0.1832, "step": 79192 }, { "epoch": 0.14041750777275108, "grad_norm": 2.65625, "learning_rate": 0.0003355972527000312, "loss": 0.2152, "step": 79194 }, { "epoch": 0.1404210539380609, "grad_norm": 1.0703125, "learning_rate": 0.0003355658373357384, "loss": 0.1795, "step": 79196 }, { "epoch": 0.1404246001033707, "grad_norm": 0.83984375, "learning_rate": 0.00033553442531463645, "loss": 0.1771, "step": 79198 }, { "epoch": 0.14042814626868053, "grad_norm": 0.435546875, "learning_rate": 0.000335503016636863, "loss": 0.1298, "step": 79200 }, { "epoch": 0.14043169243399034, "grad_norm": 1.3125, "learning_rate": 0.00033547161130255545, "loss": 0.15, "step": 79202 }, { "epoch": 0.14043523859930016, "grad_norm": 0.75390625, "learning_rate": 0.0003354402093118509, "loss": 0.2125, "step": 79204 }, { "epoch": 0.14043878476460997, "grad_norm": 3.265625, "learning_rate": 0.0003354088106648867, "loss": 0.2079, "step": 79206 }, { "epoch": 0.14044233092991978, "grad_norm": 0.275390625, "learning_rate": 0.0003353774153618001, "loss": 0.1615, "step": 79208 }, { "epoch": 0.1404458770952296, "grad_norm": 1.1640625, "learning_rate": 0.0003353460234027287, "loss": 0.4351, "step": 79210 }, { "epoch": 0.1404494232605394, "grad_norm": 0.458984375, "learning_rate": 0.0003353146347878097, "loss": 0.2204, "step": 79212 }, { "epoch": 0.14045296942584923, "grad_norm": 0.359375, "learning_rate": 0.00033528324951718027, "loss": 0.1648, "step": 79214 }, { "epoch": 0.14045651559115904, "grad_norm": 2.328125, "learning_rate": 0.0003352518675909775, "loss": 0.3303, "step": 79216 }, { "epoch": 0.14046006175646886, "grad_norm": 0.3203125, "learning_rate": 0.000335220489009339, "loss": 0.1634, "step": 79218 }, { "epoch": 0.1404636079217787, "grad_norm": 0.35546875, "learning_rate": 0.00033518911377240193, "loss": 0.1913, "step": 79220 }, { "epoch": 0.1404671540870885, "grad_norm": 0.6796875, "learning_rate": 0.00033515774188030333, "loss": 0.2098, "step": 79222 }, { "epoch": 0.14047070025239833, "grad_norm": 0.66796875, "learning_rate": 0.00033512637333318055, "loss": 0.215, "step": 79224 }, { "epoch": 0.14047424641770814, "grad_norm": 0.1689453125, "learning_rate": 0.0003350950081311705, "loss": 0.1867, "step": 79226 }, { "epoch": 0.14047779258301796, "grad_norm": 0.75390625, "learning_rate": 0.0003350636462744109, "loss": 0.1791, "step": 79228 }, { "epoch": 0.14048133874832777, "grad_norm": 0.1962890625, "learning_rate": 0.00033503228776303836, "loss": 0.1588, "step": 79230 }, { "epoch": 0.1404848849136376, "grad_norm": 0.341796875, "learning_rate": 0.00033500093259719035, "loss": 0.1354, "step": 79232 }, { "epoch": 0.1404884310789474, "grad_norm": 1.9296875, "learning_rate": 0.00033496958077700376, "loss": 0.2233, "step": 79234 }, { "epoch": 0.14049197724425722, "grad_norm": 0.341796875, "learning_rate": 0.0003349382323026161, "loss": 0.1477, "step": 79236 }, { "epoch": 0.14049552340956703, "grad_norm": 0.423828125, "learning_rate": 0.0003349068871741638, "loss": 0.168, "step": 79238 }, { "epoch": 0.14049906957487684, "grad_norm": 0.59765625, "learning_rate": 0.0003348755453917848, "loss": 0.1465, "step": 79240 }, { "epoch": 0.14050261574018666, "grad_norm": 0.26953125, "learning_rate": 0.00033484420695561556, "loss": 0.1785, "step": 79242 }, { "epoch": 0.14050616190549647, "grad_norm": 0.28125, "learning_rate": 0.00033481287186579324, "loss": 0.1326, "step": 79244 }, { "epoch": 0.1405097080708063, "grad_norm": 0.55859375, "learning_rate": 0.0003347815401224551, "loss": 0.2924, "step": 79246 }, { "epoch": 0.1405132542361161, "grad_norm": 2.828125, "learning_rate": 0.0003347502117257377, "loss": 0.2415, "step": 79248 }, { "epoch": 0.14051680040142592, "grad_norm": 0.38671875, "learning_rate": 0.0003347188866757786, "loss": 0.19, "step": 79250 }, { "epoch": 0.14052034656673573, "grad_norm": 1.0859375, "learning_rate": 0.00033468756497271457, "loss": 0.3508, "step": 79252 }, { "epoch": 0.14052389273204555, "grad_norm": 0.470703125, "learning_rate": 0.0003346562466166826, "loss": 0.1489, "step": 79254 }, { "epoch": 0.14052743889735536, "grad_norm": 0.40234375, "learning_rate": 0.00033462493160781937, "loss": 0.2315, "step": 79256 }, { "epoch": 0.14053098506266518, "grad_norm": 0.177734375, "learning_rate": 0.00033459361994626235, "loss": 0.1794, "step": 79258 }, { "epoch": 0.140534531227975, "grad_norm": 0.26953125, "learning_rate": 0.00033456231163214825, "loss": 0.1618, "step": 79260 }, { "epoch": 0.1405380773932848, "grad_norm": 0.4140625, "learning_rate": 0.000334531006665614, "loss": 0.1746, "step": 79262 }, { "epoch": 0.14054162355859462, "grad_norm": 0.59765625, "learning_rate": 0.0003344997050467965, "loss": 0.1745, "step": 79264 }, { "epoch": 0.14054516972390443, "grad_norm": 0.984375, "learning_rate": 0.00033446840677583235, "loss": 0.1545, "step": 79266 }, { "epoch": 0.14054871588921425, "grad_norm": 0.26171875, "learning_rate": 0.0003344371118528592, "loss": 0.171, "step": 79268 }, { "epoch": 0.14055226205452406, "grad_norm": 0.67578125, "learning_rate": 0.0003344058202780129, "loss": 0.2312, "step": 79270 }, { "epoch": 0.14055580821983388, "grad_norm": 0.291015625, "learning_rate": 0.00033437453205143115, "loss": 0.1555, "step": 79272 }, { "epoch": 0.1405593543851437, "grad_norm": 11.75, "learning_rate": 0.00033434324717325026, "loss": 0.3368, "step": 79274 }, { "epoch": 0.1405629005504535, "grad_norm": 0.439453125, "learning_rate": 0.00033431196564360767, "loss": 0.2191, "step": 79276 }, { "epoch": 0.14056644671576332, "grad_norm": 1.8359375, "learning_rate": 0.00033428068746263934, "loss": 0.1625, "step": 79278 }, { "epoch": 0.14056999288107314, "grad_norm": 0.345703125, "learning_rate": 0.0003342494126304827, "loss": 0.1783, "step": 79280 }, { "epoch": 0.14057353904638295, "grad_norm": 0.357421875, "learning_rate": 0.0003342181411472744, "loss": 0.1598, "step": 79282 }, { "epoch": 0.14057708521169276, "grad_norm": 1.4765625, "learning_rate": 0.00033418687301315075, "loss": 0.2326, "step": 79284 }, { "epoch": 0.14058063137700258, "grad_norm": 0.359375, "learning_rate": 0.0003341556082282495, "loss": 0.1857, "step": 79286 }, { "epoch": 0.1405841775423124, "grad_norm": 0.25390625, "learning_rate": 0.00033412434679270625, "loss": 0.1351, "step": 79288 }, { "epoch": 0.1405877237076222, "grad_norm": 0.44140625, "learning_rate": 0.00033409308870665843, "loss": 0.2139, "step": 79290 }, { "epoch": 0.14059126987293202, "grad_norm": 0.703125, "learning_rate": 0.0003340618339702424, "loss": 0.1893, "step": 79292 }, { "epoch": 0.14059481603824184, "grad_norm": 0.474609375, "learning_rate": 0.0003340305825835953, "loss": 0.1841, "step": 79294 }, { "epoch": 0.14059836220355165, "grad_norm": 0.2138671875, "learning_rate": 0.00033399933454685314, "loss": 0.1447, "step": 79296 }, { "epoch": 0.14060190836886147, "grad_norm": 0.51953125, "learning_rate": 0.00033396808986015325, "loss": 0.162, "step": 79298 }, { "epoch": 0.14060545453417128, "grad_norm": 0.1669921875, "learning_rate": 0.00033393684852363196, "loss": 0.127, "step": 79300 }, { "epoch": 0.1406090006994811, "grad_norm": 0.40625, "learning_rate": 0.00033390561053742575, "loss": 0.1433, "step": 79302 }, { "epoch": 0.1406125468647909, "grad_norm": 0.283203125, "learning_rate": 0.00033387437590167156, "loss": 0.1753, "step": 79304 }, { "epoch": 0.14061609303010073, "grad_norm": 0.283203125, "learning_rate": 0.0003338431446165056, "loss": 0.2513, "step": 79306 }, { "epoch": 0.14061963919541054, "grad_norm": 1.2265625, "learning_rate": 0.0003338119166820648, "loss": 0.2553, "step": 79308 }, { "epoch": 0.14062318536072038, "grad_norm": 0.318359375, "learning_rate": 0.00033378069209848566, "loss": 0.2055, "step": 79310 }, { "epoch": 0.1406267315260302, "grad_norm": 0.384765625, "learning_rate": 0.0003337494708659047, "loss": 0.1626, "step": 79312 }, { "epoch": 0.14063027769134, "grad_norm": 0.345703125, "learning_rate": 0.00033371825298445835, "loss": 0.3541, "step": 79314 }, { "epoch": 0.14063382385664983, "grad_norm": 0.197265625, "learning_rate": 0.00033368703845428326, "loss": 0.125, "step": 79316 }, { "epoch": 0.14063737002195964, "grad_norm": 1.4921875, "learning_rate": 0.00033365582727551613, "loss": 0.2348, "step": 79318 }, { "epoch": 0.14064091618726945, "grad_norm": 0.4453125, "learning_rate": 0.00033362461944829315, "loss": 0.1516, "step": 79320 }, { "epoch": 0.14064446235257927, "grad_norm": 0.166015625, "learning_rate": 0.00033359341497275096, "loss": 0.1746, "step": 79322 }, { "epoch": 0.14064800851788908, "grad_norm": 5.03125, "learning_rate": 0.0003335622138490258, "loss": 0.2663, "step": 79324 }, { "epoch": 0.1406515546831989, "grad_norm": 0.34375, "learning_rate": 0.0003335310160772548, "loss": 0.1839, "step": 79326 }, { "epoch": 0.1406551008485087, "grad_norm": 0.271484375, "learning_rate": 0.00033349982165757343, "loss": 0.1363, "step": 79328 }, { "epoch": 0.14065864701381853, "grad_norm": 0.498046875, "learning_rate": 0.00033346863059011876, "loss": 0.2091, "step": 79330 }, { "epoch": 0.14066219317912834, "grad_norm": 1.1015625, "learning_rate": 0.00033343744287502714, "loss": 0.1831, "step": 79332 }, { "epoch": 0.14066573934443816, "grad_norm": 0.439453125, "learning_rate": 0.0003334062585124348, "loss": 0.1413, "step": 79334 }, { "epoch": 0.14066928550974797, "grad_norm": 0.5546875, "learning_rate": 0.0003333750775024782, "loss": 0.1658, "step": 79336 }, { "epoch": 0.14067283167505779, "grad_norm": 4.25, "learning_rate": 0.00033334389984529354, "loss": 0.2777, "step": 79338 }, { "epoch": 0.1406763778403676, "grad_norm": 0.84765625, "learning_rate": 0.00033331272554101756, "loss": 0.1749, "step": 79340 }, { "epoch": 0.14067992400567741, "grad_norm": 0.296875, "learning_rate": 0.00033328155458978625, "loss": 0.1313, "step": 79342 }, { "epoch": 0.14068347017098723, "grad_norm": 0.484375, "learning_rate": 0.0003332503869917362, "loss": 0.1605, "step": 79344 }, { "epoch": 0.14068701633629704, "grad_norm": 0.341796875, "learning_rate": 0.00033321922274700324, "loss": 0.1286, "step": 79346 }, { "epoch": 0.14069056250160686, "grad_norm": 0.33203125, "learning_rate": 0.0003331880618557243, "loss": 0.1531, "step": 79348 }, { "epoch": 0.14069410866691667, "grad_norm": 1.765625, "learning_rate": 0.0003331569043180353, "loss": 0.2141, "step": 79350 }, { "epoch": 0.1406976548322265, "grad_norm": 1.2109375, "learning_rate": 0.0003331257501340726, "loss": 0.2537, "step": 79352 }, { "epoch": 0.1407012009975363, "grad_norm": 0.59765625, "learning_rate": 0.0003330945993039724, "loss": 0.1867, "step": 79354 }, { "epoch": 0.14070474716284612, "grad_norm": 0.365234375, "learning_rate": 0.0003330634518278708, "loss": 0.1905, "step": 79356 }, { "epoch": 0.14070829332815593, "grad_norm": 0.2255859375, "learning_rate": 0.0003330323077059045, "loss": 0.1463, "step": 79358 }, { "epoch": 0.14071183949346575, "grad_norm": 0.515625, "learning_rate": 0.00033300116693820906, "loss": 0.1918, "step": 79360 }, { "epoch": 0.14071538565877556, "grad_norm": 0.37890625, "learning_rate": 0.00033297002952492117, "loss": 0.1729, "step": 79362 }, { "epoch": 0.14071893182408537, "grad_norm": 0.6171875, "learning_rate": 0.0003329388954661765, "loss": 0.1895, "step": 79364 }, { "epoch": 0.1407224779893952, "grad_norm": 1.1953125, "learning_rate": 0.00033290776476211184, "loss": 0.1754, "step": 79366 }, { "epoch": 0.140726024154705, "grad_norm": 0.51171875, "learning_rate": 0.0003328766374128631, "loss": 0.1742, "step": 79368 }, { "epoch": 0.14072957032001482, "grad_norm": 0.4765625, "learning_rate": 0.0003328455134185663, "loss": 0.1439, "step": 79370 }, { "epoch": 0.14073311648532463, "grad_norm": 0.51953125, "learning_rate": 0.00033281439277935756, "loss": 0.1652, "step": 79372 }, { "epoch": 0.14073666265063445, "grad_norm": 0.435546875, "learning_rate": 0.0003327832754953728, "loss": 0.1578, "step": 79374 }, { "epoch": 0.14074020881594426, "grad_norm": 0.166015625, "learning_rate": 0.00033275216156674893, "loss": 0.1246, "step": 79376 }, { "epoch": 0.14074375498125408, "grad_norm": 0.259765625, "learning_rate": 0.0003327210509936208, "loss": 0.1411, "step": 79378 }, { "epoch": 0.1407473011465639, "grad_norm": 0.439453125, "learning_rate": 0.00033268994377612545, "loss": 0.1798, "step": 79380 }, { "epoch": 0.1407508473118737, "grad_norm": 1.8359375, "learning_rate": 0.00033265883991439834, "loss": 0.2113, "step": 79382 }, { "epoch": 0.14075439347718352, "grad_norm": 1.6796875, "learning_rate": 0.00033262773940857614, "loss": 0.3284, "step": 79384 }, { "epoch": 0.14075793964249333, "grad_norm": 0.384765625, "learning_rate": 0.00033259664225879416, "loss": 0.1782, "step": 79386 }, { "epoch": 0.14076148580780315, "grad_norm": 0.396484375, "learning_rate": 0.00033256554846518874, "loss": 0.3812, "step": 79388 }, { "epoch": 0.14076503197311296, "grad_norm": 0.9765625, "learning_rate": 0.0003325344580278959, "loss": 0.2435, "step": 79390 }, { "epoch": 0.14076857813842278, "grad_norm": 0.59375, "learning_rate": 0.0003325033709470515, "loss": 0.1437, "step": 79392 }, { "epoch": 0.1407721243037326, "grad_norm": 0.357421875, "learning_rate": 0.00033247228722279166, "loss": 0.195, "step": 79394 }, { "epoch": 0.1407756704690424, "grad_norm": 0.51953125, "learning_rate": 0.00033244120685525195, "loss": 0.2129, "step": 79396 }, { "epoch": 0.14077921663435222, "grad_norm": 0.53515625, "learning_rate": 0.00033241012984456863, "loss": 0.2153, "step": 79398 }, { "epoch": 0.14078276279966204, "grad_norm": 0.671875, "learning_rate": 0.00033237905619087774, "loss": 0.185, "step": 79400 }, { "epoch": 0.14078630896497188, "grad_norm": 0.306640625, "learning_rate": 0.0003323479858943148, "loss": 0.1799, "step": 79402 }, { "epoch": 0.1407898551302817, "grad_norm": 1.0234375, "learning_rate": 0.0003323169189550157, "loss": 0.2588, "step": 79404 }, { "epoch": 0.1407934012955915, "grad_norm": 0.404296875, "learning_rate": 0.0003322858553731168, "loss": 0.153, "step": 79406 }, { "epoch": 0.14079694746090132, "grad_norm": 0.234375, "learning_rate": 0.00033225479514875346, "loss": 0.1696, "step": 79408 }, { "epoch": 0.14080049362621114, "grad_norm": 1.5078125, "learning_rate": 0.0003322237382820618, "loss": 0.2891, "step": 79410 }, { "epoch": 0.14080403979152095, "grad_norm": 0.470703125, "learning_rate": 0.0003321926847731775, "loss": 0.2038, "step": 79412 }, { "epoch": 0.14080758595683077, "grad_norm": 0.70703125, "learning_rate": 0.0003321616346222362, "loss": 0.1721, "step": 79414 }, { "epoch": 0.14081113212214058, "grad_norm": 0.2216796875, "learning_rate": 0.00033213058782937435, "loss": 0.1828, "step": 79416 }, { "epoch": 0.1408146782874504, "grad_norm": 0.375, "learning_rate": 0.0003320995443947267, "loss": 0.165, "step": 79418 }, { "epoch": 0.1408182244527602, "grad_norm": 0.185546875, "learning_rate": 0.00033206850431842993, "loss": 0.1386, "step": 79420 }, { "epoch": 0.14082177061807002, "grad_norm": 0.73828125, "learning_rate": 0.0003320374676006193, "loss": 0.1779, "step": 79422 }, { "epoch": 0.14082531678337984, "grad_norm": 0.47265625, "learning_rate": 0.00033200643424143093, "loss": 0.2512, "step": 79424 }, { "epoch": 0.14082886294868965, "grad_norm": 0.54296875, "learning_rate": 0.0003319754042410001, "loss": 0.1766, "step": 79426 }, { "epoch": 0.14083240911399947, "grad_norm": 0.2216796875, "learning_rate": 0.00033194437759946285, "loss": 0.167, "step": 79428 }, { "epoch": 0.14083595527930928, "grad_norm": 2.171875, "learning_rate": 0.00033191335431695466, "loss": 0.3077, "step": 79430 }, { "epoch": 0.1408395014446191, "grad_norm": 0.38671875, "learning_rate": 0.00033188233439361136, "loss": 0.1479, "step": 79432 }, { "epoch": 0.1408430476099289, "grad_norm": 0.515625, "learning_rate": 0.0003318513178295687, "loss": 0.17, "step": 79434 }, { "epoch": 0.14084659377523873, "grad_norm": 0.482421875, "learning_rate": 0.00033182030462496173, "loss": 0.1542, "step": 79436 }, { "epoch": 0.14085013994054854, "grad_norm": 1.546875, "learning_rate": 0.0003317892947799269, "loss": 0.2196, "step": 79438 }, { "epoch": 0.14085368610585836, "grad_norm": 0.298828125, "learning_rate": 0.0003317582882945995, "loss": 0.1613, "step": 79440 }, { "epoch": 0.14085723227116817, "grad_norm": 0.421875, "learning_rate": 0.00033172728516911497, "loss": 0.2435, "step": 79442 }, { "epoch": 0.14086077843647798, "grad_norm": 0.78515625, "learning_rate": 0.000331696285403609, "loss": 0.1821, "step": 79444 }, { "epoch": 0.1408643246017878, "grad_norm": 1.546875, "learning_rate": 0.00033166528899821726, "loss": 0.3154, "step": 79446 }, { "epoch": 0.1408678707670976, "grad_norm": 0.30078125, "learning_rate": 0.0003316342959530753, "loss": 0.1712, "step": 79448 }, { "epoch": 0.14087141693240743, "grad_norm": 2.109375, "learning_rate": 0.00033160330626831846, "loss": 0.2642, "step": 79450 }, { "epoch": 0.14087496309771724, "grad_norm": 0.361328125, "learning_rate": 0.0003315723199440826, "loss": 0.1954, "step": 79452 }, { "epoch": 0.14087850926302706, "grad_norm": 2.34375, "learning_rate": 0.0003315413369805028, "loss": 0.1862, "step": 79454 }, { "epoch": 0.14088205542833687, "grad_norm": 0.439453125, "learning_rate": 0.00033151035737771496, "loss": 0.1921, "step": 79456 }, { "epoch": 0.1408856015936467, "grad_norm": 0.470703125, "learning_rate": 0.0003314793811358545, "loss": 0.204, "step": 79458 }, { "epoch": 0.1408891477589565, "grad_norm": 0.279296875, "learning_rate": 0.00033144840825505685, "loss": 0.1274, "step": 79460 }, { "epoch": 0.14089269392426632, "grad_norm": 0.33984375, "learning_rate": 0.0003314174387354572, "loss": 0.1579, "step": 79462 }, { "epoch": 0.14089624008957613, "grad_norm": 0.486328125, "learning_rate": 0.00033138647257719133, "loss": 0.1936, "step": 79464 }, { "epoch": 0.14089978625488594, "grad_norm": 0.37109375, "learning_rate": 0.00033135550978039463, "loss": 0.1391, "step": 79466 }, { "epoch": 0.14090333242019576, "grad_norm": 1.6640625, "learning_rate": 0.0003313245503452025, "loss": 0.2958, "step": 79468 }, { "epoch": 0.14090687858550557, "grad_norm": 0.1708984375, "learning_rate": 0.0003312935942717502, "loss": 0.1457, "step": 79470 }, { "epoch": 0.1409104247508154, "grad_norm": 0.330078125, "learning_rate": 0.0003312626415601732, "loss": 0.155, "step": 79472 }, { "epoch": 0.1409139709161252, "grad_norm": 0.54296875, "learning_rate": 0.000331231692210607, "loss": 0.2828, "step": 79474 }, { "epoch": 0.14091751708143502, "grad_norm": 0.453125, "learning_rate": 0.00033120074622318657, "loss": 0.1846, "step": 79476 }, { "epoch": 0.14092106324674483, "grad_norm": 0.7109375, "learning_rate": 0.0003311698035980476, "loss": 0.1702, "step": 79478 }, { "epoch": 0.14092460941205465, "grad_norm": 0.8671875, "learning_rate": 0.00033113886433532524, "loss": 0.276, "step": 79480 }, { "epoch": 0.14092815557736446, "grad_norm": 4.15625, "learning_rate": 0.0003311079284351552, "loss": 0.3553, "step": 79482 }, { "epoch": 0.14093170174267428, "grad_norm": 0.23828125, "learning_rate": 0.00033107699589767214, "loss": 0.1701, "step": 79484 }, { "epoch": 0.1409352479079841, "grad_norm": 0.5, "learning_rate": 0.00033104606672301187, "loss": 0.1528, "step": 79486 }, { "epoch": 0.1409387940732939, "grad_norm": 0.35546875, "learning_rate": 0.00033101514091130936, "loss": 0.1885, "step": 79488 }, { "epoch": 0.14094234023860372, "grad_norm": 0.388671875, "learning_rate": 0.00033098421846269996, "loss": 0.1846, "step": 79490 }, { "epoch": 0.14094588640391356, "grad_norm": 0.490234375, "learning_rate": 0.0003309532993773189, "loss": 0.2154, "step": 79492 }, { "epoch": 0.14094943256922338, "grad_norm": 1.015625, "learning_rate": 0.00033092238365530115, "loss": 0.2113, "step": 79494 }, { "epoch": 0.1409529787345332, "grad_norm": 0.150390625, "learning_rate": 0.00033089147129678246, "loss": 0.2055, "step": 79496 }, { "epoch": 0.140956524899843, "grad_norm": 0.134765625, "learning_rate": 0.00033086056230189763, "loss": 0.1606, "step": 79498 }, { "epoch": 0.14096007106515282, "grad_norm": 0.265625, "learning_rate": 0.0003308296566707819, "loss": 0.1797, "step": 79500 }, { "epoch": 0.14096361723046263, "grad_norm": 0.443359375, "learning_rate": 0.0003307987544035706, "loss": 0.3445, "step": 79502 }, { "epoch": 0.14096716339577245, "grad_norm": 0.2265625, "learning_rate": 0.0003307678555003984, "loss": 0.1995, "step": 79504 }, { "epoch": 0.14097070956108226, "grad_norm": 0.4140625, "learning_rate": 0.0003307369599614013, "loss": 0.1738, "step": 79506 }, { "epoch": 0.14097425572639208, "grad_norm": 0.275390625, "learning_rate": 0.0003307060677867134, "loss": 0.1871, "step": 79508 }, { "epoch": 0.1409778018917019, "grad_norm": 0.251953125, "learning_rate": 0.0003306751789764706, "loss": 0.1693, "step": 79510 }, { "epoch": 0.1409813480570117, "grad_norm": 0.609375, "learning_rate": 0.00033064429353080746, "loss": 0.2418, "step": 79512 }, { "epoch": 0.14098489422232152, "grad_norm": 0.34765625, "learning_rate": 0.0003306134114498597, "loss": 0.1464, "step": 79514 }, { "epoch": 0.14098844038763134, "grad_norm": 0.6796875, "learning_rate": 0.0003305825327337615, "loss": 0.2112, "step": 79516 }, { "epoch": 0.14099198655294115, "grad_norm": 0.470703125, "learning_rate": 0.0003305516573826486, "loss": 0.1503, "step": 79518 }, { "epoch": 0.14099553271825097, "grad_norm": 0.4375, "learning_rate": 0.00033052078539665575, "loss": 0.1827, "step": 79520 }, { "epoch": 0.14099907888356078, "grad_norm": 0.28125, "learning_rate": 0.00033048991677591804, "loss": 0.1687, "step": 79522 }, { "epoch": 0.1410026250488706, "grad_norm": 0.66015625, "learning_rate": 0.0003304590515205705, "loss": 0.1817, "step": 79524 }, { "epoch": 0.1410061712141804, "grad_norm": 0.48046875, "learning_rate": 0.00033042818963074786, "loss": 0.1511, "step": 79526 }, { "epoch": 0.14100971737949022, "grad_norm": 0.41015625, "learning_rate": 0.0003303973311065854, "loss": 0.1712, "step": 79528 }, { "epoch": 0.14101326354480004, "grad_norm": 0.1611328125, "learning_rate": 0.00033036647594821813, "loss": 0.1529, "step": 79530 }, { "epoch": 0.14101680971010985, "grad_norm": 0.34765625, "learning_rate": 0.00033033562415578073, "loss": 0.1742, "step": 79532 }, { "epoch": 0.14102035587541967, "grad_norm": 0.48828125, "learning_rate": 0.0003303047757294081, "loss": 0.16, "step": 79534 }, { "epoch": 0.14102390204072948, "grad_norm": 0.498046875, "learning_rate": 0.0003302739306692354, "loss": 0.2181, "step": 79536 }, { "epoch": 0.1410274482060393, "grad_norm": 0.984375, "learning_rate": 0.0003302430889753975, "loss": 0.2031, "step": 79538 }, { "epoch": 0.1410309943713491, "grad_norm": 0.796875, "learning_rate": 0.00033021225064802916, "loss": 0.3242, "step": 79540 }, { "epoch": 0.14103454053665893, "grad_norm": 0.39453125, "learning_rate": 0.00033018141568726535, "loss": 0.1685, "step": 79542 }, { "epoch": 0.14103808670196874, "grad_norm": 0.2177734375, "learning_rate": 0.0003301505840932406, "loss": 0.2978, "step": 79544 }, { "epoch": 0.14104163286727855, "grad_norm": 0.396484375, "learning_rate": 0.00033011975586609023, "loss": 0.1829, "step": 79546 }, { "epoch": 0.14104517903258837, "grad_norm": 0.322265625, "learning_rate": 0.00033008893100594885, "loss": 0.1751, "step": 79548 }, { "epoch": 0.14104872519789818, "grad_norm": 0.2177734375, "learning_rate": 0.0003300581095129512, "loss": 0.2195, "step": 79550 }, { "epoch": 0.141052271363208, "grad_norm": 0.33203125, "learning_rate": 0.0003300272913872321, "loss": 0.1789, "step": 79552 }, { "epoch": 0.1410558175285178, "grad_norm": 1.0546875, "learning_rate": 0.0003299964766289265, "loss": 0.1908, "step": 79554 }, { "epoch": 0.14105936369382763, "grad_norm": 0.208984375, "learning_rate": 0.00032996566523816896, "loss": 0.1994, "step": 79556 }, { "epoch": 0.14106290985913744, "grad_norm": 0.3046875, "learning_rate": 0.0003299348572150944, "loss": 0.1299, "step": 79558 }, { "epoch": 0.14106645602444726, "grad_norm": 0.416015625, "learning_rate": 0.0003299040525598375, "loss": 0.1428, "step": 79560 }, { "epoch": 0.14107000218975707, "grad_norm": 0.7109375, "learning_rate": 0.0003298732512725326, "loss": 0.1543, "step": 79562 }, { "epoch": 0.14107354835506689, "grad_norm": 0.61328125, "learning_rate": 0.0003298424533533153, "loss": 0.1612, "step": 79564 }, { "epoch": 0.1410770945203767, "grad_norm": 0.4140625, "learning_rate": 0.00032981165880231934, "loss": 0.1769, "step": 79566 }, { "epoch": 0.14108064068568651, "grad_norm": 0.3125, "learning_rate": 0.00032978086761967993, "loss": 0.1499, "step": 79568 }, { "epoch": 0.14108418685099633, "grad_norm": 0.333984375, "learning_rate": 0.0003297500798055314, "loss": 0.1346, "step": 79570 }, { "epoch": 0.14108773301630614, "grad_norm": 0.328125, "learning_rate": 0.0003297192953600091, "loss": 0.1878, "step": 79572 }, { "epoch": 0.14109127918161596, "grad_norm": 0.341796875, "learning_rate": 0.0003296885142832467, "loss": 0.1462, "step": 79574 }, { "epoch": 0.14109482534692577, "grad_norm": 0.76171875, "learning_rate": 0.0003296577365753795, "loss": 0.181, "step": 79576 }, { "epoch": 0.1410983715122356, "grad_norm": 0.7734375, "learning_rate": 0.000329626962236542, "loss": 0.1439, "step": 79578 }, { "epoch": 0.1411019176775454, "grad_norm": 0.349609375, "learning_rate": 0.0003295961912668686, "loss": 0.2462, "step": 79580 }, { "epoch": 0.14110546384285522, "grad_norm": 0.267578125, "learning_rate": 0.0003295654236664939, "loss": 0.1119, "step": 79582 }, { "epoch": 0.14110901000816506, "grad_norm": 1.203125, "learning_rate": 0.00032953465943555226, "loss": 0.1641, "step": 79584 }, { "epoch": 0.14111255617347487, "grad_norm": 1.8515625, "learning_rate": 0.00032950389857417875, "loss": 0.2891, "step": 79586 }, { "epoch": 0.1411161023387847, "grad_norm": 0.259765625, "learning_rate": 0.0003294731410825077, "loss": 0.1816, "step": 79588 }, { "epoch": 0.1411196485040945, "grad_norm": 0.27734375, "learning_rate": 0.0003294423869606735, "loss": 0.2195, "step": 79590 }, { "epoch": 0.14112319466940432, "grad_norm": 1.25, "learning_rate": 0.00032941163620881056, "loss": 0.1569, "step": 79592 }, { "epoch": 0.14112674083471413, "grad_norm": 0.244140625, "learning_rate": 0.0003293808888270536, "loss": 0.1621, "step": 79594 }, { "epoch": 0.14113028700002395, "grad_norm": 1.125, "learning_rate": 0.00032935014481553707, "loss": 0.211, "step": 79596 }, { "epoch": 0.14113383316533376, "grad_norm": 0.478515625, "learning_rate": 0.00032931940417439546, "loss": 0.201, "step": 79598 }, { "epoch": 0.14113737933064358, "grad_norm": 0.2265625, "learning_rate": 0.0003292886669037629, "loss": 0.1712, "step": 79600 }, { "epoch": 0.1411409254959534, "grad_norm": 1.5625, "learning_rate": 0.000329257933003774, "loss": 0.1885, "step": 79602 }, { "epoch": 0.1411444716612632, "grad_norm": 0.439453125, "learning_rate": 0.0003292272024745635, "loss": 0.1676, "step": 79604 }, { "epoch": 0.14114801782657302, "grad_norm": 0.8046875, "learning_rate": 0.00032919647531626516, "loss": 0.1874, "step": 79606 }, { "epoch": 0.14115156399188283, "grad_norm": 0.330078125, "learning_rate": 0.0003291657515290139, "loss": 0.1903, "step": 79608 }, { "epoch": 0.14115511015719265, "grad_norm": 0.435546875, "learning_rate": 0.00032913503111294364, "loss": 0.1491, "step": 79610 }, { "epoch": 0.14115865632250246, "grad_norm": 0.29296875, "learning_rate": 0.0003291043140681894, "loss": 0.1267, "step": 79612 }, { "epoch": 0.14116220248781228, "grad_norm": 0.5234375, "learning_rate": 0.0003290736003948848, "loss": 0.1738, "step": 79614 }, { "epoch": 0.1411657486531221, "grad_norm": 0.33984375, "learning_rate": 0.0003290428900931646, "loss": 0.1718, "step": 79616 }, { "epoch": 0.1411692948184319, "grad_norm": 0.1875, "learning_rate": 0.00032901218316316297, "loss": 0.3621, "step": 79618 }, { "epoch": 0.14117284098374172, "grad_norm": 0.5234375, "learning_rate": 0.0003289814796050139, "loss": 0.1606, "step": 79620 }, { "epoch": 0.14117638714905154, "grad_norm": 0.259765625, "learning_rate": 0.00032895077941885243, "loss": 0.1555, "step": 79622 }, { "epoch": 0.14117993331436135, "grad_norm": 0.8671875, "learning_rate": 0.0003289200826048121, "loss": 0.1667, "step": 79624 }, { "epoch": 0.14118347947967116, "grad_norm": 0.5703125, "learning_rate": 0.00032888938916302745, "loss": 0.2152, "step": 79626 }, { "epoch": 0.14118702564498098, "grad_norm": 0.4375, "learning_rate": 0.00032885869909363266, "loss": 0.1981, "step": 79628 }, { "epoch": 0.1411905718102908, "grad_norm": 0.7578125, "learning_rate": 0.0003288280123967623, "loss": 0.1745, "step": 79630 }, { "epoch": 0.1411941179756006, "grad_norm": 0.42578125, "learning_rate": 0.0003287973290725499, "loss": 0.1947, "step": 79632 }, { "epoch": 0.14119766414091042, "grad_norm": 0.353515625, "learning_rate": 0.0003287666491211301, "loss": 0.163, "step": 79634 }, { "epoch": 0.14120121030622024, "grad_norm": 0.41015625, "learning_rate": 0.0003287359725426371, "loss": 0.204, "step": 79636 }, { "epoch": 0.14120475647153005, "grad_norm": 0.41015625, "learning_rate": 0.000328705299337205, "loss": 0.1895, "step": 79638 }, { "epoch": 0.14120830263683987, "grad_norm": 0.197265625, "learning_rate": 0.00032867462950496784, "loss": 0.1357, "step": 79640 }, { "epoch": 0.14121184880214968, "grad_norm": 0.337890625, "learning_rate": 0.0003286439630460596, "loss": 0.1566, "step": 79642 }, { "epoch": 0.1412153949674595, "grad_norm": 0.2265625, "learning_rate": 0.00032861329996061484, "loss": 0.1497, "step": 79644 }, { "epoch": 0.1412189411327693, "grad_norm": 0.96875, "learning_rate": 0.00032858264024876737, "loss": 0.3071, "step": 79646 }, { "epoch": 0.14122248729807912, "grad_norm": 0.423828125, "learning_rate": 0.0003285519839106514, "loss": 0.2044, "step": 79648 }, { "epoch": 0.14122603346338894, "grad_norm": 0.578125, "learning_rate": 0.0003285213309464007, "loss": 0.1614, "step": 79650 }, { "epoch": 0.14122957962869875, "grad_norm": 0.2197265625, "learning_rate": 0.0003284906813561498, "loss": 0.1593, "step": 79652 }, { "epoch": 0.14123312579400857, "grad_norm": 0.36328125, "learning_rate": 0.0003284600351400324, "loss": 0.15, "step": 79654 }, { "epoch": 0.14123667195931838, "grad_norm": 0.390625, "learning_rate": 0.0003284293922981828, "loss": 0.2357, "step": 79656 }, { "epoch": 0.1412402181246282, "grad_norm": 0.337890625, "learning_rate": 0.00032839875283073485, "loss": 0.1629, "step": 79658 }, { "epoch": 0.141243764289938, "grad_norm": 8.875, "learning_rate": 0.0003283681167378223, "loss": 0.3067, "step": 79660 }, { "epoch": 0.14124731045524783, "grad_norm": 0.69140625, "learning_rate": 0.00032833748401957973, "loss": 0.1877, "step": 79662 }, { "epoch": 0.14125085662055764, "grad_norm": 1.203125, "learning_rate": 0.0003283068546761405, "loss": 0.2886, "step": 79664 }, { "epoch": 0.14125440278586746, "grad_norm": 1.4453125, "learning_rate": 0.00032827622870763894, "loss": 0.3959, "step": 79666 }, { "epoch": 0.14125794895117727, "grad_norm": 0.37109375, "learning_rate": 0.0003282456061142088, "loss": 0.1814, "step": 79668 }, { "epoch": 0.14126149511648708, "grad_norm": 0.208984375, "learning_rate": 0.0003282149868959844, "loss": 0.172, "step": 79670 }, { "epoch": 0.1412650412817969, "grad_norm": 0.1572265625, "learning_rate": 0.0003281843710530989, "loss": 0.1602, "step": 79672 }, { "epoch": 0.14126858744710674, "grad_norm": 0.421875, "learning_rate": 0.0003281537585856869, "loss": 0.146, "step": 79674 }, { "epoch": 0.14127213361241656, "grad_norm": 0.216796875, "learning_rate": 0.000328123149493882, "loss": 0.2222, "step": 79676 }, { "epoch": 0.14127567977772637, "grad_norm": 1.390625, "learning_rate": 0.00032809254377781817, "loss": 0.2594, "step": 79678 }, { "epoch": 0.14127922594303619, "grad_norm": 1.0078125, "learning_rate": 0.000328061941437629, "loss": 0.2949, "step": 79680 }, { "epoch": 0.141282772108346, "grad_norm": 0.22265625, "learning_rate": 0.0003280313424734485, "loss": 0.1385, "step": 79682 }, { "epoch": 0.14128631827365581, "grad_norm": 0.421875, "learning_rate": 0.0003280007468854106, "loss": 0.2928, "step": 79684 }, { "epoch": 0.14128986443896563, "grad_norm": 0.259765625, "learning_rate": 0.0003279701546736489, "loss": 0.1522, "step": 79686 }, { "epoch": 0.14129341060427544, "grad_norm": 0.3125, "learning_rate": 0.00032793956583829735, "loss": 0.2076, "step": 79688 }, { "epoch": 0.14129695676958526, "grad_norm": 0.1875, "learning_rate": 0.00032790898037948974, "loss": 0.1597, "step": 79690 }, { "epoch": 0.14130050293489507, "grad_norm": 2.03125, "learning_rate": 0.00032787839829735954, "loss": 0.252, "step": 79692 }, { "epoch": 0.1413040491002049, "grad_norm": 0.189453125, "learning_rate": 0.0003278478195920412, "loss": 0.1766, "step": 79694 }, { "epoch": 0.1413075952655147, "grad_norm": 0.37109375, "learning_rate": 0.00032781724426366745, "loss": 0.1268, "step": 79696 }, { "epoch": 0.14131114143082452, "grad_norm": 1.1640625, "learning_rate": 0.0003277866723123728, "loss": 0.1951, "step": 79698 }, { "epoch": 0.14131468759613433, "grad_norm": 0.81640625, "learning_rate": 0.00032775610373829054, "loss": 0.2117, "step": 79700 }, { "epoch": 0.14131823376144415, "grad_norm": 6.6875, "learning_rate": 0.0003277255385415546, "loss": 0.2843, "step": 79702 }, { "epoch": 0.14132177992675396, "grad_norm": 0.2578125, "learning_rate": 0.0003276949767222986, "loss": 0.1645, "step": 79704 }, { "epoch": 0.14132532609206377, "grad_norm": 0.34765625, "learning_rate": 0.0003276644182806562, "loss": 0.1406, "step": 79706 }, { "epoch": 0.1413288722573736, "grad_norm": 0.90234375, "learning_rate": 0.000327633863216761, "loss": 0.1908, "step": 79708 }, { "epoch": 0.1413324184226834, "grad_norm": 0.474609375, "learning_rate": 0.0003276033115307464, "loss": 0.2049, "step": 79710 }, { "epoch": 0.14133596458799322, "grad_norm": 0.1416015625, "learning_rate": 0.00032757276322274676, "loss": 0.1671, "step": 79712 }, { "epoch": 0.14133951075330303, "grad_norm": 0.283203125, "learning_rate": 0.0003275422182928946, "loss": 0.1685, "step": 79714 }, { "epoch": 0.14134305691861285, "grad_norm": 0.93359375, "learning_rate": 0.00032751167674132444, "loss": 0.3186, "step": 79716 }, { "epoch": 0.14134660308392266, "grad_norm": 1.2421875, "learning_rate": 0.00032748113856816916, "loss": 0.1579, "step": 79718 }, { "epoch": 0.14135014924923248, "grad_norm": 0.5078125, "learning_rate": 0.0003274506037735632, "loss": 0.1813, "step": 79720 }, { "epoch": 0.1413536954145423, "grad_norm": 0.57421875, "learning_rate": 0.00032742007235763906, "loss": 0.3039, "step": 79722 }, { "epoch": 0.1413572415798521, "grad_norm": 0.439453125, "learning_rate": 0.0003273895443205309, "loss": 0.1903, "step": 79724 }, { "epoch": 0.14136078774516192, "grad_norm": 0.61328125, "learning_rate": 0.00032735901966237215, "loss": 0.178, "step": 79726 }, { "epoch": 0.14136433391047173, "grad_norm": 1.5234375, "learning_rate": 0.00032732849838329627, "loss": 0.1953, "step": 79728 }, { "epoch": 0.14136788007578155, "grad_norm": 1.7890625, "learning_rate": 0.0003272979804834367, "loss": 0.3542, "step": 79730 }, { "epoch": 0.14137142624109136, "grad_norm": 0.1787109375, "learning_rate": 0.00032726746596292667, "loss": 0.1472, "step": 79732 }, { "epoch": 0.14137497240640118, "grad_norm": 0.34765625, "learning_rate": 0.0003272369548219001, "loss": 0.1841, "step": 79734 }, { "epoch": 0.141378518571711, "grad_norm": 0.51953125, "learning_rate": 0.0003272064470604903, "loss": 0.2034, "step": 79736 }, { "epoch": 0.1413820647370208, "grad_norm": 0.87109375, "learning_rate": 0.00032717594267883053, "loss": 0.1604, "step": 79738 }, { "epoch": 0.14138561090233062, "grad_norm": 2.15625, "learning_rate": 0.0003271454416770541, "loss": 0.2632, "step": 79740 }, { "epoch": 0.14138915706764044, "grad_norm": 0.376953125, "learning_rate": 0.00032711494405529475, "loss": 0.2224, "step": 79742 }, { "epoch": 0.14139270323295025, "grad_norm": 0.46484375, "learning_rate": 0.00032708444981368576, "loss": 0.1647, "step": 79744 }, { "epoch": 0.14139624939826007, "grad_norm": 1.8828125, "learning_rate": 0.00032705395895236034, "loss": 0.3758, "step": 79746 }, { "epoch": 0.14139979556356988, "grad_norm": 0.259765625, "learning_rate": 0.00032702347147145206, "loss": 0.1762, "step": 79748 }, { "epoch": 0.1414033417288797, "grad_norm": 0.2373046875, "learning_rate": 0.00032699298737109376, "loss": 0.188, "step": 79750 }, { "epoch": 0.1414068878941895, "grad_norm": 1.3125, "learning_rate": 0.0003269625066514196, "loss": 0.2076, "step": 79752 }, { "epoch": 0.14141043405949932, "grad_norm": 0.59375, "learning_rate": 0.00032693202931256203, "loss": 0.1533, "step": 79754 }, { "epoch": 0.14141398022480914, "grad_norm": 0.1875, "learning_rate": 0.00032690155535465483, "loss": 0.1436, "step": 79756 }, { "epoch": 0.14141752639011895, "grad_norm": 0.20703125, "learning_rate": 0.00032687108477783103, "loss": 0.1705, "step": 79758 }, { "epoch": 0.14142107255542877, "grad_norm": 0.9453125, "learning_rate": 0.0003268406175822243, "loss": 0.38, "step": 79760 }, { "epoch": 0.14142461872073858, "grad_norm": 1.6796875, "learning_rate": 0.0003268101537679673, "loss": 0.2597, "step": 79762 }, { "epoch": 0.14142816488604842, "grad_norm": 0.298828125, "learning_rate": 0.0003267796933351938, "loss": 0.1876, "step": 79764 }, { "epoch": 0.14143171105135824, "grad_norm": 0.3046875, "learning_rate": 0.0003267492362840367, "loss": 0.2133, "step": 79766 }, { "epoch": 0.14143525721666805, "grad_norm": 0.380859375, "learning_rate": 0.00032671878261462934, "loss": 0.2439, "step": 79768 }, { "epoch": 0.14143880338197787, "grad_norm": 0.40234375, "learning_rate": 0.00032668833232710485, "loss": 0.3069, "step": 79770 }, { "epoch": 0.14144234954728768, "grad_norm": 3.1875, "learning_rate": 0.0003266578854215962, "loss": 0.2824, "step": 79772 }, { "epoch": 0.1414458957125975, "grad_norm": 0.52734375, "learning_rate": 0.0003266274418982368, "loss": 0.312, "step": 79774 }, { "epoch": 0.1414494418779073, "grad_norm": 0.396484375, "learning_rate": 0.00032659700175716, "loss": 0.2177, "step": 79776 }, { "epoch": 0.14145298804321713, "grad_norm": 0.98828125, "learning_rate": 0.0003265665649984986, "loss": 0.4343, "step": 79778 }, { "epoch": 0.14145653420852694, "grad_norm": 0.271484375, "learning_rate": 0.00032653613162238545, "loss": 0.1842, "step": 79780 }, { "epoch": 0.14146008037383676, "grad_norm": 0.369140625, "learning_rate": 0.0003265057016289542, "loss": 0.1996, "step": 79782 }, { "epoch": 0.14146362653914657, "grad_norm": 0.1953125, "learning_rate": 0.00032647527501833775, "loss": 0.1657, "step": 79784 }, { "epoch": 0.14146717270445638, "grad_norm": 0.30859375, "learning_rate": 0.0003264448517906692, "loss": 0.3441, "step": 79786 }, { "epoch": 0.1414707188697662, "grad_norm": 0.6484375, "learning_rate": 0.00032641443194608146, "loss": 0.2625, "step": 79788 }, { "epoch": 0.141474265035076, "grad_norm": 0.296875, "learning_rate": 0.00032638401548470747, "loss": 0.1685, "step": 79790 }, { "epoch": 0.14147781120038583, "grad_norm": 0.427734375, "learning_rate": 0.00032635360240668057, "loss": 0.2025, "step": 79792 }, { "epoch": 0.14148135736569564, "grad_norm": 0.259765625, "learning_rate": 0.00032632319271213374, "loss": 0.1457, "step": 79794 }, { "epoch": 0.14148490353100546, "grad_norm": 0.326171875, "learning_rate": 0.00032629278640119975, "loss": 0.1544, "step": 79796 }, { "epoch": 0.14148844969631527, "grad_norm": 0.322265625, "learning_rate": 0.0003262623834740115, "loss": 0.1602, "step": 79798 }, { "epoch": 0.1414919958616251, "grad_norm": 0.419921875, "learning_rate": 0.00032623198393070234, "loss": 0.152, "step": 79800 }, { "epoch": 0.1414955420269349, "grad_norm": 0.26953125, "learning_rate": 0.0003262015877714051, "loss": 0.1503, "step": 79802 }, { "epoch": 0.14149908819224472, "grad_norm": 0.52734375, "learning_rate": 0.0003261711949962526, "loss": 0.1556, "step": 79804 }, { "epoch": 0.14150263435755453, "grad_norm": 0.29296875, "learning_rate": 0.00032614080560537775, "loss": 0.1698, "step": 79806 }, { "epoch": 0.14150618052286434, "grad_norm": 0.5390625, "learning_rate": 0.0003261104195989134, "loss": 0.2084, "step": 79808 }, { "epoch": 0.14150972668817416, "grad_norm": 0.419921875, "learning_rate": 0.00032608003697699294, "loss": 0.1506, "step": 79810 }, { "epoch": 0.14151327285348397, "grad_norm": 1.703125, "learning_rate": 0.0003260496577397483, "loss": 0.2757, "step": 79812 }, { "epoch": 0.1415168190187938, "grad_norm": 0.2578125, "learning_rate": 0.0003260192818873132, "loss": 0.1988, "step": 79814 }, { "epoch": 0.1415203651841036, "grad_norm": 0.240234375, "learning_rate": 0.00032598890941982003, "loss": 0.1507, "step": 79816 }, { "epoch": 0.14152391134941342, "grad_norm": 0.330078125, "learning_rate": 0.0003259585403374022, "loss": 0.1985, "step": 79818 }, { "epoch": 0.14152745751472323, "grad_norm": 0.1943359375, "learning_rate": 0.00032592817464019154, "loss": 0.16, "step": 79820 }, { "epoch": 0.14153100368003305, "grad_norm": 1.2109375, "learning_rate": 0.00032589781232832176, "loss": 0.2722, "step": 79822 }, { "epoch": 0.14153454984534286, "grad_norm": 1.4765625, "learning_rate": 0.00032586745340192514, "loss": 0.1876, "step": 79824 }, { "epoch": 0.14153809601065268, "grad_norm": 0.306640625, "learning_rate": 0.0003258370978611348, "loss": 0.1588, "step": 79826 }, { "epoch": 0.1415416421759625, "grad_norm": 0.58984375, "learning_rate": 0.00032580674570608314, "loss": 0.202, "step": 79828 }, { "epoch": 0.1415451883412723, "grad_norm": 0.33203125, "learning_rate": 0.0003257763969369029, "loss": 0.1537, "step": 79830 }, { "epoch": 0.14154873450658212, "grad_norm": 0.376953125, "learning_rate": 0.0003257460515537273, "loss": 0.161, "step": 79832 }, { "epoch": 0.14155228067189193, "grad_norm": 0.255859375, "learning_rate": 0.0003257157095566886, "loss": 0.1665, "step": 79834 }, { "epoch": 0.14155582683720175, "grad_norm": 0.392578125, "learning_rate": 0.00032568537094591963, "loss": 0.1797, "step": 79836 }, { "epoch": 0.14155937300251156, "grad_norm": 0.63671875, "learning_rate": 0.000325655035721553, "loss": 0.239, "step": 79838 }, { "epoch": 0.14156291916782138, "grad_norm": 0.376953125, "learning_rate": 0.00032562470388372144, "loss": 0.1488, "step": 79840 }, { "epoch": 0.1415664653331312, "grad_norm": 0.32421875, "learning_rate": 0.00032559437543255783, "loss": 0.1262, "step": 79842 }, { "epoch": 0.141570011498441, "grad_norm": 0.5703125, "learning_rate": 0.00032556405036819445, "loss": 0.4273, "step": 79844 }, { "epoch": 0.14157355766375082, "grad_norm": 0.451171875, "learning_rate": 0.00032553372869076417, "loss": 0.1617, "step": 79846 }, { "epoch": 0.14157710382906064, "grad_norm": 0.380859375, "learning_rate": 0.0003255034104003992, "loss": 0.1661, "step": 79848 }, { "epoch": 0.14158064999437045, "grad_norm": 0.251953125, "learning_rate": 0.0003254730954972329, "loss": 0.1961, "step": 79850 }, { "epoch": 0.14158419615968026, "grad_norm": 0.7734375, "learning_rate": 0.000325442783981397, "loss": 0.1861, "step": 79852 }, { "epoch": 0.14158774232499008, "grad_norm": 0.484375, "learning_rate": 0.0003254124758530246, "loss": 0.1623, "step": 79854 }, { "epoch": 0.14159128849029992, "grad_norm": 0.87109375, "learning_rate": 0.00032538217111224805, "loss": 0.1527, "step": 79856 }, { "epoch": 0.14159483465560974, "grad_norm": 0.3359375, "learning_rate": 0.0003253518697592001, "loss": 0.2221, "step": 79858 }, { "epoch": 0.14159838082091955, "grad_norm": 0.73828125, "learning_rate": 0.0003253215717940129, "loss": 0.3562, "step": 79860 }, { "epoch": 0.14160192698622937, "grad_norm": 0.27734375, "learning_rate": 0.00032529127721681907, "loss": 0.1507, "step": 79862 }, { "epoch": 0.14160547315153918, "grad_norm": 0.361328125, "learning_rate": 0.0003252609860277513, "loss": 0.125, "step": 79864 }, { "epoch": 0.141609019316849, "grad_norm": 0.2099609375, "learning_rate": 0.00032523069822694206, "loss": 0.1459, "step": 79866 }, { "epoch": 0.1416125654821588, "grad_norm": 0.5625, "learning_rate": 0.00032520041381452365, "loss": 0.1331, "step": 79868 }, { "epoch": 0.14161611164746862, "grad_norm": 0.412109375, "learning_rate": 0.0003251701327906283, "loss": 0.1677, "step": 79870 }, { "epoch": 0.14161965781277844, "grad_norm": 0.28125, "learning_rate": 0.0003251398551553891, "loss": 0.1559, "step": 79872 }, { "epoch": 0.14162320397808825, "grad_norm": 0.671875, "learning_rate": 0.00032510958090893794, "loss": 0.1856, "step": 79874 }, { "epoch": 0.14162675014339807, "grad_norm": 0.5703125, "learning_rate": 0.00032507931005140744, "loss": 0.1544, "step": 79876 }, { "epoch": 0.14163029630870788, "grad_norm": 1.0625, "learning_rate": 0.0003250490425829299, "loss": 0.3664, "step": 79878 }, { "epoch": 0.1416338424740177, "grad_norm": 0.431640625, "learning_rate": 0.0003250187785036375, "loss": 0.1961, "step": 79880 }, { "epoch": 0.1416373886393275, "grad_norm": 0.345703125, "learning_rate": 0.00032498851781366295, "loss": 0.1643, "step": 79882 }, { "epoch": 0.14164093480463733, "grad_norm": 0.353515625, "learning_rate": 0.0003249582605131385, "loss": 0.1889, "step": 79884 }, { "epoch": 0.14164448096994714, "grad_norm": 0.59375, "learning_rate": 0.0003249280066021964, "loss": 0.1981, "step": 79886 }, { "epoch": 0.14164802713525695, "grad_norm": 1.03125, "learning_rate": 0.0003248977560809687, "loss": 0.1526, "step": 79888 }, { "epoch": 0.14165157330056677, "grad_norm": 0.3125, "learning_rate": 0.00032486750894958826, "loss": 0.1543, "step": 79890 }, { "epoch": 0.14165511946587658, "grad_norm": 1.0234375, "learning_rate": 0.00032483726520818714, "loss": 0.1898, "step": 79892 }, { "epoch": 0.1416586656311864, "grad_norm": 0.318359375, "learning_rate": 0.00032480702485689745, "loss": 0.1504, "step": 79894 }, { "epoch": 0.1416622117964962, "grad_norm": 0.5859375, "learning_rate": 0.0003247767878958517, "loss": 0.1652, "step": 79896 }, { "epoch": 0.14166575796180603, "grad_norm": 0.46875, "learning_rate": 0.00032474655432518157, "loss": 0.166, "step": 79898 }, { "epoch": 0.14166930412711584, "grad_norm": 0.23046875, "learning_rate": 0.00032471632414502025, "loss": 0.1472, "step": 79900 }, { "epoch": 0.14167285029242566, "grad_norm": 0.3203125, "learning_rate": 0.00032468609735549885, "loss": 0.1607, "step": 79902 }, { "epoch": 0.14167639645773547, "grad_norm": 0.3828125, "learning_rate": 0.0003246558739567505, "loss": 0.1462, "step": 79904 }, { "epoch": 0.14167994262304529, "grad_norm": 0.265625, "learning_rate": 0.0003246256539489066, "loss": 0.1593, "step": 79906 }, { "epoch": 0.1416834887883551, "grad_norm": 0.56640625, "learning_rate": 0.0003245954373321003, "loss": 0.181, "step": 79908 }, { "epoch": 0.14168703495366491, "grad_norm": 1.28125, "learning_rate": 0.0003245652241064625, "loss": 0.147, "step": 79910 }, { "epoch": 0.14169058111897473, "grad_norm": 1.0234375, "learning_rate": 0.00032453501427212627, "loss": 0.2049, "step": 79912 }, { "epoch": 0.14169412728428454, "grad_norm": 0.259765625, "learning_rate": 0.0003245048078292235, "loss": 0.1764, "step": 79914 }, { "epoch": 0.14169767344959436, "grad_norm": 0.625, "learning_rate": 0.0003244746047778863, "loss": 0.2084, "step": 79916 }, { "epoch": 0.14170121961490417, "grad_norm": 1.9375, "learning_rate": 0.0003244444051182466, "loss": 0.206, "step": 79918 }, { "epoch": 0.141704765780214, "grad_norm": 0.400390625, "learning_rate": 0.0003244142088504364, "loss": 0.1552, "step": 79920 }, { "epoch": 0.1417083119455238, "grad_norm": 0.35546875, "learning_rate": 0.0003243840159745881, "loss": 0.1519, "step": 79922 }, { "epoch": 0.14171185811083362, "grad_norm": 0.40625, "learning_rate": 0.0003243538264908336, "loss": 0.158, "step": 79924 }, { "epoch": 0.14171540427614343, "grad_norm": 0.57421875, "learning_rate": 0.00032432364039930493, "loss": 0.1512, "step": 79926 }, { "epoch": 0.14171895044145325, "grad_norm": 0.357421875, "learning_rate": 0.00032429345770013384, "loss": 0.2133, "step": 79928 }, { "epoch": 0.14172249660676306, "grad_norm": 0.2890625, "learning_rate": 0.0003242632783934528, "loss": 0.1606, "step": 79930 }, { "epoch": 0.14172604277207287, "grad_norm": 0.52734375, "learning_rate": 0.00032423310247939373, "loss": 0.1959, "step": 79932 }, { "epoch": 0.1417295889373827, "grad_norm": 0.3359375, "learning_rate": 0.0003242029299580883, "loss": 0.1855, "step": 79934 }, { "epoch": 0.1417331351026925, "grad_norm": 0.52734375, "learning_rate": 0.00032417276082966865, "loss": 0.1573, "step": 79936 }, { "epoch": 0.14173668126800232, "grad_norm": 0.458984375, "learning_rate": 0.00032414259509426643, "loss": 0.2047, "step": 79938 }, { "epoch": 0.14174022743331213, "grad_norm": 0.271484375, "learning_rate": 0.0003241124327520143, "loss": 0.2087, "step": 79940 }, { "epoch": 0.14174377359862195, "grad_norm": 0.17578125, "learning_rate": 0.0003240822738030433, "loss": 0.1755, "step": 79942 }, { "epoch": 0.14174731976393176, "grad_norm": 0.25390625, "learning_rate": 0.0003240521182474859, "loss": 0.1701, "step": 79944 }, { "epoch": 0.1417508659292416, "grad_norm": 0.275390625, "learning_rate": 0.0003240219660854736, "loss": 0.1305, "step": 79946 }, { "epoch": 0.14175441209455142, "grad_norm": 0.953125, "learning_rate": 0.00032399181731713897, "loss": 0.2386, "step": 79948 }, { "epoch": 0.14175795825986123, "grad_norm": 0.8515625, "learning_rate": 0.0003239616719426128, "loss": 0.2499, "step": 79950 }, { "epoch": 0.14176150442517105, "grad_norm": 0.59375, "learning_rate": 0.00032393152996202783, "loss": 0.128, "step": 79952 }, { "epoch": 0.14176505059048086, "grad_norm": 1.09375, "learning_rate": 0.0003239013913755156, "loss": 0.2245, "step": 79954 }, { "epoch": 0.14176859675579068, "grad_norm": 0.40625, "learning_rate": 0.0003238712561832075, "loss": 0.1821, "step": 79956 }, { "epoch": 0.1417721429211005, "grad_norm": 0.302734375, "learning_rate": 0.00032384112438523617, "loss": 0.1613, "step": 79958 }, { "epoch": 0.1417756890864103, "grad_norm": 0.33203125, "learning_rate": 0.0003238109959817326, "loss": 0.2294, "step": 79960 }, { "epoch": 0.14177923525172012, "grad_norm": 0.9375, "learning_rate": 0.00032378087097282904, "loss": 0.2664, "step": 79962 }, { "epoch": 0.14178278141702993, "grad_norm": 0.51953125, "learning_rate": 0.00032375074935865676, "loss": 0.2017, "step": 79964 }, { "epoch": 0.14178632758233975, "grad_norm": 0.498046875, "learning_rate": 0.00032372063113934835, "loss": 0.178, "step": 79966 }, { "epoch": 0.14178987374764956, "grad_norm": 0.267578125, "learning_rate": 0.0003236905163150345, "loss": 0.1831, "step": 79968 }, { "epoch": 0.14179341991295938, "grad_norm": 0.91796875, "learning_rate": 0.00032366040488584764, "loss": 0.2419, "step": 79970 }, { "epoch": 0.1417969660782692, "grad_norm": 0.296875, "learning_rate": 0.0003236302968519191, "loss": 0.1413, "step": 79972 }, { "epoch": 0.141800512243579, "grad_norm": 0.81640625, "learning_rate": 0.00032360019221338085, "loss": 0.1468, "step": 79974 }, { "epoch": 0.14180405840888882, "grad_norm": 0.439453125, "learning_rate": 0.0003235700909703643, "loss": 0.1706, "step": 79976 }, { "epoch": 0.14180760457419864, "grad_norm": 0.55859375, "learning_rate": 0.0003235399931230009, "loss": 0.1515, "step": 79978 }, { "epoch": 0.14181115073950845, "grad_norm": 0.3125, "learning_rate": 0.0003235098986714228, "loss": 0.1627, "step": 79980 }, { "epoch": 0.14181469690481827, "grad_norm": 1.4453125, "learning_rate": 0.0003234798076157613, "loss": 0.2549, "step": 79982 }, { "epoch": 0.14181824307012808, "grad_norm": 0.7890625, "learning_rate": 0.0003234497199561482, "loss": 0.1554, "step": 79984 }, { "epoch": 0.1418217892354379, "grad_norm": 0.279296875, "learning_rate": 0.0003234196356927147, "loss": 0.1724, "step": 79986 }, { "epoch": 0.1418253354007477, "grad_norm": 0.44140625, "learning_rate": 0.0003233895548255928, "loss": 0.1478, "step": 79988 }, { "epoch": 0.14182888156605752, "grad_norm": 0.9609375, "learning_rate": 0.00032335947735491385, "loss": 0.1945, "step": 79990 }, { "epoch": 0.14183242773136734, "grad_norm": 0.365234375, "learning_rate": 0.00032332940328080943, "loss": 0.3011, "step": 79992 }, { "epoch": 0.14183597389667715, "grad_norm": 0.240234375, "learning_rate": 0.00032329933260341116, "loss": 0.1545, "step": 79994 }, { "epoch": 0.14183952006198697, "grad_norm": 0.33984375, "learning_rate": 0.0003232692653228501, "loss": 0.1538, "step": 79996 }, { "epoch": 0.14184306622729678, "grad_norm": 0.2451171875, "learning_rate": 0.0003232392014392585, "loss": 0.1462, "step": 79998 }, { "epoch": 0.1418466123926066, "grad_norm": 0.93359375, "learning_rate": 0.00032320914095276723, "loss": 0.1687, "step": 80000 }, { "epoch": 0.1418501585579164, "grad_norm": 0.3359375, "learning_rate": 0.000323179083863508, "loss": 0.2041, "step": 80002 }, { "epoch": 0.14185370472322623, "grad_norm": 0.796875, "learning_rate": 0.00032314903017161196, "loss": 0.2204, "step": 80004 }, { "epoch": 0.14185725088853604, "grad_norm": 0.1416015625, "learning_rate": 0.0003231189798772113, "loss": 0.1489, "step": 80006 }, { "epoch": 0.14186079705384586, "grad_norm": 0.390625, "learning_rate": 0.00032308893298043664, "loss": 0.1967, "step": 80008 }, { "epoch": 0.14186434321915567, "grad_norm": 0.26171875, "learning_rate": 0.00032305888948141976, "loss": 0.1704, "step": 80010 }, { "epoch": 0.14186788938446548, "grad_norm": 0.173828125, "learning_rate": 0.000323028849380292, "loss": 0.131, "step": 80012 }, { "epoch": 0.1418714355497753, "grad_norm": 0.400390625, "learning_rate": 0.0003229988126771849, "loss": 0.1852, "step": 80014 }, { "epoch": 0.1418749817150851, "grad_norm": 1.5, "learning_rate": 0.0003229687793722295, "loss": 0.6437, "step": 80016 }, { "epoch": 0.14187852788039493, "grad_norm": 2.25, "learning_rate": 0.0003229387494655572, "loss": 0.1731, "step": 80018 }, { "epoch": 0.14188207404570474, "grad_norm": 0.2392578125, "learning_rate": 0.00032290872295729955, "loss": 0.142, "step": 80020 }, { "epoch": 0.14188562021101456, "grad_norm": 0.478515625, "learning_rate": 0.00032287869984758783, "loss": 0.1808, "step": 80022 }, { "epoch": 0.14188916637632437, "grad_norm": 0.27734375, "learning_rate": 0.00032284868013655323, "loss": 0.1777, "step": 80024 }, { "epoch": 0.14189271254163419, "grad_norm": 0.484375, "learning_rate": 0.000322818663824327, "loss": 0.2718, "step": 80026 }, { "epoch": 0.141896258706944, "grad_norm": 0.74609375, "learning_rate": 0.0003227886509110405, "loss": 0.1607, "step": 80028 }, { "epoch": 0.14189980487225382, "grad_norm": 0.59375, "learning_rate": 0.00032275864139682517, "loss": 0.2169, "step": 80030 }, { "epoch": 0.14190335103756363, "grad_norm": 0.3125, "learning_rate": 0.00032272863528181196, "loss": 0.1974, "step": 80032 }, { "epoch": 0.14190689720287344, "grad_norm": 0.5234375, "learning_rate": 0.0003226986325661322, "loss": 0.1643, "step": 80034 }, { "epoch": 0.1419104433681833, "grad_norm": 0.2294921875, "learning_rate": 0.00032266863324991694, "loss": 0.1877, "step": 80036 }, { "epoch": 0.1419139895334931, "grad_norm": 0.330078125, "learning_rate": 0.0003226386373332977, "loss": 0.2149, "step": 80038 }, { "epoch": 0.14191753569880292, "grad_norm": 3.375, "learning_rate": 0.00032260864481640556, "loss": 0.3311, "step": 80040 }, { "epoch": 0.14192108186411273, "grad_norm": 0.44921875, "learning_rate": 0.0003225786556993716, "loss": 0.1858, "step": 80042 }, { "epoch": 0.14192462802942254, "grad_norm": 1.9765625, "learning_rate": 0.00032254866998232713, "loss": 0.4067, "step": 80044 }, { "epoch": 0.14192817419473236, "grad_norm": 0.4609375, "learning_rate": 0.0003225186876654028, "loss": 0.2082, "step": 80046 }, { "epoch": 0.14193172036004217, "grad_norm": 0.310546875, "learning_rate": 0.0003224887087487307, "loss": 0.2382, "step": 80048 }, { "epoch": 0.141935266525352, "grad_norm": 0.30078125, "learning_rate": 0.0003224587332324409, "loss": 0.1259, "step": 80050 }, { "epoch": 0.1419388126906618, "grad_norm": 0.341796875, "learning_rate": 0.00032242876111666507, "loss": 0.1657, "step": 80052 }, { "epoch": 0.14194235885597162, "grad_norm": 0.52734375, "learning_rate": 0.00032239879240153404, "loss": 0.1711, "step": 80054 }, { "epoch": 0.14194590502128143, "grad_norm": 0.56640625, "learning_rate": 0.00032236882708717936, "loss": 0.1953, "step": 80056 }, { "epoch": 0.14194945118659125, "grad_norm": 0.43359375, "learning_rate": 0.0003223388651737314, "loss": 0.1878, "step": 80058 }, { "epoch": 0.14195299735190106, "grad_norm": 0.5703125, "learning_rate": 0.00032230890666132175, "loss": 0.1478, "step": 80060 }, { "epoch": 0.14195654351721088, "grad_norm": 0.298828125, "learning_rate": 0.00032227895155008114, "loss": 0.1233, "step": 80062 }, { "epoch": 0.1419600896825207, "grad_norm": 0.2392578125, "learning_rate": 0.0003222489998401406, "loss": 0.1561, "step": 80064 }, { "epoch": 0.1419636358478305, "grad_norm": 0.330078125, "learning_rate": 0.00032221905153163123, "loss": 0.1778, "step": 80066 }, { "epoch": 0.14196718201314032, "grad_norm": 0.5234375, "learning_rate": 0.0003221891066246837, "loss": 0.2116, "step": 80068 }, { "epoch": 0.14197072817845013, "grad_norm": 0.3359375, "learning_rate": 0.0003221591651194295, "loss": 0.1968, "step": 80070 }, { "epoch": 0.14197427434375995, "grad_norm": 0.5, "learning_rate": 0.00032212922701599914, "loss": 0.1946, "step": 80072 }, { "epoch": 0.14197782050906976, "grad_norm": 0.61328125, "learning_rate": 0.00032209929231452383, "loss": 0.2235, "step": 80074 }, { "epoch": 0.14198136667437958, "grad_norm": 0.2109375, "learning_rate": 0.00032206936101513415, "loss": 0.1502, "step": 80076 }, { "epoch": 0.1419849128396894, "grad_norm": 0.443359375, "learning_rate": 0.00032203943311796135, "loss": 0.1881, "step": 80078 }, { "epoch": 0.1419884590049992, "grad_norm": 1.0625, "learning_rate": 0.0003220095086231362, "loss": 0.3479, "step": 80080 }, { "epoch": 0.14199200517030902, "grad_norm": 0.2890625, "learning_rate": 0.0003219795875307895, "loss": 0.1591, "step": 80082 }, { "epoch": 0.14199555133561884, "grad_norm": 0.8046875, "learning_rate": 0.00032194966984105223, "loss": 0.1991, "step": 80084 }, { "epoch": 0.14199909750092865, "grad_norm": 0.3984375, "learning_rate": 0.0003219197555540549, "loss": 0.1803, "step": 80086 }, { "epoch": 0.14200264366623847, "grad_norm": 0.458984375, "learning_rate": 0.000321889844669929, "loss": 0.2695, "step": 80088 }, { "epoch": 0.14200618983154828, "grad_norm": 0.330078125, "learning_rate": 0.00032185993718880456, "loss": 0.2068, "step": 80090 }, { "epoch": 0.1420097359968581, "grad_norm": 0.66796875, "learning_rate": 0.000321830033110813, "loss": 0.1851, "step": 80092 }, { "epoch": 0.1420132821621679, "grad_norm": 0.47265625, "learning_rate": 0.0003218001324360846, "loss": 0.1786, "step": 80094 }, { "epoch": 0.14201682832747772, "grad_norm": 1.8046875, "learning_rate": 0.00032177023516475077, "loss": 0.2136, "step": 80096 }, { "epoch": 0.14202037449278754, "grad_norm": 0.2734375, "learning_rate": 0.00032174034129694166, "loss": 0.172, "step": 80098 }, { "epoch": 0.14202392065809735, "grad_norm": 0.2451171875, "learning_rate": 0.0003217104508327882, "loss": 0.2024, "step": 80100 }, { "epoch": 0.14202746682340717, "grad_norm": 0.318359375, "learning_rate": 0.0003216805637724213, "loss": 0.2592, "step": 80102 }, { "epoch": 0.14203101298871698, "grad_norm": 3.234375, "learning_rate": 0.0003216506801159714, "loss": 0.2208, "step": 80104 }, { "epoch": 0.1420345591540268, "grad_norm": 0.546875, "learning_rate": 0.0003216207998635693, "loss": 0.2091, "step": 80106 }, { "epoch": 0.1420381053193366, "grad_norm": 0.38671875, "learning_rate": 0.0003215909230153456, "loss": 0.1548, "step": 80108 }, { "epoch": 0.14204165148464643, "grad_norm": 0.3515625, "learning_rate": 0.00032156104957143115, "loss": 0.1531, "step": 80110 }, { "epoch": 0.14204519764995624, "grad_norm": 0.703125, "learning_rate": 0.00032153117953195653, "loss": 0.1853, "step": 80112 }, { "epoch": 0.14204874381526605, "grad_norm": 0.2021484375, "learning_rate": 0.0003215013128970524, "loss": 0.1793, "step": 80114 }, { "epoch": 0.14205228998057587, "grad_norm": 0.64453125, "learning_rate": 0.000321471449666849, "loss": 0.1749, "step": 80116 }, { "epoch": 0.14205583614588568, "grad_norm": 0.314453125, "learning_rate": 0.0003214415898414776, "loss": 0.167, "step": 80118 }, { "epoch": 0.1420593823111955, "grad_norm": 0.5703125, "learning_rate": 0.0003214117334210683, "loss": 0.1325, "step": 80120 }, { "epoch": 0.1420629284765053, "grad_norm": 1.109375, "learning_rate": 0.00032138188040575197, "loss": 0.1991, "step": 80122 }, { "epoch": 0.14206647464181513, "grad_norm": 0.34375, "learning_rate": 0.0003213520307956589, "loss": 0.1564, "step": 80124 }, { "epoch": 0.14207002080712494, "grad_norm": 0.56640625, "learning_rate": 0.0003213221845909196, "loss": 0.1518, "step": 80126 }, { "epoch": 0.14207356697243478, "grad_norm": 0.37109375, "learning_rate": 0.00032129234179166494, "loss": 0.2057, "step": 80128 }, { "epoch": 0.1420771131377446, "grad_norm": 0.2060546875, "learning_rate": 0.00032126250239802526, "loss": 0.1679, "step": 80130 }, { "epoch": 0.1420806593030544, "grad_norm": 0.73046875, "learning_rate": 0.00032123266641013103, "loss": 0.2276, "step": 80132 }, { "epoch": 0.14208420546836423, "grad_norm": 0.439453125, "learning_rate": 0.00032120283382811265, "loss": 0.2264, "step": 80134 }, { "epoch": 0.14208775163367404, "grad_norm": 0.9453125, "learning_rate": 0.0003211730046521008, "loss": 0.2607, "step": 80136 }, { "epoch": 0.14209129779898386, "grad_norm": 0.54296875, "learning_rate": 0.00032114317888222584, "loss": 0.2585, "step": 80138 }, { "epoch": 0.14209484396429367, "grad_norm": 2.421875, "learning_rate": 0.0003211133565186182, "loss": 0.232, "step": 80140 }, { "epoch": 0.14209839012960349, "grad_norm": 0.8671875, "learning_rate": 0.0003210835375614083, "loss": 0.1588, "step": 80142 }, { "epoch": 0.1421019362949133, "grad_norm": 0.2490234375, "learning_rate": 0.0003210537220107264, "loss": 0.1642, "step": 80144 }, { "epoch": 0.14210548246022311, "grad_norm": 0.439453125, "learning_rate": 0.0003210239098667034, "loss": 0.1868, "step": 80146 }, { "epoch": 0.14210902862553293, "grad_norm": 0.34765625, "learning_rate": 0.000320994101129469, "loss": 0.1752, "step": 80148 }, { "epoch": 0.14211257479084274, "grad_norm": 0.451171875, "learning_rate": 0.0003209642957991541, "loss": 0.1997, "step": 80150 }, { "epoch": 0.14211612095615256, "grad_norm": 0.458984375, "learning_rate": 0.0003209344938758886, "loss": 0.1776, "step": 80152 }, { "epoch": 0.14211966712146237, "grad_norm": 0.4375, "learning_rate": 0.0003209046953598035, "loss": 0.1889, "step": 80154 }, { "epoch": 0.1421232132867722, "grad_norm": 1.484375, "learning_rate": 0.0003208749002510283, "loss": 0.2696, "step": 80156 }, { "epoch": 0.142126759452082, "grad_norm": 0.625, "learning_rate": 0.00032084510854969397, "loss": 0.198, "step": 80158 }, { "epoch": 0.14213030561739182, "grad_norm": 0.287109375, "learning_rate": 0.0003208153202559305, "loss": 0.2241, "step": 80160 }, { "epoch": 0.14213385178270163, "grad_norm": 1.4375, "learning_rate": 0.0003207855353698682, "loss": 0.2637, "step": 80162 }, { "epoch": 0.14213739794801145, "grad_norm": 0.30859375, "learning_rate": 0.00032075575389163736, "loss": 0.1487, "step": 80164 }, { "epoch": 0.14214094411332126, "grad_norm": 0.1787109375, "learning_rate": 0.0003207259758213681, "loss": 0.3034, "step": 80166 }, { "epoch": 0.14214449027863107, "grad_norm": 0.35546875, "learning_rate": 0.00032069620115919085, "loss": 0.1704, "step": 80168 }, { "epoch": 0.1421480364439409, "grad_norm": 0.498046875, "learning_rate": 0.00032066642990523576, "loss": 0.2038, "step": 80170 }, { "epoch": 0.1421515826092507, "grad_norm": 0.37109375, "learning_rate": 0.0003206366620596331, "loss": 0.1266, "step": 80172 }, { "epoch": 0.14215512877456052, "grad_norm": 0.42578125, "learning_rate": 0.0003206068976225127, "loss": 0.1623, "step": 80174 }, { "epoch": 0.14215867493987033, "grad_norm": 0.255859375, "learning_rate": 0.00032057713659400515, "loss": 0.1927, "step": 80176 }, { "epoch": 0.14216222110518015, "grad_norm": 0.2236328125, "learning_rate": 0.00032054737897424057, "loss": 0.1965, "step": 80178 }, { "epoch": 0.14216576727048996, "grad_norm": 0.61328125, "learning_rate": 0.00032051762476334895, "loss": 0.2053, "step": 80180 }, { "epoch": 0.14216931343579978, "grad_norm": 0.373046875, "learning_rate": 0.0003204878739614605, "loss": 0.248, "step": 80182 }, { "epoch": 0.1421728596011096, "grad_norm": 0.6640625, "learning_rate": 0.0003204581265687051, "loss": 0.2226, "step": 80184 }, { "epoch": 0.1421764057664194, "grad_norm": 0.26171875, "learning_rate": 0.0003204283825852134, "loss": 0.2186, "step": 80186 }, { "epoch": 0.14217995193172922, "grad_norm": 0.703125, "learning_rate": 0.0003203986420111147, "loss": 0.1718, "step": 80188 }, { "epoch": 0.14218349809703903, "grad_norm": 0.23828125, "learning_rate": 0.0003203689048465397, "loss": 0.1291, "step": 80190 }, { "epoch": 0.14218704426234885, "grad_norm": 0.5078125, "learning_rate": 0.0003203391710916181, "loss": 0.3941, "step": 80192 }, { "epoch": 0.14219059042765866, "grad_norm": 0.25, "learning_rate": 0.00032030944074648045, "loss": 0.1771, "step": 80194 }, { "epoch": 0.14219413659296848, "grad_norm": 0.7265625, "learning_rate": 0.000320279713811256, "loss": 0.1579, "step": 80196 }, { "epoch": 0.1421976827582783, "grad_norm": 0.404296875, "learning_rate": 0.00032024999028607535, "loss": 0.182, "step": 80198 }, { "epoch": 0.1422012289235881, "grad_norm": 0.2490234375, "learning_rate": 0.00032022027017106837, "loss": 0.1611, "step": 80200 }, { "epoch": 0.14220477508889792, "grad_norm": 0.298828125, "learning_rate": 0.000320190553466365, "loss": 0.1836, "step": 80202 }, { "epoch": 0.14220832125420774, "grad_norm": 0.330078125, "learning_rate": 0.00032016084017209514, "loss": 0.1863, "step": 80204 }, { "epoch": 0.14221186741951755, "grad_norm": 0.2041015625, "learning_rate": 0.0003201311302883885, "loss": 0.1174, "step": 80206 }, { "epoch": 0.14221541358482737, "grad_norm": 0.98828125, "learning_rate": 0.0003201014238153755, "loss": 0.1965, "step": 80208 }, { "epoch": 0.14221895975013718, "grad_norm": 0.76953125, "learning_rate": 0.00032007172075318604, "loss": 0.2203, "step": 80210 }, { "epoch": 0.142222505915447, "grad_norm": 0.67578125, "learning_rate": 0.0003200420211019497, "loss": 0.1777, "step": 80212 }, { "epoch": 0.1422260520807568, "grad_norm": 0.22265625, "learning_rate": 0.00032001232486179653, "loss": 0.211, "step": 80214 }, { "epoch": 0.14222959824606662, "grad_norm": 0.412109375, "learning_rate": 0.00031998263203285613, "loss": 0.1844, "step": 80216 }, { "epoch": 0.14223314441137647, "grad_norm": 0.3125, "learning_rate": 0.0003199529426152588, "loss": 0.1655, "step": 80218 }, { "epoch": 0.14223669057668628, "grad_norm": 0.373046875, "learning_rate": 0.00031992325660913434, "loss": 0.2211, "step": 80220 }, { "epoch": 0.1422402367419961, "grad_norm": 0.2041015625, "learning_rate": 0.00031989357401461234, "loss": 0.165, "step": 80222 }, { "epoch": 0.1422437829073059, "grad_norm": 0.2021484375, "learning_rate": 0.00031986389483182245, "loss": 0.1299, "step": 80224 }, { "epoch": 0.14224732907261572, "grad_norm": 0.349609375, "learning_rate": 0.00031983421906089504, "loss": 0.1779, "step": 80226 }, { "epoch": 0.14225087523792554, "grad_norm": 1.296875, "learning_rate": 0.0003198045467019595, "loss": 0.2055, "step": 80228 }, { "epoch": 0.14225442140323535, "grad_norm": 0.400390625, "learning_rate": 0.0003197748777551457, "loss": 0.1467, "step": 80230 }, { "epoch": 0.14225796756854517, "grad_norm": 0.97265625, "learning_rate": 0.00031974521222058347, "loss": 0.3512, "step": 80232 }, { "epoch": 0.14226151373385498, "grad_norm": 0.69140625, "learning_rate": 0.00031971555009840204, "loss": 0.2081, "step": 80234 }, { "epoch": 0.1422650598991648, "grad_norm": 0.47265625, "learning_rate": 0.00031968589138873214, "loss": 0.1437, "step": 80236 }, { "epoch": 0.1422686060644746, "grad_norm": 0.9765625, "learning_rate": 0.0003196562360917024, "loss": 0.3646, "step": 80238 }, { "epoch": 0.14227215222978443, "grad_norm": 0.3046875, "learning_rate": 0.00031962658420744326, "loss": 0.1862, "step": 80240 }, { "epoch": 0.14227569839509424, "grad_norm": 0.8359375, "learning_rate": 0.00031959693573608384, "loss": 0.1836, "step": 80242 }, { "epoch": 0.14227924456040406, "grad_norm": 0.2294921875, "learning_rate": 0.00031956729067775464, "loss": 0.1671, "step": 80244 }, { "epoch": 0.14228279072571387, "grad_norm": 0.30859375, "learning_rate": 0.0003195376490325843, "loss": 0.1867, "step": 80246 }, { "epoch": 0.14228633689102368, "grad_norm": 0.2109375, "learning_rate": 0.0003195080108007032, "loss": 0.1155, "step": 80248 }, { "epoch": 0.1422898830563335, "grad_norm": 2.609375, "learning_rate": 0.0003194783759822407, "loss": 0.2998, "step": 80250 }, { "epoch": 0.1422934292216433, "grad_norm": 0.2119140625, "learning_rate": 0.00031944874457732634, "loss": 0.1467, "step": 80252 }, { "epoch": 0.14229697538695313, "grad_norm": 0.75, "learning_rate": 0.00031941911658608983, "loss": 0.1548, "step": 80254 }, { "epoch": 0.14230052155226294, "grad_norm": 0.294921875, "learning_rate": 0.00031938949200866054, "loss": 0.1655, "step": 80256 }, { "epoch": 0.14230406771757276, "grad_norm": 1.375, "learning_rate": 0.00031935987084516813, "loss": 0.222, "step": 80258 }, { "epoch": 0.14230761388288257, "grad_norm": 0.6484375, "learning_rate": 0.0003193302530957425, "loss": 0.1801, "step": 80260 }, { "epoch": 0.1423111600481924, "grad_norm": 0.65234375, "learning_rate": 0.0003193006387605127, "loss": 0.2196, "step": 80262 }, { "epoch": 0.1423147062135022, "grad_norm": 1.59375, "learning_rate": 0.00031927102783960826, "loss": 0.2314, "step": 80264 }, { "epoch": 0.14231825237881202, "grad_norm": 0.7421875, "learning_rate": 0.000319241420333159, "loss": 0.1566, "step": 80266 }, { "epoch": 0.14232179854412183, "grad_norm": 0.236328125, "learning_rate": 0.00031921181624129435, "loss": 0.1648, "step": 80268 }, { "epoch": 0.14232534470943164, "grad_norm": 1.0546875, "learning_rate": 0.0003191822155641437, "loss": 0.1465, "step": 80270 }, { "epoch": 0.14232889087474146, "grad_norm": 0.2314453125, "learning_rate": 0.0003191526183018364, "loss": 0.1382, "step": 80272 }, { "epoch": 0.14233243704005127, "grad_norm": 0.28515625, "learning_rate": 0.0003191230244545018, "loss": 0.1596, "step": 80274 }, { "epoch": 0.1423359832053611, "grad_norm": 2.234375, "learning_rate": 0.00031909343402226973, "loss": 0.2762, "step": 80276 }, { "epoch": 0.1423395293706709, "grad_norm": 0.55078125, "learning_rate": 0.0003190638470052692, "loss": 0.2029, "step": 80278 }, { "epoch": 0.14234307553598072, "grad_norm": 0.5546875, "learning_rate": 0.00031903426340362996, "loss": 0.2883, "step": 80280 }, { "epoch": 0.14234662170129053, "grad_norm": 0.45703125, "learning_rate": 0.0003190046832174809, "loss": 0.1623, "step": 80282 }, { "epoch": 0.14235016786660035, "grad_norm": 0.43359375, "learning_rate": 0.0003189751064469522, "loss": 0.1829, "step": 80284 }, { "epoch": 0.14235371403191016, "grad_norm": 0.58203125, "learning_rate": 0.0003189455330921723, "loss": 0.1165, "step": 80286 }, { "epoch": 0.14235726019721998, "grad_norm": 0.7265625, "learning_rate": 0.0003189159631532711, "loss": 0.1461, "step": 80288 }, { "epoch": 0.1423608063625298, "grad_norm": 0.453125, "learning_rate": 0.00031888639663037784, "loss": 0.2247, "step": 80290 }, { "epoch": 0.1423643525278396, "grad_norm": 0.412109375, "learning_rate": 0.0003188568335236215, "loss": 0.1994, "step": 80292 }, { "epoch": 0.14236789869314942, "grad_norm": 2.390625, "learning_rate": 0.0003188272738331321, "loss": 0.2689, "step": 80294 }, { "epoch": 0.14237144485845923, "grad_norm": 0.75390625, "learning_rate": 0.00031879771755903796, "loss": 0.182, "step": 80296 }, { "epoch": 0.14237499102376905, "grad_norm": 0.87109375, "learning_rate": 0.0003187681647014691, "loss": 0.1516, "step": 80298 }, { "epoch": 0.14237853718907886, "grad_norm": 0.5, "learning_rate": 0.00031873861526055425, "loss": 0.1532, "step": 80300 }, { "epoch": 0.14238208335438868, "grad_norm": 0.578125, "learning_rate": 0.0003187090692364232, "loss": 0.1576, "step": 80302 }, { "epoch": 0.1423856295196985, "grad_norm": 1.0390625, "learning_rate": 0.0003186795266292046, "loss": 0.2134, "step": 80304 }, { "epoch": 0.1423891756850083, "grad_norm": 3.71875, "learning_rate": 0.00031864998743902806, "loss": 0.2931, "step": 80306 }, { "epoch": 0.14239272185031815, "grad_norm": 0.1982421875, "learning_rate": 0.00031862045166602254, "loss": 0.1256, "step": 80308 }, { "epoch": 0.14239626801562796, "grad_norm": 1.2890625, "learning_rate": 0.00031859091931031743, "loss": 0.2589, "step": 80310 }, { "epoch": 0.14239981418093778, "grad_norm": 0.8984375, "learning_rate": 0.0003185613903720417, "loss": 0.2207, "step": 80312 }, { "epoch": 0.1424033603462476, "grad_norm": 0.69921875, "learning_rate": 0.0003185318648513242, "loss": 0.1937, "step": 80314 }, { "epoch": 0.1424069065115574, "grad_norm": 0.83203125, "learning_rate": 0.0003185023427482947, "loss": 0.1975, "step": 80316 }, { "epoch": 0.14241045267686722, "grad_norm": 0.376953125, "learning_rate": 0.00031847282406308197, "loss": 0.1589, "step": 80318 }, { "epoch": 0.14241399884217704, "grad_norm": 0.498046875, "learning_rate": 0.00031844330879581516, "loss": 0.2019, "step": 80320 }, { "epoch": 0.14241754500748685, "grad_norm": 0.2734375, "learning_rate": 0.0003184137969466232, "loss": 0.1883, "step": 80322 }, { "epoch": 0.14242109117279667, "grad_norm": 0.4609375, "learning_rate": 0.0003183842885156354, "loss": 0.1836, "step": 80324 }, { "epoch": 0.14242463733810648, "grad_norm": 0.57421875, "learning_rate": 0.00031835478350298067, "loss": 0.2344, "step": 80326 }, { "epoch": 0.1424281835034163, "grad_norm": 0.279296875, "learning_rate": 0.00031832528190878825, "loss": 0.1985, "step": 80328 }, { "epoch": 0.1424317296687261, "grad_norm": 0.498046875, "learning_rate": 0.00031829578373318687, "loss": 0.1783, "step": 80330 }, { "epoch": 0.14243527583403592, "grad_norm": 0.68359375, "learning_rate": 0.0003182662889763054, "loss": 0.1579, "step": 80332 }, { "epoch": 0.14243882199934574, "grad_norm": 0.6953125, "learning_rate": 0.0003182367976382737, "loss": 0.1994, "step": 80334 }, { "epoch": 0.14244236816465555, "grad_norm": 0.31640625, "learning_rate": 0.0003182073097192196, "loss": 0.1545, "step": 80336 }, { "epoch": 0.14244591432996537, "grad_norm": 0.60546875, "learning_rate": 0.00031817782521927285, "loss": 0.2164, "step": 80338 }, { "epoch": 0.14244946049527518, "grad_norm": 0.5625, "learning_rate": 0.0003181483441385619, "loss": 0.172, "step": 80340 }, { "epoch": 0.142453006660585, "grad_norm": 0.1689453125, "learning_rate": 0.00031811886647721647, "loss": 0.1775, "step": 80342 }, { "epoch": 0.1424565528258948, "grad_norm": 0.33984375, "learning_rate": 0.0003180893922353644, "loss": 0.1777, "step": 80344 }, { "epoch": 0.14246009899120463, "grad_norm": 0.69140625, "learning_rate": 0.00031805992141313534, "loss": 0.2054, "step": 80346 }, { "epoch": 0.14246364515651444, "grad_norm": 0.3984375, "learning_rate": 0.0003180304540106581, "loss": 0.1756, "step": 80348 }, { "epoch": 0.14246719132182425, "grad_norm": 8.375, "learning_rate": 0.0003180009900280614, "loss": 0.2767, "step": 80350 }, { "epoch": 0.14247073748713407, "grad_norm": 0.85546875, "learning_rate": 0.000317971529465474, "loss": 0.229, "step": 80352 }, { "epoch": 0.14247428365244388, "grad_norm": 0.6328125, "learning_rate": 0.00031794207232302483, "loss": 0.2018, "step": 80354 }, { "epoch": 0.1424778298177537, "grad_norm": 0.2734375, "learning_rate": 0.00031791261860084297, "loss": 0.1957, "step": 80356 }, { "epoch": 0.1424813759830635, "grad_norm": 1.6328125, "learning_rate": 0.0003178831682990569, "loss": 0.2199, "step": 80358 }, { "epoch": 0.14248492214837333, "grad_norm": 0.75, "learning_rate": 0.0003178537214177956, "loss": 0.2194, "step": 80360 }, { "epoch": 0.14248846831368314, "grad_norm": 0.478515625, "learning_rate": 0.00031782427795718773, "loss": 0.2023, "step": 80362 }, { "epoch": 0.14249201447899296, "grad_norm": 0.72265625, "learning_rate": 0.0003177948379173621, "loss": 0.1952, "step": 80364 }, { "epoch": 0.14249556064430277, "grad_norm": 0.7734375, "learning_rate": 0.0003177654012984476, "loss": 0.1233, "step": 80366 }, { "epoch": 0.14249910680961259, "grad_norm": 0.31640625, "learning_rate": 0.0003177359681005729, "loss": 0.1474, "step": 80368 }, { "epoch": 0.1425026529749224, "grad_norm": 0.28515625, "learning_rate": 0.00031770653832386667, "loss": 0.1553, "step": 80370 }, { "epoch": 0.14250619914023221, "grad_norm": 0.375, "learning_rate": 0.0003176771119684574, "loss": 0.1724, "step": 80372 }, { "epoch": 0.14250974530554203, "grad_norm": 1.1796875, "learning_rate": 0.0003176476890344742, "loss": 0.2117, "step": 80374 }, { "epoch": 0.14251329147085184, "grad_norm": 0.25390625, "learning_rate": 0.00031761826952204556, "loss": 0.1529, "step": 80376 }, { "epoch": 0.14251683763616166, "grad_norm": 0.24609375, "learning_rate": 0.0003175888534313001, "loss": 0.1528, "step": 80378 }, { "epoch": 0.14252038380147147, "grad_norm": 1.015625, "learning_rate": 0.0003175594407623666, "loss": 0.2355, "step": 80380 }, { "epoch": 0.1425239299667813, "grad_norm": 0.2353515625, "learning_rate": 0.00031753003151537343, "loss": 0.2355, "step": 80382 }, { "epoch": 0.1425274761320911, "grad_norm": 1.71875, "learning_rate": 0.00031750062569044956, "loss": 0.3727, "step": 80384 }, { "epoch": 0.14253102229740092, "grad_norm": 0.375, "learning_rate": 0.00031747122328772343, "loss": 0.206, "step": 80386 }, { "epoch": 0.14253456846271073, "grad_norm": 0.66015625, "learning_rate": 0.0003174418243073236, "loss": 0.1756, "step": 80388 }, { "epoch": 0.14253811462802055, "grad_norm": 0.1513671875, "learning_rate": 0.00031741242874937844, "loss": 0.1746, "step": 80390 }, { "epoch": 0.14254166079333036, "grad_norm": 0.6484375, "learning_rate": 0.0003173830366140171, "loss": 0.2154, "step": 80392 }, { "epoch": 0.14254520695864017, "grad_norm": 0.349609375, "learning_rate": 0.0003173536479013675, "loss": 0.2182, "step": 80394 }, { "epoch": 0.14254875312395, "grad_norm": 0.470703125, "learning_rate": 0.00031732426261155854, "loss": 0.2119, "step": 80396 }, { "epoch": 0.1425522992892598, "grad_norm": 0.23046875, "learning_rate": 0.00031729488074471867, "loss": 0.2717, "step": 80398 }, { "epoch": 0.14255584545456965, "grad_norm": 0.26953125, "learning_rate": 0.00031726550230097626, "loss": 0.1551, "step": 80400 }, { "epoch": 0.14255939161987946, "grad_norm": 0.2236328125, "learning_rate": 0.0003172361272804599, "loss": 0.2009, "step": 80402 }, { "epoch": 0.14256293778518928, "grad_norm": 0.52734375, "learning_rate": 0.00031720675568329795, "loss": 0.1978, "step": 80404 }, { "epoch": 0.1425664839504991, "grad_norm": 1.6484375, "learning_rate": 0.0003171773875096191, "loss": 0.2247, "step": 80406 }, { "epoch": 0.1425700301158089, "grad_norm": 0.1943359375, "learning_rate": 0.0003171480227595516, "loss": 0.186, "step": 80408 }, { "epoch": 0.14257357628111872, "grad_norm": 0.55078125, "learning_rate": 0.00031711866143322386, "loss": 0.233, "step": 80410 }, { "epoch": 0.14257712244642853, "grad_norm": 0.328125, "learning_rate": 0.00031708930353076427, "loss": 0.1659, "step": 80412 }, { "epoch": 0.14258066861173835, "grad_norm": 0.2236328125, "learning_rate": 0.00031705994905230144, "loss": 0.2126, "step": 80414 }, { "epoch": 0.14258421477704816, "grad_norm": 0.796875, "learning_rate": 0.0003170305979979636, "loss": 0.5253, "step": 80416 }, { "epoch": 0.14258776094235798, "grad_norm": 0.33203125, "learning_rate": 0.00031700125036787907, "loss": 0.2625, "step": 80418 }, { "epoch": 0.1425913071076678, "grad_norm": 0.41015625, "learning_rate": 0.0003169719061621763, "loss": 0.1748, "step": 80420 }, { "epoch": 0.1425948532729776, "grad_norm": 0.498046875, "learning_rate": 0.00031694256538098337, "loss": 0.1349, "step": 80422 }, { "epoch": 0.14259839943828742, "grad_norm": 0.6640625, "learning_rate": 0.0003169132280244293, "loss": 0.2417, "step": 80424 }, { "epoch": 0.14260194560359724, "grad_norm": 0.314453125, "learning_rate": 0.0003168838940926414, "loss": 0.172, "step": 80426 }, { "epoch": 0.14260549176890705, "grad_norm": 0.384765625, "learning_rate": 0.00031685456358574864, "loss": 0.1511, "step": 80428 }, { "epoch": 0.14260903793421686, "grad_norm": 1.328125, "learning_rate": 0.00031682523650387894, "loss": 0.3143, "step": 80430 }, { "epoch": 0.14261258409952668, "grad_norm": 0.51953125, "learning_rate": 0.0003167959128471611, "loss": 0.155, "step": 80432 }, { "epoch": 0.1426161302648365, "grad_norm": 0.255859375, "learning_rate": 0.0003167665926157227, "loss": 0.1303, "step": 80434 }, { "epoch": 0.1426196764301463, "grad_norm": 0.4140625, "learning_rate": 0.00031673727580969235, "loss": 0.2771, "step": 80436 }, { "epoch": 0.14262322259545612, "grad_norm": 0.27734375, "learning_rate": 0.00031670796242919824, "loss": 0.1389, "step": 80438 }, { "epoch": 0.14262676876076594, "grad_norm": 0.37109375, "learning_rate": 0.00031667865247436857, "loss": 0.2415, "step": 80440 }, { "epoch": 0.14263031492607575, "grad_norm": 0.498046875, "learning_rate": 0.0003166493459453314, "loss": 0.1348, "step": 80442 }, { "epoch": 0.14263386109138557, "grad_norm": 0.337890625, "learning_rate": 0.0003166200428422147, "loss": 0.1884, "step": 80444 }, { "epoch": 0.14263740725669538, "grad_norm": 0.2080078125, "learning_rate": 0.00031659074316514726, "loss": 0.1527, "step": 80446 }, { "epoch": 0.1426409534220052, "grad_norm": 0.2314453125, "learning_rate": 0.0003165614469142568, "loss": 0.1692, "step": 80448 }, { "epoch": 0.142644499587315, "grad_norm": 2.078125, "learning_rate": 0.00031653215408967137, "loss": 0.2454, "step": 80450 }, { "epoch": 0.14264804575262482, "grad_norm": 0.5625, "learning_rate": 0.00031650286469151904, "loss": 0.2115, "step": 80452 }, { "epoch": 0.14265159191793464, "grad_norm": 1.90625, "learning_rate": 0.0003164735787199283, "loss": 0.2408, "step": 80454 }, { "epoch": 0.14265513808324445, "grad_norm": 0.314453125, "learning_rate": 0.0003164442961750271, "loss": 0.2068, "step": 80456 }, { "epoch": 0.14265868424855427, "grad_norm": 0.5078125, "learning_rate": 0.0003164150170569434, "loss": 0.147, "step": 80458 }, { "epoch": 0.14266223041386408, "grad_norm": 0.76953125, "learning_rate": 0.00031638574136580504, "loss": 0.3561, "step": 80460 }, { "epoch": 0.1426657765791739, "grad_norm": 0.212890625, "learning_rate": 0.00031635646910174026, "loss": 0.1774, "step": 80462 }, { "epoch": 0.1426693227444837, "grad_norm": 0.84375, "learning_rate": 0.0003163272002648772, "loss": 0.3703, "step": 80464 }, { "epoch": 0.14267286890979353, "grad_norm": 0.40234375, "learning_rate": 0.00031629793485534385, "loss": 0.176, "step": 80466 }, { "epoch": 0.14267641507510334, "grad_norm": 5.65625, "learning_rate": 0.00031626867287326804, "loss": 0.4054, "step": 80468 }, { "epoch": 0.14267996124041316, "grad_norm": 0.5546875, "learning_rate": 0.00031623941431877764, "loss": 0.1246, "step": 80470 }, { "epoch": 0.14268350740572297, "grad_norm": 1.46875, "learning_rate": 0.0003162101591920009, "loss": 0.2655, "step": 80472 }, { "epoch": 0.14268705357103278, "grad_norm": 0.404296875, "learning_rate": 0.0003161809074930657, "loss": 0.2476, "step": 80474 }, { "epoch": 0.1426905997363426, "grad_norm": 0.380859375, "learning_rate": 0.0003161516592221, "loss": 0.1818, "step": 80476 }, { "epoch": 0.1426941459016524, "grad_norm": 0.146484375, "learning_rate": 0.0003161224143792315, "loss": 0.1349, "step": 80478 }, { "epoch": 0.14269769206696223, "grad_norm": 0.388671875, "learning_rate": 0.00031609317296458806, "loss": 0.1936, "step": 80480 }, { "epoch": 0.14270123823227204, "grad_norm": 0.8515625, "learning_rate": 0.0003160639349782982, "loss": 0.1901, "step": 80482 }, { "epoch": 0.14270478439758186, "grad_norm": 0.451171875, "learning_rate": 0.00031603470042048884, "loss": 0.1509, "step": 80484 }, { "epoch": 0.14270833056289167, "grad_norm": 0.294921875, "learning_rate": 0.00031600546929128856, "loss": 0.2216, "step": 80486 }, { "epoch": 0.1427118767282015, "grad_norm": 0.8203125, "learning_rate": 0.00031597624159082475, "loss": 0.2409, "step": 80488 }, { "epoch": 0.14271542289351133, "grad_norm": 0.201171875, "learning_rate": 0.00031594701731922577, "loss": 0.1659, "step": 80490 }, { "epoch": 0.14271896905882114, "grad_norm": 1.25, "learning_rate": 0.0003159177964766188, "loss": 0.178, "step": 80492 }, { "epoch": 0.14272251522413096, "grad_norm": 0.333984375, "learning_rate": 0.0003158885790631322, "loss": 0.1421, "step": 80494 }, { "epoch": 0.14272606138944077, "grad_norm": 1.0390625, "learning_rate": 0.00031585936507889337, "loss": 0.2436, "step": 80496 }, { "epoch": 0.1427296075547506, "grad_norm": 0.318359375, "learning_rate": 0.00031583015452403025, "loss": 0.2116, "step": 80498 }, { "epoch": 0.1427331537200604, "grad_norm": 0.275390625, "learning_rate": 0.0003158009473986705, "loss": 0.1959, "step": 80500 }, { "epoch": 0.14273669988537022, "grad_norm": 1.1640625, "learning_rate": 0.0003157717437029417, "loss": 0.2096, "step": 80502 }, { "epoch": 0.14274024605068003, "grad_norm": 0.341796875, "learning_rate": 0.0003157425434369719, "loss": 0.1785, "step": 80504 }, { "epoch": 0.14274379221598985, "grad_norm": 1.15625, "learning_rate": 0.0003157133466008888, "loss": 0.1969, "step": 80506 }, { "epoch": 0.14274733838129966, "grad_norm": 0.7890625, "learning_rate": 0.0003156841531948198, "loss": 0.174, "step": 80508 }, { "epoch": 0.14275088454660947, "grad_norm": 0.384765625, "learning_rate": 0.0003156549632188926, "loss": 0.1578, "step": 80510 }, { "epoch": 0.1427544307119193, "grad_norm": 0.248046875, "learning_rate": 0.0003156257766732352, "loss": 0.1648, "step": 80512 }, { "epoch": 0.1427579768772291, "grad_norm": 1.4765625, "learning_rate": 0.0003155965935579751, "loss": 0.1547, "step": 80514 }, { "epoch": 0.14276152304253892, "grad_norm": 0.1904296875, "learning_rate": 0.0003155674138732398, "loss": 0.1632, "step": 80516 }, { "epoch": 0.14276506920784873, "grad_norm": 0.375, "learning_rate": 0.000315538237619157, "loss": 0.2027, "step": 80518 }, { "epoch": 0.14276861537315855, "grad_norm": 1.2578125, "learning_rate": 0.00031550906479585407, "loss": 0.3564, "step": 80520 }, { "epoch": 0.14277216153846836, "grad_norm": 0.26171875, "learning_rate": 0.00031547989540345913, "loss": 0.1733, "step": 80522 }, { "epoch": 0.14277570770377818, "grad_norm": 0.478515625, "learning_rate": 0.00031545072944209906, "loss": 0.1827, "step": 80524 }, { "epoch": 0.142779253869088, "grad_norm": 0.3359375, "learning_rate": 0.000315421566911902, "loss": 0.1615, "step": 80526 }, { "epoch": 0.1427828000343978, "grad_norm": 0.37890625, "learning_rate": 0.0003153924078129952, "loss": 0.1654, "step": 80528 }, { "epoch": 0.14278634619970762, "grad_norm": 0.275390625, "learning_rate": 0.0003153632521455065, "loss": 0.168, "step": 80530 }, { "epoch": 0.14278989236501743, "grad_norm": 0.453125, "learning_rate": 0.00031533409990956273, "loss": 0.2034, "step": 80532 }, { "epoch": 0.14279343853032725, "grad_norm": 0.49609375, "learning_rate": 0.000315304951105292, "loss": 0.2436, "step": 80534 }, { "epoch": 0.14279698469563706, "grad_norm": 0.33984375, "learning_rate": 0.0003152758057328218, "loss": 0.2168, "step": 80536 }, { "epoch": 0.14280053086094688, "grad_norm": 1.140625, "learning_rate": 0.00031524666379227926, "loss": 0.258, "step": 80538 }, { "epoch": 0.1428040770262567, "grad_norm": 0.55078125, "learning_rate": 0.000315217525283792, "loss": 0.1621, "step": 80540 }, { "epoch": 0.1428076231915665, "grad_norm": 0.640625, "learning_rate": 0.00031518839020748724, "loss": 0.3838, "step": 80542 }, { "epoch": 0.14281116935687632, "grad_norm": 0.375, "learning_rate": 0.00031515925856349274, "loss": 0.1268, "step": 80544 }, { "epoch": 0.14281471552218614, "grad_norm": 0.484375, "learning_rate": 0.0003151301303519356, "loss": 0.1377, "step": 80546 }, { "epoch": 0.14281826168749595, "grad_norm": 0.380859375, "learning_rate": 0.0003151010055729438, "loss": 0.1938, "step": 80548 }, { "epoch": 0.14282180785280577, "grad_norm": 0.37109375, "learning_rate": 0.0003150718842266438, "loss": 0.2036, "step": 80550 }, { "epoch": 0.14282535401811558, "grad_norm": 0.28515625, "learning_rate": 0.0003150427663131637, "loss": 0.1501, "step": 80552 }, { "epoch": 0.1428289001834254, "grad_norm": 0.55859375, "learning_rate": 0.00031501365183263057, "loss": 0.2077, "step": 80554 }, { "epoch": 0.1428324463487352, "grad_norm": 0.62890625, "learning_rate": 0.0003149845407851717, "loss": 0.3214, "step": 80556 }, { "epoch": 0.14283599251404502, "grad_norm": 0.267578125, "learning_rate": 0.00031495543317091457, "loss": 0.1599, "step": 80558 }, { "epoch": 0.14283953867935484, "grad_norm": 0.27734375, "learning_rate": 0.00031492632898998606, "loss": 0.2613, "step": 80560 }, { "epoch": 0.14284308484466465, "grad_norm": 0.380859375, "learning_rate": 0.00031489722824251404, "loss": 0.2546, "step": 80562 }, { "epoch": 0.14284663100997447, "grad_norm": 0.92578125, "learning_rate": 0.00031486813092862536, "loss": 0.1634, "step": 80564 }, { "epoch": 0.14285017717528428, "grad_norm": 0.6875, "learning_rate": 0.00031483903704844756, "loss": 0.1807, "step": 80566 }, { "epoch": 0.1428537233405941, "grad_norm": 0.8828125, "learning_rate": 0.00031480994660210767, "loss": 0.1991, "step": 80568 }, { "epoch": 0.1428572695059039, "grad_norm": 0.4609375, "learning_rate": 0.00031478085958973285, "loss": 0.1825, "step": 80570 }, { "epoch": 0.14286081567121373, "grad_norm": 0.41015625, "learning_rate": 0.0003147517760114508, "loss": 0.2275, "step": 80572 }, { "epoch": 0.14286436183652354, "grad_norm": 0.28125, "learning_rate": 0.0003147226958673879, "loss": 0.3665, "step": 80574 }, { "epoch": 0.14286790800183335, "grad_norm": 0.80078125, "learning_rate": 0.000314693619157672, "loss": 0.2443, "step": 80576 }, { "epoch": 0.14287145416714317, "grad_norm": 0.25, "learning_rate": 0.00031466454588243, "loss": 0.1577, "step": 80578 }, { "epoch": 0.142875000332453, "grad_norm": 0.30078125, "learning_rate": 0.00031463547604178933, "loss": 0.1769, "step": 80580 }, { "epoch": 0.14287854649776283, "grad_norm": 0.71484375, "learning_rate": 0.00031460640963587653, "loss": 0.1877, "step": 80582 }, { "epoch": 0.14288209266307264, "grad_norm": 0.1806640625, "learning_rate": 0.0003145773466648193, "loss": 0.1806, "step": 80584 }, { "epoch": 0.14288563882838246, "grad_norm": 0.31640625, "learning_rate": 0.0003145482871287445, "loss": 0.229, "step": 80586 }, { "epoch": 0.14288918499369227, "grad_norm": 3.484375, "learning_rate": 0.00031451923102777924, "loss": 0.474, "step": 80588 }, { "epoch": 0.14289273115900208, "grad_norm": 0.205078125, "learning_rate": 0.00031449017836205067, "loss": 0.2004, "step": 80590 }, { "epoch": 0.1428962773243119, "grad_norm": 0.3828125, "learning_rate": 0.0003144611291316855, "loss": 0.17, "step": 80592 }, { "epoch": 0.1428998234896217, "grad_norm": 0.66015625, "learning_rate": 0.0003144320833368113, "loss": 0.2208, "step": 80594 }, { "epoch": 0.14290336965493153, "grad_norm": 0.279296875, "learning_rate": 0.00031440304097755495, "loss": 0.1695, "step": 80596 }, { "epoch": 0.14290691582024134, "grad_norm": 0.33984375, "learning_rate": 0.00031437400205404326, "loss": 0.1603, "step": 80598 }, { "epoch": 0.14291046198555116, "grad_norm": 0.90234375, "learning_rate": 0.0003143449665664031, "loss": 0.1727, "step": 80600 }, { "epoch": 0.14291400815086097, "grad_norm": 0.396484375, "learning_rate": 0.000314315934514762, "loss": 0.1369, "step": 80602 }, { "epoch": 0.1429175543161708, "grad_norm": 0.64453125, "learning_rate": 0.00031428690589924656, "loss": 0.1544, "step": 80604 }, { "epoch": 0.1429211004814806, "grad_norm": 0.40625, "learning_rate": 0.0003142578807199838, "loss": 0.2032, "step": 80606 }, { "epoch": 0.14292464664679042, "grad_norm": 0.1826171875, "learning_rate": 0.00031422885897710066, "loss": 0.1651, "step": 80608 }, { "epoch": 0.14292819281210023, "grad_norm": 0.2001953125, "learning_rate": 0.0003141998406707239, "loss": 0.1831, "step": 80610 }, { "epoch": 0.14293173897741004, "grad_norm": 0.2373046875, "learning_rate": 0.000314170825800981, "loss": 0.1699, "step": 80612 }, { "epoch": 0.14293528514271986, "grad_norm": 0.8125, "learning_rate": 0.00031414181436799794, "loss": 0.1768, "step": 80614 }, { "epoch": 0.14293883130802967, "grad_norm": 2.796875, "learning_rate": 0.0003141128063719023, "loss": 0.2459, "step": 80616 }, { "epoch": 0.1429423774733395, "grad_norm": 0.609375, "learning_rate": 0.00031408380181282067, "loss": 0.1939, "step": 80618 }, { "epoch": 0.1429459236386493, "grad_norm": 0.2216796875, "learning_rate": 0.0003140548006908803, "loss": 0.1835, "step": 80620 }, { "epoch": 0.14294946980395912, "grad_norm": 0.37890625, "learning_rate": 0.0003140258030062074, "loss": 0.1925, "step": 80622 }, { "epoch": 0.14295301596926893, "grad_norm": 1.2421875, "learning_rate": 0.00031399680875892916, "loss": 0.304, "step": 80624 }, { "epoch": 0.14295656213457875, "grad_norm": 0.61328125, "learning_rate": 0.0003139678179491723, "loss": 0.2118, "step": 80626 }, { "epoch": 0.14296010829988856, "grad_norm": 0.380859375, "learning_rate": 0.00031393883057706355, "loss": 0.1839, "step": 80628 }, { "epoch": 0.14296365446519838, "grad_norm": 0.3828125, "learning_rate": 0.00031390984664273006, "loss": 0.1758, "step": 80630 }, { "epoch": 0.1429672006305082, "grad_norm": 2.21875, "learning_rate": 0.00031388086614629796, "loss": 0.3667, "step": 80632 }, { "epoch": 0.142970746795818, "grad_norm": 0.7109375, "learning_rate": 0.0003138518890878945, "loss": 0.2058, "step": 80634 }, { "epoch": 0.14297429296112782, "grad_norm": 0.2451171875, "learning_rate": 0.000313822915467646, "loss": 0.1325, "step": 80636 }, { "epoch": 0.14297783912643763, "grad_norm": 0.3828125, "learning_rate": 0.00031379394528567974, "loss": 0.1832, "step": 80638 }, { "epoch": 0.14298138529174745, "grad_norm": 0.345703125, "learning_rate": 0.0003137649785421217, "loss": 0.1746, "step": 80640 }, { "epoch": 0.14298493145705726, "grad_norm": 0.318359375, "learning_rate": 0.00031373601523709924, "loss": 0.1601, "step": 80642 }, { "epoch": 0.14298847762236708, "grad_norm": 0.296875, "learning_rate": 0.00031370705537073874, "loss": 0.133, "step": 80644 }, { "epoch": 0.1429920237876769, "grad_norm": 0.64453125, "learning_rate": 0.00031367809894316666, "loss": 0.213, "step": 80646 }, { "epoch": 0.1429955699529867, "grad_norm": 0.392578125, "learning_rate": 0.00031364914595451, "loss": 0.1587, "step": 80648 }, { "epoch": 0.14299911611829652, "grad_norm": 0.375, "learning_rate": 0.00031362019640489503, "loss": 0.168, "step": 80650 }, { "epoch": 0.14300266228360634, "grad_norm": 0.48828125, "learning_rate": 0.00031359125029444863, "loss": 0.2128, "step": 80652 }, { "epoch": 0.14300620844891615, "grad_norm": 0.3359375, "learning_rate": 0.0003135623076232973, "loss": 0.17, "step": 80654 }, { "epoch": 0.14300975461422596, "grad_norm": 0.298828125, "learning_rate": 0.00031353336839156775, "loss": 0.1988, "step": 80656 }, { "epoch": 0.14301330077953578, "grad_norm": 0.50390625, "learning_rate": 0.0003135044325993861, "loss": 0.2316, "step": 80658 }, { "epoch": 0.1430168469448456, "grad_norm": 0.369140625, "learning_rate": 0.0003134755002468794, "loss": 0.1415, "step": 80660 }, { "epoch": 0.1430203931101554, "grad_norm": 2.390625, "learning_rate": 0.0003134465713341741, "loss": 0.3844, "step": 80662 }, { "epoch": 0.14302393927546522, "grad_norm": 1.4140625, "learning_rate": 0.0003134176458613965, "loss": 0.1952, "step": 80664 }, { "epoch": 0.14302748544077504, "grad_norm": 0.482421875, "learning_rate": 0.0003133887238286732, "loss": 0.2105, "step": 80666 }, { "epoch": 0.14303103160608485, "grad_norm": 0.193359375, "learning_rate": 0.00031335980523613063, "loss": 0.1765, "step": 80668 }, { "epoch": 0.14303457777139467, "grad_norm": 0.59375, "learning_rate": 0.00031333089008389565, "loss": 0.2126, "step": 80670 }, { "epoch": 0.1430381239367045, "grad_norm": 0.33984375, "learning_rate": 0.000313301978372094, "loss": 0.1858, "step": 80672 }, { "epoch": 0.14304167010201432, "grad_norm": 0.41796875, "learning_rate": 0.00031327307010085273, "loss": 0.1662, "step": 80674 }, { "epoch": 0.14304521626732414, "grad_norm": 0.259765625, "learning_rate": 0.000313244165270298, "loss": 0.1738, "step": 80676 }, { "epoch": 0.14304876243263395, "grad_norm": 0.310546875, "learning_rate": 0.00031321526388055655, "loss": 0.1328, "step": 80678 }, { "epoch": 0.14305230859794377, "grad_norm": 0.41015625, "learning_rate": 0.0003131863659317541, "loss": 0.1462, "step": 80680 }, { "epoch": 0.14305585476325358, "grad_norm": 0.82421875, "learning_rate": 0.0003131574714240176, "loss": 0.1378, "step": 80682 }, { "epoch": 0.1430594009285634, "grad_norm": 0.439453125, "learning_rate": 0.0003131285803574734, "loss": 0.2521, "step": 80684 }, { "epoch": 0.1430629470938732, "grad_norm": 3.6875, "learning_rate": 0.0003130996927322477, "loss": 0.2138, "step": 80686 }, { "epoch": 0.14306649325918303, "grad_norm": 0.263671875, "learning_rate": 0.0003130708085484668, "loss": 0.1349, "step": 80688 }, { "epoch": 0.14307003942449284, "grad_norm": 0.3515625, "learning_rate": 0.000313041927806257, "loss": 0.1817, "step": 80690 }, { "epoch": 0.14307358558980265, "grad_norm": 1.1953125, "learning_rate": 0.0003130130505057448, "loss": 0.1993, "step": 80692 }, { "epoch": 0.14307713175511247, "grad_norm": 0.412109375, "learning_rate": 0.00031298417664705634, "loss": 0.1964, "step": 80694 }, { "epoch": 0.14308067792042228, "grad_norm": 0.53515625, "learning_rate": 0.000312955306230318, "loss": 0.2192, "step": 80696 }, { "epoch": 0.1430842240857321, "grad_norm": 0.23046875, "learning_rate": 0.00031292643925565577, "loss": 0.1494, "step": 80698 }, { "epoch": 0.1430877702510419, "grad_norm": 0.5703125, "learning_rate": 0.0003128975757231962, "loss": 0.166, "step": 80700 }, { "epoch": 0.14309131641635173, "grad_norm": 0.3203125, "learning_rate": 0.0003128687156330656, "loss": 0.1801, "step": 80702 }, { "epoch": 0.14309486258166154, "grad_norm": 0.60546875, "learning_rate": 0.00031283985898538996, "loss": 0.1284, "step": 80704 }, { "epoch": 0.14309840874697136, "grad_norm": 0.76171875, "learning_rate": 0.00031281100578029554, "loss": 0.1737, "step": 80706 }, { "epoch": 0.14310195491228117, "grad_norm": 0.236328125, "learning_rate": 0.0003127821560179084, "loss": 0.1684, "step": 80708 }, { "epoch": 0.14310550107759099, "grad_norm": 0.3359375, "learning_rate": 0.0003127533096983549, "loss": 0.1589, "step": 80710 }, { "epoch": 0.1431090472429008, "grad_norm": 0.5859375, "learning_rate": 0.00031272446682176116, "loss": 0.1792, "step": 80712 }, { "epoch": 0.14311259340821061, "grad_norm": 0.251953125, "learning_rate": 0.00031269562738825337, "loss": 0.1956, "step": 80714 }, { "epoch": 0.14311613957352043, "grad_norm": 0.259765625, "learning_rate": 0.0003126667913979573, "loss": 0.1693, "step": 80716 }, { "epoch": 0.14311968573883024, "grad_norm": 0.1953125, "learning_rate": 0.0003126379588509996, "loss": 0.2195, "step": 80718 }, { "epoch": 0.14312323190414006, "grad_norm": 1.7265625, "learning_rate": 0.00031260912974750613, "loss": 0.2346, "step": 80720 }, { "epoch": 0.14312677806944987, "grad_norm": 0.2109375, "learning_rate": 0.00031258030408760294, "loss": 0.1744, "step": 80722 }, { "epoch": 0.1431303242347597, "grad_norm": 0.421875, "learning_rate": 0.00031255148187141603, "loss": 0.1945, "step": 80724 }, { "epoch": 0.1431338704000695, "grad_norm": 1.0078125, "learning_rate": 0.0003125226630990714, "loss": 0.3029, "step": 80726 }, { "epoch": 0.14313741656537932, "grad_norm": 0.3671875, "learning_rate": 0.0003124938477706956, "loss": 0.0997, "step": 80728 }, { "epoch": 0.14314096273068913, "grad_norm": 0.5859375, "learning_rate": 0.00031246503588641396, "loss": 0.1865, "step": 80730 }, { "epoch": 0.14314450889599895, "grad_norm": 0.4609375, "learning_rate": 0.00031243622744635296, "loss": 0.2386, "step": 80732 }, { "epoch": 0.14314805506130876, "grad_norm": 1.0234375, "learning_rate": 0.00031240742245063833, "loss": 0.2037, "step": 80734 }, { "epoch": 0.14315160122661857, "grad_norm": 0.53125, "learning_rate": 0.00031237862089939633, "loss": 0.1431, "step": 80736 }, { "epoch": 0.1431551473919284, "grad_norm": 0.72265625, "learning_rate": 0.00031234982279275256, "loss": 0.1747, "step": 80738 }, { "epoch": 0.1431586935572382, "grad_norm": 1.015625, "learning_rate": 0.000312321028130833, "loss": 0.296, "step": 80740 }, { "epoch": 0.14316223972254802, "grad_norm": 0.375, "learning_rate": 0.000312292236913764, "loss": 0.192, "step": 80742 }, { "epoch": 0.14316578588785783, "grad_norm": 0.337890625, "learning_rate": 0.00031226344914167114, "loss": 0.1372, "step": 80744 }, { "epoch": 0.14316933205316765, "grad_norm": 0.244140625, "learning_rate": 0.0003122346648146804, "loss": 0.1362, "step": 80746 }, { "epoch": 0.14317287821847746, "grad_norm": 0.291015625, "learning_rate": 0.0003122058839329174, "loss": 0.1925, "step": 80748 }, { "epoch": 0.14317642438378728, "grad_norm": 0.50390625, "learning_rate": 0.00031217710649650857, "loss": 0.1653, "step": 80750 }, { "epoch": 0.1431799705490971, "grad_norm": 0.419921875, "learning_rate": 0.00031214833250557927, "loss": 0.147, "step": 80752 }, { "epoch": 0.1431835167144069, "grad_norm": 0.421875, "learning_rate": 0.00031211956196025564, "loss": 0.1553, "step": 80754 }, { "epoch": 0.14318706287971672, "grad_norm": 0.59375, "learning_rate": 0.00031209079486066335, "loss": 0.2044, "step": 80756 }, { "epoch": 0.14319060904502653, "grad_norm": 0.376953125, "learning_rate": 0.0003120620312069281, "loss": 0.2326, "step": 80758 }, { "epoch": 0.14319415521033635, "grad_norm": 1.0390625, "learning_rate": 0.00031203327099917614, "loss": 0.2149, "step": 80760 }, { "epoch": 0.1431977013756462, "grad_norm": 0.380859375, "learning_rate": 0.0003120045142375327, "loss": 0.1853, "step": 80762 }, { "epoch": 0.143201247540956, "grad_norm": 3.203125, "learning_rate": 0.0003119757609221239, "loss": 0.3038, "step": 80764 }, { "epoch": 0.14320479370626582, "grad_norm": 0.244140625, "learning_rate": 0.00031194701105307526, "loss": 0.1463, "step": 80766 }, { "epoch": 0.14320833987157564, "grad_norm": 0.439453125, "learning_rate": 0.00031191826463051293, "loss": 0.2411, "step": 80768 }, { "epoch": 0.14321188603688545, "grad_norm": 0.82421875, "learning_rate": 0.000311889521654562, "loss": 0.2473, "step": 80770 }, { "epoch": 0.14321543220219526, "grad_norm": 0.53515625, "learning_rate": 0.00031186078212534877, "loss": 0.1855, "step": 80772 }, { "epoch": 0.14321897836750508, "grad_norm": 0.65625, "learning_rate": 0.00031183204604299847, "loss": 0.2014, "step": 80774 }, { "epoch": 0.1432225245328149, "grad_norm": 1.515625, "learning_rate": 0.0003118033134076371, "loss": 0.2813, "step": 80776 }, { "epoch": 0.1432260706981247, "grad_norm": 0.310546875, "learning_rate": 0.0003117745842193902, "loss": 0.1378, "step": 80778 }, { "epoch": 0.14322961686343452, "grad_norm": 0.328125, "learning_rate": 0.00031174585847838326, "loss": 0.1256, "step": 80780 }, { "epoch": 0.14323316302874434, "grad_norm": 0.263671875, "learning_rate": 0.0003117171361847422, "loss": 0.1674, "step": 80782 }, { "epoch": 0.14323670919405415, "grad_norm": 0.37109375, "learning_rate": 0.00031168841733859246, "loss": 0.1185, "step": 80784 }, { "epoch": 0.14324025535936397, "grad_norm": 0.3515625, "learning_rate": 0.0003116597019400598, "loss": 0.1711, "step": 80786 }, { "epoch": 0.14324380152467378, "grad_norm": 2.984375, "learning_rate": 0.00031163098998926935, "loss": 0.2759, "step": 80788 }, { "epoch": 0.1432473476899836, "grad_norm": 0.28515625, "learning_rate": 0.0003116022814863474, "loss": 0.1783, "step": 80790 }, { "epoch": 0.1432508938552934, "grad_norm": 0.9453125, "learning_rate": 0.0003115735764314189, "loss": 0.1504, "step": 80792 }, { "epoch": 0.14325444002060322, "grad_norm": 0.392578125, "learning_rate": 0.0003115448748246097, "loss": 0.4599, "step": 80794 }, { "epoch": 0.14325798618591304, "grad_norm": 0.8125, "learning_rate": 0.00031151617666604525, "loss": 0.1874, "step": 80796 }, { "epoch": 0.14326153235122285, "grad_norm": 0.3046875, "learning_rate": 0.0003114874819558508, "loss": 0.1791, "step": 80798 }, { "epoch": 0.14326507851653267, "grad_norm": 0.259765625, "learning_rate": 0.0003114587906941523, "loss": 0.2025, "step": 80800 }, { "epoch": 0.14326862468184248, "grad_norm": 0.35546875, "learning_rate": 0.0003114301028810751, "loss": 0.1805, "step": 80802 }, { "epoch": 0.1432721708471523, "grad_norm": 1.5390625, "learning_rate": 0.00031140141851674437, "loss": 0.2586, "step": 80804 }, { "epoch": 0.1432757170124621, "grad_norm": 2.953125, "learning_rate": 0.0003113727376012857, "loss": 0.2016, "step": 80806 }, { "epoch": 0.14327926317777193, "grad_norm": 1.90625, "learning_rate": 0.00031134406013482477, "loss": 0.1629, "step": 80808 }, { "epoch": 0.14328280934308174, "grad_norm": 0.15625, "learning_rate": 0.0003113153861174867, "loss": 0.1412, "step": 80810 }, { "epoch": 0.14328635550839156, "grad_norm": 0.66796875, "learning_rate": 0.0003112867155493972, "loss": 0.1447, "step": 80812 }, { "epoch": 0.14328990167370137, "grad_norm": 0.94140625, "learning_rate": 0.00031125804843068136, "loss": 0.2192, "step": 80814 }, { "epoch": 0.14329344783901118, "grad_norm": 0.2734375, "learning_rate": 0.00031122938476146444, "loss": 0.202, "step": 80816 }, { "epoch": 0.143296994004321, "grad_norm": 0.28515625, "learning_rate": 0.0003112007245418725, "loss": 0.2552, "step": 80818 }, { "epoch": 0.1433005401696308, "grad_norm": 0.259765625, "learning_rate": 0.00031117206777203, "loss": 0.1419, "step": 80820 }, { "epoch": 0.14330408633494063, "grad_norm": 1.4140625, "learning_rate": 0.0003111434144520628, "loss": 0.2746, "step": 80822 }, { "epoch": 0.14330763250025044, "grad_norm": 0.294921875, "learning_rate": 0.00031111476458209585, "loss": 0.2015, "step": 80824 }, { "epoch": 0.14331117866556026, "grad_norm": 0.328125, "learning_rate": 0.00031108611816225523, "loss": 0.1939, "step": 80826 }, { "epoch": 0.14331472483087007, "grad_norm": 1.6953125, "learning_rate": 0.0003110574751926652, "loss": 0.3838, "step": 80828 }, { "epoch": 0.1433182709961799, "grad_norm": 0.27734375, "learning_rate": 0.00031102883567345164, "loss": 0.1443, "step": 80830 }, { "epoch": 0.1433218171614897, "grad_norm": 0.1982421875, "learning_rate": 0.0003110001996047397, "loss": 0.1577, "step": 80832 }, { "epoch": 0.14332536332679952, "grad_norm": 0.62109375, "learning_rate": 0.0003109715669866546, "loss": 0.2245, "step": 80834 }, { "epoch": 0.14332890949210933, "grad_norm": 0.2578125, "learning_rate": 0.0003109429378193215, "loss": 0.1712, "step": 80836 }, { "epoch": 0.14333245565741914, "grad_norm": 0.7421875, "learning_rate": 0.0003109143121028655, "loss": 0.1875, "step": 80838 }, { "epoch": 0.14333600182272896, "grad_norm": 0.240234375, "learning_rate": 0.00031088568983741213, "loss": 0.1828, "step": 80840 }, { "epoch": 0.14333954798803877, "grad_norm": 0.41796875, "learning_rate": 0.00031085707102308633, "loss": 0.1506, "step": 80842 }, { "epoch": 0.1433430941533486, "grad_norm": 0.24609375, "learning_rate": 0.0003108284556600134, "loss": 0.1484, "step": 80844 }, { "epoch": 0.1433466403186584, "grad_norm": 2.359375, "learning_rate": 0.0003107998437483182, "loss": 0.2253, "step": 80846 }, { "epoch": 0.14335018648396822, "grad_norm": 0.78125, "learning_rate": 0.00031077123528812616, "loss": 0.1756, "step": 80848 }, { "epoch": 0.14335373264927803, "grad_norm": 1.5546875, "learning_rate": 0.00031074263027956246, "loss": 0.2796, "step": 80850 }, { "epoch": 0.14335727881458787, "grad_norm": 1.1328125, "learning_rate": 0.0003107140287227519, "loss": 0.2231, "step": 80852 }, { "epoch": 0.1433608249798977, "grad_norm": 0.66796875, "learning_rate": 0.0003106854306178197, "loss": 0.2058, "step": 80854 }, { "epoch": 0.1433643711452075, "grad_norm": 1.9765625, "learning_rate": 0.00031065683596489087, "loss": 0.2409, "step": 80856 }, { "epoch": 0.14336791731051732, "grad_norm": 1.6328125, "learning_rate": 0.00031062824476409074, "loss": 0.5672, "step": 80858 }, { "epoch": 0.14337146347582713, "grad_norm": 0.2451171875, "learning_rate": 0.0003105996570155439, "loss": 0.1349, "step": 80860 }, { "epoch": 0.14337500964113695, "grad_norm": 0.90234375, "learning_rate": 0.0003105710727193758, "loss": 0.1732, "step": 80862 }, { "epoch": 0.14337855580644676, "grad_norm": 0.55078125, "learning_rate": 0.00031054249187571103, "loss": 0.2095, "step": 80864 }, { "epoch": 0.14338210197175658, "grad_norm": 0.68359375, "learning_rate": 0.00031051391448467527, "loss": 0.1884, "step": 80866 }, { "epoch": 0.1433856481370664, "grad_norm": 0.35546875, "learning_rate": 0.0003104853405463926, "loss": 0.217, "step": 80868 }, { "epoch": 0.1433891943023762, "grad_norm": 0.1845703125, "learning_rate": 0.00031045677006098874, "loss": 0.1435, "step": 80870 }, { "epoch": 0.14339274046768602, "grad_norm": 0.224609375, "learning_rate": 0.00031042820302858826, "loss": 0.1773, "step": 80872 }, { "epoch": 0.14339628663299583, "grad_norm": 0.31640625, "learning_rate": 0.00031039963944931625, "loss": 0.21, "step": 80874 }, { "epoch": 0.14339983279830565, "grad_norm": 2.0, "learning_rate": 0.0003103710793232975, "loss": 0.2616, "step": 80876 }, { "epoch": 0.14340337896361546, "grad_norm": 0.6484375, "learning_rate": 0.0003103425226506568, "loss": 0.1921, "step": 80878 }, { "epoch": 0.14340692512892528, "grad_norm": 0.283203125, "learning_rate": 0.0003103139694315195, "loss": 0.1417, "step": 80880 }, { "epoch": 0.1434104712942351, "grad_norm": 2.140625, "learning_rate": 0.0003102854196660099, "loss": 0.1994, "step": 80882 }, { "epoch": 0.1434140174595449, "grad_norm": 0.71875, "learning_rate": 0.0003102568733542535, "loss": 0.2114, "step": 80884 }, { "epoch": 0.14341756362485472, "grad_norm": 0.251953125, "learning_rate": 0.0003102283304963745, "loss": 0.1643, "step": 80886 }, { "epoch": 0.14342110979016454, "grad_norm": 0.5234375, "learning_rate": 0.00031019979109249814, "loss": 0.184, "step": 80888 }, { "epoch": 0.14342465595547435, "grad_norm": 0.578125, "learning_rate": 0.0003101712551427492, "loss": 0.175, "step": 80890 }, { "epoch": 0.14342820212078417, "grad_norm": 0.306640625, "learning_rate": 0.0003101427226472524, "loss": 0.1647, "step": 80892 }, { "epoch": 0.14343174828609398, "grad_norm": 0.6328125, "learning_rate": 0.00031011419360613254, "loss": 0.1911, "step": 80894 }, { "epoch": 0.1434352944514038, "grad_norm": 0.703125, "learning_rate": 0.0003100856680195141, "loss": 0.2249, "step": 80896 }, { "epoch": 0.1434388406167136, "grad_norm": 0.83203125, "learning_rate": 0.00031005714588752236, "loss": 0.1692, "step": 80898 }, { "epoch": 0.14344238678202342, "grad_norm": 0.2080078125, "learning_rate": 0.0003100286272102818, "loss": 0.1566, "step": 80900 }, { "epoch": 0.14344593294733324, "grad_norm": 0.373046875, "learning_rate": 0.00031000011198791715, "loss": 0.1549, "step": 80902 }, { "epoch": 0.14344947911264305, "grad_norm": 0.4140625, "learning_rate": 0.000309971600220553, "loss": 0.1915, "step": 80904 }, { "epoch": 0.14345302527795287, "grad_norm": 0.34375, "learning_rate": 0.0003099430919083143, "loss": 0.1748, "step": 80906 }, { "epoch": 0.14345657144326268, "grad_norm": 0.7890625, "learning_rate": 0.00030991458705132563, "loss": 0.1776, "step": 80908 }, { "epoch": 0.1434601176085725, "grad_norm": 1.6015625, "learning_rate": 0.0003098860856497115, "loss": 0.2672, "step": 80910 }, { "epoch": 0.1434636637738823, "grad_norm": 0.2109375, "learning_rate": 0.0003098575877035968, "loss": 0.1374, "step": 80912 }, { "epoch": 0.14346720993919213, "grad_norm": 0.65234375, "learning_rate": 0.0003098290932131058, "loss": 0.2039, "step": 80914 }, { "epoch": 0.14347075610450194, "grad_norm": 0.2734375, "learning_rate": 0.0003098006021783638, "loss": 0.1677, "step": 80916 }, { "epoch": 0.14347430226981175, "grad_norm": 0.345703125, "learning_rate": 0.0003097721145994945, "loss": 0.1548, "step": 80918 }, { "epoch": 0.14347784843512157, "grad_norm": 0.2275390625, "learning_rate": 0.00030974363047662324, "loss": 0.1572, "step": 80920 }, { "epoch": 0.14348139460043138, "grad_norm": 0.59765625, "learning_rate": 0.00030971514980987426, "loss": 0.228, "step": 80922 }, { "epoch": 0.1434849407657412, "grad_norm": 0.7578125, "learning_rate": 0.0003096866725993722, "loss": 0.2268, "step": 80924 }, { "epoch": 0.143488486931051, "grad_norm": 0.34375, "learning_rate": 0.00030965819884524147, "loss": 0.1861, "step": 80926 }, { "epoch": 0.14349203309636083, "grad_norm": 0.2890625, "learning_rate": 0.00030962972854760653, "loss": 0.184, "step": 80928 }, { "epoch": 0.14349557926167064, "grad_norm": 0.439453125, "learning_rate": 0.00030960126170659233, "loss": 0.3377, "step": 80930 }, { "epoch": 0.14349912542698046, "grad_norm": 0.89453125, "learning_rate": 0.000309572798322323, "loss": 0.1643, "step": 80932 }, { "epoch": 0.14350267159229027, "grad_norm": 0.451171875, "learning_rate": 0.0003095443383949231, "loss": 0.133, "step": 80934 }, { "epoch": 0.14350621775760009, "grad_norm": 0.42578125, "learning_rate": 0.000309515881924517, "loss": 0.1375, "step": 80936 }, { "epoch": 0.1435097639229099, "grad_norm": 0.255859375, "learning_rate": 0.0003094874289112294, "loss": 0.1996, "step": 80938 }, { "epoch": 0.14351331008821971, "grad_norm": 0.173828125, "learning_rate": 0.00030945897935518465, "loss": 0.1351, "step": 80940 }, { "epoch": 0.14351685625352953, "grad_norm": 0.318359375, "learning_rate": 0.00030943053325650706, "loss": 0.1898, "step": 80942 }, { "epoch": 0.14352040241883937, "grad_norm": 1.421875, "learning_rate": 0.00030940209061532114, "loss": 0.1775, "step": 80944 }, { "epoch": 0.14352394858414919, "grad_norm": 0.12890625, "learning_rate": 0.00030937365143175104, "loss": 0.161, "step": 80946 }, { "epoch": 0.143527494749459, "grad_norm": 0.2431640625, "learning_rate": 0.00030934521570592175, "loss": 0.1257, "step": 80948 }, { "epoch": 0.14353104091476881, "grad_norm": 0.435546875, "learning_rate": 0.0003093167834379569, "loss": 0.224, "step": 80950 }, { "epoch": 0.14353458708007863, "grad_norm": 0.53125, "learning_rate": 0.0003092883546279812, "loss": 0.1597, "step": 80952 }, { "epoch": 0.14353813324538844, "grad_norm": 0.93359375, "learning_rate": 0.0003092599292761188, "loss": 0.1847, "step": 80954 }, { "epoch": 0.14354167941069826, "grad_norm": 0.1904296875, "learning_rate": 0.0003092315073824945, "loss": 0.1663, "step": 80956 }, { "epoch": 0.14354522557600807, "grad_norm": 0.5546875, "learning_rate": 0.000309203088947232, "loss": 0.1419, "step": 80958 }, { "epoch": 0.1435487717413179, "grad_norm": 0.349609375, "learning_rate": 0.00030917467397045584, "loss": 0.156, "step": 80960 }, { "epoch": 0.1435523179066277, "grad_norm": 2.046875, "learning_rate": 0.0003091462624522904, "loss": 0.252, "step": 80962 }, { "epoch": 0.14355586407193752, "grad_norm": 0.453125, "learning_rate": 0.0003091178543928597, "loss": 0.2034, "step": 80964 }, { "epoch": 0.14355941023724733, "grad_norm": 0.546875, "learning_rate": 0.00030908944979228834, "loss": 0.1802, "step": 80966 }, { "epoch": 0.14356295640255715, "grad_norm": 0.255859375, "learning_rate": 0.0003090610486507001, "loss": 0.1463, "step": 80968 }, { "epoch": 0.14356650256786696, "grad_norm": 0.2294921875, "learning_rate": 0.00030903265096821953, "loss": 0.1923, "step": 80970 }, { "epoch": 0.14357004873317677, "grad_norm": 0.53125, "learning_rate": 0.00030900425674497043, "loss": 0.1868, "step": 80972 }, { "epoch": 0.1435735948984866, "grad_norm": 0.451171875, "learning_rate": 0.0003089758659810776, "loss": 0.2623, "step": 80974 }, { "epoch": 0.1435771410637964, "grad_norm": 0.26171875, "learning_rate": 0.0003089474786766646, "loss": 0.2113, "step": 80976 }, { "epoch": 0.14358068722910622, "grad_norm": 1.2890625, "learning_rate": 0.00030891909483185597, "loss": 0.1615, "step": 80978 }, { "epoch": 0.14358423339441603, "grad_norm": 0.8359375, "learning_rate": 0.0003088907144467757, "loss": 0.1628, "step": 80980 }, { "epoch": 0.14358777955972585, "grad_norm": 0.56640625, "learning_rate": 0.0003088623375215479, "loss": 0.1741, "step": 80982 }, { "epoch": 0.14359132572503566, "grad_norm": 0.349609375, "learning_rate": 0.00030883396405629666, "loss": 0.1443, "step": 80984 }, { "epoch": 0.14359487189034548, "grad_norm": 0.404296875, "learning_rate": 0.00030880559405114594, "loss": 0.1416, "step": 80986 }, { "epoch": 0.1435984180556553, "grad_norm": 0.33203125, "learning_rate": 0.0003087772275062203, "loss": 0.1735, "step": 80988 }, { "epoch": 0.1436019642209651, "grad_norm": 0.87890625, "learning_rate": 0.0003087488644216433, "loss": 0.1917, "step": 80990 }, { "epoch": 0.14360551038627492, "grad_norm": 0.1875, "learning_rate": 0.00030872050479753923, "loss": 0.2052, "step": 80992 }, { "epoch": 0.14360905655158474, "grad_norm": 0.6484375, "learning_rate": 0.00030869214863403174, "loss": 0.2115, "step": 80994 }, { "epoch": 0.14361260271689455, "grad_norm": 0.59375, "learning_rate": 0.0003086637959312454, "loss": 0.3036, "step": 80996 }, { "epoch": 0.14361614888220436, "grad_norm": 0.41796875, "learning_rate": 0.00030863544668930406, "loss": 0.17, "step": 80998 }, { "epoch": 0.14361969504751418, "grad_norm": 1.078125, "learning_rate": 0.0003086071009083314, "loss": 0.217, "step": 81000 }, { "epoch": 0.143623241212824, "grad_norm": 0.49609375, "learning_rate": 0.00030857875858845166, "loss": 0.1398, "step": 81002 }, { "epoch": 0.1436267873781338, "grad_norm": 0.408203125, "learning_rate": 0.0003085504197297886, "loss": 0.22, "step": 81004 }, { "epoch": 0.14363033354344362, "grad_norm": 0.38671875, "learning_rate": 0.00030852208433246667, "loss": 0.1987, "step": 81006 }, { "epoch": 0.14363387970875344, "grad_norm": 0.85546875, "learning_rate": 0.0003084937523966089, "loss": 0.2002, "step": 81008 }, { "epoch": 0.14363742587406325, "grad_norm": 0.796875, "learning_rate": 0.0003084654239223399, "loss": 0.3012, "step": 81010 }, { "epoch": 0.14364097203937307, "grad_norm": 1.25, "learning_rate": 0.0003084370989097832, "loss": 0.3298, "step": 81012 }, { "epoch": 0.14364451820468288, "grad_norm": 1.1640625, "learning_rate": 0.00030840877735906333, "loss": 0.2304, "step": 81014 }, { "epoch": 0.1436480643699927, "grad_norm": 0.361328125, "learning_rate": 0.00030838045927030313, "loss": 0.213, "step": 81016 }, { "epoch": 0.1436516105353025, "grad_norm": 0.3359375, "learning_rate": 0.0003083521446436272, "loss": 0.1869, "step": 81018 }, { "epoch": 0.14365515670061232, "grad_norm": 0.166015625, "learning_rate": 0.00030832383347915917, "loss": 0.1617, "step": 81020 }, { "epoch": 0.14365870286592214, "grad_norm": 1.109375, "learning_rate": 0.0003082955257770228, "loss": 0.1843, "step": 81022 }, { "epoch": 0.14366224903123195, "grad_norm": 0.69921875, "learning_rate": 0.00030826722153734195, "loss": 0.1214, "step": 81024 }, { "epoch": 0.14366579519654177, "grad_norm": 0.28125, "learning_rate": 0.00030823892076024015, "loss": 0.153, "step": 81026 }, { "epoch": 0.14366934136185158, "grad_norm": 0.412109375, "learning_rate": 0.00030821062344584167, "loss": 0.1718, "step": 81028 }, { "epoch": 0.1436728875271614, "grad_norm": 0.94921875, "learning_rate": 0.0003081823295942699, "loss": 0.1953, "step": 81030 }, { "epoch": 0.1436764336924712, "grad_norm": 0.462890625, "learning_rate": 0.00030815403920564876, "loss": 0.1746, "step": 81032 }, { "epoch": 0.14367997985778105, "grad_norm": 0.388671875, "learning_rate": 0.0003081257522801016, "loss": 0.1839, "step": 81034 }, { "epoch": 0.14368352602309087, "grad_norm": 0.443359375, "learning_rate": 0.0003080974688177527, "loss": 0.1888, "step": 81036 }, { "epoch": 0.14368707218840068, "grad_norm": 1.2734375, "learning_rate": 0.00030806918881872553, "loss": 0.2605, "step": 81038 }, { "epoch": 0.1436906183537105, "grad_norm": 0.494140625, "learning_rate": 0.00030804091228314365, "loss": 0.3442, "step": 81040 }, { "epoch": 0.1436941645190203, "grad_norm": 0.44921875, "learning_rate": 0.0003080126392111307, "loss": 0.1594, "step": 81042 }, { "epoch": 0.14369771068433013, "grad_norm": 0.91796875, "learning_rate": 0.0003079843696028104, "loss": 0.1657, "step": 81044 }, { "epoch": 0.14370125684963994, "grad_norm": 0.5234375, "learning_rate": 0.00030795610345830656, "loss": 0.1894, "step": 81046 }, { "epoch": 0.14370480301494976, "grad_norm": 0.50390625, "learning_rate": 0.00030792784077774256, "loss": 0.1703, "step": 81048 }, { "epoch": 0.14370834918025957, "grad_norm": 0.52734375, "learning_rate": 0.0003078995815612422, "loss": 0.2193, "step": 81050 }, { "epoch": 0.14371189534556938, "grad_norm": 0.318359375, "learning_rate": 0.00030787132580892875, "loss": 0.1429, "step": 81052 }, { "epoch": 0.1437154415108792, "grad_norm": 0.51171875, "learning_rate": 0.0003078430735209261, "loss": 0.259, "step": 81054 }, { "epoch": 0.14371898767618901, "grad_norm": 0.56640625, "learning_rate": 0.00030781482469735777, "loss": 0.2501, "step": 81056 }, { "epoch": 0.14372253384149883, "grad_norm": 0.296875, "learning_rate": 0.0003077865793383473, "loss": 0.2414, "step": 81058 }, { "epoch": 0.14372608000680864, "grad_norm": 3.640625, "learning_rate": 0.00030775833744401803, "loss": 0.2392, "step": 81060 }, { "epoch": 0.14372962617211846, "grad_norm": 0.73828125, "learning_rate": 0.0003077300990144936, "loss": 0.2815, "step": 81062 }, { "epoch": 0.14373317233742827, "grad_norm": 0.2490234375, "learning_rate": 0.00030770186404989767, "loss": 0.186, "step": 81064 }, { "epoch": 0.1437367185027381, "grad_norm": 0.45703125, "learning_rate": 0.00030767363255035336, "loss": 0.227, "step": 81066 }, { "epoch": 0.1437402646680479, "grad_norm": 0.376953125, "learning_rate": 0.0003076454045159844, "loss": 0.3068, "step": 81068 }, { "epoch": 0.14374381083335772, "grad_norm": 2.390625, "learning_rate": 0.0003076171799469141, "loss": 0.2998, "step": 81070 }, { "epoch": 0.14374735699866753, "grad_norm": 0.263671875, "learning_rate": 0.0003075889588432663, "loss": 0.2912, "step": 81072 }, { "epoch": 0.14375090316397734, "grad_norm": 0.578125, "learning_rate": 0.0003075607412051638, "loss": 0.1737, "step": 81074 }, { "epoch": 0.14375444932928716, "grad_norm": 0.53515625, "learning_rate": 0.0003075325270327304, "loss": 0.1415, "step": 81076 }, { "epoch": 0.14375799549459697, "grad_norm": 0.240234375, "learning_rate": 0.0003075043163260895, "loss": 0.2516, "step": 81078 }, { "epoch": 0.1437615416599068, "grad_norm": 0.3125, "learning_rate": 0.0003074761090853643, "loss": 0.3804, "step": 81080 }, { "epoch": 0.1437650878252166, "grad_norm": 0.41796875, "learning_rate": 0.00030744790531067834, "loss": 0.2007, "step": 81082 }, { "epoch": 0.14376863399052642, "grad_norm": 1.5546875, "learning_rate": 0.00030741970500215466, "loss": 0.2462, "step": 81084 }, { "epoch": 0.14377218015583623, "grad_norm": 0.296875, "learning_rate": 0.00030739150815991694, "loss": 0.1518, "step": 81086 }, { "epoch": 0.14377572632114605, "grad_norm": 1.4921875, "learning_rate": 0.0003073633147840884, "loss": 0.2005, "step": 81088 }, { "epoch": 0.14377927248645586, "grad_norm": 0.28125, "learning_rate": 0.00030733512487479236, "loss": 0.1267, "step": 81090 }, { "epoch": 0.14378281865176568, "grad_norm": 0.69921875, "learning_rate": 0.00030730693843215195, "loss": 0.1904, "step": 81092 }, { "epoch": 0.1437863648170755, "grad_norm": 0.609375, "learning_rate": 0.00030727875545629043, "loss": 0.2241, "step": 81094 }, { "epoch": 0.1437899109823853, "grad_norm": 0.486328125, "learning_rate": 0.0003072505759473314, "loss": 0.2068, "step": 81096 }, { "epoch": 0.14379345714769512, "grad_norm": 0.59375, "learning_rate": 0.0003072223999053977, "loss": 0.1836, "step": 81098 }, { "epoch": 0.14379700331300493, "grad_norm": 0.423828125, "learning_rate": 0.0003071942273306128, "loss": 0.1685, "step": 81100 }, { "epoch": 0.14380054947831475, "grad_norm": 1.6953125, "learning_rate": 0.0003071660582230996, "loss": 0.2121, "step": 81102 }, { "epoch": 0.14380409564362456, "grad_norm": 0.30078125, "learning_rate": 0.000307137892582982, "loss": 0.1602, "step": 81104 }, { "epoch": 0.14380764180893438, "grad_norm": 0.478515625, "learning_rate": 0.00030710973041038226, "loss": 0.1655, "step": 81106 }, { "epoch": 0.1438111879742442, "grad_norm": 8.75, "learning_rate": 0.00030708157170542417, "loss": 0.2002, "step": 81108 }, { "epoch": 0.143814734139554, "grad_norm": 0.400390625, "learning_rate": 0.00030705341646823084, "loss": 0.1581, "step": 81110 }, { "epoch": 0.14381828030486382, "grad_norm": 0.39453125, "learning_rate": 0.0003070252646989251, "loss": 0.1457, "step": 81112 }, { "epoch": 0.14382182647017364, "grad_norm": 0.42578125, "learning_rate": 0.0003069971163976303, "loss": 0.1889, "step": 81114 }, { "epoch": 0.14382537263548345, "grad_norm": 0.48828125, "learning_rate": 0.00030696897156446936, "loss": 0.2043, "step": 81116 }, { "epoch": 0.14382891880079327, "grad_norm": 0.4375, "learning_rate": 0.0003069408301995655, "loss": 0.1738, "step": 81118 }, { "epoch": 0.14383246496610308, "grad_norm": 0.34765625, "learning_rate": 0.000306912692303042, "loss": 0.1708, "step": 81120 }, { "epoch": 0.1438360111314129, "grad_norm": 0.3515625, "learning_rate": 0.00030688455787502157, "loss": 0.1475, "step": 81122 }, { "epoch": 0.14383955729672274, "grad_norm": 0.54296875, "learning_rate": 0.00030685642691562724, "loss": 0.2359, "step": 81124 }, { "epoch": 0.14384310346203255, "grad_norm": 0.5859375, "learning_rate": 0.00030682829942498246, "loss": 0.1951, "step": 81126 }, { "epoch": 0.14384664962734237, "grad_norm": 0.26953125, "learning_rate": 0.00030680017540320984, "loss": 0.1614, "step": 81128 }, { "epoch": 0.14385019579265218, "grad_norm": 0.41796875, "learning_rate": 0.0003067720548504326, "loss": 0.1592, "step": 81130 }, { "epoch": 0.143853741957962, "grad_norm": 0.890625, "learning_rate": 0.0003067439377667735, "loss": 0.2356, "step": 81132 }, { "epoch": 0.1438572881232718, "grad_norm": 0.5390625, "learning_rate": 0.00030671582415235557, "loss": 0.1944, "step": 81134 }, { "epoch": 0.14386083428858162, "grad_norm": 1.0078125, "learning_rate": 0.000306687714007302, "loss": 0.187, "step": 81136 }, { "epoch": 0.14386438045389144, "grad_norm": 2.84375, "learning_rate": 0.00030665960733173555, "loss": 0.1521, "step": 81138 }, { "epoch": 0.14386792661920125, "grad_norm": 0.267578125, "learning_rate": 0.00030663150412577907, "loss": 0.1713, "step": 81140 }, { "epoch": 0.14387147278451107, "grad_norm": 0.365234375, "learning_rate": 0.0003066034043895555, "loss": 0.2021, "step": 81142 }, { "epoch": 0.14387501894982088, "grad_norm": 0.23046875, "learning_rate": 0.00030657530812318784, "loss": 0.1451, "step": 81144 }, { "epoch": 0.1438785651151307, "grad_norm": 0.42578125, "learning_rate": 0.000306547215326799, "loss": 0.2393, "step": 81146 }, { "epoch": 0.1438821112804405, "grad_norm": 0.3515625, "learning_rate": 0.0003065191260005117, "loss": 0.1698, "step": 81148 }, { "epoch": 0.14388565744575033, "grad_norm": 0.435546875, "learning_rate": 0.0003064910401444489, "loss": 0.1859, "step": 81150 }, { "epoch": 0.14388920361106014, "grad_norm": 0.20703125, "learning_rate": 0.0003064629577587331, "loss": 0.142, "step": 81152 }, { "epoch": 0.14389274977636995, "grad_norm": 0.52734375, "learning_rate": 0.00030643487884348774, "loss": 0.2554, "step": 81154 }, { "epoch": 0.14389629594167977, "grad_norm": 0.203125, "learning_rate": 0.00030640680339883493, "loss": 0.1293, "step": 81156 }, { "epoch": 0.14389984210698958, "grad_norm": 0.6171875, "learning_rate": 0.0003063787314248979, "loss": 0.1726, "step": 81158 }, { "epoch": 0.1439033882722994, "grad_norm": 0.3046875, "learning_rate": 0.00030635066292179926, "loss": 0.1586, "step": 81160 }, { "epoch": 0.1439069344376092, "grad_norm": 1.2734375, "learning_rate": 0.0003063225978896621, "loss": 0.1899, "step": 81162 }, { "epoch": 0.14391048060291903, "grad_norm": 0.439453125, "learning_rate": 0.00030629453632860856, "loss": 0.1715, "step": 81164 }, { "epoch": 0.14391402676822884, "grad_norm": 0.421875, "learning_rate": 0.0003062664782387619, "loss": 0.1669, "step": 81166 }, { "epoch": 0.14391757293353866, "grad_norm": 0.55078125, "learning_rate": 0.0003062384236202445, "loss": 0.1682, "step": 81168 }, { "epoch": 0.14392111909884847, "grad_norm": 2.03125, "learning_rate": 0.00030621037247317916, "loss": 0.2315, "step": 81170 }, { "epoch": 0.14392466526415829, "grad_norm": 0.244140625, "learning_rate": 0.00030618232479768864, "loss": 0.1462, "step": 81172 }, { "epoch": 0.1439282114294681, "grad_norm": 0.625, "learning_rate": 0.00030615428059389523, "loss": 0.1761, "step": 81174 }, { "epoch": 0.14393175759477791, "grad_norm": 0.400390625, "learning_rate": 0.0003061262398619222, "loss": 0.1762, "step": 81176 }, { "epoch": 0.14393530376008773, "grad_norm": 0.298828125, "learning_rate": 0.0003060982026018918, "loss": 0.2111, "step": 81178 }, { "epoch": 0.14393884992539754, "grad_norm": 0.3828125, "learning_rate": 0.0003060701688139267, "loss": 0.1522, "step": 81180 }, { "epoch": 0.14394239609070736, "grad_norm": 0.5390625, "learning_rate": 0.00030604213849814933, "loss": 0.2011, "step": 81182 }, { "epoch": 0.14394594225601717, "grad_norm": 0.33984375, "learning_rate": 0.00030601411165468264, "loss": 0.172, "step": 81184 }, { "epoch": 0.143949488421327, "grad_norm": 0.59375, "learning_rate": 0.000305986088283649, "loss": 0.1686, "step": 81186 }, { "epoch": 0.1439530345866368, "grad_norm": 0.6015625, "learning_rate": 0.00030595806838517106, "loss": 0.1613, "step": 81188 }, { "epoch": 0.14395658075194662, "grad_norm": 0.94140625, "learning_rate": 0.00030593005195937126, "loss": 0.1533, "step": 81190 }, { "epoch": 0.14396012691725643, "grad_norm": 0.2578125, "learning_rate": 0.00030590203900637196, "loss": 0.1872, "step": 81192 }, { "epoch": 0.14396367308256625, "grad_norm": 0.64453125, "learning_rate": 0.00030587402952629626, "loss": 0.1698, "step": 81194 }, { "epoch": 0.14396721924787606, "grad_norm": 0.4375, "learning_rate": 0.00030584602351926593, "loss": 0.1721, "step": 81196 }, { "epoch": 0.14397076541318587, "grad_norm": 1.1796875, "learning_rate": 0.0003058180209854039, "loss": 0.2275, "step": 81198 }, { "epoch": 0.1439743115784957, "grad_norm": 1.15625, "learning_rate": 0.00030579002192483246, "loss": 0.5006, "step": 81200 }, { "epoch": 0.1439778577438055, "grad_norm": 0.74609375, "learning_rate": 0.00030576202633767434, "loss": 0.2742, "step": 81202 }, { "epoch": 0.14398140390911532, "grad_norm": 0.45703125, "learning_rate": 0.00030573403422405154, "loss": 0.1497, "step": 81204 }, { "epoch": 0.14398495007442513, "grad_norm": 0.55859375, "learning_rate": 0.0003057060455840869, "loss": 0.169, "step": 81206 }, { "epoch": 0.14398849623973495, "grad_norm": 0.349609375, "learning_rate": 0.0003056780604179026, "loss": 0.1744, "step": 81208 }, { "epoch": 0.14399204240504476, "grad_norm": 0.2294921875, "learning_rate": 0.000305650078725621, "loss": 0.126, "step": 81210 }, { "epoch": 0.14399558857035458, "grad_norm": 0.4140625, "learning_rate": 0.00030562210050736466, "loss": 0.1726, "step": 81212 }, { "epoch": 0.1439991347356644, "grad_norm": 0.7421875, "learning_rate": 0.00030559412576325556, "loss": 0.2015, "step": 81214 }, { "epoch": 0.14400268090097423, "grad_norm": 0.98828125, "learning_rate": 0.0003055661544934165, "loss": 0.2825, "step": 81216 }, { "epoch": 0.14400622706628405, "grad_norm": 0.3671875, "learning_rate": 0.0003055381866979695, "loss": 0.2005, "step": 81218 }, { "epoch": 0.14400977323159386, "grad_norm": 0.4296875, "learning_rate": 0.0003055102223770373, "loss": 0.1726, "step": 81220 }, { "epoch": 0.14401331939690368, "grad_norm": 0.54296875, "learning_rate": 0.0003054822615307415, "loss": 0.1834, "step": 81222 }, { "epoch": 0.1440168655622135, "grad_norm": 0.5, "learning_rate": 0.00030545430415920496, "loss": 0.1468, "step": 81224 }, { "epoch": 0.1440204117275233, "grad_norm": 0.515625, "learning_rate": 0.00030542635026254984, "loss": 0.1883, "step": 81226 }, { "epoch": 0.14402395789283312, "grad_norm": 0.322265625, "learning_rate": 0.00030539839984089826, "loss": 0.1477, "step": 81228 }, { "epoch": 0.14402750405814294, "grad_norm": 0.38671875, "learning_rate": 0.00030537045289437246, "loss": 0.3028, "step": 81230 }, { "epoch": 0.14403105022345275, "grad_norm": 0.4296875, "learning_rate": 0.0003053425094230946, "loss": 0.2423, "step": 81232 }, { "epoch": 0.14403459638876256, "grad_norm": 0.12890625, "learning_rate": 0.0003053145694271872, "loss": 0.1458, "step": 81234 }, { "epoch": 0.14403814255407238, "grad_norm": 0.2314453125, "learning_rate": 0.00030528663290677234, "loss": 0.1624, "step": 81236 }, { "epoch": 0.1440416887193822, "grad_norm": 1.0546875, "learning_rate": 0.000305258699861972, "loss": 0.282, "step": 81238 }, { "epoch": 0.144045234884692, "grad_norm": 1.3671875, "learning_rate": 0.00030523077029290843, "loss": 0.288, "step": 81240 }, { "epoch": 0.14404878105000182, "grad_norm": 0.341796875, "learning_rate": 0.0003052028441997039, "loss": 0.2086, "step": 81242 }, { "epoch": 0.14405232721531164, "grad_norm": 0.46484375, "learning_rate": 0.0003051749215824804, "loss": 0.1293, "step": 81244 }, { "epoch": 0.14405587338062145, "grad_norm": 0.28515625, "learning_rate": 0.00030514700244136024, "loss": 0.1872, "step": 81246 }, { "epoch": 0.14405941954593127, "grad_norm": 0.4921875, "learning_rate": 0.00030511908677646526, "loss": 0.212, "step": 81248 }, { "epoch": 0.14406296571124108, "grad_norm": 0.59375, "learning_rate": 0.0003050911745879176, "loss": 0.1645, "step": 81250 }, { "epoch": 0.1440665118765509, "grad_norm": 0.1669921875, "learning_rate": 0.00030506326587583974, "loss": 0.1291, "step": 81252 }, { "epoch": 0.1440700580418607, "grad_norm": 0.6015625, "learning_rate": 0.00030503536064035297, "loss": 0.296, "step": 81254 }, { "epoch": 0.14407360420717052, "grad_norm": 0.2890625, "learning_rate": 0.0003050074588815799, "loss": 0.1867, "step": 81256 }, { "epoch": 0.14407715037248034, "grad_norm": 0.65625, "learning_rate": 0.0003049795605996424, "loss": 0.1612, "step": 81258 }, { "epoch": 0.14408069653779015, "grad_norm": 0.39453125, "learning_rate": 0.0003049516657946627, "loss": 0.1643, "step": 81260 }, { "epoch": 0.14408424270309997, "grad_norm": 0.232421875, "learning_rate": 0.00030492377446676234, "loss": 0.1302, "step": 81262 }, { "epoch": 0.14408778886840978, "grad_norm": 0.70703125, "learning_rate": 0.00030489588661606373, "loss": 0.1834, "step": 81264 }, { "epoch": 0.1440913350337196, "grad_norm": 0.42578125, "learning_rate": 0.00030486800224268864, "loss": 0.3846, "step": 81266 }, { "epoch": 0.1440948811990294, "grad_norm": 0.65234375, "learning_rate": 0.00030484012134675905, "loss": 0.1725, "step": 81268 }, { "epoch": 0.14409842736433923, "grad_norm": 0.322265625, "learning_rate": 0.0003048122439283969, "loss": 0.1331, "step": 81270 }, { "epoch": 0.14410197352964904, "grad_norm": 0.326171875, "learning_rate": 0.0003047843699877239, "loss": 0.1251, "step": 81272 }, { "epoch": 0.14410551969495886, "grad_norm": 0.19140625, "learning_rate": 0.00030475649952486233, "loss": 0.1463, "step": 81274 }, { "epoch": 0.14410906586026867, "grad_norm": 0.259765625, "learning_rate": 0.0003047286325399339, "loss": 0.2803, "step": 81276 }, { "epoch": 0.14411261202557848, "grad_norm": 0.43359375, "learning_rate": 0.0003047007690330604, "loss": 0.161, "step": 81278 }, { "epoch": 0.1441161581908883, "grad_norm": 0.291015625, "learning_rate": 0.0003046729090043639, "loss": 0.1964, "step": 81280 }, { "epoch": 0.14411970435619811, "grad_norm": 0.2109375, "learning_rate": 0.0003046450524539659, "loss": 0.2699, "step": 81282 }, { "epoch": 0.14412325052150793, "grad_norm": 0.37109375, "learning_rate": 0.0003046171993819887, "loss": 0.2034, "step": 81284 }, { "epoch": 0.14412679668681774, "grad_norm": 0.6171875, "learning_rate": 0.00030458934978855367, "loss": 0.1282, "step": 81286 }, { "epoch": 0.14413034285212756, "grad_norm": 1.2265625, "learning_rate": 0.00030456150367378287, "loss": 0.2047, "step": 81288 }, { "epoch": 0.14413388901743737, "grad_norm": 0.6484375, "learning_rate": 0.0003045336610377978, "loss": 0.1492, "step": 81290 }, { "epoch": 0.1441374351827472, "grad_norm": 0.59375, "learning_rate": 0.0003045058218807209, "loss": 0.1605, "step": 81292 }, { "epoch": 0.144140981348057, "grad_norm": 1.703125, "learning_rate": 0.00030447798620267296, "loss": 0.3534, "step": 81294 }, { "epoch": 0.14414452751336682, "grad_norm": 0.5703125, "learning_rate": 0.0003044501540037765, "loss": 0.2555, "step": 81296 }, { "epoch": 0.14414807367867663, "grad_norm": 0.1767578125, "learning_rate": 0.00030442232528415296, "loss": 0.1431, "step": 81298 }, { "epoch": 0.14415161984398644, "grad_norm": 0.390625, "learning_rate": 0.0003043945000439238, "loss": 0.169, "step": 81300 }, { "epoch": 0.14415516600929626, "grad_norm": 0.484375, "learning_rate": 0.0003043666782832113, "loss": 0.2425, "step": 81302 }, { "epoch": 0.14415871217460607, "grad_norm": 0.2138671875, "learning_rate": 0.00030433886000213653, "loss": 0.2034, "step": 81304 }, { "epoch": 0.14416225833991592, "grad_norm": 0.380859375, "learning_rate": 0.00030431104520082153, "loss": 0.195, "step": 81306 }, { "epoch": 0.14416580450522573, "grad_norm": 0.333984375, "learning_rate": 0.0003042832338793876, "loss": 0.1612, "step": 81308 }, { "epoch": 0.14416935067053555, "grad_norm": 1.84375, "learning_rate": 0.0003042554260379571, "loss": 0.1929, "step": 81310 }, { "epoch": 0.14417289683584536, "grad_norm": 0.5234375, "learning_rate": 0.00030422762167665066, "loss": 0.144, "step": 81312 }, { "epoch": 0.14417644300115517, "grad_norm": 0.8046875, "learning_rate": 0.0003041998207955905, "loss": 0.2367, "step": 81314 }, { "epoch": 0.144179989166465, "grad_norm": 0.306640625, "learning_rate": 0.0003041720233948981, "loss": 0.1462, "step": 81316 }, { "epoch": 0.1441835353317748, "grad_norm": 0.205078125, "learning_rate": 0.0003041442294746951, "loss": 0.2487, "step": 81318 }, { "epoch": 0.14418708149708462, "grad_norm": 1.3125, "learning_rate": 0.0003041164390351029, "loss": 0.2106, "step": 81320 }, { "epoch": 0.14419062766239443, "grad_norm": 0.296875, "learning_rate": 0.0003040886520762429, "loss": 0.2117, "step": 81322 }, { "epoch": 0.14419417382770425, "grad_norm": 0.8828125, "learning_rate": 0.000304060868598237, "loss": 0.1785, "step": 81324 }, { "epoch": 0.14419771999301406, "grad_norm": 0.173828125, "learning_rate": 0.0003040330886012066, "loss": 0.1202, "step": 81326 }, { "epoch": 0.14420126615832388, "grad_norm": 0.58203125, "learning_rate": 0.0003040053120852729, "loss": 0.1539, "step": 81328 }, { "epoch": 0.1442048123236337, "grad_norm": 0.119140625, "learning_rate": 0.00030397753905055764, "loss": 0.1463, "step": 81330 }, { "epoch": 0.1442083584889435, "grad_norm": 1.4921875, "learning_rate": 0.00030394976949718236, "loss": 0.2337, "step": 81332 }, { "epoch": 0.14421190465425332, "grad_norm": 0.45703125, "learning_rate": 0.0003039220034252683, "loss": 0.1683, "step": 81334 }, { "epoch": 0.14421545081956313, "grad_norm": 0.361328125, "learning_rate": 0.0003038942408349371, "loss": 0.1715, "step": 81336 }, { "epoch": 0.14421899698487295, "grad_norm": 1.015625, "learning_rate": 0.0003038664817263099, "loss": 0.221, "step": 81338 }, { "epoch": 0.14422254315018276, "grad_norm": 0.49609375, "learning_rate": 0.0003038387260995082, "loss": 0.1828, "step": 81340 }, { "epoch": 0.14422608931549258, "grad_norm": 0.388671875, "learning_rate": 0.00030381097395465386, "loss": 0.1626, "step": 81342 }, { "epoch": 0.1442296354808024, "grad_norm": 0.1591796875, "learning_rate": 0.0003037832252918674, "loss": 0.1885, "step": 81344 }, { "epoch": 0.1442331816461122, "grad_norm": 0.353515625, "learning_rate": 0.00030375548011127084, "loss": 0.1622, "step": 81346 }, { "epoch": 0.14423672781142202, "grad_norm": 0.33203125, "learning_rate": 0.00030372773841298506, "loss": 0.2101, "step": 81348 }, { "epoch": 0.14424027397673184, "grad_norm": 0.369140625, "learning_rate": 0.000303700000197132, "loss": 0.1585, "step": 81350 }, { "epoch": 0.14424382014204165, "grad_norm": 0.1494140625, "learning_rate": 0.0003036722654638321, "loss": 0.1505, "step": 81352 }, { "epoch": 0.14424736630735147, "grad_norm": 1.4921875, "learning_rate": 0.0003036445342132075, "loss": 0.5105, "step": 81354 }, { "epoch": 0.14425091247266128, "grad_norm": 0.68359375, "learning_rate": 0.00030361680644537916, "loss": 0.16, "step": 81356 }, { "epoch": 0.1442544586379711, "grad_norm": 0.48828125, "learning_rate": 0.0003035890821604682, "loss": 0.1896, "step": 81358 }, { "epoch": 0.1442580048032809, "grad_norm": 0.61328125, "learning_rate": 0.0003035613613585961, "loss": 0.2087, "step": 81360 }, { "epoch": 0.14426155096859072, "grad_norm": 1.6328125, "learning_rate": 0.00030353364403988363, "loss": 0.2379, "step": 81362 }, { "epoch": 0.14426509713390054, "grad_norm": 0.546875, "learning_rate": 0.0003035059302044527, "loss": 0.1529, "step": 81364 }, { "epoch": 0.14426864329921035, "grad_norm": 0.32421875, "learning_rate": 0.0003034782198524241, "loss": 0.2396, "step": 81366 }, { "epoch": 0.14427218946452017, "grad_norm": 0.7109375, "learning_rate": 0.0003034505129839191, "loss": 0.1409, "step": 81368 }, { "epoch": 0.14427573562982998, "grad_norm": 0.365234375, "learning_rate": 0.0003034228095990587, "loss": 0.1753, "step": 81370 }, { "epoch": 0.1442792817951398, "grad_norm": 0.39453125, "learning_rate": 0.00030339510969796434, "loss": 0.1359, "step": 81372 }, { "epoch": 0.1442828279604496, "grad_norm": 0.5234375, "learning_rate": 0.0003033674132807572, "loss": 0.1586, "step": 81374 }, { "epoch": 0.14428637412575943, "grad_norm": 1.125, "learning_rate": 0.0003033397203475581, "loss": 0.2013, "step": 81376 }, { "epoch": 0.14428992029106924, "grad_norm": 0.220703125, "learning_rate": 0.00030331203089848833, "loss": 0.159, "step": 81378 }, { "epoch": 0.14429346645637905, "grad_norm": 0.318359375, "learning_rate": 0.00030328434493366883, "loss": 0.1335, "step": 81380 }, { "epoch": 0.14429701262168887, "grad_norm": 0.435546875, "learning_rate": 0.000303256662453221, "loss": 0.2382, "step": 81382 }, { "epoch": 0.14430055878699868, "grad_norm": 0.7421875, "learning_rate": 0.00030322898345726566, "loss": 0.2312, "step": 81384 }, { "epoch": 0.1443041049523085, "grad_norm": 0.291015625, "learning_rate": 0.00030320130794592404, "loss": 0.1564, "step": 81386 }, { "epoch": 0.1443076511176183, "grad_norm": 0.83984375, "learning_rate": 0.0003031736359193168, "loss": 0.1966, "step": 81388 }, { "epoch": 0.14431119728292813, "grad_norm": 0.6640625, "learning_rate": 0.0003031459673775654, "loss": 0.1643, "step": 81390 }, { "epoch": 0.14431474344823794, "grad_norm": 0.2275390625, "learning_rate": 0.00030311830232079075, "loss": 0.1435, "step": 81392 }, { "epoch": 0.14431828961354776, "grad_norm": 0.6875, "learning_rate": 0.0003030906407491136, "loss": 0.2013, "step": 81394 }, { "epoch": 0.1443218357788576, "grad_norm": 0.353515625, "learning_rate": 0.0003030629826626552, "loss": 0.1674, "step": 81396 }, { "epoch": 0.1443253819441674, "grad_norm": 0.98828125, "learning_rate": 0.0003030353280615362, "loss": 0.1755, "step": 81398 }, { "epoch": 0.14432892810947723, "grad_norm": 0.5859375, "learning_rate": 0.00030300767694587815, "loss": 0.1685, "step": 81400 }, { "epoch": 0.14433247427478704, "grad_norm": 0.3046875, "learning_rate": 0.0003029800293158012, "loss": 0.227, "step": 81402 }, { "epoch": 0.14433602044009686, "grad_norm": 1.234375, "learning_rate": 0.00030295238517142676, "loss": 0.3592, "step": 81404 }, { "epoch": 0.14433956660540667, "grad_norm": 0.3125, "learning_rate": 0.0003029247445128755, "loss": 0.2031, "step": 81406 }, { "epoch": 0.1443431127707165, "grad_norm": 0.4296875, "learning_rate": 0.0003028971073402688, "loss": 0.1276, "step": 81408 }, { "epoch": 0.1443466589360263, "grad_norm": 0.220703125, "learning_rate": 0.00030286947365372664, "loss": 0.1235, "step": 81410 }, { "epoch": 0.14435020510133612, "grad_norm": 0.279296875, "learning_rate": 0.0003028418434533705, "loss": 0.1973, "step": 81412 }, { "epoch": 0.14435375126664593, "grad_norm": 0.25, "learning_rate": 0.0003028142167393213, "loss": 0.1412, "step": 81414 }, { "epoch": 0.14435729743195574, "grad_norm": 0.94140625, "learning_rate": 0.0003027865935116995, "loss": 0.1634, "step": 81416 }, { "epoch": 0.14436084359726556, "grad_norm": 0.390625, "learning_rate": 0.0003027589737706262, "loss": 0.3165, "step": 81418 }, { "epoch": 0.14436438976257537, "grad_norm": 0.2353515625, "learning_rate": 0.00030273135751622185, "loss": 0.1428, "step": 81420 }, { "epoch": 0.1443679359278852, "grad_norm": 0.2275390625, "learning_rate": 0.00030270374474860754, "loss": 0.147, "step": 81422 }, { "epoch": 0.144371482093195, "grad_norm": 1.1640625, "learning_rate": 0.0003026761354679039, "loss": 0.1879, "step": 81424 }, { "epoch": 0.14437502825850482, "grad_norm": 0.5625, "learning_rate": 0.0003026485296742318, "loss": 0.1593, "step": 81426 }, { "epoch": 0.14437857442381463, "grad_norm": 0.76953125, "learning_rate": 0.0003026209273677117, "loss": 0.1719, "step": 81428 }, { "epoch": 0.14438212058912445, "grad_norm": 0.65234375, "learning_rate": 0.0003025933285484646, "loss": 0.2235, "step": 81430 }, { "epoch": 0.14438566675443426, "grad_norm": 0.328125, "learning_rate": 0.00030256573321661115, "loss": 0.1636, "step": 81432 }, { "epoch": 0.14438921291974408, "grad_norm": 0.54296875, "learning_rate": 0.0003025381413722719, "loss": 0.1881, "step": 81434 }, { "epoch": 0.1443927590850539, "grad_norm": 1.515625, "learning_rate": 0.00030251055301556767, "loss": 0.1616, "step": 81436 }, { "epoch": 0.1443963052503637, "grad_norm": 0.27734375, "learning_rate": 0.0003024829681466188, "loss": 0.1603, "step": 81438 }, { "epoch": 0.14439985141567352, "grad_norm": 1.859375, "learning_rate": 0.00030245538676554665, "loss": 0.2099, "step": 81440 }, { "epoch": 0.14440339758098333, "grad_norm": 0.455078125, "learning_rate": 0.0003024278088724709, "loss": 0.1789, "step": 81442 }, { "epoch": 0.14440694374629315, "grad_norm": 0.546875, "learning_rate": 0.0003024002344675128, "loss": 0.2874, "step": 81444 }, { "epoch": 0.14441048991160296, "grad_norm": 2.484375, "learning_rate": 0.00030237266355079284, "loss": 0.2109, "step": 81446 }, { "epoch": 0.14441403607691278, "grad_norm": 0.271484375, "learning_rate": 0.00030234509612243155, "loss": 0.1554, "step": 81448 }, { "epoch": 0.1444175822422226, "grad_norm": 0.4765625, "learning_rate": 0.00030231753218254936, "loss": 0.1481, "step": 81450 }, { "epoch": 0.1444211284075324, "grad_norm": 0.609375, "learning_rate": 0.0003022899717312668, "loss": 0.2543, "step": 81452 }, { "epoch": 0.14442467457284222, "grad_norm": 0.52734375, "learning_rate": 0.00030226241476870476, "loss": 0.2316, "step": 81454 }, { "epoch": 0.14442822073815204, "grad_norm": 0.62109375, "learning_rate": 0.00030223486129498354, "loss": 0.135, "step": 81456 }, { "epoch": 0.14443176690346185, "grad_norm": 0.462890625, "learning_rate": 0.0003022073113102236, "loss": 0.1444, "step": 81458 }, { "epoch": 0.14443531306877166, "grad_norm": 0.3203125, "learning_rate": 0.0003021797648145453, "loss": 0.1921, "step": 81460 }, { "epoch": 0.14443885923408148, "grad_norm": 0.2314453125, "learning_rate": 0.00030215222180806955, "loss": 0.1331, "step": 81462 }, { "epoch": 0.1444424053993913, "grad_norm": 0.28125, "learning_rate": 0.00030212468229091644, "loss": 0.1323, "step": 81464 }, { "epoch": 0.1444459515647011, "grad_norm": 0.287109375, "learning_rate": 0.00030209714626320655, "loss": 0.1727, "step": 81466 }, { "epoch": 0.14444949773001092, "grad_norm": 0.1884765625, "learning_rate": 0.0003020696137250602, "loss": 0.1289, "step": 81468 }, { "epoch": 0.14445304389532074, "grad_norm": 0.2421875, "learning_rate": 0.0003020420846765978, "loss": 0.1826, "step": 81470 }, { "epoch": 0.14445659006063055, "grad_norm": 0.2412109375, "learning_rate": 0.0003020145591179399, "loss": 0.157, "step": 81472 }, { "epoch": 0.14446013622594037, "grad_norm": 0.75, "learning_rate": 0.00030198703704920684, "loss": 0.1824, "step": 81474 }, { "epoch": 0.14446368239125018, "grad_norm": 0.1943359375, "learning_rate": 0.0003019595184705188, "loss": 0.1637, "step": 81476 }, { "epoch": 0.14446722855656, "grad_norm": 0.271484375, "learning_rate": 0.0003019320033819963, "loss": 0.191, "step": 81478 }, { "epoch": 0.1444707747218698, "grad_norm": 0.3125, "learning_rate": 0.0003019044917837597, "loss": 0.2334, "step": 81480 }, { "epoch": 0.14447432088717962, "grad_norm": 0.5859375, "learning_rate": 0.0003018769836759293, "loss": 0.175, "step": 81482 }, { "epoch": 0.14447786705248944, "grad_norm": 0.21484375, "learning_rate": 0.00030184947905862537, "loss": 0.1599, "step": 81484 }, { "epoch": 0.14448141321779925, "grad_norm": 0.462890625, "learning_rate": 0.0003018219779319682, "loss": 0.1466, "step": 81486 }, { "epoch": 0.1444849593831091, "grad_norm": 1.5546875, "learning_rate": 0.0003017944802960778, "loss": 0.1708, "step": 81488 }, { "epoch": 0.1444885055484189, "grad_norm": 1.84375, "learning_rate": 0.0003017669861510751, "loss": 0.2626, "step": 81490 }, { "epoch": 0.14449205171372873, "grad_norm": 0.765625, "learning_rate": 0.00030173949549707965, "loss": 0.1376, "step": 81492 }, { "epoch": 0.14449559787903854, "grad_norm": 0.62890625, "learning_rate": 0.0003017120083342121, "loss": 0.3383, "step": 81494 }, { "epoch": 0.14449914404434835, "grad_norm": 0.21484375, "learning_rate": 0.00030168452466259227, "loss": 0.2593, "step": 81496 }, { "epoch": 0.14450269020965817, "grad_norm": 0.53515625, "learning_rate": 0.00030165704448234103, "loss": 0.2197, "step": 81498 }, { "epoch": 0.14450623637496798, "grad_norm": 0.251953125, "learning_rate": 0.0003016295677935778, "loss": 0.2225, "step": 81500 }, { "epoch": 0.1445097825402778, "grad_norm": 0.2119140625, "learning_rate": 0.0003016020945964233, "loss": 0.1485, "step": 81502 }, { "epoch": 0.1445133287055876, "grad_norm": 0.431640625, "learning_rate": 0.0003015746248909974, "loss": 0.1854, "step": 81504 }, { "epoch": 0.14451687487089743, "grad_norm": 0.33984375, "learning_rate": 0.0003015471586774203, "loss": 0.1445, "step": 81506 }, { "epoch": 0.14452042103620724, "grad_norm": 0.388671875, "learning_rate": 0.00030151969595581217, "loss": 0.1609, "step": 81508 }, { "epoch": 0.14452396720151706, "grad_norm": 0.640625, "learning_rate": 0.0003014922367262929, "loss": 0.2214, "step": 81510 }, { "epoch": 0.14452751336682687, "grad_norm": 0.9140625, "learning_rate": 0.00030146478098898294, "loss": 0.1898, "step": 81512 }, { "epoch": 0.14453105953213669, "grad_norm": 0.494140625, "learning_rate": 0.00030143732874400223, "loss": 0.1893, "step": 81514 }, { "epoch": 0.1445346056974465, "grad_norm": 0.470703125, "learning_rate": 0.0003014098799914707, "loss": 0.1452, "step": 81516 }, { "epoch": 0.14453815186275631, "grad_norm": 0.875, "learning_rate": 0.0003013824347315084, "loss": 0.1608, "step": 81518 }, { "epoch": 0.14454169802806613, "grad_norm": 0.38671875, "learning_rate": 0.00030135499296423545, "loss": 0.1746, "step": 81520 }, { "epoch": 0.14454524419337594, "grad_norm": 2.296875, "learning_rate": 0.000301327554689772, "loss": 0.2138, "step": 81522 }, { "epoch": 0.14454879035868576, "grad_norm": 1.3046875, "learning_rate": 0.0003013001199082379, "loss": 0.2207, "step": 81524 }, { "epoch": 0.14455233652399557, "grad_norm": 0.38671875, "learning_rate": 0.00030127268861975307, "loss": 0.1708, "step": 81526 }, { "epoch": 0.1445558826893054, "grad_norm": 0.75, "learning_rate": 0.00030124526082443736, "loss": 0.1735, "step": 81528 }, { "epoch": 0.1445594288546152, "grad_norm": 0.35546875, "learning_rate": 0.0003012178365224113, "loss": 0.2014, "step": 81530 }, { "epoch": 0.14456297501992502, "grad_norm": 0.62109375, "learning_rate": 0.00030119041571379407, "loss": 0.1611, "step": 81532 }, { "epoch": 0.14456652118523483, "grad_norm": 0.431640625, "learning_rate": 0.0003011629983987062, "loss": 0.2295, "step": 81534 }, { "epoch": 0.14457006735054465, "grad_norm": 0.6015625, "learning_rate": 0.0003011355845772671, "loss": 0.6734, "step": 81536 }, { "epoch": 0.14457361351585446, "grad_norm": 0.59375, "learning_rate": 0.0003011081742495973, "loss": 0.1648, "step": 81538 }, { "epoch": 0.14457715968116427, "grad_norm": 0.35546875, "learning_rate": 0.00030108076741581586, "loss": 0.2074, "step": 81540 }, { "epoch": 0.1445807058464741, "grad_norm": 0.2431640625, "learning_rate": 0.00030105336407604326, "loss": 0.2907, "step": 81542 }, { "epoch": 0.1445842520117839, "grad_norm": 0.7265625, "learning_rate": 0.0003010259642303993, "loss": 0.1643, "step": 81544 }, { "epoch": 0.14458779817709372, "grad_norm": 0.2099609375, "learning_rate": 0.00030099856787900344, "loss": 0.1977, "step": 81546 }, { "epoch": 0.14459134434240353, "grad_norm": 0.498046875, "learning_rate": 0.000300971175021976, "loss": 0.1629, "step": 81548 }, { "epoch": 0.14459489050771335, "grad_norm": 0.41796875, "learning_rate": 0.00030094378565943624, "loss": 0.2032, "step": 81550 }, { "epoch": 0.14459843667302316, "grad_norm": 0.66015625, "learning_rate": 0.00030091639979150435, "loss": 0.197, "step": 81552 }, { "epoch": 0.14460198283833298, "grad_norm": 0.88671875, "learning_rate": 0.00030088901741829975, "loss": 0.2042, "step": 81554 }, { "epoch": 0.1446055290036428, "grad_norm": 0.18359375, "learning_rate": 0.0003008616385399429, "loss": 0.1872, "step": 81556 }, { "epoch": 0.1446090751689526, "grad_norm": 0.82421875, "learning_rate": 0.0003008342631565526, "loss": 0.1882, "step": 81558 }, { "epoch": 0.14461262133426242, "grad_norm": 0.384765625, "learning_rate": 0.0003008068912682492, "loss": 0.1559, "step": 81560 }, { "epoch": 0.14461616749957223, "grad_norm": 0.6171875, "learning_rate": 0.00030077952287515223, "loss": 0.2183, "step": 81562 }, { "epoch": 0.14461971366488205, "grad_norm": 0.69921875, "learning_rate": 0.0003007521579773814, "loss": 0.1806, "step": 81564 }, { "epoch": 0.14462325983019186, "grad_norm": 0.248046875, "learning_rate": 0.00030072479657505634, "loss": 0.183, "step": 81566 }, { "epoch": 0.14462680599550168, "grad_norm": 0.326171875, "learning_rate": 0.0003006974386682966, "loss": 0.1561, "step": 81568 }, { "epoch": 0.1446303521608115, "grad_norm": 2.953125, "learning_rate": 0.00030067008425722214, "loss": 0.4227, "step": 81570 }, { "epoch": 0.1446338983261213, "grad_norm": 0.87109375, "learning_rate": 0.0003006427333419525, "loss": 0.1525, "step": 81572 }, { "epoch": 0.14463744449143112, "grad_norm": 0.31640625, "learning_rate": 0.0003006153859226071, "loss": 0.1535, "step": 81574 }, { "epoch": 0.14464099065674094, "grad_norm": 0.388671875, "learning_rate": 0.0003005880419993056, "loss": 0.1649, "step": 81576 }, { "epoch": 0.14464453682205078, "grad_norm": 0.66796875, "learning_rate": 0.0003005607015721677, "loss": 0.2013, "step": 81578 }, { "epoch": 0.1446480829873606, "grad_norm": 0.345703125, "learning_rate": 0.000300533364641313, "loss": 0.2224, "step": 81580 }, { "epoch": 0.1446516291526704, "grad_norm": 0.31640625, "learning_rate": 0.00030050603120686096, "loss": 0.1658, "step": 81582 }, { "epoch": 0.14465517531798022, "grad_norm": 0.408203125, "learning_rate": 0.00030047870126893113, "loss": 0.1307, "step": 81584 }, { "epoch": 0.14465872148329004, "grad_norm": 0.318359375, "learning_rate": 0.00030045137482764296, "loss": 0.1807, "step": 81586 }, { "epoch": 0.14466226764859985, "grad_norm": 0.412109375, "learning_rate": 0.0003004240518831163, "loss": 0.1969, "step": 81588 }, { "epoch": 0.14466581381390967, "grad_norm": 0.31640625, "learning_rate": 0.00030039673243547005, "loss": 0.1647, "step": 81590 }, { "epoch": 0.14466935997921948, "grad_norm": 0.63671875, "learning_rate": 0.0003003694164848242, "loss": 0.116, "step": 81592 }, { "epoch": 0.1446729061445293, "grad_norm": 0.2578125, "learning_rate": 0.0003003421040312979, "loss": 0.1912, "step": 81594 }, { "epoch": 0.1446764523098391, "grad_norm": 0.267578125, "learning_rate": 0.000300314795075011, "loss": 0.2256, "step": 81596 }, { "epoch": 0.14467999847514892, "grad_norm": 0.43359375, "learning_rate": 0.0003002874896160824, "loss": 0.1456, "step": 81598 }, { "epoch": 0.14468354464045874, "grad_norm": 0.25390625, "learning_rate": 0.000300260187654632, "loss": 0.1467, "step": 81600 }, { "epoch": 0.14468709080576855, "grad_norm": 0.578125, "learning_rate": 0.0003002328891907789, "loss": 0.2475, "step": 81602 }, { "epoch": 0.14469063697107837, "grad_norm": 0.390625, "learning_rate": 0.0003002055942246427, "loss": 0.1201, "step": 81604 }, { "epoch": 0.14469418313638818, "grad_norm": 0.365234375, "learning_rate": 0.0003001783027563425, "loss": 0.1773, "step": 81606 }, { "epoch": 0.144697729301698, "grad_norm": 0.1669921875, "learning_rate": 0.00030015101478599775, "loss": 0.1648, "step": 81608 }, { "epoch": 0.1447012754670078, "grad_norm": 0.1669921875, "learning_rate": 0.00030012373031372807, "loss": 0.2076, "step": 81610 }, { "epoch": 0.14470482163231763, "grad_norm": 0.494140625, "learning_rate": 0.0003000964493396525, "loss": 0.1689, "step": 81612 }, { "epoch": 0.14470836779762744, "grad_norm": 0.8203125, "learning_rate": 0.00030006917186389045, "loss": 0.1519, "step": 81614 }, { "epoch": 0.14471191396293726, "grad_norm": 0.486328125, "learning_rate": 0.00030004189788656097, "loss": 0.3183, "step": 81616 }, { "epoch": 0.14471546012824707, "grad_norm": 0.52734375, "learning_rate": 0.0003000146274077837, "loss": 0.1741, "step": 81618 }, { "epoch": 0.14471900629355688, "grad_norm": 0.451171875, "learning_rate": 0.00029998736042767776, "loss": 0.2698, "step": 81620 }, { "epoch": 0.1447225524588667, "grad_norm": 0.6015625, "learning_rate": 0.00029996009694636255, "loss": 0.1505, "step": 81622 }, { "epoch": 0.1447260986241765, "grad_norm": 0.38671875, "learning_rate": 0.00029993283696395706, "loss": 0.118, "step": 81624 }, { "epoch": 0.14472964478948633, "grad_norm": 0.25390625, "learning_rate": 0.0002999055804805805, "loss": 0.1893, "step": 81626 }, { "epoch": 0.14473319095479614, "grad_norm": 0.7578125, "learning_rate": 0.0002998783274963524, "loss": 0.1692, "step": 81628 }, { "epoch": 0.14473673712010596, "grad_norm": 0.392578125, "learning_rate": 0.00029985107801139147, "loss": 0.1651, "step": 81630 }, { "epoch": 0.14474028328541577, "grad_norm": 0.326171875, "learning_rate": 0.0002998238320258173, "loss": 0.2708, "step": 81632 }, { "epoch": 0.1447438294507256, "grad_norm": 1.3359375, "learning_rate": 0.00029979658953974885, "loss": 0.1775, "step": 81634 }, { "epoch": 0.1447473756160354, "grad_norm": 0.5625, "learning_rate": 0.0002997693505533051, "loss": 0.1628, "step": 81636 }, { "epoch": 0.14475092178134522, "grad_norm": 0.7578125, "learning_rate": 0.0002997421150666058, "loss": 0.1591, "step": 81638 }, { "epoch": 0.14475446794665503, "grad_norm": 0.25390625, "learning_rate": 0.00029971488307976927, "loss": 0.244, "step": 81640 }, { "epoch": 0.14475801411196484, "grad_norm": 0.2001953125, "learning_rate": 0.00029968765459291517, "loss": 0.2545, "step": 81642 }, { "epoch": 0.14476156027727466, "grad_norm": 0.61328125, "learning_rate": 0.0002996604296061623, "loss": 0.1292, "step": 81644 }, { "epoch": 0.14476510644258447, "grad_norm": 0.47265625, "learning_rate": 0.0002996332081196301, "loss": 0.1794, "step": 81646 }, { "epoch": 0.1447686526078943, "grad_norm": 0.90234375, "learning_rate": 0.0002996059901334369, "loss": 0.1899, "step": 81648 }, { "epoch": 0.1447721987732041, "grad_norm": 0.328125, "learning_rate": 0.0002995787756477025, "loss": 0.1618, "step": 81650 }, { "epoch": 0.14477574493851392, "grad_norm": 0.34765625, "learning_rate": 0.0002995515646625455, "loss": 0.1538, "step": 81652 }, { "epoch": 0.14477929110382373, "grad_norm": 0.328125, "learning_rate": 0.0002995243571780851, "loss": 0.2233, "step": 81654 }, { "epoch": 0.14478283726913355, "grad_norm": 0.515625, "learning_rate": 0.00029949715319444, "loss": 0.2445, "step": 81656 }, { "epoch": 0.14478638343444336, "grad_norm": 0.400390625, "learning_rate": 0.0002994699527117294, "loss": 0.2053, "step": 81658 }, { "epoch": 0.14478992959975318, "grad_norm": 0.1826171875, "learning_rate": 0.00029944275573007224, "loss": 0.1713, "step": 81660 }, { "epoch": 0.144793475765063, "grad_norm": 1.7734375, "learning_rate": 0.0002994155622495875, "loss": 0.3839, "step": 81662 }, { "epoch": 0.1447970219303728, "grad_norm": 0.28125, "learning_rate": 0.0002993883722703941, "loss": 0.1951, "step": 81664 }, { "epoch": 0.14480056809568262, "grad_norm": 0.384765625, "learning_rate": 0.0002993611857926107, "loss": 0.2023, "step": 81666 }, { "epoch": 0.14480411426099246, "grad_norm": 0.84765625, "learning_rate": 0.00029933400281635653, "loss": 0.2149, "step": 81668 }, { "epoch": 0.14480766042630228, "grad_norm": 0.443359375, "learning_rate": 0.0002993068233417505, "loss": 0.1833, "step": 81670 }, { "epoch": 0.1448112065916121, "grad_norm": 0.625, "learning_rate": 0.00029927964736891125, "loss": 0.2631, "step": 81672 }, { "epoch": 0.1448147527569219, "grad_norm": 1.3203125, "learning_rate": 0.0002992524748979576, "loss": 0.1845, "step": 81674 }, { "epoch": 0.14481829892223172, "grad_norm": 0.4453125, "learning_rate": 0.00029922530592900844, "loss": 0.1953, "step": 81676 }, { "epoch": 0.14482184508754153, "grad_norm": 1.375, "learning_rate": 0.000299198140462183, "loss": 0.1961, "step": 81678 }, { "epoch": 0.14482539125285135, "grad_norm": 0.2001953125, "learning_rate": 0.00029917097849759927, "loss": 0.1924, "step": 81680 }, { "epoch": 0.14482893741816116, "grad_norm": 0.77734375, "learning_rate": 0.00029914382003537677, "loss": 0.1737, "step": 81682 }, { "epoch": 0.14483248358347098, "grad_norm": 0.55078125, "learning_rate": 0.0002991166650756338, "loss": 0.2033, "step": 81684 }, { "epoch": 0.1448360297487808, "grad_norm": 0.6015625, "learning_rate": 0.00029908951361848975, "loss": 0.1414, "step": 81686 }, { "epoch": 0.1448395759140906, "grad_norm": 0.27734375, "learning_rate": 0.0002990623656640625, "loss": 0.1439, "step": 81688 }, { "epoch": 0.14484312207940042, "grad_norm": 0.443359375, "learning_rate": 0.00029903522121247153, "loss": 0.1691, "step": 81690 }, { "epoch": 0.14484666824471024, "grad_norm": 0.77734375, "learning_rate": 0.00029900808026383504, "loss": 0.2071, "step": 81692 }, { "epoch": 0.14485021441002005, "grad_norm": 0.51953125, "learning_rate": 0.00029898094281827214, "loss": 0.1355, "step": 81694 }, { "epoch": 0.14485376057532987, "grad_norm": 0.69921875, "learning_rate": 0.0002989538088759012, "loss": 0.2593, "step": 81696 }, { "epoch": 0.14485730674063968, "grad_norm": 0.251953125, "learning_rate": 0.00029892667843684086, "loss": 0.1579, "step": 81698 }, { "epoch": 0.1448608529059495, "grad_norm": 0.7265625, "learning_rate": 0.0002988995515012101, "loss": 0.2163, "step": 81700 }, { "epoch": 0.1448643990712593, "grad_norm": 0.19140625, "learning_rate": 0.00029887242806912723, "loss": 0.1309, "step": 81702 }, { "epoch": 0.14486794523656912, "grad_norm": 0.91015625, "learning_rate": 0.00029884530814071114, "loss": 0.2096, "step": 81704 }, { "epoch": 0.14487149140187894, "grad_norm": 0.2265625, "learning_rate": 0.00029881819171608014, "loss": 0.1743, "step": 81706 }, { "epoch": 0.14487503756718875, "grad_norm": 0.314453125, "learning_rate": 0.0002987910787953531, "loss": 0.1416, "step": 81708 }, { "epoch": 0.14487858373249857, "grad_norm": 0.8359375, "learning_rate": 0.00029876396937864855, "loss": 0.1344, "step": 81710 }, { "epoch": 0.14488212989780838, "grad_norm": 0.251953125, "learning_rate": 0.0002987368634660849, "loss": 0.1224, "step": 81712 }, { "epoch": 0.1448856760631182, "grad_norm": 0.91796875, "learning_rate": 0.0002987097610577808, "loss": 0.1918, "step": 81714 }, { "epoch": 0.144889222228428, "grad_norm": 0.5546875, "learning_rate": 0.0002986826621538546, "loss": 0.1854, "step": 81716 }, { "epoch": 0.14489276839373783, "grad_norm": 0.76953125, "learning_rate": 0.00029865556675442523, "loss": 0.1991, "step": 81718 }, { "epoch": 0.14489631455904764, "grad_norm": 0.318359375, "learning_rate": 0.00029862847485961084, "loss": 0.1981, "step": 81720 }, { "epoch": 0.14489986072435745, "grad_norm": 0.287109375, "learning_rate": 0.00029860138646953005, "loss": 0.1943, "step": 81722 }, { "epoch": 0.14490340688966727, "grad_norm": 0.345703125, "learning_rate": 0.00029857430158430106, "loss": 0.1398, "step": 81724 }, { "epoch": 0.14490695305497708, "grad_norm": 0.3046875, "learning_rate": 0.00029854722020404274, "loss": 0.1549, "step": 81726 }, { "epoch": 0.1449104992202869, "grad_norm": 0.55859375, "learning_rate": 0.0002985201423288735, "loss": 0.2137, "step": 81728 }, { "epoch": 0.1449140453855967, "grad_norm": 0.41796875, "learning_rate": 0.0002984930679589114, "loss": 0.1599, "step": 81730 }, { "epoch": 0.14491759155090653, "grad_norm": 0.201171875, "learning_rate": 0.000298465997094275, "loss": 0.1411, "step": 81732 }, { "epoch": 0.14492113771621634, "grad_norm": 0.267578125, "learning_rate": 0.00029843892973508275, "loss": 0.1759, "step": 81734 }, { "epoch": 0.14492468388152616, "grad_norm": 0.84765625, "learning_rate": 0.0002984118658814533, "loss": 0.2253, "step": 81736 }, { "epoch": 0.14492823004683597, "grad_norm": 0.283203125, "learning_rate": 0.0002983848055335044, "loss": 0.1467, "step": 81738 }, { "epoch": 0.14493177621214579, "grad_norm": 0.5546875, "learning_rate": 0.0002983577486913549, "loss": 0.211, "step": 81740 }, { "epoch": 0.1449353223774556, "grad_norm": 0.482421875, "learning_rate": 0.0002983306953551228, "loss": 0.171, "step": 81742 }, { "epoch": 0.14493886854276541, "grad_norm": 0.75390625, "learning_rate": 0.00029830364552492684, "loss": 0.2545, "step": 81744 }, { "epoch": 0.14494241470807523, "grad_norm": 0.71484375, "learning_rate": 0.0002982765992008848, "loss": 0.132, "step": 81746 }, { "epoch": 0.14494596087338504, "grad_norm": 0.318359375, "learning_rate": 0.00029824955638311536, "loss": 0.1605, "step": 81748 }, { "epoch": 0.14494950703869486, "grad_norm": 0.90625, "learning_rate": 0.00029822251707173667, "loss": 0.1866, "step": 81750 }, { "epoch": 0.14495305320400467, "grad_norm": 0.58984375, "learning_rate": 0.0002981954812668669, "loss": 0.1831, "step": 81752 }, { "epoch": 0.1449565993693145, "grad_norm": 0.490234375, "learning_rate": 0.00029816844896862434, "loss": 0.1244, "step": 81754 }, { "epoch": 0.1449601455346243, "grad_norm": 0.1962890625, "learning_rate": 0.00029814142017712706, "loss": 0.136, "step": 81756 }, { "epoch": 0.14496369169993412, "grad_norm": 0.90234375, "learning_rate": 0.0002981143948924937, "loss": 0.1876, "step": 81758 }, { "epoch": 0.14496723786524396, "grad_norm": 0.2734375, "learning_rate": 0.00029808737311484213, "loss": 0.1726, "step": 81760 }, { "epoch": 0.14497078403055377, "grad_norm": 0.67578125, "learning_rate": 0.0002980603548442906, "loss": 0.2657, "step": 81762 }, { "epoch": 0.1449743301958636, "grad_norm": 0.259765625, "learning_rate": 0.00029803334008095706, "loss": 0.3019, "step": 81764 }, { "epoch": 0.1449778763611734, "grad_norm": 0.6796875, "learning_rate": 0.0002980063288249599, "loss": 0.168, "step": 81766 }, { "epoch": 0.14498142252648322, "grad_norm": 0.546875, "learning_rate": 0.00029797932107641743, "loss": 0.185, "step": 81768 }, { "epoch": 0.14498496869179303, "grad_norm": 0.70703125, "learning_rate": 0.00029795231683544745, "loss": 0.1895, "step": 81770 }, { "epoch": 0.14498851485710285, "grad_norm": 0.431640625, "learning_rate": 0.0002979253161021681, "loss": 0.1959, "step": 81772 }, { "epoch": 0.14499206102241266, "grad_norm": 2.640625, "learning_rate": 0.00029789831887669745, "loss": 0.2476, "step": 81774 }, { "epoch": 0.14499560718772248, "grad_norm": 0.263671875, "learning_rate": 0.00029787132515915395, "loss": 0.1825, "step": 81776 }, { "epoch": 0.1449991533530323, "grad_norm": 0.318359375, "learning_rate": 0.00029784433494965494, "loss": 0.1712, "step": 81778 }, { "epoch": 0.1450026995183421, "grad_norm": 0.73046875, "learning_rate": 0.00029781734824831904, "loss": 0.175, "step": 81780 }, { "epoch": 0.14500624568365192, "grad_norm": 2.40625, "learning_rate": 0.00029779036505526394, "loss": 0.2876, "step": 81782 }, { "epoch": 0.14500979184896173, "grad_norm": 0.455078125, "learning_rate": 0.0002977633853706081, "loss": 0.1312, "step": 81784 }, { "epoch": 0.14501333801427155, "grad_norm": 0.353515625, "learning_rate": 0.00029773640919446905, "loss": 0.2417, "step": 81786 }, { "epoch": 0.14501688417958136, "grad_norm": 0.6796875, "learning_rate": 0.0002977094365269649, "loss": 0.2178, "step": 81788 }, { "epoch": 0.14502043034489118, "grad_norm": 0.23046875, "learning_rate": 0.0002976824673682138, "loss": 0.1575, "step": 81790 }, { "epoch": 0.145023976510201, "grad_norm": 0.1875, "learning_rate": 0.00029765550171833346, "loss": 0.2087, "step": 81792 }, { "epoch": 0.1450275226755108, "grad_norm": 0.416015625, "learning_rate": 0.00029762853957744195, "loss": 0.1317, "step": 81794 }, { "epoch": 0.14503106884082062, "grad_norm": 0.75, "learning_rate": 0.0002976015809456569, "loss": 0.2428, "step": 81796 }, { "epoch": 0.14503461500613044, "grad_norm": 0.5078125, "learning_rate": 0.00029757462582309657, "loss": 0.1702, "step": 81798 }, { "epoch": 0.14503816117144025, "grad_norm": 0.50390625, "learning_rate": 0.00029754767420987895, "loss": 0.1352, "step": 81800 }, { "epoch": 0.14504170733675006, "grad_norm": 0.322265625, "learning_rate": 0.0002975207261061215, "loss": 0.1586, "step": 81802 }, { "epoch": 0.14504525350205988, "grad_norm": 0.50390625, "learning_rate": 0.0002974937815119423, "loss": 0.2114, "step": 81804 }, { "epoch": 0.1450487996673697, "grad_norm": 0.58203125, "learning_rate": 0.000297466840427459, "loss": 0.1801, "step": 81806 }, { "epoch": 0.1450523458326795, "grad_norm": 1.25, "learning_rate": 0.0002974399028527896, "loss": 0.1605, "step": 81808 }, { "epoch": 0.14505589199798932, "grad_norm": 0.412109375, "learning_rate": 0.00029741296878805205, "loss": 0.2005, "step": 81810 }, { "epoch": 0.14505943816329914, "grad_norm": 0.5078125, "learning_rate": 0.0002973860382333639, "loss": 0.1734, "step": 81812 }, { "epoch": 0.14506298432860895, "grad_norm": 1.3828125, "learning_rate": 0.0002973591111888429, "loss": 0.2404, "step": 81814 }, { "epoch": 0.14506653049391877, "grad_norm": 1.125, "learning_rate": 0.000297332187654607, "loss": 0.1996, "step": 81816 }, { "epoch": 0.14507007665922858, "grad_norm": 0.2119140625, "learning_rate": 0.0002973052676307739, "loss": 0.1632, "step": 81818 }, { "epoch": 0.1450736228245384, "grad_norm": 0.64453125, "learning_rate": 0.00029727835111746133, "loss": 0.1841, "step": 81820 }, { "epoch": 0.1450771689898482, "grad_norm": 0.41015625, "learning_rate": 0.000297251438114787, "loss": 0.1597, "step": 81822 }, { "epoch": 0.14508071515515802, "grad_norm": 0.33203125, "learning_rate": 0.00029722452862286836, "loss": 0.1973, "step": 81824 }, { "epoch": 0.14508426132046784, "grad_norm": 0.150390625, "learning_rate": 0.0002971976226418237, "loss": 0.158, "step": 81826 }, { "epoch": 0.14508780748577765, "grad_norm": 0.8828125, "learning_rate": 0.00029717072017176996, "loss": 0.2206, "step": 81828 }, { "epoch": 0.14509135365108747, "grad_norm": 0.423828125, "learning_rate": 0.0002971438212128254, "loss": 0.173, "step": 81830 }, { "epoch": 0.14509489981639728, "grad_norm": 0.6328125, "learning_rate": 0.00029711692576510714, "loss": 0.2119, "step": 81832 }, { "epoch": 0.1450984459817071, "grad_norm": 0.34375, "learning_rate": 0.0002970900338287334, "loss": 0.1998, "step": 81834 }, { "epoch": 0.1451019921470169, "grad_norm": 0.5234375, "learning_rate": 0.00029706314540382117, "loss": 0.1777, "step": 81836 }, { "epoch": 0.14510553831232673, "grad_norm": 1.1796875, "learning_rate": 0.0002970362604904885, "loss": 0.1954, "step": 81838 }, { "epoch": 0.14510908447763654, "grad_norm": 1.796875, "learning_rate": 0.0002970093790888528, "loss": 0.3683, "step": 81840 }, { "epoch": 0.14511263064294636, "grad_norm": 0.498046875, "learning_rate": 0.00029698250119903166, "loss": 0.1691, "step": 81842 }, { "epoch": 0.14511617680825617, "grad_norm": 0.1328125, "learning_rate": 0.0002969556268211426, "loss": 0.1539, "step": 81844 }, { "epoch": 0.14511972297356598, "grad_norm": 0.60546875, "learning_rate": 0.000296928755955303, "loss": 0.1657, "step": 81846 }, { "epoch": 0.1451232691388758, "grad_norm": 0.216796875, "learning_rate": 0.0002969018886016307, "loss": 0.158, "step": 81848 }, { "epoch": 0.14512681530418564, "grad_norm": 0.474609375, "learning_rate": 0.00029687502476024316, "loss": 0.1432, "step": 81850 }, { "epoch": 0.14513036146949546, "grad_norm": 0.875, "learning_rate": 0.0002968481644312576, "loss": 0.1941, "step": 81852 }, { "epoch": 0.14513390763480527, "grad_norm": 0.1474609375, "learning_rate": 0.00029682130761479156, "loss": 0.2812, "step": 81854 }, { "epoch": 0.14513745380011508, "grad_norm": 0.427734375, "learning_rate": 0.0002967944543109627, "loss": 0.1712, "step": 81856 }, { "epoch": 0.1451409999654249, "grad_norm": 1.5859375, "learning_rate": 0.00029676760451988833, "loss": 0.3507, "step": 81858 }, { "epoch": 0.14514454613073471, "grad_norm": 1.3203125, "learning_rate": 0.00029674075824168595, "loss": 0.203, "step": 81860 }, { "epoch": 0.14514809229604453, "grad_norm": 0.388671875, "learning_rate": 0.0002967139154764729, "loss": 0.1903, "step": 81862 }, { "epoch": 0.14515163846135434, "grad_norm": 0.6015625, "learning_rate": 0.00029668707622436624, "loss": 0.2176, "step": 81864 }, { "epoch": 0.14515518462666416, "grad_norm": 0.39453125, "learning_rate": 0.0002966602404854842, "loss": 0.1936, "step": 81866 }, { "epoch": 0.14515873079197397, "grad_norm": 0.625, "learning_rate": 0.00029663340825994324, "loss": 0.3416, "step": 81868 }, { "epoch": 0.1451622769572838, "grad_norm": 0.3828125, "learning_rate": 0.0002966065795478613, "loss": 0.1595, "step": 81870 }, { "epoch": 0.1451658231225936, "grad_norm": 0.447265625, "learning_rate": 0.00029657975434935533, "loss": 0.1352, "step": 81872 }, { "epoch": 0.14516936928790342, "grad_norm": 0.48046875, "learning_rate": 0.00029655293266454315, "loss": 0.1448, "step": 81874 }, { "epoch": 0.14517291545321323, "grad_norm": 0.38671875, "learning_rate": 0.00029652611449354147, "loss": 0.17, "step": 81876 }, { "epoch": 0.14517646161852304, "grad_norm": 0.171875, "learning_rate": 0.00029649929983646797, "loss": 0.1509, "step": 81878 }, { "epoch": 0.14518000778383286, "grad_norm": 0.26953125, "learning_rate": 0.00029647248869343987, "loss": 0.1727, "step": 81880 }, { "epoch": 0.14518355394914267, "grad_norm": 0.62890625, "learning_rate": 0.0002964456810645741, "loss": 0.1981, "step": 81882 }, { "epoch": 0.1451871001144525, "grad_norm": 1.34375, "learning_rate": 0.0002964188769499885, "loss": 0.1844, "step": 81884 }, { "epoch": 0.1451906462797623, "grad_norm": 0.3125, "learning_rate": 0.0002963920763497997, "loss": 0.2072, "step": 81886 }, { "epoch": 0.14519419244507212, "grad_norm": 0.42578125, "learning_rate": 0.00029636527926412527, "loss": 0.244, "step": 81888 }, { "epoch": 0.14519773861038193, "grad_norm": 0.94140625, "learning_rate": 0.0002963384856930821, "loss": 0.2204, "step": 81890 }, { "epoch": 0.14520128477569175, "grad_norm": 0.47265625, "learning_rate": 0.0002963116956367881, "loss": 0.1614, "step": 81892 }, { "epoch": 0.14520483094100156, "grad_norm": 0.466796875, "learning_rate": 0.0002962849090953594, "loss": 0.182, "step": 81894 }, { "epoch": 0.14520837710631138, "grad_norm": 0.349609375, "learning_rate": 0.00029625812606891386, "loss": 0.1387, "step": 81896 }, { "epoch": 0.1452119232716212, "grad_norm": 0.828125, "learning_rate": 0.0002962313465575683, "loss": 0.1735, "step": 81898 }, { "epoch": 0.145215469436931, "grad_norm": 0.57421875, "learning_rate": 0.00029620457056144005, "loss": 0.1442, "step": 81900 }, { "epoch": 0.14521901560224082, "grad_norm": 0.330078125, "learning_rate": 0.00029617779808064617, "loss": 0.1822, "step": 81902 }, { "epoch": 0.14522256176755063, "grad_norm": 0.53515625, "learning_rate": 0.0002961510291153034, "loss": 0.1613, "step": 81904 }, { "epoch": 0.14522610793286045, "grad_norm": 1.0390625, "learning_rate": 0.00029612426366552934, "loss": 0.1606, "step": 81906 }, { "epoch": 0.14522965409817026, "grad_norm": 0.2578125, "learning_rate": 0.0002960975017314408, "loss": 0.1478, "step": 81908 }, { "epoch": 0.14523320026348008, "grad_norm": 0.333984375, "learning_rate": 0.0002960707433131548, "loss": 0.1865, "step": 81910 }, { "epoch": 0.1452367464287899, "grad_norm": 0.453125, "learning_rate": 0.00029604398841078823, "loss": 0.2149, "step": 81912 }, { "epoch": 0.1452402925940997, "grad_norm": 0.3828125, "learning_rate": 0.0002960172370244584, "loss": 0.1494, "step": 81914 }, { "epoch": 0.14524383875940952, "grad_norm": 0.69921875, "learning_rate": 0.0002959904891542823, "loss": 0.2866, "step": 81916 }, { "epoch": 0.14524738492471934, "grad_norm": 0.416015625, "learning_rate": 0.0002959637448003766, "loss": 0.1422, "step": 81918 }, { "epoch": 0.14525093109002915, "grad_norm": 0.67578125, "learning_rate": 0.0002959370039628585, "loss": 0.2269, "step": 81920 }, { "epoch": 0.14525447725533897, "grad_norm": 0.5390625, "learning_rate": 0.0002959102666418447, "loss": 0.1716, "step": 81922 }, { "epoch": 0.14525802342064878, "grad_norm": 0.78125, "learning_rate": 0.00029588353283745275, "loss": 0.2479, "step": 81924 }, { "epoch": 0.1452615695859586, "grad_norm": 0.4296875, "learning_rate": 0.00029585680254979875, "loss": 0.1435, "step": 81926 }, { "epoch": 0.1452651157512684, "grad_norm": 0.439453125, "learning_rate": 0.0002958300757790001, "loss": 0.1808, "step": 81928 }, { "epoch": 0.14526866191657822, "grad_norm": 0.29296875, "learning_rate": 0.00029580335252517347, "loss": 0.1819, "step": 81930 }, { "epoch": 0.14527220808188804, "grad_norm": 3.953125, "learning_rate": 0.00029577663278843627, "loss": 0.2351, "step": 81932 }, { "epoch": 0.14527575424719785, "grad_norm": 0.84375, "learning_rate": 0.0002957499165689045, "loss": 0.1988, "step": 81934 }, { "epoch": 0.14527930041250767, "grad_norm": 0.78125, "learning_rate": 0.00029572320386669557, "loss": 0.183, "step": 81936 }, { "epoch": 0.14528284657781748, "grad_norm": 0.9375, "learning_rate": 0.00029569649468192614, "loss": 0.232, "step": 81938 }, { "epoch": 0.1452863927431273, "grad_norm": 0.2158203125, "learning_rate": 0.00029566978901471313, "loss": 0.094, "step": 81940 }, { "epoch": 0.14528993890843714, "grad_norm": 0.62890625, "learning_rate": 0.00029564308686517326, "loss": 0.2204, "step": 81942 }, { "epoch": 0.14529348507374695, "grad_norm": 0.35546875, "learning_rate": 0.0002956163882334231, "loss": 0.1892, "step": 81944 }, { "epoch": 0.14529703123905677, "grad_norm": 0.296875, "learning_rate": 0.00029558969311957976, "loss": 0.2429, "step": 81946 }, { "epoch": 0.14530057740436658, "grad_norm": 0.4375, "learning_rate": 0.00029556300152375995, "loss": 0.1308, "step": 81948 }, { "epoch": 0.1453041235696764, "grad_norm": 2.265625, "learning_rate": 0.0002955363134460802, "loss": 0.2631, "step": 81950 }, { "epoch": 0.1453076697349862, "grad_norm": 0.609375, "learning_rate": 0.00029550962888665717, "loss": 0.2735, "step": 81952 }, { "epoch": 0.14531121590029603, "grad_norm": 0.58984375, "learning_rate": 0.0002954829478456079, "loss": 0.2133, "step": 81954 }, { "epoch": 0.14531476206560584, "grad_norm": 0.28515625, "learning_rate": 0.00029545627032304887, "loss": 0.1468, "step": 81956 }, { "epoch": 0.14531830823091565, "grad_norm": 3.765625, "learning_rate": 0.00029542959631909685, "loss": 0.183, "step": 81958 }, { "epoch": 0.14532185439622547, "grad_norm": 1.0390625, "learning_rate": 0.0002954029258338684, "loss": 0.1719, "step": 81960 }, { "epoch": 0.14532540056153528, "grad_norm": 0.734375, "learning_rate": 0.00029537625886748, "loss": 0.1796, "step": 81962 }, { "epoch": 0.1453289467268451, "grad_norm": 0.3828125, "learning_rate": 0.00029534959542004875, "loss": 0.1645, "step": 81964 }, { "epoch": 0.1453324928921549, "grad_norm": 0.55859375, "learning_rate": 0.0002953229354916907, "loss": 0.1888, "step": 81966 }, { "epoch": 0.14533603905746473, "grad_norm": 0.234375, "learning_rate": 0.00029529627908252295, "loss": 0.1729, "step": 81968 }, { "epoch": 0.14533958522277454, "grad_norm": 0.322265625, "learning_rate": 0.00029526962619266166, "loss": 0.2507, "step": 81970 }, { "epoch": 0.14534313138808436, "grad_norm": 0.5703125, "learning_rate": 0.00029524297682222356, "loss": 0.2121, "step": 81972 }, { "epoch": 0.14534667755339417, "grad_norm": 0.365234375, "learning_rate": 0.0002952163309713253, "loss": 0.1751, "step": 81974 }, { "epoch": 0.14535022371870399, "grad_norm": 0.2353515625, "learning_rate": 0.0002951896886400835, "loss": 0.1608, "step": 81976 }, { "epoch": 0.1453537698840138, "grad_norm": 0.51953125, "learning_rate": 0.0002951630498286144, "loss": 0.2055, "step": 81978 }, { "epoch": 0.14535731604932361, "grad_norm": 0.291015625, "learning_rate": 0.0002951364145370344, "loss": 0.1508, "step": 81980 }, { "epoch": 0.14536086221463343, "grad_norm": 0.482421875, "learning_rate": 0.0002951097827654605, "loss": 0.1644, "step": 81982 }, { "epoch": 0.14536440837994324, "grad_norm": 0.3671875, "learning_rate": 0.0002950831545140087, "loss": 0.2123, "step": 81984 }, { "epoch": 0.14536795454525306, "grad_norm": 0.203125, "learning_rate": 0.0002950565297827957, "loss": 0.1529, "step": 81986 }, { "epoch": 0.14537150071056287, "grad_norm": 0.408203125, "learning_rate": 0.00029502990857193794, "loss": 0.1643, "step": 81988 }, { "epoch": 0.1453750468758727, "grad_norm": 0.28125, "learning_rate": 0.00029500329088155176, "loss": 0.1783, "step": 81990 }, { "epoch": 0.1453785930411825, "grad_norm": 0.259765625, "learning_rate": 0.00029497667671175344, "loss": 0.1763, "step": 81992 }, { "epoch": 0.14538213920649232, "grad_norm": 0.5234375, "learning_rate": 0.0002949500660626595, "loss": 0.2007, "step": 81994 }, { "epoch": 0.14538568537180213, "grad_norm": 0.7265625, "learning_rate": 0.0002949234589343865, "loss": 0.2142, "step": 81996 }, { "epoch": 0.14538923153711195, "grad_norm": 0.275390625, "learning_rate": 0.00029489685532705067, "loss": 0.2376, "step": 81998 }, { "epoch": 0.14539277770242176, "grad_norm": 0.349609375, "learning_rate": 0.0002948702552407683, "loss": 0.168, "step": 82000 }, { "epoch": 0.14539632386773158, "grad_norm": 0.71484375, "learning_rate": 0.00029484365867565556, "loss": 0.2068, "step": 82002 }, { "epoch": 0.1453998700330414, "grad_norm": 0.39453125, "learning_rate": 0.00029481706563182913, "loss": 0.223, "step": 82004 }, { "epoch": 0.1454034161983512, "grad_norm": 0.578125, "learning_rate": 0.0002947904761094052, "loss": 0.1625, "step": 82006 }, { "epoch": 0.14540696236366102, "grad_norm": 0.392578125, "learning_rate": 0.0002947638901085, "loss": 0.1649, "step": 82008 }, { "epoch": 0.14541050852897083, "grad_norm": 0.322265625, "learning_rate": 0.00029473730762922986, "loss": 0.1613, "step": 82010 }, { "epoch": 0.14541405469428065, "grad_norm": 0.333984375, "learning_rate": 0.00029471072867171065, "loss": 0.2039, "step": 82012 }, { "epoch": 0.14541760085959046, "grad_norm": 0.41796875, "learning_rate": 0.00029468415323605945, "loss": 0.1655, "step": 82014 }, { "epoch": 0.14542114702490028, "grad_norm": 0.380859375, "learning_rate": 0.00029465758132239157, "loss": 0.2029, "step": 82016 }, { "epoch": 0.1454246931902101, "grad_norm": 0.5703125, "learning_rate": 0.0002946310129308238, "loss": 0.1534, "step": 82018 }, { "epoch": 0.1454282393555199, "grad_norm": 0.28515625, "learning_rate": 0.000294604448061472, "loss": 0.1344, "step": 82020 }, { "epoch": 0.14543178552082972, "grad_norm": 2.15625, "learning_rate": 0.00029457788671445286, "loss": 0.3098, "step": 82022 }, { "epoch": 0.14543533168613954, "grad_norm": 0.470703125, "learning_rate": 0.0002945513288898819, "loss": 0.1275, "step": 82024 }, { "epoch": 0.14543887785144935, "grad_norm": 0.45703125, "learning_rate": 0.0002945247745878756, "loss": 0.2327, "step": 82026 }, { "epoch": 0.14544242401675916, "grad_norm": 0.482421875, "learning_rate": 0.00029449822380855017, "loss": 0.1666, "step": 82028 }, { "epoch": 0.14544597018206898, "grad_norm": 0.380859375, "learning_rate": 0.00029447167655202163, "loss": 0.1497, "step": 82030 }, { "epoch": 0.14544951634737882, "grad_norm": 1.84375, "learning_rate": 0.000294445132818406, "loss": 0.2769, "step": 82032 }, { "epoch": 0.14545306251268864, "grad_norm": 0.265625, "learning_rate": 0.0002944185926078193, "loss": 0.1098, "step": 82034 }, { "epoch": 0.14545660867799845, "grad_norm": 0.54296875, "learning_rate": 0.00029439205592037796, "loss": 0.2289, "step": 82036 }, { "epoch": 0.14546015484330826, "grad_norm": 0.287109375, "learning_rate": 0.00029436552275619775, "loss": 0.2122, "step": 82038 }, { "epoch": 0.14546370100861808, "grad_norm": 0.490234375, "learning_rate": 0.00029433899311539477, "loss": 0.2124, "step": 82040 }, { "epoch": 0.1454672471739279, "grad_norm": 0.369140625, "learning_rate": 0.00029431246699808503, "loss": 0.252, "step": 82042 }, { "epoch": 0.1454707933392377, "grad_norm": 0.439453125, "learning_rate": 0.0002942859444043845, "loss": 0.1642, "step": 82044 }, { "epoch": 0.14547433950454752, "grad_norm": 0.671875, "learning_rate": 0.0002942594253344093, "loss": 0.2339, "step": 82046 }, { "epoch": 0.14547788566985734, "grad_norm": 0.158203125, "learning_rate": 0.00029423290978827543, "loss": 0.1657, "step": 82048 }, { "epoch": 0.14548143183516715, "grad_norm": 0.9609375, "learning_rate": 0.00029420639776609874, "loss": 0.2429, "step": 82050 }, { "epoch": 0.14548497800047697, "grad_norm": 0.35546875, "learning_rate": 0.00029417988926799506, "loss": 0.1488, "step": 82052 }, { "epoch": 0.14548852416578678, "grad_norm": 0.46875, "learning_rate": 0.00029415338429408057, "loss": 0.2228, "step": 82054 }, { "epoch": 0.1454920703310966, "grad_norm": 0.279296875, "learning_rate": 0.0002941268828444711, "loss": 0.1866, "step": 82056 }, { "epoch": 0.1454956164964064, "grad_norm": 0.431640625, "learning_rate": 0.00029410038491928253, "loss": 0.142, "step": 82058 }, { "epoch": 0.14549916266171622, "grad_norm": 0.2197265625, "learning_rate": 0.0002940738905186306, "loss": 0.1767, "step": 82060 }, { "epoch": 0.14550270882702604, "grad_norm": 0.64453125, "learning_rate": 0.0002940473996426315, "loss": 0.1644, "step": 82062 }, { "epoch": 0.14550625499233585, "grad_norm": 0.482421875, "learning_rate": 0.000294020912291401, "loss": 0.1376, "step": 82064 }, { "epoch": 0.14550980115764567, "grad_norm": 0.4140625, "learning_rate": 0.00029399442846505473, "loss": 0.2106, "step": 82066 }, { "epoch": 0.14551334732295548, "grad_norm": 2.203125, "learning_rate": 0.00029396794816370875, "loss": 0.1959, "step": 82068 }, { "epoch": 0.1455168934882653, "grad_norm": 0.7421875, "learning_rate": 0.0002939414713874785, "loss": 0.206, "step": 82070 }, { "epoch": 0.1455204396535751, "grad_norm": 0.4296875, "learning_rate": 0.0002939149981364804, "loss": 0.1739, "step": 82072 }, { "epoch": 0.14552398581888493, "grad_norm": 0.3203125, "learning_rate": 0.0002938885284108296, "loss": 0.2035, "step": 82074 }, { "epoch": 0.14552753198419474, "grad_norm": 0.435546875, "learning_rate": 0.0002938620622106423, "loss": 0.1726, "step": 82076 }, { "epoch": 0.14553107814950456, "grad_norm": 0.392578125, "learning_rate": 0.0002938355995360339, "loss": 0.1751, "step": 82078 }, { "epoch": 0.14553462431481437, "grad_norm": 1.0625, "learning_rate": 0.00029380914038712057, "loss": 0.2149, "step": 82080 }, { "epoch": 0.14553817048012418, "grad_norm": 0.2890625, "learning_rate": 0.0002937826847640175, "loss": 0.1876, "step": 82082 }, { "epoch": 0.145541716645434, "grad_norm": 0.408203125, "learning_rate": 0.0002937562326668408, "loss": 0.1603, "step": 82084 }, { "epoch": 0.14554526281074381, "grad_norm": 0.404296875, "learning_rate": 0.000293729784095706, "loss": 0.1968, "step": 82086 }, { "epoch": 0.14554880897605363, "grad_norm": 2.71875, "learning_rate": 0.0002937033390507289, "loss": 0.3452, "step": 82088 }, { "epoch": 0.14555235514136344, "grad_norm": 0.95703125, "learning_rate": 0.0002936768975320249, "loss": 0.2197, "step": 82090 }, { "epoch": 0.14555590130667326, "grad_norm": 0.734375, "learning_rate": 0.0002936504595397097, "loss": 0.2436, "step": 82092 }, { "epoch": 0.14555944747198307, "grad_norm": 0.328125, "learning_rate": 0.0002936240250738991, "loss": 0.1456, "step": 82094 }, { "epoch": 0.1455629936372929, "grad_norm": 0.52734375, "learning_rate": 0.0002935975941347087, "loss": 0.1684, "step": 82096 }, { "epoch": 0.1455665398026027, "grad_norm": 0.400390625, "learning_rate": 0.000293571166722254, "loss": 0.1674, "step": 82098 }, { "epoch": 0.14557008596791252, "grad_norm": 1.5234375, "learning_rate": 0.00029354474283665034, "loss": 0.3398, "step": 82100 }, { "epoch": 0.14557363213322233, "grad_norm": 0.232421875, "learning_rate": 0.00029351832247801374, "loss": 0.2121, "step": 82102 }, { "epoch": 0.14557717829853214, "grad_norm": 0.5703125, "learning_rate": 0.0002934919056464596, "loss": 0.1777, "step": 82104 }, { "epoch": 0.14558072446384196, "grad_norm": 0.578125, "learning_rate": 0.0002934654923421033, "loss": 0.1566, "step": 82106 }, { "epoch": 0.14558427062915177, "grad_norm": 0.3125, "learning_rate": 0.00029343908256506047, "loss": 0.2513, "step": 82108 }, { "epoch": 0.1455878167944616, "grad_norm": 0.2373046875, "learning_rate": 0.0002934126763154464, "loss": 0.1726, "step": 82110 }, { "epoch": 0.1455913629597714, "grad_norm": 1.53125, "learning_rate": 0.00029338627359337715, "loss": 0.2914, "step": 82112 }, { "epoch": 0.14559490912508122, "grad_norm": 0.2890625, "learning_rate": 0.00029335987439896744, "loss": 0.1259, "step": 82114 }, { "epoch": 0.14559845529039103, "grad_norm": 1.5, "learning_rate": 0.00029333347873233324, "loss": 0.1978, "step": 82116 }, { "epoch": 0.14560200145570085, "grad_norm": 0.62890625, "learning_rate": 0.0002933070865935897, "loss": 0.148, "step": 82118 }, { "epoch": 0.14560554762101066, "grad_norm": 0.63671875, "learning_rate": 0.0002932806979828527, "loss": 0.2214, "step": 82120 }, { "epoch": 0.1456090937863205, "grad_norm": 0.16796875, "learning_rate": 0.0002932543129002371, "loss": 0.1154, "step": 82122 }, { "epoch": 0.14561263995163032, "grad_norm": 0.25390625, "learning_rate": 0.0002932279313458585, "loss": 0.1627, "step": 82124 }, { "epoch": 0.14561618611694013, "grad_norm": 0.39453125, "learning_rate": 0.0002932015533198323, "loss": 0.1895, "step": 82126 }, { "epoch": 0.14561973228224995, "grad_norm": 0.2578125, "learning_rate": 0.000293175178822274, "loss": 0.1785, "step": 82128 }, { "epoch": 0.14562327844755976, "grad_norm": 0.314453125, "learning_rate": 0.00029314880785329876, "loss": 0.1633, "step": 82130 }, { "epoch": 0.14562682461286958, "grad_norm": 0.392578125, "learning_rate": 0.00029312244041302185, "loss": 0.2577, "step": 82132 }, { "epoch": 0.1456303707781794, "grad_norm": 0.71875, "learning_rate": 0.0002930960765015588, "loss": 0.1237, "step": 82134 }, { "epoch": 0.1456339169434892, "grad_norm": 0.37890625, "learning_rate": 0.0002930697161190246, "loss": 0.1326, "step": 82136 }, { "epoch": 0.14563746310879902, "grad_norm": 0.578125, "learning_rate": 0.00029304335926553507, "loss": 0.1213, "step": 82138 }, { "epoch": 0.14564100927410883, "grad_norm": 0.55859375, "learning_rate": 0.0002930170059412049, "loss": 0.1511, "step": 82140 }, { "epoch": 0.14564455543941865, "grad_norm": 1.390625, "learning_rate": 0.0002929906561461497, "loss": 0.2167, "step": 82142 }, { "epoch": 0.14564810160472846, "grad_norm": 0.33203125, "learning_rate": 0.0002929643098804846, "loss": 0.2039, "step": 82144 }, { "epoch": 0.14565164777003828, "grad_norm": 1.7421875, "learning_rate": 0.00029293796714432486, "loss": 0.3537, "step": 82146 }, { "epoch": 0.1456551939353481, "grad_norm": 0.365234375, "learning_rate": 0.0002929116279377856, "loss": 0.1974, "step": 82148 }, { "epoch": 0.1456587401006579, "grad_norm": 0.318359375, "learning_rate": 0.000292885292260982, "loss": 0.1198, "step": 82150 }, { "epoch": 0.14566228626596772, "grad_norm": 0.384765625, "learning_rate": 0.0002928589601140294, "loss": 0.1556, "step": 82152 }, { "epoch": 0.14566583243127754, "grad_norm": 0.5, "learning_rate": 0.00029283263149704295, "loss": 0.2394, "step": 82154 }, { "epoch": 0.14566937859658735, "grad_norm": 2.4375, "learning_rate": 0.00029280630641013755, "loss": 0.2413, "step": 82156 }, { "epoch": 0.14567292476189717, "grad_norm": 0.87890625, "learning_rate": 0.0002927799848534286, "loss": 0.2326, "step": 82158 }, { "epoch": 0.14567647092720698, "grad_norm": 4.8125, "learning_rate": 0.00029275366682703096, "loss": 0.3718, "step": 82160 }, { "epoch": 0.1456800170925168, "grad_norm": 0.60546875, "learning_rate": 0.00029272735233106017, "loss": 0.1809, "step": 82162 }, { "epoch": 0.1456835632578266, "grad_norm": 2.8125, "learning_rate": 0.0002927010413656306, "loss": 0.238, "step": 82164 }, { "epoch": 0.14568710942313642, "grad_norm": 0.71875, "learning_rate": 0.00029267473393085813, "loss": 0.252, "step": 82166 }, { "epoch": 0.14569065558844624, "grad_norm": 1.015625, "learning_rate": 0.00029264843002685707, "loss": 0.2733, "step": 82168 }, { "epoch": 0.14569420175375605, "grad_norm": 0.29296875, "learning_rate": 0.00029262212965374323, "loss": 0.1261, "step": 82170 }, { "epoch": 0.14569774791906587, "grad_norm": 0.298828125, "learning_rate": 0.0002925958328116308, "loss": 0.1732, "step": 82172 }, { "epoch": 0.14570129408437568, "grad_norm": 0.427734375, "learning_rate": 0.0002925695395006354, "loss": 0.1456, "step": 82174 }, { "epoch": 0.1457048402496855, "grad_norm": 0.263671875, "learning_rate": 0.0002925432497208719, "loss": 0.2066, "step": 82176 }, { "epoch": 0.1457083864149953, "grad_norm": 0.380859375, "learning_rate": 0.0002925169634724551, "loss": 0.1486, "step": 82178 }, { "epoch": 0.14571193258030513, "grad_norm": 0.2421875, "learning_rate": 0.0002924906807555001, "loss": 0.1574, "step": 82180 }, { "epoch": 0.14571547874561494, "grad_norm": 1.640625, "learning_rate": 0.0002924644015701216, "loss": 0.2188, "step": 82182 }, { "epoch": 0.14571902491092475, "grad_norm": 0.380859375, "learning_rate": 0.000292438125916435, "loss": 0.1691, "step": 82184 }, { "epoch": 0.14572257107623457, "grad_norm": 0.1875, "learning_rate": 0.0002924118537945549, "loss": 0.1403, "step": 82186 }, { "epoch": 0.14572611724154438, "grad_norm": 0.58984375, "learning_rate": 0.00029238558520459625, "loss": 0.1618, "step": 82188 }, { "epoch": 0.1457296634068542, "grad_norm": 0.248046875, "learning_rate": 0.0002923593201466738, "loss": 0.154, "step": 82190 }, { "epoch": 0.145733209572164, "grad_norm": 0.40234375, "learning_rate": 0.00029233305862090266, "loss": 0.1684, "step": 82192 }, { "epoch": 0.14573675573747383, "grad_norm": 0.32421875, "learning_rate": 0.0002923068006273975, "loss": 0.23, "step": 82194 }, { "epoch": 0.14574030190278364, "grad_norm": 0.33984375, "learning_rate": 0.00029228054616627335, "loss": 0.1653, "step": 82196 }, { "epoch": 0.14574384806809346, "grad_norm": 0.54296875, "learning_rate": 0.0002922542952376448, "loss": 0.187, "step": 82198 }, { "epoch": 0.14574739423340327, "grad_norm": 0.5078125, "learning_rate": 0.0002922280478416268, "loss": 0.1571, "step": 82200 }, { "epoch": 0.14575094039871309, "grad_norm": 0.310546875, "learning_rate": 0.00029220180397833423, "loss": 0.1493, "step": 82202 }, { "epoch": 0.1457544865640229, "grad_norm": 0.3359375, "learning_rate": 0.00029217556364788156, "loss": 0.1337, "step": 82204 }, { "epoch": 0.14575803272933271, "grad_norm": 0.2138671875, "learning_rate": 0.00029214932685038374, "loss": 0.219, "step": 82206 }, { "epoch": 0.14576157889464253, "grad_norm": 0.373046875, "learning_rate": 0.00029212309358595554, "loss": 0.193, "step": 82208 }, { "epoch": 0.14576512505995234, "grad_norm": 0.298828125, "learning_rate": 0.00029209686385471177, "loss": 0.1744, "step": 82210 }, { "epoch": 0.14576867122526216, "grad_norm": 1.03125, "learning_rate": 0.00029207063765676687, "loss": 0.2165, "step": 82212 }, { "epoch": 0.145772217390572, "grad_norm": 0.9453125, "learning_rate": 0.00029204441499223587, "loss": 0.1543, "step": 82214 }, { "epoch": 0.14577576355588182, "grad_norm": 0.400390625, "learning_rate": 0.00029201819586123317, "loss": 0.1813, "step": 82216 }, { "epoch": 0.14577930972119163, "grad_norm": 0.91015625, "learning_rate": 0.0002919919802638734, "loss": 0.1427, "step": 82218 }, { "epoch": 0.14578285588650144, "grad_norm": 1.578125, "learning_rate": 0.0002919657682002718, "loss": 0.2716, "step": 82220 }, { "epoch": 0.14578640205181126, "grad_norm": 0.55078125, "learning_rate": 0.0002919395596705422, "loss": 0.1653, "step": 82222 }, { "epoch": 0.14578994821712107, "grad_norm": 0.255859375, "learning_rate": 0.00029191335467479966, "loss": 0.1757, "step": 82224 }, { "epoch": 0.1457934943824309, "grad_norm": 0.2431640625, "learning_rate": 0.00029188715321315864, "loss": 0.1384, "step": 82226 }, { "epoch": 0.1457970405477407, "grad_norm": 0.45703125, "learning_rate": 0.0002918609552857341, "loss": 0.2404, "step": 82228 }, { "epoch": 0.14580058671305052, "grad_norm": 0.462890625, "learning_rate": 0.00029183476089264006, "loss": 0.2052, "step": 82230 }, { "epoch": 0.14580413287836033, "grad_norm": 0.71484375, "learning_rate": 0.0002918085700339915, "loss": 0.1297, "step": 82232 }, { "epoch": 0.14580767904367015, "grad_norm": 0.404296875, "learning_rate": 0.00029178238270990265, "loss": 0.1947, "step": 82234 }, { "epoch": 0.14581122520897996, "grad_norm": 0.91796875, "learning_rate": 0.0002917561989204883, "loss": 0.1664, "step": 82236 }, { "epoch": 0.14581477137428978, "grad_norm": 4.28125, "learning_rate": 0.0002917300186658628, "loss": 0.2743, "step": 82238 }, { "epoch": 0.1458183175395996, "grad_norm": 0.4140625, "learning_rate": 0.00029170384194614055, "loss": 0.1587, "step": 82240 }, { "epoch": 0.1458218637049094, "grad_norm": 0.326171875, "learning_rate": 0.0002916776687614364, "loss": 0.1608, "step": 82242 }, { "epoch": 0.14582540987021922, "grad_norm": 0.19140625, "learning_rate": 0.00029165149911186455, "loss": 0.1726, "step": 82244 }, { "epoch": 0.14582895603552903, "grad_norm": 1.2578125, "learning_rate": 0.00029162533299753954, "loss": 0.2858, "step": 82246 }, { "epoch": 0.14583250220083885, "grad_norm": 0.63671875, "learning_rate": 0.00029159917041857554, "loss": 0.2546, "step": 82248 }, { "epoch": 0.14583604836614866, "grad_norm": 0.423828125, "learning_rate": 0.0002915730113750874, "loss": 0.1799, "step": 82250 }, { "epoch": 0.14583959453145848, "grad_norm": 0.396484375, "learning_rate": 0.00029154685586718935, "loss": 0.2586, "step": 82252 }, { "epoch": 0.1458431406967683, "grad_norm": 2.078125, "learning_rate": 0.00029152070389499565, "loss": 0.362, "step": 82254 }, { "epoch": 0.1458466868620781, "grad_norm": 0.423828125, "learning_rate": 0.00029149455545862094, "loss": 0.187, "step": 82256 }, { "epoch": 0.14585023302738792, "grad_norm": 0.451171875, "learning_rate": 0.00029146841055817895, "loss": 0.1555, "step": 82258 }, { "epoch": 0.14585377919269774, "grad_norm": 0.302734375, "learning_rate": 0.00029144226919378515, "loss": 0.1953, "step": 82260 }, { "epoch": 0.14585732535800755, "grad_norm": 0.94140625, "learning_rate": 0.0002914161313655526, "loss": 0.2605, "step": 82262 }, { "epoch": 0.14586087152331736, "grad_norm": 0.486328125, "learning_rate": 0.00029138999707359646, "loss": 0.1507, "step": 82264 }, { "epoch": 0.14586441768862718, "grad_norm": 0.6484375, "learning_rate": 0.00029136386631803065, "loss": 0.2029, "step": 82266 }, { "epoch": 0.145867963853937, "grad_norm": 0.275390625, "learning_rate": 0.00029133773909896987, "loss": 0.1646, "step": 82268 }, { "epoch": 0.1458715100192468, "grad_norm": 0.28125, "learning_rate": 0.00029131161541652784, "loss": 0.1681, "step": 82270 }, { "epoch": 0.14587505618455662, "grad_norm": 2.25, "learning_rate": 0.00029128549527081916, "loss": 0.1213, "step": 82272 }, { "epoch": 0.14587860234986644, "grad_norm": 2.015625, "learning_rate": 0.00029125937866195795, "loss": 0.1583, "step": 82274 }, { "epoch": 0.14588214851517625, "grad_norm": 0.7578125, "learning_rate": 0.0002912332655900585, "loss": 0.1791, "step": 82276 }, { "epoch": 0.14588569468048607, "grad_norm": 1.328125, "learning_rate": 0.00029120715605523496, "loss": 0.1357, "step": 82278 }, { "epoch": 0.14588924084579588, "grad_norm": 0.375, "learning_rate": 0.0002911810500576013, "loss": 0.189, "step": 82280 }, { "epoch": 0.1458927870111057, "grad_norm": 1.125, "learning_rate": 0.00029115494759727215, "loss": 0.1805, "step": 82282 }, { "epoch": 0.1458963331764155, "grad_norm": 0.185546875, "learning_rate": 0.0002911288486743613, "loss": 0.16, "step": 82284 }, { "epoch": 0.14589987934172532, "grad_norm": 4.9375, "learning_rate": 0.0002911027532889831, "loss": 0.1764, "step": 82286 }, { "epoch": 0.14590342550703514, "grad_norm": 0.1953125, "learning_rate": 0.00029107666144125146, "loss": 0.1836, "step": 82288 }, { "epoch": 0.14590697167234495, "grad_norm": 0.162109375, "learning_rate": 0.0002910505731312807, "loss": 0.2054, "step": 82290 }, { "epoch": 0.14591051783765477, "grad_norm": 0.30859375, "learning_rate": 0.00029102448835918493, "loss": 0.1448, "step": 82292 }, { "epoch": 0.14591406400296458, "grad_norm": 0.69921875, "learning_rate": 0.00029099840712507795, "loss": 0.1718, "step": 82294 }, { "epoch": 0.1459176101682744, "grad_norm": 0.80859375, "learning_rate": 0.0002909723294290743, "loss": 0.1654, "step": 82296 }, { "epoch": 0.1459211563335842, "grad_norm": 0.43359375, "learning_rate": 0.00029094625527128744, "loss": 0.184, "step": 82298 }, { "epoch": 0.14592470249889403, "grad_norm": 1.4453125, "learning_rate": 0.000290920184651832, "loss": 0.1864, "step": 82300 }, { "epoch": 0.14592824866420384, "grad_norm": 0.47265625, "learning_rate": 0.00029089411757082133, "loss": 0.1209, "step": 82302 }, { "epoch": 0.14593179482951368, "grad_norm": 0.345703125, "learning_rate": 0.00029086805402837006, "loss": 0.2067, "step": 82304 }, { "epoch": 0.1459353409948235, "grad_norm": 0.3203125, "learning_rate": 0.00029084199402459177, "loss": 0.2145, "step": 82306 }, { "epoch": 0.1459388871601333, "grad_norm": 0.322265625, "learning_rate": 0.0002908159375596007, "loss": 0.1921, "step": 82308 }, { "epoch": 0.14594243332544313, "grad_norm": 0.357421875, "learning_rate": 0.00029078988463351077, "loss": 0.128, "step": 82310 }, { "epoch": 0.14594597949075294, "grad_norm": 0.7265625, "learning_rate": 0.00029076383524643574, "loss": 0.1955, "step": 82312 }, { "epoch": 0.14594952565606276, "grad_norm": 1.65625, "learning_rate": 0.0002907377893984897, "loss": 0.2295, "step": 82314 }, { "epoch": 0.14595307182137257, "grad_norm": 0.390625, "learning_rate": 0.00029071174708978633, "loss": 0.1639, "step": 82316 }, { "epoch": 0.14595661798668239, "grad_norm": 0.357421875, "learning_rate": 0.0002906857083204399, "loss": 0.1843, "step": 82318 }, { "epoch": 0.1459601641519922, "grad_norm": 0.48828125, "learning_rate": 0.0002906596730905638, "loss": 0.1749, "step": 82320 }, { "epoch": 0.14596371031730201, "grad_norm": 0.291015625, "learning_rate": 0.0002906336414002724, "loss": 0.1609, "step": 82322 }, { "epoch": 0.14596725648261183, "grad_norm": 0.25, "learning_rate": 0.0002906076132496793, "loss": 0.1705, "step": 82324 }, { "epoch": 0.14597080264792164, "grad_norm": 0.49609375, "learning_rate": 0.00029058158863889835, "loss": 0.1619, "step": 82326 }, { "epoch": 0.14597434881323146, "grad_norm": 0.28515625, "learning_rate": 0.0002905555675680433, "loss": 0.1551, "step": 82328 }, { "epoch": 0.14597789497854127, "grad_norm": 0.38671875, "learning_rate": 0.00029052955003722793, "loss": 0.1726, "step": 82330 }, { "epoch": 0.1459814411438511, "grad_norm": 0.443359375, "learning_rate": 0.0002905035360465662, "loss": 0.1611, "step": 82332 }, { "epoch": 0.1459849873091609, "grad_norm": 0.283203125, "learning_rate": 0.0002904775255961719, "loss": 0.1801, "step": 82334 }, { "epoch": 0.14598853347447072, "grad_norm": 0.94140625, "learning_rate": 0.0002904515186861586, "loss": 0.1565, "step": 82336 }, { "epoch": 0.14599207963978053, "grad_norm": 0.95703125, "learning_rate": 0.00029042551531663997, "loss": 0.1983, "step": 82338 }, { "epoch": 0.14599562580509035, "grad_norm": 1.46875, "learning_rate": 0.00029039951548773, "loss": 0.2971, "step": 82340 }, { "epoch": 0.14599917197040016, "grad_norm": 0.6953125, "learning_rate": 0.0002903735191995423, "loss": 0.1632, "step": 82342 }, { "epoch": 0.14600271813570997, "grad_norm": 0.69921875, "learning_rate": 0.00029034752645219064, "loss": 0.1745, "step": 82344 }, { "epoch": 0.1460062643010198, "grad_norm": 1.671875, "learning_rate": 0.00029032153724578845, "loss": 0.2121, "step": 82346 }, { "epoch": 0.1460098104663296, "grad_norm": 0.318359375, "learning_rate": 0.00029029555158044943, "loss": 0.1751, "step": 82348 }, { "epoch": 0.14601335663163942, "grad_norm": 0.3125, "learning_rate": 0.0002902695694562877, "loss": 0.198, "step": 82350 }, { "epoch": 0.14601690279694923, "grad_norm": 0.59375, "learning_rate": 0.0002902435908734162, "loss": 0.2228, "step": 82352 }, { "epoch": 0.14602044896225905, "grad_norm": 0.435546875, "learning_rate": 0.00029021761583194897, "loss": 0.2093, "step": 82354 }, { "epoch": 0.14602399512756886, "grad_norm": 0.2177734375, "learning_rate": 0.0002901916443319994, "loss": 0.1851, "step": 82356 }, { "epoch": 0.14602754129287868, "grad_norm": 0.68359375, "learning_rate": 0.00029016567637368145, "loss": 0.2276, "step": 82358 }, { "epoch": 0.1460310874581885, "grad_norm": 0.314453125, "learning_rate": 0.0002901397119571082, "loss": 0.1455, "step": 82360 }, { "epoch": 0.1460346336234983, "grad_norm": 1.1171875, "learning_rate": 0.00029011375108239354, "loss": 0.2611, "step": 82362 }, { "epoch": 0.14603817978880812, "grad_norm": 0.36328125, "learning_rate": 0.0002900877937496509, "loss": 0.1954, "step": 82364 }, { "epoch": 0.14604172595411793, "grad_norm": 1.5625, "learning_rate": 0.0002900618399589938, "loss": 0.1316, "step": 82366 }, { "epoch": 0.14604527211942775, "grad_norm": 1.8203125, "learning_rate": 0.00029003588971053567, "loss": 0.1794, "step": 82368 }, { "epoch": 0.14604881828473756, "grad_norm": 0.259765625, "learning_rate": 0.00029000994300438994, "loss": 0.1601, "step": 82370 }, { "epoch": 0.14605236445004738, "grad_norm": 0.45703125, "learning_rate": 0.0002899839998406705, "loss": 0.2331, "step": 82372 }, { "epoch": 0.1460559106153572, "grad_norm": 0.326171875, "learning_rate": 0.00028995806021949046, "loss": 0.1685, "step": 82374 }, { "epoch": 0.146059456780667, "grad_norm": 0.388671875, "learning_rate": 0.00028993212414096326, "loss": 0.1208, "step": 82376 }, { "epoch": 0.14606300294597682, "grad_norm": 0.4921875, "learning_rate": 0.0002899061916052023, "loss": 0.1659, "step": 82378 }, { "epoch": 0.14606654911128664, "grad_norm": 1.2265625, "learning_rate": 0.0002898802626123212, "loss": 0.1718, "step": 82380 }, { "epoch": 0.14607009527659645, "grad_norm": 0.3359375, "learning_rate": 0.0002898543371624334, "loss": 0.1857, "step": 82382 }, { "epoch": 0.14607364144190627, "grad_norm": 0.216796875, "learning_rate": 0.0002898284152556521, "loss": 0.1341, "step": 82384 }, { "epoch": 0.14607718760721608, "grad_norm": 0.65625, "learning_rate": 0.0002898024968920906, "loss": 0.148, "step": 82386 }, { "epoch": 0.1460807337725259, "grad_norm": 0.7890625, "learning_rate": 0.0002897765820718622, "loss": 0.2192, "step": 82388 }, { "epoch": 0.1460842799378357, "grad_norm": 1.7890625, "learning_rate": 0.0002897506707950808, "loss": 0.1798, "step": 82390 }, { "epoch": 0.14608782610314552, "grad_norm": 0.1767578125, "learning_rate": 0.0002897247630618591, "loss": 0.164, "step": 82392 }, { "epoch": 0.14609137226845537, "grad_norm": 0.6875, "learning_rate": 0.00028969885887231064, "loss": 0.1473, "step": 82394 }, { "epoch": 0.14609491843376518, "grad_norm": 1.578125, "learning_rate": 0.00028967295822654855, "loss": 0.1821, "step": 82396 }, { "epoch": 0.146098464599075, "grad_norm": 0.357421875, "learning_rate": 0.0002896470611246864, "loss": 0.1632, "step": 82398 }, { "epoch": 0.1461020107643848, "grad_norm": 0.2890625, "learning_rate": 0.0002896211675668373, "loss": 0.144, "step": 82400 }, { "epoch": 0.14610555692969462, "grad_norm": 0.353515625, "learning_rate": 0.0002895952775531145, "loss": 0.1544, "step": 82402 }, { "epoch": 0.14610910309500444, "grad_norm": 0.265625, "learning_rate": 0.00028956939108363124, "loss": 0.1565, "step": 82404 }, { "epoch": 0.14611264926031425, "grad_norm": 2.625, "learning_rate": 0.0002895435081585005, "loss": 0.3175, "step": 82406 }, { "epoch": 0.14611619542562407, "grad_norm": 0.43359375, "learning_rate": 0.00028951762877783605, "loss": 0.4161, "step": 82408 }, { "epoch": 0.14611974159093388, "grad_norm": 0.4921875, "learning_rate": 0.0002894917529417504, "loss": 0.1784, "step": 82410 }, { "epoch": 0.1461232877562437, "grad_norm": 0.455078125, "learning_rate": 0.00028946588065035714, "loss": 0.2201, "step": 82412 }, { "epoch": 0.1461268339215535, "grad_norm": 0.53515625, "learning_rate": 0.0002894400119037691, "loss": 0.1986, "step": 82414 }, { "epoch": 0.14613038008686333, "grad_norm": 0.396484375, "learning_rate": 0.00028941414670210006, "loss": 0.1784, "step": 82416 }, { "epoch": 0.14613392625217314, "grad_norm": 0.1806640625, "learning_rate": 0.00028938828504546235, "loss": 0.1647, "step": 82418 }, { "epoch": 0.14613747241748296, "grad_norm": 0.578125, "learning_rate": 0.0002893624269339695, "loss": 0.1994, "step": 82420 }, { "epoch": 0.14614101858279277, "grad_norm": 0.255859375, "learning_rate": 0.0002893365723677346, "loss": 0.239, "step": 82422 }, { "epoch": 0.14614456474810258, "grad_norm": 0.283203125, "learning_rate": 0.0002893107213468706, "loss": 0.2876, "step": 82424 }, { "epoch": 0.1461481109134124, "grad_norm": 0.20703125, "learning_rate": 0.0002892848738714906, "loss": 0.1201, "step": 82426 }, { "epoch": 0.1461516570787222, "grad_norm": 0.40625, "learning_rate": 0.0002892590299417074, "loss": 0.1359, "step": 82428 }, { "epoch": 0.14615520324403203, "grad_norm": 0.2109375, "learning_rate": 0.0002892331895576346, "loss": 0.1888, "step": 82430 }, { "epoch": 0.14615874940934184, "grad_norm": 0.369140625, "learning_rate": 0.0002892073527193848, "loss": 0.1567, "step": 82432 }, { "epoch": 0.14616229557465166, "grad_norm": 0.232421875, "learning_rate": 0.0002891815194270711, "loss": 0.3458, "step": 82434 }, { "epoch": 0.14616584173996147, "grad_norm": 0.306640625, "learning_rate": 0.00028915568968080626, "loss": 0.1544, "step": 82436 }, { "epoch": 0.1461693879052713, "grad_norm": 0.2314453125, "learning_rate": 0.00028912986348070366, "loss": 0.1501, "step": 82438 }, { "epoch": 0.1461729340705811, "grad_norm": 0.8515625, "learning_rate": 0.00028910404082687594, "loss": 0.2577, "step": 82440 }, { "epoch": 0.14617648023589092, "grad_norm": 0.6875, "learning_rate": 0.00028907822171943617, "loss": 0.1439, "step": 82442 }, { "epoch": 0.14618002640120073, "grad_norm": 0.322265625, "learning_rate": 0.0002890524061584972, "loss": 0.1952, "step": 82444 }, { "epoch": 0.14618357256651054, "grad_norm": 0.1796875, "learning_rate": 0.00028902659414417175, "loss": 0.1429, "step": 82446 }, { "epoch": 0.14618711873182036, "grad_norm": 4.1875, "learning_rate": 0.00028900078567657337, "loss": 0.2697, "step": 82448 }, { "epoch": 0.14619066489713017, "grad_norm": 1.1875, "learning_rate": 0.00028897498075581397, "loss": 0.2361, "step": 82450 }, { "epoch": 0.14619421106244, "grad_norm": 0.61328125, "learning_rate": 0.0002889491793820072, "loss": 0.1982, "step": 82452 }, { "epoch": 0.1461977572277498, "grad_norm": 0.435546875, "learning_rate": 0.0002889233815552653, "loss": 0.2056, "step": 82454 }, { "epoch": 0.14620130339305962, "grad_norm": 0.443359375, "learning_rate": 0.0002888975872757018, "loss": 0.1901, "step": 82456 }, { "epoch": 0.14620484955836943, "grad_norm": 0.77734375, "learning_rate": 0.00028887179654342893, "loss": 0.1129, "step": 82458 }, { "epoch": 0.14620839572367925, "grad_norm": 1.015625, "learning_rate": 0.00028884600935855965, "loss": 0.205, "step": 82460 }, { "epoch": 0.14621194188898906, "grad_norm": 0.458984375, "learning_rate": 0.0002888202257212068, "loss": 0.1761, "step": 82462 }, { "epoch": 0.14621548805429888, "grad_norm": 0.87109375, "learning_rate": 0.0002887944456314831, "loss": 0.1335, "step": 82464 }, { "epoch": 0.1462190342196087, "grad_norm": 0.26953125, "learning_rate": 0.0002887686690895012, "loss": 0.1793, "step": 82466 }, { "epoch": 0.1462225803849185, "grad_norm": 0.34375, "learning_rate": 0.00028874289609537383, "loss": 0.1829, "step": 82468 }, { "epoch": 0.14622612655022832, "grad_norm": 0.169921875, "learning_rate": 0.00028871712664921403, "loss": 0.1715, "step": 82470 }, { "epoch": 0.14622967271553813, "grad_norm": 0.2421875, "learning_rate": 0.0002886913607511341, "loss": 0.1504, "step": 82472 }, { "epoch": 0.14623321888084795, "grad_norm": 0.46484375, "learning_rate": 0.00028866559840124713, "loss": 0.2929, "step": 82474 }, { "epoch": 0.14623676504615776, "grad_norm": 0.353515625, "learning_rate": 0.0002886398395996653, "loss": 0.2063, "step": 82476 }, { "epoch": 0.14624031121146758, "grad_norm": 0.326171875, "learning_rate": 0.0002886140843465016, "loss": 0.1479, "step": 82478 }, { "epoch": 0.1462438573767774, "grad_norm": 0.33203125, "learning_rate": 0.0002885883326418687, "loss": 0.1693, "step": 82480 }, { "epoch": 0.1462474035420872, "grad_norm": 0.2197265625, "learning_rate": 0.000288562584485879, "loss": 0.1519, "step": 82482 }, { "epoch": 0.14625094970739702, "grad_norm": 0.1767578125, "learning_rate": 0.0002885368398786452, "loss": 0.1662, "step": 82484 }, { "epoch": 0.14625449587270686, "grad_norm": 0.369140625, "learning_rate": 0.00028851109882027984, "loss": 0.1892, "step": 82486 }, { "epoch": 0.14625804203801668, "grad_norm": 3.796875, "learning_rate": 0.00028848536131089564, "loss": 0.2519, "step": 82488 }, { "epoch": 0.1462615882033265, "grad_norm": 0.2578125, "learning_rate": 0.00028845962735060514, "loss": 0.1362, "step": 82490 }, { "epoch": 0.1462651343686363, "grad_norm": 0.3203125, "learning_rate": 0.00028843389693952086, "loss": 0.1743, "step": 82492 }, { "epoch": 0.14626868053394612, "grad_norm": 0.263671875, "learning_rate": 0.00028840817007775514, "loss": 0.164, "step": 82494 }, { "epoch": 0.14627222669925594, "grad_norm": 0.64453125, "learning_rate": 0.0002883824467654207, "loss": 0.252, "step": 82496 }, { "epoch": 0.14627577286456575, "grad_norm": 0.3046875, "learning_rate": 0.0002883567270026301, "loss": 0.1463, "step": 82498 }, { "epoch": 0.14627931902987557, "grad_norm": 0.2333984375, "learning_rate": 0.0002883310107894958, "loss": 0.184, "step": 82500 }, { "epoch": 0.14628286519518538, "grad_norm": 1.71875, "learning_rate": 0.00028830529812613005, "loss": 0.2862, "step": 82502 }, { "epoch": 0.1462864113604952, "grad_norm": 0.4453125, "learning_rate": 0.0002882795890126453, "loss": 0.152, "step": 82504 }, { "epoch": 0.146289957525805, "grad_norm": 0.44140625, "learning_rate": 0.00028825388344915447, "loss": 0.1903, "step": 82506 }, { "epoch": 0.14629350369111482, "grad_norm": 1.46875, "learning_rate": 0.0002882281814357693, "loss": 0.1828, "step": 82508 }, { "epoch": 0.14629704985642464, "grad_norm": 0.3125, "learning_rate": 0.00028820248297260266, "loss": 0.1474, "step": 82510 }, { "epoch": 0.14630059602173445, "grad_norm": 0.435546875, "learning_rate": 0.00028817678805976687, "loss": 0.3127, "step": 82512 }, { "epoch": 0.14630414218704427, "grad_norm": 0.78515625, "learning_rate": 0.0002881510966973742, "loss": 0.1511, "step": 82514 }, { "epoch": 0.14630768835235408, "grad_norm": 1.15625, "learning_rate": 0.00028812540888553705, "loss": 0.2157, "step": 82516 }, { "epoch": 0.1463112345176639, "grad_norm": 0.1767578125, "learning_rate": 0.0002880997246243676, "loss": 0.1476, "step": 82518 }, { "epoch": 0.1463147806829737, "grad_norm": 1.21875, "learning_rate": 0.0002880740439139786, "loss": 0.2482, "step": 82520 }, { "epoch": 0.14631832684828353, "grad_norm": 0.251953125, "learning_rate": 0.000288048366754482, "loss": 0.1368, "step": 82522 }, { "epoch": 0.14632187301359334, "grad_norm": 0.318359375, "learning_rate": 0.00028802269314599023, "loss": 0.1841, "step": 82524 }, { "epoch": 0.14632541917890315, "grad_norm": 0.287109375, "learning_rate": 0.00028799702308861545, "loss": 0.1361, "step": 82526 }, { "epoch": 0.14632896534421297, "grad_norm": 0.6328125, "learning_rate": 0.00028797135658247013, "loss": 0.1787, "step": 82528 }, { "epoch": 0.14633251150952278, "grad_norm": 0.373046875, "learning_rate": 0.00028794569362766643, "loss": 0.2136, "step": 82530 }, { "epoch": 0.1463360576748326, "grad_norm": 0.263671875, "learning_rate": 0.00028792003422431657, "loss": 0.1969, "step": 82532 }, { "epoch": 0.1463396038401424, "grad_norm": 0.353515625, "learning_rate": 0.0002878943783725328, "loss": 0.2039, "step": 82534 }, { "epoch": 0.14634315000545223, "grad_norm": 0.87109375, "learning_rate": 0.0002878687260724271, "loss": 0.1415, "step": 82536 }, { "epoch": 0.14634669617076204, "grad_norm": 4.59375, "learning_rate": 0.0002878430773241122, "loss": 0.4478, "step": 82538 }, { "epoch": 0.14635024233607186, "grad_norm": 0.59375, "learning_rate": 0.0002878174321276995, "loss": 0.1791, "step": 82540 }, { "epoch": 0.14635378850138167, "grad_norm": 1.90625, "learning_rate": 0.00028779179048330183, "loss": 0.5468, "step": 82542 }, { "epoch": 0.14635733466669149, "grad_norm": 0.263671875, "learning_rate": 0.0002877661523910309, "loss": 0.1919, "step": 82544 }, { "epoch": 0.1463608808320013, "grad_norm": 0.54296875, "learning_rate": 0.00028774051785099937, "loss": 0.1396, "step": 82546 }, { "epoch": 0.14636442699731111, "grad_norm": 1.015625, "learning_rate": 0.00028771488686331857, "loss": 0.2122, "step": 82548 }, { "epoch": 0.14636797316262093, "grad_norm": 0.1552734375, "learning_rate": 0.0002876892594281014, "loss": 0.1765, "step": 82550 }, { "epoch": 0.14637151932793074, "grad_norm": 1.3515625, "learning_rate": 0.0002876636355454593, "loss": 0.4098, "step": 82552 }, { "epoch": 0.14637506549324056, "grad_norm": 0.6328125, "learning_rate": 0.0002876380152155046, "loss": 0.1837, "step": 82554 }, { "epoch": 0.14637861165855037, "grad_norm": 0.2275390625, "learning_rate": 0.0002876123984383496, "loss": 0.125, "step": 82556 }, { "epoch": 0.1463821578238602, "grad_norm": 0.28515625, "learning_rate": 0.0002875867852141058, "loss": 0.171, "step": 82558 }, { "epoch": 0.14638570398917, "grad_norm": 0.91015625, "learning_rate": 0.00028756117554288565, "loss": 0.1422, "step": 82560 }, { "epoch": 0.14638925015447982, "grad_norm": 0.283203125, "learning_rate": 0.0002875355694248009, "loss": 0.1548, "step": 82562 }, { "epoch": 0.14639279631978963, "grad_norm": 1.2890625, "learning_rate": 0.00028750996685996386, "loss": 0.2414, "step": 82564 }, { "epoch": 0.14639634248509945, "grad_norm": 0.380859375, "learning_rate": 0.00028748436784848595, "loss": 0.1521, "step": 82566 }, { "epoch": 0.14639988865040926, "grad_norm": 0.474609375, "learning_rate": 0.00028745877239047963, "loss": 0.2586, "step": 82568 }, { "epoch": 0.14640343481571907, "grad_norm": 0.189453125, "learning_rate": 0.0002874331804860567, "loss": 0.2006, "step": 82570 }, { "epoch": 0.1464069809810289, "grad_norm": 0.390625, "learning_rate": 0.000287407592135329, "loss": 0.1707, "step": 82572 }, { "epoch": 0.1464105271463387, "grad_norm": 3.96875, "learning_rate": 0.0002873820073384085, "loss": 0.2577, "step": 82574 }, { "epoch": 0.14641407331164855, "grad_norm": 0.5, "learning_rate": 0.0002873564260954068, "loss": 0.1625, "step": 82576 }, { "epoch": 0.14641761947695836, "grad_norm": 0.2412109375, "learning_rate": 0.00028733084840643644, "loss": 0.1619, "step": 82578 }, { "epoch": 0.14642116564226818, "grad_norm": 0.60546875, "learning_rate": 0.00028730527427160875, "loss": 0.1777, "step": 82580 }, { "epoch": 0.146424711807578, "grad_norm": 1.8359375, "learning_rate": 0.0002872797036910357, "loss": 0.4199, "step": 82582 }, { "epoch": 0.1464282579728878, "grad_norm": 0.494140625, "learning_rate": 0.000287254136664829, "loss": 0.1759, "step": 82584 }, { "epoch": 0.14643180413819762, "grad_norm": 0.55078125, "learning_rate": 0.0002872285731931008, "loss": 0.1603, "step": 82586 }, { "epoch": 0.14643535030350743, "grad_norm": 0.2412109375, "learning_rate": 0.0002872030132759628, "loss": 0.1325, "step": 82588 }, { "epoch": 0.14643889646881725, "grad_norm": 0.2412109375, "learning_rate": 0.0002871774569135265, "loss": 0.1965, "step": 82590 }, { "epoch": 0.14644244263412706, "grad_norm": 0.2373046875, "learning_rate": 0.000287151904105904, "loss": 0.2408, "step": 82592 }, { "epoch": 0.14644598879943688, "grad_norm": 0.408203125, "learning_rate": 0.00028712635485320673, "loss": 0.2324, "step": 82594 }, { "epoch": 0.1464495349647467, "grad_norm": 0.34765625, "learning_rate": 0.00028710080915554686, "loss": 0.1915, "step": 82596 }, { "epoch": 0.1464530811300565, "grad_norm": 0.61328125, "learning_rate": 0.0002870752670130356, "loss": 0.1825, "step": 82598 }, { "epoch": 0.14645662729536632, "grad_norm": 0.490234375, "learning_rate": 0.000287049728425785, "loss": 0.1548, "step": 82600 }, { "epoch": 0.14646017346067614, "grad_norm": 0.1962890625, "learning_rate": 0.00028702419339390666, "loss": 0.1257, "step": 82602 }, { "epoch": 0.14646371962598595, "grad_norm": 0.380859375, "learning_rate": 0.0002869986619175125, "loss": 0.1723, "step": 82604 }, { "epoch": 0.14646726579129576, "grad_norm": 0.345703125, "learning_rate": 0.0002869731339967137, "loss": 0.1908, "step": 82606 }, { "epoch": 0.14647081195660558, "grad_norm": 0.29296875, "learning_rate": 0.00028694760963162216, "loss": 0.1531, "step": 82608 }, { "epoch": 0.1464743581219154, "grad_norm": 0.349609375, "learning_rate": 0.0002869220888223496, "loss": 0.302, "step": 82610 }, { "epoch": 0.1464779042872252, "grad_norm": 0.251953125, "learning_rate": 0.0002868965715690075, "loss": 0.1711, "step": 82612 }, { "epoch": 0.14648145045253502, "grad_norm": 0.306640625, "learning_rate": 0.00028687105787170755, "loss": 0.2651, "step": 82614 }, { "epoch": 0.14648499661784484, "grad_norm": 2.234375, "learning_rate": 0.00028684554773056103, "loss": 0.2652, "step": 82616 }, { "epoch": 0.14648854278315465, "grad_norm": 0.27734375, "learning_rate": 0.00028682004114567993, "loss": 0.1833, "step": 82618 }, { "epoch": 0.14649208894846447, "grad_norm": 0.51953125, "learning_rate": 0.0002867945381171756, "loss": 0.1223, "step": 82620 }, { "epoch": 0.14649563511377428, "grad_norm": 0.279296875, "learning_rate": 0.0002867690386451597, "loss": 0.3409, "step": 82622 }, { "epoch": 0.1464991812790841, "grad_norm": 0.91015625, "learning_rate": 0.0002867435427297434, "loss": 0.2844, "step": 82624 }, { "epoch": 0.1465027274443939, "grad_norm": 0.443359375, "learning_rate": 0.00028671805037103855, "loss": 0.1824, "step": 82626 }, { "epoch": 0.14650627360970372, "grad_norm": 0.314453125, "learning_rate": 0.0002866925615691566, "loss": 0.1769, "step": 82628 }, { "epoch": 0.14650981977501354, "grad_norm": 0.416015625, "learning_rate": 0.000286667076324209, "loss": 0.2136, "step": 82630 }, { "epoch": 0.14651336594032335, "grad_norm": 0.80859375, "learning_rate": 0.00028664159463630707, "loss": 0.1884, "step": 82632 }, { "epoch": 0.14651691210563317, "grad_norm": 0.4140625, "learning_rate": 0.0002866161165055623, "loss": 0.1898, "step": 82634 }, { "epoch": 0.14652045827094298, "grad_norm": 0.84375, "learning_rate": 0.00028659064193208624, "loss": 0.1866, "step": 82636 }, { "epoch": 0.1465240044362528, "grad_norm": 0.33203125, "learning_rate": 0.0002865651709159903, "loss": 0.15, "step": 82638 }, { "epoch": 0.1465275506015626, "grad_norm": 0.62890625, "learning_rate": 0.00028653970345738576, "loss": 0.1842, "step": 82640 }, { "epoch": 0.14653109676687243, "grad_norm": 0.24609375, "learning_rate": 0.0002865142395563839, "loss": 0.2098, "step": 82642 }, { "epoch": 0.14653464293218224, "grad_norm": 0.765625, "learning_rate": 0.0002864887792130964, "loss": 0.2077, "step": 82644 }, { "epoch": 0.14653818909749206, "grad_norm": 0.8203125, "learning_rate": 0.0002864633224276344, "loss": 0.299, "step": 82646 }, { "epoch": 0.14654173526280187, "grad_norm": 0.2431640625, "learning_rate": 0.00028643786920010936, "loss": 0.1588, "step": 82648 }, { "epoch": 0.14654528142811168, "grad_norm": 0.212890625, "learning_rate": 0.0002864124195306323, "loss": 0.3012, "step": 82650 }, { "epoch": 0.1465488275934215, "grad_norm": 0.75390625, "learning_rate": 0.00028638697341931474, "loss": 0.2301, "step": 82652 }, { "epoch": 0.1465523737587313, "grad_norm": 0.50390625, "learning_rate": 0.00028636153086626827, "loss": 0.1855, "step": 82654 }, { "epoch": 0.14655591992404113, "grad_norm": 1.2890625, "learning_rate": 0.00028633609187160347, "loss": 0.1978, "step": 82656 }, { "epoch": 0.14655946608935094, "grad_norm": 0.2177734375, "learning_rate": 0.0002863106564354321, "loss": 0.1519, "step": 82658 }, { "epoch": 0.14656301225466076, "grad_norm": 0.5, "learning_rate": 0.00028628522455786525, "loss": 0.1762, "step": 82660 }, { "epoch": 0.14656655841997057, "grad_norm": 0.423828125, "learning_rate": 0.00028625979623901436, "loss": 0.2262, "step": 82662 }, { "epoch": 0.1465701045852804, "grad_norm": 0.486328125, "learning_rate": 0.00028623437147899016, "loss": 0.1602, "step": 82664 }, { "epoch": 0.14657365075059023, "grad_norm": 0.263671875, "learning_rate": 0.00028620895027790425, "loss": 0.1943, "step": 82666 }, { "epoch": 0.14657719691590004, "grad_norm": 0.25390625, "learning_rate": 0.0002861835326358678, "loss": 0.1826, "step": 82668 }, { "epoch": 0.14658074308120986, "grad_norm": 1.1171875, "learning_rate": 0.00028615811855299187, "loss": 0.2527, "step": 82670 }, { "epoch": 0.14658428924651967, "grad_norm": 1.40625, "learning_rate": 0.00028613270802938743, "loss": 0.2206, "step": 82672 }, { "epoch": 0.1465878354118295, "grad_norm": 0.455078125, "learning_rate": 0.0002861073010651657, "loss": 0.5108, "step": 82674 }, { "epoch": 0.1465913815771393, "grad_norm": 0.19921875, "learning_rate": 0.000286081897660438, "loss": 0.1533, "step": 82676 }, { "epoch": 0.14659492774244912, "grad_norm": 0.1728515625, "learning_rate": 0.0002860564978153153, "loss": 0.1809, "step": 82678 }, { "epoch": 0.14659847390775893, "grad_norm": 0.431640625, "learning_rate": 0.00028603110152990873, "loss": 0.1793, "step": 82680 }, { "epoch": 0.14660202007306875, "grad_norm": 0.314453125, "learning_rate": 0.00028600570880432934, "loss": 0.1553, "step": 82682 }, { "epoch": 0.14660556623837856, "grad_norm": 0.265625, "learning_rate": 0.000285980319638688, "loss": 0.157, "step": 82684 }, { "epoch": 0.14660911240368837, "grad_norm": 0.36328125, "learning_rate": 0.00028595493403309614, "loss": 0.2046, "step": 82686 }, { "epoch": 0.1466126585689982, "grad_norm": 0.6953125, "learning_rate": 0.00028592955198766416, "loss": 0.1509, "step": 82688 }, { "epoch": 0.146616204734308, "grad_norm": 0.34765625, "learning_rate": 0.00028590417350250383, "loss": 0.1932, "step": 82690 }, { "epoch": 0.14661975089961782, "grad_norm": 0.34375, "learning_rate": 0.0002858787985777254, "loss": 0.1783, "step": 82692 }, { "epoch": 0.14662329706492763, "grad_norm": 0.39453125, "learning_rate": 0.00028585342721344065, "loss": 0.2098, "step": 82694 }, { "epoch": 0.14662684323023745, "grad_norm": 1.265625, "learning_rate": 0.00028582805940975973, "loss": 0.236, "step": 82696 }, { "epoch": 0.14663038939554726, "grad_norm": 0.30859375, "learning_rate": 0.0002858026951667941, "loss": 0.169, "step": 82698 }, { "epoch": 0.14663393556085708, "grad_norm": 1.3828125, "learning_rate": 0.00028577733448465457, "loss": 0.2647, "step": 82700 }, { "epoch": 0.1466374817261669, "grad_norm": 0.88671875, "learning_rate": 0.00028575197736345205, "loss": 0.2409, "step": 82702 }, { "epoch": 0.1466410278914767, "grad_norm": 0.30859375, "learning_rate": 0.0002857266238032973, "loss": 0.1879, "step": 82704 }, { "epoch": 0.14664457405678652, "grad_norm": 0.255859375, "learning_rate": 0.00028570127380430125, "loss": 0.151, "step": 82706 }, { "epoch": 0.14664812022209633, "grad_norm": 0.875, "learning_rate": 0.000285675927366575, "loss": 0.17, "step": 82708 }, { "epoch": 0.14665166638740615, "grad_norm": 0.50390625, "learning_rate": 0.0002856505844902291, "loss": 0.1783, "step": 82710 }, { "epoch": 0.14665521255271596, "grad_norm": 0.73046875, "learning_rate": 0.00028562524517537465, "loss": 0.179, "step": 82712 }, { "epoch": 0.14665875871802578, "grad_norm": 0.3671875, "learning_rate": 0.00028559990942212207, "loss": 0.1687, "step": 82714 }, { "epoch": 0.1466623048833356, "grad_norm": 5.09375, "learning_rate": 0.0002855745772305826, "loss": 0.2322, "step": 82716 }, { "epoch": 0.1466658510486454, "grad_norm": 0.2119140625, "learning_rate": 0.0002855492486008669, "loss": 0.1258, "step": 82718 }, { "epoch": 0.14666939721395522, "grad_norm": 1.25, "learning_rate": 0.00028552392353308576, "loss": 0.237, "step": 82720 }, { "epoch": 0.14667294337926504, "grad_norm": 0.349609375, "learning_rate": 0.00028549860202734976, "loss": 0.1668, "step": 82722 }, { "epoch": 0.14667648954457485, "grad_norm": 0.458984375, "learning_rate": 0.0002854732840837696, "loss": 0.1958, "step": 82724 }, { "epoch": 0.14668003570988467, "grad_norm": 0.2890625, "learning_rate": 0.0002854479697024564, "loss": 0.1607, "step": 82726 }, { "epoch": 0.14668358187519448, "grad_norm": 1.0859375, "learning_rate": 0.0002854226588835206, "loss": 0.1894, "step": 82728 }, { "epoch": 0.1466871280405043, "grad_norm": 0.84375, "learning_rate": 0.00028539735162707284, "loss": 0.1687, "step": 82730 }, { "epoch": 0.1466906742058141, "grad_norm": 1.09375, "learning_rate": 0.00028537204793322384, "loss": 0.2789, "step": 82732 }, { "epoch": 0.14669422037112392, "grad_norm": 1.2421875, "learning_rate": 0.0002853467478020845, "loss": 0.3243, "step": 82734 }, { "epoch": 0.14669776653643374, "grad_norm": 0.34765625, "learning_rate": 0.0002853214512337652, "loss": 0.1999, "step": 82736 }, { "epoch": 0.14670131270174355, "grad_norm": 0.482421875, "learning_rate": 0.0002852961582283767, "loss": 0.2107, "step": 82738 }, { "epoch": 0.14670485886705337, "grad_norm": 0.38671875, "learning_rate": 0.00028527086878602946, "loss": 0.1635, "step": 82740 }, { "epoch": 0.14670840503236318, "grad_norm": 0.34375, "learning_rate": 0.0002852455829068341, "loss": 0.1782, "step": 82742 }, { "epoch": 0.146711951197673, "grad_norm": 0.31640625, "learning_rate": 0.00028522030059090165, "loss": 0.1671, "step": 82744 }, { "epoch": 0.1467154973629828, "grad_norm": 0.283203125, "learning_rate": 0.000285195021838342, "loss": 0.1255, "step": 82746 }, { "epoch": 0.14671904352829263, "grad_norm": 0.3828125, "learning_rate": 0.00028516974664926617, "loss": 0.1491, "step": 82748 }, { "epoch": 0.14672258969360244, "grad_norm": 0.2412109375, "learning_rate": 0.0002851444750237843, "loss": 0.183, "step": 82750 }, { "epoch": 0.14672613585891225, "grad_norm": 0.470703125, "learning_rate": 0.00028511920696200766, "loss": 0.2141, "step": 82752 }, { "epoch": 0.14672968202422207, "grad_norm": 0.609375, "learning_rate": 0.00028509394246404587, "loss": 0.3001, "step": 82754 }, { "epoch": 0.14673322818953188, "grad_norm": 0.3515625, "learning_rate": 0.00028506868153000987, "loss": 0.2255, "step": 82756 }, { "epoch": 0.14673677435484173, "grad_norm": 0.56640625, "learning_rate": 0.00028504342416001024, "loss": 0.2309, "step": 82758 }, { "epoch": 0.14674032052015154, "grad_norm": 1.6171875, "learning_rate": 0.0002850181703541572, "loss": 0.2682, "step": 82760 }, { "epoch": 0.14674386668546135, "grad_norm": 0.5078125, "learning_rate": 0.0002849929201125614, "loss": 0.1773, "step": 82762 }, { "epoch": 0.14674741285077117, "grad_norm": 0.42578125, "learning_rate": 0.0002849676734353329, "loss": 0.1788, "step": 82764 }, { "epoch": 0.14675095901608098, "grad_norm": 0.4609375, "learning_rate": 0.0002849424303225826, "loss": 0.1388, "step": 82766 }, { "epoch": 0.1467545051813908, "grad_norm": 0.455078125, "learning_rate": 0.00028491719077442066, "loss": 0.2416, "step": 82768 }, { "epoch": 0.1467580513467006, "grad_norm": 2.265625, "learning_rate": 0.0002848919547909574, "loss": 0.3736, "step": 82770 }, { "epoch": 0.14676159751201043, "grad_norm": 0.419921875, "learning_rate": 0.0002848667223723031, "loss": 0.1639, "step": 82772 }, { "epoch": 0.14676514367732024, "grad_norm": 0.2890625, "learning_rate": 0.0002848414935185685, "loss": 0.1554, "step": 82774 }, { "epoch": 0.14676868984263006, "grad_norm": 0.392578125, "learning_rate": 0.00028481626822986363, "loss": 0.1817, "step": 82776 }, { "epoch": 0.14677223600793987, "grad_norm": 0.298828125, "learning_rate": 0.0002847910465062989, "loss": 0.1519, "step": 82778 }, { "epoch": 0.14677578217324969, "grad_norm": 0.2138671875, "learning_rate": 0.0002847658283479847, "loss": 0.2103, "step": 82780 }, { "epoch": 0.1467793283385595, "grad_norm": 0.68359375, "learning_rate": 0.000284740613755031, "loss": 0.1621, "step": 82782 }, { "epoch": 0.14678287450386932, "grad_norm": 1.4765625, "learning_rate": 0.00028471540272754863, "loss": 0.257, "step": 82784 }, { "epoch": 0.14678642066917913, "grad_norm": 0.76953125, "learning_rate": 0.00028469019526564717, "loss": 0.1892, "step": 82786 }, { "epoch": 0.14678996683448894, "grad_norm": 0.52734375, "learning_rate": 0.00028466499136943734, "loss": 0.1337, "step": 82788 }, { "epoch": 0.14679351299979876, "grad_norm": 0.265625, "learning_rate": 0.00028463979103902914, "loss": 0.1815, "step": 82790 }, { "epoch": 0.14679705916510857, "grad_norm": 0.578125, "learning_rate": 0.00028461459427453316, "loss": 0.2088, "step": 82792 }, { "epoch": 0.1468006053304184, "grad_norm": 0.26171875, "learning_rate": 0.000284589401076059, "loss": 0.1404, "step": 82794 }, { "epoch": 0.1468041514957282, "grad_norm": 0.349609375, "learning_rate": 0.0002845642114437173, "loss": 0.1681, "step": 82796 }, { "epoch": 0.14680769766103802, "grad_norm": 3.6875, "learning_rate": 0.00028453902537761813, "loss": 0.3261, "step": 82798 }, { "epoch": 0.14681124382634783, "grad_norm": 0.447265625, "learning_rate": 0.0002845138428778715, "loss": 0.1779, "step": 82800 }, { "epoch": 0.14681478999165765, "grad_norm": 0.61328125, "learning_rate": 0.0002844886639445877, "loss": 0.1957, "step": 82802 }, { "epoch": 0.14681833615696746, "grad_norm": 1.609375, "learning_rate": 0.00028446348857787655, "loss": 0.2933, "step": 82804 }, { "epoch": 0.14682188232227728, "grad_norm": 0.236328125, "learning_rate": 0.0002844383167778486, "loss": 0.1884, "step": 82806 }, { "epoch": 0.1468254284875871, "grad_norm": 0.369140625, "learning_rate": 0.0002844131485446136, "loss": 0.1858, "step": 82808 }, { "epoch": 0.1468289746528969, "grad_norm": 0.400390625, "learning_rate": 0.00028438798387828195, "loss": 0.1563, "step": 82810 }, { "epoch": 0.14683252081820672, "grad_norm": 0.71875, "learning_rate": 0.0002843628227789633, "loss": 0.2864, "step": 82812 }, { "epoch": 0.14683606698351653, "grad_norm": 0.625, "learning_rate": 0.00028433766524676795, "loss": 0.203, "step": 82814 }, { "epoch": 0.14683961314882635, "grad_norm": 0.443359375, "learning_rate": 0.0002843125112818059, "loss": 0.1633, "step": 82816 }, { "epoch": 0.14684315931413616, "grad_norm": 0.341796875, "learning_rate": 0.00028428736088418705, "loss": 0.2346, "step": 82818 }, { "epoch": 0.14684670547944598, "grad_norm": 0.294921875, "learning_rate": 0.0002842622140540215, "loss": 0.1317, "step": 82820 }, { "epoch": 0.1468502516447558, "grad_norm": 1.234375, "learning_rate": 0.000284237070791419, "loss": 0.204, "step": 82822 }, { "epoch": 0.1468537978100656, "grad_norm": 0.392578125, "learning_rate": 0.00028421193109649, "loss": 0.1581, "step": 82824 }, { "epoch": 0.14685734397537542, "grad_norm": 0.75, "learning_rate": 0.000284186794969344, "loss": 0.1968, "step": 82826 }, { "epoch": 0.14686089014068524, "grad_norm": 0.56640625, "learning_rate": 0.00028416166241009125, "loss": 0.1503, "step": 82828 }, { "epoch": 0.14686443630599505, "grad_norm": 0.451171875, "learning_rate": 0.00028413653341884115, "loss": 0.1538, "step": 82830 }, { "epoch": 0.14686798247130486, "grad_norm": 0.490234375, "learning_rate": 0.0002841114079957041, "loss": 0.2191, "step": 82832 }, { "epoch": 0.14687152863661468, "grad_norm": 1.109375, "learning_rate": 0.0002840862861407899, "loss": 0.1542, "step": 82834 }, { "epoch": 0.1468750748019245, "grad_norm": 0.28515625, "learning_rate": 0.0002840611678542084, "loss": 0.1645, "step": 82836 }, { "epoch": 0.1468786209672343, "grad_norm": 0.61328125, "learning_rate": 0.0002840360531360692, "loss": 0.1681, "step": 82838 }, { "epoch": 0.14688216713254412, "grad_norm": 0.66015625, "learning_rate": 0.0002840109419864824, "loss": 0.2384, "step": 82840 }, { "epoch": 0.14688571329785394, "grad_norm": 0.6796875, "learning_rate": 0.00028398583440555805, "loss": 0.235, "step": 82842 }, { "epoch": 0.14688925946316375, "grad_norm": 0.294921875, "learning_rate": 0.00028396073039340514, "loss": 0.1549, "step": 82844 }, { "epoch": 0.14689280562847357, "grad_norm": 0.79296875, "learning_rate": 0.0002839356299501344, "loss": 0.1464, "step": 82846 }, { "epoch": 0.1468963517937834, "grad_norm": 0.5390625, "learning_rate": 0.00028391053307585485, "loss": 0.1799, "step": 82848 }, { "epoch": 0.14689989795909322, "grad_norm": 0.265625, "learning_rate": 0.000283885439770677, "loss": 0.2194, "step": 82850 }, { "epoch": 0.14690344412440304, "grad_norm": 0.703125, "learning_rate": 0.0002838603500347098, "loss": 0.1345, "step": 82852 }, { "epoch": 0.14690699028971285, "grad_norm": 2.015625, "learning_rate": 0.0002838352638680636, "loss": 0.194, "step": 82854 }, { "epoch": 0.14691053645502267, "grad_norm": 0.44140625, "learning_rate": 0.00028381018127084773, "loss": 0.1853, "step": 82856 }, { "epoch": 0.14691408262033248, "grad_norm": 0.400390625, "learning_rate": 0.00028378510224317215, "loss": 0.1954, "step": 82858 }, { "epoch": 0.1469176287856423, "grad_norm": 0.392578125, "learning_rate": 0.0002837600267851464, "loss": 0.1743, "step": 82860 }, { "epoch": 0.1469211749509521, "grad_norm": 0.283203125, "learning_rate": 0.00028373495489688, "loss": 0.1887, "step": 82862 }, { "epoch": 0.14692472111626192, "grad_norm": 1.4375, "learning_rate": 0.00028370988657848293, "loss": 0.214, "step": 82864 }, { "epoch": 0.14692826728157174, "grad_norm": 0.2578125, "learning_rate": 0.0002836848218300646, "loss": 0.1895, "step": 82866 }, { "epoch": 0.14693181344688155, "grad_norm": 1.328125, "learning_rate": 0.0002836597606517347, "loss": 0.162, "step": 82868 }, { "epoch": 0.14693535961219137, "grad_norm": 0.2890625, "learning_rate": 0.0002836347030436029, "loss": 0.1867, "step": 82870 }, { "epoch": 0.14693890577750118, "grad_norm": 0.7578125, "learning_rate": 0.0002836096490057784, "loss": 0.2435, "step": 82872 }, { "epoch": 0.146942451942811, "grad_norm": 0.91796875, "learning_rate": 0.0002835845985383716, "loss": 0.1433, "step": 82874 }, { "epoch": 0.1469459981081208, "grad_norm": 0.49609375, "learning_rate": 0.00028355955164149107, "loss": 0.1949, "step": 82876 }, { "epoch": 0.14694954427343063, "grad_norm": 0.31640625, "learning_rate": 0.00028353450831524704, "loss": 0.1342, "step": 82878 }, { "epoch": 0.14695309043874044, "grad_norm": 3.359375, "learning_rate": 0.0002835094685597486, "loss": 0.2583, "step": 82880 }, { "epoch": 0.14695663660405026, "grad_norm": 0.8359375, "learning_rate": 0.0002834844323751058, "loss": 0.2768, "step": 82882 }, { "epoch": 0.14696018276936007, "grad_norm": 0.25390625, "learning_rate": 0.0002834593997614276, "loss": 0.1543, "step": 82884 }, { "epoch": 0.14696372893466988, "grad_norm": 0.7421875, "learning_rate": 0.00028343437071882375, "loss": 0.2239, "step": 82886 }, { "epoch": 0.1469672750999797, "grad_norm": 0.84375, "learning_rate": 0.00028340934524740357, "loss": 0.1913, "step": 82888 }, { "epoch": 0.14697082126528951, "grad_norm": 0.7421875, "learning_rate": 0.0002833843233472765, "loss": 0.3054, "step": 82890 }, { "epoch": 0.14697436743059933, "grad_norm": 0.2470703125, "learning_rate": 0.0002833593050185524, "loss": 0.1819, "step": 82892 }, { "epoch": 0.14697791359590914, "grad_norm": 0.2890625, "learning_rate": 0.00028333429026133997, "loss": 0.1366, "step": 82894 }, { "epoch": 0.14698145976121896, "grad_norm": 0.294921875, "learning_rate": 0.00028330927907574914, "loss": 0.1546, "step": 82896 }, { "epoch": 0.14698500592652877, "grad_norm": 0.26171875, "learning_rate": 0.00028328427146188896, "loss": 0.1871, "step": 82898 }, { "epoch": 0.1469885520918386, "grad_norm": 0.63671875, "learning_rate": 0.00028325926741986935, "loss": 0.1879, "step": 82900 }, { "epoch": 0.1469920982571484, "grad_norm": 0.359375, "learning_rate": 0.0002832342669497989, "loss": 0.2078, "step": 82902 }, { "epoch": 0.14699564442245822, "grad_norm": 0.298828125, "learning_rate": 0.00028320927005178755, "loss": 0.1757, "step": 82904 }, { "epoch": 0.14699919058776803, "grad_norm": 0.361328125, "learning_rate": 0.00028318427672594437, "loss": 0.1744, "step": 82906 }, { "epoch": 0.14700273675307785, "grad_norm": 0.33203125, "learning_rate": 0.0002831592869723787, "loss": 0.1568, "step": 82908 }, { "epoch": 0.14700628291838766, "grad_norm": 0.392578125, "learning_rate": 0.0002831343007911998, "loss": 0.1622, "step": 82910 }, { "epoch": 0.14700982908369747, "grad_norm": 0.39453125, "learning_rate": 0.0002831093181825168, "loss": 0.202, "step": 82912 }, { "epoch": 0.1470133752490073, "grad_norm": 0.208984375, "learning_rate": 0.0002830843391464394, "loss": 0.1847, "step": 82914 }, { "epoch": 0.1470169214143171, "grad_norm": 0.259765625, "learning_rate": 0.00028305936368307655, "loss": 0.138, "step": 82916 }, { "epoch": 0.14702046757962692, "grad_norm": 0.439453125, "learning_rate": 0.0002830343917925374, "loss": 0.1485, "step": 82918 }, { "epoch": 0.14702401374493673, "grad_norm": 0.31640625, "learning_rate": 0.0002830094234749313, "loss": 0.238, "step": 82920 }, { "epoch": 0.14702755991024655, "grad_norm": 0.8671875, "learning_rate": 0.0002829844587303675, "loss": 0.2147, "step": 82922 }, { "epoch": 0.14703110607555636, "grad_norm": 0.96875, "learning_rate": 0.0002829594975589551, "loss": 0.2198, "step": 82924 }, { "epoch": 0.14703465224086618, "grad_norm": 0.423828125, "learning_rate": 0.00028293453996080316, "loss": 0.1888, "step": 82926 }, { "epoch": 0.147038198406176, "grad_norm": 1.03125, "learning_rate": 0.00028290958593602103, "loss": 0.1795, "step": 82928 }, { "epoch": 0.1470417445714858, "grad_norm": 0.2734375, "learning_rate": 0.0002828846354847176, "loss": 0.1569, "step": 82930 }, { "epoch": 0.14704529073679562, "grad_norm": 0.26171875, "learning_rate": 0.0002828596886070024, "loss": 0.2065, "step": 82932 }, { "epoch": 0.14704883690210543, "grad_norm": 0.466796875, "learning_rate": 0.0002828347453029839, "loss": 0.148, "step": 82934 }, { "epoch": 0.14705238306741525, "grad_norm": 0.2470703125, "learning_rate": 0.00028280980557277187, "loss": 0.2003, "step": 82936 }, { "epoch": 0.1470559292327251, "grad_norm": 0.734375, "learning_rate": 0.0002827848694164748, "loss": 0.2046, "step": 82938 }, { "epoch": 0.1470594753980349, "grad_norm": 0.390625, "learning_rate": 0.00028275993683420244, "loss": 0.1999, "step": 82940 }, { "epoch": 0.14706302156334472, "grad_norm": 0.703125, "learning_rate": 0.000282735007826063, "loss": 0.1962, "step": 82942 }, { "epoch": 0.14706656772865453, "grad_norm": 0.455078125, "learning_rate": 0.0002827100823921661, "loss": 0.1961, "step": 82944 }, { "epoch": 0.14707011389396435, "grad_norm": 0.3203125, "learning_rate": 0.00028268516053262053, "loss": 0.161, "step": 82946 }, { "epoch": 0.14707366005927416, "grad_norm": 0.26953125, "learning_rate": 0.0002826602422475353, "loss": 0.1829, "step": 82948 }, { "epoch": 0.14707720622458398, "grad_norm": 0.423828125, "learning_rate": 0.0002826353275370194, "loss": 0.2458, "step": 82950 }, { "epoch": 0.1470807523898938, "grad_norm": 0.26953125, "learning_rate": 0.0002826104164011817, "loss": 0.3265, "step": 82952 }, { "epoch": 0.1470842985552036, "grad_norm": 0.265625, "learning_rate": 0.0002825855088401314, "loss": 0.1548, "step": 82954 }, { "epoch": 0.14708784472051342, "grad_norm": 0.439453125, "learning_rate": 0.00028256060485397724, "loss": 0.2001, "step": 82956 }, { "epoch": 0.14709139088582324, "grad_norm": 0.5390625, "learning_rate": 0.0002825357044428282, "loss": 0.1778, "step": 82958 }, { "epoch": 0.14709493705113305, "grad_norm": 0.53125, "learning_rate": 0.00028251080760679294, "loss": 0.168, "step": 82960 }, { "epoch": 0.14709848321644287, "grad_norm": 0.52734375, "learning_rate": 0.00028248591434598073, "loss": 0.1754, "step": 82962 }, { "epoch": 0.14710202938175268, "grad_norm": 0.51171875, "learning_rate": 0.00028246102466050033, "loss": 0.1795, "step": 82964 }, { "epoch": 0.1471055755470625, "grad_norm": 1.671875, "learning_rate": 0.0002824361385504604, "loss": 0.157, "step": 82966 }, { "epoch": 0.1471091217123723, "grad_norm": 0.486328125, "learning_rate": 0.0002824112560159701, "loss": 0.1462, "step": 82968 }, { "epoch": 0.14711266787768212, "grad_norm": 0.333984375, "learning_rate": 0.0002823863770571377, "loss": 0.1902, "step": 82970 }, { "epoch": 0.14711621404299194, "grad_norm": 0.37890625, "learning_rate": 0.0002823615016740726, "loss": 0.1946, "step": 82972 }, { "epoch": 0.14711976020830175, "grad_norm": 0.84765625, "learning_rate": 0.0002823366298668833, "loss": 0.1811, "step": 82974 }, { "epoch": 0.14712330637361157, "grad_norm": 0.6953125, "learning_rate": 0.0002823117616356787, "loss": 0.2244, "step": 82976 }, { "epoch": 0.14712685253892138, "grad_norm": 0.32421875, "learning_rate": 0.00028228689698056727, "loss": 0.217, "step": 82978 }, { "epoch": 0.1471303987042312, "grad_norm": 0.494140625, "learning_rate": 0.0002822620359016581, "loss": 0.1653, "step": 82980 }, { "epoch": 0.147133944869541, "grad_norm": 0.166015625, "learning_rate": 0.0002822371783990598, "loss": 0.23, "step": 82982 }, { "epoch": 0.14713749103485083, "grad_norm": 1.2578125, "learning_rate": 0.0002822123244728812, "loss": 0.2091, "step": 82984 }, { "epoch": 0.14714103720016064, "grad_norm": 5.65625, "learning_rate": 0.00028218747412323073, "loss": 0.3501, "step": 82986 }, { "epoch": 0.14714458336547045, "grad_norm": 0.326171875, "learning_rate": 0.0002821626273502171, "loss": 0.2589, "step": 82988 }, { "epoch": 0.14714812953078027, "grad_norm": 0.3046875, "learning_rate": 0.0002821377841539494, "loss": 0.2119, "step": 82990 }, { "epoch": 0.14715167569609008, "grad_norm": 0.46484375, "learning_rate": 0.00028211294453453554, "loss": 0.1876, "step": 82992 }, { "epoch": 0.1471552218613999, "grad_norm": 0.7890625, "learning_rate": 0.00028208810849208484, "loss": 0.1757, "step": 82994 }, { "epoch": 0.1471587680267097, "grad_norm": 0.427734375, "learning_rate": 0.00028206327602670544, "loss": 0.2212, "step": 82996 }, { "epoch": 0.14716231419201953, "grad_norm": 1.1796875, "learning_rate": 0.00028203844713850664, "loss": 0.2136, "step": 82998 }, { "epoch": 0.14716586035732934, "grad_norm": 9.375, "learning_rate": 0.00028201362182759605, "loss": 0.3367, "step": 83000 }, { "epoch": 0.14716940652263916, "grad_norm": 0.2373046875, "learning_rate": 0.00028198880009408294, "loss": 0.1639, "step": 83002 }, { "epoch": 0.14717295268794897, "grad_norm": 0.2451171875, "learning_rate": 0.00028196398193807565, "loss": 0.1305, "step": 83004 }, { "epoch": 0.14717649885325879, "grad_norm": 0.80078125, "learning_rate": 0.0002819391673596828, "loss": 0.1763, "step": 83006 }, { "epoch": 0.1471800450185686, "grad_norm": 0.50390625, "learning_rate": 0.0002819143563590127, "loss": 0.1701, "step": 83008 }, { "epoch": 0.14718359118387841, "grad_norm": 0.46484375, "learning_rate": 0.0002818895489361739, "loss": 0.1763, "step": 83010 }, { "epoch": 0.14718713734918823, "grad_norm": 0.30078125, "learning_rate": 0.00028186474509127524, "loss": 0.1911, "step": 83012 }, { "epoch": 0.14719068351449804, "grad_norm": 0.9453125, "learning_rate": 0.0002818399448244249, "loss": 0.1987, "step": 83014 }, { "epoch": 0.14719422967980786, "grad_norm": 3.21875, "learning_rate": 0.00028181514813573137, "loss": 0.2703, "step": 83016 }, { "epoch": 0.14719777584511767, "grad_norm": 0.515625, "learning_rate": 0.000281790355025303, "loss": 0.1908, "step": 83018 }, { "epoch": 0.1472013220104275, "grad_norm": 0.21484375, "learning_rate": 0.00028176556549324854, "loss": 0.1732, "step": 83020 }, { "epoch": 0.1472048681757373, "grad_norm": 0.60546875, "learning_rate": 0.00028174077953967613, "loss": 0.1685, "step": 83022 }, { "epoch": 0.14720841434104712, "grad_norm": 0.1669921875, "learning_rate": 0.00028171599716469433, "loss": 0.1746, "step": 83024 }, { "epoch": 0.14721196050635693, "grad_norm": 0.828125, "learning_rate": 0.0002816912183684114, "loss": 0.2404, "step": 83026 }, { "epoch": 0.14721550667166675, "grad_norm": 0.4609375, "learning_rate": 0.00028166644315093557, "loss": 0.2149, "step": 83028 }, { "epoch": 0.1472190528369766, "grad_norm": 0.275390625, "learning_rate": 0.00028164167151237566, "loss": 0.1273, "step": 83030 }, { "epoch": 0.1472225990022864, "grad_norm": 0.310546875, "learning_rate": 0.00028161690345283945, "loss": 0.1242, "step": 83032 }, { "epoch": 0.14722614516759622, "grad_norm": 0.71875, "learning_rate": 0.0002815921389724357, "loss": 0.1661, "step": 83034 }, { "epoch": 0.14722969133290603, "grad_norm": 0.244140625, "learning_rate": 0.00028156737807127257, "loss": 0.2086, "step": 83036 }, { "epoch": 0.14723323749821585, "grad_norm": 0.52734375, "learning_rate": 0.0002815426207494583, "loss": 0.2017, "step": 83038 }, { "epoch": 0.14723678366352566, "grad_norm": 0.609375, "learning_rate": 0.0002815178670071012, "loss": 0.1964, "step": 83040 }, { "epoch": 0.14724032982883548, "grad_norm": 0.267578125, "learning_rate": 0.00028149311684430936, "loss": 0.1408, "step": 83042 }, { "epoch": 0.1472438759941453, "grad_norm": 0.421875, "learning_rate": 0.0002814683702611913, "loss": 0.2363, "step": 83044 }, { "epoch": 0.1472474221594551, "grad_norm": 0.2119140625, "learning_rate": 0.0002814436272578552, "loss": 0.1248, "step": 83046 }, { "epoch": 0.14725096832476492, "grad_norm": 0.240234375, "learning_rate": 0.0002814188878344091, "loss": 0.1853, "step": 83048 }, { "epoch": 0.14725451449007473, "grad_norm": 0.48046875, "learning_rate": 0.0002813941519909612, "loss": 0.1865, "step": 83050 }, { "epoch": 0.14725806065538455, "grad_norm": 0.330078125, "learning_rate": 0.00028136941972761997, "loss": 0.16, "step": 83052 }, { "epoch": 0.14726160682069436, "grad_norm": 0.294921875, "learning_rate": 0.00028134469104449327, "loss": 0.1675, "step": 83054 }, { "epoch": 0.14726515298600418, "grad_norm": 0.734375, "learning_rate": 0.0002813199659416895, "loss": 0.1724, "step": 83056 }, { "epoch": 0.147268699151314, "grad_norm": 0.4453125, "learning_rate": 0.00028129524441931653, "loss": 0.1497, "step": 83058 }, { "epoch": 0.1472722453166238, "grad_norm": 0.416015625, "learning_rate": 0.00028127052647748245, "loss": 0.1409, "step": 83060 }, { "epoch": 0.14727579148193362, "grad_norm": 0.306640625, "learning_rate": 0.00028124581211629577, "loss": 0.1354, "step": 83062 }, { "epoch": 0.14727933764724344, "grad_norm": 0.46875, "learning_rate": 0.00028122110133586424, "loss": 0.1641, "step": 83064 }, { "epoch": 0.14728288381255325, "grad_norm": 0.48046875, "learning_rate": 0.0002811963941362961, "loss": 0.2159, "step": 83066 }, { "epoch": 0.14728642997786306, "grad_norm": 0.1962890625, "learning_rate": 0.00028117169051769905, "loss": 0.1697, "step": 83068 }, { "epoch": 0.14728997614317288, "grad_norm": 0.28515625, "learning_rate": 0.0002811469904801816, "loss": 0.2448, "step": 83070 }, { "epoch": 0.1472935223084827, "grad_norm": 0.466796875, "learning_rate": 0.00028112229402385157, "loss": 0.1709, "step": 83072 }, { "epoch": 0.1472970684737925, "grad_norm": 0.365234375, "learning_rate": 0.00028109760114881693, "loss": 0.1926, "step": 83074 }, { "epoch": 0.14730061463910232, "grad_norm": 0.2099609375, "learning_rate": 0.00028107291185518576, "loss": 0.1634, "step": 83076 }, { "epoch": 0.14730416080441214, "grad_norm": 0.40234375, "learning_rate": 0.0002810482261430658, "loss": 0.1791, "step": 83078 }, { "epoch": 0.14730770696972195, "grad_norm": 0.83203125, "learning_rate": 0.0002810235440125655, "loss": 0.227, "step": 83080 }, { "epoch": 0.14731125313503177, "grad_norm": 0.671875, "learning_rate": 0.00028099886546379223, "loss": 0.2327, "step": 83082 }, { "epoch": 0.14731479930034158, "grad_norm": 0.283203125, "learning_rate": 0.0002809741904968543, "loss": 0.165, "step": 83084 }, { "epoch": 0.1473183454656514, "grad_norm": 0.9296875, "learning_rate": 0.00028094951911185936, "loss": 0.2086, "step": 83086 }, { "epoch": 0.1473218916309612, "grad_norm": 0.2431640625, "learning_rate": 0.00028092485130891577, "loss": 0.1579, "step": 83088 }, { "epoch": 0.14732543779627102, "grad_norm": 0.326171875, "learning_rate": 0.0002809001870881308, "loss": 0.1768, "step": 83090 }, { "epoch": 0.14732898396158084, "grad_norm": 0.515625, "learning_rate": 0.0002808755264496128, "loss": 0.2022, "step": 83092 }, { "epoch": 0.14733253012689065, "grad_norm": 0.9296875, "learning_rate": 0.00028085086939346946, "loss": 0.2249, "step": 83094 }, { "epoch": 0.14733607629220047, "grad_norm": 0.72265625, "learning_rate": 0.0002808262159198085, "loss": 0.2204, "step": 83096 }, { "epoch": 0.14733962245751028, "grad_norm": 0.3828125, "learning_rate": 0.00028080156602873783, "loss": 0.3556, "step": 83098 }, { "epoch": 0.1473431686228201, "grad_norm": 0.515625, "learning_rate": 0.0002807769197203653, "loss": 0.2058, "step": 83100 }, { "epoch": 0.1473467147881299, "grad_norm": 0.34765625, "learning_rate": 0.0002807522769947987, "loss": 0.1349, "step": 83102 }, { "epoch": 0.14735026095343973, "grad_norm": 0.484375, "learning_rate": 0.0002807276378521457, "loss": 0.2024, "step": 83104 }, { "epoch": 0.14735380711874954, "grad_norm": 0.294921875, "learning_rate": 0.0002807030022925142, "loss": 0.1384, "step": 83106 }, { "epoch": 0.14735735328405936, "grad_norm": 0.4453125, "learning_rate": 0.0002806783703160117, "loss": 0.1916, "step": 83108 }, { "epoch": 0.14736089944936917, "grad_norm": 0.314453125, "learning_rate": 0.0002806537419227463, "loss": 0.1629, "step": 83110 }, { "epoch": 0.14736444561467898, "grad_norm": 3.1875, "learning_rate": 0.0002806291171128254, "loss": 0.2019, "step": 83112 }, { "epoch": 0.1473679917799888, "grad_norm": 0.337890625, "learning_rate": 0.00028060449588635677, "loss": 0.1753, "step": 83114 }, { "epoch": 0.14737153794529861, "grad_norm": 0.326171875, "learning_rate": 0.0002805798782434482, "loss": 0.1395, "step": 83116 }, { "epoch": 0.14737508411060843, "grad_norm": 2.640625, "learning_rate": 0.00028055526418420705, "loss": 0.2321, "step": 83118 }, { "epoch": 0.14737863027591827, "grad_norm": 0.6171875, "learning_rate": 0.0002805306537087415, "loss": 0.1704, "step": 83120 }, { "epoch": 0.14738217644122809, "grad_norm": 0.51953125, "learning_rate": 0.0002805060468171587, "loss": 0.1968, "step": 83122 }, { "epoch": 0.1473857226065379, "grad_norm": 0.38671875, "learning_rate": 0.00028048144350956643, "loss": 0.1999, "step": 83124 }, { "epoch": 0.14738926877184771, "grad_norm": 0.6328125, "learning_rate": 0.0002804568437860722, "loss": 0.1878, "step": 83126 }, { "epoch": 0.14739281493715753, "grad_norm": 0.26953125, "learning_rate": 0.00028043224764678394, "loss": 0.1793, "step": 83128 }, { "epoch": 0.14739636110246734, "grad_norm": 0.79296875, "learning_rate": 0.0002804076550918087, "loss": 0.2521, "step": 83130 }, { "epoch": 0.14739990726777716, "grad_norm": 0.255859375, "learning_rate": 0.0002803830661212545, "loss": 0.1541, "step": 83132 }, { "epoch": 0.14740345343308697, "grad_norm": 0.400390625, "learning_rate": 0.0002803584807352286, "loss": 0.1723, "step": 83134 }, { "epoch": 0.1474069995983968, "grad_norm": 0.490234375, "learning_rate": 0.00028033389893383873, "loss": 0.1915, "step": 83136 }, { "epoch": 0.1474105457637066, "grad_norm": 0.515625, "learning_rate": 0.0002803093207171922, "loss": 0.1721, "step": 83138 }, { "epoch": 0.14741409192901642, "grad_norm": 0.3984375, "learning_rate": 0.00028028474608539627, "loss": 0.1508, "step": 83140 }, { "epoch": 0.14741763809432623, "grad_norm": 1.171875, "learning_rate": 0.000280260175038559, "loss": 0.432, "step": 83142 }, { "epoch": 0.14742118425963605, "grad_norm": 0.49609375, "learning_rate": 0.0002802356075767873, "loss": 0.1971, "step": 83144 }, { "epoch": 0.14742473042494586, "grad_norm": 0.51171875, "learning_rate": 0.0002802110437001894, "loss": 0.1654, "step": 83146 }, { "epoch": 0.14742827659025567, "grad_norm": 0.6171875, "learning_rate": 0.0002801864834088718, "loss": 0.1691, "step": 83148 }, { "epoch": 0.1474318227555655, "grad_norm": 0.359375, "learning_rate": 0.0002801619267029424, "loss": 0.1082, "step": 83150 }, { "epoch": 0.1474353689208753, "grad_norm": 0.2275390625, "learning_rate": 0.0002801373735825086, "loss": 0.1294, "step": 83152 }, { "epoch": 0.14743891508618512, "grad_norm": 0.5703125, "learning_rate": 0.0002801128240476777, "loss": 0.1529, "step": 83154 }, { "epoch": 0.14744246125149493, "grad_norm": 0.337890625, "learning_rate": 0.00028008827809855694, "loss": 0.1979, "step": 83156 }, { "epoch": 0.14744600741680475, "grad_norm": 0.408203125, "learning_rate": 0.0002800637357352537, "loss": 0.2026, "step": 83158 }, { "epoch": 0.14744955358211456, "grad_norm": 1.2265625, "learning_rate": 0.0002800391969578756, "loss": 0.2005, "step": 83160 }, { "epoch": 0.14745309974742438, "grad_norm": 0.37109375, "learning_rate": 0.0002800146617665297, "loss": 0.1878, "step": 83162 }, { "epoch": 0.1474566459127342, "grad_norm": 0.23828125, "learning_rate": 0.00027999013016132343, "loss": 0.1552, "step": 83164 }, { "epoch": 0.147460192078044, "grad_norm": 0.7734375, "learning_rate": 0.00027996560214236377, "loss": 0.1905, "step": 83166 }, { "epoch": 0.14746373824335382, "grad_norm": 0.2490234375, "learning_rate": 0.00027994107770975846, "loss": 0.1777, "step": 83168 }, { "epoch": 0.14746728440866363, "grad_norm": 0.484375, "learning_rate": 0.00027991655686361453, "loss": 0.3262, "step": 83170 }, { "epoch": 0.14747083057397345, "grad_norm": 0.7734375, "learning_rate": 0.0002798920396040392, "loss": 0.17, "step": 83172 }, { "epoch": 0.14747437673928326, "grad_norm": 1.5859375, "learning_rate": 0.0002798675259311397, "loss": 0.24, "step": 83174 }, { "epoch": 0.14747792290459308, "grad_norm": 0.875, "learning_rate": 0.0002798430158450231, "loss": 0.1819, "step": 83176 }, { "epoch": 0.1474814690699029, "grad_norm": 0.515625, "learning_rate": 0.00027981850934579713, "loss": 0.1426, "step": 83178 }, { "epoch": 0.1474850152352127, "grad_norm": 1.390625, "learning_rate": 0.0002797940064335682, "loss": 0.2989, "step": 83180 }, { "epoch": 0.14748856140052252, "grad_norm": 0.1787109375, "learning_rate": 0.00027976950710844397, "loss": 0.1042, "step": 83182 }, { "epoch": 0.14749210756583234, "grad_norm": 1.90625, "learning_rate": 0.0002797450113705314, "loss": 0.36, "step": 83184 }, { "epoch": 0.14749565373114215, "grad_norm": 0.447265625, "learning_rate": 0.00027972051921993794, "loss": 0.1342, "step": 83186 }, { "epoch": 0.14749919989645197, "grad_norm": 3.625, "learning_rate": 0.00027969603065677016, "loss": 0.1849, "step": 83188 }, { "epoch": 0.14750274606176178, "grad_norm": 0.5859375, "learning_rate": 0.0002796715456811357, "loss": 0.1663, "step": 83190 }, { "epoch": 0.1475062922270716, "grad_norm": 0.341796875, "learning_rate": 0.00027964706429314123, "loss": 0.1388, "step": 83192 }, { "epoch": 0.1475098383923814, "grad_norm": 0.1943359375, "learning_rate": 0.000279622586492894, "loss": 0.2111, "step": 83194 }, { "epoch": 0.14751338455769122, "grad_norm": 0.271484375, "learning_rate": 0.0002795981122805012, "loss": 0.1507, "step": 83196 }, { "epoch": 0.14751693072300104, "grad_norm": 0.279296875, "learning_rate": 0.0002795736416560694, "loss": 0.1849, "step": 83198 }, { "epoch": 0.14752047688831085, "grad_norm": 0.55859375, "learning_rate": 0.0002795491746197062, "loss": 0.1927, "step": 83200 }, { "epoch": 0.14752402305362067, "grad_norm": 0.2236328125, "learning_rate": 0.0002795247111715183, "loss": 0.1905, "step": 83202 }, { "epoch": 0.14752756921893048, "grad_norm": 1.5546875, "learning_rate": 0.00027950025131161265, "loss": 0.3942, "step": 83204 }, { "epoch": 0.1475311153842403, "grad_norm": 0.345703125, "learning_rate": 0.00027947579504009626, "loss": 0.2331, "step": 83206 }, { "epoch": 0.1475346615495501, "grad_norm": 0.765625, "learning_rate": 0.0002794513423570761, "loss": 0.2018, "step": 83208 }, { "epoch": 0.14753820771485995, "grad_norm": 0.421875, "learning_rate": 0.0002794268932626593, "loss": 0.1802, "step": 83210 }, { "epoch": 0.14754175388016977, "grad_norm": 0.478515625, "learning_rate": 0.0002794024477569526, "loss": 0.1954, "step": 83212 }, { "epoch": 0.14754530004547958, "grad_norm": 0.408203125, "learning_rate": 0.0002793780058400629, "loss": 0.1872, "step": 83214 }, { "epoch": 0.1475488462107894, "grad_norm": 0.388671875, "learning_rate": 0.00027935356751209703, "loss": 0.1783, "step": 83216 }, { "epoch": 0.1475523923760992, "grad_norm": 0.435546875, "learning_rate": 0.0002793291327731622, "loss": 0.1793, "step": 83218 }, { "epoch": 0.14755593854140903, "grad_norm": 0.3515625, "learning_rate": 0.0002793047016233648, "loss": 0.173, "step": 83220 }, { "epoch": 0.14755948470671884, "grad_norm": 1.734375, "learning_rate": 0.00027928027406281207, "loss": 0.1652, "step": 83222 }, { "epoch": 0.14756303087202866, "grad_norm": 0.82421875, "learning_rate": 0.00027925585009161067, "loss": 0.4001, "step": 83224 }, { "epoch": 0.14756657703733847, "grad_norm": 0.33203125, "learning_rate": 0.0002792314297098672, "loss": 0.192, "step": 83226 }, { "epoch": 0.14757012320264828, "grad_norm": 0.640625, "learning_rate": 0.0002792070129176891, "loss": 0.2867, "step": 83228 }, { "epoch": 0.1475736693679581, "grad_norm": 0.67578125, "learning_rate": 0.0002791825997151824, "loss": 0.1942, "step": 83230 }, { "epoch": 0.1475772155332679, "grad_norm": 0.4453125, "learning_rate": 0.0002791581901024544, "loss": 0.17, "step": 83232 }, { "epoch": 0.14758076169857773, "grad_norm": 0.259765625, "learning_rate": 0.0002791337840796116, "loss": 0.2051, "step": 83234 }, { "epoch": 0.14758430786388754, "grad_norm": 0.2177734375, "learning_rate": 0.00027910938164676095, "loss": 0.2144, "step": 83236 }, { "epoch": 0.14758785402919736, "grad_norm": 0.55078125, "learning_rate": 0.0002790849828040088, "loss": 0.2483, "step": 83238 }, { "epoch": 0.14759140019450717, "grad_norm": 0.419921875, "learning_rate": 0.00027906058755146237, "loss": 0.2449, "step": 83240 }, { "epoch": 0.147594946359817, "grad_norm": 0.24609375, "learning_rate": 0.00027903619588922794, "loss": 0.145, "step": 83242 }, { "epoch": 0.1475984925251268, "grad_norm": 0.470703125, "learning_rate": 0.0002790118078174123, "loss": 0.1759, "step": 83244 }, { "epoch": 0.14760203869043662, "grad_norm": 0.94140625, "learning_rate": 0.00027898742333612214, "loss": 0.2854, "step": 83246 }, { "epoch": 0.14760558485574643, "grad_norm": 0.93359375, "learning_rate": 0.000278963042445464, "loss": 0.1868, "step": 83248 }, { "epoch": 0.14760913102105624, "grad_norm": 0.3203125, "learning_rate": 0.0002789386651455447, "loss": 0.1681, "step": 83250 }, { "epoch": 0.14761267718636606, "grad_norm": 0.361328125, "learning_rate": 0.0002789142914364708, "loss": 0.2302, "step": 83252 }, { "epoch": 0.14761622335167587, "grad_norm": 0.494140625, "learning_rate": 0.0002788899213183488, "loss": 0.1801, "step": 83254 }, { "epoch": 0.1476197695169857, "grad_norm": 0.1533203125, "learning_rate": 0.0002788655547912852, "loss": 0.1972, "step": 83256 }, { "epoch": 0.1476233156822955, "grad_norm": 0.220703125, "learning_rate": 0.0002788411918553868, "loss": 0.1478, "step": 83258 }, { "epoch": 0.14762686184760532, "grad_norm": 0.49609375, "learning_rate": 0.0002788168325107601, "loss": 0.169, "step": 83260 }, { "epoch": 0.14763040801291513, "grad_norm": 0.91015625, "learning_rate": 0.0002787924767575115, "loss": 0.1728, "step": 83262 }, { "epoch": 0.14763395417822495, "grad_norm": 0.80078125, "learning_rate": 0.0002787681245957477, "loss": 0.1488, "step": 83264 }, { "epoch": 0.14763750034353476, "grad_norm": 0.435546875, "learning_rate": 0.00027874377602557477, "loss": 0.1704, "step": 83266 }, { "epoch": 0.14764104650884458, "grad_norm": 0.47265625, "learning_rate": 0.00027871943104709995, "loss": 0.1544, "step": 83268 }, { "epoch": 0.1476445926741544, "grad_norm": 3.15625, "learning_rate": 0.00027869508966042896, "loss": 0.3704, "step": 83270 }, { "epoch": 0.1476481388394642, "grad_norm": 0.2890625, "learning_rate": 0.00027867075186566875, "loss": 0.139, "step": 83272 }, { "epoch": 0.14765168500477402, "grad_norm": 0.625, "learning_rate": 0.0002786464176629254, "loss": 0.1226, "step": 83274 }, { "epoch": 0.14765523117008383, "grad_norm": 0.376953125, "learning_rate": 0.00027862208705230586, "loss": 0.1419, "step": 83276 }, { "epoch": 0.14765877733539365, "grad_norm": 2.109375, "learning_rate": 0.00027859776003391585, "loss": 0.2, "step": 83278 }, { "epoch": 0.14766232350070346, "grad_norm": 0.3125, "learning_rate": 0.00027857343660786234, "loss": 0.1383, "step": 83280 }, { "epoch": 0.14766586966601328, "grad_norm": 0.54296875, "learning_rate": 0.00027854911677425135, "loss": 0.1589, "step": 83282 }, { "epoch": 0.1476694158313231, "grad_norm": 0.283203125, "learning_rate": 0.00027852480053318953, "loss": 0.1754, "step": 83284 }, { "epoch": 0.1476729619966329, "grad_norm": 0.734375, "learning_rate": 0.00027850048788478297, "loss": 0.1741, "step": 83286 }, { "epoch": 0.14767650816194272, "grad_norm": 1.3125, "learning_rate": 0.00027847617882913796, "loss": 0.2883, "step": 83288 }, { "epoch": 0.14768005432725254, "grad_norm": 1.4375, "learning_rate": 0.00027845187336636104, "loss": 0.2685, "step": 83290 }, { "epoch": 0.14768360049256235, "grad_norm": 0.95703125, "learning_rate": 0.0002784275714965584, "loss": 0.147, "step": 83292 }, { "epoch": 0.14768714665787216, "grad_norm": 1.3515625, "learning_rate": 0.00027840327321983644, "loss": 0.2003, "step": 83294 }, { "epoch": 0.14769069282318198, "grad_norm": 0.462890625, "learning_rate": 0.0002783789785363011, "loss": 0.1854, "step": 83296 }, { "epoch": 0.1476942389884918, "grad_norm": 0.2255859375, "learning_rate": 0.00027835468744605903, "loss": 0.2188, "step": 83298 }, { "epoch": 0.1476977851538016, "grad_norm": 0.400390625, "learning_rate": 0.00027833039994921635, "loss": 0.1826, "step": 83300 }, { "epoch": 0.14770133131911145, "grad_norm": 0.7265625, "learning_rate": 0.00027830611604587915, "loss": 0.1743, "step": 83302 }, { "epoch": 0.14770487748442127, "grad_norm": 2.703125, "learning_rate": 0.00027828183573615364, "loss": 0.3596, "step": 83304 }, { "epoch": 0.14770842364973108, "grad_norm": 0.90234375, "learning_rate": 0.000278257559020146, "loss": 0.1897, "step": 83306 }, { "epoch": 0.1477119698150409, "grad_norm": 0.1875, "learning_rate": 0.00027823328589796266, "loss": 0.2031, "step": 83308 }, { "epoch": 0.1477155159803507, "grad_norm": 0.1787109375, "learning_rate": 0.0002782090163697095, "loss": 0.1398, "step": 83310 }, { "epoch": 0.14771906214566052, "grad_norm": 0.578125, "learning_rate": 0.0002781847504354928, "loss": 0.1574, "step": 83312 }, { "epoch": 0.14772260831097034, "grad_norm": 2.578125, "learning_rate": 0.0002781604880954184, "loss": 0.2049, "step": 83314 }, { "epoch": 0.14772615447628015, "grad_norm": 0.5625, "learning_rate": 0.00027813622934959293, "loss": 0.1348, "step": 83316 }, { "epoch": 0.14772970064158997, "grad_norm": 0.189453125, "learning_rate": 0.0002781119741981221, "loss": 0.1514, "step": 83318 }, { "epoch": 0.14773324680689978, "grad_norm": 0.35546875, "learning_rate": 0.00027808772264111215, "loss": 0.2111, "step": 83320 }, { "epoch": 0.1477367929722096, "grad_norm": 0.259765625, "learning_rate": 0.000278063474678669, "loss": 0.153, "step": 83322 }, { "epoch": 0.1477403391375194, "grad_norm": 0.259765625, "learning_rate": 0.0002780392303108987, "loss": 0.1964, "step": 83324 }, { "epoch": 0.14774388530282923, "grad_norm": 0.287109375, "learning_rate": 0.0002780149895379076, "loss": 0.1752, "step": 83326 }, { "epoch": 0.14774743146813904, "grad_norm": 0.5234375, "learning_rate": 0.0002779907523598012, "loss": 0.1278, "step": 83328 }, { "epoch": 0.14775097763344885, "grad_norm": 0.8984375, "learning_rate": 0.0002779665187766859, "loss": 0.2421, "step": 83330 }, { "epoch": 0.14775452379875867, "grad_norm": 0.333984375, "learning_rate": 0.00027794228878866745, "loss": 0.1796, "step": 83332 }, { "epoch": 0.14775806996406848, "grad_norm": 0.287109375, "learning_rate": 0.00027791806239585215, "loss": 0.1497, "step": 83334 }, { "epoch": 0.1477616161293783, "grad_norm": 0.50390625, "learning_rate": 0.00027789383959834547, "loss": 0.1723, "step": 83336 }, { "epoch": 0.1477651622946881, "grad_norm": 0.2890625, "learning_rate": 0.0002778696203962537, "loss": 0.2124, "step": 83338 }, { "epoch": 0.14776870845999793, "grad_norm": 0.34375, "learning_rate": 0.00027784540478968276, "loss": 0.2108, "step": 83340 }, { "epoch": 0.14777225462530774, "grad_norm": 0.5078125, "learning_rate": 0.00027782119277873843, "loss": 0.2162, "step": 83342 }, { "epoch": 0.14777580079061756, "grad_norm": 0.2265625, "learning_rate": 0.0002777969843635266, "loss": 0.1508, "step": 83344 }, { "epoch": 0.14777934695592737, "grad_norm": 0.4375, "learning_rate": 0.000277772779544153, "loss": 0.2095, "step": 83346 }, { "epoch": 0.14778289312123719, "grad_norm": 0.408203125, "learning_rate": 0.00027774857832072375, "loss": 0.1571, "step": 83348 }, { "epoch": 0.147786439286547, "grad_norm": 0.53515625, "learning_rate": 0.00027772438069334466, "loss": 0.1656, "step": 83350 }, { "epoch": 0.14778998545185681, "grad_norm": 0.6484375, "learning_rate": 0.0002777001866621215, "loss": 0.1737, "step": 83352 }, { "epoch": 0.14779353161716663, "grad_norm": 0.60546875, "learning_rate": 0.00027767599622715997, "loss": 0.2771, "step": 83354 }, { "epoch": 0.14779707778247644, "grad_norm": 0.6171875, "learning_rate": 0.0002776518093885661, "loss": 0.1645, "step": 83356 }, { "epoch": 0.14780062394778626, "grad_norm": 0.30859375, "learning_rate": 0.0002776276261464455, "loss": 0.2048, "step": 83358 }, { "epoch": 0.14780417011309607, "grad_norm": 0.39453125, "learning_rate": 0.000277603446500904, "loss": 0.1907, "step": 83360 }, { "epoch": 0.1478077162784059, "grad_norm": 1.46875, "learning_rate": 0.0002775792704520472, "loss": 0.1804, "step": 83362 }, { "epoch": 0.1478112624437157, "grad_norm": 1.9921875, "learning_rate": 0.00027755509799998093, "loss": 0.2563, "step": 83364 }, { "epoch": 0.14781480860902552, "grad_norm": 0.32421875, "learning_rate": 0.0002775309291448113, "loss": 0.1461, "step": 83366 }, { "epoch": 0.14781835477433533, "grad_norm": 1.03125, "learning_rate": 0.0002775067638866432, "loss": 0.2195, "step": 83368 }, { "epoch": 0.14782190093964515, "grad_norm": 0.40625, "learning_rate": 0.0002774826022255829, "loss": 0.1806, "step": 83370 }, { "epoch": 0.14782544710495496, "grad_norm": 0.236328125, "learning_rate": 0.0002774584441617359, "loss": 0.152, "step": 83372 }, { "epoch": 0.14782899327026477, "grad_norm": 0.34375, "learning_rate": 0.000277434289695208, "loss": 0.1488, "step": 83374 }, { "epoch": 0.1478325394355746, "grad_norm": 0.310546875, "learning_rate": 0.00027741013882610454, "loss": 0.1785, "step": 83376 }, { "epoch": 0.1478360856008844, "grad_norm": 1.6015625, "learning_rate": 0.00027738599155453146, "loss": 0.2138, "step": 83378 }, { "epoch": 0.14783963176619422, "grad_norm": 0.185546875, "learning_rate": 0.00027736184788059413, "loss": 0.1634, "step": 83380 }, { "epoch": 0.14784317793150403, "grad_norm": 0.2119140625, "learning_rate": 0.0002773377078043984, "loss": 0.3321, "step": 83382 }, { "epoch": 0.14784672409681385, "grad_norm": 1.78125, "learning_rate": 0.0002773135713260496, "loss": 0.4712, "step": 83384 }, { "epoch": 0.14785027026212366, "grad_norm": 0.5859375, "learning_rate": 0.0002772894384456531, "loss": 0.1992, "step": 83386 }, { "epoch": 0.14785381642743348, "grad_norm": 0.302734375, "learning_rate": 0.0002772653091633149, "loss": 0.1826, "step": 83388 }, { "epoch": 0.1478573625927433, "grad_norm": 0.93359375, "learning_rate": 0.0002772411834791404, "loss": 0.1761, "step": 83390 }, { "epoch": 0.14786090875805313, "grad_norm": 0.369140625, "learning_rate": 0.0002772170613932351, "loss": 0.2256, "step": 83392 }, { "epoch": 0.14786445492336295, "grad_norm": 0.51171875, "learning_rate": 0.00027719294290570447, "loss": 0.1979, "step": 83394 }, { "epoch": 0.14786800108867276, "grad_norm": 0.96875, "learning_rate": 0.0002771688280166536, "loss": 0.1749, "step": 83396 }, { "epoch": 0.14787154725398258, "grad_norm": 0.28515625, "learning_rate": 0.00027714471672618865, "loss": 0.1538, "step": 83398 }, { "epoch": 0.1478750934192924, "grad_norm": 0.349609375, "learning_rate": 0.00027712060903441465, "loss": 0.2133, "step": 83400 }, { "epoch": 0.1478786395846022, "grad_norm": 1.0703125, "learning_rate": 0.0002770965049414372, "loss": 0.1709, "step": 83402 }, { "epoch": 0.14788218574991202, "grad_norm": 0.498046875, "learning_rate": 0.0002770724044473614, "loss": 0.1702, "step": 83404 }, { "epoch": 0.14788573191522184, "grad_norm": 0.322265625, "learning_rate": 0.0002770483075522931, "loss": 0.2179, "step": 83406 }, { "epoch": 0.14788927808053165, "grad_norm": 0.482421875, "learning_rate": 0.0002770242142563375, "loss": 0.1961, "step": 83408 }, { "epoch": 0.14789282424584146, "grad_norm": 0.5234375, "learning_rate": 0.0002770001245595999, "loss": 0.1389, "step": 83410 }, { "epoch": 0.14789637041115128, "grad_norm": 6.5625, "learning_rate": 0.00027697603846218565, "loss": 0.3081, "step": 83412 }, { "epoch": 0.1478999165764611, "grad_norm": 0.3828125, "learning_rate": 0.00027695195596420005, "loss": 0.2869, "step": 83414 }, { "epoch": 0.1479034627417709, "grad_norm": 0.28515625, "learning_rate": 0.00027692787706574886, "loss": 0.1605, "step": 83416 }, { "epoch": 0.14790700890708072, "grad_norm": 0.1923828125, "learning_rate": 0.0002769038017669367, "loss": 0.1102, "step": 83418 }, { "epoch": 0.14791055507239054, "grad_norm": 0.578125, "learning_rate": 0.0002768797300678693, "loss": 0.2079, "step": 83420 }, { "epoch": 0.14791410123770035, "grad_norm": 0.58203125, "learning_rate": 0.0002768556619686517, "loss": 0.1729, "step": 83422 }, { "epoch": 0.14791764740301017, "grad_norm": 1.171875, "learning_rate": 0.0002768315974693896, "loss": 0.3098, "step": 83424 }, { "epoch": 0.14792119356831998, "grad_norm": 0.37109375, "learning_rate": 0.0002768075365701877, "loss": 0.18, "step": 83426 }, { "epoch": 0.1479247397336298, "grad_norm": 0.2275390625, "learning_rate": 0.0002767834792711515, "loss": 0.1465, "step": 83428 }, { "epoch": 0.1479282858989396, "grad_norm": 0.275390625, "learning_rate": 0.0002767594255723862, "loss": 0.1518, "step": 83430 }, { "epoch": 0.14793183206424942, "grad_norm": 0.302734375, "learning_rate": 0.0002767353754739971, "loss": 0.242, "step": 83432 }, { "epoch": 0.14793537822955924, "grad_norm": 0.91015625, "learning_rate": 0.0002767113289760891, "loss": 0.193, "step": 83434 }, { "epoch": 0.14793892439486905, "grad_norm": 1.1015625, "learning_rate": 0.00027668728607876746, "loss": 0.1864, "step": 83436 }, { "epoch": 0.14794247056017887, "grad_norm": 0.5390625, "learning_rate": 0.0002766632467821376, "loss": 0.1567, "step": 83438 }, { "epoch": 0.14794601672548868, "grad_norm": 0.1953125, "learning_rate": 0.00027663921108630443, "loss": 0.167, "step": 83440 }, { "epoch": 0.1479495628907985, "grad_norm": 0.94921875, "learning_rate": 0.00027661517899137307, "loss": 0.2639, "step": 83442 }, { "epoch": 0.1479531090561083, "grad_norm": 0.337890625, "learning_rate": 0.00027659115049744835, "loss": 0.2062, "step": 83444 }, { "epoch": 0.14795665522141813, "grad_norm": 0.60546875, "learning_rate": 0.00027656712560463604, "loss": 0.1708, "step": 83446 }, { "epoch": 0.14796020138672794, "grad_norm": 0.216796875, "learning_rate": 0.0002765431043130407, "loss": 0.1367, "step": 83448 }, { "epoch": 0.14796374755203776, "grad_norm": 0.6875, "learning_rate": 0.00027651908662276736, "loss": 0.1903, "step": 83450 }, { "epoch": 0.14796729371734757, "grad_norm": 1.4375, "learning_rate": 0.00027649507253392144, "loss": 0.3882, "step": 83452 }, { "epoch": 0.14797083988265738, "grad_norm": 0.56640625, "learning_rate": 0.0002764710620466074, "loss": 0.166, "step": 83454 }, { "epoch": 0.1479743860479672, "grad_norm": 0.2333984375, "learning_rate": 0.0002764470551609309, "loss": 0.1722, "step": 83456 }, { "epoch": 0.147977932213277, "grad_norm": 0.337890625, "learning_rate": 0.0002764230518769964, "loss": 0.1746, "step": 83458 }, { "epoch": 0.14798147837858683, "grad_norm": 4.28125, "learning_rate": 0.00027639905219490913, "loss": 0.2012, "step": 83460 }, { "epoch": 0.14798502454389664, "grad_norm": 0.63671875, "learning_rate": 0.0002763750561147739, "loss": 0.1539, "step": 83462 }, { "epoch": 0.14798857070920646, "grad_norm": 0.64453125, "learning_rate": 0.00027635106363669605, "loss": 0.2186, "step": 83464 }, { "epoch": 0.14799211687451627, "grad_norm": 0.86328125, "learning_rate": 0.0002763270747607799, "loss": 0.1467, "step": 83466 }, { "epoch": 0.1479956630398261, "grad_norm": 0.53515625, "learning_rate": 0.0002763030894871309, "loss": 0.1459, "step": 83468 }, { "epoch": 0.1479992092051359, "grad_norm": 0.56640625, "learning_rate": 0.00027627910781585356, "loss": 0.1831, "step": 83470 }, { "epoch": 0.14800275537044572, "grad_norm": 1.078125, "learning_rate": 0.0002762551297470531, "loss": 0.1838, "step": 83472 }, { "epoch": 0.14800630153575553, "grad_norm": 0.41015625, "learning_rate": 0.0002762311552808341, "loss": 0.1308, "step": 83474 }, { "epoch": 0.14800984770106534, "grad_norm": 0.373046875, "learning_rate": 0.0002762071844173015, "loss": 0.1458, "step": 83476 }, { "epoch": 0.14801339386637516, "grad_norm": 1.6015625, "learning_rate": 0.00027618321715656025, "loss": 0.1621, "step": 83478 }, { "epoch": 0.14801694003168497, "grad_norm": 0.302734375, "learning_rate": 0.00027615925349871485, "loss": 0.1478, "step": 83480 }, { "epoch": 0.14802048619699482, "grad_norm": 0.55078125, "learning_rate": 0.00027613529344387065, "loss": 0.2051, "step": 83482 }, { "epoch": 0.14802403236230463, "grad_norm": 0.1962890625, "learning_rate": 0.0002761113369921319, "loss": 0.1942, "step": 83484 }, { "epoch": 0.14802757852761445, "grad_norm": 0.392578125, "learning_rate": 0.0002760873841436037, "loss": 0.2759, "step": 83486 }, { "epoch": 0.14803112469292426, "grad_norm": 0.259765625, "learning_rate": 0.0002760634348983907, "loss": 0.1508, "step": 83488 }, { "epoch": 0.14803467085823407, "grad_norm": 0.5859375, "learning_rate": 0.0002760394892565977, "loss": 0.1851, "step": 83490 }, { "epoch": 0.1480382170235439, "grad_norm": 0.44140625, "learning_rate": 0.0002760155472183293, "loss": 0.1597, "step": 83492 }, { "epoch": 0.1480417631888537, "grad_norm": 0.388671875, "learning_rate": 0.00027599160878369013, "loss": 0.1501, "step": 83494 }, { "epoch": 0.14804530935416352, "grad_norm": 1.0234375, "learning_rate": 0.00027596767395278514, "loss": 0.205, "step": 83496 }, { "epoch": 0.14804885551947333, "grad_norm": 0.1669921875, "learning_rate": 0.0002759437427257189, "loss": 0.1216, "step": 83498 }, { "epoch": 0.14805240168478315, "grad_norm": 1.046875, "learning_rate": 0.0002759198151025961, "loss": 0.1789, "step": 83500 }, { "epoch": 0.14805594785009296, "grad_norm": 1.0, "learning_rate": 0.0002758958910835211, "loss": 0.1804, "step": 83502 }, { "epoch": 0.14805949401540278, "grad_norm": 0.4140625, "learning_rate": 0.00027587197066859906, "loss": 0.288, "step": 83504 }, { "epoch": 0.1480630401807126, "grad_norm": 0.39453125, "learning_rate": 0.0002758480538579342, "loss": 0.1662, "step": 83506 }, { "epoch": 0.1480665863460224, "grad_norm": 1.34375, "learning_rate": 0.0002758241406516313, "loss": 0.1807, "step": 83508 }, { "epoch": 0.14807013251133222, "grad_norm": 1.0859375, "learning_rate": 0.0002758002310497948, "loss": 0.2311, "step": 83510 }, { "epoch": 0.14807367867664203, "grad_norm": 0.51171875, "learning_rate": 0.00027577632505252926, "loss": 0.1681, "step": 83512 }, { "epoch": 0.14807722484195185, "grad_norm": 0.2255859375, "learning_rate": 0.00027575242265993953, "loss": 0.1445, "step": 83514 }, { "epoch": 0.14808077100726166, "grad_norm": 1.0546875, "learning_rate": 0.0002757285238721297, "loss": 0.2809, "step": 83516 }, { "epoch": 0.14808431717257148, "grad_norm": 0.44140625, "learning_rate": 0.00027570462868920463, "loss": 0.1564, "step": 83518 }, { "epoch": 0.1480878633378813, "grad_norm": 0.30859375, "learning_rate": 0.0002756807371112685, "loss": 0.1784, "step": 83520 }, { "epoch": 0.1480914095031911, "grad_norm": 3.875, "learning_rate": 0.00027565684913842645, "loss": 0.2814, "step": 83522 }, { "epoch": 0.14809495566850092, "grad_norm": 0.51171875, "learning_rate": 0.00027563296477078203, "loss": 0.1684, "step": 83524 }, { "epoch": 0.14809850183381074, "grad_norm": 0.30859375, "learning_rate": 0.0002756090840084404, "loss": 0.1736, "step": 83526 }, { "epoch": 0.14810204799912055, "grad_norm": 0.306640625, "learning_rate": 0.00027558520685150577, "loss": 0.1357, "step": 83528 }, { "epoch": 0.14810559416443037, "grad_norm": 0.62109375, "learning_rate": 0.00027556133330008264, "loss": 0.1644, "step": 83530 }, { "epoch": 0.14810914032974018, "grad_norm": 0.478515625, "learning_rate": 0.0002755374633542753, "loss": 0.1914, "step": 83532 }, { "epoch": 0.14811268649505, "grad_norm": 1.375, "learning_rate": 0.000275513597014188, "loss": 0.3029, "step": 83534 }, { "epoch": 0.1481162326603598, "grad_norm": 0.275390625, "learning_rate": 0.0002754897342799255, "loss": 0.202, "step": 83536 }, { "epoch": 0.14811977882566962, "grad_norm": 1.8203125, "learning_rate": 0.000275465875151592, "loss": 0.2253, "step": 83538 }, { "epoch": 0.14812332499097944, "grad_norm": 0.234375, "learning_rate": 0.0002754420196292917, "loss": 0.1395, "step": 83540 }, { "epoch": 0.14812687115628925, "grad_norm": 0.5, "learning_rate": 0.000275418167713129, "loss": 0.1745, "step": 83542 }, { "epoch": 0.14813041732159907, "grad_norm": 0.60546875, "learning_rate": 0.0002753943194032084, "loss": 0.1817, "step": 83544 }, { "epoch": 0.14813396348690888, "grad_norm": 0.37890625, "learning_rate": 0.0002753704746996341, "loss": 0.1666, "step": 83546 }, { "epoch": 0.1481375096522187, "grad_norm": 0.240234375, "learning_rate": 0.0002753466336025102, "loss": 0.1413, "step": 83548 }, { "epoch": 0.1481410558175285, "grad_norm": 0.59375, "learning_rate": 0.0002753227961119412, "loss": 0.1761, "step": 83550 }, { "epoch": 0.14814460198283833, "grad_norm": 0.578125, "learning_rate": 0.0002752989622280312, "loss": 0.2504, "step": 83552 }, { "epoch": 0.14814814814814814, "grad_norm": 0.26171875, "learning_rate": 0.0002752751319508846, "loss": 0.1692, "step": 83554 }, { "epoch": 0.14815169431345795, "grad_norm": 0.1787109375, "learning_rate": 0.0002752513052806053, "loss": 0.2111, "step": 83556 }, { "epoch": 0.14815524047876777, "grad_norm": 1.1953125, "learning_rate": 0.00027522748221729795, "loss": 0.2095, "step": 83558 }, { "epoch": 0.14815878664407758, "grad_norm": 0.6328125, "learning_rate": 0.0002752036627610664, "loss": 0.1726, "step": 83560 }, { "epoch": 0.1481623328093874, "grad_norm": 0.3203125, "learning_rate": 0.00027517984691201487, "loss": 0.191, "step": 83562 }, { "epoch": 0.1481658789746972, "grad_norm": 0.7109375, "learning_rate": 0.0002751560346702476, "loss": 0.1453, "step": 83564 }, { "epoch": 0.14816942514000703, "grad_norm": 0.314453125, "learning_rate": 0.0002751322260358688, "loss": 0.2468, "step": 83566 }, { "epoch": 0.14817297130531684, "grad_norm": 0.26953125, "learning_rate": 0.00027510842100898254, "loss": 0.1489, "step": 83568 }, { "epoch": 0.14817651747062666, "grad_norm": 0.357421875, "learning_rate": 0.0002750846195896927, "loss": 0.1803, "step": 83570 }, { "epoch": 0.14818006363593647, "grad_norm": 0.35546875, "learning_rate": 0.00027506082177810383, "loss": 0.195, "step": 83572 }, { "epoch": 0.1481836098012463, "grad_norm": 0.251953125, "learning_rate": 0.0002750370275743194, "loss": 0.1612, "step": 83574 }, { "epoch": 0.14818715596655613, "grad_norm": 0.384765625, "learning_rate": 0.0002750132369784441, "loss": 0.218, "step": 83576 }, { "epoch": 0.14819070213186594, "grad_norm": 0.1923828125, "learning_rate": 0.0002749894499905816, "loss": 0.152, "step": 83578 }, { "epoch": 0.14819424829717576, "grad_norm": 0.859375, "learning_rate": 0.00027496566661083604, "loss": 0.224, "step": 83580 }, { "epoch": 0.14819779446248557, "grad_norm": 0.55859375, "learning_rate": 0.0002749418868393114, "loss": 0.2411, "step": 83582 }, { "epoch": 0.14820134062779539, "grad_norm": 0.234375, "learning_rate": 0.0002749181106761116, "loss": 0.2101, "step": 83584 }, { "epoch": 0.1482048867931052, "grad_norm": 0.3671875, "learning_rate": 0.0002748943381213409, "loss": 0.1758, "step": 83586 }, { "epoch": 0.14820843295841502, "grad_norm": 1.0234375, "learning_rate": 0.00027487056917510297, "loss": 0.1794, "step": 83588 }, { "epoch": 0.14821197912372483, "grad_norm": 0.2578125, "learning_rate": 0.0002748468038375019, "loss": 0.1968, "step": 83590 }, { "epoch": 0.14821552528903464, "grad_norm": 0.4609375, "learning_rate": 0.00027482304210864155, "loss": 0.1746, "step": 83592 }, { "epoch": 0.14821907145434446, "grad_norm": 0.486328125, "learning_rate": 0.00027479928398862596, "loss": 0.1608, "step": 83594 }, { "epoch": 0.14822261761965427, "grad_norm": 0.375, "learning_rate": 0.00027477552947755896, "loss": 0.1918, "step": 83596 }, { "epoch": 0.1482261637849641, "grad_norm": 0.48828125, "learning_rate": 0.0002747517785755446, "loss": 0.1938, "step": 83598 }, { "epoch": 0.1482297099502739, "grad_norm": 0.2333984375, "learning_rate": 0.0002747280312826864, "loss": 0.1317, "step": 83600 }, { "epoch": 0.14823325611558372, "grad_norm": 1.140625, "learning_rate": 0.0002747042875990884, "loss": 0.1484, "step": 83602 }, { "epoch": 0.14823680228089353, "grad_norm": 1.2578125, "learning_rate": 0.00027468054752485474, "loss": 0.2479, "step": 83604 }, { "epoch": 0.14824034844620335, "grad_norm": 0.87890625, "learning_rate": 0.00027465681106008863, "loss": 0.1851, "step": 83606 }, { "epoch": 0.14824389461151316, "grad_norm": 0.21875, "learning_rate": 0.0002746330782048943, "loss": 0.166, "step": 83608 }, { "epoch": 0.14824744077682298, "grad_norm": 1.6171875, "learning_rate": 0.00027460934895937527, "loss": 0.1772, "step": 83610 }, { "epoch": 0.1482509869421328, "grad_norm": 0.31640625, "learning_rate": 0.00027458562332363597, "loss": 0.2037, "step": 83612 }, { "epoch": 0.1482545331074426, "grad_norm": 0.439453125, "learning_rate": 0.0002745619012977793, "loss": 0.2097, "step": 83614 }, { "epoch": 0.14825807927275242, "grad_norm": 0.2392578125, "learning_rate": 0.0002745381828819096, "loss": 0.1575, "step": 83616 }, { "epoch": 0.14826162543806223, "grad_norm": 0.2216796875, "learning_rate": 0.00027451446807613044, "loss": 0.159, "step": 83618 }, { "epoch": 0.14826517160337205, "grad_norm": 0.291015625, "learning_rate": 0.0002744907568805454, "loss": 0.2209, "step": 83620 }, { "epoch": 0.14826871776868186, "grad_norm": 0.41796875, "learning_rate": 0.00027446704929525833, "loss": 0.2297, "step": 83622 }, { "epoch": 0.14827226393399168, "grad_norm": 0.251953125, "learning_rate": 0.00027444334532037264, "loss": 0.1899, "step": 83624 }, { "epoch": 0.1482758100993015, "grad_norm": 0.302734375, "learning_rate": 0.00027441964495599245, "loss": 0.1199, "step": 83626 }, { "epoch": 0.1482793562646113, "grad_norm": 0.41796875, "learning_rate": 0.00027439594820222114, "loss": 0.1747, "step": 83628 }, { "epoch": 0.14828290242992112, "grad_norm": 0.15234375, "learning_rate": 0.0002743722550591624, "loss": 0.1165, "step": 83630 }, { "epoch": 0.14828644859523094, "grad_norm": 0.5546875, "learning_rate": 0.00027434856552691967, "loss": 0.2123, "step": 83632 }, { "epoch": 0.14828999476054075, "grad_norm": 0.1962890625, "learning_rate": 0.00027432487960559685, "loss": 0.1478, "step": 83634 }, { "epoch": 0.14829354092585056, "grad_norm": 0.8125, "learning_rate": 0.00027430119729529746, "loss": 0.1992, "step": 83636 }, { "epoch": 0.14829708709116038, "grad_norm": 0.50390625, "learning_rate": 0.00027427751859612494, "loss": 0.2106, "step": 83638 }, { "epoch": 0.1483006332564702, "grad_norm": 0.46875, "learning_rate": 0.0002742538435081829, "loss": 0.217, "step": 83640 }, { "epoch": 0.14830417942178, "grad_norm": 0.23828125, "learning_rate": 0.0002742301720315749, "loss": 0.2174, "step": 83642 }, { "epoch": 0.14830772558708982, "grad_norm": 0.44140625, "learning_rate": 0.00027420650416640446, "loss": 0.1672, "step": 83644 }, { "epoch": 0.14831127175239964, "grad_norm": 0.318359375, "learning_rate": 0.00027418283991277517, "loss": 0.222, "step": 83646 }, { "epoch": 0.14831481791770945, "grad_norm": 0.37109375, "learning_rate": 0.0002741591792707905, "loss": 0.1876, "step": 83648 }, { "epoch": 0.14831836408301927, "grad_norm": 0.53125, "learning_rate": 0.0002741355222405536, "loss": 0.198, "step": 83650 }, { "epoch": 0.14832191024832908, "grad_norm": 4.71875, "learning_rate": 0.0002741118688221683, "loss": 0.3115, "step": 83652 }, { "epoch": 0.1483254564136389, "grad_norm": 0.228515625, "learning_rate": 0.00027408821901573813, "loss": 0.1322, "step": 83654 }, { "epoch": 0.1483290025789487, "grad_norm": 2.9375, "learning_rate": 0.00027406457282136627, "loss": 0.251, "step": 83656 }, { "epoch": 0.14833254874425852, "grad_norm": 0.78125, "learning_rate": 0.0002740409302391562, "loss": 0.1882, "step": 83658 }, { "epoch": 0.14833609490956834, "grad_norm": 0.5234375, "learning_rate": 0.00027401729126921116, "loss": 0.1633, "step": 83660 }, { "epoch": 0.14833964107487815, "grad_norm": 0.4921875, "learning_rate": 0.00027399365591163505, "loss": 0.2645, "step": 83662 }, { "epoch": 0.148343187240188, "grad_norm": 0.283203125, "learning_rate": 0.0002739700241665306, "loss": 0.2038, "step": 83664 }, { "epoch": 0.1483467334054978, "grad_norm": 0.5546875, "learning_rate": 0.0002739463960340015, "loss": 0.19, "step": 83666 }, { "epoch": 0.14835027957080762, "grad_norm": 1.078125, "learning_rate": 0.00027392277151415097, "loss": 0.2036, "step": 83668 }, { "epoch": 0.14835382573611744, "grad_norm": 0.2451171875, "learning_rate": 0.00027389915060708266, "loss": 0.1843, "step": 83670 }, { "epoch": 0.14835737190142725, "grad_norm": 0.33984375, "learning_rate": 0.00027387553331289935, "loss": 0.2001, "step": 83672 }, { "epoch": 0.14836091806673707, "grad_norm": 0.28125, "learning_rate": 0.0002738519196317047, "loss": 0.1333, "step": 83674 }, { "epoch": 0.14836446423204688, "grad_norm": 0.3125, "learning_rate": 0.00027382830956360185, "loss": 0.1845, "step": 83676 }, { "epoch": 0.1483680103973567, "grad_norm": 0.40234375, "learning_rate": 0.0002738047031086941, "loss": 0.168, "step": 83678 }, { "epoch": 0.1483715565626665, "grad_norm": 0.99609375, "learning_rate": 0.0002737811002670847, "loss": 0.2431, "step": 83680 }, { "epoch": 0.14837510272797633, "grad_norm": 0.3828125, "learning_rate": 0.0002737575010388766, "loss": 0.218, "step": 83682 }, { "epoch": 0.14837864889328614, "grad_norm": 0.84375, "learning_rate": 0.0002737339054241735, "loss": 0.1753, "step": 83684 }, { "epoch": 0.14838219505859596, "grad_norm": 0.55078125, "learning_rate": 0.00027371031342307827, "loss": 0.2265, "step": 83686 }, { "epoch": 0.14838574122390577, "grad_norm": 1.5234375, "learning_rate": 0.0002736867250356942, "loss": 0.2549, "step": 83688 }, { "epoch": 0.14838928738921559, "grad_norm": 0.447265625, "learning_rate": 0.0002736631402621243, "loss": 0.2271, "step": 83690 }, { "epoch": 0.1483928335545254, "grad_norm": 0.49609375, "learning_rate": 0.00027363955910247195, "loss": 0.212, "step": 83692 }, { "epoch": 0.14839637971983521, "grad_norm": 0.447265625, "learning_rate": 0.0002736159815568402, "loss": 0.1614, "step": 83694 }, { "epoch": 0.14839992588514503, "grad_norm": 2.375, "learning_rate": 0.000273592407625332, "loss": 0.3843, "step": 83696 }, { "epoch": 0.14840347205045484, "grad_norm": 0.8515625, "learning_rate": 0.00027356883730805077, "loss": 0.1526, "step": 83698 }, { "epoch": 0.14840701821576466, "grad_norm": 0.212890625, "learning_rate": 0.000273545270605099, "loss": 0.1559, "step": 83700 }, { "epoch": 0.14841056438107447, "grad_norm": 0.35546875, "learning_rate": 0.0002735217075165807, "loss": 0.1909, "step": 83702 }, { "epoch": 0.1484141105463843, "grad_norm": 0.44921875, "learning_rate": 0.000273498148042598, "loss": 0.2036, "step": 83704 }, { "epoch": 0.1484176567116941, "grad_norm": 0.43359375, "learning_rate": 0.00027347459218325437, "loss": 0.4632, "step": 83706 }, { "epoch": 0.14842120287700392, "grad_norm": 0.86328125, "learning_rate": 0.00027345103993865267, "loss": 0.1965, "step": 83708 }, { "epoch": 0.14842474904231373, "grad_norm": 1.2578125, "learning_rate": 0.0002734274913088963, "loss": 0.1585, "step": 83710 }, { "epoch": 0.14842829520762355, "grad_norm": 0.2373046875, "learning_rate": 0.00027340394629408766, "loss": 0.1443, "step": 83712 }, { "epoch": 0.14843184137293336, "grad_norm": 0.68359375, "learning_rate": 0.0002733804048943302, "loss": 0.4215, "step": 83714 }, { "epoch": 0.14843538753824317, "grad_norm": 0.384765625, "learning_rate": 0.0002733568671097267, "loss": 0.2302, "step": 83716 }, { "epoch": 0.148438933703553, "grad_norm": 0.3359375, "learning_rate": 0.00027333333294037997, "loss": 0.1354, "step": 83718 }, { "epoch": 0.1484424798688628, "grad_norm": 0.29296875, "learning_rate": 0.0002733098023863932, "loss": 0.1208, "step": 83720 }, { "epoch": 0.14844602603417262, "grad_norm": 0.828125, "learning_rate": 0.0002732862754478689, "loss": 0.1889, "step": 83722 }, { "epoch": 0.14844957219948243, "grad_norm": 0.2431640625, "learning_rate": 0.0002732627521249104, "loss": 0.3078, "step": 83724 }, { "epoch": 0.14845311836479225, "grad_norm": 0.2412109375, "learning_rate": 0.0002732392324176203, "loss": 0.1563, "step": 83726 }, { "epoch": 0.14845666453010206, "grad_norm": 0.56640625, "learning_rate": 0.0002732157163261018, "loss": 0.2143, "step": 83728 }, { "epoch": 0.14846021069541188, "grad_norm": 0.25, "learning_rate": 0.00027319220385045725, "loss": 0.1757, "step": 83730 }, { "epoch": 0.1484637568607217, "grad_norm": 0.4375, "learning_rate": 0.00027316869499078986, "loss": 0.1626, "step": 83732 }, { "epoch": 0.1484673030260315, "grad_norm": 0.9453125, "learning_rate": 0.00027314518974720225, "loss": 0.2427, "step": 83734 }, { "epoch": 0.14847084919134132, "grad_norm": 0.43359375, "learning_rate": 0.0002731216881197974, "loss": 0.1931, "step": 83736 }, { "epoch": 0.14847439535665113, "grad_norm": 0.3203125, "learning_rate": 0.00027309819010867787, "loss": 0.1656, "step": 83738 }, { "epoch": 0.14847794152196095, "grad_norm": 0.34375, "learning_rate": 0.0002730746957139465, "loss": 0.1689, "step": 83740 }, { "epoch": 0.14848148768727076, "grad_norm": 0.412109375, "learning_rate": 0.00027305120493570617, "loss": 0.2766, "step": 83742 }, { "epoch": 0.14848503385258058, "grad_norm": 0.5, "learning_rate": 0.0002730277177740595, "loss": 0.1432, "step": 83744 }, { "epoch": 0.1484885800178904, "grad_norm": 0.8125, "learning_rate": 0.00027300423422910923, "loss": 0.203, "step": 83746 }, { "epoch": 0.1484921261832002, "grad_norm": 0.2314453125, "learning_rate": 0.0002729807543009581, "loss": 0.1842, "step": 83748 }, { "epoch": 0.14849567234851002, "grad_norm": 0.41796875, "learning_rate": 0.0002729572779897086, "loss": 0.204, "step": 83750 }, { "epoch": 0.14849921851381984, "grad_norm": 0.291015625, "learning_rate": 0.0002729338052954638, "loss": 0.1384, "step": 83752 }, { "epoch": 0.14850276467912968, "grad_norm": 0.359375, "learning_rate": 0.00027291033621832593, "loss": 0.1604, "step": 83754 }, { "epoch": 0.1485063108444395, "grad_norm": 0.384765625, "learning_rate": 0.0002728868707583979, "loss": 0.2398, "step": 83756 }, { "epoch": 0.1485098570097493, "grad_norm": 1.3203125, "learning_rate": 0.0002728634089157821, "loss": 0.2513, "step": 83758 }, { "epoch": 0.14851340317505912, "grad_norm": 0.37890625, "learning_rate": 0.00027283995069058163, "loss": 0.15, "step": 83760 }, { "epoch": 0.14851694934036894, "grad_norm": 0.4296875, "learning_rate": 0.0002728164960828984, "loss": 0.1675, "step": 83762 }, { "epoch": 0.14852049550567875, "grad_norm": 0.37109375, "learning_rate": 0.0002727930450928354, "loss": 0.155, "step": 83764 }, { "epoch": 0.14852404167098857, "grad_norm": 0.19921875, "learning_rate": 0.0002727695977204953, "loss": 0.1711, "step": 83766 }, { "epoch": 0.14852758783629838, "grad_norm": 0.369140625, "learning_rate": 0.00027274615396598046, "loss": 0.1878, "step": 83768 }, { "epoch": 0.1485311340016082, "grad_norm": 0.54296875, "learning_rate": 0.0002727227138293933, "loss": 0.1729, "step": 83770 }, { "epoch": 0.148534680166918, "grad_norm": 1.0078125, "learning_rate": 0.0002726992773108364, "loss": 0.2014, "step": 83772 }, { "epoch": 0.14853822633222782, "grad_norm": 0.3125, "learning_rate": 0.0002726758444104123, "loss": 0.1562, "step": 83774 }, { "epoch": 0.14854177249753764, "grad_norm": 0.8984375, "learning_rate": 0.00027265241512822364, "loss": 0.1928, "step": 83776 }, { "epoch": 0.14854531866284745, "grad_norm": 0.40625, "learning_rate": 0.00027262898946437265, "loss": 0.1505, "step": 83778 }, { "epoch": 0.14854886482815727, "grad_norm": 0.361328125, "learning_rate": 0.00027260556741896173, "loss": 0.1844, "step": 83780 }, { "epoch": 0.14855241099346708, "grad_norm": 0.373046875, "learning_rate": 0.0002725821489920937, "loss": 0.2431, "step": 83782 }, { "epoch": 0.1485559571587769, "grad_norm": 6.46875, "learning_rate": 0.00027255873418387066, "loss": 0.181, "step": 83784 }, { "epoch": 0.1485595033240867, "grad_norm": 0.244140625, "learning_rate": 0.00027253532299439503, "loss": 0.1879, "step": 83786 }, { "epoch": 0.14856304948939653, "grad_norm": 0.2255859375, "learning_rate": 0.0002725119154237693, "loss": 0.174, "step": 83788 }, { "epoch": 0.14856659565470634, "grad_norm": 0.330078125, "learning_rate": 0.00027248851147209556, "loss": 0.1951, "step": 83790 }, { "epoch": 0.14857014182001615, "grad_norm": 0.361328125, "learning_rate": 0.00027246511113947685, "loss": 0.1612, "step": 83792 }, { "epoch": 0.14857368798532597, "grad_norm": 0.30859375, "learning_rate": 0.0002724417144260146, "loss": 0.1399, "step": 83794 }, { "epoch": 0.14857723415063578, "grad_norm": 0.255859375, "learning_rate": 0.0002724183213318119, "loss": 0.15, "step": 83796 }, { "epoch": 0.1485807803159456, "grad_norm": 0.294921875, "learning_rate": 0.00027239493185697046, "loss": 0.1369, "step": 83798 }, { "epoch": 0.1485843264812554, "grad_norm": 0.35546875, "learning_rate": 0.00027237154600159315, "loss": 0.1563, "step": 83800 }, { "epoch": 0.14858787264656523, "grad_norm": 0.4921875, "learning_rate": 0.0002723481637657817, "loss": 0.179, "step": 83802 }, { "epoch": 0.14859141881187504, "grad_norm": 1.703125, "learning_rate": 0.00027232478514963883, "loss": 0.2652, "step": 83804 }, { "epoch": 0.14859496497718486, "grad_norm": 1.65625, "learning_rate": 0.00027230141015326646, "loss": 0.1709, "step": 83806 }, { "epoch": 0.14859851114249467, "grad_norm": 0.228515625, "learning_rate": 0.0002722780387767671, "loss": 0.1711, "step": 83808 }, { "epoch": 0.14860205730780449, "grad_norm": 0.78515625, "learning_rate": 0.0002722546710202427, "loss": 0.3316, "step": 83810 }, { "epoch": 0.1486056034731143, "grad_norm": 0.388671875, "learning_rate": 0.0002722313068837954, "loss": 0.2221, "step": 83812 }, { "epoch": 0.14860914963842412, "grad_norm": 0.298828125, "learning_rate": 0.00027220794636752764, "loss": 0.1614, "step": 83814 }, { "epoch": 0.14861269580373393, "grad_norm": 0.8046875, "learning_rate": 0.0002721845894715415, "loss": 0.2272, "step": 83816 }, { "epoch": 0.14861624196904374, "grad_norm": 2.75, "learning_rate": 0.00027216123619593926, "loss": 0.2533, "step": 83818 }, { "epoch": 0.14861978813435356, "grad_norm": 1.5703125, "learning_rate": 0.0002721378865408226, "loss": 0.3869, "step": 83820 }, { "epoch": 0.14862333429966337, "grad_norm": 0.41015625, "learning_rate": 0.00027211454050629413, "loss": 0.2326, "step": 83822 }, { "epoch": 0.1486268804649732, "grad_norm": 1.46875, "learning_rate": 0.0002720911980924558, "loss": 0.2571, "step": 83824 }, { "epoch": 0.148630426630283, "grad_norm": 0.291015625, "learning_rate": 0.00027206785929940975, "loss": 0.1816, "step": 83826 }, { "epoch": 0.14863397279559282, "grad_norm": 1.109375, "learning_rate": 0.0002720445241272579, "loss": 0.1482, "step": 83828 }, { "epoch": 0.14863751896090263, "grad_norm": 1.1171875, "learning_rate": 0.0002720211925761021, "loss": 0.1902, "step": 83830 }, { "epoch": 0.14864106512621245, "grad_norm": 0.294921875, "learning_rate": 0.0002719978646460449, "loss": 0.1883, "step": 83832 }, { "epoch": 0.14864461129152226, "grad_norm": 0.9375, "learning_rate": 0.0002719745403371881, "loss": 0.3654, "step": 83834 }, { "epoch": 0.14864815745683208, "grad_norm": 0.64453125, "learning_rate": 0.00027195121964963364, "loss": 0.1789, "step": 83836 }, { "epoch": 0.1486517036221419, "grad_norm": 0.60546875, "learning_rate": 0.00027192790258348336, "loss": 0.1502, "step": 83838 }, { "epoch": 0.1486552497874517, "grad_norm": 0.2158203125, "learning_rate": 0.00027190458913883966, "loss": 0.1898, "step": 83840 }, { "epoch": 0.14865879595276152, "grad_norm": 2.625, "learning_rate": 0.0002718812793158043, "loss": 0.2306, "step": 83842 }, { "epoch": 0.14866234211807133, "grad_norm": 0.3203125, "learning_rate": 0.0002718579731144791, "loss": 0.1681, "step": 83844 }, { "epoch": 0.14866588828338118, "grad_norm": 0.9375, "learning_rate": 0.00027183467053496613, "loss": 0.1965, "step": 83846 }, { "epoch": 0.148669434448691, "grad_norm": 0.482421875, "learning_rate": 0.00027181137157736713, "loss": 0.1636, "step": 83848 }, { "epoch": 0.1486729806140008, "grad_norm": 0.349609375, "learning_rate": 0.0002717880762417844, "loss": 0.2207, "step": 83850 }, { "epoch": 0.14867652677931062, "grad_norm": 0.30078125, "learning_rate": 0.0002717647845283193, "loss": 0.2051, "step": 83852 }, { "epoch": 0.14868007294462043, "grad_norm": 0.470703125, "learning_rate": 0.0002717414964370739, "loss": 0.2057, "step": 83854 }, { "epoch": 0.14868361910993025, "grad_norm": 0.376953125, "learning_rate": 0.0002717182119681502, "loss": 0.1671, "step": 83856 }, { "epoch": 0.14868716527524006, "grad_norm": 0.671875, "learning_rate": 0.00027169493112165006, "loss": 0.2725, "step": 83858 }, { "epoch": 0.14869071144054988, "grad_norm": 0.2314453125, "learning_rate": 0.00027167165389767485, "loss": 0.1598, "step": 83860 }, { "epoch": 0.1486942576058597, "grad_norm": 0.51171875, "learning_rate": 0.0002716483802963268, "loss": 0.2093, "step": 83862 }, { "epoch": 0.1486978037711695, "grad_norm": 0.71875, "learning_rate": 0.0002716251103177077, "loss": 0.1673, "step": 83864 }, { "epoch": 0.14870134993647932, "grad_norm": 0.6796875, "learning_rate": 0.0002716018439619191, "loss": 0.1584, "step": 83866 }, { "epoch": 0.14870489610178914, "grad_norm": 0.32421875, "learning_rate": 0.00027157858122906284, "loss": 0.1437, "step": 83868 }, { "epoch": 0.14870844226709895, "grad_norm": 0.4921875, "learning_rate": 0.00027155532211924055, "loss": 0.2835, "step": 83870 }, { "epoch": 0.14871198843240876, "grad_norm": 0.427734375, "learning_rate": 0.0002715320666325542, "loss": 0.1642, "step": 83872 }, { "epoch": 0.14871553459771858, "grad_norm": 0.431640625, "learning_rate": 0.0002715088147691053, "loss": 0.1562, "step": 83874 }, { "epoch": 0.1487190807630284, "grad_norm": 0.60546875, "learning_rate": 0.00027148556652899567, "loss": 0.1376, "step": 83876 }, { "epoch": 0.1487226269283382, "grad_norm": 0.291015625, "learning_rate": 0.0002714623219123268, "loss": 0.128, "step": 83878 }, { "epoch": 0.14872617309364802, "grad_norm": 1.4140625, "learning_rate": 0.00027143908091920043, "loss": 0.2329, "step": 83880 }, { "epoch": 0.14872971925895784, "grad_norm": 0.265625, "learning_rate": 0.0002714158435497184, "loss": 0.1689, "step": 83882 }, { "epoch": 0.14873326542426765, "grad_norm": 0.419921875, "learning_rate": 0.0002713926098039821, "loss": 0.2396, "step": 83884 }, { "epoch": 0.14873681158957747, "grad_norm": 0.81640625, "learning_rate": 0.0002713693796820933, "loss": 0.1897, "step": 83886 }, { "epoch": 0.14874035775488728, "grad_norm": 0.396484375, "learning_rate": 0.00027134615318415324, "loss": 0.1713, "step": 83888 }, { "epoch": 0.1487439039201971, "grad_norm": 1.015625, "learning_rate": 0.00027132293031026416, "loss": 0.2051, "step": 83890 }, { "epoch": 0.1487474500855069, "grad_norm": 0.271484375, "learning_rate": 0.000271299711060527, "loss": 0.576, "step": 83892 }, { "epoch": 0.14875099625081672, "grad_norm": 0.640625, "learning_rate": 0.0002712764954350435, "loss": 0.1877, "step": 83894 }, { "epoch": 0.14875454241612654, "grad_norm": 0.1865234375, "learning_rate": 0.0002712532834339151, "loss": 0.1375, "step": 83896 }, { "epoch": 0.14875808858143635, "grad_norm": 0.28125, "learning_rate": 0.00027123007505724375, "loss": 0.1248, "step": 83898 }, { "epoch": 0.14876163474674617, "grad_norm": 0.50390625, "learning_rate": 0.00027120687030513055, "loss": 0.1926, "step": 83900 }, { "epoch": 0.14876518091205598, "grad_norm": 0.46484375, "learning_rate": 0.0002711836691776771, "loss": 0.1842, "step": 83902 }, { "epoch": 0.1487687270773658, "grad_norm": 0.63671875, "learning_rate": 0.00027116047167498477, "loss": 0.1473, "step": 83904 }, { "epoch": 0.1487722732426756, "grad_norm": 0.2890625, "learning_rate": 0.00027113727779715503, "loss": 0.1571, "step": 83906 }, { "epoch": 0.14877581940798543, "grad_norm": 0.57421875, "learning_rate": 0.00027111408754428956, "loss": 0.2039, "step": 83908 }, { "epoch": 0.14877936557329524, "grad_norm": 0.431640625, "learning_rate": 0.0002710909009164894, "loss": 0.1717, "step": 83910 }, { "epoch": 0.14878291173860506, "grad_norm": 0.2099609375, "learning_rate": 0.0002710677179138563, "loss": 0.1523, "step": 83912 }, { "epoch": 0.14878645790391487, "grad_norm": 1.5, "learning_rate": 0.0002710445385364915, "loss": 0.191, "step": 83914 }, { "epoch": 0.14879000406922469, "grad_norm": 0.255859375, "learning_rate": 0.0002710213627844964, "loss": 0.1719, "step": 83916 }, { "epoch": 0.1487935502345345, "grad_norm": 0.2236328125, "learning_rate": 0.0002709981906579722, "loss": 0.3236, "step": 83918 }, { "epoch": 0.14879709639984431, "grad_norm": 0.6640625, "learning_rate": 0.0002709750221570204, "loss": 0.179, "step": 83920 }, { "epoch": 0.14880064256515413, "grad_norm": 0.359375, "learning_rate": 0.0002709518572817423, "loss": 0.1856, "step": 83922 }, { "epoch": 0.14880418873046394, "grad_norm": 0.48828125, "learning_rate": 0.0002709286960322393, "loss": 0.2917, "step": 83924 }, { "epoch": 0.14880773489577376, "grad_norm": 0.2890625, "learning_rate": 0.0002709055384086126, "loss": 0.2019, "step": 83926 }, { "epoch": 0.14881128106108357, "grad_norm": 1.09375, "learning_rate": 0.0002708823844109633, "loss": 0.3909, "step": 83928 }, { "epoch": 0.1488148272263934, "grad_norm": 0.58984375, "learning_rate": 0.00027085923403939293, "loss": 0.1497, "step": 83930 }, { "epoch": 0.1488183733917032, "grad_norm": 0.625, "learning_rate": 0.00027083608729400275, "loss": 0.1698, "step": 83932 }, { "epoch": 0.14882191955701302, "grad_norm": 0.41796875, "learning_rate": 0.00027081294417489386, "loss": 0.3545, "step": 83934 }, { "epoch": 0.14882546572232286, "grad_norm": 0.4453125, "learning_rate": 0.0002707898046821675, "loss": 0.2025, "step": 83936 }, { "epoch": 0.14882901188763267, "grad_norm": 0.53125, "learning_rate": 0.00027076666881592466, "loss": 0.166, "step": 83938 }, { "epoch": 0.1488325580529425, "grad_norm": 0.498046875, "learning_rate": 0.000270743536576267, "loss": 0.2124, "step": 83940 }, { "epoch": 0.1488361042182523, "grad_norm": 0.470703125, "learning_rate": 0.0002707204079632953, "loss": 0.1685, "step": 83942 }, { "epoch": 0.14883965038356212, "grad_norm": 0.1591796875, "learning_rate": 0.0002706972829771109, "loss": 0.163, "step": 83944 }, { "epoch": 0.14884319654887193, "grad_norm": 0.234375, "learning_rate": 0.0002706741616178147, "loss": 0.1482, "step": 83946 }, { "epoch": 0.14884674271418175, "grad_norm": 0.62109375, "learning_rate": 0.00027065104388550825, "loss": 0.2166, "step": 83948 }, { "epoch": 0.14885028887949156, "grad_norm": 0.65625, "learning_rate": 0.00027062792978029226, "loss": 0.51, "step": 83950 }, { "epoch": 0.14885383504480137, "grad_norm": 0.703125, "learning_rate": 0.0002706048193022679, "loss": 0.1622, "step": 83952 }, { "epoch": 0.1488573812101112, "grad_norm": 0.28125, "learning_rate": 0.00027058171245153634, "loss": 0.1679, "step": 83954 }, { "epoch": 0.148860927375421, "grad_norm": 1.21875, "learning_rate": 0.00027055860922819866, "loss": 0.2826, "step": 83956 }, { "epoch": 0.14886447354073082, "grad_norm": 0.91796875, "learning_rate": 0.0002705355096323558, "loss": 0.1826, "step": 83958 }, { "epoch": 0.14886801970604063, "grad_norm": 0.359375, "learning_rate": 0.0002705124136641086, "loss": 0.1884, "step": 83960 }, { "epoch": 0.14887156587135045, "grad_norm": 1.3203125, "learning_rate": 0.0002704893213235584, "loss": 0.292, "step": 83962 }, { "epoch": 0.14887511203666026, "grad_norm": 1.1171875, "learning_rate": 0.0002704662326108063, "loss": 0.2644, "step": 83964 }, { "epoch": 0.14887865820197008, "grad_norm": 0.1748046875, "learning_rate": 0.0002704431475259528, "loss": 0.1658, "step": 83966 }, { "epoch": 0.1488822043672799, "grad_norm": 0.263671875, "learning_rate": 0.00027042006606909903, "loss": 0.1559, "step": 83968 }, { "epoch": 0.1488857505325897, "grad_norm": 0.44921875, "learning_rate": 0.0002703969882403462, "loss": 0.1692, "step": 83970 }, { "epoch": 0.14888929669789952, "grad_norm": 0.23828125, "learning_rate": 0.00027037391403979505, "loss": 0.2054, "step": 83972 }, { "epoch": 0.14889284286320933, "grad_norm": 0.6875, "learning_rate": 0.00027035084346754644, "loss": 0.1969, "step": 83974 }, { "epoch": 0.14889638902851915, "grad_norm": 0.314453125, "learning_rate": 0.00027032777652370135, "loss": 0.1801, "step": 83976 }, { "epoch": 0.14889993519382896, "grad_norm": 0.369140625, "learning_rate": 0.00027030471320836055, "loss": 0.1459, "step": 83978 }, { "epoch": 0.14890348135913878, "grad_norm": 0.625, "learning_rate": 0.00027028165352162504, "loss": 0.204, "step": 83980 }, { "epoch": 0.1489070275244486, "grad_norm": 0.376953125, "learning_rate": 0.00027025859746359563, "loss": 0.1444, "step": 83982 }, { "epoch": 0.1489105736897584, "grad_norm": 1.53125, "learning_rate": 0.00027023554503437323, "loss": 0.1831, "step": 83984 }, { "epoch": 0.14891411985506822, "grad_norm": 0.8359375, "learning_rate": 0.00027021249623405844, "loss": 0.1233, "step": 83986 }, { "epoch": 0.14891766602037804, "grad_norm": 0.5234375, "learning_rate": 0.00027018945106275214, "loss": 0.3864, "step": 83988 }, { "epoch": 0.14892121218568785, "grad_norm": 0.89453125, "learning_rate": 0.00027016640952055534, "loss": 0.2558, "step": 83990 }, { "epoch": 0.14892475835099767, "grad_norm": 0.55859375, "learning_rate": 0.00027014337160756864, "loss": 0.1745, "step": 83992 }, { "epoch": 0.14892830451630748, "grad_norm": 1.3125, "learning_rate": 0.00027012033732389277, "loss": 0.145, "step": 83994 }, { "epoch": 0.1489318506816173, "grad_norm": 0.1689453125, "learning_rate": 0.0002700973066696283, "loss": 0.1444, "step": 83996 }, { "epoch": 0.1489353968469271, "grad_norm": 0.265625, "learning_rate": 0.0002700742796448765, "loss": 0.1697, "step": 83998 }, { "epoch": 0.14893894301223692, "grad_norm": 0.7421875, "learning_rate": 0.00027005125624973735, "loss": 0.3505, "step": 84000 }, { "epoch": 0.14894248917754674, "grad_norm": 0.27734375, "learning_rate": 0.0002700282364843122, "loss": 0.1524, "step": 84002 }, { "epoch": 0.14894603534285655, "grad_norm": 0.48828125, "learning_rate": 0.0002700052203487012, "loss": 0.1476, "step": 84004 }, { "epoch": 0.14894958150816637, "grad_norm": 0.427734375, "learning_rate": 0.0002699822078430055, "loss": 0.1511, "step": 84006 }, { "epoch": 0.14895312767347618, "grad_norm": 0.4453125, "learning_rate": 0.00026995919896732526, "loss": 0.2309, "step": 84008 }, { "epoch": 0.148956673838786, "grad_norm": 0.484375, "learning_rate": 0.0002699361937217615, "loss": 0.1633, "step": 84010 }, { "epoch": 0.1489602200040958, "grad_norm": 1.0703125, "learning_rate": 0.0002699131921064146, "loss": 0.3584, "step": 84012 }, { "epoch": 0.14896376616940563, "grad_norm": 0.55078125, "learning_rate": 0.0002698901941213852, "loss": 0.1833, "step": 84014 }, { "epoch": 0.14896731233471544, "grad_norm": 0.4453125, "learning_rate": 0.00026986719976677406, "loss": 0.1476, "step": 84016 }, { "epoch": 0.14897085850002525, "grad_norm": 0.51171875, "learning_rate": 0.0002698442090426813, "loss": 0.1735, "step": 84018 }, { "epoch": 0.14897440466533507, "grad_norm": 0.56640625, "learning_rate": 0.000269821221949208, "loss": 0.2239, "step": 84020 }, { "epoch": 0.14897795083064488, "grad_norm": 0.40625, "learning_rate": 0.00026979823848645446, "loss": 0.1997, "step": 84022 }, { "epoch": 0.1489814969959547, "grad_norm": 0.9921875, "learning_rate": 0.0002697752586545211, "loss": 0.1951, "step": 84024 }, { "epoch": 0.14898504316126454, "grad_norm": 0.404296875, "learning_rate": 0.00026975228245350834, "loss": 0.1977, "step": 84026 }, { "epoch": 0.14898858932657436, "grad_norm": 0.26953125, "learning_rate": 0.0002697293098835171, "loss": 0.1612, "step": 84028 }, { "epoch": 0.14899213549188417, "grad_norm": 0.33984375, "learning_rate": 0.00026970634094464744, "loss": 0.1467, "step": 84030 }, { "epoch": 0.14899568165719398, "grad_norm": 0.79296875, "learning_rate": 0.000269683375637, "loss": 0.1511, "step": 84032 }, { "epoch": 0.1489992278225038, "grad_norm": 0.322265625, "learning_rate": 0.0002696604139606753, "loss": 0.1938, "step": 84034 }, { "epoch": 0.1490027739878136, "grad_norm": 0.71484375, "learning_rate": 0.0002696374559157733, "loss": 0.1552, "step": 84036 }, { "epoch": 0.14900632015312343, "grad_norm": 0.373046875, "learning_rate": 0.00026961450150239516, "loss": 0.202, "step": 84038 }, { "epoch": 0.14900986631843324, "grad_norm": 0.384765625, "learning_rate": 0.00026959155072064044, "loss": 0.1468, "step": 84040 }, { "epoch": 0.14901341248374306, "grad_norm": 1.0078125, "learning_rate": 0.00026956860357061014, "loss": 0.1536, "step": 84042 }, { "epoch": 0.14901695864905287, "grad_norm": 0.384765625, "learning_rate": 0.0002695456600524042, "loss": 0.1829, "step": 84044 }, { "epoch": 0.1490205048143627, "grad_norm": 0.314453125, "learning_rate": 0.0002695227201661236, "loss": 0.1793, "step": 84046 }, { "epoch": 0.1490240509796725, "grad_norm": 0.66796875, "learning_rate": 0.0002694997839118677, "loss": 0.1477, "step": 84048 }, { "epoch": 0.14902759714498232, "grad_norm": 0.287109375, "learning_rate": 0.00026947685128973764, "loss": 0.1541, "step": 84050 }, { "epoch": 0.14903114331029213, "grad_norm": 0.287109375, "learning_rate": 0.0002694539222998334, "loss": 0.2633, "step": 84052 }, { "epoch": 0.14903468947560194, "grad_norm": 0.2421875, "learning_rate": 0.0002694309969422552, "loss": 0.2117, "step": 84054 }, { "epoch": 0.14903823564091176, "grad_norm": 1.1328125, "learning_rate": 0.00026940807521710344, "loss": 0.2791, "step": 84056 }, { "epoch": 0.14904178180622157, "grad_norm": 0.828125, "learning_rate": 0.0002693851571244781, "loss": 0.1986, "step": 84058 }, { "epoch": 0.1490453279715314, "grad_norm": 0.3671875, "learning_rate": 0.00026936224266447975, "loss": 0.2118, "step": 84060 }, { "epoch": 0.1490488741368412, "grad_norm": 0.3359375, "learning_rate": 0.00026933933183720827, "loss": 0.1802, "step": 84062 }, { "epoch": 0.14905242030215102, "grad_norm": 0.322265625, "learning_rate": 0.00026931642464276444, "loss": 0.1414, "step": 84064 }, { "epoch": 0.14905596646746083, "grad_norm": 0.64453125, "learning_rate": 0.00026929352108124774, "loss": 0.1525, "step": 84066 }, { "epoch": 0.14905951263277065, "grad_norm": 1.9375, "learning_rate": 0.0002692706211527587, "loss": 0.3967, "step": 84068 }, { "epoch": 0.14906305879808046, "grad_norm": 0.294921875, "learning_rate": 0.00026924772485739743, "loss": 0.1893, "step": 84070 }, { "epoch": 0.14906660496339028, "grad_norm": 0.4765625, "learning_rate": 0.00026922483219526407, "loss": 0.2282, "step": 84072 }, { "epoch": 0.1490701511287001, "grad_norm": 0.341796875, "learning_rate": 0.0002692019431664587, "loss": 0.2374, "step": 84074 }, { "epoch": 0.1490736972940099, "grad_norm": 0.2890625, "learning_rate": 0.0002691790577710814, "loss": 0.259, "step": 84076 }, { "epoch": 0.14907724345931972, "grad_norm": 0.82421875, "learning_rate": 0.0002691561760092324, "loss": 0.2063, "step": 84078 }, { "epoch": 0.14908078962462953, "grad_norm": 2.0625, "learning_rate": 0.00026913329788101175, "loss": 0.2487, "step": 84080 }, { "epoch": 0.14908433578993935, "grad_norm": 0.625, "learning_rate": 0.00026911042338651923, "loss": 0.1461, "step": 84082 }, { "epoch": 0.14908788195524916, "grad_norm": 0.51953125, "learning_rate": 0.00026908755252585514, "loss": 0.2014, "step": 84084 }, { "epoch": 0.14909142812055898, "grad_norm": 0.296875, "learning_rate": 0.0002690646852991195, "loss": 0.1913, "step": 84086 }, { "epoch": 0.1490949742858688, "grad_norm": 0.435546875, "learning_rate": 0.0002690418217064123, "loss": 0.243, "step": 84088 }, { "epoch": 0.1490985204511786, "grad_norm": 0.2060546875, "learning_rate": 0.00026901896174783344, "loss": 0.2056, "step": 84090 }, { "epoch": 0.14910206661648842, "grad_norm": 0.267578125, "learning_rate": 0.000268996105423483, "loss": 0.2222, "step": 84092 }, { "epoch": 0.14910561278179824, "grad_norm": 0.4765625, "learning_rate": 0.00026897325273346067, "loss": 0.2434, "step": 84094 }, { "epoch": 0.14910915894710805, "grad_norm": 2.0, "learning_rate": 0.00026895040367786707, "loss": 0.2748, "step": 84096 }, { "epoch": 0.14911270511241786, "grad_norm": 0.28125, "learning_rate": 0.0002689275582568012, "loss": 0.1201, "step": 84098 }, { "epoch": 0.14911625127772768, "grad_norm": 0.2890625, "learning_rate": 0.00026890471647036365, "loss": 0.1771, "step": 84100 }, { "epoch": 0.1491197974430375, "grad_norm": 0.251953125, "learning_rate": 0.00026888187831865414, "loss": 0.158, "step": 84102 }, { "epoch": 0.1491233436083473, "grad_norm": 0.9296875, "learning_rate": 0.0002688590438017725, "loss": 0.1745, "step": 84104 }, { "epoch": 0.14912688977365712, "grad_norm": 0.5, "learning_rate": 0.00026883621291981866, "loss": 0.1841, "step": 84106 }, { "epoch": 0.14913043593896694, "grad_norm": 0.361328125, "learning_rate": 0.0002688133856728922, "loss": 0.2384, "step": 84108 }, { "epoch": 0.14913398210427675, "grad_norm": 0.388671875, "learning_rate": 0.00026879056206109334, "loss": 0.2352, "step": 84110 }, { "epoch": 0.14913752826958657, "grad_norm": 1.109375, "learning_rate": 0.0002687677420845218, "loss": 0.2294, "step": 84112 }, { "epoch": 0.14914107443489638, "grad_norm": 0.27734375, "learning_rate": 0.00026874492574327724, "loss": 0.1994, "step": 84114 }, { "epoch": 0.1491446206002062, "grad_norm": 0.5546875, "learning_rate": 0.0002687221130374595, "loss": 0.1902, "step": 84116 }, { "epoch": 0.14914816676551604, "grad_norm": 0.3828125, "learning_rate": 0.00026869930396716834, "loss": 0.1755, "step": 84118 }, { "epoch": 0.14915171293082585, "grad_norm": 0.2578125, "learning_rate": 0.00026867649853250366, "loss": 0.1788, "step": 84120 }, { "epoch": 0.14915525909613567, "grad_norm": 0.291015625, "learning_rate": 0.0002686536967335652, "loss": 0.1847, "step": 84122 }, { "epoch": 0.14915880526144548, "grad_norm": 0.48828125, "learning_rate": 0.0002686308985704523, "loss": 0.1559, "step": 84124 }, { "epoch": 0.1491623514267553, "grad_norm": 0.349609375, "learning_rate": 0.0002686081040432651, "loss": 0.2208, "step": 84126 }, { "epoch": 0.1491658975920651, "grad_norm": 0.1923828125, "learning_rate": 0.00026858531315210316, "loss": 0.1841, "step": 84128 }, { "epoch": 0.14916944375737493, "grad_norm": 0.953125, "learning_rate": 0.00026856252589706593, "loss": 0.1927, "step": 84130 }, { "epoch": 0.14917298992268474, "grad_norm": 1.3828125, "learning_rate": 0.00026853974227825333, "loss": 0.2006, "step": 84132 }, { "epoch": 0.14917653608799455, "grad_norm": 0.251953125, "learning_rate": 0.00026851696229576483, "loss": 0.1136, "step": 84134 }, { "epoch": 0.14918008225330437, "grad_norm": 0.470703125, "learning_rate": 0.00026849418594970055, "loss": 0.1473, "step": 84136 }, { "epoch": 0.14918362841861418, "grad_norm": 0.32421875, "learning_rate": 0.0002684714132401592, "loss": 0.185, "step": 84138 }, { "epoch": 0.149187174583924, "grad_norm": 0.51171875, "learning_rate": 0.0002684486441672411, "loss": 0.1745, "step": 84140 }, { "epoch": 0.1491907207492338, "grad_norm": 0.53515625, "learning_rate": 0.0002684258787310458, "loss": 0.1696, "step": 84142 }, { "epoch": 0.14919426691454363, "grad_norm": 0.81640625, "learning_rate": 0.00026840311693167247, "loss": 0.1831, "step": 84144 }, { "epoch": 0.14919781307985344, "grad_norm": 0.458984375, "learning_rate": 0.0002683803587692209, "loss": 0.1333, "step": 84146 }, { "epoch": 0.14920135924516326, "grad_norm": 0.30859375, "learning_rate": 0.0002683576042437904, "loss": 0.2073, "step": 84148 }, { "epoch": 0.14920490541047307, "grad_norm": 0.408203125, "learning_rate": 0.0002683348533554809, "loss": 0.1514, "step": 84150 }, { "epoch": 0.14920845157578289, "grad_norm": 0.40234375, "learning_rate": 0.0002683121061043915, "loss": 0.1878, "step": 84152 }, { "epoch": 0.1492119977410927, "grad_norm": 0.671875, "learning_rate": 0.0002682893624906221, "loss": 0.2627, "step": 84154 }, { "epoch": 0.14921554390640251, "grad_norm": 0.33984375, "learning_rate": 0.00026826662251427155, "loss": 0.1616, "step": 84156 }, { "epoch": 0.14921909007171233, "grad_norm": 0.33984375, "learning_rate": 0.0002682438861754399, "loss": 0.1559, "step": 84158 }, { "epoch": 0.14922263623702214, "grad_norm": 0.263671875, "learning_rate": 0.0002682211534742263, "loss": 0.1393, "step": 84160 }, { "epoch": 0.14922618240233196, "grad_norm": 0.52734375, "learning_rate": 0.00026819842441073034, "loss": 0.179, "step": 84162 }, { "epoch": 0.14922972856764177, "grad_norm": 0.8125, "learning_rate": 0.0002681756989850511, "loss": 0.1965, "step": 84164 }, { "epoch": 0.1492332747329516, "grad_norm": 0.2041015625, "learning_rate": 0.0002681529771972883, "loss": 0.1681, "step": 84166 }, { "epoch": 0.1492368208982614, "grad_norm": 0.173828125, "learning_rate": 0.0002681302590475411, "loss": 0.1738, "step": 84168 }, { "epoch": 0.14924036706357122, "grad_norm": 1.765625, "learning_rate": 0.000268107544535909, "loss": 0.2944, "step": 84170 }, { "epoch": 0.14924391322888103, "grad_norm": 1.2578125, "learning_rate": 0.0002680848336624914, "loss": 0.2206, "step": 84172 }, { "epoch": 0.14924745939419085, "grad_norm": 0.39453125, "learning_rate": 0.0002680621264273871, "loss": 0.1599, "step": 84174 }, { "epoch": 0.14925100555950066, "grad_norm": 0.62109375, "learning_rate": 0.00026803942283069616, "loss": 0.2319, "step": 84176 }, { "epoch": 0.14925455172481047, "grad_norm": 0.400390625, "learning_rate": 0.0002680167228725175, "loss": 0.1728, "step": 84178 }, { "epoch": 0.1492580978901203, "grad_norm": 0.486328125, "learning_rate": 0.00026799402655295035, "loss": 0.1535, "step": 84180 }, { "epoch": 0.1492616440554301, "grad_norm": 0.328125, "learning_rate": 0.0002679713338720942, "loss": 0.1462, "step": 84182 }, { "epoch": 0.14926519022073992, "grad_norm": 0.50390625, "learning_rate": 0.00026794864483004776, "loss": 0.2104, "step": 84184 }, { "epoch": 0.14926873638604973, "grad_norm": 0.5234375, "learning_rate": 0.0002679259594269111, "loss": 0.1394, "step": 84186 }, { "epoch": 0.14927228255135955, "grad_norm": 0.353515625, "learning_rate": 0.00026790327766278263, "loss": 0.1766, "step": 84188 }, { "epoch": 0.14927582871666936, "grad_norm": 0.41796875, "learning_rate": 0.000267880599537762, "loss": 0.1872, "step": 84190 }, { "epoch": 0.14927937488197918, "grad_norm": 0.296875, "learning_rate": 0.00026785792505194816, "loss": 0.1949, "step": 84192 }, { "epoch": 0.149282921047289, "grad_norm": 0.22265625, "learning_rate": 0.0002678352542054407, "loss": 0.2611, "step": 84194 }, { "epoch": 0.1492864672125988, "grad_norm": 5.3125, "learning_rate": 0.00026781258699833804, "loss": 0.213, "step": 84196 }, { "epoch": 0.14929001337790862, "grad_norm": 0.1787109375, "learning_rate": 0.00026778992343074007, "loss": 0.1819, "step": 84198 }, { "epoch": 0.14929355954321843, "grad_norm": 0.8828125, "learning_rate": 0.0002677672635027454, "loss": 0.1723, "step": 84200 }, { "epoch": 0.14929710570852825, "grad_norm": 0.63671875, "learning_rate": 0.00026774460721445335, "loss": 0.1807, "step": 84202 }, { "epoch": 0.14930065187383806, "grad_norm": 0.55078125, "learning_rate": 0.000267721954565963, "loss": 0.2067, "step": 84204 }, { "epoch": 0.14930419803914788, "grad_norm": 0.1806640625, "learning_rate": 0.0002676993055573732, "loss": 0.1551, "step": 84206 }, { "epoch": 0.14930774420445772, "grad_norm": 0.546875, "learning_rate": 0.0002676766601887834, "loss": 0.1734, "step": 84208 }, { "epoch": 0.14931129036976754, "grad_norm": 1.2578125, "learning_rate": 0.0002676540184602923, "loss": 0.3057, "step": 84210 }, { "epoch": 0.14931483653507735, "grad_norm": 0.56640625, "learning_rate": 0.00026763138037199914, "loss": 0.1593, "step": 84212 }, { "epoch": 0.14931838270038716, "grad_norm": 0.74609375, "learning_rate": 0.00026760874592400265, "loss": 0.1506, "step": 84214 }, { "epoch": 0.14932192886569698, "grad_norm": 0.44140625, "learning_rate": 0.0002675861151164021, "loss": 0.2608, "step": 84216 }, { "epoch": 0.1493254750310068, "grad_norm": 1.109375, "learning_rate": 0.0002675634879492964, "loss": 0.2021, "step": 84218 }, { "epoch": 0.1493290211963166, "grad_norm": 0.42578125, "learning_rate": 0.0002675408644227844, "loss": 0.2468, "step": 84220 }, { "epoch": 0.14933256736162642, "grad_norm": 0.4453125, "learning_rate": 0.0002675182445369652, "loss": 0.1865, "step": 84222 }, { "epoch": 0.14933611352693624, "grad_norm": 0.37890625, "learning_rate": 0.0002674956282919374, "loss": 0.1615, "step": 84224 }, { "epoch": 0.14933965969224605, "grad_norm": 0.3203125, "learning_rate": 0.00026747301568780056, "loss": 0.184, "step": 84226 }, { "epoch": 0.14934320585755587, "grad_norm": 0.375, "learning_rate": 0.00026745040672465273, "loss": 0.3423, "step": 84228 }, { "epoch": 0.14934675202286568, "grad_norm": 0.54296875, "learning_rate": 0.00026742780140259347, "loss": 0.165, "step": 84230 }, { "epoch": 0.1493502981881755, "grad_norm": 0.34765625, "learning_rate": 0.0002674051997217212, "loss": 0.1611, "step": 84232 }, { "epoch": 0.1493538443534853, "grad_norm": 0.52734375, "learning_rate": 0.00026738260168213514, "loss": 0.2021, "step": 84234 }, { "epoch": 0.14935739051879512, "grad_norm": 0.80078125, "learning_rate": 0.00026736000728393383, "loss": 0.2152, "step": 84236 }, { "epoch": 0.14936093668410494, "grad_norm": 0.43359375, "learning_rate": 0.00026733741652721634, "loss": 0.1684, "step": 84238 }, { "epoch": 0.14936448284941475, "grad_norm": 0.1845703125, "learning_rate": 0.00026731482941208117, "loss": 0.1921, "step": 84240 }, { "epoch": 0.14936802901472457, "grad_norm": 0.220703125, "learning_rate": 0.0002672922459386273, "loss": 0.1792, "step": 84242 }, { "epoch": 0.14937157518003438, "grad_norm": 2.40625, "learning_rate": 0.0002672696661069536, "loss": 0.2175, "step": 84244 }, { "epoch": 0.1493751213453442, "grad_norm": 1.3203125, "learning_rate": 0.00026724708991715843, "loss": 0.3797, "step": 84246 }, { "epoch": 0.149378667510654, "grad_norm": 0.1357421875, "learning_rate": 0.0002672245173693408, "loss": 0.1079, "step": 84248 }, { "epoch": 0.14938221367596383, "grad_norm": 0.349609375, "learning_rate": 0.00026720194846359933, "loss": 0.1318, "step": 84250 }, { "epoch": 0.14938575984127364, "grad_norm": 1.828125, "learning_rate": 0.0002671793832000332, "loss": 0.2081, "step": 84252 }, { "epoch": 0.14938930600658346, "grad_norm": 0.458984375, "learning_rate": 0.00026715682157874026, "loss": 0.1753, "step": 84254 }, { "epoch": 0.14939285217189327, "grad_norm": 0.375, "learning_rate": 0.00026713426359981985, "loss": 0.1669, "step": 84256 }, { "epoch": 0.14939639833720308, "grad_norm": 0.375, "learning_rate": 0.00026711170926337036, "loss": 0.1587, "step": 84258 }, { "epoch": 0.1493999445025129, "grad_norm": 0.376953125, "learning_rate": 0.00026708915856949047, "loss": 0.161, "step": 84260 }, { "epoch": 0.1494034906678227, "grad_norm": 0.75, "learning_rate": 0.00026706661151827866, "loss": 0.3611, "step": 84262 }, { "epoch": 0.14940703683313253, "grad_norm": 0.447265625, "learning_rate": 0.00026704406810983356, "loss": 0.243, "step": 84264 }, { "epoch": 0.14941058299844234, "grad_norm": 0.337890625, "learning_rate": 0.0002670215283442541, "loss": 0.1461, "step": 84266 }, { "epoch": 0.14941412916375216, "grad_norm": 0.2060546875, "learning_rate": 0.0002669989922216385, "loss": 0.1556, "step": 84268 }, { "epoch": 0.14941767532906197, "grad_norm": 0.37109375, "learning_rate": 0.00026697645974208564, "loss": 0.2593, "step": 84270 }, { "epoch": 0.1494212214943718, "grad_norm": 0.298828125, "learning_rate": 0.00026695393090569373, "loss": 0.2819, "step": 84272 }, { "epoch": 0.1494247676596816, "grad_norm": 3.171875, "learning_rate": 0.0002669314057125613, "loss": 0.3788, "step": 84274 }, { "epoch": 0.14942831382499142, "grad_norm": 0.361328125, "learning_rate": 0.0002669088841627873, "loss": 0.1724, "step": 84276 }, { "epoch": 0.14943185999030123, "grad_norm": 0.42578125, "learning_rate": 0.00026688636625646963, "loss": 0.2187, "step": 84278 }, { "epoch": 0.14943540615561104, "grad_norm": 1.8515625, "learning_rate": 0.00026686385199370724, "loss": 0.2058, "step": 84280 }, { "epoch": 0.14943895232092086, "grad_norm": 0.19921875, "learning_rate": 0.00026684134137459827, "loss": 0.153, "step": 84282 }, { "epoch": 0.14944249848623067, "grad_norm": 0.376953125, "learning_rate": 0.0002668188343992415, "loss": 0.2132, "step": 84284 }, { "epoch": 0.1494460446515405, "grad_norm": 0.72265625, "learning_rate": 0.000266796331067735, "loss": 0.1622, "step": 84286 }, { "epoch": 0.1494495908168503, "grad_norm": 0.9765625, "learning_rate": 0.0002667738313801775, "loss": 0.1375, "step": 84288 }, { "epoch": 0.14945313698216012, "grad_norm": 0.279296875, "learning_rate": 0.0002667513353366673, "loss": 0.1594, "step": 84290 }, { "epoch": 0.14945668314746993, "grad_norm": 0.328125, "learning_rate": 0.00026672884293730273, "loss": 0.1493, "step": 84292 }, { "epoch": 0.14946022931277975, "grad_norm": 2.5, "learning_rate": 0.00026670635418218203, "loss": 0.2643, "step": 84294 }, { "epoch": 0.14946377547808956, "grad_norm": 0.8046875, "learning_rate": 0.00026668386907140374, "loss": 0.1687, "step": 84296 }, { "epoch": 0.14946732164339938, "grad_norm": 0.71484375, "learning_rate": 0.0002666613876050662, "loss": 0.1583, "step": 84298 }, { "epoch": 0.14947086780870922, "grad_norm": 1.5, "learning_rate": 0.0002666389097832678, "loss": 0.2313, "step": 84300 }, { "epoch": 0.14947441397401903, "grad_norm": 0.6171875, "learning_rate": 0.00026661643560610664, "loss": 0.2852, "step": 84302 }, { "epoch": 0.14947796013932885, "grad_norm": 0.6484375, "learning_rate": 0.000266593965073681, "loss": 0.1444, "step": 84304 }, { "epoch": 0.14948150630463866, "grad_norm": 0.6484375, "learning_rate": 0.0002665714981860894, "loss": 0.1869, "step": 84306 }, { "epoch": 0.14948505246994848, "grad_norm": 0.21875, "learning_rate": 0.0002665490349434299, "loss": 0.1937, "step": 84308 }, { "epoch": 0.1494885986352583, "grad_norm": 0.283203125, "learning_rate": 0.00026652657534580084, "loss": 0.2002, "step": 84310 }, { "epoch": 0.1494921448005681, "grad_norm": 0.54296875, "learning_rate": 0.0002665041193933004, "loss": 0.1884, "step": 84312 }, { "epoch": 0.14949569096587792, "grad_norm": 0.890625, "learning_rate": 0.0002664816670860266, "loss": 0.1847, "step": 84314 }, { "epoch": 0.14949923713118773, "grad_norm": 0.42578125, "learning_rate": 0.00026645921842407806, "loss": 0.1824, "step": 84316 }, { "epoch": 0.14950278329649755, "grad_norm": 3.4375, "learning_rate": 0.0002664367734075527, "loss": 0.5388, "step": 84318 }, { "epoch": 0.14950632946180736, "grad_norm": 0.3828125, "learning_rate": 0.0002664143320365486, "loss": 0.1553, "step": 84320 }, { "epoch": 0.14950987562711718, "grad_norm": 0.8515625, "learning_rate": 0.00026639189431116396, "loss": 0.2176, "step": 84322 }, { "epoch": 0.149513421792427, "grad_norm": 0.91796875, "learning_rate": 0.0002663694602314971, "loss": 0.1565, "step": 84324 }, { "epoch": 0.1495169679577368, "grad_norm": 0.2236328125, "learning_rate": 0.00026634702979764596, "loss": 0.1632, "step": 84326 }, { "epoch": 0.14952051412304662, "grad_norm": 0.37890625, "learning_rate": 0.00026632460300970865, "loss": 0.1908, "step": 84328 }, { "epoch": 0.14952406028835644, "grad_norm": 3.390625, "learning_rate": 0.00026630217986778326, "loss": 0.4721, "step": 84330 }, { "epoch": 0.14952760645366625, "grad_norm": 1.046875, "learning_rate": 0.0002662797603719679, "loss": 0.1534, "step": 84332 }, { "epoch": 0.14953115261897607, "grad_norm": 0.27734375, "learning_rate": 0.00026625734452236067, "loss": 0.1665, "step": 84334 }, { "epoch": 0.14953469878428588, "grad_norm": 0.349609375, "learning_rate": 0.00026623493231905926, "loss": 0.1624, "step": 84336 }, { "epoch": 0.1495382449495957, "grad_norm": 0.3359375, "learning_rate": 0.0002662125237621622, "loss": 0.1584, "step": 84338 }, { "epoch": 0.1495417911149055, "grad_norm": 0.625, "learning_rate": 0.00026619011885176694, "loss": 0.1528, "step": 84340 }, { "epoch": 0.14954533728021532, "grad_norm": 0.5703125, "learning_rate": 0.0002661677175879722, "loss": 0.2069, "step": 84342 }, { "epoch": 0.14954888344552514, "grad_norm": 0.51171875, "learning_rate": 0.0002661453199708751, "loss": 0.2026, "step": 84344 }, { "epoch": 0.14955242961083495, "grad_norm": 0.7890625, "learning_rate": 0.00026612292600057427, "loss": 0.1495, "step": 84346 }, { "epoch": 0.14955597577614477, "grad_norm": 0.322265625, "learning_rate": 0.0002661005356771673, "loss": 0.1385, "step": 84348 }, { "epoch": 0.14955952194145458, "grad_norm": 0.408203125, "learning_rate": 0.0002660781490007522, "loss": 0.2822, "step": 84350 }, { "epoch": 0.1495630681067644, "grad_norm": 0.4375, "learning_rate": 0.000266055765971427, "loss": 0.1821, "step": 84352 }, { "epoch": 0.1495666142720742, "grad_norm": 0.3671875, "learning_rate": 0.00026603338658928916, "loss": 0.1445, "step": 84354 }, { "epoch": 0.14957016043738403, "grad_norm": 0.51953125, "learning_rate": 0.00026601101085443706, "loss": 0.1594, "step": 84356 }, { "epoch": 0.14957370660269384, "grad_norm": 0.3359375, "learning_rate": 0.0002659886387669684, "loss": 0.1886, "step": 84358 }, { "epoch": 0.14957725276800365, "grad_norm": 0.2421875, "learning_rate": 0.000265966270326981, "loss": 0.1597, "step": 84360 }, { "epoch": 0.14958079893331347, "grad_norm": 0.490234375, "learning_rate": 0.0002659439055345725, "loss": 0.1655, "step": 84362 }, { "epoch": 0.14958434509862328, "grad_norm": 0.734375, "learning_rate": 0.00026592154438984106, "loss": 0.2532, "step": 84364 }, { "epoch": 0.1495878912639331, "grad_norm": 0.37109375, "learning_rate": 0.0002658991868928842, "loss": 0.2063, "step": 84366 }, { "epoch": 0.1495914374292429, "grad_norm": 0.30078125, "learning_rate": 0.0002658768330437998, "loss": 0.1763, "step": 84368 }, { "epoch": 0.14959498359455273, "grad_norm": 0.56640625, "learning_rate": 0.0002658544828426857, "loss": 0.181, "step": 84370 }, { "epoch": 0.14959852975986254, "grad_norm": 1.3203125, "learning_rate": 0.0002658321362896393, "loss": 0.2365, "step": 84372 }, { "epoch": 0.14960207592517236, "grad_norm": 0.8984375, "learning_rate": 0.000265809793384759, "loss": 0.2044, "step": 84374 }, { "epoch": 0.14960562209048217, "grad_norm": 0.40625, "learning_rate": 0.00026578745412814174, "loss": 0.1623, "step": 84376 }, { "epoch": 0.14960916825579199, "grad_norm": 0.52734375, "learning_rate": 0.00026576511851988586, "loss": 0.2155, "step": 84378 }, { "epoch": 0.1496127144211018, "grad_norm": 0.251953125, "learning_rate": 0.00026574278656008856, "loss": 0.1774, "step": 84380 }, { "epoch": 0.14961626058641161, "grad_norm": 0.455078125, "learning_rate": 0.0002657204582488479, "loss": 0.2351, "step": 84382 }, { "epoch": 0.14961980675172143, "grad_norm": 0.412109375, "learning_rate": 0.0002656981335862611, "loss": 0.2147, "step": 84384 }, { "epoch": 0.14962335291703124, "grad_norm": 0.2578125, "learning_rate": 0.0002656758125724262, "loss": 0.2091, "step": 84386 }, { "epoch": 0.14962689908234106, "grad_norm": 0.34375, "learning_rate": 0.0002656534952074408, "loss": 0.1742, "step": 84388 }, { "epoch": 0.1496304452476509, "grad_norm": 0.3125, "learning_rate": 0.00026563118149140226, "loss": 0.1291, "step": 84390 }, { "epoch": 0.14963399141296072, "grad_norm": 0.87890625, "learning_rate": 0.0002656088714244082, "loss": 0.1754, "step": 84392 }, { "epoch": 0.14963753757827053, "grad_norm": 0.388671875, "learning_rate": 0.0002655865650065562, "loss": 0.1619, "step": 84394 }, { "epoch": 0.14964108374358034, "grad_norm": 0.31640625, "learning_rate": 0.00026556426223794407, "loss": 0.1819, "step": 84396 }, { "epoch": 0.14964462990889016, "grad_norm": 0.6640625, "learning_rate": 0.00026554196311866896, "loss": 0.1697, "step": 84398 }, { "epoch": 0.14964817607419997, "grad_norm": 0.62109375, "learning_rate": 0.00026551966764882883, "loss": 0.246, "step": 84400 }, { "epoch": 0.1496517222395098, "grad_norm": 0.455078125, "learning_rate": 0.0002654973758285207, "loss": 0.1418, "step": 84402 }, { "epoch": 0.1496552684048196, "grad_norm": 0.228515625, "learning_rate": 0.0002654750876578425, "loss": 0.14, "step": 84404 }, { "epoch": 0.14965881457012942, "grad_norm": 0.2099609375, "learning_rate": 0.00026545280313689147, "loss": 0.2357, "step": 84406 }, { "epoch": 0.14966236073543923, "grad_norm": 1.078125, "learning_rate": 0.0002654305222657651, "loss": 0.1762, "step": 84408 }, { "epoch": 0.14966590690074905, "grad_norm": 0.349609375, "learning_rate": 0.0002654082450445609, "loss": 0.1779, "step": 84410 }, { "epoch": 0.14966945306605886, "grad_norm": 0.61328125, "learning_rate": 0.00026538597147337606, "loss": 0.1657, "step": 84412 }, { "epoch": 0.14967299923136868, "grad_norm": 0.267578125, "learning_rate": 0.00026536370155230845, "loss": 0.2405, "step": 84414 }, { "epoch": 0.1496765453966785, "grad_norm": 0.1962890625, "learning_rate": 0.00026534143528145497, "loss": 0.1676, "step": 84416 }, { "epoch": 0.1496800915619883, "grad_norm": 0.58984375, "learning_rate": 0.0002653191726609134, "loss": 0.1421, "step": 84418 }, { "epoch": 0.14968363772729812, "grad_norm": 0.376953125, "learning_rate": 0.0002652969136907808, "loss": 0.2211, "step": 84420 }, { "epoch": 0.14968718389260793, "grad_norm": 1.1796875, "learning_rate": 0.00026527465837115465, "loss": 0.1357, "step": 84422 }, { "epoch": 0.14969073005791775, "grad_norm": 5.4375, "learning_rate": 0.0002652524067021323, "loss": 0.2769, "step": 84424 }, { "epoch": 0.14969427622322756, "grad_norm": 0.44921875, "learning_rate": 0.0002652301586838112, "loss": 0.192, "step": 84426 }, { "epoch": 0.14969782238853738, "grad_norm": 0.291015625, "learning_rate": 0.0002652079143162883, "loss": 0.1584, "step": 84428 }, { "epoch": 0.1497013685538472, "grad_norm": 1.4296875, "learning_rate": 0.00026518567359966105, "loss": 0.3053, "step": 84430 }, { "epoch": 0.149704914719157, "grad_norm": 0.388671875, "learning_rate": 0.000265163436534027, "loss": 0.147, "step": 84432 }, { "epoch": 0.14970846088446682, "grad_norm": 0.296875, "learning_rate": 0.00026514120311948286, "loss": 0.2143, "step": 84434 }, { "epoch": 0.14971200704977664, "grad_norm": 0.5078125, "learning_rate": 0.0002651189733561263, "loss": 0.2198, "step": 84436 }, { "epoch": 0.14971555321508645, "grad_norm": 0.291015625, "learning_rate": 0.00026509674724405427, "loss": 0.1708, "step": 84438 }, { "epoch": 0.14971909938039626, "grad_norm": 0.453125, "learning_rate": 0.00026507452478336435, "loss": 0.141, "step": 84440 }, { "epoch": 0.14972264554570608, "grad_norm": 0.7265625, "learning_rate": 0.00026505230597415324, "loss": 0.1891, "step": 84442 }, { "epoch": 0.1497261917110159, "grad_norm": 0.7265625, "learning_rate": 0.0002650300908165184, "loss": 0.1925, "step": 84444 }, { "epoch": 0.1497297378763257, "grad_norm": 0.3828125, "learning_rate": 0.00026500787931055703, "loss": 0.2027, "step": 84446 }, { "epoch": 0.14973328404163552, "grad_norm": 0.453125, "learning_rate": 0.0002649856714563661, "loss": 0.1367, "step": 84448 }, { "epoch": 0.14973683020694534, "grad_norm": 0.55859375, "learning_rate": 0.0002649634672540429, "loss": 0.2918, "step": 84450 }, { "epoch": 0.14974037637225515, "grad_norm": 0.5390625, "learning_rate": 0.00026494126670368424, "loss": 0.1952, "step": 84452 }, { "epoch": 0.14974392253756497, "grad_norm": 0.5234375, "learning_rate": 0.00026491906980538756, "loss": 0.1628, "step": 84454 }, { "epoch": 0.14974746870287478, "grad_norm": 0.8671875, "learning_rate": 0.00026489687655924975, "loss": 0.2326, "step": 84456 }, { "epoch": 0.1497510148681846, "grad_norm": 0.431640625, "learning_rate": 0.000264874686965368, "loss": 0.2895, "step": 84458 }, { "epoch": 0.1497545610334944, "grad_norm": 0.478515625, "learning_rate": 0.0002648525010238393, "loss": 0.1584, "step": 84460 }, { "epoch": 0.14975810719880422, "grad_norm": 0.34765625, "learning_rate": 0.0002648303187347605, "loss": 0.198, "step": 84462 }, { "epoch": 0.14976165336411404, "grad_norm": 0.216796875, "learning_rate": 0.000264808140098229, "loss": 0.1234, "step": 84464 }, { "epoch": 0.14976519952942385, "grad_norm": 0.546875, "learning_rate": 0.0002647859651143412, "loss": 0.1171, "step": 84466 }, { "epoch": 0.14976874569473367, "grad_norm": 0.71484375, "learning_rate": 0.0002647637937831947, "loss": 0.2261, "step": 84468 }, { "epoch": 0.14977229186004348, "grad_norm": 0.484375, "learning_rate": 0.00026474162610488596, "loss": 0.2633, "step": 84470 }, { "epoch": 0.1497758380253533, "grad_norm": 0.294921875, "learning_rate": 0.00026471946207951247, "loss": 0.3592, "step": 84472 }, { "epoch": 0.1497793841906631, "grad_norm": 0.8046875, "learning_rate": 0.00026469730170717063, "loss": 0.3421, "step": 84474 }, { "epoch": 0.14978293035597293, "grad_norm": 0.63671875, "learning_rate": 0.0002646751449879578, "loss": 0.3611, "step": 84476 }, { "epoch": 0.14978647652128274, "grad_norm": 0.72265625, "learning_rate": 0.00026465299192197055, "loss": 0.2068, "step": 84478 }, { "epoch": 0.14979002268659258, "grad_norm": 1.453125, "learning_rate": 0.000264630842509306, "loss": 0.24, "step": 84480 }, { "epoch": 0.1497935688519024, "grad_norm": 0.451171875, "learning_rate": 0.00026460869675006075, "loss": 0.1766, "step": 84482 }, { "epoch": 0.1497971150172122, "grad_norm": 0.306640625, "learning_rate": 0.00026458655464433174, "loss": 0.2098, "step": 84484 }, { "epoch": 0.14980066118252203, "grad_norm": 0.271484375, "learning_rate": 0.00026456441619221606, "loss": 0.1394, "step": 84486 }, { "epoch": 0.14980420734783184, "grad_norm": 0.38671875, "learning_rate": 0.00026454228139381015, "loss": 0.1863, "step": 84488 }, { "epoch": 0.14980775351314166, "grad_norm": 0.57421875, "learning_rate": 0.0002645201502492113, "loss": 0.154, "step": 84490 }, { "epoch": 0.14981129967845147, "grad_norm": 0.2255859375, "learning_rate": 0.0002644980227585158, "loss": 0.1415, "step": 84492 }, { "epoch": 0.14981484584376129, "grad_norm": 1.9765625, "learning_rate": 0.00026447589892182073, "loss": 0.2133, "step": 84494 }, { "epoch": 0.1498183920090711, "grad_norm": 1.03125, "learning_rate": 0.00026445377873922273, "loss": 0.2253, "step": 84496 }, { "epoch": 0.14982193817438091, "grad_norm": 1.9921875, "learning_rate": 0.00026443166221081855, "loss": 0.2807, "step": 84498 }, { "epoch": 0.14982548433969073, "grad_norm": 1.2734375, "learning_rate": 0.00026440954933670493, "loss": 0.2124, "step": 84500 }, { "epoch": 0.14982903050500054, "grad_norm": 0.546875, "learning_rate": 0.00026438744011697844, "loss": 0.1928, "step": 84502 }, { "epoch": 0.14983257667031036, "grad_norm": 0.3828125, "learning_rate": 0.00026436533455173605, "loss": 0.199, "step": 84504 }, { "epoch": 0.14983612283562017, "grad_norm": 0.384765625, "learning_rate": 0.00026434323264107416, "loss": 0.2042, "step": 84506 }, { "epoch": 0.14983966900093, "grad_norm": 0.287109375, "learning_rate": 0.0002643211343850898, "loss": 0.1521, "step": 84508 }, { "epoch": 0.1498432151662398, "grad_norm": 0.3984375, "learning_rate": 0.0002642990397838791, "loss": 0.1556, "step": 84510 }, { "epoch": 0.14984676133154962, "grad_norm": 0.2890625, "learning_rate": 0.00026427694883753906, "loss": 0.2875, "step": 84512 }, { "epoch": 0.14985030749685943, "grad_norm": 0.47265625, "learning_rate": 0.0002642548615461663, "loss": 0.212, "step": 84514 }, { "epoch": 0.14985385366216925, "grad_norm": 0.357421875, "learning_rate": 0.0002642327779098573, "loss": 0.2063, "step": 84516 }, { "epoch": 0.14985739982747906, "grad_norm": 0.490234375, "learning_rate": 0.00026421069792870866, "loss": 0.156, "step": 84518 }, { "epoch": 0.14986094599278887, "grad_norm": 0.361328125, "learning_rate": 0.00026418862160281674, "loss": 0.1919, "step": 84520 }, { "epoch": 0.1498644921580987, "grad_norm": 0.83984375, "learning_rate": 0.0002641665489322786, "loss": 0.156, "step": 84522 }, { "epoch": 0.1498680383234085, "grad_norm": 0.55078125, "learning_rate": 0.00026414447991719025, "loss": 0.2101, "step": 84524 }, { "epoch": 0.14987158448871832, "grad_norm": 1.390625, "learning_rate": 0.0002641224145576485, "loss": 0.2337, "step": 84526 }, { "epoch": 0.14987513065402813, "grad_norm": 0.337890625, "learning_rate": 0.0002641003528537497, "loss": 0.1838, "step": 84528 }, { "epoch": 0.14987867681933795, "grad_norm": 0.2578125, "learning_rate": 0.0002640782948055906, "loss": 0.1816, "step": 84530 }, { "epoch": 0.14988222298464776, "grad_norm": 1.4140625, "learning_rate": 0.0002640562404132673, "loss": 0.2257, "step": 84532 }, { "epoch": 0.14988576914995758, "grad_norm": 0.439453125, "learning_rate": 0.0002640341896768765, "loss": 0.1898, "step": 84534 }, { "epoch": 0.1498893153152674, "grad_norm": 0.73046875, "learning_rate": 0.0002640121425965145, "loss": 0.21, "step": 84536 }, { "epoch": 0.1498928614805772, "grad_norm": 1.203125, "learning_rate": 0.00026399009917227795, "loss": 0.2751, "step": 84538 }, { "epoch": 0.14989640764588702, "grad_norm": 4.0625, "learning_rate": 0.0002639680594042629, "loss": 0.2624, "step": 84540 }, { "epoch": 0.14989995381119683, "grad_norm": 0.173828125, "learning_rate": 0.00026394602329256594, "loss": 0.1253, "step": 84542 }, { "epoch": 0.14990349997650665, "grad_norm": 0.84765625, "learning_rate": 0.0002639239908372836, "loss": 0.2573, "step": 84544 }, { "epoch": 0.14990704614181646, "grad_norm": 3.375, "learning_rate": 0.00026390196203851193, "loss": 0.1542, "step": 84546 }, { "epoch": 0.14991059230712628, "grad_norm": 0.203125, "learning_rate": 0.0002638799368963475, "loss": 0.1563, "step": 84548 }, { "epoch": 0.1499141384724361, "grad_norm": 0.53125, "learning_rate": 0.0002638579154108864, "loss": 0.2333, "step": 84550 }, { "epoch": 0.1499176846377459, "grad_norm": 0.408203125, "learning_rate": 0.0002638358975822253, "loss": 0.1538, "step": 84552 }, { "epoch": 0.14992123080305572, "grad_norm": 0.72265625, "learning_rate": 0.0002638138834104602, "loss": 0.177, "step": 84554 }, { "epoch": 0.14992477696836554, "grad_norm": 0.486328125, "learning_rate": 0.0002637918728956875, "loss": 0.1439, "step": 84556 }, { "epoch": 0.14992832313367535, "grad_norm": 0.63671875, "learning_rate": 0.00026376986603800337, "loss": 0.1945, "step": 84558 }, { "epoch": 0.14993186929898517, "grad_norm": 0.68359375, "learning_rate": 0.000263747862837504, "loss": 0.2465, "step": 84560 }, { "epoch": 0.14993541546429498, "grad_norm": 0.7265625, "learning_rate": 0.000263725863294286, "loss": 0.2471, "step": 84562 }, { "epoch": 0.1499389616296048, "grad_norm": 0.921875, "learning_rate": 0.000263703867408445, "loss": 0.2355, "step": 84564 }, { "epoch": 0.1499425077949146, "grad_norm": 0.58984375, "learning_rate": 0.00026368187518007764, "loss": 0.1745, "step": 84566 }, { "epoch": 0.14994605396022442, "grad_norm": 0.44140625, "learning_rate": 0.0002636598866092799, "loss": 0.201, "step": 84568 }, { "epoch": 0.14994960012553424, "grad_norm": 0.3515625, "learning_rate": 0.0002636379016961481, "loss": 0.127, "step": 84570 }, { "epoch": 0.14995314629084408, "grad_norm": 0.6328125, "learning_rate": 0.0002636159204407783, "loss": 0.1582, "step": 84572 }, { "epoch": 0.1499566924561539, "grad_norm": 0.53125, "learning_rate": 0.00026359394284326663, "loss": 0.1475, "step": 84574 }, { "epoch": 0.1499602386214637, "grad_norm": 0.48046875, "learning_rate": 0.00026357196890370915, "loss": 0.1367, "step": 84576 }, { "epoch": 0.14996378478677352, "grad_norm": 0.53515625, "learning_rate": 0.00026354999862220204, "loss": 0.1732, "step": 84578 }, { "epoch": 0.14996733095208334, "grad_norm": 0.73828125, "learning_rate": 0.00026352803199884163, "loss": 0.2179, "step": 84580 }, { "epoch": 0.14997087711739315, "grad_norm": 0.671875, "learning_rate": 0.00026350606903372334, "loss": 0.1824, "step": 84582 }, { "epoch": 0.14997442328270297, "grad_norm": 1.078125, "learning_rate": 0.0002634841097269438, "loss": 0.2286, "step": 84584 }, { "epoch": 0.14997796944801278, "grad_norm": 0.322265625, "learning_rate": 0.0002634621540785987, "loss": 0.1633, "step": 84586 }, { "epoch": 0.1499815156133226, "grad_norm": 0.283203125, "learning_rate": 0.0002634402020887845, "loss": 0.1638, "step": 84588 }, { "epoch": 0.1499850617786324, "grad_norm": 0.63671875, "learning_rate": 0.00026341825375759666, "loss": 0.1945, "step": 84590 }, { "epoch": 0.14998860794394223, "grad_norm": 0.1904296875, "learning_rate": 0.00026339630908513156, "loss": 0.1023, "step": 84592 }, { "epoch": 0.14999215410925204, "grad_norm": 0.279296875, "learning_rate": 0.000263374368071485, "loss": 0.2208, "step": 84594 }, { "epoch": 0.14999570027456186, "grad_norm": 0.89453125, "learning_rate": 0.00026335243071675307, "loss": 0.1782, "step": 84596 }, { "epoch": 0.14999924643987167, "grad_norm": 0.828125, "learning_rate": 0.0002633304970210316, "loss": 0.2532, "step": 84598 }, { "epoch": 0.15000279260518148, "grad_norm": 0.439453125, "learning_rate": 0.0002633085669844164, "loss": 0.2663, "step": 84600 }, { "epoch": 0.1500063387704913, "grad_norm": 0.3203125, "learning_rate": 0.0002632866406070037, "loss": 0.1904, "step": 84602 }, { "epoch": 0.1500098849358011, "grad_norm": 0.255859375, "learning_rate": 0.00026326471788888916, "loss": 0.1493, "step": 84604 }, { "epoch": 0.15001343110111093, "grad_norm": 0.98828125, "learning_rate": 0.00026324279883016864, "loss": 0.1708, "step": 84606 }, { "epoch": 0.15001697726642074, "grad_norm": 0.271484375, "learning_rate": 0.00026322088343093804, "loss": 0.1926, "step": 84608 }, { "epoch": 0.15002052343173056, "grad_norm": 1.875, "learning_rate": 0.0002631989716912934, "loss": 0.2101, "step": 84610 }, { "epoch": 0.15002406959704037, "grad_norm": 0.6171875, "learning_rate": 0.0002631770636113303, "loss": 0.1967, "step": 84612 }, { "epoch": 0.15002761576235019, "grad_norm": 0.390625, "learning_rate": 0.00026315515919114464, "loss": 0.1978, "step": 84614 }, { "epoch": 0.15003116192766, "grad_norm": 0.361328125, "learning_rate": 0.0002631332584308323, "loss": 0.1941, "step": 84616 }, { "epoch": 0.15003470809296982, "grad_norm": 0.306640625, "learning_rate": 0.00026311136133048876, "loss": 0.2107, "step": 84618 }, { "epoch": 0.15003825425827963, "grad_norm": 0.263671875, "learning_rate": 0.00026308946789021034, "loss": 0.1486, "step": 84620 }, { "epoch": 0.15004180042358944, "grad_norm": 0.765625, "learning_rate": 0.00026306757811009215, "loss": 0.1758, "step": 84622 }, { "epoch": 0.15004534658889926, "grad_norm": 0.478515625, "learning_rate": 0.0002630456919902303, "loss": 0.2016, "step": 84624 }, { "epoch": 0.15004889275420907, "grad_norm": 0.330078125, "learning_rate": 0.0002630238095307205, "loss": 0.1593, "step": 84626 }, { "epoch": 0.1500524389195189, "grad_norm": 0.4765625, "learning_rate": 0.00026300193073165837, "loss": 0.2001, "step": 84628 }, { "epoch": 0.1500559850848287, "grad_norm": 0.251953125, "learning_rate": 0.00026298005559313964, "loss": 0.1997, "step": 84630 }, { "epoch": 0.15005953125013852, "grad_norm": 0.91015625, "learning_rate": 0.00026295818411525975, "loss": 0.1775, "step": 84632 }, { "epoch": 0.15006307741544833, "grad_norm": 0.49609375, "learning_rate": 0.0002629363162981148, "loss": 0.1599, "step": 84634 }, { "epoch": 0.15006662358075815, "grad_norm": 0.6328125, "learning_rate": 0.00026291445214179996, "loss": 0.2151, "step": 84636 }, { "epoch": 0.15007016974606796, "grad_norm": 2.4375, "learning_rate": 0.0002628925916464112, "loss": 0.3738, "step": 84638 }, { "epoch": 0.15007371591137778, "grad_norm": 1.015625, "learning_rate": 0.00026287073481204396, "loss": 0.1707, "step": 84640 }, { "epoch": 0.1500772620766876, "grad_norm": 0.64453125, "learning_rate": 0.00026284888163879386, "loss": 0.2286, "step": 84642 }, { "epoch": 0.1500808082419974, "grad_norm": 0.61328125, "learning_rate": 0.00026282703212675653, "loss": 0.22, "step": 84644 }, { "epoch": 0.15008435440730722, "grad_norm": 0.58984375, "learning_rate": 0.0002628051862760274, "loss": 0.2314, "step": 84646 }, { "epoch": 0.15008790057261703, "grad_norm": 0.224609375, "learning_rate": 0.00026278334408670216, "loss": 0.1701, "step": 84648 }, { "epoch": 0.15009144673792685, "grad_norm": 0.80859375, "learning_rate": 0.0002627615055588762, "loss": 0.278, "step": 84650 }, { "epoch": 0.15009499290323666, "grad_norm": 0.302734375, "learning_rate": 0.0002627396706926451, "loss": 0.1782, "step": 84652 }, { "epoch": 0.15009853906854648, "grad_norm": 0.59765625, "learning_rate": 0.00026271783948810434, "loss": 0.2301, "step": 84654 }, { "epoch": 0.1501020852338563, "grad_norm": 0.2490234375, "learning_rate": 0.0002626960119453495, "loss": 0.1069, "step": 84656 }, { "epoch": 0.1501056313991661, "grad_norm": 0.29296875, "learning_rate": 0.00026267418806447573, "loss": 0.1227, "step": 84658 }, { "epoch": 0.15010917756447592, "grad_norm": 4.125, "learning_rate": 0.00026265236784557875, "loss": 0.2845, "step": 84660 }, { "epoch": 0.15011272372978576, "grad_norm": 0.25, "learning_rate": 0.00026263055128875404, "loss": 0.1714, "step": 84662 }, { "epoch": 0.15011626989509558, "grad_norm": 0.205078125, "learning_rate": 0.0002626087383940969, "loss": 0.2073, "step": 84664 }, { "epoch": 0.1501198160604054, "grad_norm": 1.015625, "learning_rate": 0.00026258692916170264, "loss": 0.2247, "step": 84666 }, { "epoch": 0.1501233622257152, "grad_norm": 1.03125, "learning_rate": 0.0002625651235916666, "loss": 0.1604, "step": 84668 }, { "epoch": 0.15012690839102502, "grad_norm": 0.291015625, "learning_rate": 0.00026254332168408465, "loss": 0.1706, "step": 84670 }, { "epoch": 0.15013045455633484, "grad_norm": 3.828125, "learning_rate": 0.0002625215234390513, "loss": 0.2037, "step": 84672 }, { "epoch": 0.15013400072164465, "grad_norm": 0.2138671875, "learning_rate": 0.0002624997288566627, "loss": 0.1985, "step": 84674 }, { "epoch": 0.15013754688695446, "grad_norm": 0.3125, "learning_rate": 0.00026247793793701354, "loss": 0.2083, "step": 84676 }, { "epoch": 0.15014109305226428, "grad_norm": 0.68359375, "learning_rate": 0.00026245615068019965, "loss": 0.2607, "step": 84678 }, { "epoch": 0.1501446392175741, "grad_norm": 1.9609375, "learning_rate": 0.0002624343670863158, "loss": 0.2606, "step": 84680 }, { "epoch": 0.1501481853828839, "grad_norm": 0.7734375, "learning_rate": 0.00026241258715545767, "loss": 0.2003, "step": 84682 }, { "epoch": 0.15015173154819372, "grad_norm": 0.859375, "learning_rate": 0.00026239081088772035, "loss": 0.2005, "step": 84684 }, { "epoch": 0.15015527771350354, "grad_norm": 0.3515625, "learning_rate": 0.000262369038283199, "loss": 0.2131, "step": 84686 }, { "epoch": 0.15015882387881335, "grad_norm": 0.22265625, "learning_rate": 0.0002623472693419891, "loss": 0.1457, "step": 84688 }, { "epoch": 0.15016237004412317, "grad_norm": 0.291015625, "learning_rate": 0.0002623255040641854, "loss": 0.1775, "step": 84690 }, { "epoch": 0.15016591620943298, "grad_norm": 0.78515625, "learning_rate": 0.0002623037424498836, "loss": 0.1944, "step": 84692 }, { "epoch": 0.1501694623747428, "grad_norm": 0.3359375, "learning_rate": 0.0002622819844991786, "loss": 0.3291, "step": 84694 }, { "epoch": 0.1501730085400526, "grad_norm": 0.310546875, "learning_rate": 0.0002622602302121656, "loss": 0.2246, "step": 84696 }, { "epoch": 0.15017655470536243, "grad_norm": 0.21484375, "learning_rate": 0.00026223847958893956, "loss": 0.2452, "step": 84698 }, { "epoch": 0.15018010087067224, "grad_norm": 0.427734375, "learning_rate": 0.00026221673262959594, "loss": 0.1335, "step": 84700 }, { "epoch": 0.15018364703598205, "grad_norm": 2.0, "learning_rate": 0.00026219498933422966, "loss": 0.2332, "step": 84702 }, { "epoch": 0.15018719320129187, "grad_norm": 0.25, "learning_rate": 0.0002621732497029358, "loss": 0.3939, "step": 84704 }, { "epoch": 0.15019073936660168, "grad_norm": 0.55859375, "learning_rate": 0.0002621515137358096, "loss": 0.1532, "step": 84706 }, { "epoch": 0.1501942855319115, "grad_norm": 0.21484375, "learning_rate": 0.0002621297814329457, "loss": 0.1506, "step": 84708 }, { "epoch": 0.1501978316972213, "grad_norm": 0.232421875, "learning_rate": 0.0002621080527944399, "loss": 0.2005, "step": 84710 }, { "epoch": 0.15020137786253113, "grad_norm": 2.59375, "learning_rate": 0.0002620863278203864, "loss": 0.3314, "step": 84712 }, { "epoch": 0.15020492402784094, "grad_norm": 0.298828125, "learning_rate": 0.00026206460651088075, "loss": 0.1668, "step": 84714 }, { "epoch": 0.15020847019315076, "grad_norm": 0.275390625, "learning_rate": 0.00026204288886601756, "loss": 0.2151, "step": 84716 }, { "epoch": 0.15021201635846057, "grad_norm": 0.46484375, "learning_rate": 0.0002620211748858924, "loss": 0.2263, "step": 84718 }, { "epoch": 0.15021556252377039, "grad_norm": 0.8125, "learning_rate": 0.0002619994645705996, "loss": 0.1609, "step": 84720 }, { "epoch": 0.1502191086890802, "grad_norm": 1.171875, "learning_rate": 0.00026197775792023454, "loss": 0.1563, "step": 84722 }, { "epoch": 0.15022265485439001, "grad_norm": 0.33984375, "learning_rate": 0.0002619560549348919, "loss": 0.1827, "step": 84724 }, { "epoch": 0.15022620101969983, "grad_norm": 0.64453125, "learning_rate": 0.00026193435561466675, "loss": 0.1987, "step": 84726 }, { "epoch": 0.15022974718500964, "grad_norm": 0.3515625, "learning_rate": 0.00026191265995965387, "loss": 0.1853, "step": 84728 }, { "epoch": 0.15023329335031946, "grad_norm": 0.380859375, "learning_rate": 0.000261890967969948, "loss": 0.0959, "step": 84730 }, { "epoch": 0.15023683951562927, "grad_norm": 0.59375, "learning_rate": 0.0002618692796456444, "loss": 0.2225, "step": 84732 }, { "epoch": 0.1502403856809391, "grad_norm": 0.53125, "learning_rate": 0.00026184759498683763, "loss": 0.2163, "step": 84734 }, { "epoch": 0.1502439318462489, "grad_norm": 0.37890625, "learning_rate": 0.0002618259139936229, "loss": 0.1977, "step": 84736 }, { "epoch": 0.15024747801155872, "grad_norm": 0.197265625, "learning_rate": 0.00026180423666609436, "loss": 0.1592, "step": 84738 }, { "epoch": 0.15025102417686853, "grad_norm": 0.515625, "learning_rate": 0.0002617825630043474, "loss": 0.1878, "step": 84740 }, { "epoch": 0.15025457034217835, "grad_norm": 0.189453125, "learning_rate": 0.0002617608930084767, "loss": 0.1146, "step": 84742 }, { "epoch": 0.15025811650748816, "grad_norm": 0.328125, "learning_rate": 0.00026173922667857693, "loss": 0.2344, "step": 84744 }, { "epoch": 0.15026166267279797, "grad_norm": 0.27734375, "learning_rate": 0.0002617175640147428, "loss": 0.1319, "step": 84746 }, { "epoch": 0.1502652088381078, "grad_norm": 0.455078125, "learning_rate": 0.00026169590501706904, "loss": 0.1439, "step": 84748 }, { "epoch": 0.1502687550034176, "grad_norm": 0.2314453125, "learning_rate": 0.0002616742496856505, "loss": 0.194, "step": 84750 }, { "epoch": 0.15027230116872745, "grad_norm": 0.51953125, "learning_rate": 0.0002616525980205819, "loss": 0.1507, "step": 84752 }, { "epoch": 0.15027584733403726, "grad_norm": 0.330078125, "learning_rate": 0.0002616309500219578, "loss": 0.2545, "step": 84754 }, { "epoch": 0.15027939349934707, "grad_norm": 0.59765625, "learning_rate": 0.0002616093056898728, "loss": 0.2501, "step": 84756 }, { "epoch": 0.1502829396646569, "grad_norm": 0.32421875, "learning_rate": 0.00026158766502442195, "loss": 0.169, "step": 84758 }, { "epoch": 0.1502864858299667, "grad_norm": 0.453125, "learning_rate": 0.0002615660280256995, "loss": 0.1504, "step": 84760 }, { "epoch": 0.15029003199527652, "grad_norm": 0.294921875, "learning_rate": 0.0002615443946938004, "loss": 0.1943, "step": 84762 }, { "epoch": 0.15029357816058633, "grad_norm": 0.91015625, "learning_rate": 0.000261522765028819, "loss": 0.3482, "step": 84764 }, { "epoch": 0.15029712432589615, "grad_norm": 0.228515625, "learning_rate": 0.00026150113903084974, "loss": 0.1596, "step": 84766 }, { "epoch": 0.15030067049120596, "grad_norm": 0.29296875, "learning_rate": 0.00026147951669998787, "loss": 0.2076, "step": 84768 }, { "epoch": 0.15030421665651578, "grad_norm": 0.73828125, "learning_rate": 0.00026145789803632715, "loss": 0.1633, "step": 84770 }, { "epoch": 0.1503077628218256, "grad_norm": 0.275390625, "learning_rate": 0.0002614362830399627, "loss": 0.1708, "step": 84772 }, { "epoch": 0.1503113089871354, "grad_norm": 0.1884765625, "learning_rate": 0.0002614146717109887, "loss": 0.1789, "step": 84774 }, { "epoch": 0.15031485515244522, "grad_norm": 0.26953125, "learning_rate": 0.00026139306404950003, "loss": 0.1896, "step": 84776 }, { "epoch": 0.15031840131775503, "grad_norm": 0.86328125, "learning_rate": 0.00026137146005559083, "loss": 0.274, "step": 84778 }, { "epoch": 0.15032194748306485, "grad_norm": 1.0703125, "learning_rate": 0.00026134985972935576, "loss": 0.2223, "step": 84780 }, { "epoch": 0.15032549364837466, "grad_norm": 5.78125, "learning_rate": 0.0002613282630708893, "loss": 0.2454, "step": 84782 }, { "epoch": 0.15032903981368448, "grad_norm": 0.23828125, "learning_rate": 0.0002613066700802858, "loss": 0.22, "step": 84784 }, { "epoch": 0.1503325859789943, "grad_norm": 0.26953125, "learning_rate": 0.00026128508075763977, "loss": 0.1217, "step": 84786 }, { "epoch": 0.1503361321443041, "grad_norm": 0.390625, "learning_rate": 0.00026126349510304555, "loss": 0.2057, "step": 84788 }, { "epoch": 0.15033967830961392, "grad_norm": 0.4453125, "learning_rate": 0.0002612419131165977, "loss": 0.1582, "step": 84790 }, { "epoch": 0.15034322447492374, "grad_norm": 0.2412109375, "learning_rate": 0.0002612203347983905, "loss": 0.2239, "step": 84792 }, { "epoch": 0.15034677064023355, "grad_norm": 0.392578125, "learning_rate": 0.0002611987601485184, "loss": 0.2726, "step": 84794 }, { "epoch": 0.15035031680554337, "grad_norm": 0.8203125, "learning_rate": 0.0002611771891670754, "loss": 0.1653, "step": 84796 }, { "epoch": 0.15035386297085318, "grad_norm": 0.4921875, "learning_rate": 0.00026115562185415644, "loss": 0.1944, "step": 84798 }, { "epoch": 0.150357409136163, "grad_norm": 0.220703125, "learning_rate": 0.0002611340582098555, "loss": 0.1404, "step": 84800 }, { "epoch": 0.1503609553014728, "grad_norm": 0.68359375, "learning_rate": 0.00026111249823426686, "loss": 0.1776, "step": 84802 }, { "epoch": 0.15036450146678262, "grad_norm": 1.125, "learning_rate": 0.00026109094192748486, "loss": 0.167, "step": 84804 }, { "epoch": 0.15036804763209244, "grad_norm": 1.5, "learning_rate": 0.00026106938928960365, "loss": 0.3387, "step": 84806 }, { "epoch": 0.15037159379740225, "grad_norm": 1.2265625, "learning_rate": 0.0002610478403207179, "loss": 0.2168, "step": 84808 }, { "epoch": 0.15037513996271207, "grad_norm": 0.3671875, "learning_rate": 0.00026102629502092135, "loss": 0.1575, "step": 84810 }, { "epoch": 0.15037868612802188, "grad_norm": 0.87109375, "learning_rate": 0.00026100475339030854, "loss": 0.1493, "step": 84812 }, { "epoch": 0.1503822322933317, "grad_norm": 0.375, "learning_rate": 0.00026098321542897366, "loss": 0.2175, "step": 84814 }, { "epoch": 0.1503857784586415, "grad_norm": 0.3125, "learning_rate": 0.0002609616811370107, "loss": 0.1836, "step": 84816 }, { "epoch": 0.15038932462395133, "grad_norm": 0.279296875, "learning_rate": 0.00026094015051451413, "loss": 0.1494, "step": 84818 }, { "epoch": 0.15039287078926114, "grad_norm": 1.953125, "learning_rate": 0.0002609186235615777, "loss": 0.1641, "step": 84820 }, { "epoch": 0.15039641695457096, "grad_norm": 0.314453125, "learning_rate": 0.00026089710027829604, "loss": 0.1455, "step": 84822 }, { "epoch": 0.15039996311988077, "grad_norm": 0.8203125, "learning_rate": 0.0002608755806647628, "loss": 0.1926, "step": 84824 }, { "epoch": 0.15040350928519058, "grad_norm": 0.58984375, "learning_rate": 0.00026085406472107283, "loss": 0.1737, "step": 84826 }, { "epoch": 0.1504070554505004, "grad_norm": 0.61328125, "learning_rate": 0.00026083255244731924, "loss": 0.3839, "step": 84828 }, { "epoch": 0.1504106016158102, "grad_norm": 0.2216796875, "learning_rate": 0.0002608110438435969, "loss": 0.2472, "step": 84830 }, { "epoch": 0.15041414778112003, "grad_norm": 0.255859375, "learning_rate": 0.0002607895389099995, "loss": 0.2008, "step": 84832 }, { "epoch": 0.15041769394642984, "grad_norm": 1.5234375, "learning_rate": 0.00026076803764662126, "loss": 0.2031, "step": 84834 }, { "epoch": 0.15042124011173966, "grad_norm": 0.400390625, "learning_rate": 0.0002607465400535562, "loss": 0.1666, "step": 84836 }, { "epoch": 0.15042478627704947, "grad_norm": 0.7265625, "learning_rate": 0.00026072504613089805, "loss": 0.3107, "step": 84838 }, { "epoch": 0.15042833244235929, "grad_norm": 0.2041015625, "learning_rate": 0.0002607035558787412, "loss": 0.1206, "step": 84840 }, { "epoch": 0.1504318786076691, "grad_norm": 1.5078125, "learning_rate": 0.00026068206929717936, "loss": 0.3091, "step": 84842 }, { "epoch": 0.15043542477297894, "grad_norm": 5.65625, "learning_rate": 0.00026066058638630677, "loss": 0.1393, "step": 84844 }, { "epoch": 0.15043897093828876, "grad_norm": 0.328125, "learning_rate": 0.000260639107146217, "loss": 0.2101, "step": 84846 }, { "epoch": 0.15044251710359857, "grad_norm": 0.4453125, "learning_rate": 0.00026061763157700444, "loss": 0.1549, "step": 84848 }, { "epoch": 0.1504460632689084, "grad_norm": 0.3359375, "learning_rate": 0.0002605961596787627, "loss": 0.1887, "step": 84850 }, { "epoch": 0.1504496094342182, "grad_norm": 0.384765625, "learning_rate": 0.00026057469145158584, "loss": 0.1504, "step": 84852 }, { "epoch": 0.15045315559952802, "grad_norm": 0.515625, "learning_rate": 0.00026055322689556767, "loss": 0.1437, "step": 84854 }, { "epoch": 0.15045670176483783, "grad_norm": 1.7109375, "learning_rate": 0.000260531766010802, "loss": 0.3376, "step": 84856 }, { "epoch": 0.15046024793014764, "grad_norm": 0.267578125, "learning_rate": 0.0002605103087973829, "loss": 0.134, "step": 84858 }, { "epoch": 0.15046379409545746, "grad_norm": 0.490234375, "learning_rate": 0.0002604888552554039, "loss": 0.2802, "step": 84860 }, { "epoch": 0.15046734026076727, "grad_norm": 0.3359375, "learning_rate": 0.00026046740538495907, "loss": 0.1785, "step": 84862 }, { "epoch": 0.1504708864260771, "grad_norm": 0.86328125, "learning_rate": 0.00026044595918614207, "loss": 0.1809, "step": 84864 }, { "epoch": 0.1504744325913869, "grad_norm": 0.6953125, "learning_rate": 0.000260424516659047, "loss": 0.2114, "step": 84866 }, { "epoch": 0.15047797875669672, "grad_norm": 0.5625, "learning_rate": 0.00026040307780376715, "loss": 0.1832, "step": 84868 }, { "epoch": 0.15048152492200653, "grad_norm": 0.2734375, "learning_rate": 0.0002603816426203967, "loss": 0.182, "step": 84870 }, { "epoch": 0.15048507108731635, "grad_norm": 0.259765625, "learning_rate": 0.00026036021110902925, "loss": 0.1705, "step": 84872 }, { "epoch": 0.15048861725262616, "grad_norm": 0.357421875, "learning_rate": 0.00026033878326975845, "loss": 0.1717, "step": 84874 }, { "epoch": 0.15049216341793598, "grad_norm": 0.375, "learning_rate": 0.0002603173591026782, "loss": 0.2157, "step": 84876 }, { "epoch": 0.1504957095832458, "grad_norm": 0.33984375, "learning_rate": 0.00026029593860788186, "loss": 0.1825, "step": 84878 }, { "epoch": 0.1504992557485556, "grad_norm": 0.447265625, "learning_rate": 0.0002602745217854635, "loss": 0.2369, "step": 84880 }, { "epoch": 0.15050280191386542, "grad_norm": 0.70703125, "learning_rate": 0.00026025310863551655, "loss": 0.2212, "step": 84882 }, { "epoch": 0.15050634807917523, "grad_norm": 0.302734375, "learning_rate": 0.0002602316991581347, "loss": 0.1766, "step": 84884 }, { "epoch": 0.15050989424448505, "grad_norm": 0.8203125, "learning_rate": 0.0002602102933534116, "loss": 0.1572, "step": 84886 }, { "epoch": 0.15051344040979486, "grad_norm": 0.58984375, "learning_rate": 0.0002601888912214409, "loss": 0.2344, "step": 84888 }, { "epoch": 0.15051698657510468, "grad_norm": 0.4453125, "learning_rate": 0.00026016749276231614, "loss": 0.1911, "step": 84890 }, { "epoch": 0.1505205327404145, "grad_norm": 0.48828125, "learning_rate": 0.000260146097976131, "loss": 0.1678, "step": 84892 }, { "epoch": 0.1505240789057243, "grad_norm": 0.48828125, "learning_rate": 0.00026012470686297894, "loss": 0.3629, "step": 84894 }, { "epoch": 0.15052762507103412, "grad_norm": 0.2734375, "learning_rate": 0.00026010331942295345, "loss": 0.1723, "step": 84896 }, { "epoch": 0.15053117123634394, "grad_norm": 0.337890625, "learning_rate": 0.00026008193565614843, "loss": 0.1756, "step": 84898 }, { "epoch": 0.15053471740165375, "grad_norm": 0.2431640625, "learning_rate": 0.0002600605555626569, "loss": 0.1349, "step": 84900 }, { "epoch": 0.15053826356696356, "grad_norm": 0.61328125, "learning_rate": 0.00026003917914257273, "loss": 0.1925, "step": 84902 }, { "epoch": 0.15054180973227338, "grad_norm": 0.37890625, "learning_rate": 0.0002600178063959891, "loss": 0.1728, "step": 84904 }, { "epoch": 0.1505453558975832, "grad_norm": 0.486328125, "learning_rate": 0.0002599964373229998, "loss": 0.2011, "step": 84906 }, { "epoch": 0.150548902062893, "grad_norm": 0.333984375, "learning_rate": 0.00025997507192369824, "loss": 0.2235, "step": 84908 }, { "epoch": 0.15055244822820282, "grad_norm": 0.34375, "learning_rate": 0.00025995371019817766, "loss": 0.1692, "step": 84910 }, { "epoch": 0.15055599439351264, "grad_norm": 0.65234375, "learning_rate": 0.0002599323521465317, "loss": 0.2426, "step": 84912 }, { "epoch": 0.15055954055882245, "grad_norm": 0.46484375, "learning_rate": 0.00025991099776885346, "loss": 0.1067, "step": 84914 }, { "epoch": 0.15056308672413227, "grad_norm": 0.2890625, "learning_rate": 0.00025988964706523675, "loss": 0.1902, "step": 84916 }, { "epoch": 0.15056663288944208, "grad_norm": 3.71875, "learning_rate": 0.00025986830003577456, "loss": 0.1646, "step": 84918 }, { "epoch": 0.1505701790547519, "grad_norm": 2.4375, "learning_rate": 0.00025984695668056055, "loss": 0.1962, "step": 84920 }, { "epoch": 0.1505737252200617, "grad_norm": 1.6328125, "learning_rate": 0.0002598256169996878, "loss": 0.4945, "step": 84922 }, { "epoch": 0.15057727138537153, "grad_norm": 0.25, "learning_rate": 0.00025980428099324997, "loss": 0.1854, "step": 84924 }, { "epoch": 0.15058081755068134, "grad_norm": 0.2119140625, "learning_rate": 0.00025978294866133993, "loss": 0.1685, "step": 84926 }, { "epoch": 0.15058436371599115, "grad_norm": 0.294921875, "learning_rate": 0.00025976162000405147, "loss": 0.1495, "step": 84928 }, { "epoch": 0.15058790988130097, "grad_norm": 0.56640625, "learning_rate": 0.00025974029502147747, "loss": 0.1988, "step": 84930 }, { "epoch": 0.15059145604661078, "grad_norm": 0.322265625, "learning_rate": 0.0002597189737137113, "loss": 0.1694, "step": 84932 }, { "epoch": 0.15059500221192063, "grad_norm": 0.41015625, "learning_rate": 0.00025969765608084637, "loss": 0.1711, "step": 84934 }, { "epoch": 0.15059854837723044, "grad_norm": 0.294921875, "learning_rate": 0.00025967634212297565, "loss": 0.1791, "step": 84936 }, { "epoch": 0.15060209454254025, "grad_norm": 0.349609375, "learning_rate": 0.0002596550318401925, "loss": 0.2355, "step": 84938 }, { "epoch": 0.15060564070785007, "grad_norm": 0.26953125, "learning_rate": 0.0002596337252325903, "loss": 0.4229, "step": 84940 }, { "epoch": 0.15060918687315988, "grad_norm": 0.28125, "learning_rate": 0.0002596124223002619, "loss": 0.1793, "step": 84942 }, { "epoch": 0.1506127330384697, "grad_norm": 0.287109375, "learning_rate": 0.00025959112304330047, "loss": 0.1433, "step": 84944 }, { "epoch": 0.1506162792037795, "grad_norm": 2.46875, "learning_rate": 0.00025956982746179946, "loss": 0.2699, "step": 84946 }, { "epoch": 0.15061982536908933, "grad_norm": 0.62890625, "learning_rate": 0.0002595485355558519, "loss": 0.1822, "step": 84948 }, { "epoch": 0.15062337153439914, "grad_norm": 0.392578125, "learning_rate": 0.00025952724732555084, "loss": 0.1675, "step": 84950 }, { "epoch": 0.15062691769970896, "grad_norm": 0.5703125, "learning_rate": 0.0002595059627709894, "loss": 0.2831, "step": 84952 }, { "epoch": 0.15063046386501877, "grad_norm": 0.490234375, "learning_rate": 0.00025948468189226047, "loss": 0.1701, "step": 84954 }, { "epoch": 0.15063401003032859, "grad_norm": 0.462890625, "learning_rate": 0.0002594634046894576, "loss": 0.2112, "step": 84956 }, { "epoch": 0.1506375561956384, "grad_norm": 0.328125, "learning_rate": 0.0002594421311626733, "loss": 0.1684, "step": 84958 }, { "epoch": 0.15064110236094821, "grad_norm": 0.546875, "learning_rate": 0.00025942086131200095, "loss": 0.1593, "step": 84960 }, { "epoch": 0.15064464852625803, "grad_norm": 0.8671875, "learning_rate": 0.00025939959513753344, "loss": 0.2035, "step": 84962 }, { "epoch": 0.15064819469156784, "grad_norm": 0.28515625, "learning_rate": 0.000259378332639364, "loss": 0.2078, "step": 84964 }, { "epoch": 0.15065174085687766, "grad_norm": 1.09375, "learning_rate": 0.0002593570738175852, "loss": 0.1794, "step": 84966 }, { "epoch": 0.15065528702218747, "grad_norm": 0.2431640625, "learning_rate": 0.0002593358186722904, "loss": 0.1586, "step": 84968 }, { "epoch": 0.1506588331874973, "grad_norm": 0.71484375, "learning_rate": 0.0002593145672035725, "loss": 0.138, "step": 84970 }, { "epoch": 0.1506623793528071, "grad_norm": 0.2353515625, "learning_rate": 0.00025929331941152426, "loss": 0.1664, "step": 84972 }, { "epoch": 0.15066592551811692, "grad_norm": 0.431640625, "learning_rate": 0.0002592720752962387, "loss": 0.2102, "step": 84974 }, { "epoch": 0.15066947168342673, "grad_norm": 0.3984375, "learning_rate": 0.0002592508348578086, "loss": 0.1493, "step": 84976 }, { "epoch": 0.15067301784873655, "grad_norm": 0.3671875, "learning_rate": 0.0002592295980963271, "loss": 0.1832, "step": 84978 }, { "epoch": 0.15067656401404636, "grad_norm": 0.33203125, "learning_rate": 0.00025920836501188706, "loss": 0.1523, "step": 84980 }, { "epoch": 0.15068011017935617, "grad_norm": 0.494140625, "learning_rate": 0.00025918713560458114, "loss": 0.1506, "step": 84982 }, { "epoch": 0.150683656344666, "grad_norm": 0.5625, "learning_rate": 0.0002591659098745024, "loss": 0.143, "step": 84984 }, { "epoch": 0.1506872025099758, "grad_norm": 0.61328125, "learning_rate": 0.0002591446878217433, "loss": 0.1594, "step": 84986 }, { "epoch": 0.15069074867528562, "grad_norm": 0.6796875, "learning_rate": 0.0002591234694463971, "loss": 0.2281, "step": 84988 }, { "epoch": 0.15069429484059543, "grad_norm": 1.453125, "learning_rate": 0.0002591022547485564, "loss": 0.1799, "step": 84990 }, { "epoch": 0.15069784100590525, "grad_norm": 0.376953125, "learning_rate": 0.0002590810437283139, "loss": 0.2139, "step": 84992 }, { "epoch": 0.15070138717121506, "grad_norm": 0.427734375, "learning_rate": 0.00025905983638576237, "loss": 0.2449, "step": 84994 }, { "epoch": 0.15070493333652488, "grad_norm": 0.302734375, "learning_rate": 0.0002590386327209948, "loss": 0.204, "step": 84996 }, { "epoch": 0.1507084795018347, "grad_norm": 0.265625, "learning_rate": 0.0002590174327341038, "loss": 0.2027, "step": 84998 }, { "epoch": 0.1507120256671445, "grad_norm": 0.9765625, "learning_rate": 0.00025899623642518206, "loss": 0.3424, "step": 85000 }, { "epoch": 0.15071557183245432, "grad_norm": 0.2451171875, "learning_rate": 0.0002589750437943221, "loss": 0.1396, "step": 85002 }, { "epoch": 0.15071911799776413, "grad_norm": 0.49609375, "learning_rate": 0.00025895385484161684, "loss": 0.1963, "step": 85004 }, { "epoch": 0.15072266416307395, "grad_norm": 0.357421875, "learning_rate": 0.000258932669567159, "loss": 0.1294, "step": 85006 }, { "epoch": 0.15072621032838376, "grad_norm": 0.271484375, "learning_rate": 0.0002589114879710409, "loss": 0.1545, "step": 85008 }, { "epoch": 0.15072975649369358, "grad_norm": 1.5625, "learning_rate": 0.0002588903100533556, "loss": 0.2844, "step": 85010 }, { "epoch": 0.1507333026590034, "grad_norm": 0.2265625, "learning_rate": 0.0002588691358141953, "loss": 0.1782, "step": 85012 }, { "epoch": 0.1507368488243132, "grad_norm": 0.76953125, "learning_rate": 0.0002588479652536531, "loss": 0.1889, "step": 85014 }, { "epoch": 0.15074039498962302, "grad_norm": 0.236328125, "learning_rate": 0.00025882679837182094, "loss": 0.2099, "step": 85016 }, { "epoch": 0.15074394115493284, "grad_norm": 0.9453125, "learning_rate": 0.000258805635168792, "loss": 0.2082, "step": 85018 }, { "epoch": 0.15074748732024265, "grad_norm": 0.2216796875, "learning_rate": 0.00025878447564465856, "loss": 0.2439, "step": 85020 }, { "epoch": 0.15075103348555247, "grad_norm": 0.2353515625, "learning_rate": 0.00025876331979951317, "loss": 0.1969, "step": 85022 }, { "epoch": 0.1507545796508623, "grad_norm": 1.3359375, "learning_rate": 0.00025874216763344837, "loss": 0.2944, "step": 85024 }, { "epoch": 0.15075812581617212, "grad_norm": 0.5703125, "learning_rate": 0.0002587210191465565, "loss": 0.2017, "step": 85026 }, { "epoch": 0.15076167198148194, "grad_norm": 0.5, "learning_rate": 0.00025869987433893056, "loss": 0.2025, "step": 85028 }, { "epoch": 0.15076521814679175, "grad_norm": 0.95703125, "learning_rate": 0.0002586787332106624, "loss": 0.1689, "step": 85030 }, { "epoch": 0.15076876431210157, "grad_norm": 0.185546875, "learning_rate": 0.00025865759576184494, "loss": 0.142, "step": 85032 }, { "epoch": 0.15077231047741138, "grad_norm": 0.65625, "learning_rate": 0.00025863646199257025, "loss": 0.1584, "step": 85034 }, { "epoch": 0.1507758566427212, "grad_norm": 0.67578125, "learning_rate": 0.0002586153319029312, "loss": 0.198, "step": 85036 }, { "epoch": 0.150779402808031, "grad_norm": 0.189453125, "learning_rate": 0.00025859420549301975, "loss": 0.2072, "step": 85038 }, { "epoch": 0.15078294897334082, "grad_norm": 0.201171875, "learning_rate": 0.0002585730827629287, "loss": 0.1814, "step": 85040 }, { "epoch": 0.15078649513865064, "grad_norm": 0.2578125, "learning_rate": 0.00025855196371275005, "loss": 0.2428, "step": 85042 }, { "epoch": 0.15079004130396045, "grad_norm": 0.640625, "learning_rate": 0.00025853084834257623, "loss": 0.1949, "step": 85044 }, { "epoch": 0.15079358746927027, "grad_norm": 0.24609375, "learning_rate": 0.00025850973665250004, "loss": 0.1562, "step": 85046 }, { "epoch": 0.15079713363458008, "grad_norm": 0.58203125, "learning_rate": 0.00025848862864261314, "loss": 0.1869, "step": 85048 }, { "epoch": 0.1508006797998899, "grad_norm": 0.3046875, "learning_rate": 0.00025846752431300834, "loss": 0.1141, "step": 85050 }, { "epoch": 0.1508042259651997, "grad_norm": 0.25390625, "learning_rate": 0.0002584464236637777, "loss": 0.176, "step": 85052 }, { "epoch": 0.15080777213050953, "grad_norm": 0.314453125, "learning_rate": 0.0002584253266950138, "loss": 0.1893, "step": 85054 }, { "epoch": 0.15081131829581934, "grad_norm": 1.1171875, "learning_rate": 0.0002584042334068083, "loss": 0.4651, "step": 85056 }, { "epoch": 0.15081486446112916, "grad_norm": 3.796875, "learning_rate": 0.0002583831437992541, "loss": 0.2483, "step": 85058 }, { "epoch": 0.15081841062643897, "grad_norm": 0.69140625, "learning_rate": 0.0002583620578724431, "loss": 0.1534, "step": 85060 }, { "epoch": 0.15082195679174878, "grad_norm": 0.294921875, "learning_rate": 0.0002583409756264675, "loss": 0.2415, "step": 85062 }, { "epoch": 0.1508255029570586, "grad_norm": 0.34375, "learning_rate": 0.00025831989706141974, "loss": 0.2056, "step": 85064 }, { "epoch": 0.1508290491223684, "grad_norm": 0.1572265625, "learning_rate": 0.0002582988221773916, "loss": 0.324, "step": 85066 }, { "epoch": 0.15083259528767823, "grad_norm": 0.2578125, "learning_rate": 0.0002582777509744757, "loss": 0.1874, "step": 85068 }, { "epoch": 0.15083614145298804, "grad_norm": 0.240234375, "learning_rate": 0.0002582566834527638, "loss": 0.1803, "step": 85070 }, { "epoch": 0.15083968761829786, "grad_norm": 0.30078125, "learning_rate": 0.00025823561961234846, "loss": 0.1717, "step": 85072 }, { "epoch": 0.15084323378360767, "grad_norm": 0.427734375, "learning_rate": 0.00025821455945332133, "loss": 0.2167, "step": 85074 }, { "epoch": 0.1508467799489175, "grad_norm": 0.28515625, "learning_rate": 0.0002581935029757749, "loss": 0.3214, "step": 85076 }, { "epoch": 0.1508503261142273, "grad_norm": 0.33984375, "learning_rate": 0.0002581724501798011, "loss": 0.1899, "step": 85078 }, { "epoch": 0.15085387227953712, "grad_norm": 0.21875, "learning_rate": 0.000258151401065492, "loss": 0.1952, "step": 85080 }, { "epoch": 0.15085741844484693, "grad_norm": 0.298828125, "learning_rate": 0.00025813035563293973, "loss": 0.1897, "step": 85082 }, { "epoch": 0.15086096461015674, "grad_norm": 0.6640625, "learning_rate": 0.00025810931388223603, "loss": 0.2875, "step": 85084 }, { "epoch": 0.15086451077546656, "grad_norm": 0.4921875, "learning_rate": 0.0002580882758134733, "loss": 0.155, "step": 85086 }, { "epoch": 0.15086805694077637, "grad_norm": 0.357421875, "learning_rate": 0.0002580672414267435, "loss": 0.2537, "step": 85088 }, { "epoch": 0.1508716031060862, "grad_norm": 0.3203125, "learning_rate": 0.0002580462107221385, "loss": 0.1519, "step": 85090 }, { "epoch": 0.150875149271396, "grad_norm": 0.21484375, "learning_rate": 0.0002580251836997501, "loss": 0.184, "step": 85092 }, { "epoch": 0.15087869543670582, "grad_norm": 0.5234375, "learning_rate": 0.0002580041603596708, "loss": 0.1508, "step": 85094 }, { "epoch": 0.15088224160201563, "grad_norm": 1.46875, "learning_rate": 0.00025798314070199204, "loss": 0.2601, "step": 85096 }, { "epoch": 0.15088578776732545, "grad_norm": 0.51171875, "learning_rate": 0.0002579621247268059, "loss": 0.1405, "step": 85098 }, { "epoch": 0.15088933393263526, "grad_norm": 0.2119140625, "learning_rate": 0.00025794111243420445, "loss": 0.1392, "step": 85100 }, { "epoch": 0.15089288009794508, "grad_norm": 0.70703125, "learning_rate": 0.0002579201038242792, "loss": 0.1969, "step": 85102 }, { "epoch": 0.1508964262632549, "grad_norm": 0.439453125, "learning_rate": 0.0002578990988971226, "loss": 0.1833, "step": 85104 }, { "epoch": 0.1508999724285647, "grad_norm": 0.578125, "learning_rate": 0.00025787809765282594, "loss": 0.1904, "step": 85106 }, { "epoch": 0.15090351859387452, "grad_norm": 0.462890625, "learning_rate": 0.00025785710009148144, "loss": 0.1246, "step": 85108 }, { "epoch": 0.15090706475918433, "grad_norm": 1.546875, "learning_rate": 0.00025783610621318067, "loss": 0.1818, "step": 85110 }, { "epoch": 0.15091061092449415, "grad_norm": 0.419921875, "learning_rate": 0.0002578151160180158, "loss": 0.2014, "step": 85112 }, { "epoch": 0.15091415708980396, "grad_norm": 0.66796875, "learning_rate": 0.0002577941295060782, "loss": 0.2339, "step": 85114 }, { "epoch": 0.1509177032551138, "grad_norm": 0.65234375, "learning_rate": 0.00025777314667746, "loss": 0.2146, "step": 85116 }, { "epoch": 0.15092124942042362, "grad_norm": 0.2578125, "learning_rate": 0.0002577521675322528, "loss": 0.1988, "step": 85118 }, { "epoch": 0.15092479558573343, "grad_norm": 0.51953125, "learning_rate": 0.0002577311920705484, "loss": 0.2011, "step": 85120 }, { "epoch": 0.15092834175104325, "grad_norm": 0.341796875, "learning_rate": 0.00025771022029243856, "loss": 0.1506, "step": 85122 }, { "epoch": 0.15093188791635306, "grad_norm": 0.2314453125, "learning_rate": 0.0002576892521980147, "loss": 0.1569, "step": 85124 }, { "epoch": 0.15093543408166288, "grad_norm": 0.275390625, "learning_rate": 0.00025766828778736904, "loss": 0.1308, "step": 85126 }, { "epoch": 0.1509389802469727, "grad_norm": 0.72265625, "learning_rate": 0.00025764732706059297, "loss": 0.2017, "step": 85128 }, { "epoch": 0.1509425264122825, "grad_norm": 0.70703125, "learning_rate": 0.0002576263700177782, "loss": 0.187, "step": 85130 }, { "epoch": 0.15094607257759232, "grad_norm": 0.74609375, "learning_rate": 0.0002576054166590162, "loss": 0.2015, "step": 85132 }, { "epoch": 0.15094961874290214, "grad_norm": 0.1962890625, "learning_rate": 0.000257584466984399, "loss": 0.2065, "step": 85134 }, { "epoch": 0.15095316490821195, "grad_norm": 0.419921875, "learning_rate": 0.000257563520994018, "loss": 0.2226, "step": 85136 }, { "epoch": 0.15095671107352177, "grad_norm": 0.439453125, "learning_rate": 0.0002575425786879647, "loss": 0.2247, "step": 85138 }, { "epoch": 0.15096025723883158, "grad_norm": 0.47265625, "learning_rate": 0.0002575216400663309, "loss": 0.1465, "step": 85140 }, { "epoch": 0.1509638034041414, "grad_norm": 0.365234375, "learning_rate": 0.0002575007051292079, "loss": 0.1817, "step": 85142 }, { "epoch": 0.1509673495694512, "grad_norm": 0.6953125, "learning_rate": 0.0002574797738766877, "loss": 0.1725, "step": 85144 }, { "epoch": 0.15097089573476102, "grad_norm": 0.5546875, "learning_rate": 0.00025745884630886133, "loss": 0.1564, "step": 85146 }, { "epoch": 0.15097444190007084, "grad_norm": 0.94921875, "learning_rate": 0.0002574379224258207, "loss": 0.1752, "step": 85148 }, { "epoch": 0.15097798806538065, "grad_norm": 0.43359375, "learning_rate": 0.00025741700222765726, "loss": 0.1681, "step": 85150 }, { "epoch": 0.15098153423069047, "grad_norm": 0.40625, "learning_rate": 0.0002573960857144622, "loss": 0.1246, "step": 85152 }, { "epoch": 0.15098508039600028, "grad_norm": 0.240234375, "learning_rate": 0.00025737517288632744, "loss": 0.1632, "step": 85154 }, { "epoch": 0.1509886265613101, "grad_norm": 1.2421875, "learning_rate": 0.000257354263743344, "loss": 0.2418, "step": 85156 }, { "epoch": 0.1509921727266199, "grad_norm": 0.34765625, "learning_rate": 0.0002573333582856036, "loss": 0.2433, "step": 85158 }, { "epoch": 0.15099571889192973, "grad_norm": 0.359375, "learning_rate": 0.0002573124565131976, "loss": 0.1237, "step": 85160 }, { "epoch": 0.15099926505723954, "grad_norm": 0.3828125, "learning_rate": 0.0002572915584262176, "loss": 0.2139, "step": 85162 }, { "epoch": 0.15100281122254935, "grad_norm": 1.5078125, "learning_rate": 0.0002572706640247547, "loss": 0.1796, "step": 85164 }, { "epoch": 0.15100635738785917, "grad_norm": 0.76953125, "learning_rate": 0.00025724977330890034, "loss": 0.2367, "step": 85166 }, { "epoch": 0.15100990355316898, "grad_norm": 1.6640625, "learning_rate": 0.000257228886278746, "loss": 0.2017, "step": 85168 }, { "epoch": 0.1510134497184788, "grad_norm": 0.333984375, "learning_rate": 0.000257208002934383, "loss": 0.1898, "step": 85170 }, { "epoch": 0.1510169958837886, "grad_norm": 0.59375, "learning_rate": 0.00025718712327590275, "loss": 0.2306, "step": 85172 }, { "epoch": 0.15102054204909843, "grad_norm": 0.416015625, "learning_rate": 0.00025716624730339616, "loss": 0.1655, "step": 85174 }, { "epoch": 0.15102408821440824, "grad_norm": 0.314453125, "learning_rate": 0.00025714537501695506, "loss": 0.162, "step": 85176 }, { "epoch": 0.15102763437971806, "grad_norm": 0.5859375, "learning_rate": 0.00025712450641667044, "loss": 0.172, "step": 85178 }, { "epoch": 0.15103118054502787, "grad_norm": 0.6640625, "learning_rate": 0.00025710364150263374, "loss": 0.1715, "step": 85180 }, { "epoch": 0.15103472671033769, "grad_norm": 0.4765625, "learning_rate": 0.0002570827802749358, "loss": 0.2064, "step": 85182 }, { "epoch": 0.1510382728756475, "grad_norm": 0.3203125, "learning_rate": 0.0002570619227336684, "loss": 0.2878, "step": 85184 }, { "epoch": 0.15104181904095731, "grad_norm": 1.703125, "learning_rate": 0.0002570410688789225, "loss": 0.2297, "step": 85186 }, { "epoch": 0.15104536520626713, "grad_norm": 0.294921875, "learning_rate": 0.00025702021871078923, "loss": 0.1913, "step": 85188 }, { "epoch": 0.15104891137157694, "grad_norm": 0.2001953125, "learning_rate": 0.00025699937222935996, "loss": 0.1809, "step": 85190 }, { "epoch": 0.15105245753688676, "grad_norm": 0.296875, "learning_rate": 0.00025697852943472565, "loss": 0.1741, "step": 85192 }, { "epoch": 0.15105600370219657, "grad_norm": 0.61328125, "learning_rate": 0.00025695769032697775, "loss": 0.1574, "step": 85194 }, { "epoch": 0.1510595498675064, "grad_norm": 0.55859375, "learning_rate": 0.00025693685490620706, "loss": 0.1457, "step": 85196 }, { "epoch": 0.1510630960328162, "grad_norm": 0.435546875, "learning_rate": 0.0002569160231725049, "loss": 0.1639, "step": 85198 }, { "epoch": 0.15106664219812602, "grad_norm": 0.5, "learning_rate": 0.00025689519512596214, "loss": 0.1431, "step": 85200 }, { "epoch": 0.15107018836343583, "grad_norm": 0.609375, "learning_rate": 0.0002568743707666705, "loss": 0.1374, "step": 85202 }, { "epoch": 0.15107373452874565, "grad_norm": 0.96875, "learning_rate": 0.0002568535500947202, "loss": 0.1502, "step": 85204 }, { "epoch": 0.1510772806940555, "grad_norm": 0.48046875, "learning_rate": 0.0002568327331102029, "loss": 0.1488, "step": 85206 }, { "epoch": 0.1510808268593653, "grad_norm": 0.56640625, "learning_rate": 0.0002568119198132095, "loss": 0.1863, "step": 85208 }, { "epoch": 0.15108437302467512, "grad_norm": 0.546875, "learning_rate": 0.00025679111020383095, "loss": 0.1623, "step": 85210 }, { "epoch": 0.15108791918998493, "grad_norm": 1.0234375, "learning_rate": 0.0002567703042821582, "loss": 0.5189, "step": 85212 }, { "epoch": 0.15109146535529475, "grad_norm": 1.359375, "learning_rate": 0.00025674950204828236, "loss": 0.2328, "step": 85214 }, { "epoch": 0.15109501152060456, "grad_norm": 0.38671875, "learning_rate": 0.00025672870350229435, "loss": 0.3142, "step": 85216 }, { "epoch": 0.15109855768591438, "grad_norm": 0.201171875, "learning_rate": 0.0002567079086442852, "loss": 0.1639, "step": 85218 }, { "epoch": 0.1511021038512242, "grad_norm": 0.400390625, "learning_rate": 0.0002566871174743459, "loss": 0.175, "step": 85220 }, { "epoch": 0.151105650016534, "grad_norm": 1.0390625, "learning_rate": 0.00025666632999256714, "loss": 0.2304, "step": 85222 }, { "epoch": 0.15110919618184382, "grad_norm": 0.27734375, "learning_rate": 0.0002566455461990401, "loss": 0.204, "step": 85224 }, { "epoch": 0.15111274234715363, "grad_norm": 0.482421875, "learning_rate": 0.00025662476609385556, "loss": 0.1527, "step": 85226 }, { "epoch": 0.15111628851246345, "grad_norm": 0.3359375, "learning_rate": 0.0002566039896771044, "loss": 0.1513, "step": 85228 }, { "epoch": 0.15111983467777326, "grad_norm": 0.30078125, "learning_rate": 0.0002565832169488775, "loss": 0.224, "step": 85230 }, { "epoch": 0.15112338084308308, "grad_norm": 0.59375, "learning_rate": 0.0002565624479092655, "loss": 0.2519, "step": 85232 }, { "epoch": 0.1511269270083929, "grad_norm": 0.87109375, "learning_rate": 0.00025654168255835976, "loss": 0.1662, "step": 85234 }, { "epoch": 0.1511304731737027, "grad_norm": 0.75, "learning_rate": 0.00025652092089625047, "loss": 0.1639, "step": 85236 }, { "epoch": 0.15113401933901252, "grad_norm": 0.34375, "learning_rate": 0.0002565001629230288, "loss": 0.179, "step": 85238 }, { "epoch": 0.15113756550432234, "grad_norm": 0.390625, "learning_rate": 0.00025647940863878546, "loss": 0.2049, "step": 85240 }, { "epoch": 0.15114111166963215, "grad_norm": 0.6796875, "learning_rate": 0.0002564586580436113, "loss": 0.1985, "step": 85242 }, { "epoch": 0.15114465783494196, "grad_norm": 0.435546875, "learning_rate": 0.0002564379111375969, "loss": 0.1904, "step": 85244 }, { "epoch": 0.15114820400025178, "grad_norm": 0.75390625, "learning_rate": 0.00025641716792083316, "loss": 0.2613, "step": 85246 }, { "epoch": 0.1511517501655616, "grad_norm": 0.39453125, "learning_rate": 0.0002563964283934106, "loss": 0.1571, "step": 85248 }, { "epoch": 0.1511552963308714, "grad_norm": 0.27734375, "learning_rate": 0.00025637569255542006, "loss": 0.1505, "step": 85250 }, { "epoch": 0.15115884249618122, "grad_norm": 0.306640625, "learning_rate": 0.0002563549604069524, "loss": 0.1326, "step": 85252 }, { "epoch": 0.15116238866149104, "grad_norm": 0.2470703125, "learning_rate": 0.0002563342319480978, "loss": 0.1474, "step": 85254 }, { "epoch": 0.15116593482680085, "grad_norm": 0.27734375, "learning_rate": 0.00025631350717894737, "loss": 0.1485, "step": 85256 }, { "epoch": 0.15116948099211067, "grad_norm": 0.302734375, "learning_rate": 0.0002562927860995915, "loss": 0.1773, "step": 85258 }, { "epoch": 0.15117302715742048, "grad_norm": 1.328125, "learning_rate": 0.00025627206871012113, "loss": 0.193, "step": 85260 }, { "epoch": 0.1511765733227303, "grad_norm": 0.337890625, "learning_rate": 0.0002562513550106263, "loss": 0.1802, "step": 85262 }, { "epoch": 0.1511801194880401, "grad_norm": 0.265625, "learning_rate": 0.00025623064500119815, "loss": 0.2707, "step": 85264 }, { "epoch": 0.15118366565334992, "grad_norm": 0.458984375, "learning_rate": 0.00025620993868192703, "loss": 0.2087, "step": 85266 }, { "epoch": 0.15118721181865974, "grad_norm": 0.369140625, "learning_rate": 0.00025618923605290355, "loss": 0.1843, "step": 85268 }, { "epoch": 0.15119075798396955, "grad_norm": 0.34375, "learning_rate": 0.00025616853711421815, "loss": 0.2282, "step": 85270 }, { "epoch": 0.15119430414927937, "grad_norm": 1.4140625, "learning_rate": 0.0002561478418659613, "loss": 0.1811, "step": 85272 }, { "epoch": 0.15119785031458918, "grad_norm": 0.412109375, "learning_rate": 0.0002561271503082237, "loss": 0.1798, "step": 85274 }, { "epoch": 0.151201396479899, "grad_norm": 0.2734375, "learning_rate": 0.0002561064624410958, "loss": 0.142, "step": 85276 }, { "epoch": 0.1512049426452088, "grad_norm": 0.50390625, "learning_rate": 0.00025608577826466807, "loss": 0.1434, "step": 85278 }, { "epoch": 0.15120848881051863, "grad_norm": 0.578125, "learning_rate": 0.00025606509777903073, "loss": 0.1777, "step": 85280 }, { "epoch": 0.15121203497582844, "grad_norm": 0.734375, "learning_rate": 0.0002560444209842747, "loss": 0.2154, "step": 85282 }, { "epoch": 0.15121558114113826, "grad_norm": 0.578125, "learning_rate": 0.00025602374788049004, "loss": 0.2385, "step": 85284 }, { "epoch": 0.15121912730644807, "grad_norm": 0.51171875, "learning_rate": 0.00025600307846776726, "loss": 0.1922, "step": 85286 }, { "epoch": 0.15122267347175788, "grad_norm": 0.66796875, "learning_rate": 0.0002559824127461969, "loss": 0.3122, "step": 85288 }, { "epoch": 0.1512262196370677, "grad_norm": 1.5625, "learning_rate": 0.0002559617507158689, "loss": 0.2226, "step": 85290 }, { "epoch": 0.1512297658023775, "grad_norm": 0.447265625, "learning_rate": 0.00025594109237687436, "loss": 0.2007, "step": 85292 }, { "epoch": 0.15123331196768733, "grad_norm": 0.3125, "learning_rate": 0.00025592043772930276, "loss": 0.1408, "step": 85294 }, { "epoch": 0.15123685813299717, "grad_norm": 0.359375, "learning_rate": 0.00025589978677324505, "loss": 0.1838, "step": 85296 }, { "epoch": 0.15124040429830699, "grad_norm": 1.140625, "learning_rate": 0.0002558791395087913, "loss": 0.2179, "step": 85298 }, { "epoch": 0.1512439504636168, "grad_norm": 0.34375, "learning_rate": 0.0002558584959360321, "loss": 0.1576, "step": 85300 }, { "epoch": 0.15124749662892661, "grad_norm": 0.337890625, "learning_rate": 0.00025583785605505716, "loss": 0.1818, "step": 85302 }, { "epoch": 0.15125104279423643, "grad_norm": 0.240234375, "learning_rate": 0.0002558172198659573, "loss": 0.1713, "step": 85304 }, { "epoch": 0.15125458895954624, "grad_norm": 1.0234375, "learning_rate": 0.0002557965873688225, "loss": 0.1554, "step": 85306 }, { "epoch": 0.15125813512485606, "grad_norm": 0.302734375, "learning_rate": 0.0002557759585637432, "loss": 0.1917, "step": 85308 }, { "epoch": 0.15126168129016587, "grad_norm": 0.80078125, "learning_rate": 0.0002557553334508093, "loss": 0.1549, "step": 85310 }, { "epoch": 0.1512652274554757, "grad_norm": 0.4375, "learning_rate": 0.0002557347120301112, "loss": 0.1532, "step": 85312 }, { "epoch": 0.1512687736207855, "grad_norm": 0.392578125, "learning_rate": 0.00025571409430173915, "loss": 0.2002, "step": 85314 }, { "epoch": 0.15127231978609532, "grad_norm": 0.5625, "learning_rate": 0.0002556934802657831, "loss": 0.1846, "step": 85316 }, { "epoch": 0.15127586595140513, "grad_norm": 0.57421875, "learning_rate": 0.0002556728699223335, "loss": 0.2184, "step": 85318 }, { "epoch": 0.15127941211671495, "grad_norm": 0.15234375, "learning_rate": 0.0002556522632714801, "loss": 0.2285, "step": 85320 }, { "epoch": 0.15128295828202476, "grad_norm": 0.50390625, "learning_rate": 0.00025563166031331334, "loss": 0.1753, "step": 85322 }, { "epoch": 0.15128650444733457, "grad_norm": 0.171875, "learning_rate": 0.0002556110610479234, "loss": 0.1556, "step": 85324 }, { "epoch": 0.1512900506126444, "grad_norm": 0.296875, "learning_rate": 0.0002555904654754, "loss": 0.1931, "step": 85326 }, { "epoch": 0.1512935967779542, "grad_norm": 0.255859375, "learning_rate": 0.0002555698735958335, "loss": 0.1834, "step": 85328 }, { "epoch": 0.15129714294326402, "grad_norm": 0.5, "learning_rate": 0.00025554928540931374, "loss": 0.1261, "step": 85330 }, { "epoch": 0.15130068910857383, "grad_norm": 0.412109375, "learning_rate": 0.000255528700915931, "loss": 0.2161, "step": 85332 }, { "epoch": 0.15130423527388365, "grad_norm": 4.0625, "learning_rate": 0.0002555081201157753, "loss": 0.3837, "step": 85334 }, { "epoch": 0.15130778143919346, "grad_norm": 0.2421875, "learning_rate": 0.00025548754300893647, "loss": 0.3365, "step": 85336 }, { "epoch": 0.15131132760450328, "grad_norm": 0.310546875, "learning_rate": 0.00025546696959550445, "loss": 0.1703, "step": 85338 }, { "epoch": 0.1513148737698131, "grad_norm": 0.259765625, "learning_rate": 0.0002554463998755694, "loss": 0.2005, "step": 85340 }, { "epoch": 0.1513184199351229, "grad_norm": 0.263671875, "learning_rate": 0.0002554258338492214, "loss": 0.169, "step": 85342 }, { "epoch": 0.15132196610043272, "grad_norm": 0.408203125, "learning_rate": 0.0002554052715165499, "loss": 0.2887, "step": 85344 }, { "epoch": 0.15132551226574253, "grad_norm": 0.419921875, "learning_rate": 0.00025538471287764536, "loss": 0.1423, "step": 85346 }, { "epoch": 0.15132905843105235, "grad_norm": 0.462890625, "learning_rate": 0.00025536415793259734, "loss": 0.1751, "step": 85348 }, { "epoch": 0.15133260459636216, "grad_norm": 0.5234375, "learning_rate": 0.0002553436066814961, "loss": 0.2071, "step": 85350 }, { "epoch": 0.15133615076167198, "grad_norm": 0.201171875, "learning_rate": 0.0002553230591244311, "loss": 0.1753, "step": 85352 }, { "epoch": 0.1513396969269818, "grad_norm": 0.87890625, "learning_rate": 0.00025530251526149244, "loss": 0.1502, "step": 85354 }, { "epoch": 0.1513432430922916, "grad_norm": 0.9609375, "learning_rate": 0.0002552819750927699, "loss": 0.2311, "step": 85356 }, { "epoch": 0.15134678925760142, "grad_norm": 0.373046875, "learning_rate": 0.0002552614386183534, "loss": 0.163, "step": 85358 }, { "epoch": 0.15135033542291124, "grad_norm": 0.31640625, "learning_rate": 0.0002552409058383326, "loss": 0.1703, "step": 85360 }, { "epoch": 0.15135388158822105, "grad_norm": 0.59765625, "learning_rate": 0.00025522037675279743, "loss": 0.1887, "step": 85362 }, { "epoch": 0.15135742775353087, "grad_norm": 0.20703125, "learning_rate": 0.00025519985136183754, "loss": 0.1574, "step": 85364 }, { "epoch": 0.15136097391884068, "grad_norm": 0.921875, "learning_rate": 0.000255179329665543, "loss": 0.2795, "step": 85366 }, { "epoch": 0.1513645200841505, "grad_norm": 0.6171875, "learning_rate": 0.00025515881166400317, "loss": 0.2257, "step": 85368 }, { "epoch": 0.1513680662494603, "grad_norm": 0.205078125, "learning_rate": 0.0002551382973573079, "loss": 0.1658, "step": 85370 }, { "epoch": 0.15137161241477012, "grad_norm": 0.23046875, "learning_rate": 0.000255117786745547, "loss": 0.3595, "step": 85372 }, { "epoch": 0.15137515858007994, "grad_norm": 0.380859375, "learning_rate": 0.00025509727982881027, "loss": 0.1762, "step": 85374 }, { "epoch": 0.15137870474538975, "grad_norm": 0.39453125, "learning_rate": 0.0002550767766071872, "loss": 0.1545, "step": 85376 }, { "epoch": 0.15138225091069957, "grad_norm": 0.81640625, "learning_rate": 0.0002550562770807675, "loss": 0.1663, "step": 85378 }, { "epoch": 0.15138579707600938, "grad_norm": 0.90234375, "learning_rate": 0.00025503578124964075, "loss": 0.2011, "step": 85380 }, { "epoch": 0.1513893432413192, "grad_norm": 0.49609375, "learning_rate": 0.0002550152891138968, "loss": 0.1791, "step": 85382 }, { "epoch": 0.151392889406629, "grad_norm": 0.302734375, "learning_rate": 0.0002549948006736251, "loss": 0.2154, "step": 85384 }, { "epoch": 0.15139643557193883, "grad_norm": 0.384765625, "learning_rate": 0.0002549743159289153, "loss": 0.2276, "step": 85386 }, { "epoch": 0.15139998173724867, "grad_norm": 0.46484375, "learning_rate": 0.00025495383487985696, "loss": 0.2791, "step": 85388 }, { "epoch": 0.15140352790255848, "grad_norm": 0.54296875, "learning_rate": 0.00025493335752653983, "loss": 0.2527, "step": 85390 }, { "epoch": 0.1514070740678683, "grad_norm": 1.625, "learning_rate": 0.0002549128838690532, "loss": 0.2758, "step": 85392 }, { "epoch": 0.1514106202331781, "grad_norm": 0.306640625, "learning_rate": 0.0002548924139074867, "loss": 0.1807, "step": 85394 }, { "epoch": 0.15141416639848793, "grad_norm": 0.396484375, "learning_rate": 0.00025487194764193, "loss": 0.1562, "step": 85396 }, { "epoch": 0.15141771256379774, "grad_norm": 0.396484375, "learning_rate": 0.0002548514850724724, "loss": 0.1858, "step": 85398 }, { "epoch": 0.15142125872910756, "grad_norm": 9.3125, "learning_rate": 0.00025483102619920363, "loss": 0.2306, "step": 85400 }, { "epoch": 0.15142480489441737, "grad_norm": 0.7421875, "learning_rate": 0.00025481057102221276, "loss": 0.2058, "step": 85402 }, { "epoch": 0.15142835105972718, "grad_norm": 1.546875, "learning_rate": 0.00025479011954158964, "loss": 0.2261, "step": 85404 }, { "epoch": 0.151431897225037, "grad_norm": 0.7421875, "learning_rate": 0.0002547696717574235, "loss": 0.215, "step": 85406 }, { "epoch": 0.1514354433903468, "grad_norm": 0.369140625, "learning_rate": 0.0002547492276698042, "loss": 0.1642, "step": 85408 }, { "epoch": 0.15143898955565663, "grad_norm": 0.345703125, "learning_rate": 0.0002547287872788204, "loss": 0.15, "step": 85410 }, { "epoch": 0.15144253572096644, "grad_norm": 0.408203125, "learning_rate": 0.000254708350584562, "loss": 0.1256, "step": 85412 }, { "epoch": 0.15144608188627626, "grad_norm": 0.328125, "learning_rate": 0.00025468791758711834, "loss": 0.1457, "step": 85414 }, { "epoch": 0.15144962805158607, "grad_norm": 1.1640625, "learning_rate": 0.0002546674882865787, "loss": 0.1759, "step": 85416 }, { "epoch": 0.15145317421689589, "grad_norm": 1.3125, "learning_rate": 0.0002546470626830325, "loss": 0.1988, "step": 85418 }, { "epoch": 0.1514567203822057, "grad_norm": 0.1884765625, "learning_rate": 0.0002546266407765688, "loss": 0.1397, "step": 85420 }, { "epoch": 0.15146026654751552, "grad_norm": 0.392578125, "learning_rate": 0.0002546062225672772, "loss": 0.2394, "step": 85422 }, { "epoch": 0.15146381271282533, "grad_norm": 0.4921875, "learning_rate": 0.000254585808055247, "loss": 0.1736, "step": 85424 }, { "epoch": 0.15146735887813514, "grad_norm": 0.75, "learning_rate": 0.0002545653972405674, "loss": 0.2212, "step": 85426 }, { "epoch": 0.15147090504344496, "grad_norm": 0.240234375, "learning_rate": 0.00025454499012332756, "loss": 0.1886, "step": 85428 }, { "epoch": 0.15147445120875477, "grad_norm": 1.75, "learning_rate": 0.0002545245867036169, "loss": 0.3373, "step": 85430 }, { "epoch": 0.1514779973740646, "grad_norm": 0.73046875, "learning_rate": 0.0002545041869815246, "loss": 0.269, "step": 85432 }, { "epoch": 0.1514815435393744, "grad_norm": 0.365234375, "learning_rate": 0.0002544837909571399, "loss": 0.1913, "step": 85434 }, { "epoch": 0.15148508970468422, "grad_norm": 0.73828125, "learning_rate": 0.000254463398630552, "loss": 0.1827, "step": 85436 }, { "epoch": 0.15148863586999403, "grad_norm": 0.236328125, "learning_rate": 0.0002544430100018498, "loss": 0.1562, "step": 85438 }, { "epoch": 0.15149218203530385, "grad_norm": 0.388671875, "learning_rate": 0.0002544226250711231, "loss": 0.1458, "step": 85440 }, { "epoch": 0.15149572820061366, "grad_norm": 0.34765625, "learning_rate": 0.0002544022438384603, "loss": 0.1557, "step": 85442 }, { "epoch": 0.15149927436592348, "grad_norm": 0.361328125, "learning_rate": 0.0002543818663039512, "loss": 0.1341, "step": 85444 }, { "epoch": 0.1515028205312333, "grad_norm": 0.3046875, "learning_rate": 0.00025436149246768424, "loss": 0.1785, "step": 85446 }, { "epoch": 0.1515063666965431, "grad_norm": 0.703125, "learning_rate": 0.0002543411223297493, "loss": 0.1929, "step": 85448 }, { "epoch": 0.15150991286185292, "grad_norm": 0.390625, "learning_rate": 0.00025432075589023484, "loss": 0.2442, "step": 85450 }, { "epoch": 0.15151345902716273, "grad_norm": 0.9453125, "learning_rate": 0.0002543003931492303, "loss": 0.209, "step": 85452 }, { "epoch": 0.15151700519247255, "grad_norm": 1.5, "learning_rate": 0.0002542800341068245, "loss": 0.2294, "step": 85454 }, { "epoch": 0.15152055135778236, "grad_norm": 0.7890625, "learning_rate": 0.0002542596787631067, "loss": 0.1841, "step": 85456 }, { "epoch": 0.15152409752309218, "grad_norm": 0.375, "learning_rate": 0.0002542393271181658, "loss": 0.1691, "step": 85458 }, { "epoch": 0.151527643688402, "grad_norm": 0.515625, "learning_rate": 0.0002542189791720906, "loss": 0.1775, "step": 85460 }, { "epoch": 0.1515311898537118, "grad_norm": 0.640625, "learning_rate": 0.0002541986349249705, "loss": 0.1867, "step": 85462 }, { "epoch": 0.15153473601902162, "grad_norm": 0.361328125, "learning_rate": 0.0002541782943768942, "loss": 0.1878, "step": 85464 }, { "epoch": 0.15153828218433144, "grad_norm": 1.9765625, "learning_rate": 0.00025415795752795073, "loss": 0.3773, "step": 85466 }, { "epoch": 0.15154182834964125, "grad_norm": 0.361328125, "learning_rate": 0.0002541376243782289, "loss": 0.1621, "step": 85468 }, { "epoch": 0.15154537451495106, "grad_norm": 1.4296875, "learning_rate": 0.0002541172949278179, "loss": 0.1369, "step": 85470 }, { "epoch": 0.15154892068026088, "grad_norm": 1.375, "learning_rate": 0.0002540969691768065, "loss": 0.191, "step": 85472 }, { "epoch": 0.1515524668455707, "grad_norm": 0.6328125, "learning_rate": 0.0002540766471252836, "loss": 0.2112, "step": 85474 }, { "epoch": 0.1515560130108805, "grad_norm": 0.310546875, "learning_rate": 0.00025405632877333815, "loss": 0.1922, "step": 85476 }, { "epoch": 0.15155955917619035, "grad_norm": 0.51953125, "learning_rate": 0.0002540360141210587, "loss": 0.2312, "step": 85478 }, { "epoch": 0.15156310534150017, "grad_norm": 1.0078125, "learning_rate": 0.0002540157031685347, "loss": 0.1615, "step": 85480 }, { "epoch": 0.15156665150680998, "grad_norm": 0.373046875, "learning_rate": 0.0002539953959158544, "loss": 0.1769, "step": 85482 }, { "epoch": 0.1515701976721198, "grad_norm": 0.44140625, "learning_rate": 0.0002539750923631068, "loss": 0.1887, "step": 85484 }, { "epoch": 0.1515737438374296, "grad_norm": 0.353515625, "learning_rate": 0.00025395479251038054, "loss": 0.1663, "step": 85486 }, { "epoch": 0.15157729000273942, "grad_norm": 0.177734375, "learning_rate": 0.0002539344963577648, "loss": 0.1881, "step": 85488 }, { "epoch": 0.15158083616804924, "grad_norm": 1.0625, "learning_rate": 0.00025391420390534806, "loss": 0.2149, "step": 85490 }, { "epoch": 0.15158438233335905, "grad_norm": 0.345703125, "learning_rate": 0.00025389391515321925, "loss": 0.1777, "step": 85492 }, { "epoch": 0.15158792849866887, "grad_norm": 0.185546875, "learning_rate": 0.00025387363010146693, "loss": 0.1246, "step": 85494 }, { "epoch": 0.15159147466397868, "grad_norm": 1.875, "learning_rate": 0.0002538533487501796, "loss": 0.2458, "step": 85496 }, { "epoch": 0.1515950208292885, "grad_norm": 0.1572265625, "learning_rate": 0.0002538330710994467, "loss": 0.1335, "step": 85498 }, { "epoch": 0.1515985669945983, "grad_norm": 0.275390625, "learning_rate": 0.0002538127971493561, "loss": 0.2316, "step": 85500 }, { "epoch": 0.15160211315990813, "grad_norm": 0.455078125, "learning_rate": 0.0002537925268999968, "loss": 0.2136, "step": 85502 }, { "epoch": 0.15160565932521794, "grad_norm": 0.6640625, "learning_rate": 0.00025377226035145755, "loss": 0.2015, "step": 85504 }, { "epoch": 0.15160920549052775, "grad_norm": 0.7734375, "learning_rate": 0.000253751997503827, "loss": 0.1823, "step": 85506 }, { "epoch": 0.15161275165583757, "grad_norm": 0.458984375, "learning_rate": 0.00025373173835719346, "loss": 0.1532, "step": 85508 }, { "epoch": 0.15161629782114738, "grad_norm": 0.62109375, "learning_rate": 0.0002537114829116456, "loss": 0.2262, "step": 85510 }, { "epoch": 0.1516198439864572, "grad_norm": 0.5703125, "learning_rate": 0.00025369123116727245, "loss": 0.1993, "step": 85512 }, { "epoch": 0.151623390151767, "grad_norm": 0.8203125, "learning_rate": 0.0002536709831241621, "loss": 0.3016, "step": 85514 }, { "epoch": 0.15162693631707683, "grad_norm": 0.5546875, "learning_rate": 0.0002536507387824033, "loss": 0.196, "step": 85516 }, { "epoch": 0.15163048248238664, "grad_norm": 0.2734375, "learning_rate": 0.00025363049814208446, "loss": 0.1459, "step": 85518 }, { "epoch": 0.15163402864769646, "grad_norm": 0.2578125, "learning_rate": 0.00025361026120329427, "loss": 0.1719, "step": 85520 }, { "epoch": 0.15163757481300627, "grad_norm": 0.61328125, "learning_rate": 0.00025359002796612116, "loss": 0.1808, "step": 85522 }, { "epoch": 0.15164112097831609, "grad_norm": 0.36328125, "learning_rate": 0.00025356979843065366, "loss": 0.1581, "step": 85524 }, { "epoch": 0.1516446671436259, "grad_norm": 0.44140625, "learning_rate": 0.00025354957259698013, "loss": 0.2304, "step": 85526 }, { "epoch": 0.15164821330893571, "grad_norm": 0.6875, "learning_rate": 0.00025352935046518905, "loss": 0.3449, "step": 85528 }, { "epoch": 0.15165175947424553, "grad_norm": 0.26171875, "learning_rate": 0.0002535091320353691, "loss": 0.1566, "step": 85530 }, { "epoch": 0.15165530563955534, "grad_norm": 0.322265625, "learning_rate": 0.00025348891730760825, "loss": 0.1826, "step": 85532 }, { "epoch": 0.15165885180486516, "grad_norm": 1.3359375, "learning_rate": 0.00025346870628199526, "loss": 0.1927, "step": 85534 }, { "epoch": 0.15166239797017497, "grad_norm": 0.373046875, "learning_rate": 0.0002534484989586184, "loss": 0.2342, "step": 85536 }, { "epoch": 0.1516659441354848, "grad_norm": 0.4609375, "learning_rate": 0.00025342829533756624, "loss": 0.1806, "step": 85538 }, { "epoch": 0.1516694903007946, "grad_norm": 0.4921875, "learning_rate": 0.0002534080954189267, "loss": 0.1504, "step": 85540 }, { "epoch": 0.15167303646610442, "grad_norm": 0.62890625, "learning_rate": 0.0002533878992027886, "loss": 0.1895, "step": 85542 }, { "epoch": 0.15167658263141423, "grad_norm": 0.396484375, "learning_rate": 0.00025336770668924, "loss": 0.1581, "step": 85544 }, { "epoch": 0.15168012879672405, "grad_norm": 0.6328125, "learning_rate": 0.00025334751787836927, "loss": 0.2944, "step": 85546 }, { "epoch": 0.15168367496203386, "grad_norm": 0.2021484375, "learning_rate": 0.0002533273327702648, "loss": 0.1349, "step": 85548 }, { "epoch": 0.15168722112734367, "grad_norm": 0.36328125, "learning_rate": 0.0002533071513650145, "loss": 0.1767, "step": 85550 }, { "epoch": 0.1516907672926535, "grad_norm": 1.265625, "learning_rate": 0.00025328697366270713, "loss": 0.3401, "step": 85552 }, { "epoch": 0.1516943134579633, "grad_norm": 0.486328125, "learning_rate": 0.0002532667996634306, "loss": 0.1567, "step": 85554 }, { "epoch": 0.15169785962327312, "grad_norm": 0.8515625, "learning_rate": 0.00025324662936727335, "loss": 0.2089, "step": 85556 }, { "epoch": 0.15170140578858293, "grad_norm": 0.458984375, "learning_rate": 0.00025322646277432337, "loss": 0.1615, "step": 85558 }, { "epoch": 0.15170495195389275, "grad_norm": 0.765625, "learning_rate": 0.00025320629988466903, "loss": 0.1986, "step": 85560 }, { "epoch": 0.15170849811920256, "grad_norm": 0.392578125, "learning_rate": 0.00025318614069839843, "loss": 0.1611, "step": 85562 }, { "epoch": 0.15171204428451238, "grad_norm": 0.41796875, "learning_rate": 0.00025316598521559993, "loss": 0.2424, "step": 85564 }, { "epoch": 0.1517155904498222, "grad_norm": 0.404296875, "learning_rate": 0.0002531458334363614, "loss": 0.1583, "step": 85566 }, { "epoch": 0.15171913661513203, "grad_norm": 0.65234375, "learning_rate": 0.00025312568536077083, "loss": 0.1729, "step": 85568 }, { "epoch": 0.15172268278044185, "grad_norm": 0.87109375, "learning_rate": 0.00025310554098891707, "loss": 0.3025, "step": 85570 }, { "epoch": 0.15172622894575166, "grad_norm": 0.439453125, "learning_rate": 0.0002530854003208873, "loss": 0.1854, "step": 85572 }, { "epoch": 0.15172977511106148, "grad_norm": 0.796875, "learning_rate": 0.00025306526335677033, "loss": 0.1939, "step": 85574 }, { "epoch": 0.1517333212763713, "grad_norm": 0.349609375, "learning_rate": 0.00025304513009665375, "loss": 0.331, "step": 85576 }, { "epoch": 0.1517368674416811, "grad_norm": 0.4609375, "learning_rate": 0.00025302500054062596, "loss": 0.1374, "step": 85578 }, { "epoch": 0.15174041360699092, "grad_norm": 0.66015625, "learning_rate": 0.00025300487468877473, "loss": 0.168, "step": 85580 }, { "epoch": 0.15174395977230073, "grad_norm": 0.625, "learning_rate": 0.00025298475254118826, "loss": 0.1776, "step": 85582 }, { "epoch": 0.15174750593761055, "grad_norm": 0.2060546875, "learning_rate": 0.0002529646340979544, "loss": 0.1831, "step": 85584 }, { "epoch": 0.15175105210292036, "grad_norm": 0.416015625, "learning_rate": 0.00025294451935916126, "loss": 0.2226, "step": 85586 }, { "epoch": 0.15175459826823018, "grad_norm": 0.4140625, "learning_rate": 0.0002529244083248968, "loss": 0.1958, "step": 85588 }, { "epoch": 0.15175814443354, "grad_norm": 0.36328125, "learning_rate": 0.00025290430099524883, "loss": 0.1481, "step": 85590 }, { "epoch": 0.1517616905988498, "grad_norm": 0.263671875, "learning_rate": 0.0002528841973703054, "loss": 0.1772, "step": 85592 }, { "epoch": 0.15176523676415962, "grad_norm": 0.4296875, "learning_rate": 0.0002528640974501545, "loss": 0.1937, "step": 85594 }, { "epoch": 0.15176878292946944, "grad_norm": 0.26953125, "learning_rate": 0.0002528440012348841, "loss": 0.1615, "step": 85596 }, { "epoch": 0.15177232909477925, "grad_norm": 0.189453125, "learning_rate": 0.0002528239087245816, "loss": 0.1872, "step": 85598 }, { "epoch": 0.15177587526008907, "grad_norm": 0.46875, "learning_rate": 0.00025280381991933556, "loss": 0.1938, "step": 85600 }, { "epoch": 0.15177942142539888, "grad_norm": 0.30078125, "learning_rate": 0.0002527837348192333, "loss": 0.1659, "step": 85602 }, { "epoch": 0.1517829675907087, "grad_norm": 0.53515625, "learning_rate": 0.0002527636534243631, "loss": 0.1784, "step": 85604 }, { "epoch": 0.1517865137560185, "grad_norm": 0.4765625, "learning_rate": 0.0002527435757348123, "loss": 0.1637, "step": 85606 }, { "epoch": 0.15179005992132832, "grad_norm": 0.26953125, "learning_rate": 0.000252723501750669, "loss": 0.1996, "step": 85608 }, { "epoch": 0.15179360608663814, "grad_norm": 0.2578125, "learning_rate": 0.00025270343147202094, "loss": 0.2433, "step": 85610 }, { "epoch": 0.15179715225194795, "grad_norm": 0.75390625, "learning_rate": 0.000252683364898956, "loss": 0.2537, "step": 85612 }, { "epoch": 0.15180069841725777, "grad_norm": 0.48046875, "learning_rate": 0.00025266330203156174, "loss": 0.115, "step": 85614 }, { "epoch": 0.15180424458256758, "grad_norm": 0.396484375, "learning_rate": 0.00025264324286992603, "loss": 0.1832, "step": 85616 }, { "epoch": 0.1518077907478774, "grad_norm": 0.46875, "learning_rate": 0.00025262318741413656, "loss": 0.3174, "step": 85618 }, { "epoch": 0.1518113369131872, "grad_norm": 0.291015625, "learning_rate": 0.00025260313566428114, "loss": 0.1595, "step": 85620 }, { "epoch": 0.15181488307849703, "grad_norm": 0.765625, "learning_rate": 0.0002525830876204474, "loss": 0.2138, "step": 85622 }, { "epoch": 0.15181842924380684, "grad_norm": 0.2255859375, "learning_rate": 0.0002525630432827228, "loss": 0.1989, "step": 85624 }, { "epoch": 0.15182197540911666, "grad_norm": 0.23828125, "learning_rate": 0.0002525430026511953, "loss": 0.164, "step": 85626 }, { "epoch": 0.15182552157442647, "grad_norm": 0.54296875, "learning_rate": 0.0002525229657259526, "loss": 0.2031, "step": 85628 }, { "epoch": 0.15182906773973628, "grad_norm": 0.640625, "learning_rate": 0.000252502932507082, "loss": 0.2177, "step": 85630 }, { "epoch": 0.1518326139050461, "grad_norm": 0.314453125, "learning_rate": 0.0002524829029946714, "loss": 0.1811, "step": 85632 }, { "epoch": 0.1518361600703559, "grad_norm": 0.265625, "learning_rate": 0.00025246287718880793, "loss": 0.1681, "step": 85634 }, { "epoch": 0.15183970623566573, "grad_norm": 2.375, "learning_rate": 0.0002524428550895801, "loss": 0.182, "step": 85636 }, { "epoch": 0.15184325240097554, "grad_norm": 0.279296875, "learning_rate": 0.00025242283669707445, "loss": 0.4141, "step": 85638 }, { "epoch": 0.15184679856628536, "grad_norm": 0.2734375, "learning_rate": 0.00025240282201137914, "loss": 0.1477, "step": 85640 }, { "epoch": 0.15185034473159517, "grad_norm": 1.140625, "learning_rate": 0.00025238281103258164, "loss": 0.1511, "step": 85642 }, { "epoch": 0.15185389089690499, "grad_norm": 2.25, "learning_rate": 0.00025236280376076927, "loss": 0.1713, "step": 85644 }, { "epoch": 0.1518574370622148, "grad_norm": 1.1328125, "learning_rate": 0.0002523428001960296, "loss": 0.1883, "step": 85646 }, { "epoch": 0.15186098322752462, "grad_norm": 2.03125, "learning_rate": 0.00025232280033845016, "loss": 0.3081, "step": 85648 }, { "epoch": 0.15186452939283443, "grad_norm": 0.3203125, "learning_rate": 0.0002523028041881184, "loss": 0.2018, "step": 85650 }, { "epoch": 0.15186807555814424, "grad_norm": 0.380859375, "learning_rate": 0.0002522828117451218, "loss": 0.1291, "step": 85652 }, { "epoch": 0.15187162172345406, "grad_norm": 0.12353515625, "learning_rate": 0.0002522628230095478, "loss": 0.1188, "step": 85654 }, { "epoch": 0.15187516788876387, "grad_norm": 0.484375, "learning_rate": 0.00025224283798148364, "loss": 0.1535, "step": 85656 }, { "epoch": 0.1518787140540737, "grad_norm": 0.248046875, "learning_rate": 0.000252222856661017, "loss": 0.1746, "step": 85658 }, { "epoch": 0.15188226021938353, "grad_norm": 0.494140625, "learning_rate": 0.00025220287904823517, "loss": 0.1545, "step": 85660 }, { "epoch": 0.15188580638469334, "grad_norm": 7.9375, "learning_rate": 0.0002521829051432255, "loss": 0.1937, "step": 85662 }, { "epoch": 0.15188935255000316, "grad_norm": 0.1494140625, "learning_rate": 0.00025216293494607526, "loss": 0.155, "step": 85664 }, { "epoch": 0.15189289871531297, "grad_norm": 0.625, "learning_rate": 0.0002521429684568718, "loss": 0.1903, "step": 85666 }, { "epoch": 0.1518964448806228, "grad_norm": 0.61328125, "learning_rate": 0.00025212300567570257, "loss": 0.2044, "step": 85668 }, { "epoch": 0.1518999910459326, "grad_norm": 0.384765625, "learning_rate": 0.0002521030466026549, "loss": 0.3247, "step": 85670 }, { "epoch": 0.15190353721124242, "grad_norm": 0.23828125, "learning_rate": 0.0002520830912378159, "loss": 0.1676, "step": 85672 }, { "epoch": 0.15190708337655223, "grad_norm": 0.5, "learning_rate": 0.0002520631395812728, "loss": 0.1918, "step": 85674 }, { "epoch": 0.15191062954186205, "grad_norm": 0.63671875, "learning_rate": 0.0002520431916331131, "loss": 0.2226, "step": 85676 }, { "epoch": 0.15191417570717186, "grad_norm": 0.416015625, "learning_rate": 0.000252023247393424, "loss": 0.2023, "step": 85678 }, { "epoch": 0.15191772187248168, "grad_norm": 0.33203125, "learning_rate": 0.00025200330686229257, "loss": 0.1528, "step": 85680 }, { "epoch": 0.1519212680377915, "grad_norm": 0.59765625, "learning_rate": 0.00025198337003980615, "loss": 0.1895, "step": 85682 }, { "epoch": 0.1519248142031013, "grad_norm": 0.5625, "learning_rate": 0.0002519634369260517, "loss": 0.1628, "step": 85684 }, { "epoch": 0.15192836036841112, "grad_norm": 0.3359375, "learning_rate": 0.000251943507521117, "loss": 0.1674, "step": 85686 }, { "epoch": 0.15193190653372093, "grad_norm": 0.46875, "learning_rate": 0.00025192358182508836, "loss": 0.1906, "step": 85688 }, { "epoch": 0.15193545269903075, "grad_norm": 1.9375, "learning_rate": 0.00025190365983805365, "loss": 0.2874, "step": 85690 }, { "epoch": 0.15193899886434056, "grad_norm": 1.453125, "learning_rate": 0.0002518837415600995, "loss": 0.2515, "step": 85692 }, { "epoch": 0.15194254502965038, "grad_norm": 0.93359375, "learning_rate": 0.0002518638269913133, "loss": 0.2395, "step": 85694 }, { "epoch": 0.1519460911949602, "grad_norm": 0.41015625, "learning_rate": 0.00025184391613178206, "loss": 0.1529, "step": 85696 }, { "epoch": 0.15194963736027, "grad_norm": 0.392578125, "learning_rate": 0.00025182400898159275, "loss": 0.1491, "step": 85698 }, { "epoch": 0.15195318352557982, "grad_norm": 0.71484375, "learning_rate": 0.0002518041055408327, "loss": 0.2487, "step": 85700 }, { "epoch": 0.15195672969088964, "grad_norm": 0.80078125, "learning_rate": 0.00025178420580958884, "loss": 0.2203, "step": 85702 }, { "epoch": 0.15196027585619945, "grad_norm": 0.384765625, "learning_rate": 0.00025176430978794803, "loss": 0.143, "step": 85704 }, { "epoch": 0.15196382202150926, "grad_norm": 0.48046875, "learning_rate": 0.0002517444174759974, "loss": 0.1428, "step": 85706 }, { "epoch": 0.15196736818681908, "grad_norm": 0.259765625, "learning_rate": 0.00025172452887382396, "loss": 0.1455, "step": 85708 }, { "epoch": 0.1519709143521289, "grad_norm": 0.50390625, "learning_rate": 0.0002517046439815148, "loss": 0.1417, "step": 85710 }, { "epoch": 0.1519744605174387, "grad_norm": 0.51953125, "learning_rate": 0.0002516847627991567, "loss": 0.1371, "step": 85712 }, { "epoch": 0.15197800668274852, "grad_norm": 0.390625, "learning_rate": 0.00025166488532683677, "loss": 0.1553, "step": 85714 }, { "epoch": 0.15198155284805834, "grad_norm": 3.9375, "learning_rate": 0.00025164501156464176, "loss": 0.3678, "step": 85716 }, { "epoch": 0.15198509901336815, "grad_norm": 0.25390625, "learning_rate": 0.00025162514151265885, "loss": 0.1547, "step": 85718 }, { "epoch": 0.15198864517867797, "grad_norm": 0.40234375, "learning_rate": 0.0002516052751709746, "loss": 0.1292, "step": 85720 }, { "epoch": 0.15199219134398778, "grad_norm": 0.57421875, "learning_rate": 0.00025158541253967616, "loss": 0.1909, "step": 85722 }, { "epoch": 0.1519957375092976, "grad_norm": 0.46875, "learning_rate": 0.0002515655536188501, "loss": 0.1896, "step": 85724 }, { "epoch": 0.1519992836746074, "grad_norm": 0.310546875, "learning_rate": 0.00025154569840858383, "loss": 0.1801, "step": 85726 }, { "epoch": 0.15200282983991723, "grad_norm": 1.03125, "learning_rate": 0.0002515258469089636, "loss": 0.1911, "step": 85728 }, { "epoch": 0.15200637600522704, "grad_norm": 4.625, "learning_rate": 0.0002515059991200766, "loss": 0.3026, "step": 85730 }, { "epoch": 0.15200992217053685, "grad_norm": 0.2470703125, "learning_rate": 0.0002514861550420094, "loss": 0.2109, "step": 85732 }, { "epoch": 0.15201346833584667, "grad_norm": 0.408203125, "learning_rate": 0.00025146631467484894, "loss": 0.2113, "step": 85734 }, { "epoch": 0.15201701450115648, "grad_norm": 0.56640625, "learning_rate": 0.0002514464780186819, "loss": 0.2077, "step": 85736 }, { "epoch": 0.1520205606664663, "grad_norm": 0.1943359375, "learning_rate": 0.0002514266450735949, "loss": 0.1529, "step": 85738 }, { "epoch": 0.1520241068317761, "grad_norm": 0.375, "learning_rate": 0.0002514068158396751, "loss": 0.2009, "step": 85740 }, { "epoch": 0.15202765299708593, "grad_norm": 0.462890625, "learning_rate": 0.0002513869903170087, "loss": 0.1745, "step": 85742 }, { "epoch": 0.15203119916239574, "grad_norm": 1.921875, "learning_rate": 0.00025136716850568305, "loss": 0.3689, "step": 85744 }, { "epoch": 0.15203474532770556, "grad_norm": 1.0, "learning_rate": 0.00025134735040578416, "loss": 0.163, "step": 85746 }, { "epoch": 0.15203829149301537, "grad_norm": 0.44921875, "learning_rate": 0.0002513275360173991, "loss": 0.246, "step": 85748 }, { "epoch": 0.1520418376583252, "grad_norm": 0.42578125, "learning_rate": 0.0002513077253406145, "loss": 0.221, "step": 85750 }, { "epoch": 0.15204538382363503, "grad_norm": 1.203125, "learning_rate": 0.0002512879183755168, "loss": 0.3002, "step": 85752 }, { "epoch": 0.15204892998894484, "grad_norm": 0.64453125, "learning_rate": 0.00025126811512219294, "loss": 0.2376, "step": 85754 }, { "epoch": 0.15205247615425466, "grad_norm": 0.404296875, "learning_rate": 0.00025124831558072906, "loss": 0.2031, "step": 85756 }, { "epoch": 0.15205602231956447, "grad_norm": 0.5625, "learning_rate": 0.00025122851975121216, "loss": 0.1391, "step": 85758 }, { "epoch": 0.15205956848487429, "grad_norm": 0.890625, "learning_rate": 0.0002512087276337288, "loss": 0.2296, "step": 85760 }, { "epoch": 0.1520631146501841, "grad_norm": 0.4296875, "learning_rate": 0.00025118893922836534, "loss": 0.2792, "step": 85762 }, { "epoch": 0.15206666081549391, "grad_norm": 35.25, "learning_rate": 0.00025116915453520834, "loss": 0.2176, "step": 85764 }, { "epoch": 0.15207020698080373, "grad_norm": 0.34375, "learning_rate": 0.0002511493735543445, "loss": 0.1718, "step": 85766 }, { "epoch": 0.15207375314611354, "grad_norm": 0.1630859375, "learning_rate": 0.00025112959628586026, "loss": 0.1545, "step": 85768 }, { "epoch": 0.15207729931142336, "grad_norm": 0.59375, "learning_rate": 0.00025110982272984206, "loss": 0.1554, "step": 85770 }, { "epoch": 0.15208084547673317, "grad_norm": 0.3671875, "learning_rate": 0.00025109005288637636, "loss": 0.1324, "step": 85772 }, { "epoch": 0.152084391642043, "grad_norm": 0.56640625, "learning_rate": 0.00025107028675554953, "loss": 0.1579, "step": 85774 }, { "epoch": 0.1520879378073528, "grad_norm": 0.58203125, "learning_rate": 0.00025105052433744846, "loss": 0.1991, "step": 85776 }, { "epoch": 0.15209148397266262, "grad_norm": 0.3359375, "learning_rate": 0.000251030765632159, "loss": 0.1934, "step": 85778 }, { "epoch": 0.15209503013797243, "grad_norm": 0.419921875, "learning_rate": 0.00025101101063976797, "loss": 0.1382, "step": 85780 }, { "epoch": 0.15209857630328225, "grad_norm": 0.2001953125, "learning_rate": 0.0002509912593603615, "loss": 0.2049, "step": 85782 }, { "epoch": 0.15210212246859206, "grad_norm": 0.349609375, "learning_rate": 0.0002509715117940264, "loss": 0.1347, "step": 85784 }, { "epoch": 0.15210566863390187, "grad_norm": 0.60546875, "learning_rate": 0.0002509517679408484, "loss": 0.2124, "step": 85786 }, { "epoch": 0.1521092147992117, "grad_norm": 0.796875, "learning_rate": 0.00025093202780091423, "loss": 0.1387, "step": 85788 }, { "epoch": 0.1521127609645215, "grad_norm": 0.330078125, "learning_rate": 0.0002509122913743103, "loss": 0.1803, "step": 85790 }, { "epoch": 0.15211630712983132, "grad_norm": 0.267578125, "learning_rate": 0.00025089255866112274, "loss": 0.1879, "step": 85792 }, { "epoch": 0.15211985329514113, "grad_norm": 1.4921875, "learning_rate": 0.000250872829661438, "loss": 0.1862, "step": 85794 }, { "epoch": 0.15212339946045095, "grad_norm": 0.8046875, "learning_rate": 0.0002508531043753421, "loss": 0.1554, "step": 85796 }, { "epoch": 0.15212694562576076, "grad_norm": 0.2001953125, "learning_rate": 0.0002508333828029216, "loss": 0.2076, "step": 85798 }, { "epoch": 0.15213049179107058, "grad_norm": 0.28515625, "learning_rate": 0.0002508136649442625, "loss": 0.1572, "step": 85800 }, { "epoch": 0.1521340379563804, "grad_norm": 0.306640625, "learning_rate": 0.00025079395079945135, "loss": 0.1854, "step": 85802 }, { "epoch": 0.1521375841216902, "grad_norm": 0.49609375, "learning_rate": 0.00025077424036857394, "loss": 0.2047, "step": 85804 }, { "epoch": 0.15214113028700002, "grad_norm": 2.921875, "learning_rate": 0.0002507545336517169, "loss": 0.4835, "step": 85806 }, { "epoch": 0.15214467645230983, "grad_norm": 0.3828125, "learning_rate": 0.00025073483064896624, "loss": 0.155, "step": 85808 }, { "epoch": 0.15214822261761965, "grad_norm": 0.84375, "learning_rate": 0.00025071513136040805, "loss": 0.1974, "step": 85810 }, { "epoch": 0.15215176878292946, "grad_norm": 0.17578125, "learning_rate": 0.0002506954357861286, "loss": 0.1325, "step": 85812 }, { "epoch": 0.15215531494823928, "grad_norm": 0.23828125, "learning_rate": 0.0002506757439262139, "loss": 0.1751, "step": 85814 }, { "epoch": 0.1521588611135491, "grad_norm": 0.29296875, "learning_rate": 0.00025065605578075033, "loss": 0.1775, "step": 85816 }, { "epoch": 0.1521624072788589, "grad_norm": 0.421875, "learning_rate": 0.0002506363713498236, "loss": 0.2488, "step": 85818 }, { "epoch": 0.15216595344416872, "grad_norm": 1.5234375, "learning_rate": 0.00025061669063352005, "loss": 0.3667, "step": 85820 }, { "epoch": 0.15216949960947854, "grad_norm": 0.90234375, "learning_rate": 0.00025059701363192564, "loss": 0.2102, "step": 85822 }, { "epoch": 0.15217304577478835, "grad_norm": 0.435546875, "learning_rate": 0.0002505773403451267, "loss": 0.1909, "step": 85824 }, { "epoch": 0.15217659194009817, "grad_norm": 0.80078125, "learning_rate": 0.00025055767077320894, "loss": 0.2546, "step": 85826 }, { "epoch": 0.15218013810540798, "grad_norm": 0.291015625, "learning_rate": 0.00025053800491625866, "loss": 0.3077, "step": 85828 }, { "epoch": 0.1521836842707178, "grad_norm": 0.54296875, "learning_rate": 0.00025051834277436166, "loss": 0.2121, "step": 85830 }, { "epoch": 0.1521872304360276, "grad_norm": 0.416015625, "learning_rate": 0.0002504986843476038, "loss": 0.1675, "step": 85832 }, { "epoch": 0.15219077660133742, "grad_norm": 0.65625, "learning_rate": 0.00025047902963607155, "loss": 0.1658, "step": 85834 }, { "epoch": 0.15219432276664724, "grad_norm": 0.44921875, "learning_rate": 0.0002504593786398503, "loss": 0.185, "step": 85836 }, { "epoch": 0.15219786893195705, "grad_norm": 0.486328125, "learning_rate": 0.0002504397313590265, "loss": 0.133, "step": 85838 }, { "epoch": 0.1522014150972669, "grad_norm": 0.439453125, "learning_rate": 0.0002504200877936855, "loss": 0.1589, "step": 85840 }, { "epoch": 0.1522049612625767, "grad_norm": 0.64453125, "learning_rate": 0.0002504004479439139, "loss": 0.113, "step": 85842 }, { "epoch": 0.15220850742788652, "grad_norm": 0.2060546875, "learning_rate": 0.000250380811809797, "loss": 0.1633, "step": 85844 }, { "epoch": 0.15221205359319634, "grad_norm": 0.384765625, "learning_rate": 0.000250361179391421, "loss": 0.1518, "step": 85846 }, { "epoch": 0.15221559975850615, "grad_norm": 0.33984375, "learning_rate": 0.0002503415506888717, "loss": 0.1592, "step": 85848 }, { "epoch": 0.15221914592381597, "grad_norm": 0.1982421875, "learning_rate": 0.0002503219257022349, "loss": 0.1796, "step": 85850 }, { "epoch": 0.15222269208912578, "grad_norm": 0.35546875, "learning_rate": 0.00025030230443159646, "loss": 0.1567, "step": 85852 }, { "epoch": 0.1522262382544356, "grad_norm": 0.38671875, "learning_rate": 0.000250282686877042, "loss": 0.1787, "step": 85854 }, { "epoch": 0.1522297844197454, "grad_norm": 0.88671875, "learning_rate": 0.0002502630730386577, "loss": 0.1908, "step": 85856 }, { "epoch": 0.15223333058505523, "grad_norm": 0.220703125, "learning_rate": 0.00025024346291652904, "loss": 0.2193, "step": 85858 }, { "epoch": 0.15223687675036504, "grad_norm": 0.4140625, "learning_rate": 0.00025022385651074194, "loss": 0.2203, "step": 85860 }, { "epoch": 0.15224042291567486, "grad_norm": 0.66015625, "learning_rate": 0.00025020425382138206, "loss": 0.188, "step": 85862 }, { "epoch": 0.15224396908098467, "grad_norm": 1.2109375, "learning_rate": 0.000250184654848535, "loss": 0.1726, "step": 85864 }, { "epoch": 0.15224751524629448, "grad_norm": 0.296875, "learning_rate": 0.00025016505959228705, "loss": 0.1619, "step": 85866 }, { "epoch": 0.1522510614116043, "grad_norm": 0.87890625, "learning_rate": 0.0002501454680527231, "loss": 0.1721, "step": 85868 }, { "epoch": 0.1522546075769141, "grad_norm": 0.291015625, "learning_rate": 0.00025012588022992933, "loss": 0.2119, "step": 85870 }, { "epoch": 0.15225815374222393, "grad_norm": 0.423828125, "learning_rate": 0.00025010629612399115, "loss": 0.1815, "step": 85872 }, { "epoch": 0.15226169990753374, "grad_norm": 0.953125, "learning_rate": 0.0002500867157349947, "loss": 0.1812, "step": 85874 }, { "epoch": 0.15226524607284356, "grad_norm": 0.2412109375, "learning_rate": 0.00025006713906302485, "loss": 0.1735, "step": 85876 }, { "epoch": 0.15226879223815337, "grad_norm": 1.0390625, "learning_rate": 0.00025004756610816787, "loss": 0.2392, "step": 85878 }, { "epoch": 0.1522723384034632, "grad_norm": 0.73046875, "learning_rate": 0.00025002799687050915, "loss": 0.1869, "step": 85880 }, { "epoch": 0.152275884568773, "grad_norm": 0.87109375, "learning_rate": 0.0002500084313501342, "loss": 0.283, "step": 85882 }, { "epoch": 0.15227943073408282, "grad_norm": 0.365234375, "learning_rate": 0.00024998886954712867, "loss": 0.2209, "step": 85884 }, { "epoch": 0.15228297689939263, "grad_norm": 0.57421875, "learning_rate": 0.000249969311461578, "loss": 0.174, "step": 85886 }, { "epoch": 0.15228652306470244, "grad_norm": 0.78515625, "learning_rate": 0.00024994975709356787, "loss": 0.1459, "step": 85888 }, { "epoch": 0.15229006923001226, "grad_norm": 0.361328125, "learning_rate": 0.0002499302064431837, "loss": 0.1425, "step": 85890 }, { "epoch": 0.15229361539532207, "grad_norm": 0.25390625, "learning_rate": 0.0002499106595105111, "loss": 0.1499, "step": 85892 }, { "epoch": 0.1522971615606319, "grad_norm": 0.275390625, "learning_rate": 0.00024989111629563525, "loss": 0.1964, "step": 85894 }, { "epoch": 0.1523007077259417, "grad_norm": 0.251953125, "learning_rate": 0.00024987157679864203, "loss": 0.1626, "step": 85896 }, { "epoch": 0.15230425389125152, "grad_norm": 0.263671875, "learning_rate": 0.0002498520410196168, "loss": 0.1786, "step": 85898 }, { "epoch": 0.15230780005656133, "grad_norm": 1.3828125, "learning_rate": 0.0002498325089586449, "loss": 0.2137, "step": 85900 }, { "epoch": 0.15231134622187115, "grad_norm": 1.4765625, "learning_rate": 0.00024981298061581167, "loss": 0.2117, "step": 85902 }, { "epoch": 0.15231489238718096, "grad_norm": 0.29296875, "learning_rate": 0.0002497934559912026, "loss": 0.211, "step": 85904 }, { "epoch": 0.15231843855249078, "grad_norm": 0.345703125, "learning_rate": 0.00024977393508490323, "loss": 0.2107, "step": 85906 }, { "epoch": 0.1523219847178006, "grad_norm": 0.37890625, "learning_rate": 0.0002497544178969986, "loss": 0.1435, "step": 85908 }, { "epoch": 0.1523255308831104, "grad_norm": 0.384765625, "learning_rate": 0.00024973490442757444, "loss": 0.1313, "step": 85910 }, { "epoch": 0.15232907704842022, "grad_norm": 0.232421875, "learning_rate": 0.0002497153946767158, "loss": 0.1591, "step": 85912 }, { "epoch": 0.15233262321373003, "grad_norm": 1.46875, "learning_rate": 0.0002496958886445082, "loss": 0.2016, "step": 85914 }, { "epoch": 0.15233616937903985, "grad_norm": 0.365234375, "learning_rate": 0.0002496763863310369, "loss": 0.1352, "step": 85916 }, { "epoch": 0.15233971554434966, "grad_norm": 0.380859375, "learning_rate": 0.00024965688773638707, "loss": 0.3135, "step": 85918 }, { "epoch": 0.15234326170965948, "grad_norm": 1.40625, "learning_rate": 0.00024963739286064417, "loss": 0.2349, "step": 85920 }, { "epoch": 0.1523468078749693, "grad_norm": 0.2451171875, "learning_rate": 0.0002496179017038932, "loss": 0.1258, "step": 85922 }, { "epoch": 0.1523503540402791, "grad_norm": 0.6484375, "learning_rate": 0.00024959841426621976, "loss": 0.1341, "step": 85924 }, { "epoch": 0.15235390020558892, "grad_norm": 0.439453125, "learning_rate": 0.0002495789305477087, "loss": 0.1644, "step": 85926 }, { "epoch": 0.15235744637089874, "grad_norm": 1.6953125, "learning_rate": 0.0002495594505484455, "loss": 0.2871, "step": 85928 }, { "epoch": 0.15236099253620855, "grad_norm": 0.1435546875, "learning_rate": 0.00024953997426851516, "loss": 0.2833, "step": 85930 }, { "epoch": 0.1523645387015184, "grad_norm": 0.42578125, "learning_rate": 0.0002495205017080032, "loss": 0.1743, "step": 85932 }, { "epoch": 0.1523680848668282, "grad_norm": 0.2578125, "learning_rate": 0.0002495010328669944, "loss": 0.166, "step": 85934 }, { "epoch": 0.15237163103213802, "grad_norm": 4.21875, "learning_rate": 0.0002494815677455741, "loss": 0.2921, "step": 85936 }, { "epoch": 0.15237517719744784, "grad_norm": 0.578125, "learning_rate": 0.00024946210634382745, "loss": 0.207, "step": 85938 }, { "epoch": 0.15237872336275765, "grad_norm": 0.373046875, "learning_rate": 0.0002494426486618394, "loss": 0.2412, "step": 85940 }, { "epoch": 0.15238226952806747, "grad_norm": 0.455078125, "learning_rate": 0.0002494231946996952, "loss": 0.2044, "step": 85942 }, { "epoch": 0.15238581569337728, "grad_norm": 0.671875, "learning_rate": 0.0002494037444574797, "loss": 0.1683, "step": 85944 }, { "epoch": 0.1523893618586871, "grad_norm": 0.2060546875, "learning_rate": 0.00024938429793527824, "loss": 0.1551, "step": 85946 }, { "epoch": 0.1523929080239969, "grad_norm": 0.25390625, "learning_rate": 0.0002493648551331758, "loss": 0.2645, "step": 85948 }, { "epoch": 0.15239645418930672, "grad_norm": 0.32421875, "learning_rate": 0.0002493454160512575, "loss": 0.1319, "step": 85950 }, { "epoch": 0.15240000035461654, "grad_norm": 1.15625, "learning_rate": 0.0002493259806896079, "loss": 0.2431, "step": 85952 }, { "epoch": 0.15240354651992635, "grad_norm": 0.466796875, "learning_rate": 0.0002493065490483126, "loss": 0.2826, "step": 85954 }, { "epoch": 0.15240709268523617, "grad_norm": 0.380859375, "learning_rate": 0.0002492871211274562, "loss": 0.1592, "step": 85956 }, { "epoch": 0.15241063885054598, "grad_norm": 0.29296875, "learning_rate": 0.00024926769692712393, "loss": 0.2087, "step": 85958 }, { "epoch": 0.1524141850158558, "grad_norm": 0.5625, "learning_rate": 0.0002492482764474005, "loss": 0.1745, "step": 85960 }, { "epoch": 0.1524177311811656, "grad_norm": 0.51953125, "learning_rate": 0.0002492288596883708, "loss": 0.2217, "step": 85962 }, { "epoch": 0.15242127734647543, "grad_norm": 0.65625, "learning_rate": 0.0002492094466501201, "loss": 0.4971, "step": 85964 }, { "epoch": 0.15242482351178524, "grad_norm": 0.314453125, "learning_rate": 0.0002491900373327329, "loss": 0.1813, "step": 85966 }, { "epoch": 0.15242836967709505, "grad_norm": 0.2255859375, "learning_rate": 0.00024917063173629435, "loss": 0.1643, "step": 85968 }, { "epoch": 0.15243191584240487, "grad_norm": 0.244140625, "learning_rate": 0.0002491512298608891, "loss": 0.1616, "step": 85970 }, { "epoch": 0.15243546200771468, "grad_norm": 0.61328125, "learning_rate": 0.00024913183170660244, "loss": 0.1519, "step": 85972 }, { "epoch": 0.1524390081730245, "grad_norm": 0.40625, "learning_rate": 0.0002491124372735186, "loss": 0.2727, "step": 85974 }, { "epoch": 0.1524425543383343, "grad_norm": 0.59765625, "learning_rate": 0.0002490930465617229, "loss": 0.1347, "step": 85976 }, { "epoch": 0.15244610050364413, "grad_norm": 0.94140625, "learning_rate": 0.0002490736595712999, "loss": 0.1614, "step": 85978 }, { "epoch": 0.15244964666895394, "grad_norm": 0.271484375, "learning_rate": 0.00024905427630233444, "loss": 0.181, "step": 85980 }, { "epoch": 0.15245319283426376, "grad_norm": 0.294921875, "learning_rate": 0.00024903489675491135, "loss": 0.1896, "step": 85982 }, { "epoch": 0.15245673899957357, "grad_norm": 0.5234375, "learning_rate": 0.0002490155209291151, "loss": 0.1852, "step": 85984 }, { "epoch": 0.15246028516488339, "grad_norm": 0.345703125, "learning_rate": 0.00024899614882503083, "loss": 0.1504, "step": 85986 }, { "epoch": 0.1524638313301932, "grad_norm": 0.3125, "learning_rate": 0.0002489767804427431, "loss": 0.1595, "step": 85988 }, { "epoch": 0.15246737749550301, "grad_norm": 0.408203125, "learning_rate": 0.0002489574157823366, "loss": 0.1611, "step": 85990 }, { "epoch": 0.15247092366081283, "grad_norm": 0.314453125, "learning_rate": 0.0002489380548438958, "loss": 0.158, "step": 85992 }, { "epoch": 0.15247446982612264, "grad_norm": 0.205078125, "learning_rate": 0.0002489186976275058, "loss": 0.1814, "step": 85994 }, { "epoch": 0.15247801599143246, "grad_norm": 0.15234375, "learning_rate": 0.00024889934413325105, "loss": 0.1406, "step": 85996 }, { "epoch": 0.15248156215674227, "grad_norm": 0.357421875, "learning_rate": 0.00024887999436121617, "loss": 0.2471, "step": 85998 }, { "epoch": 0.1524851083220521, "grad_norm": 0.1513671875, "learning_rate": 0.00024886064831148577, "loss": 0.1517, "step": 86000 }, { "epoch": 0.1524886544873619, "grad_norm": 0.25390625, "learning_rate": 0.00024884130598414435, "loss": 0.21, "step": 86002 }, { "epoch": 0.15249220065267172, "grad_norm": 0.30078125, "learning_rate": 0.00024882196737927687, "loss": 0.1716, "step": 86004 }, { "epoch": 0.15249574681798153, "grad_norm": 2.859375, "learning_rate": 0.00024880263249696757, "loss": 0.22, "step": 86006 }, { "epoch": 0.15249929298329135, "grad_norm": 0.302734375, "learning_rate": 0.0002487833013373013, "loss": 0.1489, "step": 86008 }, { "epoch": 0.15250283914860116, "grad_norm": 0.458984375, "learning_rate": 0.0002487639739003622, "loss": 0.1809, "step": 86010 }, { "epoch": 0.15250638531391097, "grad_norm": 0.2314453125, "learning_rate": 0.00024874465018623505, "loss": 0.2754, "step": 86012 }, { "epoch": 0.1525099314792208, "grad_norm": 0.93359375, "learning_rate": 0.0002487253301950044, "loss": 0.1698, "step": 86014 }, { "epoch": 0.1525134776445306, "grad_norm": 1.296875, "learning_rate": 0.00024870601392675473, "loss": 0.1973, "step": 86016 }, { "epoch": 0.15251702380984042, "grad_norm": 0.259765625, "learning_rate": 0.00024868670138157047, "loss": 0.1769, "step": 86018 }, { "epoch": 0.15252056997515023, "grad_norm": 0.40234375, "learning_rate": 0.00024866739255953587, "loss": 0.1979, "step": 86020 }, { "epoch": 0.15252411614046008, "grad_norm": 0.66015625, "learning_rate": 0.00024864808746073583, "loss": 0.2675, "step": 86022 }, { "epoch": 0.1525276623057699, "grad_norm": 0.197265625, "learning_rate": 0.0002486287860852543, "loss": 0.164, "step": 86024 }, { "epoch": 0.1525312084710797, "grad_norm": 0.39453125, "learning_rate": 0.0002486094884331761, "loss": 0.1898, "step": 86026 }, { "epoch": 0.15253475463638952, "grad_norm": 0.494140625, "learning_rate": 0.0002485901945045853, "loss": 0.1592, "step": 86028 }, { "epoch": 0.15253830080169933, "grad_norm": 0.326171875, "learning_rate": 0.0002485709042995667, "loss": 0.1545, "step": 86030 }, { "epoch": 0.15254184696700915, "grad_norm": 0.54296875, "learning_rate": 0.0002485516178182041, "loss": 0.4195, "step": 86032 }, { "epoch": 0.15254539313231896, "grad_norm": 0.3359375, "learning_rate": 0.0002485323350605823, "loss": 0.1556, "step": 86034 }, { "epoch": 0.15254893929762878, "grad_norm": 0.25, "learning_rate": 0.0002485130560267855, "loss": 0.1647, "step": 86036 }, { "epoch": 0.1525524854629386, "grad_norm": 0.6171875, "learning_rate": 0.000248493780716898, "loss": 0.1785, "step": 86038 }, { "epoch": 0.1525560316282484, "grad_norm": 8.3125, "learning_rate": 0.000248474509131004, "loss": 0.2835, "step": 86040 }, { "epoch": 0.15255957779355822, "grad_norm": 0.8046875, "learning_rate": 0.00024845524126918786, "loss": 0.1944, "step": 86042 }, { "epoch": 0.15256312395886804, "grad_norm": 1.25, "learning_rate": 0.00024843597713153386, "loss": 0.222, "step": 86044 }, { "epoch": 0.15256667012417785, "grad_norm": 0.1962890625, "learning_rate": 0.0002484167167181263, "loss": 0.1267, "step": 86046 }, { "epoch": 0.15257021628948766, "grad_norm": 0.416015625, "learning_rate": 0.0002483974600290494, "loss": 0.2241, "step": 86048 }, { "epoch": 0.15257376245479748, "grad_norm": 0.373046875, "learning_rate": 0.00024837820706438734, "loss": 0.1638, "step": 86050 }, { "epoch": 0.1525773086201073, "grad_norm": 0.75390625, "learning_rate": 0.0002483589578242241, "loss": 0.2491, "step": 86052 }, { "epoch": 0.1525808547854171, "grad_norm": 0.6328125, "learning_rate": 0.0002483397123086444, "loss": 0.2021, "step": 86054 }, { "epoch": 0.15258440095072692, "grad_norm": 0.212890625, "learning_rate": 0.0002483204705177319, "loss": 0.1432, "step": 86056 }, { "epoch": 0.15258794711603674, "grad_norm": 0.53515625, "learning_rate": 0.000248301232451571, "loss": 0.1564, "step": 86058 }, { "epoch": 0.15259149328134655, "grad_norm": 0.1640625, "learning_rate": 0.0002482819981102457, "loss": 0.1372, "step": 86060 }, { "epoch": 0.15259503944665637, "grad_norm": 0.443359375, "learning_rate": 0.0002482627674938404, "loss": 0.1493, "step": 86062 }, { "epoch": 0.15259858561196618, "grad_norm": 0.265625, "learning_rate": 0.00024824354060243883, "loss": 0.1758, "step": 86064 }, { "epoch": 0.152602131777276, "grad_norm": 0.306640625, "learning_rate": 0.0002482243174361254, "loss": 0.1837, "step": 86066 }, { "epoch": 0.1526056779425858, "grad_norm": 1.015625, "learning_rate": 0.0002482050979949839, "loss": 0.1825, "step": 86068 }, { "epoch": 0.15260922410789562, "grad_norm": 0.90625, "learning_rate": 0.0002481858822790986, "loss": 0.2179, "step": 86070 }, { "epoch": 0.15261277027320544, "grad_norm": 0.71484375, "learning_rate": 0.00024816667028855347, "loss": 0.2159, "step": 86072 }, { "epoch": 0.15261631643851525, "grad_norm": 7.375, "learning_rate": 0.0002481474620234324, "loss": 0.1916, "step": 86074 }, { "epoch": 0.15261986260382507, "grad_norm": 0.2041015625, "learning_rate": 0.0002481282574838196, "loss": 0.2176, "step": 86076 }, { "epoch": 0.15262340876913488, "grad_norm": 0.94140625, "learning_rate": 0.0002481090566697989, "loss": 0.3975, "step": 86078 }, { "epoch": 0.1526269549344447, "grad_norm": 0.2236328125, "learning_rate": 0.0002480898595814545, "loss": 0.2218, "step": 86080 }, { "epoch": 0.1526305010997545, "grad_norm": 0.4765625, "learning_rate": 0.0002480706662188701, "loss": 0.1521, "step": 86082 }, { "epoch": 0.15263404726506433, "grad_norm": 0.322265625, "learning_rate": 0.0002480514765821298, "loss": 0.1668, "step": 86084 }, { "epoch": 0.15263759343037414, "grad_norm": 0.53515625, "learning_rate": 0.00024803229067131745, "loss": 0.1547, "step": 86086 }, { "epoch": 0.15264113959568396, "grad_norm": 0.2578125, "learning_rate": 0.00024801310848651697, "loss": 0.1883, "step": 86088 }, { "epoch": 0.15264468576099377, "grad_norm": 0.54296875, "learning_rate": 0.00024799393002781237, "loss": 0.1905, "step": 86090 }, { "epoch": 0.15264823192630358, "grad_norm": 1.375, "learning_rate": 0.0002479747552952872, "loss": 0.3816, "step": 86092 }, { "epoch": 0.1526517780916134, "grad_norm": 0.423828125, "learning_rate": 0.0002479555842890256, "loss": 0.1264, "step": 86094 }, { "epoch": 0.1526553242569232, "grad_norm": 0.53125, "learning_rate": 0.00024793641700911147, "loss": 0.2144, "step": 86096 }, { "epoch": 0.15265887042223303, "grad_norm": 0.640625, "learning_rate": 0.00024791725345562845, "loss": 0.2182, "step": 86098 }, { "epoch": 0.15266241658754284, "grad_norm": 0.263671875, "learning_rate": 0.0002478980936286603, "loss": 0.1931, "step": 86100 }, { "epoch": 0.15266596275285266, "grad_norm": 0.482421875, "learning_rate": 0.00024787893752829104, "loss": 0.2197, "step": 86102 }, { "epoch": 0.15266950891816247, "grad_norm": 0.375, "learning_rate": 0.0002478597851546043, "loss": 0.1523, "step": 86104 }, { "epoch": 0.1526730550834723, "grad_norm": 0.671875, "learning_rate": 0.00024784063650768406, "loss": 0.1776, "step": 86106 }, { "epoch": 0.1526766012487821, "grad_norm": 3.609375, "learning_rate": 0.00024782149158761367, "loss": 0.3079, "step": 86108 }, { "epoch": 0.15268014741409192, "grad_norm": 0.205078125, "learning_rate": 0.0002478023503944771, "loss": 0.183, "step": 86110 }, { "epoch": 0.15268369357940176, "grad_norm": 0.291015625, "learning_rate": 0.00024778321292835817, "loss": 0.1556, "step": 86112 }, { "epoch": 0.15268723974471157, "grad_norm": 0.244140625, "learning_rate": 0.0002477640791893403, "loss": 0.1472, "step": 86114 }, { "epoch": 0.1526907859100214, "grad_norm": 0.52734375, "learning_rate": 0.00024774494917750736, "loss": 0.211, "step": 86116 }, { "epoch": 0.1526943320753312, "grad_norm": 0.337890625, "learning_rate": 0.0002477258228929428, "loss": 0.1567, "step": 86118 }, { "epoch": 0.15269787824064102, "grad_norm": 0.271484375, "learning_rate": 0.0002477067003357307, "loss": 0.1657, "step": 86120 }, { "epoch": 0.15270142440595083, "grad_norm": 0.427734375, "learning_rate": 0.0002476875815059543, "loss": 0.153, "step": 86122 }, { "epoch": 0.15270497057126065, "grad_norm": 0.2412109375, "learning_rate": 0.0002476684664036973, "loss": 0.1729, "step": 86124 }, { "epoch": 0.15270851673657046, "grad_norm": 0.33984375, "learning_rate": 0.0002476493550290434, "loss": 0.1565, "step": 86126 }, { "epoch": 0.15271206290188027, "grad_norm": 0.765625, "learning_rate": 0.0002476302473820761, "loss": 0.1879, "step": 86128 }, { "epoch": 0.1527156090671901, "grad_norm": 0.1611328125, "learning_rate": 0.00024761114346287907, "loss": 0.1423, "step": 86130 }, { "epoch": 0.1527191552324999, "grad_norm": 2.859375, "learning_rate": 0.0002475920432715355, "loss": 0.196, "step": 86132 }, { "epoch": 0.15272270139780972, "grad_norm": 0.60546875, "learning_rate": 0.0002475729468081294, "loss": 0.2011, "step": 86134 }, { "epoch": 0.15272624756311953, "grad_norm": 0.412109375, "learning_rate": 0.00024755385407274413, "loss": 0.1941, "step": 86136 }, { "epoch": 0.15272979372842935, "grad_norm": 0.41796875, "learning_rate": 0.00024753476506546307, "loss": 0.211, "step": 86138 }, { "epoch": 0.15273333989373916, "grad_norm": 0.3203125, "learning_rate": 0.00024751567978636963, "loss": 0.1352, "step": 86140 }, { "epoch": 0.15273688605904898, "grad_norm": 0.455078125, "learning_rate": 0.0002474965982355476, "loss": 0.1893, "step": 86142 }, { "epoch": 0.1527404322243588, "grad_norm": 0.2060546875, "learning_rate": 0.0002474775204130802, "loss": 0.1788, "step": 86144 }, { "epoch": 0.1527439783896686, "grad_norm": 0.298828125, "learning_rate": 0.000247458446319051, "loss": 0.1595, "step": 86146 }, { "epoch": 0.15274752455497842, "grad_norm": 0.263671875, "learning_rate": 0.0002474393759535434, "loss": 0.1508, "step": 86148 }, { "epoch": 0.15275107072028823, "grad_norm": 1.0078125, "learning_rate": 0.00024742030931664043, "loss": 0.2948, "step": 86150 }, { "epoch": 0.15275461688559805, "grad_norm": 0.51171875, "learning_rate": 0.0002474012464084262, "loss": 0.2024, "step": 86152 }, { "epoch": 0.15275816305090786, "grad_norm": 2.6875, "learning_rate": 0.0002473821872289833, "loss": 0.32, "step": 86154 }, { "epoch": 0.15276170921621768, "grad_norm": 0.2216796875, "learning_rate": 0.00024736313177839566, "loss": 0.1781, "step": 86156 }, { "epoch": 0.1527652553815275, "grad_norm": 0.52734375, "learning_rate": 0.00024734408005674626, "loss": 0.1655, "step": 86158 }, { "epoch": 0.1527688015468373, "grad_norm": 0.609375, "learning_rate": 0.0002473250320641188, "loss": 0.179, "step": 86160 }, { "epoch": 0.15277234771214712, "grad_norm": 1.2578125, "learning_rate": 0.00024730598780059617, "loss": 0.1889, "step": 86162 }, { "epoch": 0.15277589387745694, "grad_norm": 0.396484375, "learning_rate": 0.00024728694726626195, "loss": 0.1647, "step": 86164 }, { "epoch": 0.15277944004276675, "grad_norm": 0.58203125, "learning_rate": 0.00024726791046119923, "loss": 0.1843, "step": 86166 }, { "epoch": 0.15278298620807657, "grad_norm": 1.078125, "learning_rate": 0.00024724887738549134, "loss": 0.2321, "step": 86168 }, { "epoch": 0.15278653237338638, "grad_norm": 0.60546875, "learning_rate": 0.0002472298480392217, "loss": 0.1477, "step": 86170 }, { "epoch": 0.1527900785386962, "grad_norm": 0.296875, "learning_rate": 0.00024721082242247315, "loss": 0.1621, "step": 86172 }, { "epoch": 0.152793624704006, "grad_norm": 0.2421875, "learning_rate": 0.0002471918005353291, "loss": 0.1761, "step": 86174 }, { "epoch": 0.15279717086931582, "grad_norm": 0.67578125, "learning_rate": 0.00024717278237787276, "loss": 0.2985, "step": 86176 }, { "epoch": 0.15280071703462564, "grad_norm": 0.55859375, "learning_rate": 0.0002471537679501875, "loss": 0.1872, "step": 86178 }, { "epoch": 0.15280426319993545, "grad_norm": 0.640625, "learning_rate": 0.00024713475725235607, "loss": 0.2128, "step": 86180 }, { "epoch": 0.15280780936524527, "grad_norm": 0.314453125, "learning_rate": 0.00024711575028446185, "loss": 0.1888, "step": 86182 }, { "epoch": 0.15281135553055508, "grad_norm": 0.322265625, "learning_rate": 0.000247096747046588, "loss": 0.2022, "step": 86184 }, { "epoch": 0.1528149016958649, "grad_norm": 0.84375, "learning_rate": 0.00024707774753881753, "loss": 0.1799, "step": 86186 }, { "epoch": 0.1528184478611747, "grad_norm": 0.5234375, "learning_rate": 0.00024705875176123354, "loss": 0.1648, "step": 86188 }, { "epoch": 0.15282199402648453, "grad_norm": 0.3203125, "learning_rate": 0.0002470397597139189, "loss": 0.1554, "step": 86190 }, { "epoch": 0.15282554019179434, "grad_norm": 1.03125, "learning_rate": 0.00024702077139695714, "loss": 0.2661, "step": 86192 }, { "epoch": 0.15282908635710415, "grad_norm": 0.2119140625, "learning_rate": 0.000247001786810431, "loss": 0.1758, "step": 86194 }, { "epoch": 0.15283263252241397, "grad_norm": 2.046875, "learning_rate": 0.00024698280595442367, "loss": 0.3071, "step": 86196 }, { "epoch": 0.15283617868772378, "grad_norm": 0.28515625, "learning_rate": 0.00024696382882901773, "loss": 0.2311, "step": 86198 }, { "epoch": 0.1528397248530336, "grad_norm": 0.59375, "learning_rate": 0.0002469448554342968, "loss": 0.1709, "step": 86200 }, { "epoch": 0.1528432710183434, "grad_norm": 0.314453125, "learning_rate": 0.00024692588577034357, "loss": 0.1322, "step": 86202 }, { "epoch": 0.15284681718365326, "grad_norm": 0.2109375, "learning_rate": 0.00024690691983724085, "loss": 0.1974, "step": 86204 }, { "epoch": 0.15285036334896307, "grad_norm": 1.1171875, "learning_rate": 0.0002468879576350719, "loss": 0.1849, "step": 86206 }, { "epoch": 0.15285390951427288, "grad_norm": 1.2421875, "learning_rate": 0.0002468689991639192, "loss": 0.2219, "step": 86208 }, { "epoch": 0.1528574556795827, "grad_norm": 0.7109375, "learning_rate": 0.0002468500444238662, "loss": 0.1557, "step": 86210 }, { "epoch": 0.1528610018448925, "grad_norm": 0.72265625, "learning_rate": 0.0002468310934149953, "loss": 0.1709, "step": 86212 }, { "epoch": 0.15286454801020233, "grad_norm": 0.462890625, "learning_rate": 0.00024681214613738985, "loss": 0.2212, "step": 86214 }, { "epoch": 0.15286809417551214, "grad_norm": 0.439453125, "learning_rate": 0.0002467932025911324, "loss": 0.2265, "step": 86216 }, { "epoch": 0.15287164034082196, "grad_norm": 0.91796875, "learning_rate": 0.00024677426277630593, "loss": 0.1784, "step": 86218 }, { "epoch": 0.15287518650613177, "grad_norm": 0.55859375, "learning_rate": 0.0002467553266929931, "loss": 0.162, "step": 86220 }, { "epoch": 0.1528787326714416, "grad_norm": 0.765625, "learning_rate": 0.00024673639434127676, "loss": 0.1844, "step": 86222 }, { "epoch": 0.1528822788367514, "grad_norm": 0.55078125, "learning_rate": 0.00024671746572124, "loss": 0.1834, "step": 86224 }, { "epoch": 0.15288582500206122, "grad_norm": 0.6171875, "learning_rate": 0.0002466985408329654, "loss": 0.1864, "step": 86226 }, { "epoch": 0.15288937116737103, "grad_norm": 0.61328125, "learning_rate": 0.00024667961967653564, "loss": 0.4175, "step": 86228 }, { "epoch": 0.15289291733268084, "grad_norm": 0.466796875, "learning_rate": 0.0002466607022520336, "loss": 0.1242, "step": 86230 }, { "epoch": 0.15289646349799066, "grad_norm": 0.259765625, "learning_rate": 0.0002466417885595419, "loss": 0.1746, "step": 86232 }, { "epoch": 0.15290000966330047, "grad_norm": 0.2431640625, "learning_rate": 0.0002466228785991434, "loss": 0.1862, "step": 86234 }, { "epoch": 0.1529035558286103, "grad_norm": 0.2197265625, "learning_rate": 0.00024660397237092073, "loss": 0.1681, "step": 86236 }, { "epoch": 0.1529071019939201, "grad_norm": 0.578125, "learning_rate": 0.0002465850698749566, "loss": 0.1882, "step": 86238 }, { "epoch": 0.15291064815922992, "grad_norm": 0.578125, "learning_rate": 0.0002465661711113334, "loss": 0.1834, "step": 86240 }, { "epoch": 0.15291419432453973, "grad_norm": 1.796875, "learning_rate": 0.00024654727608013435, "loss": 0.2568, "step": 86242 }, { "epoch": 0.15291774048984955, "grad_norm": 1.4296875, "learning_rate": 0.00024652838478144166, "loss": 0.2847, "step": 86244 }, { "epoch": 0.15292128665515936, "grad_norm": 0.52734375, "learning_rate": 0.00024650949721533803, "loss": 0.3739, "step": 86246 }, { "epoch": 0.15292483282046918, "grad_norm": 0.345703125, "learning_rate": 0.0002464906133819061, "loss": 0.1694, "step": 86248 }, { "epoch": 0.152928378985779, "grad_norm": 0.41015625, "learning_rate": 0.00024647173328122844, "loss": 0.188, "step": 86250 }, { "epoch": 0.1529319251510888, "grad_norm": 0.310546875, "learning_rate": 0.0002464528569133877, "loss": 0.1809, "step": 86252 }, { "epoch": 0.15293547131639862, "grad_norm": 0.490234375, "learning_rate": 0.0002464339842784664, "loss": 0.3037, "step": 86254 }, { "epoch": 0.15293901748170843, "grad_norm": 0.376953125, "learning_rate": 0.00024641511537654696, "loss": 0.1428, "step": 86256 }, { "epoch": 0.15294256364701825, "grad_norm": 0.70703125, "learning_rate": 0.0002463962502077119, "loss": 0.188, "step": 86258 }, { "epoch": 0.15294610981232806, "grad_norm": 2.453125, "learning_rate": 0.00024637738877204406, "loss": 0.2549, "step": 86260 }, { "epoch": 0.15294965597763788, "grad_norm": 0.3984375, "learning_rate": 0.00024635853106962547, "loss": 0.1896, "step": 86262 }, { "epoch": 0.1529532021429477, "grad_norm": 1.9375, "learning_rate": 0.00024633967710053876, "loss": 0.1551, "step": 86264 }, { "epoch": 0.1529567483082575, "grad_norm": 0.66796875, "learning_rate": 0.00024632082686486647, "loss": 0.2144, "step": 86266 }, { "epoch": 0.15296029447356732, "grad_norm": 0.431640625, "learning_rate": 0.00024630198036269124, "loss": 0.1703, "step": 86268 }, { "epoch": 0.15296384063887714, "grad_norm": 0.2236328125, "learning_rate": 0.000246283137594095, "loss": 0.1783, "step": 86270 }, { "epoch": 0.15296738680418695, "grad_norm": 0.283203125, "learning_rate": 0.0002462642985591606, "loss": 0.1309, "step": 86272 }, { "epoch": 0.15297093296949676, "grad_norm": 0.62109375, "learning_rate": 0.0002462454632579701, "loss": 0.1956, "step": 86274 }, { "epoch": 0.15297447913480658, "grad_norm": 0.29296875, "learning_rate": 0.0002462266316906061, "loss": 0.3578, "step": 86276 }, { "epoch": 0.1529780253001164, "grad_norm": 0.98828125, "learning_rate": 0.0002462078038571509, "loss": 0.2029, "step": 86278 }, { "epoch": 0.1529815714654262, "grad_norm": 0.40625, "learning_rate": 0.0002461889797576867, "loss": 0.499, "step": 86280 }, { "epoch": 0.15298511763073602, "grad_norm": 0.373046875, "learning_rate": 0.0002461701593922961, "loss": 0.2179, "step": 86282 }, { "epoch": 0.15298866379604584, "grad_norm": 0.357421875, "learning_rate": 0.00024615134276106124, "loss": 0.1242, "step": 86284 }, { "epoch": 0.15299220996135565, "grad_norm": 0.36328125, "learning_rate": 0.0002461325298640644, "loss": 0.1782, "step": 86286 }, { "epoch": 0.15299575612666547, "grad_norm": 0.85546875, "learning_rate": 0.00024611372070138784, "loss": 0.1987, "step": 86288 }, { "epoch": 0.15299930229197528, "grad_norm": 1.4140625, "learning_rate": 0.0002460949152731139, "loss": 0.3252, "step": 86290 }, { "epoch": 0.1530028484572851, "grad_norm": 0.97265625, "learning_rate": 0.00024607611357932484, "loss": 0.1862, "step": 86292 }, { "epoch": 0.15300639462259494, "grad_norm": 0.92578125, "learning_rate": 0.00024605731562010285, "loss": 0.2383, "step": 86294 }, { "epoch": 0.15300994078790475, "grad_norm": 0.2001953125, "learning_rate": 0.0002460385213955301, "loss": 0.2452, "step": 86296 }, { "epoch": 0.15301348695321457, "grad_norm": 0.2490234375, "learning_rate": 0.0002460197309056887, "loss": 0.1546, "step": 86298 }, { "epoch": 0.15301703311852438, "grad_norm": 0.427734375, "learning_rate": 0.00024600094415066115, "loss": 0.249, "step": 86300 }, { "epoch": 0.1530205792838342, "grad_norm": 0.8515625, "learning_rate": 0.0002459821611305292, "loss": 0.1376, "step": 86302 }, { "epoch": 0.153024125449144, "grad_norm": 0.263671875, "learning_rate": 0.0002459633818453754, "loss": 0.195, "step": 86304 }, { "epoch": 0.15302767161445383, "grad_norm": 0.63671875, "learning_rate": 0.00024594460629528156, "loss": 0.1307, "step": 86306 }, { "epoch": 0.15303121777976364, "grad_norm": 0.376953125, "learning_rate": 0.0002459258344803301, "loss": 0.1915, "step": 86308 }, { "epoch": 0.15303476394507345, "grad_norm": 1.0, "learning_rate": 0.00024590706640060264, "loss": 0.1366, "step": 86310 }, { "epoch": 0.15303831011038327, "grad_norm": 0.73828125, "learning_rate": 0.00024588830205618176, "loss": 0.2271, "step": 86312 }, { "epoch": 0.15304185627569308, "grad_norm": 0.69140625, "learning_rate": 0.0002458695414471493, "loss": 0.1586, "step": 86314 }, { "epoch": 0.1530454024410029, "grad_norm": 0.2294921875, "learning_rate": 0.00024585078457358734, "loss": 0.1165, "step": 86316 }, { "epoch": 0.1530489486063127, "grad_norm": 1.3125, "learning_rate": 0.00024583203143557786, "loss": 0.2431, "step": 86318 }, { "epoch": 0.15305249477162253, "grad_norm": 0.296875, "learning_rate": 0.0002458132820332028, "loss": 0.1771, "step": 86320 }, { "epoch": 0.15305604093693234, "grad_norm": 0.31640625, "learning_rate": 0.00024579453636654447, "loss": 0.1487, "step": 86322 }, { "epoch": 0.15305958710224216, "grad_norm": 0.7265625, "learning_rate": 0.00024577579443568455, "loss": 0.1323, "step": 86324 }, { "epoch": 0.15306313326755197, "grad_norm": 0.421875, "learning_rate": 0.0002457570562407051, "loss": 0.1901, "step": 86326 }, { "epoch": 0.15306667943286179, "grad_norm": 1.4453125, "learning_rate": 0.0002457383217816881, "loss": 0.1898, "step": 86328 }, { "epoch": 0.1530702255981716, "grad_norm": 0.63671875, "learning_rate": 0.00024571959105871546, "loss": 0.1976, "step": 86330 }, { "epoch": 0.15307377176348141, "grad_norm": 0.5625, "learning_rate": 0.0002457008640718692, "loss": 0.1859, "step": 86332 }, { "epoch": 0.15307731792879123, "grad_norm": 0.2431640625, "learning_rate": 0.000245682140821231, "loss": 0.2832, "step": 86334 }, { "epoch": 0.15308086409410104, "grad_norm": 0.62109375, "learning_rate": 0.000245663421306883, "loss": 0.2097, "step": 86336 }, { "epoch": 0.15308441025941086, "grad_norm": 0.34375, "learning_rate": 0.0002456447055289068, "loss": 0.3175, "step": 86338 }, { "epoch": 0.15308795642472067, "grad_norm": 0.3203125, "learning_rate": 0.0002456259934873844, "loss": 0.1969, "step": 86340 }, { "epoch": 0.1530915025900305, "grad_norm": 0.4921875, "learning_rate": 0.0002456072851823977, "loss": 0.2151, "step": 86342 }, { "epoch": 0.1530950487553403, "grad_norm": 0.4765625, "learning_rate": 0.00024558858061402846, "loss": 0.1761, "step": 86344 }, { "epoch": 0.15309859492065012, "grad_norm": 3.0625, "learning_rate": 0.00024556987978235835, "loss": 0.5074, "step": 86346 }, { "epoch": 0.15310214108595993, "grad_norm": 0.39453125, "learning_rate": 0.0002455511826874693, "loss": 0.1638, "step": 86348 }, { "epoch": 0.15310568725126975, "grad_norm": 0.875, "learning_rate": 0.0002455324893294432, "loss": 0.1846, "step": 86350 }, { "epoch": 0.15310923341657956, "grad_norm": 0.486328125, "learning_rate": 0.00024551379970836154, "loss": 0.1522, "step": 86352 }, { "epoch": 0.15311277958188937, "grad_norm": 0.337890625, "learning_rate": 0.0002454951138243063, "loss": 0.4027, "step": 86354 }, { "epoch": 0.1531163257471992, "grad_norm": 0.5859375, "learning_rate": 0.00024547643167735885, "loss": 0.2239, "step": 86356 }, { "epoch": 0.153119871912509, "grad_norm": 0.291015625, "learning_rate": 0.0002454577532676013, "loss": 0.1747, "step": 86358 }, { "epoch": 0.15312341807781882, "grad_norm": 0.3046875, "learning_rate": 0.00024543907859511503, "loss": 0.3441, "step": 86360 }, { "epoch": 0.15312696424312863, "grad_norm": 0.328125, "learning_rate": 0.000245420407659982, "loss": 0.1536, "step": 86362 }, { "epoch": 0.15313051040843845, "grad_norm": 0.46875, "learning_rate": 0.00024540174046228347, "loss": 0.2112, "step": 86364 }, { "epoch": 0.15313405657374826, "grad_norm": 0.57421875, "learning_rate": 0.00024538307700210164, "loss": 0.1902, "step": 86366 }, { "epoch": 0.15313760273905808, "grad_norm": 0.3125, "learning_rate": 0.00024536441727951756, "loss": 0.1669, "step": 86368 }, { "epoch": 0.1531411489043679, "grad_norm": 1.0625, "learning_rate": 0.0002453457612946132, "loss": 0.1463, "step": 86370 }, { "epoch": 0.1531446950696777, "grad_norm": 0.64453125, "learning_rate": 0.00024532710904747005, "loss": 0.296, "step": 86372 }, { "epoch": 0.15314824123498752, "grad_norm": 0.28125, "learning_rate": 0.0002453084605381697, "loss": 0.2022, "step": 86374 }, { "epoch": 0.15315178740029733, "grad_norm": 0.68359375, "learning_rate": 0.00024528981576679357, "loss": 0.1576, "step": 86376 }, { "epoch": 0.15315533356560715, "grad_norm": 0.349609375, "learning_rate": 0.00024527117473342324, "loss": 0.1907, "step": 86378 }, { "epoch": 0.15315887973091696, "grad_norm": 0.310546875, "learning_rate": 0.0002452525374381405, "loss": 0.138, "step": 86380 }, { "epoch": 0.15316242589622678, "grad_norm": 0.232421875, "learning_rate": 0.00024523390388102664, "loss": 0.1617, "step": 86382 }, { "epoch": 0.15316597206153662, "grad_norm": 0.3984375, "learning_rate": 0.00024521527406216306, "loss": 0.1805, "step": 86384 }, { "epoch": 0.15316951822684644, "grad_norm": 3.0625, "learning_rate": 0.0002451966479816314, "loss": 0.3753, "step": 86386 }, { "epoch": 0.15317306439215625, "grad_norm": 0.32421875, "learning_rate": 0.000245178025639513, "loss": 0.1444, "step": 86388 }, { "epoch": 0.15317661055746606, "grad_norm": 1.3203125, "learning_rate": 0.0002451594070358895, "loss": 0.2064, "step": 86390 }, { "epoch": 0.15318015672277588, "grad_norm": 0.94140625, "learning_rate": 0.0002451407921708422, "loss": 0.2051, "step": 86392 }, { "epoch": 0.1531837028880857, "grad_norm": 0.8515625, "learning_rate": 0.0002451221810444525, "loss": 0.2102, "step": 86394 }, { "epoch": 0.1531872490533955, "grad_norm": 0.498046875, "learning_rate": 0.0002451035736568017, "loss": 0.1497, "step": 86396 }, { "epoch": 0.15319079521870532, "grad_norm": 0.5390625, "learning_rate": 0.0002450849700079716, "loss": 0.1798, "step": 86398 }, { "epoch": 0.15319434138401514, "grad_norm": 0.353515625, "learning_rate": 0.0002450663700980428, "loss": 0.1561, "step": 86400 }, { "epoch": 0.15319788754932495, "grad_norm": 0.255859375, "learning_rate": 0.00024504777392709735, "loss": 0.116, "step": 86402 }, { "epoch": 0.15320143371463477, "grad_norm": 0.283203125, "learning_rate": 0.0002450291814952163, "loss": 0.149, "step": 86404 }, { "epoch": 0.15320497987994458, "grad_norm": 0.515625, "learning_rate": 0.000245010592802481, "loss": 0.1534, "step": 86406 }, { "epoch": 0.1532085260452544, "grad_norm": 0.439453125, "learning_rate": 0.00024499200784897275, "loss": 0.181, "step": 86408 }, { "epoch": 0.1532120722105642, "grad_norm": 0.373046875, "learning_rate": 0.0002449734266347727, "loss": 0.1602, "step": 86410 }, { "epoch": 0.15321561837587402, "grad_norm": 0.50390625, "learning_rate": 0.0002449548491599623, "loss": 0.1708, "step": 86412 }, { "epoch": 0.15321916454118384, "grad_norm": 0.26953125, "learning_rate": 0.0002449362754246225, "loss": 0.1433, "step": 86414 }, { "epoch": 0.15322271070649365, "grad_norm": 2.140625, "learning_rate": 0.0002449177054288352, "loss": 0.2988, "step": 86416 }, { "epoch": 0.15322625687180347, "grad_norm": 0.8203125, "learning_rate": 0.0002448991391726808, "loss": 0.2787, "step": 86418 }, { "epoch": 0.15322980303711328, "grad_norm": 0.26953125, "learning_rate": 0.00024488057665624104, "loss": 0.1395, "step": 86420 }, { "epoch": 0.1532333492024231, "grad_norm": 0.2578125, "learning_rate": 0.0002448620178795969, "loss": 0.1955, "step": 86422 }, { "epoch": 0.1532368953677329, "grad_norm": 0.53125, "learning_rate": 0.0002448434628428295, "loss": 0.1352, "step": 86424 }, { "epoch": 0.15324044153304273, "grad_norm": 0.2373046875, "learning_rate": 0.0002448249115460202, "loss": 0.1386, "step": 86426 }, { "epoch": 0.15324398769835254, "grad_norm": 1.421875, "learning_rate": 0.00024480636398924974, "loss": 0.2683, "step": 86428 }, { "epoch": 0.15324753386366236, "grad_norm": 0.3984375, "learning_rate": 0.0002447878201725996, "loss": 0.3185, "step": 86430 }, { "epoch": 0.15325108002897217, "grad_norm": 0.97265625, "learning_rate": 0.0002447692800961508, "loss": 0.1785, "step": 86432 }, { "epoch": 0.15325462619428198, "grad_norm": 0.21484375, "learning_rate": 0.0002447507437599845, "loss": 0.1901, "step": 86434 }, { "epoch": 0.1532581723595918, "grad_norm": 2.421875, "learning_rate": 0.0002447322111641814, "loss": 0.3465, "step": 86436 }, { "epoch": 0.1532617185249016, "grad_norm": 0.27734375, "learning_rate": 0.000244713682308823, "loss": 0.1819, "step": 86438 }, { "epoch": 0.15326526469021143, "grad_norm": 0.59375, "learning_rate": 0.0002446951571939902, "loss": 0.1566, "step": 86440 }, { "epoch": 0.15326881085552124, "grad_norm": 0.296875, "learning_rate": 0.00024467663581976385, "loss": 0.1712, "step": 86442 }, { "epoch": 0.15327235702083106, "grad_norm": 0.94921875, "learning_rate": 0.00024465811818622516, "loss": 0.1727, "step": 86444 }, { "epoch": 0.15327590318614087, "grad_norm": 0.439453125, "learning_rate": 0.0002446396042934548, "loss": 0.2242, "step": 86446 }, { "epoch": 0.1532794493514507, "grad_norm": 0.240234375, "learning_rate": 0.00024462109414153424, "loss": 0.1362, "step": 86448 }, { "epoch": 0.1532829955167605, "grad_norm": 0.455078125, "learning_rate": 0.000244602587730544, "loss": 0.1561, "step": 86450 }, { "epoch": 0.15328654168207032, "grad_norm": 0.353515625, "learning_rate": 0.00024458408506056515, "loss": 0.1418, "step": 86452 }, { "epoch": 0.15329008784738013, "grad_norm": 0.328125, "learning_rate": 0.0002445655861316786, "loss": 0.1487, "step": 86454 }, { "epoch": 0.15329363401268994, "grad_norm": 0.474609375, "learning_rate": 0.0002445470909439654, "loss": 0.213, "step": 86456 }, { "epoch": 0.15329718017799976, "grad_norm": 0.515625, "learning_rate": 0.0002445285994975062, "loss": 0.19, "step": 86458 }, { "epoch": 0.15330072634330957, "grad_norm": 0.515625, "learning_rate": 0.0002445101117923821, "loss": 0.1735, "step": 86460 }, { "epoch": 0.1533042725086194, "grad_norm": 0.361328125, "learning_rate": 0.00024449162782867376, "loss": 0.1763, "step": 86462 }, { "epoch": 0.1533078186739292, "grad_norm": 0.291015625, "learning_rate": 0.00024447314760646214, "loss": 0.2351, "step": 86464 }, { "epoch": 0.15331136483923902, "grad_norm": 2.0625, "learning_rate": 0.0002444546711258281, "loss": 0.2449, "step": 86466 }, { "epoch": 0.15331491100454883, "grad_norm": 0.259765625, "learning_rate": 0.0002444361983868522, "loss": 0.1409, "step": 86468 }, { "epoch": 0.15331845716985865, "grad_norm": 0.1796875, "learning_rate": 0.0002444177293896154, "loss": 0.1992, "step": 86470 }, { "epoch": 0.15332200333516846, "grad_norm": 1.0078125, "learning_rate": 0.00024439926413419863, "loss": 0.2819, "step": 86472 }, { "epoch": 0.15332554950047828, "grad_norm": 0.283203125, "learning_rate": 0.0002443808026206825, "loss": 0.1936, "step": 86474 }, { "epoch": 0.15332909566578812, "grad_norm": 0.36328125, "learning_rate": 0.00024436234484914753, "loss": 0.2023, "step": 86476 }, { "epoch": 0.15333264183109793, "grad_norm": 0.73046875, "learning_rate": 0.0002443438908196748, "loss": 0.2008, "step": 86478 }, { "epoch": 0.15333618799640775, "grad_norm": 0.349609375, "learning_rate": 0.00024432544053234486, "loss": 0.2023, "step": 86480 }, { "epoch": 0.15333973416171756, "grad_norm": 0.326171875, "learning_rate": 0.00024430699398723845, "loss": 0.1791, "step": 86482 }, { "epoch": 0.15334328032702738, "grad_norm": 0.412109375, "learning_rate": 0.0002442885511844361, "loss": 0.1411, "step": 86484 }, { "epoch": 0.1533468264923372, "grad_norm": 0.421875, "learning_rate": 0.00024427011212401856, "loss": 0.2008, "step": 86486 }, { "epoch": 0.153350372657647, "grad_norm": 0.2734375, "learning_rate": 0.0002442516768060666, "loss": 0.1844, "step": 86488 }, { "epoch": 0.15335391882295682, "grad_norm": 0.5, "learning_rate": 0.0002442332452306606, "loss": 0.1918, "step": 86490 }, { "epoch": 0.15335746498826663, "grad_norm": 1.5078125, "learning_rate": 0.0002442148173978814, "loss": 0.1923, "step": 86492 }, { "epoch": 0.15336101115357645, "grad_norm": 0.29296875, "learning_rate": 0.0002441963933078093, "loss": 0.1364, "step": 86494 }, { "epoch": 0.15336455731888626, "grad_norm": 0.396484375, "learning_rate": 0.00024417797296052523, "loss": 0.1739, "step": 86496 }, { "epoch": 0.15336810348419608, "grad_norm": 0.31640625, "learning_rate": 0.0002441595563561096, "loss": 0.1421, "step": 86498 }, { "epoch": 0.1533716496495059, "grad_norm": 0.88671875, "learning_rate": 0.0002441411434946429, "loss": 0.2001, "step": 86500 }, { "epoch": 0.1533751958148157, "grad_norm": 0.37109375, "learning_rate": 0.00024412273437620575, "loss": 0.1701, "step": 86502 }, { "epoch": 0.15337874198012552, "grad_norm": 0.251953125, "learning_rate": 0.0002441043290008784, "loss": 0.1393, "step": 86504 }, { "epoch": 0.15338228814543534, "grad_norm": 0.72265625, "learning_rate": 0.00024408592736874186, "loss": 0.1841, "step": 86506 }, { "epoch": 0.15338583431074515, "grad_norm": 0.271484375, "learning_rate": 0.0002440675294798761, "loss": 0.2165, "step": 86508 }, { "epoch": 0.15338938047605497, "grad_norm": 0.51953125, "learning_rate": 0.00024404913533436186, "loss": 0.2684, "step": 86510 }, { "epoch": 0.15339292664136478, "grad_norm": 0.796875, "learning_rate": 0.00024403074493227936, "loss": 0.1354, "step": 86512 }, { "epoch": 0.1533964728066746, "grad_norm": 0.412109375, "learning_rate": 0.0002440123582737094, "loss": 0.1873, "step": 86514 }, { "epoch": 0.1534000189719844, "grad_norm": 0.3359375, "learning_rate": 0.00024399397535873203, "loss": 0.2647, "step": 86516 }, { "epoch": 0.15340356513729422, "grad_norm": 0.7890625, "learning_rate": 0.00024397559618742775, "loss": 0.2335, "step": 86518 }, { "epoch": 0.15340711130260404, "grad_norm": 0.515625, "learning_rate": 0.0002439572207598771, "loss": 0.214, "step": 86520 }, { "epoch": 0.15341065746791385, "grad_norm": 0.3984375, "learning_rate": 0.00024393884907616026, "loss": 0.1865, "step": 86522 }, { "epoch": 0.15341420363322367, "grad_norm": 1.21875, "learning_rate": 0.00024392048113635763, "loss": 0.3595, "step": 86524 }, { "epoch": 0.15341774979853348, "grad_norm": 0.216796875, "learning_rate": 0.0002439021169405495, "loss": 0.1788, "step": 86526 }, { "epoch": 0.1534212959638433, "grad_norm": 0.400390625, "learning_rate": 0.00024388375648881633, "loss": 0.1664, "step": 86528 }, { "epoch": 0.1534248421291531, "grad_norm": 0.38671875, "learning_rate": 0.00024386539978123824, "loss": 0.3758, "step": 86530 }, { "epoch": 0.15342838829446293, "grad_norm": 0.255859375, "learning_rate": 0.00024384704681789567, "loss": 0.1718, "step": 86532 }, { "epoch": 0.15343193445977274, "grad_norm": 1.8046875, "learning_rate": 0.00024382869759886864, "loss": 0.2616, "step": 86534 }, { "epoch": 0.15343548062508255, "grad_norm": 0.3359375, "learning_rate": 0.0002438103521242377, "loss": 0.4325, "step": 86536 }, { "epoch": 0.15343902679039237, "grad_norm": 1.6171875, "learning_rate": 0.00024379201039408295, "loss": 0.4118, "step": 86538 }, { "epoch": 0.15344257295570218, "grad_norm": 0.28125, "learning_rate": 0.0002437736724084846, "loss": 0.1609, "step": 86540 }, { "epoch": 0.153446119121012, "grad_norm": 0.87890625, "learning_rate": 0.00024375533816752278, "loss": 0.1329, "step": 86542 }, { "epoch": 0.1534496652863218, "grad_norm": 0.337890625, "learning_rate": 0.00024373700767127768, "loss": 0.1888, "step": 86544 }, { "epoch": 0.15345321145163163, "grad_norm": 1.1953125, "learning_rate": 0.00024371868091982977, "loss": 0.1711, "step": 86546 }, { "epoch": 0.15345675761694144, "grad_norm": 0.484375, "learning_rate": 0.00024370035791325875, "loss": 0.2102, "step": 86548 }, { "epoch": 0.15346030378225126, "grad_norm": 0.2197265625, "learning_rate": 0.0002436820386516451, "loss": 0.169, "step": 86550 }, { "epoch": 0.15346384994756107, "grad_norm": 0.15625, "learning_rate": 0.00024366372313506862, "loss": 0.1772, "step": 86552 }, { "epoch": 0.15346739611287089, "grad_norm": 0.4453125, "learning_rate": 0.00024364541136360986, "loss": 0.1899, "step": 86554 }, { "epoch": 0.1534709422781807, "grad_norm": 0.578125, "learning_rate": 0.00024362710333734845, "loss": 0.1749, "step": 86556 }, { "epoch": 0.15347448844349051, "grad_norm": 0.388671875, "learning_rate": 0.0002436087990563647, "loss": 0.1657, "step": 86558 }, { "epoch": 0.15347803460880033, "grad_norm": 1.4921875, "learning_rate": 0.00024359049852073862, "loss": 0.1824, "step": 86560 }, { "epoch": 0.15348158077411014, "grad_norm": 0.63671875, "learning_rate": 0.00024357220173055027, "loss": 0.1675, "step": 86562 }, { "epoch": 0.15348512693941996, "grad_norm": 0.318359375, "learning_rate": 0.0002435539086858796, "loss": 0.1639, "step": 86564 }, { "epoch": 0.1534886731047298, "grad_norm": 0.74609375, "learning_rate": 0.0002435356193868066, "loss": 0.1493, "step": 86566 }, { "epoch": 0.15349221927003961, "grad_norm": 1.5, "learning_rate": 0.00024351733383341136, "loss": 0.2259, "step": 86568 }, { "epoch": 0.15349576543534943, "grad_norm": 0.578125, "learning_rate": 0.0002434990520257738, "loss": 0.247, "step": 86570 }, { "epoch": 0.15349931160065924, "grad_norm": 0.52734375, "learning_rate": 0.00024348077396397388, "loss": 0.1295, "step": 86572 }, { "epoch": 0.15350285776596906, "grad_norm": 0.45703125, "learning_rate": 0.00024346249964809157, "loss": 0.1736, "step": 86574 }, { "epoch": 0.15350640393127887, "grad_norm": 0.349609375, "learning_rate": 0.00024344422907820665, "loss": 0.1944, "step": 86576 }, { "epoch": 0.1535099500965887, "grad_norm": 1.1015625, "learning_rate": 0.0002434259622543992, "loss": 0.6136, "step": 86578 }, { "epoch": 0.1535134962618985, "grad_norm": 0.66015625, "learning_rate": 0.000243407699176749, "loss": 0.1346, "step": 86580 }, { "epoch": 0.15351704242720832, "grad_norm": 0.46484375, "learning_rate": 0.00024338943984533596, "loss": 0.1799, "step": 86582 }, { "epoch": 0.15352058859251813, "grad_norm": 0.427734375, "learning_rate": 0.00024337118426023986, "loss": 0.1666, "step": 86584 }, { "epoch": 0.15352413475782795, "grad_norm": 0.4765625, "learning_rate": 0.0002433529324215407, "loss": 0.2001, "step": 86586 }, { "epoch": 0.15352768092313776, "grad_norm": 1.7578125, "learning_rate": 0.00024333468432931823, "loss": 0.1693, "step": 86588 }, { "epoch": 0.15353122708844757, "grad_norm": 0.337890625, "learning_rate": 0.00024331643998365223, "loss": 0.1671, "step": 86590 }, { "epoch": 0.1535347732537574, "grad_norm": 2.796875, "learning_rate": 0.0002432981993846225, "loss": 0.3166, "step": 86592 }, { "epoch": 0.1535383194190672, "grad_norm": 0.259765625, "learning_rate": 0.0002432799625323087, "loss": 0.1672, "step": 86594 }, { "epoch": 0.15354186558437702, "grad_norm": 0.279296875, "learning_rate": 0.00024326172942679093, "loss": 0.1645, "step": 86596 }, { "epoch": 0.15354541174968683, "grad_norm": 1.0078125, "learning_rate": 0.00024324350006814843, "loss": 0.1617, "step": 86598 }, { "epoch": 0.15354895791499665, "grad_norm": 0.84375, "learning_rate": 0.00024322527445646137, "loss": 0.1652, "step": 86600 }, { "epoch": 0.15355250408030646, "grad_norm": 0.244140625, "learning_rate": 0.0002432070525918092, "loss": 0.1818, "step": 86602 }, { "epoch": 0.15355605024561628, "grad_norm": 0.2080078125, "learning_rate": 0.00024318883447427186, "loss": 0.1924, "step": 86604 }, { "epoch": 0.1535595964109261, "grad_norm": 0.25, "learning_rate": 0.00024317062010392865, "loss": 0.2852, "step": 86606 }, { "epoch": 0.1535631425762359, "grad_norm": 1.7421875, "learning_rate": 0.00024315240948085957, "loss": 0.2588, "step": 86608 }, { "epoch": 0.15356668874154572, "grad_norm": 0.41796875, "learning_rate": 0.00024313420260514408, "loss": 0.1265, "step": 86610 }, { "epoch": 0.15357023490685554, "grad_norm": 0.298828125, "learning_rate": 0.00024311599947686197, "loss": 0.1517, "step": 86612 }, { "epoch": 0.15357378107216535, "grad_norm": 0.36328125, "learning_rate": 0.00024309780009609266, "loss": 0.1675, "step": 86614 }, { "epoch": 0.15357732723747516, "grad_norm": 0.59765625, "learning_rate": 0.00024307960446291578, "loss": 0.1497, "step": 86616 }, { "epoch": 0.15358087340278498, "grad_norm": 0.8671875, "learning_rate": 0.000243061412577411, "loss": 0.2101, "step": 86618 }, { "epoch": 0.1535844195680948, "grad_norm": 1.9296875, "learning_rate": 0.0002430432244396578, "loss": 0.2912, "step": 86620 }, { "epoch": 0.1535879657334046, "grad_norm": 0.29296875, "learning_rate": 0.00024302504004973584, "loss": 0.1597, "step": 86622 }, { "epoch": 0.15359151189871442, "grad_norm": 0.26953125, "learning_rate": 0.00024300685940772439, "loss": 0.201, "step": 86624 }, { "epoch": 0.15359505806402424, "grad_norm": 0.251953125, "learning_rate": 0.00024298868251370332, "loss": 0.1646, "step": 86626 }, { "epoch": 0.15359860422933405, "grad_norm": 0.21875, "learning_rate": 0.00024297050936775185, "loss": 0.1536, "step": 86628 }, { "epoch": 0.15360215039464387, "grad_norm": 0.7734375, "learning_rate": 0.00024295233996994963, "loss": 0.1912, "step": 86630 }, { "epoch": 0.15360569655995368, "grad_norm": 0.21484375, "learning_rate": 0.00024293417432037605, "loss": 0.1841, "step": 86632 }, { "epoch": 0.1536092427252635, "grad_norm": 0.462890625, "learning_rate": 0.0002429160124191103, "loss": 0.1548, "step": 86634 }, { "epoch": 0.1536127888905733, "grad_norm": 0.314453125, "learning_rate": 0.00024289785426623244, "loss": 0.1415, "step": 86636 }, { "epoch": 0.15361633505588312, "grad_norm": 0.2021484375, "learning_rate": 0.00024287969986182116, "loss": 0.1594, "step": 86638 }, { "epoch": 0.15361988122119294, "grad_norm": 0.5859375, "learning_rate": 0.00024286154920595643, "loss": 0.2534, "step": 86640 }, { "epoch": 0.15362342738650275, "grad_norm": 0.57421875, "learning_rate": 0.00024284340229871715, "loss": 0.1612, "step": 86642 }, { "epoch": 0.15362697355181257, "grad_norm": 0.443359375, "learning_rate": 0.0002428252591401833, "loss": 0.184, "step": 86644 }, { "epoch": 0.15363051971712238, "grad_norm": 0.20703125, "learning_rate": 0.00024280711973043354, "loss": 0.167, "step": 86646 }, { "epoch": 0.1536340658824322, "grad_norm": 0.40625, "learning_rate": 0.0002427889840695476, "loss": 0.138, "step": 86648 }, { "epoch": 0.153637612047742, "grad_norm": 0.28125, "learning_rate": 0.0002427708521576048, "loss": 0.1293, "step": 86650 }, { "epoch": 0.15364115821305183, "grad_norm": 0.236328125, "learning_rate": 0.00024275272399468432, "loss": 0.1398, "step": 86652 }, { "epoch": 0.15364470437836164, "grad_norm": 0.2451171875, "learning_rate": 0.00024273459958086554, "loss": 0.1593, "step": 86654 }, { "epoch": 0.15364825054367148, "grad_norm": 0.7890625, "learning_rate": 0.00024271647891622745, "loss": 0.2084, "step": 86656 }, { "epoch": 0.1536517967089813, "grad_norm": 0.8359375, "learning_rate": 0.00024269836200084967, "loss": 0.2386, "step": 86658 }, { "epoch": 0.1536553428742911, "grad_norm": 0.208984375, "learning_rate": 0.0002426802488348113, "loss": 0.1318, "step": 86660 }, { "epoch": 0.15365888903960093, "grad_norm": 0.26171875, "learning_rate": 0.00024266213941819153, "loss": 0.1488, "step": 86662 }, { "epoch": 0.15366243520491074, "grad_norm": 0.52734375, "learning_rate": 0.00024264403375106935, "loss": 0.257, "step": 86664 }, { "epoch": 0.15366598137022056, "grad_norm": 0.2470703125, "learning_rate": 0.00024262593183352443, "loss": 0.1587, "step": 86666 }, { "epoch": 0.15366952753553037, "grad_norm": 0.296875, "learning_rate": 0.00024260783366563548, "loss": 0.1364, "step": 86668 }, { "epoch": 0.15367307370084018, "grad_norm": 0.373046875, "learning_rate": 0.00024258973924748197, "loss": 0.2563, "step": 86670 }, { "epoch": 0.15367661986615, "grad_norm": 0.640625, "learning_rate": 0.00024257164857914283, "loss": 0.2609, "step": 86672 }, { "epoch": 0.15368016603145981, "grad_norm": 0.87890625, "learning_rate": 0.00024255356166069718, "loss": 0.2076, "step": 86674 }, { "epoch": 0.15368371219676963, "grad_norm": 0.96484375, "learning_rate": 0.00024253547849222424, "loss": 0.3865, "step": 86676 }, { "epoch": 0.15368725836207944, "grad_norm": 1.9296875, "learning_rate": 0.00024251739907380307, "loss": 0.4368, "step": 86678 }, { "epoch": 0.15369080452738926, "grad_norm": 0.59765625, "learning_rate": 0.00024249932340551273, "loss": 0.1615, "step": 86680 }, { "epoch": 0.15369435069269907, "grad_norm": 0.51953125, "learning_rate": 0.0002424812514874321, "loss": 0.1485, "step": 86682 }, { "epoch": 0.1536978968580089, "grad_norm": 0.33203125, "learning_rate": 0.0002424631833196406, "loss": 0.1446, "step": 86684 }, { "epoch": 0.1537014430233187, "grad_norm": 0.431640625, "learning_rate": 0.0002424451189022169, "loss": 0.2033, "step": 86686 }, { "epoch": 0.15370498918862852, "grad_norm": 1.1171875, "learning_rate": 0.00024242705823524023, "loss": 0.1943, "step": 86688 }, { "epoch": 0.15370853535393833, "grad_norm": 0.45703125, "learning_rate": 0.00024240900131878936, "loss": 0.1592, "step": 86690 }, { "epoch": 0.15371208151924814, "grad_norm": 0.326171875, "learning_rate": 0.0002423909481529433, "loss": 0.1888, "step": 86692 }, { "epoch": 0.15371562768455796, "grad_norm": 1.359375, "learning_rate": 0.00024237289873778122, "loss": 0.1562, "step": 86694 }, { "epoch": 0.15371917384986777, "grad_norm": 0.44140625, "learning_rate": 0.00024235485307338184, "loss": 0.1863, "step": 86696 }, { "epoch": 0.1537227200151776, "grad_norm": 0.56640625, "learning_rate": 0.0002423368111598242, "loss": 0.1671, "step": 86698 }, { "epoch": 0.1537262661804874, "grad_norm": 0.26171875, "learning_rate": 0.00024231877299718695, "loss": 0.1576, "step": 86700 }, { "epoch": 0.15372981234579722, "grad_norm": 9.8125, "learning_rate": 0.00024230073858554958, "loss": 0.2821, "step": 86702 }, { "epoch": 0.15373335851110703, "grad_norm": 0.41796875, "learning_rate": 0.00024228270792499016, "loss": 0.1711, "step": 86704 }, { "epoch": 0.15373690467641685, "grad_norm": 0.77734375, "learning_rate": 0.00024226468101558825, "loss": 0.2073, "step": 86706 }, { "epoch": 0.15374045084172666, "grad_norm": 0.271484375, "learning_rate": 0.00024224665785742223, "loss": 0.1415, "step": 86708 }, { "epoch": 0.15374399700703648, "grad_norm": 0.5546875, "learning_rate": 0.0002422286384505712, "loss": 0.1827, "step": 86710 }, { "epoch": 0.1537475431723463, "grad_norm": 0.578125, "learning_rate": 0.00024221062279511385, "loss": 0.1966, "step": 86712 }, { "epoch": 0.1537510893376561, "grad_norm": 0.46484375, "learning_rate": 0.00024219261089112874, "loss": 0.1913, "step": 86714 }, { "epoch": 0.15375463550296592, "grad_norm": 0.30859375, "learning_rate": 0.00024217460273869508, "loss": 0.1147, "step": 86716 }, { "epoch": 0.15375818166827573, "grad_norm": 0.455078125, "learning_rate": 0.00024215659833789143, "loss": 0.1866, "step": 86718 }, { "epoch": 0.15376172783358555, "grad_norm": 0.392578125, "learning_rate": 0.00024213859768879655, "loss": 0.1524, "step": 86720 }, { "epoch": 0.15376527399889536, "grad_norm": 0.306640625, "learning_rate": 0.00024212060079148893, "loss": 0.1686, "step": 86722 }, { "epoch": 0.15376882016420518, "grad_norm": 0.50390625, "learning_rate": 0.00024210260764604764, "loss": 0.2433, "step": 86724 }, { "epoch": 0.153772366329515, "grad_norm": 3.34375, "learning_rate": 0.00024208461825255128, "loss": 0.3882, "step": 86726 }, { "epoch": 0.1537759124948248, "grad_norm": 0.46875, "learning_rate": 0.00024206663261107844, "loss": 0.1523, "step": 86728 }, { "epoch": 0.15377945866013462, "grad_norm": 0.291015625, "learning_rate": 0.00024204865072170774, "loss": 0.2052, "step": 86730 }, { "epoch": 0.15378300482544444, "grad_norm": 0.32421875, "learning_rate": 0.00024203067258451777, "loss": 0.1693, "step": 86732 }, { "epoch": 0.15378655099075425, "grad_norm": 2.28125, "learning_rate": 0.0002420126981995876, "loss": 0.1766, "step": 86734 }, { "epoch": 0.15379009715606407, "grad_norm": 0.43359375, "learning_rate": 0.0002419947275669952, "loss": 0.1685, "step": 86736 }, { "epoch": 0.15379364332137388, "grad_norm": 0.37109375, "learning_rate": 0.00024197676068681955, "loss": 0.1284, "step": 86738 }, { "epoch": 0.1537971894866837, "grad_norm": 0.203125, "learning_rate": 0.00024195879755913925, "loss": 0.3644, "step": 86740 }, { "epoch": 0.1538007356519935, "grad_norm": 2.21875, "learning_rate": 0.0002419408381840328, "loss": 0.2814, "step": 86742 }, { "epoch": 0.15380428181730332, "grad_norm": 0.60546875, "learning_rate": 0.0002419228825615786, "loss": 0.2033, "step": 86744 }, { "epoch": 0.15380782798261314, "grad_norm": 0.259765625, "learning_rate": 0.00024190493069185516, "loss": 0.1821, "step": 86746 }, { "epoch": 0.15381137414792298, "grad_norm": 0.328125, "learning_rate": 0.00024188698257494125, "loss": 0.1988, "step": 86748 }, { "epoch": 0.1538149203132328, "grad_norm": 0.56640625, "learning_rate": 0.00024186903821091506, "loss": 0.1395, "step": 86750 }, { "epoch": 0.1538184664785426, "grad_norm": 0.353515625, "learning_rate": 0.0002418510975998555, "loss": 0.188, "step": 86752 }, { "epoch": 0.15382201264385242, "grad_norm": 0.546875, "learning_rate": 0.00024183316074184063, "loss": 0.1382, "step": 86754 }, { "epoch": 0.15382555880916224, "grad_norm": 1.6015625, "learning_rate": 0.00024181522763694907, "loss": 0.3801, "step": 86756 }, { "epoch": 0.15382910497447205, "grad_norm": 1.109375, "learning_rate": 0.00024179729828525915, "loss": 0.172, "step": 86758 }, { "epoch": 0.15383265113978187, "grad_norm": 0.458984375, "learning_rate": 0.00024177937268684942, "loss": 0.1527, "step": 86760 }, { "epoch": 0.15383619730509168, "grad_norm": 0.451171875, "learning_rate": 0.0002417614508417982, "loss": 0.1792, "step": 86762 }, { "epoch": 0.1538397434704015, "grad_norm": 0.68359375, "learning_rate": 0.00024174353275018378, "loss": 0.1326, "step": 86764 }, { "epoch": 0.1538432896357113, "grad_norm": 0.5, "learning_rate": 0.00024172561841208462, "loss": 0.1748, "step": 86766 }, { "epoch": 0.15384683580102113, "grad_norm": 0.41015625, "learning_rate": 0.00024170770782757923, "loss": 0.2025, "step": 86768 }, { "epoch": 0.15385038196633094, "grad_norm": 0.5390625, "learning_rate": 0.00024168980099674558, "loss": 0.1832, "step": 86770 }, { "epoch": 0.15385392813164075, "grad_norm": 0.53125, "learning_rate": 0.00024167189791966215, "loss": 0.1575, "step": 86772 }, { "epoch": 0.15385747429695057, "grad_norm": 0.87890625, "learning_rate": 0.00024165399859640737, "loss": 0.1222, "step": 86774 }, { "epoch": 0.15386102046226038, "grad_norm": 0.197265625, "learning_rate": 0.0002416361030270595, "loss": 0.2097, "step": 86776 }, { "epoch": 0.1538645666275702, "grad_norm": 0.447265625, "learning_rate": 0.0002416182112116966, "loss": 0.1511, "step": 86778 }, { "epoch": 0.15386811279288, "grad_norm": 0.38671875, "learning_rate": 0.00024160032315039708, "loss": 0.1897, "step": 86780 }, { "epoch": 0.15387165895818983, "grad_norm": 0.314453125, "learning_rate": 0.000241582438843239, "loss": 0.1767, "step": 86782 }, { "epoch": 0.15387520512349964, "grad_norm": 0.49609375, "learning_rate": 0.00024156455829030103, "loss": 0.2497, "step": 86784 }, { "epoch": 0.15387875128880946, "grad_norm": 0.1474609375, "learning_rate": 0.0002415466814916606, "loss": 0.1278, "step": 86786 }, { "epoch": 0.15388229745411927, "grad_norm": 0.44921875, "learning_rate": 0.00024152880844739662, "loss": 0.2341, "step": 86788 }, { "epoch": 0.15388584361942909, "grad_norm": 0.435546875, "learning_rate": 0.00024151093915758686, "loss": 0.1537, "step": 86790 }, { "epoch": 0.1538893897847389, "grad_norm": 1.609375, "learning_rate": 0.0002414930736223097, "loss": 0.2219, "step": 86792 }, { "epoch": 0.15389293595004871, "grad_norm": 0.203125, "learning_rate": 0.00024147521184164305, "loss": 0.1766, "step": 86794 }, { "epoch": 0.15389648211535853, "grad_norm": 0.2890625, "learning_rate": 0.0002414573538156652, "loss": 0.1457, "step": 86796 }, { "epoch": 0.15390002828066834, "grad_norm": 0.71484375, "learning_rate": 0.0002414394995444542, "loss": 0.1718, "step": 86798 }, { "epoch": 0.15390357444597816, "grad_norm": 1.046875, "learning_rate": 0.00024142164902808805, "loss": 0.2459, "step": 86800 }, { "epoch": 0.15390712061128797, "grad_norm": 0.44921875, "learning_rate": 0.0002414038022666449, "loss": 0.1864, "step": 86802 }, { "epoch": 0.1539106667765978, "grad_norm": 0.19140625, "learning_rate": 0.00024138595926020273, "loss": 0.1921, "step": 86804 }, { "epoch": 0.1539142129419076, "grad_norm": 0.255859375, "learning_rate": 0.0002413681200088397, "loss": 0.1949, "step": 86806 }, { "epoch": 0.15391775910721742, "grad_norm": 0.474609375, "learning_rate": 0.00024135028451263376, "loss": 0.2148, "step": 86808 }, { "epoch": 0.15392130527252723, "grad_norm": 0.267578125, "learning_rate": 0.00024133245277166296, "loss": 0.1368, "step": 86810 }, { "epoch": 0.15392485143783705, "grad_norm": 0.314453125, "learning_rate": 0.00024131462478600515, "loss": 0.1134, "step": 86812 }, { "epoch": 0.15392839760314686, "grad_norm": 0.703125, "learning_rate": 0.00024129680055573837, "loss": 0.1787, "step": 86814 }, { "epoch": 0.15393194376845667, "grad_norm": 0.28515625, "learning_rate": 0.00024127898008094066, "loss": 0.204, "step": 86816 }, { "epoch": 0.1539354899337665, "grad_norm": 0.66796875, "learning_rate": 0.00024126116336168992, "loss": 0.573, "step": 86818 }, { "epoch": 0.1539390360990763, "grad_norm": 0.62890625, "learning_rate": 0.000241243350398064, "loss": 0.1806, "step": 86820 }, { "epoch": 0.15394258226438612, "grad_norm": 0.455078125, "learning_rate": 0.00024122554119014082, "loss": 0.1827, "step": 86822 }, { "epoch": 0.15394612842969593, "grad_norm": 1.359375, "learning_rate": 0.00024120773573799842, "loss": 0.1903, "step": 86824 }, { "epoch": 0.15394967459500575, "grad_norm": 0.318359375, "learning_rate": 0.00024118993404171425, "loss": 0.1902, "step": 86826 }, { "epoch": 0.15395322076031556, "grad_norm": 0.423828125, "learning_rate": 0.00024117213610136661, "loss": 0.1442, "step": 86828 }, { "epoch": 0.15395676692562538, "grad_norm": 0.349609375, "learning_rate": 0.00024115434191703311, "loss": 0.1927, "step": 86830 }, { "epoch": 0.1539603130909352, "grad_norm": 0.546875, "learning_rate": 0.00024113655148879176, "loss": 0.2598, "step": 86832 }, { "epoch": 0.153963859256245, "grad_norm": 0.380859375, "learning_rate": 0.00024111876481672025, "loss": 0.1949, "step": 86834 }, { "epoch": 0.15396740542155482, "grad_norm": 0.23046875, "learning_rate": 0.00024110098190089637, "loss": 0.1555, "step": 86836 }, { "epoch": 0.15397095158686466, "grad_norm": 0.875, "learning_rate": 0.00024108320274139784, "loss": 0.1896, "step": 86838 }, { "epoch": 0.15397449775217448, "grad_norm": 0.4375, "learning_rate": 0.0002410654273383023, "loss": 0.1877, "step": 86840 }, { "epoch": 0.1539780439174843, "grad_norm": 0.51953125, "learning_rate": 0.000241047655691688, "loss": 0.2213, "step": 86842 }, { "epoch": 0.1539815900827941, "grad_norm": 0.294921875, "learning_rate": 0.00024102988780163204, "loss": 0.1228, "step": 86844 }, { "epoch": 0.15398513624810392, "grad_norm": 0.361328125, "learning_rate": 0.00024101212366821247, "loss": 0.1845, "step": 86846 }, { "epoch": 0.15398868241341374, "grad_norm": 0.62109375, "learning_rate": 0.00024099436329150687, "loss": 0.2137, "step": 86848 }, { "epoch": 0.15399222857872355, "grad_norm": 0.318359375, "learning_rate": 0.00024097660667159312, "loss": 0.12, "step": 86850 }, { "epoch": 0.15399577474403336, "grad_norm": 0.26953125, "learning_rate": 0.0002409588538085485, "loss": 0.1888, "step": 86852 }, { "epoch": 0.15399932090934318, "grad_norm": 0.392578125, "learning_rate": 0.00024094110470245105, "loss": 0.1452, "step": 86854 }, { "epoch": 0.154002867074653, "grad_norm": 0.48046875, "learning_rate": 0.00024092335935337817, "loss": 0.2157, "step": 86856 }, { "epoch": 0.1540064132399628, "grad_norm": 0.66015625, "learning_rate": 0.00024090561776140743, "loss": 0.2299, "step": 86858 }, { "epoch": 0.15400995940527262, "grad_norm": 0.427734375, "learning_rate": 0.00024088787992661656, "loss": 0.1828, "step": 86860 }, { "epoch": 0.15401350557058244, "grad_norm": 0.83203125, "learning_rate": 0.000240870145849083, "loss": 0.233, "step": 86862 }, { "epoch": 0.15401705173589225, "grad_norm": 0.484375, "learning_rate": 0.00024085241552888436, "loss": 0.183, "step": 86864 }, { "epoch": 0.15402059790120207, "grad_norm": 0.1513671875, "learning_rate": 0.0002408346889660983, "loss": 0.1327, "step": 86866 }, { "epoch": 0.15402414406651188, "grad_norm": 1.140625, "learning_rate": 0.00024081696616080228, "loss": 0.2102, "step": 86868 }, { "epoch": 0.1540276902318217, "grad_norm": 3.65625, "learning_rate": 0.0002407992471130736, "loss": 0.2711, "step": 86870 }, { "epoch": 0.1540312363971315, "grad_norm": 0.28515625, "learning_rate": 0.00024078153182299002, "loss": 0.2601, "step": 86872 }, { "epoch": 0.15403478256244132, "grad_norm": 0.546875, "learning_rate": 0.000240763820290629, "loss": 0.2339, "step": 86874 }, { "epoch": 0.15403832872775114, "grad_norm": 0.5234375, "learning_rate": 0.00024074611251606793, "loss": 0.1967, "step": 86876 }, { "epoch": 0.15404187489306095, "grad_norm": 2.3125, "learning_rate": 0.00024072840849938414, "loss": 0.3441, "step": 86878 }, { "epoch": 0.15404542105837077, "grad_norm": 0.455078125, "learning_rate": 0.00024071070824065516, "loss": 0.2042, "step": 86880 }, { "epoch": 0.15404896722368058, "grad_norm": 0.3203125, "learning_rate": 0.0002406930117399586, "loss": 0.1763, "step": 86882 }, { "epoch": 0.1540525133889904, "grad_norm": 0.294921875, "learning_rate": 0.00024067531899737141, "loss": 0.1325, "step": 86884 }, { "epoch": 0.1540560595543002, "grad_norm": 0.376953125, "learning_rate": 0.00024065763001297137, "loss": 0.2157, "step": 86886 }, { "epoch": 0.15405960571961003, "grad_norm": 0.55859375, "learning_rate": 0.00024063994478683553, "loss": 0.247, "step": 86888 }, { "epoch": 0.15406315188491984, "grad_norm": 1.546875, "learning_rate": 0.00024062226331904172, "loss": 0.2113, "step": 86890 }, { "epoch": 0.15406669805022966, "grad_norm": 0.6171875, "learning_rate": 0.00024060458560966658, "loss": 0.209, "step": 86892 }, { "epoch": 0.15407024421553947, "grad_norm": 0.345703125, "learning_rate": 0.00024058691165878808, "loss": 0.1561, "step": 86894 }, { "epoch": 0.15407379038084928, "grad_norm": 0.2431640625, "learning_rate": 0.00024056924146648311, "loss": 0.1786, "step": 86896 }, { "epoch": 0.1540773365461591, "grad_norm": 0.1962890625, "learning_rate": 0.00024055157503282903, "loss": 0.183, "step": 86898 }, { "epoch": 0.15408088271146891, "grad_norm": 0.1669921875, "learning_rate": 0.0002405339123579032, "loss": 0.1152, "step": 86900 }, { "epoch": 0.15408442887677873, "grad_norm": 1.046875, "learning_rate": 0.00024051625344178264, "loss": 0.2531, "step": 86902 }, { "epoch": 0.15408797504208854, "grad_norm": 0.6796875, "learning_rate": 0.00024049859828454486, "loss": 0.1669, "step": 86904 }, { "epoch": 0.15409152120739836, "grad_norm": 0.2353515625, "learning_rate": 0.00024048094688626693, "loss": 0.2591, "step": 86906 }, { "epoch": 0.15409506737270817, "grad_norm": 0.50390625, "learning_rate": 0.00024046329924702617, "loss": 0.1979, "step": 86908 }, { "epoch": 0.154098613538018, "grad_norm": 0.2734375, "learning_rate": 0.00024044565536689944, "loss": 0.1531, "step": 86910 }, { "epoch": 0.1541021597033278, "grad_norm": 0.6953125, "learning_rate": 0.00024042801524596426, "loss": 0.2101, "step": 86912 }, { "epoch": 0.15410570586863762, "grad_norm": 0.482421875, "learning_rate": 0.0002404103788842977, "loss": 0.1669, "step": 86914 }, { "epoch": 0.15410925203394743, "grad_norm": 0.259765625, "learning_rate": 0.00024039274628197678, "loss": 0.1696, "step": 86916 }, { "epoch": 0.15411279819925724, "grad_norm": 0.400390625, "learning_rate": 0.0002403751174390787, "loss": 0.1538, "step": 86918 }, { "epoch": 0.15411634436456706, "grad_norm": 0.390625, "learning_rate": 0.00024035749235568033, "loss": 0.1942, "step": 86920 }, { "epoch": 0.15411989052987687, "grad_norm": 0.8203125, "learning_rate": 0.00024033987103185922, "loss": 0.1847, "step": 86922 }, { "epoch": 0.1541234366951867, "grad_norm": 1.1953125, "learning_rate": 0.00024032225346769202, "loss": 0.2685, "step": 86924 }, { "epoch": 0.1541269828604965, "grad_norm": 0.3828125, "learning_rate": 0.00024030463966325603, "loss": 0.2044, "step": 86926 }, { "epoch": 0.15413052902580632, "grad_norm": 0.57421875, "learning_rate": 0.0002402870296186281, "loss": 0.2086, "step": 86928 }, { "epoch": 0.15413407519111616, "grad_norm": 0.5234375, "learning_rate": 0.00024026942333388519, "loss": 0.3153, "step": 86930 }, { "epoch": 0.15413762135642597, "grad_norm": 0.1865234375, "learning_rate": 0.0002402518208091047, "loss": 0.1455, "step": 86932 }, { "epoch": 0.1541411675217358, "grad_norm": 0.310546875, "learning_rate": 0.0002402342220443632, "loss": 0.4108, "step": 86934 }, { "epoch": 0.1541447136870456, "grad_norm": 0.326171875, "learning_rate": 0.0002402166270397378, "loss": 0.1682, "step": 86936 }, { "epoch": 0.15414825985235542, "grad_norm": 0.5625, "learning_rate": 0.00024019903579530538, "loss": 0.2089, "step": 86938 }, { "epoch": 0.15415180601766523, "grad_norm": 1.6796875, "learning_rate": 0.00024018144831114316, "loss": 0.2245, "step": 86940 }, { "epoch": 0.15415535218297505, "grad_norm": 0.267578125, "learning_rate": 0.0002401638645873276, "loss": 0.1076, "step": 86942 }, { "epoch": 0.15415889834828486, "grad_norm": 0.1337890625, "learning_rate": 0.00024014628462393594, "loss": 0.1602, "step": 86944 }, { "epoch": 0.15416244451359468, "grad_norm": 0.8125, "learning_rate": 0.00024012870842104504, "loss": 0.1713, "step": 86946 }, { "epoch": 0.1541659906789045, "grad_norm": 0.84765625, "learning_rate": 0.00024011113597873168, "loss": 0.3307, "step": 86948 }, { "epoch": 0.1541695368442143, "grad_norm": 0.7890625, "learning_rate": 0.00024009356729707271, "loss": 0.1795, "step": 86950 }, { "epoch": 0.15417308300952412, "grad_norm": 0.32421875, "learning_rate": 0.00024007600237614486, "loss": 0.1895, "step": 86952 }, { "epoch": 0.15417662917483393, "grad_norm": 1.6875, "learning_rate": 0.00024005844121602517, "loss": 0.4618, "step": 86954 }, { "epoch": 0.15418017534014375, "grad_norm": 0.298828125, "learning_rate": 0.0002400408838167904, "loss": 0.204, "step": 86956 }, { "epoch": 0.15418372150545356, "grad_norm": 0.8203125, "learning_rate": 0.00024002333017851725, "loss": 0.2209, "step": 86958 }, { "epoch": 0.15418726767076338, "grad_norm": 7.34375, "learning_rate": 0.00024000578030128239, "loss": 0.1835, "step": 86960 }, { "epoch": 0.1541908138360732, "grad_norm": 0.56640625, "learning_rate": 0.00023998823418516286, "loss": 0.2129, "step": 86962 }, { "epoch": 0.154194360001383, "grad_norm": 0.62890625, "learning_rate": 0.00023997069183023518, "loss": 0.1273, "step": 86964 }, { "epoch": 0.15419790616669282, "grad_norm": 1.8828125, "learning_rate": 0.00023995315323657618, "loss": 0.2135, "step": 86966 }, { "epoch": 0.15420145233200264, "grad_norm": 0.546875, "learning_rate": 0.00023993561840426254, "loss": 0.3555, "step": 86968 }, { "epoch": 0.15420499849731245, "grad_norm": 0.5859375, "learning_rate": 0.00023991808733337078, "loss": 0.1865, "step": 86970 }, { "epoch": 0.15420854466262227, "grad_norm": 1.1640625, "learning_rate": 0.00023990056002397793, "loss": 0.3576, "step": 86972 }, { "epoch": 0.15421209082793208, "grad_norm": 0.75, "learning_rate": 0.0002398830364761601, "loss": 0.1885, "step": 86974 }, { "epoch": 0.1542156369932419, "grad_norm": 0.20703125, "learning_rate": 0.0002398655166899945, "loss": 0.1702, "step": 86976 }, { "epoch": 0.1542191831585517, "grad_norm": 0.27734375, "learning_rate": 0.0002398480006655573, "loss": 0.1527, "step": 86978 }, { "epoch": 0.15422272932386152, "grad_norm": 0.60546875, "learning_rate": 0.00023983048840292565, "loss": 0.1484, "step": 86980 }, { "epoch": 0.15422627548917134, "grad_norm": 0.8125, "learning_rate": 0.00023981297990217543, "loss": 0.2108, "step": 86982 }, { "epoch": 0.15422982165448115, "grad_norm": 4.59375, "learning_rate": 0.00023979547516338377, "loss": 0.1555, "step": 86984 }, { "epoch": 0.15423336781979097, "grad_norm": 1.5859375, "learning_rate": 0.00023977797418662708, "loss": 0.2513, "step": 86986 }, { "epoch": 0.15423691398510078, "grad_norm": 0.5390625, "learning_rate": 0.00023976047697198182, "loss": 0.1257, "step": 86988 }, { "epoch": 0.1542404601504106, "grad_norm": 0.78515625, "learning_rate": 0.0002397429835195245, "loss": 0.1989, "step": 86990 }, { "epoch": 0.1542440063157204, "grad_norm": 0.296875, "learning_rate": 0.00023972549382933158, "loss": 0.1955, "step": 86992 }, { "epoch": 0.15424755248103023, "grad_norm": 0.55859375, "learning_rate": 0.00023970800790147983, "loss": 0.1686, "step": 86994 }, { "epoch": 0.15425109864634004, "grad_norm": 0.458984375, "learning_rate": 0.00023969052573604544, "loss": 0.2384, "step": 86996 }, { "epoch": 0.15425464481164985, "grad_norm": 0.337890625, "learning_rate": 0.00023967304733310497, "loss": 0.2631, "step": 86998 }, { "epoch": 0.15425819097695967, "grad_norm": 0.2109375, "learning_rate": 0.00023965557269273486, "loss": 0.1645, "step": 87000 }, { "epoch": 0.15426173714226948, "grad_norm": 0.2490234375, "learning_rate": 0.00023963810181501153, "loss": 0.1924, "step": 87002 }, { "epoch": 0.1542652833075793, "grad_norm": 0.61328125, "learning_rate": 0.00023962063470001134, "loss": 0.1657, "step": 87004 }, { "epoch": 0.1542688294728891, "grad_norm": 0.5546875, "learning_rate": 0.00023960317134781082, "loss": 0.1424, "step": 87006 }, { "epoch": 0.15427237563819893, "grad_norm": 0.53125, "learning_rate": 0.0002395857117584862, "loss": 0.2203, "step": 87008 }, { "epoch": 0.15427592180350874, "grad_norm": 0.77734375, "learning_rate": 0.00023956825593211383, "loss": 0.1862, "step": 87010 }, { "epoch": 0.15427946796881856, "grad_norm": 0.5625, "learning_rate": 0.00023955080386877018, "loss": 0.1832, "step": 87012 }, { "epoch": 0.15428301413412837, "grad_norm": 0.6328125, "learning_rate": 0.00023953335556853154, "loss": 0.1924, "step": 87014 }, { "epoch": 0.15428656029943819, "grad_norm": 0.158203125, "learning_rate": 0.00023951591103147415, "loss": 0.2057, "step": 87016 }, { "epoch": 0.154290106464748, "grad_norm": 1.3828125, "learning_rate": 0.0002394984702576743, "loss": 0.2624, "step": 87018 }, { "epoch": 0.15429365263005784, "grad_norm": 1.0703125, "learning_rate": 0.00023948103324720826, "loss": 0.2202, "step": 87020 }, { "epoch": 0.15429719879536766, "grad_norm": 0.28515625, "learning_rate": 0.0002394636000001524, "loss": 0.185, "step": 87022 }, { "epoch": 0.15430074496067747, "grad_norm": 0.984375, "learning_rate": 0.000239446170516583, "loss": 0.1944, "step": 87024 }, { "epoch": 0.1543042911259873, "grad_norm": 1.0234375, "learning_rate": 0.00023942874479657603, "loss": 0.2075, "step": 87026 }, { "epoch": 0.1543078372912971, "grad_norm": 1.7109375, "learning_rate": 0.00023941132284020788, "loss": 0.5017, "step": 87028 }, { "epoch": 0.15431138345660692, "grad_norm": 0.20703125, "learning_rate": 0.00023939390464755485, "loss": 0.1474, "step": 87030 }, { "epoch": 0.15431492962191673, "grad_norm": 0.37890625, "learning_rate": 0.00023937649021869273, "loss": 0.1856, "step": 87032 }, { "epoch": 0.15431847578722654, "grad_norm": 0.75, "learning_rate": 0.0002393590795536981, "loss": 0.1696, "step": 87034 }, { "epoch": 0.15432202195253636, "grad_norm": 1.4609375, "learning_rate": 0.00023934167265264684, "loss": 0.2477, "step": 87036 }, { "epoch": 0.15432556811784617, "grad_norm": 1.421875, "learning_rate": 0.00023932426951561538, "loss": 0.4179, "step": 87038 }, { "epoch": 0.154329114283156, "grad_norm": 0.2578125, "learning_rate": 0.00023930687014267932, "loss": 0.2411, "step": 87040 }, { "epoch": 0.1543326604484658, "grad_norm": 0.53125, "learning_rate": 0.0002392894745339153, "loss": 0.1887, "step": 87042 }, { "epoch": 0.15433620661377562, "grad_norm": 1.40625, "learning_rate": 0.00023927208268939912, "loss": 0.1945, "step": 87044 }, { "epoch": 0.15433975277908543, "grad_norm": 0.44921875, "learning_rate": 0.00023925469460920685, "loss": 0.2101, "step": 87046 }, { "epoch": 0.15434329894439525, "grad_norm": 0.66796875, "learning_rate": 0.00023923731029341452, "loss": 0.1532, "step": 87048 }, { "epoch": 0.15434684510970506, "grad_norm": 0.7265625, "learning_rate": 0.00023921992974209816, "loss": 0.1393, "step": 87050 }, { "epoch": 0.15435039127501488, "grad_norm": 0.578125, "learning_rate": 0.00023920255295533396, "loss": 0.1922, "step": 87052 }, { "epoch": 0.1543539374403247, "grad_norm": 0.2353515625, "learning_rate": 0.00023918517993319778, "loss": 0.261, "step": 87054 }, { "epoch": 0.1543574836056345, "grad_norm": 0.279296875, "learning_rate": 0.00023916781067576554, "loss": 0.199, "step": 87056 }, { "epoch": 0.15436102977094432, "grad_norm": 1.421875, "learning_rate": 0.00023915044518311318, "loss": 0.2281, "step": 87058 }, { "epoch": 0.15436457593625413, "grad_norm": 0.298828125, "learning_rate": 0.00023913308345531684, "loss": 0.1263, "step": 87060 }, { "epoch": 0.15436812210156395, "grad_norm": 0.7578125, "learning_rate": 0.00023911572549245239, "loss": 0.2983, "step": 87062 }, { "epoch": 0.15437166826687376, "grad_norm": 0.380859375, "learning_rate": 0.00023909837129459563, "loss": 0.2419, "step": 87064 }, { "epoch": 0.15437521443218358, "grad_norm": 0.330078125, "learning_rate": 0.0002390810208618225, "loss": 0.1743, "step": 87066 }, { "epoch": 0.1543787605974934, "grad_norm": 3.09375, "learning_rate": 0.00023906367419420887, "loss": 0.1636, "step": 87068 }, { "epoch": 0.1543823067628032, "grad_norm": 0.42578125, "learning_rate": 0.0002390463312918309, "loss": 0.2167, "step": 87070 }, { "epoch": 0.15438585292811302, "grad_norm": 0.7421875, "learning_rate": 0.00023902899215476385, "loss": 0.1826, "step": 87072 }, { "epoch": 0.15438939909342284, "grad_norm": 0.6484375, "learning_rate": 0.00023901165678308402, "loss": 0.1374, "step": 87074 }, { "epoch": 0.15439294525873265, "grad_norm": 0.302734375, "learning_rate": 0.000238994325176867, "loss": 0.1632, "step": 87076 }, { "epoch": 0.15439649142404246, "grad_norm": 0.263671875, "learning_rate": 0.0002389769973361888, "loss": 0.1495, "step": 87078 }, { "epoch": 0.15440003758935228, "grad_norm": 0.4765625, "learning_rate": 0.000238959673261125, "loss": 0.1558, "step": 87080 }, { "epoch": 0.1544035837546621, "grad_norm": 0.267578125, "learning_rate": 0.00023894235295175145, "loss": 0.1063, "step": 87082 }, { "epoch": 0.1544071299199719, "grad_norm": 0.16015625, "learning_rate": 0.00023892503640814383, "loss": 0.1933, "step": 87084 }, { "epoch": 0.15441067608528172, "grad_norm": 0.2451171875, "learning_rate": 0.00023890772363037793, "loss": 0.1277, "step": 87086 }, { "epoch": 0.15441422225059154, "grad_norm": 0.734375, "learning_rate": 0.00023889041461852963, "loss": 0.2396, "step": 87088 }, { "epoch": 0.15441776841590135, "grad_norm": 0.1943359375, "learning_rate": 0.00023887310937267432, "loss": 0.2087, "step": 87090 }, { "epoch": 0.15442131458121117, "grad_norm": 1.015625, "learning_rate": 0.00023885580789288785, "loss": 0.2094, "step": 87092 }, { "epoch": 0.15442486074652098, "grad_norm": 0.453125, "learning_rate": 0.00023883851017924591, "loss": 0.1766, "step": 87094 }, { "epoch": 0.1544284069118308, "grad_norm": 0.421875, "learning_rate": 0.0002388212162318241, "loss": 0.1924, "step": 87096 }, { "epoch": 0.1544319530771406, "grad_norm": 0.28515625, "learning_rate": 0.00023880392605069795, "loss": 0.1873, "step": 87098 }, { "epoch": 0.15443549924245042, "grad_norm": 0.361328125, "learning_rate": 0.00023878663963594322, "loss": 0.2771, "step": 87100 }, { "epoch": 0.15443904540776024, "grad_norm": 0.302734375, "learning_rate": 0.00023876935698763556, "loss": 0.1872, "step": 87102 }, { "epoch": 0.15444259157307005, "grad_norm": 0.83203125, "learning_rate": 0.00023875207810585038, "loss": 0.1768, "step": 87104 }, { "epoch": 0.15444613773837987, "grad_norm": 0.314453125, "learning_rate": 0.00023873480299066343, "loss": 0.3142, "step": 87106 }, { "epoch": 0.15444968390368968, "grad_norm": 0.98828125, "learning_rate": 0.00023871753164215006, "loss": 0.1841, "step": 87108 }, { "epoch": 0.15445323006899953, "grad_norm": 0.310546875, "learning_rate": 0.000238700264060386, "loss": 0.1376, "step": 87110 }, { "epoch": 0.15445677623430934, "grad_norm": 0.8671875, "learning_rate": 0.0002386830002454466, "loss": 0.2191, "step": 87112 }, { "epoch": 0.15446032239961915, "grad_norm": 0.6015625, "learning_rate": 0.0002386657401974076, "loss": 0.2127, "step": 87114 }, { "epoch": 0.15446386856492897, "grad_norm": 0.5703125, "learning_rate": 0.0002386484839163442, "loss": 0.1489, "step": 87116 }, { "epoch": 0.15446741473023878, "grad_norm": 0.234375, "learning_rate": 0.000238631231402332, "loss": 0.2139, "step": 87118 }, { "epoch": 0.1544709608955486, "grad_norm": 0.484375, "learning_rate": 0.00023861398265544658, "loss": 0.1824, "step": 87120 }, { "epoch": 0.1544745070608584, "grad_norm": 0.2890625, "learning_rate": 0.00023859673767576305, "loss": 0.1645, "step": 87122 }, { "epoch": 0.15447805322616823, "grad_norm": 1.125, "learning_rate": 0.00023857949646335718, "loss": 0.1512, "step": 87124 }, { "epoch": 0.15448159939147804, "grad_norm": 0.72265625, "learning_rate": 0.00023856225901830406, "loss": 0.2171, "step": 87126 }, { "epoch": 0.15448514555678786, "grad_norm": 0.2236328125, "learning_rate": 0.00023854502534067948, "loss": 0.1412, "step": 87128 }, { "epoch": 0.15448869172209767, "grad_norm": 0.2265625, "learning_rate": 0.00023852779543055832, "loss": 0.1697, "step": 87130 }, { "epoch": 0.15449223788740749, "grad_norm": 0.5859375, "learning_rate": 0.00023851056928801635, "loss": 0.1716, "step": 87132 }, { "epoch": 0.1544957840527173, "grad_norm": 0.95703125, "learning_rate": 0.0002384933469131287, "loss": 0.1889, "step": 87134 }, { "epoch": 0.15449933021802711, "grad_norm": 0.267578125, "learning_rate": 0.00023847612830597075, "loss": 0.1664, "step": 87136 }, { "epoch": 0.15450287638333693, "grad_norm": 0.36328125, "learning_rate": 0.0002384589134666177, "loss": 0.2767, "step": 87138 }, { "epoch": 0.15450642254864674, "grad_norm": 0.93359375, "learning_rate": 0.0002384417023951449, "loss": 0.3485, "step": 87140 }, { "epoch": 0.15450996871395656, "grad_norm": 0.412109375, "learning_rate": 0.00023842449509162774, "loss": 0.1613, "step": 87142 }, { "epoch": 0.15451351487926637, "grad_norm": 0.5, "learning_rate": 0.00023840729155614134, "loss": 0.1642, "step": 87144 }, { "epoch": 0.1545170610445762, "grad_norm": 0.796875, "learning_rate": 0.000238390091788761, "loss": 0.1842, "step": 87146 }, { "epoch": 0.154520607209886, "grad_norm": 2.375, "learning_rate": 0.00023837289578956174, "loss": 0.4983, "step": 87148 }, { "epoch": 0.15452415337519582, "grad_norm": 0.400390625, "learning_rate": 0.00023835570355861915, "loss": 0.1596, "step": 87150 }, { "epoch": 0.15452769954050563, "grad_norm": 0.357421875, "learning_rate": 0.00023833851509600813, "loss": 0.1632, "step": 87152 }, { "epoch": 0.15453124570581545, "grad_norm": 0.81640625, "learning_rate": 0.00023832133040180398, "loss": 0.1926, "step": 87154 }, { "epoch": 0.15453479187112526, "grad_norm": 0.5390625, "learning_rate": 0.00023830414947608177, "loss": 0.1464, "step": 87156 }, { "epoch": 0.15453833803643507, "grad_norm": 0.435546875, "learning_rate": 0.0002382869723189166, "loss": 0.1912, "step": 87158 }, { "epoch": 0.1545418842017449, "grad_norm": 2.34375, "learning_rate": 0.00023826979893038386, "loss": 0.3242, "step": 87160 }, { "epoch": 0.1545454303670547, "grad_norm": 0.3046875, "learning_rate": 0.00023825262931055828, "loss": 0.187, "step": 87162 }, { "epoch": 0.15454897653236452, "grad_norm": 0.78515625, "learning_rate": 0.00023823546345951528, "loss": 0.2248, "step": 87164 }, { "epoch": 0.15455252269767433, "grad_norm": 0.259765625, "learning_rate": 0.0002382183013773297, "loss": 0.1678, "step": 87166 }, { "epoch": 0.15455606886298415, "grad_norm": 0.29296875, "learning_rate": 0.00023820114306407668, "loss": 0.18, "step": 87168 }, { "epoch": 0.15455961502829396, "grad_norm": 0.396484375, "learning_rate": 0.00023818398851983136, "loss": 0.2673, "step": 87170 }, { "epoch": 0.15456316119360378, "grad_norm": 0.44921875, "learning_rate": 0.00023816683774466858, "loss": 0.2137, "step": 87172 }, { "epoch": 0.1545667073589136, "grad_norm": 0.359375, "learning_rate": 0.0002381496907386635, "loss": 0.1889, "step": 87174 }, { "epoch": 0.1545702535242234, "grad_norm": 0.83203125, "learning_rate": 0.00023813254750189094, "loss": 0.2331, "step": 87176 }, { "epoch": 0.15457379968953322, "grad_norm": 0.2470703125, "learning_rate": 0.00023811540803442627, "loss": 0.3139, "step": 87178 }, { "epoch": 0.15457734585484303, "grad_norm": 0.435546875, "learning_rate": 0.00023809827233634386, "loss": 0.1736, "step": 87180 }, { "epoch": 0.15458089202015285, "grad_norm": 0.302734375, "learning_rate": 0.00023808114040771905, "loss": 0.1883, "step": 87182 }, { "epoch": 0.15458443818546266, "grad_norm": 0.7421875, "learning_rate": 0.00023806401224862658, "loss": 0.2129, "step": 87184 }, { "epoch": 0.15458798435077248, "grad_norm": 0.71484375, "learning_rate": 0.00023804688785914165, "loss": 0.1623, "step": 87186 }, { "epoch": 0.1545915305160823, "grad_norm": 0.251953125, "learning_rate": 0.0002380297672393388, "loss": 0.1723, "step": 87188 }, { "epoch": 0.1545950766813921, "grad_norm": 0.1826171875, "learning_rate": 0.00023801265038929316, "loss": 0.1053, "step": 87190 }, { "epoch": 0.15459862284670192, "grad_norm": 0.2470703125, "learning_rate": 0.00023799553730907946, "loss": 0.1399, "step": 87192 }, { "epoch": 0.15460216901201174, "grad_norm": 0.216796875, "learning_rate": 0.00023797842799877254, "loss": 0.1545, "step": 87194 }, { "epoch": 0.15460571517732155, "grad_norm": 0.32421875, "learning_rate": 0.00023796132245844725, "loss": 0.2298, "step": 87196 }, { "epoch": 0.15460926134263137, "grad_norm": 0.337890625, "learning_rate": 0.00023794422068817834, "loss": 0.1548, "step": 87198 }, { "epoch": 0.15461280750794118, "grad_norm": 0.375, "learning_rate": 0.00023792712268804072, "loss": 0.174, "step": 87200 }, { "epoch": 0.15461635367325102, "grad_norm": 1.9765625, "learning_rate": 0.00023791002845810914, "loss": 0.1934, "step": 87202 }, { "epoch": 0.15461989983856084, "grad_norm": 1.328125, "learning_rate": 0.00023789293799845836, "loss": 0.202, "step": 87204 }, { "epoch": 0.15462344600387065, "grad_norm": 0.6171875, "learning_rate": 0.000237875851309163, "loss": 0.2257, "step": 87206 }, { "epoch": 0.15462699216918047, "grad_norm": 0.337890625, "learning_rate": 0.00023785876839029792, "loss": 0.1256, "step": 87208 }, { "epoch": 0.15463053833449028, "grad_norm": 0.421875, "learning_rate": 0.00023784168924193783, "loss": 0.234, "step": 87210 }, { "epoch": 0.1546340844998001, "grad_norm": 0.515625, "learning_rate": 0.0002378246138641574, "loss": 0.2082, "step": 87212 }, { "epoch": 0.1546376306651099, "grad_norm": 0.298828125, "learning_rate": 0.0002378075422570314, "loss": 0.1786, "step": 87214 }, { "epoch": 0.15464117683041972, "grad_norm": 0.26953125, "learning_rate": 0.00023779047442063406, "loss": 0.1413, "step": 87216 }, { "epoch": 0.15464472299572954, "grad_norm": 1.21875, "learning_rate": 0.0002377734103550407, "loss": 0.1456, "step": 87218 }, { "epoch": 0.15464826916103935, "grad_norm": 0.224609375, "learning_rate": 0.0002377563500603254, "loss": 0.1429, "step": 87220 }, { "epoch": 0.15465181532634917, "grad_norm": 0.5546875, "learning_rate": 0.0002377392935365631, "loss": 0.152, "step": 87222 }, { "epoch": 0.15465536149165898, "grad_norm": 1.515625, "learning_rate": 0.00023772224078382812, "loss": 0.2647, "step": 87224 }, { "epoch": 0.1546589076569688, "grad_norm": 0.54296875, "learning_rate": 0.00023770519180219536, "loss": 0.1906, "step": 87226 }, { "epoch": 0.1546624538222786, "grad_norm": 0.30859375, "learning_rate": 0.00023768814659173908, "loss": 0.2013, "step": 87228 }, { "epoch": 0.15466599998758843, "grad_norm": 2.078125, "learning_rate": 0.000237671105152534, "loss": 0.2356, "step": 87230 }, { "epoch": 0.15466954615289824, "grad_norm": 0.365234375, "learning_rate": 0.00023765406748465465, "loss": 0.1464, "step": 87232 }, { "epoch": 0.15467309231820806, "grad_norm": 0.478515625, "learning_rate": 0.00023763703358817541, "loss": 0.1654, "step": 87234 }, { "epoch": 0.15467663848351787, "grad_norm": 0.19921875, "learning_rate": 0.00023762000346317104, "loss": 0.1447, "step": 87236 }, { "epoch": 0.15468018464882768, "grad_norm": 0.412109375, "learning_rate": 0.0002376029771097156, "loss": 0.2227, "step": 87238 }, { "epoch": 0.1546837308141375, "grad_norm": 0.4296875, "learning_rate": 0.00023758595452788388, "loss": 0.2001, "step": 87240 }, { "epoch": 0.1546872769794473, "grad_norm": 0.2490234375, "learning_rate": 0.0002375689357177503, "loss": 0.2101, "step": 87242 }, { "epoch": 0.15469082314475713, "grad_norm": 0.36328125, "learning_rate": 0.00023755192067938924, "loss": 0.2254, "step": 87244 }, { "epoch": 0.15469436931006694, "grad_norm": 0.65625, "learning_rate": 0.000237534909412875, "loss": 0.2276, "step": 87246 }, { "epoch": 0.15469791547537676, "grad_norm": 0.6328125, "learning_rate": 0.00023751790191828227, "loss": 0.3314, "step": 87248 }, { "epoch": 0.15470146164068657, "grad_norm": 5.3125, "learning_rate": 0.0002375008981956851, "loss": 0.2306, "step": 87250 }, { "epoch": 0.1547050078059964, "grad_norm": 0.40625, "learning_rate": 0.0002374838982451581, "loss": 0.1857, "step": 87252 }, { "epoch": 0.1547085539713062, "grad_norm": 0.55078125, "learning_rate": 0.00023746690206677542, "loss": 0.1503, "step": 87254 }, { "epoch": 0.15471210013661602, "grad_norm": 1.6171875, "learning_rate": 0.00023744990966061147, "loss": 0.2657, "step": 87256 }, { "epoch": 0.15471564630192583, "grad_norm": 0.359375, "learning_rate": 0.00023743292102674073, "loss": 0.154, "step": 87258 }, { "epoch": 0.15471919246723564, "grad_norm": 0.4921875, "learning_rate": 0.00023741593616523728, "loss": 0.1837, "step": 87260 }, { "epoch": 0.15472273863254546, "grad_norm": 0.671875, "learning_rate": 0.00023739895507617556, "loss": 0.175, "step": 87262 }, { "epoch": 0.15472628479785527, "grad_norm": 0.291015625, "learning_rate": 0.00023738197775962955, "loss": 0.3003, "step": 87264 }, { "epoch": 0.1547298309631651, "grad_norm": 1.109375, "learning_rate": 0.0002373650042156738, "loss": 0.238, "step": 87266 }, { "epoch": 0.1547333771284749, "grad_norm": 0.251953125, "learning_rate": 0.00023734803444438253, "loss": 0.186, "step": 87268 }, { "epoch": 0.15473692329378472, "grad_norm": 0.2890625, "learning_rate": 0.0002373310684458299, "loss": 0.2453, "step": 87270 }, { "epoch": 0.15474046945909453, "grad_norm": 0.43359375, "learning_rate": 0.00023731410622009, "loss": 0.1711, "step": 87272 }, { "epoch": 0.15474401562440435, "grad_norm": 0.416015625, "learning_rate": 0.0002372971477672369, "loss": 0.1708, "step": 87274 }, { "epoch": 0.15474756178971416, "grad_norm": 0.5234375, "learning_rate": 0.00023728019308734535, "loss": 0.1587, "step": 87276 }, { "epoch": 0.15475110795502398, "grad_norm": 2.734375, "learning_rate": 0.00023726324218048878, "loss": 0.4502, "step": 87278 }, { "epoch": 0.1547546541203338, "grad_norm": 0.20703125, "learning_rate": 0.00023724629504674174, "loss": 0.1507, "step": 87280 }, { "epoch": 0.1547582002856436, "grad_norm": 0.2216796875, "learning_rate": 0.00023722935168617838, "loss": 0.1644, "step": 87282 }, { "epoch": 0.15476174645095342, "grad_norm": 0.5546875, "learning_rate": 0.00023721241209887251, "loss": 0.2779, "step": 87284 }, { "epoch": 0.15476529261626323, "grad_norm": 0.498046875, "learning_rate": 0.0002371954762848985, "loss": 0.1688, "step": 87286 }, { "epoch": 0.15476883878157305, "grad_norm": 0.50390625, "learning_rate": 0.00023717854424433016, "loss": 0.133, "step": 87288 }, { "epoch": 0.15477238494688286, "grad_norm": 0.2412109375, "learning_rate": 0.00023716161597724187, "loss": 0.1831, "step": 87290 }, { "epoch": 0.1547759311121927, "grad_norm": 0.5546875, "learning_rate": 0.00023714469148370735, "loss": 0.1855, "step": 87292 }, { "epoch": 0.15477947727750252, "grad_norm": 4.09375, "learning_rate": 0.00023712777076380083, "loss": 0.2196, "step": 87294 }, { "epoch": 0.15478302344281233, "grad_norm": 0.70703125, "learning_rate": 0.00023711085381759606, "loss": 0.2611, "step": 87296 }, { "epoch": 0.15478656960812215, "grad_norm": 0.828125, "learning_rate": 0.00023709394064516738, "loss": 0.1558, "step": 87298 }, { "epoch": 0.15479011577343196, "grad_norm": 0.326171875, "learning_rate": 0.0002370770312465884, "loss": 0.2411, "step": 87300 }, { "epoch": 0.15479366193874178, "grad_norm": 0.248046875, "learning_rate": 0.00023706012562193347, "loss": 0.1285, "step": 87302 }, { "epoch": 0.1547972081040516, "grad_norm": 0.4453125, "learning_rate": 0.0002370432237712761, "loss": 0.1585, "step": 87304 }, { "epoch": 0.1548007542693614, "grad_norm": 0.49609375, "learning_rate": 0.0002370263256946904, "loss": 0.1165, "step": 87306 }, { "epoch": 0.15480430043467122, "grad_norm": 0.2890625, "learning_rate": 0.00023700943139225047, "loss": 0.1712, "step": 87308 }, { "epoch": 0.15480784659998104, "grad_norm": 0.1962890625, "learning_rate": 0.00023699254086402983, "loss": 0.1918, "step": 87310 }, { "epoch": 0.15481139276529085, "grad_norm": 0.6328125, "learning_rate": 0.00023697565411010254, "loss": 0.1896, "step": 87312 }, { "epoch": 0.15481493893060067, "grad_norm": 0.546875, "learning_rate": 0.00023695877113054235, "loss": 0.2074, "step": 87314 }, { "epoch": 0.15481848509591048, "grad_norm": 0.49609375, "learning_rate": 0.00023694189192542342, "loss": 0.1636, "step": 87316 }, { "epoch": 0.1548220312612203, "grad_norm": 0.328125, "learning_rate": 0.00023692501649481913, "loss": 0.1546, "step": 87318 }, { "epoch": 0.1548255774265301, "grad_norm": 0.3828125, "learning_rate": 0.00023690814483880355, "loss": 0.2081, "step": 87320 }, { "epoch": 0.15482912359183992, "grad_norm": 0.2236328125, "learning_rate": 0.00023689127695745047, "loss": 0.147, "step": 87322 }, { "epoch": 0.15483266975714974, "grad_norm": 0.2890625, "learning_rate": 0.0002368744128508335, "loss": 0.2422, "step": 87324 }, { "epoch": 0.15483621592245955, "grad_norm": 0.70703125, "learning_rate": 0.0002368575525190265, "loss": 0.1378, "step": 87326 }, { "epoch": 0.15483976208776937, "grad_norm": 0.2255859375, "learning_rate": 0.0002368406959621032, "loss": 0.1671, "step": 87328 }, { "epoch": 0.15484330825307918, "grad_norm": 0.42578125, "learning_rate": 0.00023682384318013725, "loss": 0.1298, "step": 87330 }, { "epoch": 0.154846854418389, "grad_norm": 1.65625, "learning_rate": 0.0002368069941732024, "loss": 0.2479, "step": 87332 }, { "epoch": 0.1548504005836988, "grad_norm": 0.2333984375, "learning_rate": 0.00023679014894137257, "loss": 0.1991, "step": 87334 }, { "epoch": 0.15485394674900863, "grad_norm": 1.03125, "learning_rate": 0.000236773307484721, "loss": 0.1774, "step": 87336 }, { "epoch": 0.15485749291431844, "grad_norm": 0.38671875, "learning_rate": 0.00023675646980332176, "loss": 0.1556, "step": 87338 }, { "epoch": 0.15486103907962825, "grad_norm": 0.275390625, "learning_rate": 0.00023673963589724816, "loss": 0.1745, "step": 87340 }, { "epoch": 0.15486458524493807, "grad_norm": 0.3671875, "learning_rate": 0.00023672280576657396, "loss": 0.1558, "step": 87342 }, { "epoch": 0.15486813141024788, "grad_norm": 0.25390625, "learning_rate": 0.00023670597941137278, "loss": 0.1798, "step": 87344 }, { "epoch": 0.1548716775755577, "grad_norm": 0.265625, "learning_rate": 0.00023668915683171807, "loss": 0.2018, "step": 87346 }, { "epoch": 0.1548752237408675, "grad_norm": 1.71875, "learning_rate": 0.0002366723380276837, "loss": 0.2494, "step": 87348 }, { "epoch": 0.15487876990617733, "grad_norm": 0.2734375, "learning_rate": 0.00023665552299934297, "loss": 0.2177, "step": 87350 }, { "epoch": 0.15488231607148714, "grad_norm": 0.6015625, "learning_rate": 0.00023663871174676947, "loss": 0.2632, "step": 87352 }, { "epoch": 0.15488586223679696, "grad_norm": 0.302734375, "learning_rate": 0.00023662190427003665, "loss": 0.1593, "step": 87354 }, { "epoch": 0.15488940840210677, "grad_norm": 3.140625, "learning_rate": 0.00023660510056921827, "loss": 0.2036, "step": 87356 }, { "epoch": 0.15489295456741659, "grad_norm": 0.68359375, "learning_rate": 0.00023658830064438756, "loss": 0.1957, "step": 87358 }, { "epoch": 0.1548965007327264, "grad_norm": 0.30078125, "learning_rate": 0.00023657150449561825, "loss": 0.1956, "step": 87360 }, { "epoch": 0.15490004689803621, "grad_norm": 0.1875, "learning_rate": 0.0002365547121229836, "loss": 0.1882, "step": 87362 }, { "epoch": 0.15490359306334603, "grad_norm": 0.30859375, "learning_rate": 0.0002365379235265569, "loss": 0.1777, "step": 87364 }, { "epoch": 0.15490713922865584, "grad_norm": 0.37109375, "learning_rate": 0.000236521138706412, "loss": 0.1775, "step": 87366 }, { "epoch": 0.15491068539396566, "grad_norm": 0.318359375, "learning_rate": 0.0002365043576626219, "loss": 0.1613, "step": 87368 }, { "epoch": 0.15491423155927547, "grad_norm": 1.078125, "learning_rate": 0.0002364875803952602, "loss": 0.2866, "step": 87370 }, { "epoch": 0.1549177777245853, "grad_norm": 0.5390625, "learning_rate": 0.0002364708069044001, "loss": 0.193, "step": 87372 }, { "epoch": 0.1549213238898951, "grad_norm": 0.37109375, "learning_rate": 0.00023645403719011542, "loss": 0.1659, "step": 87374 }, { "epoch": 0.15492487005520492, "grad_norm": 0.400390625, "learning_rate": 0.00023643727125247888, "loss": 0.1829, "step": 87376 }, { "epoch": 0.15492841622051473, "grad_norm": 0.93359375, "learning_rate": 0.0002364205090915643, "loss": 0.2177, "step": 87378 }, { "epoch": 0.15493196238582455, "grad_norm": 0.236328125, "learning_rate": 0.0002364037507074447, "loss": 0.1778, "step": 87380 }, { "epoch": 0.1549355085511344, "grad_norm": 1.1484375, "learning_rate": 0.0002363869961001935, "loss": 0.1915, "step": 87382 }, { "epoch": 0.1549390547164442, "grad_norm": 0.6484375, "learning_rate": 0.00023637024526988398, "loss": 0.1669, "step": 87384 }, { "epoch": 0.15494260088175402, "grad_norm": 0.16015625, "learning_rate": 0.00023635349821658912, "loss": 0.1521, "step": 87386 }, { "epoch": 0.15494614704706383, "grad_norm": 1.3125, "learning_rate": 0.00023633675494038256, "loss": 0.233, "step": 87388 }, { "epoch": 0.15494969321237365, "grad_norm": 0.2138671875, "learning_rate": 0.00023632001544133735, "loss": 0.1708, "step": 87390 }, { "epoch": 0.15495323937768346, "grad_norm": 0.328125, "learning_rate": 0.00023630327971952677, "loss": 0.1976, "step": 87392 }, { "epoch": 0.15495678554299328, "grad_norm": 0.34765625, "learning_rate": 0.00023628654777502377, "loss": 0.2105, "step": 87394 }, { "epoch": 0.1549603317083031, "grad_norm": 4.5625, "learning_rate": 0.00023626981960790186, "loss": 0.2502, "step": 87396 }, { "epoch": 0.1549638778736129, "grad_norm": 0.67578125, "learning_rate": 0.000236253095218234, "loss": 0.1942, "step": 87398 }, { "epoch": 0.15496742403892272, "grad_norm": 1.5078125, "learning_rate": 0.00023623637460609343, "loss": 0.1856, "step": 87400 }, { "epoch": 0.15497097020423253, "grad_norm": 0.609375, "learning_rate": 0.0002362196577715532, "loss": 0.204, "step": 87402 }, { "epoch": 0.15497451636954235, "grad_norm": 0.96484375, "learning_rate": 0.00023620294471468625, "loss": 0.1441, "step": 87404 }, { "epoch": 0.15497806253485216, "grad_norm": 0.3984375, "learning_rate": 0.00023618623543556622, "loss": 0.1812, "step": 87406 }, { "epoch": 0.15498160870016198, "grad_norm": 0.328125, "learning_rate": 0.00023616952993426552, "loss": 0.28, "step": 87408 }, { "epoch": 0.1549851548654718, "grad_norm": 0.306640625, "learning_rate": 0.00023615282821085772, "loss": 0.2212, "step": 87410 }, { "epoch": 0.1549887010307816, "grad_norm": 3.015625, "learning_rate": 0.0002361361302654155, "loss": 0.2097, "step": 87412 }, { "epoch": 0.15499224719609142, "grad_norm": 0.421875, "learning_rate": 0.00023611943609801224, "loss": 0.1773, "step": 87414 }, { "epoch": 0.15499579336140124, "grad_norm": 0.490234375, "learning_rate": 0.0002361027457087206, "loss": 0.1553, "step": 87416 }, { "epoch": 0.15499933952671105, "grad_norm": 0.55078125, "learning_rate": 0.00023608605909761384, "loss": 0.1702, "step": 87418 }, { "epoch": 0.15500288569202086, "grad_norm": 0.58984375, "learning_rate": 0.00023606937626476486, "loss": 0.1724, "step": 87420 }, { "epoch": 0.15500643185733068, "grad_norm": 0.337890625, "learning_rate": 0.00023605269721024643, "loss": 0.1483, "step": 87422 }, { "epoch": 0.1550099780226405, "grad_norm": 0.63671875, "learning_rate": 0.0002360360219341319, "loss": 0.1963, "step": 87424 }, { "epoch": 0.1550135241879503, "grad_norm": 0.50390625, "learning_rate": 0.0002360193504364938, "loss": 0.1568, "step": 87426 }, { "epoch": 0.15501707035326012, "grad_norm": 4.34375, "learning_rate": 0.00023600268271740528, "loss": 0.2362, "step": 87428 }, { "epoch": 0.15502061651856994, "grad_norm": 0.74609375, "learning_rate": 0.00023598601877693895, "loss": 0.2012, "step": 87430 }, { "epoch": 0.15502416268387975, "grad_norm": 0.53125, "learning_rate": 0.0002359693586151682, "loss": 0.2484, "step": 87432 }, { "epoch": 0.15502770884918957, "grad_norm": 2.40625, "learning_rate": 0.00023595270223216537, "loss": 0.2113, "step": 87434 }, { "epoch": 0.15503125501449938, "grad_norm": 0.384765625, "learning_rate": 0.00023593604962800362, "loss": 0.1929, "step": 87436 }, { "epoch": 0.1550348011798092, "grad_norm": 3.8125, "learning_rate": 0.0002359194008027557, "loss": 0.2962, "step": 87438 }, { "epoch": 0.155038347345119, "grad_norm": 1.15625, "learning_rate": 0.00023590275575649438, "loss": 0.5356, "step": 87440 }, { "epoch": 0.15504189351042882, "grad_norm": 0.37890625, "learning_rate": 0.00023588611448929254, "loss": 0.1501, "step": 87442 }, { "epoch": 0.15504543967573864, "grad_norm": 0.5, "learning_rate": 0.00023586947700122271, "loss": 0.2114, "step": 87444 }, { "epoch": 0.15504898584104845, "grad_norm": 0.318359375, "learning_rate": 0.00023585284329235792, "loss": 0.1674, "step": 87446 }, { "epoch": 0.15505253200635827, "grad_norm": 0.86328125, "learning_rate": 0.00023583621336277093, "loss": 0.2233, "step": 87448 }, { "epoch": 0.15505607817166808, "grad_norm": 0.53515625, "learning_rate": 0.00023581958721253436, "loss": 0.1925, "step": 87450 }, { "epoch": 0.1550596243369779, "grad_norm": 0.69140625, "learning_rate": 0.00023580296484172086, "loss": 0.373, "step": 87452 }, { "epoch": 0.1550631705022877, "grad_norm": 1.046875, "learning_rate": 0.0002357863462504032, "loss": 0.1698, "step": 87454 }, { "epoch": 0.15506671666759753, "grad_norm": 0.400390625, "learning_rate": 0.00023576973143865426, "loss": 0.2195, "step": 87456 }, { "epoch": 0.15507026283290734, "grad_norm": 0.1787109375, "learning_rate": 0.00023575312040654626, "loss": 0.1232, "step": 87458 }, { "epoch": 0.15507380899821716, "grad_norm": 2.609375, "learning_rate": 0.00023573651315415218, "loss": 0.2876, "step": 87460 }, { "epoch": 0.15507735516352697, "grad_norm": 0.9296875, "learning_rate": 0.00023571990968154449, "loss": 0.1991, "step": 87462 }, { "epoch": 0.15508090132883678, "grad_norm": 0.37109375, "learning_rate": 0.00023570330998879614, "loss": 0.1623, "step": 87464 }, { "epoch": 0.1550844474941466, "grad_norm": 0.3125, "learning_rate": 0.00023568671407597923, "loss": 0.3154, "step": 87466 }, { "epoch": 0.1550879936594564, "grad_norm": 0.5390625, "learning_rate": 0.00023567012194316664, "loss": 0.2388, "step": 87468 }, { "epoch": 0.15509153982476623, "grad_norm": 0.439453125, "learning_rate": 0.0002356535335904309, "loss": 0.143, "step": 87470 }, { "epoch": 0.15509508599007604, "grad_norm": 0.24609375, "learning_rate": 0.00023563694901784457, "loss": 0.5897, "step": 87472 }, { "epoch": 0.15509863215538588, "grad_norm": 0.322265625, "learning_rate": 0.00023562036822548016, "loss": 0.1966, "step": 87474 }, { "epoch": 0.1551021783206957, "grad_norm": 0.41796875, "learning_rate": 0.00023560379121340995, "loss": 0.1647, "step": 87476 }, { "epoch": 0.15510572448600551, "grad_norm": 0.306640625, "learning_rate": 0.00023558721798170681, "loss": 0.161, "step": 87478 }, { "epoch": 0.15510927065131533, "grad_norm": 0.474609375, "learning_rate": 0.00023557064853044318, "loss": 0.1391, "step": 87480 }, { "epoch": 0.15511281681662514, "grad_norm": 0.65234375, "learning_rate": 0.00023555408285969132, "loss": 0.1659, "step": 87482 }, { "epoch": 0.15511636298193496, "grad_norm": 0.251953125, "learning_rate": 0.00023553752096952364, "loss": 0.1475, "step": 87484 }, { "epoch": 0.15511990914724477, "grad_norm": 0.5859375, "learning_rate": 0.00023552096286001283, "loss": 0.1842, "step": 87486 }, { "epoch": 0.1551234553125546, "grad_norm": 0.384765625, "learning_rate": 0.00023550440853123116, "loss": 0.1748, "step": 87488 }, { "epoch": 0.1551270014778644, "grad_norm": 0.2138671875, "learning_rate": 0.0002354878579832511, "loss": 0.1875, "step": 87490 }, { "epoch": 0.15513054764317422, "grad_norm": 2.078125, "learning_rate": 0.00023547131121614493, "loss": 0.2599, "step": 87492 }, { "epoch": 0.15513409380848403, "grad_norm": 0.66015625, "learning_rate": 0.00023545476822998498, "loss": 0.2061, "step": 87494 }, { "epoch": 0.15513763997379384, "grad_norm": 0.357421875, "learning_rate": 0.00023543822902484396, "loss": 0.1635, "step": 87496 }, { "epoch": 0.15514118613910366, "grad_norm": 0.404296875, "learning_rate": 0.00023542169360079367, "loss": 0.1462, "step": 87498 }, { "epoch": 0.15514473230441347, "grad_norm": 0.26953125, "learning_rate": 0.00023540516195790672, "loss": 0.2301, "step": 87500 }, { "epoch": 0.1551482784697233, "grad_norm": 0.546875, "learning_rate": 0.00023538863409625532, "loss": 0.2036, "step": 87502 }, { "epoch": 0.1551518246350331, "grad_norm": 0.228515625, "learning_rate": 0.0002353721100159119, "loss": 0.1793, "step": 87504 }, { "epoch": 0.15515537080034292, "grad_norm": 0.3359375, "learning_rate": 0.0002353555897169487, "loss": 0.1776, "step": 87506 }, { "epoch": 0.15515891696565273, "grad_norm": 0.4453125, "learning_rate": 0.00023533907319943782, "loss": 0.1715, "step": 87508 }, { "epoch": 0.15516246313096255, "grad_norm": 0.4140625, "learning_rate": 0.00023532256046345166, "loss": 0.1212, "step": 87510 }, { "epoch": 0.15516600929627236, "grad_norm": 0.2353515625, "learning_rate": 0.0002353060515090622, "loss": 0.1562, "step": 87512 }, { "epoch": 0.15516955546158218, "grad_norm": 0.275390625, "learning_rate": 0.00023528954633634207, "loss": 0.1814, "step": 87514 }, { "epoch": 0.155173101626892, "grad_norm": 0.8515625, "learning_rate": 0.00023527304494536292, "loss": 0.1616, "step": 87516 }, { "epoch": 0.1551766477922018, "grad_norm": 0.25390625, "learning_rate": 0.0002352565473361972, "loss": 0.1761, "step": 87518 }, { "epoch": 0.15518019395751162, "grad_norm": 0.48046875, "learning_rate": 0.00023524005350891706, "loss": 0.1593, "step": 87520 }, { "epoch": 0.15518374012282143, "grad_norm": 0.375, "learning_rate": 0.00023522356346359483, "loss": 0.1627, "step": 87522 }, { "epoch": 0.15518728628813125, "grad_norm": 0.25390625, "learning_rate": 0.0002352070772003021, "loss": 0.1665, "step": 87524 }, { "epoch": 0.15519083245344106, "grad_norm": 0.6953125, "learning_rate": 0.0002351905947191115, "loss": 0.1508, "step": 87526 }, { "epoch": 0.15519437861875088, "grad_norm": 0.31640625, "learning_rate": 0.00023517411602009487, "loss": 0.2241, "step": 87528 }, { "epoch": 0.1551979247840607, "grad_norm": 0.255859375, "learning_rate": 0.00023515764110332424, "loss": 0.1533, "step": 87530 }, { "epoch": 0.1552014709493705, "grad_norm": 0.451171875, "learning_rate": 0.0002351411699688718, "loss": 0.1575, "step": 87532 }, { "epoch": 0.15520501711468032, "grad_norm": 0.61328125, "learning_rate": 0.00023512470261680935, "loss": 0.1377, "step": 87534 }, { "epoch": 0.15520856327999014, "grad_norm": 2.015625, "learning_rate": 0.00023510823904720912, "loss": 0.2416, "step": 87536 }, { "epoch": 0.15521210944529995, "grad_norm": 0.279296875, "learning_rate": 0.0002350917792601432, "loss": 0.1733, "step": 87538 }, { "epoch": 0.15521565561060977, "grad_norm": 0.61328125, "learning_rate": 0.0002350753232556834, "loss": 0.1489, "step": 87540 }, { "epoch": 0.15521920177591958, "grad_norm": 0.404296875, "learning_rate": 0.0002350588710339016, "loss": 0.2791, "step": 87542 }, { "epoch": 0.1552227479412294, "grad_norm": 0.353515625, "learning_rate": 0.00023504242259486996, "loss": 0.1459, "step": 87544 }, { "epoch": 0.1552262941065392, "grad_norm": 0.9140625, "learning_rate": 0.0002350259779386604, "loss": 0.1693, "step": 87546 }, { "epoch": 0.15522984027184902, "grad_norm": 0.640625, "learning_rate": 0.00023500953706534475, "loss": 0.1882, "step": 87548 }, { "epoch": 0.15523338643715884, "grad_norm": 0.609375, "learning_rate": 0.00023499309997499494, "loss": 0.2242, "step": 87550 }, { "epoch": 0.15523693260246865, "grad_norm": 0.421875, "learning_rate": 0.0002349766666676827, "loss": 0.2041, "step": 87552 }, { "epoch": 0.15524047876777847, "grad_norm": 0.466796875, "learning_rate": 0.00023496023714348027, "loss": 0.1784, "step": 87554 }, { "epoch": 0.15524402493308828, "grad_norm": 0.7890625, "learning_rate": 0.00023494381140245913, "loss": 0.2873, "step": 87556 }, { "epoch": 0.1552475710983981, "grad_norm": 0.94921875, "learning_rate": 0.0002349273894446914, "loss": 0.1682, "step": 87558 }, { "epoch": 0.1552511172637079, "grad_norm": 0.408203125, "learning_rate": 0.00023491097127024862, "loss": 0.2132, "step": 87560 }, { "epoch": 0.15525466342901773, "grad_norm": 0.404296875, "learning_rate": 0.00023489455687920294, "loss": 0.1878, "step": 87562 }, { "epoch": 0.15525820959432757, "grad_norm": 0.416015625, "learning_rate": 0.0002348781462716258, "loss": 0.2016, "step": 87564 }, { "epoch": 0.15526175575963738, "grad_norm": 0.298828125, "learning_rate": 0.00023486173944758923, "loss": 0.184, "step": 87566 }, { "epoch": 0.1552653019249472, "grad_norm": 0.248046875, "learning_rate": 0.00023484533640716478, "loss": 0.2046, "step": 87568 }, { "epoch": 0.155268848090257, "grad_norm": 0.458984375, "learning_rate": 0.00023482893715042442, "loss": 0.1867, "step": 87570 }, { "epoch": 0.15527239425556683, "grad_norm": 0.44140625, "learning_rate": 0.00023481254167743965, "loss": 0.1686, "step": 87572 }, { "epoch": 0.15527594042087664, "grad_norm": 1.3671875, "learning_rate": 0.00023479614998828224, "loss": 0.2507, "step": 87574 }, { "epoch": 0.15527948658618645, "grad_norm": 0.431640625, "learning_rate": 0.00023477976208302392, "loss": 0.2308, "step": 87576 }, { "epoch": 0.15528303275149627, "grad_norm": 0.59375, "learning_rate": 0.00023476337796173635, "loss": 0.1722, "step": 87578 }, { "epoch": 0.15528657891680608, "grad_norm": 0.66015625, "learning_rate": 0.00023474699762449124, "loss": 0.2017, "step": 87580 }, { "epoch": 0.1552901250821159, "grad_norm": 0.365234375, "learning_rate": 0.00023473062107135995, "loss": 0.1641, "step": 87582 }, { "epoch": 0.1552936712474257, "grad_norm": 0.17578125, "learning_rate": 0.00023471424830241456, "loss": 0.1116, "step": 87584 }, { "epoch": 0.15529721741273553, "grad_norm": 0.294921875, "learning_rate": 0.00023469787931772635, "loss": 0.1822, "step": 87586 }, { "epoch": 0.15530076357804534, "grad_norm": 0.2080078125, "learning_rate": 0.000234681514117367, "loss": 0.1428, "step": 87588 }, { "epoch": 0.15530430974335516, "grad_norm": 0.6875, "learning_rate": 0.000234665152701408, "loss": 0.1727, "step": 87590 }, { "epoch": 0.15530785590866497, "grad_norm": 1.6796875, "learning_rate": 0.00023464879506992099, "loss": 0.2251, "step": 87592 }, { "epoch": 0.15531140207397479, "grad_norm": 0.396484375, "learning_rate": 0.00023463244122297753, "loss": 0.1278, "step": 87594 }, { "epoch": 0.1553149482392846, "grad_norm": 0.5546875, "learning_rate": 0.00023461609116064912, "loss": 0.1726, "step": 87596 }, { "epoch": 0.15531849440459441, "grad_norm": 2.4375, "learning_rate": 0.00023459974488300724, "loss": 0.207, "step": 87598 }, { "epoch": 0.15532204056990423, "grad_norm": 1.5390625, "learning_rate": 0.00023458340239012327, "loss": 0.2949, "step": 87600 }, { "epoch": 0.15532558673521404, "grad_norm": 0.41015625, "learning_rate": 0.00023456706368206898, "loss": 0.224, "step": 87602 }, { "epoch": 0.15532913290052386, "grad_norm": 0.35546875, "learning_rate": 0.00023455072875891564, "loss": 0.1762, "step": 87604 }, { "epoch": 0.15533267906583367, "grad_norm": 0.26171875, "learning_rate": 0.00023453439762073468, "loss": 0.1377, "step": 87606 }, { "epoch": 0.1553362252311435, "grad_norm": 0.427734375, "learning_rate": 0.00023451807026759748, "loss": 0.1969, "step": 87608 }, { "epoch": 0.1553397713964533, "grad_norm": 0.248046875, "learning_rate": 0.00023450174669957549, "loss": 0.1311, "step": 87610 }, { "epoch": 0.15534331756176312, "grad_norm": 0.76953125, "learning_rate": 0.00023448542691674026, "loss": 0.1812, "step": 87612 }, { "epoch": 0.15534686372707293, "grad_norm": 0.33984375, "learning_rate": 0.00023446911091916286, "loss": 0.1399, "step": 87614 }, { "epoch": 0.15535040989238275, "grad_norm": 0.439453125, "learning_rate": 0.0002344527987069149, "loss": 0.2416, "step": 87616 }, { "epoch": 0.15535395605769256, "grad_norm": 13.125, "learning_rate": 0.00023443649028006763, "loss": 0.1848, "step": 87618 }, { "epoch": 0.15535750222300237, "grad_norm": 1.7421875, "learning_rate": 0.0002344201856386924, "loss": 0.3907, "step": 87620 }, { "epoch": 0.1553610483883122, "grad_norm": 0.2431640625, "learning_rate": 0.00023440388478286034, "loss": 0.1611, "step": 87622 }, { "epoch": 0.155364594553622, "grad_norm": 1.65625, "learning_rate": 0.000234387587712643, "loss": 0.4216, "step": 87624 }, { "epoch": 0.15536814071893182, "grad_norm": 0.376953125, "learning_rate": 0.00023437129442811157, "loss": 0.1705, "step": 87626 }, { "epoch": 0.15537168688424163, "grad_norm": 0.255859375, "learning_rate": 0.00023435500492933732, "loss": 0.1917, "step": 87628 }, { "epoch": 0.15537523304955145, "grad_norm": 0.373046875, "learning_rate": 0.0002343387192163914, "loss": 0.1273, "step": 87630 }, { "epoch": 0.15537877921486126, "grad_norm": 0.5390625, "learning_rate": 0.000234322437289345, "loss": 0.2353, "step": 87632 }, { "epoch": 0.15538232538017108, "grad_norm": 0.84765625, "learning_rate": 0.0002343061591482695, "loss": 0.2405, "step": 87634 }, { "epoch": 0.1553858715454809, "grad_norm": 2.78125, "learning_rate": 0.000234289884793236, "loss": 0.4974, "step": 87636 }, { "epoch": 0.1553894177107907, "grad_norm": 0.2265625, "learning_rate": 0.00023427361422431565, "loss": 0.1863, "step": 87638 }, { "epoch": 0.15539296387610052, "grad_norm": 0.24609375, "learning_rate": 0.0002342573474415796, "loss": 0.2104, "step": 87640 }, { "epoch": 0.15539651004141034, "grad_norm": 0.1669921875, "learning_rate": 0.00023424108444509902, "loss": 0.1229, "step": 87642 }, { "epoch": 0.15540005620672015, "grad_norm": 0.33984375, "learning_rate": 0.0002342248252349453, "loss": 0.2085, "step": 87644 }, { "epoch": 0.15540360237202996, "grad_norm": 0.37109375, "learning_rate": 0.00023420856981118906, "loss": 0.1697, "step": 87646 }, { "epoch": 0.15540714853733978, "grad_norm": 0.5703125, "learning_rate": 0.00023419231817390162, "loss": 0.2341, "step": 87648 }, { "epoch": 0.1554106947026496, "grad_norm": 0.2451171875, "learning_rate": 0.00023417607032315399, "loss": 0.1455, "step": 87650 }, { "epoch": 0.1554142408679594, "grad_norm": 0.318359375, "learning_rate": 0.00023415982625901756, "loss": 0.1143, "step": 87652 }, { "epoch": 0.15541778703326925, "grad_norm": 1.703125, "learning_rate": 0.00023414358598156286, "loss": 0.2553, "step": 87654 }, { "epoch": 0.15542133319857906, "grad_norm": 0.90625, "learning_rate": 0.00023412734949086132, "loss": 0.1621, "step": 87656 }, { "epoch": 0.15542487936388888, "grad_norm": 0.53125, "learning_rate": 0.00023411111678698374, "loss": 0.2166, "step": 87658 }, { "epoch": 0.1554284255291987, "grad_norm": 0.3125, "learning_rate": 0.00023409488787000117, "loss": 0.2791, "step": 87660 }, { "epoch": 0.1554319716945085, "grad_norm": 0.216796875, "learning_rate": 0.0002340786627399846, "loss": 0.1097, "step": 87662 }, { "epoch": 0.15543551785981832, "grad_norm": 0.55859375, "learning_rate": 0.00023406244139700488, "loss": 0.1926, "step": 87664 }, { "epoch": 0.15543906402512814, "grad_norm": 0.333984375, "learning_rate": 0.00023404622384113317, "loss": 0.2048, "step": 87666 }, { "epoch": 0.15544261019043795, "grad_norm": 0.208984375, "learning_rate": 0.00023403001007244012, "loss": 0.1812, "step": 87668 }, { "epoch": 0.15544615635574777, "grad_norm": 1.390625, "learning_rate": 0.000234013800090997, "loss": 0.2072, "step": 87670 }, { "epoch": 0.15544970252105758, "grad_norm": 0.35546875, "learning_rate": 0.0002339975938968743, "loss": 0.1627, "step": 87672 }, { "epoch": 0.1554532486863674, "grad_norm": 0.2421875, "learning_rate": 0.00023398139149014322, "loss": 0.1512, "step": 87674 }, { "epoch": 0.1554567948516772, "grad_norm": 0.62109375, "learning_rate": 0.00023396519287087448, "loss": 0.2156, "step": 87676 }, { "epoch": 0.15546034101698702, "grad_norm": 0.9453125, "learning_rate": 0.00023394899803913882, "loss": 0.152, "step": 87678 }, { "epoch": 0.15546388718229684, "grad_norm": 0.353515625, "learning_rate": 0.00023393280699500729, "loss": 0.215, "step": 87680 }, { "epoch": 0.15546743334760665, "grad_norm": 1.125, "learning_rate": 0.00023391661973855046, "loss": 0.158, "step": 87682 }, { "epoch": 0.15547097951291647, "grad_norm": 0.55078125, "learning_rate": 0.00023390043626983942, "loss": 0.1896, "step": 87684 }, { "epoch": 0.15547452567822628, "grad_norm": 0.41015625, "learning_rate": 0.00023388425658894477, "loss": 0.2564, "step": 87686 }, { "epoch": 0.1554780718435361, "grad_norm": 0.2265625, "learning_rate": 0.00023386808069593721, "loss": 0.1323, "step": 87688 }, { "epoch": 0.1554816180088459, "grad_norm": 1.609375, "learning_rate": 0.00023385190859088749, "loss": 0.2622, "step": 87690 }, { "epoch": 0.15548516417415573, "grad_norm": 0.330078125, "learning_rate": 0.00023383574027386646, "loss": 0.2352, "step": 87692 }, { "epoch": 0.15548871033946554, "grad_norm": 2.09375, "learning_rate": 0.00023381957574494488, "loss": 0.309, "step": 87694 }, { "epoch": 0.15549225650477536, "grad_norm": 0.3671875, "learning_rate": 0.00023380341500419324, "loss": 0.5172, "step": 87696 }, { "epoch": 0.15549580267008517, "grad_norm": 0.99609375, "learning_rate": 0.00023378725805168235, "loss": 0.1677, "step": 87698 }, { "epoch": 0.15549934883539498, "grad_norm": 0.3125, "learning_rate": 0.00023377110488748273, "loss": 0.1485, "step": 87700 }, { "epoch": 0.1555028950007048, "grad_norm": 0.51953125, "learning_rate": 0.00023375495551166542, "loss": 0.1602, "step": 87702 }, { "epoch": 0.15550644116601461, "grad_norm": 0.26953125, "learning_rate": 0.00023373880992430044, "loss": 0.1608, "step": 87704 }, { "epoch": 0.15550998733132443, "grad_norm": 0.61328125, "learning_rate": 0.00023372266812545886, "loss": 0.133, "step": 87706 }, { "epoch": 0.15551353349663424, "grad_norm": 0.19921875, "learning_rate": 0.00023370653011521103, "loss": 0.1739, "step": 87708 }, { "epoch": 0.15551707966194406, "grad_norm": 0.60546875, "learning_rate": 0.0002336903958936279, "loss": 0.197, "step": 87710 }, { "epoch": 0.15552062582725387, "grad_norm": 0.1845703125, "learning_rate": 0.00023367426546077946, "loss": 0.1555, "step": 87712 }, { "epoch": 0.1555241719925637, "grad_norm": 1.015625, "learning_rate": 0.0002336581388167367, "loss": 0.1544, "step": 87714 }, { "epoch": 0.1555277181578735, "grad_norm": 0.4140625, "learning_rate": 0.00023364201596157006, "loss": 0.2574, "step": 87716 }, { "epoch": 0.15553126432318332, "grad_norm": 0.67578125, "learning_rate": 0.00023362589689534985, "loss": 0.1708, "step": 87718 }, { "epoch": 0.15553481048849313, "grad_norm": 0.1904296875, "learning_rate": 0.00023360978161814688, "loss": 0.1227, "step": 87720 }, { "epoch": 0.15553835665380294, "grad_norm": 0.5390625, "learning_rate": 0.00023359367013003134, "loss": 0.2178, "step": 87722 }, { "epoch": 0.15554190281911276, "grad_norm": 0.33203125, "learning_rate": 0.00023357756243107386, "loss": 0.2, "step": 87724 }, { "epoch": 0.15554544898442257, "grad_norm": 0.271484375, "learning_rate": 0.00023356145852134486, "loss": 0.1252, "step": 87726 }, { "epoch": 0.1555489951497324, "grad_norm": 1.171875, "learning_rate": 0.00023354535840091478, "loss": 0.2603, "step": 87728 }, { "epoch": 0.1555525413150422, "grad_norm": 0.3203125, "learning_rate": 0.00023352926206985386, "loss": 0.1454, "step": 87730 }, { "epoch": 0.15555608748035202, "grad_norm": 0.55078125, "learning_rate": 0.00023351316952823279, "loss": 0.3388, "step": 87732 }, { "epoch": 0.15555963364566183, "grad_norm": 0.52734375, "learning_rate": 0.00023349708077612184, "loss": 0.2079, "step": 87734 }, { "epoch": 0.15556317981097165, "grad_norm": 0.5859375, "learning_rate": 0.00023348099581359122, "loss": 0.1579, "step": 87736 }, { "epoch": 0.15556672597628146, "grad_norm": 1.1640625, "learning_rate": 0.0002334649146407115, "loss": 0.1952, "step": 87738 }, { "epoch": 0.15557027214159128, "grad_norm": 0.314453125, "learning_rate": 0.0002334488372575527, "loss": 0.1794, "step": 87740 }, { "epoch": 0.1555738183069011, "grad_norm": 0.41015625, "learning_rate": 0.0002334327636641856, "loss": 0.177, "step": 87742 }, { "epoch": 0.1555773644722109, "grad_norm": 0.380859375, "learning_rate": 0.00023341669386068, "loss": 0.2233, "step": 87744 }, { "epoch": 0.15558091063752075, "grad_norm": 1.1640625, "learning_rate": 0.0002334006278471065, "loss": 0.1608, "step": 87746 }, { "epoch": 0.15558445680283056, "grad_norm": 0.7265625, "learning_rate": 0.0002333845656235352, "loss": 0.1698, "step": 87748 }, { "epoch": 0.15558800296814038, "grad_norm": 0.1572265625, "learning_rate": 0.00023336850719003665, "loss": 0.1753, "step": 87750 }, { "epoch": 0.1555915491334502, "grad_norm": 0.4765625, "learning_rate": 0.00023335245254668061, "loss": 0.3937, "step": 87752 }, { "epoch": 0.15559509529876, "grad_norm": 0.625, "learning_rate": 0.00023333640169353762, "loss": 0.1935, "step": 87754 }, { "epoch": 0.15559864146406982, "grad_norm": 1.0625, "learning_rate": 0.0002333203546306778, "loss": 0.2812, "step": 87756 }, { "epoch": 0.15560218762937963, "grad_norm": 0.6171875, "learning_rate": 0.0002333043113581712, "loss": 0.174, "step": 87758 }, { "epoch": 0.15560573379468945, "grad_norm": 0.41015625, "learning_rate": 0.0002332882718760884, "loss": 0.2191, "step": 87760 }, { "epoch": 0.15560927995999926, "grad_norm": 0.1865234375, "learning_rate": 0.000233272236184499, "loss": 0.1889, "step": 87762 }, { "epoch": 0.15561282612530908, "grad_norm": 0.484375, "learning_rate": 0.00023325620428347337, "loss": 0.2228, "step": 87764 }, { "epoch": 0.1556163722906189, "grad_norm": 0.294921875, "learning_rate": 0.0002332401761730817, "loss": 0.2002, "step": 87766 }, { "epoch": 0.1556199184559287, "grad_norm": 0.78515625, "learning_rate": 0.0002332241518533942, "loss": 0.4486, "step": 87768 }, { "epoch": 0.15562346462123852, "grad_norm": 0.59375, "learning_rate": 0.00023320813132448055, "loss": 0.1606, "step": 87770 }, { "epoch": 0.15562701078654834, "grad_norm": 0.828125, "learning_rate": 0.0002331921145864111, "loss": 0.2378, "step": 87772 }, { "epoch": 0.15563055695185815, "grad_norm": 0.392578125, "learning_rate": 0.00023317610163925585, "loss": 0.2285, "step": 87774 }, { "epoch": 0.15563410311716797, "grad_norm": 0.47265625, "learning_rate": 0.0002331600924830848, "loss": 0.2551, "step": 87776 }, { "epoch": 0.15563764928247778, "grad_norm": 0.271484375, "learning_rate": 0.00023314408711796805, "loss": 0.2006, "step": 87778 }, { "epoch": 0.1556411954477876, "grad_norm": 0.68359375, "learning_rate": 0.00023312808554397542, "loss": 0.1783, "step": 87780 }, { "epoch": 0.1556447416130974, "grad_norm": 1.7421875, "learning_rate": 0.00023311208776117715, "loss": 0.3544, "step": 87782 }, { "epoch": 0.15564828777840722, "grad_norm": 0.2890625, "learning_rate": 0.00023309609376964298, "loss": 0.173, "step": 87784 }, { "epoch": 0.15565183394371704, "grad_norm": 0.275390625, "learning_rate": 0.00023308010356944294, "loss": 0.2357, "step": 87786 }, { "epoch": 0.15565538010902685, "grad_norm": 4.6875, "learning_rate": 0.00023306411716064688, "loss": 0.1965, "step": 87788 }, { "epoch": 0.15565892627433667, "grad_norm": 0.435546875, "learning_rate": 0.00023304813454332488, "loss": 0.1612, "step": 87790 }, { "epoch": 0.15566247243964648, "grad_norm": 0.9765625, "learning_rate": 0.00023303215571754686, "loss": 0.1613, "step": 87792 }, { "epoch": 0.1556660186049563, "grad_norm": 0.69140625, "learning_rate": 0.00023301618068338243, "loss": 0.2771, "step": 87794 }, { "epoch": 0.1556695647702661, "grad_norm": 0.43359375, "learning_rate": 0.00023300020944090169, "loss": 0.2893, "step": 87796 }, { "epoch": 0.15567311093557593, "grad_norm": 0.50390625, "learning_rate": 0.00023298424199017436, "loss": 0.1819, "step": 87798 }, { "epoch": 0.15567665710088574, "grad_norm": 0.2275390625, "learning_rate": 0.00023296827833127048, "loss": 0.2316, "step": 87800 }, { "epoch": 0.15568020326619555, "grad_norm": 1.4140625, "learning_rate": 0.00023295231846425952, "loss": 0.1938, "step": 87802 }, { "epoch": 0.15568374943150537, "grad_norm": 0.83203125, "learning_rate": 0.0002329363623892117, "loss": 0.3206, "step": 87804 }, { "epoch": 0.15568729559681518, "grad_norm": 0.4140625, "learning_rate": 0.00023292041010619647, "loss": 0.1615, "step": 87806 }, { "epoch": 0.155690841762125, "grad_norm": 0.50390625, "learning_rate": 0.00023290446161528365, "loss": 0.179, "step": 87808 }, { "epoch": 0.1556943879274348, "grad_norm": 0.54296875, "learning_rate": 0.00023288851691654312, "loss": 0.1979, "step": 87810 }, { "epoch": 0.15569793409274463, "grad_norm": 0.31640625, "learning_rate": 0.00023287257601004442, "loss": 0.1661, "step": 87812 }, { "epoch": 0.15570148025805444, "grad_norm": 2.484375, "learning_rate": 0.00023285663889585756, "loss": 0.1772, "step": 87814 }, { "epoch": 0.15570502642336426, "grad_norm": 0.1552734375, "learning_rate": 0.0002328407055740519, "loss": 0.1163, "step": 87816 }, { "epoch": 0.15570857258867407, "grad_norm": 0.97265625, "learning_rate": 0.00023282477604469744, "loss": 0.3772, "step": 87818 }, { "epoch": 0.15571211875398389, "grad_norm": 0.19140625, "learning_rate": 0.00023280885030786352, "loss": 0.1476, "step": 87820 }, { "epoch": 0.1557156649192937, "grad_norm": 2.296875, "learning_rate": 0.00023279292836362009, "loss": 0.225, "step": 87822 }, { "epoch": 0.15571921108460351, "grad_norm": 0.6640625, "learning_rate": 0.00023277701021203663, "loss": 0.1948, "step": 87824 }, { "epoch": 0.15572275724991333, "grad_norm": 2.28125, "learning_rate": 0.0002327610958531829, "loss": 0.3772, "step": 87826 }, { "epoch": 0.15572630341522314, "grad_norm": 0.41015625, "learning_rate": 0.00023274518528712825, "loss": 0.2362, "step": 87828 }, { "epoch": 0.15572984958053296, "grad_norm": 0.921875, "learning_rate": 0.0002327292785139423, "loss": 0.2368, "step": 87830 }, { "epoch": 0.15573339574584277, "grad_norm": 0.30078125, "learning_rate": 0.00023271337553369505, "loss": 0.2566, "step": 87832 }, { "epoch": 0.1557369419111526, "grad_norm": 0.3203125, "learning_rate": 0.0002326974763464554, "loss": 0.1624, "step": 87834 }, { "epoch": 0.15574048807646243, "grad_norm": 0.2197265625, "learning_rate": 0.00023268158095229326, "loss": 0.1706, "step": 87836 }, { "epoch": 0.15574403424177224, "grad_norm": 0.50390625, "learning_rate": 0.00023266568935127806, "loss": 0.1727, "step": 87838 }, { "epoch": 0.15574758040708206, "grad_norm": 0.349609375, "learning_rate": 0.00023264980154347949, "loss": 0.1635, "step": 87840 }, { "epoch": 0.15575112657239187, "grad_norm": 0.34765625, "learning_rate": 0.00023263391752896678, "loss": 0.1696, "step": 87842 }, { "epoch": 0.1557546727377017, "grad_norm": 0.2255859375, "learning_rate": 0.00023261803730780943, "loss": 0.1745, "step": 87844 }, { "epoch": 0.1557582189030115, "grad_norm": 0.435546875, "learning_rate": 0.00023260216088007702, "loss": 0.1704, "step": 87846 }, { "epoch": 0.15576176506832132, "grad_norm": 0.310546875, "learning_rate": 0.00023258628824583886, "loss": 0.1922, "step": 87848 }, { "epoch": 0.15576531123363113, "grad_norm": 0.30078125, "learning_rate": 0.00023257041940516455, "loss": 0.1861, "step": 87850 }, { "epoch": 0.15576885739894095, "grad_norm": 0.41015625, "learning_rate": 0.00023255455435812318, "loss": 0.1635, "step": 87852 }, { "epoch": 0.15577240356425076, "grad_norm": 0.46875, "learning_rate": 0.00023253869310478444, "loss": 0.146, "step": 87854 }, { "epoch": 0.15577594972956058, "grad_norm": 2.109375, "learning_rate": 0.00023252283564521747, "loss": 0.4094, "step": 87856 }, { "epoch": 0.1557794958948704, "grad_norm": 0.5546875, "learning_rate": 0.00023250698197949197, "loss": 0.2163, "step": 87858 }, { "epoch": 0.1557830420601802, "grad_norm": 0.31640625, "learning_rate": 0.00023249113210767675, "loss": 0.1693, "step": 87860 }, { "epoch": 0.15578658822549002, "grad_norm": 0.1953125, "learning_rate": 0.00023247528602984155, "loss": 0.1476, "step": 87862 }, { "epoch": 0.15579013439079983, "grad_norm": 0.42578125, "learning_rate": 0.00023245944374605554, "loss": 0.1482, "step": 87864 }, { "epoch": 0.15579368055610965, "grad_norm": 0.25390625, "learning_rate": 0.00023244360525638793, "loss": 0.3259, "step": 87866 }, { "epoch": 0.15579722672141946, "grad_norm": 0.271484375, "learning_rate": 0.0002324277705609081, "loss": 0.2392, "step": 87868 }, { "epoch": 0.15580077288672928, "grad_norm": 0.435546875, "learning_rate": 0.00023241193965968518, "loss": 0.1954, "step": 87870 }, { "epoch": 0.1558043190520391, "grad_norm": 0.259765625, "learning_rate": 0.0002323961125527885, "loss": 0.1257, "step": 87872 }, { "epoch": 0.1558078652173489, "grad_norm": 0.392578125, "learning_rate": 0.00023238028924028734, "loss": 0.1793, "step": 87874 }, { "epoch": 0.15581141138265872, "grad_norm": 0.81640625, "learning_rate": 0.0002323644697222508, "loss": 0.2045, "step": 87876 }, { "epoch": 0.15581495754796854, "grad_norm": 0.640625, "learning_rate": 0.00023234865399874796, "loss": 0.1596, "step": 87878 }, { "epoch": 0.15581850371327835, "grad_norm": 1.2578125, "learning_rate": 0.00023233284206984826, "loss": 0.1838, "step": 87880 }, { "epoch": 0.15582204987858816, "grad_norm": 0.26171875, "learning_rate": 0.0002323170339356207, "loss": 0.1635, "step": 87882 }, { "epoch": 0.15582559604389798, "grad_norm": 0.26953125, "learning_rate": 0.0002323012295961344, "loss": 0.1468, "step": 87884 }, { "epoch": 0.1558291422092078, "grad_norm": 0.498046875, "learning_rate": 0.00023228542905145844, "loss": 0.1639, "step": 87886 }, { "epoch": 0.1558326883745176, "grad_norm": 0.384765625, "learning_rate": 0.00023226963230166202, "loss": 0.1554, "step": 87888 }, { "epoch": 0.15583623453982742, "grad_norm": 0.380859375, "learning_rate": 0.00023225383934681433, "loss": 0.2074, "step": 87890 }, { "epoch": 0.15583978070513724, "grad_norm": 0.90625, "learning_rate": 0.00023223805018698405, "loss": 0.214, "step": 87892 }, { "epoch": 0.15584332687044705, "grad_norm": 0.451171875, "learning_rate": 0.00023222226482224063, "loss": 0.14, "step": 87894 }, { "epoch": 0.15584687303575687, "grad_norm": 0.349609375, "learning_rate": 0.00023220648325265287, "loss": 0.1574, "step": 87896 }, { "epoch": 0.15585041920106668, "grad_norm": 0.314453125, "learning_rate": 0.00023219070547829006, "loss": 0.1685, "step": 87898 }, { "epoch": 0.1558539653663765, "grad_norm": 0.396484375, "learning_rate": 0.0002321749314992208, "loss": 0.2067, "step": 87900 }, { "epoch": 0.1558575115316863, "grad_norm": 0.96875, "learning_rate": 0.0002321591613155145, "loss": 0.219, "step": 87902 }, { "epoch": 0.15586105769699612, "grad_norm": 0.3828125, "learning_rate": 0.00023214339492723976, "loss": 0.2167, "step": 87904 }, { "epoch": 0.15586460386230594, "grad_norm": 0.42578125, "learning_rate": 0.00023212763233446584, "loss": 0.1707, "step": 87906 }, { "epoch": 0.15586815002761575, "grad_norm": 0.8671875, "learning_rate": 0.00023211187353726147, "loss": 0.1447, "step": 87908 }, { "epoch": 0.15587169619292557, "grad_norm": 1.0078125, "learning_rate": 0.00023209611853569563, "loss": 0.2377, "step": 87910 }, { "epoch": 0.15587524235823538, "grad_norm": 0.263671875, "learning_rate": 0.0002320803673298372, "loss": 0.1675, "step": 87912 }, { "epoch": 0.1558787885235452, "grad_norm": 0.54296875, "learning_rate": 0.00023206461991975516, "loss": 0.1804, "step": 87914 }, { "epoch": 0.155882334688855, "grad_norm": 0.44921875, "learning_rate": 0.0002320488763055183, "loss": 0.1791, "step": 87916 }, { "epoch": 0.15588588085416483, "grad_norm": 0.94921875, "learning_rate": 0.00023203313648719539, "loss": 0.2058, "step": 87918 }, { "epoch": 0.15588942701947464, "grad_norm": 0.431640625, "learning_rate": 0.00023201740046485542, "loss": 0.1889, "step": 87920 }, { "epoch": 0.15589297318478446, "grad_norm": 0.451171875, "learning_rate": 0.00023200166823856723, "loss": 0.16, "step": 87922 }, { "epoch": 0.15589651935009427, "grad_norm": 0.341796875, "learning_rate": 0.00023198593980839951, "loss": 0.1717, "step": 87924 }, { "epoch": 0.1559000655154041, "grad_norm": 0.41796875, "learning_rate": 0.0002319702151744211, "loss": 0.1441, "step": 87926 }, { "epoch": 0.15590361168071393, "grad_norm": 0.357421875, "learning_rate": 0.00023195449433670068, "loss": 0.1983, "step": 87928 }, { "epoch": 0.15590715784602374, "grad_norm": 0.259765625, "learning_rate": 0.00023193877729530714, "loss": 0.1946, "step": 87930 }, { "epoch": 0.15591070401133356, "grad_norm": 0.40234375, "learning_rate": 0.00023192306405030917, "loss": 0.1808, "step": 87932 }, { "epoch": 0.15591425017664337, "grad_norm": 1.671875, "learning_rate": 0.00023190735460177546, "loss": 0.1975, "step": 87934 }, { "epoch": 0.15591779634195319, "grad_norm": 0.376953125, "learning_rate": 0.00023189164894977463, "loss": 0.1812, "step": 87936 }, { "epoch": 0.155921342507263, "grad_norm": 0.26953125, "learning_rate": 0.00023187594709437562, "loss": 0.1597, "step": 87938 }, { "epoch": 0.15592488867257281, "grad_norm": 0.60546875, "learning_rate": 0.0002318602490356469, "loss": 0.1877, "step": 87940 }, { "epoch": 0.15592843483788263, "grad_norm": 0.400390625, "learning_rate": 0.0002318445547736572, "loss": 0.158, "step": 87942 }, { "epoch": 0.15593198100319244, "grad_norm": 0.439453125, "learning_rate": 0.0002318288643084752, "loss": 0.1874, "step": 87944 }, { "epoch": 0.15593552716850226, "grad_norm": 0.1640625, "learning_rate": 0.00023181317764016927, "loss": 0.0927, "step": 87946 }, { "epoch": 0.15593907333381207, "grad_norm": 1.2734375, "learning_rate": 0.0002317974947688084, "loss": 0.336, "step": 87948 }, { "epoch": 0.1559426194991219, "grad_norm": 0.435546875, "learning_rate": 0.00023178181569446078, "loss": 0.1665, "step": 87950 }, { "epoch": 0.1559461656644317, "grad_norm": 0.2421875, "learning_rate": 0.00023176614041719534, "loss": 0.142, "step": 87952 }, { "epoch": 0.15594971182974152, "grad_norm": 0.2734375, "learning_rate": 0.00023175046893708024, "loss": 0.2111, "step": 87954 }, { "epoch": 0.15595325799505133, "grad_norm": 0.173828125, "learning_rate": 0.0002317348012541846, "loss": 0.1828, "step": 87956 }, { "epoch": 0.15595680416036115, "grad_norm": 0.251953125, "learning_rate": 0.0002317191373685763, "loss": 0.3833, "step": 87958 }, { "epoch": 0.15596035032567096, "grad_norm": 0.53125, "learning_rate": 0.00023170347728032424, "loss": 0.1868, "step": 87960 }, { "epoch": 0.15596389649098077, "grad_norm": 0.40234375, "learning_rate": 0.00023168782098949688, "loss": 0.1671, "step": 87962 }, { "epoch": 0.1559674426562906, "grad_norm": 0.412109375, "learning_rate": 0.00023167216849616248, "loss": 0.1641, "step": 87964 }, { "epoch": 0.1559709888216004, "grad_norm": 0.337890625, "learning_rate": 0.00023165651980038972, "loss": 0.1639, "step": 87966 }, { "epoch": 0.15597453498691022, "grad_norm": 0.6484375, "learning_rate": 0.0002316408749022468, "loss": 0.2174, "step": 87968 }, { "epoch": 0.15597808115222003, "grad_norm": 0.267578125, "learning_rate": 0.00023162523380180247, "loss": 0.1694, "step": 87970 }, { "epoch": 0.15598162731752985, "grad_norm": 0.353515625, "learning_rate": 0.00023160959649912494, "loss": 0.1948, "step": 87972 }, { "epoch": 0.15598517348283966, "grad_norm": 0.53125, "learning_rate": 0.00023159396299428256, "loss": 0.2265, "step": 87974 }, { "epoch": 0.15598871964814948, "grad_norm": 4.53125, "learning_rate": 0.0002315783332873438, "loss": 0.2256, "step": 87976 }, { "epoch": 0.1559922658134593, "grad_norm": 0.451171875, "learning_rate": 0.0002315627073783768, "loss": 0.1628, "step": 87978 }, { "epoch": 0.1559958119787691, "grad_norm": 0.1728515625, "learning_rate": 0.00023154708526745024, "loss": 0.1361, "step": 87980 }, { "epoch": 0.15599935814407892, "grad_norm": 0.375, "learning_rate": 0.0002315314669546323, "loss": 0.1659, "step": 87982 }, { "epoch": 0.15600290430938873, "grad_norm": 0.6796875, "learning_rate": 0.00023151585243999113, "loss": 0.1699, "step": 87984 }, { "epoch": 0.15600645047469855, "grad_norm": 0.384765625, "learning_rate": 0.00023150024172359518, "loss": 0.1556, "step": 87986 }, { "epoch": 0.15600999664000836, "grad_norm": 0.28515625, "learning_rate": 0.0002314846348055128, "loss": 0.2017, "step": 87988 }, { "epoch": 0.15601354280531818, "grad_norm": 0.94921875, "learning_rate": 0.000231469031685812, "loss": 0.2655, "step": 87990 }, { "epoch": 0.156017088970628, "grad_norm": 0.396484375, "learning_rate": 0.00023145343236456118, "loss": 0.4095, "step": 87992 }, { "epoch": 0.1560206351359378, "grad_norm": 0.34375, "learning_rate": 0.00023143783684182855, "loss": 0.1442, "step": 87994 }, { "epoch": 0.15602418130124762, "grad_norm": 1.53125, "learning_rate": 0.0002314222451176824, "loss": 0.282, "step": 87996 }, { "epoch": 0.15602772746655744, "grad_norm": 0.408203125, "learning_rate": 0.0002314066571921907, "loss": 0.187, "step": 87998 }, { "epoch": 0.15603127363186725, "grad_norm": 1.1328125, "learning_rate": 0.00023139107306542172, "loss": 0.1937, "step": 88000 }, { "epoch": 0.15603481979717707, "grad_norm": 0.84765625, "learning_rate": 0.00023137549273744367, "loss": 0.2624, "step": 88002 }, { "epoch": 0.15603836596248688, "grad_norm": 0.51953125, "learning_rate": 0.00023135991620832464, "loss": 0.1777, "step": 88004 }, { "epoch": 0.1560419121277967, "grad_norm": 0.2216796875, "learning_rate": 0.00023134434347813284, "loss": 0.1349, "step": 88006 }, { "epoch": 0.1560454582931065, "grad_norm": 0.47265625, "learning_rate": 0.0002313287745469362, "loss": 0.201, "step": 88008 }, { "epoch": 0.15604900445841632, "grad_norm": 0.28515625, "learning_rate": 0.00023131320941480302, "loss": 0.1501, "step": 88010 }, { "epoch": 0.15605255062372614, "grad_norm": 0.361328125, "learning_rate": 0.00023129764808180124, "loss": 0.1731, "step": 88012 }, { "epoch": 0.15605609678903595, "grad_norm": 4.875, "learning_rate": 0.000231282090547999, "loss": 0.2935, "step": 88014 }, { "epoch": 0.15605964295434577, "grad_norm": 0.369140625, "learning_rate": 0.0002312665368134642, "loss": 0.142, "step": 88016 }, { "epoch": 0.1560631891196556, "grad_norm": 0.384765625, "learning_rate": 0.0002312509868782649, "loss": 0.1423, "step": 88018 }, { "epoch": 0.15606673528496542, "grad_norm": 0.361328125, "learning_rate": 0.0002312354407424692, "loss": 0.1598, "step": 88020 }, { "epoch": 0.15607028145027524, "grad_norm": 0.158203125, "learning_rate": 0.0002312198984061452, "loss": 0.1581, "step": 88022 }, { "epoch": 0.15607382761558505, "grad_norm": 0.625, "learning_rate": 0.00023120435986936062, "loss": 0.1615, "step": 88024 }, { "epoch": 0.15607737378089487, "grad_norm": 1.375, "learning_rate": 0.00023118882513218337, "loss": 0.1571, "step": 88026 }, { "epoch": 0.15608091994620468, "grad_norm": 0.2119140625, "learning_rate": 0.0002311732941946816, "loss": 0.1418, "step": 88028 }, { "epoch": 0.1560844661115145, "grad_norm": 0.28515625, "learning_rate": 0.00023115776705692325, "loss": 0.1583, "step": 88030 }, { "epoch": 0.1560880122768243, "grad_norm": 0.5546875, "learning_rate": 0.00023114224371897612, "loss": 0.2139, "step": 88032 }, { "epoch": 0.15609155844213413, "grad_norm": 0.70703125, "learning_rate": 0.00023112672418090812, "loss": 0.1815, "step": 88034 }, { "epoch": 0.15609510460744394, "grad_norm": 0.42578125, "learning_rate": 0.00023111120844278703, "loss": 0.1842, "step": 88036 }, { "epoch": 0.15609865077275376, "grad_norm": 0.294921875, "learning_rate": 0.000231095696504681, "loss": 0.1756, "step": 88038 }, { "epoch": 0.15610219693806357, "grad_norm": 3.625, "learning_rate": 0.00023108018836665744, "loss": 0.3138, "step": 88040 }, { "epoch": 0.15610574310337338, "grad_norm": 0.69921875, "learning_rate": 0.0002310646840287846, "loss": 0.1732, "step": 88042 }, { "epoch": 0.1561092892686832, "grad_norm": 0.203125, "learning_rate": 0.00023104918349112997, "loss": 0.1534, "step": 88044 }, { "epoch": 0.156112835433993, "grad_norm": 0.5234375, "learning_rate": 0.00023103368675376164, "loss": 0.3385, "step": 88046 }, { "epoch": 0.15611638159930283, "grad_norm": 0.28125, "learning_rate": 0.00023101819381674693, "loss": 0.1721, "step": 88048 }, { "epoch": 0.15611992776461264, "grad_norm": 0.400390625, "learning_rate": 0.0002310027046801541, "loss": 0.2182, "step": 88050 }, { "epoch": 0.15612347392992246, "grad_norm": 0.365234375, "learning_rate": 0.0002309872193440506, "loss": 0.2822, "step": 88052 }, { "epoch": 0.15612702009523227, "grad_norm": 0.33984375, "learning_rate": 0.00023097173780850427, "loss": 0.1378, "step": 88054 }, { "epoch": 0.1561305662605421, "grad_norm": 0.30859375, "learning_rate": 0.00023095626007358275, "loss": 0.1294, "step": 88056 }, { "epoch": 0.1561341124258519, "grad_norm": 0.310546875, "learning_rate": 0.0002309407861393536, "loss": 0.2696, "step": 88058 }, { "epoch": 0.15613765859116172, "grad_norm": 0.482421875, "learning_rate": 0.00023092531600588472, "loss": 0.2474, "step": 88060 }, { "epoch": 0.15614120475647153, "grad_norm": 0.232421875, "learning_rate": 0.00023090984967324383, "loss": 0.1423, "step": 88062 }, { "epoch": 0.15614475092178134, "grad_norm": 0.458984375, "learning_rate": 0.00023089438714149832, "loss": 0.1697, "step": 88064 }, { "epoch": 0.15614829708709116, "grad_norm": 0.451171875, "learning_rate": 0.00023087892841071577, "loss": 0.2013, "step": 88066 }, { "epoch": 0.15615184325240097, "grad_norm": 0.302734375, "learning_rate": 0.0002308634734809642, "loss": 0.1405, "step": 88068 }, { "epoch": 0.1561553894177108, "grad_norm": 1.328125, "learning_rate": 0.00023084802235231086, "loss": 0.1702, "step": 88070 }, { "epoch": 0.1561589355830206, "grad_norm": 0.6328125, "learning_rate": 0.0002308325750248234, "loss": 0.2111, "step": 88072 }, { "epoch": 0.15616248174833042, "grad_norm": 0.296875, "learning_rate": 0.00023081713149856938, "loss": 0.1328, "step": 88074 }, { "epoch": 0.15616602791364023, "grad_norm": 0.28515625, "learning_rate": 0.00023080169177361627, "loss": 0.1407, "step": 88076 }, { "epoch": 0.15616957407895005, "grad_norm": 0.2470703125, "learning_rate": 0.00023078625585003183, "loss": 0.1874, "step": 88078 }, { "epoch": 0.15617312024425986, "grad_norm": 0.65234375, "learning_rate": 0.00023077082372788328, "loss": 0.1935, "step": 88080 }, { "epoch": 0.15617666640956968, "grad_norm": 1.65625, "learning_rate": 0.00023075539540723822, "loss": 0.2021, "step": 88082 }, { "epoch": 0.1561802125748795, "grad_norm": 0.26171875, "learning_rate": 0.00023073997088816412, "loss": 0.1753, "step": 88084 }, { "epoch": 0.1561837587401893, "grad_norm": 0.29296875, "learning_rate": 0.00023072455017072863, "loss": 0.207, "step": 88086 }, { "epoch": 0.15618730490549912, "grad_norm": 0.5390625, "learning_rate": 0.0002307091332549989, "loss": 0.1678, "step": 88088 }, { "epoch": 0.15619085107080893, "grad_norm": 0.306640625, "learning_rate": 0.00023069372014104255, "loss": 0.1928, "step": 88090 }, { "epoch": 0.15619439723611875, "grad_norm": 0.61328125, "learning_rate": 0.00023067831082892684, "loss": 0.193, "step": 88092 }, { "epoch": 0.15619794340142856, "grad_norm": 0.244140625, "learning_rate": 0.00023066290531871926, "loss": 0.1653, "step": 88094 }, { "epoch": 0.15620148956673838, "grad_norm": 0.416015625, "learning_rate": 0.00023064750361048725, "loss": 0.1769, "step": 88096 }, { "epoch": 0.1562050357320482, "grad_norm": 0.703125, "learning_rate": 0.0002306321057042979, "loss": 0.1913, "step": 88098 }, { "epoch": 0.156208581897358, "grad_norm": 0.76171875, "learning_rate": 0.0002306167116002188, "loss": 0.212, "step": 88100 }, { "epoch": 0.15621212806266782, "grad_norm": 1.5546875, "learning_rate": 0.0002306013212983172, "loss": 0.1791, "step": 88102 }, { "epoch": 0.15621567422797764, "grad_norm": 0.55859375, "learning_rate": 0.00023058593479866052, "loss": 0.1774, "step": 88104 }, { "epoch": 0.15621922039328745, "grad_norm": 0.65234375, "learning_rate": 0.0002305705521013158, "loss": 0.2315, "step": 88106 }, { "epoch": 0.1562227665585973, "grad_norm": 0.23046875, "learning_rate": 0.00023055517320635062, "loss": 0.1429, "step": 88108 }, { "epoch": 0.1562263127239071, "grad_norm": 0.1708984375, "learning_rate": 0.00023053979811383198, "loss": 0.184, "step": 88110 }, { "epoch": 0.15622985888921692, "grad_norm": 0.291015625, "learning_rate": 0.00023052442682382725, "loss": 0.207, "step": 88112 }, { "epoch": 0.15623340505452674, "grad_norm": 0.275390625, "learning_rate": 0.00023050905933640365, "loss": 0.1891, "step": 88114 }, { "epoch": 0.15623695121983655, "grad_norm": 0.435546875, "learning_rate": 0.00023049369565162825, "loss": 0.1451, "step": 88116 }, { "epoch": 0.15624049738514637, "grad_norm": 0.271484375, "learning_rate": 0.0002304783357695684, "loss": 0.2134, "step": 88118 }, { "epoch": 0.15624404355045618, "grad_norm": 0.3671875, "learning_rate": 0.00023046297969029134, "loss": 0.1393, "step": 88120 }, { "epoch": 0.156247589715766, "grad_norm": 1.0, "learning_rate": 0.000230447627413864, "loss": 0.1721, "step": 88122 }, { "epoch": 0.1562511358810758, "grad_norm": 0.32421875, "learning_rate": 0.00023043227894035357, "loss": 0.1546, "step": 88124 }, { "epoch": 0.15625468204638562, "grad_norm": 0.3046875, "learning_rate": 0.00023041693426982743, "loss": 0.201, "step": 88126 }, { "epoch": 0.15625822821169544, "grad_norm": 0.416015625, "learning_rate": 0.0002304015934023524, "loss": 0.2369, "step": 88128 }, { "epoch": 0.15626177437700525, "grad_norm": 0.4296875, "learning_rate": 0.00023038625633799576, "loss": 0.1637, "step": 88130 }, { "epoch": 0.15626532054231507, "grad_norm": 0.44140625, "learning_rate": 0.0002303709230768245, "loss": 0.178, "step": 88132 }, { "epoch": 0.15626886670762488, "grad_norm": 0.2734375, "learning_rate": 0.00023035559361890556, "loss": 0.1502, "step": 88134 }, { "epoch": 0.1562724128729347, "grad_norm": 0.6171875, "learning_rate": 0.00023034026796430628, "loss": 0.2078, "step": 88136 }, { "epoch": 0.1562759590382445, "grad_norm": 0.875, "learning_rate": 0.0002303249461130933, "loss": 0.1417, "step": 88138 }, { "epoch": 0.15627950520355433, "grad_norm": 0.59765625, "learning_rate": 0.00023030962806533397, "loss": 0.2047, "step": 88140 }, { "epoch": 0.15628305136886414, "grad_norm": 0.3828125, "learning_rate": 0.00023029431382109506, "loss": 0.1253, "step": 88142 }, { "epoch": 0.15628659753417395, "grad_norm": 1.4375, "learning_rate": 0.00023027900338044386, "loss": 0.3091, "step": 88144 }, { "epoch": 0.15629014369948377, "grad_norm": 0.515625, "learning_rate": 0.00023026369674344687, "loss": 0.2143, "step": 88146 }, { "epoch": 0.15629368986479358, "grad_norm": 0.384765625, "learning_rate": 0.00023024839391017134, "loss": 0.156, "step": 88148 }, { "epoch": 0.1562972360301034, "grad_norm": 2.15625, "learning_rate": 0.00023023309488068414, "loss": 0.29, "step": 88150 }, { "epoch": 0.1563007821954132, "grad_norm": 0.259765625, "learning_rate": 0.00023021779965505223, "loss": 0.2078, "step": 88152 }, { "epoch": 0.15630432836072303, "grad_norm": 0.18359375, "learning_rate": 0.00023020250823334236, "loss": 0.1485, "step": 88154 }, { "epoch": 0.15630787452603284, "grad_norm": 0.3203125, "learning_rate": 0.0002301872206156213, "loss": 0.1859, "step": 88156 }, { "epoch": 0.15631142069134266, "grad_norm": 0.2734375, "learning_rate": 0.00023017193680195634, "loss": 0.196, "step": 88158 }, { "epoch": 0.15631496685665247, "grad_norm": 0.59375, "learning_rate": 0.000230156656792414, "loss": 0.453, "step": 88160 }, { "epoch": 0.15631851302196229, "grad_norm": 1.21875, "learning_rate": 0.0002301413805870612, "loss": 0.1713, "step": 88162 }, { "epoch": 0.1563220591872721, "grad_norm": 0.2333984375, "learning_rate": 0.00023012610818596473, "loss": 0.2907, "step": 88164 }, { "epoch": 0.15632560535258191, "grad_norm": 0.494140625, "learning_rate": 0.0002301108395891913, "loss": 0.159, "step": 88166 }, { "epoch": 0.15632915151789173, "grad_norm": 0.55859375, "learning_rate": 0.00023009557479680787, "loss": 0.2715, "step": 88168 }, { "epoch": 0.15633269768320154, "grad_norm": 0.546875, "learning_rate": 0.000230080313808881, "loss": 0.2005, "step": 88170 }, { "epoch": 0.15633624384851136, "grad_norm": 0.279296875, "learning_rate": 0.00023006505662547755, "loss": 0.1754, "step": 88172 }, { "epoch": 0.15633979001382117, "grad_norm": 0.259765625, "learning_rate": 0.00023004980324666422, "loss": 0.1921, "step": 88174 }, { "epoch": 0.156343336179131, "grad_norm": 0.212890625, "learning_rate": 0.00023003455367250777, "loss": 0.2202, "step": 88176 }, { "epoch": 0.1563468823444408, "grad_norm": 3.78125, "learning_rate": 0.00023001930790307487, "loss": 0.2747, "step": 88178 }, { "epoch": 0.15635042850975062, "grad_norm": 0.43359375, "learning_rate": 0.00023000406593843214, "loss": 0.1823, "step": 88180 }, { "epoch": 0.15635397467506043, "grad_norm": 0.671875, "learning_rate": 0.00022998882777864628, "loss": 0.162, "step": 88182 }, { "epoch": 0.15635752084037025, "grad_norm": 0.3515625, "learning_rate": 0.00022997359342378373, "loss": 0.1428, "step": 88184 }, { "epoch": 0.15636106700568006, "grad_norm": 0.73046875, "learning_rate": 0.00022995836287391163, "loss": 0.1849, "step": 88186 }, { "epoch": 0.15636461317098987, "grad_norm": 1.9609375, "learning_rate": 0.00022994313612909612, "loss": 0.278, "step": 88188 }, { "epoch": 0.1563681593362997, "grad_norm": 1.7265625, "learning_rate": 0.00022992791318940392, "loss": 0.2224, "step": 88190 }, { "epoch": 0.1563717055016095, "grad_norm": 0.80859375, "learning_rate": 0.00022991269405490158, "loss": 0.1722, "step": 88192 }, { "epoch": 0.15637525166691932, "grad_norm": 0.37109375, "learning_rate": 0.00022989747872565594, "loss": 0.152, "step": 88194 }, { "epoch": 0.15637879783222913, "grad_norm": 0.51171875, "learning_rate": 0.0002298822672017331, "loss": 0.1723, "step": 88196 }, { "epoch": 0.15638234399753898, "grad_norm": 10.5, "learning_rate": 0.0002298670594831998, "loss": 0.3679, "step": 88198 }, { "epoch": 0.1563858901628488, "grad_norm": 0.388671875, "learning_rate": 0.00022985185557012266, "loss": 0.1528, "step": 88200 }, { "epoch": 0.1563894363281586, "grad_norm": 0.52734375, "learning_rate": 0.00022983665546256804, "loss": 0.1959, "step": 88202 }, { "epoch": 0.15639298249346842, "grad_norm": 0.1953125, "learning_rate": 0.00022982145916060248, "loss": 0.1778, "step": 88204 }, { "epoch": 0.15639652865877823, "grad_norm": 0.2080078125, "learning_rate": 0.0002298062666642922, "loss": 0.4349, "step": 88206 }, { "epoch": 0.15640007482408805, "grad_norm": 2.375, "learning_rate": 0.00022979107797370393, "loss": 0.2205, "step": 88208 }, { "epoch": 0.15640362098939786, "grad_norm": 1.3828125, "learning_rate": 0.00022977589308890406, "loss": 0.474, "step": 88210 }, { "epoch": 0.15640716715470768, "grad_norm": 0.361328125, "learning_rate": 0.00022976071200995902, "loss": 0.1622, "step": 88212 }, { "epoch": 0.1564107133200175, "grad_norm": 0.279296875, "learning_rate": 0.00022974553473693497, "loss": 0.1765, "step": 88214 }, { "epoch": 0.1564142594853273, "grad_norm": 0.515625, "learning_rate": 0.00022973036126989856, "loss": 0.1553, "step": 88216 }, { "epoch": 0.15641780565063712, "grad_norm": 0.302734375, "learning_rate": 0.00022971519160891595, "loss": 0.1963, "step": 88218 }, { "epoch": 0.15642135181594694, "grad_norm": 0.95703125, "learning_rate": 0.00022970002575405362, "loss": 0.2029, "step": 88220 }, { "epoch": 0.15642489798125675, "grad_norm": 0.46484375, "learning_rate": 0.00022968486370537785, "loss": 0.1809, "step": 88222 }, { "epoch": 0.15642844414656656, "grad_norm": 0.44921875, "learning_rate": 0.0002296697054629549, "loss": 0.1506, "step": 88224 }, { "epoch": 0.15643199031187638, "grad_norm": 1.8828125, "learning_rate": 0.0002296545510268513, "loss": 0.1474, "step": 88226 }, { "epoch": 0.1564355364771862, "grad_norm": 0.67578125, "learning_rate": 0.0002296394003971329, "loss": 0.1481, "step": 88228 }, { "epoch": 0.156439082642496, "grad_norm": 0.7734375, "learning_rate": 0.00022962425357386625, "loss": 0.1675, "step": 88230 }, { "epoch": 0.15644262880780582, "grad_norm": 0.4453125, "learning_rate": 0.0002296091105571175, "loss": 0.1554, "step": 88232 }, { "epoch": 0.15644617497311564, "grad_norm": 0.7421875, "learning_rate": 0.00022959397134695312, "loss": 0.2002, "step": 88234 }, { "epoch": 0.15644972113842545, "grad_norm": 1.9453125, "learning_rate": 0.00022957883594343893, "loss": 0.3163, "step": 88236 }, { "epoch": 0.15645326730373527, "grad_norm": 0.7578125, "learning_rate": 0.00022956370434664138, "loss": 0.1849, "step": 88238 }, { "epoch": 0.15645681346904508, "grad_norm": 0.26171875, "learning_rate": 0.0002295485765566266, "loss": 0.1484, "step": 88240 }, { "epoch": 0.1564603596343549, "grad_norm": 0.392578125, "learning_rate": 0.00022953345257346075, "loss": 0.2304, "step": 88242 }, { "epoch": 0.1564639057996647, "grad_norm": 0.3203125, "learning_rate": 0.0002295183323972099, "loss": 0.1242, "step": 88244 }, { "epoch": 0.15646745196497452, "grad_norm": 0.3359375, "learning_rate": 0.00022950321602794007, "loss": 0.2286, "step": 88246 }, { "epoch": 0.15647099813028434, "grad_norm": 2.375, "learning_rate": 0.00022948810346571773, "loss": 0.188, "step": 88248 }, { "epoch": 0.15647454429559415, "grad_norm": 0.859375, "learning_rate": 0.00022947299471060885, "loss": 0.2495, "step": 88250 }, { "epoch": 0.15647809046090397, "grad_norm": 0.58984375, "learning_rate": 0.00022945788976267927, "loss": 0.2119, "step": 88252 }, { "epoch": 0.15648163662621378, "grad_norm": 0.2451171875, "learning_rate": 0.00022944278862199516, "loss": 0.1173, "step": 88254 }, { "epoch": 0.1564851827915236, "grad_norm": 0.3046875, "learning_rate": 0.0002294276912886227, "loss": 0.1703, "step": 88256 }, { "epoch": 0.1564887289568334, "grad_norm": 2.265625, "learning_rate": 0.00022941259776262785, "loss": 0.2374, "step": 88258 }, { "epoch": 0.15649227512214323, "grad_norm": 0.5234375, "learning_rate": 0.00022939750804407656, "loss": 0.1797, "step": 88260 }, { "epoch": 0.15649582128745304, "grad_norm": 0.5078125, "learning_rate": 0.00022938242213303497, "loss": 0.1781, "step": 88262 }, { "epoch": 0.15649936745276286, "grad_norm": 0.2236328125, "learning_rate": 0.00022936734002956878, "loss": 0.1904, "step": 88264 }, { "epoch": 0.15650291361807267, "grad_norm": 0.2412109375, "learning_rate": 0.00022935226173374424, "loss": 0.1333, "step": 88266 }, { "epoch": 0.15650645978338248, "grad_norm": 0.1982421875, "learning_rate": 0.00022933718724562707, "loss": 0.1333, "step": 88268 }, { "epoch": 0.1565100059486923, "grad_norm": 0.37890625, "learning_rate": 0.0002293221165652834, "loss": 0.1951, "step": 88270 }, { "epoch": 0.1565135521140021, "grad_norm": 1.984375, "learning_rate": 0.00022930704969277896, "loss": 0.1814, "step": 88272 }, { "epoch": 0.15651709827931193, "grad_norm": 0.625, "learning_rate": 0.00022929198662817983, "loss": 0.2334, "step": 88274 }, { "epoch": 0.15652064444462174, "grad_norm": 0.451171875, "learning_rate": 0.0002292769273715517, "loss": 0.1849, "step": 88276 }, { "epoch": 0.15652419060993156, "grad_norm": 0.3828125, "learning_rate": 0.00022926187192296062, "loss": 0.144, "step": 88278 }, { "epoch": 0.15652773677524137, "grad_norm": 0.380859375, "learning_rate": 0.00022924682028247222, "loss": 0.2738, "step": 88280 }, { "epoch": 0.1565312829405512, "grad_norm": 0.3125, "learning_rate": 0.00022923177245015241, "loss": 0.1478, "step": 88282 }, { "epoch": 0.156534829105861, "grad_norm": 0.275390625, "learning_rate": 0.00022921672842606713, "loss": 0.2236, "step": 88284 }, { "epoch": 0.15653837527117082, "grad_norm": 4.28125, "learning_rate": 0.00022920168821028203, "loss": 0.2085, "step": 88286 }, { "epoch": 0.15654192143648063, "grad_norm": 0.34375, "learning_rate": 0.00022918665180286286, "loss": 0.1404, "step": 88288 }, { "epoch": 0.15654546760179047, "grad_norm": 0.21484375, "learning_rate": 0.00022917161920387545, "loss": 0.1456, "step": 88290 }, { "epoch": 0.1565490137671003, "grad_norm": 0.30859375, "learning_rate": 0.00022915659041338577, "loss": 0.1434, "step": 88292 }, { "epoch": 0.1565525599324101, "grad_norm": 0.2060546875, "learning_rate": 0.00022914156543145904, "loss": 0.2105, "step": 88294 }, { "epoch": 0.15655610609771992, "grad_norm": 0.84375, "learning_rate": 0.0002291265442581613, "loss": 0.1919, "step": 88296 }, { "epoch": 0.15655965226302973, "grad_norm": 0.33984375, "learning_rate": 0.00022911152689355827, "loss": 0.1865, "step": 88298 }, { "epoch": 0.15656319842833955, "grad_norm": 0.263671875, "learning_rate": 0.00022909651333771552, "loss": 0.2048, "step": 88300 }, { "epoch": 0.15656674459364936, "grad_norm": 0.1474609375, "learning_rate": 0.00022908150359069883, "loss": 0.1366, "step": 88302 }, { "epoch": 0.15657029075895917, "grad_norm": 0.490234375, "learning_rate": 0.00022906649765257355, "loss": 0.1947, "step": 88304 }, { "epoch": 0.156573836924269, "grad_norm": 0.28515625, "learning_rate": 0.0002290514955234056, "loss": 0.1752, "step": 88306 }, { "epoch": 0.1565773830895788, "grad_norm": 0.51171875, "learning_rate": 0.00022903649720326056, "loss": 0.2462, "step": 88308 }, { "epoch": 0.15658092925488862, "grad_norm": 0.4765625, "learning_rate": 0.00022902150269220396, "loss": 0.1472, "step": 88310 }, { "epoch": 0.15658447542019843, "grad_norm": 0.490234375, "learning_rate": 0.00022900651199030125, "loss": 0.1493, "step": 88312 }, { "epoch": 0.15658802158550825, "grad_norm": 1.203125, "learning_rate": 0.00022899152509761828, "loss": 0.1508, "step": 88314 }, { "epoch": 0.15659156775081806, "grad_norm": 0.2275390625, "learning_rate": 0.0002289765420142204, "loss": 0.3197, "step": 88316 }, { "epoch": 0.15659511391612788, "grad_norm": 1.1484375, "learning_rate": 0.00022896156274017316, "loss": 0.2707, "step": 88318 }, { "epoch": 0.1565986600814377, "grad_norm": 0.369140625, "learning_rate": 0.0002289465872755421, "loss": 0.1567, "step": 88320 }, { "epoch": 0.1566022062467475, "grad_norm": 0.3203125, "learning_rate": 0.00022893161562039267, "loss": 0.1355, "step": 88322 }, { "epoch": 0.15660575241205732, "grad_norm": 1.6875, "learning_rate": 0.00022891664777479053, "loss": 0.2308, "step": 88324 }, { "epoch": 0.15660929857736713, "grad_norm": 0.27734375, "learning_rate": 0.00022890168373880068, "loss": 0.1642, "step": 88326 }, { "epoch": 0.15661284474267695, "grad_norm": 0.263671875, "learning_rate": 0.00022888672351248902, "loss": 0.1986, "step": 88328 }, { "epoch": 0.15661639090798676, "grad_norm": 0.6328125, "learning_rate": 0.0002288717670959209, "loss": 0.2666, "step": 88330 }, { "epoch": 0.15661993707329658, "grad_norm": 0.408203125, "learning_rate": 0.00022885681448916163, "loss": 0.3215, "step": 88332 }, { "epoch": 0.1566234832386064, "grad_norm": 1.9609375, "learning_rate": 0.00022884186569227665, "loss": 0.249, "step": 88334 }, { "epoch": 0.1566270294039162, "grad_norm": 2.59375, "learning_rate": 0.00022882692070533118, "loss": 0.4424, "step": 88336 }, { "epoch": 0.15663057556922602, "grad_norm": 0.251953125, "learning_rate": 0.00022881197952839081, "loss": 0.2042, "step": 88338 }, { "epoch": 0.15663412173453584, "grad_norm": 0.197265625, "learning_rate": 0.00022879704216152081, "loss": 0.3793, "step": 88340 }, { "epoch": 0.15663766789984565, "grad_norm": 0.94140625, "learning_rate": 0.0002287821086047866, "loss": 0.1965, "step": 88342 }, { "epoch": 0.15664121406515547, "grad_norm": 0.44921875, "learning_rate": 0.00022876717885825313, "loss": 0.1732, "step": 88344 }, { "epoch": 0.15664476023046528, "grad_norm": 0.33984375, "learning_rate": 0.00022875225292198618, "loss": 0.1943, "step": 88346 }, { "epoch": 0.1566483063957751, "grad_norm": 0.5546875, "learning_rate": 0.0002287373307960507, "loss": 0.2217, "step": 88348 }, { "epoch": 0.1566518525610849, "grad_norm": 2.046875, "learning_rate": 0.00022872241248051206, "loss": 0.2002, "step": 88350 }, { "epoch": 0.15665539872639472, "grad_norm": 1.078125, "learning_rate": 0.00022870749797543552, "loss": 0.3359, "step": 88352 }, { "epoch": 0.15665894489170454, "grad_norm": 0.26171875, "learning_rate": 0.00022869258728088615, "loss": 0.1358, "step": 88354 }, { "epoch": 0.15666249105701435, "grad_norm": 0.5234375, "learning_rate": 0.00022867768039692933, "loss": 0.1595, "step": 88356 }, { "epoch": 0.15666603722232417, "grad_norm": 0.26171875, "learning_rate": 0.0002286627773236303, "loss": 0.1439, "step": 88358 }, { "epoch": 0.15666958338763398, "grad_norm": 1.078125, "learning_rate": 0.0002286478780610541, "loss": 0.6575, "step": 88360 }, { "epoch": 0.1566731295529438, "grad_norm": 0.189453125, "learning_rate": 0.00022863298260926592, "loss": 0.1825, "step": 88362 }, { "epoch": 0.1566766757182536, "grad_norm": 1.0703125, "learning_rate": 0.00022861809096833092, "loss": 0.229, "step": 88364 }, { "epoch": 0.15668022188356343, "grad_norm": 0.7890625, "learning_rate": 0.00022860320313831432, "loss": 0.2311, "step": 88366 }, { "epoch": 0.15668376804887324, "grad_norm": 0.2119140625, "learning_rate": 0.00022858831911928105, "loss": 0.2059, "step": 88368 }, { "epoch": 0.15668731421418305, "grad_norm": 0.3828125, "learning_rate": 0.00022857343891129637, "loss": 0.1779, "step": 88370 }, { "epoch": 0.15669086037949287, "grad_norm": 0.2431640625, "learning_rate": 0.0002285585625144252, "loss": 0.1433, "step": 88372 }, { "epoch": 0.15669440654480268, "grad_norm": 1.1328125, "learning_rate": 0.00022854368992873282, "loss": 0.1637, "step": 88374 }, { "epoch": 0.1566979527101125, "grad_norm": 0.859375, "learning_rate": 0.00022852882115428392, "loss": 0.1648, "step": 88376 }, { "epoch": 0.1567014988754223, "grad_norm": 0.26953125, "learning_rate": 0.00022851395619114394, "loss": 0.1838, "step": 88378 }, { "epoch": 0.15670504504073215, "grad_norm": 1.2734375, "learning_rate": 0.00022849909503937755, "loss": 0.295, "step": 88380 }, { "epoch": 0.15670859120604197, "grad_norm": 0.298828125, "learning_rate": 0.0002284842376990501, "loss": 0.1503, "step": 88382 }, { "epoch": 0.15671213737135178, "grad_norm": 0.1669921875, "learning_rate": 0.00022846938417022612, "loss": 0.172, "step": 88384 }, { "epoch": 0.1567156835366616, "grad_norm": 1.0390625, "learning_rate": 0.00022845453445297095, "loss": 0.1863, "step": 88386 }, { "epoch": 0.1567192297019714, "grad_norm": 0.421875, "learning_rate": 0.0002284396885473493, "loss": 0.2526, "step": 88388 }, { "epoch": 0.15672277586728123, "grad_norm": 0.251953125, "learning_rate": 0.0002284248464534263, "loss": 0.1844, "step": 88390 }, { "epoch": 0.15672632203259104, "grad_norm": 0.6640625, "learning_rate": 0.00022841000817126674, "loss": 0.4127, "step": 88392 }, { "epoch": 0.15672986819790086, "grad_norm": 0.55859375, "learning_rate": 0.00022839517370093544, "loss": 0.2238, "step": 88394 }, { "epoch": 0.15673341436321067, "grad_norm": 0.326171875, "learning_rate": 0.00022838034304249738, "loss": 0.1613, "step": 88396 }, { "epoch": 0.15673696052852049, "grad_norm": 0.2451171875, "learning_rate": 0.00022836551619601742, "loss": 0.1676, "step": 88398 }, { "epoch": 0.1567405066938303, "grad_norm": 0.79296875, "learning_rate": 0.00022835069316156042, "loss": 0.1635, "step": 88400 }, { "epoch": 0.15674405285914011, "grad_norm": 0.349609375, "learning_rate": 0.000228335873939191, "loss": 0.1884, "step": 88402 }, { "epoch": 0.15674759902444993, "grad_norm": 0.3515625, "learning_rate": 0.00022832105852897436, "loss": 0.221, "step": 88404 }, { "epoch": 0.15675114518975974, "grad_norm": 0.70703125, "learning_rate": 0.00022830624693097508, "loss": 0.1934, "step": 88406 }, { "epoch": 0.15675469135506956, "grad_norm": 0.59375, "learning_rate": 0.00022829143914525785, "loss": 0.4278, "step": 88408 }, { "epoch": 0.15675823752037937, "grad_norm": 0.48828125, "learning_rate": 0.0002282766351718875, "loss": 0.1833, "step": 88410 }, { "epoch": 0.1567617836856892, "grad_norm": 0.5390625, "learning_rate": 0.0002282618350109288, "loss": 0.201, "step": 88412 }, { "epoch": 0.156765329850999, "grad_norm": 0.439453125, "learning_rate": 0.00022824703866244656, "loss": 0.1836, "step": 88414 }, { "epoch": 0.15676887601630882, "grad_norm": 0.5390625, "learning_rate": 0.00022823224612650528, "loss": 0.2502, "step": 88416 }, { "epoch": 0.15677242218161863, "grad_norm": 0.23046875, "learning_rate": 0.00022821745740316985, "loss": 0.2107, "step": 88418 }, { "epoch": 0.15677596834692845, "grad_norm": 0.365234375, "learning_rate": 0.0002282026724925047, "loss": 0.2057, "step": 88420 }, { "epoch": 0.15677951451223826, "grad_norm": 0.73828125, "learning_rate": 0.00022818789139457488, "loss": 0.1996, "step": 88422 }, { "epoch": 0.15678306067754808, "grad_norm": 0.1552734375, "learning_rate": 0.0002281731141094447, "loss": 0.1603, "step": 88424 }, { "epoch": 0.1567866068428579, "grad_norm": 0.58984375, "learning_rate": 0.000228158340637179, "loss": 0.133, "step": 88426 }, { "epoch": 0.1567901530081677, "grad_norm": 0.59375, "learning_rate": 0.00022814357097784225, "loss": 0.1934, "step": 88428 }, { "epoch": 0.15679369917347752, "grad_norm": 0.400390625, "learning_rate": 0.000228128805131499, "loss": 0.2201, "step": 88430 }, { "epoch": 0.15679724533878733, "grad_norm": 1.03125, "learning_rate": 0.00022811404309821404, "loss": 0.1313, "step": 88432 }, { "epoch": 0.15680079150409715, "grad_norm": 0.58203125, "learning_rate": 0.0002280992848780517, "loss": 0.1912, "step": 88434 }, { "epoch": 0.15680433766940696, "grad_norm": 0.294921875, "learning_rate": 0.0002280845304710767, "loss": 0.1972, "step": 88436 }, { "epoch": 0.15680788383471678, "grad_norm": 0.5390625, "learning_rate": 0.00022806977987735333, "loss": 0.1449, "step": 88438 }, { "epoch": 0.1568114300000266, "grad_norm": 0.52734375, "learning_rate": 0.00022805503309694653, "loss": 0.1618, "step": 88440 }, { "epoch": 0.1568149761653364, "grad_norm": 0.380859375, "learning_rate": 0.00022804029012992032, "loss": 0.3609, "step": 88442 }, { "epoch": 0.15681852233064622, "grad_norm": 0.294921875, "learning_rate": 0.0002280255509763395, "loss": 0.1808, "step": 88444 }, { "epoch": 0.15682206849595604, "grad_norm": 0.314453125, "learning_rate": 0.00022801081563626839, "loss": 0.1687, "step": 88446 }, { "epoch": 0.15682561466126585, "grad_norm": 0.357421875, "learning_rate": 0.0002279960841097715, "loss": 0.1315, "step": 88448 }, { "epoch": 0.15682916082657566, "grad_norm": 2.15625, "learning_rate": 0.00022798135639691313, "loss": 0.2758, "step": 88450 }, { "epoch": 0.15683270699188548, "grad_norm": 0.435546875, "learning_rate": 0.00022796663249775772, "loss": 0.1653, "step": 88452 }, { "epoch": 0.1568362531571953, "grad_norm": 0.400390625, "learning_rate": 0.00022795191241236991, "loss": 0.2037, "step": 88454 }, { "epoch": 0.1568397993225051, "grad_norm": 0.26171875, "learning_rate": 0.00022793719614081377, "loss": 0.2048, "step": 88456 }, { "epoch": 0.15684334548781492, "grad_norm": 0.8203125, "learning_rate": 0.00022792248368315384, "loss": 0.3633, "step": 88458 }, { "epoch": 0.15684689165312474, "grad_norm": 1.4453125, "learning_rate": 0.0002279077750394543, "loss": 0.4259, "step": 88460 }, { "epoch": 0.15685043781843455, "grad_norm": 0.412109375, "learning_rate": 0.00022789307020977968, "loss": 0.1798, "step": 88462 }, { "epoch": 0.15685398398374437, "grad_norm": 0.361328125, "learning_rate": 0.00022787836919419422, "loss": 0.2067, "step": 88464 }, { "epoch": 0.15685753014905418, "grad_norm": 0.1962890625, "learning_rate": 0.00022786367199276213, "loss": 0.1905, "step": 88466 }, { "epoch": 0.156861076314364, "grad_norm": 0.2490234375, "learning_rate": 0.00022784897860554778, "loss": 0.1398, "step": 88468 }, { "epoch": 0.15686462247967384, "grad_norm": 0.337890625, "learning_rate": 0.00022783428903261525, "loss": 0.184, "step": 88470 }, { "epoch": 0.15686816864498365, "grad_norm": 0.2060546875, "learning_rate": 0.00022781960327402916, "loss": 0.1625, "step": 88472 }, { "epoch": 0.15687171481029347, "grad_norm": 1.171875, "learning_rate": 0.0002278049213298533, "loss": 0.493, "step": 88474 }, { "epoch": 0.15687526097560328, "grad_norm": 0.314453125, "learning_rate": 0.00022779024320015211, "loss": 0.1363, "step": 88476 }, { "epoch": 0.1568788071409131, "grad_norm": 0.4296875, "learning_rate": 0.00022777556888498973, "loss": 0.1775, "step": 88478 }, { "epoch": 0.1568823533062229, "grad_norm": 0.76953125, "learning_rate": 0.00022776089838443057, "loss": 0.2523, "step": 88480 }, { "epoch": 0.15688589947153272, "grad_norm": 0.24609375, "learning_rate": 0.0002277462316985384, "loss": 0.1694, "step": 88482 }, { "epoch": 0.15688944563684254, "grad_norm": 0.25390625, "learning_rate": 0.00022773156882737765, "loss": 0.2295, "step": 88484 }, { "epoch": 0.15689299180215235, "grad_norm": 0.26171875, "learning_rate": 0.00022771690977101225, "loss": 0.1915, "step": 88486 }, { "epoch": 0.15689653796746217, "grad_norm": 0.228515625, "learning_rate": 0.00022770225452950653, "loss": 0.2397, "step": 88488 }, { "epoch": 0.15690008413277198, "grad_norm": 0.2080078125, "learning_rate": 0.00022768760310292436, "loss": 0.1695, "step": 88490 }, { "epoch": 0.1569036302980818, "grad_norm": 0.345703125, "learning_rate": 0.0002276729554913298, "loss": 0.1596, "step": 88492 }, { "epoch": 0.1569071764633916, "grad_norm": 0.9453125, "learning_rate": 0.00022765831169478724, "loss": 0.2461, "step": 88494 }, { "epoch": 0.15691072262870143, "grad_norm": 0.296875, "learning_rate": 0.0002276436717133604, "loss": 0.1833, "step": 88496 }, { "epoch": 0.15691426879401124, "grad_norm": 0.96484375, "learning_rate": 0.00022762903554711343, "loss": 0.2435, "step": 88498 }, { "epoch": 0.15691781495932106, "grad_norm": 0.216796875, "learning_rate": 0.00022761440319611027, "loss": 0.1637, "step": 88500 }, { "epoch": 0.15692136112463087, "grad_norm": 0.33203125, "learning_rate": 0.0002275997746604151, "loss": 0.2236, "step": 88502 }, { "epoch": 0.15692490728994068, "grad_norm": 0.271484375, "learning_rate": 0.0002275851499400916, "loss": 0.2073, "step": 88504 }, { "epoch": 0.1569284534552505, "grad_norm": 0.302734375, "learning_rate": 0.00022757052903520404, "loss": 0.1704, "step": 88506 }, { "epoch": 0.15693199962056031, "grad_norm": 0.35546875, "learning_rate": 0.00022755591194581622, "loss": 0.179, "step": 88508 }, { "epoch": 0.15693554578587013, "grad_norm": 0.2373046875, "learning_rate": 0.00022754129867199192, "loss": 0.2368, "step": 88510 }, { "epoch": 0.15693909195117994, "grad_norm": 2.875, "learning_rate": 0.00022752668921379533, "loss": 0.2568, "step": 88512 }, { "epoch": 0.15694263811648976, "grad_norm": 0.7421875, "learning_rate": 0.0002275120835712902, "loss": 0.1683, "step": 88514 }, { "epoch": 0.15694618428179957, "grad_norm": 0.7265625, "learning_rate": 0.00022749748174454034, "loss": 0.2242, "step": 88516 }, { "epoch": 0.1569497304471094, "grad_norm": 1.3984375, "learning_rate": 0.0002274828837336098, "loss": 0.3306, "step": 88518 }, { "epoch": 0.1569532766124192, "grad_norm": 0.5625, "learning_rate": 0.00022746828953856214, "loss": 0.1585, "step": 88520 }, { "epoch": 0.15695682277772902, "grad_norm": 0.244140625, "learning_rate": 0.00022745369915946155, "loss": 0.162, "step": 88522 }, { "epoch": 0.15696036894303883, "grad_norm": 0.5, "learning_rate": 0.00022743911259637145, "loss": 0.2215, "step": 88524 }, { "epoch": 0.15696391510834865, "grad_norm": 0.94921875, "learning_rate": 0.000227424529849356, "loss": 0.2678, "step": 88526 }, { "epoch": 0.15696746127365846, "grad_norm": 0.67578125, "learning_rate": 0.00022740995091847857, "loss": 0.1794, "step": 88528 }, { "epoch": 0.15697100743896827, "grad_norm": 0.310546875, "learning_rate": 0.00022739537580380352, "loss": 0.2085, "step": 88530 }, { "epoch": 0.1569745536042781, "grad_norm": 0.46484375, "learning_rate": 0.00022738080450539395, "loss": 0.2041, "step": 88532 }, { "epoch": 0.1569780997695879, "grad_norm": 0.259765625, "learning_rate": 0.0002273662370233139, "loss": 0.1834, "step": 88534 }, { "epoch": 0.15698164593489772, "grad_norm": 1.7734375, "learning_rate": 0.0002273516733576271, "loss": 0.1809, "step": 88536 }, { "epoch": 0.15698519210020753, "grad_norm": 0.296875, "learning_rate": 0.00022733711350839713, "loss": 0.4041, "step": 88538 }, { "epoch": 0.15698873826551735, "grad_norm": 0.59375, "learning_rate": 0.00022732255747568786, "loss": 0.1591, "step": 88540 }, { "epoch": 0.15699228443082716, "grad_norm": 0.61328125, "learning_rate": 0.00022730800525956255, "loss": 0.2812, "step": 88542 }, { "epoch": 0.15699583059613698, "grad_norm": 0.46875, "learning_rate": 0.00022729345686008532, "loss": 0.3901, "step": 88544 }, { "epoch": 0.1569993767614468, "grad_norm": 0.345703125, "learning_rate": 0.00022727891227731955, "loss": 0.2265, "step": 88546 }, { "epoch": 0.1570029229267566, "grad_norm": 3.171875, "learning_rate": 0.00022726437151132886, "loss": 0.2487, "step": 88548 }, { "epoch": 0.15700646909206642, "grad_norm": 0.46484375, "learning_rate": 0.00022724983456217675, "loss": 0.1948, "step": 88550 }, { "epoch": 0.15701001525737623, "grad_norm": 0.609375, "learning_rate": 0.0002272353014299271, "loss": 0.1624, "step": 88552 }, { "epoch": 0.15701356142268605, "grad_norm": 0.703125, "learning_rate": 0.0002272207721146432, "loss": 0.1312, "step": 88554 }, { "epoch": 0.15701710758799586, "grad_norm": 0.91015625, "learning_rate": 0.0002272062466163887, "loss": 0.3041, "step": 88556 }, { "epoch": 0.15702065375330568, "grad_norm": 0.9140625, "learning_rate": 0.00022719172493522704, "loss": 0.2099, "step": 88558 }, { "epoch": 0.1570241999186155, "grad_norm": 0.40625, "learning_rate": 0.00022717720707122178, "loss": 0.1792, "step": 88560 }, { "epoch": 0.15702774608392533, "grad_norm": 0.34375, "learning_rate": 0.00022716269302443655, "loss": 0.1849, "step": 88562 }, { "epoch": 0.15703129224923515, "grad_norm": 1.5234375, "learning_rate": 0.00022714818279493448, "loss": 0.1815, "step": 88564 }, { "epoch": 0.15703483841454496, "grad_norm": 0.4921875, "learning_rate": 0.0002271336763827794, "loss": 0.1661, "step": 88566 }, { "epoch": 0.15703838457985478, "grad_norm": 0.365234375, "learning_rate": 0.0002271191737880345, "loss": 0.2234, "step": 88568 }, { "epoch": 0.1570419307451646, "grad_norm": 0.37890625, "learning_rate": 0.0002271046750107635, "loss": 0.231, "step": 88570 }, { "epoch": 0.1570454769104744, "grad_norm": 1.9453125, "learning_rate": 0.00022709018005102942, "loss": 0.525, "step": 88572 }, { "epoch": 0.15704902307578422, "grad_norm": 0.240234375, "learning_rate": 0.00022707568890889597, "loss": 0.1419, "step": 88574 }, { "epoch": 0.15705256924109404, "grad_norm": 0.62109375, "learning_rate": 0.0002270612015844263, "loss": 0.1627, "step": 88576 }, { "epoch": 0.15705611540640385, "grad_norm": 1.6484375, "learning_rate": 0.00022704671807768396, "loss": 0.3023, "step": 88578 }, { "epoch": 0.15705966157171367, "grad_norm": 0.357421875, "learning_rate": 0.0002270322383887322, "loss": 0.1925, "step": 88580 }, { "epoch": 0.15706320773702348, "grad_norm": 0.80859375, "learning_rate": 0.00022701776251763417, "loss": 0.1975, "step": 88582 }, { "epoch": 0.1570667539023333, "grad_norm": 0.26953125, "learning_rate": 0.00022700329046445355, "loss": 0.136, "step": 88584 }, { "epoch": 0.1570703000676431, "grad_norm": 0.271484375, "learning_rate": 0.00022698882222925342, "loss": 0.1584, "step": 88586 }, { "epoch": 0.15707384623295292, "grad_norm": 0.2216796875, "learning_rate": 0.00022697435781209702, "loss": 0.1836, "step": 88588 }, { "epoch": 0.15707739239826274, "grad_norm": 0.29296875, "learning_rate": 0.00022695989721304764, "loss": 0.2009, "step": 88590 }, { "epoch": 0.15708093856357255, "grad_norm": 0.283203125, "learning_rate": 0.00022694544043216862, "loss": 0.1995, "step": 88592 }, { "epoch": 0.15708448472888237, "grad_norm": 0.44140625, "learning_rate": 0.0002269309874695231, "loss": 0.3224, "step": 88594 }, { "epoch": 0.15708803089419218, "grad_norm": 0.455078125, "learning_rate": 0.00022691653832517433, "loss": 0.1449, "step": 88596 }, { "epoch": 0.157091577059502, "grad_norm": 0.890625, "learning_rate": 0.00022690209299918546, "loss": 0.2626, "step": 88598 }, { "epoch": 0.1570951232248118, "grad_norm": 0.345703125, "learning_rate": 0.00022688765149161955, "loss": 0.177, "step": 88600 }, { "epoch": 0.15709866939012163, "grad_norm": 0.28515625, "learning_rate": 0.00022687321380253998, "loss": 0.1924, "step": 88602 }, { "epoch": 0.15710221555543144, "grad_norm": 0.2421875, "learning_rate": 0.00022685877993200987, "loss": 0.1507, "step": 88604 }, { "epoch": 0.15710576172074125, "grad_norm": 0.92578125, "learning_rate": 0.00022684434988009224, "loss": 0.2088, "step": 88606 }, { "epoch": 0.15710930788605107, "grad_norm": 0.31640625, "learning_rate": 0.00022682992364685012, "loss": 0.1497, "step": 88608 }, { "epoch": 0.15711285405136088, "grad_norm": 0.43359375, "learning_rate": 0.00022681550123234684, "loss": 0.2425, "step": 88610 }, { "epoch": 0.1571164002166707, "grad_norm": 0.69921875, "learning_rate": 0.00022680108263664536, "loss": 0.1876, "step": 88612 }, { "epoch": 0.1571199463819805, "grad_norm": 0.486328125, "learning_rate": 0.0002267866678598087, "loss": 0.2357, "step": 88614 }, { "epoch": 0.15712349254729033, "grad_norm": 0.251953125, "learning_rate": 0.0002267722569018999, "loss": 0.1982, "step": 88616 }, { "epoch": 0.15712703871260014, "grad_norm": 0.322265625, "learning_rate": 0.00022675784976298202, "loss": 0.1699, "step": 88618 }, { "epoch": 0.15713058487790996, "grad_norm": 0.875, "learning_rate": 0.0002267434464431181, "loss": 0.197, "step": 88620 }, { "epoch": 0.15713413104321977, "grad_norm": 0.2421875, "learning_rate": 0.00022672904694237106, "loss": 0.2221, "step": 88622 }, { "epoch": 0.15713767720852959, "grad_norm": 0.25390625, "learning_rate": 0.00022671465126080395, "loss": 0.1994, "step": 88624 }, { "epoch": 0.1571412233738394, "grad_norm": 0.314453125, "learning_rate": 0.00022670025939847965, "loss": 0.1671, "step": 88626 }, { "epoch": 0.15714476953914921, "grad_norm": 0.5546875, "learning_rate": 0.00022668587135546127, "loss": 0.1698, "step": 88628 }, { "epoch": 0.15714831570445903, "grad_norm": 4.625, "learning_rate": 0.00022667148713181138, "loss": 0.1593, "step": 88630 }, { "epoch": 0.15715186186976884, "grad_norm": 0.4375, "learning_rate": 0.00022665710672759327, "loss": 0.1929, "step": 88632 }, { "epoch": 0.15715540803507866, "grad_norm": 0.1748046875, "learning_rate": 0.00022664273014286966, "loss": 0.1923, "step": 88634 }, { "epoch": 0.15715895420038847, "grad_norm": 0.33984375, "learning_rate": 0.00022662835737770353, "loss": 0.2353, "step": 88636 }, { "epoch": 0.1571625003656983, "grad_norm": 0.283203125, "learning_rate": 0.00022661398843215751, "loss": 0.1526, "step": 88638 }, { "epoch": 0.1571660465310081, "grad_norm": 0.39453125, "learning_rate": 0.0002265996233062946, "loss": 0.1634, "step": 88640 }, { "epoch": 0.15716959269631792, "grad_norm": 1.2109375, "learning_rate": 0.00022658526200017756, "loss": 0.1812, "step": 88642 }, { "epoch": 0.15717313886162773, "grad_norm": 0.2734375, "learning_rate": 0.00022657090451386942, "loss": 0.1436, "step": 88644 }, { "epoch": 0.15717668502693755, "grad_norm": 0.55859375, "learning_rate": 0.0002265565508474327, "loss": 0.2116, "step": 88646 }, { "epoch": 0.15718023119224736, "grad_norm": 0.376953125, "learning_rate": 0.00022654220100093016, "loss": 0.1609, "step": 88648 }, { "epoch": 0.15718377735755718, "grad_norm": 0.4375, "learning_rate": 0.0002265278549744248, "loss": 0.1766, "step": 88650 }, { "epoch": 0.15718732352286702, "grad_norm": 0.55859375, "learning_rate": 0.00022651351276797922, "loss": 0.2535, "step": 88652 }, { "epoch": 0.15719086968817683, "grad_norm": 0.2470703125, "learning_rate": 0.00022649917438165618, "loss": 0.1812, "step": 88654 }, { "epoch": 0.15719441585348665, "grad_norm": 0.357421875, "learning_rate": 0.00022648483981551828, "loss": 0.1846, "step": 88656 }, { "epoch": 0.15719796201879646, "grad_norm": 0.427734375, "learning_rate": 0.00022647050906962832, "loss": 0.1866, "step": 88658 }, { "epoch": 0.15720150818410628, "grad_norm": 0.259765625, "learning_rate": 0.00022645618214404902, "loss": 0.1989, "step": 88660 }, { "epoch": 0.1572050543494161, "grad_norm": 1.078125, "learning_rate": 0.00022644185903884285, "loss": 0.2747, "step": 88662 }, { "epoch": 0.1572086005147259, "grad_norm": 0.345703125, "learning_rate": 0.00022642753975407262, "loss": 0.1976, "step": 88664 }, { "epoch": 0.15721214668003572, "grad_norm": 0.6796875, "learning_rate": 0.00022641322428980083, "loss": 0.1819, "step": 88666 }, { "epoch": 0.15721569284534553, "grad_norm": 0.92578125, "learning_rate": 0.00022639891264609032, "loss": 0.1408, "step": 88668 }, { "epoch": 0.15721923901065535, "grad_norm": 0.4765625, "learning_rate": 0.00022638460482300328, "loss": 0.2287, "step": 88670 }, { "epoch": 0.15722278517596516, "grad_norm": 0.3359375, "learning_rate": 0.00022637030082060274, "loss": 0.183, "step": 88672 }, { "epoch": 0.15722633134127498, "grad_norm": 0.6640625, "learning_rate": 0.000226356000638951, "loss": 0.2101, "step": 88674 }, { "epoch": 0.1572298775065848, "grad_norm": 0.9296875, "learning_rate": 0.00022634170427811046, "loss": 0.3204, "step": 88676 }, { "epoch": 0.1572334236718946, "grad_norm": 0.16015625, "learning_rate": 0.0002263274117381441, "loss": 0.2571, "step": 88678 }, { "epoch": 0.15723696983720442, "grad_norm": 0.408203125, "learning_rate": 0.00022631312301911395, "loss": 0.2085, "step": 88680 }, { "epoch": 0.15724051600251424, "grad_norm": 0.65234375, "learning_rate": 0.00022629883812108276, "loss": 0.1554, "step": 88682 }, { "epoch": 0.15724406216782405, "grad_norm": 0.59375, "learning_rate": 0.000226284557044113, "loss": 0.1756, "step": 88684 }, { "epoch": 0.15724760833313386, "grad_norm": 0.29296875, "learning_rate": 0.000226270279788267, "loss": 0.1443, "step": 88686 }, { "epoch": 0.15725115449844368, "grad_norm": 0.283203125, "learning_rate": 0.0002262560063536074, "loss": 0.185, "step": 88688 }, { "epoch": 0.1572547006637535, "grad_norm": 0.2890625, "learning_rate": 0.00022624173674019632, "loss": 0.1558, "step": 88690 }, { "epoch": 0.1572582468290633, "grad_norm": 0.328125, "learning_rate": 0.0002262274709480964, "loss": 0.1767, "step": 88692 }, { "epoch": 0.15726179299437312, "grad_norm": 0.421875, "learning_rate": 0.0002262132089773701, "loss": 0.1845, "step": 88694 }, { "epoch": 0.15726533915968294, "grad_norm": 0.9609375, "learning_rate": 0.00022619895082807957, "loss": 0.1895, "step": 88696 }, { "epoch": 0.15726888532499275, "grad_norm": 0.39453125, "learning_rate": 0.00022618469650028715, "loss": 0.2055, "step": 88698 }, { "epoch": 0.15727243149030257, "grad_norm": 0.8046875, "learning_rate": 0.0002261704459940554, "loss": 0.1812, "step": 88700 }, { "epoch": 0.15727597765561238, "grad_norm": 1.6328125, "learning_rate": 0.00022615619930944656, "loss": 0.3615, "step": 88702 }, { "epoch": 0.1572795238209222, "grad_norm": 0.400390625, "learning_rate": 0.000226141956446523, "loss": 0.191, "step": 88704 }, { "epoch": 0.157283069986232, "grad_norm": 0.318359375, "learning_rate": 0.00022612771740534684, "loss": 0.2267, "step": 88706 }, { "epoch": 0.15728661615154182, "grad_norm": 0.55078125, "learning_rate": 0.00022611348218598028, "loss": 0.2348, "step": 88708 }, { "epoch": 0.15729016231685164, "grad_norm": 0.48828125, "learning_rate": 0.000226099250788486, "loss": 0.2136, "step": 88710 }, { "epoch": 0.15729370848216145, "grad_norm": 0.55859375, "learning_rate": 0.0002260850232129258, "loss": 0.1652, "step": 88712 }, { "epoch": 0.15729725464747127, "grad_norm": 0.6171875, "learning_rate": 0.00022607079945936216, "loss": 0.1696, "step": 88714 }, { "epoch": 0.15730080081278108, "grad_norm": 0.44921875, "learning_rate": 0.00022605657952785705, "loss": 0.1601, "step": 88716 }, { "epoch": 0.1573043469780909, "grad_norm": 0.58984375, "learning_rate": 0.00022604236341847305, "loss": 0.1662, "step": 88718 }, { "epoch": 0.1573078931434007, "grad_norm": 0.9375, "learning_rate": 0.00022602815113127196, "loss": 0.1886, "step": 88720 }, { "epoch": 0.15731143930871053, "grad_norm": 0.279296875, "learning_rate": 0.00022601394266631616, "loss": 0.1271, "step": 88722 }, { "epoch": 0.15731498547402034, "grad_norm": 0.97265625, "learning_rate": 0.00022599973802366765, "loss": 0.1756, "step": 88724 }, { "epoch": 0.15731853163933016, "grad_norm": 1.0234375, "learning_rate": 0.00022598553720338865, "loss": 0.4757, "step": 88726 }, { "epoch": 0.15732207780463997, "grad_norm": 0.96484375, "learning_rate": 0.00022597134020554114, "loss": 0.2321, "step": 88728 }, { "epoch": 0.15732562396994978, "grad_norm": 0.490234375, "learning_rate": 0.00022595714703018722, "loss": 0.1506, "step": 88730 }, { "epoch": 0.1573291701352596, "grad_norm": 9.4375, "learning_rate": 0.0002259429576773892, "loss": 0.2279, "step": 88732 }, { "epoch": 0.15733271630056941, "grad_norm": 0.1845703125, "learning_rate": 0.00022592877214720896, "loss": 0.1861, "step": 88734 }, { "epoch": 0.15733626246587923, "grad_norm": 0.341796875, "learning_rate": 0.00022591459043970854, "loss": 0.1511, "step": 88736 }, { "epoch": 0.15733980863118904, "grad_norm": 1.640625, "learning_rate": 0.00022590041255494983, "loss": 0.1713, "step": 88738 }, { "epoch": 0.15734335479649886, "grad_norm": 0.318359375, "learning_rate": 0.00022588623849299509, "loss": 0.1745, "step": 88740 }, { "epoch": 0.1573469009618087, "grad_norm": 0.7109375, "learning_rate": 0.00022587206825390617, "loss": 0.2234, "step": 88742 }, { "epoch": 0.15735044712711851, "grad_norm": 0.23046875, "learning_rate": 0.00022585790183774512, "loss": 0.1157, "step": 88744 }, { "epoch": 0.15735399329242833, "grad_norm": 0.78125, "learning_rate": 0.0002258437392445738, "loss": 0.1681, "step": 88746 }, { "epoch": 0.15735753945773814, "grad_norm": 0.1767578125, "learning_rate": 0.00022582958047445418, "loss": 0.3067, "step": 88748 }, { "epoch": 0.15736108562304796, "grad_norm": 0.6953125, "learning_rate": 0.00022581542552744835, "loss": 0.2199, "step": 88750 }, { "epoch": 0.15736463178835777, "grad_norm": 0.89453125, "learning_rate": 0.0002258012744036179, "loss": 0.2527, "step": 88752 }, { "epoch": 0.1573681779536676, "grad_norm": 0.361328125, "learning_rate": 0.0002257871271030249, "loss": 0.1902, "step": 88754 }, { "epoch": 0.1573717241189774, "grad_norm": 0.390625, "learning_rate": 0.00022577298362573114, "loss": 0.1741, "step": 88756 }, { "epoch": 0.15737527028428722, "grad_norm": 0.337890625, "learning_rate": 0.00022575884397179865, "loss": 0.2007, "step": 88758 }, { "epoch": 0.15737881644959703, "grad_norm": 0.490234375, "learning_rate": 0.00022574470814128912, "loss": 0.1572, "step": 88760 }, { "epoch": 0.15738236261490685, "grad_norm": 0.47265625, "learning_rate": 0.0002257305761342644, "loss": 0.2083, "step": 88762 }, { "epoch": 0.15738590878021666, "grad_norm": 0.92578125, "learning_rate": 0.00022571644795078637, "loss": 0.1719, "step": 88764 }, { "epoch": 0.15738945494552647, "grad_norm": 0.96875, "learning_rate": 0.00022570232359091658, "loss": 0.2103, "step": 88766 }, { "epoch": 0.1573930011108363, "grad_norm": 0.28515625, "learning_rate": 0.00022568820305471726, "loss": 0.1528, "step": 88768 }, { "epoch": 0.1573965472761461, "grad_norm": 0.41015625, "learning_rate": 0.0002256740863422496, "loss": 0.2056, "step": 88770 }, { "epoch": 0.15740009344145592, "grad_norm": 0.302734375, "learning_rate": 0.0002256599734535758, "loss": 0.1674, "step": 88772 }, { "epoch": 0.15740363960676573, "grad_norm": 0.5, "learning_rate": 0.00022564586438875723, "loss": 0.1622, "step": 88774 }, { "epoch": 0.15740718577207555, "grad_norm": 0.55078125, "learning_rate": 0.00022563175914785605, "loss": 0.2027, "step": 88776 }, { "epoch": 0.15741073193738536, "grad_norm": 0.55078125, "learning_rate": 0.0002256176577309334, "loss": 0.168, "step": 88778 }, { "epoch": 0.15741427810269518, "grad_norm": 0.86328125, "learning_rate": 0.00022560356013805123, "loss": 0.2344, "step": 88780 }, { "epoch": 0.157417824268005, "grad_norm": 0.35546875, "learning_rate": 0.0002255894663692713, "loss": 0.1725, "step": 88782 }, { "epoch": 0.1574213704333148, "grad_norm": 0.369140625, "learning_rate": 0.00022557537642465503, "loss": 0.1778, "step": 88784 }, { "epoch": 0.15742491659862462, "grad_norm": 0.392578125, "learning_rate": 0.00022556129030426423, "loss": 0.1747, "step": 88786 }, { "epoch": 0.15742846276393443, "grad_norm": 0.494140625, "learning_rate": 0.0002255472080081603, "loss": 0.1722, "step": 88788 }, { "epoch": 0.15743200892924425, "grad_norm": 0.318359375, "learning_rate": 0.00022553312953640502, "loss": 0.1377, "step": 88790 }, { "epoch": 0.15743555509455406, "grad_norm": 0.2080078125, "learning_rate": 0.00022551905488905983, "loss": 0.1578, "step": 88792 }, { "epoch": 0.15743910125986388, "grad_norm": 2.1875, "learning_rate": 0.00022550498406618635, "loss": 0.3427, "step": 88794 }, { "epoch": 0.1574426474251737, "grad_norm": 0.87109375, "learning_rate": 0.0002254909170678461, "loss": 0.1825, "step": 88796 }, { "epoch": 0.1574461935904835, "grad_norm": 0.85546875, "learning_rate": 0.0002254768538941006, "loss": 0.1559, "step": 88798 }, { "epoch": 0.15744973975579332, "grad_norm": 0.2392578125, "learning_rate": 0.00022546279454501133, "loss": 0.1997, "step": 88800 }, { "epoch": 0.15745328592110314, "grad_norm": 0.482421875, "learning_rate": 0.00022544873902063996, "loss": 0.1271, "step": 88802 }, { "epoch": 0.15745683208641295, "grad_norm": 0.5234375, "learning_rate": 0.00022543468732104765, "loss": 0.2171, "step": 88804 }, { "epoch": 0.15746037825172277, "grad_norm": 0.72265625, "learning_rate": 0.00022542063944629598, "loss": 0.1855, "step": 88806 }, { "epoch": 0.15746392441703258, "grad_norm": 0.66015625, "learning_rate": 0.00022540659539644665, "loss": 0.161, "step": 88808 }, { "epoch": 0.1574674705823424, "grad_norm": 2.0625, "learning_rate": 0.0002253925551715605, "loss": 0.1868, "step": 88810 }, { "epoch": 0.1574710167476522, "grad_norm": 1.875, "learning_rate": 0.0002253785187716996, "loss": 0.2362, "step": 88812 }, { "epoch": 0.15747456291296202, "grad_norm": 0.52734375, "learning_rate": 0.00022536448619692474, "loss": 0.1793, "step": 88814 }, { "epoch": 0.15747810907827184, "grad_norm": 0.5703125, "learning_rate": 0.00022535045744729787, "loss": 0.1815, "step": 88816 }, { "epoch": 0.15748165524358165, "grad_norm": 2.1875, "learning_rate": 0.00022533643252287984, "loss": 0.3284, "step": 88818 }, { "epoch": 0.15748520140889147, "grad_norm": 0.32421875, "learning_rate": 0.0002253224114237322, "loss": 0.1484, "step": 88820 }, { "epoch": 0.15748874757420128, "grad_norm": 0.94140625, "learning_rate": 0.00022530839414991636, "loss": 0.2539, "step": 88822 }, { "epoch": 0.1574922937395111, "grad_norm": 0.3203125, "learning_rate": 0.00022529438070149348, "loss": 0.1226, "step": 88824 }, { "epoch": 0.1574958399048209, "grad_norm": 0.29296875, "learning_rate": 0.00022528037107852485, "loss": 0.1854, "step": 88826 }, { "epoch": 0.15749938607013073, "grad_norm": 0.44921875, "learning_rate": 0.00022526636528107167, "loss": 0.1804, "step": 88828 }, { "epoch": 0.15750293223544054, "grad_norm": 0.55078125, "learning_rate": 0.00022525236330919553, "loss": 0.1508, "step": 88830 }, { "epoch": 0.15750647840075035, "grad_norm": 0.3828125, "learning_rate": 0.00022523836516295738, "loss": 0.2521, "step": 88832 }, { "epoch": 0.1575100245660602, "grad_norm": 0.2890625, "learning_rate": 0.00022522437084241838, "loss": 0.1542, "step": 88834 }, { "epoch": 0.15751357073137, "grad_norm": 0.443359375, "learning_rate": 0.00022521038034763988, "loss": 0.1345, "step": 88836 }, { "epoch": 0.15751711689667983, "grad_norm": 0.8515625, "learning_rate": 0.00022519639367868307, "loss": 0.2439, "step": 88838 }, { "epoch": 0.15752066306198964, "grad_norm": 0.3125, "learning_rate": 0.00022518241083560914, "loss": 0.1291, "step": 88840 }, { "epoch": 0.15752420922729946, "grad_norm": 0.4140625, "learning_rate": 0.0002251684318184792, "loss": 0.1901, "step": 88842 }, { "epoch": 0.15752775539260927, "grad_norm": 0.5234375, "learning_rate": 0.00022515445662735437, "loss": 0.1429, "step": 88844 }, { "epoch": 0.15753130155791908, "grad_norm": 0.1875, "learning_rate": 0.00022514048526229557, "loss": 0.1356, "step": 88846 }, { "epoch": 0.1575348477232289, "grad_norm": 0.50390625, "learning_rate": 0.00022512651772336434, "loss": 0.2182, "step": 88848 }, { "epoch": 0.1575383938885387, "grad_norm": 0.4140625, "learning_rate": 0.00022511255401062153, "loss": 0.1841, "step": 88850 }, { "epoch": 0.15754194005384853, "grad_norm": 1.1875, "learning_rate": 0.00022509859412412817, "loss": 0.1978, "step": 88852 }, { "epoch": 0.15754548621915834, "grad_norm": 0.294921875, "learning_rate": 0.00022508463806394526, "loss": 0.1982, "step": 88854 }, { "epoch": 0.15754903238446816, "grad_norm": 0.2294921875, "learning_rate": 0.00022507068583013396, "loss": 0.139, "step": 88856 }, { "epoch": 0.15755257854977797, "grad_norm": 0.349609375, "learning_rate": 0.00022505673742275535, "loss": 0.1869, "step": 88858 }, { "epoch": 0.1575561247150878, "grad_norm": 0.447265625, "learning_rate": 0.00022504279284187034, "loss": 0.2652, "step": 88860 }, { "epoch": 0.1575596708803976, "grad_norm": 0.46484375, "learning_rate": 0.00022502885208753993, "loss": 0.298, "step": 88862 }, { "epoch": 0.15756321704570742, "grad_norm": 0.63671875, "learning_rate": 0.00022501491515982495, "loss": 0.1683, "step": 88864 }, { "epoch": 0.15756676321101723, "grad_norm": 0.3125, "learning_rate": 0.0002250009820587866, "loss": 0.1598, "step": 88866 }, { "epoch": 0.15757030937632704, "grad_norm": 0.2578125, "learning_rate": 0.0002249870527844856, "loss": 0.184, "step": 88868 }, { "epoch": 0.15757385554163686, "grad_norm": 1.265625, "learning_rate": 0.0002249731273369831, "loss": 0.2587, "step": 88870 }, { "epoch": 0.15757740170694667, "grad_norm": 0.287109375, "learning_rate": 0.00022495920571633982, "loss": 0.1485, "step": 88872 }, { "epoch": 0.1575809478722565, "grad_norm": 0.232421875, "learning_rate": 0.00022494528792261662, "loss": 0.198, "step": 88874 }, { "epoch": 0.1575844940375663, "grad_norm": 0.73046875, "learning_rate": 0.00022493137395587453, "loss": 0.2094, "step": 88876 }, { "epoch": 0.15758804020287612, "grad_norm": 1.359375, "learning_rate": 0.00022491746381617415, "loss": 0.1925, "step": 88878 }, { "epoch": 0.15759158636818593, "grad_norm": 0.390625, "learning_rate": 0.0002249035575035766, "loss": 0.257, "step": 88880 }, { "epoch": 0.15759513253349575, "grad_norm": 0.56640625, "learning_rate": 0.00022488965501814252, "loss": 0.1838, "step": 88882 }, { "epoch": 0.15759867869880556, "grad_norm": 1.1875, "learning_rate": 0.00022487575635993291, "loss": 0.2138, "step": 88884 }, { "epoch": 0.15760222486411538, "grad_norm": 0.52734375, "learning_rate": 0.00022486186152900816, "loss": 0.2294, "step": 88886 }, { "epoch": 0.1576057710294252, "grad_norm": 0.458984375, "learning_rate": 0.00022484797052542944, "loss": 0.1287, "step": 88888 }, { "epoch": 0.157609317194735, "grad_norm": 0.56640625, "learning_rate": 0.00022483408334925733, "loss": 0.2313, "step": 88890 }, { "epoch": 0.15761286336004482, "grad_norm": 0.1806640625, "learning_rate": 0.00022482020000055256, "loss": 0.1612, "step": 88892 }, { "epoch": 0.15761640952535463, "grad_norm": 0.25, "learning_rate": 0.000224806320479376, "loss": 0.127, "step": 88894 }, { "epoch": 0.15761995569066445, "grad_norm": 1.4453125, "learning_rate": 0.00022479244478578798, "loss": 0.2362, "step": 88896 }, { "epoch": 0.15762350185597426, "grad_norm": 0.51171875, "learning_rate": 0.00022477857291984974, "loss": 0.2064, "step": 88898 }, { "epoch": 0.15762704802128408, "grad_norm": 0.30859375, "learning_rate": 0.00022476470488162135, "loss": 0.1998, "step": 88900 }, { "epoch": 0.1576305941865939, "grad_norm": 0.7109375, "learning_rate": 0.00022475084067116385, "loss": 0.1927, "step": 88902 }, { "epoch": 0.1576341403519037, "grad_norm": 0.376953125, "learning_rate": 0.00022473698028853764, "loss": 0.1784, "step": 88904 }, { "epoch": 0.15763768651721352, "grad_norm": 0.419921875, "learning_rate": 0.00022472312373380372, "loss": 0.1928, "step": 88906 }, { "epoch": 0.15764123268252334, "grad_norm": 0.2412109375, "learning_rate": 0.00022470927100702224, "loss": 0.2532, "step": 88908 }, { "epoch": 0.15764477884783315, "grad_norm": 1.015625, "learning_rate": 0.00022469542210825407, "loss": 0.1853, "step": 88910 }, { "epoch": 0.15764832501314296, "grad_norm": 0.67578125, "learning_rate": 0.00022468157703755967, "loss": 0.1653, "step": 88912 }, { "epoch": 0.15765187117845278, "grad_norm": 0.431640625, "learning_rate": 0.0002246677357949996, "loss": 0.1692, "step": 88914 }, { "epoch": 0.1576554173437626, "grad_norm": 0.32421875, "learning_rate": 0.00022465389838063443, "loss": 0.1636, "step": 88916 }, { "epoch": 0.1576589635090724, "grad_norm": 0.53515625, "learning_rate": 0.0002246400647945245, "loss": 0.1095, "step": 88918 }, { "epoch": 0.15766250967438222, "grad_norm": 0.44140625, "learning_rate": 0.00022462623503673065, "loss": 0.3506, "step": 88920 }, { "epoch": 0.15766605583969204, "grad_norm": 0.380859375, "learning_rate": 0.00022461240910731311, "loss": 0.1815, "step": 88922 }, { "epoch": 0.15766960200500188, "grad_norm": 0.4921875, "learning_rate": 0.00022459858700633236, "loss": 0.1774, "step": 88924 }, { "epoch": 0.1576731481703117, "grad_norm": 1.015625, "learning_rate": 0.00022458476873384888, "loss": 0.2051, "step": 88926 }, { "epoch": 0.1576766943356215, "grad_norm": 0.314453125, "learning_rate": 0.00022457095428992318, "loss": 0.1359, "step": 88928 }, { "epoch": 0.15768024050093132, "grad_norm": 0.2333984375, "learning_rate": 0.00022455714367461562, "loss": 0.1817, "step": 88930 }, { "epoch": 0.15768378666624114, "grad_norm": 0.83203125, "learning_rate": 0.00022454333688798654, "loss": 0.1554, "step": 88932 }, { "epoch": 0.15768733283155095, "grad_norm": 0.2060546875, "learning_rate": 0.00022452953393009648, "loss": 0.2785, "step": 88934 }, { "epoch": 0.15769087899686077, "grad_norm": 0.23828125, "learning_rate": 0.00022451573480100547, "loss": 0.1779, "step": 88936 }, { "epoch": 0.15769442516217058, "grad_norm": 0.734375, "learning_rate": 0.00022450193950077432, "loss": 0.2195, "step": 88938 }, { "epoch": 0.1576979713274804, "grad_norm": 0.6796875, "learning_rate": 0.00022448814802946315, "loss": 0.1616, "step": 88940 }, { "epoch": 0.1577015174927902, "grad_norm": 0.5390625, "learning_rate": 0.0002244743603871322, "loss": 0.2663, "step": 88942 }, { "epoch": 0.15770506365810003, "grad_norm": 0.46484375, "learning_rate": 0.00022446057657384176, "loss": 0.2122, "step": 88944 }, { "epoch": 0.15770860982340984, "grad_norm": 0.376953125, "learning_rate": 0.00022444679658965223, "loss": 0.1903, "step": 88946 }, { "epoch": 0.15771215598871965, "grad_norm": 0.33203125, "learning_rate": 0.00022443302043462392, "loss": 0.1601, "step": 88948 }, { "epoch": 0.15771570215402947, "grad_norm": 0.953125, "learning_rate": 0.00022441924810881692, "loss": 0.1647, "step": 88950 }, { "epoch": 0.15771924831933928, "grad_norm": 0.47265625, "learning_rate": 0.0002244054796122915, "loss": 0.2514, "step": 88952 }, { "epoch": 0.1577227944846491, "grad_norm": 0.38671875, "learning_rate": 0.00022439171494510786, "loss": 0.2461, "step": 88954 }, { "epoch": 0.1577263406499589, "grad_norm": 0.3515625, "learning_rate": 0.00022437795410732648, "loss": 0.1993, "step": 88956 }, { "epoch": 0.15772988681526873, "grad_norm": 0.546875, "learning_rate": 0.00022436419709900707, "loss": 0.1954, "step": 88958 }, { "epoch": 0.15773343298057854, "grad_norm": 0.2119140625, "learning_rate": 0.00022435044392021006, "loss": 0.2255, "step": 88960 }, { "epoch": 0.15773697914588836, "grad_norm": 1.1171875, "learning_rate": 0.00022433669457099558, "loss": 0.2848, "step": 88962 }, { "epoch": 0.15774052531119817, "grad_norm": 0.640625, "learning_rate": 0.00022432294905142393, "loss": 0.174, "step": 88964 }, { "epoch": 0.15774407147650799, "grad_norm": 0.80078125, "learning_rate": 0.00022430920736155483, "loss": 0.5296, "step": 88966 }, { "epoch": 0.1577476176418178, "grad_norm": 0.80859375, "learning_rate": 0.00022429546950144863, "loss": 0.3723, "step": 88968 }, { "epoch": 0.15775116380712761, "grad_norm": 1.0859375, "learning_rate": 0.00022428173547116547, "loss": 0.3311, "step": 88970 }, { "epoch": 0.15775470997243743, "grad_norm": 0.65625, "learning_rate": 0.00022426800527076523, "loss": 0.2522, "step": 88972 }, { "epoch": 0.15775825613774724, "grad_norm": 0.490234375, "learning_rate": 0.0002242542789003082, "loss": 0.1481, "step": 88974 }, { "epoch": 0.15776180230305706, "grad_norm": 0.451171875, "learning_rate": 0.00022424055635985403, "loss": 0.1714, "step": 88976 }, { "epoch": 0.15776534846836687, "grad_norm": 0.63671875, "learning_rate": 0.00022422683764946314, "loss": 0.162, "step": 88978 }, { "epoch": 0.1577688946336767, "grad_norm": 0.251953125, "learning_rate": 0.00022421312276919527, "loss": 0.1265, "step": 88980 }, { "epoch": 0.1577724407989865, "grad_norm": 0.52734375, "learning_rate": 0.00022419941171911056, "loss": 0.1996, "step": 88982 }, { "epoch": 0.15777598696429632, "grad_norm": 0.453125, "learning_rate": 0.0002241857044992688, "loss": 0.1629, "step": 88984 }, { "epoch": 0.15777953312960613, "grad_norm": 0.359375, "learning_rate": 0.0002241720011097301, "loss": 0.2286, "step": 88986 }, { "epoch": 0.15778307929491595, "grad_norm": 0.5, "learning_rate": 0.00022415830155055432, "loss": 0.2063, "step": 88988 }, { "epoch": 0.15778662546022576, "grad_norm": 1.984375, "learning_rate": 0.00022414460582180135, "loss": 0.3295, "step": 88990 }, { "epoch": 0.15779017162553557, "grad_norm": 0.32421875, "learning_rate": 0.00022413091392353118, "loss": 0.1488, "step": 88992 }, { "epoch": 0.1577937177908454, "grad_norm": 0.240234375, "learning_rate": 0.00022411722585580348, "loss": 0.1774, "step": 88994 }, { "epoch": 0.1577972639561552, "grad_norm": 0.26953125, "learning_rate": 0.00022410354161867847, "loss": 0.1976, "step": 88996 }, { "epoch": 0.15780081012146502, "grad_norm": 2.34375, "learning_rate": 0.0002240898612122156, "loss": 0.6944, "step": 88998 }, { "epoch": 0.15780435628677483, "grad_norm": 0.53515625, "learning_rate": 0.00022407618463647497, "loss": 0.1965, "step": 89000 }, { "epoch": 0.15780790245208465, "grad_norm": 6.96875, "learning_rate": 0.00022406251189151626, "loss": 0.183, "step": 89002 }, { "epoch": 0.15781144861739446, "grad_norm": 5.125, "learning_rate": 0.00022404884297739947, "loss": 0.2914, "step": 89004 }, { "epoch": 0.15781499478270428, "grad_norm": 0.31640625, "learning_rate": 0.0002240351778941841, "loss": 0.2472, "step": 89006 }, { "epoch": 0.1578185409480141, "grad_norm": 0.44140625, "learning_rate": 0.00022402151664193006, "loss": 0.1625, "step": 89008 }, { "epoch": 0.1578220871133239, "grad_norm": 1.34375, "learning_rate": 0.0002240078592206971, "loss": 0.1955, "step": 89010 }, { "epoch": 0.15782563327863372, "grad_norm": 0.30859375, "learning_rate": 0.00022399420563054483, "loss": 0.1481, "step": 89012 }, { "epoch": 0.15782917944394356, "grad_norm": 0.494140625, "learning_rate": 0.00022398055587153324, "loss": 0.2302, "step": 89014 }, { "epoch": 0.15783272560925338, "grad_norm": 0.216796875, "learning_rate": 0.00022396690994372172, "loss": 0.1645, "step": 89016 }, { "epoch": 0.1578362717745632, "grad_norm": 0.2734375, "learning_rate": 0.0002239532678471701, "loss": 0.1807, "step": 89018 }, { "epoch": 0.157839817939873, "grad_norm": 0.546875, "learning_rate": 0.00022393962958193795, "loss": 0.2582, "step": 89020 }, { "epoch": 0.15784336410518282, "grad_norm": 0.234375, "learning_rate": 0.0002239259951480852, "loss": 0.1708, "step": 89022 }, { "epoch": 0.15784691027049264, "grad_norm": 0.451171875, "learning_rate": 0.00022391236454567104, "loss": 0.1287, "step": 89024 }, { "epoch": 0.15785045643580245, "grad_norm": 0.236328125, "learning_rate": 0.0002238987377747554, "loss": 0.1217, "step": 89026 }, { "epoch": 0.15785400260111226, "grad_norm": 3.0, "learning_rate": 0.0002238851148353978, "loss": 0.2124, "step": 89028 }, { "epoch": 0.15785754876642208, "grad_norm": 1.09375, "learning_rate": 0.00022387149572765776, "loss": 0.2554, "step": 89030 }, { "epoch": 0.1578610949317319, "grad_norm": 0.486328125, "learning_rate": 0.00022385788045159485, "loss": 0.1894, "step": 89032 }, { "epoch": 0.1578646410970417, "grad_norm": 0.35546875, "learning_rate": 0.0002238442690072687, "loss": 0.2082, "step": 89034 }, { "epoch": 0.15786818726235152, "grad_norm": 0.5625, "learning_rate": 0.00022383066139473873, "loss": 0.1328, "step": 89036 }, { "epoch": 0.15787173342766134, "grad_norm": 0.39453125, "learning_rate": 0.0002238170576140646, "loss": 0.1906, "step": 89038 }, { "epoch": 0.15787527959297115, "grad_norm": 0.291015625, "learning_rate": 0.00022380345766530562, "loss": 0.1244, "step": 89040 }, { "epoch": 0.15787882575828097, "grad_norm": 0.66796875, "learning_rate": 0.00022378986154852142, "loss": 0.1606, "step": 89042 }, { "epoch": 0.15788237192359078, "grad_norm": 0.3125, "learning_rate": 0.00022377626926377138, "loss": 0.1655, "step": 89044 }, { "epoch": 0.1578859180889006, "grad_norm": 0.71484375, "learning_rate": 0.000223762680811115, "loss": 0.1862, "step": 89046 }, { "epoch": 0.1578894642542104, "grad_norm": 0.890625, "learning_rate": 0.0002237490961906116, "loss": 0.2935, "step": 89048 }, { "epoch": 0.15789301041952022, "grad_norm": 0.498046875, "learning_rate": 0.00022373551540232076, "loss": 0.194, "step": 89050 }, { "epoch": 0.15789655658483004, "grad_norm": 0.4609375, "learning_rate": 0.00022372193844630162, "loss": 0.1954, "step": 89052 }, { "epoch": 0.15790010275013985, "grad_norm": 0.478515625, "learning_rate": 0.0002237083653226139, "loss": 0.2901, "step": 89054 }, { "epoch": 0.15790364891544967, "grad_norm": 0.369140625, "learning_rate": 0.00022369479603131674, "loss": 0.1927, "step": 89056 }, { "epoch": 0.15790719508075948, "grad_norm": 0.26953125, "learning_rate": 0.0002236812305724695, "loss": 0.1654, "step": 89058 }, { "epoch": 0.1579107412460693, "grad_norm": 0.35546875, "learning_rate": 0.00022366766894613146, "loss": 0.1681, "step": 89060 }, { "epoch": 0.1579142874113791, "grad_norm": 0.314453125, "learning_rate": 0.0002236541111523621, "loss": 0.2266, "step": 89062 }, { "epoch": 0.15791783357668893, "grad_norm": 0.63671875, "learning_rate": 0.00022364055719122056, "loss": 0.1689, "step": 89064 }, { "epoch": 0.15792137974199874, "grad_norm": 1.0234375, "learning_rate": 0.0002236270070627662, "loss": 0.1967, "step": 89066 }, { "epoch": 0.15792492590730856, "grad_norm": 0.67578125, "learning_rate": 0.00022361346076705824, "loss": 0.167, "step": 89068 }, { "epoch": 0.15792847207261837, "grad_norm": 0.294921875, "learning_rate": 0.00022359991830415597, "loss": 0.2198, "step": 89070 }, { "epoch": 0.15793201823792818, "grad_norm": 0.26171875, "learning_rate": 0.0002235863796741186, "loss": 0.1692, "step": 89072 }, { "epoch": 0.157935564403238, "grad_norm": 0.212890625, "learning_rate": 0.00022357284487700514, "loss": 0.1578, "step": 89074 }, { "epoch": 0.1579391105685478, "grad_norm": 0.3828125, "learning_rate": 0.00022355931391287518, "loss": 0.1511, "step": 89076 }, { "epoch": 0.15794265673385763, "grad_norm": 0.341796875, "learning_rate": 0.0002235457867817876, "loss": 0.1237, "step": 89078 }, { "epoch": 0.15794620289916744, "grad_norm": 0.326171875, "learning_rate": 0.00022353226348380167, "loss": 0.1636, "step": 89080 }, { "epoch": 0.15794974906447726, "grad_norm": 0.71875, "learning_rate": 0.00022351874401897653, "loss": 0.1986, "step": 89082 }, { "epoch": 0.15795329522978707, "grad_norm": 0.765625, "learning_rate": 0.00022350522838737114, "loss": 0.304, "step": 89084 }, { "epoch": 0.1579568413950969, "grad_norm": 0.19140625, "learning_rate": 0.00022349171658904494, "loss": 0.1865, "step": 89086 }, { "epoch": 0.1579603875604067, "grad_norm": 0.53125, "learning_rate": 0.0002234782086240567, "loss": 0.1511, "step": 89088 }, { "epoch": 0.15796393372571652, "grad_norm": 0.3671875, "learning_rate": 0.00022346470449246573, "loss": 0.1292, "step": 89090 }, { "epoch": 0.15796747989102633, "grad_norm": 0.33984375, "learning_rate": 0.00022345120419433082, "loss": 0.1388, "step": 89092 }, { "epoch": 0.15797102605633614, "grad_norm": 0.1953125, "learning_rate": 0.0002234377077297113, "loss": 0.1442, "step": 89094 }, { "epoch": 0.15797457222164596, "grad_norm": 0.1640625, "learning_rate": 0.00022342421509866618, "loss": 0.1703, "step": 89096 }, { "epoch": 0.15797811838695577, "grad_norm": 0.73046875, "learning_rate": 0.00022341072630125425, "loss": 0.2273, "step": 89098 }, { "epoch": 0.1579816645522656, "grad_norm": 0.2490234375, "learning_rate": 0.00022339724133753466, "loss": 0.132, "step": 89100 }, { "epoch": 0.1579852107175754, "grad_norm": 0.796875, "learning_rate": 0.00022338376020756623, "loss": 0.1842, "step": 89102 }, { "epoch": 0.15798875688288522, "grad_norm": 0.55859375, "learning_rate": 0.00022337028291140825, "loss": 0.242, "step": 89104 }, { "epoch": 0.15799230304819506, "grad_norm": 0.3359375, "learning_rate": 0.0002233568094491193, "loss": 0.2534, "step": 89106 }, { "epoch": 0.15799584921350487, "grad_norm": 0.412109375, "learning_rate": 0.0002233433398207585, "loss": 0.2116, "step": 89108 }, { "epoch": 0.1579993953788147, "grad_norm": 0.494140625, "learning_rate": 0.00022332987402638455, "loss": 0.1704, "step": 89110 }, { "epoch": 0.1580029415441245, "grad_norm": 0.259765625, "learning_rate": 0.00022331641206605677, "loss": 0.3444, "step": 89112 }, { "epoch": 0.15800648770943432, "grad_norm": 0.4921875, "learning_rate": 0.00022330295393983352, "loss": 0.1878, "step": 89114 }, { "epoch": 0.15801003387474413, "grad_norm": 0.330078125, "learning_rate": 0.00022328949964777405, "loss": 0.2403, "step": 89116 }, { "epoch": 0.15801358004005395, "grad_norm": 0.41015625, "learning_rate": 0.00022327604918993704, "loss": 0.1715, "step": 89118 }, { "epoch": 0.15801712620536376, "grad_norm": 0.41796875, "learning_rate": 0.00022326260256638132, "loss": 0.2154, "step": 89120 }, { "epoch": 0.15802067237067358, "grad_norm": 0.447265625, "learning_rate": 0.00022324915977716574, "loss": 0.1976, "step": 89122 }, { "epoch": 0.1580242185359834, "grad_norm": 0.306640625, "learning_rate": 0.00022323572082234887, "loss": 0.1519, "step": 89124 }, { "epoch": 0.1580277647012932, "grad_norm": 0.453125, "learning_rate": 0.00022322228570198982, "loss": 0.1865, "step": 89126 }, { "epoch": 0.15803131086660302, "grad_norm": 0.66796875, "learning_rate": 0.0002232088544161472, "loss": 0.1986, "step": 89128 }, { "epoch": 0.15803485703191283, "grad_norm": 1.2265625, "learning_rate": 0.00022319542696487985, "loss": 0.1832, "step": 89130 }, { "epoch": 0.15803840319722265, "grad_norm": 0.6640625, "learning_rate": 0.00022318200334824618, "loss": 0.1828, "step": 89132 }, { "epoch": 0.15804194936253246, "grad_norm": 0.515625, "learning_rate": 0.0002231685835663052, "loss": 0.194, "step": 89134 }, { "epoch": 0.15804549552784228, "grad_norm": 0.8984375, "learning_rate": 0.00022315516761911553, "loss": 0.156, "step": 89136 }, { "epoch": 0.1580490416931521, "grad_norm": 0.76171875, "learning_rate": 0.00022314175550673592, "loss": 0.2096, "step": 89138 }, { "epoch": 0.1580525878584619, "grad_norm": 0.59765625, "learning_rate": 0.00022312834722922482, "loss": 0.1811, "step": 89140 }, { "epoch": 0.15805613402377172, "grad_norm": 0.29296875, "learning_rate": 0.00022311494278664092, "loss": 0.2117, "step": 89142 }, { "epoch": 0.15805968018908154, "grad_norm": 0.279296875, "learning_rate": 0.00022310154217904305, "loss": 0.1889, "step": 89144 }, { "epoch": 0.15806322635439135, "grad_norm": 0.59375, "learning_rate": 0.00022308814540648953, "loss": 0.2149, "step": 89146 }, { "epoch": 0.15806677251970117, "grad_norm": 2.15625, "learning_rate": 0.0002230747524690392, "loss": 0.3449, "step": 89148 }, { "epoch": 0.15807031868501098, "grad_norm": 0.462890625, "learning_rate": 0.0002230613633667504, "loss": 0.1866, "step": 89150 }, { "epoch": 0.1580738648503208, "grad_norm": 1.890625, "learning_rate": 0.00022304797809968196, "loss": 0.3292, "step": 89152 }, { "epoch": 0.1580774110156306, "grad_norm": 0.4375, "learning_rate": 0.00022303459666789214, "loss": 0.1871, "step": 89154 }, { "epoch": 0.15808095718094042, "grad_norm": 0.6015625, "learning_rate": 0.00022302121907143973, "loss": 0.212, "step": 89156 }, { "epoch": 0.15808450334625024, "grad_norm": 1.40625, "learning_rate": 0.00022300784531038305, "loss": 0.1667, "step": 89158 }, { "epoch": 0.15808804951156005, "grad_norm": 0.310546875, "learning_rate": 0.00022299447538478063, "loss": 0.1622, "step": 89160 }, { "epoch": 0.15809159567686987, "grad_norm": 0.3515625, "learning_rate": 0.0002229811092946909, "loss": 0.1657, "step": 89162 }, { "epoch": 0.15809514184217968, "grad_norm": 0.671875, "learning_rate": 0.0002229677470401723, "loss": 0.2329, "step": 89164 }, { "epoch": 0.1580986880074895, "grad_norm": 0.2353515625, "learning_rate": 0.0002229543886212835, "loss": 0.1643, "step": 89166 }, { "epoch": 0.1581022341727993, "grad_norm": 0.27734375, "learning_rate": 0.0002229410340380827, "loss": 0.2165, "step": 89168 }, { "epoch": 0.15810578033810913, "grad_norm": 0.5703125, "learning_rate": 0.00022292768329062833, "loss": 0.1766, "step": 89170 }, { "epoch": 0.15810932650341894, "grad_norm": 0.28515625, "learning_rate": 0.00022291433637897882, "loss": 0.168, "step": 89172 }, { "epoch": 0.15811287266872875, "grad_norm": 0.8125, "learning_rate": 0.00022290099330319254, "loss": 0.2165, "step": 89174 }, { "epoch": 0.15811641883403857, "grad_norm": 0.2197265625, "learning_rate": 0.00022288765406332779, "loss": 0.155, "step": 89176 }, { "epoch": 0.15811996499934838, "grad_norm": 0.17578125, "learning_rate": 0.000222874318659443, "loss": 0.1874, "step": 89178 }, { "epoch": 0.1581235111646582, "grad_norm": 0.86328125, "learning_rate": 0.00022286098709159647, "loss": 0.3195, "step": 89180 }, { "epoch": 0.158127057329968, "grad_norm": 0.2216796875, "learning_rate": 0.00022284765935984643, "loss": 0.1203, "step": 89182 }, { "epoch": 0.15813060349527783, "grad_norm": 0.400390625, "learning_rate": 0.0002228343354642512, "loss": 0.1669, "step": 89184 }, { "epoch": 0.15813414966058764, "grad_norm": 0.51953125, "learning_rate": 0.00022282101540486914, "loss": 0.2876, "step": 89186 }, { "epoch": 0.15813769582589746, "grad_norm": 1.5234375, "learning_rate": 0.00022280769918175844, "loss": 0.2651, "step": 89188 }, { "epoch": 0.15814124199120727, "grad_norm": 0.6328125, "learning_rate": 0.00022279438679497721, "loss": 0.1961, "step": 89190 }, { "epoch": 0.15814478815651709, "grad_norm": 0.53125, "learning_rate": 0.00022278107824458387, "loss": 0.1802, "step": 89192 }, { "epoch": 0.1581483343218269, "grad_norm": 1.359375, "learning_rate": 0.00022276777353063653, "loss": 0.1551, "step": 89194 }, { "epoch": 0.15815188048713674, "grad_norm": 0.69921875, "learning_rate": 0.0002227544726531935, "loss": 0.2552, "step": 89196 }, { "epoch": 0.15815542665244656, "grad_norm": 1.390625, "learning_rate": 0.0002227411756123127, "loss": 0.2646, "step": 89198 }, { "epoch": 0.15815897281775637, "grad_norm": 0.56640625, "learning_rate": 0.00022272788240805245, "loss": 0.2577, "step": 89200 }, { "epoch": 0.15816251898306619, "grad_norm": 0.466796875, "learning_rate": 0.00022271459304047097, "loss": 0.1832, "step": 89202 }, { "epoch": 0.158166065148376, "grad_norm": 0.349609375, "learning_rate": 0.00022270130750962612, "loss": 0.199, "step": 89204 }, { "epoch": 0.15816961131368582, "grad_norm": 1.421875, "learning_rate": 0.00022268802581557623, "loss": 0.4085, "step": 89206 }, { "epoch": 0.15817315747899563, "grad_norm": 0.41796875, "learning_rate": 0.0002226747479583793, "loss": 0.2463, "step": 89208 }, { "epoch": 0.15817670364430544, "grad_norm": 0.53515625, "learning_rate": 0.00022266147393809347, "loss": 0.1802, "step": 89210 }, { "epoch": 0.15818024980961526, "grad_norm": 1.5, "learning_rate": 0.00022264820375477657, "loss": 0.199, "step": 89212 }, { "epoch": 0.15818379597492507, "grad_norm": 0.275390625, "learning_rate": 0.00022263493740848694, "loss": 0.2208, "step": 89214 }, { "epoch": 0.1581873421402349, "grad_norm": 0.3515625, "learning_rate": 0.0002226216748992824, "loss": 0.1431, "step": 89216 }, { "epoch": 0.1581908883055447, "grad_norm": 0.26171875, "learning_rate": 0.00022260841622722102, "loss": 0.3447, "step": 89218 }, { "epoch": 0.15819443447085452, "grad_norm": 0.357421875, "learning_rate": 0.00022259516139236084, "loss": 0.1925, "step": 89220 }, { "epoch": 0.15819798063616433, "grad_norm": 0.64453125, "learning_rate": 0.00022258191039475965, "loss": 0.2334, "step": 89222 }, { "epoch": 0.15820152680147415, "grad_norm": 0.58203125, "learning_rate": 0.00022256866323447561, "loss": 0.1766, "step": 89224 }, { "epoch": 0.15820507296678396, "grad_norm": 0.33203125, "learning_rate": 0.0002225554199115666, "loss": 0.1667, "step": 89226 }, { "epoch": 0.15820861913209378, "grad_norm": 0.404296875, "learning_rate": 0.0002225421804260905, "loss": 0.2016, "step": 89228 }, { "epoch": 0.1582121652974036, "grad_norm": 3.109375, "learning_rate": 0.00022252894477810516, "loss": 0.2095, "step": 89230 }, { "epoch": 0.1582157114627134, "grad_norm": 6.21875, "learning_rate": 0.00022251571296766842, "loss": 0.2051, "step": 89232 }, { "epoch": 0.15821925762802322, "grad_norm": 0.302734375, "learning_rate": 0.00022250248499483844, "loss": 0.1355, "step": 89234 }, { "epoch": 0.15822280379333303, "grad_norm": 0.33984375, "learning_rate": 0.00022248926085967273, "loss": 0.1619, "step": 89236 }, { "epoch": 0.15822634995864285, "grad_norm": 0.95703125, "learning_rate": 0.00022247604056222937, "loss": 0.2626, "step": 89238 }, { "epoch": 0.15822989612395266, "grad_norm": 0.494140625, "learning_rate": 0.00022246282410256596, "loss": 0.2273, "step": 89240 }, { "epoch": 0.15823344228926248, "grad_norm": 0.56640625, "learning_rate": 0.00022244961148074063, "loss": 0.1486, "step": 89242 }, { "epoch": 0.1582369884545723, "grad_norm": 0.2080078125, "learning_rate": 0.0002224364026968107, "loss": 0.1484, "step": 89244 }, { "epoch": 0.1582405346198821, "grad_norm": 0.265625, "learning_rate": 0.00022242319775083445, "loss": 0.1921, "step": 89246 }, { "epoch": 0.15824408078519192, "grad_norm": 0.2109375, "learning_rate": 0.00022240999664286927, "loss": 0.1332, "step": 89248 }, { "epoch": 0.15824762695050174, "grad_norm": 0.4765625, "learning_rate": 0.00022239679937297296, "loss": 0.135, "step": 89250 }, { "epoch": 0.15825117311581155, "grad_norm": 0.25390625, "learning_rate": 0.00022238360594120326, "loss": 0.2696, "step": 89252 }, { "epoch": 0.15825471928112136, "grad_norm": 0.427734375, "learning_rate": 0.00022237041634761795, "loss": 0.1419, "step": 89254 }, { "epoch": 0.15825826544643118, "grad_norm": 0.2734375, "learning_rate": 0.00022235723059227467, "loss": 0.2131, "step": 89256 }, { "epoch": 0.158261811611741, "grad_norm": 0.283203125, "learning_rate": 0.00022234404867523094, "loss": 0.1772, "step": 89258 }, { "epoch": 0.1582653577770508, "grad_norm": 0.1806640625, "learning_rate": 0.00022233087059654466, "loss": 0.1743, "step": 89260 }, { "epoch": 0.15826890394236062, "grad_norm": 0.6015625, "learning_rate": 0.00022231769635627327, "loss": 0.1954, "step": 89262 }, { "epoch": 0.15827245010767044, "grad_norm": 0.490234375, "learning_rate": 0.00022230452595447454, "loss": 0.1439, "step": 89264 }, { "epoch": 0.15827599627298025, "grad_norm": 0.3125, "learning_rate": 0.00022229135939120595, "loss": 0.1366, "step": 89266 }, { "epoch": 0.15827954243829007, "grad_norm": 0.29296875, "learning_rate": 0.00022227819666652515, "loss": 0.1434, "step": 89268 }, { "epoch": 0.15828308860359988, "grad_norm": 0.75, "learning_rate": 0.00022226503778048962, "loss": 0.1875, "step": 89270 }, { "epoch": 0.1582866347689097, "grad_norm": 0.416015625, "learning_rate": 0.00022225188273315697, "loss": 0.1851, "step": 89272 }, { "epoch": 0.1582901809342195, "grad_norm": 0.37890625, "learning_rate": 0.00022223873152458473, "loss": 0.2144, "step": 89274 }, { "epoch": 0.15829372709952932, "grad_norm": 0.2734375, "learning_rate": 0.00022222558415483043, "loss": 0.1617, "step": 89276 }, { "epoch": 0.15829727326483914, "grad_norm": 0.25, "learning_rate": 0.00022221244062395158, "loss": 0.173, "step": 89278 }, { "epoch": 0.15830081943014895, "grad_norm": 0.208984375, "learning_rate": 0.00022219930093200557, "loss": 0.1482, "step": 89280 }, { "epoch": 0.15830436559545877, "grad_norm": 0.439453125, "learning_rate": 0.00022218616507905002, "loss": 0.1311, "step": 89282 }, { "epoch": 0.15830791176076858, "grad_norm": 0.484375, "learning_rate": 0.00022217303306514222, "loss": 0.1621, "step": 89284 }, { "epoch": 0.1583114579260784, "grad_norm": 0.443359375, "learning_rate": 0.00022215990489033965, "loss": 0.184, "step": 89286 }, { "epoch": 0.15831500409138824, "grad_norm": 0.14453125, "learning_rate": 0.00022214678055469978, "loss": 0.1357, "step": 89288 }, { "epoch": 0.15831855025669805, "grad_norm": 0.37890625, "learning_rate": 0.00022213366005827996, "loss": 0.1869, "step": 89290 }, { "epoch": 0.15832209642200787, "grad_norm": 0.318359375, "learning_rate": 0.00022212054340113775, "loss": 0.1682, "step": 89292 }, { "epoch": 0.15832564258731768, "grad_norm": 0.28515625, "learning_rate": 0.00022210743058333004, "loss": 0.1443, "step": 89294 }, { "epoch": 0.1583291887526275, "grad_norm": 0.5703125, "learning_rate": 0.00022209432160491467, "loss": 0.2264, "step": 89296 }, { "epoch": 0.1583327349179373, "grad_norm": 1.6953125, "learning_rate": 0.00022208121646594877, "loss": 0.1851, "step": 89298 }, { "epoch": 0.15833628108324713, "grad_norm": 0.34375, "learning_rate": 0.00022206811516648975, "loss": 0.1837, "step": 89300 }, { "epoch": 0.15833982724855694, "grad_norm": 0.349609375, "learning_rate": 0.0002220550177065947, "loss": 0.1474, "step": 89302 }, { "epoch": 0.15834337341386676, "grad_norm": 0.205078125, "learning_rate": 0.0002220419240863211, "loss": 0.1597, "step": 89304 }, { "epoch": 0.15834691957917657, "grad_norm": 0.53125, "learning_rate": 0.00022202883430572616, "loss": 0.1495, "step": 89306 }, { "epoch": 0.15835046574448639, "grad_norm": 0.2236328125, "learning_rate": 0.00022201574836486703, "loss": 0.1719, "step": 89308 }, { "epoch": 0.1583540119097962, "grad_norm": 0.419921875, "learning_rate": 0.0002220026662638011, "loss": 0.1782, "step": 89310 }, { "epoch": 0.15835755807510601, "grad_norm": 0.482421875, "learning_rate": 0.0002219895880025854, "loss": 0.2313, "step": 89312 }, { "epoch": 0.15836110424041583, "grad_norm": 0.44921875, "learning_rate": 0.00022197651358127742, "loss": 0.1582, "step": 89314 }, { "epoch": 0.15836465040572564, "grad_norm": 0.3203125, "learning_rate": 0.00022196344299993403, "loss": 0.1676, "step": 89316 }, { "epoch": 0.15836819657103546, "grad_norm": 0.80078125, "learning_rate": 0.0002219503762586125, "loss": 0.2022, "step": 89318 }, { "epoch": 0.15837174273634527, "grad_norm": 0.25, "learning_rate": 0.00022193731335736999, "loss": 0.2064, "step": 89320 }, { "epoch": 0.1583752889016551, "grad_norm": 0.30078125, "learning_rate": 0.0002219242542962637, "loss": 0.1173, "step": 89322 }, { "epoch": 0.1583788350669649, "grad_norm": 0.296875, "learning_rate": 0.00022191119907535067, "loss": 0.1992, "step": 89324 }, { "epoch": 0.15838238123227472, "grad_norm": 0.3515625, "learning_rate": 0.00022189814769468796, "loss": 0.1921, "step": 89326 }, { "epoch": 0.15838592739758453, "grad_norm": 0.26171875, "learning_rate": 0.00022188510015433272, "loss": 0.1459, "step": 89328 }, { "epoch": 0.15838947356289435, "grad_norm": 1.0390625, "learning_rate": 0.00022187205645434193, "loss": 0.1597, "step": 89330 }, { "epoch": 0.15839301972820416, "grad_norm": 0.2412109375, "learning_rate": 0.0002218590165947728, "loss": 0.1942, "step": 89332 }, { "epoch": 0.15839656589351397, "grad_norm": 1.1171875, "learning_rate": 0.0002218459805756821, "loss": 0.2732, "step": 89334 }, { "epoch": 0.1584001120588238, "grad_norm": 2.859375, "learning_rate": 0.0002218329483971271, "loss": 0.2185, "step": 89336 }, { "epoch": 0.1584036582241336, "grad_norm": 1.65625, "learning_rate": 0.00022181992005916451, "loss": 0.1764, "step": 89338 }, { "epoch": 0.15840720438944342, "grad_norm": 0.6640625, "learning_rate": 0.00022180689556185177, "loss": 0.1694, "step": 89340 }, { "epoch": 0.15841075055475323, "grad_norm": 0.5703125, "learning_rate": 0.00022179387490524525, "loss": 0.1634, "step": 89342 }, { "epoch": 0.15841429672006305, "grad_norm": 0.8359375, "learning_rate": 0.00022178085808940247, "loss": 0.1718, "step": 89344 }, { "epoch": 0.15841784288537286, "grad_norm": 0.392578125, "learning_rate": 0.00022176784511437992, "loss": 0.1798, "step": 89346 }, { "epoch": 0.15842138905068268, "grad_norm": 0.296875, "learning_rate": 0.0002217548359802347, "loss": 0.2153, "step": 89348 }, { "epoch": 0.1584249352159925, "grad_norm": 0.65625, "learning_rate": 0.00022174183068702371, "loss": 0.2451, "step": 89350 }, { "epoch": 0.1584284813813023, "grad_norm": 3.703125, "learning_rate": 0.00022172882923480382, "loss": 0.4173, "step": 89352 }, { "epoch": 0.15843202754661212, "grad_norm": 0.376953125, "learning_rate": 0.00022171583162363173, "loss": 0.1437, "step": 89354 }, { "epoch": 0.15843557371192193, "grad_norm": 1.609375, "learning_rate": 0.00022170283785356455, "loss": 0.1429, "step": 89356 }, { "epoch": 0.15843911987723175, "grad_norm": 0.45703125, "learning_rate": 0.00022168984792465902, "loss": 0.1544, "step": 89358 }, { "epoch": 0.15844266604254156, "grad_norm": 0.494140625, "learning_rate": 0.00022167686183697182, "loss": 0.157, "step": 89360 }, { "epoch": 0.15844621220785138, "grad_norm": 1.0625, "learning_rate": 0.00022166387959055996, "loss": 0.1785, "step": 89362 }, { "epoch": 0.1584497583731612, "grad_norm": 0.263671875, "learning_rate": 0.00022165090118547989, "loss": 0.1926, "step": 89364 }, { "epoch": 0.158453304538471, "grad_norm": 0.640625, "learning_rate": 0.00022163792662178877, "loss": 0.231, "step": 89366 }, { "epoch": 0.15845685070378082, "grad_norm": 0.4296875, "learning_rate": 0.00022162495589954313, "loss": 0.1373, "step": 89368 }, { "epoch": 0.15846039686909064, "grad_norm": 0.74609375, "learning_rate": 0.00022161198901879956, "loss": 0.294, "step": 89370 }, { "epoch": 0.15846394303440045, "grad_norm": 0.98828125, "learning_rate": 0.00022159902597961497, "loss": 0.2094, "step": 89372 }, { "epoch": 0.15846748919971027, "grad_norm": 0.423828125, "learning_rate": 0.00022158606678204606, "loss": 0.1992, "step": 89374 }, { "epoch": 0.15847103536502008, "grad_norm": 0.33203125, "learning_rate": 0.00022157311142614948, "loss": 0.1552, "step": 89376 }, { "epoch": 0.15847458153032992, "grad_norm": 0.2109375, "learning_rate": 0.00022156015991198173, "loss": 0.1085, "step": 89378 }, { "epoch": 0.15847812769563974, "grad_norm": 0.2490234375, "learning_rate": 0.00022154721223959967, "loss": 0.1394, "step": 89380 }, { "epoch": 0.15848167386094955, "grad_norm": 1.640625, "learning_rate": 0.00022153426840905988, "loss": 0.221, "step": 89382 }, { "epoch": 0.15848522002625937, "grad_norm": 0.380859375, "learning_rate": 0.0002215213284204189, "loss": 0.1579, "step": 89384 }, { "epoch": 0.15848876619156918, "grad_norm": 1.484375, "learning_rate": 0.0002215083922737333, "loss": 0.3119, "step": 89386 }, { "epoch": 0.158492312356879, "grad_norm": 0.5078125, "learning_rate": 0.0002214954599690597, "loss": 0.1494, "step": 89388 }, { "epoch": 0.1584958585221888, "grad_norm": 0.984375, "learning_rate": 0.00022148253150645482, "loss": 0.1488, "step": 89390 }, { "epoch": 0.15849940468749862, "grad_norm": 0.3203125, "learning_rate": 0.00022146960688597476, "loss": 0.1763, "step": 89392 }, { "epoch": 0.15850295085280844, "grad_norm": 0.2412109375, "learning_rate": 0.00022145668610767656, "loss": 0.1586, "step": 89394 }, { "epoch": 0.15850649701811825, "grad_norm": 1.0234375, "learning_rate": 0.00022144376917161641, "loss": 0.2075, "step": 89396 }, { "epoch": 0.15851004318342807, "grad_norm": 0.54296875, "learning_rate": 0.00022143085607785094, "loss": 0.2175, "step": 89398 }, { "epoch": 0.15851358934873788, "grad_norm": 0.5234375, "learning_rate": 0.0002214179468264365, "loss": 0.2119, "step": 89400 }, { "epoch": 0.1585171355140477, "grad_norm": 0.328125, "learning_rate": 0.00022140504141742964, "loss": 0.151, "step": 89402 }, { "epoch": 0.1585206816793575, "grad_norm": 0.4609375, "learning_rate": 0.00022139213985088675, "loss": 0.1523, "step": 89404 }, { "epoch": 0.15852422784466733, "grad_norm": 0.59375, "learning_rate": 0.00022137924212686433, "loss": 0.2242, "step": 89406 }, { "epoch": 0.15852777400997714, "grad_norm": 0.263671875, "learning_rate": 0.0002213663482454188, "loss": 0.157, "step": 89408 }, { "epoch": 0.15853132017528695, "grad_norm": 0.52734375, "learning_rate": 0.00022135345820660637, "loss": 0.1898, "step": 89410 }, { "epoch": 0.15853486634059677, "grad_norm": 0.578125, "learning_rate": 0.0002213405720104836, "loss": 0.1591, "step": 89412 }, { "epoch": 0.15853841250590658, "grad_norm": 0.21875, "learning_rate": 0.00022132768965710676, "loss": 0.1227, "step": 89414 }, { "epoch": 0.1585419586712164, "grad_norm": 0.498046875, "learning_rate": 0.0002213148111465322, "loss": 0.1492, "step": 89416 }, { "epoch": 0.1585455048365262, "grad_norm": 0.515625, "learning_rate": 0.00022130193647881637, "loss": 0.1502, "step": 89418 }, { "epoch": 0.15854905100183603, "grad_norm": 1.25, "learning_rate": 0.0002212890656540152, "loss": 0.2212, "step": 89420 }, { "epoch": 0.15855259716714584, "grad_norm": 0.423828125, "learning_rate": 0.00022127619867218562, "loss": 0.2882, "step": 89422 }, { "epoch": 0.15855614333245566, "grad_norm": 0.96875, "learning_rate": 0.00022126333553338322, "loss": 0.2078, "step": 89424 }, { "epoch": 0.15855968949776547, "grad_norm": 0.3984375, "learning_rate": 0.00022125047623766463, "loss": 0.1708, "step": 89426 }, { "epoch": 0.15856323566307529, "grad_norm": 0.51171875, "learning_rate": 0.00022123762078508598, "loss": 0.1886, "step": 89428 }, { "epoch": 0.1585667818283851, "grad_norm": 0.8984375, "learning_rate": 0.0002212247691757036, "loss": 0.2319, "step": 89430 }, { "epoch": 0.15857032799369492, "grad_norm": 0.65625, "learning_rate": 0.00022121192140957363, "loss": 0.203, "step": 89432 }, { "epoch": 0.15857387415900473, "grad_norm": 5.6875, "learning_rate": 0.00022119907748675233, "loss": 0.2604, "step": 89434 }, { "epoch": 0.15857742032431454, "grad_norm": 0.39453125, "learning_rate": 0.00022118623740729573, "loss": 0.1392, "step": 89436 }, { "epoch": 0.15858096648962436, "grad_norm": 0.8984375, "learning_rate": 0.00022117340117125994, "loss": 0.1961, "step": 89438 }, { "epoch": 0.15858451265493417, "grad_norm": 0.37109375, "learning_rate": 0.0002211605687787014, "loss": 0.1682, "step": 89440 }, { "epoch": 0.158588058820244, "grad_norm": 0.6640625, "learning_rate": 0.00022114774022967583, "loss": 0.15, "step": 89442 }, { "epoch": 0.1585916049855538, "grad_norm": 0.34375, "learning_rate": 0.0002211349155242397, "loss": 0.1483, "step": 89444 }, { "epoch": 0.15859515115086362, "grad_norm": 3.015625, "learning_rate": 0.00022112209466244886, "loss": 0.3684, "step": 89446 }, { "epoch": 0.15859869731617343, "grad_norm": 0.19140625, "learning_rate": 0.0002211092776443596, "loss": 0.1757, "step": 89448 }, { "epoch": 0.15860224348148325, "grad_norm": 0.314453125, "learning_rate": 0.00022109646447002768, "loss": 0.185, "step": 89450 }, { "epoch": 0.15860578964679306, "grad_norm": 0.8359375, "learning_rate": 0.0002210836551395094, "loss": 0.2024, "step": 89452 }, { "epoch": 0.15860933581210288, "grad_norm": 1.1015625, "learning_rate": 0.00022107084965286062, "loss": 0.1478, "step": 89454 }, { "epoch": 0.1586128819774127, "grad_norm": 0.9140625, "learning_rate": 0.00022105804801013745, "loss": 0.2383, "step": 89456 }, { "epoch": 0.1586164281427225, "grad_norm": 0.703125, "learning_rate": 0.0002210452502113958, "loss": 0.1642, "step": 89458 }, { "epoch": 0.15861997430803232, "grad_norm": 0.423828125, "learning_rate": 0.00022103245625669163, "loss": 0.1692, "step": 89460 }, { "epoch": 0.15862352047334213, "grad_norm": 0.3984375, "learning_rate": 0.000221019666146081, "loss": 0.2804, "step": 89462 }, { "epoch": 0.15862706663865195, "grad_norm": 0.3125, "learning_rate": 0.0002210068798796197, "loss": 0.1884, "step": 89464 }, { "epoch": 0.15863061280396176, "grad_norm": 0.369140625, "learning_rate": 0.00022099409745736384, "loss": 0.1659, "step": 89466 }, { "epoch": 0.1586341589692716, "grad_norm": 0.37890625, "learning_rate": 0.0002209813188793691, "loss": 0.236, "step": 89468 }, { "epoch": 0.15863770513458142, "grad_norm": 0.3515625, "learning_rate": 0.00022096854414569152, "loss": 0.1905, "step": 89470 }, { "epoch": 0.15864125129989123, "grad_norm": 1.5390625, "learning_rate": 0.00022095577325638693, "loss": 0.3139, "step": 89472 }, { "epoch": 0.15864479746520105, "grad_norm": 0.9921875, "learning_rate": 0.00022094300621151123, "loss": 0.2513, "step": 89474 }, { "epoch": 0.15864834363051086, "grad_norm": 0.78515625, "learning_rate": 0.00022093024301112017, "loss": 0.2053, "step": 89476 }, { "epoch": 0.15865188979582068, "grad_norm": 0.6015625, "learning_rate": 0.00022091748365526948, "loss": 0.1726, "step": 89478 }, { "epoch": 0.1586554359611305, "grad_norm": 0.26171875, "learning_rate": 0.00022090472814401525, "loss": 0.1715, "step": 89480 }, { "epoch": 0.1586589821264403, "grad_norm": 4.625, "learning_rate": 0.00022089197647741303, "loss": 0.2322, "step": 89482 }, { "epoch": 0.15866252829175012, "grad_norm": 0.5078125, "learning_rate": 0.00022087922865551862, "loss": 0.2375, "step": 89484 }, { "epoch": 0.15866607445705994, "grad_norm": 0.330078125, "learning_rate": 0.00022086648467838785, "loss": 0.1294, "step": 89486 }, { "epoch": 0.15866962062236975, "grad_norm": 0.287109375, "learning_rate": 0.00022085374454607653, "loss": 0.1952, "step": 89488 }, { "epoch": 0.15867316678767956, "grad_norm": 0.271484375, "learning_rate": 0.00022084100825864002, "loss": 0.1696, "step": 89490 }, { "epoch": 0.15867671295298938, "grad_norm": 0.5234375, "learning_rate": 0.0002208282758161344, "loss": 0.1809, "step": 89492 }, { "epoch": 0.1586802591182992, "grad_norm": 2.09375, "learning_rate": 0.0002208155472186153, "loss": 0.2848, "step": 89494 }, { "epoch": 0.158683805283609, "grad_norm": 0.494140625, "learning_rate": 0.0002208028224661382, "loss": 0.2553, "step": 89496 }, { "epoch": 0.15868735144891882, "grad_norm": 0.455078125, "learning_rate": 0.0002207901015587589, "loss": 0.1992, "step": 89498 }, { "epoch": 0.15869089761422864, "grad_norm": 0.369140625, "learning_rate": 0.00022077738449653283, "loss": 0.191, "step": 89500 }, { "epoch": 0.15869444377953845, "grad_norm": 0.53515625, "learning_rate": 0.00022076467127951603, "loss": 0.1745, "step": 89502 }, { "epoch": 0.15869798994484827, "grad_norm": 0.2060546875, "learning_rate": 0.0002207519619077637, "loss": 0.1439, "step": 89504 }, { "epoch": 0.15870153611015808, "grad_norm": 0.72265625, "learning_rate": 0.00022073925638133154, "loss": 0.2025, "step": 89506 }, { "epoch": 0.1587050822754679, "grad_norm": 0.1904296875, "learning_rate": 0.0002207265547002751, "loss": 0.1663, "step": 89508 }, { "epoch": 0.1587086284407777, "grad_norm": 0.5078125, "learning_rate": 0.00022071385686465007, "loss": 0.1818, "step": 89510 }, { "epoch": 0.15871217460608752, "grad_norm": 1.2265625, "learning_rate": 0.0002207011628745118, "loss": 0.2, "step": 89512 }, { "epoch": 0.15871572077139734, "grad_norm": 0.5, "learning_rate": 0.00022068847272991597, "loss": 0.1773, "step": 89514 }, { "epoch": 0.15871926693670715, "grad_norm": 5.96875, "learning_rate": 0.00022067578643091806, "loss": 0.4527, "step": 89516 }, { "epoch": 0.15872281310201697, "grad_norm": 0.765625, "learning_rate": 0.00022066310397757325, "loss": 0.1714, "step": 89518 }, { "epoch": 0.15872635926732678, "grad_norm": 0.2060546875, "learning_rate": 0.00022065042536993738, "loss": 0.1629, "step": 89520 }, { "epoch": 0.1587299054326366, "grad_norm": 0.3203125, "learning_rate": 0.0002206377506080658, "loss": 0.2376, "step": 89522 }, { "epoch": 0.1587334515979464, "grad_norm": 1.2265625, "learning_rate": 0.00022062507969201393, "loss": 0.2596, "step": 89524 }, { "epoch": 0.15873699776325623, "grad_norm": 0.453125, "learning_rate": 0.00022061241262183695, "loss": 0.1556, "step": 89526 }, { "epoch": 0.15874054392856604, "grad_norm": 0.16015625, "learning_rate": 0.00022059974939759065, "loss": 0.1668, "step": 89528 }, { "epoch": 0.15874409009387586, "grad_norm": 9.1875, "learning_rate": 0.0002205870900193302, "loss": 0.3088, "step": 89530 }, { "epoch": 0.15874763625918567, "grad_norm": 0.4609375, "learning_rate": 0.000220574434487111, "loss": 0.1709, "step": 89532 }, { "epoch": 0.15875118242449549, "grad_norm": 0.384765625, "learning_rate": 0.00022056178280098845, "loss": 0.1694, "step": 89534 }, { "epoch": 0.1587547285898053, "grad_norm": 0.400390625, "learning_rate": 0.0002205491349610176, "loss": 0.1906, "step": 89536 }, { "epoch": 0.15875827475511511, "grad_norm": 0.4296875, "learning_rate": 0.0002205364909672543, "loss": 0.1677, "step": 89538 }, { "epoch": 0.15876182092042493, "grad_norm": 0.32421875, "learning_rate": 0.00022052385081975333, "loss": 0.1701, "step": 89540 }, { "epoch": 0.15876536708573474, "grad_norm": 1.8984375, "learning_rate": 0.0002205112145185702, "loss": 0.1929, "step": 89542 }, { "epoch": 0.15876891325104456, "grad_norm": 0.91015625, "learning_rate": 0.0002204985820637601, "loss": 0.1668, "step": 89544 }, { "epoch": 0.15877245941635437, "grad_norm": 0.62890625, "learning_rate": 0.00022048595345537848, "loss": 0.1785, "step": 89546 }, { "epoch": 0.1587760055816642, "grad_norm": 0.98828125, "learning_rate": 0.00022047332869348029, "loss": 0.1589, "step": 89548 }, { "epoch": 0.158779551746974, "grad_norm": 0.392578125, "learning_rate": 0.00022046070777812087, "loss": 0.155, "step": 89550 }, { "epoch": 0.15878309791228382, "grad_norm": 0.33203125, "learning_rate": 0.00022044809070935551, "loss": 0.1767, "step": 89552 }, { "epoch": 0.15878664407759363, "grad_norm": 1.359375, "learning_rate": 0.00022043547748723925, "loss": 0.1764, "step": 89554 }, { "epoch": 0.15879019024290345, "grad_norm": 0.400390625, "learning_rate": 0.00022042286811182734, "loss": 0.2035, "step": 89556 }, { "epoch": 0.15879373640821326, "grad_norm": 0.96875, "learning_rate": 0.00022041026258317473, "loss": 0.2151, "step": 89558 }, { "epoch": 0.1587972825735231, "grad_norm": 0.431640625, "learning_rate": 0.00022039766090133688, "loss": 0.1847, "step": 89560 }, { "epoch": 0.15880082873883292, "grad_norm": 1.234375, "learning_rate": 0.0002203850630663687, "loss": 0.2165, "step": 89562 }, { "epoch": 0.15880437490414273, "grad_norm": 1.953125, "learning_rate": 0.00022037246907832527, "loss": 0.3281, "step": 89564 }, { "epoch": 0.15880792106945255, "grad_norm": 0.40234375, "learning_rate": 0.00022035987893726178, "loss": 0.2463, "step": 89566 }, { "epoch": 0.15881146723476236, "grad_norm": 0.40234375, "learning_rate": 0.00022034729264323305, "loss": 0.1383, "step": 89568 }, { "epoch": 0.15881501340007217, "grad_norm": 0.201171875, "learning_rate": 0.00022033471019629441, "loss": 0.171, "step": 89570 }, { "epoch": 0.158818559565382, "grad_norm": 1.0859375, "learning_rate": 0.00022032213159650085, "loss": 0.2395, "step": 89572 }, { "epoch": 0.1588221057306918, "grad_norm": 4.3125, "learning_rate": 0.00022030955684390724, "loss": 0.2419, "step": 89574 }, { "epoch": 0.15882565189600162, "grad_norm": 0.2236328125, "learning_rate": 0.00022029698593856856, "loss": 0.1687, "step": 89576 }, { "epoch": 0.15882919806131143, "grad_norm": 0.89453125, "learning_rate": 0.00022028441888054007, "loss": 0.1489, "step": 89578 }, { "epoch": 0.15883274422662125, "grad_norm": 0.271484375, "learning_rate": 0.00022027185566987636, "loss": 0.2169, "step": 89580 }, { "epoch": 0.15883629039193106, "grad_norm": 0.341796875, "learning_rate": 0.00022025929630663257, "loss": 0.1579, "step": 89582 }, { "epoch": 0.15883983655724088, "grad_norm": 0.78125, "learning_rate": 0.00022024674079086372, "loss": 0.2257, "step": 89584 }, { "epoch": 0.1588433827225507, "grad_norm": 0.53515625, "learning_rate": 0.00022023418912262455, "loss": 0.2133, "step": 89586 }, { "epoch": 0.1588469288878605, "grad_norm": 0.37890625, "learning_rate": 0.00022022164130196993, "loss": 0.1731, "step": 89588 }, { "epoch": 0.15885047505317032, "grad_norm": 0.4609375, "learning_rate": 0.0002202090973289548, "loss": 0.1428, "step": 89590 }, { "epoch": 0.15885402121848013, "grad_norm": 0.37890625, "learning_rate": 0.00022019655720363415, "loss": 0.1775, "step": 89592 }, { "epoch": 0.15885756738378995, "grad_norm": 0.234375, "learning_rate": 0.00022018402092606262, "loss": 0.1513, "step": 89594 }, { "epoch": 0.15886111354909976, "grad_norm": 0.177734375, "learning_rate": 0.00022017148849629516, "loss": 0.156, "step": 89596 }, { "epoch": 0.15886465971440958, "grad_norm": 0.201171875, "learning_rate": 0.00022015895991438656, "loss": 0.1953, "step": 89598 }, { "epoch": 0.1588682058797194, "grad_norm": 0.2373046875, "learning_rate": 0.0002201464351803915, "loss": 0.1971, "step": 89600 }, { "epoch": 0.1588717520450292, "grad_norm": 0.375, "learning_rate": 0.00022013391429436503, "loss": 0.155, "step": 89602 }, { "epoch": 0.15887529821033902, "grad_norm": 0.73828125, "learning_rate": 0.00022012139725636167, "loss": 0.2585, "step": 89604 }, { "epoch": 0.15887884437564884, "grad_norm": 0.357421875, "learning_rate": 0.0002201088840664362, "loss": 0.2157, "step": 89606 }, { "epoch": 0.15888239054095865, "grad_norm": 0.5, "learning_rate": 0.00022009637472464327, "loss": 0.1924, "step": 89608 }, { "epoch": 0.15888593670626847, "grad_norm": 0.431640625, "learning_rate": 0.00022008386923103775, "loss": 0.1507, "step": 89610 }, { "epoch": 0.15888948287157828, "grad_norm": 0.3125, "learning_rate": 0.00022007136758567432, "loss": 0.1691, "step": 89612 }, { "epoch": 0.1588930290368881, "grad_norm": 0.498046875, "learning_rate": 0.00022005886978860767, "loss": 0.1558, "step": 89614 }, { "epoch": 0.1588965752021979, "grad_norm": 0.46875, "learning_rate": 0.00022004637583989214, "loss": 0.1646, "step": 89616 }, { "epoch": 0.15890012136750772, "grad_norm": 0.41015625, "learning_rate": 0.00022003388573958282, "loss": 0.1663, "step": 89618 }, { "epoch": 0.15890366753281754, "grad_norm": 0.7109375, "learning_rate": 0.00022002139948773417, "loss": 0.1788, "step": 89620 }, { "epoch": 0.15890721369812735, "grad_norm": 0.73046875, "learning_rate": 0.00022000891708440071, "loss": 0.1861, "step": 89622 }, { "epoch": 0.15891075986343717, "grad_norm": 0.50390625, "learning_rate": 0.00021999643852963714, "loss": 0.1588, "step": 89624 }, { "epoch": 0.15891430602874698, "grad_norm": 0.42578125, "learning_rate": 0.00021998396382349783, "loss": 0.1703, "step": 89626 }, { "epoch": 0.1589178521940568, "grad_norm": 0.232421875, "learning_rate": 0.00021997149296603763, "loss": 0.1375, "step": 89628 }, { "epoch": 0.1589213983593666, "grad_norm": 0.7734375, "learning_rate": 0.00021995902595731088, "loss": 0.2301, "step": 89630 }, { "epoch": 0.15892494452467643, "grad_norm": 0.5859375, "learning_rate": 0.00021994656279737218, "loss": 0.2045, "step": 89632 }, { "epoch": 0.15892849068998624, "grad_norm": 0.32421875, "learning_rate": 0.00021993410348627597, "loss": 0.1836, "step": 89634 }, { "epoch": 0.15893203685529605, "grad_norm": 0.47265625, "learning_rate": 0.00021992164802407693, "loss": 0.1676, "step": 89636 }, { "epoch": 0.15893558302060587, "grad_norm": 0.5546875, "learning_rate": 0.00021990919641082915, "loss": 0.2077, "step": 89638 }, { "epoch": 0.15893912918591568, "grad_norm": 1.734375, "learning_rate": 0.00021989674864658748, "loss": 0.1886, "step": 89640 }, { "epoch": 0.1589426753512255, "grad_norm": 0.318359375, "learning_rate": 0.0002198843047314063, "loss": 0.166, "step": 89642 }, { "epoch": 0.1589462215165353, "grad_norm": 0.6796875, "learning_rate": 0.00021987186466533983, "loss": 0.1354, "step": 89644 }, { "epoch": 0.15894976768184513, "grad_norm": 0.302734375, "learning_rate": 0.00021985942844844253, "loss": 0.1701, "step": 89646 }, { "epoch": 0.15895331384715494, "grad_norm": 0.400390625, "learning_rate": 0.00021984699608076891, "loss": 0.1649, "step": 89648 }, { "epoch": 0.15895686001246478, "grad_norm": 1.234375, "learning_rate": 0.00021983456756237323, "loss": 0.2488, "step": 89650 }, { "epoch": 0.1589604061777746, "grad_norm": 2.140625, "learning_rate": 0.00021982214289330995, "loss": 0.221, "step": 89652 }, { "epoch": 0.1589639523430844, "grad_norm": 0.4296875, "learning_rate": 0.00021980972207363337, "loss": 0.1577, "step": 89654 }, { "epoch": 0.15896749850839423, "grad_norm": 0.30859375, "learning_rate": 0.00021979730510339766, "loss": 0.1621, "step": 89656 }, { "epoch": 0.15897104467370404, "grad_norm": 0.81640625, "learning_rate": 0.00021978489198265735, "loss": 0.2002, "step": 89658 }, { "epoch": 0.15897459083901386, "grad_norm": 0.54296875, "learning_rate": 0.00021977248271146666, "loss": 0.1798, "step": 89660 }, { "epoch": 0.15897813700432367, "grad_norm": 0.45703125, "learning_rate": 0.00021976007728987976, "loss": 0.1547, "step": 89662 }, { "epoch": 0.1589816831696335, "grad_norm": 2.125, "learning_rate": 0.00021974767571795106, "loss": 0.4416, "step": 89664 }, { "epoch": 0.1589852293349433, "grad_norm": 0.318359375, "learning_rate": 0.00021973527799573465, "loss": 0.2053, "step": 89666 }, { "epoch": 0.15898877550025312, "grad_norm": 0.302734375, "learning_rate": 0.00021972288412328489, "loss": 0.1656, "step": 89668 }, { "epoch": 0.15899232166556293, "grad_norm": 0.302734375, "learning_rate": 0.00021971049410065584, "loss": 0.1812, "step": 89670 }, { "epoch": 0.15899586783087274, "grad_norm": 0.34375, "learning_rate": 0.0002196981079279017, "loss": 0.1545, "step": 89672 }, { "epoch": 0.15899941399618256, "grad_norm": 0.384765625, "learning_rate": 0.0002196857256050767, "loss": 0.1523, "step": 89674 }, { "epoch": 0.15900296016149237, "grad_norm": 0.193359375, "learning_rate": 0.00021967334713223516, "loss": 0.1622, "step": 89676 }, { "epoch": 0.1590065063268022, "grad_norm": 0.671875, "learning_rate": 0.0002196609725094309, "loss": 0.138, "step": 89678 }, { "epoch": 0.159010052492112, "grad_norm": 0.2275390625, "learning_rate": 0.0002196486017367182, "loss": 0.1605, "step": 89680 }, { "epoch": 0.15901359865742182, "grad_norm": 0.97265625, "learning_rate": 0.0002196362348141512, "loss": 0.2026, "step": 89682 }, { "epoch": 0.15901714482273163, "grad_norm": 0.318359375, "learning_rate": 0.00021962387174178383, "loss": 0.1671, "step": 89684 }, { "epoch": 0.15902069098804145, "grad_norm": 0.52734375, "learning_rate": 0.00021961151251967048, "loss": 0.2038, "step": 89686 }, { "epoch": 0.15902423715335126, "grad_norm": 0.314453125, "learning_rate": 0.00021959915714786474, "loss": 0.1857, "step": 89688 }, { "epoch": 0.15902778331866108, "grad_norm": 0.314453125, "learning_rate": 0.000219586805626421, "loss": 0.1631, "step": 89690 }, { "epoch": 0.1590313294839709, "grad_norm": 0.291015625, "learning_rate": 0.00021957445795539306, "loss": 0.1442, "step": 89692 }, { "epoch": 0.1590348756492807, "grad_norm": 0.58984375, "learning_rate": 0.00021956211413483526, "loss": 0.1456, "step": 89694 }, { "epoch": 0.15903842181459052, "grad_norm": 0.40625, "learning_rate": 0.00021954977416480115, "loss": 0.1885, "step": 89696 }, { "epoch": 0.15904196797990033, "grad_norm": 0.58984375, "learning_rate": 0.00021953743804534501, "loss": 0.1286, "step": 89698 }, { "epoch": 0.15904551414521015, "grad_norm": 0.5703125, "learning_rate": 0.0002195251057765207, "loss": 0.1613, "step": 89700 }, { "epoch": 0.15904906031051996, "grad_norm": 0.2041015625, "learning_rate": 0.000219512777358382, "loss": 0.1858, "step": 89702 }, { "epoch": 0.15905260647582978, "grad_norm": 0.287109375, "learning_rate": 0.00021950045279098306, "loss": 0.2169, "step": 89704 }, { "epoch": 0.1590561526411396, "grad_norm": 0.296875, "learning_rate": 0.00021948813207437764, "loss": 0.1656, "step": 89706 }, { "epoch": 0.1590596988064494, "grad_norm": 0.248046875, "learning_rate": 0.0002194758152086197, "loss": 0.1678, "step": 89708 }, { "epoch": 0.15906324497175922, "grad_norm": 0.26953125, "learning_rate": 0.0002194635021937631, "loss": 0.1582, "step": 89710 }, { "epoch": 0.15906679113706904, "grad_norm": 0.67578125, "learning_rate": 0.00021945119302986164, "loss": 0.2548, "step": 89712 }, { "epoch": 0.15907033730237885, "grad_norm": 1.8671875, "learning_rate": 0.0002194388877169691, "loss": 0.2054, "step": 89714 }, { "epoch": 0.15907388346768866, "grad_norm": 0.365234375, "learning_rate": 0.0002194265862551395, "loss": 0.2116, "step": 89716 }, { "epoch": 0.15907742963299848, "grad_norm": 2.859375, "learning_rate": 0.00021941428864442647, "loss": 0.2647, "step": 89718 }, { "epoch": 0.1590809757983083, "grad_norm": 0.1875, "learning_rate": 0.0002194019948848838, "loss": 0.172, "step": 89720 }, { "epoch": 0.1590845219636181, "grad_norm": 0.74609375, "learning_rate": 0.00021938970497656536, "loss": 0.201, "step": 89722 }, { "epoch": 0.15908806812892792, "grad_norm": 0.255859375, "learning_rate": 0.00021937741891952473, "loss": 0.1502, "step": 89724 }, { "epoch": 0.15909161429423774, "grad_norm": 1.625, "learning_rate": 0.00021936513671381595, "loss": 0.2541, "step": 89726 }, { "epoch": 0.15909516045954755, "grad_norm": 0.8828125, "learning_rate": 0.00021935285835949229, "loss": 0.1631, "step": 89728 }, { "epoch": 0.15909870662485737, "grad_norm": 0.57421875, "learning_rate": 0.00021934058385660778, "loss": 0.1215, "step": 89730 }, { "epoch": 0.15910225279016718, "grad_norm": 0.2578125, "learning_rate": 0.0002193283132052159, "loss": 0.2306, "step": 89732 }, { "epoch": 0.159105798955477, "grad_norm": 0.4609375, "learning_rate": 0.0002193160464053707, "loss": 0.2246, "step": 89734 }, { "epoch": 0.1591093451207868, "grad_norm": 0.55078125, "learning_rate": 0.00021930378345712525, "loss": 0.1925, "step": 89736 }, { "epoch": 0.15911289128609662, "grad_norm": 0.33984375, "learning_rate": 0.00021929152436053368, "loss": 0.1853, "step": 89738 }, { "epoch": 0.15911643745140647, "grad_norm": 0.25390625, "learning_rate": 0.00021927926911564937, "loss": 0.1364, "step": 89740 }, { "epoch": 0.15911998361671628, "grad_norm": 0.6015625, "learning_rate": 0.00021926701772252588, "loss": 0.1995, "step": 89742 }, { "epoch": 0.1591235297820261, "grad_norm": 0.57421875, "learning_rate": 0.00021925477018121695, "loss": 0.2131, "step": 89744 }, { "epoch": 0.1591270759473359, "grad_norm": 0.4296875, "learning_rate": 0.00021924252649177596, "loss": 0.2018, "step": 89746 }, { "epoch": 0.15913062211264573, "grad_norm": 0.62890625, "learning_rate": 0.00021923028665425662, "loss": 0.2336, "step": 89748 }, { "epoch": 0.15913416827795554, "grad_norm": 0.546875, "learning_rate": 0.00021921805066871244, "loss": 0.5087, "step": 89750 }, { "epoch": 0.15913771444326535, "grad_norm": 0.212890625, "learning_rate": 0.00021920581853519675, "loss": 0.1653, "step": 89752 }, { "epoch": 0.15914126060857517, "grad_norm": 0.625, "learning_rate": 0.0002191935902537633, "loss": 0.1952, "step": 89754 }, { "epoch": 0.15914480677388498, "grad_norm": 0.94921875, "learning_rate": 0.0002191813658244654, "loss": 0.2981, "step": 89756 }, { "epoch": 0.1591483529391948, "grad_norm": 0.5078125, "learning_rate": 0.00021916914524735664, "loss": 0.1798, "step": 89758 }, { "epoch": 0.1591518991045046, "grad_norm": 0.5546875, "learning_rate": 0.00021915692852249031, "loss": 0.1572, "step": 89760 }, { "epoch": 0.15915544526981443, "grad_norm": 0.296875, "learning_rate": 0.00021914471564991994, "loss": 0.1261, "step": 89762 }, { "epoch": 0.15915899143512424, "grad_norm": 0.431640625, "learning_rate": 0.00021913250662969886, "loss": 0.1514, "step": 89764 }, { "epoch": 0.15916253760043406, "grad_norm": 0.8828125, "learning_rate": 0.00021912030146188063, "loss": 0.2066, "step": 89766 }, { "epoch": 0.15916608376574387, "grad_norm": 0.28125, "learning_rate": 0.00021910810014651847, "loss": 0.1775, "step": 89768 }, { "epoch": 0.15916962993105369, "grad_norm": 0.431640625, "learning_rate": 0.00021909590268366584, "loss": 0.1556, "step": 89770 }, { "epoch": 0.1591731760963635, "grad_norm": 0.482421875, "learning_rate": 0.00021908370907337604, "loss": 0.1962, "step": 89772 }, { "epoch": 0.15917672226167331, "grad_norm": 0.296875, "learning_rate": 0.0002190715193157023, "loss": 0.1477, "step": 89774 }, { "epoch": 0.15918026842698313, "grad_norm": 0.330078125, "learning_rate": 0.00021905933341069824, "loss": 0.184, "step": 89776 }, { "epoch": 0.15918381459229294, "grad_norm": 0.5, "learning_rate": 0.00021904715135841668, "loss": 0.1853, "step": 89778 }, { "epoch": 0.15918736075760276, "grad_norm": 0.4375, "learning_rate": 0.00021903497315891134, "loss": 0.1837, "step": 89780 }, { "epoch": 0.15919090692291257, "grad_norm": 0.265625, "learning_rate": 0.0002190227988122352, "loss": 0.1842, "step": 89782 }, { "epoch": 0.1591944530882224, "grad_norm": 0.50390625, "learning_rate": 0.00021901062831844174, "loss": 0.3859, "step": 89784 }, { "epoch": 0.1591979992535322, "grad_norm": 0.5546875, "learning_rate": 0.00021899846167758395, "loss": 0.2971, "step": 89786 }, { "epoch": 0.15920154541884202, "grad_norm": 0.306640625, "learning_rate": 0.00021898629888971515, "loss": 0.1723, "step": 89788 }, { "epoch": 0.15920509158415183, "grad_norm": 0.51953125, "learning_rate": 0.00021897413995488861, "loss": 0.1818, "step": 89790 }, { "epoch": 0.15920863774946165, "grad_norm": 0.54296875, "learning_rate": 0.00021896198487315736, "loss": 0.1921, "step": 89792 }, { "epoch": 0.15921218391477146, "grad_norm": 0.50390625, "learning_rate": 0.00021894983364457464, "loss": 0.1432, "step": 89794 }, { "epoch": 0.15921573008008127, "grad_norm": 0.380859375, "learning_rate": 0.00021893768626919343, "loss": 0.4822, "step": 89796 }, { "epoch": 0.1592192762453911, "grad_norm": 0.5078125, "learning_rate": 0.0002189255427470672, "loss": 0.2343, "step": 89798 }, { "epoch": 0.1592228224107009, "grad_norm": 0.310546875, "learning_rate": 0.0002189134030782487, "loss": 0.1678, "step": 89800 }, { "epoch": 0.15922636857601072, "grad_norm": 0.451171875, "learning_rate": 0.00021890126726279132, "loss": 0.2003, "step": 89802 }, { "epoch": 0.15922991474132053, "grad_norm": 0.291015625, "learning_rate": 0.00021888913530074787, "loss": 0.2242, "step": 89804 }, { "epoch": 0.15923346090663035, "grad_norm": 0.322265625, "learning_rate": 0.00021887700719217155, "loss": 0.1754, "step": 89806 }, { "epoch": 0.15923700707194016, "grad_norm": 2.015625, "learning_rate": 0.00021886488293711544, "loss": 0.2069, "step": 89808 }, { "epoch": 0.15924055323724998, "grad_norm": 0.26171875, "learning_rate": 0.00021885276253563255, "loss": 0.1758, "step": 89810 }, { "epoch": 0.1592440994025598, "grad_norm": 0.4296875, "learning_rate": 0.0002188406459877757, "loss": 0.162, "step": 89812 }, { "epoch": 0.1592476455678696, "grad_norm": 0.302734375, "learning_rate": 0.00021882853329359815, "loss": 0.1855, "step": 89814 }, { "epoch": 0.15925119173317942, "grad_norm": 2.0625, "learning_rate": 0.00021881642445315281, "loss": 0.2817, "step": 89816 }, { "epoch": 0.15925473789848923, "grad_norm": 0.61328125, "learning_rate": 0.00021880431946649236, "loss": 0.1934, "step": 89818 }, { "epoch": 0.15925828406379905, "grad_norm": 0.400390625, "learning_rate": 0.0002187922183336701, "loss": 0.2038, "step": 89820 }, { "epoch": 0.15926183022910886, "grad_norm": 0.8828125, "learning_rate": 0.00021878012105473868, "loss": 0.1718, "step": 89822 }, { "epoch": 0.15926537639441868, "grad_norm": 0.443359375, "learning_rate": 0.00021876802762975134, "loss": 0.1759, "step": 89824 }, { "epoch": 0.1592689225597285, "grad_norm": 1.21875, "learning_rate": 0.00021875593805876066, "loss": 0.1917, "step": 89826 }, { "epoch": 0.1592724687250383, "grad_norm": 0.56640625, "learning_rate": 0.00021874385234181963, "loss": 0.199, "step": 89828 }, { "epoch": 0.15927601489034812, "grad_norm": 0.9765625, "learning_rate": 0.00021873177047898105, "loss": 0.2014, "step": 89830 }, { "epoch": 0.15927956105565796, "grad_norm": 0.78515625, "learning_rate": 0.00021871969247029793, "loss": 0.1961, "step": 89832 }, { "epoch": 0.15928310722096778, "grad_norm": 0.62109375, "learning_rate": 0.00021870761831582292, "loss": 0.2011, "step": 89834 }, { "epoch": 0.1592866533862776, "grad_norm": 0.18359375, "learning_rate": 0.00021869554801560867, "loss": 0.1234, "step": 89836 }, { "epoch": 0.1592901995515874, "grad_norm": 0.251953125, "learning_rate": 0.0002186834815697084, "loss": 0.1855, "step": 89838 }, { "epoch": 0.15929374571689722, "grad_norm": 2.34375, "learning_rate": 0.0002186714189781746, "loss": 0.4508, "step": 89840 }, { "epoch": 0.15929729188220704, "grad_norm": 0.359375, "learning_rate": 0.00021865936024106014, "loss": 0.1435, "step": 89842 }, { "epoch": 0.15930083804751685, "grad_norm": 0.5625, "learning_rate": 0.00021864730535841754, "loss": 0.1496, "step": 89844 }, { "epoch": 0.15930438421282667, "grad_norm": 0.392578125, "learning_rate": 0.00021863525433029972, "loss": 0.1617, "step": 89846 }, { "epoch": 0.15930793037813648, "grad_norm": 0.396484375, "learning_rate": 0.00021862320715675937, "loss": 0.224, "step": 89848 }, { "epoch": 0.1593114765434463, "grad_norm": 0.37890625, "learning_rate": 0.00021861116383784922, "loss": 0.1626, "step": 89850 }, { "epoch": 0.1593150227087561, "grad_norm": 0.609375, "learning_rate": 0.00021859912437362177, "loss": 0.2238, "step": 89852 }, { "epoch": 0.15931856887406592, "grad_norm": 0.291015625, "learning_rate": 0.0002185870887641297, "loss": 0.1459, "step": 89854 }, { "epoch": 0.15932211503937574, "grad_norm": 0.388671875, "learning_rate": 0.00021857505700942587, "loss": 0.1538, "step": 89856 }, { "epoch": 0.15932566120468555, "grad_norm": 1.203125, "learning_rate": 0.00021856302910956278, "loss": 0.2628, "step": 89858 }, { "epoch": 0.15932920736999537, "grad_norm": 0.36328125, "learning_rate": 0.00021855100506459288, "loss": 0.2185, "step": 89860 }, { "epoch": 0.15933275353530518, "grad_norm": 0.3125, "learning_rate": 0.00021853898487456886, "loss": 0.1529, "step": 89862 }, { "epoch": 0.159336299700615, "grad_norm": 0.6171875, "learning_rate": 0.00021852696853954335, "loss": 0.2192, "step": 89864 }, { "epoch": 0.1593398458659248, "grad_norm": 0.345703125, "learning_rate": 0.00021851495605956892, "loss": 0.2165, "step": 89866 }, { "epoch": 0.15934339203123463, "grad_norm": 0.44921875, "learning_rate": 0.000218502947434698, "loss": 0.1921, "step": 89868 }, { "epoch": 0.15934693819654444, "grad_norm": 0.30078125, "learning_rate": 0.0002184909426649832, "loss": 0.2106, "step": 89870 }, { "epoch": 0.15935048436185426, "grad_norm": 0.6953125, "learning_rate": 0.00021847894175047687, "loss": 0.2281, "step": 89872 }, { "epoch": 0.15935403052716407, "grad_norm": 0.318359375, "learning_rate": 0.00021846694469123174, "loss": 0.1943, "step": 89874 }, { "epoch": 0.15935757669247388, "grad_norm": 0.6328125, "learning_rate": 0.00021845495148729995, "loss": 0.2489, "step": 89876 }, { "epoch": 0.1593611228577837, "grad_norm": 0.6328125, "learning_rate": 0.00021844296213873433, "loss": 0.2811, "step": 89878 }, { "epoch": 0.1593646690230935, "grad_norm": 0.435546875, "learning_rate": 0.00021843097664558695, "loss": 0.1618, "step": 89880 }, { "epoch": 0.15936821518840333, "grad_norm": 0.439453125, "learning_rate": 0.00021841899500791055, "loss": 0.1735, "step": 89882 }, { "epoch": 0.15937176135371314, "grad_norm": 0.5625, "learning_rate": 0.0002184070172257573, "loss": 0.2024, "step": 89884 }, { "epoch": 0.15937530751902296, "grad_norm": 3.03125, "learning_rate": 0.00021839504329917967, "loss": 0.2327, "step": 89886 }, { "epoch": 0.15937885368433277, "grad_norm": 0.3515625, "learning_rate": 0.00021838307322823012, "loss": 0.164, "step": 89888 }, { "epoch": 0.1593823998496426, "grad_norm": 0.38671875, "learning_rate": 0.00021837110701296075, "loss": 0.1966, "step": 89890 }, { "epoch": 0.1593859460149524, "grad_norm": 0.322265625, "learning_rate": 0.00021835914465342422, "loss": 0.1456, "step": 89892 }, { "epoch": 0.15938949218026222, "grad_norm": 0.232421875, "learning_rate": 0.00021834718614967247, "loss": 0.1283, "step": 89894 }, { "epoch": 0.15939303834557203, "grad_norm": 0.2041015625, "learning_rate": 0.0002183352315017582, "loss": 0.1768, "step": 89896 }, { "epoch": 0.15939658451088184, "grad_norm": 0.55859375, "learning_rate": 0.0002183232807097334, "loss": 0.228, "step": 89898 }, { "epoch": 0.15940013067619166, "grad_norm": 0.79296875, "learning_rate": 0.0002183113337736504, "loss": 0.2124, "step": 89900 }, { "epoch": 0.15940367684150147, "grad_norm": 0.48828125, "learning_rate": 0.0002182993906935615, "loss": 0.2242, "step": 89902 }, { "epoch": 0.1594072230068113, "grad_norm": 0.67578125, "learning_rate": 0.0002182874514695189, "loss": 0.1852, "step": 89904 }, { "epoch": 0.1594107691721211, "grad_norm": 0.484375, "learning_rate": 0.00021827551610157492, "loss": 0.1603, "step": 89906 }, { "epoch": 0.15941431533743092, "grad_norm": 0.359375, "learning_rate": 0.00021826358458978162, "loss": 0.1663, "step": 89908 }, { "epoch": 0.15941786150274073, "grad_norm": 1.671875, "learning_rate": 0.00021825165693419122, "loss": 0.2737, "step": 89910 }, { "epoch": 0.15942140766805055, "grad_norm": 0.71875, "learning_rate": 0.0002182397331348558, "loss": 0.2991, "step": 89912 }, { "epoch": 0.15942495383336036, "grad_norm": 0.333984375, "learning_rate": 0.0002182278131918278, "loss": 0.181, "step": 89914 }, { "epoch": 0.15942849999867018, "grad_norm": 0.53515625, "learning_rate": 0.000218215897105159, "loss": 0.2723, "step": 89916 }, { "epoch": 0.15943204616398, "grad_norm": 0.400390625, "learning_rate": 0.0002182039848749017, "loss": 0.1467, "step": 89918 }, { "epoch": 0.1594355923292898, "grad_norm": 0.404296875, "learning_rate": 0.00021819207650110794, "loss": 0.2643, "step": 89920 }, { "epoch": 0.15943913849459965, "grad_norm": 3.0, "learning_rate": 0.0002181801719838299, "loss": 0.1564, "step": 89922 }, { "epoch": 0.15944268465990946, "grad_norm": 0.306640625, "learning_rate": 0.0002181682713231195, "loss": 0.2941, "step": 89924 }, { "epoch": 0.15944623082521928, "grad_norm": 0.357421875, "learning_rate": 0.00021815637451902883, "loss": 0.1779, "step": 89926 }, { "epoch": 0.1594497769905291, "grad_norm": 0.2470703125, "learning_rate": 0.00021814448157160996, "loss": 0.1464, "step": 89928 }, { "epoch": 0.1594533231558389, "grad_norm": 0.5625, "learning_rate": 0.00021813259248091495, "loss": 0.1556, "step": 89930 }, { "epoch": 0.15945686932114872, "grad_norm": 0.443359375, "learning_rate": 0.00021812070724699567, "loss": 0.2196, "step": 89932 }, { "epoch": 0.15946041548645853, "grad_norm": 0.55859375, "learning_rate": 0.0002181088258699041, "loss": 0.1736, "step": 89934 }, { "epoch": 0.15946396165176835, "grad_norm": 0.6640625, "learning_rate": 0.00021809694834969235, "loss": 0.2648, "step": 89936 }, { "epoch": 0.15946750781707816, "grad_norm": 0.71484375, "learning_rate": 0.00021808507468641224, "loss": 0.1466, "step": 89938 }, { "epoch": 0.15947105398238798, "grad_norm": 0.283203125, "learning_rate": 0.00021807320488011577, "loss": 0.1281, "step": 89940 }, { "epoch": 0.1594746001476978, "grad_norm": 0.44140625, "learning_rate": 0.00021806133893085482, "loss": 0.1969, "step": 89942 }, { "epoch": 0.1594781463130076, "grad_norm": 0.326171875, "learning_rate": 0.0002180494768386812, "loss": 0.1496, "step": 89944 }, { "epoch": 0.15948169247831742, "grad_norm": 0.44921875, "learning_rate": 0.00021803761860364698, "loss": 0.1649, "step": 89946 }, { "epoch": 0.15948523864362724, "grad_norm": 0.30859375, "learning_rate": 0.00021802576422580395, "loss": 0.2054, "step": 89948 }, { "epoch": 0.15948878480893705, "grad_norm": 0.255859375, "learning_rate": 0.0002180139137052038, "loss": 0.1675, "step": 89950 }, { "epoch": 0.15949233097424687, "grad_norm": 0.375, "learning_rate": 0.00021800206704189847, "loss": 0.2206, "step": 89952 }, { "epoch": 0.15949587713955668, "grad_norm": 0.2294921875, "learning_rate": 0.00021799022423593979, "loss": 0.2018, "step": 89954 }, { "epoch": 0.1594994233048665, "grad_norm": 0.419921875, "learning_rate": 0.0002179783852873796, "loss": 0.1589, "step": 89956 }, { "epoch": 0.1595029694701763, "grad_norm": 0.95703125, "learning_rate": 0.00021796655019626953, "loss": 0.1948, "step": 89958 }, { "epoch": 0.15950651563548612, "grad_norm": 0.78125, "learning_rate": 0.00021795471896266153, "loss": 0.2264, "step": 89960 }, { "epoch": 0.15951006180079594, "grad_norm": 0.62109375, "learning_rate": 0.0002179428915866071, "loss": 0.1897, "step": 89962 }, { "epoch": 0.15951360796610575, "grad_norm": 0.68359375, "learning_rate": 0.0002179310680681583, "loss": 0.3493, "step": 89964 }, { "epoch": 0.15951715413141557, "grad_norm": 0.55859375, "learning_rate": 0.00021791924840736638, "loss": 0.1854, "step": 89966 }, { "epoch": 0.15952070029672538, "grad_norm": 0.2060546875, "learning_rate": 0.00021790743260428352, "loss": 0.1881, "step": 89968 }, { "epoch": 0.1595242464620352, "grad_norm": 0.6328125, "learning_rate": 0.00021789562065896095, "loss": 0.1698, "step": 89970 }, { "epoch": 0.159527792627345, "grad_norm": 0.55859375, "learning_rate": 0.0002178838125714508, "loss": 0.186, "step": 89972 }, { "epoch": 0.15953133879265483, "grad_norm": 0.328125, "learning_rate": 0.00021787200834180425, "loss": 0.1843, "step": 89974 }, { "epoch": 0.15953488495796464, "grad_norm": 1.8515625, "learning_rate": 0.00021786020797007327, "loss": 0.2346, "step": 89976 }, { "epoch": 0.15953843112327445, "grad_norm": 1.7734375, "learning_rate": 0.00021784841145630927, "loss": 0.2145, "step": 89978 }, { "epoch": 0.15954197728858427, "grad_norm": 0.6171875, "learning_rate": 0.00021783661880056394, "loss": 0.2285, "step": 89980 }, { "epoch": 0.15954552345389408, "grad_norm": 0.50390625, "learning_rate": 0.0002178248300028888, "loss": 0.2223, "step": 89982 }, { "epoch": 0.1595490696192039, "grad_norm": 0.333984375, "learning_rate": 0.00021781304506333538, "loss": 0.1371, "step": 89984 }, { "epoch": 0.1595526157845137, "grad_norm": 0.474609375, "learning_rate": 0.00021780126398195532, "loss": 0.1919, "step": 89986 }, { "epoch": 0.15955616194982353, "grad_norm": 0.423828125, "learning_rate": 0.00021778948675880012, "loss": 0.1335, "step": 89988 }, { "epoch": 0.15955970811513334, "grad_norm": 0.5390625, "learning_rate": 0.00021777771339392122, "loss": 0.1822, "step": 89990 }, { "epoch": 0.15956325428044316, "grad_norm": 0.41015625, "learning_rate": 0.00021776594388737006, "loss": 0.1502, "step": 89992 }, { "epoch": 0.15956680044575297, "grad_norm": 4.3125, "learning_rate": 0.00021775417823919832, "loss": 0.2451, "step": 89994 }, { "epoch": 0.15957034661106279, "grad_norm": 0.271484375, "learning_rate": 0.00021774241644945724, "loss": 0.184, "step": 89996 }, { "epoch": 0.1595738927763726, "grad_norm": 0.369140625, "learning_rate": 0.00021773065851819843, "loss": 0.1471, "step": 89998 }, { "epoch": 0.15957743894168241, "grad_norm": 0.369140625, "learning_rate": 0.0002177189044454733, "loss": 0.1407, "step": 90000 }, { "epoch": 0.15958098510699223, "grad_norm": 0.296875, "learning_rate": 0.00021770715423133304, "loss": 0.2886, "step": 90002 }, { "epoch": 0.15958453127230204, "grad_norm": 0.169921875, "learning_rate": 0.00021769540787582934, "loss": 0.2364, "step": 90004 }, { "epoch": 0.15958807743761186, "grad_norm": 0.259765625, "learning_rate": 0.00021768366537901324, "loss": 0.1491, "step": 90006 }, { "epoch": 0.15959162360292167, "grad_norm": 1.4296875, "learning_rate": 0.00021767192674093636, "loss": 0.1841, "step": 90008 }, { "epoch": 0.1595951697682315, "grad_norm": 0.330078125, "learning_rate": 0.00021766019196165, "loss": 0.1444, "step": 90010 }, { "epoch": 0.15959871593354133, "grad_norm": 0.55078125, "learning_rate": 0.00021764846104120546, "loss": 0.1638, "step": 90012 }, { "epoch": 0.15960226209885114, "grad_norm": 0.84375, "learning_rate": 0.00021763673397965388, "loss": 0.2023, "step": 90014 }, { "epoch": 0.15960580826416096, "grad_norm": 0.3125, "learning_rate": 0.00021762501077704683, "loss": 0.2088, "step": 90016 }, { "epoch": 0.15960935442947077, "grad_norm": 0.265625, "learning_rate": 0.0002176132914334354, "loss": 0.1691, "step": 90018 }, { "epoch": 0.1596129005947806, "grad_norm": 1.171875, "learning_rate": 0.0002176015759488708, "loss": 0.1654, "step": 90020 }, { "epoch": 0.1596164467600904, "grad_norm": 1.1015625, "learning_rate": 0.0002175898643234045, "loss": 0.4096, "step": 90022 }, { "epoch": 0.15961999292540022, "grad_norm": 0.30078125, "learning_rate": 0.00021757815655708745, "loss": 0.2174, "step": 90024 }, { "epoch": 0.15962353909071003, "grad_norm": 0.47265625, "learning_rate": 0.00021756645264997098, "loss": 0.1841, "step": 90026 }, { "epoch": 0.15962708525601985, "grad_norm": 0.373046875, "learning_rate": 0.00021755475260210624, "loss": 0.1682, "step": 90028 }, { "epoch": 0.15963063142132966, "grad_norm": 0.58203125, "learning_rate": 0.00021754305641354457, "loss": 0.1852, "step": 90030 }, { "epoch": 0.15963417758663948, "grad_norm": 0.45703125, "learning_rate": 0.00021753136408433682, "loss": 0.1411, "step": 90032 }, { "epoch": 0.1596377237519493, "grad_norm": 0.58984375, "learning_rate": 0.0002175196756145344, "loss": 0.1654, "step": 90034 }, { "epoch": 0.1596412699172591, "grad_norm": 0.9375, "learning_rate": 0.0002175079910041883, "loss": 0.2031, "step": 90036 }, { "epoch": 0.15964481608256892, "grad_norm": 0.376953125, "learning_rate": 0.00021749631025334964, "loss": 0.1794, "step": 90038 }, { "epoch": 0.15964836224787873, "grad_norm": 0.3203125, "learning_rate": 0.00021748463336206944, "loss": 0.2143, "step": 90040 }, { "epoch": 0.15965190841318855, "grad_norm": 0.2392578125, "learning_rate": 0.00021747296033039877, "loss": 0.1601, "step": 90042 }, { "epoch": 0.15965545457849836, "grad_norm": 0.5234375, "learning_rate": 0.0002174612911583888, "loss": 0.1681, "step": 90044 }, { "epoch": 0.15965900074380818, "grad_norm": 1.2421875, "learning_rate": 0.00021744962584609057, "loss": 0.1779, "step": 90046 }, { "epoch": 0.159662546909118, "grad_norm": 0.2412109375, "learning_rate": 0.00021743796439355507, "loss": 0.3281, "step": 90048 }, { "epoch": 0.1596660930744278, "grad_norm": 0.431640625, "learning_rate": 0.000217426306800833, "loss": 0.1893, "step": 90050 }, { "epoch": 0.15966963923973762, "grad_norm": 2.03125, "learning_rate": 0.0002174146530679759, "loss": 0.1676, "step": 90052 }, { "epoch": 0.15967318540504744, "grad_norm": 0.34765625, "learning_rate": 0.00021740300319503426, "loss": 0.2463, "step": 90054 }, { "epoch": 0.15967673157035725, "grad_norm": 0.47265625, "learning_rate": 0.00021739135718205926, "loss": 0.2279, "step": 90056 }, { "epoch": 0.15968027773566706, "grad_norm": 0.4453125, "learning_rate": 0.0002173797150291019, "loss": 0.1929, "step": 90058 }, { "epoch": 0.15968382390097688, "grad_norm": 0.30078125, "learning_rate": 0.00021736807673621277, "loss": 0.1809, "step": 90060 }, { "epoch": 0.1596873700662867, "grad_norm": 0.56640625, "learning_rate": 0.00021735644230344318, "loss": 0.2147, "step": 90062 }, { "epoch": 0.1596909162315965, "grad_norm": 0.515625, "learning_rate": 0.00021734481173084366, "loss": 0.2396, "step": 90064 }, { "epoch": 0.15969446239690632, "grad_norm": 0.2578125, "learning_rate": 0.0002173331850184653, "loss": 0.1681, "step": 90066 }, { "epoch": 0.15969800856221614, "grad_norm": 0.43359375, "learning_rate": 0.00021732156216635874, "loss": 0.1768, "step": 90068 }, { "epoch": 0.15970155472752595, "grad_norm": 0.85546875, "learning_rate": 0.00021730994317457515, "loss": 0.1496, "step": 90070 }, { "epoch": 0.15970510089283577, "grad_norm": 0.2392578125, "learning_rate": 0.00021729832804316497, "loss": 0.2963, "step": 90072 }, { "epoch": 0.15970864705814558, "grad_norm": 0.314453125, "learning_rate": 0.00021728671677217925, "loss": 0.1714, "step": 90074 }, { "epoch": 0.1597121932234554, "grad_norm": 0.4609375, "learning_rate": 0.0002172751093616688, "loss": 0.1736, "step": 90076 }, { "epoch": 0.1597157393887652, "grad_norm": 0.419921875, "learning_rate": 0.00021726350581168413, "loss": 0.1803, "step": 90078 }, { "epoch": 0.15971928555407502, "grad_norm": 0.1796875, "learning_rate": 0.00021725190612227624, "loss": 0.2582, "step": 90080 }, { "epoch": 0.15972283171938484, "grad_norm": 0.328125, "learning_rate": 0.00021724031029349557, "loss": 0.2781, "step": 90082 }, { "epoch": 0.15972637788469465, "grad_norm": 1.3359375, "learning_rate": 0.00021722871832539315, "loss": 0.1842, "step": 90084 }, { "epoch": 0.15972992405000447, "grad_norm": 0.2470703125, "learning_rate": 0.00021721713021801946, "loss": 0.1701, "step": 90086 }, { "epoch": 0.15973347021531428, "grad_norm": 0.279296875, "learning_rate": 0.0002172055459714254, "loss": 0.1731, "step": 90088 }, { "epoch": 0.1597370163806241, "grad_norm": 0.2392578125, "learning_rate": 0.0002171939655856614, "loss": 0.1917, "step": 90090 }, { "epoch": 0.1597405625459339, "grad_norm": 0.2177734375, "learning_rate": 0.00021718238906077818, "loss": 0.1663, "step": 90092 }, { "epoch": 0.15974410871124373, "grad_norm": 0.451171875, "learning_rate": 0.00021717081639682643, "loss": 0.1587, "step": 90094 }, { "epoch": 0.15974765487655354, "grad_norm": 0.9296875, "learning_rate": 0.00021715924759385674, "loss": 0.2594, "step": 90096 }, { "epoch": 0.15975120104186336, "grad_norm": 1.40625, "learning_rate": 0.00021714768265191964, "loss": 0.2485, "step": 90098 }, { "epoch": 0.15975474720717317, "grad_norm": 1.890625, "learning_rate": 0.00021713612157106568, "loss": 0.1854, "step": 90100 }, { "epoch": 0.15975829337248298, "grad_norm": 0.341796875, "learning_rate": 0.00021712456435134565, "loss": 0.2046, "step": 90102 }, { "epoch": 0.15976183953779283, "grad_norm": 0.322265625, "learning_rate": 0.00021711301099280989, "loss": 0.216, "step": 90104 }, { "epoch": 0.15976538570310264, "grad_norm": 0.765625, "learning_rate": 0.00021710146149550894, "loss": 0.1719, "step": 90106 }, { "epoch": 0.15976893186841246, "grad_norm": 1.125, "learning_rate": 0.0002170899158594934, "loss": 0.1711, "step": 90108 }, { "epoch": 0.15977247803372227, "grad_norm": 0.322265625, "learning_rate": 0.00021707837408481363, "loss": 0.1662, "step": 90110 }, { "epoch": 0.15977602419903209, "grad_norm": 0.359375, "learning_rate": 0.00021706683617152034, "loss": 0.1357, "step": 90112 }, { "epoch": 0.1597795703643419, "grad_norm": 1.7734375, "learning_rate": 0.0002170553021196636, "loss": 0.4441, "step": 90114 }, { "epoch": 0.15978311652965171, "grad_norm": 0.34375, "learning_rate": 0.00021704377192929435, "loss": 0.1373, "step": 90116 }, { "epoch": 0.15978666269496153, "grad_norm": 0.43359375, "learning_rate": 0.00021703224560046255, "loss": 0.2031, "step": 90118 }, { "epoch": 0.15979020886027134, "grad_norm": 0.455078125, "learning_rate": 0.00021702072313321903, "loss": 0.194, "step": 90120 }, { "epoch": 0.15979375502558116, "grad_norm": 0.259765625, "learning_rate": 0.00021700920452761378, "loss": 0.216, "step": 90122 }, { "epoch": 0.15979730119089097, "grad_norm": 0.86328125, "learning_rate": 0.00021699768978369741, "loss": 0.3406, "step": 90124 }, { "epoch": 0.1598008473562008, "grad_norm": 0.62109375, "learning_rate": 0.0002169861789015203, "loss": 0.1905, "step": 90126 }, { "epoch": 0.1598043935215106, "grad_norm": 0.470703125, "learning_rate": 0.00021697467188113275, "loss": 0.2728, "step": 90128 }, { "epoch": 0.15980793968682042, "grad_norm": 0.314453125, "learning_rate": 0.00021696316872258503, "loss": 0.1668, "step": 90130 }, { "epoch": 0.15981148585213023, "grad_norm": 0.302734375, "learning_rate": 0.00021695166942592735, "loss": 0.2006, "step": 90132 }, { "epoch": 0.15981503201744005, "grad_norm": 0.3046875, "learning_rate": 0.00021694017399121032, "loss": 0.2437, "step": 90134 }, { "epoch": 0.15981857818274986, "grad_norm": 0.419921875, "learning_rate": 0.00021692868241848394, "loss": 0.1543, "step": 90136 }, { "epoch": 0.15982212434805967, "grad_norm": 1.3515625, "learning_rate": 0.00021691719470779861, "loss": 0.1674, "step": 90138 }, { "epoch": 0.1598256705133695, "grad_norm": 0.294921875, "learning_rate": 0.00021690571085920436, "loss": 0.174, "step": 90140 }, { "epoch": 0.1598292166786793, "grad_norm": 0.384765625, "learning_rate": 0.0002168942308727517, "loss": 0.2874, "step": 90142 }, { "epoch": 0.15983276284398912, "grad_norm": 0.271484375, "learning_rate": 0.0002168827547484907, "loss": 0.138, "step": 90144 }, { "epoch": 0.15983630900929893, "grad_norm": 0.259765625, "learning_rate": 0.00021687128248647154, "loss": 0.1815, "step": 90146 }, { "epoch": 0.15983985517460875, "grad_norm": 0.65234375, "learning_rate": 0.00021685981408674446, "loss": 0.1792, "step": 90148 }, { "epoch": 0.15984340133991856, "grad_norm": 0.359375, "learning_rate": 0.00021684834954935944, "loss": 0.2739, "step": 90150 }, { "epoch": 0.15984694750522838, "grad_norm": 0.58203125, "learning_rate": 0.00021683688887436692, "loss": 0.1971, "step": 90152 }, { "epoch": 0.1598504936705382, "grad_norm": 0.53515625, "learning_rate": 0.00021682543206181666, "loss": 0.2217, "step": 90154 }, { "epoch": 0.159854039835848, "grad_norm": 0.474609375, "learning_rate": 0.00021681397911175912, "loss": 0.191, "step": 90156 }, { "epoch": 0.15985758600115782, "grad_norm": 0.421875, "learning_rate": 0.00021680253002424407, "loss": 0.1616, "step": 90158 }, { "epoch": 0.15986113216646763, "grad_norm": 0.6953125, "learning_rate": 0.00021679108479932187, "loss": 0.2089, "step": 90160 }, { "epoch": 0.15986467833177745, "grad_norm": 0.375, "learning_rate": 0.00021677964343704226, "loss": 0.1747, "step": 90162 }, { "epoch": 0.15986822449708726, "grad_norm": 0.62109375, "learning_rate": 0.00021676820593745556, "loss": 0.1538, "step": 90164 }, { "epoch": 0.15987177066239708, "grad_norm": 0.326171875, "learning_rate": 0.00021675677230061175, "loss": 0.1558, "step": 90166 }, { "epoch": 0.1598753168277069, "grad_norm": 0.72265625, "learning_rate": 0.00021674534252656073, "loss": 0.1535, "step": 90168 }, { "epoch": 0.1598788629930167, "grad_norm": 0.55078125, "learning_rate": 0.00021673391661535243, "loss": 0.2293, "step": 90170 }, { "epoch": 0.15988240915832652, "grad_norm": 0.765625, "learning_rate": 0.00021672249456703696, "loss": 0.2115, "step": 90172 }, { "epoch": 0.15988595532363634, "grad_norm": 0.671875, "learning_rate": 0.00021671107638166425, "loss": 0.1393, "step": 90174 }, { "epoch": 0.15988950148894615, "grad_norm": 0.6640625, "learning_rate": 0.0002166996620592843, "loss": 0.1429, "step": 90176 }, { "epoch": 0.15989304765425597, "grad_norm": 0.53515625, "learning_rate": 0.0002166882515999468, "loss": 0.1584, "step": 90178 }, { "epoch": 0.15989659381956578, "grad_norm": 2.0625, "learning_rate": 0.00021667684500370187, "loss": 0.4179, "step": 90180 }, { "epoch": 0.1599001399848756, "grad_norm": 0.8671875, "learning_rate": 0.00021666544227059932, "loss": 0.3414, "step": 90182 }, { "epoch": 0.1599036861501854, "grad_norm": 2.296875, "learning_rate": 0.00021665404340068906, "loss": 0.3091, "step": 90184 }, { "epoch": 0.15990723231549522, "grad_norm": 0.66015625, "learning_rate": 0.0002166426483940208, "loss": 0.1944, "step": 90186 }, { "epoch": 0.15991077848080504, "grad_norm": 0.435546875, "learning_rate": 0.00021663125725064465, "loss": 0.1702, "step": 90188 }, { "epoch": 0.15991432464611485, "grad_norm": 0.283203125, "learning_rate": 0.00021661986997061004, "loss": 0.2508, "step": 90190 }, { "epoch": 0.15991787081142467, "grad_norm": 1.3671875, "learning_rate": 0.00021660848655396712, "loss": 0.2399, "step": 90192 }, { "epoch": 0.1599214169767345, "grad_norm": 0.44921875, "learning_rate": 0.00021659710700076557, "loss": 0.1596, "step": 90194 }, { "epoch": 0.15992496314204432, "grad_norm": 1.28125, "learning_rate": 0.00021658573131105516, "loss": 0.1539, "step": 90196 }, { "epoch": 0.15992850930735414, "grad_norm": 0.451171875, "learning_rate": 0.00021657435948488544, "loss": 0.239, "step": 90198 }, { "epoch": 0.15993205547266395, "grad_norm": 0.44140625, "learning_rate": 0.00021656299152230646, "loss": 0.1294, "step": 90200 }, { "epoch": 0.15993560163797377, "grad_norm": 0.48046875, "learning_rate": 0.00021655162742336778, "loss": 0.1385, "step": 90202 }, { "epoch": 0.15993914780328358, "grad_norm": 0.1591796875, "learning_rate": 0.00021654026718811908, "loss": 0.194, "step": 90204 }, { "epoch": 0.1599426939685934, "grad_norm": 0.5390625, "learning_rate": 0.0002165289108166101, "loss": 0.2512, "step": 90206 }, { "epoch": 0.1599462401339032, "grad_norm": 0.263671875, "learning_rate": 0.0002165175583088904, "loss": 0.1973, "step": 90208 }, { "epoch": 0.15994978629921303, "grad_norm": 0.5703125, "learning_rate": 0.0002165062096650099, "loss": 0.167, "step": 90210 }, { "epoch": 0.15995333246452284, "grad_norm": 0.7109375, "learning_rate": 0.00021649486488501784, "loss": 0.1565, "step": 90212 }, { "epoch": 0.15995687862983266, "grad_norm": 1.03125, "learning_rate": 0.0002164835239689642, "loss": 0.2645, "step": 90214 }, { "epoch": 0.15996042479514247, "grad_norm": 0.5234375, "learning_rate": 0.00021647218691689827, "loss": 0.2016, "step": 90216 }, { "epoch": 0.15996397096045228, "grad_norm": 0.2431640625, "learning_rate": 0.00021646085372886993, "loss": 0.2006, "step": 90218 }, { "epoch": 0.1599675171257621, "grad_norm": 0.42578125, "learning_rate": 0.00021644952440492854, "loss": 0.216, "step": 90220 }, { "epoch": 0.1599710632910719, "grad_norm": 0.267578125, "learning_rate": 0.00021643819894512367, "loss": 0.1593, "step": 90222 }, { "epoch": 0.15997460945638173, "grad_norm": 1.4296875, "learning_rate": 0.00021642687734950495, "loss": 0.298, "step": 90224 }, { "epoch": 0.15997815562169154, "grad_norm": 0.412109375, "learning_rate": 0.00021641555961812177, "loss": 0.1378, "step": 90226 }, { "epoch": 0.15998170178700136, "grad_norm": 0.4609375, "learning_rate": 0.00021640424575102368, "loss": 0.2096, "step": 90228 }, { "epoch": 0.15998524795231117, "grad_norm": 0.265625, "learning_rate": 0.00021639293574826017, "loss": 0.1896, "step": 90230 }, { "epoch": 0.15998879411762099, "grad_norm": 0.2890625, "learning_rate": 0.00021638162960988075, "loss": 0.1462, "step": 90232 }, { "epoch": 0.1599923402829308, "grad_norm": 0.7578125, "learning_rate": 0.00021637032733593482, "loss": 0.1364, "step": 90234 }, { "epoch": 0.15999588644824062, "grad_norm": 0.197265625, "learning_rate": 0.00021635902892647168, "loss": 0.2298, "step": 90236 }, { "epoch": 0.15999943261355043, "grad_norm": 1.03125, "learning_rate": 0.000216347734381541, "loss": 0.2299, "step": 90238 }, { "epoch": 0.16000297877886024, "grad_norm": 2.296875, "learning_rate": 0.00021633644370119191, "loss": 0.1975, "step": 90240 }, { "epoch": 0.16000652494417006, "grad_norm": 0.396484375, "learning_rate": 0.00021632515688547405, "loss": 0.1548, "step": 90242 }, { "epoch": 0.16001007110947987, "grad_norm": 2.171875, "learning_rate": 0.00021631387393443658, "loss": 0.1933, "step": 90244 }, { "epoch": 0.1600136172747897, "grad_norm": 0.267578125, "learning_rate": 0.00021630259484812906, "loss": 0.2254, "step": 90246 }, { "epoch": 0.1600171634400995, "grad_norm": 0.26171875, "learning_rate": 0.00021629131962660043, "loss": 0.1813, "step": 90248 }, { "epoch": 0.16002070960540932, "grad_norm": 0.6484375, "learning_rate": 0.00021628004826990054, "loss": 0.2329, "step": 90250 }, { "epoch": 0.16002425577071913, "grad_norm": 0.279296875, "learning_rate": 0.00021626878077807816, "loss": 0.169, "step": 90252 }, { "epoch": 0.16002780193602895, "grad_norm": 0.337890625, "learning_rate": 0.00021625751715118288, "loss": 0.1853, "step": 90254 }, { "epoch": 0.16003134810133876, "grad_norm": 0.392578125, "learning_rate": 0.00021624625738926384, "loss": 0.1868, "step": 90256 }, { "epoch": 0.16003489426664858, "grad_norm": 0.216796875, "learning_rate": 0.0002162350014923705, "loss": 0.1424, "step": 90258 }, { "epoch": 0.1600384404319584, "grad_norm": 0.490234375, "learning_rate": 0.0002162237494605517, "loss": 0.2109, "step": 90260 }, { "epoch": 0.1600419865972682, "grad_norm": 0.171875, "learning_rate": 0.00021621250129385695, "loss": 0.1409, "step": 90262 }, { "epoch": 0.16004553276257802, "grad_norm": 0.68359375, "learning_rate": 0.00021620125699233533, "loss": 0.1696, "step": 90264 }, { "epoch": 0.16004907892788783, "grad_norm": 0.30859375, "learning_rate": 0.00021619001655603609, "loss": 0.1687, "step": 90266 }, { "epoch": 0.16005262509319765, "grad_norm": 0.404296875, "learning_rate": 0.00021617877998500825, "loss": 0.172, "step": 90268 }, { "epoch": 0.16005617125850746, "grad_norm": 0.259765625, "learning_rate": 0.0002161675472793011, "loss": 0.1709, "step": 90270 }, { "epoch": 0.16005971742381728, "grad_norm": 0.6875, "learning_rate": 0.00021615631843896374, "loss": 0.1313, "step": 90272 }, { "epoch": 0.1600632635891271, "grad_norm": 0.55859375, "learning_rate": 0.00021614509346404518, "loss": 0.1406, "step": 90274 }, { "epoch": 0.1600668097544369, "grad_norm": 0.57421875, "learning_rate": 0.00021613387235459465, "loss": 0.1388, "step": 90276 }, { "epoch": 0.16007035591974672, "grad_norm": 0.578125, "learning_rate": 0.00021612265511066116, "loss": 0.2088, "step": 90278 }, { "epoch": 0.16007390208505654, "grad_norm": 1.5078125, "learning_rate": 0.00021611144173229357, "loss": 0.2918, "step": 90280 }, { "epoch": 0.16007744825036635, "grad_norm": 0.435546875, "learning_rate": 0.00021610023221954128, "loss": 0.1595, "step": 90282 }, { "epoch": 0.1600809944156762, "grad_norm": 0.38671875, "learning_rate": 0.00021608902657245312, "loss": 0.1377, "step": 90284 }, { "epoch": 0.160084540580986, "grad_norm": 0.515625, "learning_rate": 0.00021607782479107817, "loss": 0.1101, "step": 90286 }, { "epoch": 0.16008808674629582, "grad_norm": 0.59765625, "learning_rate": 0.0002160666268754652, "loss": 0.1677, "step": 90288 }, { "epoch": 0.16009163291160564, "grad_norm": 0.466796875, "learning_rate": 0.00021605543282566348, "loss": 0.1967, "step": 90290 }, { "epoch": 0.16009517907691545, "grad_norm": 4.03125, "learning_rate": 0.00021604424264172183, "loss": 0.2372, "step": 90292 }, { "epoch": 0.16009872524222526, "grad_norm": 0.416015625, "learning_rate": 0.00021603305632368925, "loss": 0.1514, "step": 90294 }, { "epoch": 0.16010227140753508, "grad_norm": 0.26171875, "learning_rate": 0.00021602187387161463, "loss": 0.1559, "step": 90296 }, { "epoch": 0.1601058175728449, "grad_norm": 0.890625, "learning_rate": 0.00021601069528554673, "loss": 0.1544, "step": 90298 }, { "epoch": 0.1601093637381547, "grad_norm": 0.90234375, "learning_rate": 0.0002159995205655347, "loss": 0.1903, "step": 90300 }, { "epoch": 0.16011290990346452, "grad_norm": 0.296875, "learning_rate": 0.00021598834971162708, "loss": 0.1649, "step": 90302 }, { "epoch": 0.16011645606877434, "grad_norm": 0.19921875, "learning_rate": 0.0002159771827238731, "loss": 0.1291, "step": 90304 }, { "epoch": 0.16012000223408415, "grad_norm": 1.203125, "learning_rate": 0.00021596601960232137, "loss": 0.1576, "step": 90306 }, { "epoch": 0.16012354839939397, "grad_norm": 0.283203125, "learning_rate": 0.00021595486034702084, "loss": 0.1679, "step": 90308 }, { "epoch": 0.16012709456470378, "grad_norm": 0.48046875, "learning_rate": 0.0002159437049580201, "loss": 0.2271, "step": 90310 }, { "epoch": 0.1601306407300136, "grad_norm": 0.44921875, "learning_rate": 0.0002159325534353681, "loss": 0.1431, "step": 90312 }, { "epoch": 0.1601341868953234, "grad_norm": 0.443359375, "learning_rate": 0.00021592140577911362, "loss": 0.2346, "step": 90314 }, { "epoch": 0.16013773306063322, "grad_norm": 0.302734375, "learning_rate": 0.0002159102619893054, "loss": 0.1365, "step": 90316 }, { "epoch": 0.16014127922594304, "grad_norm": 1.078125, "learning_rate": 0.00021589912206599216, "loss": 0.2826, "step": 90318 }, { "epoch": 0.16014482539125285, "grad_norm": 0.62109375, "learning_rate": 0.00021588798600922254, "loss": 0.1816, "step": 90320 }, { "epoch": 0.16014837155656267, "grad_norm": 1.6953125, "learning_rate": 0.00021587685381904538, "loss": 0.3289, "step": 90322 }, { "epoch": 0.16015191772187248, "grad_norm": 0.44921875, "learning_rate": 0.0002158657254955092, "loss": 0.1907, "step": 90324 }, { "epoch": 0.1601554638871823, "grad_norm": 0.263671875, "learning_rate": 0.0002158546010386629, "loss": 0.2255, "step": 90326 }, { "epoch": 0.1601590100524921, "grad_norm": 0.462890625, "learning_rate": 0.00021584348044855492, "loss": 0.1683, "step": 90328 }, { "epoch": 0.16016255621780193, "grad_norm": 0.5546875, "learning_rate": 0.00021583236372523404, "loss": 0.1564, "step": 90330 }, { "epoch": 0.16016610238311174, "grad_norm": 0.40625, "learning_rate": 0.0002158212508687487, "loss": 0.1478, "step": 90332 }, { "epoch": 0.16016964854842156, "grad_norm": 0.205078125, "learning_rate": 0.0002158101418791478, "loss": 0.1548, "step": 90334 }, { "epoch": 0.16017319471373137, "grad_norm": 0.435546875, "learning_rate": 0.00021579903675647967, "loss": 0.2043, "step": 90336 }, { "epoch": 0.16017674087904119, "grad_norm": 0.39453125, "learning_rate": 0.00021578793550079285, "loss": 0.1963, "step": 90338 }, { "epoch": 0.160180287044351, "grad_norm": 0.5390625, "learning_rate": 0.00021577683811213613, "loss": 0.1482, "step": 90340 }, { "epoch": 0.16018383320966081, "grad_norm": 0.326171875, "learning_rate": 0.00021576574459055778, "loss": 0.1787, "step": 90342 }, { "epoch": 0.16018737937497063, "grad_norm": 1.015625, "learning_rate": 0.00021575465493610645, "loss": 0.2279, "step": 90344 }, { "epoch": 0.16019092554028044, "grad_norm": 0.4453125, "learning_rate": 0.0002157435691488306, "loss": 0.2011, "step": 90346 }, { "epoch": 0.16019447170559026, "grad_norm": 0.2353515625, "learning_rate": 0.00021573248722877897, "loss": 0.2954, "step": 90348 }, { "epoch": 0.16019801787090007, "grad_norm": 0.51171875, "learning_rate": 0.00021572140917599958, "loss": 0.1843, "step": 90350 }, { "epoch": 0.1602015640362099, "grad_norm": 0.42578125, "learning_rate": 0.00021571033499054112, "loss": 0.1376, "step": 90352 }, { "epoch": 0.1602051102015197, "grad_norm": 0.23046875, "learning_rate": 0.0002156992646724521, "loss": 0.1695, "step": 90354 }, { "epoch": 0.16020865636682952, "grad_norm": 0.2890625, "learning_rate": 0.00021568819822178066, "loss": 0.162, "step": 90356 }, { "epoch": 0.16021220253213933, "grad_norm": 0.2734375, "learning_rate": 0.00021567713563857557, "loss": 0.1784, "step": 90358 }, { "epoch": 0.16021574869744915, "grad_norm": 0.38671875, "learning_rate": 0.0002156660769228849, "loss": 0.2261, "step": 90360 }, { "epoch": 0.16021929486275896, "grad_norm": 0.435546875, "learning_rate": 0.00021565502207475707, "loss": 0.1569, "step": 90362 }, { "epoch": 0.16022284102806877, "grad_norm": 1.046875, "learning_rate": 0.0002156439710942405, "loss": 0.1914, "step": 90364 }, { "epoch": 0.1602263871933786, "grad_norm": 0.2734375, "learning_rate": 0.0002156329239813837, "loss": 0.1425, "step": 90366 }, { "epoch": 0.1602299333586884, "grad_norm": 0.361328125, "learning_rate": 0.00021562188073623455, "loss": 0.1561, "step": 90368 }, { "epoch": 0.16023347952399822, "grad_norm": 0.41796875, "learning_rate": 0.00021561084135884176, "loss": 0.1464, "step": 90370 }, { "epoch": 0.16023702568930803, "grad_norm": 0.8125, "learning_rate": 0.00021559980584925331, "loss": 0.4433, "step": 90372 }, { "epoch": 0.16024057185461785, "grad_norm": 0.68359375, "learning_rate": 0.0002155887742075177, "loss": 0.2313, "step": 90374 }, { "epoch": 0.1602441180199277, "grad_norm": 0.466796875, "learning_rate": 0.000215577746433683, "loss": 0.1866, "step": 90376 }, { "epoch": 0.1602476641852375, "grad_norm": 0.2216796875, "learning_rate": 0.00021556672252779746, "loss": 0.1967, "step": 90378 }, { "epoch": 0.16025121035054732, "grad_norm": 0.25, "learning_rate": 0.00021555570248990943, "loss": 0.2312, "step": 90380 }, { "epoch": 0.16025475651585713, "grad_norm": 0.625, "learning_rate": 0.00021554468632006702, "loss": 0.1782, "step": 90382 }, { "epoch": 0.16025830268116695, "grad_norm": 0.2421875, "learning_rate": 0.0002155336740183183, "loss": 0.1811, "step": 90384 }, { "epoch": 0.16026184884647676, "grad_norm": 1.0078125, "learning_rate": 0.00021552266558471153, "loss": 0.1721, "step": 90386 }, { "epoch": 0.16026539501178658, "grad_norm": 1.3046875, "learning_rate": 0.00021551166101929493, "loss": 0.255, "step": 90388 }, { "epoch": 0.1602689411770964, "grad_norm": 0.322265625, "learning_rate": 0.00021550066032211652, "loss": 0.1651, "step": 90390 }, { "epoch": 0.1602724873424062, "grad_norm": 0.33984375, "learning_rate": 0.00021548966349322452, "loss": 0.2278, "step": 90392 }, { "epoch": 0.16027603350771602, "grad_norm": 0.34375, "learning_rate": 0.0002154786705326669, "loss": 0.1901, "step": 90394 }, { "epoch": 0.16027957967302583, "grad_norm": 0.306640625, "learning_rate": 0.00021546768144049168, "loss": 0.2018, "step": 90396 }, { "epoch": 0.16028312583833565, "grad_norm": 0.28125, "learning_rate": 0.00021545669621674722, "loss": 0.1258, "step": 90398 }, { "epoch": 0.16028667200364546, "grad_norm": 0.73046875, "learning_rate": 0.0002154457148614812, "loss": 0.4016, "step": 90400 }, { "epoch": 0.16029021816895528, "grad_norm": 0.216796875, "learning_rate": 0.00021543473737474186, "loss": 0.1862, "step": 90402 }, { "epoch": 0.1602937643342651, "grad_norm": 0.5546875, "learning_rate": 0.00021542376375657712, "loss": 0.1366, "step": 90404 }, { "epoch": 0.1602973104995749, "grad_norm": 0.328125, "learning_rate": 0.0002154127940070351, "loss": 0.1627, "step": 90406 }, { "epoch": 0.16030085666488472, "grad_norm": 0.37109375, "learning_rate": 0.00021540182812616364, "loss": 0.1723, "step": 90408 }, { "epoch": 0.16030440283019454, "grad_norm": 0.71484375, "learning_rate": 0.00021539086611401073, "loss": 0.1891, "step": 90410 }, { "epoch": 0.16030794899550435, "grad_norm": 0.41796875, "learning_rate": 0.00021537990797062436, "loss": 0.1671, "step": 90412 }, { "epoch": 0.16031149516081417, "grad_norm": 0.373046875, "learning_rate": 0.0002153689536960524, "loss": 0.1907, "step": 90414 }, { "epoch": 0.16031504132612398, "grad_norm": 0.283203125, "learning_rate": 0.00021535800329034293, "loss": 0.1603, "step": 90416 }, { "epoch": 0.1603185874914338, "grad_norm": 0.40625, "learning_rate": 0.00021534705675354342, "loss": 0.1607, "step": 90418 }, { "epoch": 0.1603221336567436, "grad_norm": 1.078125, "learning_rate": 0.00021533611408570215, "loss": 0.1862, "step": 90420 }, { "epoch": 0.16032567982205342, "grad_norm": 1.2265625, "learning_rate": 0.0002153251752868669, "loss": 0.2916, "step": 90422 }, { "epoch": 0.16032922598736324, "grad_norm": 0.7890625, "learning_rate": 0.00021531424035708548, "loss": 0.1926, "step": 90424 }, { "epoch": 0.16033277215267305, "grad_norm": 0.2421875, "learning_rate": 0.00021530330929640554, "loss": 0.1469, "step": 90426 }, { "epoch": 0.16033631831798287, "grad_norm": 0.8984375, "learning_rate": 0.00021529238210487512, "loss": 0.1766, "step": 90428 }, { "epoch": 0.16033986448329268, "grad_norm": 0.37890625, "learning_rate": 0.00021528145878254185, "loss": 0.1522, "step": 90430 }, { "epoch": 0.1603434106486025, "grad_norm": 0.375, "learning_rate": 0.00021527053932945373, "loss": 0.1613, "step": 90432 }, { "epoch": 0.1603469568139123, "grad_norm": 0.205078125, "learning_rate": 0.00021525962374565827, "loss": 0.1664, "step": 90434 }, { "epoch": 0.16035050297922213, "grad_norm": 0.53125, "learning_rate": 0.0002152487120312032, "loss": 0.2304, "step": 90436 }, { "epoch": 0.16035404914453194, "grad_norm": 0.259765625, "learning_rate": 0.0002152378041861364, "loss": 0.1783, "step": 90438 }, { "epoch": 0.16035759530984176, "grad_norm": 0.373046875, "learning_rate": 0.0002152269002105056, "loss": 0.2252, "step": 90440 }, { "epoch": 0.16036114147515157, "grad_norm": 0.46875, "learning_rate": 0.0002152160001043584, "loss": 0.2237, "step": 90442 }, { "epoch": 0.16036468764046138, "grad_norm": 0.2197265625, "learning_rate": 0.0002152051038677423, "loss": 0.2023, "step": 90444 }, { "epoch": 0.1603682338057712, "grad_norm": 0.796875, "learning_rate": 0.00021519421150070528, "loss": 0.1566, "step": 90446 }, { "epoch": 0.160371779971081, "grad_norm": 0.498046875, "learning_rate": 0.00021518332300329477, "loss": 0.1719, "step": 90448 }, { "epoch": 0.16037532613639083, "grad_norm": 2.203125, "learning_rate": 0.00021517243837555845, "loss": 0.2487, "step": 90450 }, { "epoch": 0.16037887230170064, "grad_norm": 0.232421875, "learning_rate": 0.00021516155761754397, "loss": 0.1943, "step": 90452 }, { "epoch": 0.16038241846701046, "grad_norm": 0.95703125, "learning_rate": 0.00021515068072929877, "loss": 0.1728, "step": 90454 }, { "epoch": 0.16038596463232027, "grad_norm": 0.94921875, "learning_rate": 0.00021513980771087067, "loss": 0.2299, "step": 90456 }, { "epoch": 0.16038951079763009, "grad_norm": 0.322265625, "learning_rate": 0.00021512893856230687, "loss": 0.1496, "step": 90458 }, { "epoch": 0.1603930569629399, "grad_norm": 0.3671875, "learning_rate": 0.00021511807328365517, "loss": 0.1834, "step": 90460 }, { "epoch": 0.16039660312824972, "grad_norm": 0.33984375, "learning_rate": 0.00021510721187496305, "loss": 0.1867, "step": 90462 }, { "epoch": 0.16040014929355953, "grad_norm": 0.47265625, "learning_rate": 0.00021509635433627792, "loss": 0.184, "step": 90464 }, { "epoch": 0.16040369545886937, "grad_norm": 0.32421875, "learning_rate": 0.00021508550066764737, "loss": 0.1701, "step": 90466 }, { "epoch": 0.1604072416241792, "grad_norm": 0.396484375, "learning_rate": 0.00021507465086911877, "loss": 0.1789, "step": 90468 }, { "epoch": 0.160410787789489, "grad_norm": 2.03125, "learning_rate": 0.00021506380494073962, "loss": 0.2408, "step": 90470 }, { "epoch": 0.16041433395479882, "grad_norm": 1.203125, "learning_rate": 0.00021505296288255738, "loss": 0.1859, "step": 90472 }, { "epoch": 0.16041788012010863, "grad_norm": 0.703125, "learning_rate": 0.0002150421246946195, "loss": 0.2288, "step": 90474 }, { "epoch": 0.16042142628541844, "grad_norm": 0.4765625, "learning_rate": 0.00021503129037697322, "loss": 0.1937, "step": 90476 }, { "epoch": 0.16042497245072826, "grad_norm": 0.99609375, "learning_rate": 0.00021502045992966614, "loss": 0.1341, "step": 90478 }, { "epoch": 0.16042851861603807, "grad_norm": 0.314453125, "learning_rate": 0.0002150096333527455, "loss": 0.2156, "step": 90480 }, { "epoch": 0.1604320647813479, "grad_norm": 0.357421875, "learning_rate": 0.00021499881064625855, "loss": 0.1497, "step": 90482 }, { "epoch": 0.1604356109466577, "grad_norm": 0.353515625, "learning_rate": 0.0002149879918102528, "loss": 0.2208, "step": 90484 }, { "epoch": 0.16043915711196752, "grad_norm": 0.5234375, "learning_rate": 0.00021497717684477544, "loss": 0.1335, "step": 90486 }, { "epoch": 0.16044270327727733, "grad_norm": 0.3046875, "learning_rate": 0.00021496636574987394, "loss": 0.1553, "step": 90488 }, { "epoch": 0.16044624944258715, "grad_norm": 0.265625, "learning_rate": 0.00021495555852559534, "loss": 0.1508, "step": 90490 }, { "epoch": 0.16044979560789696, "grad_norm": 0.87109375, "learning_rate": 0.00021494475517198713, "loss": 0.1568, "step": 90492 }, { "epoch": 0.16045334177320678, "grad_norm": 0.32421875, "learning_rate": 0.00021493395568909638, "loss": 0.2352, "step": 90494 }, { "epoch": 0.1604568879385166, "grad_norm": 0.1845703125, "learning_rate": 0.00021492316007697053, "loss": 0.2294, "step": 90496 }, { "epoch": 0.1604604341038264, "grad_norm": 0.3203125, "learning_rate": 0.00021491236833565648, "loss": 0.1625, "step": 90498 }, { "epoch": 0.16046398026913622, "grad_norm": 0.416015625, "learning_rate": 0.0002149015804652017, "loss": 0.1908, "step": 90500 }, { "epoch": 0.16046752643444603, "grad_norm": 0.6015625, "learning_rate": 0.00021489079646565338, "loss": 0.1409, "step": 90502 }, { "epoch": 0.16047107259975585, "grad_norm": 0.1943359375, "learning_rate": 0.00021488001633705847, "loss": 0.1138, "step": 90504 }, { "epoch": 0.16047461876506566, "grad_norm": 0.44921875, "learning_rate": 0.00021486924007946433, "loss": 0.172, "step": 90506 }, { "epoch": 0.16047816493037548, "grad_norm": 0.2177734375, "learning_rate": 0.00021485846769291785, "loss": 0.1836, "step": 90508 }, { "epoch": 0.1604817110956853, "grad_norm": 0.44921875, "learning_rate": 0.00021484769917746627, "loss": 0.1411, "step": 90510 }, { "epoch": 0.1604852572609951, "grad_norm": 0.26171875, "learning_rate": 0.00021483693453315685, "loss": 0.2124, "step": 90512 }, { "epoch": 0.16048880342630492, "grad_norm": 0.625, "learning_rate": 0.00021482617376003638, "loss": 0.3128, "step": 90514 }, { "epoch": 0.16049234959161474, "grad_norm": 0.55859375, "learning_rate": 0.00021481541685815204, "loss": 0.2222, "step": 90516 }, { "epoch": 0.16049589575692455, "grad_norm": 0.3984375, "learning_rate": 0.000214804663827551, "loss": 0.2892, "step": 90518 }, { "epoch": 0.16049944192223436, "grad_norm": 0.2099609375, "learning_rate": 0.0002147939146682802, "loss": 0.1425, "step": 90520 }, { "epoch": 0.16050298808754418, "grad_norm": 0.5859375, "learning_rate": 0.00021478316938038655, "loss": 0.2017, "step": 90522 }, { "epoch": 0.160506534252854, "grad_norm": 0.29296875, "learning_rate": 0.00021477242796391716, "loss": 0.1575, "step": 90524 }, { "epoch": 0.1605100804181638, "grad_norm": 0.236328125, "learning_rate": 0.00021476169041891895, "loss": 0.2206, "step": 90526 }, { "epoch": 0.16051362658347362, "grad_norm": 0.361328125, "learning_rate": 0.0002147509567454388, "loss": 0.2417, "step": 90528 }, { "epoch": 0.16051717274878344, "grad_norm": 0.193359375, "learning_rate": 0.00021474022694352394, "loss": 0.1513, "step": 90530 }, { "epoch": 0.16052071891409325, "grad_norm": 0.40234375, "learning_rate": 0.000214729501013221, "loss": 0.2208, "step": 90532 }, { "epoch": 0.16052426507940307, "grad_norm": 0.287109375, "learning_rate": 0.00021471877895457695, "loss": 0.1639, "step": 90534 }, { "epoch": 0.16052781124471288, "grad_norm": 0.3125, "learning_rate": 0.00021470806076763876, "loss": 0.1499, "step": 90536 }, { "epoch": 0.1605313574100227, "grad_norm": 0.71875, "learning_rate": 0.0002146973464524534, "loss": 0.2063, "step": 90538 }, { "epoch": 0.1605349035753325, "grad_norm": 0.26953125, "learning_rate": 0.00021468663600906753, "loss": 0.1473, "step": 90540 }, { "epoch": 0.16053844974064232, "grad_norm": 0.2109375, "learning_rate": 0.00021467592943752797, "loss": 0.1533, "step": 90542 }, { "epoch": 0.16054199590595214, "grad_norm": 0.4375, "learning_rate": 0.00021466522673788165, "loss": 0.1177, "step": 90544 }, { "epoch": 0.16054554207126195, "grad_norm": 0.48046875, "learning_rate": 0.00021465452791017556, "loss": 0.1547, "step": 90546 }, { "epoch": 0.16054908823657177, "grad_norm": 0.5625, "learning_rate": 0.00021464383295445602, "loss": 0.1574, "step": 90548 }, { "epoch": 0.16055263440188158, "grad_norm": 1.25, "learning_rate": 0.0002146331418707703, "loss": 0.3312, "step": 90550 }, { "epoch": 0.1605561805671914, "grad_norm": 0.259765625, "learning_rate": 0.0002146224546591648, "loss": 0.1461, "step": 90552 }, { "epoch": 0.1605597267325012, "grad_norm": 0.50390625, "learning_rate": 0.00021461177131968642, "loss": 0.5179, "step": 90554 }, { "epoch": 0.16056327289781105, "grad_norm": 0.31640625, "learning_rate": 0.00021460109185238185, "loss": 0.4403, "step": 90556 }, { "epoch": 0.16056681906312087, "grad_norm": 0.67578125, "learning_rate": 0.0002145904162572979, "loss": 0.2011, "step": 90558 }, { "epoch": 0.16057036522843068, "grad_norm": 0.6640625, "learning_rate": 0.00021457974453448106, "loss": 0.1928, "step": 90560 }, { "epoch": 0.1605739113937405, "grad_norm": 0.341796875, "learning_rate": 0.00021456907668397822, "loss": 0.2001, "step": 90562 }, { "epoch": 0.1605774575590503, "grad_norm": 0.515625, "learning_rate": 0.00021455841270583576, "loss": 0.2037, "step": 90564 }, { "epoch": 0.16058100372436013, "grad_norm": 10.25, "learning_rate": 0.00021454775260010053, "loss": 0.3137, "step": 90566 }, { "epoch": 0.16058454988966994, "grad_norm": 0.58984375, "learning_rate": 0.00021453709636681914, "loss": 0.1809, "step": 90568 }, { "epoch": 0.16058809605497976, "grad_norm": 0.4765625, "learning_rate": 0.0002145264440060382, "loss": 0.1906, "step": 90570 }, { "epoch": 0.16059164222028957, "grad_norm": 0.78125, "learning_rate": 0.0002145157955178042, "loss": 0.2142, "step": 90572 }, { "epoch": 0.16059518838559939, "grad_norm": 0.330078125, "learning_rate": 0.00021450515090216368, "loss": 0.2196, "step": 90574 }, { "epoch": 0.1605987345509092, "grad_norm": 0.365234375, "learning_rate": 0.00021449451015916336, "loss": 0.1942, "step": 90576 }, { "epoch": 0.16060228071621901, "grad_norm": 0.578125, "learning_rate": 0.00021448387328884972, "loss": 0.1382, "step": 90578 }, { "epoch": 0.16060582688152883, "grad_norm": 0.3984375, "learning_rate": 0.0002144732402912692, "loss": 0.2158, "step": 90580 }, { "epoch": 0.16060937304683864, "grad_norm": 0.6015625, "learning_rate": 0.00021446261116646824, "loss": 0.1867, "step": 90582 }, { "epoch": 0.16061291921214846, "grad_norm": 0.1298828125, "learning_rate": 0.0002144519859144936, "loss": 0.1364, "step": 90584 }, { "epoch": 0.16061646537745827, "grad_norm": 0.56640625, "learning_rate": 0.00021444136453539157, "loss": 0.1721, "step": 90586 }, { "epoch": 0.1606200115427681, "grad_norm": 0.2412109375, "learning_rate": 0.0002144307470292085, "loss": 0.1581, "step": 90588 }, { "epoch": 0.1606235577080779, "grad_norm": 0.39453125, "learning_rate": 0.0002144201333959911, "loss": 0.1317, "step": 90590 }, { "epoch": 0.16062710387338772, "grad_norm": 0.4765625, "learning_rate": 0.00021440952363578554, "loss": 0.157, "step": 90592 }, { "epoch": 0.16063065003869753, "grad_norm": 1.6953125, "learning_rate": 0.0002143989177486383, "loss": 0.1658, "step": 90594 }, { "epoch": 0.16063419620400735, "grad_norm": 0.8125, "learning_rate": 0.00021438831573459584, "loss": 0.1814, "step": 90596 }, { "epoch": 0.16063774236931716, "grad_norm": 0.8828125, "learning_rate": 0.00021437771759370445, "loss": 0.1312, "step": 90598 }, { "epoch": 0.16064128853462697, "grad_norm": 0.5390625, "learning_rate": 0.0002143671233260104, "loss": 0.1412, "step": 90600 }, { "epoch": 0.1606448346999368, "grad_norm": 0.62109375, "learning_rate": 0.0002143565329315602, "loss": 0.1379, "step": 90602 }, { "epoch": 0.1606483808652466, "grad_norm": 0.283203125, "learning_rate": 0.00021434594641040012, "loss": 0.1849, "step": 90604 }, { "epoch": 0.16065192703055642, "grad_norm": 1.40625, "learning_rate": 0.00021433536376257625, "loss": 0.2734, "step": 90606 }, { "epoch": 0.16065547319586623, "grad_norm": 1.2421875, "learning_rate": 0.00021432478498813523, "loss": 0.1898, "step": 90608 }, { "epoch": 0.16065901936117605, "grad_norm": 3.109375, "learning_rate": 0.00021431421008712295, "loss": 0.2337, "step": 90610 }, { "epoch": 0.16066256552648586, "grad_norm": 0.5625, "learning_rate": 0.00021430363905958614, "loss": 0.303, "step": 90612 }, { "epoch": 0.16066611169179568, "grad_norm": 0.302734375, "learning_rate": 0.00021429307190557042, "loss": 0.2035, "step": 90614 }, { "epoch": 0.1606696578571055, "grad_norm": 0.40625, "learning_rate": 0.00021428250862512248, "loss": 0.1963, "step": 90616 }, { "epoch": 0.1606732040224153, "grad_norm": 0.77734375, "learning_rate": 0.00021427194921828845, "loss": 0.2274, "step": 90618 }, { "epoch": 0.16067675018772512, "grad_norm": 0.98046875, "learning_rate": 0.00021426139368511433, "loss": 0.2518, "step": 90620 }, { "epoch": 0.16068029635303493, "grad_norm": 0.26171875, "learning_rate": 0.00021425084202564634, "loss": 0.1888, "step": 90622 }, { "epoch": 0.16068384251834475, "grad_norm": 0.337890625, "learning_rate": 0.0002142402942399306, "loss": 0.1237, "step": 90624 }, { "epoch": 0.16068738868365456, "grad_norm": 0.4609375, "learning_rate": 0.00021422975032801342, "loss": 0.1598, "step": 90626 }, { "epoch": 0.16069093484896438, "grad_norm": 0.271484375, "learning_rate": 0.0002142192102899408, "loss": 0.1664, "step": 90628 }, { "epoch": 0.1606944810142742, "grad_norm": 0.828125, "learning_rate": 0.0002142086741257588, "loss": 0.1999, "step": 90630 }, { "epoch": 0.160698027179584, "grad_norm": 1.6640625, "learning_rate": 0.00021419814183551352, "loss": 0.4149, "step": 90632 }, { "epoch": 0.16070157334489382, "grad_norm": 0.2353515625, "learning_rate": 0.00021418761341925104, "loss": 0.1362, "step": 90634 }, { "epoch": 0.16070511951020364, "grad_norm": 1.6484375, "learning_rate": 0.0002141770888770175, "loss": 0.2214, "step": 90636 }, { "epoch": 0.16070866567551345, "grad_norm": 0.384765625, "learning_rate": 0.00021416656820885866, "loss": 0.1785, "step": 90638 }, { "epoch": 0.16071221184082327, "grad_norm": 0.76953125, "learning_rate": 0.0002141560514148207, "loss": 0.2058, "step": 90640 }, { "epoch": 0.16071575800613308, "grad_norm": 0.2490234375, "learning_rate": 0.0002141455384949497, "loss": 0.1906, "step": 90642 }, { "epoch": 0.1607193041714429, "grad_norm": 0.216796875, "learning_rate": 0.0002141350294492916, "loss": 0.1369, "step": 90644 }, { "epoch": 0.1607228503367527, "grad_norm": 0.6796875, "learning_rate": 0.00021412452427789223, "loss": 0.1699, "step": 90646 }, { "epoch": 0.16072639650206255, "grad_norm": 0.9609375, "learning_rate": 0.00021411402298079763, "loss": 0.1458, "step": 90648 }, { "epoch": 0.16072994266737237, "grad_norm": 0.24609375, "learning_rate": 0.00021410352555805364, "loss": 0.1403, "step": 90650 }, { "epoch": 0.16073348883268218, "grad_norm": 0.33984375, "learning_rate": 0.00021409303200970633, "loss": 0.1717, "step": 90652 }, { "epoch": 0.160737034997992, "grad_norm": 0.51953125, "learning_rate": 0.00021408254233580153, "loss": 0.1538, "step": 90654 }, { "epoch": 0.1607405811633018, "grad_norm": 0.6796875, "learning_rate": 0.00021407205653638494, "loss": 0.1529, "step": 90656 }, { "epoch": 0.16074412732861162, "grad_norm": 0.4140625, "learning_rate": 0.0002140615746115027, "loss": 0.1749, "step": 90658 }, { "epoch": 0.16074767349392144, "grad_norm": 0.86328125, "learning_rate": 0.00021405109656120048, "loss": 0.1851, "step": 90660 }, { "epoch": 0.16075121965923125, "grad_norm": 1.3125, "learning_rate": 0.00021404062238552406, "loss": 0.2252, "step": 90662 }, { "epoch": 0.16075476582454107, "grad_norm": 0.57421875, "learning_rate": 0.00021403015208451936, "loss": 0.2172, "step": 90664 }, { "epoch": 0.16075831198985088, "grad_norm": 0.353515625, "learning_rate": 0.0002140196856582322, "loss": 0.3414, "step": 90666 }, { "epoch": 0.1607618581551607, "grad_norm": 0.365234375, "learning_rate": 0.00021400922310670838, "loss": 0.3258, "step": 90668 }, { "epoch": 0.1607654043204705, "grad_norm": 0.21484375, "learning_rate": 0.0002139987644299935, "loss": 0.2227, "step": 90670 }, { "epoch": 0.16076895048578033, "grad_norm": 0.439453125, "learning_rate": 0.00021398830962813338, "loss": 0.2133, "step": 90672 }, { "epoch": 0.16077249665109014, "grad_norm": 0.28515625, "learning_rate": 0.0002139778587011736, "loss": 0.1849, "step": 90674 }, { "epoch": 0.16077604281639996, "grad_norm": 0.57421875, "learning_rate": 0.00021396741164916015, "loss": 0.173, "step": 90676 }, { "epoch": 0.16077958898170977, "grad_norm": 0.6640625, "learning_rate": 0.00021395696847213849, "loss": 0.1539, "step": 90678 }, { "epoch": 0.16078313514701958, "grad_norm": 0.7578125, "learning_rate": 0.00021394652917015446, "loss": 0.1908, "step": 90680 }, { "epoch": 0.1607866813123294, "grad_norm": 0.484375, "learning_rate": 0.00021393609374325355, "loss": 0.1928, "step": 90682 }, { "epoch": 0.1607902274776392, "grad_norm": 0.427734375, "learning_rate": 0.00021392566219148156, "loss": 0.1901, "step": 90684 }, { "epoch": 0.16079377364294903, "grad_norm": 0.30859375, "learning_rate": 0.00021391523451488395, "loss": 0.1912, "step": 90686 }, { "epoch": 0.16079731980825884, "grad_norm": 0.2734375, "learning_rate": 0.0002139048107135065, "loss": 0.1745, "step": 90688 }, { "epoch": 0.16080086597356866, "grad_norm": 0.2373046875, "learning_rate": 0.00021389439078739464, "loss": 0.1283, "step": 90690 }, { "epoch": 0.16080441213887847, "grad_norm": 0.251953125, "learning_rate": 0.00021388397473659398, "loss": 0.1893, "step": 90692 }, { "epoch": 0.1608079583041883, "grad_norm": 0.25390625, "learning_rate": 0.00021387356256115015, "loss": 0.1647, "step": 90694 }, { "epoch": 0.1608115044694981, "grad_norm": 0.314453125, "learning_rate": 0.0002138631542611086, "loss": 0.1746, "step": 90696 }, { "epoch": 0.16081505063480792, "grad_norm": 0.546875, "learning_rate": 0.00021385274983651493, "loss": 0.156, "step": 90698 }, { "epoch": 0.16081859680011773, "grad_norm": 0.1767578125, "learning_rate": 0.00021384234928741448, "loss": 0.2712, "step": 90700 }, { "epoch": 0.16082214296542754, "grad_norm": 0.921875, "learning_rate": 0.000213831952613853, "loss": 0.2585, "step": 90702 }, { "epoch": 0.16082568913073736, "grad_norm": 0.453125, "learning_rate": 0.0002138215598158757, "loss": 0.1586, "step": 90704 }, { "epoch": 0.16082923529604717, "grad_norm": 2.84375, "learning_rate": 0.0002138111708935282, "loss": 0.2443, "step": 90706 }, { "epoch": 0.160832781461357, "grad_norm": 0.330078125, "learning_rate": 0.0002138007858468559, "loss": 0.1348, "step": 90708 }, { "epoch": 0.1608363276266668, "grad_norm": 1.390625, "learning_rate": 0.0002137904046759041, "loss": 0.1848, "step": 90710 }, { "epoch": 0.16083987379197662, "grad_norm": 0.373046875, "learning_rate": 0.00021378002738071844, "loss": 0.1996, "step": 90712 }, { "epoch": 0.16084341995728643, "grad_norm": 0.57421875, "learning_rate": 0.00021376965396134408, "loss": 0.278, "step": 90714 }, { "epoch": 0.16084696612259625, "grad_norm": 0.52734375, "learning_rate": 0.00021375928441782642, "loss": 0.1875, "step": 90716 }, { "epoch": 0.16085051228790606, "grad_norm": 0.5859375, "learning_rate": 0.00021374891875021096, "loss": 0.2216, "step": 90718 }, { "epoch": 0.16085405845321588, "grad_norm": 0.578125, "learning_rate": 0.0002137385569585429, "loss": 0.2004, "step": 90720 }, { "epoch": 0.1608576046185257, "grad_norm": 2.453125, "learning_rate": 0.0002137281990428675, "loss": 0.2104, "step": 90722 }, { "epoch": 0.1608611507838355, "grad_norm": 0.40234375, "learning_rate": 0.00021371784500323024, "loss": 0.1833, "step": 90724 }, { "epoch": 0.16086469694914532, "grad_norm": 0.359375, "learning_rate": 0.00021370749483967633, "loss": 0.1483, "step": 90726 }, { "epoch": 0.16086824311445513, "grad_norm": 0.5234375, "learning_rate": 0.00021369714855225104, "loss": 0.192, "step": 90728 }, { "epoch": 0.16087178927976495, "grad_norm": 0.443359375, "learning_rate": 0.0002136868061409995, "loss": 0.2623, "step": 90730 }, { "epoch": 0.16087533544507476, "grad_norm": 0.458984375, "learning_rate": 0.00021367646760596704, "loss": 0.2541, "step": 90732 }, { "epoch": 0.16087888161038458, "grad_norm": 0.29296875, "learning_rate": 0.00021366613294719898, "loss": 0.1884, "step": 90734 }, { "epoch": 0.1608824277756944, "grad_norm": 0.953125, "learning_rate": 0.0002136558021647403, "loss": 0.2822, "step": 90736 }, { "epoch": 0.16088597394100423, "grad_norm": 0.2412109375, "learning_rate": 0.0002136454752586363, "loss": 0.1848, "step": 90738 }, { "epoch": 0.16088952010631405, "grad_norm": 0.41015625, "learning_rate": 0.00021363515222893217, "loss": 0.1449, "step": 90740 }, { "epoch": 0.16089306627162386, "grad_norm": 0.4453125, "learning_rate": 0.0002136248330756731, "loss": 0.2131, "step": 90742 }, { "epoch": 0.16089661243693368, "grad_norm": 0.306640625, "learning_rate": 0.00021361451779890403, "loss": 0.2425, "step": 90744 }, { "epoch": 0.1609001586022435, "grad_norm": 0.279296875, "learning_rate": 0.00021360420639867026, "loss": 0.2751, "step": 90746 }, { "epoch": 0.1609037047675533, "grad_norm": 0.40234375, "learning_rate": 0.00021359389887501675, "loss": 0.1823, "step": 90748 }, { "epoch": 0.16090725093286312, "grad_norm": 0.43359375, "learning_rate": 0.0002135835952279887, "loss": 0.1484, "step": 90750 }, { "epoch": 0.16091079709817294, "grad_norm": 0.62890625, "learning_rate": 0.00021357329545763107, "loss": 0.1908, "step": 90752 }, { "epoch": 0.16091434326348275, "grad_norm": 0.75, "learning_rate": 0.0002135629995639889, "loss": 0.1377, "step": 90754 }, { "epoch": 0.16091788942879257, "grad_norm": 7.0, "learning_rate": 0.00021355270754710737, "loss": 0.2933, "step": 90756 }, { "epoch": 0.16092143559410238, "grad_norm": 0.234375, "learning_rate": 0.00021354241940703132, "loss": 0.1342, "step": 90758 }, { "epoch": 0.1609249817594122, "grad_norm": 0.404296875, "learning_rate": 0.00021353213514380587, "loss": 0.1709, "step": 90760 }, { "epoch": 0.160928527924722, "grad_norm": 0.85546875, "learning_rate": 0.00021352185475747586, "loss": 0.2011, "step": 90762 }, { "epoch": 0.16093207409003182, "grad_norm": 0.3125, "learning_rate": 0.00021351157824808632, "loss": 0.1453, "step": 90764 }, { "epoch": 0.16093562025534164, "grad_norm": 0.333984375, "learning_rate": 0.00021350130561568228, "loss": 0.1714, "step": 90766 }, { "epoch": 0.16093916642065145, "grad_norm": 0.482421875, "learning_rate": 0.00021349103686030857, "loss": 0.1555, "step": 90768 }, { "epoch": 0.16094271258596127, "grad_norm": 0.73828125, "learning_rate": 0.0002134807719820101, "loss": 0.1284, "step": 90770 }, { "epoch": 0.16094625875127108, "grad_norm": 0.6640625, "learning_rate": 0.00021347051098083179, "loss": 0.1959, "step": 90772 }, { "epoch": 0.1609498049165809, "grad_norm": 0.546875, "learning_rate": 0.00021346025385681852, "loss": 0.1754, "step": 90774 }, { "epoch": 0.1609533510818907, "grad_norm": 0.2333984375, "learning_rate": 0.00021345000061001512, "loss": 0.1635, "step": 90776 }, { "epoch": 0.16095689724720053, "grad_norm": 0.9765625, "learning_rate": 0.00021343975124046646, "loss": 0.208, "step": 90778 }, { "epoch": 0.16096044341251034, "grad_norm": 0.40625, "learning_rate": 0.00021342950574821726, "loss": 0.1746, "step": 90780 }, { "epoch": 0.16096398957782015, "grad_norm": 0.30078125, "learning_rate": 0.00021341926413331254, "loss": 0.2173, "step": 90782 }, { "epoch": 0.16096753574312997, "grad_norm": 0.828125, "learning_rate": 0.000213409026395797, "loss": 0.1848, "step": 90784 }, { "epoch": 0.16097108190843978, "grad_norm": 0.3046875, "learning_rate": 0.0002133987925357153, "loss": 0.2046, "step": 90786 }, { "epoch": 0.1609746280737496, "grad_norm": 0.30078125, "learning_rate": 0.00021338856255311229, "loss": 0.2137, "step": 90788 }, { "epoch": 0.1609781742390594, "grad_norm": 0.4453125, "learning_rate": 0.00021337833644803267, "loss": 0.1347, "step": 90790 }, { "epoch": 0.16098172040436923, "grad_norm": 0.376953125, "learning_rate": 0.0002133681142205213, "loss": 0.1795, "step": 90792 }, { "epoch": 0.16098526656967904, "grad_norm": 0.42578125, "learning_rate": 0.00021335789587062268, "loss": 0.4495, "step": 90794 }, { "epoch": 0.16098881273498886, "grad_norm": 0.279296875, "learning_rate": 0.00021334768139838164, "loss": 0.1562, "step": 90796 }, { "epoch": 0.16099235890029867, "grad_norm": 0.486328125, "learning_rate": 0.00021333747080384287, "loss": 0.185, "step": 90798 }, { "epoch": 0.16099590506560849, "grad_norm": 6.09375, "learning_rate": 0.00021332726408705092, "loss": 0.2615, "step": 90800 }, { "epoch": 0.1609994512309183, "grad_norm": 0.1806640625, "learning_rate": 0.00021331706124805045, "loss": 0.1576, "step": 90802 }, { "epoch": 0.16100299739622811, "grad_norm": 1.2421875, "learning_rate": 0.00021330686228688615, "loss": 0.2615, "step": 90804 }, { "epoch": 0.16100654356153793, "grad_norm": 2.046875, "learning_rate": 0.00021329666720360256, "loss": 0.2212, "step": 90806 }, { "epoch": 0.16101008972684774, "grad_norm": 0.357421875, "learning_rate": 0.00021328647599824424, "loss": 0.1961, "step": 90808 }, { "epoch": 0.16101363589215756, "grad_norm": 0.70703125, "learning_rate": 0.0002132762886708559, "loss": 0.1926, "step": 90810 }, { "epoch": 0.16101718205746737, "grad_norm": 1.6953125, "learning_rate": 0.00021326610522148193, "loss": 0.3025, "step": 90812 }, { "epoch": 0.1610207282227772, "grad_norm": 0.435546875, "learning_rate": 0.00021325592565016693, "loss": 0.1723, "step": 90814 }, { "epoch": 0.161024274388087, "grad_norm": 0.29296875, "learning_rate": 0.00021324574995695551, "loss": 0.1781, "step": 90816 }, { "epoch": 0.16102782055339682, "grad_norm": 1.375, "learning_rate": 0.00021323557814189204, "loss": 0.336, "step": 90818 }, { "epoch": 0.16103136671870663, "grad_norm": 0.201171875, "learning_rate": 0.00021322541020502107, "loss": 0.1651, "step": 90820 }, { "epoch": 0.16103491288401645, "grad_norm": 1.0859375, "learning_rate": 0.00021321524614638704, "loss": 0.2041, "step": 90822 }, { "epoch": 0.16103845904932626, "grad_norm": 0.9609375, "learning_rate": 0.0002132050859660345, "loss": 0.1567, "step": 90824 }, { "epoch": 0.16104200521463607, "grad_norm": 0.333984375, "learning_rate": 0.0002131949296640077, "loss": 0.1719, "step": 90826 }, { "epoch": 0.16104555137994592, "grad_norm": 0.189453125, "learning_rate": 0.00021318477724035123, "loss": 0.1363, "step": 90828 }, { "epoch": 0.16104909754525573, "grad_norm": 0.9453125, "learning_rate": 0.00021317462869510927, "loss": 0.1555, "step": 90830 }, { "epoch": 0.16105264371056555, "grad_norm": 0.546875, "learning_rate": 0.00021316448402832654, "loss": 0.2077, "step": 90832 }, { "epoch": 0.16105618987587536, "grad_norm": 0.6328125, "learning_rate": 0.00021315434324004716, "loss": 0.1451, "step": 90834 }, { "epoch": 0.16105973604118518, "grad_norm": 0.376953125, "learning_rate": 0.00021314420633031554, "loss": 0.1776, "step": 90836 }, { "epoch": 0.161063282206495, "grad_norm": 0.2001953125, "learning_rate": 0.00021313407329917594, "loss": 0.2297, "step": 90838 }, { "epoch": 0.1610668283718048, "grad_norm": 0.443359375, "learning_rate": 0.0002131239441466729, "loss": 0.2155, "step": 90840 }, { "epoch": 0.16107037453711462, "grad_norm": 0.65625, "learning_rate": 0.00021311381887285054, "loss": 0.1819, "step": 90842 }, { "epoch": 0.16107392070242443, "grad_norm": 1.21875, "learning_rate": 0.00021310369747775299, "loss": 0.1965, "step": 90844 }, { "epoch": 0.16107746686773425, "grad_norm": 1.8828125, "learning_rate": 0.00021309357996142483, "loss": 0.4055, "step": 90846 }, { "epoch": 0.16108101303304406, "grad_norm": 1.8125, "learning_rate": 0.00021308346632391018, "loss": 0.1759, "step": 90848 }, { "epoch": 0.16108455919835388, "grad_norm": 0.41796875, "learning_rate": 0.00021307335656525333, "loss": 0.2427, "step": 90850 }, { "epoch": 0.1610881053636637, "grad_norm": 0.70703125, "learning_rate": 0.0002130632506854983, "loss": 0.2365, "step": 90852 }, { "epoch": 0.1610916515289735, "grad_norm": 0.71875, "learning_rate": 0.00021305314868468945, "loss": 0.1734, "step": 90854 }, { "epoch": 0.16109519769428332, "grad_norm": 0.54296875, "learning_rate": 0.00021304305056287092, "loss": 0.1384, "step": 90856 }, { "epoch": 0.16109874385959314, "grad_norm": 0.4140625, "learning_rate": 0.000213032956320087, "loss": 0.1895, "step": 90858 }, { "epoch": 0.16110229002490295, "grad_norm": 0.34375, "learning_rate": 0.0002130228659563816, "loss": 0.3912, "step": 90860 }, { "epoch": 0.16110583619021276, "grad_norm": 0.345703125, "learning_rate": 0.00021301277947179893, "loss": 0.156, "step": 90862 }, { "epoch": 0.16110938235552258, "grad_norm": 0.302734375, "learning_rate": 0.00021300269686638326, "loss": 0.1718, "step": 90864 }, { "epoch": 0.1611129285208324, "grad_norm": 1.046875, "learning_rate": 0.00021299261814017854, "loss": 0.3277, "step": 90866 }, { "epoch": 0.1611164746861422, "grad_norm": 0.57421875, "learning_rate": 0.00021298254329322883, "loss": 0.227, "step": 90868 }, { "epoch": 0.16112002085145202, "grad_norm": 0.48046875, "learning_rate": 0.0002129724723255782, "loss": 0.1921, "step": 90870 }, { "epoch": 0.16112356701676184, "grad_norm": 0.23828125, "learning_rate": 0.00021296240523727074, "loss": 0.1637, "step": 90872 }, { "epoch": 0.16112711318207165, "grad_norm": 0.6796875, "learning_rate": 0.00021295234202835058, "loss": 0.315, "step": 90874 }, { "epoch": 0.16113065934738147, "grad_norm": 1.3203125, "learning_rate": 0.00021294228269886148, "loss": 0.1879, "step": 90876 }, { "epoch": 0.16113420551269128, "grad_norm": 0.96875, "learning_rate": 0.00021293222724884766, "loss": 0.1839, "step": 90878 }, { "epoch": 0.1611377516780011, "grad_norm": 0.30078125, "learning_rate": 0.0002129221756783529, "loss": 0.1853, "step": 90880 }, { "epoch": 0.1611412978433109, "grad_norm": 0.486328125, "learning_rate": 0.00021291212798742144, "loss": 0.1454, "step": 90882 }, { "epoch": 0.16114484400862072, "grad_norm": 0.2255859375, "learning_rate": 0.00021290208417609692, "loss": 0.1474, "step": 90884 }, { "epoch": 0.16114839017393054, "grad_norm": 0.69921875, "learning_rate": 0.00021289204424442345, "loss": 0.1756, "step": 90886 }, { "epoch": 0.16115193633924035, "grad_norm": 0.5859375, "learning_rate": 0.0002128820081924448, "loss": 0.1807, "step": 90888 }, { "epoch": 0.16115548250455017, "grad_norm": 0.8125, "learning_rate": 0.00021287197602020506, "loss": 0.1645, "step": 90890 }, { "epoch": 0.16115902866985998, "grad_norm": 0.26171875, "learning_rate": 0.00021286194772774793, "loss": 0.1612, "step": 90892 }, { "epoch": 0.1611625748351698, "grad_norm": 0.9375, "learning_rate": 0.00021285192331511733, "loss": 0.1955, "step": 90894 }, { "epoch": 0.1611661210004796, "grad_norm": 1.21875, "learning_rate": 0.0002128419027823572, "loss": 0.1961, "step": 90896 }, { "epoch": 0.16116966716578943, "grad_norm": 0.75390625, "learning_rate": 0.0002128318861295111, "loss": 0.225, "step": 90898 }, { "epoch": 0.16117321333109924, "grad_norm": 0.21875, "learning_rate": 0.00021282187335662308, "loss": 0.1702, "step": 90900 }, { "epoch": 0.16117675949640906, "grad_norm": 1.15625, "learning_rate": 0.0002128118644637369, "loss": 0.1686, "step": 90902 }, { "epoch": 0.16118030566171887, "grad_norm": 0.240234375, "learning_rate": 0.0002128018594508962, "loss": 0.148, "step": 90904 }, { "epoch": 0.16118385182702868, "grad_norm": 0.83984375, "learning_rate": 0.00021279185831814485, "loss": 0.2393, "step": 90906 }, { "epoch": 0.1611873979923385, "grad_norm": 0.87890625, "learning_rate": 0.0002127818610655266, "loss": 0.1499, "step": 90908 }, { "epoch": 0.1611909441576483, "grad_norm": 0.23046875, "learning_rate": 0.00021277186769308505, "loss": 0.1392, "step": 90910 }, { "epoch": 0.16119449032295813, "grad_norm": 0.416015625, "learning_rate": 0.000212761878200864, "loss": 0.1856, "step": 90912 }, { "epoch": 0.16119803648826794, "grad_norm": 0.89453125, "learning_rate": 0.0002127518925889072, "loss": 0.2576, "step": 90914 }, { "epoch": 0.16120158265357776, "grad_norm": 0.9296875, "learning_rate": 0.00021274191085725829, "loss": 0.1721, "step": 90916 }, { "epoch": 0.16120512881888757, "grad_norm": 2.1875, "learning_rate": 0.0002127319330059608, "loss": 0.1762, "step": 90918 }, { "epoch": 0.16120867498419741, "grad_norm": 0.298828125, "learning_rate": 0.0002127219590350584, "loss": 0.1541, "step": 90920 }, { "epoch": 0.16121222114950723, "grad_norm": 2.140625, "learning_rate": 0.00021271198894459487, "loss": 0.1931, "step": 90922 }, { "epoch": 0.16121576731481704, "grad_norm": 0.2314453125, "learning_rate": 0.0002127020227346136, "loss": 0.1701, "step": 90924 }, { "epoch": 0.16121931348012686, "grad_norm": 0.55078125, "learning_rate": 0.0002126920604051583, "loss": 0.2165, "step": 90926 }, { "epoch": 0.16122285964543667, "grad_norm": 0.5234375, "learning_rate": 0.0002126821019562725, "loss": 0.1859, "step": 90928 }, { "epoch": 0.1612264058107465, "grad_norm": 1.7734375, "learning_rate": 0.0002126721473879999, "loss": 0.2709, "step": 90930 }, { "epoch": 0.1612299519760563, "grad_norm": 3.609375, "learning_rate": 0.00021266219670038373, "loss": 0.2622, "step": 90932 }, { "epoch": 0.16123349814136612, "grad_norm": 0.5078125, "learning_rate": 0.00021265224989346779, "loss": 0.2008, "step": 90934 }, { "epoch": 0.16123704430667593, "grad_norm": 0.296875, "learning_rate": 0.00021264230696729547, "loss": 0.18, "step": 90936 }, { "epoch": 0.16124059047198575, "grad_norm": 0.5078125, "learning_rate": 0.00021263236792191018, "loss": 0.2052, "step": 90938 }, { "epoch": 0.16124413663729556, "grad_norm": 0.546875, "learning_rate": 0.00021262243275735562, "loss": 0.143, "step": 90940 }, { "epoch": 0.16124768280260537, "grad_norm": 0.47265625, "learning_rate": 0.000212612501473675, "loss": 0.1535, "step": 90942 }, { "epoch": 0.1612512289679152, "grad_norm": 0.25, "learning_rate": 0.00021260257407091184, "loss": 0.1754, "step": 90944 }, { "epoch": 0.161254775133225, "grad_norm": 0.1845703125, "learning_rate": 0.00021259265054910946, "loss": 0.1169, "step": 90946 }, { "epoch": 0.16125832129853482, "grad_norm": 0.32421875, "learning_rate": 0.00021258273090831158, "loss": 0.2006, "step": 90948 }, { "epoch": 0.16126186746384463, "grad_norm": 0.3671875, "learning_rate": 0.00021257281514856118, "loss": 0.1742, "step": 90950 }, { "epoch": 0.16126541362915445, "grad_norm": 0.5390625, "learning_rate": 0.00021256290326990184, "loss": 0.1648, "step": 90952 }, { "epoch": 0.16126895979446426, "grad_norm": 0.48046875, "learning_rate": 0.00021255299527237693, "loss": 0.1766, "step": 90954 }, { "epoch": 0.16127250595977408, "grad_norm": 0.58984375, "learning_rate": 0.00021254309115602976, "loss": 0.1673, "step": 90956 }, { "epoch": 0.1612760521250839, "grad_norm": 0.37109375, "learning_rate": 0.00021253319092090356, "loss": 0.187, "step": 90958 }, { "epoch": 0.1612795982903937, "grad_norm": 0.267578125, "learning_rate": 0.00021252329456704157, "loss": 0.1609, "step": 90960 }, { "epoch": 0.16128314445570352, "grad_norm": 0.64453125, "learning_rate": 0.00021251340209448735, "loss": 0.1541, "step": 90962 }, { "epoch": 0.16128669062101333, "grad_norm": 1.203125, "learning_rate": 0.00021250351350328398, "loss": 0.4158, "step": 90964 }, { "epoch": 0.16129023678632315, "grad_norm": 0.296875, "learning_rate": 0.0002124936287934746, "loss": 0.1908, "step": 90966 }, { "epoch": 0.16129378295163296, "grad_norm": 0.48828125, "learning_rate": 0.0002124837479651026, "loss": 0.2114, "step": 90968 }, { "epoch": 0.16129732911694278, "grad_norm": 1.0234375, "learning_rate": 0.00021247387101821125, "loss": 0.2054, "step": 90970 }, { "epoch": 0.1613008752822526, "grad_norm": 0.51171875, "learning_rate": 0.0002124639979528436, "loss": 0.1499, "step": 90972 }, { "epoch": 0.1613044214475624, "grad_norm": 0.2578125, "learning_rate": 0.0002124541287690429, "loss": 0.1917, "step": 90974 }, { "epoch": 0.16130796761287222, "grad_norm": 0.6796875, "learning_rate": 0.00021244426346685234, "loss": 0.2066, "step": 90976 }, { "epoch": 0.16131151377818204, "grad_norm": 1.3984375, "learning_rate": 0.00021243440204631487, "loss": 0.1872, "step": 90978 }, { "epoch": 0.16131505994349185, "grad_norm": 0.42578125, "learning_rate": 0.0002124245445074739, "loss": 0.1869, "step": 90980 }, { "epoch": 0.16131860610880167, "grad_norm": 0.37109375, "learning_rate": 0.00021241469085037231, "loss": 0.2057, "step": 90982 }, { "epoch": 0.16132215227411148, "grad_norm": 0.7734375, "learning_rate": 0.00021240484107505335, "loss": 0.158, "step": 90984 }, { "epoch": 0.1613256984394213, "grad_norm": 0.30078125, "learning_rate": 0.00021239499518156002, "loss": 0.1953, "step": 90986 }, { "epoch": 0.1613292446047311, "grad_norm": 0.59375, "learning_rate": 0.00021238515316993544, "loss": 0.1898, "step": 90988 }, { "epoch": 0.16133279077004092, "grad_norm": 0.2890625, "learning_rate": 0.00021237531504022256, "loss": 0.1462, "step": 90990 }, { "epoch": 0.16133633693535074, "grad_norm": 0.6015625, "learning_rate": 0.00021236548079246444, "loss": 0.1417, "step": 90992 }, { "epoch": 0.16133988310066055, "grad_norm": 0.55078125, "learning_rate": 0.00021235565042670412, "loss": 0.2104, "step": 90994 }, { "epoch": 0.16134342926597037, "grad_norm": 0.82421875, "learning_rate": 0.00021234582394298468, "loss": 0.1442, "step": 90996 }, { "epoch": 0.16134697543128018, "grad_norm": 0.5078125, "learning_rate": 0.00021233600134134891, "loss": 0.1813, "step": 90998 }, { "epoch": 0.16135052159659, "grad_norm": 0.216796875, "learning_rate": 0.00021232618262183978, "loss": 0.1727, "step": 91000 }, { "epoch": 0.1613540677618998, "grad_norm": 0.35546875, "learning_rate": 0.0002123163677845004, "loss": 0.2379, "step": 91002 }, { "epoch": 0.16135761392720963, "grad_norm": 0.9765625, "learning_rate": 0.00021230655682937362, "loss": 0.234, "step": 91004 }, { "epoch": 0.16136116009251944, "grad_norm": 0.3671875, "learning_rate": 0.00021229674975650233, "loss": 0.1441, "step": 91006 }, { "epoch": 0.16136470625782925, "grad_norm": 0.5078125, "learning_rate": 0.00021228694656592932, "loss": 0.171, "step": 91008 }, { "epoch": 0.1613682524231391, "grad_norm": 0.5625, "learning_rate": 0.00021227714725769755, "loss": 0.207, "step": 91010 }, { "epoch": 0.1613717985884489, "grad_norm": 0.431640625, "learning_rate": 0.0002122673518318501, "loss": 0.1571, "step": 91012 }, { "epoch": 0.16137534475375873, "grad_norm": 0.2275390625, "learning_rate": 0.00021225756028842932, "loss": 0.1713, "step": 91014 }, { "epoch": 0.16137889091906854, "grad_norm": 0.640625, "learning_rate": 0.00021224777262747853, "loss": 0.2123, "step": 91016 }, { "epoch": 0.16138243708437836, "grad_norm": 0.35546875, "learning_rate": 0.0002122379888490402, "loss": 0.2115, "step": 91018 }, { "epoch": 0.16138598324968817, "grad_norm": 0.55078125, "learning_rate": 0.00021222820895315726, "loss": 0.2227, "step": 91020 }, { "epoch": 0.16138952941499798, "grad_norm": 0.35546875, "learning_rate": 0.00021221843293987248, "loss": 0.2253, "step": 91022 }, { "epoch": 0.1613930755803078, "grad_norm": 0.296875, "learning_rate": 0.0002122086608092285, "loss": 0.1336, "step": 91024 }, { "epoch": 0.1613966217456176, "grad_norm": 0.69140625, "learning_rate": 0.00021219889256126828, "loss": 0.1684, "step": 91026 }, { "epoch": 0.16140016791092743, "grad_norm": 0.33203125, "learning_rate": 0.00021218912819603422, "loss": 0.1426, "step": 91028 }, { "epoch": 0.16140371407623724, "grad_norm": 0.435546875, "learning_rate": 0.00021217936771356948, "loss": 0.1642, "step": 91030 }, { "epoch": 0.16140726024154706, "grad_norm": 0.361328125, "learning_rate": 0.0002121696111139163, "loss": 0.1826, "step": 91032 }, { "epoch": 0.16141080640685687, "grad_norm": 0.3515625, "learning_rate": 0.0002121598583971175, "loss": 0.1789, "step": 91034 }, { "epoch": 0.16141435257216669, "grad_norm": 0.42578125, "learning_rate": 0.0002121501095632158, "loss": 0.1937, "step": 91036 }, { "epoch": 0.1614178987374765, "grad_norm": 0.46484375, "learning_rate": 0.0002121403646122539, "loss": 0.1968, "step": 91038 }, { "epoch": 0.16142144490278632, "grad_norm": 0.22265625, "learning_rate": 0.00021213062354427415, "loss": 0.1968, "step": 91040 }, { "epoch": 0.16142499106809613, "grad_norm": 1.0859375, "learning_rate": 0.00021212088635931943, "loss": 0.1433, "step": 91042 }, { "epoch": 0.16142853723340594, "grad_norm": 0.47265625, "learning_rate": 0.00021211115305743214, "loss": 0.172, "step": 91044 }, { "epoch": 0.16143208339871576, "grad_norm": 0.490234375, "learning_rate": 0.000212101423638655, "loss": 0.1642, "step": 91046 }, { "epoch": 0.16143562956402557, "grad_norm": 0.421875, "learning_rate": 0.00021209169810303035, "loss": 0.1879, "step": 91048 }, { "epoch": 0.1614391757293354, "grad_norm": 0.353515625, "learning_rate": 0.0002120819764506009, "loss": 0.1947, "step": 91050 }, { "epoch": 0.1614427218946452, "grad_norm": 0.39453125, "learning_rate": 0.00021207225868140925, "loss": 0.18, "step": 91052 }, { "epoch": 0.16144626805995502, "grad_norm": 0.41796875, "learning_rate": 0.00021206254479549762, "loss": 0.2058, "step": 91054 }, { "epoch": 0.16144981422526483, "grad_norm": 0.275390625, "learning_rate": 0.00021205283479290874, "loss": 0.1264, "step": 91056 }, { "epoch": 0.16145336039057465, "grad_norm": 0.462890625, "learning_rate": 0.00021204312867368488, "loss": 0.1772, "step": 91058 }, { "epoch": 0.16145690655588446, "grad_norm": 0.58984375, "learning_rate": 0.00021203342643786872, "loss": 0.1851, "step": 91060 }, { "epoch": 0.16146045272119428, "grad_norm": 0.4453125, "learning_rate": 0.00021202372808550244, "loss": 0.1856, "step": 91062 }, { "epoch": 0.1614639988865041, "grad_norm": 0.408203125, "learning_rate": 0.00021201403361662867, "loss": 0.1308, "step": 91064 }, { "epoch": 0.1614675450518139, "grad_norm": 0.49609375, "learning_rate": 0.0002120043430312897, "loss": 0.1854, "step": 91066 }, { "epoch": 0.16147109121712372, "grad_norm": 1.953125, "learning_rate": 0.00021199465632952777, "loss": 0.2397, "step": 91068 }, { "epoch": 0.16147463738243353, "grad_norm": 0.51953125, "learning_rate": 0.00021198497351138568, "loss": 0.1684, "step": 91070 }, { "epoch": 0.16147818354774335, "grad_norm": 0.3984375, "learning_rate": 0.00021197529457690528, "loss": 0.1695, "step": 91072 }, { "epoch": 0.16148172971305316, "grad_norm": 1.953125, "learning_rate": 0.00021196561952612918, "loss": 0.2136, "step": 91074 }, { "epoch": 0.16148527587836298, "grad_norm": 0.71484375, "learning_rate": 0.00021195594835909953, "loss": 0.2223, "step": 91076 }, { "epoch": 0.1614888220436728, "grad_norm": 0.189453125, "learning_rate": 0.00021194628107585892, "loss": 0.3469, "step": 91078 }, { "epoch": 0.1614923682089826, "grad_norm": 0.40234375, "learning_rate": 0.00021193661767644927, "loss": 0.2272, "step": 91080 }, { "epoch": 0.16149591437429242, "grad_norm": 0.5390625, "learning_rate": 0.0002119269581609131, "loss": 0.1694, "step": 91082 }, { "epoch": 0.16149946053960224, "grad_norm": 0.30078125, "learning_rate": 0.00021191730252929255, "loss": 0.176, "step": 91084 }, { "epoch": 0.16150300670491205, "grad_norm": 0.36328125, "learning_rate": 0.00021190765078162987, "loss": 0.1473, "step": 91086 }, { "epoch": 0.16150655287022186, "grad_norm": 0.201171875, "learning_rate": 0.0002118980029179672, "loss": 0.2345, "step": 91088 }, { "epoch": 0.16151009903553168, "grad_norm": 0.9765625, "learning_rate": 0.0002118883589383468, "loss": 0.1678, "step": 91090 }, { "epoch": 0.1615136452008415, "grad_norm": 0.79296875, "learning_rate": 0.00021187871884281089, "loss": 0.2004, "step": 91092 }, { "epoch": 0.1615171913661513, "grad_norm": 0.42578125, "learning_rate": 0.00021186908263140156, "loss": 0.1157, "step": 91094 }, { "epoch": 0.16152073753146112, "grad_norm": 0.796875, "learning_rate": 0.00021185945030416094, "loss": 0.1751, "step": 91096 }, { "epoch": 0.16152428369677094, "grad_norm": 1.25, "learning_rate": 0.00021184982186113116, "loss": 0.2015, "step": 91098 }, { "epoch": 0.16152782986208078, "grad_norm": 0.423828125, "learning_rate": 0.0002118401973023544, "loss": 0.1653, "step": 91100 }, { "epoch": 0.1615313760273906, "grad_norm": 0.40234375, "learning_rate": 0.0002118305766278727, "loss": 0.1711, "step": 91102 }, { "epoch": 0.1615349221927004, "grad_norm": 0.39453125, "learning_rate": 0.0002118209598377283, "loss": 0.1553, "step": 91104 }, { "epoch": 0.16153846835801022, "grad_norm": 1.46875, "learning_rate": 0.000211811346931963, "loss": 0.4004, "step": 91106 }, { "epoch": 0.16154201452332004, "grad_norm": 0.5078125, "learning_rate": 0.0002118017379106188, "loss": 0.2387, "step": 91108 }, { "epoch": 0.16154556068862985, "grad_norm": 0.359375, "learning_rate": 0.00021179213277373806, "loss": 0.2143, "step": 91110 }, { "epoch": 0.16154910685393967, "grad_norm": 0.828125, "learning_rate": 0.00021178253152136258, "loss": 0.2508, "step": 91112 }, { "epoch": 0.16155265301924948, "grad_norm": 0.41015625, "learning_rate": 0.00021177293415353433, "loss": 0.1862, "step": 91114 }, { "epoch": 0.1615561991845593, "grad_norm": 0.33984375, "learning_rate": 0.00021176334067029532, "loss": 0.1776, "step": 91116 }, { "epoch": 0.1615597453498691, "grad_norm": 0.28515625, "learning_rate": 0.0002117537510716875, "loss": 0.2085, "step": 91118 }, { "epoch": 0.16156329151517893, "grad_norm": 0.80859375, "learning_rate": 0.0002117441653577528, "loss": 0.1914, "step": 91120 }, { "epoch": 0.16156683768048874, "grad_norm": 0.609375, "learning_rate": 0.00021173458352853326, "loss": 0.2068, "step": 91122 }, { "epoch": 0.16157038384579855, "grad_norm": 2.40625, "learning_rate": 0.0002117250055840706, "loss": 0.3193, "step": 91124 }, { "epoch": 0.16157393001110837, "grad_norm": 0.466796875, "learning_rate": 0.00021171543152440672, "loss": 0.1901, "step": 91126 }, { "epoch": 0.16157747617641818, "grad_norm": 0.470703125, "learning_rate": 0.00021170586134958372, "loss": 0.1502, "step": 91128 }, { "epoch": 0.161581022341728, "grad_norm": 0.22265625, "learning_rate": 0.0002116962950596432, "loss": 0.3708, "step": 91130 }, { "epoch": 0.1615845685070378, "grad_norm": 1.3125, "learning_rate": 0.0002116867326546272, "loss": 0.1826, "step": 91132 }, { "epoch": 0.16158811467234763, "grad_norm": 0.5390625, "learning_rate": 0.00021167717413457724, "loss": 0.1322, "step": 91134 }, { "epoch": 0.16159166083765744, "grad_norm": 0.25390625, "learning_rate": 0.0002116676194995356, "loss": 0.1883, "step": 91136 }, { "epoch": 0.16159520700296726, "grad_norm": 0.53515625, "learning_rate": 0.00021165806874954353, "loss": 0.1915, "step": 91138 }, { "epoch": 0.16159875316827707, "grad_norm": 0.310546875, "learning_rate": 0.00021164852188464318, "loss": 0.1901, "step": 91140 }, { "epoch": 0.16160229933358689, "grad_norm": 0.76953125, "learning_rate": 0.00021163897890487618, "loss": 0.1691, "step": 91142 }, { "epoch": 0.1616058454988967, "grad_norm": 0.3515625, "learning_rate": 0.00021162943981028426, "loss": 0.1764, "step": 91144 }, { "epoch": 0.16160939166420651, "grad_norm": 0.7421875, "learning_rate": 0.00021161990460090918, "loss": 0.1691, "step": 91146 }, { "epoch": 0.16161293782951633, "grad_norm": 0.5625, "learning_rate": 0.0002116103732767925, "loss": 0.1955, "step": 91148 }, { "epoch": 0.16161648399482614, "grad_norm": 0.232421875, "learning_rate": 0.00021160084583797604, "loss": 0.1302, "step": 91150 }, { "epoch": 0.16162003016013596, "grad_norm": 0.275390625, "learning_rate": 0.0002115913222845015, "loss": 0.1765, "step": 91152 }, { "epoch": 0.16162357632544577, "grad_norm": 0.416015625, "learning_rate": 0.00021158180261641047, "loss": 0.1652, "step": 91154 }, { "epoch": 0.1616271224907556, "grad_norm": 0.6171875, "learning_rate": 0.00021157228683374454, "loss": 0.1796, "step": 91156 }, { "epoch": 0.1616306686560654, "grad_norm": 0.82421875, "learning_rate": 0.00021156277493654534, "loss": 0.2483, "step": 91158 }, { "epoch": 0.16163421482137522, "grad_norm": 0.439453125, "learning_rate": 0.00021155326692485457, "loss": 0.1561, "step": 91160 }, { "epoch": 0.16163776098668503, "grad_norm": 0.26953125, "learning_rate": 0.00021154376279871367, "loss": 0.1407, "step": 91162 }, { "epoch": 0.16164130715199485, "grad_norm": 0.416015625, "learning_rate": 0.00021153426255816424, "loss": 0.1783, "step": 91164 }, { "epoch": 0.16164485331730466, "grad_norm": 0.34375, "learning_rate": 0.00021152476620324792, "loss": 0.1638, "step": 91166 }, { "epoch": 0.16164839948261447, "grad_norm": 0.6640625, "learning_rate": 0.00021151527373400628, "loss": 0.2561, "step": 91168 }, { "epoch": 0.1616519456479243, "grad_norm": 0.27734375, "learning_rate": 0.0002115057851504806, "loss": 0.146, "step": 91170 }, { "epoch": 0.1616554918132341, "grad_norm": 0.482421875, "learning_rate": 0.0002114963004527126, "loss": 0.2596, "step": 91172 }, { "epoch": 0.16165903797854392, "grad_norm": 0.2412109375, "learning_rate": 0.00021148681964074358, "loss": 0.1374, "step": 91174 }, { "epoch": 0.16166258414385373, "grad_norm": 0.5703125, "learning_rate": 0.00021147734271461524, "loss": 0.4048, "step": 91176 }, { "epoch": 0.16166613030916355, "grad_norm": 0.2138671875, "learning_rate": 0.00021146786967436883, "loss": 0.1451, "step": 91178 }, { "epoch": 0.16166967647447336, "grad_norm": 1.125, "learning_rate": 0.00021145840052004577, "loss": 0.3055, "step": 91180 }, { "epoch": 0.16167322263978318, "grad_norm": 0.2080078125, "learning_rate": 0.00021144893525168758, "loss": 0.1374, "step": 91182 }, { "epoch": 0.161676768805093, "grad_norm": 0.302734375, "learning_rate": 0.0002114394738693356, "loss": 0.1971, "step": 91184 }, { "epoch": 0.1616803149704028, "grad_norm": 0.5546875, "learning_rate": 0.00021143001637303127, "loss": 0.2054, "step": 91186 }, { "epoch": 0.16168386113571262, "grad_norm": 1.21875, "learning_rate": 0.00021142056276281585, "loss": 0.2175, "step": 91188 }, { "epoch": 0.16168740730102243, "grad_norm": 0.484375, "learning_rate": 0.0002114111130387308, "loss": 0.1508, "step": 91190 }, { "epoch": 0.16169095346633228, "grad_norm": 0.34765625, "learning_rate": 0.00021140166720081736, "loss": 0.1799, "step": 91192 }, { "epoch": 0.1616944996316421, "grad_norm": 0.69921875, "learning_rate": 0.00021139222524911686, "loss": 0.1508, "step": 91194 }, { "epoch": 0.1616980457969519, "grad_norm": 0.400390625, "learning_rate": 0.0002113827871836706, "loss": 0.1917, "step": 91196 }, { "epoch": 0.16170159196226172, "grad_norm": 0.39453125, "learning_rate": 0.0002113733530045198, "loss": 0.1373, "step": 91198 }, { "epoch": 0.16170513812757153, "grad_norm": 2.390625, "learning_rate": 0.00021136392271170585, "loss": 0.3014, "step": 91200 }, { "epoch": 0.16170868429288135, "grad_norm": 0.2265625, "learning_rate": 0.00021135449630526994, "loss": 0.1646, "step": 91202 }, { "epoch": 0.16171223045819116, "grad_norm": 0.9765625, "learning_rate": 0.00021134507378525331, "loss": 0.2129, "step": 91204 }, { "epoch": 0.16171577662350098, "grad_norm": 0.197265625, "learning_rate": 0.00021133565515169702, "loss": 0.1806, "step": 91206 }, { "epoch": 0.1617193227888108, "grad_norm": 0.1689453125, "learning_rate": 0.00021132624040464247, "loss": 0.1927, "step": 91208 }, { "epoch": 0.1617228689541206, "grad_norm": 0.6015625, "learning_rate": 0.0002113168295441307, "loss": 0.2568, "step": 91210 }, { "epoch": 0.16172641511943042, "grad_norm": 0.353515625, "learning_rate": 0.00021130742257020288, "loss": 0.1716, "step": 91212 }, { "epoch": 0.16172996128474024, "grad_norm": 0.84375, "learning_rate": 0.00021129801948290027, "loss": 0.1728, "step": 91214 }, { "epoch": 0.16173350745005005, "grad_norm": 0.30859375, "learning_rate": 0.00021128862028226377, "loss": 0.2022, "step": 91216 }, { "epoch": 0.16173705361535987, "grad_norm": 0.427734375, "learning_rate": 0.00021127922496833482, "loss": 0.3041, "step": 91218 }, { "epoch": 0.16174059978066968, "grad_norm": 0.271484375, "learning_rate": 0.00021126983354115408, "loss": 0.1638, "step": 91220 }, { "epoch": 0.1617441459459795, "grad_norm": 0.87890625, "learning_rate": 0.00021126044600076305, "loss": 0.1516, "step": 91222 }, { "epoch": 0.1617476921112893, "grad_norm": 0.33203125, "learning_rate": 0.00021125106234720238, "loss": 0.2015, "step": 91224 }, { "epoch": 0.16175123827659912, "grad_norm": 0.2490234375, "learning_rate": 0.00021124168258051352, "loss": 0.1652, "step": 91226 }, { "epoch": 0.16175478444190894, "grad_norm": 0.67578125, "learning_rate": 0.00021123230670073718, "loss": 0.18, "step": 91228 }, { "epoch": 0.16175833060721875, "grad_norm": 0.41796875, "learning_rate": 0.00021122293470791445, "loss": 0.1718, "step": 91230 }, { "epoch": 0.16176187677252857, "grad_norm": 0.2412109375, "learning_rate": 0.00021121356660208648, "loss": 0.1418, "step": 91232 }, { "epoch": 0.16176542293783838, "grad_norm": 0.451171875, "learning_rate": 0.00021120420238329398, "loss": 0.2068, "step": 91234 }, { "epoch": 0.1617689691031482, "grad_norm": 0.30859375, "learning_rate": 0.00021119484205157807, "loss": 0.1821, "step": 91236 }, { "epoch": 0.161772515268458, "grad_norm": 0.5859375, "learning_rate": 0.0002111854856069796, "loss": 0.1739, "step": 91238 }, { "epoch": 0.16177606143376783, "grad_norm": 0.380859375, "learning_rate": 0.00021117613304953958, "loss": 0.1924, "step": 91240 }, { "epoch": 0.16177960759907764, "grad_norm": 0.8203125, "learning_rate": 0.00021116678437929883, "loss": 0.1981, "step": 91242 }, { "epoch": 0.16178315376438746, "grad_norm": 0.423828125, "learning_rate": 0.00021115743959629823, "loss": 0.1526, "step": 91244 }, { "epoch": 0.16178669992969727, "grad_norm": 0.4921875, "learning_rate": 0.00021114809870057872, "loss": 0.1609, "step": 91246 }, { "epoch": 0.16179024609500708, "grad_norm": 0.26953125, "learning_rate": 0.00021113876169218115, "loss": 0.161, "step": 91248 }, { "epoch": 0.1617937922603169, "grad_norm": 1.453125, "learning_rate": 0.00021112942857114638, "loss": 0.1502, "step": 91250 }, { "epoch": 0.1617973384256267, "grad_norm": 0.1328125, "learning_rate": 0.00021112009933751502, "loss": 0.1259, "step": 91252 }, { "epoch": 0.16180088459093653, "grad_norm": 0.515625, "learning_rate": 0.0002111107739913282, "loss": 0.2009, "step": 91254 }, { "epoch": 0.16180443075624634, "grad_norm": 0.369140625, "learning_rate": 0.00021110145253262636, "loss": 0.2306, "step": 91256 }, { "epoch": 0.16180797692155616, "grad_norm": 0.294921875, "learning_rate": 0.00021109213496145056, "loss": 0.1986, "step": 91258 }, { "epoch": 0.16181152308686597, "grad_norm": 0.51171875, "learning_rate": 0.0002110828212778413, "loss": 0.2224, "step": 91260 }, { "epoch": 0.16181506925217579, "grad_norm": 0.3125, "learning_rate": 0.0002110735114818395, "loss": 0.2033, "step": 91262 }, { "epoch": 0.1618186154174856, "grad_norm": 0.419921875, "learning_rate": 0.0002110642055734858, "loss": 0.2216, "step": 91264 }, { "epoch": 0.16182216158279542, "grad_norm": 0.2314453125, "learning_rate": 0.00021105490355282096, "loss": 0.1519, "step": 91266 }, { "epoch": 0.16182570774810523, "grad_norm": 0.6171875, "learning_rate": 0.00021104560541988554, "loss": 0.2069, "step": 91268 }, { "epoch": 0.16182925391341504, "grad_norm": 1.0, "learning_rate": 0.00021103631117472035, "loss": 0.1786, "step": 91270 }, { "epoch": 0.16183280007872486, "grad_norm": 0.494140625, "learning_rate": 0.00021102702081736595, "loss": 0.1713, "step": 91272 }, { "epoch": 0.16183634624403467, "grad_norm": 0.2470703125, "learning_rate": 0.00021101773434786288, "loss": 0.172, "step": 91274 }, { "epoch": 0.1618398924093445, "grad_norm": 0.6484375, "learning_rate": 0.000211008451766252, "loss": 0.1532, "step": 91276 }, { "epoch": 0.1618434385746543, "grad_norm": 0.2275390625, "learning_rate": 0.0002109991730725736, "loss": 0.3257, "step": 91278 }, { "epoch": 0.16184698473996412, "grad_norm": 0.46484375, "learning_rate": 0.00021098989826686854, "loss": 0.2403, "step": 91280 }, { "epoch": 0.16185053090527396, "grad_norm": 0.9140625, "learning_rate": 0.00021098062734917723, "loss": 0.2946, "step": 91282 }, { "epoch": 0.16185407707058377, "grad_norm": 0.41796875, "learning_rate": 0.00021097136031954033, "loss": 0.1896, "step": 91284 }, { "epoch": 0.1618576232358936, "grad_norm": 0.55859375, "learning_rate": 0.00021096209717799824, "loss": 0.1435, "step": 91286 }, { "epoch": 0.1618611694012034, "grad_norm": 0.75, "learning_rate": 0.00021095283792459143, "loss": 0.1753, "step": 91288 }, { "epoch": 0.16186471556651322, "grad_norm": 1.5625, "learning_rate": 0.00021094358255936062, "loss": 0.2508, "step": 91290 }, { "epoch": 0.16186826173182303, "grad_norm": 1.0625, "learning_rate": 0.00021093433108234613, "loss": 0.1808, "step": 91292 }, { "epoch": 0.16187180789713285, "grad_norm": 1.0703125, "learning_rate": 0.0002109250834935884, "loss": 0.3175, "step": 91294 }, { "epoch": 0.16187535406244266, "grad_norm": 0.365234375, "learning_rate": 0.0002109158397931279, "loss": 0.2133, "step": 91296 }, { "epoch": 0.16187890022775248, "grad_norm": 0.302734375, "learning_rate": 0.00021090659998100508, "loss": 0.1967, "step": 91298 }, { "epoch": 0.1618824463930623, "grad_norm": 1.5703125, "learning_rate": 0.0002108973640572604, "loss": 0.2575, "step": 91300 }, { "epoch": 0.1618859925583721, "grad_norm": 0.703125, "learning_rate": 0.0002108881320219342, "loss": 0.2116, "step": 91302 }, { "epoch": 0.16188953872368192, "grad_norm": 0.220703125, "learning_rate": 0.00021087890387506684, "loss": 0.175, "step": 91304 }, { "epoch": 0.16189308488899173, "grad_norm": 1.453125, "learning_rate": 0.0002108696796166987, "loss": 0.1888, "step": 91306 }, { "epoch": 0.16189663105430155, "grad_norm": 0.2470703125, "learning_rate": 0.00021086045924687005, "loss": 0.155, "step": 91308 }, { "epoch": 0.16190017721961136, "grad_norm": 0.76171875, "learning_rate": 0.00021085124276562137, "loss": 0.1035, "step": 91310 }, { "epoch": 0.16190372338492118, "grad_norm": 0.4453125, "learning_rate": 0.0002108420301729928, "loss": 0.3089, "step": 91312 }, { "epoch": 0.161907269550231, "grad_norm": 0.66015625, "learning_rate": 0.00021083282146902474, "loss": 0.1718, "step": 91314 }, { "epoch": 0.1619108157155408, "grad_norm": 0.86328125, "learning_rate": 0.00021082361665375745, "loss": 0.2345, "step": 91316 }, { "epoch": 0.16191436188085062, "grad_norm": 1.9453125, "learning_rate": 0.00021081441572723107, "loss": 0.1875, "step": 91318 }, { "epoch": 0.16191790804616044, "grad_norm": 0.310546875, "learning_rate": 0.00021080521868948606, "loss": 0.1707, "step": 91320 }, { "epoch": 0.16192145421147025, "grad_norm": 0.359375, "learning_rate": 0.0002107960255405624, "loss": 0.2217, "step": 91322 }, { "epoch": 0.16192500037678006, "grad_norm": 0.57421875, "learning_rate": 0.0002107868362805005, "loss": 0.2292, "step": 91324 }, { "epoch": 0.16192854654208988, "grad_norm": 0.78125, "learning_rate": 0.0002107776509093404, "loss": 0.2066, "step": 91326 }, { "epoch": 0.1619320927073997, "grad_norm": 0.35546875, "learning_rate": 0.00021076846942712238, "loss": 0.1201, "step": 91328 }, { "epoch": 0.1619356388727095, "grad_norm": 0.2373046875, "learning_rate": 0.00021075929183388652, "loss": 0.1604, "step": 91330 }, { "epoch": 0.16193918503801932, "grad_norm": 0.31640625, "learning_rate": 0.00021075011812967304, "loss": 0.1644, "step": 91332 }, { "epoch": 0.16194273120332914, "grad_norm": 0.4609375, "learning_rate": 0.00021074094831452194, "loss": 0.1959, "step": 91334 }, { "epoch": 0.16194627736863895, "grad_norm": 0.765625, "learning_rate": 0.00021073178238847337, "loss": 0.2643, "step": 91336 }, { "epoch": 0.16194982353394877, "grad_norm": 0.37890625, "learning_rate": 0.00021072262035156753, "loss": 0.1832, "step": 91338 }, { "epoch": 0.16195336969925858, "grad_norm": 1.015625, "learning_rate": 0.00021071346220384436, "loss": 0.2619, "step": 91340 }, { "epoch": 0.1619569158645684, "grad_norm": 0.447265625, "learning_rate": 0.00021070430794534393, "loss": 0.1776, "step": 91342 }, { "epoch": 0.1619604620298782, "grad_norm": 0.25390625, "learning_rate": 0.0002106951575761063, "loss": 0.2486, "step": 91344 }, { "epoch": 0.16196400819518803, "grad_norm": 0.185546875, "learning_rate": 0.00021068601109617148, "loss": 0.1532, "step": 91346 }, { "epoch": 0.16196755436049784, "grad_norm": 1.5625, "learning_rate": 0.00021067686850557947, "loss": 0.235, "step": 91348 }, { "epoch": 0.16197110052580765, "grad_norm": 0.173828125, "learning_rate": 0.0002106677298043702, "loss": 0.144, "step": 91350 }, { "epoch": 0.16197464669111747, "grad_norm": 0.365234375, "learning_rate": 0.0002106585949925838, "loss": 0.431, "step": 91352 }, { "epoch": 0.16197819285642728, "grad_norm": 0.65625, "learning_rate": 0.00021064946407026007, "loss": 0.2136, "step": 91354 }, { "epoch": 0.1619817390217371, "grad_norm": 0.134765625, "learning_rate": 0.00021064033703743903, "loss": 0.1647, "step": 91356 }, { "epoch": 0.1619852851870469, "grad_norm": 0.373046875, "learning_rate": 0.0002106312138941604, "loss": 0.1727, "step": 91358 }, { "epoch": 0.16198883135235673, "grad_norm": 1.21875, "learning_rate": 0.00021062209464046435, "loss": 0.1865, "step": 91360 }, { "epoch": 0.16199237751766654, "grad_norm": 0.37109375, "learning_rate": 0.00021061297927639066, "loss": 0.2072, "step": 91362 }, { "epoch": 0.16199592368297636, "grad_norm": 0.32421875, "learning_rate": 0.0002106038678019791, "loss": 0.1474, "step": 91364 }, { "epoch": 0.16199946984828617, "grad_norm": 0.2314453125, "learning_rate": 0.0002105947602172697, "loss": 0.1498, "step": 91366 }, { "epoch": 0.16200301601359599, "grad_norm": 0.35546875, "learning_rate": 0.00021058565652230213, "loss": 0.1709, "step": 91368 }, { "epoch": 0.1620065621789058, "grad_norm": 0.94921875, "learning_rate": 0.00021057655671711625, "loss": 0.1791, "step": 91370 }, { "epoch": 0.16201010834421564, "grad_norm": 1.078125, "learning_rate": 0.00021056746080175184, "loss": 0.2392, "step": 91372 }, { "epoch": 0.16201365450952546, "grad_norm": 0.35546875, "learning_rate": 0.00021055836877624886, "loss": 0.169, "step": 91374 }, { "epoch": 0.16201720067483527, "grad_norm": 0.51953125, "learning_rate": 0.0002105492806406468, "loss": 0.1693, "step": 91376 }, { "epoch": 0.16202074684014509, "grad_norm": 0.37890625, "learning_rate": 0.0002105401963949856, "loss": 0.1817, "step": 91378 }, { "epoch": 0.1620242930054549, "grad_norm": 0.5078125, "learning_rate": 0.00021053111603930492, "loss": 0.185, "step": 91380 }, { "epoch": 0.16202783917076471, "grad_norm": 0.3984375, "learning_rate": 0.00021052203957364445, "loss": 0.1645, "step": 91382 }, { "epoch": 0.16203138533607453, "grad_norm": 0.244140625, "learning_rate": 0.00021051296699804397, "loss": 0.1798, "step": 91384 }, { "epoch": 0.16203493150138434, "grad_norm": 0.28515625, "learning_rate": 0.00021050389831254294, "loss": 0.3182, "step": 91386 }, { "epoch": 0.16203847766669416, "grad_norm": 0.3984375, "learning_rate": 0.00021049483351718128, "loss": 0.1736, "step": 91388 }, { "epoch": 0.16204202383200397, "grad_norm": 0.68359375, "learning_rate": 0.00021048577261199852, "loss": 0.1684, "step": 91390 }, { "epoch": 0.1620455699973138, "grad_norm": 0.498046875, "learning_rate": 0.00021047671559703431, "loss": 0.1158, "step": 91392 }, { "epoch": 0.1620491161626236, "grad_norm": 0.2119140625, "learning_rate": 0.00021046766247232823, "loss": 0.156, "step": 91394 }, { "epoch": 0.16205266232793342, "grad_norm": 0.44921875, "learning_rate": 0.00021045861323791993, "loss": 0.1858, "step": 91396 }, { "epoch": 0.16205620849324323, "grad_norm": 1.75, "learning_rate": 0.00021044956789384897, "loss": 0.2387, "step": 91398 }, { "epoch": 0.16205975465855305, "grad_norm": 0.1435546875, "learning_rate": 0.0002104405264401548, "loss": 0.2625, "step": 91400 }, { "epoch": 0.16206330082386286, "grad_norm": 0.353515625, "learning_rate": 0.00021043148887687707, "loss": 0.1667, "step": 91402 }, { "epoch": 0.16206684698917267, "grad_norm": 0.2333984375, "learning_rate": 0.0002104224552040553, "loss": 0.1788, "step": 91404 }, { "epoch": 0.1620703931544825, "grad_norm": 1.15625, "learning_rate": 0.00021041342542172902, "loss": 0.1764, "step": 91406 }, { "epoch": 0.1620739393197923, "grad_norm": 0.326171875, "learning_rate": 0.00021040439952993767, "loss": 0.1669, "step": 91408 }, { "epoch": 0.16207748548510212, "grad_norm": 0.396484375, "learning_rate": 0.00021039537752872076, "loss": 0.3094, "step": 91410 }, { "epoch": 0.16208103165041193, "grad_norm": 0.1904296875, "learning_rate": 0.00021038635941811757, "loss": 0.1639, "step": 91412 }, { "epoch": 0.16208457781572175, "grad_norm": 1.0234375, "learning_rate": 0.00021037734519816788, "loss": 0.2245, "step": 91414 }, { "epoch": 0.16208812398103156, "grad_norm": 0.31640625, "learning_rate": 0.0002103683348689108, "loss": 0.1885, "step": 91416 }, { "epoch": 0.16209167014634138, "grad_norm": 0.4140625, "learning_rate": 0.0002103593284303859, "loss": 0.1781, "step": 91418 }, { "epoch": 0.1620952163116512, "grad_norm": 0.421875, "learning_rate": 0.00021035032588263264, "loss": 0.1482, "step": 91420 }, { "epoch": 0.162098762476961, "grad_norm": 0.466796875, "learning_rate": 0.00021034132722569016, "loss": 0.1481, "step": 91422 }, { "epoch": 0.16210230864227082, "grad_norm": 0.384765625, "learning_rate": 0.00021033233245959802, "loss": 0.1439, "step": 91424 }, { "epoch": 0.16210585480758063, "grad_norm": 0.322265625, "learning_rate": 0.00021032334158439537, "loss": 0.1933, "step": 91426 }, { "epoch": 0.16210940097289045, "grad_norm": 0.67578125, "learning_rate": 0.00021031435460012172, "loss": 0.2978, "step": 91428 }, { "epoch": 0.16211294713820026, "grad_norm": 0.66796875, "learning_rate": 0.0002103053715068163, "loss": 0.1765, "step": 91430 }, { "epoch": 0.16211649330351008, "grad_norm": 0.2109375, "learning_rate": 0.00021029639230451839, "loss": 0.1673, "step": 91432 }, { "epoch": 0.1621200394688199, "grad_norm": 0.5390625, "learning_rate": 0.00021028741699326717, "loss": 0.2109, "step": 91434 }, { "epoch": 0.1621235856341297, "grad_norm": 0.53515625, "learning_rate": 0.000210278445573102, "loss": 0.1709, "step": 91436 }, { "epoch": 0.16212713179943952, "grad_norm": 0.169921875, "learning_rate": 0.0002102694780440622, "loss": 0.1672, "step": 91438 }, { "epoch": 0.16213067796474934, "grad_norm": 1.171875, "learning_rate": 0.00021026051440618682, "loss": 0.2066, "step": 91440 }, { "epoch": 0.16213422413005915, "grad_norm": 0.390625, "learning_rate": 0.00021025155465951515, "loss": 0.2149, "step": 91442 }, { "epoch": 0.16213777029536897, "grad_norm": 1.734375, "learning_rate": 0.00021024259880408628, "loss": 0.2361, "step": 91444 }, { "epoch": 0.16214131646067878, "grad_norm": 0.384765625, "learning_rate": 0.0002102336468399395, "loss": 0.2173, "step": 91446 }, { "epoch": 0.1621448626259886, "grad_norm": 0.1962890625, "learning_rate": 0.00021022469876711403, "loss": 0.1833, "step": 91448 }, { "epoch": 0.1621484087912984, "grad_norm": 0.65234375, "learning_rate": 0.00021021575458564877, "loss": 0.2377, "step": 91450 }, { "epoch": 0.16215195495660822, "grad_norm": 0.69921875, "learning_rate": 0.0002102068142955829, "loss": 0.2724, "step": 91452 }, { "epoch": 0.16215550112191804, "grad_norm": 0.7578125, "learning_rate": 0.0002101978778969556, "loss": 0.2241, "step": 91454 }, { "epoch": 0.16215904728722785, "grad_norm": 0.53515625, "learning_rate": 0.00021018894538980595, "loss": 0.1929, "step": 91456 }, { "epoch": 0.16216259345253767, "grad_norm": 0.93359375, "learning_rate": 0.000210180016774173, "loss": 0.1999, "step": 91458 }, { "epoch": 0.16216613961784748, "grad_norm": 0.423828125, "learning_rate": 0.00021017109205009586, "loss": 0.3371, "step": 91460 }, { "epoch": 0.1621696857831573, "grad_norm": 0.26171875, "learning_rate": 0.00021016217121761334, "loss": 0.1839, "step": 91462 }, { "epoch": 0.16217323194846714, "grad_norm": 0.337890625, "learning_rate": 0.00021015325427676474, "loss": 0.1911, "step": 91464 }, { "epoch": 0.16217677811377695, "grad_norm": 0.44921875, "learning_rate": 0.0002101443412275888, "loss": 0.1889, "step": 91466 }, { "epoch": 0.16218032427908677, "grad_norm": 0.28515625, "learning_rate": 0.00021013543207012465, "loss": 0.1624, "step": 91468 }, { "epoch": 0.16218387044439658, "grad_norm": 0.2373046875, "learning_rate": 0.0002101265268044112, "loss": 0.1702, "step": 91470 }, { "epoch": 0.1621874166097064, "grad_norm": 0.34375, "learning_rate": 0.00021011762543048755, "loss": 0.2003, "step": 91472 }, { "epoch": 0.1621909627750162, "grad_norm": 0.33203125, "learning_rate": 0.00021010872794839238, "loss": 0.1739, "step": 91474 }, { "epoch": 0.16219450894032603, "grad_norm": 0.2236328125, "learning_rate": 0.0002100998343581648, "loss": 0.1871, "step": 91476 }, { "epoch": 0.16219805510563584, "grad_norm": 0.1904296875, "learning_rate": 0.00021009094465984364, "loss": 0.1826, "step": 91478 }, { "epoch": 0.16220160127094566, "grad_norm": 0.92578125, "learning_rate": 0.0002100820588534677, "loss": 0.2208, "step": 91480 }, { "epoch": 0.16220514743625547, "grad_norm": 0.4765625, "learning_rate": 0.00021007317693907596, "loss": 0.1636, "step": 91482 }, { "epoch": 0.16220869360156528, "grad_norm": 0.498046875, "learning_rate": 0.00021006429891670707, "loss": 0.1577, "step": 91484 }, { "epoch": 0.1622122397668751, "grad_norm": 1.453125, "learning_rate": 0.00021005542478640018, "loss": 0.1373, "step": 91486 }, { "epoch": 0.1622157859321849, "grad_norm": 0.88671875, "learning_rate": 0.00021004655454819385, "loss": 0.1634, "step": 91488 }, { "epoch": 0.16221933209749473, "grad_norm": 1.6796875, "learning_rate": 0.0002100376882021269, "loss": 0.2158, "step": 91490 }, { "epoch": 0.16222287826280454, "grad_norm": 0.435546875, "learning_rate": 0.00021002882574823825, "loss": 0.147, "step": 91492 }, { "epoch": 0.16222642442811436, "grad_norm": 0.458984375, "learning_rate": 0.00021001996718656644, "loss": 0.1574, "step": 91494 }, { "epoch": 0.16222997059342417, "grad_norm": 0.60546875, "learning_rate": 0.00021001111251715043, "loss": 0.1982, "step": 91496 }, { "epoch": 0.162233516758734, "grad_norm": 0.30078125, "learning_rate": 0.00021000226174002883, "loss": 0.1721, "step": 91498 }, { "epoch": 0.1622370629240438, "grad_norm": 0.494140625, "learning_rate": 0.0002099934148552404, "loss": 0.1735, "step": 91500 }, { "epoch": 0.16224060908935362, "grad_norm": 0.2470703125, "learning_rate": 0.00020998457186282367, "loss": 0.2655, "step": 91502 }, { "epoch": 0.16224415525466343, "grad_norm": 0.57421875, "learning_rate": 0.00020997573276281765, "loss": 0.2681, "step": 91504 }, { "epoch": 0.16224770141997324, "grad_norm": 0.3046875, "learning_rate": 0.00020996689755526057, "loss": 0.1296, "step": 91506 }, { "epoch": 0.16225124758528306, "grad_norm": 0.3828125, "learning_rate": 0.00020995806624019134, "loss": 0.1709, "step": 91508 }, { "epoch": 0.16225479375059287, "grad_norm": 0.4609375, "learning_rate": 0.0002099492388176486, "loss": 0.1432, "step": 91510 }, { "epoch": 0.1622583399159027, "grad_norm": 0.625, "learning_rate": 0.00020994041528767083, "loss": 0.157, "step": 91512 }, { "epoch": 0.1622618860812125, "grad_norm": 0.498046875, "learning_rate": 0.0002099315956502967, "loss": 0.1556, "step": 91514 }, { "epoch": 0.16226543224652232, "grad_norm": 0.333984375, "learning_rate": 0.0002099227799055647, "loss": 0.2, "step": 91516 }, { "epoch": 0.16226897841183213, "grad_norm": 0.55859375, "learning_rate": 0.00020991396805351357, "loss": 0.2032, "step": 91518 }, { "epoch": 0.16227252457714195, "grad_norm": 0.36328125, "learning_rate": 0.00020990516009418168, "loss": 0.1698, "step": 91520 }, { "epoch": 0.16227607074245176, "grad_norm": 0.3125, "learning_rate": 0.00020989635602760749, "loss": 0.1858, "step": 91522 }, { "epoch": 0.16227961690776158, "grad_norm": 0.275390625, "learning_rate": 0.0002098875558538296, "loss": 0.1, "step": 91524 }, { "epoch": 0.1622831630730714, "grad_norm": 1.578125, "learning_rate": 0.00020987875957288667, "loss": 0.3212, "step": 91526 }, { "epoch": 0.1622867092383812, "grad_norm": 0.28125, "learning_rate": 0.00020986996718481686, "loss": 0.1725, "step": 91528 }, { "epoch": 0.16229025540369102, "grad_norm": 1.1953125, "learning_rate": 0.00020986117868965884, "loss": 0.1763, "step": 91530 }, { "epoch": 0.16229380156900083, "grad_norm": 0.55078125, "learning_rate": 0.000209852394087451, "loss": 0.1836, "step": 91532 }, { "epoch": 0.16229734773431065, "grad_norm": 0.29296875, "learning_rate": 0.00020984361337823154, "loss": 0.1635, "step": 91534 }, { "epoch": 0.16230089389962046, "grad_norm": 0.94921875, "learning_rate": 0.00020983483656203923, "loss": 0.4163, "step": 91536 }, { "epoch": 0.16230444006493028, "grad_norm": 0.86328125, "learning_rate": 0.00020982606363891218, "loss": 0.1685, "step": 91538 }, { "epoch": 0.1623079862302401, "grad_norm": 0.2265625, "learning_rate": 0.00020981729460888896, "loss": 0.1665, "step": 91540 }, { "epoch": 0.1623115323955499, "grad_norm": 0.474609375, "learning_rate": 0.00020980852947200765, "loss": 0.1502, "step": 91542 }, { "epoch": 0.16231507856085972, "grad_norm": 0.345703125, "learning_rate": 0.00020979976822830694, "loss": 0.1446, "step": 91544 }, { "epoch": 0.16231862472616954, "grad_norm": 0.53515625, "learning_rate": 0.00020979101087782484, "loss": 0.1284, "step": 91546 }, { "epoch": 0.16232217089147935, "grad_norm": 0.76171875, "learning_rate": 0.00020978225742059977, "loss": 0.2743, "step": 91548 }, { "epoch": 0.16232571705678916, "grad_norm": 0.498046875, "learning_rate": 0.00020977350785667, "loss": 0.2613, "step": 91550 }, { "epoch": 0.16232926322209898, "grad_norm": 0.427734375, "learning_rate": 0.00020976476218607378, "loss": 0.1577, "step": 91552 }, { "epoch": 0.16233280938740882, "grad_norm": 0.54296875, "learning_rate": 0.0002097560204088495, "loss": 0.1316, "step": 91554 }, { "epoch": 0.16233635555271864, "grad_norm": 0.54296875, "learning_rate": 0.0002097472825250352, "loss": 0.2141, "step": 91556 }, { "epoch": 0.16233990171802845, "grad_norm": 0.271484375, "learning_rate": 0.00020973854853466913, "loss": 0.1426, "step": 91558 }, { "epoch": 0.16234344788333827, "grad_norm": 0.28515625, "learning_rate": 0.00020972981843778954, "loss": 0.1111, "step": 91560 }, { "epoch": 0.16234699404864808, "grad_norm": 0.224609375, "learning_rate": 0.00020972109223443464, "loss": 0.1723, "step": 91562 }, { "epoch": 0.1623505402139579, "grad_norm": 1.09375, "learning_rate": 0.00020971236992464251, "loss": 0.2196, "step": 91564 }, { "epoch": 0.1623540863792677, "grad_norm": 0.45703125, "learning_rate": 0.00020970365150845132, "loss": 0.1799, "step": 91566 }, { "epoch": 0.16235763254457752, "grad_norm": 0.75, "learning_rate": 0.00020969493698589926, "loss": 0.1639, "step": 91568 }, { "epoch": 0.16236117870988734, "grad_norm": 1.3125, "learning_rate": 0.00020968622635702432, "loss": 0.1623, "step": 91570 }, { "epoch": 0.16236472487519715, "grad_norm": 0.4296875, "learning_rate": 0.0002096775196218648, "loss": 0.1932, "step": 91572 }, { "epoch": 0.16236827104050697, "grad_norm": 1.2734375, "learning_rate": 0.00020966881678045847, "loss": 0.1479, "step": 91574 }, { "epoch": 0.16237181720581678, "grad_norm": 0.29296875, "learning_rate": 0.0002096601178328437, "loss": 0.2017, "step": 91576 }, { "epoch": 0.1623753633711266, "grad_norm": 1.6015625, "learning_rate": 0.00020965142277905836, "loss": 0.3763, "step": 91578 }, { "epoch": 0.1623789095364364, "grad_norm": 0.466796875, "learning_rate": 0.0002096427316191406, "loss": 0.2168, "step": 91580 }, { "epoch": 0.16238245570174623, "grad_norm": 0.392578125, "learning_rate": 0.00020963404435312815, "loss": 0.237, "step": 91582 }, { "epoch": 0.16238600186705604, "grad_norm": 0.318359375, "learning_rate": 0.00020962536098105935, "loss": 0.1429, "step": 91584 }, { "epoch": 0.16238954803236585, "grad_norm": 1.140625, "learning_rate": 0.00020961668150297204, "loss": 0.2628, "step": 91586 }, { "epoch": 0.16239309419767567, "grad_norm": 2.390625, "learning_rate": 0.00020960800591890416, "loss": 0.2665, "step": 91588 }, { "epoch": 0.16239664036298548, "grad_norm": 0.3828125, "learning_rate": 0.0002095993342288937, "loss": 0.1361, "step": 91590 }, { "epoch": 0.1624001865282953, "grad_norm": 0.50390625, "learning_rate": 0.0002095906664329784, "loss": 0.2053, "step": 91592 }, { "epoch": 0.1624037326936051, "grad_norm": 0.306640625, "learning_rate": 0.00020958200253119648, "loss": 0.1446, "step": 91594 }, { "epoch": 0.16240727885891493, "grad_norm": 0.734375, "learning_rate": 0.00020957334252358557, "loss": 0.3057, "step": 91596 }, { "epoch": 0.16241082502422474, "grad_norm": 0.5, "learning_rate": 0.0002095646864101836, "loss": 0.1441, "step": 91598 }, { "epoch": 0.16241437118953456, "grad_norm": 0.13671875, "learning_rate": 0.00020955603419102846, "loss": 0.1555, "step": 91600 }, { "epoch": 0.16241791735484437, "grad_norm": 1.9921875, "learning_rate": 0.00020954738586615812, "loss": 0.1719, "step": 91602 }, { "epoch": 0.16242146352015419, "grad_norm": 2.140625, "learning_rate": 0.00020953874143561025, "loss": 0.2621, "step": 91604 }, { "epoch": 0.162425009685464, "grad_norm": 1.9921875, "learning_rate": 0.00020953010089942261, "loss": 0.2395, "step": 91606 }, { "epoch": 0.16242855585077381, "grad_norm": 0.1806640625, "learning_rate": 0.00020952146425763318, "loss": 0.1752, "step": 91608 }, { "epoch": 0.16243210201608363, "grad_norm": 0.5625, "learning_rate": 0.0002095128315102794, "loss": 0.1442, "step": 91610 }, { "epoch": 0.16243564818139344, "grad_norm": 0.59375, "learning_rate": 0.0002095042026573995, "loss": 0.1863, "step": 91612 }, { "epoch": 0.16243919434670326, "grad_norm": 0.287109375, "learning_rate": 0.00020949557769903078, "loss": 0.1438, "step": 91614 }, { "epoch": 0.16244274051201307, "grad_norm": 0.53125, "learning_rate": 0.00020948695663521117, "loss": 0.1766, "step": 91616 }, { "epoch": 0.1624462866773229, "grad_norm": 0.81640625, "learning_rate": 0.00020947833946597828, "loss": 0.1637, "step": 91618 }, { "epoch": 0.1624498328426327, "grad_norm": 0.2734375, "learning_rate": 0.00020946972619137003, "loss": 0.1414, "step": 91620 }, { "epoch": 0.16245337900794252, "grad_norm": 1.4375, "learning_rate": 0.00020946111681142377, "loss": 0.1769, "step": 91622 }, { "epoch": 0.16245692517325233, "grad_norm": 0.40234375, "learning_rate": 0.00020945251132617736, "loss": 0.1594, "step": 91624 }, { "epoch": 0.16246047133856215, "grad_norm": 0.45703125, "learning_rate": 0.00020944390973566833, "loss": 0.187, "step": 91626 }, { "epoch": 0.16246401750387196, "grad_norm": 0.1787109375, "learning_rate": 0.00020943531203993445, "loss": 0.1961, "step": 91628 }, { "epoch": 0.16246756366918177, "grad_norm": 0.380859375, "learning_rate": 0.00020942671823901307, "loss": 0.1779, "step": 91630 }, { "epoch": 0.1624711098344916, "grad_norm": 0.56640625, "learning_rate": 0.00020941812833294204, "loss": 0.2318, "step": 91632 }, { "epoch": 0.1624746559998014, "grad_norm": 0.53125, "learning_rate": 0.0002094095423217587, "loss": 0.1678, "step": 91634 }, { "epoch": 0.16247820216511122, "grad_norm": 2.4375, "learning_rate": 0.00020940096020550074, "loss": 0.1721, "step": 91636 }, { "epoch": 0.16248174833042103, "grad_norm": 2.734375, "learning_rate": 0.00020939238198420577, "loss": 0.1437, "step": 91638 }, { "epoch": 0.16248529449573085, "grad_norm": 0.93359375, "learning_rate": 0.00020938380765791098, "loss": 0.1463, "step": 91640 }, { "epoch": 0.16248884066104066, "grad_norm": 0.64453125, "learning_rate": 0.00020937523722665418, "loss": 0.2022, "step": 91642 }, { "epoch": 0.16249238682635048, "grad_norm": 0.392578125, "learning_rate": 0.0002093666706904729, "loss": 0.1979, "step": 91644 }, { "epoch": 0.16249593299166032, "grad_norm": 0.65625, "learning_rate": 0.00020935810804940431, "loss": 0.2837, "step": 91646 }, { "epoch": 0.16249947915697013, "grad_norm": 0.5234375, "learning_rate": 0.000209349549303486, "loss": 0.2091, "step": 91648 }, { "epoch": 0.16250302532227995, "grad_norm": 0.34765625, "learning_rate": 0.00020934099445275538, "loss": 0.1745, "step": 91650 }, { "epoch": 0.16250657148758976, "grad_norm": 0.341796875, "learning_rate": 0.0002093324434972501, "loss": 0.2097, "step": 91652 }, { "epoch": 0.16251011765289958, "grad_norm": 0.234375, "learning_rate": 0.00020932389643700717, "loss": 0.1883, "step": 91654 }, { "epoch": 0.1625136638182094, "grad_norm": 1.0703125, "learning_rate": 0.0002093153532720642, "loss": 0.1899, "step": 91656 }, { "epoch": 0.1625172099835192, "grad_norm": 1.4375, "learning_rate": 0.00020930681400245846, "loss": 0.2681, "step": 91658 }, { "epoch": 0.16252075614882902, "grad_norm": 0.271484375, "learning_rate": 0.00020929827862822736, "loss": 0.1891, "step": 91660 }, { "epoch": 0.16252430231413884, "grad_norm": 0.77734375, "learning_rate": 0.00020928974714940827, "loss": 0.1861, "step": 91662 }, { "epoch": 0.16252784847944865, "grad_norm": 0.388671875, "learning_rate": 0.00020928121956603833, "loss": 0.2091, "step": 91664 }, { "epoch": 0.16253139464475846, "grad_norm": 0.3359375, "learning_rate": 0.000209272695878155, "loss": 0.1735, "step": 91666 }, { "epoch": 0.16253494081006828, "grad_norm": 0.54296875, "learning_rate": 0.0002092641760857956, "loss": 0.1969, "step": 91668 }, { "epoch": 0.1625384869753781, "grad_norm": 0.236328125, "learning_rate": 0.00020925566018899715, "loss": 0.2238, "step": 91670 }, { "epoch": 0.1625420331406879, "grad_norm": 0.49609375, "learning_rate": 0.00020924714818779707, "loss": 0.1592, "step": 91672 }, { "epoch": 0.16254557930599772, "grad_norm": 0.8984375, "learning_rate": 0.0002092386400822326, "loss": 0.1853, "step": 91674 }, { "epoch": 0.16254912547130754, "grad_norm": 0.32421875, "learning_rate": 0.0002092301358723409, "loss": 0.1294, "step": 91676 }, { "epoch": 0.16255267163661735, "grad_norm": 0.330078125, "learning_rate": 0.00020922163555815918, "loss": 0.2185, "step": 91678 }, { "epoch": 0.16255621780192717, "grad_norm": 0.451171875, "learning_rate": 0.00020921313913972446, "loss": 0.3407, "step": 91680 }, { "epoch": 0.16255976396723698, "grad_norm": 0.498046875, "learning_rate": 0.00020920464661707415, "loss": 0.1849, "step": 91682 }, { "epoch": 0.1625633101325468, "grad_norm": 0.189453125, "learning_rate": 0.0002091961579902453, "loss": 0.207, "step": 91684 }, { "epoch": 0.1625668562978566, "grad_norm": 0.83984375, "learning_rate": 0.000209187673259275, "loss": 0.1831, "step": 91686 }, { "epoch": 0.16257040246316642, "grad_norm": 0.388671875, "learning_rate": 0.00020917919242420036, "loss": 0.1617, "step": 91688 }, { "epoch": 0.16257394862847624, "grad_norm": 0.330078125, "learning_rate": 0.00020917071548505844, "loss": 0.1856, "step": 91690 }, { "epoch": 0.16257749479378605, "grad_norm": 0.23828125, "learning_rate": 0.00020916224244188646, "loss": 0.1695, "step": 91692 }, { "epoch": 0.16258104095909587, "grad_norm": 0.5390625, "learning_rate": 0.0002091537732947213, "loss": 0.1949, "step": 91694 }, { "epoch": 0.16258458712440568, "grad_norm": 0.2060546875, "learning_rate": 0.0002091453080436, "loss": 0.3245, "step": 91696 }, { "epoch": 0.1625881332897155, "grad_norm": 0.328125, "learning_rate": 0.0002091368466885598, "loss": 0.1411, "step": 91698 }, { "epoch": 0.1625916794550253, "grad_norm": 0.66015625, "learning_rate": 0.0002091283892296374, "loss": 0.1609, "step": 91700 }, { "epoch": 0.16259522562033513, "grad_norm": 0.6015625, "learning_rate": 0.0002091199356668701, "loss": 0.1607, "step": 91702 }, { "epoch": 0.16259877178564494, "grad_norm": 0.5, "learning_rate": 0.00020911148600029467, "loss": 0.1443, "step": 91704 }, { "epoch": 0.16260231795095476, "grad_norm": 1.03125, "learning_rate": 0.0002091030402299481, "loss": 0.2707, "step": 91706 }, { "epoch": 0.16260586411626457, "grad_norm": 0.294921875, "learning_rate": 0.0002090945983558672, "loss": 0.1841, "step": 91708 }, { "epoch": 0.16260941028157438, "grad_norm": 4.75, "learning_rate": 0.00020908616037808932, "loss": 0.144, "step": 91710 }, { "epoch": 0.1626129564468842, "grad_norm": 0.60546875, "learning_rate": 0.00020907772629665076, "loss": 0.1655, "step": 91712 }, { "epoch": 0.162616502612194, "grad_norm": 0.384765625, "learning_rate": 0.00020906929611158893, "loss": 0.1876, "step": 91714 }, { "epoch": 0.16262004877750383, "grad_norm": 0.3046875, "learning_rate": 0.0002090608698229404, "loss": 0.1625, "step": 91716 }, { "epoch": 0.16262359494281364, "grad_norm": 0.5546875, "learning_rate": 0.0002090524474307421, "loss": 0.2851, "step": 91718 }, { "epoch": 0.16262714110812346, "grad_norm": 0.279296875, "learning_rate": 0.00020904402893503088, "loss": 0.1641, "step": 91720 }, { "epoch": 0.16263068727343327, "grad_norm": 0.396484375, "learning_rate": 0.00020903561433584356, "loss": 0.1769, "step": 91722 }, { "epoch": 0.1626342334387431, "grad_norm": 0.91015625, "learning_rate": 0.00020902720363321698, "loss": 0.1853, "step": 91724 }, { "epoch": 0.1626377796040529, "grad_norm": 3.578125, "learning_rate": 0.00020901879682718778, "loss": 0.2213, "step": 91726 }, { "epoch": 0.16264132576936272, "grad_norm": 0.56640625, "learning_rate": 0.00020901039391779287, "loss": 0.3109, "step": 91728 }, { "epoch": 0.16264487193467253, "grad_norm": 1.0390625, "learning_rate": 0.00020900199490506897, "loss": 0.2985, "step": 91730 }, { "epoch": 0.16264841809998234, "grad_norm": 0.7109375, "learning_rate": 0.0002089935997890527, "loss": 0.1327, "step": 91732 }, { "epoch": 0.16265196426529216, "grad_norm": 1.046875, "learning_rate": 0.00020898520856978099, "loss": 0.2062, "step": 91734 }, { "epoch": 0.162655510430602, "grad_norm": 0.37109375, "learning_rate": 0.00020897682124729035, "loss": 0.2048, "step": 91736 }, { "epoch": 0.16265905659591182, "grad_norm": 0.416015625, "learning_rate": 0.00020896843782161747, "loss": 0.1651, "step": 91738 }, { "epoch": 0.16266260276122163, "grad_norm": 0.2177734375, "learning_rate": 0.00020896005829279914, "loss": 0.2102, "step": 91740 }, { "epoch": 0.16266614892653145, "grad_norm": 0.466796875, "learning_rate": 0.00020895168266087203, "loss": 0.1464, "step": 91742 }, { "epoch": 0.16266969509184126, "grad_norm": 0.193359375, "learning_rate": 0.0002089433109258725, "loss": 0.128, "step": 91744 }, { "epoch": 0.16267324125715107, "grad_norm": 1.1953125, "learning_rate": 0.00020893494308783743, "loss": 0.2965, "step": 91746 }, { "epoch": 0.1626767874224609, "grad_norm": 1.1953125, "learning_rate": 0.00020892657914680325, "loss": 0.4056, "step": 91748 }, { "epoch": 0.1626803335877707, "grad_norm": 0.28515625, "learning_rate": 0.00020891821910280677, "loss": 0.2033, "step": 91750 }, { "epoch": 0.16268387975308052, "grad_norm": 0.1806640625, "learning_rate": 0.00020890986295588426, "loss": 0.123, "step": 91752 }, { "epoch": 0.16268742591839033, "grad_norm": 0.208984375, "learning_rate": 0.0002089015107060724, "loss": 0.144, "step": 91754 }, { "epoch": 0.16269097208370015, "grad_norm": 0.287109375, "learning_rate": 0.00020889316235340782, "loss": 0.1798, "step": 91756 }, { "epoch": 0.16269451824900996, "grad_norm": 0.31640625, "learning_rate": 0.00020888481789792685, "loss": 0.196, "step": 91758 }, { "epoch": 0.16269806441431978, "grad_norm": 0.41015625, "learning_rate": 0.00020887647733966612, "loss": 0.1895, "step": 91760 }, { "epoch": 0.1627016105796296, "grad_norm": 1.2109375, "learning_rate": 0.00020886814067866196, "loss": 0.24, "step": 91762 }, { "epoch": 0.1627051567449394, "grad_norm": 0.578125, "learning_rate": 0.00020885980791495105, "loss": 0.1774, "step": 91764 }, { "epoch": 0.16270870291024922, "grad_norm": 0.26171875, "learning_rate": 0.00020885147904856966, "loss": 0.1535, "step": 91766 }, { "epoch": 0.16271224907555903, "grad_norm": 0.62890625, "learning_rate": 0.00020884315407955422, "loss": 0.1843, "step": 91768 }, { "epoch": 0.16271579524086885, "grad_norm": 2.75, "learning_rate": 0.00020883483300794116, "loss": 0.2208, "step": 91770 }, { "epoch": 0.16271934140617866, "grad_norm": 0.361328125, "learning_rate": 0.00020882651583376699, "loss": 0.1648, "step": 91772 }, { "epoch": 0.16272288757148848, "grad_norm": 0.6484375, "learning_rate": 0.00020881820255706795, "loss": 0.1771, "step": 91774 }, { "epoch": 0.1627264337367983, "grad_norm": 0.361328125, "learning_rate": 0.00020880989317788033, "loss": 0.2988, "step": 91776 }, { "epoch": 0.1627299799021081, "grad_norm": 0.267578125, "learning_rate": 0.00020880158769624068, "loss": 0.2444, "step": 91778 }, { "epoch": 0.16273352606741792, "grad_norm": 2.21875, "learning_rate": 0.0002087932861121852, "loss": 0.3143, "step": 91780 }, { "epoch": 0.16273707223272774, "grad_norm": 0.44140625, "learning_rate": 0.00020878498842575016, "loss": 0.1697, "step": 91782 }, { "epoch": 0.16274061839803755, "grad_norm": 0.21484375, "learning_rate": 0.000208776694636972, "loss": 0.1688, "step": 91784 }, { "epoch": 0.16274416456334737, "grad_norm": 0.244140625, "learning_rate": 0.0002087684047458868, "loss": 0.1432, "step": 91786 }, { "epoch": 0.16274771072865718, "grad_norm": 0.404296875, "learning_rate": 0.00020876011875253087, "loss": 0.132, "step": 91788 }, { "epoch": 0.162751256893967, "grad_norm": 0.251953125, "learning_rate": 0.0002087518366569406, "loss": 0.1699, "step": 91790 }, { "epoch": 0.1627548030592768, "grad_norm": 0.5390625, "learning_rate": 0.00020874355845915205, "loss": 0.2607, "step": 91792 }, { "epoch": 0.16275834922458662, "grad_norm": 0.3359375, "learning_rate": 0.00020873528415920155, "loss": 0.1579, "step": 91794 }, { "epoch": 0.16276189538989644, "grad_norm": 0.408203125, "learning_rate": 0.00020872701375712507, "loss": 0.1915, "step": 91796 }, { "epoch": 0.16276544155520625, "grad_norm": 0.486328125, "learning_rate": 0.00020871874725295896, "loss": 0.1956, "step": 91798 }, { "epoch": 0.16276898772051607, "grad_norm": 0.2294921875, "learning_rate": 0.00020871048464673946, "loss": 0.1406, "step": 91800 }, { "epoch": 0.16277253388582588, "grad_norm": 0.4765625, "learning_rate": 0.00020870222593850237, "loss": 0.1914, "step": 91802 }, { "epoch": 0.1627760800511357, "grad_norm": 0.54296875, "learning_rate": 0.0002086939711282842, "loss": 0.2056, "step": 91804 }, { "epoch": 0.1627796262164455, "grad_norm": 0.9140625, "learning_rate": 0.0002086857202161207, "loss": 0.1684, "step": 91806 }, { "epoch": 0.16278317238175533, "grad_norm": 0.37109375, "learning_rate": 0.00020867747320204837, "loss": 0.3345, "step": 91808 }, { "epoch": 0.16278671854706514, "grad_norm": 3.171875, "learning_rate": 0.00020866923008610274, "loss": 0.3104, "step": 91810 }, { "epoch": 0.16279026471237495, "grad_norm": 0.62890625, "learning_rate": 0.0002086609908683203, "loss": 0.1872, "step": 91812 }, { "epoch": 0.16279381087768477, "grad_norm": 0.69921875, "learning_rate": 0.00020865275554873692, "loss": 0.1497, "step": 91814 }, { "epoch": 0.16279735704299458, "grad_norm": 1.203125, "learning_rate": 0.0002086445241273887, "loss": 0.1642, "step": 91816 }, { "epoch": 0.1628009032083044, "grad_norm": 1.7421875, "learning_rate": 0.00020863629660431145, "loss": 0.4431, "step": 91818 }, { "epoch": 0.1628044493736142, "grad_norm": 1.2109375, "learning_rate": 0.00020862807297954128, "loss": 0.2993, "step": 91820 }, { "epoch": 0.16280799553892403, "grad_norm": 1.46875, "learning_rate": 0.00020861985325311427, "loss": 0.3462, "step": 91822 }, { "epoch": 0.16281154170423384, "grad_norm": 0.25390625, "learning_rate": 0.0002086116374250662, "loss": 0.2003, "step": 91824 }, { "epoch": 0.16281508786954368, "grad_norm": 1.0, "learning_rate": 0.00020860342549543302, "loss": 0.1518, "step": 91826 }, { "epoch": 0.1628186340348535, "grad_norm": 0.6015625, "learning_rate": 0.00020859521746425058, "loss": 0.2106, "step": 91828 }, { "epoch": 0.1628221802001633, "grad_norm": 0.32421875, "learning_rate": 0.00020858701333155498, "loss": 0.1291, "step": 91830 }, { "epoch": 0.16282572636547313, "grad_norm": 0.337890625, "learning_rate": 0.00020857881309738197, "loss": 0.3085, "step": 91832 }, { "epoch": 0.16282927253078294, "grad_norm": 0.53125, "learning_rate": 0.00020857061676176746, "loss": 0.1597, "step": 91834 }, { "epoch": 0.16283281869609276, "grad_norm": 0.875, "learning_rate": 0.00020856242432474721, "loss": 0.2269, "step": 91836 }, { "epoch": 0.16283636486140257, "grad_norm": 0.6484375, "learning_rate": 0.00020855423578635714, "loss": 0.2102, "step": 91838 }, { "epoch": 0.1628399110267124, "grad_norm": 0.515625, "learning_rate": 0.0002085460511466331, "loss": 0.2438, "step": 91840 }, { "epoch": 0.1628434571920222, "grad_norm": 1.0703125, "learning_rate": 0.00020853787040561066, "loss": 0.1684, "step": 91842 }, { "epoch": 0.16284700335733202, "grad_norm": 0.515625, "learning_rate": 0.0002085296935633258, "loss": 0.1491, "step": 91844 }, { "epoch": 0.16285054952264183, "grad_norm": 0.6484375, "learning_rate": 0.00020852152061981425, "loss": 0.1325, "step": 91846 }, { "epoch": 0.16285409568795164, "grad_norm": 0.2392578125, "learning_rate": 0.00020851335157511187, "loss": 0.2473, "step": 91848 }, { "epoch": 0.16285764185326146, "grad_norm": 0.59765625, "learning_rate": 0.0002085051864292541, "loss": 0.2001, "step": 91850 }, { "epoch": 0.16286118801857127, "grad_norm": 0.30859375, "learning_rate": 0.0002084970251822768, "loss": 0.1932, "step": 91852 }, { "epoch": 0.1628647341838811, "grad_norm": 0.27734375, "learning_rate": 0.00020848886783421576, "loss": 0.1525, "step": 91854 }, { "epoch": 0.1628682803491909, "grad_norm": 0.36328125, "learning_rate": 0.0002084807143851065, "loss": 0.1782, "step": 91856 }, { "epoch": 0.16287182651450072, "grad_norm": 1.6796875, "learning_rate": 0.00020847256483498474, "loss": 0.1581, "step": 91858 }, { "epoch": 0.16287537267981053, "grad_norm": 0.423828125, "learning_rate": 0.00020846441918388612, "loss": 0.1622, "step": 91860 }, { "epoch": 0.16287891884512035, "grad_norm": 0.5625, "learning_rate": 0.00020845627743184625, "loss": 0.1364, "step": 91862 }, { "epoch": 0.16288246501043016, "grad_norm": 0.494140625, "learning_rate": 0.00020844813957890073, "loss": 0.1472, "step": 91864 }, { "epoch": 0.16288601117573998, "grad_norm": 0.2431640625, "learning_rate": 0.00020844000562508525, "loss": 0.1926, "step": 91866 }, { "epoch": 0.1628895573410498, "grad_norm": 0.232421875, "learning_rate": 0.00020843187557043524, "loss": 0.1548, "step": 91868 }, { "epoch": 0.1628931035063596, "grad_norm": 0.384765625, "learning_rate": 0.00020842374941498632, "loss": 0.1985, "step": 91870 }, { "epoch": 0.16289664967166942, "grad_norm": 0.609375, "learning_rate": 0.0002084156271587741, "loss": 0.2003, "step": 91872 }, { "epoch": 0.16290019583697923, "grad_norm": 0.255859375, "learning_rate": 0.00020840750880183402, "loss": 0.2111, "step": 91874 }, { "epoch": 0.16290374200228905, "grad_norm": 0.337890625, "learning_rate": 0.0002083993943442016, "loss": 0.1286, "step": 91876 }, { "epoch": 0.16290728816759886, "grad_norm": 0.236328125, "learning_rate": 0.00020839128378591221, "loss": 0.1321, "step": 91878 }, { "epoch": 0.16291083433290868, "grad_norm": 0.61328125, "learning_rate": 0.00020838317712700154, "loss": 0.1478, "step": 91880 }, { "epoch": 0.1629143804982185, "grad_norm": 0.453125, "learning_rate": 0.00020837507436750498, "loss": 0.1464, "step": 91882 }, { "epoch": 0.1629179266635283, "grad_norm": 0.416015625, "learning_rate": 0.00020836697550745786, "loss": 0.1775, "step": 91884 }, { "epoch": 0.16292147282883812, "grad_norm": 0.18359375, "learning_rate": 0.00020835888054689572, "loss": 0.2926, "step": 91886 }, { "epoch": 0.16292501899414794, "grad_norm": 1.6953125, "learning_rate": 0.00020835078948585375, "loss": 0.2934, "step": 91888 }, { "epoch": 0.16292856515945775, "grad_norm": 0.3515625, "learning_rate": 0.00020834270232436778, "loss": 0.1874, "step": 91890 }, { "epoch": 0.16293211132476756, "grad_norm": 0.3984375, "learning_rate": 0.00020833461906247263, "loss": 0.1511, "step": 91892 }, { "epoch": 0.16293565749007738, "grad_norm": 0.62109375, "learning_rate": 0.00020832653970020404, "loss": 0.1906, "step": 91894 }, { "epoch": 0.1629392036553872, "grad_norm": 0.5546875, "learning_rate": 0.00020831846423759713, "loss": 0.2168, "step": 91896 }, { "epoch": 0.162942749820697, "grad_norm": 0.2119140625, "learning_rate": 0.00020831039267468746, "loss": 0.1819, "step": 91898 }, { "epoch": 0.16294629598600682, "grad_norm": 2.296875, "learning_rate": 0.00020830232501151005, "loss": 0.2821, "step": 91900 }, { "epoch": 0.16294984215131664, "grad_norm": 0.2353515625, "learning_rate": 0.00020829426124810036, "loss": 0.1764, "step": 91902 }, { "epoch": 0.16295338831662645, "grad_norm": 0.2734375, "learning_rate": 0.0002082862013844936, "loss": 0.2621, "step": 91904 }, { "epoch": 0.16295693448193627, "grad_norm": 0.4609375, "learning_rate": 0.00020827814542072505, "loss": 0.1717, "step": 91906 }, { "epoch": 0.16296048064724608, "grad_norm": 0.302734375, "learning_rate": 0.0002082700933568299, "loss": 0.1781, "step": 91908 }, { "epoch": 0.1629640268125559, "grad_norm": 0.310546875, "learning_rate": 0.00020826204519284336, "loss": 0.1563, "step": 91910 }, { "epoch": 0.1629675729778657, "grad_norm": 0.71875, "learning_rate": 0.00020825400092880069, "loss": 0.2477, "step": 91912 }, { "epoch": 0.16297111914317552, "grad_norm": 0.33984375, "learning_rate": 0.00020824596056473693, "loss": 0.1558, "step": 91914 }, { "epoch": 0.16297466530848534, "grad_norm": 1.9765625, "learning_rate": 0.00020823792410068747, "loss": 0.2382, "step": 91916 }, { "epoch": 0.16297821147379518, "grad_norm": 0.2734375, "learning_rate": 0.00020822989153668726, "loss": 0.1615, "step": 91918 }, { "epoch": 0.162981757639105, "grad_norm": 0.431640625, "learning_rate": 0.00020822186287277155, "loss": 0.1755, "step": 91920 }, { "epoch": 0.1629853038044148, "grad_norm": 0.4921875, "learning_rate": 0.00020821383810897538, "loss": 0.3257, "step": 91922 }, { "epoch": 0.16298884996972463, "grad_norm": 0.6015625, "learning_rate": 0.00020820581724533384, "loss": 0.1907, "step": 91924 }, { "epoch": 0.16299239613503444, "grad_norm": 0.78515625, "learning_rate": 0.00020819780028188213, "loss": 0.1679, "step": 91926 }, { "epoch": 0.16299594230034425, "grad_norm": 0.32421875, "learning_rate": 0.00020818978721865504, "loss": 0.1954, "step": 91928 }, { "epoch": 0.16299948846565407, "grad_norm": 0.390625, "learning_rate": 0.00020818177805568804, "loss": 0.2394, "step": 91930 }, { "epoch": 0.16300303463096388, "grad_norm": 0.30078125, "learning_rate": 0.00020817377279301578, "loss": 0.1653, "step": 91932 }, { "epoch": 0.1630065807962737, "grad_norm": 1.1953125, "learning_rate": 0.0002081657714306734, "loss": 0.213, "step": 91934 }, { "epoch": 0.1630101269615835, "grad_norm": 0.55078125, "learning_rate": 0.00020815777396869593, "loss": 0.2687, "step": 91936 }, { "epoch": 0.16301367312689333, "grad_norm": 0.43359375, "learning_rate": 0.00020814978040711838, "loss": 0.211, "step": 91938 }, { "epoch": 0.16301721929220314, "grad_norm": 0.212890625, "learning_rate": 0.0002081417907459755, "loss": 0.156, "step": 91940 }, { "epoch": 0.16302076545751296, "grad_norm": 0.48046875, "learning_rate": 0.00020813380498530245, "loss": 0.1898, "step": 91942 }, { "epoch": 0.16302431162282277, "grad_norm": 0.65625, "learning_rate": 0.00020812582312513403, "loss": 0.1952, "step": 91944 }, { "epoch": 0.16302785778813259, "grad_norm": 0.271484375, "learning_rate": 0.00020811784516550522, "loss": 0.1625, "step": 91946 }, { "epoch": 0.1630314039534424, "grad_norm": 0.6015625, "learning_rate": 0.00020810987110645097, "loss": 0.1574, "step": 91948 }, { "epoch": 0.16303495011875221, "grad_norm": 0.3359375, "learning_rate": 0.00020810190094800593, "loss": 0.1959, "step": 91950 }, { "epoch": 0.16303849628406203, "grad_norm": 0.51171875, "learning_rate": 0.0002080939346902052, "loss": 0.2386, "step": 91952 }, { "epoch": 0.16304204244937184, "grad_norm": 0.365234375, "learning_rate": 0.00020808597233308346, "loss": 0.1414, "step": 91954 }, { "epoch": 0.16304558861468166, "grad_norm": 0.419921875, "learning_rate": 0.00020807801387667568, "loss": 0.1494, "step": 91956 }, { "epoch": 0.16304913477999147, "grad_norm": 0.38671875, "learning_rate": 0.00020807005932101642, "loss": 0.145, "step": 91958 }, { "epoch": 0.1630526809453013, "grad_norm": 0.40625, "learning_rate": 0.00020806210866614074, "loss": 0.1742, "step": 91960 }, { "epoch": 0.1630562271106111, "grad_norm": 0.275390625, "learning_rate": 0.00020805416191208337, "loss": 0.1888, "step": 91962 }, { "epoch": 0.16305977327592092, "grad_norm": 0.27734375, "learning_rate": 0.0002080462190588789, "loss": 0.1772, "step": 91964 }, { "epoch": 0.16306331944123073, "grad_norm": 0.19921875, "learning_rate": 0.00020803828010656217, "loss": 0.1357, "step": 91966 }, { "epoch": 0.16306686560654055, "grad_norm": 0.283203125, "learning_rate": 0.0002080303450551678, "loss": 0.2027, "step": 91968 }, { "epoch": 0.16307041177185036, "grad_norm": 0.75390625, "learning_rate": 0.0002080224139047307, "loss": 0.1366, "step": 91970 }, { "epoch": 0.16307395793716017, "grad_norm": 0.1875, "learning_rate": 0.00020801448665528545, "loss": 0.1228, "step": 91972 }, { "epoch": 0.16307750410247, "grad_norm": 3.671875, "learning_rate": 0.00020800656330686665, "loss": 0.3226, "step": 91974 }, { "epoch": 0.1630810502677798, "grad_norm": 0.451171875, "learning_rate": 0.00020799864385950896, "loss": 0.2299, "step": 91976 }, { "epoch": 0.16308459643308962, "grad_norm": 0.357421875, "learning_rate": 0.00020799072831324721, "loss": 0.1958, "step": 91978 }, { "epoch": 0.16308814259839943, "grad_norm": 0.69140625, "learning_rate": 0.00020798281666811578, "loss": 0.1535, "step": 91980 }, { "epoch": 0.16309168876370925, "grad_norm": 0.515625, "learning_rate": 0.00020797490892414934, "loss": 0.2161, "step": 91982 }, { "epoch": 0.16309523492901906, "grad_norm": 1.203125, "learning_rate": 0.00020796700508138256, "loss": 0.2597, "step": 91984 }, { "epoch": 0.16309878109432888, "grad_norm": 0.333984375, "learning_rate": 0.00020795910513984992, "loss": 0.1516, "step": 91986 }, { "epoch": 0.1631023272596387, "grad_norm": 0.66796875, "learning_rate": 0.0002079512090995861, "loss": 0.205, "step": 91988 }, { "epoch": 0.1631058734249485, "grad_norm": 1.5078125, "learning_rate": 0.00020794331696062535, "loss": 0.2031, "step": 91990 }, { "epoch": 0.16310941959025832, "grad_norm": 0.7265625, "learning_rate": 0.00020793542872300243, "loss": 0.1749, "step": 91992 }, { "epoch": 0.16311296575556813, "grad_norm": 0.404296875, "learning_rate": 0.00020792754438675177, "loss": 0.169, "step": 91994 }, { "epoch": 0.16311651192087795, "grad_norm": 0.41796875, "learning_rate": 0.0002079196639519079, "loss": 0.2272, "step": 91996 }, { "epoch": 0.16312005808618776, "grad_norm": 0.5078125, "learning_rate": 0.0002079117874185052, "loss": 0.4795, "step": 91998 }, { "epoch": 0.16312360425149758, "grad_norm": 0.376953125, "learning_rate": 0.00020790391478657812, "loss": 0.1308, "step": 92000 }, { "epoch": 0.1631271504168074, "grad_norm": 0.365234375, "learning_rate": 0.00020789604605616117, "loss": 0.1544, "step": 92002 }, { "epoch": 0.1631306965821172, "grad_norm": 0.474609375, "learning_rate": 0.0002078881812272888, "loss": 0.2176, "step": 92004 }, { "epoch": 0.16313424274742702, "grad_norm": 1.34375, "learning_rate": 0.0002078803202999952, "loss": 0.2793, "step": 92006 }, { "epoch": 0.16313778891273686, "grad_norm": 0.251953125, "learning_rate": 0.00020787246327431494, "loss": 0.1304, "step": 92008 }, { "epoch": 0.16314133507804668, "grad_norm": 0.9921875, "learning_rate": 0.00020786461015028226, "loss": 0.2001, "step": 92010 }, { "epoch": 0.1631448812433565, "grad_norm": 0.65625, "learning_rate": 0.0002078567609279316, "loss": 0.3947, "step": 92012 }, { "epoch": 0.1631484274086663, "grad_norm": 4.71875, "learning_rate": 0.00020784891560729724, "loss": 0.2812, "step": 92014 }, { "epoch": 0.16315197357397612, "grad_norm": 0.5546875, "learning_rate": 0.00020784107418841345, "loss": 0.2389, "step": 92016 }, { "epoch": 0.16315551973928594, "grad_norm": 1.265625, "learning_rate": 0.0002078332366713146, "loss": 0.1749, "step": 92018 }, { "epoch": 0.16315906590459575, "grad_norm": 0.24609375, "learning_rate": 0.00020782540305603505, "loss": 0.1688, "step": 92020 }, { "epoch": 0.16316261206990557, "grad_norm": 0.333984375, "learning_rate": 0.00020781757334260886, "loss": 0.2184, "step": 92022 }, { "epoch": 0.16316615823521538, "grad_norm": 0.302734375, "learning_rate": 0.00020780974753107034, "loss": 0.1722, "step": 92024 }, { "epoch": 0.1631697044005252, "grad_norm": 0.5546875, "learning_rate": 0.00020780192562145375, "loss": 0.1774, "step": 92026 }, { "epoch": 0.163173250565835, "grad_norm": 0.22265625, "learning_rate": 0.0002077941076137933, "loss": 0.1448, "step": 92028 }, { "epoch": 0.16317679673114482, "grad_norm": 0.357421875, "learning_rate": 0.00020778629350812317, "loss": 0.1551, "step": 92030 }, { "epoch": 0.16318034289645464, "grad_norm": 0.3671875, "learning_rate": 0.00020777848330447754, "loss": 0.1596, "step": 92032 }, { "epoch": 0.16318388906176445, "grad_norm": 0.51953125, "learning_rate": 0.0002077706770028906, "loss": 0.2035, "step": 92034 }, { "epoch": 0.16318743522707427, "grad_norm": 1.5703125, "learning_rate": 0.0002077628746033964, "loss": 0.2327, "step": 92036 }, { "epoch": 0.16319098139238408, "grad_norm": 0.337890625, "learning_rate": 0.00020775507610602914, "loss": 0.1741, "step": 92038 }, { "epoch": 0.1631945275576939, "grad_norm": 0.361328125, "learning_rate": 0.00020774728151082286, "loss": 0.1734, "step": 92040 }, { "epoch": 0.1631980737230037, "grad_norm": 0.306640625, "learning_rate": 0.00020773949081781174, "loss": 0.1891, "step": 92042 }, { "epoch": 0.16320161988831353, "grad_norm": 0.51953125, "learning_rate": 0.0002077317040270297, "loss": 0.1227, "step": 92044 }, { "epoch": 0.16320516605362334, "grad_norm": 1.109375, "learning_rate": 0.00020772392113851102, "loss": 0.1724, "step": 92046 }, { "epoch": 0.16320871221893316, "grad_norm": 0.208984375, "learning_rate": 0.00020771614215228955, "loss": 0.163, "step": 92048 }, { "epoch": 0.16321225838424297, "grad_norm": 0.6953125, "learning_rate": 0.00020770836706839944, "loss": 0.1925, "step": 92050 }, { "epoch": 0.16321580454955278, "grad_norm": 1.0546875, "learning_rate": 0.00020770059588687455, "loss": 0.2208, "step": 92052 }, { "epoch": 0.1632193507148626, "grad_norm": 0.77734375, "learning_rate": 0.00020769282860774897, "loss": 0.2741, "step": 92054 }, { "epoch": 0.1632228968801724, "grad_norm": 0.41015625, "learning_rate": 0.00020768506523105666, "loss": 0.1852, "step": 92056 }, { "epoch": 0.16322644304548223, "grad_norm": 0.419921875, "learning_rate": 0.0002076773057568315, "loss": 0.1454, "step": 92058 }, { "epoch": 0.16322998921079204, "grad_norm": 0.48828125, "learning_rate": 0.00020766955018510747, "loss": 0.1809, "step": 92060 }, { "epoch": 0.16323353537610186, "grad_norm": 0.251953125, "learning_rate": 0.00020766179851591855, "loss": 0.1405, "step": 92062 }, { "epoch": 0.16323708154141167, "grad_norm": 0.431640625, "learning_rate": 0.00020765405074929866, "loss": 0.2466, "step": 92064 }, { "epoch": 0.1632406277067215, "grad_norm": 0.369140625, "learning_rate": 0.00020764630688528143, "loss": 0.2996, "step": 92066 }, { "epoch": 0.1632441738720313, "grad_norm": 0.318359375, "learning_rate": 0.00020763856692390111, "loss": 0.1691, "step": 92068 }, { "epoch": 0.16324772003734112, "grad_norm": 0.4609375, "learning_rate": 0.0002076308308651913, "loss": 0.2238, "step": 92070 }, { "epoch": 0.16325126620265093, "grad_norm": 1.4140625, "learning_rate": 0.00020762309870918583, "loss": 0.1611, "step": 92072 }, { "epoch": 0.16325481236796074, "grad_norm": 0.359375, "learning_rate": 0.00020761537045591857, "loss": 0.1846, "step": 92074 }, { "epoch": 0.16325835853327056, "grad_norm": 0.35546875, "learning_rate": 0.00020760764610542335, "loss": 0.1556, "step": 92076 }, { "epoch": 0.16326190469858037, "grad_norm": 0.4765625, "learning_rate": 0.000207599925657734, "loss": 0.1851, "step": 92078 }, { "epoch": 0.1632654508638902, "grad_norm": 0.47265625, "learning_rate": 0.0002075922091128841, "loss": 0.1968, "step": 92080 }, { "epoch": 0.1632689970292, "grad_norm": 1.9375, "learning_rate": 0.0002075844964709075, "loss": 0.215, "step": 92082 }, { "epoch": 0.16327254319450982, "grad_norm": 0.5625, "learning_rate": 0.00020757678773183794, "loss": 0.1717, "step": 92084 }, { "epoch": 0.16327608935981963, "grad_norm": 0.62109375, "learning_rate": 0.00020756908289570924, "loss": 0.2065, "step": 92086 }, { "epoch": 0.16327963552512945, "grad_norm": 0.72265625, "learning_rate": 0.00020756138196255486, "loss": 0.2629, "step": 92088 }, { "epoch": 0.16328318169043926, "grad_norm": 0.55078125, "learning_rate": 0.0002075536849324086, "loss": 0.1522, "step": 92090 }, { "epoch": 0.16328672785574908, "grad_norm": 0.328125, "learning_rate": 0.00020754599180530426, "loss": 0.1321, "step": 92092 }, { "epoch": 0.1632902740210589, "grad_norm": 14.1875, "learning_rate": 0.00020753830258127525, "loss": 0.3636, "step": 92094 }, { "epoch": 0.1632938201863687, "grad_norm": 0.291015625, "learning_rate": 0.00020753061726035534, "loss": 0.2101, "step": 92096 }, { "epoch": 0.16329736635167855, "grad_norm": 0.62109375, "learning_rate": 0.00020752293584257807, "loss": 0.1473, "step": 92098 }, { "epoch": 0.16330091251698836, "grad_norm": 0.4765625, "learning_rate": 0.00020751525832797711, "loss": 0.1646, "step": 92100 }, { "epoch": 0.16330445868229818, "grad_norm": 0.25, "learning_rate": 0.00020750758471658586, "loss": 0.1887, "step": 92102 }, { "epoch": 0.163308004847608, "grad_norm": 0.6640625, "learning_rate": 0.0002074999150084382, "loss": 0.1637, "step": 92104 }, { "epoch": 0.1633115510129178, "grad_norm": 0.67578125, "learning_rate": 0.00020749224920356733, "loss": 0.1836, "step": 92106 }, { "epoch": 0.16331509717822762, "grad_norm": 0.3359375, "learning_rate": 0.00020748458730200702, "loss": 0.146, "step": 92108 }, { "epoch": 0.16331864334353743, "grad_norm": 5.34375, "learning_rate": 0.00020747692930379074, "loss": 0.178, "step": 92110 }, { "epoch": 0.16332218950884725, "grad_norm": 0.58984375, "learning_rate": 0.00020746927520895182, "loss": 0.1761, "step": 92112 }, { "epoch": 0.16332573567415706, "grad_norm": 0.337890625, "learning_rate": 0.00020746162501752397, "loss": 0.2898, "step": 92114 }, { "epoch": 0.16332928183946688, "grad_norm": 0.34765625, "learning_rate": 0.0002074539787295404, "loss": 0.1824, "step": 92116 }, { "epoch": 0.1633328280047767, "grad_norm": 0.8125, "learning_rate": 0.00020744633634503466, "loss": 0.2251, "step": 92118 }, { "epoch": 0.1633363741700865, "grad_norm": 0.494140625, "learning_rate": 0.00020743869786404035, "loss": 0.1632, "step": 92120 }, { "epoch": 0.16333992033539632, "grad_norm": 0.349609375, "learning_rate": 0.00020743106328659062, "loss": 0.1939, "step": 92122 }, { "epoch": 0.16334346650070614, "grad_norm": 0.482421875, "learning_rate": 0.00020742343261271885, "loss": 0.1662, "step": 92124 }, { "epoch": 0.16334701266601595, "grad_norm": 0.7265625, "learning_rate": 0.0002074158058424587, "loss": 0.2447, "step": 92126 }, { "epoch": 0.16335055883132577, "grad_norm": 1.578125, "learning_rate": 0.00020740818297584326, "loss": 0.2114, "step": 92128 }, { "epoch": 0.16335410499663558, "grad_norm": 0.345703125, "learning_rate": 0.0002074005640129059, "loss": 0.1045, "step": 92130 }, { "epoch": 0.1633576511619454, "grad_norm": 0.310546875, "learning_rate": 0.00020739294895368002, "loss": 0.176, "step": 92132 }, { "epoch": 0.1633611973272552, "grad_norm": 2.578125, "learning_rate": 0.00020738533779819895, "loss": 0.304, "step": 92134 }, { "epoch": 0.16336474349256502, "grad_norm": 0.94140625, "learning_rate": 0.0002073777305464959, "loss": 0.2259, "step": 92136 }, { "epoch": 0.16336828965787484, "grad_norm": 0.271484375, "learning_rate": 0.0002073701271986041, "loss": 0.1318, "step": 92138 }, { "epoch": 0.16337183582318465, "grad_norm": 0.28515625, "learning_rate": 0.00020736252775455693, "loss": 0.2123, "step": 92140 }, { "epoch": 0.16337538198849447, "grad_norm": 0.53515625, "learning_rate": 0.00020735493221438742, "loss": 0.264, "step": 92142 }, { "epoch": 0.16337892815380428, "grad_norm": 0.244140625, "learning_rate": 0.0002073473405781291, "loss": 0.1721, "step": 92144 }, { "epoch": 0.1633824743191141, "grad_norm": 0.2138671875, "learning_rate": 0.00020733975284581492, "loss": 0.2768, "step": 92146 }, { "epoch": 0.1633860204844239, "grad_norm": 0.2265625, "learning_rate": 0.0002073321690174782, "loss": 0.1665, "step": 92148 }, { "epoch": 0.16338956664973373, "grad_norm": 0.40625, "learning_rate": 0.000207324589093152, "loss": 0.1718, "step": 92150 }, { "epoch": 0.16339311281504354, "grad_norm": 0.2578125, "learning_rate": 0.0002073170130728695, "loss": 0.1866, "step": 92152 }, { "epoch": 0.16339665898035335, "grad_norm": 0.326171875, "learning_rate": 0.00020730944095666393, "loss": 0.1794, "step": 92154 }, { "epoch": 0.16340020514566317, "grad_norm": 0.185546875, "learning_rate": 0.0002073018727445682, "loss": 0.1579, "step": 92156 }, { "epoch": 0.16340375131097298, "grad_norm": 0.28125, "learning_rate": 0.0002072943084366156, "loss": 0.1703, "step": 92158 }, { "epoch": 0.1634072974762828, "grad_norm": 0.76953125, "learning_rate": 0.00020728674803283925, "loss": 0.1889, "step": 92160 }, { "epoch": 0.1634108436415926, "grad_norm": 0.27734375, "learning_rate": 0.00020727919153327198, "loss": 0.317, "step": 92162 }, { "epoch": 0.16341438980690243, "grad_norm": 0.734375, "learning_rate": 0.00020727163893794704, "loss": 0.1506, "step": 92164 }, { "epoch": 0.16341793597221224, "grad_norm": 0.306640625, "learning_rate": 0.0002072640902468973, "loss": 0.1832, "step": 92166 }, { "epoch": 0.16342148213752206, "grad_norm": 0.357421875, "learning_rate": 0.00020725654546015606, "loss": 0.217, "step": 92168 }, { "epoch": 0.16342502830283187, "grad_norm": 1.5703125, "learning_rate": 0.0002072490045777559, "loss": 0.2382, "step": 92170 }, { "epoch": 0.16342857446814169, "grad_norm": 0.90234375, "learning_rate": 0.00020724146759973022, "loss": 0.2004, "step": 92172 }, { "epoch": 0.1634321206334515, "grad_norm": 0.54296875, "learning_rate": 0.00020723393452611163, "loss": 0.1806, "step": 92174 }, { "epoch": 0.16343566679876131, "grad_norm": 2.953125, "learning_rate": 0.00020722640535693332, "loss": 0.2374, "step": 92176 }, { "epoch": 0.16343921296407113, "grad_norm": 1.4921875, "learning_rate": 0.00020721888009222802, "loss": 0.2408, "step": 92178 }, { "epoch": 0.16344275912938094, "grad_norm": 0.4765625, "learning_rate": 0.00020721135873202887, "loss": 0.2836, "step": 92180 }, { "epoch": 0.16344630529469076, "grad_norm": 0.5, "learning_rate": 0.0002072038412763686, "loss": 0.1851, "step": 92182 }, { "epoch": 0.16344985146000057, "grad_norm": 0.27734375, "learning_rate": 0.00020719632772528022, "loss": 0.219, "step": 92184 }, { "epoch": 0.1634533976253104, "grad_norm": 0.208984375, "learning_rate": 0.00020718881807879635, "loss": 0.1765, "step": 92186 }, { "epoch": 0.1634569437906202, "grad_norm": 0.23046875, "learning_rate": 0.00020718131233695, "loss": 0.1343, "step": 92188 }, { "epoch": 0.16346048995593004, "grad_norm": 1.828125, "learning_rate": 0.00020717381049977404, "loss": 0.3753, "step": 92190 }, { "epoch": 0.16346403612123986, "grad_norm": 0.7421875, "learning_rate": 0.00020716631256730123, "loss": 0.1875, "step": 92192 }, { "epoch": 0.16346758228654967, "grad_norm": 0.359375, "learning_rate": 0.00020715881853956437, "loss": 0.1601, "step": 92194 }, { "epoch": 0.1634711284518595, "grad_norm": 0.35546875, "learning_rate": 0.00020715132841659612, "loss": 0.1759, "step": 92196 }, { "epoch": 0.1634746746171693, "grad_norm": 0.57421875, "learning_rate": 0.00020714384219842934, "loss": 0.1424, "step": 92198 }, { "epoch": 0.16347822078247912, "grad_norm": 0.291015625, "learning_rate": 0.00020713635988509684, "loss": 0.2111, "step": 92200 }, { "epoch": 0.16348176694778893, "grad_norm": 0.64453125, "learning_rate": 0.00020712888147663122, "loss": 0.1391, "step": 92202 }, { "epoch": 0.16348531311309875, "grad_norm": 0.5625, "learning_rate": 0.00020712140697306524, "loss": 0.1593, "step": 92204 }, { "epoch": 0.16348885927840856, "grad_norm": 0.33984375, "learning_rate": 0.00020711393637443153, "loss": 0.1585, "step": 92206 }, { "epoch": 0.16349240544371837, "grad_norm": 0.703125, "learning_rate": 0.00020710646968076288, "loss": 0.2154, "step": 92208 }, { "epoch": 0.1634959516090282, "grad_norm": 0.162109375, "learning_rate": 0.00020709900689209185, "loss": 0.1197, "step": 92210 }, { "epoch": 0.163499497774338, "grad_norm": 0.1865234375, "learning_rate": 0.00020709154800845113, "loss": 0.4222, "step": 92212 }, { "epoch": 0.16350304393964782, "grad_norm": 0.251953125, "learning_rate": 0.00020708409302987326, "loss": 0.177, "step": 92214 }, { "epoch": 0.16350659010495763, "grad_norm": 1.109375, "learning_rate": 0.00020707664195639095, "loss": 0.1987, "step": 92216 }, { "epoch": 0.16351013627026745, "grad_norm": 0.3125, "learning_rate": 0.00020706919478803677, "loss": 0.1568, "step": 92218 }, { "epoch": 0.16351368243557726, "grad_norm": 0.349609375, "learning_rate": 0.00020706175152484317, "loss": 0.1517, "step": 92220 }, { "epoch": 0.16351722860088708, "grad_norm": 0.318359375, "learning_rate": 0.00020705431216684283, "loss": 0.2503, "step": 92222 }, { "epoch": 0.1635207747661969, "grad_norm": 0.359375, "learning_rate": 0.00020704687671406824, "loss": 0.3741, "step": 92224 }, { "epoch": 0.1635243209315067, "grad_norm": 0.326171875, "learning_rate": 0.00020703944516655198, "loss": 0.1675, "step": 92226 }, { "epoch": 0.16352786709681652, "grad_norm": 0.416015625, "learning_rate": 0.0002070320175243264, "loss": 0.174, "step": 92228 }, { "epoch": 0.16353141326212633, "grad_norm": 0.34375, "learning_rate": 0.0002070245937874241, "loss": 0.33, "step": 92230 }, { "epoch": 0.16353495942743615, "grad_norm": 0.58984375, "learning_rate": 0.00020701717395587752, "loss": 0.1921, "step": 92232 }, { "epoch": 0.16353850559274596, "grad_norm": 2.171875, "learning_rate": 0.00020700975802971918, "loss": 0.2669, "step": 92234 }, { "epoch": 0.16354205175805578, "grad_norm": 1.4453125, "learning_rate": 0.0002070023460089814, "loss": 0.2521, "step": 92236 }, { "epoch": 0.1635455979233656, "grad_norm": 0.58203125, "learning_rate": 0.00020699493789369666, "loss": 0.1648, "step": 92238 }, { "epoch": 0.1635491440886754, "grad_norm": 0.2890625, "learning_rate": 0.00020698753368389732, "loss": 0.1886, "step": 92240 }, { "epoch": 0.16355269025398522, "grad_norm": 1.7109375, "learning_rate": 0.00020698013337961584, "loss": 0.234, "step": 92242 }, { "epoch": 0.16355623641929504, "grad_norm": 1.625, "learning_rate": 0.00020697273698088445, "loss": 0.1941, "step": 92244 }, { "epoch": 0.16355978258460485, "grad_norm": 0.294921875, "learning_rate": 0.00020696534448773563, "loss": 0.1482, "step": 92246 }, { "epoch": 0.16356332874991467, "grad_norm": 0.875, "learning_rate": 0.00020695795590020162, "loss": 0.1716, "step": 92248 }, { "epoch": 0.16356687491522448, "grad_norm": 0.328125, "learning_rate": 0.00020695057121831474, "loss": 0.1829, "step": 92250 }, { "epoch": 0.1635704210805343, "grad_norm": 1.859375, "learning_rate": 0.0002069431904421074, "loss": 0.1707, "step": 92252 }, { "epoch": 0.1635739672458441, "grad_norm": 0.49609375, "learning_rate": 0.00020693581357161177, "loss": 0.1978, "step": 92254 }, { "epoch": 0.16357751341115392, "grad_norm": 0.369140625, "learning_rate": 0.00020692844060686002, "loss": 0.2187, "step": 92256 }, { "epoch": 0.16358105957646374, "grad_norm": 0.421875, "learning_rate": 0.0002069210715478847, "loss": 0.2138, "step": 92258 }, { "epoch": 0.16358460574177355, "grad_norm": 2.5625, "learning_rate": 0.00020691370639471776, "loss": 0.231, "step": 92260 }, { "epoch": 0.16358815190708337, "grad_norm": 8.625, "learning_rate": 0.00020690634514739157, "loss": 0.2157, "step": 92262 }, { "epoch": 0.16359169807239318, "grad_norm": 0.349609375, "learning_rate": 0.00020689898780593808, "loss": 0.1884, "step": 92264 }, { "epoch": 0.163595244237703, "grad_norm": 0.298828125, "learning_rate": 0.0002068916343703899, "loss": 0.1691, "step": 92266 }, { "epoch": 0.1635987904030128, "grad_norm": 0.84375, "learning_rate": 0.0002068842848407787, "loss": 0.211, "step": 92268 }, { "epoch": 0.16360233656832263, "grad_norm": 0.2392578125, "learning_rate": 0.00020687693921713697, "loss": 0.2417, "step": 92270 }, { "epoch": 0.16360588273363244, "grad_norm": 0.318359375, "learning_rate": 0.00020686959749949664, "loss": 0.1754, "step": 92272 }, { "epoch": 0.16360942889894226, "grad_norm": 1.03125, "learning_rate": 0.0002068622596878901, "loss": 0.2883, "step": 92274 }, { "epoch": 0.16361297506425207, "grad_norm": 0.5546875, "learning_rate": 0.00020685492578234905, "loss": 0.2125, "step": 92276 }, { "epoch": 0.16361652122956188, "grad_norm": 0.58203125, "learning_rate": 0.00020684759578290583, "loss": 0.1461, "step": 92278 }, { "epoch": 0.16362006739487173, "grad_norm": 0.94140625, "learning_rate": 0.00020684026968959232, "loss": 0.2913, "step": 92280 }, { "epoch": 0.16362361356018154, "grad_norm": 0.5, "learning_rate": 0.0002068329475024407, "loss": 0.1802, "step": 92282 }, { "epoch": 0.16362715972549136, "grad_norm": 0.38671875, "learning_rate": 0.00020682562922148308, "loss": 0.2107, "step": 92284 }, { "epoch": 0.16363070589080117, "grad_norm": 0.5546875, "learning_rate": 0.00020681831484675118, "loss": 0.1501, "step": 92286 }, { "epoch": 0.16363425205611098, "grad_norm": 0.326171875, "learning_rate": 0.00020681100437827722, "loss": 0.1712, "step": 92288 }, { "epoch": 0.1636377982214208, "grad_norm": 0.44921875, "learning_rate": 0.00020680369781609314, "loss": 0.1582, "step": 92290 }, { "epoch": 0.16364134438673061, "grad_norm": 0.42578125, "learning_rate": 0.0002067963951602308, "loss": 0.1456, "step": 92292 }, { "epoch": 0.16364489055204043, "grad_norm": 0.8125, "learning_rate": 0.00020678909641072224, "loss": 0.2005, "step": 92294 }, { "epoch": 0.16364843671735024, "grad_norm": 0.2353515625, "learning_rate": 0.00020678180156759925, "loss": 0.158, "step": 92296 }, { "epoch": 0.16365198288266006, "grad_norm": 0.27734375, "learning_rate": 0.0002067745106308939, "loss": 0.139, "step": 92298 }, { "epoch": 0.16365552904796987, "grad_norm": 1.03125, "learning_rate": 0.00020676722360063799, "loss": 0.2187, "step": 92300 }, { "epoch": 0.1636590752132797, "grad_norm": 0.439453125, "learning_rate": 0.00020675994047686332, "loss": 0.1889, "step": 92302 }, { "epoch": 0.1636626213785895, "grad_norm": 0.3828125, "learning_rate": 0.00020675266125960186, "loss": 0.2118, "step": 92304 }, { "epoch": 0.16366616754389932, "grad_norm": 0.392578125, "learning_rate": 0.00020674538594888539, "loss": 0.2105, "step": 92306 }, { "epoch": 0.16366971370920913, "grad_norm": 1.921875, "learning_rate": 0.00020673811454474577, "loss": 0.2399, "step": 92308 }, { "epoch": 0.16367325987451894, "grad_norm": 0.78125, "learning_rate": 0.0002067308470472147, "loss": 0.231, "step": 92310 }, { "epoch": 0.16367680603982876, "grad_norm": 0.26953125, "learning_rate": 0.00020672358345632405, "loss": 0.1529, "step": 92312 }, { "epoch": 0.16368035220513857, "grad_norm": 0.341796875, "learning_rate": 0.00020671632377210566, "loss": 0.1637, "step": 92314 }, { "epoch": 0.1636838983704484, "grad_norm": 3.734375, "learning_rate": 0.00020670906799459115, "loss": 0.2628, "step": 92316 }, { "epoch": 0.1636874445357582, "grad_norm": 0.8046875, "learning_rate": 0.00020670181612381227, "loss": 0.1727, "step": 92318 }, { "epoch": 0.16369099070106802, "grad_norm": 0.56640625, "learning_rate": 0.0002066945681598007, "loss": 0.1377, "step": 92320 }, { "epoch": 0.16369453686637783, "grad_norm": 0.291015625, "learning_rate": 0.00020668732410258821, "loss": 0.1554, "step": 92322 }, { "epoch": 0.16369808303168765, "grad_norm": 1.0625, "learning_rate": 0.00020668008395220662, "loss": 0.2188, "step": 92324 }, { "epoch": 0.16370162919699746, "grad_norm": 2.71875, "learning_rate": 0.0002066728477086873, "loss": 0.272, "step": 92326 }, { "epoch": 0.16370517536230728, "grad_norm": 0.337890625, "learning_rate": 0.00020666561537206215, "loss": 0.1664, "step": 92328 }, { "epoch": 0.1637087215276171, "grad_norm": 1.59375, "learning_rate": 0.00020665838694236254, "loss": 0.193, "step": 92330 }, { "epoch": 0.1637122676929269, "grad_norm": 1.6015625, "learning_rate": 0.00020665116241962029, "loss": 0.2786, "step": 92332 }, { "epoch": 0.16371581385823672, "grad_norm": 4.53125, "learning_rate": 0.00020664394180386697, "loss": 0.1903, "step": 92334 }, { "epoch": 0.16371936002354653, "grad_norm": 0.416015625, "learning_rate": 0.00020663672509513406, "loss": 0.2227, "step": 92336 }, { "epoch": 0.16372290618885635, "grad_norm": 1.3671875, "learning_rate": 0.0002066295122934532, "loss": 0.4109, "step": 92338 }, { "epoch": 0.16372645235416616, "grad_norm": 0.48828125, "learning_rate": 0.00020662230339885595, "loss": 0.188, "step": 92340 }, { "epoch": 0.16372999851947598, "grad_norm": 0.29296875, "learning_rate": 0.00020661509841137383, "loss": 0.2121, "step": 92342 }, { "epoch": 0.1637335446847858, "grad_norm": 0.84375, "learning_rate": 0.0002066078973310383, "loss": 0.2091, "step": 92344 }, { "epoch": 0.1637370908500956, "grad_norm": 0.55859375, "learning_rate": 0.00020660070015788084, "loss": 0.1551, "step": 92346 }, { "epoch": 0.16374063701540542, "grad_norm": 0.412109375, "learning_rate": 0.00020659350689193308, "loss": 0.1387, "step": 92348 }, { "epoch": 0.16374418318071524, "grad_norm": 0.2099609375, "learning_rate": 0.00020658631753322633, "loss": 0.1898, "step": 92350 }, { "epoch": 0.16374772934602505, "grad_norm": 0.478515625, "learning_rate": 0.00020657913208179196, "loss": 0.1617, "step": 92352 }, { "epoch": 0.16375127551133487, "grad_norm": 0.5625, "learning_rate": 0.00020657195053766167, "loss": 0.1694, "step": 92354 }, { "epoch": 0.16375482167664468, "grad_norm": 0.31640625, "learning_rate": 0.00020656477290086655, "loss": 0.2559, "step": 92356 }, { "epoch": 0.1637583678419545, "grad_norm": 0.201171875, "learning_rate": 0.00020655759917143828, "loss": 0.2305, "step": 92358 }, { "epoch": 0.1637619140072643, "grad_norm": 1.0703125, "learning_rate": 0.0002065504293494081, "loss": 0.2821, "step": 92360 }, { "epoch": 0.16376546017257412, "grad_norm": 0.5390625, "learning_rate": 0.00020654326343480718, "loss": 0.2216, "step": 92362 }, { "epoch": 0.16376900633788394, "grad_norm": 0.4375, "learning_rate": 0.00020653610142766724, "loss": 0.2066, "step": 92364 }, { "epoch": 0.16377255250319375, "grad_norm": 0.23828125, "learning_rate": 0.0002065289433280193, "loss": 0.1344, "step": 92366 }, { "epoch": 0.16377609866850357, "grad_norm": 0.34765625, "learning_rate": 0.0002065217891358949, "loss": 0.1672, "step": 92368 }, { "epoch": 0.1637796448338134, "grad_norm": 0.60546875, "learning_rate": 0.00020651463885132507, "loss": 0.1998, "step": 92370 }, { "epoch": 0.16378319099912322, "grad_norm": 0.95703125, "learning_rate": 0.0002065074924743412, "loss": 0.2252, "step": 92372 }, { "epoch": 0.16378673716443304, "grad_norm": 0.369140625, "learning_rate": 0.00020650035000497463, "loss": 0.2022, "step": 92374 }, { "epoch": 0.16379028332974285, "grad_norm": 2.046875, "learning_rate": 0.0002064932114432565, "loss": 0.4562, "step": 92376 }, { "epoch": 0.16379382949505267, "grad_norm": 1.140625, "learning_rate": 0.00020648607678921802, "loss": 0.1966, "step": 92378 }, { "epoch": 0.16379737566036248, "grad_norm": 0.2119140625, "learning_rate": 0.00020647894604289052, "loss": 0.1933, "step": 92380 }, { "epoch": 0.1638009218256723, "grad_norm": 0.17578125, "learning_rate": 0.00020647181920430501, "loss": 0.1674, "step": 92382 }, { "epoch": 0.1638044679909821, "grad_norm": 0.380859375, "learning_rate": 0.00020646469627349275, "loss": 0.1866, "step": 92384 }, { "epoch": 0.16380801415629193, "grad_norm": 0.34765625, "learning_rate": 0.00020645757725048486, "loss": 0.1584, "step": 92386 }, { "epoch": 0.16381156032160174, "grad_norm": 0.2578125, "learning_rate": 0.00020645046213531252, "loss": 0.1239, "step": 92388 }, { "epoch": 0.16381510648691155, "grad_norm": 0.216796875, "learning_rate": 0.00020644335092800692, "loss": 0.1692, "step": 92390 }, { "epoch": 0.16381865265222137, "grad_norm": 0.3203125, "learning_rate": 0.00020643624362859893, "loss": 0.1626, "step": 92392 }, { "epoch": 0.16382219881753118, "grad_norm": 0.431640625, "learning_rate": 0.0002064291402371198, "loss": 0.4426, "step": 92394 }, { "epoch": 0.163825744982841, "grad_norm": 0.41015625, "learning_rate": 0.0002064220407536006, "loss": 0.1723, "step": 92396 }, { "epoch": 0.1638292911481508, "grad_norm": 0.40625, "learning_rate": 0.00020641494517807226, "loss": 0.192, "step": 92398 }, { "epoch": 0.16383283731346063, "grad_norm": 0.408203125, "learning_rate": 0.00020640785351056597, "loss": 0.1721, "step": 92400 }, { "epoch": 0.16383638347877044, "grad_norm": 0.51171875, "learning_rate": 0.00020640076575111261, "loss": 0.1814, "step": 92402 }, { "epoch": 0.16383992964408026, "grad_norm": 1.7265625, "learning_rate": 0.0002063936818997433, "loss": 0.1355, "step": 92404 }, { "epoch": 0.16384347580939007, "grad_norm": 0.7109375, "learning_rate": 0.00020638660195648894, "loss": 0.2101, "step": 92406 }, { "epoch": 0.16384702197469989, "grad_norm": 0.3046875, "learning_rate": 0.00020637952592138047, "loss": 0.1634, "step": 92408 }, { "epoch": 0.1638505681400097, "grad_norm": 0.19140625, "learning_rate": 0.00020637245379444903, "loss": 0.1586, "step": 92410 }, { "epoch": 0.16385411430531951, "grad_norm": 0.478515625, "learning_rate": 0.0002063653855757252, "loss": 0.1817, "step": 92412 }, { "epoch": 0.16385766047062933, "grad_norm": 0.388671875, "learning_rate": 0.0002063583212652402, "loss": 0.2399, "step": 92414 }, { "epoch": 0.16386120663593914, "grad_norm": 0.392578125, "learning_rate": 0.00020635126086302475, "loss": 0.1861, "step": 92416 }, { "epoch": 0.16386475280124896, "grad_norm": 0.46875, "learning_rate": 0.00020634420436910984, "loss": 0.1659, "step": 92418 }, { "epoch": 0.16386829896655877, "grad_norm": 0.322265625, "learning_rate": 0.00020633715178352622, "loss": 0.1908, "step": 92420 }, { "epoch": 0.1638718451318686, "grad_norm": 0.58203125, "learning_rate": 0.0002063301031063049, "loss": 0.1472, "step": 92422 }, { "epoch": 0.1638753912971784, "grad_norm": 0.25390625, "learning_rate": 0.0002063230583374764, "loss": 0.2321, "step": 92424 }, { "epoch": 0.16387893746248822, "grad_norm": 0.78125, "learning_rate": 0.0002063160174770719, "loss": 0.1846, "step": 92426 }, { "epoch": 0.16388248362779803, "grad_norm": 0.71484375, "learning_rate": 0.00020630898052512205, "loss": 0.1622, "step": 92428 }, { "epoch": 0.16388602979310785, "grad_norm": 0.490234375, "learning_rate": 0.00020630194748165757, "loss": 0.1731, "step": 92430 }, { "epoch": 0.16388957595841766, "grad_norm": 0.21484375, "learning_rate": 0.00020629491834670927, "loss": 0.2185, "step": 92432 }, { "epoch": 0.16389312212372747, "grad_norm": 2.296875, "learning_rate": 0.00020628789312030773, "loss": 0.1946, "step": 92434 }, { "epoch": 0.1638966682890373, "grad_norm": 0.205078125, "learning_rate": 0.000206280871802484, "loss": 0.2596, "step": 92436 }, { "epoch": 0.1639002144543471, "grad_norm": 0.26953125, "learning_rate": 0.0002062738543932685, "loss": 0.1605, "step": 92438 }, { "epoch": 0.16390376061965692, "grad_norm": 1.1796875, "learning_rate": 0.00020626684089269207, "loss": 0.2099, "step": 92440 }, { "epoch": 0.16390730678496673, "grad_norm": 0.263671875, "learning_rate": 0.00020625983130078527, "loss": 0.217, "step": 92442 }, { "epoch": 0.16391085295027655, "grad_norm": 0.1806640625, "learning_rate": 0.00020625282561757892, "loss": 0.1406, "step": 92444 }, { "epoch": 0.16391439911558636, "grad_norm": 0.30078125, "learning_rate": 0.0002062458238431035, "loss": 0.2032, "step": 92446 }, { "epoch": 0.16391794528089618, "grad_norm": 1.1875, "learning_rate": 0.0002062388259773897, "loss": 0.1591, "step": 92448 }, { "epoch": 0.163921491446206, "grad_norm": 0.890625, "learning_rate": 0.0002062318320204681, "loss": 0.281, "step": 92450 }, { "epoch": 0.1639250376115158, "grad_norm": 0.39453125, "learning_rate": 0.00020622484197236924, "loss": 0.1798, "step": 92452 }, { "epoch": 0.16392858377682562, "grad_norm": 0.2080078125, "learning_rate": 0.00020621785583312385, "loss": 0.2088, "step": 92454 }, { "epoch": 0.16393212994213543, "grad_norm": 0.4921875, "learning_rate": 0.00020621087360276247, "loss": 0.2424, "step": 92456 }, { "epoch": 0.16393567610744525, "grad_norm": 0.19140625, "learning_rate": 0.00020620389528131546, "loss": 0.168, "step": 92458 }, { "epoch": 0.16393922227275506, "grad_norm": 0.3125, "learning_rate": 0.00020619692086881335, "loss": 0.2166, "step": 92460 }, { "epoch": 0.1639427684380649, "grad_norm": 0.28515625, "learning_rate": 0.00020618995036528686, "loss": 0.1565, "step": 92462 }, { "epoch": 0.16394631460337472, "grad_norm": 0.88671875, "learning_rate": 0.0002061829837707663, "loss": 0.2525, "step": 92464 }, { "epoch": 0.16394986076868454, "grad_norm": 0.30859375, "learning_rate": 0.00020617602108528215, "loss": 0.1824, "step": 92466 }, { "epoch": 0.16395340693399435, "grad_norm": 21.625, "learning_rate": 0.00020616906230886493, "loss": 0.1919, "step": 92468 }, { "epoch": 0.16395695309930416, "grad_norm": 1.1640625, "learning_rate": 0.000206162107441545, "loss": 0.1991, "step": 92470 }, { "epoch": 0.16396049926461398, "grad_norm": 0.408203125, "learning_rate": 0.00020615515648335285, "loss": 0.1352, "step": 92472 }, { "epoch": 0.1639640454299238, "grad_norm": 0.189453125, "learning_rate": 0.00020614820943431873, "loss": 0.1243, "step": 92474 }, { "epoch": 0.1639675915952336, "grad_norm": 0.2333984375, "learning_rate": 0.00020614126629447325, "loss": 0.137, "step": 92476 }, { "epoch": 0.16397113776054342, "grad_norm": 0.921875, "learning_rate": 0.00020613432706384656, "loss": 0.1859, "step": 92478 }, { "epoch": 0.16397468392585324, "grad_norm": 0.671875, "learning_rate": 0.00020612739174246915, "loss": 0.1864, "step": 92480 }, { "epoch": 0.16397823009116305, "grad_norm": 0.46875, "learning_rate": 0.00020612046033037127, "loss": 0.2471, "step": 92482 }, { "epoch": 0.16398177625647287, "grad_norm": 0.380859375, "learning_rate": 0.00020611353282758336, "loss": 0.1815, "step": 92484 }, { "epoch": 0.16398532242178268, "grad_norm": 0.4140625, "learning_rate": 0.0002061066092341356, "loss": 0.1459, "step": 92486 }, { "epoch": 0.1639888685870925, "grad_norm": 0.291015625, "learning_rate": 0.00020609968955005825, "loss": 0.1643, "step": 92488 }, { "epoch": 0.1639924147524023, "grad_norm": 0.37890625, "learning_rate": 0.00020609277377538164, "loss": 0.1872, "step": 92490 }, { "epoch": 0.16399596091771212, "grad_norm": 0.515625, "learning_rate": 0.00020608586191013599, "loss": 0.1446, "step": 92492 }, { "epoch": 0.16399950708302194, "grad_norm": 0.57421875, "learning_rate": 0.00020607895395435158, "loss": 0.146, "step": 92494 }, { "epoch": 0.16400305324833175, "grad_norm": 0.7734375, "learning_rate": 0.0002060720499080585, "loss": 0.1403, "step": 92496 }, { "epoch": 0.16400659941364157, "grad_norm": 1.1796875, "learning_rate": 0.00020606514977128705, "loss": 0.2247, "step": 92498 }, { "epoch": 0.16401014557895138, "grad_norm": 0.310546875, "learning_rate": 0.00020605825354406737, "loss": 0.1559, "step": 92500 }, { "epoch": 0.1640136917442612, "grad_norm": 0.2119140625, "learning_rate": 0.00020605136122642964, "loss": 0.1611, "step": 92502 }, { "epoch": 0.164017237909571, "grad_norm": 0.228515625, "learning_rate": 0.00020604447281840396, "loss": 0.1451, "step": 92504 }, { "epoch": 0.16402078407488083, "grad_norm": 0.345703125, "learning_rate": 0.0002060375883200205, "loss": 0.1819, "step": 92506 }, { "epoch": 0.16402433024019064, "grad_norm": 0.333984375, "learning_rate": 0.0002060307077313094, "loss": 0.1524, "step": 92508 }, { "epoch": 0.16402787640550046, "grad_norm": 0.2197265625, "learning_rate": 0.00020602383105230068, "loss": 0.1222, "step": 92510 }, { "epoch": 0.16403142257081027, "grad_norm": 0.9453125, "learning_rate": 0.0002060169582830245, "loss": 0.1763, "step": 92512 }, { "epoch": 0.16403496873612008, "grad_norm": 0.283203125, "learning_rate": 0.0002060100894235107, "loss": 0.1212, "step": 92514 }, { "epoch": 0.1640385149014299, "grad_norm": 0.546875, "learning_rate": 0.00020600322447378953, "loss": 0.1919, "step": 92516 }, { "epoch": 0.16404206106673971, "grad_norm": 0.2119140625, "learning_rate": 0.000205996363433891, "loss": 0.1768, "step": 92518 }, { "epoch": 0.16404560723204953, "grad_norm": 0.87109375, "learning_rate": 0.00020598950630384513, "loss": 0.1542, "step": 92520 }, { "epoch": 0.16404915339735934, "grad_norm": 0.2041015625, "learning_rate": 0.0002059826530836818, "loss": 0.2313, "step": 92522 }, { "epoch": 0.16405269956266916, "grad_norm": 0.203125, "learning_rate": 0.000205975803773431, "loss": 0.1785, "step": 92524 }, { "epoch": 0.16405624572797897, "grad_norm": 0.2421875, "learning_rate": 0.0002059689583731227, "loss": 0.1891, "step": 92526 }, { "epoch": 0.1640597918932888, "grad_norm": 0.451171875, "learning_rate": 0.00020596211688278697, "loss": 0.1509, "step": 92528 }, { "epoch": 0.1640633380585986, "grad_norm": 0.333984375, "learning_rate": 0.0002059552793024536, "loss": 0.1842, "step": 92530 }, { "epoch": 0.16406688422390842, "grad_norm": 1.5703125, "learning_rate": 0.0002059484456321524, "loss": 0.2034, "step": 92532 }, { "epoch": 0.16407043038921823, "grad_norm": 0.5546875, "learning_rate": 0.00020594161587191354, "loss": 0.1952, "step": 92534 }, { "epoch": 0.16407397655452804, "grad_norm": 0.3203125, "learning_rate": 0.00020593479002176652, "loss": 0.1447, "step": 92536 }, { "epoch": 0.16407752271983786, "grad_norm": 0.72265625, "learning_rate": 0.00020592796808174162, "loss": 0.2498, "step": 92538 }, { "epoch": 0.16408106888514767, "grad_norm": 0.349609375, "learning_rate": 0.00020592115005186825, "loss": 0.1562, "step": 92540 }, { "epoch": 0.1640846150504575, "grad_norm": 1.359375, "learning_rate": 0.00020591433593217657, "loss": 0.1847, "step": 92542 }, { "epoch": 0.1640881612157673, "grad_norm": 0.6796875, "learning_rate": 0.00020590752572269622, "loss": 0.1826, "step": 92544 }, { "epoch": 0.16409170738107712, "grad_norm": 0.4296875, "learning_rate": 0.000205900719423457, "loss": 0.1562, "step": 92546 }, { "epoch": 0.16409525354638693, "grad_norm": 0.458984375, "learning_rate": 0.0002058939170344886, "loss": 0.1312, "step": 92548 }, { "epoch": 0.16409879971169675, "grad_norm": 0.2734375, "learning_rate": 0.00020588711855582087, "loss": 0.1521, "step": 92550 }, { "epoch": 0.1641023458770066, "grad_norm": 0.53125, "learning_rate": 0.00020588032398748352, "loss": 0.1129, "step": 92552 }, { "epoch": 0.1641058920423164, "grad_norm": 0.341796875, "learning_rate": 0.00020587353332950637, "loss": 0.1461, "step": 92554 }, { "epoch": 0.16410943820762622, "grad_norm": 0.7890625, "learning_rate": 0.00020586674658191895, "loss": 0.2467, "step": 92556 }, { "epoch": 0.16411298437293603, "grad_norm": 0.3203125, "learning_rate": 0.00020585996374475094, "loss": 0.1633, "step": 92558 }, { "epoch": 0.16411653053824585, "grad_norm": 0.6640625, "learning_rate": 0.00020585318481803206, "loss": 0.2072, "step": 92560 }, { "epoch": 0.16412007670355566, "grad_norm": 0.267578125, "learning_rate": 0.00020584640980179204, "loss": 0.1374, "step": 92562 }, { "epoch": 0.16412362286886548, "grad_norm": 0.49609375, "learning_rate": 0.00020583963869606054, "loss": 0.1942, "step": 92564 }, { "epoch": 0.1641271690341753, "grad_norm": 0.43359375, "learning_rate": 0.00020583287150086695, "loss": 0.1896, "step": 92566 }, { "epoch": 0.1641307151994851, "grad_norm": 0.373046875, "learning_rate": 0.00020582610821624086, "loss": 0.1581, "step": 92568 }, { "epoch": 0.16413426136479492, "grad_norm": 0.34765625, "learning_rate": 0.00020581934884221219, "loss": 0.1441, "step": 92570 }, { "epoch": 0.16413780753010473, "grad_norm": 0.671875, "learning_rate": 0.00020581259337881019, "loss": 0.2688, "step": 92572 }, { "epoch": 0.16414135369541455, "grad_norm": 0.26953125, "learning_rate": 0.00020580584182606454, "loss": 0.178, "step": 92574 }, { "epoch": 0.16414489986072436, "grad_norm": 0.41015625, "learning_rate": 0.00020579909418400472, "loss": 0.1331, "step": 92576 }, { "epoch": 0.16414844602603418, "grad_norm": 0.9453125, "learning_rate": 0.0002057923504526603, "loss": 0.2193, "step": 92578 }, { "epoch": 0.164151992191344, "grad_norm": 0.6171875, "learning_rate": 0.00020578561063206056, "loss": 0.2459, "step": 92580 }, { "epoch": 0.1641555383566538, "grad_norm": 0.4296875, "learning_rate": 0.00020577887472223526, "loss": 0.1965, "step": 92582 }, { "epoch": 0.16415908452196362, "grad_norm": 0.353515625, "learning_rate": 0.0002057721427232137, "loss": 0.1543, "step": 92584 }, { "epoch": 0.16416263068727344, "grad_norm": 2.28125, "learning_rate": 0.00020576541463502532, "loss": 0.1711, "step": 92586 }, { "epoch": 0.16416617685258325, "grad_norm": 0.400390625, "learning_rate": 0.00020575869045769967, "loss": 0.1811, "step": 92588 }, { "epoch": 0.16416972301789307, "grad_norm": 0.92578125, "learning_rate": 0.00020575197019126603, "loss": 0.1616, "step": 92590 }, { "epoch": 0.16417326918320288, "grad_norm": 0.2578125, "learning_rate": 0.00020574525383575378, "loss": 0.1583, "step": 92592 }, { "epoch": 0.1641768153485127, "grad_norm": 0.5546875, "learning_rate": 0.00020573854139119242, "loss": 0.2169, "step": 92594 }, { "epoch": 0.1641803615138225, "grad_norm": 0.51953125, "learning_rate": 0.00020573183285761126, "loss": 0.2069, "step": 92596 }, { "epoch": 0.16418390767913232, "grad_norm": 0.80859375, "learning_rate": 0.00020572512823503953, "loss": 0.2138, "step": 92598 }, { "epoch": 0.16418745384444214, "grad_norm": 0.384765625, "learning_rate": 0.0002057184275235066, "loss": 0.1864, "step": 92600 }, { "epoch": 0.16419100000975195, "grad_norm": 0.58203125, "learning_rate": 0.00020571173072304194, "loss": 0.1641, "step": 92602 }, { "epoch": 0.16419454617506177, "grad_norm": 0.72265625, "learning_rate": 0.00020570503783367463, "loss": 0.1731, "step": 92604 }, { "epoch": 0.16419809234037158, "grad_norm": 0.84765625, "learning_rate": 0.000205698348855434, "loss": 0.1877, "step": 92606 }, { "epoch": 0.1642016385056814, "grad_norm": 0.2236328125, "learning_rate": 0.0002056916637883493, "loss": 0.1844, "step": 92608 }, { "epoch": 0.1642051846709912, "grad_norm": 0.61328125, "learning_rate": 0.0002056849826324499, "loss": 0.2599, "step": 92610 }, { "epoch": 0.16420873083630103, "grad_norm": 0.392578125, "learning_rate": 0.00020567830538776482, "loss": 0.3088, "step": 92612 }, { "epoch": 0.16421227700161084, "grad_norm": 1.59375, "learning_rate": 0.00020567163205432337, "loss": 0.2577, "step": 92614 }, { "epoch": 0.16421582316692065, "grad_norm": 0.46484375, "learning_rate": 0.00020566496263215464, "loss": 0.23, "step": 92616 }, { "epoch": 0.16421936933223047, "grad_norm": 0.28515625, "learning_rate": 0.00020565829712128797, "loss": 0.1396, "step": 92618 }, { "epoch": 0.16422291549754028, "grad_norm": 0.365234375, "learning_rate": 0.00020565163552175238, "loss": 0.1908, "step": 92620 }, { "epoch": 0.1642264616628501, "grad_norm": 0.423828125, "learning_rate": 0.00020564497783357702, "loss": 0.1373, "step": 92622 }, { "epoch": 0.1642300078281599, "grad_norm": 0.3984375, "learning_rate": 0.0002056383240567911, "loss": 0.1479, "step": 92624 }, { "epoch": 0.16423355399346973, "grad_norm": 0.36328125, "learning_rate": 0.00020563167419142354, "loss": 0.2541, "step": 92626 }, { "epoch": 0.16423710015877954, "grad_norm": 0.267578125, "learning_rate": 0.00020562502823750353, "loss": 0.1774, "step": 92628 }, { "epoch": 0.16424064632408936, "grad_norm": 0.28125, "learning_rate": 0.0002056183861950602, "loss": 0.1233, "step": 92630 }, { "epoch": 0.16424419248939917, "grad_norm": 0.2333984375, "learning_rate": 0.00020561174806412244, "loss": 0.1618, "step": 92632 }, { "epoch": 0.16424773865470899, "grad_norm": 3.828125, "learning_rate": 0.00020560511384471952, "loss": 0.409, "step": 92634 }, { "epoch": 0.1642512848200188, "grad_norm": 0.2119140625, "learning_rate": 0.00020559848353688017, "loss": 0.1666, "step": 92636 }, { "epoch": 0.16425483098532861, "grad_norm": 0.212890625, "learning_rate": 0.00020559185714063365, "loss": 0.1578, "step": 92638 }, { "epoch": 0.16425837715063843, "grad_norm": 0.26953125, "learning_rate": 0.00020558523465600864, "loss": 0.2029, "step": 92640 }, { "epoch": 0.16426192331594827, "grad_norm": 0.4453125, "learning_rate": 0.00020557861608303438, "loss": 0.1728, "step": 92642 }, { "epoch": 0.1642654694812581, "grad_norm": 0.98046875, "learning_rate": 0.00020557200142173968, "loss": 0.2008, "step": 92644 }, { "epoch": 0.1642690156465679, "grad_norm": 0.2578125, "learning_rate": 0.0002055653906721535, "loss": 0.1495, "step": 92646 }, { "epoch": 0.16427256181187772, "grad_norm": 0.5625, "learning_rate": 0.0002055587838343048, "loss": 0.1535, "step": 92648 }, { "epoch": 0.16427610797718753, "grad_norm": 0.357421875, "learning_rate": 0.00020555218090822242, "loss": 0.2, "step": 92650 }, { "epoch": 0.16427965414249734, "grad_norm": 0.61328125, "learning_rate": 0.0002055455818939352, "loss": 0.2381, "step": 92652 }, { "epoch": 0.16428320030780716, "grad_norm": 1.1015625, "learning_rate": 0.00020553898679147206, "loss": 0.3305, "step": 92654 }, { "epoch": 0.16428674647311697, "grad_norm": 1.671875, "learning_rate": 0.00020553239560086185, "loss": 0.2095, "step": 92656 }, { "epoch": 0.1642902926384268, "grad_norm": 0.193359375, "learning_rate": 0.00020552580832213334, "loss": 0.1841, "step": 92658 }, { "epoch": 0.1642938388037366, "grad_norm": 0.27734375, "learning_rate": 0.00020551922495531548, "loss": 0.1467, "step": 92660 }, { "epoch": 0.16429738496904642, "grad_norm": 0.30078125, "learning_rate": 0.00020551264550043675, "loss": 0.1901, "step": 92662 }, { "epoch": 0.16430093113435623, "grad_norm": 0.53125, "learning_rate": 0.0002055060699575263, "loss": 0.1718, "step": 92664 }, { "epoch": 0.16430447729966605, "grad_norm": 0.283203125, "learning_rate": 0.00020549949832661263, "loss": 0.1333, "step": 92666 }, { "epoch": 0.16430802346497586, "grad_norm": 0.3984375, "learning_rate": 0.0002054929306077246, "loss": 0.1792, "step": 92668 }, { "epoch": 0.16431156963028568, "grad_norm": 0.859375, "learning_rate": 0.00020548636680089083, "loss": 0.1646, "step": 92670 }, { "epoch": 0.1643151157955955, "grad_norm": 0.55859375, "learning_rate": 0.00020547980690614024, "loss": 0.2513, "step": 92672 }, { "epoch": 0.1643186619609053, "grad_norm": 0.9921875, "learning_rate": 0.00020547325092350129, "loss": 0.2151, "step": 92674 }, { "epoch": 0.16432220812621512, "grad_norm": 1.328125, "learning_rate": 0.00020546669885300274, "loss": 0.1758, "step": 92676 }, { "epoch": 0.16432575429152493, "grad_norm": 0.5234375, "learning_rate": 0.00020546015069467324, "loss": 0.1902, "step": 92678 }, { "epoch": 0.16432930045683475, "grad_norm": 0.65234375, "learning_rate": 0.00020545360644854145, "loss": 0.1959, "step": 92680 }, { "epoch": 0.16433284662214456, "grad_norm": 0.23046875, "learning_rate": 0.0002054470661146359, "loss": 0.216, "step": 92682 }, { "epoch": 0.16433639278745438, "grad_norm": 0.859375, "learning_rate": 0.00020544052969298534, "loss": 0.2096, "step": 92684 }, { "epoch": 0.1643399389527642, "grad_norm": 0.404296875, "learning_rate": 0.00020543399718361832, "loss": 0.153, "step": 92686 }, { "epoch": 0.164343485118074, "grad_norm": 0.375, "learning_rate": 0.00020542746858656324, "loss": 0.1516, "step": 92688 }, { "epoch": 0.16434703128338382, "grad_norm": 0.4296875, "learning_rate": 0.00020542094390184893, "loss": 0.1992, "step": 92690 }, { "epoch": 0.16435057744869364, "grad_norm": 0.244140625, "learning_rate": 0.00020541442312950364, "loss": 0.1591, "step": 92692 }, { "epoch": 0.16435412361400345, "grad_norm": 0.69921875, "learning_rate": 0.00020540790626955615, "loss": 0.257, "step": 92694 }, { "epoch": 0.16435766977931326, "grad_norm": 0.25, "learning_rate": 0.00020540139332203477, "loss": 0.1637, "step": 92696 }, { "epoch": 0.16436121594462308, "grad_norm": 0.98828125, "learning_rate": 0.00020539488428696802, "loss": 0.1715, "step": 92698 }, { "epoch": 0.1643647621099329, "grad_norm": 0.4375, "learning_rate": 0.00020538837916438444, "loss": 0.2988, "step": 92700 }, { "epoch": 0.1643683082752427, "grad_norm": 0.423828125, "learning_rate": 0.00020538187795431244, "loss": 0.1579, "step": 92702 }, { "epoch": 0.16437185444055252, "grad_norm": 0.369140625, "learning_rate": 0.00020537538065678047, "loss": 0.1807, "step": 92704 }, { "epoch": 0.16437540060586234, "grad_norm": 0.423828125, "learning_rate": 0.00020536888727181683, "loss": 0.1695, "step": 92706 }, { "epoch": 0.16437894677117215, "grad_norm": 0.3984375, "learning_rate": 0.00020536239779945004, "loss": 0.2062, "step": 92708 }, { "epoch": 0.16438249293648197, "grad_norm": 0.89453125, "learning_rate": 0.00020535591223970858, "loss": 0.1584, "step": 92710 }, { "epoch": 0.16438603910179178, "grad_norm": 0.49609375, "learning_rate": 0.00020534943059262066, "loss": 0.2448, "step": 92712 }, { "epoch": 0.1643895852671016, "grad_norm": 0.31640625, "learning_rate": 0.00020534295285821457, "loss": 0.1483, "step": 92714 }, { "epoch": 0.1643931314324114, "grad_norm": 0.546875, "learning_rate": 0.00020533647903651865, "loss": 0.1203, "step": 92716 }, { "epoch": 0.16439667759772122, "grad_norm": 0.2041015625, "learning_rate": 0.0002053300091275615, "loss": 0.1965, "step": 92718 }, { "epoch": 0.16440022376303104, "grad_norm": 0.314453125, "learning_rate": 0.00020532354313137108, "loss": 0.1803, "step": 92720 }, { "epoch": 0.16440376992834085, "grad_norm": 0.392578125, "learning_rate": 0.0002053170810479758, "loss": 0.1821, "step": 92722 }, { "epoch": 0.16440731609365067, "grad_norm": 0.6015625, "learning_rate": 0.00020531062287740387, "loss": 0.2154, "step": 92724 }, { "epoch": 0.16441086225896048, "grad_norm": 0.314453125, "learning_rate": 0.0002053041686196837, "loss": 0.1883, "step": 92726 }, { "epoch": 0.1644144084242703, "grad_norm": 0.87890625, "learning_rate": 0.00020529771827484326, "loss": 0.2044, "step": 92728 }, { "epoch": 0.1644179545895801, "grad_norm": 0.44921875, "learning_rate": 0.000205291271842911, "loss": 0.1586, "step": 92730 }, { "epoch": 0.16442150075488993, "grad_norm": 0.232421875, "learning_rate": 0.000205284829323915, "loss": 0.181, "step": 92732 }, { "epoch": 0.16442504692019977, "grad_norm": 0.56640625, "learning_rate": 0.00020527839071788338, "loss": 0.1434, "step": 92734 }, { "epoch": 0.16442859308550958, "grad_norm": 0.6328125, "learning_rate": 0.00020527195602484447, "loss": 0.1812, "step": 92736 }, { "epoch": 0.1644321392508194, "grad_norm": 0.123046875, "learning_rate": 0.00020526552524482617, "loss": 0.1363, "step": 92738 }, { "epoch": 0.1644356854161292, "grad_norm": 1.3046875, "learning_rate": 0.00020525909837785676, "loss": 0.2592, "step": 92740 }, { "epoch": 0.16443923158143903, "grad_norm": 0.31640625, "learning_rate": 0.00020525267542396443, "loss": 0.1726, "step": 92742 }, { "epoch": 0.16444277774674884, "grad_norm": 0.35546875, "learning_rate": 0.0002052462563831771, "loss": 0.1275, "step": 92744 }, { "epoch": 0.16444632391205866, "grad_norm": 0.22265625, "learning_rate": 0.00020523984125552292, "loss": 0.1842, "step": 92746 }, { "epoch": 0.16444987007736847, "grad_norm": 0.25390625, "learning_rate": 0.0002052334300410299, "loss": 0.1932, "step": 92748 }, { "epoch": 0.16445341624267829, "grad_norm": 0.361328125, "learning_rate": 0.0002052270227397262, "loss": 0.223, "step": 92750 }, { "epoch": 0.1644569624079881, "grad_norm": 0.62890625, "learning_rate": 0.0002052206193516396, "loss": 0.1638, "step": 92752 }, { "epoch": 0.16446050857329791, "grad_norm": 0.287109375, "learning_rate": 0.00020521421987679847, "loss": 0.1858, "step": 92754 }, { "epoch": 0.16446405473860773, "grad_norm": 0.3125, "learning_rate": 0.00020520782431523044, "loss": 0.2099, "step": 92756 }, { "epoch": 0.16446760090391754, "grad_norm": 0.51953125, "learning_rate": 0.00020520143266696366, "loss": 0.1529, "step": 92758 }, { "epoch": 0.16447114706922736, "grad_norm": 2.21875, "learning_rate": 0.00020519504493202607, "loss": 0.2069, "step": 92760 }, { "epoch": 0.16447469323453717, "grad_norm": 0.95703125, "learning_rate": 0.00020518866111044563, "loss": 0.1951, "step": 92762 }, { "epoch": 0.164478239399847, "grad_norm": 0.275390625, "learning_rate": 0.00020518228120225009, "loss": 0.1263, "step": 92764 }, { "epoch": 0.1644817855651568, "grad_norm": 0.34375, "learning_rate": 0.00020517590520746762, "loss": 0.1245, "step": 92766 }, { "epoch": 0.16448533173046662, "grad_norm": 1.046875, "learning_rate": 0.00020516953312612585, "loss": 0.1836, "step": 92768 }, { "epoch": 0.16448887789577643, "grad_norm": 0.28515625, "learning_rate": 0.00020516316495825283, "loss": 0.168, "step": 92770 }, { "epoch": 0.16449242406108625, "grad_norm": 0.462890625, "learning_rate": 0.00020515680070387632, "loss": 0.1695, "step": 92772 }, { "epoch": 0.16449597022639606, "grad_norm": 0.3515625, "learning_rate": 0.0002051504403630242, "loss": 0.2436, "step": 92774 }, { "epoch": 0.16449951639170587, "grad_norm": 0.291015625, "learning_rate": 0.00020514408393572432, "loss": 0.1763, "step": 92776 }, { "epoch": 0.1645030625570157, "grad_norm": 1.8359375, "learning_rate": 0.00020513773142200425, "loss": 0.1704, "step": 92778 }, { "epoch": 0.1645066087223255, "grad_norm": 0.63671875, "learning_rate": 0.00020513138282189202, "loss": 0.1367, "step": 92780 }, { "epoch": 0.16451015488763532, "grad_norm": 0.32421875, "learning_rate": 0.00020512503813541544, "loss": 0.1734, "step": 92782 }, { "epoch": 0.16451370105294513, "grad_norm": 1.46875, "learning_rate": 0.00020511869736260203, "loss": 0.2463, "step": 92784 }, { "epoch": 0.16451724721825495, "grad_norm": 0.322265625, "learning_rate": 0.0002051123605034797, "loss": 0.158, "step": 92786 }, { "epoch": 0.16452079338356476, "grad_norm": 0.5703125, "learning_rate": 0.000205106027558076, "loss": 0.2609, "step": 92788 }, { "epoch": 0.16452433954887458, "grad_norm": 0.7890625, "learning_rate": 0.00020509969852641874, "loss": 0.3263, "step": 92790 }, { "epoch": 0.1645278857141844, "grad_norm": 0.4921875, "learning_rate": 0.0002050933734085356, "loss": 0.2585, "step": 92792 }, { "epoch": 0.1645314318794942, "grad_norm": 0.328125, "learning_rate": 0.00020508705220445423, "loss": 0.1928, "step": 92794 }, { "epoch": 0.16453497804480402, "grad_norm": 0.271484375, "learning_rate": 0.0002050807349142022, "loss": 0.1902, "step": 92796 }, { "epoch": 0.16453852421011383, "grad_norm": 0.53125, "learning_rate": 0.00020507442153780726, "loss": 0.1649, "step": 92798 }, { "epoch": 0.16454207037542365, "grad_norm": 0.412109375, "learning_rate": 0.000205068112075297, "loss": 0.2066, "step": 92800 }, { "epoch": 0.16454561654073346, "grad_norm": 1.34375, "learning_rate": 0.00020506180652669892, "loss": 0.1241, "step": 92802 }, { "epoch": 0.16454916270604328, "grad_norm": 0.375, "learning_rate": 0.0002050555048920407, "loss": 0.1624, "step": 92804 }, { "epoch": 0.1645527088713531, "grad_norm": 0.1796875, "learning_rate": 0.00020504920717134977, "loss": 0.1253, "step": 92806 }, { "epoch": 0.1645562550366629, "grad_norm": 1.109375, "learning_rate": 0.00020504291336465388, "loss": 0.1727, "step": 92808 }, { "epoch": 0.16455980120197272, "grad_norm": 0.435546875, "learning_rate": 0.0002050366234719804, "loss": 0.1758, "step": 92810 }, { "epoch": 0.16456334736728254, "grad_norm": 0.8203125, "learning_rate": 0.00020503033749335686, "loss": 0.2042, "step": 92812 }, { "epoch": 0.16456689353259235, "grad_norm": 0.73828125, "learning_rate": 0.0002050240554288107, "loss": 0.1924, "step": 92814 }, { "epoch": 0.16457043969790217, "grad_norm": 0.341796875, "learning_rate": 0.00020501777727836956, "loss": 0.1856, "step": 92816 }, { "epoch": 0.16457398586321198, "grad_norm": 1.0703125, "learning_rate": 0.00020501150304206068, "loss": 0.1593, "step": 92818 }, { "epoch": 0.1645775320285218, "grad_norm": 0.515625, "learning_rate": 0.00020500523271991178, "loss": 0.1485, "step": 92820 }, { "epoch": 0.1645810781938316, "grad_norm": 0.314453125, "learning_rate": 0.00020499896631194994, "loss": 0.1926, "step": 92822 }, { "epoch": 0.16458462435914145, "grad_norm": 1.03125, "learning_rate": 0.00020499270381820286, "loss": 0.2106, "step": 92824 }, { "epoch": 0.16458817052445127, "grad_norm": 0.57421875, "learning_rate": 0.00020498644523869782, "loss": 0.1642, "step": 92826 }, { "epoch": 0.16459171668976108, "grad_norm": 0.365234375, "learning_rate": 0.00020498019057346207, "loss": 0.1834, "step": 92828 }, { "epoch": 0.1645952628550709, "grad_norm": 0.6328125, "learning_rate": 0.00020497393982252314, "loss": 0.2317, "step": 92830 }, { "epoch": 0.1645988090203807, "grad_norm": 0.1669921875, "learning_rate": 0.00020496769298590834, "loss": 0.1502, "step": 92832 }, { "epoch": 0.16460235518569052, "grad_norm": 0.53515625, "learning_rate": 0.00020496145006364489, "loss": 0.2147, "step": 92834 }, { "epoch": 0.16460590135100034, "grad_norm": 0.330078125, "learning_rate": 0.00020495521105576018, "loss": 0.1166, "step": 92836 }, { "epoch": 0.16460944751631015, "grad_norm": 0.58984375, "learning_rate": 0.00020494897596228147, "loss": 0.3111, "step": 92838 }, { "epoch": 0.16461299368161997, "grad_norm": 0.35546875, "learning_rate": 0.0002049427447832361, "loss": 0.1736, "step": 92840 }, { "epoch": 0.16461653984692978, "grad_norm": 0.455078125, "learning_rate": 0.00020493651751865121, "loss": 0.2643, "step": 92842 }, { "epoch": 0.1646200860122396, "grad_norm": 0.453125, "learning_rate": 0.00020493029416855405, "loss": 0.1857, "step": 92844 }, { "epoch": 0.1646236321775494, "grad_norm": 0.60546875, "learning_rate": 0.0002049240747329718, "loss": 0.1766, "step": 92846 }, { "epoch": 0.16462717834285923, "grad_norm": 0.8046875, "learning_rate": 0.00020491785921193184, "loss": 0.2545, "step": 92848 }, { "epoch": 0.16463072450816904, "grad_norm": 0.376953125, "learning_rate": 0.00020491164760546115, "loss": 0.1824, "step": 92850 }, { "epoch": 0.16463427067347886, "grad_norm": 0.33203125, "learning_rate": 0.00020490543991358713, "loss": 0.1183, "step": 92852 }, { "epoch": 0.16463781683878867, "grad_norm": 0.41015625, "learning_rate": 0.00020489923613633662, "loss": 0.1856, "step": 92854 }, { "epoch": 0.16464136300409848, "grad_norm": 0.373046875, "learning_rate": 0.00020489303627373702, "loss": 0.1745, "step": 92856 }, { "epoch": 0.1646449091694083, "grad_norm": 1.4375, "learning_rate": 0.00020488684032581527, "loss": 0.1867, "step": 92858 }, { "epoch": 0.1646484553347181, "grad_norm": 0.9296875, "learning_rate": 0.00020488064829259862, "loss": 0.2529, "step": 92860 }, { "epoch": 0.16465200150002793, "grad_norm": 0.34765625, "learning_rate": 0.00020487446017411397, "loss": 0.1327, "step": 92862 }, { "epoch": 0.16465554766533774, "grad_norm": 0.322265625, "learning_rate": 0.00020486827597038858, "loss": 0.1791, "step": 92864 }, { "epoch": 0.16465909383064756, "grad_norm": 0.27734375, "learning_rate": 0.00020486209568144935, "loss": 0.1747, "step": 92866 }, { "epoch": 0.16466263999595737, "grad_norm": 1.3984375, "learning_rate": 0.00020485591930732332, "loss": 0.1785, "step": 92868 }, { "epoch": 0.1646661861612672, "grad_norm": 0.25390625, "learning_rate": 0.00020484974684803763, "loss": 0.2083, "step": 92870 }, { "epoch": 0.164669732326577, "grad_norm": 1.7734375, "learning_rate": 0.00020484357830361904, "loss": 0.1762, "step": 92872 }, { "epoch": 0.16467327849188682, "grad_norm": 0.265625, "learning_rate": 0.00020483741367409482, "loss": 0.2017, "step": 92874 }, { "epoch": 0.16467682465719663, "grad_norm": 0.65625, "learning_rate": 0.00020483125295949164, "loss": 0.1622, "step": 92876 }, { "epoch": 0.16468037082250644, "grad_norm": 0.330078125, "learning_rate": 0.00020482509615983664, "loss": 0.2823, "step": 92878 }, { "epoch": 0.16468391698781626, "grad_norm": 0.462890625, "learning_rate": 0.0002048189432751568, "loss": 0.1705, "step": 92880 }, { "epoch": 0.16468746315312607, "grad_norm": 0.74609375, "learning_rate": 0.00020481279430547875, "loss": 0.1978, "step": 92882 }, { "epoch": 0.1646910093184359, "grad_norm": 0.59375, "learning_rate": 0.00020480664925082962, "loss": 0.202, "step": 92884 }, { "epoch": 0.1646945554837457, "grad_norm": 1.1328125, "learning_rate": 0.00020480050811123618, "loss": 0.2291, "step": 92886 }, { "epoch": 0.16469810164905552, "grad_norm": 0.546875, "learning_rate": 0.00020479437088672536, "loss": 0.2223, "step": 92888 }, { "epoch": 0.16470164781436533, "grad_norm": 1.9765625, "learning_rate": 0.00020478823757732402, "loss": 0.1946, "step": 92890 }, { "epoch": 0.16470519397967515, "grad_norm": 0.85546875, "learning_rate": 0.00020478210818305887, "loss": 0.1768, "step": 92892 }, { "epoch": 0.16470874014498496, "grad_norm": 0.2216796875, "learning_rate": 0.00020477598270395673, "loss": 0.1557, "step": 92894 }, { "epoch": 0.16471228631029478, "grad_norm": 1.625, "learning_rate": 0.00020476986114004457, "loss": 0.3556, "step": 92896 }, { "epoch": 0.1647158324756046, "grad_norm": 0.458984375, "learning_rate": 0.00020476374349134895, "loss": 0.448, "step": 92898 }, { "epoch": 0.1647193786409144, "grad_norm": 0.353515625, "learning_rate": 0.00020475762975789665, "loss": 0.1777, "step": 92900 }, { "epoch": 0.16472292480622422, "grad_norm": 0.267578125, "learning_rate": 0.00020475151993971455, "loss": 0.2003, "step": 92902 }, { "epoch": 0.16472647097153403, "grad_norm": 0.353515625, "learning_rate": 0.00020474541403682915, "loss": 0.1251, "step": 92904 }, { "epoch": 0.16473001713684385, "grad_norm": 0.71875, "learning_rate": 0.0002047393120492674, "loss": 0.162, "step": 92906 }, { "epoch": 0.16473356330215366, "grad_norm": 0.326171875, "learning_rate": 0.00020473321397705582, "loss": 0.1708, "step": 92908 }, { "epoch": 0.16473710946746348, "grad_norm": 0.28125, "learning_rate": 0.00020472711982022116, "loss": 0.1888, "step": 92910 }, { "epoch": 0.1647406556327733, "grad_norm": 0.240234375, "learning_rate": 0.00020472102957878999, "loss": 0.1754, "step": 92912 }, { "epoch": 0.16474420179808313, "grad_norm": 0.337890625, "learning_rate": 0.0002047149432527891, "loss": 0.1844, "step": 92914 }, { "epoch": 0.16474774796339295, "grad_norm": 0.2412109375, "learning_rate": 0.00020470886084224488, "loss": 0.177, "step": 92916 }, { "epoch": 0.16475129412870276, "grad_norm": 0.39453125, "learning_rate": 0.0002047027823471841, "loss": 0.1851, "step": 92918 }, { "epoch": 0.16475484029401258, "grad_norm": 0.55078125, "learning_rate": 0.0002046967077676333, "loss": 0.2397, "step": 92920 }, { "epoch": 0.1647583864593224, "grad_norm": 0.2451171875, "learning_rate": 0.00020469063710361912, "loss": 0.192, "step": 92922 }, { "epoch": 0.1647619326246322, "grad_norm": 0.48828125, "learning_rate": 0.00020468457035516791, "loss": 0.1785, "step": 92924 }, { "epoch": 0.16476547878994202, "grad_norm": 0.2099609375, "learning_rate": 0.00020467850752230637, "loss": 0.1244, "step": 92926 }, { "epoch": 0.16476902495525184, "grad_norm": 1.9921875, "learning_rate": 0.00020467244860506104, "loss": 0.3497, "step": 92928 }, { "epoch": 0.16477257112056165, "grad_norm": 0.1591796875, "learning_rate": 0.00020466639360345833, "loss": 0.1509, "step": 92930 }, { "epoch": 0.16477611728587147, "grad_norm": 1.2109375, "learning_rate": 0.00020466034251752471, "loss": 0.4114, "step": 92932 }, { "epoch": 0.16477966345118128, "grad_norm": 0.62890625, "learning_rate": 0.00020465429534728666, "loss": 0.2237, "step": 92934 }, { "epoch": 0.1647832096164911, "grad_norm": 0.31640625, "learning_rate": 0.00020464825209277066, "loss": 0.2084, "step": 92936 }, { "epoch": 0.1647867557818009, "grad_norm": 0.5546875, "learning_rate": 0.00020464221275400316, "loss": 0.1647, "step": 92938 }, { "epoch": 0.16479030194711072, "grad_norm": 0.90234375, "learning_rate": 0.00020463617733101048, "loss": 0.1496, "step": 92940 }, { "epoch": 0.16479384811242054, "grad_norm": 0.60546875, "learning_rate": 0.00020463014582381908, "loss": 0.2037, "step": 92942 }, { "epoch": 0.16479739427773035, "grad_norm": 0.26171875, "learning_rate": 0.00020462411823245536, "loss": 0.1688, "step": 92944 }, { "epoch": 0.16480094044304017, "grad_norm": 0.478515625, "learning_rate": 0.00020461809455694566, "loss": 0.1911, "step": 92946 }, { "epoch": 0.16480448660834998, "grad_norm": 0.2333984375, "learning_rate": 0.0002046120747973163, "loss": 0.1714, "step": 92948 }, { "epoch": 0.1648080327736598, "grad_norm": 0.275390625, "learning_rate": 0.00020460605895359363, "loss": 0.153, "step": 92950 }, { "epoch": 0.1648115789389696, "grad_norm": 0.45703125, "learning_rate": 0.00020460004702580392, "loss": 0.147, "step": 92952 }, { "epoch": 0.16481512510427943, "grad_norm": 1.15625, "learning_rate": 0.0002045940390139735, "loss": 0.2553, "step": 92954 }, { "epoch": 0.16481867126958924, "grad_norm": 0.48828125, "learning_rate": 0.00020458803491812868, "loss": 0.1655, "step": 92956 }, { "epoch": 0.16482221743489905, "grad_norm": 0.1748046875, "learning_rate": 0.0002045820347382956, "loss": 0.1139, "step": 92958 }, { "epoch": 0.16482576360020887, "grad_norm": 0.357421875, "learning_rate": 0.0002045760384745006, "loss": 0.2378, "step": 92960 }, { "epoch": 0.16482930976551868, "grad_norm": 0.55859375, "learning_rate": 0.00020457004612676989, "loss": 0.152, "step": 92962 }, { "epoch": 0.1648328559308285, "grad_norm": 0.326171875, "learning_rate": 0.0002045640576951297, "loss": 0.1761, "step": 92964 }, { "epoch": 0.1648364020961383, "grad_norm": 0.3671875, "learning_rate": 0.00020455807317960618, "loss": 0.148, "step": 92966 }, { "epoch": 0.16483994826144813, "grad_norm": 0.41015625, "learning_rate": 0.00020455209258022557, "loss": 0.1563, "step": 92968 }, { "epoch": 0.16484349442675794, "grad_norm": 0.466796875, "learning_rate": 0.0002045461158970139, "loss": 0.2072, "step": 92970 }, { "epoch": 0.16484704059206776, "grad_norm": 0.33984375, "learning_rate": 0.00020454014312999745, "loss": 0.1844, "step": 92972 }, { "epoch": 0.16485058675737757, "grad_norm": 0.64453125, "learning_rate": 0.0002045341742792022, "loss": 0.2511, "step": 92974 }, { "epoch": 0.16485413292268739, "grad_norm": 0.345703125, "learning_rate": 0.00020452820934465431, "loss": 0.1853, "step": 92976 }, { "epoch": 0.1648576790879972, "grad_norm": 0.267578125, "learning_rate": 0.00020452224832637992, "loss": 0.4068, "step": 92978 }, { "epoch": 0.16486122525330701, "grad_norm": 1.921875, "learning_rate": 0.00020451629122440502, "loss": 0.1488, "step": 92980 }, { "epoch": 0.16486477141861683, "grad_norm": 0.30859375, "learning_rate": 0.00020451033803875577, "loss": 0.284, "step": 92982 }, { "epoch": 0.16486831758392664, "grad_norm": 0.462890625, "learning_rate": 0.00020450438876945808, "loss": 0.1873, "step": 92984 }, { "epoch": 0.16487186374923646, "grad_norm": 0.63671875, "learning_rate": 0.000204498443416538, "loss": 0.1855, "step": 92986 }, { "epoch": 0.16487540991454627, "grad_norm": 0.314453125, "learning_rate": 0.00020449250198002162, "loss": 0.1695, "step": 92988 }, { "epoch": 0.1648789560798561, "grad_norm": 0.69921875, "learning_rate": 0.00020448656445993483, "loss": 0.197, "step": 92990 }, { "epoch": 0.1648825022451659, "grad_norm": 0.859375, "learning_rate": 0.00020448063085630369, "loss": 0.2253, "step": 92992 }, { "epoch": 0.16488604841047572, "grad_norm": 1.390625, "learning_rate": 0.00020447470116915397, "loss": 0.219, "step": 92994 }, { "epoch": 0.16488959457578553, "grad_norm": 0.322265625, "learning_rate": 0.0002044687753985119, "loss": 0.1632, "step": 92996 }, { "epoch": 0.16489314074109535, "grad_norm": 0.5859375, "learning_rate": 0.00020446285354440303, "loss": 0.207, "step": 92998 }, { "epoch": 0.16489668690640516, "grad_norm": 0.310546875, "learning_rate": 0.00020445693560685362, "loss": 0.2258, "step": 93000 }, { "epoch": 0.16490023307171497, "grad_norm": 0.5390625, "learning_rate": 0.00020445102158588927, "loss": 0.2447, "step": 93002 }, { "epoch": 0.1649037792370248, "grad_norm": 0.8828125, "learning_rate": 0.00020444511148153604, "loss": 0.1608, "step": 93004 }, { "epoch": 0.16490732540233463, "grad_norm": 0.357421875, "learning_rate": 0.00020443920529381957, "loss": 0.1736, "step": 93006 }, { "epoch": 0.16491087156764445, "grad_norm": 0.31640625, "learning_rate": 0.00020443330302276585, "loss": 0.1741, "step": 93008 }, { "epoch": 0.16491441773295426, "grad_norm": 0.5859375, "learning_rate": 0.00020442740466840073, "loss": 0.3705, "step": 93010 }, { "epoch": 0.16491796389826407, "grad_norm": 0.80859375, "learning_rate": 0.00020442151023074994, "loss": 0.2158, "step": 93012 }, { "epoch": 0.1649215100635739, "grad_norm": 1.0390625, "learning_rate": 0.00020441561970983922, "loss": 0.1849, "step": 93014 }, { "epoch": 0.1649250562288837, "grad_norm": 0.34765625, "learning_rate": 0.00020440973310569427, "loss": 0.1528, "step": 93016 }, { "epoch": 0.16492860239419352, "grad_norm": 0.29296875, "learning_rate": 0.00020440385041834103, "loss": 0.1959, "step": 93018 }, { "epoch": 0.16493214855950333, "grad_norm": 1.578125, "learning_rate": 0.00020439797164780504, "loss": 0.2861, "step": 93020 }, { "epoch": 0.16493569472481315, "grad_norm": 0.69921875, "learning_rate": 0.00020439209679411224, "loss": 0.1702, "step": 93022 }, { "epoch": 0.16493924089012296, "grad_norm": 0.765625, "learning_rate": 0.00020438622585728806, "loss": 0.1587, "step": 93024 }, { "epoch": 0.16494278705543278, "grad_norm": 0.431640625, "learning_rate": 0.00020438035883735826, "loss": 0.2164, "step": 93026 }, { "epoch": 0.1649463332207426, "grad_norm": 0.2197265625, "learning_rate": 0.00020437449573434865, "loss": 0.1733, "step": 93028 }, { "epoch": 0.1649498793860524, "grad_norm": 0.189453125, "learning_rate": 0.00020436863654828464, "loss": 0.1509, "step": 93030 }, { "epoch": 0.16495342555136222, "grad_norm": 0.431640625, "learning_rate": 0.00020436278127919206, "loss": 0.1372, "step": 93032 }, { "epoch": 0.16495697171667204, "grad_norm": 0.54296875, "learning_rate": 0.0002043569299270964, "loss": 0.1896, "step": 93034 }, { "epoch": 0.16496051788198185, "grad_norm": 0.439453125, "learning_rate": 0.00020435108249202324, "loss": 0.4139, "step": 93036 }, { "epoch": 0.16496406404729166, "grad_norm": 0.326171875, "learning_rate": 0.0002043452389739982, "loss": 0.2311, "step": 93038 }, { "epoch": 0.16496761021260148, "grad_norm": 2.015625, "learning_rate": 0.00020433939937304692, "loss": 0.488, "step": 93040 }, { "epoch": 0.1649711563779113, "grad_norm": 0.275390625, "learning_rate": 0.00020433356368919475, "loss": 0.1201, "step": 93042 }, { "epoch": 0.1649747025432211, "grad_norm": 0.4140625, "learning_rate": 0.00020432773192246732, "loss": 0.1541, "step": 93044 }, { "epoch": 0.16497824870853092, "grad_norm": 0.640625, "learning_rate": 0.00020432190407289008, "loss": 0.1761, "step": 93046 }, { "epoch": 0.16498179487384074, "grad_norm": 0.40625, "learning_rate": 0.00020431608014048866, "loss": 0.207, "step": 93048 }, { "epoch": 0.16498534103915055, "grad_norm": 0.408203125, "learning_rate": 0.0002043102601252884, "loss": 0.1494, "step": 93050 }, { "epoch": 0.16498888720446037, "grad_norm": 0.47265625, "learning_rate": 0.00020430444402731474, "loss": 0.1539, "step": 93052 }, { "epoch": 0.16499243336977018, "grad_norm": 0.515625, "learning_rate": 0.00020429863184659325, "loss": 0.1576, "step": 93054 }, { "epoch": 0.16499597953508, "grad_norm": 0.69921875, "learning_rate": 0.00020429282358314912, "loss": 0.1867, "step": 93056 }, { "epoch": 0.1649995257003898, "grad_norm": 0.91796875, "learning_rate": 0.00020428701923700803, "loss": 0.1848, "step": 93058 }, { "epoch": 0.16500307186569962, "grad_norm": 1.109375, "learning_rate": 0.00020428121880819506, "loss": 0.1907, "step": 93060 }, { "epoch": 0.16500661803100944, "grad_norm": 0.23828125, "learning_rate": 0.00020427542229673593, "loss": 0.1232, "step": 93062 }, { "epoch": 0.16501016419631925, "grad_norm": 0.259765625, "learning_rate": 0.0002042696297026556, "loss": 0.1286, "step": 93064 }, { "epoch": 0.16501371036162907, "grad_norm": 0.328125, "learning_rate": 0.00020426384102597984, "loss": 0.1689, "step": 93066 }, { "epoch": 0.16501725652693888, "grad_norm": 0.33984375, "learning_rate": 0.00020425805626673362, "loss": 0.2648, "step": 93068 }, { "epoch": 0.1650208026922487, "grad_norm": 0.32421875, "learning_rate": 0.00020425227542494236, "loss": 0.1615, "step": 93070 }, { "epoch": 0.1650243488575585, "grad_norm": 0.439453125, "learning_rate": 0.0002042464985006313, "loss": 0.2039, "step": 93072 }, { "epoch": 0.16502789502286833, "grad_norm": 0.322265625, "learning_rate": 0.00020424072549382572, "loss": 0.1983, "step": 93074 }, { "epoch": 0.16503144118817814, "grad_norm": 0.5390625, "learning_rate": 0.00020423495640455093, "loss": 0.1531, "step": 93076 }, { "epoch": 0.16503498735348796, "grad_norm": 0.345703125, "learning_rate": 0.0002042291912328322, "loss": 0.1879, "step": 93078 }, { "epoch": 0.16503853351879777, "grad_norm": 0.287109375, "learning_rate": 0.00020422342997869456, "loss": 0.1565, "step": 93080 }, { "epoch": 0.16504207968410758, "grad_norm": 0.33203125, "learning_rate": 0.00020421767264216335, "loss": 0.172, "step": 93082 }, { "epoch": 0.1650456258494174, "grad_norm": 0.359375, "learning_rate": 0.00020421191922326363, "loss": 0.2696, "step": 93084 }, { "epoch": 0.1650491720147272, "grad_norm": 0.7734375, "learning_rate": 0.00020420616972202072, "loss": 0.1812, "step": 93086 }, { "epoch": 0.16505271818003703, "grad_norm": 1.234375, "learning_rate": 0.0002042004241384597, "loss": 0.3067, "step": 93088 }, { "epoch": 0.16505626434534684, "grad_norm": 1.7421875, "learning_rate": 0.00020419468247260565, "loss": 0.4838, "step": 93090 }, { "epoch": 0.16505981051065666, "grad_norm": 0.80859375, "learning_rate": 0.0002041889447244837, "loss": 0.1821, "step": 93092 }, { "epoch": 0.16506335667596647, "grad_norm": 0.47265625, "learning_rate": 0.00020418321089411902, "loss": 0.1509, "step": 93094 }, { "epoch": 0.16506690284127631, "grad_norm": 0.1826171875, "learning_rate": 0.0002041774809815365, "loss": 0.1503, "step": 93096 }, { "epoch": 0.16507044900658613, "grad_norm": 0.166015625, "learning_rate": 0.00020417175498676138, "loss": 0.1789, "step": 93098 }, { "epoch": 0.16507399517189594, "grad_norm": 0.57421875, "learning_rate": 0.0002041660329098187, "loss": 0.1695, "step": 93100 }, { "epoch": 0.16507754133720576, "grad_norm": 0.859375, "learning_rate": 0.00020416031475073337, "loss": 0.1979, "step": 93102 }, { "epoch": 0.16508108750251557, "grad_norm": 2.609375, "learning_rate": 0.00020415460050953054, "loss": 0.2188, "step": 93104 }, { "epoch": 0.1650846336678254, "grad_norm": 0.5, "learning_rate": 0.00020414889018623503, "loss": 0.1752, "step": 93106 }, { "epoch": 0.1650881798331352, "grad_norm": 0.408203125, "learning_rate": 0.0002041431837808719, "loss": 0.1844, "step": 93108 }, { "epoch": 0.16509172599844502, "grad_norm": 1.6484375, "learning_rate": 0.00020413748129346611, "loss": 0.196, "step": 93110 }, { "epoch": 0.16509527216375483, "grad_norm": 1.0703125, "learning_rate": 0.00020413178272404267, "loss": 0.2417, "step": 93112 }, { "epoch": 0.16509881832906464, "grad_norm": 0.62109375, "learning_rate": 0.00020412608807262637, "loss": 0.1431, "step": 93114 }, { "epoch": 0.16510236449437446, "grad_norm": 1.9375, "learning_rate": 0.00020412039733924215, "loss": 0.39, "step": 93116 }, { "epoch": 0.16510591065968427, "grad_norm": 0.419921875, "learning_rate": 0.000204114710523915, "loss": 0.1871, "step": 93118 }, { "epoch": 0.1651094568249941, "grad_norm": 0.283203125, "learning_rate": 0.00020410902762666973, "loss": 0.1447, "step": 93120 }, { "epoch": 0.1651130029903039, "grad_norm": 0.1455078125, "learning_rate": 0.00020410334864753106, "loss": 0.1702, "step": 93122 }, { "epoch": 0.16511654915561372, "grad_norm": 0.59375, "learning_rate": 0.00020409767358652402, "loss": 0.1993, "step": 93124 }, { "epoch": 0.16512009532092353, "grad_norm": 0.18359375, "learning_rate": 0.0002040920024436733, "loss": 0.1648, "step": 93126 }, { "epoch": 0.16512364148623335, "grad_norm": 0.65625, "learning_rate": 0.00020408633521900387, "loss": 0.2026, "step": 93128 }, { "epoch": 0.16512718765154316, "grad_norm": 2.21875, "learning_rate": 0.00020408067191254027, "loss": 0.1906, "step": 93130 }, { "epoch": 0.16513073381685298, "grad_norm": 0.859375, "learning_rate": 0.0002040750125243075, "loss": 0.1854, "step": 93132 }, { "epoch": 0.1651342799821628, "grad_norm": 0.37109375, "learning_rate": 0.0002040693570543302, "loss": 0.2021, "step": 93134 }, { "epoch": 0.1651378261474726, "grad_norm": 1.234375, "learning_rate": 0.00020406370550263305, "loss": 0.3341, "step": 93136 }, { "epoch": 0.16514137231278242, "grad_norm": 3.28125, "learning_rate": 0.0002040580578692408, "loss": 0.3124, "step": 93138 }, { "epoch": 0.16514491847809223, "grad_norm": 0.41015625, "learning_rate": 0.00020405241415417832, "loss": 0.2344, "step": 93140 }, { "epoch": 0.16514846464340205, "grad_norm": 0.49609375, "learning_rate": 0.0002040467743574701, "loss": 0.184, "step": 93142 }, { "epoch": 0.16515201080871186, "grad_norm": 0.28515625, "learning_rate": 0.00020404113847914085, "loss": 0.1405, "step": 93144 }, { "epoch": 0.16515555697402168, "grad_norm": 2.125, "learning_rate": 0.00020403550651921514, "loss": 0.237, "step": 93146 }, { "epoch": 0.1651591031393315, "grad_norm": 0.2294921875, "learning_rate": 0.0002040298784777178, "loss": 0.143, "step": 93148 }, { "epoch": 0.1651626493046413, "grad_norm": 0.419921875, "learning_rate": 0.00020402425435467323, "loss": 0.1501, "step": 93150 }, { "epoch": 0.16516619546995112, "grad_norm": 0.72265625, "learning_rate": 0.0002040186341501062, "loss": 0.1824, "step": 93152 }, { "epoch": 0.16516974163526094, "grad_norm": 0.51953125, "learning_rate": 0.00020401301786404118, "loss": 0.1714, "step": 93154 }, { "epoch": 0.16517328780057075, "grad_norm": 0.337890625, "learning_rate": 0.00020400740549650275, "loss": 0.1975, "step": 93156 }, { "epoch": 0.16517683396588057, "grad_norm": 0.208984375, "learning_rate": 0.00020400179704751554, "loss": 0.1165, "step": 93158 }, { "epoch": 0.16518038013119038, "grad_norm": 0.2021484375, "learning_rate": 0.00020399619251710408, "loss": 0.1308, "step": 93160 }, { "epoch": 0.1651839262965002, "grad_norm": 0.5, "learning_rate": 0.00020399059190529266, "loss": 0.1964, "step": 93162 }, { "epoch": 0.16518747246181, "grad_norm": 0.451171875, "learning_rate": 0.00020398499521210594, "loss": 0.2014, "step": 93164 }, { "epoch": 0.16519101862711982, "grad_norm": 0.34765625, "learning_rate": 0.00020397940243756845, "loss": 0.1423, "step": 93166 }, { "epoch": 0.16519456479242964, "grad_norm": 0.390625, "learning_rate": 0.0002039738135817046, "loss": 0.202, "step": 93168 }, { "epoch": 0.16519811095773945, "grad_norm": 1.859375, "learning_rate": 0.00020396822864453872, "loss": 0.3521, "step": 93170 }, { "epoch": 0.16520165712304927, "grad_norm": 0.90625, "learning_rate": 0.00020396264762609546, "loss": 0.1922, "step": 93172 }, { "epoch": 0.16520520328835908, "grad_norm": 0.2890625, "learning_rate": 0.00020395707052639898, "loss": 0.2108, "step": 93174 }, { "epoch": 0.1652087494536689, "grad_norm": 0.2001953125, "learning_rate": 0.00020395149734547382, "loss": 0.2167, "step": 93176 }, { "epoch": 0.1652122956189787, "grad_norm": 0.671875, "learning_rate": 0.00020394592808334436, "loss": 0.1411, "step": 93178 }, { "epoch": 0.16521584178428853, "grad_norm": 0.515625, "learning_rate": 0.00020394036274003487, "loss": 0.1989, "step": 93180 }, { "epoch": 0.16521938794959834, "grad_norm": 0.4453125, "learning_rate": 0.0002039348013155698, "loss": 0.1812, "step": 93182 }, { "epoch": 0.16522293411490815, "grad_norm": 0.169921875, "learning_rate": 0.00020392924380997337, "loss": 0.1597, "step": 93184 }, { "epoch": 0.165226480280218, "grad_norm": 0.275390625, "learning_rate": 0.00020392369022326998, "loss": 0.2039, "step": 93186 }, { "epoch": 0.1652300264455278, "grad_norm": 1.3046875, "learning_rate": 0.00020391814055548374, "loss": 0.1888, "step": 93188 }, { "epoch": 0.16523357261083763, "grad_norm": 0.25390625, "learning_rate": 0.0002039125948066391, "loss": 0.2253, "step": 93190 }, { "epoch": 0.16523711877614744, "grad_norm": 0.21484375, "learning_rate": 0.0002039070529767604, "loss": 0.1877, "step": 93192 }, { "epoch": 0.16524066494145725, "grad_norm": 0.2373046875, "learning_rate": 0.00020390151506587153, "loss": 0.16, "step": 93194 }, { "epoch": 0.16524421110676707, "grad_norm": 0.353515625, "learning_rate": 0.0002038959810739971, "loss": 0.1616, "step": 93196 }, { "epoch": 0.16524775727207688, "grad_norm": 0.57421875, "learning_rate": 0.00020389045100116106, "loss": 0.1845, "step": 93198 }, { "epoch": 0.1652513034373867, "grad_norm": 0.2333984375, "learning_rate": 0.00020388492484738765, "loss": 0.331, "step": 93200 }, { "epoch": 0.1652548496026965, "grad_norm": 0.33984375, "learning_rate": 0.000203879402612701, "loss": 0.1563, "step": 93202 }, { "epoch": 0.16525839576800633, "grad_norm": 0.51953125, "learning_rate": 0.00020387388429712544, "loss": 0.1276, "step": 93204 }, { "epoch": 0.16526194193331614, "grad_norm": 0.2265625, "learning_rate": 0.00020386836990068485, "loss": 0.1451, "step": 93206 }, { "epoch": 0.16526548809862596, "grad_norm": 0.73046875, "learning_rate": 0.0002038628594234035, "loss": 0.2052, "step": 93208 }, { "epoch": 0.16526903426393577, "grad_norm": 0.8125, "learning_rate": 0.00020385735286530558, "loss": 0.2353, "step": 93210 }, { "epoch": 0.16527258042924559, "grad_norm": 0.39453125, "learning_rate": 0.00020385185022641494, "loss": 0.1717, "step": 93212 }, { "epoch": 0.1652761265945554, "grad_norm": 0.3203125, "learning_rate": 0.00020384635150675574, "loss": 0.11, "step": 93214 }, { "epoch": 0.16527967275986521, "grad_norm": 1.3046875, "learning_rate": 0.00020384085670635215, "loss": 0.2227, "step": 93216 }, { "epoch": 0.16528321892517503, "grad_norm": 2.4375, "learning_rate": 0.00020383536582522808, "loss": 0.272, "step": 93218 }, { "epoch": 0.16528676509048484, "grad_norm": 0.306640625, "learning_rate": 0.00020382987886340758, "loss": 0.1852, "step": 93220 }, { "epoch": 0.16529031125579466, "grad_norm": 0.337890625, "learning_rate": 0.00020382439582091453, "loss": 0.1991, "step": 93222 }, { "epoch": 0.16529385742110447, "grad_norm": 0.7734375, "learning_rate": 0.00020381891669777303, "loss": 0.2013, "step": 93224 }, { "epoch": 0.1652974035864143, "grad_norm": 0.26953125, "learning_rate": 0.00020381344149400716, "loss": 0.1701, "step": 93226 }, { "epoch": 0.1653009497517241, "grad_norm": 0.458984375, "learning_rate": 0.0002038079702096406, "loss": 0.1328, "step": 93228 }, { "epoch": 0.16530449591703392, "grad_norm": 2.734375, "learning_rate": 0.0002038025028446974, "loss": 0.2251, "step": 93230 }, { "epoch": 0.16530804208234373, "grad_norm": 0.275390625, "learning_rate": 0.0002037970393992016, "loss": 0.1182, "step": 93232 }, { "epoch": 0.16531158824765355, "grad_norm": 0.42578125, "learning_rate": 0.00020379157987317683, "loss": 0.1715, "step": 93234 }, { "epoch": 0.16531513441296336, "grad_norm": 0.37109375, "learning_rate": 0.00020378612426664723, "loss": 0.2064, "step": 93236 }, { "epoch": 0.16531868057827317, "grad_norm": 0.5, "learning_rate": 0.00020378067257963647, "loss": 0.1744, "step": 93238 }, { "epoch": 0.165322226743583, "grad_norm": 0.61328125, "learning_rate": 0.00020377522481216853, "loss": 0.1866, "step": 93240 }, { "epoch": 0.1653257729088928, "grad_norm": 2.015625, "learning_rate": 0.00020376978096426718, "loss": 0.4331, "step": 93242 }, { "epoch": 0.16532931907420262, "grad_norm": 0.1650390625, "learning_rate": 0.00020376434103595606, "loss": 0.2192, "step": 93244 }, { "epoch": 0.16533286523951243, "grad_norm": 0.7265625, "learning_rate": 0.0002037589050272593, "loss": 0.2127, "step": 93246 }, { "epoch": 0.16533641140482225, "grad_norm": 0.5, "learning_rate": 0.0002037534729382003, "loss": 0.1838, "step": 93248 }, { "epoch": 0.16533995757013206, "grad_norm": 0.400390625, "learning_rate": 0.00020374804476880315, "loss": 0.2608, "step": 93250 }, { "epoch": 0.16534350373544188, "grad_norm": 0.2412109375, "learning_rate": 0.00020374262051909138, "loss": 0.1557, "step": 93252 }, { "epoch": 0.1653470499007517, "grad_norm": 0.154296875, "learning_rate": 0.0002037372001890888, "loss": 0.1916, "step": 93254 }, { "epoch": 0.1653505960660615, "grad_norm": 0.259765625, "learning_rate": 0.00020373178377881913, "loss": 0.1984, "step": 93256 }, { "epoch": 0.16535414223137132, "grad_norm": 0.1494140625, "learning_rate": 0.00020372637128830603, "loss": 0.1535, "step": 93258 }, { "epoch": 0.16535768839668114, "grad_norm": 0.9296875, "learning_rate": 0.00020372096271757318, "loss": 0.2998, "step": 93260 }, { "epoch": 0.16536123456199095, "grad_norm": 0.4453125, "learning_rate": 0.00020371555806664407, "loss": 0.2122, "step": 93262 }, { "epoch": 0.16536478072730076, "grad_norm": 0.283203125, "learning_rate": 0.00020371015733554264, "loss": 0.2003, "step": 93264 }, { "epoch": 0.16536832689261058, "grad_norm": 0.490234375, "learning_rate": 0.00020370476052429222, "loss": 0.2494, "step": 93266 }, { "epoch": 0.1653718730579204, "grad_norm": 0.31640625, "learning_rate": 0.00020369936763291668, "loss": 0.1775, "step": 93268 }, { "epoch": 0.1653754192232302, "grad_norm": 0.8203125, "learning_rate": 0.0002036939786614394, "loss": 0.2356, "step": 93270 }, { "epoch": 0.16537896538854002, "grad_norm": 0.271484375, "learning_rate": 0.00020368859360988403, "loss": 0.1661, "step": 93272 }, { "epoch": 0.16538251155384984, "grad_norm": 0.341796875, "learning_rate": 0.00020368321247827415, "loss": 0.286, "step": 93274 }, { "epoch": 0.16538605771915965, "grad_norm": 1.3046875, "learning_rate": 0.00020367783526663327, "loss": 0.3629, "step": 93276 }, { "epoch": 0.1653896038844695, "grad_norm": 0.244140625, "learning_rate": 0.00020367246197498483, "loss": 0.2053, "step": 93278 }, { "epoch": 0.1653931500497793, "grad_norm": 0.515625, "learning_rate": 0.00020366709260335233, "loss": 0.1608, "step": 93280 }, { "epoch": 0.16539669621508912, "grad_norm": 0.376953125, "learning_rate": 0.00020366172715175951, "loss": 0.1375, "step": 93282 }, { "epoch": 0.16540024238039894, "grad_norm": 0.59765625, "learning_rate": 0.0002036563656202295, "loss": 0.1635, "step": 93284 }, { "epoch": 0.16540378854570875, "grad_norm": 0.28125, "learning_rate": 0.0002036510080087859, "loss": 0.1675, "step": 93286 }, { "epoch": 0.16540733471101857, "grad_norm": 0.185546875, "learning_rate": 0.00020364565431745213, "loss": 0.1642, "step": 93288 }, { "epoch": 0.16541088087632838, "grad_norm": 1.109375, "learning_rate": 0.0002036403045462516, "loss": 0.2206, "step": 93290 }, { "epoch": 0.1654144270416382, "grad_norm": 0.353515625, "learning_rate": 0.00020363495869520776, "loss": 0.1462, "step": 93292 }, { "epoch": 0.165417973206948, "grad_norm": 0.421875, "learning_rate": 0.0002036296167643439, "loss": 0.1454, "step": 93294 }, { "epoch": 0.16542151937225782, "grad_norm": 1.2890625, "learning_rate": 0.00020362427875368343, "loss": 0.1992, "step": 93296 }, { "epoch": 0.16542506553756764, "grad_norm": 0.65625, "learning_rate": 0.00020361894466324964, "loss": 0.2541, "step": 93298 }, { "epoch": 0.16542861170287745, "grad_norm": 0.51953125, "learning_rate": 0.00020361361449306603, "loss": 0.1836, "step": 93300 }, { "epoch": 0.16543215786818727, "grad_norm": 0.78125, "learning_rate": 0.00020360828824315572, "loss": 0.2039, "step": 93302 }, { "epoch": 0.16543570403349708, "grad_norm": 0.2255859375, "learning_rate": 0.00020360296591354207, "loss": 0.2242, "step": 93304 }, { "epoch": 0.1654392501988069, "grad_norm": 0.828125, "learning_rate": 0.0002035976475042483, "loss": 0.2209, "step": 93306 }, { "epoch": 0.1654427963641167, "grad_norm": 1.046875, "learning_rate": 0.00020359233301529782, "loss": 0.1818, "step": 93308 }, { "epoch": 0.16544634252942653, "grad_norm": 0.578125, "learning_rate": 0.00020358702244671373, "loss": 0.2032, "step": 93310 }, { "epoch": 0.16544988869473634, "grad_norm": 0.296875, "learning_rate": 0.00020358171579851924, "loss": 0.1679, "step": 93312 }, { "epoch": 0.16545343486004616, "grad_norm": 0.47265625, "learning_rate": 0.00020357641307073776, "loss": 0.176, "step": 93314 }, { "epoch": 0.16545698102535597, "grad_norm": 1.6796875, "learning_rate": 0.00020357111426339228, "loss": 0.1871, "step": 93316 }, { "epoch": 0.16546052719066578, "grad_norm": 1.1484375, "learning_rate": 0.00020356581937650602, "loss": 0.2008, "step": 93318 }, { "epoch": 0.1654640733559756, "grad_norm": 0.2158203125, "learning_rate": 0.00020356052841010214, "loss": 0.1578, "step": 93320 }, { "epoch": 0.16546761952128541, "grad_norm": 0.54296875, "learning_rate": 0.00020355524136420384, "loss": 0.2173, "step": 93322 }, { "epoch": 0.16547116568659523, "grad_norm": 0.81640625, "learning_rate": 0.00020354995823883425, "loss": 0.166, "step": 93324 }, { "epoch": 0.16547471185190504, "grad_norm": 0.287109375, "learning_rate": 0.00020354467903401635, "loss": 0.2074, "step": 93326 }, { "epoch": 0.16547825801721486, "grad_norm": 0.333984375, "learning_rate": 0.00020353940374977334, "loss": 0.197, "step": 93328 }, { "epoch": 0.16548180418252467, "grad_norm": 0.2392578125, "learning_rate": 0.00020353413238612814, "loss": 0.1522, "step": 93330 }, { "epoch": 0.1654853503478345, "grad_norm": 0.322265625, "learning_rate": 0.00020352886494310409, "loss": 0.157, "step": 93332 }, { "epoch": 0.1654888965131443, "grad_norm": 0.248046875, "learning_rate": 0.00020352360142072393, "loss": 0.1895, "step": 93334 }, { "epoch": 0.16549244267845412, "grad_norm": 0.8984375, "learning_rate": 0.00020351834181901078, "loss": 0.1638, "step": 93336 }, { "epoch": 0.16549598884376393, "grad_norm": 0.412109375, "learning_rate": 0.0002035130861379877, "loss": 0.1676, "step": 93338 }, { "epoch": 0.16549953500907374, "grad_norm": 0.1845703125, "learning_rate": 0.00020350783437767775, "loss": 0.1248, "step": 93340 }, { "epoch": 0.16550308117438356, "grad_norm": 0.16796875, "learning_rate": 0.00020350258653810367, "loss": 0.1598, "step": 93342 }, { "epoch": 0.16550662733969337, "grad_norm": 0.29296875, "learning_rate": 0.00020349734261928848, "loss": 0.2553, "step": 93344 }, { "epoch": 0.1655101735050032, "grad_norm": 0.224609375, "learning_rate": 0.00020349210262125532, "loss": 0.1567, "step": 93346 }, { "epoch": 0.165513719670313, "grad_norm": 0.361328125, "learning_rate": 0.0002034868665440268, "loss": 0.1674, "step": 93348 }, { "epoch": 0.16551726583562282, "grad_norm": 0.333984375, "learning_rate": 0.00020348163438762607, "loss": 0.1884, "step": 93350 }, { "epoch": 0.16552081200093263, "grad_norm": 0.64453125, "learning_rate": 0.00020347640615207588, "loss": 0.2065, "step": 93352 }, { "epoch": 0.16552435816624245, "grad_norm": 0.73046875, "learning_rate": 0.0002034711818373991, "loss": 0.1695, "step": 93354 }, { "epoch": 0.16552790433155226, "grad_norm": 0.34375, "learning_rate": 0.00020346596144361865, "loss": 0.1565, "step": 93356 }, { "epoch": 0.16553145049686208, "grad_norm": 0.330078125, "learning_rate": 0.0002034607449707573, "loss": 0.1336, "step": 93358 }, { "epoch": 0.1655349966621719, "grad_norm": 0.6015625, "learning_rate": 0.0002034555324188378, "loss": 0.1873, "step": 93360 }, { "epoch": 0.1655385428274817, "grad_norm": 0.37890625, "learning_rate": 0.00020345032378788313, "loss": 0.1565, "step": 93362 }, { "epoch": 0.16554208899279152, "grad_norm": 0.259765625, "learning_rate": 0.00020344511907791595, "loss": 0.1781, "step": 93364 }, { "epoch": 0.16554563515810133, "grad_norm": 1.6640625, "learning_rate": 0.00020343991828895903, "loss": 0.2051, "step": 93366 }, { "epoch": 0.16554918132341118, "grad_norm": 0.43359375, "learning_rate": 0.00020343472142103514, "loss": 0.208, "step": 93368 }, { "epoch": 0.165552727488721, "grad_norm": 1.671875, "learning_rate": 0.00020342952847416694, "loss": 0.1882, "step": 93370 }, { "epoch": 0.1655562736540308, "grad_norm": 1.2578125, "learning_rate": 0.00020342433944837719, "loss": 0.2161, "step": 93372 }, { "epoch": 0.16555981981934062, "grad_norm": 0.54296875, "learning_rate": 0.00020341915434368867, "loss": 0.1534, "step": 93374 }, { "epoch": 0.16556336598465043, "grad_norm": 0.1630859375, "learning_rate": 0.00020341397316012388, "loss": 0.1826, "step": 93376 }, { "epoch": 0.16556691214996025, "grad_norm": 0.33203125, "learning_rate": 0.00020340879589770563, "loss": 0.2179, "step": 93378 }, { "epoch": 0.16557045831527006, "grad_norm": 1.5546875, "learning_rate": 0.00020340362255645642, "loss": 0.1916, "step": 93380 }, { "epoch": 0.16557400448057988, "grad_norm": 0.29296875, "learning_rate": 0.0002033984531363991, "loss": 0.2223, "step": 93382 }, { "epoch": 0.1655775506458897, "grad_norm": 0.19140625, "learning_rate": 0.00020339328763755604, "loss": 0.3874, "step": 93384 }, { "epoch": 0.1655810968111995, "grad_norm": 0.57421875, "learning_rate": 0.00020338812605994993, "loss": 0.2185, "step": 93386 }, { "epoch": 0.16558464297650932, "grad_norm": 0.53515625, "learning_rate": 0.0002033829684036034, "loss": 0.211, "step": 93388 }, { "epoch": 0.16558818914181914, "grad_norm": 0.90625, "learning_rate": 0.00020337781466853887, "loss": 0.2196, "step": 93390 }, { "epoch": 0.16559173530712895, "grad_norm": 0.390625, "learning_rate": 0.00020337266485477896, "loss": 0.1614, "step": 93392 }, { "epoch": 0.16559528147243877, "grad_norm": 0.6875, "learning_rate": 0.00020336751896234627, "loss": 0.1697, "step": 93394 }, { "epoch": 0.16559882763774858, "grad_norm": 0.625, "learning_rate": 0.00020336237699126317, "loss": 0.1475, "step": 93396 }, { "epoch": 0.1656023738030584, "grad_norm": 2.390625, "learning_rate": 0.0002033572389415523, "loss": 0.3049, "step": 93398 }, { "epoch": 0.1656059199683682, "grad_norm": 0.333984375, "learning_rate": 0.00020335210481323588, "loss": 0.1829, "step": 93400 }, { "epoch": 0.16560946613367802, "grad_norm": 0.3828125, "learning_rate": 0.00020334697460633658, "loss": 0.1929, "step": 93402 }, { "epoch": 0.16561301229898784, "grad_norm": 2.140625, "learning_rate": 0.00020334184832087687, "loss": 0.2372, "step": 93404 }, { "epoch": 0.16561655846429765, "grad_norm": 0.408203125, "learning_rate": 0.00020333672595687898, "loss": 0.1743, "step": 93406 }, { "epoch": 0.16562010462960747, "grad_norm": 0.435546875, "learning_rate": 0.00020333160751436535, "loss": 0.1063, "step": 93408 }, { "epoch": 0.16562365079491728, "grad_norm": 0.396484375, "learning_rate": 0.00020332649299335858, "loss": 0.182, "step": 93410 }, { "epoch": 0.1656271969602271, "grad_norm": 2.21875, "learning_rate": 0.00020332138239388076, "loss": 0.202, "step": 93412 }, { "epoch": 0.1656307431255369, "grad_norm": 0.388671875, "learning_rate": 0.00020331627571595442, "loss": 0.1899, "step": 93414 }, { "epoch": 0.16563428929084673, "grad_norm": 0.60546875, "learning_rate": 0.00020331117295960177, "loss": 0.175, "step": 93416 }, { "epoch": 0.16563783545615654, "grad_norm": 0.21875, "learning_rate": 0.00020330607412484525, "loss": 0.1802, "step": 93418 }, { "epoch": 0.16564138162146635, "grad_norm": 1.1640625, "learning_rate": 0.00020330097921170706, "loss": 0.2455, "step": 93420 }, { "epoch": 0.16564492778677617, "grad_norm": 0.29296875, "learning_rate": 0.0002032958882202095, "loss": 0.1818, "step": 93422 }, { "epoch": 0.16564847395208598, "grad_norm": 0.48828125, "learning_rate": 0.00020329080115037492, "loss": 0.1589, "step": 93424 }, { "epoch": 0.1656520201173958, "grad_norm": 1.1796875, "learning_rate": 0.00020328571800222554, "loss": 0.1541, "step": 93426 }, { "epoch": 0.1656555662827056, "grad_norm": 0.60546875, "learning_rate": 0.0002032806387757834, "loss": 0.1471, "step": 93428 }, { "epoch": 0.16565911244801543, "grad_norm": 0.1865234375, "learning_rate": 0.0002032755634710711, "loss": 0.1356, "step": 93430 }, { "epoch": 0.16566265861332524, "grad_norm": 0.388671875, "learning_rate": 0.0002032704920881104, "loss": 0.1529, "step": 93432 }, { "epoch": 0.16566620477863506, "grad_norm": 0.2421875, "learning_rate": 0.00020326542462692374, "loss": 0.2012, "step": 93434 }, { "epoch": 0.16566975094394487, "grad_norm": 0.78515625, "learning_rate": 0.00020326036108753328, "loss": 0.1821, "step": 93436 }, { "epoch": 0.16567329710925469, "grad_norm": 0.40234375, "learning_rate": 0.00020325530146996103, "loss": 0.196, "step": 93438 }, { "epoch": 0.1656768432745645, "grad_norm": 0.390625, "learning_rate": 0.0002032502457742293, "loss": 0.2324, "step": 93440 }, { "epoch": 0.16568038943987431, "grad_norm": 0.7578125, "learning_rate": 0.00020324519400035995, "loss": 0.1527, "step": 93442 }, { "epoch": 0.16568393560518413, "grad_norm": 0.625, "learning_rate": 0.00020324014614837534, "loss": 0.2041, "step": 93444 }, { "epoch": 0.16568748177049394, "grad_norm": 0.283203125, "learning_rate": 0.00020323510221829745, "loss": 0.1614, "step": 93446 }, { "epoch": 0.16569102793580376, "grad_norm": 0.51953125, "learning_rate": 0.00020323006221014835, "loss": 0.1864, "step": 93448 }, { "epoch": 0.16569457410111357, "grad_norm": 0.33203125, "learning_rate": 0.00020322502612394997, "loss": 0.2189, "step": 93450 }, { "epoch": 0.1656981202664234, "grad_norm": 0.361328125, "learning_rate": 0.00020321999395972435, "loss": 0.1193, "step": 93452 }, { "epoch": 0.1657016664317332, "grad_norm": 0.443359375, "learning_rate": 0.00020321496571749364, "loss": 0.1474, "step": 93454 }, { "epoch": 0.16570521259704302, "grad_norm": 1.34375, "learning_rate": 0.00020320994139727978, "loss": 0.4208, "step": 93456 }, { "epoch": 0.16570875876235286, "grad_norm": 0.953125, "learning_rate": 0.00020320492099910471, "loss": 0.1409, "step": 93458 }, { "epoch": 0.16571230492766267, "grad_norm": 0.85546875, "learning_rate": 0.00020319990452299038, "loss": 0.2573, "step": 93460 }, { "epoch": 0.1657158510929725, "grad_norm": 0.46484375, "learning_rate": 0.00020319489196895873, "loss": 0.2109, "step": 93462 }, { "epoch": 0.1657193972582823, "grad_norm": 1.234375, "learning_rate": 0.00020318988333703174, "loss": 0.3504, "step": 93464 }, { "epoch": 0.16572294342359212, "grad_norm": 1.3984375, "learning_rate": 0.00020318487862723124, "loss": 0.2101, "step": 93466 }, { "epoch": 0.16572648958890193, "grad_norm": 1.46875, "learning_rate": 0.00020317987783957919, "loss": 0.197, "step": 93468 }, { "epoch": 0.16573003575421175, "grad_norm": 0.6640625, "learning_rate": 0.0002031748809740974, "loss": 0.1674, "step": 93470 }, { "epoch": 0.16573358191952156, "grad_norm": 0.5, "learning_rate": 0.0002031698880308077, "loss": 0.1315, "step": 93472 }, { "epoch": 0.16573712808483138, "grad_norm": 0.271484375, "learning_rate": 0.000203164899009732, "loss": 0.2056, "step": 93474 }, { "epoch": 0.1657406742501412, "grad_norm": 0.2216796875, "learning_rate": 0.0002031599139108921, "loss": 0.1999, "step": 93476 }, { "epoch": 0.165744220415451, "grad_norm": 0.90625, "learning_rate": 0.0002031549327343098, "loss": 0.2088, "step": 93478 }, { "epoch": 0.16574776658076082, "grad_norm": 0.26953125, "learning_rate": 0.00020314995548000687, "loss": 0.171, "step": 93480 }, { "epoch": 0.16575131274607063, "grad_norm": 0.224609375, "learning_rate": 0.00020314498214800495, "loss": 0.2027, "step": 93482 }, { "epoch": 0.16575485891138045, "grad_norm": 0.28515625, "learning_rate": 0.0002031400127383261, "loss": 0.1431, "step": 93484 }, { "epoch": 0.16575840507669026, "grad_norm": 0.40625, "learning_rate": 0.0002031350472509918, "loss": 0.177, "step": 93486 }, { "epoch": 0.16576195124200008, "grad_norm": 0.546875, "learning_rate": 0.0002031300856860239, "loss": 0.1815, "step": 93488 }, { "epoch": 0.1657654974073099, "grad_norm": 0.37109375, "learning_rate": 0.00020312512804344398, "loss": 0.1727, "step": 93490 }, { "epoch": 0.1657690435726197, "grad_norm": 0.53515625, "learning_rate": 0.0002031201743232738, "loss": 0.1595, "step": 93492 }, { "epoch": 0.16577258973792952, "grad_norm": 0.5, "learning_rate": 0.000203115224525535, "loss": 0.1742, "step": 93494 }, { "epoch": 0.16577613590323934, "grad_norm": 2.328125, "learning_rate": 0.00020311027865024917, "loss": 0.2557, "step": 93496 }, { "epoch": 0.16577968206854915, "grad_norm": 0.59375, "learning_rate": 0.0002031053366974381, "loss": 0.1506, "step": 93498 }, { "epoch": 0.16578322823385896, "grad_norm": 0.392578125, "learning_rate": 0.00020310039866712327, "loss": 0.1521, "step": 93500 }, { "epoch": 0.16578677439916878, "grad_norm": 0.369140625, "learning_rate": 0.00020309546455932632, "loss": 0.2249, "step": 93502 }, { "epoch": 0.1657903205644786, "grad_norm": 0.37890625, "learning_rate": 0.00020309053437406887, "loss": 0.1686, "step": 93504 }, { "epoch": 0.1657938667297884, "grad_norm": 0.3046875, "learning_rate": 0.00020308560811137239, "loss": 0.1744, "step": 93506 }, { "epoch": 0.16579741289509822, "grad_norm": 0.30078125, "learning_rate": 0.00020308068577125848, "loss": 0.1874, "step": 93508 }, { "epoch": 0.16580095906040804, "grad_norm": 0.291015625, "learning_rate": 0.00020307576735374862, "loss": 0.1718, "step": 93510 }, { "epoch": 0.16580450522571785, "grad_norm": 0.365234375, "learning_rate": 0.00020307085285886447, "loss": 0.1672, "step": 93512 }, { "epoch": 0.16580805139102767, "grad_norm": 0.1591796875, "learning_rate": 0.00020306594228662733, "loss": 0.175, "step": 93514 }, { "epoch": 0.16581159755633748, "grad_norm": 0.2021484375, "learning_rate": 0.0002030610356370587, "loss": 0.1662, "step": 93516 }, { "epoch": 0.1658151437216473, "grad_norm": 0.462890625, "learning_rate": 0.00020305613291018022, "loss": 0.162, "step": 93518 }, { "epoch": 0.1658186898869571, "grad_norm": 0.375, "learning_rate": 0.00020305123410601308, "loss": 0.1446, "step": 93520 }, { "epoch": 0.16582223605226692, "grad_norm": 1.203125, "learning_rate": 0.0002030463392245789, "loss": 0.4159, "step": 93522 }, { "epoch": 0.16582578221757674, "grad_norm": 0.63671875, "learning_rate": 0.00020304144826589903, "loss": 0.2905, "step": 93524 }, { "epoch": 0.16582932838288655, "grad_norm": 3.40625, "learning_rate": 0.00020303656122999482, "loss": 0.2552, "step": 93526 }, { "epoch": 0.16583287454819637, "grad_norm": 0.392578125, "learning_rate": 0.00020303167811688773, "loss": 0.1726, "step": 93528 }, { "epoch": 0.16583642071350618, "grad_norm": 0.380859375, "learning_rate": 0.00020302679892659903, "loss": 0.2115, "step": 93530 }, { "epoch": 0.165839966878816, "grad_norm": 0.73046875, "learning_rate": 0.0002030219236591501, "loss": 0.2571, "step": 93532 }, { "epoch": 0.1658435130441258, "grad_norm": 0.404296875, "learning_rate": 0.00020301705231456219, "loss": 0.1704, "step": 93534 }, { "epoch": 0.16584705920943563, "grad_norm": 0.259765625, "learning_rate": 0.0002030121848928567, "loss": 0.2104, "step": 93536 }, { "epoch": 0.16585060537474544, "grad_norm": 0.287109375, "learning_rate": 0.0002030073213940549, "loss": 0.1718, "step": 93538 }, { "epoch": 0.16585415154005526, "grad_norm": 0.58984375, "learning_rate": 0.000203002461818178, "loss": 0.2089, "step": 93540 }, { "epoch": 0.16585769770536507, "grad_norm": 1.2578125, "learning_rate": 0.00020299760616524726, "loss": 0.3056, "step": 93542 }, { "epoch": 0.16586124387067488, "grad_norm": 0.263671875, "learning_rate": 0.00020299275443528398, "loss": 0.157, "step": 93544 }, { "epoch": 0.1658647900359847, "grad_norm": 0.341796875, "learning_rate": 0.00020298790662830944, "loss": 0.1755, "step": 93546 }, { "epoch": 0.16586833620129451, "grad_norm": 0.74609375, "learning_rate": 0.0002029830627443447, "loss": 0.1755, "step": 93548 }, { "epoch": 0.16587188236660436, "grad_norm": 0.267578125, "learning_rate": 0.00020297822278341098, "loss": 0.1958, "step": 93550 }, { "epoch": 0.16587542853191417, "grad_norm": 0.341796875, "learning_rate": 0.0002029733867455294, "loss": 0.1809, "step": 93552 }, { "epoch": 0.16587897469722399, "grad_norm": 0.81640625, "learning_rate": 0.00020296855463072133, "loss": 0.25, "step": 93554 }, { "epoch": 0.1658825208625338, "grad_norm": 0.37890625, "learning_rate": 0.0002029637264390076, "loss": 0.2075, "step": 93556 }, { "epoch": 0.16588606702784361, "grad_norm": 0.1953125, "learning_rate": 0.0002029589021704095, "loss": 0.174, "step": 93558 }, { "epoch": 0.16588961319315343, "grad_norm": 0.34375, "learning_rate": 0.00020295408182494808, "loss": 0.1721, "step": 93560 }, { "epoch": 0.16589315935846324, "grad_norm": 1.2890625, "learning_rate": 0.00020294926540264453, "loss": 0.3884, "step": 93562 }, { "epoch": 0.16589670552377306, "grad_norm": 0.25, "learning_rate": 0.00020294445290351971, "loss": 0.1353, "step": 93564 }, { "epoch": 0.16590025168908287, "grad_norm": 15.6875, "learning_rate": 0.00020293964432759488, "loss": 0.177, "step": 93566 }, { "epoch": 0.1659037978543927, "grad_norm": 0.58984375, "learning_rate": 0.00020293483967489086, "loss": 0.1346, "step": 93568 }, { "epoch": 0.1659073440197025, "grad_norm": 0.52734375, "learning_rate": 0.00020293003894542892, "loss": 0.2264, "step": 93570 }, { "epoch": 0.16591089018501232, "grad_norm": 0.34375, "learning_rate": 0.00020292524213922974, "loss": 0.1982, "step": 93572 }, { "epoch": 0.16591443635032213, "grad_norm": 0.2216796875, "learning_rate": 0.00020292044925631462, "loss": 0.1573, "step": 93574 }, { "epoch": 0.16591798251563195, "grad_norm": 1.3359375, "learning_rate": 0.00020291566029670424, "loss": 0.2494, "step": 93576 }, { "epoch": 0.16592152868094176, "grad_norm": 0.51171875, "learning_rate": 0.00020291087526041985, "loss": 0.1648, "step": 93578 }, { "epoch": 0.16592507484625157, "grad_norm": 0.55859375, "learning_rate": 0.00020290609414748206, "loss": 0.1804, "step": 93580 }, { "epoch": 0.1659286210115614, "grad_norm": 0.345703125, "learning_rate": 0.00020290131695791194, "loss": 0.2076, "step": 93582 }, { "epoch": 0.1659321671768712, "grad_norm": 0.4765625, "learning_rate": 0.00020289654369173033, "loss": 0.2335, "step": 93584 }, { "epoch": 0.16593571334218102, "grad_norm": 0.63671875, "learning_rate": 0.0002028917743489582, "loss": 0.2133, "step": 93586 }, { "epoch": 0.16593925950749083, "grad_norm": 0.578125, "learning_rate": 0.0002028870089296164, "loss": 0.1672, "step": 93588 }, { "epoch": 0.16594280567280065, "grad_norm": 0.890625, "learning_rate": 0.00020288224743372562, "loss": 0.2624, "step": 93590 }, { "epoch": 0.16594635183811046, "grad_norm": 0.388671875, "learning_rate": 0.0002028774898613068, "loss": 0.15, "step": 93592 }, { "epoch": 0.16594989800342028, "grad_norm": 0.224609375, "learning_rate": 0.0002028727362123807, "loss": 0.1949, "step": 93594 }, { "epoch": 0.1659534441687301, "grad_norm": 0.31640625, "learning_rate": 0.00020286798648696822, "loss": 0.1905, "step": 93596 }, { "epoch": 0.1659569903340399, "grad_norm": 0.37890625, "learning_rate": 0.00020286324068508993, "loss": 0.1946, "step": 93598 }, { "epoch": 0.16596053649934972, "grad_norm": 0.91015625, "learning_rate": 0.00020285849880676685, "loss": 0.1945, "step": 93600 }, { "epoch": 0.16596408266465953, "grad_norm": 0.396484375, "learning_rate": 0.0002028537608520195, "loss": 0.1678, "step": 93602 }, { "epoch": 0.16596762882996935, "grad_norm": 1.453125, "learning_rate": 0.00020284902682086872, "loss": 0.4228, "step": 93604 }, { "epoch": 0.16597117499527916, "grad_norm": 0.2265625, "learning_rate": 0.00020284429671333507, "loss": 0.2139, "step": 93606 }, { "epoch": 0.16597472116058898, "grad_norm": 0.703125, "learning_rate": 0.0002028395705294394, "loss": 0.244, "step": 93608 }, { "epoch": 0.1659782673258988, "grad_norm": 0.96875, "learning_rate": 0.00020283484826920237, "loss": 0.2172, "step": 93610 }, { "epoch": 0.1659818134912086, "grad_norm": 0.451171875, "learning_rate": 0.0002028301299326445, "loss": 0.2358, "step": 93612 }, { "epoch": 0.16598535965651842, "grad_norm": 0.353515625, "learning_rate": 0.0002028254155197865, "loss": 0.1526, "step": 93614 }, { "epoch": 0.16598890582182824, "grad_norm": 0.34375, "learning_rate": 0.00020282070503064897, "loss": 0.2913, "step": 93616 }, { "epoch": 0.16599245198713805, "grad_norm": 0.28515625, "learning_rate": 0.00020281599846525263, "loss": 0.1564, "step": 93618 }, { "epoch": 0.16599599815244787, "grad_norm": 0.296875, "learning_rate": 0.00020281129582361797, "loss": 0.1745, "step": 93620 }, { "epoch": 0.16599954431775768, "grad_norm": 0.44140625, "learning_rate": 0.00020280659710576542, "loss": 0.2419, "step": 93622 }, { "epoch": 0.1660030904830675, "grad_norm": 0.388671875, "learning_rate": 0.00020280190231171584, "loss": 0.191, "step": 93624 }, { "epoch": 0.1660066366483773, "grad_norm": 0.6953125, "learning_rate": 0.00020279721144148937, "loss": 0.1823, "step": 93626 }, { "epoch": 0.16601018281368712, "grad_norm": 0.9140625, "learning_rate": 0.00020279252449510688, "loss": 0.1966, "step": 93628 }, { "epoch": 0.16601372897899694, "grad_norm": 0.193359375, "learning_rate": 0.0002027878414725887, "loss": 0.1438, "step": 93630 }, { "epoch": 0.16601727514430675, "grad_norm": 0.275390625, "learning_rate": 0.00020278316237395535, "loss": 0.2315, "step": 93632 }, { "epoch": 0.16602082130961657, "grad_norm": 0.298828125, "learning_rate": 0.00020277848719922728, "loss": 0.1866, "step": 93634 }, { "epoch": 0.16602436747492638, "grad_norm": 0.53125, "learning_rate": 0.00020277381594842493, "loss": 0.2098, "step": 93636 }, { "epoch": 0.1660279136402362, "grad_norm": 0.28515625, "learning_rate": 0.00020276914862156872, "loss": 0.2026, "step": 93638 }, { "epoch": 0.16603145980554604, "grad_norm": 0.44140625, "learning_rate": 0.00020276448521867917, "loss": 0.1429, "step": 93640 }, { "epoch": 0.16603500597085585, "grad_norm": 0.2578125, "learning_rate": 0.00020275982573977647, "loss": 0.2032, "step": 93642 }, { "epoch": 0.16603855213616567, "grad_norm": 0.48828125, "learning_rate": 0.00020275517018488115, "loss": 0.1394, "step": 93644 }, { "epoch": 0.16604209830147548, "grad_norm": 1.828125, "learning_rate": 0.00020275051855401358, "loss": 0.2291, "step": 93646 }, { "epoch": 0.1660456444667853, "grad_norm": 0.423828125, "learning_rate": 0.00020274587084719397, "loss": 0.1563, "step": 93648 }, { "epoch": 0.1660491906320951, "grad_norm": 0.36328125, "learning_rate": 0.00020274122706444284, "loss": 0.1572, "step": 93650 }, { "epoch": 0.16605273679740493, "grad_norm": 0.388671875, "learning_rate": 0.00020273658720578036, "loss": 0.1718, "step": 93652 }, { "epoch": 0.16605628296271474, "grad_norm": 0.265625, "learning_rate": 0.00020273195127122682, "loss": 0.2075, "step": 93654 }, { "epoch": 0.16605982912802456, "grad_norm": 1.2109375, "learning_rate": 0.00020272731926080249, "loss": 0.2596, "step": 93656 }, { "epoch": 0.16606337529333437, "grad_norm": 1.671875, "learning_rate": 0.00020272269117452767, "loss": 0.2868, "step": 93658 }, { "epoch": 0.16606692145864418, "grad_norm": 6.4375, "learning_rate": 0.00020271806701242274, "loss": 0.1917, "step": 93660 }, { "epoch": 0.166070467623954, "grad_norm": 1.9375, "learning_rate": 0.0002027134467745077, "loss": 0.2111, "step": 93662 }, { "epoch": 0.1660740137892638, "grad_norm": 0.62890625, "learning_rate": 0.00020270883046080278, "loss": 0.1627, "step": 93664 }, { "epoch": 0.16607755995457363, "grad_norm": 0.2470703125, "learning_rate": 0.00020270421807132825, "loss": 0.1557, "step": 93666 }, { "epoch": 0.16608110611988344, "grad_norm": 0.46484375, "learning_rate": 0.00020269960960610432, "loss": 0.1497, "step": 93668 }, { "epoch": 0.16608465228519326, "grad_norm": 0.484375, "learning_rate": 0.00020269500506515099, "loss": 0.2354, "step": 93670 }, { "epoch": 0.16608819845050307, "grad_norm": 0.431640625, "learning_rate": 0.00020269040444848853, "loss": 0.152, "step": 93672 }, { "epoch": 0.1660917446158129, "grad_norm": 0.82421875, "learning_rate": 0.0002026858077561371, "loss": 0.1879, "step": 93674 }, { "epoch": 0.1660952907811227, "grad_norm": 0.396484375, "learning_rate": 0.0002026812149881167, "loss": 0.2146, "step": 93676 }, { "epoch": 0.16609883694643252, "grad_norm": 1.09375, "learning_rate": 0.00020267662614444735, "loss": 0.2115, "step": 93678 }, { "epoch": 0.16610238311174233, "grad_norm": 0.49609375, "learning_rate": 0.00020267204122514923, "loss": 0.3181, "step": 93680 }, { "epoch": 0.16610592927705214, "grad_norm": 0.60546875, "learning_rate": 0.00020266746023024241, "loss": 0.2199, "step": 93682 }, { "epoch": 0.16610947544236196, "grad_norm": 0.30859375, "learning_rate": 0.0002026628831597469, "loss": 0.1467, "step": 93684 }, { "epoch": 0.16611302160767177, "grad_norm": 0.412109375, "learning_rate": 0.00020265831001368276, "loss": 0.1615, "step": 93686 }, { "epoch": 0.1661165677729816, "grad_norm": 0.2353515625, "learning_rate": 0.0002026537407920698, "loss": 0.2198, "step": 93688 }, { "epoch": 0.1661201139382914, "grad_norm": 0.2373046875, "learning_rate": 0.00020264917549492816, "loss": 0.1431, "step": 93690 }, { "epoch": 0.16612366010360122, "grad_norm": 0.287109375, "learning_rate": 0.00020264461412227789, "loss": 0.1936, "step": 93692 }, { "epoch": 0.16612720626891103, "grad_norm": 0.2421875, "learning_rate": 0.00020264005667413871, "loss": 0.1883, "step": 93694 }, { "epoch": 0.16613075243422085, "grad_norm": 0.427734375, "learning_rate": 0.00020263550315053073, "loss": 0.2006, "step": 93696 }, { "epoch": 0.16613429859953066, "grad_norm": 0.291015625, "learning_rate": 0.00020263095355147384, "loss": 0.204, "step": 93698 }, { "epoch": 0.16613784476484048, "grad_norm": 0.64453125, "learning_rate": 0.00020262640787698782, "loss": 0.1908, "step": 93700 }, { "epoch": 0.1661413909301503, "grad_norm": 0.6875, "learning_rate": 0.00020262186612709273, "loss": 0.2149, "step": 93702 }, { "epoch": 0.1661449370954601, "grad_norm": 1.46875, "learning_rate": 0.0002026173283018083, "loss": 0.249, "step": 93704 }, { "epoch": 0.16614848326076992, "grad_norm": 0.193359375, "learning_rate": 0.00020261279440115441, "loss": 0.1047, "step": 93706 }, { "epoch": 0.16615202942607973, "grad_norm": 0.83984375, "learning_rate": 0.0002026082644251509, "loss": 0.1767, "step": 93708 }, { "epoch": 0.16615557559138955, "grad_norm": 0.5078125, "learning_rate": 0.00020260373837381745, "loss": 0.1986, "step": 93710 }, { "epoch": 0.16615912175669936, "grad_norm": 1.03125, "learning_rate": 0.00020259921624717411, "loss": 0.3656, "step": 93712 }, { "epoch": 0.16616266792200918, "grad_norm": 0.34375, "learning_rate": 0.00020259469804524054, "loss": 0.1992, "step": 93714 }, { "epoch": 0.166166214087319, "grad_norm": 0.30078125, "learning_rate": 0.00020259018376803648, "loss": 0.3292, "step": 93716 }, { "epoch": 0.1661697602526288, "grad_norm": 1.1953125, "learning_rate": 0.00020258567341558165, "loss": 0.1445, "step": 93718 }, { "epoch": 0.16617330641793862, "grad_norm": 2.046875, "learning_rate": 0.00020258116698789575, "loss": 0.3552, "step": 93720 }, { "epoch": 0.16617685258324844, "grad_norm": 1.40625, "learning_rate": 0.00020257666448499862, "loss": 0.2276, "step": 93722 }, { "epoch": 0.16618039874855825, "grad_norm": 0.267578125, "learning_rate": 0.00020257216590690989, "loss": 0.2063, "step": 93724 }, { "epoch": 0.16618394491386806, "grad_norm": 0.3046875, "learning_rate": 0.00020256767125364922, "loss": 0.1617, "step": 93726 }, { "epoch": 0.16618749107917788, "grad_norm": 0.310546875, "learning_rate": 0.00020256318052523618, "loss": 0.1864, "step": 93728 }, { "epoch": 0.16619103724448772, "grad_norm": 0.318359375, "learning_rate": 0.00020255869372169048, "loss": 0.1821, "step": 93730 }, { "epoch": 0.16619458340979754, "grad_norm": 0.19921875, "learning_rate": 0.0002025542108430318, "loss": 0.2304, "step": 93732 }, { "epoch": 0.16619812957510735, "grad_norm": 0.8984375, "learning_rate": 0.00020254973188927981, "loss": 0.1545, "step": 93734 }, { "epoch": 0.16620167574041717, "grad_norm": 0.6015625, "learning_rate": 0.0002025452568604539, "loss": 0.1508, "step": 93736 }, { "epoch": 0.16620522190572698, "grad_norm": 0.193359375, "learning_rate": 0.00020254078575657368, "loss": 0.1667, "step": 93738 }, { "epoch": 0.1662087680710368, "grad_norm": 0.267578125, "learning_rate": 0.0002025363185776589, "loss": 0.176, "step": 93740 }, { "epoch": 0.1662123142363466, "grad_norm": 0.2314453125, "learning_rate": 0.00020253185532372885, "loss": 0.1677, "step": 93742 }, { "epoch": 0.16621586040165642, "grad_norm": 0.341796875, "learning_rate": 0.00020252739599480315, "loss": 0.1693, "step": 93744 }, { "epoch": 0.16621940656696624, "grad_norm": 0.34765625, "learning_rate": 0.00020252294059090132, "loss": 0.1566, "step": 93746 }, { "epoch": 0.16622295273227605, "grad_norm": 0.458984375, "learning_rate": 0.00020251848911204278, "loss": 0.1644, "step": 93748 }, { "epoch": 0.16622649889758587, "grad_norm": 0.3203125, "learning_rate": 0.00020251404155824715, "loss": 0.1885, "step": 93750 }, { "epoch": 0.16623004506289568, "grad_norm": 0.265625, "learning_rate": 0.00020250959792953373, "loss": 0.2884, "step": 93752 }, { "epoch": 0.1662335912282055, "grad_norm": 0.2021484375, "learning_rate": 0.000202505158225922, "loss": 0.148, "step": 93754 }, { "epoch": 0.1662371373935153, "grad_norm": 0.29296875, "learning_rate": 0.0002025007224474314, "loss": 0.2981, "step": 93756 }, { "epoch": 0.16624068355882513, "grad_norm": 0.380859375, "learning_rate": 0.00020249629059408128, "loss": 0.1569, "step": 93758 }, { "epoch": 0.16624422972413494, "grad_norm": 0.314453125, "learning_rate": 0.00020249186266589107, "loss": 0.1733, "step": 93760 }, { "epoch": 0.16624777588944475, "grad_norm": 0.17578125, "learning_rate": 0.00020248743866288014, "loss": 0.1616, "step": 93762 }, { "epoch": 0.16625132205475457, "grad_norm": 0.76171875, "learning_rate": 0.0002024830185850678, "loss": 0.1839, "step": 93764 }, { "epoch": 0.16625486822006438, "grad_norm": 2.34375, "learning_rate": 0.00020247860243247345, "loss": 0.261, "step": 93766 }, { "epoch": 0.1662584143853742, "grad_norm": 0.390625, "learning_rate": 0.0002024741902051163, "loss": 0.18, "step": 93768 }, { "epoch": 0.166261960550684, "grad_norm": 0.48046875, "learning_rate": 0.00020246978190301563, "loss": 0.2014, "step": 93770 }, { "epoch": 0.16626550671599383, "grad_norm": 0.5546875, "learning_rate": 0.00020246537752619087, "loss": 0.2128, "step": 93772 }, { "epoch": 0.16626905288130364, "grad_norm": 0.275390625, "learning_rate": 0.0002024609770746612, "loss": 0.1214, "step": 93774 }, { "epoch": 0.16627259904661346, "grad_norm": 1.3046875, "learning_rate": 0.00020245658054844585, "loss": 0.2868, "step": 93776 }, { "epoch": 0.16627614521192327, "grad_norm": 0.310546875, "learning_rate": 0.000202452187947564, "loss": 0.1335, "step": 93778 }, { "epoch": 0.16627969137723309, "grad_norm": 0.4375, "learning_rate": 0.00020244779927203503, "loss": 0.2357, "step": 93780 }, { "epoch": 0.1662832375425429, "grad_norm": 0.330078125, "learning_rate": 0.00020244341452187796, "loss": 0.2702, "step": 93782 }, { "epoch": 0.16628678370785271, "grad_norm": 2.296875, "learning_rate": 0.000202439033697112, "loss": 0.2883, "step": 93784 }, { "epoch": 0.16629032987316253, "grad_norm": 0.490234375, "learning_rate": 0.00020243465679775635, "loss": 0.192, "step": 93786 }, { "epoch": 0.16629387603847234, "grad_norm": 0.2578125, "learning_rate": 0.00020243028382383019, "loss": 0.1975, "step": 93788 }, { "epoch": 0.16629742220378216, "grad_norm": 0.62109375, "learning_rate": 0.00020242591477535258, "loss": 0.2163, "step": 93790 }, { "epoch": 0.16630096836909197, "grad_norm": 0.1962890625, "learning_rate": 0.00020242154965234273, "loss": 0.1265, "step": 93792 }, { "epoch": 0.1663045145344018, "grad_norm": 0.2177734375, "learning_rate": 0.00020241718845481952, "loss": 0.175, "step": 93794 }, { "epoch": 0.1663080606997116, "grad_norm": 0.50390625, "learning_rate": 0.0002024128311828022, "loss": 0.1823, "step": 93796 }, { "epoch": 0.16631160686502142, "grad_norm": 0.63671875, "learning_rate": 0.00020240847783630972, "loss": 0.1896, "step": 93798 }, { "epoch": 0.16631515303033123, "grad_norm": 0.4609375, "learning_rate": 0.00020240412841536132, "loss": 0.1349, "step": 93800 }, { "epoch": 0.16631869919564105, "grad_norm": 0.271484375, "learning_rate": 0.00020239978291997578, "loss": 0.1375, "step": 93802 }, { "epoch": 0.16632224536095086, "grad_norm": 0.57421875, "learning_rate": 0.00020239544135017214, "loss": 0.2323, "step": 93804 }, { "epoch": 0.16632579152626067, "grad_norm": 0.3984375, "learning_rate": 0.0002023911037059695, "loss": 0.1463, "step": 93806 }, { "epoch": 0.1663293376915705, "grad_norm": 0.380859375, "learning_rate": 0.00020238676998738682, "loss": 0.1732, "step": 93808 }, { "epoch": 0.1663328838568803, "grad_norm": 0.37890625, "learning_rate": 0.00020238244019444302, "loss": 0.1705, "step": 93810 }, { "epoch": 0.16633643002219012, "grad_norm": 0.53515625, "learning_rate": 0.00020237811432715695, "loss": 0.2358, "step": 93812 }, { "epoch": 0.16633997618749993, "grad_norm": 1.2734375, "learning_rate": 0.00020237379238554766, "loss": 0.298, "step": 93814 }, { "epoch": 0.16634352235280975, "grad_norm": 0.275390625, "learning_rate": 0.00020236947436963403, "loss": 0.1352, "step": 93816 }, { "epoch": 0.16634706851811956, "grad_norm": 0.4140625, "learning_rate": 0.00020236516027943486, "loss": 0.2909, "step": 93818 }, { "epoch": 0.16635061468342938, "grad_norm": 0.56640625, "learning_rate": 0.0002023608501149691, "loss": 0.1686, "step": 93820 }, { "epoch": 0.16635416084873922, "grad_norm": 0.8046875, "learning_rate": 0.0002023565438762555, "loss": 0.2104, "step": 93822 }, { "epoch": 0.16635770701404903, "grad_norm": 0.6875, "learning_rate": 0.00020235224156331307, "loss": 0.169, "step": 93824 }, { "epoch": 0.16636125317935885, "grad_norm": 0.4140625, "learning_rate": 0.00020234794317616037, "loss": 0.1808, "step": 93826 }, { "epoch": 0.16636479934466866, "grad_norm": 2.109375, "learning_rate": 0.00020234364871481652, "loss": 0.2632, "step": 93828 }, { "epoch": 0.16636834550997848, "grad_norm": 2.15625, "learning_rate": 0.00020233935817930008, "loss": 0.2258, "step": 93830 }, { "epoch": 0.1663718916752883, "grad_norm": 0.384765625, "learning_rate": 0.00020233507156962985, "loss": 0.1513, "step": 93832 }, { "epoch": 0.1663754378405981, "grad_norm": 0.263671875, "learning_rate": 0.00020233078888582458, "loss": 0.2755, "step": 93834 }, { "epoch": 0.16637898400590792, "grad_norm": 0.255859375, "learning_rate": 0.000202326510127903, "loss": 0.1187, "step": 93836 }, { "epoch": 0.16638253017121774, "grad_norm": 1.1640625, "learning_rate": 0.0002023222352958838, "loss": 0.1642, "step": 93838 }, { "epoch": 0.16638607633652755, "grad_norm": 0.87890625, "learning_rate": 0.00020231796438978573, "loss": 0.2014, "step": 93840 }, { "epoch": 0.16638962250183736, "grad_norm": 0.267578125, "learning_rate": 0.00020231369740962752, "loss": 0.2143, "step": 93842 }, { "epoch": 0.16639316866714718, "grad_norm": 1.1640625, "learning_rate": 0.00020230943435542767, "loss": 0.2189, "step": 93844 }, { "epoch": 0.166396714832457, "grad_norm": 0.318359375, "learning_rate": 0.000202305175227205, "loss": 0.2419, "step": 93846 }, { "epoch": 0.1664002609977668, "grad_norm": 0.470703125, "learning_rate": 0.00020230092002497798, "loss": 0.2201, "step": 93848 }, { "epoch": 0.16640380716307662, "grad_norm": 1.578125, "learning_rate": 0.00020229666874876535, "loss": 0.361, "step": 93850 }, { "epoch": 0.16640735332838644, "grad_norm": 0.2373046875, "learning_rate": 0.00020229242139858556, "loss": 0.1941, "step": 93852 }, { "epoch": 0.16641089949369625, "grad_norm": 2.015625, "learning_rate": 0.0002022881779744573, "loss": 0.1802, "step": 93854 }, { "epoch": 0.16641444565900607, "grad_norm": 0.23828125, "learning_rate": 0.00020228393847639917, "loss": 0.1732, "step": 93856 }, { "epoch": 0.16641799182431588, "grad_norm": 0.59375, "learning_rate": 0.00020227970290442952, "loss": 0.39, "step": 93858 }, { "epoch": 0.1664215379896257, "grad_norm": 0.18359375, "learning_rate": 0.0002022754712585671, "loss": 0.1357, "step": 93860 }, { "epoch": 0.1664250841549355, "grad_norm": 0.142578125, "learning_rate": 0.0002022712435388302, "loss": 0.1076, "step": 93862 }, { "epoch": 0.16642863032024532, "grad_norm": 0.25, "learning_rate": 0.00020226701974523754, "loss": 0.207, "step": 93864 }, { "epoch": 0.16643217648555514, "grad_norm": 0.515625, "learning_rate": 0.00020226279987780734, "loss": 0.1959, "step": 93866 }, { "epoch": 0.16643572265086495, "grad_norm": 0.220703125, "learning_rate": 0.00020225858393655831, "loss": 0.1743, "step": 93868 }, { "epoch": 0.16643926881617477, "grad_norm": 0.41796875, "learning_rate": 0.0002022543719215087, "loss": 0.1343, "step": 93870 }, { "epoch": 0.16644281498148458, "grad_norm": 0.38671875, "learning_rate": 0.000202250163832677, "loss": 0.199, "step": 93872 }, { "epoch": 0.1664463611467944, "grad_norm": 2.171875, "learning_rate": 0.00020224595967008164, "loss": 0.2128, "step": 93874 }, { "epoch": 0.1664499073121042, "grad_norm": 0.7109375, "learning_rate": 0.0002022417594337409, "loss": 0.1765, "step": 93876 }, { "epoch": 0.16645345347741403, "grad_norm": 0.69921875, "learning_rate": 0.00020223756312367328, "loss": 0.1927, "step": 93878 }, { "epoch": 0.16645699964272384, "grad_norm": 1.2265625, "learning_rate": 0.00020223337073989702, "loss": 0.3677, "step": 93880 }, { "epoch": 0.16646054580803366, "grad_norm": 1.4609375, "learning_rate": 0.00020222918228243058, "loss": 0.2685, "step": 93882 }, { "epoch": 0.16646409197334347, "grad_norm": 0.3828125, "learning_rate": 0.00020222499775129218, "loss": 0.1853, "step": 93884 }, { "epoch": 0.16646763813865328, "grad_norm": 0.7109375, "learning_rate": 0.00020222081714650015, "loss": 0.2009, "step": 93886 }, { "epoch": 0.1664711843039631, "grad_norm": 0.208984375, "learning_rate": 0.0002022166404680728, "loss": 0.2029, "step": 93888 }, { "epoch": 0.1664747304692729, "grad_norm": 0.7890625, "learning_rate": 0.00020221246771602832, "loss": 0.1898, "step": 93890 }, { "epoch": 0.16647827663458273, "grad_norm": 0.392578125, "learning_rate": 0.00020220829889038515, "loss": 0.163, "step": 93892 }, { "epoch": 0.16648182279989254, "grad_norm": 0.306640625, "learning_rate": 0.00020220413399116123, "loss": 0.1416, "step": 93894 }, { "epoch": 0.16648536896520236, "grad_norm": 0.431640625, "learning_rate": 0.0002021999730183749, "loss": 0.1764, "step": 93896 }, { "epoch": 0.16648891513051217, "grad_norm": 0.361328125, "learning_rate": 0.00020219581597204453, "loss": 0.1889, "step": 93898 }, { "epoch": 0.166492461295822, "grad_norm": 0.431640625, "learning_rate": 0.00020219166285218802, "loss": 0.2113, "step": 93900 }, { "epoch": 0.1664960074611318, "grad_norm": 0.1748046875, "learning_rate": 0.00020218751365882375, "loss": 0.1865, "step": 93902 }, { "epoch": 0.16649955362644162, "grad_norm": 0.35546875, "learning_rate": 0.00020218336839196977, "loss": 0.1489, "step": 93904 }, { "epoch": 0.16650309979175143, "grad_norm": 0.1884765625, "learning_rate": 0.00020217922705164417, "loss": 0.1607, "step": 93906 }, { "epoch": 0.16650664595706124, "grad_norm": 0.24609375, "learning_rate": 0.0002021750896378652, "loss": 0.2175, "step": 93908 }, { "epoch": 0.16651019212237106, "grad_norm": 1.0703125, "learning_rate": 0.00020217095615065079, "loss": 0.1789, "step": 93910 }, { "epoch": 0.1665137382876809, "grad_norm": 0.52734375, "learning_rate": 0.00020216682659001913, "loss": 0.1564, "step": 93912 }, { "epoch": 0.16651728445299072, "grad_norm": 0.251953125, "learning_rate": 0.00020216270095598825, "loss": 0.1535, "step": 93914 }, { "epoch": 0.16652083061830053, "grad_norm": 0.1796875, "learning_rate": 0.00020215857924857616, "loss": 0.2452, "step": 93916 }, { "epoch": 0.16652437678361035, "grad_norm": 0.2041015625, "learning_rate": 0.00020215446146780092, "loss": 0.2016, "step": 93918 }, { "epoch": 0.16652792294892016, "grad_norm": 0.45703125, "learning_rate": 0.00020215034761368055, "loss": 0.1983, "step": 93920 }, { "epoch": 0.16653146911422997, "grad_norm": 0.328125, "learning_rate": 0.00020214623768623308, "loss": 0.1902, "step": 93922 }, { "epoch": 0.1665350152795398, "grad_norm": 1.15625, "learning_rate": 0.00020214213168547637, "loss": 0.1782, "step": 93924 }, { "epoch": 0.1665385614448496, "grad_norm": 0.36328125, "learning_rate": 0.00020213802961142844, "loss": 0.2214, "step": 93926 }, { "epoch": 0.16654210761015942, "grad_norm": 1.4140625, "learning_rate": 0.00020213393146410725, "loss": 0.2311, "step": 93928 }, { "epoch": 0.16654565377546923, "grad_norm": 1.0625, "learning_rate": 0.0002021298372435307, "loss": 0.187, "step": 93930 }, { "epoch": 0.16654919994077905, "grad_norm": 0.83984375, "learning_rate": 0.00020212574694971673, "loss": 0.2946, "step": 93932 }, { "epoch": 0.16655274610608886, "grad_norm": 0.875, "learning_rate": 0.00020212166058268308, "loss": 0.181, "step": 93934 }, { "epoch": 0.16655629227139868, "grad_norm": 0.55859375, "learning_rate": 0.0002021175781424478, "loss": 0.5336, "step": 93936 }, { "epoch": 0.1665598384367085, "grad_norm": 1.515625, "learning_rate": 0.00020211349962902874, "loss": 0.2272, "step": 93938 }, { "epoch": 0.1665633846020183, "grad_norm": 0.515625, "learning_rate": 0.00020210942504244358, "loss": 0.1442, "step": 93940 }, { "epoch": 0.16656693076732812, "grad_norm": 0.2734375, "learning_rate": 0.0002021053543827103, "loss": 0.2155, "step": 93942 }, { "epoch": 0.16657047693263793, "grad_norm": 0.62109375, "learning_rate": 0.00020210128764984665, "loss": 0.247, "step": 93944 }, { "epoch": 0.16657402309794775, "grad_norm": 0.26171875, "learning_rate": 0.0002020972248438704, "loss": 0.1478, "step": 93946 }, { "epoch": 0.16657756926325756, "grad_norm": 3.390625, "learning_rate": 0.00020209316596479934, "loss": 0.305, "step": 93948 }, { "epoch": 0.16658111542856738, "grad_norm": 0.671875, "learning_rate": 0.0002020891110126512, "loss": 0.2331, "step": 93950 }, { "epoch": 0.1665846615938772, "grad_norm": 0.9140625, "learning_rate": 0.00020208505998744357, "loss": 0.2053, "step": 93952 }, { "epoch": 0.166588207759187, "grad_norm": 0.4609375, "learning_rate": 0.00020208101288919446, "loss": 0.1752, "step": 93954 }, { "epoch": 0.16659175392449682, "grad_norm": 3.078125, "learning_rate": 0.0002020769697179215, "loss": 0.2999, "step": 93956 }, { "epoch": 0.16659530008980664, "grad_norm": 0.3671875, "learning_rate": 0.00020207293047364215, "loss": 0.1917, "step": 93958 }, { "epoch": 0.16659884625511645, "grad_norm": 0.50390625, "learning_rate": 0.0002020688951563743, "loss": 0.3937, "step": 93960 }, { "epoch": 0.16660239242042627, "grad_norm": 0.453125, "learning_rate": 0.00020206486376613548, "loss": 0.1782, "step": 93962 }, { "epoch": 0.16660593858573608, "grad_norm": 0.28515625, "learning_rate": 0.00020206083630294343, "loss": 0.2755, "step": 93964 }, { "epoch": 0.1666094847510459, "grad_norm": 0.1923828125, "learning_rate": 0.00020205681276681565, "loss": 0.1786, "step": 93966 }, { "epoch": 0.1666130309163557, "grad_norm": 0.8125, "learning_rate": 0.00020205279315776972, "loss": 0.1675, "step": 93968 }, { "epoch": 0.16661657708166552, "grad_norm": 0.39453125, "learning_rate": 0.00020204877747582336, "loss": 0.1898, "step": 93970 }, { "epoch": 0.16662012324697534, "grad_norm": 0.29296875, "learning_rate": 0.00020204476572099404, "loss": 0.1657, "step": 93972 }, { "epoch": 0.16662366941228515, "grad_norm": 1.71875, "learning_rate": 0.00020204075789329932, "loss": 0.1665, "step": 93974 }, { "epoch": 0.16662721557759497, "grad_norm": 1.015625, "learning_rate": 0.0002020367539927568, "loss": 0.2169, "step": 93976 }, { "epoch": 0.16663076174290478, "grad_norm": 1.0, "learning_rate": 0.00020203275401938376, "loss": 0.2072, "step": 93978 }, { "epoch": 0.1666343079082146, "grad_norm": 0.326171875, "learning_rate": 0.00020202875797319803, "loss": 0.1944, "step": 93980 }, { "epoch": 0.1666378540735244, "grad_norm": 0.353515625, "learning_rate": 0.00020202476585421683, "loss": 0.1556, "step": 93982 }, { "epoch": 0.16664140023883423, "grad_norm": 0.494140625, "learning_rate": 0.0002020207776624576, "loss": 0.2134, "step": 93984 }, { "epoch": 0.16664494640414404, "grad_norm": 0.1923828125, "learning_rate": 0.000202016793397938, "loss": 0.1567, "step": 93986 }, { "epoch": 0.16664849256945385, "grad_norm": 0.73828125, "learning_rate": 0.0002020128130606753, "loss": 0.1653, "step": 93988 }, { "epoch": 0.16665203873476367, "grad_norm": 0.2119140625, "learning_rate": 0.000202008836650687, "loss": 0.2451, "step": 93990 }, { "epoch": 0.16665558490007348, "grad_norm": 1.71875, "learning_rate": 0.00020200486416799033, "loss": 0.2337, "step": 93992 }, { "epoch": 0.1666591310653833, "grad_norm": 0.86328125, "learning_rate": 0.00020200089561260283, "loss": 0.152, "step": 93994 }, { "epoch": 0.1666626772306931, "grad_norm": 0.326171875, "learning_rate": 0.00020199693098454176, "loss": 0.1652, "step": 93996 }, { "epoch": 0.16666622339600293, "grad_norm": 0.1865234375, "learning_rate": 0.00020199297028382457, "loss": 0.1848, "step": 93998 }, { "epoch": 0.16666976956131274, "grad_norm": 0.384765625, "learning_rate": 0.0002019890135104684, "loss": 0.1997, "step": 94000 }, { "epoch": 0.16667331572662256, "grad_norm": 0.86328125, "learning_rate": 0.00020198506066449067, "loss": 0.1768, "step": 94002 }, { "epoch": 0.1666768618919324, "grad_norm": 1.6015625, "learning_rate": 0.00020198111174590873, "loss": 0.2316, "step": 94004 }, { "epoch": 0.1666804080572422, "grad_norm": 0.3046875, "learning_rate": 0.0002019771667547397, "loss": 0.1936, "step": 94006 }, { "epoch": 0.16668395422255203, "grad_norm": 0.287109375, "learning_rate": 0.00020197322569100092, "loss": 0.198, "step": 94008 }, { "epoch": 0.16668750038786184, "grad_norm": 0.296875, "learning_rate": 0.00020196928855470964, "loss": 0.1305, "step": 94010 }, { "epoch": 0.16669104655317166, "grad_norm": 0.1787109375, "learning_rate": 0.0002019653553458831, "loss": 0.2143, "step": 94012 }, { "epoch": 0.16669459271848147, "grad_norm": 3.84375, "learning_rate": 0.00020196142606453843, "loss": 0.2335, "step": 94014 }, { "epoch": 0.16669813888379129, "grad_norm": 2.765625, "learning_rate": 0.00020195750071069275, "loss": 0.1962, "step": 94016 }, { "epoch": 0.1667016850491011, "grad_norm": 0.7578125, "learning_rate": 0.0002019535792843634, "loss": 0.1745, "step": 94018 }, { "epoch": 0.16670523121441091, "grad_norm": 0.828125, "learning_rate": 0.00020194966178556744, "loss": 0.1612, "step": 94020 }, { "epoch": 0.16670877737972073, "grad_norm": 0.294921875, "learning_rate": 0.00020194574821432207, "loss": 0.1976, "step": 94022 }, { "epoch": 0.16671232354503054, "grad_norm": 2.6875, "learning_rate": 0.00020194183857064426, "loss": 0.2938, "step": 94024 }, { "epoch": 0.16671586971034036, "grad_norm": 0.333984375, "learning_rate": 0.00020193793285455122, "loss": 0.1527, "step": 94026 }, { "epoch": 0.16671941587565017, "grad_norm": 0.71484375, "learning_rate": 0.00020193403106605993, "loss": 0.2019, "step": 94028 }, { "epoch": 0.16672296204096, "grad_norm": 0.5078125, "learning_rate": 0.00020193013320518767, "loss": 0.1454, "step": 94030 }, { "epoch": 0.1667265082062698, "grad_norm": 1.6875, "learning_rate": 0.0002019262392719512, "loss": 0.1991, "step": 94032 }, { "epoch": 0.16673005437157962, "grad_norm": 0.84375, "learning_rate": 0.00020192234926636775, "loss": 0.1738, "step": 94034 }, { "epoch": 0.16673360053688943, "grad_norm": 0.154296875, "learning_rate": 0.00020191846318845427, "loss": 0.2046, "step": 94036 }, { "epoch": 0.16673714670219925, "grad_norm": 0.408203125, "learning_rate": 0.0002019145810382279, "loss": 0.1894, "step": 94038 }, { "epoch": 0.16674069286750906, "grad_norm": 3.921875, "learning_rate": 0.00020191070281570537, "loss": 0.1895, "step": 94040 }, { "epoch": 0.16674423903281888, "grad_norm": 0.640625, "learning_rate": 0.0002019068285209037, "loss": 0.2104, "step": 94042 }, { "epoch": 0.1667477851981287, "grad_norm": 0.55859375, "learning_rate": 0.00020190295815383997, "loss": 0.4116, "step": 94044 }, { "epoch": 0.1667513313634385, "grad_norm": 0.373046875, "learning_rate": 0.00020189909171453095, "loss": 0.2086, "step": 94046 }, { "epoch": 0.16675487752874832, "grad_norm": 0.189453125, "learning_rate": 0.00020189522920299372, "loss": 0.1762, "step": 94048 }, { "epoch": 0.16675842369405813, "grad_norm": 0.6640625, "learning_rate": 0.000201891370619245, "loss": 0.2381, "step": 94050 }, { "epoch": 0.16676196985936795, "grad_norm": 0.28515625, "learning_rate": 0.00020188751596330177, "loss": 0.1913, "step": 94052 }, { "epoch": 0.16676551602467776, "grad_norm": 0.6640625, "learning_rate": 0.0002018836652351808, "loss": 0.1888, "step": 94054 }, { "epoch": 0.16676906218998758, "grad_norm": 0.310546875, "learning_rate": 0.00020187981843489909, "loss": 0.1944, "step": 94056 }, { "epoch": 0.1667726083552974, "grad_norm": 0.1923828125, "learning_rate": 0.0002018759755624733, "loss": 0.1616, "step": 94058 }, { "epoch": 0.1667761545206072, "grad_norm": 0.98046875, "learning_rate": 0.00020187213661792037, "loss": 0.16, "step": 94060 }, { "epoch": 0.16677970068591702, "grad_norm": 0.228515625, "learning_rate": 0.00020186830160125695, "loss": 0.1807, "step": 94062 }, { "epoch": 0.16678324685122684, "grad_norm": 1.03125, "learning_rate": 0.0002018644705124999, "loss": 0.2166, "step": 94064 }, { "epoch": 0.16678679301653665, "grad_norm": 1.6328125, "learning_rate": 0.00020186064335166596, "loss": 0.1614, "step": 94066 }, { "epoch": 0.16679033918184646, "grad_norm": 0.283203125, "learning_rate": 0.00020185682011877187, "loss": 0.1668, "step": 94068 }, { "epoch": 0.16679388534715628, "grad_norm": 0.4375, "learning_rate": 0.00020185300081383437, "loss": 0.2131, "step": 94070 }, { "epoch": 0.1667974315124661, "grad_norm": 0.3046875, "learning_rate": 0.00020184918543687008, "loss": 0.1529, "step": 94072 }, { "epoch": 0.1668009776777759, "grad_norm": 0.400390625, "learning_rate": 0.00020184537398789578, "loss": 0.1777, "step": 94074 }, { "epoch": 0.16680452384308572, "grad_norm": 2.84375, "learning_rate": 0.00020184156646692805, "loss": 0.1727, "step": 94076 }, { "epoch": 0.16680807000839554, "grad_norm": 1.03125, "learning_rate": 0.00020183776287398364, "loss": 0.2139, "step": 94078 }, { "epoch": 0.16681161617370535, "grad_norm": 0.65234375, "learning_rate": 0.00020183396320907923, "loss": 0.2179, "step": 94080 }, { "epoch": 0.16681516233901517, "grad_norm": 0.27734375, "learning_rate": 0.00020183016747223122, "loss": 0.1435, "step": 94082 }, { "epoch": 0.16681870850432498, "grad_norm": 0.703125, "learning_rate": 0.00020182637566345632, "loss": 0.255, "step": 94084 }, { "epoch": 0.1668222546696348, "grad_norm": 0.392578125, "learning_rate": 0.0002018225877827712, "loss": 0.2013, "step": 94086 }, { "epoch": 0.1668258008349446, "grad_norm": 0.59765625, "learning_rate": 0.00020181880383019244, "loss": 0.257, "step": 94088 }, { "epoch": 0.16682934700025442, "grad_norm": 0.26953125, "learning_rate": 0.0002018150238057364, "loss": 0.1829, "step": 94090 }, { "epoch": 0.16683289316556424, "grad_norm": 0.447265625, "learning_rate": 0.00020181124770941966, "loss": 0.1934, "step": 94092 }, { "epoch": 0.16683643933087408, "grad_norm": 0.76953125, "learning_rate": 0.00020180747554125893, "loss": 0.2048, "step": 94094 }, { "epoch": 0.1668399854961839, "grad_norm": 0.890625, "learning_rate": 0.0002018037073012705, "loss": 0.188, "step": 94096 }, { "epoch": 0.1668435316614937, "grad_norm": 0.357421875, "learning_rate": 0.00020179994298947092, "loss": 0.1545, "step": 94098 }, { "epoch": 0.16684707782680352, "grad_norm": 0.84765625, "learning_rate": 0.00020179618260587662, "loss": 0.184, "step": 94100 }, { "epoch": 0.16685062399211334, "grad_norm": 0.5390625, "learning_rate": 0.00020179242615050413, "loss": 0.189, "step": 94102 }, { "epoch": 0.16685417015742315, "grad_norm": 0.3984375, "learning_rate": 0.00020178867362336984, "loss": 0.1945, "step": 94104 }, { "epoch": 0.16685771632273297, "grad_norm": 0.310546875, "learning_rate": 0.0002017849250244901, "loss": 0.1488, "step": 94106 }, { "epoch": 0.16686126248804278, "grad_norm": 0.4609375, "learning_rate": 0.00020178118035388132, "loss": 0.166, "step": 94108 }, { "epoch": 0.1668648086533526, "grad_norm": 1.1328125, "learning_rate": 0.00020177743961156003, "loss": 0.1936, "step": 94110 }, { "epoch": 0.1668683548186624, "grad_norm": 1.9140625, "learning_rate": 0.0002017737027975424, "loss": 0.131, "step": 94112 }, { "epoch": 0.16687190098397223, "grad_norm": 0.58203125, "learning_rate": 0.00020176996991184485, "loss": 0.1763, "step": 94114 }, { "epoch": 0.16687544714928204, "grad_norm": 0.392578125, "learning_rate": 0.00020176624095448364, "loss": 0.1815, "step": 94116 }, { "epoch": 0.16687899331459186, "grad_norm": 0.462890625, "learning_rate": 0.0002017625159254752, "loss": 0.1488, "step": 94118 }, { "epoch": 0.16688253947990167, "grad_norm": 0.23828125, "learning_rate": 0.00020175879482483576, "loss": 0.2591, "step": 94120 }, { "epoch": 0.16688608564521148, "grad_norm": 0.482421875, "learning_rate": 0.00020175507765258157, "loss": 0.1786, "step": 94122 }, { "epoch": 0.1668896318105213, "grad_norm": 0.439453125, "learning_rate": 0.00020175136440872898, "loss": 0.2739, "step": 94124 }, { "epoch": 0.16689317797583111, "grad_norm": 0.283203125, "learning_rate": 0.00020174765509329407, "loss": 0.1367, "step": 94126 }, { "epoch": 0.16689672414114093, "grad_norm": 0.33984375, "learning_rate": 0.00020174394970629315, "loss": 0.2205, "step": 94128 }, { "epoch": 0.16690027030645074, "grad_norm": 0.38671875, "learning_rate": 0.0002017402482477425, "loss": 0.1369, "step": 94130 }, { "epoch": 0.16690381647176056, "grad_norm": 0.474609375, "learning_rate": 0.00020173655071765808, "loss": 0.197, "step": 94132 }, { "epoch": 0.16690736263707037, "grad_norm": 0.353515625, "learning_rate": 0.00020173285711605642, "loss": 0.1415, "step": 94134 }, { "epoch": 0.1669109088023802, "grad_norm": 0.26171875, "learning_rate": 0.00020172916744295335, "loss": 0.3169, "step": 94136 }, { "epoch": 0.16691445496769, "grad_norm": 0.46484375, "learning_rate": 0.00020172548169836513, "loss": 0.2533, "step": 94138 }, { "epoch": 0.16691800113299982, "grad_norm": 0.2119140625, "learning_rate": 0.0002017217998823078, "loss": 0.1679, "step": 94140 }, { "epoch": 0.16692154729830963, "grad_norm": 0.98828125, "learning_rate": 0.00020171812199479763, "loss": 0.1837, "step": 94142 }, { "epoch": 0.16692509346361945, "grad_norm": 0.5, "learning_rate": 0.00020171444803585058, "loss": 0.2219, "step": 94144 }, { "epoch": 0.16692863962892926, "grad_norm": 0.330078125, "learning_rate": 0.00020171077800548276, "loss": 0.1958, "step": 94146 }, { "epoch": 0.16693218579423907, "grad_norm": 0.470703125, "learning_rate": 0.00020170711190371023, "loss": 0.2288, "step": 94148 }, { "epoch": 0.1669357319595489, "grad_norm": 0.357421875, "learning_rate": 0.000201703449730549, "loss": 0.1842, "step": 94150 }, { "epoch": 0.1669392781248587, "grad_norm": 0.1845703125, "learning_rate": 0.00020169979148601508, "loss": 0.192, "step": 94152 }, { "epoch": 0.16694282429016852, "grad_norm": 0.2177734375, "learning_rate": 0.0002016961371701244, "loss": 0.1794, "step": 94154 }, { "epoch": 0.16694637045547833, "grad_norm": 0.482421875, "learning_rate": 0.00020169248678289308, "loss": 0.1783, "step": 94156 }, { "epoch": 0.16694991662078815, "grad_norm": 0.625, "learning_rate": 0.0002016888403243371, "loss": 0.1452, "step": 94158 }, { "epoch": 0.16695346278609796, "grad_norm": 1.046875, "learning_rate": 0.00020168519779447227, "loss": 0.1998, "step": 94160 }, { "epoch": 0.16695700895140778, "grad_norm": 0.2236328125, "learning_rate": 0.00020168155919331448, "loss": 0.2044, "step": 94162 }, { "epoch": 0.1669605551167176, "grad_norm": 1.2109375, "learning_rate": 0.00020167792452087984, "loss": 0.2755, "step": 94164 }, { "epoch": 0.1669641012820274, "grad_norm": 0.314453125, "learning_rate": 0.00020167429377718416, "loss": 0.1904, "step": 94166 }, { "epoch": 0.16696764744733722, "grad_norm": 0.63671875, "learning_rate": 0.00020167066696224329, "loss": 0.2071, "step": 94168 }, { "epoch": 0.16697119361264703, "grad_norm": 1.0390625, "learning_rate": 0.00020166704407607313, "loss": 0.3857, "step": 94170 }, { "epoch": 0.16697473977795685, "grad_norm": 0.60546875, "learning_rate": 0.00020166342511868945, "loss": 0.2088, "step": 94172 }, { "epoch": 0.16697828594326666, "grad_norm": 0.515625, "learning_rate": 0.00020165981009010828, "loss": 0.1602, "step": 94174 }, { "epoch": 0.16698183210857648, "grad_norm": 0.447265625, "learning_rate": 0.00020165619899034518, "loss": 0.2246, "step": 94176 }, { "epoch": 0.1669853782738863, "grad_norm": 0.53515625, "learning_rate": 0.00020165259181941604, "loss": 0.2148, "step": 94178 }, { "epoch": 0.1669889244391961, "grad_norm": 0.671875, "learning_rate": 0.0002016489885773366, "loss": 0.1682, "step": 94180 }, { "epoch": 0.16699247060450592, "grad_norm": 0.43359375, "learning_rate": 0.00020164538926412272, "loss": 0.1928, "step": 94182 }, { "epoch": 0.16699601676981576, "grad_norm": 0.65234375, "learning_rate": 0.0002016417938797901, "loss": 0.2042, "step": 94184 }, { "epoch": 0.16699956293512558, "grad_norm": 0.400390625, "learning_rate": 0.00020163820242435432, "loss": 0.1686, "step": 94186 }, { "epoch": 0.1670031091004354, "grad_norm": 0.6875, "learning_rate": 0.0002016346148978314, "loss": 0.1467, "step": 94188 }, { "epoch": 0.1670066552657452, "grad_norm": 1.15625, "learning_rate": 0.00020163103130023673, "loss": 0.2456, "step": 94190 }, { "epoch": 0.16701020143105502, "grad_norm": 3.1875, "learning_rate": 0.00020162745163158612, "loss": 0.242, "step": 94192 }, { "epoch": 0.16701374759636484, "grad_norm": 0.337890625, "learning_rate": 0.00020162387589189518, "loss": 0.1302, "step": 94194 }, { "epoch": 0.16701729376167465, "grad_norm": 1.015625, "learning_rate": 0.00020162030408117957, "loss": 0.3031, "step": 94196 }, { "epoch": 0.16702083992698447, "grad_norm": 0.306640625, "learning_rate": 0.0002016167361994548, "loss": 0.2102, "step": 94198 }, { "epoch": 0.16702438609229428, "grad_norm": 0.275390625, "learning_rate": 0.00020161317224673677, "loss": 0.1766, "step": 94200 }, { "epoch": 0.1670279322576041, "grad_norm": 0.6171875, "learning_rate": 0.00020160961222304085, "loss": 0.1553, "step": 94202 }, { "epoch": 0.1670314784229139, "grad_norm": 0.412109375, "learning_rate": 0.0002016060561283826, "loss": 0.2194, "step": 94204 }, { "epoch": 0.16703502458822372, "grad_norm": 0.462890625, "learning_rate": 0.00020160250396277751, "loss": 0.2647, "step": 94206 }, { "epoch": 0.16703857075353354, "grad_norm": 0.224609375, "learning_rate": 0.00020159895572624136, "loss": 0.1825, "step": 94208 }, { "epoch": 0.16704211691884335, "grad_norm": 0.73828125, "learning_rate": 0.00020159541141878952, "loss": 0.1838, "step": 94210 }, { "epoch": 0.16704566308415317, "grad_norm": 0.578125, "learning_rate": 0.0002015918710404374, "loss": 0.2077, "step": 94212 }, { "epoch": 0.16704920924946298, "grad_norm": 0.427734375, "learning_rate": 0.0002015883345912007, "loss": 0.1523, "step": 94214 }, { "epoch": 0.1670527554147728, "grad_norm": 0.2333984375, "learning_rate": 0.00020158480207109464, "loss": 0.2026, "step": 94216 }, { "epoch": 0.1670563015800826, "grad_norm": 0.1962890625, "learning_rate": 0.00020158127348013483, "loss": 0.1551, "step": 94218 }, { "epoch": 0.16705984774539243, "grad_norm": 0.302734375, "learning_rate": 0.00020157774881833667, "loss": 0.279, "step": 94220 }, { "epoch": 0.16706339391070224, "grad_norm": 0.451171875, "learning_rate": 0.00020157422808571553, "loss": 0.163, "step": 94222 }, { "epoch": 0.16706694007601205, "grad_norm": 1.2890625, "learning_rate": 0.00020157071128228699, "loss": 0.2914, "step": 94224 }, { "epoch": 0.16707048624132187, "grad_norm": 1.6875, "learning_rate": 0.00020156719840806617, "loss": 0.2892, "step": 94226 }, { "epoch": 0.16707403240663168, "grad_norm": 0.56640625, "learning_rate": 0.0002015636894630686, "loss": 0.2071, "step": 94228 }, { "epoch": 0.1670775785719415, "grad_norm": 0.72265625, "learning_rate": 0.00020156018444730955, "loss": 0.2232, "step": 94230 }, { "epoch": 0.1670811247372513, "grad_norm": 0.34765625, "learning_rate": 0.00020155668336080434, "loss": 0.1621, "step": 94232 }, { "epoch": 0.16708467090256113, "grad_norm": 0.310546875, "learning_rate": 0.00020155318620356847, "loss": 0.128, "step": 94234 }, { "epoch": 0.16708821706787094, "grad_norm": 0.48046875, "learning_rate": 0.00020154969297561694, "loss": 0.2147, "step": 94236 }, { "epoch": 0.16709176323318076, "grad_norm": 1.4765625, "learning_rate": 0.00020154620367696525, "loss": 0.2059, "step": 94238 }, { "epoch": 0.16709530939849057, "grad_norm": 4.15625, "learning_rate": 0.0002015427183076285, "loss": 0.3357, "step": 94240 }, { "epoch": 0.16709885556380039, "grad_norm": 0.65625, "learning_rate": 0.00020153923686762212, "loss": 0.338, "step": 94242 }, { "epoch": 0.1671024017291102, "grad_norm": 0.46875, "learning_rate": 0.0002015357593569612, "loss": 0.2735, "step": 94244 }, { "epoch": 0.16710594789442001, "grad_norm": 0.7421875, "learning_rate": 0.0002015322857756609, "loss": 0.1844, "step": 94246 }, { "epoch": 0.16710949405972983, "grad_norm": 0.57421875, "learning_rate": 0.00020152881612373652, "loss": 0.2107, "step": 94248 }, { "epoch": 0.16711304022503964, "grad_norm": 1.03125, "learning_rate": 0.00020152535040120326, "loss": 0.271, "step": 94250 }, { "epoch": 0.16711658639034946, "grad_norm": 0.474609375, "learning_rate": 0.00020152188860807612, "loss": 0.1464, "step": 94252 }, { "epoch": 0.16712013255565927, "grad_norm": 0.5234375, "learning_rate": 0.00020151843074437043, "loss": 0.1834, "step": 94254 }, { "epoch": 0.1671236787209691, "grad_norm": 0.353515625, "learning_rate": 0.00020151497681010114, "loss": 0.1971, "step": 94256 }, { "epoch": 0.1671272248862789, "grad_norm": 0.470703125, "learning_rate": 0.00020151152680528362, "loss": 0.2845, "step": 94258 }, { "epoch": 0.16713077105158872, "grad_norm": 0.51171875, "learning_rate": 0.00020150808072993265, "loss": 0.1714, "step": 94260 }, { "epoch": 0.16713431721689853, "grad_norm": 0.1669921875, "learning_rate": 0.0002015046385840634, "loss": 0.1621, "step": 94262 }, { "epoch": 0.16713786338220835, "grad_norm": 0.23828125, "learning_rate": 0.00020150120036769098, "loss": 0.165, "step": 94264 }, { "epoch": 0.16714140954751816, "grad_norm": 0.66015625, "learning_rate": 0.0002014977660808304, "loss": 0.1575, "step": 94266 }, { "epoch": 0.16714495571282798, "grad_norm": 0.5, "learning_rate": 0.0002014943357234967, "loss": 0.1308, "step": 94268 }, { "epoch": 0.1671485018781378, "grad_norm": 1.09375, "learning_rate": 0.0002014909092957048, "loss": 0.1975, "step": 94270 }, { "epoch": 0.1671520480434476, "grad_norm": 0.353515625, "learning_rate": 0.0002014874867974698, "loss": 0.201, "step": 94272 }, { "epoch": 0.16715559420875742, "grad_norm": 0.91796875, "learning_rate": 0.00020148406822880665, "loss": 0.2286, "step": 94274 }, { "epoch": 0.16715914037406726, "grad_norm": 0.69921875, "learning_rate": 0.00020148065358973025, "loss": 0.1779, "step": 94276 }, { "epoch": 0.16716268653937708, "grad_norm": 0.54296875, "learning_rate": 0.00020147724288025552, "loss": 0.2111, "step": 94278 }, { "epoch": 0.1671662327046869, "grad_norm": 0.21484375, "learning_rate": 0.00020147383610039739, "loss": 0.2044, "step": 94280 }, { "epoch": 0.1671697788699967, "grad_norm": 1.28125, "learning_rate": 0.00020147043325017077, "loss": 0.2509, "step": 94282 }, { "epoch": 0.16717332503530652, "grad_norm": 0.796875, "learning_rate": 0.0002014670343295906, "loss": 0.1524, "step": 94284 }, { "epoch": 0.16717687120061633, "grad_norm": 0.3984375, "learning_rate": 0.0002014636393386716, "loss": 0.1959, "step": 94286 }, { "epoch": 0.16718041736592615, "grad_norm": 0.7265625, "learning_rate": 0.00020146024827742882, "loss": 0.1427, "step": 94288 }, { "epoch": 0.16718396353123596, "grad_norm": 0.251953125, "learning_rate": 0.00020145686114587697, "loss": 0.1962, "step": 94290 }, { "epoch": 0.16718750969654578, "grad_norm": 0.328125, "learning_rate": 0.0002014534779440308, "loss": 0.2091, "step": 94292 }, { "epoch": 0.1671910558618556, "grad_norm": 0.294921875, "learning_rate": 0.00020145009867190516, "loss": 0.213, "step": 94294 }, { "epoch": 0.1671946020271654, "grad_norm": 0.5234375, "learning_rate": 0.0002014467233295149, "loss": 0.206, "step": 94296 }, { "epoch": 0.16719814819247522, "grad_norm": 0.392578125, "learning_rate": 0.0002014433519168747, "loss": 0.1904, "step": 94298 }, { "epoch": 0.16720169435778504, "grad_norm": 0.2119140625, "learning_rate": 0.00020143998443399936, "loss": 0.2238, "step": 94300 }, { "epoch": 0.16720524052309485, "grad_norm": 1.2578125, "learning_rate": 0.00020143662088090362, "loss": 0.1609, "step": 94302 }, { "epoch": 0.16720878668840466, "grad_norm": 0.29296875, "learning_rate": 0.00020143326125760216, "loss": 0.2117, "step": 94304 }, { "epoch": 0.16721233285371448, "grad_norm": 0.82421875, "learning_rate": 0.00020142990556410958, "loss": 0.2145, "step": 94306 }, { "epoch": 0.1672158790190243, "grad_norm": 1.0859375, "learning_rate": 0.00020142655380044077, "loss": 0.1964, "step": 94308 }, { "epoch": 0.1672194251843341, "grad_norm": 0.267578125, "learning_rate": 0.0002014232059666101, "loss": 0.1359, "step": 94310 }, { "epoch": 0.16722297134964392, "grad_norm": 0.5625, "learning_rate": 0.0002014198620626325, "loss": 0.1673, "step": 94312 }, { "epoch": 0.16722651751495374, "grad_norm": 0.2099609375, "learning_rate": 0.0002014165220885224, "loss": 0.1712, "step": 94314 }, { "epoch": 0.16723006368026355, "grad_norm": 0.72265625, "learning_rate": 0.00020141318604429453, "loss": 0.1862, "step": 94316 }, { "epoch": 0.16723360984557337, "grad_norm": 0.62890625, "learning_rate": 0.0002014098539299633, "loss": 0.2363, "step": 94318 }, { "epoch": 0.16723715601088318, "grad_norm": 0.94921875, "learning_rate": 0.0002014065257455435, "loss": 0.1808, "step": 94320 }, { "epoch": 0.167240702176193, "grad_norm": 0.66015625, "learning_rate": 0.00020140320149104966, "loss": 0.1416, "step": 94322 }, { "epoch": 0.1672442483415028, "grad_norm": 0.37890625, "learning_rate": 0.00020139988116649616, "loss": 0.1619, "step": 94324 }, { "epoch": 0.16724779450681262, "grad_norm": 0.435546875, "learning_rate": 0.0002013965647718976, "loss": 0.1913, "step": 94326 }, { "epoch": 0.16725134067212244, "grad_norm": 0.2197265625, "learning_rate": 0.00020139325230726852, "loss": 0.2285, "step": 94328 }, { "epoch": 0.16725488683743225, "grad_norm": 0.96484375, "learning_rate": 0.00020138994377262337, "loss": 0.2068, "step": 94330 }, { "epoch": 0.16725843300274207, "grad_norm": 0.5, "learning_rate": 0.00020138663916797664, "loss": 0.1413, "step": 94332 }, { "epoch": 0.16726197916805188, "grad_norm": 0.55859375, "learning_rate": 0.00020138333849334272, "loss": 0.1422, "step": 94334 }, { "epoch": 0.1672655253333617, "grad_norm": 0.1787109375, "learning_rate": 0.00020138004174873622, "loss": 0.1576, "step": 94336 }, { "epoch": 0.1672690714986715, "grad_norm": 0.263671875, "learning_rate": 0.00020137674893417132, "loss": 0.1758, "step": 94338 }, { "epoch": 0.16727261766398133, "grad_norm": 0.51953125, "learning_rate": 0.0002013734600496626, "loss": 0.1696, "step": 94340 }, { "epoch": 0.16727616382929114, "grad_norm": 0.53125, "learning_rate": 0.00020137017509522432, "loss": 0.171, "step": 94342 }, { "epoch": 0.16727970999460096, "grad_norm": 0.369140625, "learning_rate": 0.000201366894070871, "loss": 0.219, "step": 94344 }, { "epoch": 0.16728325615991077, "grad_norm": 0.248046875, "learning_rate": 0.00020136361697661685, "loss": 0.1985, "step": 94346 }, { "epoch": 0.16728680232522058, "grad_norm": 0.4453125, "learning_rate": 0.00020136034381247622, "loss": 0.158, "step": 94348 }, { "epoch": 0.1672903484905304, "grad_norm": 0.3203125, "learning_rate": 0.00020135707457846354, "loss": 0.1593, "step": 94350 }, { "epoch": 0.16729389465584021, "grad_norm": 0.38671875, "learning_rate": 0.00020135380927459293, "loss": 0.1705, "step": 94352 }, { "epoch": 0.16729744082115003, "grad_norm": 0.46484375, "learning_rate": 0.00020135054790087884, "loss": 0.1525, "step": 94354 }, { "epoch": 0.16730098698645984, "grad_norm": 0.2470703125, "learning_rate": 0.00020134729045733542, "loss": 0.1713, "step": 94356 }, { "epoch": 0.16730453315176966, "grad_norm": 7.5625, "learning_rate": 0.0002013440369439769, "loss": 0.1854, "step": 94358 }, { "epoch": 0.16730807931707947, "grad_norm": 2.84375, "learning_rate": 0.00020134078736081765, "loss": 0.2278, "step": 94360 }, { "epoch": 0.1673116254823893, "grad_norm": 0.6171875, "learning_rate": 0.00020133754170787176, "loss": 0.1848, "step": 94362 }, { "epoch": 0.1673151716476991, "grad_norm": 2.125, "learning_rate": 0.00020133429998515337, "loss": 0.1888, "step": 94364 }, { "epoch": 0.16731871781300894, "grad_norm": 0.50390625, "learning_rate": 0.00020133106219267686, "loss": 0.3498, "step": 94366 }, { "epoch": 0.16732226397831876, "grad_norm": 0.259765625, "learning_rate": 0.00020132782833045623, "loss": 0.187, "step": 94368 }, { "epoch": 0.16732581014362857, "grad_norm": 0.60546875, "learning_rate": 0.00020132459839850565, "loss": 0.1908, "step": 94370 }, { "epoch": 0.1673293563089384, "grad_norm": 2.03125, "learning_rate": 0.00020132137239683935, "loss": 0.3612, "step": 94372 }, { "epoch": 0.1673329024742482, "grad_norm": 0.57421875, "learning_rate": 0.00020131815032547123, "loss": 0.2571, "step": 94374 }, { "epoch": 0.16733644863955802, "grad_norm": 0.361328125, "learning_rate": 0.0002013149321844156, "loss": 0.1464, "step": 94376 }, { "epoch": 0.16733999480486783, "grad_norm": 0.76171875, "learning_rate": 0.00020131171797368641, "loss": 0.1846, "step": 94378 }, { "epoch": 0.16734354097017765, "grad_norm": 0.310546875, "learning_rate": 0.0002013085076932977, "loss": 0.1866, "step": 94380 }, { "epoch": 0.16734708713548746, "grad_norm": 0.4375, "learning_rate": 0.0002013053013432636, "loss": 0.2194, "step": 94382 }, { "epoch": 0.16735063330079727, "grad_norm": 0.310546875, "learning_rate": 0.00020130209892359806, "loss": 0.1833, "step": 94384 }, { "epoch": 0.1673541794661071, "grad_norm": 0.2333984375, "learning_rate": 0.00020129890043431515, "loss": 0.1916, "step": 94386 }, { "epoch": 0.1673577256314169, "grad_norm": 0.88671875, "learning_rate": 0.00020129570587542882, "loss": 0.145, "step": 94388 }, { "epoch": 0.16736127179672672, "grad_norm": 0.275390625, "learning_rate": 0.00020129251524695298, "loss": 0.2704, "step": 94390 }, { "epoch": 0.16736481796203653, "grad_norm": 1.3515625, "learning_rate": 0.00020128932854890174, "loss": 0.1929, "step": 94392 }, { "epoch": 0.16736836412734635, "grad_norm": 0.5078125, "learning_rate": 0.0002012861457812889, "loss": 0.1704, "step": 94394 }, { "epoch": 0.16737191029265616, "grad_norm": 0.24609375, "learning_rate": 0.0002012829669441284, "loss": 0.1836, "step": 94396 }, { "epoch": 0.16737545645796598, "grad_norm": 0.703125, "learning_rate": 0.00020127979203743424, "loss": 0.2563, "step": 94398 }, { "epoch": 0.1673790026232758, "grad_norm": 0.392578125, "learning_rate": 0.00020127662106122013, "loss": 0.1889, "step": 94400 }, { "epoch": 0.1673825487885856, "grad_norm": 0.99609375, "learning_rate": 0.00020127345401550016, "loss": 0.2374, "step": 94402 }, { "epoch": 0.16738609495389542, "grad_norm": 1.0390625, "learning_rate": 0.00020127029090028788, "loss": 0.2222, "step": 94404 }, { "epoch": 0.16738964111920523, "grad_norm": 0.462890625, "learning_rate": 0.00020126713171559745, "loss": 0.1728, "step": 94406 }, { "epoch": 0.16739318728451505, "grad_norm": 0.2255859375, "learning_rate": 0.00020126397646144252, "loss": 0.1749, "step": 94408 }, { "epoch": 0.16739673344982486, "grad_norm": 0.345703125, "learning_rate": 0.00020126082513783694, "loss": 0.3035, "step": 94410 }, { "epoch": 0.16740027961513468, "grad_norm": 0.279296875, "learning_rate": 0.00020125767774479433, "loss": 0.2096, "step": 94412 }, { "epoch": 0.1674038257804445, "grad_norm": 1.0703125, "learning_rate": 0.00020125453428232871, "loss": 0.2111, "step": 94414 }, { "epoch": 0.1674073719457543, "grad_norm": 1.2890625, "learning_rate": 0.00020125139475045365, "loss": 0.1564, "step": 94416 }, { "epoch": 0.16741091811106412, "grad_norm": 0.451171875, "learning_rate": 0.00020124825914918292, "loss": 0.1433, "step": 94418 }, { "epoch": 0.16741446427637394, "grad_norm": 0.546875, "learning_rate": 0.0002012451274785303, "loss": 0.212, "step": 94420 }, { "epoch": 0.16741801044168375, "grad_norm": 0.244140625, "learning_rate": 0.00020124199973850943, "loss": 0.1504, "step": 94422 }, { "epoch": 0.16742155660699357, "grad_norm": 0.314453125, "learning_rate": 0.00020123887592913395, "loss": 0.1499, "step": 94424 }, { "epoch": 0.16742510277230338, "grad_norm": 0.369140625, "learning_rate": 0.00020123575605041758, "loss": 0.2297, "step": 94426 }, { "epoch": 0.1674286489376132, "grad_norm": 0.2216796875, "learning_rate": 0.0002012326401023739, "loss": 0.1655, "step": 94428 }, { "epoch": 0.167432195102923, "grad_norm": 0.42578125, "learning_rate": 0.0002012295280850167, "loss": 0.1893, "step": 94430 }, { "epoch": 0.16743574126823282, "grad_norm": 0.5390625, "learning_rate": 0.00020122641999835932, "loss": 0.1978, "step": 94432 }, { "epoch": 0.16743928743354264, "grad_norm": 0.77734375, "learning_rate": 0.00020122331584241567, "loss": 0.2004, "step": 94434 }, { "epoch": 0.16744283359885245, "grad_norm": 0.640625, "learning_rate": 0.00020122021561719912, "loss": 0.1836, "step": 94436 }, { "epoch": 0.16744637976416227, "grad_norm": 1.9609375, "learning_rate": 0.00020121711932272318, "loss": 0.302, "step": 94438 }, { "epoch": 0.16744992592947208, "grad_norm": 0.451171875, "learning_rate": 0.0002012140269590016, "loss": 0.1828, "step": 94440 }, { "epoch": 0.1674534720947819, "grad_norm": 0.87109375, "learning_rate": 0.0002012109385260477, "loss": 0.1755, "step": 94442 }, { "epoch": 0.1674570182600917, "grad_norm": 0.58984375, "learning_rate": 0.00020120785402387515, "loss": 0.2038, "step": 94444 }, { "epoch": 0.16746056442540153, "grad_norm": 0.4609375, "learning_rate": 0.00020120477345249735, "loss": 0.302, "step": 94446 }, { "epoch": 0.16746411059071134, "grad_norm": 0.58203125, "learning_rate": 0.00020120169681192774, "loss": 0.2445, "step": 94448 }, { "epoch": 0.16746765675602115, "grad_norm": 0.90625, "learning_rate": 0.0002011986241021799, "loss": 0.1639, "step": 94450 }, { "epoch": 0.16747120292133097, "grad_norm": 0.30078125, "learning_rate": 0.00020119555532326712, "loss": 0.2157, "step": 94452 }, { "epoch": 0.16747474908664078, "grad_norm": 0.36328125, "learning_rate": 0.00020119249047520298, "loss": 0.2323, "step": 94454 }, { "epoch": 0.16747829525195063, "grad_norm": 0.486328125, "learning_rate": 0.0002011894295580007, "loss": 0.1711, "step": 94456 }, { "epoch": 0.16748184141726044, "grad_norm": 0.349609375, "learning_rate": 0.0002011863725716738, "loss": 0.1229, "step": 94458 }, { "epoch": 0.16748538758257026, "grad_norm": 0.45703125, "learning_rate": 0.00020118331951623564, "loss": 0.1726, "step": 94460 }, { "epoch": 0.16748893374788007, "grad_norm": 1.0703125, "learning_rate": 0.00020118027039169948, "loss": 0.1962, "step": 94462 }, { "epoch": 0.16749247991318988, "grad_norm": 0.39453125, "learning_rate": 0.00020117722519807883, "loss": 0.2001, "step": 94464 }, { "epoch": 0.1674960260784997, "grad_norm": 2.328125, "learning_rate": 0.00020117418393538684, "loss": 0.2533, "step": 94466 }, { "epoch": 0.1674995722438095, "grad_norm": 0.6640625, "learning_rate": 0.00020117114660363683, "loss": 0.1492, "step": 94468 }, { "epoch": 0.16750311840911933, "grad_norm": 0.23046875, "learning_rate": 0.00020116811320284217, "loss": 0.1506, "step": 94470 }, { "epoch": 0.16750666457442914, "grad_norm": 1.984375, "learning_rate": 0.00020116508373301607, "loss": 0.293, "step": 94472 }, { "epoch": 0.16751021073973896, "grad_norm": 0.2216796875, "learning_rate": 0.00020116205819417182, "loss": 0.2357, "step": 94474 }, { "epoch": 0.16751375690504877, "grad_norm": 0.3359375, "learning_rate": 0.00020115903658632248, "loss": 0.1107, "step": 94476 }, { "epoch": 0.1675173030703586, "grad_norm": 3.234375, "learning_rate": 0.00020115601890948156, "loss": 0.331, "step": 94478 }, { "epoch": 0.1675208492356684, "grad_norm": 0.337890625, "learning_rate": 0.0002011530051636621, "loss": 0.2403, "step": 94480 }, { "epoch": 0.16752439540097822, "grad_norm": 0.515625, "learning_rate": 0.00020114999534887718, "loss": 0.16, "step": 94482 }, { "epoch": 0.16752794156628803, "grad_norm": 0.7421875, "learning_rate": 0.00020114698946514014, "loss": 0.2729, "step": 94484 }, { "epoch": 0.16753148773159784, "grad_norm": 0.63671875, "learning_rate": 0.000201143987512464, "loss": 0.1273, "step": 94486 }, { "epoch": 0.16753503389690766, "grad_norm": 0.271484375, "learning_rate": 0.00020114098949086193, "loss": 0.2097, "step": 94488 }, { "epoch": 0.16753858006221747, "grad_norm": 0.51171875, "learning_rate": 0.0002011379954003471, "loss": 0.2454, "step": 94490 }, { "epoch": 0.1675421262275273, "grad_norm": 0.447265625, "learning_rate": 0.00020113500524093265, "loss": 0.1752, "step": 94492 }, { "epoch": 0.1675456723928371, "grad_norm": 0.87890625, "learning_rate": 0.0002011320190126314, "loss": 0.1646, "step": 94494 }, { "epoch": 0.16754921855814692, "grad_norm": 0.443359375, "learning_rate": 0.0002011290367154567, "loss": 0.1601, "step": 94496 }, { "epoch": 0.16755276472345673, "grad_norm": 0.474609375, "learning_rate": 0.0002011260583494214, "loss": 0.1443, "step": 94498 }, { "epoch": 0.16755631088876655, "grad_norm": 0.2578125, "learning_rate": 0.00020112308391453859, "loss": 0.5236, "step": 94500 }, { "epoch": 0.16755985705407636, "grad_norm": 0.220703125, "learning_rate": 0.0002011201134108213, "loss": 0.1668, "step": 94502 }, { "epoch": 0.16756340321938618, "grad_norm": 0.189453125, "learning_rate": 0.00020111714683828248, "loss": 0.152, "step": 94504 }, { "epoch": 0.167566949384696, "grad_norm": 0.384765625, "learning_rate": 0.00020111418419693512, "loss": 0.1659, "step": 94506 }, { "epoch": 0.1675704955500058, "grad_norm": 0.55078125, "learning_rate": 0.00020111122548679224, "loss": 0.1856, "step": 94508 }, { "epoch": 0.16757404171531562, "grad_norm": 0.423828125, "learning_rate": 0.00020110827070786669, "loss": 0.238, "step": 94510 }, { "epoch": 0.16757758788062543, "grad_norm": 0.53515625, "learning_rate": 0.00020110531986017142, "loss": 0.1179, "step": 94512 }, { "epoch": 0.16758113404593525, "grad_norm": 0.52734375, "learning_rate": 0.00020110237294371935, "loss": 0.1661, "step": 94514 }, { "epoch": 0.16758468021124506, "grad_norm": 0.427734375, "learning_rate": 0.00020109942995852331, "loss": 0.2581, "step": 94516 }, { "epoch": 0.16758822637655488, "grad_norm": 0.46875, "learning_rate": 0.00020109649090459627, "loss": 0.1635, "step": 94518 }, { "epoch": 0.1675917725418647, "grad_norm": 1.0546875, "learning_rate": 0.00020109355578195104, "loss": 0.2712, "step": 94520 }, { "epoch": 0.1675953187071745, "grad_norm": 1.421875, "learning_rate": 0.0002010906245906004, "loss": 0.3299, "step": 94522 }, { "epoch": 0.16759886487248432, "grad_norm": 0.76953125, "learning_rate": 0.00020108769733055722, "loss": 0.1866, "step": 94524 }, { "epoch": 0.16760241103779414, "grad_norm": 0.255859375, "learning_rate": 0.00020108477400183433, "loss": 0.2978, "step": 94526 }, { "epoch": 0.16760595720310395, "grad_norm": 0.88671875, "learning_rate": 0.00020108185460444462, "loss": 0.154, "step": 94528 }, { "epoch": 0.16760950336841376, "grad_norm": 0.451171875, "learning_rate": 0.00020107893913840053, "loss": 0.1535, "step": 94530 }, { "epoch": 0.16761304953372358, "grad_norm": 0.498046875, "learning_rate": 0.00020107602760371506, "loss": 0.184, "step": 94532 }, { "epoch": 0.1676165956990334, "grad_norm": 0.53125, "learning_rate": 0.00020107312000040088, "loss": 0.1994, "step": 94534 }, { "epoch": 0.1676201418643432, "grad_norm": 0.56640625, "learning_rate": 0.0002010702163284708, "loss": 0.2274, "step": 94536 }, { "epoch": 0.16762368802965302, "grad_norm": 0.28515625, "learning_rate": 0.0002010673165879373, "loss": 0.1914, "step": 94538 }, { "epoch": 0.16762723419496284, "grad_norm": 0.291015625, "learning_rate": 0.00020106442077881328, "loss": 0.1539, "step": 94540 }, { "epoch": 0.16763078036027265, "grad_norm": 0.29296875, "learning_rate": 0.0002010615289011113, "loss": 0.2272, "step": 94542 }, { "epoch": 0.16763432652558247, "grad_norm": 0.2275390625, "learning_rate": 0.00020105864095484405, "loss": 0.1915, "step": 94544 }, { "epoch": 0.16763787269089228, "grad_norm": 1.4453125, "learning_rate": 0.0002010557569400241, "loss": 0.2875, "step": 94546 }, { "epoch": 0.16764141885620212, "grad_norm": 0.37890625, "learning_rate": 0.00020105287685666409, "loss": 0.2623, "step": 94548 }, { "epoch": 0.16764496502151194, "grad_norm": 2.015625, "learning_rate": 0.00020105000070477663, "loss": 0.2199, "step": 94550 }, { "epoch": 0.16764851118682175, "grad_norm": 1.796875, "learning_rate": 0.0002010471284843743, "loss": 0.311, "step": 94552 }, { "epoch": 0.16765205735213157, "grad_norm": 0.275390625, "learning_rate": 0.00020104426019546974, "loss": 0.162, "step": 94554 }, { "epoch": 0.16765560351744138, "grad_norm": 0.5859375, "learning_rate": 0.00020104139583807535, "loss": 0.195, "step": 94556 }, { "epoch": 0.1676591496827512, "grad_norm": 0.7109375, "learning_rate": 0.00020103853541220368, "loss": 0.1696, "step": 94558 }, { "epoch": 0.167662695848061, "grad_norm": 0.3125, "learning_rate": 0.00020103567891786733, "loss": 0.1704, "step": 94560 }, { "epoch": 0.16766624201337083, "grad_norm": 0.828125, "learning_rate": 0.00020103282635507873, "loss": 0.2673, "step": 94562 }, { "epoch": 0.16766978817868064, "grad_norm": 0.345703125, "learning_rate": 0.00020102997772385034, "loss": 0.1936, "step": 94564 }, { "epoch": 0.16767333434399045, "grad_norm": 0.486328125, "learning_rate": 0.00020102713302419463, "loss": 0.1956, "step": 94566 }, { "epoch": 0.16767688050930027, "grad_norm": 0.87890625, "learning_rate": 0.00020102429225612412, "loss": 0.2118, "step": 94568 }, { "epoch": 0.16768042667461008, "grad_norm": 0.5234375, "learning_rate": 0.0002010214554196511, "loss": 0.1601, "step": 94570 }, { "epoch": 0.1676839728399199, "grad_norm": 0.515625, "learning_rate": 0.0002010186225147881, "loss": 0.1755, "step": 94572 }, { "epoch": 0.1676875190052297, "grad_norm": 0.255859375, "learning_rate": 0.00020101579354154744, "loss": 0.1565, "step": 94574 }, { "epoch": 0.16769106517053953, "grad_norm": 0.408203125, "learning_rate": 0.0002010129684999415, "loss": 0.1932, "step": 94576 }, { "epoch": 0.16769461133584934, "grad_norm": 1.9609375, "learning_rate": 0.00020101014738998265, "loss": 0.3134, "step": 94578 }, { "epoch": 0.16769815750115916, "grad_norm": 0.4609375, "learning_rate": 0.00020100733021168323, "loss": 0.1827, "step": 94580 }, { "epoch": 0.16770170366646897, "grad_norm": 2.4375, "learning_rate": 0.00020100451696505544, "loss": 0.1369, "step": 94582 }, { "epoch": 0.16770524983177879, "grad_norm": 0.80859375, "learning_rate": 0.00020100170765011176, "loss": 0.1956, "step": 94584 }, { "epoch": 0.1677087959970886, "grad_norm": 0.36328125, "learning_rate": 0.0002009989022668645, "loss": 0.1787, "step": 94586 }, { "epoch": 0.16771234216239841, "grad_norm": 0.392578125, "learning_rate": 0.00020099610081532573, "loss": 0.1467, "step": 94588 }, { "epoch": 0.16771588832770823, "grad_norm": 0.546875, "learning_rate": 0.00020099330329550783, "loss": 0.1919, "step": 94590 }, { "epoch": 0.16771943449301804, "grad_norm": 0.25390625, "learning_rate": 0.0002009905097074231, "loss": 0.2062, "step": 94592 }, { "epoch": 0.16772298065832786, "grad_norm": 0.3125, "learning_rate": 0.00020098772005108364, "loss": 0.1581, "step": 94594 }, { "epoch": 0.16772652682363767, "grad_norm": 0.67578125, "learning_rate": 0.00020098493432650164, "loss": 0.203, "step": 94596 }, { "epoch": 0.1677300729889475, "grad_norm": 0.416015625, "learning_rate": 0.00020098215253368934, "loss": 0.1785, "step": 94598 }, { "epoch": 0.1677336191542573, "grad_norm": 1.734375, "learning_rate": 0.00020097937467265892, "loss": 0.18, "step": 94600 }, { "epoch": 0.16773716531956712, "grad_norm": 0.8828125, "learning_rate": 0.0002009766007434224, "loss": 0.2243, "step": 94602 }, { "epoch": 0.16774071148487693, "grad_norm": 0.91796875, "learning_rate": 0.00020097383074599217, "loss": 0.1827, "step": 94604 }, { "epoch": 0.16774425765018675, "grad_norm": 0.80078125, "learning_rate": 0.00020097106468038004, "loss": 0.184, "step": 94606 }, { "epoch": 0.16774780381549656, "grad_norm": 0.296875, "learning_rate": 0.00020096830254659833, "loss": 0.151, "step": 94608 }, { "epoch": 0.16775134998080637, "grad_norm": 0.5703125, "learning_rate": 0.00020096554434465908, "loss": 0.1972, "step": 94610 }, { "epoch": 0.1677548961461162, "grad_norm": 0.828125, "learning_rate": 0.0002009627900745743, "loss": 0.2947, "step": 94612 }, { "epoch": 0.167758442311426, "grad_norm": 0.40234375, "learning_rate": 0.00020096003973635602, "loss": 0.2081, "step": 94614 }, { "epoch": 0.16776198847673582, "grad_norm": 0.494140625, "learning_rate": 0.0002009572933300163, "loss": 0.1529, "step": 94616 }, { "epoch": 0.16776553464204563, "grad_norm": 0.27734375, "learning_rate": 0.00020095455085556712, "loss": 0.1659, "step": 94618 }, { "epoch": 0.16776908080735545, "grad_norm": 0.388671875, "learning_rate": 0.00020095181231302058, "loss": 0.1902, "step": 94620 }, { "epoch": 0.16777262697266526, "grad_norm": 0.3359375, "learning_rate": 0.00020094907770238857, "loss": 0.1897, "step": 94622 }, { "epoch": 0.16777617313797508, "grad_norm": 2.03125, "learning_rate": 0.000200946347023683, "loss": 0.1448, "step": 94624 }, { "epoch": 0.1677797193032849, "grad_norm": 0.2578125, "learning_rate": 0.00020094362027691604, "loss": 0.2032, "step": 94626 }, { "epoch": 0.1677832654685947, "grad_norm": 1.984375, "learning_rate": 0.0002009408974620993, "loss": 0.6653, "step": 94628 }, { "epoch": 0.16778681163390452, "grad_norm": 0.31640625, "learning_rate": 0.00020093817857924485, "loss": 0.1516, "step": 94630 }, { "epoch": 0.16779035779921433, "grad_norm": 0.18359375, "learning_rate": 0.00020093546362836453, "loss": 0.1937, "step": 94632 }, { "epoch": 0.16779390396452415, "grad_norm": 0.29296875, "learning_rate": 0.00020093275260947037, "loss": 0.1988, "step": 94634 }, { "epoch": 0.16779745012983396, "grad_norm": 0.3125, "learning_rate": 0.000200930045522574, "loss": 0.1537, "step": 94636 }, { "epoch": 0.1678009962951438, "grad_norm": 0.51171875, "learning_rate": 0.0002009273423676875, "loss": 0.234, "step": 94638 }, { "epoch": 0.16780454246045362, "grad_norm": 0.48046875, "learning_rate": 0.00020092464314482248, "loss": 0.1761, "step": 94640 }, { "epoch": 0.16780808862576344, "grad_norm": 2.953125, "learning_rate": 0.00020092194785399084, "loss": 0.4152, "step": 94642 }, { "epoch": 0.16781163479107325, "grad_norm": 0.416015625, "learning_rate": 0.0002009192564952043, "loss": 0.1386, "step": 94644 }, { "epoch": 0.16781518095638306, "grad_norm": 3.1875, "learning_rate": 0.00020091656906847478, "loss": 0.1628, "step": 94646 }, { "epoch": 0.16781872712169288, "grad_norm": 0.46875, "learning_rate": 0.00020091388557381387, "loss": 0.1614, "step": 94648 }, { "epoch": 0.1678222732870027, "grad_norm": 0.609375, "learning_rate": 0.00020091120601123337, "loss": 0.2117, "step": 94650 }, { "epoch": 0.1678258194523125, "grad_norm": 0.259765625, "learning_rate": 0.00020090853038074507, "loss": 0.1307, "step": 94652 }, { "epoch": 0.16782936561762232, "grad_norm": 0.61328125, "learning_rate": 0.00020090585868236054, "loss": 0.19, "step": 94654 }, { "epoch": 0.16783291178293214, "grad_norm": 0.3515625, "learning_rate": 0.00020090319091609158, "loss": 0.2078, "step": 94656 }, { "epoch": 0.16783645794824195, "grad_norm": 0.322265625, "learning_rate": 0.0002009005270819497, "loss": 0.1631, "step": 94658 }, { "epoch": 0.16784000411355177, "grad_norm": 0.41015625, "learning_rate": 0.00020089786717994673, "loss": 0.1827, "step": 94660 }, { "epoch": 0.16784355027886158, "grad_norm": 2.0, "learning_rate": 0.0002008952112100943, "loss": 0.3251, "step": 94662 }, { "epoch": 0.1678470964441714, "grad_norm": 0.322265625, "learning_rate": 0.00020089255917240384, "loss": 0.1467, "step": 94664 }, { "epoch": 0.1678506426094812, "grad_norm": 1.4375, "learning_rate": 0.0002008899110668871, "loss": 0.3408, "step": 94666 }, { "epoch": 0.16785418877479102, "grad_norm": 0.392578125, "learning_rate": 0.00020088726689355559, "loss": 0.1692, "step": 94668 }, { "epoch": 0.16785773494010084, "grad_norm": 0.474609375, "learning_rate": 0.00020088462665242098, "loss": 0.1658, "step": 94670 }, { "epoch": 0.16786128110541065, "grad_norm": 0.455078125, "learning_rate": 0.0002008819903434947, "loss": 0.2075, "step": 94672 }, { "epoch": 0.16786482727072047, "grad_norm": 0.30078125, "learning_rate": 0.0002008793579667883, "loss": 0.15, "step": 94674 }, { "epoch": 0.16786837343603028, "grad_norm": 0.41015625, "learning_rate": 0.00020087672952231341, "loss": 0.1701, "step": 94676 }, { "epoch": 0.1678719196013401, "grad_norm": 0.287109375, "learning_rate": 0.0002008741050100814, "loss": 0.1867, "step": 94678 }, { "epoch": 0.1678754657666499, "grad_norm": 0.306640625, "learning_rate": 0.00020087148443010368, "loss": 0.2148, "step": 94680 }, { "epoch": 0.16787901193195973, "grad_norm": 0.267578125, "learning_rate": 0.00020086886778239186, "loss": 0.2045, "step": 94682 }, { "epoch": 0.16788255809726954, "grad_norm": 0.408203125, "learning_rate": 0.00020086625506695737, "loss": 0.2048, "step": 94684 }, { "epoch": 0.16788610426257936, "grad_norm": 0.7890625, "learning_rate": 0.00020086364628381164, "loss": 0.1907, "step": 94686 }, { "epoch": 0.16788965042788917, "grad_norm": 4.59375, "learning_rate": 0.0002008610414329659, "loss": 0.2858, "step": 94688 }, { "epoch": 0.16789319659319898, "grad_norm": 0.6875, "learning_rate": 0.00020085844051443186, "loss": 0.183, "step": 94690 }, { "epoch": 0.1678967427585088, "grad_norm": 0.333984375, "learning_rate": 0.00020085584352822058, "loss": 0.1987, "step": 94692 }, { "epoch": 0.1679002889238186, "grad_norm": 0.2021484375, "learning_rate": 0.00020085325047434357, "loss": 0.1576, "step": 94694 }, { "epoch": 0.16790383508912843, "grad_norm": 0.2421875, "learning_rate": 0.00020085066135281223, "loss": 0.1534, "step": 94696 }, { "epoch": 0.16790738125443824, "grad_norm": 0.267578125, "learning_rate": 0.00020084807616363776, "loss": 0.1703, "step": 94698 }, { "epoch": 0.16791092741974806, "grad_norm": 0.17578125, "learning_rate": 0.00020084549490683156, "loss": 0.1198, "step": 94700 }, { "epoch": 0.16791447358505787, "grad_norm": 0.267578125, "learning_rate": 0.0002008429175824048, "loss": 0.1519, "step": 94702 }, { "epoch": 0.1679180197503677, "grad_norm": 0.193359375, "learning_rate": 0.00020084034419036884, "loss": 0.1712, "step": 94704 }, { "epoch": 0.1679215659156775, "grad_norm": 0.384765625, "learning_rate": 0.00020083777473073498, "loss": 0.1804, "step": 94706 }, { "epoch": 0.16792511208098732, "grad_norm": 0.27734375, "learning_rate": 0.00020083520920351439, "loss": 0.1665, "step": 94708 }, { "epoch": 0.16792865824629713, "grad_norm": 0.322265625, "learning_rate": 0.00020083264760871822, "loss": 0.1715, "step": 94710 }, { "epoch": 0.16793220441160694, "grad_norm": 0.421875, "learning_rate": 0.00020083008994635783, "loss": 0.1971, "step": 94712 }, { "epoch": 0.16793575057691676, "grad_norm": 1.9140625, "learning_rate": 0.0002008275362164443, "loss": 0.3847, "step": 94714 }, { "epoch": 0.16793929674222657, "grad_norm": 0.259765625, "learning_rate": 0.0002008249864189888, "loss": 0.1949, "step": 94716 }, { "epoch": 0.1679428429075364, "grad_norm": 0.3203125, "learning_rate": 0.0002008224405540026, "loss": 0.1582, "step": 94718 }, { "epoch": 0.1679463890728462, "grad_norm": 0.43359375, "learning_rate": 0.0002008198986214966, "loss": 0.229, "step": 94720 }, { "epoch": 0.16794993523815602, "grad_norm": 0.7578125, "learning_rate": 0.00020081736062148214, "loss": 0.2422, "step": 94722 }, { "epoch": 0.16795348140346583, "grad_norm": 0.61328125, "learning_rate": 0.0002008148265539702, "loss": 0.1516, "step": 94724 }, { "epoch": 0.16795702756877565, "grad_norm": 0.400390625, "learning_rate": 0.00020081229641897192, "loss": 0.2024, "step": 94726 }, { "epoch": 0.1679605737340855, "grad_norm": 0.20703125, "learning_rate": 0.00020080977021649842, "loss": 0.1787, "step": 94728 }, { "epoch": 0.1679641198993953, "grad_norm": 0.30078125, "learning_rate": 0.00020080724794656059, "loss": 0.1396, "step": 94730 }, { "epoch": 0.16796766606470512, "grad_norm": 0.6015625, "learning_rate": 0.00020080472960916945, "loss": 0.1892, "step": 94732 }, { "epoch": 0.16797121223001493, "grad_norm": 0.2197265625, "learning_rate": 0.00020080221520433635, "loss": 0.2765, "step": 94734 }, { "epoch": 0.16797475839532475, "grad_norm": 0.2314453125, "learning_rate": 0.00020079970473207184, "loss": 0.1947, "step": 94736 }, { "epoch": 0.16797830456063456, "grad_norm": 0.203125, "learning_rate": 0.00020079719819238715, "loss": 0.1977, "step": 94738 }, { "epoch": 0.16798185072594438, "grad_norm": 0.5390625, "learning_rate": 0.0002007946955852933, "loss": 0.2103, "step": 94740 }, { "epoch": 0.1679853968912542, "grad_norm": 0.3359375, "learning_rate": 0.00020079219691080096, "loss": 0.1885, "step": 94742 }, { "epoch": 0.167988943056564, "grad_norm": 0.4453125, "learning_rate": 0.0002007897021689214, "loss": 0.1481, "step": 94744 }, { "epoch": 0.16799248922187382, "grad_norm": 0.384765625, "learning_rate": 0.00020078721135966524, "loss": 0.1224, "step": 94746 }, { "epoch": 0.16799603538718363, "grad_norm": 1.4375, "learning_rate": 0.0002007847244830435, "loss": 0.28, "step": 94748 }, { "epoch": 0.16799958155249345, "grad_norm": 0.69140625, "learning_rate": 0.0002007822415390671, "loss": 0.1944, "step": 94750 }, { "epoch": 0.16800312771780326, "grad_norm": 0.236328125, "learning_rate": 0.0002007797625277469, "loss": 0.1865, "step": 94752 }, { "epoch": 0.16800667388311308, "grad_norm": 0.373046875, "learning_rate": 0.00020077728744909355, "loss": 0.1667, "step": 94754 }, { "epoch": 0.1680102200484229, "grad_norm": 0.66015625, "learning_rate": 0.0002007748163031181, "loss": 0.1752, "step": 94756 }, { "epoch": 0.1680137662137327, "grad_norm": 0.46875, "learning_rate": 0.00020077234908983123, "loss": 0.2153, "step": 94758 }, { "epoch": 0.16801731237904252, "grad_norm": 0.263671875, "learning_rate": 0.00020076988580924383, "loss": 0.1428, "step": 94760 }, { "epoch": 0.16802085854435234, "grad_norm": 0.390625, "learning_rate": 0.00020076742646136657, "loss": 0.1815, "step": 94762 }, { "epoch": 0.16802440470966215, "grad_norm": 0.41015625, "learning_rate": 0.00020076497104621027, "loss": 0.1786, "step": 94764 }, { "epoch": 0.16802795087497197, "grad_norm": 0.58984375, "learning_rate": 0.0002007625195637856, "loss": 0.1612, "step": 94766 }, { "epoch": 0.16803149704028178, "grad_norm": 0.82421875, "learning_rate": 0.00020076007201410336, "loss": 0.1736, "step": 94768 }, { "epoch": 0.1680350432055916, "grad_norm": 0.72265625, "learning_rate": 0.00020075762839717425, "loss": 0.135, "step": 94770 }, { "epoch": 0.1680385893709014, "grad_norm": 0.375, "learning_rate": 0.00020075518871300887, "loss": 0.1494, "step": 94772 }, { "epoch": 0.16804213553621122, "grad_norm": 0.310546875, "learning_rate": 0.000200752752961618, "loss": 0.1819, "step": 94774 }, { "epoch": 0.16804568170152104, "grad_norm": 0.2099609375, "learning_rate": 0.00020075032114301222, "loss": 0.1553, "step": 94776 }, { "epoch": 0.16804922786683085, "grad_norm": 0.328125, "learning_rate": 0.0002007478932572022, "loss": 0.1997, "step": 94778 }, { "epoch": 0.16805277403214067, "grad_norm": 0.68359375, "learning_rate": 0.0002007454693041986, "loss": 0.1735, "step": 94780 }, { "epoch": 0.16805632019745048, "grad_norm": 0.70703125, "learning_rate": 0.00020074304928401196, "loss": 0.2861, "step": 94782 }, { "epoch": 0.1680598663627603, "grad_norm": 0.42578125, "learning_rate": 0.00020074063319665278, "loss": 0.2239, "step": 94784 }, { "epoch": 0.1680634125280701, "grad_norm": 0.318359375, "learning_rate": 0.00020073822104213173, "loss": 0.1664, "step": 94786 }, { "epoch": 0.16806695869337993, "grad_norm": 0.3515625, "learning_rate": 0.00020073581282045947, "loss": 0.1696, "step": 94788 }, { "epoch": 0.16807050485868974, "grad_norm": 0.458984375, "learning_rate": 0.00020073340853164634, "loss": 0.1607, "step": 94790 }, { "epoch": 0.16807405102399955, "grad_norm": 0.419921875, "learning_rate": 0.00020073100817570284, "loss": 0.2191, "step": 94792 }, { "epoch": 0.16807759718930937, "grad_norm": 0.431640625, "learning_rate": 0.00020072861175263963, "loss": 0.1649, "step": 94794 }, { "epoch": 0.16808114335461918, "grad_norm": 0.828125, "learning_rate": 0.00020072621926246715, "loss": 0.2536, "step": 94796 }, { "epoch": 0.168084689519929, "grad_norm": 0.7421875, "learning_rate": 0.00020072383070519578, "loss": 0.1757, "step": 94798 }, { "epoch": 0.1680882356852388, "grad_norm": 0.466796875, "learning_rate": 0.000200721446080836, "loss": 0.2373, "step": 94800 }, { "epoch": 0.16809178185054863, "grad_norm": 0.5078125, "learning_rate": 0.00020071906538939822, "loss": 0.196, "step": 94802 }, { "epoch": 0.16809532801585844, "grad_norm": 0.451171875, "learning_rate": 0.000200716688630893, "loss": 0.1941, "step": 94804 }, { "epoch": 0.16809887418116826, "grad_norm": 0.275390625, "learning_rate": 0.0002007143158053306, "loss": 0.165, "step": 94806 }, { "epoch": 0.16810242034647807, "grad_norm": 0.244140625, "learning_rate": 0.0002007119469127213, "loss": 0.1657, "step": 94808 }, { "epoch": 0.16810596651178789, "grad_norm": 0.1982421875, "learning_rate": 0.00020070958195307571, "loss": 0.1533, "step": 94810 }, { "epoch": 0.1681095126770977, "grad_norm": 7.375, "learning_rate": 0.0002007072209264039, "loss": 0.3414, "step": 94812 }, { "epoch": 0.16811305884240751, "grad_norm": 0.265625, "learning_rate": 0.00020070486383271639, "loss": 0.2141, "step": 94814 }, { "epoch": 0.16811660500771733, "grad_norm": 0.75390625, "learning_rate": 0.0002007025106720235, "loss": 0.1769, "step": 94816 }, { "epoch": 0.16812015117302714, "grad_norm": 0.484375, "learning_rate": 0.00020070016144433538, "loss": 0.1811, "step": 94818 }, { "epoch": 0.16812369733833699, "grad_norm": 0.263671875, "learning_rate": 0.00020069781614966243, "loss": 0.2054, "step": 94820 }, { "epoch": 0.1681272435036468, "grad_norm": 0.51953125, "learning_rate": 0.0002006954747880148, "loss": 0.1736, "step": 94822 }, { "epoch": 0.16813078966895662, "grad_norm": 0.2451171875, "learning_rate": 0.00020069313735940282, "loss": 0.1243, "step": 94824 }, { "epoch": 0.16813433583426643, "grad_norm": 0.458984375, "learning_rate": 0.00020069080386383664, "loss": 0.1477, "step": 94826 }, { "epoch": 0.16813788199957624, "grad_norm": 0.3359375, "learning_rate": 0.00020068847430132654, "loss": 0.154, "step": 94828 }, { "epoch": 0.16814142816488606, "grad_norm": 0.83984375, "learning_rate": 0.0002006861486718826, "loss": 0.1809, "step": 94830 }, { "epoch": 0.16814497433019587, "grad_norm": 0.279296875, "learning_rate": 0.00020068382697551514, "loss": 0.1954, "step": 94832 }, { "epoch": 0.1681485204955057, "grad_norm": 1.4921875, "learning_rate": 0.00020068150921223428, "loss": 0.1976, "step": 94834 }, { "epoch": 0.1681520666608155, "grad_norm": 0.283203125, "learning_rate": 0.00020067919538205002, "loss": 0.1892, "step": 94836 }, { "epoch": 0.16815561282612532, "grad_norm": 0.30078125, "learning_rate": 0.0002006768854849726, "loss": 0.2051, "step": 94838 }, { "epoch": 0.16815915899143513, "grad_norm": 0.30859375, "learning_rate": 0.00020067457952101212, "loss": 0.1344, "step": 94840 }, { "epoch": 0.16816270515674495, "grad_norm": 0.203125, "learning_rate": 0.00020067227749017867, "loss": 0.1884, "step": 94842 }, { "epoch": 0.16816625132205476, "grad_norm": 0.3984375, "learning_rate": 0.0002006699793924822, "loss": 0.1667, "step": 94844 }, { "epoch": 0.16816979748736458, "grad_norm": 0.35546875, "learning_rate": 0.00020066768522793288, "loss": 0.1442, "step": 94846 }, { "epoch": 0.1681733436526744, "grad_norm": 0.462890625, "learning_rate": 0.00020066539499654073, "loss": 0.1533, "step": 94848 }, { "epoch": 0.1681768898179842, "grad_norm": 0.3515625, "learning_rate": 0.00020066310869831572, "loss": 0.1047, "step": 94850 }, { "epoch": 0.16818043598329402, "grad_norm": 0.2001953125, "learning_rate": 0.00020066082633326784, "loss": 0.2117, "step": 94852 }, { "epoch": 0.16818398214860383, "grad_norm": 0.298828125, "learning_rate": 0.00020065854790140718, "loss": 0.2191, "step": 94854 }, { "epoch": 0.16818752831391365, "grad_norm": 1.046875, "learning_rate": 0.00020065627340274368, "loss": 0.2598, "step": 94856 }, { "epoch": 0.16819107447922346, "grad_norm": 0.75390625, "learning_rate": 0.0002006540028372871, "loss": 0.1782, "step": 94858 }, { "epoch": 0.16819462064453328, "grad_norm": 0.6953125, "learning_rate": 0.00020065173620504762, "loss": 0.3918, "step": 94860 }, { "epoch": 0.1681981668098431, "grad_norm": 0.435546875, "learning_rate": 0.00020064947350603496, "loss": 0.1961, "step": 94862 }, { "epoch": 0.1682017129751529, "grad_norm": 0.31640625, "learning_rate": 0.00020064721474025914, "loss": 0.1923, "step": 94864 }, { "epoch": 0.16820525914046272, "grad_norm": 0.1748046875, "learning_rate": 0.00020064495990772995, "loss": 0.1564, "step": 94866 }, { "epoch": 0.16820880530577254, "grad_norm": 1.9921875, "learning_rate": 0.00020064270900845737, "loss": 0.3806, "step": 94868 }, { "epoch": 0.16821235147108235, "grad_norm": 0.35546875, "learning_rate": 0.00020064046204245112, "loss": 0.2076, "step": 94870 }, { "epoch": 0.16821589763639216, "grad_norm": 0.30859375, "learning_rate": 0.00020063821900972106, "loss": 0.161, "step": 94872 }, { "epoch": 0.16821944380170198, "grad_norm": 0.46484375, "learning_rate": 0.0002006359799102771, "loss": 0.191, "step": 94874 }, { "epoch": 0.1682229899670118, "grad_norm": 1.1796875, "learning_rate": 0.00020063374474412894, "loss": 0.2433, "step": 94876 }, { "epoch": 0.1682265361323216, "grad_norm": 0.353515625, "learning_rate": 0.00020063151351128625, "loss": 0.1935, "step": 94878 }, { "epoch": 0.16823008229763142, "grad_norm": 0.384765625, "learning_rate": 0.000200629286211759, "loss": 0.1539, "step": 94880 }, { "epoch": 0.16823362846294124, "grad_norm": 1.15625, "learning_rate": 0.00020062706284555687, "loss": 0.2375, "step": 94882 }, { "epoch": 0.16823717462825105, "grad_norm": 0.296875, "learning_rate": 0.00020062484341268948, "loss": 0.1639, "step": 94884 }, { "epoch": 0.16824072079356087, "grad_norm": 0.44921875, "learning_rate": 0.00020062262791316666, "loss": 0.1726, "step": 94886 }, { "epoch": 0.16824426695887068, "grad_norm": 0.66015625, "learning_rate": 0.00020062041634699796, "loss": 0.2203, "step": 94888 }, { "epoch": 0.1682478131241805, "grad_norm": 0.40234375, "learning_rate": 0.0002006182087141932, "loss": 0.1833, "step": 94890 }, { "epoch": 0.1682513592894903, "grad_norm": 0.7109375, "learning_rate": 0.00020061600501476197, "loss": 0.2337, "step": 94892 }, { "epoch": 0.16825490545480012, "grad_norm": 0.5078125, "learning_rate": 0.00020061380524871394, "loss": 0.1685, "step": 94894 }, { "epoch": 0.16825845162010994, "grad_norm": 0.419921875, "learning_rate": 0.00020061160941605864, "loss": 0.162, "step": 94896 }, { "epoch": 0.16826199778541975, "grad_norm": 0.2177734375, "learning_rate": 0.00020060941751680572, "loss": 0.1446, "step": 94898 }, { "epoch": 0.16826554395072957, "grad_norm": 0.373046875, "learning_rate": 0.00020060722955096482, "loss": 0.129, "step": 94900 }, { "epoch": 0.16826909011603938, "grad_norm": 0.380859375, "learning_rate": 0.00020060504551854552, "loss": 0.2305, "step": 94902 }, { "epoch": 0.1682726362813492, "grad_norm": 0.55859375, "learning_rate": 0.0002006028654195572, "loss": 0.1342, "step": 94904 }, { "epoch": 0.168276182446659, "grad_norm": 0.81640625, "learning_rate": 0.00020060068925400963, "loss": 0.1949, "step": 94906 }, { "epoch": 0.16827972861196883, "grad_norm": 1.390625, "learning_rate": 0.00020059851702191212, "loss": 0.1912, "step": 94908 }, { "epoch": 0.16828327477727867, "grad_norm": 0.15234375, "learning_rate": 0.00020059634872327429, "loss": 0.1994, "step": 94910 }, { "epoch": 0.16828682094258848, "grad_norm": 0.29296875, "learning_rate": 0.0002005941843581056, "loss": 0.1661, "step": 94912 }, { "epoch": 0.1682903671078983, "grad_norm": 0.7734375, "learning_rate": 0.00020059202392641554, "loss": 0.1788, "step": 94914 }, { "epoch": 0.1682939132732081, "grad_norm": 0.63671875, "learning_rate": 0.00020058986742821353, "loss": 0.2679, "step": 94916 }, { "epoch": 0.16829745943851793, "grad_norm": 0.1943359375, "learning_rate": 0.00020058771486350897, "loss": 0.1579, "step": 94918 }, { "epoch": 0.16830100560382774, "grad_norm": 2.390625, "learning_rate": 0.0002005855662323113, "loss": 0.1882, "step": 94920 }, { "epoch": 0.16830455176913756, "grad_norm": 0.48828125, "learning_rate": 0.00020058342153462993, "loss": 0.1627, "step": 94922 }, { "epoch": 0.16830809793444737, "grad_norm": 0.2353515625, "learning_rate": 0.0002005812807704742, "loss": 0.1508, "step": 94924 }, { "epoch": 0.16831164409975718, "grad_norm": 2.859375, "learning_rate": 0.00020057914393985354, "loss": 0.1645, "step": 94926 }, { "epoch": 0.168315190265067, "grad_norm": 0.447265625, "learning_rate": 0.00020057701104277728, "loss": 0.3084, "step": 94928 }, { "epoch": 0.16831873643037681, "grad_norm": 0.306640625, "learning_rate": 0.00020057488207925474, "loss": 0.1446, "step": 94930 }, { "epoch": 0.16832228259568663, "grad_norm": 0.80859375, "learning_rate": 0.00020057275704929512, "loss": 0.2477, "step": 94932 }, { "epoch": 0.16832582876099644, "grad_norm": 0.20703125, "learning_rate": 0.0002005706359529079, "loss": 0.1426, "step": 94934 }, { "epoch": 0.16832937492630626, "grad_norm": 0.318359375, "learning_rate": 0.00020056851879010222, "loss": 0.1313, "step": 94936 }, { "epoch": 0.16833292109161607, "grad_norm": 0.279296875, "learning_rate": 0.0002005664055608874, "loss": 0.1728, "step": 94938 }, { "epoch": 0.1683364672569259, "grad_norm": 0.451171875, "learning_rate": 0.00020056429626527273, "loss": 0.147, "step": 94940 }, { "epoch": 0.1683400134222357, "grad_norm": 0.50390625, "learning_rate": 0.0002005621909032673, "loss": 0.1697, "step": 94942 }, { "epoch": 0.16834355958754552, "grad_norm": 0.291015625, "learning_rate": 0.00020056008947488038, "loss": 0.1569, "step": 94944 }, { "epoch": 0.16834710575285533, "grad_norm": 1.78125, "learning_rate": 0.0002005579919801212, "loss": 0.2968, "step": 94946 }, { "epoch": 0.16835065191816515, "grad_norm": 0.828125, "learning_rate": 0.00020055589841899897, "loss": 0.1681, "step": 94948 }, { "epoch": 0.16835419808347496, "grad_norm": 0.51171875, "learning_rate": 0.00020055380879152268, "loss": 0.2584, "step": 94950 }, { "epoch": 0.16835774424878477, "grad_norm": 0.234375, "learning_rate": 0.00020055172309770152, "loss": 0.1745, "step": 94952 }, { "epoch": 0.1683612904140946, "grad_norm": 0.400390625, "learning_rate": 0.00020054964133754476, "loss": 0.2231, "step": 94954 }, { "epoch": 0.1683648365794044, "grad_norm": 0.51953125, "learning_rate": 0.0002005475635110614, "loss": 0.1755, "step": 94956 }, { "epoch": 0.16836838274471422, "grad_norm": 0.263671875, "learning_rate": 0.00020054548961826065, "loss": 0.1753, "step": 94958 }, { "epoch": 0.16837192891002403, "grad_norm": 0.9140625, "learning_rate": 0.00020054341965915128, "loss": 0.2091, "step": 94960 }, { "epoch": 0.16837547507533385, "grad_norm": 0.1796875, "learning_rate": 0.00020054135363374253, "loss": 0.1581, "step": 94962 }, { "epoch": 0.16837902124064366, "grad_norm": 1.640625, "learning_rate": 0.00020053929154204356, "loss": 0.2272, "step": 94964 }, { "epoch": 0.16838256740595348, "grad_norm": 0.33984375, "learning_rate": 0.00020053723338406322, "loss": 0.1909, "step": 94966 }, { "epoch": 0.1683861135712633, "grad_norm": 0.458984375, "learning_rate": 0.0002005351791598104, "loss": 0.1878, "step": 94968 }, { "epoch": 0.1683896597365731, "grad_norm": 0.91796875, "learning_rate": 0.00020053312886929444, "loss": 0.2322, "step": 94970 }, { "epoch": 0.16839320590188292, "grad_norm": 0.287109375, "learning_rate": 0.000200531082512524, "loss": 0.167, "step": 94972 }, { "epoch": 0.16839675206719273, "grad_norm": 1.34375, "learning_rate": 0.0002005290400895081, "loss": 0.3509, "step": 94974 }, { "epoch": 0.16840029823250255, "grad_norm": 0.59375, "learning_rate": 0.0002005270016002558, "loss": 0.1804, "step": 94976 }, { "epoch": 0.16840384439781236, "grad_norm": 1.7109375, "learning_rate": 0.0002005249670447758, "loss": 0.291, "step": 94978 }, { "epoch": 0.16840739056312218, "grad_norm": 0.3046875, "learning_rate": 0.00020052293642307723, "loss": 0.2052, "step": 94980 }, { "epoch": 0.168410936728432, "grad_norm": 0.234375, "learning_rate": 0.00020052090973516883, "loss": 0.159, "step": 94982 }, { "epoch": 0.1684144828937418, "grad_norm": 0.11474609375, "learning_rate": 0.00020051888698105952, "loss": 0.2439, "step": 94984 }, { "epoch": 0.16841802905905162, "grad_norm": 0.326171875, "learning_rate": 0.000200516868160758, "loss": 0.1125, "step": 94986 }, { "epoch": 0.16842157522436144, "grad_norm": 0.27734375, "learning_rate": 0.0002005148532742733, "loss": 0.1672, "step": 94988 }, { "epoch": 0.16842512138967125, "grad_norm": 0.62890625, "learning_rate": 0.00020051284232161412, "loss": 0.1521, "step": 94990 }, { "epoch": 0.16842866755498107, "grad_norm": 0.326171875, "learning_rate": 0.0002005108353027893, "loss": 0.1855, "step": 94992 }, { "epoch": 0.16843221372029088, "grad_norm": 1.015625, "learning_rate": 0.00020050883221780762, "loss": 0.1749, "step": 94994 }, { "epoch": 0.1684357598856007, "grad_norm": 1.8125, "learning_rate": 0.00020050683306667777, "loss": 0.2134, "step": 94996 }, { "epoch": 0.1684393060509105, "grad_norm": 0.486328125, "learning_rate": 0.00020050483784940867, "loss": 0.185, "step": 94998 }, { "epoch": 0.16844285221622035, "grad_norm": 0.4296875, "learning_rate": 0.0002005028465660088, "loss": 0.1647, "step": 95000 }, { "epoch": 0.16844639838153017, "grad_norm": 0.36328125, "learning_rate": 0.00020050085921648693, "loss": 0.1792, "step": 95002 }, { "epoch": 0.16844994454683998, "grad_norm": 0.58203125, "learning_rate": 0.00020049887580085192, "loss": 0.1479, "step": 95004 }, { "epoch": 0.1684534907121498, "grad_norm": 0.859375, "learning_rate": 0.0002004968963191124, "loss": 0.259, "step": 95006 }, { "epoch": 0.1684570368774596, "grad_norm": 0.96875, "learning_rate": 0.00020049492077127678, "loss": 0.1924, "step": 95008 }, { "epoch": 0.16846058304276942, "grad_norm": 1.90625, "learning_rate": 0.00020049294915735406, "loss": 0.2367, "step": 95010 }, { "epoch": 0.16846412920807924, "grad_norm": 0.73828125, "learning_rate": 0.00020049098147735262, "loss": 0.1218, "step": 95012 }, { "epoch": 0.16846767537338905, "grad_norm": 0.2177734375, "learning_rate": 0.00020048901773128105, "loss": 0.1667, "step": 95014 }, { "epoch": 0.16847122153869887, "grad_norm": 1.671875, "learning_rate": 0.00020048705791914808, "loss": 0.2803, "step": 95016 }, { "epoch": 0.16847476770400868, "grad_norm": 0.318359375, "learning_rate": 0.0002004851020409622, "loss": 0.216, "step": 95018 }, { "epoch": 0.1684783138693185, "grad_norm": 0.16796875, "learning_rate": 0.00020048315009673202, "loss": 0.1677, "step": 95020 }, { "epoch": 0.1684818600346283, "grad_norm": 0.267578125, "learning_rate": 0.00020048120208646602, "loss": 0.1653, "step": 95022 }, { "epoch": 0.16848540619993813, "grad_norm": 0.4296875, "learning_rate": 0.00020047925801017279, "loss": 0.213, "step": 95024 }, { "epoch": 0.16848895236524794, "grad_norm": 2.46875, "learning_rate": 0.00020047731786786065, "loss": 0.19, "step": 95026 }, { "epoch": 0.16849249853055775, "grad_norm": 0.28125, "learning_rate": 0.0002004753816595383, "loss": 0.2124, "step": 95028 }, { "epoch": 0.16849604469586757, "grad_norm": 0.431640625, "learning_rate": 0.00020047344938521416, "loss": 0.1949, "step": 95030 }, { "epoch": 0.16849959086117738, "grad_norm": 1.7421875, "learning_rate": 0.00020047152104489663, "loss": 0.2901, "step": 95032 }, { "epoch": 0.1685031370264872, "grad_norm": 0.39453125, "learning_rate": 0.00020046959663859413, "loss": 0.2008, "step": 95034 }, { "epoch": 0.168506683191797, "grad_norm": 0.306640625, "learning_rate": 0.00020046767616631512, "loss": 0.2479, "step": 95036 }, { "epoch": 0.16851022935710683, "grad_norm": 0.35546875, "learning_rate": 0.000200465759628068, "loss": 0.2338, "step": 95038 }, { "epoch": 0.16851377552241664, "grad_norm": 0.49609375, "learning_rate": 0.0002004638470238611, "loss": 0.2005, "step": 95040 }, { "epoch": 0.16851732168772646, "grad_norm": 0.333984375, "learning_rate": 0.00020046193835370277, "loss": 0.1547, "step": 95042 }, { "epoch": 0.16852086785303627, "grad_norm": 0.35546875, "learning_rate": 0.00020046003361760153, "loss": 0.1984, "step": 95044 }, { "epoch": 0.16852441401834609, "grad_norm": 0.333984375, "learning_rate": 0.00020045813281556547, "loss": 0.1659, "step": 95046 }, { "epoch": 0.1685279601836559, "grad_norm": 0.55859375, "learning_rate": 0.00020045623594760315, "loss": 0.1996, "step": 95048 }, { "epoch": 0.16853150634896572, "grad_norm": 0.28125, "learning_rate": 0.00020045434301372263, "loss": 0.1304, "step": 95050 }, { "epoch": 0.16853505251427553, "grad_norm": 2.9375, "learning_rate": 0.00020045245401393232, "loss": 0.2667, "step": 95052 }, { "epoch": 0.16853859867958534, "grad_norm": 0.19140625, "learning_rate": 0.00020045056894824048, "loss": 0.1412, "step": 95054 }, { "epoch": 0.16854214484489516, "grad_norm": 2.3125, "learning_rate": 0.0002004486878166553, "loss": 0.3524, "step": 95056 }, { "epoch": 0.16854569101020497, "grad_norm": 0.6640625, "learning_rate": 0.00020044681061918512, "loss": 0.2285, "step": 95058 }, { "epoch": 0.1685492371755148, "grad_norm": 0.330078125, "learning_rate": 0.00020044493735583798, "loss": 0.1489, "step": 95060 }, { "epoch": 0.1685527833408246, "grad_norm": 0.3671875, "learning_rate": 0.00020044306802662214, "loss": 0.1727, "step": 95062 }, { "epoch": 0.16855632950613442, "grad_norm": 0.400390625, "learning_rate": 0.00020044120263154586, "loss": 0.197, "step": 95064 }, { "epoch": 0.16855987567144423, "grad_norm": 0.58203125, "learning_rate": 0.00020043934117061724, "loss": 0.2126, "step": 95066 }, { "epoch": 0.16856342183675405, "grad_norm": 0.2001953125, "learning_rate": 0.00020043748364384435, "loss": 0.1724, "step": 95068 }, { "epoch": 0.16856696800206386, "grad_norm": 0.38671875, "learning_rate": 0.00020043563005123544, "loss": 0.1511, "step": 95070 }, { "epoch": 0.16857051416737368, "grad_norm": 1.125, "learning_rate": 0.00020043378039279853, "loss": 0.2568, "step": 95072 }, { "epoch": 0.1685740603326835, "grad_norm": 1.0234375, "learning_rate": 0.0002004319346685417, "loss": 0.1471, "step": 95074 }, { "epoch": 0.1685776064979933, "grad_norm": 0.39453125, "learning_rate": 0.00020043009287847315, "loss": 0.2198, "step": 95076 }, { "epoch": 0.16858115266330312, "grad_norm": 0.51953125, "learning_rate": 0.00020042825502260075, "loss": 0.1879, "step": 95078 }, { "epoch": 0.16858469882861293, "grad_norm": 0.33984375, "learning_rate": 0.00020042642110093263, "loss": 0.1563, "step": 95080 }, { "epoch": 0.16858824499392275, "grad_norm": 0.39453125, "learning_rate": 0.00020042459111347687, "loss": 0.1731, "step": 95082 }, { "epoch": 0.16859179115923256, "grad_norm": 0.75390625, "learning_rate": 0.00020042276506024144, "loss": 0.1646, "step": 95084 }, { "epoch": 0.16859533732454238, "grad_norm": 1.1640625, "learning_rate": 0.00020042094294123424, "loss": 0.1738, "step": 95086 }, { "epoch": 0.1685988834898522, "grad_norm": 0.73046875, "learning_rate": 0.00020041912475646332, "loss": 0.1868, "step": 95088 }, { "epoch": 0.168602429655162, "grad_norm": 2.875, "learning_rate": 0.00020041731050593652, "loss": 0.1913, "step": 95090 }, { "epoch": 0.16860597582047185, "grad_norm": 0.38671875, "learning_rate": 0.00020041550018966192, "loss": 0.1376, "step": 95092 }, { "epoch": 0.16860952198578166, "grad_norm": 0.61328125, "learning_rate": 0.00020041369380764746, "loss": 0.1803, "step": 95094 }, { "epoch": 0.16861306815109148, "grad_norm": 0.5703125, "learning_rate": 0.0002004118913599009, "loss": 0.1517, "step": 95096 }, { "epoch": 0.1686166143164013, "grad_norm": 0.1513671875, "learning_rate": 0.00020041009284643013, "loss": 0.1332, "step": 95098 }, { "epoch": 0.1686201604817111, "grad_norm": 1.6875, "learning_rate": 0.0002004082982672432, "loss": 0.3116, "step": 95100 }, { "epoch": 0.16862370664702092, "grad_norm": 0.5390625, "learning_rate": 0.00020040650762234775, "loss": 0.1754, "step": 95102 }, { "epoch": 0.16862725281233074, "grad_norm": 0.8359375, "learning_rate": 0.00020040472091175166, "loss": 0.3261, "step": 95104 }, { "epoch": 0.16863079897764055, "grad_norm": 1.0234375, "learning_rate": 0.00020040293813546276, "loss": 0.2356, "step": 95106 }, { "epoch": 0.16863434514295036, "grad_norm": 2.515625, "learning_rate": 0.00020040115929348894, "loss": 0.3183, "step": 95108 }, { "epoch": 0.16863789130826018, "grad_norm": 1.9609375, "learning_rate": 0.0002003993843858379, "loss": 0.2169, "step": 95110 }, { "epoch": 0.16864143747357, "grad_norm": 0.326171875, "learning_rate": 0.0002003976134125173, "loss": 0.225, "step": 95112 }, { "epoch": 0.1686449836388798, "grad_norm": 1.140625, "learning_rate": 0.00020039584637353505, "loss": 0.3845, "step": 95114 }, { "epoch": 0.16864852980418962, "grad_norm": 0.2216796875, "learning_rate": 0.0002003940832688989, "loss": 0.1582, "step": 95116 }, { "epoch": 0.16865207596949944, "grad_norm": 0.578125, "learning_rate": 0.00020039232409861633, "loss": 0.1988, "step": 95118 }, { "epoch": 0.16865562213480925, "grad_norm": 0.79296875, "learning_rate": 0.00020039056886269534, "loss": 0.2286, "step": 95120 }, { "epoch": 0.16865916830011907, "grad_norm": 0.341796875, "learning_rate": 0.00020038881756114334, "loss": 0.1747, "step": 95122 }, { "epoch": 0.16866271446542888, "grad_norm": 0.330078125, "learning_rate": 0.0002003870701939681, "loss": 0.1747, "step": 95124 }, { "epoch": 0.1686662606307387, "grad_norm": 1.15625, "learning_rate": 0.00020038532676117728, "loss": 0.5885, "step": 95126 }, { "epoch": 0.1686698067960485, "grad_norm": 0.279296875, "learning_rate": 0.00020038358726277844, "loss": 0.1771, "step": 95128 }, { "epoch": 0.16867335296135832, "grad_norm": 0.2314453125, "learning_rate": 0.0002003818516987792, "loss": 0.2893, "step": 95130 }, { "epoch": 0.16867689912666814, "grad_norm": 0.77734375, "learning_rate": 0.0002003801200691873, "loss": 0.1165, "step": 95132 }, { "epoch": 0.16868044529197795, "grad_norm": 0.44140625, "learning_rate": 0.00020037839237401007, "loss": 0.1389, "step": 95134 }, { "epoch": 0.16868399145728777, "grad_norm": 0.47265625, "learning_rate": 0.00020037666861325536, "loss": 0.1979, "step": 95136 }, { "epoch": 0.16868753762259758, "grad_norm": 0.306640625, "learning_rate": 0.00020037494878693039, "loss": 0.2221, "step": 95138 }, { "epoch": 0.1686910837879074, "grad_norm": 0.32421875, "learning_rate": 0.0002003732328950428, "loss": 0.1683, "step": 95140 }, { "epoch": 0.1686946299532172, "grad_norm": 1.203125, "learning_rate": 0.00020037152093760032, "loss": 0.1837, "step": 95142 }, { "epoch": 0.16869817611852703, "grad_norm": 1.1171875, "learning_rate": 0.00020036981291461008, "loss": 0.2672, "step": 95144 }, { "epoch": 0.16870172228383684, "grad_norm": 0.640625, "learning_rate": 0.00020036810882607966, "loss": 0.2889, "step": 95146 }, { "epoch": 0.16870526844914666, "grad_norm": 0.640625, "learning_rate": 0.00020036640867201674, "loss": 0.2649, "step": 95148 }, { "epoch": 0.16870881461445647, "grad_norm": 0.53515625, "learning_rate": 0.00020036471245242845, "loss": 0.1794, "step": 95150 }, { "epoch": 0.16871236077976628, "grad_norm": 1.0, "learning_rate": 0.0002003630201673224, "loss": 0.2515, "step": 95152 }, { "epoch": 0.1687159069450761, "grad_norm": 0.73046875, "learning_rate": 0.0002003613318167059, "loss": 0.2085, "step": 95154 }, { "epoch": 0.16871945311038591, "grad_norm": 0.3828125, "learning_rate": 0.00020035964740058632, "loss": 0.193, "step": 95156 }, { "epoch": 0.16872299927569573, "grad_norm": 0.83984375, "learning_rate": 0.00020035796691897115, "loss": 0.1767, "step": 95158 }, { "epoch": 0.16872654544100554, "grad_norm": 2.171875, "learning_rate": 0.0002003562903718676, "loss": 0.1893, "step": 95160 }, { "epoch": 0.16873009160631536, "grad_norm": 0.50390625, "learning_rate": 0.00020035461775928315, "loss": 0.1873, "step": 95162 }, { "epoch": 0.16873363777162517, "grad_norm": 0.462890625, "learning_rate": 0.00020035294908122493, "loss": 0.1756, "step": 95164 }, { "epoch": 0.168737183936935, "grad_norm": 0.5859375, "learning_rate": 0.0002003512843377004, "loss": 0.1783, "step": 95166 }, { "epoch": 0.1687407301022448, "grad_norm": 0.396484375, "learning_rate": 0.00020034962352871667, "loss": 0.1774, "step": 95168 }, { "epoch": 0.16874427626755462, "grad_norm": 0.7890625, "learning_rate": 0.0002003479666542812, "loss": 0.1885, "step": 95170 }, { "epoch": 0.16874782243286443, "grad_norm": 0.28515625, "learning_rate": 0.00020034631371440115, "loss": 0.2206, "step": 95172 }, { "epoch": 0.16875136859817425, "grad_norm": 0.345703125, "learning_rate": 0.00020034466470908362, "loss": 0.1981, "step": 95174 }, { "epoch": 0.16875491476348406, "grad_norm": 0.2138671875, "learning_rate": 0.0002003430196383361, "loss": 0.1714, "step": 95176 }, { "epoch": 0.16875846092879387, "grad_norm": 0.83984375, "learning_rate": 0.00020034137850216555, "loss": 0.1703, "step": 95178 }, { "epoch": 0.1687620070941037, "grad_norm": 0.376953125, "learning_rate": 0.00020033974130057925, "loss": 0.1575, "step": 95180 }, { "epoch": 0.16876555325941353, "grad_norm": 0.51953125, "learning_rate": 0.00020033810803358432, "loss": 0.2193, "step": 95182 }, { "epoch": 0.16876909942472335, "grad_norm": 0.5390625, "learning_rate": 0.00020033647870118802, "loss": 0.2126, "step": 95184 }, { "epoch": 0.16877264559003316, "grad_norm": 0.271484375, "learning_rate": 0.00020033485330339724, "loss": 0.1564, "step": 95186 }, { "epoch": 0.16877619175534297, "grad_norm": 0.546875, "learning_rate": 0.00020033323184021924, "loss": 0.1972, "step": 95188 }, { "epoch": 0.1687797379206528, "grad_norm": 0.3671875, "learning_rate": 0.00020033161431166113, "loss": 0.1947, "step": 95190 }, { "epoch": 0.1687832840859626, "grad_norm": 0.384765625, "learning_rate": 0.00020033000071772994, "loss": 0.1457, "step": 95192 }, { "epoch": 0.16878683025127242, "grad_norm": 1.0625, "learning_rate": 0.0002003283910584328, "loss": 0.1574, "step": 95194 }, { "epoch": 0.16879037641658223, "grad_norm": 2.0, "learning_rate": 0.00020032678533377657, "loss": 0.3019, "step": 95196 }, { "epoch": 0.16879392258189205, "grad_norm": 0.251953125, "learning_rate": 0.00020032518354376843, "loss": 0.195, "step": 95198 }, { "epoch": 0.16879746874720186, "grad_norm": 0.2578125, "learning_rate": 0.0002003235856884154, "loss": 0.3124, "step": 95200 }, { "epoch": 0.16880101491251168, "grad_norm": 0.59765625, "learning_rate": 0.00020032199176772436, "loss": 0.1892, "step": 95202 }, { "epoch": 0.1688045610778215, "grad_norm": 1.609375, "learning_rate": 0.00020032040178170232, "loss": 0.2941, "step": 95204 }, { "epoch": 0.1688081072431313, "grad_norm": 0.28515625, "learning_rate": 0.00020031881573035626, "loss": 0.1351, "step": 95206 }, { "epoch": 0.16881165340844112, "grad_norm": 2.484375, "learning_rate": 0.00020031723361369318, "loss": 0.179, "step": 95208 }, { "epoch": 0.16881519957375093, "grad_norm": 0.75390625, "learning_rate": 0.00020031565543171979, "loss": 0.1775, "step": 95210 }, { "epoch": 0.16881874573906075, "grad_norm": 0.412109375, "learning_rate": 0.00020031408118444325, "loss": 0.1588, "step": 95212 }, { "epoch": 0.16882229190437056, "grad_norm": 1.859375, "learning_rate": 0.00020031251087187027, "loss": 0.2667, "step": 95214 }, { "epoch": 0.16882583806968038, "grad_norm": 0.439453125, "learning_rate": 0.00020031094449400776, "loss": 0.1747, "step": 95216 }, { "epoch": 0.1688293842349902, "grad_norm": 0.24609375, "learning_rate": 0.00020030938205086249, "loss": 0.1371, "step": 95218 }, { "epoch": 0.1688329304003, "grad_norm": 0.349609375, "learning_rate": 0.00020030782354244148, "loss": 0.1626, "step": 95220 }, { "epoch": 0.16883647656560982, "grad_norm": 0.33203125, "learning_rate": 0.0002003062689687515, "loss": 0.1601, "step": 95222 }, { "epoch": 0.16884002273091964, "grad_norm": 1.7734375, "learning_rate": 0.00020030471832979915, "loss": 0.2639, "step": 95224 }, { "epoch": 0.16884356889622945, "grad_norm": 0.404296875, "learning_rate": 0.00020030317162559143, "loss": 0.1677, "step": 95226 }, { "epoch": 0.16884711506153927, "grad_norm": 0.203125, "learning_rate": 0.00020030162885613506, "loss": 0.1432, "step": 95228 }, { "epoch": 0.16885066122684908, "grad_norm": 0.421875, "learning_rate": 0.00020030009002143667, "loss": 0.2446, "step": 95230 }, { "epoch": 0.1688542073921589, "grad_norm": 0.2109375, "learning_rate": 0.00020029855512150325, "loss": 0.2715, "step": 95232 }, { "epoch": 0.1688577535574687, "grad_norm": 1.71875, "learning_rate": 0.00020029702415634118, "loss": 0.2308, "step": 95234 }, { "epoch": 0.16886129972277852, "grad_norm": 0.2197265625, "learning_rate": 0.00020029549712595736, "loss": 0.1611, "step": 95236 }, { "epoch": 0.16886484588808834, "grad_norm": 1.125, "learning_rate": 0.0002002939740303584, "loss": 0.2541, "step": 95238 }, { "epoch": 0.16886839205339815, "grad_norm": 0.42578125, "learning_rate": 0.00020029245486955104, "loss": 0.1656, "step": 95240 }, { "epoch": 0.16887193821870797, "grad_norm": 0.392578125, "learning_rate": 0.0002002909396435419, "loss": 0.1343, "step": 95242 }, { "epoch": 0.16887548438401778, "grad_norm": 1.015625, "learning_rate": 0.00020028942835233752, "loss": 0.2013, "step": 95244 }, { "epoch": 0.1688790305493276, "grad_norm": 0.173828125, "learning_rate": 0.0002002879209959445, "loss": 0.1476, "step": 95246 }, { "epoch": 0.1688825767146374, "grad_norm": 0.53515625, "learning_rate": 0.00020028641757436957, "loss": 0.1608, "step": 95248 }, { "epoch": 0.16888612287994723, "grad_norm": 0.87109375, "learning_rate": 0.0002002849180876193, "loss": 0.1509, "step": 95250 }, { "epoch": 0.16888966904525704, "grad_norm": 0.3125, "learning_rate": 0.00020028342253570008, "loss": 0.173, "step": 95252 }, { "epoch": 0.16889321521056685, "grad_norm": 0.326171875, "learning_rate": 0.00020028193091861866, "loss": 0.1664, "step": 95254 }, { "epoch": 0.16889676137587667, "grad_norm": 0.236328125, "learning_rate": 0.00020028044323638133, "loss": 0.1643, "step": 95256 }, { "epoch": 0.16890030754118648, "grad_norm": 0.54296875, "learning_rate": 0.0002002789594889948, "loss": 0.1722, "step": 95258 }, { "epoch": 0.1689038537064963, "grad_norm": 0.236328125, "learning_rate": 0.00020027747967646547, "loss": 0.2394, "step": 95260 }, { "epoch": 0.1689073998718061, "grad_norm": 0.400390625, "learning_rate": 0.00020027600379879978, "loss": 0.1922, "step": 95262 }, { "epoch": 0.16891094603711593, "grad_norm": 0.28515625, "learning_rate": 0.00020027453185600424, "loss": 0.1695, "step": 95264 }, { "epoch": 0.16891449220242574, "grad_norm": 0.78125, "learning_rate": 0.00020027306384808532, "loss": 0.2967, "step": 95266 }, { "epoch": 0.16891803836773556, "grad_norm": 0.609375, "learning_rate": 0.00020027159977504934, "loss": 0.192, "step": 95268 }, { "epoch": 0.16892158453304537, "grad_norm": 0.8359375, "learning_rate": 0.00020027013963690282, "loss": 0.1986, "step": 95270 }, { "epoch": 0.1689251306983552, "grad_norm": 0.640625, "learning_rate": 0.00020026868343365204, "loss": 0.1739, "step": 95272 }, { "epoch": 0.16892867686366503, "grad_norm": 0.5234375, "learning_rate": 0.00020026723116530338, "loss": 0.1771, "step": 95274 }, { "epoch": 0.16893222302897484, "grad_norm": 0.2734375, "learning_rate": 0.00020026578283186328, "loss": 0.1816, "step": 95276 }, { "epoch": 0.16893576919428466, "grad_norm": 1.2265625, "learning_rate": 0.000200264338433338, "loss": 0.2662, "step": 95278 }, { "epoch": 0.16893931535959447, "grad_norm": 0.37890625, "learning_rate": 0.0002002628979697339, "loss": 0.1819, "step": 95280 }, { "epoch": 0.1689428615249043, "grad_norm": 0.36328125, "learning_rate": 0.00020026146144105718, "loss": 0.1843, "step": 95282 }, { "epoch": 0.1689464076902141, "grad_norm": 0.171875, "learning_rate": 0.00020026002884731425, "loss": 0.1616, "step": 95284 }, { "epoch": 0.16894995385552392, "grad_norm": 0.66796875, "learning_rate": 0.0002002586001885114, "loss": 0.223, "step": 95286 }, { "epoch": 0.16895350002083373, "grad_norm": 1.75, "learning_rate": 0.0002002571754646547, "loss": 0.2714, "step": 95288 }, { "epoch": 0.16895704618614354, "grad_norm": 0.8203125, "learning_rate": 0.00020025575467575048, "loss": 0.1747, "step": 95290 }, { "epoch": 0.16896059235145336, "grad_norm": 0.60546875, "learning_rate": 0.00020025433782180503, "loss": 0.1256, "step": 95292 }, { "epoch": 0.16896413851676317, "grad_norm": 0.4140625, "learning_rate": 0.0002002529249028244, "loss": 0.1606, "step": 95294 }, { "epoch": 0.168967684682073, "grad_norm": 0.6484375, "learning_rate": 0.00020025151591881484, "loss": 0.2884, "step": 95296 }, { "epoch": 0.1689712308473828, "grad_norm": 0.421875, "learning_rate": 0.0002002501108697826, "loss": 0.1045, "step": 95298 }, { "epoch": 0.16897477701269262, "grad_norm": 0.400390625, "learning_rate": 0.0002002487097557336, "loss": 0.2068, "step": 95300 }, { "epoch": 0.16897832317800243, "grad_norm": 0.8671875, "learning_rate": 0.00020024731257667418, "loss": 0.1393, "step": 95302 }, { "epoch": 0.16898186934331225, "grad_norm": 1.0234375, "learning_rate": 0.00020024591933261046, "loss": 0.1718, "step": 95304 }, { "epoch": 0.16898541550862206, "grad_norm": 0.36328125, "learning_rate": 0.00020024453002354836, "loss": 0.2207, "step": 95306 }, { "epoch": 0.16898896167393188, "grad_norm": 0.90234375, "learning_rate": 0.000200243144649494, "loss": 0.2601, "step": 95308 }, { "epoch": 0.1689925078392417, "grad_norm": 0.3984375, "learning_rate": 0.00020024176321045353, "loss": 0.2324, "step": 95310 }, { "epoch": 0.1689960540045515, "grad_norm": 0.296875, "learning_rate": 0.000200240385706433, "loss": 0.1847, "step": 95312 }, { "epoch": 0.16899960016986132, "grad_norm": 0.39453125, "learning_rate": 0.00020023901213743833, "loss": 0.1486, "step": 95314 }, { "epoch": 0.16900314633517113, "grad_norm": 0.6875, "learning_rate": 0.00020023764250347558, "loss": 0.1891, "step": 95316 }, { "epoch": 0.16900669250048095, "grad_norm": 0.515625, "learning_rate": 0.00020023627680455076, "loss": 0.1553, "step": 95318 }, { "epoch": 0.16901023866579076, "grad_norm": 0.396484375, "learning_rate": 0.0002002349150406698, "loss": 0.1706, "step": 95320 }, { "epoch": 0.16901378483110058, "grad_norm": 0.76171875, "learning_rate": 0.00020023355721183862, "loss": 0.1892, "step": 95322 }, { "epoch": 0.1690173309964104, "grad_norm": 0.1884765625, "learning_rate": 0.00020023220331806332, "loss": 0.1377, "step": 95324 }, { "epoch": 0.1690208771617202, "grad_norm": 0.66015625, "learning_rate": 0.0002002308533593496, "loss": 0.2422, "step": 95326 }, { "epoch": 0.16902442332703002, "grad_norm": 0.330078125, "learning_rate": 0.00020022950733570354, "loss": 0.1716, "step": 95328 }, { "epoch": 0.16902796949233984, "grad_norm": 0.34375, "learning_rate": 0.000200228165247131, "loss": 0.2199, "step": 95330 }, { "epoch": 0.16903151565764965, "grad_norm": 0.255859375, "learning_rate": 0.00020022682709363776, "loss": 0.207, "step": 95332 }, { "epoch": 0.16903506182295946, "grad_norm": 0.40234375, "learning_rate": 0.0002002254928752298, "loss": 0.1331, "step": 95334 }, { "epoch": 0.16903860798826928, "grad_norm": 0.427734375, "learning_rate": 0.00020022416259191282, "loss": 0.2414, "step": 95336 }, { "epoch": 0.1690421541535791, "grad_norm": 0.71484375, "learning_rate": 0.00020022283624369276, "loss": 0.1494, "step": 95338 }, { "epoch": 0.1690457003188889, "grad_norm": 0.478515625, "learning_rate": 0.00020022151383057525, "loss": 0.1958, "step": 95340 }, { "epoch": 0.16904924648419872, "grad_norm": 1.125, "learning_rate": 0.00020022019535256627, "loss": 0.1646, "step": 95342 }, { "epoch": 0.16905279264950854, "grad_norm": 1.4140625, "learning_rate": 0.00020021888080967143, "loss": 0.1728, "step": 95344 }, { "epoch": 0.16905633881481835, "grad_norm": 0.55078125, "learning_rate": 0.00020021757020189662, "loss": 0.2035, "step": 95346 }, { "epoch": 0.16905988498012817, "grad_norm": 0.33984375, "learning_rate": 0.00020021626352924755, "loss": 0.2459, "step": 95348 }, { "epoch": 0.16906343114543798, "grad_norm": 0.224609375, "learning_rate": 0.0002002149607917298, "loss": 0.1405, "step": 95350 }, { "epoch": 0.1690669773107478, "grad_norm": 0.57421875, "learning_rate": 0.00020021366198934914, "loss": 0.1612, "step": 95352 }, { "epoch": 0.1690705234760576, "grad_norm": 1.4140625, "learning_rate": 0.00020021236712211132, "loss": 0.2924, "step": 95354 }, { "epoch": 0.16907406964136742, "grad_norm": 0.96484375, "learning_rate": 0.00020021107619002198, "loss": 0.1868, "step": 95356 }, { "epoch": 0.16907761580667724, "grad_norm": 0.357421875, "learning_rate": 0.00020020978919308666, "loss": 0.412, "step": 95358 }, { "epoch": 0.16908116197198705, "grad_norm": 0.54296875, "learning_rate": 0.00020020850613131102, "loss": 0.174, "step": 95360 }, { "epoch": 0.16908470813729687, "grad_norm": 0.265625, "learning_rate": 0.0002002072270047008, "loss": 0.1408, "step": 95362 }, { "epoch": 0.1690882543026067, "grad_norm": 0.201171875, "learning_rate": 0.00020020595181326157, "loss": 0.1385, "step": 95364 }, { "epoch": 0.16909180046791653, "grad_norm": 0.400390625, "learning_rate": 0.0002002046805569987, "loss": 0.2037, "step": 95366 }, { "epoch": 0.16909534663322634, "grad_norm": 0.169921875, "learning_rate": 0.00020020341323591793, "loss": 0.1696, "step": 95368 }, { "epoch": 0.16909889279853615, "grad_norm": 0.416015625, "learning_rate": 0.00020020214985002482, "loss": 0.1526, "step": 95370 }, { "epoch": 0.16910243896384597, "grad_norm": 2.109375, "learning_rate": 0.0002002008903993248, "loss": 0.2489, "step": 95372 }, { "epoch": 0.16910598512915578, "grad_norm": 0.24609375, "learning_rate": 0.0002001996348838234, "loss": 0.1763, "step": 95374 }, { "epoch": 0.1691095312944656, "grad_norm": 0.56640625, "learning_rate": 0.00020019838330352624, "loss": 0.1903, "step": 95376 }, { "epoch": 0.1691130774597754, "grad_norm": 0.22265625, "learning_rate": 0.00020019713565843858, "loss": 0.2347, "step": 95378 }, { "epoch": 0.16911662362508523, "grad_norm": 1.078125, "learning_rate": 0.000200195891948566, "loss": 0.2758, "step": 95380 }, { "epoch": 0.16912016979039504, "grad_norm": 0.6171875, "learning_rate": 0.00020019465217391397, "loss": 0.1643, "step": 95382 }, { "epoch": 0.16912371595570486, "grad_norm": 0.337890625, "learning_rate": 0.00020019341633448776, "loss": 0.194, "step": 95384 }, { "epoch": 0.16912726212101467, "grad_norm": 0.333984375, "learning_rate": 0.00020019218443029303, "loss": 0.2119, "step": 95386 }, { "epoch": 0.16913080828632449, "grad_norm": 0.1923828125, "learning_rate": 0.0002001909564613349, "loss": 0.1698, "step": 95388 }, { "epoch": 0.1691343544516343, "grad_norm": 0.8515625, "learning_rate": 0.0002001897324276188, "loss": 0.2258, "step": 95390 }, { "epoch": 0.16913790061694411, "grad_norm": 0.189453125, "learning_rate": 0.00020018851232915026, "loss": 0.1494, "step": 95392 }, { "epoch": 0.16914144678225393, "grad_norm": 0.337890625, "learning_rate": 0.00020018729616593441, "loss": 0.1122, "step": 95394 }, { "epoch": 0.16914499294756374, "grad_norm": 45.75, "learning_rate": 0.00020018608393797672, "loss": 0.2342, "step": 95396 }, { "epoch": 0.16914853911287356, "grad_norm": 0.267578125, "learning_rate": 0.0002001848756452824, "loss": 0.1475, "step": 95398 }, { "epoch": 0.16915208527818337, "grad_norm": 1.0625, "learning_rate": 0.00020018367128785673, "loss": 0.2711, "step": 95400 }, { "epoch": 0.1691556314434932, "grad_norm": 1.390625, "learning_rate": 0.00020018247086570504, "loss": 0.1967, "step": 95402 }, { "epoch": 0.169159177608803, "grad_norm": 0.47265625, "learning_rate": 0.00020018127437883255, "loss": 0.1558, "step": 95404 }, { "epoch": 0.16916272377411282, "grad_norm": 0.71484375, "learning_rate": 0.00020018008182724447, "loss": 0.1441, "step": 95406 }, { "epoch": 0.16916626993942263, "grad_norm": 0.390625, "learning_rate": 0.0002001788932109461, "loss": 0.1748, "step": 95408 }, { "epoch": 0.16916981610473245, "grad_norm": 1.109375, "learning_rate": 0.00020017770852994249, "loss": 0.3225, "step": 95410 }, { "epoch": 0.16917336227004226, "grad_norm": 0.23828125, "learning_rate": 0.00020017652778423888, "loss": 0.1766, "step": 95412 }, { "epoch": 0.16917690843535207, "grad_norm": 0.359375, "learning_rate": 0.00020017535097384054, "loss": 0.174, "step": 95414 }, { "epoch": 0.1691804546006619, "grad_norm": 0.349609375, "learning_rate": 0.0002001741780987525, "loss": 0.162, "step": 95416 }, { "epoch": 0.1691840007659717, "grad_norm": 0.4296875, "learning_rate": 0.00020017300915897994, "loss": 0.1839, "step": 95418 }, { "epoch": 0.16918754693128152, "grad_norm": 0.353515625, "learning_rate": 0.0002001718441545279, "loss": 0.2051, "step": 95420 }, { "epoch": 0.16919109309659133, "grad_norm": 0.703125, "learning_rate": 0.00020017068308540156, "loss": 0.1608, "step": 95422 }, { "epoch": 0.16919463926190115, "grad_norm": 0.77734375, "learning_rate": 0.00020016952595160599, "loss": 0.1182, "step": 95424 }, { "epoch": 0.16919818542721096, "grad_norm": 0.41015625, "learning_rate": 0.00020016837275314625, "loss": 0.2393, "step": 95426 }, { "epoch": 0.16920173159252078, "grad_norm": 0.66015625, "learning_rate": 0.00020016722349002738, "loss": 0.3195, "step": 95428 }, { "epoch": 0.1692052777578306, "grad_norm": 0.26953125, "learning_rate": 0.00020016607816225428, "loss": 0.1667, "step": 95430 }, { "epoch": 0.1692088239231404, "grad_norm": 0.466796875, "learning_rate": 0.00020016493676983219, "loss": 0.1509, "step": 95432 }, { "epoch": 0.16921237008845022, "grad_norm": 1.9296875, "learning_rate": 0.00020016379931276585, "loss": 0.1751, "step": 95434 }, { "epoch": 0.16921591625376003, "grad_norm": 0.40625, "learning_rate": 0.00020016266579106045, "loss": 0.2172, "step": 95436 }, { "epoch": 0.16921946241906985, "grad_norm": 0.189453125, "learning_rate": 0.0002001615362047209, "loss": 0.1747, "step": 95438 }, { "epoch": 0.16922300858437966, "grad_norm": 0.40234375, "learning_rate": 0.00020016041055375205, "loss": 0.2575, "step": 95440 }, { "epoch": 0.16922655474968948, "grad_norm": 0.9375, "learning_rate": 0.00020015928883815884, "loss": 0.1479, "step": 95442 }, { "epoch": 0.1692301009149993, "grad_norm": 0.5078125, "learning_rate": 0.00020015817105794627, "loss": 0.1269, "step": 95444 }, { "epoch": 0.1692336470803091, "grad_norm": 0.470703125, "learning_rate": 0.00020015705721311919, "loss": 0.1835, "step": 95446 }, { "epoch": 0.16923719324561892, "grad_norm": 0.59765625, "learning_rate": 0.00020015594730368234, "loss": 0.1513, "step": 95448 }, { "epoch": 0.16924073941092874, "grad_norm": 0.380859375, "learning_rate": 0.0002001548413296408, "loss": 0.1811, "step": 95450 }, { "epoch": 0.16924428557623855, "grad_norm": 1.9140625, "learning_rate": 0.00020015373929099922, "loss": 0.2298, "step": 95452 }, { "epoch": 0.1692478317415484, "grad_norm": 0.455078125, "learning_rate": 0.0002001526411877625, "loss": 0.1797, "step": 95454 }, { "epoch": 0.1692513779068582, "grad_norm": 0.21484375, "learning_rate": 0.00020015154701993545, "loss": 0.2217, "step": 95456 }, { "epoch": 0.16925492407216802, "grad_norm": 1.3984375, "learning_rate": 0.0002001504567875229, "loss": 0.2562, "step": 95458 }, { "epoch": 0.16925847023747784, "grad_norm": 2.609375, "learning_rate": 0.00020014937049052945, "loss": 0.1712, "step": 95460 }, { "epoch": 0.16926201640278765, "grad_norm": 0.3125, "learning_rate": 0.00020014828812895996, "loss": 0.1181, "step": 95462 }, { "epoch": 0.16926556256809747, "grad_norm": 0.2421875, "learning_rate": 0.0002001472097028193, "loss": 0.1489, "step": 95464 }, { "epoch": 0.16926910873340728, "grad_norm": 0.263671875, "learning_rate": 0.00020014613521211188, "loss": 0.2225, "step": 95466 }, { "epoch": 0.1692726548987171, "grad_norm": 0.16015625, "learning_rate": 0.00020014506465684264, "loss": 0.1315, "step": 95468 }, { "epoch": 0.1692762010640269, "grad_norm": 1.390625, "learning_rate": 0.0002001439980370162, "loss": 0.2793, "step": 95470 }, { "epoch": 0.16927974722933672, "grad_norm": 0.63671875, "learning_rate": 0.00020014293535263727, "loss": 0.1921, "step": 95472 }, { "epoch": 0.16928329339464654, "grad_norm": 0.2890625, "learning_rate": 0.00020014187660371036, "loss": 0.222, "step": 95474 }, { "epoch": 0.16928683955995635, "grad_norm": 0.474609375, "learning_rate": 0.00020014082179024018, "loss": 0.2829, "step": 95476 }, { "epoch": 0.16929038572526617, "grad_norm": 0.390625, "learning_rate": 0.00020013977091223138, "loss": 0.1848, "step": 95478 }, { "epoch": 0.16929393189057598, "grad_norm": 0.515625, "learning_rate": 0.00020013872396968852, "loss": 0.1524, "step": 95480 }, { "epoch": 0.1692974780558858, "grad_norm": 0.47265625, "learning_rate": 0.00020013768096261613, "loss": 0.1836, "step": 95482 }, { "epoch": 0.1693010242211956, "grad_norm": 0.51171875, "learning_rate": 0.00020013664189101886, "loss": 0.1658, "step": 95484 }, { "epoch": 0.16930457038650543, "grad_norm": 0.77734375, "learning_rate": 0.00020013560675490123, "loss": 0.2257, "step": 95486 }, { "epoch": 0.16930811655181524, "grad_norm": 0.412109375, "learning_rate": 0.00020013457555426772, "loss": 0.161, "step": 95488 }, { "epoch": 0.16931166271712506, "grad_norm": 0.5703125, "learning_rate": 0.0002001335482891229, "loss": 0.1862, "step": 95490 }, { "epoch": 0.16931520888243487, "grad_norm": 0.2275390625, "learning_rate": 0.00020013252495947118, "loss": 0.1896, "step": 95492 }, { "epoch": 0.16931875504774468, "grad_norm": 0.84765625, "learning_rate": 0.00020013150556531716, "loss": 0.2024, "step": 95494 }, { "epoch": 0.1693223012130545, "grad_norm": 0.5703125, "learning_rate": 0.00020013049010666524, "loss": 0.2361, "step": 95496 }, { "epoch": 0.1693258473783643, "grad_norm": 0.2099609375, "learning_rate": 0.00020012947858351977, "loss": 0.2107, "step": 95498 }, { "epoch": 0.16932939354367413, "grad_norm": 0.515625, "learning_rate": 0.00020012847099588535, "loss": 0.325, "step": 95500 }, { "epoch": 0.16933293970898394, "grad_norm": 0.212890625, "learning_rate": 0.00020012746734376626, "loss": 0.1659, "step": 95502 }, { "epoch": 0.16933648587429376, "grad_norm": 0.302734375, "learning_rate": 0.00020012646762716682, "loss": 0.3264, "step": 95504 }, { "epoch": 0.16934003203960357, "grad_norm": 0.2412109375, "learning_rate": 0.00020012547184609152, "loss": 0.1437, "step": 95506 }, { "epoch": 0.1693435782049134, "grad_norm": 0.54296875, "learning_rate": 0.00020012448000054483, "loss": 0.1607, "step": 95508 }, { "epoch": 0.1693471243702232, "grad_norm": 1.9921875, "learning_rate": 0.00020012349209053086, "loss": 0.36, "step": 95510 }, { "epoch": 0.16935067053553302, "grad_norm": 0.478515625, "learning_rate": 0.00020012250811605404, "loss": 0.2313, "step": 95512 }, { "epoch": 0.16935421670084283, "grad_norm": 0.6796875, "learning_rate": 0.00020012152807711864, "loss": 0.2243, "step": 95514 }, { "epoch": 0.16935776286615264, "grad_norm": 0.2431640625, "learning_rate": 0.00020012055197372901, "loss": 0.1821, "step": 95516 }, { "epoch": 0.16936130903146246, "grad_norm": 0.45703125, "learning_rate": 0.0002001195798058894, "loss": 0.1307, "step": 95518 }, { "epoch": 0.16936485519677227, "grad_norm": 0.56640625, "learning_rate": 0.00020011861157360396, "loss": 0.2033, "step": 95520 }, { "epoch": 0.1693684013620821, "grad_norm": 0.2333984375, "learning_rate": 0.000200117647276877, "loss": 0.1666, "step": 95522 }, { "epoch": 0.1693719475273919, "grad_norm": 0.16015625, "learning_rate": 0.00020011668691571278, "loss": 0.1257, "step": 95524 }, { "epoch": 0.16937549369270172, "grad_norm": 0.380859375, "learning_rate": 0.00020011573049011548, "loss": 0.1774, "step": 95526 }, { "epoch": 0.16937903985801153, "grad_norm": 0.310546875, "learning_rate": 0.0002001147780000892, "loss": 0.1665, "step": 95528 }, { "epoch": 0.16938258602332135, "grad_norm": 0.49609375, "learning_rate": 0.00020011382944563812, "loss": 0.1616, "step": 95530 }, { "epoch": 0.16938613218863116, "grad_norm": 0.2265625, "learning_rate": 0.0002001128848267665, "loss": 0.1179, "step": 95532 }, { "epoch": 0.16938967835394098, "grad_norm": 2.609375, "learning_rate": 0.00020011194414347845, "loss": 0.3042, "step": 95534 }, { "epoch": 0.1693932245192508, "grad_norm": 0.38671875, "learning_rate": 0.00020011100739577791, "loss": 0.1516, "step": 95536 }, { "epoch": 0.1693967706845606, "grad_norm": 0.255859375, "learning_rate": 0.00020011007458366927, "loss": 0.1438, "step": 95538 }, { "epoch": 0.16940031684987042, "grad_norm": 0.1591796875, "learning_rate": 0.00020010914570715637, "loss": 0.2064, "step": 95540 }, { "epoch": 0.16940386301518023, "grad_norm": 0.3125, "learning_rate": 0.0002001082207662433, "loss": 0.4374, "step": 95542 }, { "epoch": 0.16940740918049008, "grad_norm": 0.55078125, "learning_rate": 0.0002001072997609342, "loss": 0.1327, "step": 95544 }, { "epoch": 0.1694109553457999, "grad_norm": 1.5078125, "learning_rate": 0.00020010638269123303, "loss": 0.1481, "step": 95546 }, { "epoch": 0.1694145015111097, "grad_norm": 0.68359375, "learning_rate": 0.00020010546955714388, "loss": 0.1951, "step": 95548 }, { "epoch": 0.16941804767641952, "grad_norm": 0.63671875, "learning_rate": 0.00020010456035867067, "loss": 0.1333, "step": 95550 }, { "epoch": 0.16942159384172933, "grad_norm": 0.21875, "learning_rate": 0.0002001036550958173, "loss": 0.1918, "step": 95552 }, { "epoch": 0.16942514000703915, "grad_norm": 0.87890625, "learning_rate": 0.00020010275376858795, "loss": 0.1745, "step": 95554 }, { "epoch": 0.16942868617234896, "grad_norm": 0.1875, "learning_rate": 0.0002001018563769863, "loss": 0.1443, "step": 95556 }, { "epoch": 0.16943223233765878, "grad_norm": 0.53515625, "learning_rate": 0.00020010096292101648, "loss": 0.1761, "step": 95558 }, { "epoch": 0.1694357785029686, "grad_norm": 0.5234375, "learning_rate": 0.00020010007340068233, "loss": 0.2231, "step": 95560 }, { "epoch": 0.1694393246682784, "grad_norm": 0.421875, "learning_rate": 0.00020009918781598775, "loss": 0.2027, "step": 95562 }, { "epoch": 0.16944287083358822, "grad_norm": 0.2236328125, "learning_rate": 0.00020009830616693656, "loss": 0.1833, "step": 95564 }, { "epoch": 0.16944641699889804, "grad_norm": 0.275390625, "learning_rate": 0.0002000974284535327, "loss": 0.2093, "step": 95566 }, { "epoch": 0.16944996316420785, "grad_norm": 0.2177734375, "learning_rate": 0.00020009655467577987, "loss": 0.1567, "step": 95568 }, { "epoch": 0.16945350932951767, "grad_norm": 0.54296875, "learning_rate": 0.0002000956848336821, "loss": 0.4492, "step": 95570 }, { "epoch": 0.16945705549482748, "grad_norm": 0.69921875, "learning_rate": 0.00020009481892724297, "loss": 0.2898, "step": 95572 }, { "epoch": 0.1694606016601373, "grad_norm": 0.890625, "learning_rate": 0.0002000939569564664, "loss": 0.1811, "step": 95574 }, { "epoch": 0.1694641478254471, "grad_norm": 0.404296875, "learning_rate": 0.00020009309892135625, "loss": 0.1862, "step": 95576 }, { "epoch": 0.16946769399075692, "grad_norm": 0.3671875, "learning_rate": 0.00020009224482191607, "loss": 0.1394, "step": 95578 }, { "epoch": 0.16947124015606674, "grad_norm": 0.31640625, "learning_rate": 0.00020009139465814956, "loss": 0.1571, "step": 95580 }, { "epoch": 0.16947478632137655, "grad_norm": 0.7265625, "learning_rate": 0.00020009054843006077, "loss": 0.1953, "step": 95582 }, { "epoch": 0.16947833248668637, "grad_norm": 0.2099609375, "learning_rate": 0.00020008970613765304, "loss": 0.141, "step": 95584 }, { "epoch": 0.16948187865199618, "grad_norm": 0.322265625, "learning_rate": 0.0002000888677809303, "loss": 0.1753, "step": 95586 }, { "epoch": 0.169485424817306, "grad_norm": 1.1171875, "learning_rate": 0.00020008803335989602, "loss": 0.1852, "step": 95588 }, { "epoch": 0.1694889709826158, "grad_norm": 0.4375, "learning_rate": 0.00020008720287455404, "loss": 0.1646, "step": 95590 }, { "epoch": 0.16949251714792563, "grad_norm": 0.2333984375, "learning_rate": 0.00020008637632490793, "loss": 0.2167, "step": 95592 }, { "epoch": 0.16949606331323544, "grad_norm": 1.5390625, "learning_rate": 0.00020008555371096127, "loss": 0.1787, "step": 95594 }, { "epoch": 0.16949960947854525, "grad_norm": 0.3359375, "learning_rate": 0.00020008473503271756, "loss": 0.1465, "step": 95596 }, { "epoch": 0.16950315564385507, "grad_norm": 0.2275390625, "learning_rate": 0.00020008392029018064, "loss": 0.1423, "step": 95598 }, { "epoch": 0.16950670180916488, "grad_norm": 0.30859375, "learning_rate": 0.00020008310948335374, "loss": 0.1736, "step": 95600 }, { "epoch": 0.1695102479744747, "grad_norm": 0.40234375, "learning_rate": 0.0002000823026122407, "loss": 0.2207, "step": 95602 }, { "epoch": 0.1695137941397845, "grad_norm": 0.96484375, "learning_rate": 0.00020008149967684494, "loss": 0.1506, "step": 95604 }, { "epoch": 0.16951734030509433, "grad_norm": 0.5078125, "learning_rate": 0.00020008070067716993, "loss": 0.2824, "step": 95606 }, { "epoch": 0.16952088647040414, "grad_norm": 0.2138671875, "learning_rate": 0.00020007990561321916, "loss": 0.1868, "step": 95608 }, { "epoch": 0.16952443263571396, "grad_norm": 0.294921875, "learning_rate": 0.00020007911448499622, "loss": 0.3081, "step": 95610 }, { "epoch": 0.16952797880102377, "grad_norm": 0.255859375, "learning_rate": 0.00020007832729250442, "loss": 0.1475, "step": 95612 }, { "epoch": 0.16953152496633359, "grad_norm": 0.59375, "learning_rate": 0.0002000775440357474, "loss": 0.2854, "step": 95614 }, { "epoch": 0.1695350711316434, "grad_norm": 0.3125, "learning_rate": 0.00020007676471472835, "loss": 0.1598, "step": 95616 }, { "epoch": 0.16953861729695321, "grad_norm": 0.388671875, "learning_rate": 0.00020007598932945088, "loss": 0.2386, "step": 95618 }, { "epoch": 0.16954216346226303, "grad_norm": 0.51171875, "learning_rate": 0.00020007521787991823, "loss": 0.3813, "step": 95620 }, { "epoch": 0.16954570962757284, "grad_norm": 0.4453125, "learning_rate": 0.00020007445036613388, "loss": 0.2079, "step": 95622 }, { "epoch": 0.16954925579288266, "grad_norm": 0.953125, "learning_rate": 0.0002000736867881011, "loss": 0.2396, "step": 95624 }, { "epoch": 0.16955280195819247, "grad_norm": 2.015625, "learning_rate": 0.0002000729271458233, "loss": 0.2472, "step": 95626 }, { "epoch": 0.1695563481235023, "grad_norm": 0.22265625, "learning_rate": 0.00020007217143930373, "loss": 0.251, "step": 95628 }, { "epoch": 0.1695598942888121, "grad_norm": 0.63671875, "learning_rate": 0.0002000714196685458, "loss": 0.1967, "step": 95630 }, { "epoch": 0.16956344045412192, "grad_norm": 0.4140625, "learning_rate": 0.00020007067183355277, "loss": 0.1833, "step": 95632 }, { "epoch": 0.16956698661943173, "grad_norm": 0.384765625, "learning_rate": 0.00020006992793432786, "loss": 0.257, "step": 95634 }, { "epoch": 0.16957053278474157, "grad_norm": 0.498046875, "learning_rate": 0.00020006918797087434, "loss": 0.1582, "step": 95636 }, { "epoch": 0.1695740789500514, "grad_norm": 0.365234375, "learning_rate": 0.00020006845194319543, "loss": 0.1816, "step": 95638 }, { "epoch": 0.1695776251153612, "grad_norm": 0.51953125, "learning_rate": 0.00020006771985129433, "loss": 0.1964, "step": 95640 }, { "epoch": 0.16958117128067102, "grad_norm": 0.6796875, "learning_rate": 0.00020006699169517435, "loss": 0.18, "step": 95642 }, { "epoch": 0.16958471744598083, "grad_norm": 0.2353515625, "learning_rate": 0.00020006626747483852, "loss": 0.2331, "step": 95644 }, { "epoch": 0.16958826361129065, "grad_norm": 1.0234375, "learning_rate": 0.0002000655471902902, "loss": 0.219, "step": 95646 }, { "epoch": 0.16959180977660046, "grad_norm": 0.326171875, "learning_rate": 0.00020006483084153241, "loss": 0.1504, "step": 95648 }, { "epoch": 0.16959535594191028, "grad_norm": 0.26953125, "learning_rate": 0.0002000641184285682, "loss": 0.151, "step": 95650 }, { "epoch": 0.1695989021072201, "grad_norm": 0.53515625, "learning_rate": 0.00020006340995140095, "loss": 0.1498, "step": 95652 }, { "epoch": 0.1696024482725299, "grad_norm": 1.15625, "learning_rate": 0.0002000627054100335, "loss": 0.1233, "step": 95654 }, { "epoch": 0.16960599443783972, "grad_norm": 0.86328125, "learning_rate": 0.00020006200480446903, "loss": 0.1246, "step": 95656 }, { "epoch": 0.16960954060314953, "grad_norm": 1.8046875, "learning_rate": 0.00020006130813471059, "loss": 0.3415, "step": 95658 }, { "epoch": 0.16961308676845935, "grad_norm": 0.69140625, "learning_rate": 0.00020006061540076125, "loss": 0.2135, "step": 95660 }, { "epoch": 0.16961663293376916, "grad_norm": 0.2041015625, "learning_rate": 0.00020005992660262413, "loss": 0.1391, "step": 95662 }, { "epoch": 0.16962017909907898, "grad_norm": 0.28515625, "learning_rate": 0.00020005924174030208, "loss": 0.2794, "step": 95664 }, { "epoch": 0.1696237252643888, "grad_norm": 1.09375, "learning_rate": 0.0002000585608137981, "loss": 0.283, "step": 95666 }, { "epoch": 0.1696272714296986, "grad_norm": 1.4375, "learning_rate": 0.0002000578838231153, "loss": 0.4607, "step": 95668 }, { "epoch": 0.16963081759500842, "grad_norm": 0.6015625, "learning_rate": 0.00020005721076825654, "loss": 0.2125, "step": 95670 }, { "epoch": 0.16963436376031824, "grad_norm": 0.3046875, "learning_rate": 0.00020005654164922476, "loss": 0.131, "step": 95672 }, { "epoch": 0.16963790992562805, "grad_norm": 0.3671875, "learning_rate": 0.00020005587646602295, "loss": 0.2216, "step": 95674 }, { "epoch": 0.16964145609093786, "grad_norm": 0.314453125, "learning_rate": 0.00020005521521865397, "loss": 0.2268, "step": 95676 }, { "epoch": 0.16964500225624768, "grad_norm": 0.41015625, "learning_rate": 0.00020005455790712078, "loss": 0.2619, "step": 95678 }, { "epoch": 0.1696485484215575, "grad_norm": 0.58203125, "learning_rate": 0.00020005390453142622, "loss": 0.1334, "step": 95680 }, { "epoch": 0.1696520945868673, "grad_norm": 0.248046875, "learning_rate": 0.000200053255091573, "loss": 0.1624, "step": 95682 }, { "epoch": 0.16965564075217712, "grad_norm": 0.31640625, "learning_rate": 0.0002000526095875642, "loss": 0.2193, "step": 95684 }, { "epoch": 0.16965918691748694, "grad_norm": 0.11376953125, "learning_rate": 0.00020005196801940248, "loss": 0.1573, "step": 95686 }, { "epoch": 0.16966273308279675, "grad_norm": 1.8671875, "learning_rate": 0.00020005133038709073, "loss": 0.1906, "step": 95688 }, { "epoch": 0.16966627924810657, "grad_norm": 0.8671875, "learning_rate": 0.0002000506966906317, "loss": 0.1953, "step": 95690 }, { "epoch": 0.16966982541341638, "grad_norm": 0.279296875, "learning_rate": 0.00020005006693002826, "loss": 0.2117, "step": 95692 }, { "epoch": 0.1696733715787262, "grad_norm": 0.62890625, "learning_rate": 0.00020004944110528294, "loss": 0.1689, "step": 95694 }, { "epoch": 0.169676917744036, "grad_norm": 0.1943359375, "learning_rate": 0.00020004881921639864, "loss": 0.1893, "step": 95696 }, { "epoch": 0.16968046390934582, "grad_norm": 0.439453125, "learning_rate": 0.0002000482012633781, "loss": 0.1883, "step": 95698 }, { "epoch": 0.16968401007465564, "grad_norm": 0.1728515625, "learning_rate": 0.00020004758724622397, "loss": 0.1781, "step": 95700 }, { "epoch": 0.16968755623996545, "grad_norm": 0.42578125, "learning_rate": 0.0002000469771649389, "loss": 0.1774, "step": 95702 }, { "epoch": 0.16969110240527527, "grad_norm": 0.328125, "learning_rate": 0.0002000463710195256, "loss": 0.1249, "step": 95704 }, { "epoch": 0.16969464857058508, "grad_norm": 0.609375, "learning_rate": 0.0002000457688099867, "loss": 0.1229, "step": 95706 }, { "epoch": 0.1696981947358949, "grad_norm": 0.23828125, "learning_rate": 0.00020004517053632488, "loss": 0.169, "step": 95708 }, { "epoch": 0.1697017409012047, "grad_norm": 0.50390625, "learning_rate": 0.00020004457619854276, "loss": 0.2743, "step": 95710 }, { "epoch": 0.16970528706651453, "grad_norm": 0.83984375, "learning_rate": 0.0002000439857966428, "loss": 0.1719, "step": 95712 }, { "epoch": 0.16970883323182434, "grad_norm": 0.70703125, "learning_rate": 0.00020004339933062787, "loss": 0.1545, "step": 95714 }, { "epoch": 0.16971237939713416, "grad_norm": 0.345703125, "learning_rate": 0.00020004281680050025, "loss": 0.1334, "step": 95716 }, { "epoch": 0.16971592556244397, "grad_norm": 0.357421875, "learning_rate": 0.0002000422382062625, "loss": 0.3459, "step": 95718 }, { "epoch": 0.16971947172775378, "grad_norm": 1.1484375, "learning_rate": 0.00020004166354791738, "loss": 0.2046, "step": 95720 }, { "epoch": 0.1697230178930636, "grad_norm": 0.294921875, "learning_rate": 0.0002000410928254672, "loss": 0.1739, "step": 95722 }, { "epoch": 0.1697265640583734, "grad_norm": 0.404296875, "learning_rate": 0.0002000405260389145, "loss": 0.2499, "step": 95724 }, { "epoch": 0.16973011022368326, "grad_norm": 0.3046875, "learning_rate": 0.00020003996318826192, "loss": 0.1851, "step": 95726 }, { "epoch": 0.16973365638899307, "grad_norm": 0.50390625, "learning_rate": 0.00020003940427351164, "loss": 0.1433, "step": 95728 }, { "epoch": 0.16973720255430289, "grad_norm": 0.56640625, "learning_rate": 0.00020003884929466626, "loss": 0.2354, "step": 95730 }, { "epoch": 0.1697407487196127, "grad_norm": 0.416015625, "learning_rate": 0.00020003829825172828, "loss": 0.1829, "step": 95732 }, { "epoch": 0.16974429488492251, "grad_norm": 0.5078125, "learning_rate": 0.0002000377511446999, "loss": 0.126, "step": 95734 }, { "epoch": 0.16974784105023233, "grad_norm": 0.36328125, "learning_rate": 0.00020003720797358372, "loss": 0.2042, "step": 95736 }, { "epoch": 0.16975138721554214, "grad_norm": 0.31640625, "learning_rate": 0.00020003666873838203, "loss": 0.1763, "step": 95738 }, { "epoch": 0.16975493338085196, "grad_norm": 0.1376953125, "learning_rate": 0.00020003613343909713, "loss": 0.2964, "step": 95740 }, { "epoch": 0.16975847954616177, "grad_norm": 0.2158203125, "learning_rate": 0.00020003560207573152, "loss": 0.1345, "step": 95742 }, { "epoch": 0.1697620257114716, "grad_norm": 0.236328125, "learning_rate": 0.0002000350746482874, "loss": 0.159, "step": 95744 }, { "epoch": 0.1697655718767814, "grad_norm": 0.470703125, "learning_rate": 0.00020003455115676707, "loss": 0.1854, "step": 95746 }, { "epoch": 0.16976911804209122, "grad_norm": 0.7109375, "learning_rate": 0.0002000340316011728, "loss": 0.2412, "step": 95748 }, { "epoch": 0.16977266420740103, "grad_norm": 1.0625, "learning_rate": 0.00020003351598150697, "loss": 0.3075, "step": 95750 }, { "epoch": 0.16977621037271085, "grad_norm": 0.275390625, "learning_rate": 0.00020003300429777176, "loss": 0.2053, "step": 95752 }, { "epoch": 0.16977975653802066, "grad_norm": 0.494140625, "learning_rate": 0.00020003249654996946, "loss": 0.181, "step": 95754 }, { "epoch": 0.16978330270333047, "grad_norm": 1.1171875, "learning_rate": 0.00020003199273810224, "loss": 0.1851, "step": 95756 }, { "epoch": 0.1697868488686403, "grad_norm": 0.1796875, "learning_rate": 0.00020003149286217228, "loss": 0.1302, "step": 95758 }, { "epoch": 0.1697903950339501, "grad_norm": 0.828125, "learning_rate": 0.0002000309969221819, "loss": 0.2299, "step": 95760 }, { "epoch": 0.16979394119925992, "grad_norm": 0.51953125, "learning_rate": 0.00020003050491813312, "loss": 0.1958, "step": 95762 }, { "epoch": 0.16979748736456973, "grad_norm": 0.4609375, "learning_rate": 0.0002000300168500281, "loss": 0.1327, "step": 95764 }, { "epoch": 0.16980103352987955, "grad_norm": 0.2392578125, "learning_rate": 0.00020002953271786912, "loss": 0.1948, "step": 95766 }, { "epoch": 0.16980457969518936, "grad_norm": 0.4765625, "learning_rate": 0.00020002905252165812, "loss": 0.1577, "step": 95768 }, { "epoch": 0.16980812586049918, "grad_norm": 0.69921875, "learning_rate": 0.00020002857626139731, "loss": 0.1578, "step": 95770 }, { "epoch": 0.169811672025809, "grad_norm": 1.421875, "learning_rate": 0.00020002810393708872, "loss": 0.3649, "step": 95772 }, { "epoch": 0.1698152181911188, "grad_norm": 0.40234375, "learning_rate": 0.00020002763554873444, "loss": 0.1596, "step": 95774 }, { "epoch": 0.16981876435642862, "grad_norm": 0.1962890625, "learning_rate": 0.0002000271710963366, "loss": 0.1129, "step": 95776 }, { "epoch": 0.16982231052173843, "grad_norm": 1.0, "learning_rate": 0.00020002671057989702, "loss": 0.2373, "step": 95778 }, { "epoch": 0.16982585668704825, "grad_norm": 1.203125, "learning_rate": 0.00020002625399941787, "loss": 0.353, "step": 95780 }, { "epoch": 0.16982940285235806, "grad_norm": 0.1728515625, "learning_rate": 0.00020002580135490122, "loss": 0.1573, "step": 95782 }, { "epoch": 0.16983294901766788, "grad_norm": 0.134765625, "learning_rate": 0.00020002535264634891, "loss": 0.1407, "step": 95784 }, { "epoch": 0.1698364951829777, "grad_norm": 0.40625, "learning_rate": 0.00020002490787376286, "loss": 0.1861, "step": 95786 }, { "epoch": 0.1698400413482875, "grad_norm": 0.65625, "learning_rate": 0.00020002446703714513, "loss": 0.2353, "step": 95788 }, { "epoch": 0.16984358751359732, "grad_norm": 0.66796875, "learning_rate": 0.00020002403013649766, "loss": 0.1831, "step": 95790 }, { "epoch": 0.16984713367890714, "grad_norm": 0.93359375, "learning_rate": 0.00020002359717182227, "loss": 0.1726, "step": 95792 }, { "epoch": 0.16985067984421695, "grad_norm": 0.78515625, "learning_rate": 0.0002000231681431209, "loss": 0.3302, "step": 95794 }, { "epoch": 0.16985422600952677, "grad_norm": 1.21875, "learning_rate": 0.00020002274305039536, "loss": 0.3313, "step": 95796 }, { "epoch": 0.16985777217483658, "grad_norm": 0.482421875, "learning_rate": 0.0002000223218936477, "loss": 0.233, "step": 95798 }, { "epoch": 0.1698613183401464, "grad_norm": 0.8203125, "learning_rate": 0.00020002190467287957, "loss": 0.1474, "step": 95800 }, { "epoch": 0.1698648645054562, "grad_norm": 0.6015625, "learning_rate": 0.00020002149138809286, "loss": 0.1651, "step": 95802 }, { "epoch": 0.16986841067076602, "grad_norm": 0.765625, "learning_rate": 0.00020002108203928944, "loss": 0.2095, "step": 95804 }, { "epoch": 0.16987195683607584, "grad_norm": 0.404296875, "learning_rate": 0.0002000206766264709, "loss": 0.1585, "step": 95806 }, { "epoch": 0.16987550300138565, "grad_norm": 0.9140625, "learning_rate": 0.0002000202751496392, "loss": 0.1414, "step": 95808 }, { "epoch": 0.16987904916669547, "grad_norm": 0.490234375, "learning_rate": 0.00020001987760879617, "loss": 0.1635, "step": 95810 }, { "epoch": 0.16988259533200528, "grad_norm": 0.5078125, "learning_rate": 0.00020001948400394327, "loss": 0.1628, "step": 95812 }, { "epoch": 0.1698861414973151, "grad_norm": 0.58203125, "learning_rate": 0.00020001909433508248, "loss": 0.199, "step": 95814 }, { "epoch": 0.16988968766262494, "grad_norm": 0.859375, "learning_rate": 0.00020001870860221534, "loss": 0.2725, "step": 95816 }, { "epoch": 0.16989323382793475, "grad_norm": 0.640625, "learning_rate": 0.00020001832680534363, "loss": 0.1524, "step": 95818 }, { "epoch": 0.16989677999324457, "grad_norm": 0.388671875, "learning_rate": 0.00020001794894446894, "loss": 0.1748, "step": 95820 }, { "epoch": 0.16990032615855438, "grad_norm": 0.35546875, "learning_rate": 0.00020001757501959298, "loss": 0.2041, "step": 95822 }, { "epoch": 0.1699038723238642, "grad_norm": 0.416015625, "learning_rate": 0.00020001720503071743, "loss": 0.1832, "step": 95824 }, { "epoch": 0.169907418489174, "grad_norm": 0.55859375, "learning_rate": 0.0002000168389778438, "loss": 0.2141, "step": 95826 }, { "epoch": 0.16991096465448383, "grad_norm": 3.078125, "learning_rate": 0.00020001647686097382, "loss": 0.4205, "step": 95828 }, { "epoch": 0.16991451081979364, "grad_norm": 0.2578125, "learning_rate": 0.00020001611868010886, "loss": 0.1754, "step": 95830 }, { "epoch": 0.16991805698510346, "grad_norm": 0.2265625, "learning_rate": 0.00020001576443525072, "loss": 0.2251, "step": 95832 }, { "epoch": 0.16992160315041327, "grad_norm": 0.326171875, "learning_rate": 0.0002000154141264009, "loss": 0.1595, "step": 95834 }, { "epoch": 0.16992514931572308, "grad_norm": 0.5546875, "learning_rate": 0.00020001506775356077, "loss": 0.1681, "step": 95836 }, { "epoch": 0.1699286954810329, "grad_norm": 0.1494140625, "learning_rate": 0.00020001472531673198, "loss": 0.146, "step": 95838 }, { "epoch": 0.1699322416463427, "grad_norm": 0.25, "learning_rate": 0.00020001438681591612, "loss": 0.1754, "step": 95840 }, { "epoch": 0.16993578781165253, "grad_norm": 0.30859375, "learning_rate": 0.00020001405225111445, "loss": 0.1804, "step": 95842 }, { "epoch": 0.16993933397696234, "grad_norm": 0.474609375, "learning_rate": 0.0002000137216223286, "loss": 0.1478, "step": 95844 }, { "epoch": 0.16994288014227216, "grad_norm": 1.8046875, "learning_rate": 0.00020001339492955986, "loss": 0.1924, "step": 95846 }, { "epoch": 0.16994642630758197, "grad_norm": 1.390625, "learning_rate": 0.00020001307217280983, "loss": 0.2049, "step": 95848 }, { "epoch": 0.16994997247289179, "grad_norm": 1.4921875, "learning_rate": 0.00020001275335207992, "loss": 0.6807, "step": 95850 }, { "epoch": 0.1699535186382016, "grad_norm": 0.34765625, "learning_rate": 0.0002000124384673713, "loss": 0.4166, "step": 95852 }, { "epoch": 0.16995706480351142, "grad_norm": 0.20703125, "learning_rate": 0.00020001212751868562, "loss": 0.1549, "step": 95854 }, { "epoch": 0.16996061096882123, "grad_norm": 0.5546875, "learning_rate": 0.000200011820506024, "loss": 0.181, "step": 95856 }, { "epoch": 0.16996415713413104, "grad_norm": 0.35546875, "learning_rate": 0.00020001151742938804, "loss": 0.2071, "step": 95858 }, { "epoch": 0.16996770329944086, "grad_norm": 1.46875, "learning_rate": 0.00020001121828877884, "loss": 0.234, "step": 95860 }, { "epoch": 0.16997124946475067, "grad_norm": 0.75390625, "learning_rate": 0.00020001092308419784, "loss": 0.186, "step": 95862 }, { "epoch": 0.1699747956300605, "grad_norm": 0.318359375, "learning_rate": 0.00020001063181564627, "loss": 0.1766, "step": 95864 }, { "epoch": 0.1699783417953703, "grad_norm": 0.30078125, "learning_rate": 0.0002000103444831254, "loss": 0.1632, "step": 95866 }, { "epoch": 0.16998188796068012, "grad_norm": 0.796875, "learning_rate": 0.0002000100610866365, "loss": 0.2069, "step": 95868 }, { "epoch": 0.16998543412598993, "grad_norm": 0.37109375, "learning_rate": 0.00020000978162618087, "loss": 0.1679, "step": 95870 }, { "epoch": 0.16998898029129975, "grad_norm": 0.2451171875, "learning_rate": 0.00020000950610175966, "loss": 0.1533, "step": 95872 }, { "epoch": 0.16999252645660956, "grad_norm": 0.35546875, "learning_rate": 0.0002000092345133741, "loss": 0.1775, "step": 95874 }, { "epoch": 0.16999607262191938, "grad_norm": 0.48046875, "learning_rate": 0.00020000896686102532, "loss": 0.1739, "step": 95876 }, { "epoch": 0.1699996187872292, "grad_norm": 0.40234375, "learning_rate": 0.00020000870314471455, "loss": 0.1877, "step": 95878 }, { "epoch": 0.170003164952539, "grad_norm": 0.62890625, "learning_rate": 0.00020000844336444292, "loss": 0.1856, "step": 95880 }, { "epoch": 0.17000671111784882, "grad_norm": 0.515625, "learning_rate": 0.00020000818752021163, "loss": 0.1516, "step": 95882 }, { "epoch": 0.17001025728315863, "grad_norm": 0.412109375, "learning_rate": 0.00020000793561202182, "loss": 0.2548, "step": 95884 }, { "epoch": 0.17001380344846845, "grad_norm": 0.2734375, "learning_rate": 0.0002000076876398744, "loss": 0.1353, "step": 95886 }, { "epoch": 0.17001734961377826, "grad_norm": 0.333984375, "learning_rate": 0.00020000744360377056, "loss": 0.1876, "step": 95888 }, { "epoch": 0.17002089577908808, "grad_norm": 0.349609375, "learning_rate": 0.00020000720350371153, "loss": 0.1833, "step": 95890 }, { "epoch": 0.1700244419443979, "grad_norm": 0.59375, "learning_rate": 0.00020000696733969809, "loss": 0.1551, "step": 95892 }, { "epoch": 0.1700279881097077, "grad_norm": 0.46875, "learning_rate": 0.00020000673511173147, "loss": 0.1455, "step": 95894 }, { "epoch": 0.17003153427501752, "grad_norm": 0.515625, "learning_rate": 0.0002000065068198126, "loss": 0.1935, "step": 95896 }, { "epoch": 0.17003508044032734, "grad_norm": 0.82421875, "learning_rate": 0.0002000062824639424, "loss": 0.1815, "step": 95898 }, { "epoch": 0.17003862660563715, "grad_norm": 1.9765625, "learning_rate": 0.000200006062044122, "loss": 0.1709, "step": 95900 }, { "epoch": 0.17004217277094696, "grad_norm": 0.337890625, "learning_rate": 0.0002000058455603524, "loss": 0.0982, "step": 95902 }, { "epoch": 0.17004571893625678, "grad_norm": 0.3515625, "learning_rate": 0.00020000563301263426, "loss": 0.1953, "step": 95904 }, { "epoch": 0.1700492651015666, "grad_norm": 0.435546875, "learning_rate": 0.0002000054244009688, "loss": 0.2078, "step": 95906 }, { "epoch": 0.17005281126687644, "grad_norm": 2.078125, "learning_rate": 0.00020000521972535684, "loss": 0.267, "step": 95908 }, { "epoch": 0.17005635743218625, "grad_norm": 0.49609375, "learning_rate": 0.0002000050189857993, "loss": 0.1859, "step": 95910 }, { "epoch": 0.17005990359749606, "grad_norm": 0.49609375, "learning_rate": 0.00020000482218229698, "loss": 0.1754, "step": 95912 }, { "epoch": 0.17006344976280588, "grad_norm": 0.51171875, "learning_rate": 0.00020000462931485087, "loss": 0.1905, "step": 95914 }, { "epoch": 0.1700669959281157, "grad_norm": 0.126953125, "learning_rate": 0.00020000444038346162, "loss": 0.1402, "step": 95916 }, { "epoch": 0.1700705420934255, "grad_norm": 0.271484375, "learning_rate": 0.00020000425538813029, "loss": 0.2136, "step": 95918 }, { "epoch": 0.17007408825873532, "grad_norm": 0.427734375, "learning_rate": 0.0002000040743288575, "loss": 0.182, "step": 95920 }, { "epoch": 0.17007763442404514, "grad_norm": 1.2109375, "learning_rate": 0.0002000038972056441, "loss": 0.1684, "step": 95922 }, { "epoch": 0.17008118058935495, "grad_norm": 0.462890625, "learning_rate": 0.0002000037240184909, "loss": 0.2122, "step": 95924 }, { "epoch": 0.17008472675466477, "grad_norm": 0.435546875, "learning_rate": 0.00020000355476739855, "loss": 0.1285, "step": 95926 }, { "epoch": 0.17008827291997458, "grad_norm": 0.294921875, "learning_rate": 0.00020000338945236798, "loss": 0.1389, "step": 95928 }, { "epoch": 0.1700918190852844, "grad_norm": 0.47265625, "learning_rate": 0.00020000322807339971, "loss": 0.1483, "step": 95930 }, { "epoch": 0.1700953652505942, "grad_norm": 0.2001953125, "learning_rate": 0.0002000030706304946, "loss": 0.2073, "step": 95932 }, { "epoch": 0.17009891141590402, "grad_norm": 0.77734375, "learning_rate": 0.00020000291712365327, "loss": 0.2605, "step": 95934 }, { "epoch": 0.17010245758121384, "grad_norm": 0.1669921875, "learning_rate": 0.0002000027675528764, "loss": 0.1489, "step": 95936 }, { "epoch": 0.17010600374652365, "grad_norm": 0.1943359375, "learning_rate": 0.0002000026219181646, "loss": 0.148, "step": 95938 }, { "epoch": 0.17010954991183347, "grad_norm": 0.2431640625, "learning_rate": 0.0002000024802195186, "loss": 0.1348, "step": 95940 }, { "epoch": 0.17011309607714328, "grad_norm": 1.015625, "learning_rate": 0.00020000234245693893, "loss": 0.166, "step": 95942 }, { "epoch": 0.1701166422424531, "grad_norm": 0.125, "learning_rate": 0.00020000220863042637, "loss": 0.1118, "step": 95944 }, { "epoch": 0.1701201884077629, "grad_norm": 0.19140625, "learning_rate": 0.00020000207873998116, "loss": 0.1416, "step": 95946 }, { "epoch": 0.17012373457307273, "grad_norm": 0.65625, "learning_rate": 0.00020000195278560424, "loss": 0.3453, "step": 95948 }, { "epoch": 0.17012728073838254, "grad_norm": 0.482421875, "learning_rate": 0.00020000183076729588, "loss": 0.218, "step": 95950 }, { "epoch": 0.17013082690369236, "grad_norm": 0.2734375, "learning_rate": 0.0002000017126850568, "loss": 0.1846, "step": 95952 }, { "epoch": 0.17013437306900217, "grad_norm": 0.56640625, "learning_rate": 0.00020000159853888748, "loss": 0.1969, "step": 95954 }, { "epoch": 0.17013791923431199, "grad_norm": 0.2099609375, "learning_rate": 0.00020000148832878835, "loss": 0.168, "step": 95956 }, { "epoch": 0.1701414653996218, "grad_norm": 0.240234375, "learning_rate": 0.00020000138205475995, "loss": 0.1764, "step": 95958 }, { "epoch": 0.17014501156493161, "grad_norm": 0.578125, "learning_rate": 0.00020000127971680264, "loss": 0.1547, "step": 95960 }, { "epoch": 0.17014855773024143, "grad_norm": 0.267578125, "learning_rate": 0.00020000118131491702, "loss": 0.167, "step": 95962 }, { "epoch": 0.17015210389555124, "grad_norm": 0.498046875, "learning_rate": 0.00020000108684910343, "loss": 0.1707, "step": 95964 }, { "epoch": 0.17015565006086106, "grad_norm": 0.1630859375, "learning_rate": 0.00020000099631936234, "loss": 0.1616, "step": 95966 }, { "epoch": 0.17015919622617087, "grad_norm": 0.427734375, "learning_rate": 0.0002000009097256941, "loss": 0.1525, "step": 95968 }, { "epoch": 0.1701627423914807, "grad_norm": 0.33203125, "learning_rate": 0.00020000082706809902, "loss": 0.1746, "step": 95970 }, { "epoch": 0.1701662885567905, "grad_norm": 0.5390625, "learning_rate": 0.00020000074834657761, "loss": 0.1601, "step": 95972 }, { "epoch": 0.17016983472210032, "grad_norm": 0.6015625, "learning_rate": 0.0002000006735611302, "loss": 0.1467, "step": 95974 }, { "epoch": 0.17017338088741013, "grad_norm": 0.1845703125, "learning_rate": 0.0002000006027117569, "loss": 0.1847, "step": 95976 }, { "epoch": 0.17017692705271995, "grad_norm": 0.2578125, "learning_rate": 0.00020000053579845832, "loss": 0.1692, "step": 95978 }, { "epoch": 0.17018047321802976, "grad_norm": 0.228515625, "learning_rate": 0.00020000047282123457, "loss": 0.1568, "step": 95980 }, { "epoch": 0.17018401938333957, "grad_norm": 0.95703125, "learning_rate": 0.00020000041378008602, "loss": 0.1627, "step": 95982 }, { "epoch": 0.1701875655486494, "grad_norm": 0.71875, "learning_rate": 0.00020000035867501282, "loss": 0.1803, "step": 95984 }, { "epoch": 0.1701911117139592, "grad_norm": 0.3046875, "learning_rate": 0.0002000003075060153, "loss": 0.1829, "step": 95986 }, { "epoch": 0.17019465787926902, "grad_norm": 0.90234375, "learning_rate": 0.0002000002602730936, "loss": 0.214, "step": 95988 }, { "epoch": 0.17019820404457883, "grad_norm": 0.314453125, "learning_rate": 0.00020000021697624799, "loss": 0.1355, "step": 95990 }, { "epoch": 0.17020175020988865, "grad_norm": 1.2265625, "learning_rate": 0.00020000017761547873, "loss": 0.2936, "step": 95992 }, { "epoch": 0.17020529637519846, "grad_norm": 0.3515625, "learning_rate": 0.00020000014219078585, "loss": 0.1566, "step": 95994 }, { "epoch": 0.17020884254050828, "grad_norm": 1.0390625, "learning_rate": 0.00020000011070216955, "loss": 0.198, "step": 95996 }, { "epoch": 0.17021238870581812, "grad_norm": 0.70703125, "learning_rate": 0.00020000008314963, "loss": 0.2176, "step": 95998 }, { "epoch": 0.17021593487112793, "grad_norm": 0.60546875, "learning_rate": 0.0002000000595331673, "loss": 0.2789, "step": 96000 } ], "logging_steps": 2, "max_steps": 96010, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.550634085085807e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }