{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999462510077936, "eval_steps": 500, "global_step": 9302, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.52533925718378, "learning_rate": 3.5714285714285714e-06, "loss": 2.1829, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.8917807391691538, "learning_rate": 7.142857142857143e-06, "loss": 1.9838, "step": 2 }, { "epoch": 0.0, "grad_norm": 2.0748449554888, "learning_rate": 1.0714285714285714e-05, "loss": 1.9405, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.6850439438507572, "learning_rate": 1.4285714285714285e-05, "loss": 2.0572, "step": 4 }, { "epoch": 0.0, "grad_norm": 5.233502306710884, "learning_rate": 1.7857142857142855e-05, "loss": 2.017, "step": 5 }, { "epoch": 0.0, "grad_norm": 2.1190437540863045, "learning_rate": 2.1428571428571428e-05, "loss": 2.1505, "step": 6 }, { "epoch": 0.0, "grad_norm": 78.75075566973746, "learning_rate": 2.5e-05, "loss": 1.9595, "step": 7 }, { "epoch": 0.0, "grad_norm": 5.491814243878795, "learning_rate": 2.857142857142857e-05, "loss": 1.9188, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.0583872857696868, "learning_rate": 3.214285714285714e-05, "loss": 2.2103, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.885577656564717, "learning_rate": 3.571428571428571e-05, "loss": 2.0415, "step": 10 }, { "epoch": 0.0, "grad_norm": 2.4388687007921046, "learning_rate": 3.928571428571428e-05, "loss": 2.0554, "step": 11 }, { "epoch": 0.0, "grad_norm": 0.5874308565723774, "learning_rate": 4.2857142857142856e-05, "loss": 1.9838, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.7501093864144527, "learning_rate": 4.642857142857143e-05, "loss": 2.103, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.7408455109981018, "learning_rate": 5e-05, "loss": 1.8546, "step": 14 }, { "epoch": 0.0, "grad_norm": 1.0611197819252878, "learning_rate": 5.357142857142857e-05, "loss": 1.9603, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.9561526566868136, "learning_rate": 5.714285714285714e-05, "loss": 1.9864, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.8141552130974743, "learning_rate": 6.0714285714285715e-05, "loss": 1.8761, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.9098149261285031, "learning_rate": 6.428571428571427e-05, "loss": 1.767, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.8866212598000296, "learning_rate": 6.785714285714285e-05, "loss": 2.1052, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.1741636005388276, "learning_rate": 7.142857142857142e-05, "loss": 1.9065, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.3333604730387096, "learning_rate": 7.5e-05, "loss": 2.1443, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.3112663647673257, "learning_rate": 7.857142857142857e-05, "loss": 1.9296, "step": 22 }, { "epoch": 0.0, "grad_norm": 1.4304082960837077, "learning_rate": 8.214285714285714e-05, "loss": 1.8629, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.5095743351765647, "learning_rate": 8.571428571428571e-05, "loss": 2.096, "step": 24 }, { "epoch": 0.0, "grad_norm": 3.5458230630716185, "learning_rate": 8.928571428571429e-05, "loss": 2.0096, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.761767871209414, "learning_rate": 9.285714285714286e-05, "loss": 1.9721, "step": 26 }, { "epoch": 0.0, "grad_norm": 5.81257268388485, "learning_rate": 9.642857142857143e-05, "loss": 2.0577, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.486947255640102, "learning_rate": 0.0001, "loss": 1.916, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.0720761542391175, "learning_rate": 0.00010357142857142858, "loss": 1.9207, "step": 29 }, { "epoch": 0.0, "grad_norm": 1.6505238857303257, "learning_rate": 0.00010714285714285714, "loss": 2.0371, "step": 30 }, { "epoch": 0.0, "grad_norm": 9.291313495395931, "learning_rate": 0.00011071428571428571, "loss": 2.0557, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.7313893024574583, "learning_rate": 0.00011428571428571428, "loss": 1.9889, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.5935351470037225, "learning_rate": 0.00011785714285714286, "loss": 1.9542, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.066218796348364, "learning_rate": 0.00012142857142857143, "loss": 1.9928, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.3713446928171047, "learning_rate": 0.000125, "loss": 1.9124, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.3523693102135053, "learning_rate": 0.00012857142857142855, "loss": 2.0267, "step": 36 }, { "epoch": 0.0, "grad_norm": 0.9515514517206934, "learning_rate": 0.00013214285714285715, "loss": 1.8685, "step": 37 }, { "epoch": 0.0, "grad_norm": 0.9729557725179657, "learning_rate": 0.0001357142857142857, "loss": 1.7549, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.564147690807114, "learning_rate": 0.0001392857142857143, "loss": 1.9658, "step": 39 }, { "epoch": 0.0, "grad_norm": 1.002970099447507, "learning_rate": 0.00014285714285714284, "loss": 2.0966, "step": 40 }, { "epoch": 0.0, "grad_norm": 1.0123033933136292, "learning_rate": 0.00014642857142857144, "loss": 1.9739, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.7633814778573, "learning_rate": 0.00015, "loss": 1.847, "step": 42 }, { "epoch": 0.0, "grad_norm": 1.1686064851334461, "learning_rate": 0.0001535714285714286, "loss": 1.9012, "step": 43 }, { "epoch": 0.0, "grad_norm": 0.9361446756319182, "learning_rate": 0.00015714285714285713, "loss": 1.9118, "step": 44 }, { "epoch": 0.0, "grad_norm": 1.5200884270670607, "learning_rate": 0.00016071428571428573, "loss": 1.9476, "step": 45 }, { "epoch": 0.0, "grad_norm": 1.450100365815453, "learning_rate": 0.00016428571428571428, "loss": 1.7931, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.288950921055788, "learning_rate": 0.00016785714285714285, "loss": 1.9861, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.803610469800681, "learning_rate": 0.00017142857142857143, "loss": 1.9061, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.4356333181084613, "learning_rate": 0.000175, "loss": 1.8542, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.2358575078876701, "learning_rate": 0.00017857142857142857, "loss": 1.9251, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.0864304720316573, "learning_rate": 0.00018214285714285714, "loss": 1.8825, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.1309351656853563, "learning_rate": 0.00018571428571428572, "loss": 1.846, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.9981874490118918, "learning_rate": 0.0001892857142857143, "loss": 1.7579, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.065892064141929, "learning_rate": 0.00019285714285714286, "loss": 1.8208, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.8816359706176079, "learning_rate": 0.00019642857142857144, "loss": 1.7919, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.0578385928836473, "learning_rate": 0.0002, "loss": 1.8116, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.9421173828756452, "learning_rate": 0.00020357142857142858, "loss": 1.7467, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.6289823592081432, "learning_rate": 0.00020714285714285716, "loss": 1.655, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.4741147658119709, "learning_rate": 0.00021071428571428573, "loss": 1.7672, "step": 59 }, { "epoch": 0.01, "grad_norm": 2.57246021644073, "learning_rate": 0.00021428571428571427, "loss": 1.6553, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.1375051314898432, "learning_rate": 0.00021785714285714287, "loss": 1.7275, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.3522322434105656, "learning_rate": 0.00022142857142857142, "loss": 1.6659, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.3498064220242347, "learning_rate": 0.00022500000000000002, "loss": 1.7152, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.4251392957299767, "learning_rate": 0.00022857142857142857, "loss": 1.5624, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.119320800018593, "learning_rate": 0.00023214285714285717, "loss": 1.667, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.0918414494076893, "learning_rate": 0.0002357142857142857, "loss": 1.5963, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.9746103076509893, "learning_rate": 0.0002392857142857143, "loss": 1.504, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.9492418576791255, "learning_rate": 0.00024285714285714286, "loss": 1.6585, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.8174319326574438, "learning_rate": 0.00024642857142857143, "loss": 1.5783, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.184127183820156, "learning_rate": 0.00025, "loss": 1.6589, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.6662930221337083, "learning_rate": 0.0002535714285714286, "loss": 1.5221, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.2301561957715066, "learning_rate": 0.0002571428571428571, "loss": 1.7142, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.7534991533961916, "learning_rate": 0.0002607142857142857, "loss": 1.5588, "step": 73 }, { "epoch": 0.01, "grad_norm": 0.5609113126608429, "learning_rate": 0.0002642857142857143, "loss": 1.5584, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.958306688822885, "learning_rate": 0.00026785714285714287, "loss": 1.523, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.6114223761007516, "learning_rate": 0.0002714285714285714, "loss": 1.6552, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.6799693126275542, "learning_rate": 0.000275, "loss": 1.6153, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.846263726121709, "learning_rate": 0.0002785714285714286, "loss": 1.5276, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.5330196945414835, "learning_rate": 0.00028214285714285716, "loss": 1.5136, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.6961419376772497, "learning_rate": 0.0002857142857142857, "loss": 1.4979, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.44617572572355957, "learning_rate": 0.0002892857142857143, "loss": 1.5388, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.5404931034315101, "learning_rate": 0.0002928571428571429, "loss": 1.53, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.625345659039276, "learning_rate": 0.00029642857142857145, "loss": 1.5604, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.3973305228158473, "learning_rate": 0.0003, "loss": 1.5368, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.5150470529051457, "learning_rate": 0.00030357142857142855, "loss": 1.5111, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.5165971204137647, "learning_rate": 0.0003071428571428572, "loss": 1.6526, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.5061721865580466, "learning_rate": 0.00031071428571428575, "loss": 1.5447, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.7102067251701049, "learning_rate": 0.00031428571428571427, "loss": 1.4482, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.5244806059205487, "learning_rate": 0.00031785714285714284, "loss": 1.6764, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.5221727806541643, "learning_rate": 0.00032142857142857147, "loss": 1.6043, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.7848075737761744, "learning_rate": 0.00032500000000000004, "loss": 1.5074, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.4290039738082572, "learning_rate": 0.00032857142857142856, "loss": 1.5482, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.5232736843606722, "learning_rate": 0.00033214285714285713, "loss": 1.5139, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.44862425899364705, "learning_rate": 0.0003357142857142857, "loss": 1.5531, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.4858044328676179, "learning_rate": 0.00033928571428571433, "loss": 1.5697, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.40783462592279174, "learning_rate": 0.00034285714285714285, "loss": 1.4921, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.4833210430418762, "learning_rate": 0.0003464285714285714, "loss": 1.6326, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.3349518955445, "learning_rate": 0.00035, "loss": 1.481, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.5119405076678242, "learning_rate": 0.0003535714285714286, "loss": 1.5565, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.4644397775386018, "learning_rate": 0.00035714285714285714, "loss": 1.5632, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.3798960509359317, "learning_rate": 0.0003607142857142857, "loss": 1.6424, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.38836469924030503, "learning_rate": 0.0003642857142857143, "loss": 1.5311, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.40155819248605457, "learning_rate": 0.0003678571428571429, "loss": 1.4503, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.5577097012658978, "learning_rate": 0.00037142857142857143, "loss": 1.4728, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.3302891581452391, "learning_rate": 0.000375, "loss": 1.4451, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.5105057674282057, "learning_rate": 0.0003785714285714286, "loss": 1.4823, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.34139026979805664, "learning_rate": 0.0003821428571428571, "loss": 1.5526, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.3526373961195756, "learning_rate": 0.0003857142857142857, "loss": 1.6479, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.36739582888447553, "learning_rate": 0.0003892857142857143, "loss": 1.5424, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.31509373847267547, "learning_rate": 0.0003928571428571429, "loss": 1.7238, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.7305039688024733, "learning_rate": 0.0003964285714285714, "loss": 1.5271, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.3581584781956524, "learning_rate": 0.0004, "loss": 1.6029, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.3224198059244875, "learning_rate": 0.0004035714285714286, "loss": 1.5386, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.3471545607154616, "learning_rate": 0.00040714285714285717, "loss": 1.468, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.3904049444253878, "learning_rate": 0.0004107142857142857, "loss": 1.6968, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.3121284899151761, "learning_rate": 0.0004142857142857143, "loss": 1.7429, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.3244004142779458, "learning_rate": 0.0004178571428571429, "loss": 1.5028, "step": 117 }, { "epoch": 0.01, "grad_norm": 0.5252291278727379, "learning_rate": 0.00042142857142857146, "loss": 1.4616, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.3378902942457399, "learning_rate": 0.000425, "loss": 1.4332, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.28884442583413833, "learning_rate": 0.00042857142857142855, "loss": 1.3443, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.35609856553313685, "learning_rate": 0.0004321428571428572, "loss": 1.5219, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.35554901725913257, "learning_rate": 0.00043571428571428575, "loss": 1.5723, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.3440599128217032, "learning_rate": 0.00043928571428571427, "loss": 1.3977, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.31192531792308886, "learning_rate": 0.00044285714285714284, "loss": 1.5228, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.32046127353392895, "learning_rate": 0.00044642857142857147, "loss": 1.4763, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.3101840268627974, "learning_rate": 0.00045000000000000004, "loss": 1.4582, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.4288094131576573, "learning_rate": 0.00045357142857142856, "loss": 1.5662, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.44158522598033534, "learning_rate": 0.00045714285714285713, "loss": 1.4745, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.3239538132892933, "learning_rate": 0.0004607142857142857, "loss": 1.4899, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.2871588061072913, "learning_rate": 0.00046428571428571433, "loss": 1.6439, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.38892543132373336, "learning_rate": 0.00046785714285714285, "loss": 1.4552, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.3825473937891277, "learning_rate": 0.0004714285714285714, "loss": 1.5597, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.4309140645349847, "learning_rate": 0.000475, "loss": 1.5602, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.4017136556479656, "learning_rate": 0.0004785714285714286, "loss": 1.5129, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.28489478417342257, "learning_rate": 0.00048214285714285715, "loss": 1.4758, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.3099684276288773, "learning_rate": 0.0004857142857142857, "loss": 1.5706, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.29120739116594524, "learning_rate": 0.0004892857142857142, "loss": 1.5954, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.36657854067868006, "learning_rate": 0.0004928571428571429, "loss": 1.5739, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.4317725139749333, "learning_rate": 0.0004964285714285715, "loss": 1.4752, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.31764086869505503, "learning_rate": 0.0005, "loss": 1.6824, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.3385632495847182, "learning_rate": 0.0005035714285714285, "loss": 1.5375, "step": 141 }, { "epoch": 0.02, "grad_norm": 0.37461770956336343, "learning_rate": 0.0005071428571428572, "loss": 1.412, "step": 142 }, { "epoch": 0.02, "grad_norm": 0.3399190654827032, "learning_rate": 0.0005107142857142857, "loss": 1.4806, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.4216971640700005, "learning_rate": 0.0005142857142857142, "loss": 1.5229, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.3746707384688907, "learning_rate": 0.0005178571428571429, "loss": 1.5051, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.360108642576056, "learning_rate": 0.0005214285714285714, "loss": 1.4608, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.3508119200146609, "learning_rate": 0.0005250000000000001, "loss": 1.5631, "step": 147 }, { "epoch": 0.02, "grad_norm": 0.3979211809075355, "learning_rate": 0.0005285714285714286, "loss": 1.7342, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.2965752820354549, "learning_rate": 0.0005321428571428571, "loss": 1.4366, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.36692441991683633, "learning_rate": 0.0005357142857142857, "loss": 1.6711, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.3433996604400371, "learning_rate": 0.0005392857142857143, "loss": 1.5016, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.3136388055796795, "learning_rate": 0.0005428571428571428, "loss": 1.4356, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.30192687104297594, "learning_rate": 0.0005464285714285714, "loss": 1.4473, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.33129198015257594, "learning_rate": 0.00055, "loss": 1.4901, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.3165111191353476, "learning_rate": 0.0005535714285714287, "loss": 1.4939, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.29059641041159867, "learning_rate": 0.0005571428571428572, "loss": 1.5284, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.30213428826667454, "learning_rate": 0.0005607142857142857, "loss": 1.6462, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.2845219847958041, "learning_rate": 0.0005642857142857143, "loss": 1.526, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.35986971493563835, "learning_rate": 0.0005678571428571428, "loss": 1.4807, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.43297526779692946, "learning_rate": 0.0005714285714285714, "loss": 1.4925, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.309817795851931, "learning_rate": 0.000575, "loss": 1.4316, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.3532850398670643, "learning_rate": 0.0005785714285714286, "loss": 1.3771, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.2921544010366211, "learning_rate": 0.0005821428571428572, "loss": 1.3419, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.29158256657962844, "learning_rate": 0.0005857142857142858, "loss": 1.3892, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.26959751530851456, "learning_rate": 0.0005892857142857143, "loss": 1.5987, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.27858915227839154, "learning_rate": 0.0005928571428571429, "loss": 1.6159, "step": 166 }, { "epoch": 0.02, "grad_norm": 0.27887679232979334, "learning_rate": 0.0005964285714285714, "loss": 1.6343, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.2902063856837706, "learning_rate": 0.0006, "loss": 1.5628, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.26242952479313464, "learning_rate": 0.0006035714285714286, "loss": 1.6507, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.2593759252347874, "learning_rate": 0.0006071428571428571, "loss": 1.5475, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.31011666182187436, "learning_rate": 0.0006107142857142858, "loss": 1.5178, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.25639111105134527, "learning_rate": 0.0006142857142857143, "loss": 1.498, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.31652842035082357, "learning_rate": 0.0006178571428571429, "loss": 1.4955, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.2987038846515858, "learning_rate": 0.0006214285714285715, "loss": 1.4423, "step": 174 }, { "epoch": 0.02, "grad_norm": 0.3263219499540308, "learning_rate": 0.000625, "loss": 1.7098, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.32379973578168375, "learning_rate": 0.0006285714285714285, "loss": 1.5546, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.30673266201950766, "learning_rate": 0.0006321428571428572, "loss": 1.5595, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.31025981831403515, "learning_rate": 0.0006357142857142857, "loss": 1.5682, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.2631500803146406, "learning_rate": 0.0006392857142857142, "loss": 1.4778, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.263772189519905, "learning_rate": 0.0006428571428571429, "loss": 1.5322, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.3170171832408717, "learning_rate": 0.0006464285714285715, "loss": 1.5399, "step": 181 }, { "epoch": 0.02, "grad_norm": 0.25839825036928027, "learning_rate": 0.0006500000000000001, "loss": 1.5088, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.2693335388497259, "learning_rate": 0.0006535714285714286, "loss": 1.3628, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.3374590349944099, "learning_rate": 0.0006571428571428571, "loss": 1.5421, "step": 184 }, { "epoch": 0.02, "grad_norm": 0.2880039486353611, "learning_rate": 0.0006607142857142857, "loss": 1.5958, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.2532226088788996, "learning_rate": 0.0006642857142857143, "loss": 1.544, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.31236526445694107, "learning_rate": 0.0006678571428571428, "loss": 1.4988, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.296432786908887, "learning_rate": 0.0006714285714285714, "loss": 1.5167, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.3319120080791588, "learning_rate": 0.000675, "loss": 1.4906, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.2671474694497889, "learning_rate": 0.0006785714285714287, "loss": 1.506, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.29230866138530726, "learning_rate": 0.0006821428571428572, "loss": 1.5605, "step": 191 }, { "epoch": 0.02, "grad_norm": 0.29237517306757743, "learning_rate": 0.0006857142857142857, "loss": 1.5592, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.23998407955562542, "learning_rate": 0.0006892857142857143, "loss": 1.5919, "step": 193 }, { "epoch": 0.02, "grad_norm": 0.33358955469999935, "learning_rate": 0.0006928571428571428, "loss": 1.5007, "step": 194 }, { "epoch": 0.02, "grad_norm": 0.2588549835888095, "learning_rate": 0.0006964285714285714, "loss": 1.583, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.2985579696888121, "learning_rate": 0.0007, "loss": 1.5594, "step": 196 }, { "epoch": 0.02, "grad_norm": 0.2837689469382843, "learning_rate": 0.0007035714285714286, "loss": 1.4313, "step": 197 }, { "epoch": 0.02, "grad_norm": 0.26100830899090055, "learning_rate": 0.0007071428571428572, "loss": 1.5072, "step": 198 }, { "epoch": 0.02, "grad_norm": 0.2858482873084629, "learning_rate": 0.0007107142857142858, "loss": 1.6042, "step": 199 }, { "epoch": 0.02, "grad_norm": 0.28657146589590393, "learning_rate": 0.0007142857142857143, "loss": 1.5303, "step": 200 }, { "epoch": 0.02, "grad_norm": 0.29257530177935365, "learning_rate": 0.0007178571428571429, "loss": 1.6107, "step": 201 }, { "epoch": 0.02, "grad_norm": 0.3042172997552271, "learning_rate": 0.0007214285714285714, "loss": 1.5416, "step": 202 }, { "epoch": 0.02, "grad_norm": 0.30426651346545824, "learning_rate": 0.000725, "loss": 1.5159, "step": 203 }, { "epoch": 0.02, "grad_norm": 0.26088662022664416, "learning_rate": 0.0007285714285714286, "loss": 1.511, "step": 204 }, { "epoch": 0.02, "grad_norm": 0.22126894553880855, "learning_rate": 0.0007321428571428571, "loss": 1.3603, "step": 205 }, { "epoch": 0.02, "grad_norm": 0.22402688666484616, "learning_rate": 0.0007357142857142858, "loss": 1.4555, "step": 206 }, { "epoch": 0.02, "grad_norm": 0.22146626840818323, "learning_rate": 0.0007392857142857144, "loss": 1.345, "step": 207 }, { "epoch": 0.02, "grad_norm": 0.234618942149512, "learning_rate": 0.0007428571428571429, "loss": 1.4908, "step": 208 }, { "epoch": 0.02, "grad_norm": 0.29634089484886406, "learning_rate": 0.0007464285714285715, "loss": 1.5035, "step": 209 }, { "epoch": 0.02, "grad_norm": 0.25852822888367205, "learning_rate": 0.00075, "loss": 1.6435, "step": 210 }, { "epoch": 0.02, "grad_norm": 0.292756900485729, "learning_rate": 0.0007535714285714285, "loss": 1.5641, "step": 211 }, { "epoch": 0.02, "grad_norm": 0.2039367704094928, "learning_rate": 0.0007571428571428572, "loss": 1.4156, "step": 212 }, { "epoch": 0.02, "grad_norm": 0.25648074373805585, "learning_rate": 0.0007607142857142857, "loss": 1.4194, "step": 213 }, { "epoch": 0.02, "grad_norm": 0.2552155993199289, "learning_rate": 0.0007642857142857142, "loss": 1.4379, "step": 214 }, { "epoch": 0.02, "grad_norm": 0.2585414529627138, "learning_rate": 0.0007678571428571429, "loss": 1.5787, "step": 215 }, { "epoch": 0.02, "grad_norm": 0.2546374137167054, "learning_rate": 0.0007714285714285715, "loss": 1.5555, "step": 216 }, { "epoch": 0.02, "grad_norm": 0.27081684679812273, "learning_rate": 0.0007750000000000001, "loss": 1.3571, "step": 217 }, { "epoch": 0.02, "grad_norm": 0.24325223266409077, "learning_rate": 0.0007785714285714286, "loss": 1.5655, "step": 218 }, { "epoch": 0.02, "grad_norm": 0.24276431673429932, "learning_rate": 0.0007821428571428571, "loss": 1.4558, "step": 219 }, { "epoch": 0.02, "grad_norm": 0.2366434468516385, "learning_rate": 0.0007857142857142857, "loss": 1.5843, "step": 220 }, { "epoch": 0.02, "grad_norm": 0.23005762850876868, "learning_rate": 0.0007892857142857143, "loss": 1.4706, "step": 221 }, { "epoch": 0.02, "grad_norm": 0.21661794426819167, "learning_rate": 0.0007928571428571428, "loss": 1.5066, "step": 222 }, { "epoch": 0.02, "grad_norm": 0.24364550930674125, "learning_rate": 0.0007964285714285714, "loss": 1.4984, "step": 223 }, { "epoch": 0.02, "grad_norm": 0.24544373452185395, "learning_rate": 0.0008, "loss": 1.5392, "step": 224 }, { "epoch": 0.02, "grad_norm": 0.3164181294977973, "learning_rate": 0.0008035714285714287, "loss": 1.4713, "step": 225 }, { "epoch": 0.02, "grad_norm": 0.2057971791624359, "learning_rate": 0.0008071428571428572, "loss": 1.6402, "step": 226 }, { "epoch": 0.02, "grad_norm": 0.22646235028301068, "learning_rate": 0.0008107142857142857, "loss": 1.6311, "step": 227 }, { "epoch": 0.02, "grad_norm": 0.2434939861749518, "learning_rate": 0.0008142857142857143, "loss": 1.4522, "step": 228 }, { "epoch": 0.02, "grad_norm": 0.25565301421024494, "learning_rate": 0.0008178571428571428, "loss": 1.4731, "step": 229 }, { "epoch": 0.02, "grad_norm": 0.2145112325466064, "learning_rate": 0.0008214285714285714, "loss": 1.508, "step": 230 }, { "epoch": 0.02, "grad_norm": 0.24235525781497383, "learning_rate": 0.000825, "loss": 1.5515, "step": 231 }, { "epoch": 0.02, "grad_norm": 0.24808658822607518, "learning_rate": 0.0008285714285714286, "loss": 1.4308, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.25257648565419344, "learning_rate": 0.0008321428571428573, "loss": 1.5598, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.2052328252860909, "learning_rate": 0.0008357142857142858, "loss": 1.4058, "step": 234 }, { "epoch": 0.03, "grad_norm": 0.30554429044632603, "learning_rate": 0.0008392857142857143, "loss": 1.4984, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.2684656793305302, "learning_rate": 0.0008428571428571429, "loss": 1.5596, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.24919207229849352, "learning_rate": 0.0008464285714285714, "loss": 1.5213, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.23262987526628107, "learning_rate": 0.00085, "loss": 1.6697, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.206144476035226, "learning_rate": 0.0008535714285714286, "loss": 1.6066, "step": 239 }, { "epoch": 0.03, "grad_norm": 0.1916608965478554, "learning_rate": 0.0008571428571428571, "loss": 1.5925, "step": 240 }, { "epoch": 0.03, "grad_norm": 0.18887206567344306, "learning_rate": 0.0008607142857142858, "loss": 1.5143, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.18012370414006335, "learning_rate": 0.0008642857142857144, "loss": 1.5059, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.23347982878855053, "learning_rate": 0.0008678571428571429, "loss": 1.5216, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.2113463845182561, "learning_rate": 0.0008714285714285715, "loss": 1.6146, "step": 244 }, { "epoch": 0.03, "grad_norm": 0.2069102374474599, "learning_rate": 0.000875, "loss": 1.4846, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.20672558766688756, "learning_rate": 0.0008785714285714285, "loss": 1.4937, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.21223510647754198, "learning_rate": 0.0008821428571428572, "loss": 1.5495, "step": 247 }, { "epoch": 0.03, "grad_norm": 0.2702576482389292, "learning_rate": 0.0008857142857142857, "loss": 1.4101, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.22179795777881917, "learning_rate": 0.0008892857142857142, "loss": 1.677, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.2231004575446933, "learning_rate": 0.0008928571428571429, "loss": 1.6451, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.19345695749788838, "learning_rate": 0.0008964285714285715, "loss": 1.4773, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.2164011557010416, "learning_rate": 0.0009000000000000001, "loss": 1.4006, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.20521116028853958, "learning_rate": 0.0009035714285714286, "loss": 1.4699, "step": 253 }, { "epoch": 0.03, "grad_norm": 0.21510699790233587, "learning_rate": 0.0009071428571428571, "loss": 1.3917, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.18317267392579845, "learning_rate": 0.0009107142857142857, "loss": 1.5035, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.2783009940687441, "learning_rate": 0.0009142857142857143, "loss": 1.5334, "step": 256 }, { "epoch": 0.03, "grad_norm": 0.2054533522968223, "learning_rate": 0.0009178571428571428, "loss": 1.3404, "step": 257 }, { "epoch": 0.03, "grad_norm": 0.21102299105750338, "learning_rate": 0.0009214285714285714, "loss": 1.3101, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.18963226471367908, "learning_rate": 0.000925, "loss": 1.3663, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.2184587395865125, "learning_rate": 0.0009285714285714287, "loss": 1.5179, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.20605484569236143, "learning_rate": 0.0009321428571428572, "loss": 1.5511, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.2791226050077258, "learning_rate": 0.0009357142857142857, "loss": 1.5101, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.19037886699486145, "learning_rate": 0.0009392857142857143, "loss": 1.4686, "step": 263 }, { "epoch": 0.03, "grad_norm": 0.1981693942215429, "learning_rate": 0.0009428571428571429, "loss": 1.3974, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.22826376509236745, "learning_rate": 0.0009464285714285714, "loss": 1.5342, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.19772832728602222, "learning_rate": 0.00095, "loss": 1.4764, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.2584142237517251, "learning_rate": 0.0009535714285714286, "loss": 1.4261, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.2144015182353289, "learning_rate": 0.0009571428571428573, "loss": 1.5228, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.17217525622544874, "learning_rate": 0.0009607142857142858, "loss": 1.5678, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.1996368107538457, "learning_rate": 0.0009642857142857143, "loss": 1.5514, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.20732376738259717, "learning_rate": 0.0009678571428571429, "loss": 1.5184, "step": 271 }, { "epoch": 0.03, "grad_norm": 0.19770305356463294, "learning_rate": 0.0009714285714285714, "loss": 1.6127, "step": 272 }, { "epoch": 0.03, "grad_norm": 0.18981688274012634, "learning_rate": 0.000975, "loss": 1.5789, "step": 273 }, { "epoch": 0.03, "grad_norm": 0.20918579733765114, "learning_rate": 0.0009785714285714285, "loss": 1.408, "step": 274 }, { "epoch": 0.03, "grad_norm": 0.20927524823711904, "learning_rate": 0.0009821428571428572, "loss": 1.4623, "step": 275 }, { "epoch": 0.03, "grad_norm": 0.17053415437999953, "learning_rate": 0.0009857142857142857, "loss": 1.4153, "step": 276 }, { "epoch": 0.03, "grad_norm": 0.17353729231705273, "learning_rate": 0.0009892857142857142, "loss": 1.4628, "step": 277 }, { "epoch": 0.03, "grad_norm": 0.19297924630966062, "learning_rate": 0.000992857142857143, "loss": 1.4706, "step": 278 }, { "epoch": 0.03, "grad_norm": 0.18001888908262587, "learning_rate": 0.0009964285714285715, "loss": 1.5269, "step": 279 }, { "epoch": 0.03, "grad_norm": 0.18830669239089334, "learning_rate": 0.001, "loss": 1.5408, "step": 280 }, { "epoch": 0.03, "grad_norm": 0.17643323360815724, "learning_rate": 0.0009999999696866382, "loss": 1.4809, "step": 281 }, { "epoch": 0.03, "grad_norm": 0.2040443477107501, "learning_rate": 0.000999999878746556, "loss": 1.5866, "step": 282 }, { "epoch": 0.03, "grad_norm": 0.1793843334457987, "learning_rate": 0.000999999727179765, "loss": 1.4915, "step": 283 }, { "epoch": 0.03, "grad_norm": 0.2035576998367501, "learning_rate": 0.0009999995149862835, "loss": 1.5028, "step": 284 }, { "epoch": 0.03, "grad_norm": 0.18822278840465206, "learning_rate": 0.0009999992421661369, "loss": 1.5095, "step": 285 }, { "epoch": 0.03, "grad_norm": 0.22599095980346726, "learning_rate": 0.0009999989087193582, "loss": 1.4853, "step": 286 }, { "epoch": 0.03, "grad_norm": 0.19948992318384096, "learning_rate": 0.0009999985146459881, "loss": 1.4658, "step": 287 }, { "epoch": 0.03, "grad_norm": 0.2077750753103998, "learning_rate": 0.0009999980599460746, "loss": 1.4903, "step": 288 }, { "epoch": 0.03, "grad_norm": 0.2042826255511603, "learning_rate": 0.0009999975446196726, "loss": 1.5716, "step": 289 }, { "epoch": 0.03, "grad_norm": 0.1809148903047096, "learning_rate": 0.0009999969686668442, "loss": 1.5172, "step": 290 }, { "epoch": 0.03, "grad_norm": 0.17323034884698793, "learning_rate": 0.0009999963320876598, "loss": 1.4453, "step": 291 }, { "epoch": 0.03, "grad_norm": 0.20664753532753552, "learning_rate": 0.0009999956348821966, "loss": 1.6475, "step": 292 }, { "epoch": 0.03, "grad_norm": 0.5668139338801725, "learning_rate": 0.0009999948770505387, "loss": 1.4983, "step": 293 }, { "epoch": 0.03, "grad_norm": 0.20916989817361614, "learning_rate": 0.0009999940585927781, "loss": 1.5331, "step": 294 }, { "epoch": 0.03, "grad_norm": 0.20045916684556297, "learning_rate": 0.0009999931795090142, "loss": 1.4464, "step": 295 }, { "epoch": 0.03, "grad_norm": 0.1991163579687259, "learning_rate": 0.0009999922397993537, "loss": 1.5759, "step": 296 }, { "epoch": 0.03, "grad_norm": 0.26448205876932024, "learning_rate": 0.0009999912394639103, "loss": 1.4446, "step": 297 }, { "epoch": 0.03, "grad_norm": 0.19273562442141076, "learning_rate": 0.0009999901785028053, "loss": 1.5955, "step": 298 }, { "epoch": 0.03, "grad_norm": 0.16933403058061827, "learning_rate": 0.0009999890569161677, "loss": 1.4588, "step": 299 }, { "epoch": 0.03, "grad_norm": 0.1830637464672982, "learning_rate": 0.000999987874704133, "loss": 1.6497, "step": 300 }, { "epoch": 0.03, "grad_norm": 0.17282182901114648, "learning_rate": 0.0009999866318668449, "loss": 1.4537, "step": 301 }, { "epoch": 0.03, "grad_norm": 0.18143770203988144, "learning_rate": 0.0009999853284044537, "loss": 1.5288, "step": 302 }, { "epoch": 0.03, "grad_norm": 0.21283164213442005, "learning_rate": 0.000999983964317118, "loss": 1.6833, "step": 303 }, { "epoch": 0.03, "grad_norm": 0.16871865453821608, "learning_rate": 0.0009999825396050028, "loss": 1.4128, "step": 304 }, { "epoch": 0.03, "grad_norm": 0.17788316189586986, "learning_rate": 0.000999981054268281, "loss": 1.4958, "step": 305 }, { "epoch": 0.03, "grad_norm": 0.17628497956862654, "learning_rate": 0.0009999795083071327, "loss": 1.5826, "step": 306 }, { "epoch": 0.03, "grad_norm": 0.17774954930179934, "learning_rate": 0.0009999779017217452, "loss": 1.5999, "step": 307 }, { "epoch": 0.03, "grad_norm": 0.16138411201607197, "learning_rate": 0.0009999762345123135, "loss": 1.448, "step": 308 }, { "epoch": 0.03, "grad_norm": 0.14636359639747132, "learning_rate": 0.0009999745066790397, "loss": 1.5426, "step": 309 }, { "epoch": 0.03, "grad_norm": 0.17395839471064034, "learning_rate": 0.0009999727182221335, "loss": 1.4609, "step": 310 }, { "epoch": 0.03, "grad_norm": 0.1596077796811495, "learning_rate": 0.0009999708691418112, "loss": 1.5566, "step": 311 }, { "epoch": 0.03, "grad_norm": 0.17254640291130927, "learning_rate": 0.0009999689594382976, "loss": 1.5635, "step": 312 }, { "epoch": 0.03, "grad_norm": 0.21093878480731898, "learning_rate": 0.0009999669891118238, "loss": 1.4798, "step": 313 }, { "epoch": 0.03, "grad_norm": 0.14639537332716396, "learning_rate": 0.0009999649581626292, "loss": 1.3929, "step": 314 }, { "epoch": 0.03, "grad_norm": 0.18027095555325548, "learning_rate": 0.0009999628665909597, "loss": 1.6339, "step": 315 }, { "epoch": 0.03, "grad_norm": 0.18042986872074054, "learning_rate": 0.0009999607143970689, "loss": 1.547, "step": 316 }, { "epoch": 0.03, "grad_norm": 0.1586215202794954, "learning_rate": 0.0009999585015812178, "loss": 1.4079, "step": 317 }, { "epoch": 0.03, "grad_norm": 0.17834825140193727, "learning_rate": 0.0009999562281436747, "loss": 1.4367, "step": 318 }, { "epoch": 0.03, "grad_norm": 0.17982659450306954, "learning_rate": 0.0009999538940847156, "loss": 1.6693, "step": 319 }, { "epoch": 0.03, "grad_norm": 0.164622497777815, "learning_rate": 0.0009999514994046231, "loss": 1.5315, "step": 320 }, { "epoch": 0.03, "grad_norm": 0.2006206552979814, "learning_rate": 0.0009999490441036877, "loss": 1.5092, "step": 321 }, { "epoch": 0.03, "grad_norm": 0.16907762296767967, "learning_rate": 0.0009999465281822072, "loss": 1.5472, "step": 322 }, { "epoch": 0.03, "grad_norm": 0.19421538970369667, "learning_rate": 0.0009999439516404863, "loss": 1.5062, "step": 323 }, { "epoch": 0.03, "grad_norm": 0.16267349147204174, "learning_rate": 0.000999941314478838, "loss": 1.4601, "step": 324 }, { "epoch": 0.03, "grad_norm": 0.17687924044685271, "learning_rate": 0.0009999386166975816, "loss": 1.4468, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.1658946269536119, "learning_rate": 0.0009999358582970443, "loss": 1.3812, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.19961931592072243, "learning_rate": 0.000999933039277561, "loss": 1.451, "step": 327 }, { "epoch": 0.04, "grad_norm": 0.18247338360131146, "learning_rate": 0.0009999301596394727, "loss": 1.5772, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.18531837950187607, "learning_rate": 0.0009999272193831293, "loss": 1.499, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.17578485273271977, "learning_rate": 0.000999924218508887, "loss": 1.4941, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.1716177706116155, "learning_rate": 0.0009999211570171098, "loss": 1.3478, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.15907076461451958, "learning_rate": 0.0009999180349081686, "loss": 1.5074, "step": 332 }, { "epoch": 0.04, "grad_norm": 0.14106955725340992, "learning_rate": 0.0009999148521824424, "loss": 1.6304, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.18473843834209136, "learning_rate": 0.0009999116088403167, "loss": 1.3852, "step": 334 }, { "epoch": 0.04, "grad_norm": 0.12962084897330353, "learning_rate": 0.0009999083048821851, "loss": 1.5796, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.21469389717837936, "learning_rate": 0.000999904940308448, "loss": 1.5622, "step": 336 }, { "epoch": 0.04, "grad_norm": 0.13965253223738205, "learning_rate": 0.0009999015151195135, "loss": 1.4831, "step": 337 }, { "epoch": 0.04, "grad_norm": 0.14771018846987435, "learning_rate": 0.000999898029315797, "loss": 1.4489, "step": 338 }, { "epoch": 0.04, "grad_norm": 0.14126087838350085, "learning_rate": 0.0009998944828977208, "loss": 1.4695, "step": 339 }, { "epoch": 0.04, "grad_norm": 0.13103131840592866, "learning_rate": 0.0009998908758657153, "loss": 1.3719, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.15938251830581013, "learning_rate": 0.0009998872082202176, "loss": 1.6759, "step": 341 }, { "epoch": 0.04, "grad_norm": 0.14062844047046466, "learning_rate": 0.0009998834799616726, "loss": 1.5435, "step": 342 }, { "epoch": 0.04, "grad_norm": 0.14069295274521235, "learning_rate": 0.0009998796910905324, "loss": 1.5798, "step": 343 }, { "epoch": 0.04, "grad_norm": 0.14899985863071655, "learning_rate": 0.0009998758416072562, "loss": 1.4963, "step": 344 }, { "epoch": 0.04, "grad_norm": 0.15403995672516727, "learning_rate": 0.000999871931512311, "loss": 1.5071, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.15965488508555337, "learning_rate": 0.0009998679608061705, "loss": 1.4844, "step": 346 }, { "epoch": 0.04, "grad_norm": 0.1397205113440922, "learning_rate": 0.0009998639294893166, "loss": 1.3994, "step": 347 }, { "epoch": 0.04, "grad_norm": 0.16837720499970224, "learning_rate": 0.0009998598375622382, "loss": 1.6294, "step": 348 }, { "epoch": 0.04, "grad_norm": 0.14290960526839144, "learning_rate": 0.0009998556850254307, "loss": 1.393, "step": 349 }, { "epoch": 0.04, "grad_norm": 0.1647613660769307, "learning_rate": 0.0009998514718793986, "loss": 1.5986, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.126604301560717, "learning_rate": 0.000999847198124652, "loss": 1.5888, "step": 351 }, { "epoch": 0.04, "grad_norm": 0.1625076881295614, "learning_rate": 0.0009998428637617094, "loss": 1.4581, "step": 352 }, { "epoch": 0.04, "grad_norm": 0.1354795426114141, "learning_rate": 0.0009998384687910967, "loss": 1.5685, "step": 353 }, { "epoch": 0.04, "grad_norm": 0.1960614626166613, "learning_rate": 0.000999834013213346, "loss": 1.4762, "step": 354 }, { "epoch": 0.04, "grad_norm": 0.15410756920154223, "learning_rate": 0.0009998294970289983, "loss": 1.5645, "step": 355 }, { "epoch": 0.04, "grad_norm": 0.18398803112017642, "learning_rate": 0.0009998249202386007, "loss": 1.4068, "step": 356 }, { "epoch": 0.04, "grad_norm": 0.19952868680271008, "learning_rate": 0.0009998202828427085, "loss": 1.5943, "step": 357 }, { "epoch": 0.04, "grad_norm": 0.17194494265665142, "learning_rate": 0.000999815584841884, "loss": 1.5148, "step": 358 }, { "epoch": 0.04, "grad_norm": 0.16781358427269294, "learning_rate": 0.0009998108262366965, "loss": 1.4017, "step": 359 }, { "epoch": 0.04, "grad_norm": 0.17487641147080715, "learning_rate": 0.0009998060070277232, "loss": 1.4302, "step": 360 }, { "epoch": 0.04, "grad_norm": 0.14854649570076, "learning_rate": 0.0009998011272155485, "loss": 1.435, "step": 361 }, { "epoch": 0.04, "grad_norm": 0.13955296740945117, "learning_rate": 0.0009997961868007642, "loss": 1.3177, "step": 362 }, { "epoch": 0.04, "grad_norm": 0.17180197963616256, "learning_rate": 0.0009997911857839688, "loss": 1.4593, "step": 363 }, { "epoch": 0.04, "grad_norm": 0.1450351295687952, "learning_rate": 0.0009997861241657694, "loss": 1.4266, "step": 364 }, { "epoch": 0.04, "grad_norm": 0.17459394228876687, "learning_rate": 0.0009997810019467793, "loss": 1.4735, "step": 365 }, { "epoch": 0.04, "grad_norm": 0.21035233323036612, "learning_rate": 0.0009997758191276197, "loss": 1.5312, "step": 366 }, { "epoch": 0.04, "grad_norm": 0.1522354572488286, "learning_rate": 0.0009997705757089192, "loss": 1.4431, "step": 367 }, { "epoch": 0.04, "grad_norm": 0.15293858625262405, "learning_rate": 0.0009997652716913134, "loss": 1.6787, "step": 368 }, { "epoch": 0.04, "grad_norm": 0.1353046205100757, "learning_rate": 0.0009997599070754454, "loss": 1.4503, "step": 369 }, { "epoch": 0.04, "grad_norm": 0.17227780219049219, "learning_rate": 0.0009997544818619657, "loss": 1.4085, "step": 370 }, { "epoch": 0.04, "grad_norm": 0.1437172909509328, "learning_rate": 0.000999748996051532, "loss": 1.5073, "step": 371 }, { "epoch": 0.04, "grad_norm": 0.1504855289987324, "learning_rate": 0.0009997434496448099, "loss": 1.5767, "step": 372 }, { "epoch": 0.04, "grad_norm": 0.13326666119325065, "learning_rate": 0.0009997378426424715, "loss": 1.4447, "step": 373 }, { "epoch": 0.04, "grad_norm": 0.12945116710966437, "learning_rate": 0.0009997321750451968, "loss": 1.5107, "step": 374 }, { "epoch": 0.04, "grad_norm": 0.1633221149641643, "learning_rate": 0.0009997264468536732, "loss": 1.5965, "step": 375 }, { "epoch": 0.04, "grad_norm": 0.14094383798016874, "learning_rate": 0.000999720658068595, "loss": 1.5529, "step": 376 }, { "epoch": 0.04, "grad_norm": 0.1424006539876174, "learning_rate": 0.0009997148086906642, "loss": 1.4084, "step": 377 }, { "epoch": 0.04, "grad_norm": 0.1659677155871326, "learning_rate": 0.0009997088987205903, "loss": 1.6574, "step": 378 }, { "epoch": 0.04, "grad_norm": 0.16406857672291258, "learning_rate": 0.0009997029281590892, "loss": 1.4398, "step": 379 }, { "epoch": 0.04, "grad_norm": 0.14181116310622616, "learning_rate": 0.0009996968970068857, "loss": 1.4507, "step": 380 }, { "epoch": 0.04, "grad_norm": 0.14808479870132174, "learning_rate": 0.0009996908052647105, "loss": 1.5897, "step": 381 }, { "epoch": 0.04, "grad_norm": 0.17030159407878712, "learning_rate": 0.0009996846529333027, "loss": 1.5398, "step": 382 }, { "epoch": 0.04, "grad_norm": 0.16969702631552683, "learning_rate": 0.0009996784400134078, "loss": 1.4812, "step": 383 }, { "epoch": 0.04, "grad_norm": 0.17226460579810668, "learning_rate": 0.0009996721665057795, "loss": 1.469, "step": 384 }, { "epoch": 0.04, "grad_norm": 0.1721386837516993, "learning_rate": 0.0009996658324111785, "loss": 1.5467, "step": 385 }, { "epoch": 0.04, "grad_norm": 0.14892597511153813, "learning_rate": 0.0009996594377303725, "loss": 1.5863, "step": 386 }, { "epoch": 0.04, "grad_norm": 0.1575483529907697, "learning_rate": 0.0009996529824641374, "loss": 1.4743, "step": 387 }, { "epoch": 0.04, "grad_norm": 0.1456398333006279, "learning_rate": 0.0009996464666132553, "loss": 1.5276, "step": 388 }, { "epoch": 0.04, "grad_norm": 0.15489743549736446, "learning_rate": 0.0009996398901785167, "loss": 1.482, "step": 389 }, { "epoch": 0.04, "grad_norm": 0.1583860129059049, "learning_rate": 0.000999633253160719, "loss": 1.5561, "step": 390 }, { "epoch": 0.04, "grad_norm": 0.17262883033342483, "learning_rate": 0.0009996265555606667, "loss": 1.4898, "step": 391 }, { "epoch": 0.04, "grad_norm": 0.16439961527862196, "learning_rate": 0.000999619797379172, "loss": 1.5272, "step": 392 }, { "epoch": 0.04, "grad_norm": 0.15828301920189175, "learning_rate": 0.0009996129786170546, "loss": 1.4813, "step": 393 }, { "epoch": 0.04, "grad_norm": 0.1731170888583976, "learning_rate": 0.000999606099275141, "loss": 1.4245, "step": 394 }, { "epoch": 0.04, "grad_norm": 0.1559416559722423, "learning_rate": 0.0009995991593542656, "loss": 1.5365, "step": 395 }, { "epoch": 0.04, "grad_norm": 0.18583660593985893, "learning_rate": 0.0009995921588552695, "loss": 1.4156, "step": 396 }, { "epoch": 0.04, "grad_norm": 0.1707241641157939, "learning_rate": 0.000999585097779002, "loss": 1.5137, "step": 397 }, { "epoch": 0.04, "grad_norm": 0.1407581665746861, "learning_rate": 0.0009995779761263189, "loss": 1.5203, "step": 398 }, { "epoch": 0.04, "grad_norm": 0.1388777158799526, "learning_rate": 0.000999570793898084, "loss": 1.5761, "step": 399 }, { "epoch": 0.04, "grad_norm": 0.16295821219358653, "learning_rate": 0.000999563551095168, "loss": 1.6157, "step": 400 }, { "epoch": 0.04, "grad_norm": 0.1539280261207723, "learning_rate": 0.0009995562477184492, "loss": 1.416, "step": 401 }, { "epoch": 0.04, "grad_norm": 0.14246096761905572, "learning_rate": 0.0009995488837688132, "loss": 1.5411, "step": 402 }, { "epoch": 0.04, "grad_norm": 0.15716424737797968, "learning_rate": 0.0009995414592471527, "loss": 1.4782, "step": 403 }, { "epoch": 0.04, "grad_norm": 0.14283758228041246, "learning_rate": 0.000999533974154368, "loss": 1.4147, "step": 404 }, { "epoch": 0.04, "grad_norm": 0.15328695456751443, "learning_rate": 0.000999526428491367, "loss": 1.5153, "step": 405 }, { "epoch": 0.04, "grad_norm": 0.1436552033181903, "learning_rate": 0.0009995188222590645, "loss": 1.5375, "step": 406 }, { "epoch": 0.04, "grad_norm": 0.1454954430103715, "learning_rate": 0.0009995111554583825, "loss": 1.34, "step": 407 }, { "epoch": 0.04, "grad_norm": 0.16441780739523218, "learning_rate": 0.0009995034280902509, "loss": 1.5478, "step": 408 }, { "epoch": 0.04, "grad_norm": 0.13139245357731225, "learning_rate": 0.0009994956401556065, "loss": 1.3986, "step": 409 }, { "epoch": 0.04, "grad_norm": 0.1609687663696494, "learning_rate": 0.0009994877916553937, "loss": 1.651, "step": 410 }, { "epoch": 0.04, "grad_norm": 0.1774917612967736, "learning_rate": 0.0009994798825905644, "loss": 1.4559, "step": 411 }, { "epoch": 0.04, "grad_norm": 0.12610286657532213, "learning_rate": 0.000999471912962077, "loss": 1.3864, "step": 412 }, { "epoch": 0.04, "grad_norm": 0.15091836283777837, "learning_rate": 0.0009994638827708986, "loss": 1.3917, "step": 413 }, { "epoch": 0.04, "grad_norm": 0.1274380080548309, "learning_rate": 0.0009994557920180024, "loss": 1.4435, "step": 414 }, { "epoch": 0.04, "grad_norm": 0.1403056631620941, "learning_rate": 0.0009994476407043694, "loss": 1.5053, "step": 415 }, { "epoch": 0.04, "grad_norm": 0.13960834407799336, "learning_rate": 0.000999439428830988, "loss": 1.4803, "step": 416 }, { "epoch": 0.04, "grad_norm": 0.14244926848342568, "learning_rate": 0.0009994311563988542, "loss": 1.5339, "step": 417 }, { "epoch": 0.04, "grad_norm": 0.14869433694885337, "learning_rate": 0.0009994228234089708, "loss": 1.4244, "step": 418 }, { "epoch": 0.05, "grad_norm": 0.14528003568402328, "learning_rate": 0.0009994144298623485, "loss": 1.4579, "step": 419 }, { "epoch": 0.05, "grad_norm": 0.13406799469371342, "learning_rate": 0.0009994059757600048, "loss": 1.5282, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.14796217619302254, "learning_rate": 0.0009993974611029646, "loss": 1.5673, "step": 421 }, { "epoch": 0.05, "grad_norm": 0.1370563358302789, "learning_rate": 0.0009993888858922605, "loss": 1.4697, "step": 422 }, { "epoch": 0.05, "grad_norm": 0.12170722368190066, "learning_rate": 0.0009993802501289326, "loss": 1.4959, "step": 423 }, { "epoch": 0.05, "grad_norm": 0.10003558537131502, "learning_rate": 0.0009993715538140276, "loss": 1.4291, "step": 424 }, { "epoch": 0.05, "grad_norm": 0.1334012983526422, "learning_rate": 0.0009993627969486002, "loss": 1.558, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.12602036845602907, "learning_rate": 0.000999353979533712, "loss": 1.3761, "step": 426 }, { "epoch": 0.05, "grad_norm": 0.09994334678040766, "learning_rate": 0.0009993451015704324, "loss": 1.3765, "step": 427 }, { "epoch": 0.05, "grad_norm": 0.1429494796596272, "learning_rate": 0.0009993361630598377, "loss": 1.4477, "step": 428 }, { "epoch": 0.05, "grad_norm": 0.1459824322844078, "learning_rate": 0.0009993271640030116, "loss": 1.5808, "step": 429 }, { "epoch": 0.05, "grad_norm": 0.13516497528709895, "learning_rate": 0.0009993181044010454, "loss": 1.5225, "step": 430 }, { "epoch": 0.05, "grad_norm": 0.12460002385750782, "learning_rate": 0.000999308984255038, "loss": 1.5176, "step": 431 }, { "epoch": 0.05, "grad_norm": 0.12165124868540639, "learning_rate": 0.0009992998035660945, "loss": 1.4966, "step": 432 }, { "epoch": 0.05, "grad_norm": 0.11682420685092966, "learning_rate": 0.0009992905623353286, "loss": 1.4564, "step": 433 }, { "epoch": 0.05, "grad_norm": 0.13472134097524974, "learning_rate": 0.0009992812605638605, "loss": 1.4921, "step": 434 }, { "epoch": 0.05, "grad_norm": 0.11455037083596988, "learning_rate": 0.0009992718982528186, "loss": 1.4622, "step": 435 }, { "epoch": 0.05, "grad_norm": 0.13050817933155195, "learning_rate": 0.0009992624754033377, "loss": 1.5472, "step": 436 }, { "epoch": 0.05, "grad_norm": 0.15767461304956595, "learning_rate": 0.0009992529920165602, "loss": 1.6125, "step": 437 }, { "epoch": 0.05, "grad_norm": 0.146525866672167, "learning_rate": 0.0009992434480936366, "loss": 1.4863, "step": 438 }, { "epoch": 0.05, "grad_norm": 0.1321979814402815, "learning_rate": 0.0009992338436357235, "loss": 1.5751, "step": 439 }, { "epoch": 0.05, "grad_norm": 0.1292076236415919, "learning_rate": 0.0009992241786439858, "loss": 1.4775, "step": 440 }, { "epoch": 0.05, "grad_norm": 0.13500666830362648, "learning_rate": 0.0009992144531195955, "loss": 1.5966, "step": 441 }, { "epoch": 0.05, "grad_norm": 0.13695901467453922, "learning_rate": 0.0009992046670637316, "loss": 1.5805, "step": 442 }, { "epoch": 0.05, "grad_norm": 0.13922716501708265, "learning_rate": 0.0009991948204775807, "loss": 1.5072, "step": 443 }, { "epoch": 0.05, "grad_norm": 0.14647658645269154, "learning_rate": 0.000999184913362337, "loss": 1.3592, "step": 444 }, { "epoch": 0.05, "grad_norm": 0.12899480292336285, "learning_rate": 0.0009991749457192013, "loss": 1.4513, "step": 445 }, { "epoch": 0.05, "grad_norm": 0.12393656639381344, "learning_rate": 0.0009991649175493827, "loss": 1.4687, "step": 446 }, { "epoch": 0.05, "grad_norm": 0.16754428326189647, "learning_rate": 0.000999154828854097, "loss": 1.4184, "step": 447 }, { "epoch": 0.05, "grad_norm": 0.14210675981117007, "learning_rate": 0.0009991446796345676, "loss": 1.4588, "step": 448 }, { "epoch": 0.05, "grad_norm": 0.1377863515377502, "learning_rate": 0.0009991344698920246, "loss": 1.5647, "step": 449 }, { "epoch": 0.05, "grad_norm": 0.15677195035868116, "learning_rate": 0.0009991241996277068, "loss": 1.4818, "step": 450 }, { "epoch": 0.05, "grad_norm": 0.14291065447977097, "learning_rate": 0.0009991138688428588, "loss": 1.4337, "step": 451 }, { "epoch": 0.05, "grad_norm": 0.1500952470396202, "learning_rate": 0.0009991034775387335, "loss": 1.5161, "step": 452 }, { "epoch": 0.05, "grad_norm": 0.14513044258257118, "learning_rate": 0.000999093025716591, "loss": 1.5096, "step": 453 }, { "epoch": 0.05, "grad_norm": 0.16032928911490424, "learning_rate": 0.000999082513377698, "loss": 1.4002, "step": 454 }, { "epoch": 0.05, "grad_norm": 0.17202853259574485, "learning_rate": 0.0009990719405233303, "loss": 1.4792, "step": 455 }, { "epoch": 0.05, "grad_norm": 0.15835946799615688, "learning_rate": 0.000999061307154769, "loss": 1.4615, "step": 456 }, { "epoch": 0.05, "grad_norm": 0.16951071663191195, "learning_rate": 0.0009990506132733037, "loss": 1.4748, "step": 457 }, { "epoch": 0.05, "grad_norm": 0.1398963494982491, "learning_rate": 0.000999039858880231, "loss": 1.5173, "step": 458 }, { "epoch": 0.05, "grad_norm": 0.12997095802643854, "learning_rate": 0.000999029043976855, "loss": 1.2695, "step": 459 }, { "epoch": 0.05, "grad_norm": 0.11616314177597527, "learning_rate": 0.000999018168564487, "loss": 1.4087, "step": 460 }, { "epoch": 0.05, "grad_norm": 0.17461980067454186, "learning_rate": 0.0009990072326444455, "loss": 1.5803, "step": 461 }, { "epoch": 0.05, "grad_norm": 0.14272014194613272, "learning_rate": 0.0009989962362180569, "loss": 1.4678, "step": 462 }, { "epoch": 0.05, "grad_norm": 0.16570305570684601, "learning_rate": 0.0009989851792866543, "loss": 1.6578, "step": 463 }, { "epoch": 0.05, "grad_norm": 0.11998225014078424, "learning_rate": 0.0009989740618515787, "loss": 1.4982, "step": 464 }, { "epoch": 0.05, "grad_norm": 0.13537021139680433, "learning_rate": 0.0009989628839141775, "loss": 1.5582, "step": 465 }, { "epoch": 0.05, "grad_norm": 0.14086644816346083, "learning_rate": 0.0009989516454758066, "loss": 1.4891, "step": 466 }, { "epoch": 0.05, "grad_norm": 0.1358567824453613, "learning_rate": 0.0009989403465378284, "loss": 1.4645, "step": 467 }, { "epoch": 0.05, "grad_norm": 0.1401683725102955, "learning_rate": 0.0009989289871016132, "loss": 1.5034, "step": 468 }, { "epoch": 0.05, "grad_norm": 0.14130289874302648, "learning_rate": 0.0009989175671685383, "loss": 1.6208, "step": 469 }, { "epoch": 0.05, "grad_norm": 0.1476505273386164, "learning_rate": 0.0009989060867399884, "loss": 1.4666, "step": 470 }, { "epoch": 0.05, "grad_norm": 0.1494861721735433, "learning_rate": 0.0009988945458173552, "loss": 1.458, "step": 471 }, { "epoch": 0.05, "grad_norm": 0.14147325490478396, "learning_rate": 0.0009988829444020385, "loss": 1.6521, "step": 472 }, { "epoch": 0.05, "grad_norm": 0.15133662684402113, "learning_rate": 0.0009988712824954451, "loss": 1.5475, "step": 473 }, { "epoch": 0.05, "grad_norm": 0.15736937108778778, "learning_rate": 0.0009988595600989886, "loss": 1.5526, "step": 474 }, { "epoch": 0.05, "grad_norm": 0.13815053807297753, "learning_rate": 0.0009988477772140908, "loss": 1.4171, "step": 475 }, { "epoch": 0.05, "grad_norm": 0.15170448316189497, "learning_rate": 0.00099883593384218, "loss": 1.4429, "step": 476 }, { "epoch": 0.05, "grad_norm": 0.14523768793650085, "learning_rate": 0.0009988240299846926, "loss": 1.6591, "step": 477 }, { "epoch": 0.05, "grad_norm": 0.15517158286559127, "learning_rate": 0.0009988120656430719, "loss": 1.4886, "step": 478 }, { "epoch": 0.05, "grad_norm": 0.13328382363243924, "learning_rate": 0.0009988000408187685, "loss": 1.4964, "step": 479 }, { "epoch": 0.05, "grad_norm": 0.12208130405531557, "learning_rate": 0.0009987879555132405, "loss": 1.3444, "step": 480 }, { "epoch": 0.05, "grad_norm": 0.1371216763034477, "learning_rate": 0.0009987758097279534, "loss": 1.5955, "step": 481 }, { "epoch": 0.05, "grad_norm": 0.11997966531046428, "learning_rate": 0.0009987636034643798, "loss": 1.462, "step": 482 }, { "epoch": 0.05, "grad_norm": 0.15171065892823024, "learning_rate": 0.0009987513367239996, "loss": 1.3923, "step": 483 }, { "epoch": 0.05, "grad_norm": 0.13082361088442068, "learning_rate": 0.0009987390095083004, "loss": 1.4156, "step": 484 }, { "epoch": 0.05, "grad_norm": 0.12932044553418595, "learning_rate": 0.0009987266218187772, "loss": 1.4468, "step": 485 }, { "epoch": 0.05, "grad_norm": 0.1475713076816688, "learning_rate": 0.0009987141736569314, "loss": 1.4624, "step": 486 }, { "epoch": 0.05, "grad_norm": 0.14178318017974623, "learning_rate": 0.000998701665024273, "loss": 1.3946, "step": 487 }, { "epoch": 0.05, "grad_norm": 0.16305685355062952, "learning_rate": 0.0009986890959223181, "loss": 1.5231, "step": 488 }, { "epoch": 0.05, "grad_norm": 0.1686444442480543, "learning_rate": 0.0009986764663525913, "loss": 1.6783, "step": 489 }, { "epoch": 0.05, "grad_norm": 0.15549050758741176, "learning_rate": 0.0009986637763166237, "loss": 1.62, "step": 490 }, { "epoch": 0.05, "grad_norm": 0.13218940398691503, "learning_rate": 0.0009986510258159541, "loss": 1.4141, "step": 491 }, { "epoch": 0.05, "grad_norm": 0.14336555374093515, "learning_rate": 0.0009986382148521283, "loss": 1.545, "step": 492 }, { "epoch": 0.05, "grad_norm": 0.12407347653401202, "learning_rate": 0.0009986253434267, "loss": 1.4171, "step": 493 }, { "epoch": 0.05, "grad_norm": 0.15672602263906668, "learning_rate": 0.00099861241154123, "loss": 1.5143, "step": 494 }, { "epoch": 0.05, "grad_norm": 0.13339275812986198, "learning_rate": 0.000998599419197286, "loss": 1.4366, "step": 495 }, { "epoch": 0.05, "grad_norm": 0.13385666580192612, "learning_rate": 0.0009985863663964434, "loss": 1.4831, "step": 496 }, { "epoch": 0.05, "grad_norm": 0.1704216054223673, "learning_rate": 0.000998573253140285, "loss": 1.365, "step": 497 }, { "epoch": 0.05, "grad_norm": 0.1419854321243664, "learning_rate": 0.0009985600794304007, "loss": 1.3812, "step": 498 }, { "epoch": 0.05, "grad_norm": 0.1294114515649719, "learning_rate": 0.0009985468452683882, "loss": 1.4495, "step": 499 }, { "epoch": 0.05, "grad_norm": 0.16788358525374333, "learning_rate": 0.0009985335506558519, "loss": 1.6707, "step": 500 }, { "epoch": 0.05, "grad_norm": 0.1633606897699836, "learning_rate": 0.0009985201955944039, "loss": 1.6144, "step": 501 }, { "epoch": 0.05, "grad_norm": 0.13574456823443043, "learning_rate": 0.0009985067800856635, "loss": 1.5012, "step": 502 }, { "epoch": 0.05, "grad_norm": 0.12805624518518505, "learning_rate": 0.0009984933041312573, "loss": 1.469, "step": 503 }, { "epoch": 0.05, "grad_norm": 0.14562449161498103, "learning_rate": 0.0009984797677328194, "loss": 1.557, "step": 504 }, { "epoch": 0.05, "grad_norm": 0.13274042544010126, "learning_rate": 0.0009984661708919913, "loss": 1.5591, "step": 505 }, { "epoch": 0.05, "grad_norm": 0.13783829279522242, "learning_rate": 0.0009984525136104215, "loss": 1.4461, "step": 506 }, { "epoch": 0.05, "grad_norm": 0.14150450438484263, "learning_rate": 0.000998438795889766, "loss": 1.4764, "step": 507 }, { "epoch": 0.05, "grad_norm": 0.17191114370741217, "learning_rate": 0.0009984250177316881, "loss": 1.5333, "step": 508 }, { "epoch": 0.05, "grad_norm": 0.16755229866536533, "learning_rate": 0.0009984111791378582, "loss": 1.5334, "step": 509 }, { "epoch": 0.05, "grad_norm": 0.15257417846103322, "learning_rate": 0.0009983972801099548, "loss": 1.4058, "step": 510 }, { "epoch": 0.05, "grad_norm": 0.15478833011778742, "learning_rate": 0.000998383320649663, "loss": 1.5657, "step": 511 }, { "epoch": 0.06, "grad_norm": 0.12880041372659604, "learning_rate": 0.0009983693007586752, "loss": 1.6036, "step": 512 }, { "epoch": 0.06, "grad_norm": 0.14047382096291214, "learning_rate": 0.0009983552204386916, "loss": 1.5327, "step": 513 }, { "epoch": 0.06, "grad_norm": 0.12671081085946684, "learning_rate": 0.0009983410796914197, "loss": 1.5109, "step": 514 }, { "epoch": 0.06, "grad_norm": 0.13315349868182663, "learning_rate": 0.0009983268785185735, "loss": 1.4648, "step": 515 }, { "epoch": 0.06, "grad_norm": 0.12974308859167147, "learning_rate": 0.0009983126169218755, "loss": 1.4275, "step": 516 }, { "epoch": 0.06, "grad_norm": 0.10708978863759408, "learning_rate": 0.0009982982949030546, "loss": 1.4725, "step": 517 }, { "epoch": 0.06, "grad_norm": 0.11232917145575787, "learning_rate": 0.0009982839124638475, "loss": 1.5388, "step": 518 }, { "epoch": 0.06, "grad_norm": 0.13263578439023072, "learning_rate": 0.0009982694696059982, "loss": 1.5457, "step": 519 }, { "epoch": 0.06, "grad_norm": 0.1180388204627775, "learning_rate": 0.0009982549663312581, "loss": 1.4521, "step": 520 }, { "epoch": 0.06, "grad_norm": 0.1069615275333495, "learning_rate": 0.0009982404026413854, "loss": 1.4397, "step": 521 }, { "epoch": 0.06, "grad_norm": 0.11920448256909753, "learning_rate": 0.0009982257785381464, "loss": 1.3893, "step": 522 }, { "epoch": 0.06, "grad_norm": 0.13802661336336072, "learning_rate": 0.0009982110940233138, "loss": 1.3956, "step": 523 }, { "epoch": 0.06, "grad_norm": 0.12589487672919536, "learning_rate": 0.0009981963490986686, "loss": 1.6111, "step": 524 }, { "epoch": 0.06, "grad_norm": 0.1388066167759072, "learning_rate": 0.0009981815437659985, "loss": 1.4834, "step": 525 }, { "epoch": 0.06, "grad_norm": 0.11860878855329496, "learning_rate": 0.0009981666780270989, "loss": 1.5376, "step": 526 }, { "epoch": 0.06, "grad_norm": 0.1291593285336083, "learning_rate": 0.000998151751883772, "loss": 1.5099, "step": 527 }, { "epoch": 0.06, "grad_norm": 0.13856146675132416, "learning_rate": 0.0009981367653378278, "loss": 1.4985, "step": 528 }, { "epoch": 0.06, "grad_norm": 0.1428411422383799, "learning_rate": 0.0009981217183910834, "loss": 1.4989, "step": 529 }, { "epoch": 0.06, "grad_norm": 0.13401950703669366, "learning_rate": 0.0009981066110453633, "loss": 1.5726, "step": 530 }, { "epoch": 0.06, "grad_norm": 0.11238669635358409, "learning_rate": 0.0009980914433024997, "loss": 1.4765, "step": 531 }, { "epoch": 0.06, "grad_norm": 0.14635913263213093, "learning_rate": 0.0009980762151643313, "loss": 1.5009, "step": 532 }, { "epoch": 0.06, "grad_norm": 0.12077582640456455, "learning_rate": 0.0009980609266327044, "loss": 1.402, "step": 533 }, { "epoch": 0.06, "grad_norm": 0.13303350194920682, "learning_rate": 0.0009980455777094733, "loss": 1.6252, "step": 534 }, { "epoch": 0.06, "grad_norm": 0.13303747826576684, "learning_rate": 0.0009980301683964988, "loss": 1.5057, "step": 535 }, { "epoch": 0.06, "grad_norm": 0.14834784322318495, "learning_rate": 0.0009980146986956495, "loss": 1.4399, "step": 536 }, { "epoch": 0.06, "grad_norm": 0.1385305315636288, "learning_rate": 0.000997999168608801, "loss": 1.5656, "step": 537 }, { "epoch": 0.06, "grad_norm": 0.1264521765532537, "learning_rate": 0.0009979835781378367, "loss": 1.4, "step": 538 }, { "epoch": 0.06, "grad_norm": 0.15363887930073927, "learning_rate": 0.0009979679272846462, "loss": 1.5321, "step": 539 }, { "epoch": 0.06, "grad_norm": 0.12205084836277039, "learning_rate": 0.0009979522160511282, "loss": 1.4562, "step": 540 }, { "epoch": 0.06, "grad_norm": 0.11622818650530785, "learning_rate": 0.000997936444439187, "loss": 1.4356, "step": 541 }, { "epoch": 0.06, "grad_norm": 0.12491589190127463, "learning_rate": 0.0009979206124507355, "loss": 1.3947, "step": 542 }, { "epoch": 0.06, "grad_norm": 0.12922947991275946, "learning_rate": 0.0009979047200876932, "loss": 1.49, "step": 543 }, { "epoch": 0.06, "grad_norm": 0.13489214169177516, "learning_rate": 0.000997888767351987, "loss": 1.5153, "step": 544 }, { "epoch": 0.06, "grad_norm": 0.12878593154366547, "learning_rate": 0.0009978727542455511, "loss": 1.4892, "step": 545 }, { "epoch": 0.06, "grad_norm": 0.13996023103395688, "learning_rate": 0.0009978566807703274, "loss": 1.5035, "step": 546 }, { "epoch": 0.06, "grad_norm": 0.12336944314011723, "learning_rate": 0.0009978405469282647, "loss": 1.3817, "step": 547 }, { "epoch": 0.06, "grad_norm": 0.1420723471733146, "learning_rate": 0.0009978243527213196, "loss": 1.5272, "step": 548 }, { "epoch": 0.06, "grad_norm": 0.14789430461564015, "learning_rate": 0.0009978080981514553, "loss": 1.5767, "step": 549 }, { "epoch": 0.06, "grad_norm": 0.1267611684634138, "learning_rate": 0.0009977917832206431, "loss": 1.4002, "step": 550 }, { "epoch": 0.06, "grad_norm": 0.13572961076613538, "learning_rate": 0.0009977754079308608, "loss": 1.3569, "step": 551 }, { "epoch": 0.06, "grad_norm": 0.12869792779865574, "learning_rate": 0.0009977589722840942, "loss": 1.4989, "step": 552 }, { "epoch": 0.06, "grad_norm": 0.12884701104585888, "learning_rate": 0.0009977424762823363, "loss": 1.4594, "step": 553 }, { "epoch": 0.06, "grad_norm": 0.1496458530775414, "learning_rate": 0.000997725919927587, "loss": 1.6697, "step": 554 }, { "epoch": 0.06, "grad_norm": 0.16089313194101512, "learning_rate": 0.0009977093032218544, "loss": 1.4965, "step": 555 }, { "epoch": 0.06, "grad_norm": 0.12898716410042255, "learning_rate": 0.0009976926261671523, "loss": 1.5562, "step": 556 }, { "epoch": 0.06, "grad_norm": 0.13909417311407565, "learning_rate": 0.000997675888765504, "loss": 1.4994, "step": 557 }, { "epoch": 0.06, "grad_norm": 0.1364769705088755, "learning_rate": 0.0009976590910189382, "loss": 1.6546, "step": 558 }, { "epoch": 0.06, "grad_norm": 0.1407938612275754, "learning_rate": 0.0009976422329294919, "loss": 1.6437, "step": 559 }, { "epoch": 0.06, "grad_norm": 0.12342258081030302, "learning_rate": 0.0009976253144992093, "loss": 1.4347, "step": 560 }, { "epoch": 0.06, "grad_norm": 0.11970500471927006, "learning_rate": 0.0009976083357301417, "loss": 1.2459, "step": 561 }, { "epoch": 0.06, "grad_norm": 0.11644855616958355, "learning_rate": 0.0009975912966243478, "loss": 1.4681, "step": 562 }, { "epoch": 0.06, "grad_norm": 0.1399132278939777, "learning_rate": 0.0009975741971838938, "loss": 1.4792, "step": 563 }, { "epoch": 0.06, "grad_norm": 0.12385326071720139, "learning_rate": 0.000997557037410853, "loss": 1.4996, "step": 564 }, { "epoch": 0.06, "grad_norm": 0.13958038993004748, "learning_rate": 0.0009975398173073062, "loss": 1.5275, "step": 565 }, { "epoch": 0.06, "grad_norm": 0.11635118724721759, "learning_rate": 0.000997522536875341, "loss": 1.5998, "step": 566 }, { "epoch": 0.06, "grad_norm": 0.1135776748032988, "learning_rate": 0.0009975051961170532, "loss": 1.4715, "step": 567 }, { "epoch": 0.06, "grad_norm": 0.14026212563791537, "learning_rate": 0.0009974877950345452, "loss": 1.5512, "step": 568 }, { "epoch": 0.06, "grad_norm": 0.13372265079333248, "learning_rate": 0.000997470333629927, "loss": 1.5538, "step": 569 }, { "epoch": 0.06, "grad_norm": 0.09829929826672491, "learning_rate": 0.0009974528119053156, "loss": 1.404, "step": 570 }, { "epoch": 0.06, "grad_norm": 0.1400205748949228, "learning_rate": 0.0009974352298628359, "loss": 1.4348, "step": 571 }, { "epoch": 0.06, "grad_norm": 0.12816133710759073, "learning_rate": 0.0009974175875046196, "loss": 1.5441, "step": 572 }, { "epoch": 0.06, "grad_norm": 0.11826809528085243, "learning_rate": 0.0009973998848328061, "loss": 1.4434, "step": 573 }, { "epoch": 0.06, "grad_norm": 0.11123840372385792, "learning_rate": 0.0009973821218495415, "loss": 1.5314, "step": 574 }, { "epoch": 0.06, "grad_norm": 0.1042235432756459, "learning_rate": 0.0009973642985569803, "loss": 1.5121, "step": 575 }, { "epoch": 0.06, "grad_norm": 0.11749153433901713, "learning_rate": 0.0009973464149572828, "loss": 1.5447, "step": 576 }, { "epoch": 0.06, "grad_norm": 0.15188595873537555, "learning_rate": 0.000997328471052618, "loss": 1.5764, "step": 577 }, { "epoch": 0.06, "grad_norm": 0.11940122268611729, "learning_rate": 0.0009973104668451617, "loss": 1.4156, "step": 578 }, { "epoch": 0.06, "grad_norm": 0.1230749446399393, "learning_rate": 0.0009972924023370967, "loss": 1.5258, "step": 579 }, { "epoch": 0.06, "grad_norm": 0.11807230862866323, "learning_rate": 0.0009972742775306133, "loss": 1.5498, "step": 580 }, { "epoch": 0.06, "grad_norm": 0.12698953546566902, "learning_rate": 0.0009972560924279097, "loss": 1.5633, "step": 581 }, { "epoch": 0.06, "grad_norm": 0.12048242833327667, "learning_rate": 0.0009972378470311904, "loss": 1.4828, "step": 582 }, { "epoch": 0.06, "grad_norm": 0.12821890991850848, "learning_rate": 0.0009972195413426679, "loss": 1.3623, "step": 583 }, { "epoch": 0.06, "grad_norm": 0.1364893555564914, "learning_rate": 0.000997201175364562, "loss": 1.4673, "step": 584 }, { "epoch": 0.06, "grad_norm": 0.10110545251686494, "learning_rate": 0.0009971827490990993, "loss": 1.5114, "step": 585 }, { "epoch": 0.06, "grad_norm": 0.12792269127038894, "learning_rate": 0.0009971642625485144, "loss": 1.3687, "step": 586 }, { "epoch": 0.06, "grad_norm": 0.11988262596625479, "learning_rate": 0.0009971457157150485, "loss": 1.4693, "step": 587 }, { "epoch": 0.06, "grad_norm": 0.14192927684022144, "learning_rate": 0.0009971271086009507, "loss": 1.5541, "step": 588 }, { "epoch": 0.06, "grad_norm": 0.1335038186372788, "learning_rate": 0.0009971084412084771, "loss": 1.3357, "step": 589 }, { "epoch": 0.06, "grad_norm": 0.11027670743622948, "learning_rate": 0.0009970897135398913, "loss": 1.4499, "step": 590 }, { "epoch": 0.06, "grad_norm": 0.10736693798136951, "learning_rate": 0.000997070925597464, "loss": 1.3992, "step": 591 }, { "epoch": 0.06, "grad_norm": 0.10885656905907487, "learning_rate": 0.0009970520773834733, "loss": 1.4657, "step": 592 }, { "epoch": 0.06, "grad_norm": 0.09049890929626538, "learning_rate": 0.0009970331689002046, "loss": 1.4588, "step": 593 }, { "epoch": 0.06, "grad_norm": 0.10742491587756407, "learning_rate": 0.0009970142001499505, "loss": 1.3222, "step": 594 }, { "epoch": 0.06, "grad_norm": 0.1267830112432258, "learning_rate": 0.0009969951711350114, "loss": 1.5056, "step": 595 }, { "epoch": 0.06, "grad_norm": 0.11243589034871186, "learning_rate": 0.0009969760818576941, "loss": 1.4724, "step": 596 }, { "epoch": 0.06, "grad_norm": 0.1280646485857755, "learning_rate": 0.0009969569323203138, "loss": 1.5029, "step": 597 }, { "epoch": 0.06, "grad_norm": 0.13283166676950728, "learning_rate": 0.000996937722525192, "loss": 1.5652, "step": 598 }, { "epoch": 0.06, "grad_norm": 0.11385817819290642, "learning_rate": 0.0009969184524746585, "loss": 1.3798, "step": 599 }, { "epoch": 0.06, "grad_norm": 0.1104480250063874, "learning_rate": 0.000996899122171049, "loss": 1.3786, "step": 600 }, { "epoch": 0.06, "grad_norm": 0.1153007090104645, "learning_rate": 0.0009968797316167082, "loss": 1.397, "step": 601 }, { "epoch": 0.06, "grad_norm": 0.12277337551873949, "learning_rate": 0.0009968602808139869, "loss": 1.6086, "step": 602 }, { "epoch": 0.06, "grad_norm": 0.12101584666596928, "learning_rate": 0.0009968407697652434, "loss": 1.4888, "step": 603 }, { "epoch": 0.06, "grad_norm": 0.15500860601806254, "learning_rate": 0.000996821198472844, "loss": 1.5664, "step": 604 }, { "epoch": 0.07, "grad_norm": 0.1178828194892715, "learning_rate": 0.0009968015669391613, "loss": 1.5685, "step": 605 }, { "epoch": 0.07, "grad_norm": 0.12335417800873578, "learning_rate": 0.000996781875166576, "loss": 1.5528, "step": 606 }, { "epoch": 0.07, "grad_norm": 0.12233699469045906, "learning_rate": 0.0009967621231574753, "loss": 1.3711, "step": 607 }, { "epoch": 0.07, "grad_norm": 0.11591524885822231, "learning_rate": 0.000996742310914255, "loss": 1.4826, "step": 608 }, { "epoch": 0.07, "grad_norm": 0.14044536601206312, "learning_rate": 0.0009967224384393168, "loss": 1.6158, "step": 609 }, { "epoch": 0.07, "grad_norm": 0.1381027639030021, "learning_rate": 0.0009967025057350705, "loss": 1.4215, "step": 610 }, { "epoch": 0.07, "grad_norm": 0.125010089541578, "learning_rate": 0.000996682512803933, "loss": 1.4867, "step": 611 }, { "epoch": 0.07, "grad_norm": 0.12207318684054917, "learning_rate": 0.0009966624596483285, "loss": 1.3042, "step": 612 }, { "epoch": 0.07, "grad_norm": 0.10878370098894088, "learning_rate": 0.0009966423462706884, "loss": 1.4636, "step": 613 }, { "epoch": 0.07, "grad_norm": 0.13328193000271166, "learning_rate": 0.0009966221726734517, "loss": 1.3897, "step": 614 }, { "epoch": 0.07, "grad_norm": 0.11689483535778034, "learning_rate": 0.0009966019388590644, "loss": 1.6064, "step": 615 }, { "epoch": 0.07, "grad_norm": 0.1301761197932106, "learning_rate": 0.00099658164482998, "loss": 1.6075, "step": 616 }, { "epoch": 0.07, "grad_norm": 0.1346747568866436, "learning_rate": 0.0009965612905886592, "loss": 1.3378, "step": 617 }, { "epoch": 0.07, "grad_norm": 0.12025260657555636, "learning_rate": 0.00099654087613757, "loss": 1.3927, "step": 618 }, { "epoch": 0.07, "grad_norm": 0.14311098764679006, "learning_rate": 0.0009965204014791879, "loss": 1.5443, "step": 619 }, { "epoch": 0.07, "grad_norm": 0.11841687436158872, "learning_rate": 0.0009964998666159952, "loss": 1.538, "step": 620 }, { "epoch": 0.07, "grad_norm": 0.11248266795214713, "learning_rate": 0.000996479271550482, "loss": 1.4871, "step": 621 }, { "epoch": 0.07, "grad_norm": 0.13459548022776915, "learning_rate": 0.0009964586162851455, "loss": 1.3075, "step": 622 }, { "epoch": 0.07, "grad_norm": 0.13005288898830575, "learning_rate": 0.0009964379008224901, "loss": 1.4756, "step": 623 }, { "epoch": 0.07, "grad_norm": 0.12150477440478978, "learning_rate": 0.0009964171251650277, "loss": 1.3786, "step": 624 }, { "epoch": 0.07, "grad_norm": 0.11943429960726923, "learning_rate": 0.0009963962893152778, "loss": 1.4501, "step": 625 }, { "epoch": 0.07, "grad_norm": 0.12382509486585465, "learning_rate": 0.0009963753932757662, "loss": 1.5678, "step": 626 }, { "epoch": 0.07, "grad_norm": 0.15440640002770115, "learning_rate": 0.000996354437049027, "loss": 1.5596, "step": 627 }, { "epoch": 0.07, "grad_norm": 0.11091940708776127, "learning_rate": 0.0009963334206376012, "loss": 1.4617, "step": 628 }, { "epoch": 0.07, "grad_norm": 0.11455971413894141, "learning_rate": 0.0009963123440440368, "loss": 1.3403, "step": 629 }, { "epoch": 0.07, "grad_norm": 0.15444295040707226, "learning_rate": 0.0009962912072708897, "loss": 1.5607, "step": 630 }, { "epoch": 0.07, "grad_norm": 0.1059896049882209, "learning_rate": 0.0009962700103207228, "loss": 1.4402, "step": 631 }, { "epoch": 0.07, "grad_norm": 0.10979744315974159, "learning_rate": 0.0009962487531961063, "loss": 1.509, "step": 632 }, { "epoch": 0.07, "grad_norm": 0.1271977452069575, "learning_rate": 0.0009962274358996178, "loss": 1.5976, "step": 633 }, { "epoch": 0.07, "grad_norm": 0.11723721165067925, "learning_rate": 0.0009962060584338417, "loss": 1.5683, "step": 634 }, { "epoch": 0.07, "grad_norm": 0.11516730530763199, "learning_rate": 0.0009961846208013704, "loss": 1.4661, "step": 635 }, { "epoch": 0.07, "grad_norm": 0.1176706269429394, "learning_rate": 0.0009961631230048032, "loss": 1.4224, "step": 636 }, { "epoch": 0.07, "grad_norm": 0.1072376383600733, "learning_rate": 0.0009961415650467467, "loss": 1.5348, "step": 637 }, { "epoch": 0.07, "grad_norm": 0.12294971323557731, "learning_rate": 0.000996119946929815, "loss": 1.6004, "step": 638 }, { "epoch": 0.07, "grad_norm": 0.11965915531049516, "learning_rate": 0.0009960982686566294, "loss": 1.3978, "step": 639 }, { "epoch": 0.07, "grad_norm": 0.09157392129523097, "learning_rate": 0.0009960765302298184, "loss": 1.4615, "step": 640 }, { "epoch": 0.07, "grad_norm": 0.11716155321247809, "learning_rate": 0.0009960547316520182, "loss": 1.5232, "step": 641 }, { "epoch": 0.07, "grad_norm": 0.1170791482693603, "learning_rate": 0.0009960328729258711, "loss": 1.4715, "step": 642 }, { "epoch": 0.07, "grad_norm": 0.12259710702230697, "learning_rate": 0.0009960109540540284, "loss": 1.5141, "step": 643 }, { "epoch": 0.07, "grad_norm": 0.12107772105788704, "learning_rate": 0.0009959889750391472, "loss": 1.461, "step": 644 }, { "epoch": 0.07, "grad_norm": 0.10557229425276307, "learning_rate": 0.0009959669358838932, "loss": 1.5069, "step": 645 }, { "epoch": 0.07, "grad_norm": 0.09790643592491724, "learning_rate": 0.0009959448365909384, "loss": 1.4338, "step": 646 }, { "epoch": 0.07, "grad_norm": 0.12391777117899554, "learning_rate": 0.0009959226771629622, "loss": 1.4601, "step": 647 }, { "epoch": 0.07, "grad_norm": 0.11025736594706048, "learning_rate": 0.0009959004576026516, "loss": 1.5652, "step": 648 }, { "epoch": 0.07, "grad_norm": 0.10460029117111654, "learning_rate": 0.000995878177912701, "loss": 1.4367, "step": 649 }, { "epoch": 0.07, "grad_norm": 0.10927989562994969, "learning_rate": 0.0009958558380958116, "loss": 1.4751, "step": 650 }, { "epoch": 0.07, "grad_norm": 0.12173129880732504, "learning_rate": 0.0009958334381546927, "loss": 1.5573, "step": 651 }, { "epoch": 0.07, "grad_norm": 0.12838332720662465, "learning_rate": 0.0009958109780920598, "loss": 1.4785, "step": 652 }, { "epoch": 0.07, "grad_norm": 0.12814839246956414, "learning_rate": 0.0009957884579106363, "loss": 1.5043, "step": 653 }, { "epoch": 0.07, "grad_norm": 0.12270162223081243, "learning_rate": 0.0009957658776131536, "loss": 1.6063, "step": 654 }, { "epoch": 0.07, "grad_norm": 0.13428707781101876, "learning_rate": 0.0009957432372023486, "loss": 1.4086, "step": 655 }, { "epoch": 0.07, "grad_norm": 0.13177426626485697, "learning_rate": 0.000995720536680967, "loss": 1.4597, "step": 656 }, { "epoch": 0.07, "grad_norm": 0.12414797214999132, "learning_rate": 0.0009956977760517615, "loss": 1.4479, "step": 657 }, { "epoch": 0.07, "grad_norm": 0.12911162849346897, "learning_rate": 0.0009956749553174918, "loss": 1.6136, "step": 658 }, { "epoch": 0.07, "grad_norm": 0.12716929664557133, "learning_rate": 0.0009956520744809248, "loss": 1.4265, "step": 659 }, { "epoch": 0.07, "grad_norm": 0.1179166264201855, "learning_rate": 0.000995629133544835, "loss": 1.4051, "step": 660 }, { "epoch": 0.07, "grad_norm": 0.14156709119896776, "learning_rate": 0.000995606132512004, "loss": 1.4486, "step": 661 }, { "epoch": 0.07, "grad_norm": 0.14670926950068083, "learning_rate": 0.000995583071385221, "loss": 1.6208, "step": 662 }, { "epoch": 0.07, "grad_norm": 0.10603558357744294, "learning_rate": 0.000995559950167282, "loss": 1.4793, "step": 663 }, { "epoch": 0.07, "grad_norm": 0.1224055075034946, "learning_rate": 0.0009955367688609905, "loss": 1.4483, "step": 664 }, { "epoch": 0.07, "grad_norm": 0.1395496872898413, "learning_rate": 0.0009955135274691573, "loss": 1.5365, "step": 665 }, { "epoch": 0.07, "grad_norm": 0.1079570336167769, "learning_rate": 0.0009954902259946008, "loss": 1.4479, "step": 666 }, { "epoch": 0.07, "grad_norm": 0.13238288662712247, "learning_rate": 0.0009954668644401462, "loss": 1.6067, "step": 667 }, { "epoch": 0.07, "grad_norm": 0.10990400407308812, "learning_rate": 0.0009954434428086259, "loss": 1.4747, "step": 668 }, { "epoch": 0.07, "grad_norm": 0.12106482867993244, "learning_rate": 0.0009954199611028802, "loss": 1.4404, "step": 669 }, { "epoch": 0.07, "grad_norm": 0.10078426132952148, "learning_rate": 0.0009953964193257564, "loss": 1.5387, "step": 670 }, { "epoch": 0.07, "grad_norm": 0.09989641475889491, "learning_rate": 0.0009953728174801088, "loss": 1.2998, "step": 671 }, { "epoch": 0.07, "grad_norm": 0.1222248510617004, "learning_rate": 0.0009953491555687991, "loss": 1.5839, "step": 672 }, { "epoch": 0.07, "grad_norm": 0.10030915780605011, "learning_rate": 0.0009953254335946969, "loss": 1.6116, "step": 673 }, { "epoch": 0.07, "grad_norm": 0.10147470567669492, "learning_rate": 0.000995301651560678, "loss": 1.4606, "step": 674 }, { "epoch": 0.07, "grad_norm": 0.12189077525868597, "learning_rate": 0.0009952778094696262, "loss": 1.5718, "step": 675 }, { "epoch": 0.07, "grad_norm": 0.12389923995874418, "learning_rate": 0.0009952539073244326, "loss": 1.5397, "step": 676 }, { "epoch": 0.07, "grad_norm": 0.1079829259763348, "learning_rate": 0.0009952299451279954, "loss": 1.5822, "step": 677 }, { "epoch": 0.07, "grad_norm": 0.13025108271421548, "learning_rate": 0.00099520592288322, "loss": 1.3588, "step": 678 }, { "epoch": 0.07, "grad_norm": 0.09252175398506655, "learning_rate": 0.0009951818405930194, "loss": 1.3074, "step": 679 }, { "epoch": 0.07, "grad_norm": 0.12190380184039762, "learning_rate": 0.0009951576982603133, "loss": 1.5712, "step": 680 }, { "epoch": 0.07, "grad_norm": 0.12012031597622391, "learning_rate": 0.0009951334958880292, "loss": 1.5322, "step": 681 }, { "epoch": 0.07, "grad_norm": 0.11291624944914505, "learning_rate": 0.0009951092334791016, "loss": 1.4539, "step": 682 }, { "epoch": 0.07, "grad_norm": 0.0931588349385203, "learning_rate": 0.0009950849110364729, "loss": 1.3937, "step": 683 }, { "epoch": 0.07, "grad_norm": 0.1053379448621117, "learning_rate": 0.0009950605285630916, "loss": 1.4337, "step": 684 }, { "epoch": 0.07, "grad_norm": 0.10645922517410703, "learning_rate": 0.0009950360860619147, "loss": 1.3934, "step": 685 }, { "epoch": 0.07, "grad_norm": 0.10572018719307143, "learning_rate": 0.0009950115835359054, "loss": 1.4284, "step": 686 }, { "epoch": 0.07, "grad_norm": 0.09603500722299851, "learning_rate": 0.0009949870209880354, "loss": 1.4436, "step": 687 }, { "epoch": 0.07, "grad_norm": 0.1171822427524059, "learning_rate": 0.0009949623984212824, "loss": 1.4438, "step": 688 }, { "epoch": 0.07, "grad_norm": 0.10775144263060174, "learning_rate": 0.0009949377158386323, "loss": 1.5625, "step": 689 }, { "epoch": 0.07, "grad_norm": 0.11412434036504529, "learning_rate": 0.0009949129732430778, "loss": 1.501, "step": 690 }, { "epoch": 0.07, "grad_norm": 0.10324234472443945, "learning_rate": 0.0009948881706376192, "loss": 1.6314, "step": 691 }, { "epoch": 0.07, "grad_norm": 0.12233091194141885, "learning_rate": 0.0009948633080252636, "loss": 1.5398, "step": 692 }, { "epoch": 0.07, "grad_norm": 0.10981153329077435, "learning_rate": 0.000994838385409026, "loss": 1.5315, "step": 693 }, { "epoch": 0.07, "grad_norm": 0.11124117283144365, "learning_rate": 0.000994813402791928, "loss": 1.468, "step": 694 }, { "epoch": 0.07, "grad_norm": 0.10859983867961821, "learning_rate": 0.000994788360176999, "loss": 1.4983, "step": 695 }, { "epoch": 0.07, "grad_norm": 0.10679836737127918, "learning_rate": 0.0009947632575672757, "loss": 1.509, "step": 696 }, { "epoch": 0.07, "grad_norm": 0.1452761412325607, "learning_rate": 0.0009947380949658017, "loss": 1.5244, "step": 697 }, { "epoch": 0.08, "grad_norm": 0.10376320328347177, "learning_rate": 0.0009947128723756281, "loss": 1.4568, "step": 698 }, { "epoch": 0.08, "grad_norm": 0.13737925652052532, "learning_rate": 0.0009946875897998131, "loss": 1.5241, "step": 699 }, { "epoch": 0.08, "grad_norm": 0.12421770725172457, "learning_rate": 0.0009946622472414223, "loss": 1.5322, "step": 700 }, { "epoch": 0.08, "grad_norm": 0.12125159942786252, "learning_rate": 0.000994636844703529, "loss": 1.3998, "step": 701 }, { "epoch": 0.08, "grad_norm": 0.13120907346940802, "learning_rate": 0.0009946113821892128, "loss": 1.4633, "step": 702 }, { "epoch": 0.08, "grad_norm": 0.15391223685243563, "learning_rate": 0.0009945858597015613, "loss": 1.4451, "step": 703 }, { "epoch": 0.08, "grad_norm": 0.1360647191323169, "learning_rate": 0.0009945602772436692, "loss": 1.4341, "step": 704 }, { "epoch": 0.08, "grad_norm": 0.10529263459373553, "learning_rate": 0.0009945346348186385, "loss": 1.4727, "step": 705 }, { "epoch": 0.08, "grad_norm": 0.11431211623838632, "learning_rate": 0.0009945089324295785, "loss": 1.4321, "step": 706 }, { "epoch": 0.08, "grad_norm": 0.13598904701131048, "learning_rate": 0.0009944831700796053, "loss": 1.5741, "step": 707 }, { "epoch": 0.08, "grad_norm": 0.1355862716894886, "learning_rate": 0.0009944573477718435, "loss": 1.4294, "step": 708 }, { "epoch": 0.08, "grad_norm": 0.12494946367200017, "learning_rate": 0.0009944314655094232, "loss": 1.4831, "step": 709 }, { "epoch": 0.08, "grad_norm": 0.11809725685709474, "learning_rate": 0.0009944055232954832, "loss": 1.4706, "step": 710 }, { "epoch": 0.08, "grad_norm": 0.1198085117849122, "learning_rate": 0.0009943795211331693, "loss": 1.579, "step": 711 }, { "epoch": 0.08, "grad_norm": 0.13247770297281755, "learning_rate": 0.0009943534590256338, "loss": 1.5206, "step": 712 }, { "epoch": 0.08, "grad_norm": 0.11403899759015178, "learning_rate": 0.000994327336976037, "loss": 1.5245, "step": 713 }, { "epoch": 0.08, "grad_norm": 0.1273405472522352, "learning_rate": 0.0009943011549875466, "loss": 1.4184, "step": 714 }, { "epoch": 0.08, "grad_norm": 0.1358059212062907, "learning_rate": 0.000994274913063337, "loss": 1.5701, "step": 715 }, { "epoch": 0.08, "grad_norm": 0.10773623159909808, "learning_rate": 0.0009942486112065902, "loss": 1.5458, "step": 716 }, { "epoch": 0.08, "grad_norm": 0.11792824796737497, "learning_rate": 0.0009942222494204954, "loss": 1.5711, "step": 717 }, { "epoch": 0.08, "grad_norm": 0.11176245858069334, "learning_rate": 0.000994195827708249, "loss": 1.4227, "step": 718 }, { "epoch": 0.08, "grad_norm": 0.11809148806007971, "learning_rate": 0.0009941693460730548, "loss": 1.4618, "step": 719 }, { "epoch": 0.08, "grad_norm": 0.12523379292415296, "learning_rate": 0.0009941428045181235, "loss": 1.4765, "step": 720 }, { "epoch": 0.08, "grad_norm": 0.11416568669285194, "learning_rate": 0.0009941162030466736, "loss": 1.4385, "step": 721 }, { "epoch": 0.08, "grad_norm": 0.11968357238475509, "learning_rate": 0.0009940895416619307, "loss": 1.4373, "step": 722 }, { "epoch": 0.08, "grad_norm": 0.15665080698553804, "learning_rate": 0.0009940628203671275, "loss": 1.5316, "step": 723 }, { "epoch": 0.08, "grad_norm": 0.10064658632776849, "learning_rate": 0.0009940360391655042, "loss": 1.5352, "step": 724 }, { "epoch": 0.08, "grad_norm": 0.10251226529146976, "learning_rate": 0.0009940091980603076, "loss": 1.4791, "step": 725 }, { "epoch": 0.08, "grad_norm": 0.10378073760870751, "learning_rate": 0.000993982297054793, "loss": 1.5029, "step": 726 }, { "epoch": 0.08, "grad_norm": 0.10539285260130456, "learning_rate": 0.0009939553361522217, "loss": 1.4734, "step": 727 }, { "epoch": 0.08, "grad_norm": 0.1100171833779912, "learning_rate": 0.0009939283153558628, "loss": 1.353, "step": 728 }, { "epoch": 0.08, "grad_norm": 0.11572299828458692, "learning_rate": 0.000993901234668993, "loss": 1.4491, "step": 729 }, { "epoch": 0.08, "grad_norm": 0.10411765589679892, "learning_rate": 0.0009938740940948958, "loss": 1.4687, "step": 730 }, { "epoch": 0.08, "grad_norm": 0.1213664111538291, "learning_rate": 0.0009938468936368618, "loss": 1.4684, "step": 731 }, { "epoch": 0.08, "grad_norm": 0.11803576700547622, "learning_rate": 0.0009938196332981897, "loss": 1.5678, "step": 732 }, { "epoch": 0.08, "grad_norm": 0.1189255355811123, "learning_rate": 0.0009937923130821844, "loss": 1.4901, "step": 733 }, { "epoch": 0.08, "grad_norm": 0.1140209620430202, "learning_rate": 0.0009937649329921589, "loss": 1.3862, "step": 734 }, { "epoch": 0.08, "grad_norm": 0.12863941643968582, "learning_rate": 0.000993737493031433, "loss": 1.4651, "step": 735 }, { "epoch": 0.08, "grad_norm": 0.11018747815379674, "learning_rate": 0.0009937099932033338, "loss": 1.4941, "step": 736 }, { "epoch": 0.08, "grad_norm": 0.11099330218951524, "learning_rate": 0.000993682433511196, "loss": 1.5038, "step": 737 }, { "epoch": 0.08, "grad_norm": 0.11147184255727886, "learning_rate": 0.000993654813958361, "loss": 1.4039, "step": 738 }, { "epoch": 0.08, "grad_norm": 0.10734348471301973, "learning_rate": 0.000993627134548178, "loss": 1.4491, "step": 739 }, { "epoch": 0.08, "grad_norm": 0.10710535668640941, "learning_rate": 0.0009935993952840032, "loss": 1.39, "step": 740 }, { "epoch": 0.08, "grad_norm": 0.13094386533114225, "learning_rate": 0.0009935715961692, "loss": 1.5734, "step": 741 }, { "epoch": 0.08, "grad_norm": 0.12488034601195075, "learning_rate": 0.000993543737207139, "loss": 1.5388, "step": 742 }, { "epoch": 0.08, "grad_norm": 0.12403785629260212, "learning_rate": 0.0009935158184011985, "loss": 1.2961, "step": 743 }, { "epoch": 0.08, "grad_norm": 0.11652789448505044, "learning_rate": 0.0009934878397547635, "loss": 1.4781, "step": 744 }, { "epoch": 0.08, "grad_norm": 0.11516732956770669, "learning_rate": 0.0009934598012712268, "loss": 1.4467, "step": 745 }, { "epoch": 0.08, "grad_norm": 0.12245986579048859, "learning_rate": 0.000993431702953988, "loss": 1.4653, "step": 746 }, { "epoch": 0.08, "grad_norm": 0.10983200899730307, "learning_rate": 0.0009934035448064538, "loss": 1.5349, "step": 747 }, { "epoch": 0.08, "grad_norm": 0.09167922119798957, "learning_rate": 0.000993375326832039, "loss": 1.4424, "step": 748 }, { "epoch": 0.08, "grad_norm": 0.1069490038423996, "learning_rate": 0.0009933470490341649, "loss": 1.447, "step": 749 }, { "epoch": 0.08, "grad_norm": 0.11574964570738946, "learning_rate": 0.0009933187114162603, "loss": 1.4137, "step": 750 }, { "epoch": 0.08, "grad_norm": 0.13373086874940893, "learning_rate": 0.0009932903139817611, "loss": 1.4324, "step": 751 }, { "epoch": 0.08, "grad_norm": 0.12154833008669642, "learning_rate": 0.0009932618567341108, "loss": 1.4614, "step": 752 }, { "epoch": 0.08, "grad_norm": 0.12863675940870534, "learning_rate": 0.0009932333396767596, "loss": 1.6192, "step": 753 }, { "epoch": 0.08, "grad_norm": 0.12602366816139124, "learning_rate": 0.0009932047628131659, "loss": 1.4902, "step": 754 }, { "epoch": 0.08, "grad_norm": 0.11828428253206899, "learning_rate": 0.000993176126146794, "loss": 1.5057, "step": 755 }, { "epoch": 0.08, "grad_norm": 0.11447959551486472, "learning_rate": 0.0009931474296811169, "loss": 1.4539, "step": 756 }, { "epoch": 0.08, "grad_norm": 0.11652175026432009, "learning_rate": 0.0009931186734196136, "loss": 1.5865, "step": 757 }, { "epoch": 0.08, "grad_norm": 0.15953279308615217, "learning_rate": 0.0009930898573657712, "loss": 1.5144, "step": 758 }, { "epoch": 0.08, "grad_norm": 0.11445487382353405, "learning_rate": 0.0009930609815230838, "loss": 1.5207, "step": 759 }, { "epoch": 0.08, "grad_norm": 0.09523137267014015, "learning_rate": 0.0009930320458950523, "loss": 1.4406, "step": 760 }, { "epoch": 0.08, "grad_norm": 0.11500133371745935, "learning_rate": 0.0009930030504851856, "loss": 1.5745, "step": 761 }, { "epoch": 0.08, "grad_norm": 0.11011675099438246, "learning_rate": 0.0009929739952969994, "loss": 1.4388, "step": 762 }, { "epoch": 0.08, "grad_norm": 0.11046935647517428, "learning_rate": 0.0009929448803340166, "loss": 1.5235, "step": 763 }, { "epoch": 0.08, "grad_norm": 0.11679227263335894, "learning_rate": 0.0009929157055997677, "loss": 1.4837, "step": 764 }, { "epoch": 0.08, "grad_norm": 0.10443444082409932, "learning_rate": 0.0009928864710977902, "loss": 1.5236, "step": 765 }, { "epoch": 0.08, "grad_norm": 0.11647164928646948, "learning_rate": 0.0009928571768316288, "loss": 1.4907, "step": 766 }, { "epoch": 0.08, "grad_norm": 0.10566648958207446, "learning_rate": 0.0009928278228048357, "loss": 1.4907, "step": 767 }, { "epoch": 0.08, "grad_norm": 0.09826190297521414, "learning_rate": 0.00099279840902097, "loss": 1.4079, "step": 768 }, { "epoch": 0.08, "grad_norm": 0.1017904451693403, "learning_rate": 0.0009927689354835981, "loss": 1.4102, "step": 769 }, { "epoch": 0.08, "grad_norm": 0.09993971482709918, "learning_rate": 0.0009927394021962941, "loss": 1.6505, "step": 770 }, { "epoch": 0.08, "grad_norm": 0.10591663082599534, "learning_rate": 0.0009927098091626388, "loss": 1.4566, "step": 771 }, { "epoch": 0.08, "grad_norm": 0.11498870565123132, "learning_rate": 0.0009926801563862205, "loss": 1.6038, "step": 772 }, { "epoch": 0.08, "grad_norm": 0.11407235573294057, "learning_rate": 0.0009926504438706348, "loss": 1.5839, "step": 773 }, { "epoch": 0.08, "grad_norm": 0.12308677147643597, "learning_rate": 0.0009926206716194842, "loss": 1.5441, "step": 774 }, { "epoch": 0.08, "grad_norm": 0.10318309978776286, "learning_rate": 0.0009925908396363789, "loss": 1.5983, "step": 775 }, { "epoch": 0.08, "grad_norm": 0.10265958152079739, "learning_rate": 0.0009925609479249362, "loss": 1.467, "step": 776 }, { "epoch": 0.08, "grad_norm": 0.1041780857939972, "learning_rate": 0.0009925309964887803, "loss": 1.3516, "step": 777 }, { "epoch": 0.08, "grad_norm": 0.1127904379754784, "learning_rate": 0.0009925009853315432, "loss": 1.3867, "step": 778 }, { "epoch": 0.08, "grad_norm": 0.10759215513795695, "learning_rate": 0.0009924709144568636, "loss": 1.4878, "step": 779 }, { "epoch": 0.08, "grad_norm": 0.10524365237305855, "learning_rate": 0.0009924407838683878, "loss": 1.4779, "step": 780 }, { "epoch": 0.08, "grad_norm": 0.09420759561249992, "learning_rate": 0.0009924105935697693, "loss": 1.475, "step": 781 }, { "epoch": 0.08, "grad_norm": 0.10835188218183127, "learning_rate": 0.0009923803435646689, "loss": 1.5956, "step": 782 }, { "epoch": 0.08, "grad_norm": 0.10517000343653546, "learning_rate": 0.0009923500338567541, "loss": 1.4006, "step": 783 }, { "epoch": 0.08, "grad_norm": 0.10588943505093504, "learning_rate": 0.0009923196644497003, "loss": 1.4583, "step": 784 }, { "epoch": 0.08, "grad_norm": 0.09840148056749254, "learning_rate": 0.00099228923534719, "loss": 1.5177, "step": 785 }, { "epoch": 0.08, "grad_norm": 0.087873993730566, "learning_rate": 0.0009922587465529128, "loss": 1.4561, "step": 786 }, { "epoch": 0.08, "grad_norm": 0.10730475413220047, "learning_rate": 0.0009922281980705653, "loss": 1.4842, "step": 787 }, { "epoch": 0.08, "grad_norm": 0.09819906057900965, "learning_rate": 0.0009921975899038521, "loss": 1.443, "step": 788 }, { "epoch": 0.08, "grad_norm": 0.10673849388513212, "learning_rate": 0.0009921669220564842, "loss": 1.4846, "step": 789 }, { "epoch": 0.08, "grad_norm": 0.09585259890060156, "learning_rate": 0.00099213619453218, "loss": 1.4982, "step": 790 }, { "epoch": 0.09, "grad_norm": 0.10886473711799136, "learning_rate": 0.0009921054073346659, "loss": 1.5461, "step": 791 }, { "epoch": 0.09, "grad_norm": 0.11529970337773958, "learning_rate": 0.0009920745604676743, "loss": 1.483, "step": 792 }, { "epoch": 0.09, "grad_norm": 0.1049014082110331, "learning_rate": 0.0009920436539349459, "loss": 1.428, "step": 793 }, { "epoch": 0.09, "grad_norm": 0.11341000507473294, "learning_rate": 0.0009920126877402283, "loss": 1.4127, "step": 794 }, { "epoch": 0.09, "grad_norm": 0.0972092028209167, "learning_rate": 0.000991981661887276, "loss": 1.5701, "step": 795 }, { "epoch": 0.09, "grad_norm": 0.10979701692895823, "learning_rate": 0.0009919505763798509, "loss": 1.4882, "step": 796 }, { "epoch": 0.09, "grad_norm": 0.12347853346750738, "learning_rate": 0.0009919194312217226, "loss": 1.5197, "step": 797 }, { "epoch": 0.09, "grad_norm": 0.12862095545623062, "learning_rate": 0.0009918882264166671, "loss": 1.4307, "step": 798 }, { "epoch": 0.09, "grad_norm": 0.11011053871767552, "learning_rate": 0.0009918569619684686, "loss": 1.5358, "step": 799 }, { "epoch": 0.09, "grad_norm": 0.10923348365641565, "learning_rate": 0.0009918256378809178, "loss": 1.3628, "step": 800 }, { "epoch": 0.09, "grad_norm": 0.10848730542687637, "learning_rate": 0.0009917942541578127, "loss": 1.4926, "step": 801 }, { "epoch": 0.09, "grad_norm": 0.09815586793245867, "learning_rate": 0.0009917628108029588, "loss": 1.4147, "step": 802 }, { "epoch": 0.09, "grad_norm": 0.12150522280091537, "learning_rate": 0.000991731307820169, "loss": 1.3304, "step": 803 }, { "epoch": 0.09, "grad_norm": 0.10961537259089094, "learning_rate": 0.0009916997452132624, "loss": 1.4825, "step": 804 }, { "epoch": 0.09, "grad_norm": 0.11017075427928086, "learning_rate": 0.0009916681229860669, "loss": 1.5064, "step": 805 }, { "epoch": 0.09, "grad_norm": 0.10544774278956268, "learning_rate": 0.0009916364411424164, "loss": 1.4638, "step": 806 }, { "epoch": 0.09, "grad_norm": 0.121938065418309, "learning_rate": 0.0009916046996861522, "loss": 1.4857, "step": 807 }, { "epoch": 0.09, "grad_norm": 0.12330109961033686, "learning_rate": 0.0009915728986211237, "loss": 1.3688, "step": 808 }, { "epoch": 0.09, "grad_norm": 0.1371285563688456, "learning_rate": 0.0009915410379511865, "loss": 1.5859, "step": 809 }, { "epoch": 0.09, "grad_norm": 0.1053850031308369, "learning_rate": 0.0009915091176802035, "loss": 1.3979, "step": 810 }, { "epoch": 0.09, "grad_norm": 0.10649860867690676, "learning_rate": 0.000991477137812046, "loss": 1.5179, "step": 811 }, { "epoch": 0.09, "grad_norm": 0.13922844265421294, "learning_rate": 0.0009914450983505908, "loss": 1.3672, "step": 812 }, { "epoch": 0.09, "grad_norm": 0.1287741960027465, "learning_rate": 0.0009914129992997232, "loss": 1.465, "step": 813 }, { "epoch": 0.09, "grad_norm": 0.10611709028639908, "learning_rate": 0.0009913808406633354, "loss": 1.568, "step": 814 }, { "epoch": 0.09, "grad_norm": 0.10666038719575555, "learning_rate": 0.0009913486224453266, "loss": 1.4613, "step": 815 }, { "epoch": 0.09, "grad_norm": 0.09552780948354576, "learning_rate": 0.0009913163446496032, "loss": 1.3986, "step": 816 }, { "epoch": 0.09, "grad_norm": 0.10458514944773992, "learning_rate": 0.0009912840072800796, "loss": 1.4997, "step": 817 }, { "epoch": 0.09, "grad_norm": 0.10781289805463203, "learning_rate": 0.0009912516103406759, "loss": 1.4163, "step": 818 }, { "epoch": 0.09, "grad_norm": 0.11066849525641222, "learning_rate": 0.0009912191538353212, "loss": 1.3647, "step": 819 }, { "epoch": 0.09, "grad_norm": 0.11037102460141801, "learning_rate": 0.0009911866377679505, "loss": 1.4986, "step": 820 }, { "epoch": 0.09, "grad_norm": 0.10550276331452135, "learning_rate": 0.0009911540621425066, "loss": 1.4142, "step": 821 }, { "epoch": 0.09, "grad_norm": 0.09357877084866194, "learning_rate": 0.0009911214269629394, "loss": 1.4046, "step": 822 }, { "epoch": 0.09, "grad_norm": 0.10821731143868805, "learning_rate": 0.000991088732233206, "loss": 1.4454, "step": 823 }, { "epoch": 0.09, "grad_norm": 0.10885502691894844, "learning_rate": 0.0009910559779572707, "loss": 1.4416, "step": 824 }, { "epoch": 0.09, "grad_norm": 0.09442699393531002, "learning_rate": 0.0009910231641391052, "loss": 1.5394, "step": 825 }, { "epoch": 0.09, "grad_norm": 0.10525545876158422, "learning_rate": 0.0009909902907826883, "loss": 1.546, "step": 826 }, { "epoch": 0.09, "grad_norm": 0.11720992112248244, "learning_rate": 0.000990957357892006, "loss": 1.5321, "step": 827 }, { "epoch": 0.09, "grad_norm": 0.10864424543494355, "learning_rate": 0.0009909243654710514, "loss": 1.4025, "step": 828 }, { "epoch": 0.09, "grad_norm": 0.1117918912175246, "learning_rate": 0.000990891313523825, "loss": 1.692, "step": 829 }, { "epoch": 0.09, "grad_norm": 0.1313258688345413, "learning_rate": 0.0009908582020543345, "loss": 1.5812, "step": 830 }, { "epoch": 0.09, "grad_norm": 0.11780429486456513, "learning_rate": 0.0009908250310665947, "loss": 1.556, "step": 831 }, { "epoch": 0.09, "grad_norm": 0.11012726750810582, "learning_rate": 0.0009907918005646276, "loss": 1.4705, "step": 832 }, { "epoch": 0.09, "grad_norm": 0.10699556897583314, "learning_rate": 0.000990758510552463, "loss": 1.4487, "step": 833 }, { "epoch": 0.09, "grad_norm": 0.1012229677196335, "learning_rate": 0.0009907251610341368, "loss": 1.5403, "step": 834 }, { "epoch": 0.09, "grad_norm": 0.1236371026793818, "learning_rate": 0.0009906917520136932, "loss": 1.4242, "step": 835 }, { "epoch": 0.09, "grad_norm": 0.12682607602434304, "learning_rate": 0.000990658283495183, "loss": 1.4958, "step": 836 }, { "epoch": 0.09, "grad_norm": 0.1035656380844131, "learning_rate": 0.0009906247554826643, "loss": 1.4714, "step": 837 }, { "epoch": 0.09, "grad_norm": 0.10153729327718629, "learning_rate": 0.0009905911679802027, "loss": 1.4798, "step": 838 }, { "epoch": 0.09, "grad_norm": 0.09357476745648424, "learning_rate": 0.0009905575209918705, "loss": 1.5151, "step": 839 }, { "epoch": 0.09, "grad_norm": 0.10110221465858242, "learning_rate": 0.0009905238145217477, "loss": 1.5721, "step": 840 }, { "epoch": 0.09, "grad_norm": 0.09581386231874159, "learning_rate": 0.0009904900485739213, "loss": 1.5697, "step": 841 }, { "epoch": 0.09, "grad_norm": 0.11136596229104052, "learning_rate": 0.0009904562231524857, "loss": 1.5124, "step": 842 }, { "epoch": 0.09, "grad_norm": 0.10563905529316776, "learning_rate": 0.0009904223382615417, "loss": 1.5153, "step": 843 }, { "epoch": 0.09, "grad_norm": 0.105291268018836, "learning_rate": 0.000990388393905199, "loss": 1.5121, "step": 844 }, { "epoch": 0.09, "grad_norm": 0.13132650708524377, "learning_rate": 0.0009903543900875726, "loss": 1.565, "step": 845 }, { "epoch": 0.09, "grad_norm": 0.11176225233742272, "learning_rate": 0.000990320326812786, "loss": 1.5904, "step": 846 }, { "epoch": 0.09, "grad_norm": 0.09906334204646819, "learning_rate": 0.0009902862040849694, "loss": 1.434, "step": 847 }, { "epoch": 0.09, "grad_norm": 0.10512336498042481, "learning_rate": 0.0009902520219082602, "loss": 1.4771, "step": 848 }, { "epoch": 0.09, "grad_norm": 0.10922470793825259, "learning_rate": 0.0009902177802868033, "loss": 1.5708, "step": 849 }, { "epoch": 0.09, "grad_norm": 0.1013422099157301, "learning_rate": 0.0009901834792247503, "loss": 1.543, "step": 850 }, { "epoch": 0.09, "grad_norm": 0.11685308384489977, "learning_rate": 0.0009901491187262609, "loss": 1.5184, "step": 851 }, { "epoch": 0.09, "grad_norm": 0.1068500340383022, "learning_rate": 0.0009901146987955007, "loss": 1.3863, "step": 852 }, { "epoch": 0.09, "grad_norm": 0.10535661820687749, "learning_rate": 0.0009900802194366437, "loss": 1.4796, "step": 853 }, { "epoch": 0.09, "grad_norm": 0.11783230529707762, "learning_rate": 0.0009900456806538707, "loss": 1.3684, "step": 854 }, { "epoch": 0.09, "grad_norm": 0.11384519631304521, "learning_rate": 0.0009900110824513691, "loss": 1.4558, "step": 855 }, { "epoch": 0.09, "grad_norm": 0.10264058083146971, "learning_rate": 0.0009899764248333348, "loss": 1.434, "step": 856 }, { "epoch": 0.09, "grad_norm": 0.11519712314754389, "learning_rate": 0.0009899417078039696, "loss": 1.4387, "step": 857 }, { "epoch": 0.09, "grad_norm": 0.1123372880638399, "learning_rate": 0.0009899069313674832, "loss": 1.4141, "step": 858 }, { "epoch": 0.09, "grad_norm": 0.1097586324100307, "learning_rate": 0.0009898720955280925, "loss": 1.5496, "step": 859 }, { "epoch": 0.09, "grad_norm": 0.11048300266047464, "learning_rate": 0.0009898372002900213, "loss": 1.3956, "step": 860 }, { "epoch": 0.09, "grad_norm": 0.10667135360093603, "learning_rate": 0.000989802245657501, "loss": 1.4431, "step": 861 }, { "epoch": 0.09, "grad_norm": 0.10169688375776761, "learning_rate": 0.0009897672316347696, "loss": 1.4944, "step": 862 }, { "epoch": 0.09, "grad_norm": 0.10805112813479695, "learning_rate": 0.000989732158226073, "loss": 1.5502, "step": 863 }, { "epoch": 0.09, "grad_norm": 0.11383346267574627, "learning_rate": 0.0009896970254356637, "loss": 1.5222, "step": 864 }, { "epoch": 0.09, "grad_norm": 0.10437239928845843, "learning_rate": 0.000989661833267802, "loss": 1.557, "step": 865 }, { "epoch": 0.09, "grad_norm": 0.12880169356158996, "learning_rate": 0.0009896265817267548, "loss": 1.6166, "step": 866 }, { "epoch": 0.09, "grad_norm": 0.12070472393705185, "learning_rate": 0.0009895912708167967, "loss": 1.5124, "step": 867 }, { "epoch": 0.09, "grad_norm": 0.12068593858069836, "learning_rate": 0.000989555900542209, "loss": 1.3678, "step": 868 }, { "epoch": 0.09, "grad_norm": 0.11386637597424645, "learning_rate": 0.0009895204709072806, "loss": 1.3299, "step": 869 }, { "epoch": 0.09, "grad_norm": 0.12175046980633066, "learning_rate": 0.0009894849819163075, "loss": 1.5199, "step": 870 }, { "epoch": 0.09, "grad_norm": 0.10357307243540072, "learning_rate": 0.000989449433573593, "loss": 1.4439, "step": 871 }, { "epoch": 0.09, "grad_norm": 0.11587763922735016, "learning_rate": 0.000989413825883447, "loss": 1.424, "step": 872 }, { "epoch": 0.09, "grad_norm": 0.10361221368654966, "learning_rate": 0.0009893781588501875, "loss": 1.5112, "step": 873 }, { "epoch": 0.09, "grad_norm": 0.1307499271286858, "learning_rate": 0.000989342432478139, "loss": 1.467, "step": 874 }, { "epoch": 0.09, "grad_norm": 0.10556346561030903, "learning_rate": 0.0009893066467716336, "loss": 1.6212, "step": 875 }, { "epoch": 0.09, "grad_norm": 0.13185572299133208, "learning_rate": 0.0009892708017350104, "loss": 1.4886, "step": 876 }, { "epoch": 0.09, "grad_norm": 0.11593000129396568, "learning_rate": 0.0009892348973726157, "loss": 1.4242, "step": 877 }, { "epoch": 0.09, "grad_norm": 0.10263973471205634, "learning_rate": 0.0009891989336888033, "loss": 1.4018, "step": 878 }, { "epoch": 0.09, "grad_norm": 0.09388484545466252, "learning_rate": 0.0009891629106879333, "loss": 1.5406, "step": 879 }, { "epoch": 0.09, "grad_norm": 0.10579398130719597, "learning_rate": 0.0009891268283743742, "loss": 1.4365, "step": 880 }, { "epoch": 0.09, "grad_norm": 0.10205073561963973, "learning_rate": 0.0009890906867525008, "loss": 1.36, "step": 881 }, { "epoch": 0.09, "grad_norm": 0.0993925966084655, "learning_rate": 0.0009890544858266953, "loss": 1.5417, "step": 882 }, { "epoch": 0.09, "grad_norm": 0.12462405391860587, "learning_rate": 0.0009890182256013476, "loss": 1.4766, "step": 883 }, { "epoch": 0.1, "grad_norm": 0.11871289446812809, "learning_rate": 0.0009889819060808541, "loss": 1.3472, "step": 884 }, { "epoch": 0.1, "grad_norm": 0.10788692034328204, "learning_rate": 0.0009889455272696186, "loss": 1.5064, "step": 885 }, { "epoch": 0.1, "grad_norm": 0.10830663754630337, "learning_rate": 0.0009889090891720524, "loss": 1.4562, "step": 886 }, { "epoch": 0.1, "grad_norm": 0.1020377985895941, "learning_rate": 0.0009888725917925735, "loss": 1.4743, "step": 887 }, { "epoch": 0.1, "grad_norm": 0.11402004313995222, "learning_rate": 0.0009888360351356076, "loss": 1.5885, "step": 888 }, { "epoch": 0.1, "grad_norm": 0.10404694253312369, "learning_rate": 0.0009887994192055872, "loss": 1.4231, "step": 889 }, { "epoch": 0.1, "grad_norm": 0.11522811129082161, "learning_rate": 0.0009887627440069518, "loss": 1.4904, "step": 890 }, { "epoch": 0.1, "grad_norm": 0.11120179814086706, "learning_rate": 0.0009887260095441488, "loss": 1.4243, "step": 891 }, { "epoch": 0.1, "grad_norm": 0.11751114935366559, "learning_rate": 0.0009886892158216323, "loss": 1.4856, "step": 892 }, { "epoch": 0.1, "grad_norm": 0.09387632888638782, "learning_rate": 0.0009886523628438635, "loss": 1.4743, "step": 893 }, { "epoch": 0.1, "grad_norm": 0.1089212935769675, "learning_rate": 0.0009886154506153113, "loss": 1.4551, "step": 894 }, { "epoch": 0.1, "grad_norm": 0.10504625831674352, "learning_rate": 0.000988578479140451, "loss": 1.5762, "step": 895 }, { "epoch": 0.1, "grad_norm": 0.11992942665130872, "learning_rate": 0.0009885414484237657, "loss": 1.5388, "step": 896 }, { "epoch": 0.1, "grad_norm": 0.12013797851445274, "learning_rate": 0.0009885043584697457, "loss": 1.3869, "step": 897 }, { "epoch": 0.1, "grad_norm": 0.1055384732184131, "learning_rate": 0.000988467209282888, "loss": 1.4898, "step": 898 }, { "epoch": 0.1, "grad_norm": 0.10227730583423617, "learning_rate": 0.000988430000867697, "loss": 1.4475, "step": 899 }, { "epoch": 0.1, "grad_norm": 0.11746558731715007, "learning_rate": 0.0009883927332286846, "loss": 1.5964, "step": 900 }, { "epoch": 0.1, "grad_norm": 0.11176430850331567, "learning_rate": 0.0009883554063703697, "loss": 1.4568, "step": 901 }, { "epoch": 0.1, "grad_norm": 0.10878283844133003, "learning_rate": 0.0009883180202972781, "loss": 1.5068, "step": 902 }, { "epoch": 0.1, "grad_norm": 0.1241270556094801, "learning_rate": 0.0009882805750139432, "loss": 1.5213, "step": 903 }, { "epoch": 0.1, "grad_norm": 0.11725153365662314, "learning_rate": 0.000988243070524905, "loss": 1.5395, "step": 904 }, { "epoch": 0.1, "grad_norm": 0.11545711343200074, "learning_rate": 0.0009882055068347114, "loss": 1.5103, "step": 905 }, { "epoch": 0.1, "grad_norm": 0.10455006262284446, "learning_rate": 0.000988167883947917, "loss": 1.4557, "step": 906 }, { "epoch": 0.1, "grad_norm": 0.10220852893232839, "learning_rate": 0.0009881302018690833, "loss": 1.4858, "step": 907 }, { "epoch": 0.1, "grad_norm": 0.0935560395575124, "learning_rate": 0.0009880924606027802, "loss": 1.5, "step": 908 }, { "epoch": 0.1, "grad_norm": 0.10430753292676732, "learning_rate": 0.0009880546601535834, "loss": 1.4719, "step": 909 }, { "epoch": 0.1, "grad_norm": 0.10808182563791133, "learning_rate": 0.0009880168005260766, "loss": 1.4541, "step": 910 }, { "epoch": 0.1, "grad_norm": 0.1250933395522429, "learning_rate": 0.0009879788817248503, "loss": 1.5065, "step": 911 }, { "epoch": 0.1, "grad_norm": 0.1072916390151232, "learning_rate": 0.000987940903754502, "loss": 1.456, "step": 912 }, { "epoch": 0.1, "grad_norm": 0.09340424157858447, "learning_rate": 0.0009879028666196373, "loss": 1.5287, "step": 913 }, { "epoch": 0.1, "grad_norm": 0.11313097745359296, "learning_rate": 0.0009878647703248677, "loss": 1.5945, "step": 914 }, { "epoch": 0.1, "grad_norm": 0.09626876557166207, "learning_rate": 0.0009878266148748128, "loss": 1.5226, "step": 915 }, { "epoch": 0.1, "grad_norm": 0.1136594247718088, "learning_rate": 0.000987788400274099, "loss": 1.3699, "step": 916 }, { "epoch": 0.1, "grad_norm": 0.09857785705146646, "learning_rate": 0.0009877501265273603, "loss": 1.4187, "step": 917 }, { "epoch": 0.1, "grad_norm": 0.09942441719629294, "learning_rate": 0.000987711793639237, "loss": 1.5443, "step": 918 }, { "epoch": 0.1, "grad_norm": 0.11513364964116303, "learning_rate": 0.0009876734016143773, "loss": 1.5316, "step": 919 }, { "epoch": 0.1, "grad_norm": 0.10223246559180174, "learning_rate": 0.0009876349504574365, "loss": 1.5269, "step": 920 }, { "epoch": 0.1, "grad_norm": 0.09292632355039271, "learning_rate": 0.000987596440173077, "loss": 1.3696, "step": 921 }, { "epoch": 0.1, "grad_norm": 0.11279813746379812, "learning_rate": 0.0009875578707659676, "loss": 1.4942, "step": 922 }, { "epoch": 0.1, "grad_norm": 0.09788112736253952, "learning_rate": 0.0009875192422407859, "loss": 1.418, "step": 923 }, { "epoch": 0.1, "grad_norm": 0.09913455232850231, "learning_rate": 0.0009874805546022153, "loss": 1.5017, "step": 924 }, { "epoch": 0.1, "grad_norm": 0.10018718019093535, "learning_rate": 0.0009874418078549467, "loss": 1.4764, "step": 925 }, { "epoch": 0.1, "grad_norm": 0.10335561243043875, "learning_rate": 0.0009874030020036787, "loss": 1.4936, "step": 926 }, { "epoch": 0.1, "grad_norm": 0.1014320149701531, "learning_rate": 0.0009873641370531162, "loss": 1.4366, "step": 927 }, { "epoch": 0.1, "grad_norm": 0.11382615640806051, "learning_rate": 0.0009873252130079718, "loss": 1.527, "step": 928 }, { "epoch": 0.1, "grad_norm": 0.09703283161987435, "learning_rate": 0.0009872862298729653, "loss": 1.3459, "step": 929 }, { "epoch": 0.1, "grad_norm": 0.12078726730680549, "learning_rate": 0.0009872471876528235, "loss": 1.4748, "step": 930 }, { "epoch": 0.1, "grad_norm": 0.13779708131618348, "learning_rate": 0.0009872080863522806, "loss": 1.4424, "step": 931 }, { "epoch": 0.1, "grad_norm": 0.09662842113778519, "learning_rate": 0.0009871689259760771, "loss": 1.506, "step": 932 }, { "epoch": 0.1, "grad_norm": 0.10470466535314994, "learning_rate": 0.0009871297065289623, "loss": 1.4008, "step": 933 }, { "epoch": 0.1, "grad_norm": 0.09825370182641988, "learning_rate": 0.000987090428015691, "loss": 1.457, "step": 934 }, { "epoch": 0.1, "grad_norm": 0.10128986233646643, "learning_rate": 0.000987051090441026, "loss": 1.5307, "step": 935 }, { "epoch": 0.1, "grad_norm": 0.10257667805231513, "learning_rate": 0.0009870116938097374, "loss": 1.5253, "step": 936 }, { "epoch": 0.1, "grad_norm": 0.13212406497778306, "learning_rate": 0.0009869722381266016, "loss": 1.4041, "step": 937 }, { "epoch": 0.1, "grad_norm": 0.09740066939177527, "learning_rate": 0.0009869327233964032, "loss": 1.4791, "step": 938 }, { "epoch": 0.1, "grad_norm": 0.09993232471086905, "learning_rate": 0.0009868931496239334, "loss": 1.3653, "step": 939 }, { "epoch": 0.1, "grad_norm": 0.10008462495414401, "learning_rate": 0.0009868535168139907, "loss": 1.3846, "step": 940 }, { "epoch": 0.1, "grad_norm": 0.13685743246388624, "learning_rate": 0.0009868138249713805, "loss": 1.4776, "step": 941 }, { "epoch": 0.1, "grad_norm": 0.09142847020327265, "learning_rate": 0.0009867740741009159, "loss": 1.3617, "step": 942 }, { "epoch": 0.1, "grad_norm": 0.11429730776068008, "learning_rate": 0.0009867342642074165, "loss": 1.4035, "step": 943 }, { "epoch": 0.1, "grad_norm": 0.12908152202529422, "learning_rate": 0.0009866943952957096, "loss": 1.4352, "step": 944 }, { "epoch": 0.1, "grad_norm": 0.12166464155376931, "learning_rate": 0.0009866544673706294, "loss": 1.3091, "step": 945 }, { "epoch": 0.1, "grad_norm": 0.11394285539064337, "learning_rate": 0.0009866144804370172, "loss": 1.418, "step": 946 }, { "epoch": 0.1, "grad_norm": 0.11662230091559467, "learning_rate": 0.0009865744344997216, "loss": 1.4271, "step": 947 }, { "epoch": 0.1, "grad_norm": 0.10758530149575866, "learning_rate": 0.0009865343295635985, "loss": 1.4356, "step": 948 }, { "epoch": 0.1, "grad_norm": 0.10608808920552162, "learning_rate": 0.0009864941656335105, "loss": 1.4282, "step": 949 }, { "epoch": 0.1, "grad_norm": 0.10309934585985372, "learning_rate": 0.0009864539427143278, "loss": 1.514, "step": 950 }, { "epoch": 0.1, "grad_norm": 0.10917924921838347, "learning_rate": 0.0009864136608109272, "loss": 1.4121, "step": 951 }, { "epoch": 0.1, "grad_norm": 0.11296901752973143, "learning_rate": 0.0009863733199281938, "loss": 1.4354, "step": 952 }, { "epoch": 0.1, "grad_norm": 0.12866370183838677, "learning_rate": 0.0009863329200710182, "loss": 1.5468, "step": 953 }, { "epoch": 0.1, "grad_norm": 0.1286966396951818, "learning_rate": 0.0009862924612442994, "loss": 1.4508, "step": 954 }, { "epoch": 0.1, "grad_norm": 0.13960278705237253, "learning_rate": 0.0009862519434529434, "loss": 1.538, "step": 955 }, { "epoch": 0.1, "grad_norm": 0.11652432188490669, "learning_rate": 0.0009862113667018627, "loss": 1.5395, "step": 956 }, { "epoch": 0.1, "grad_norm": 0.10072364916856659, "learning_rate": 0.0009861707309959777, "loss": 1.4169, "step": 957 }, { "epoch": 0.1, "grad_norm": 0.12757051256843824, "learning_rate": 0.0009861300363402153, "loss": 1.4389, "step": 958 }, { "epoch": 0.1, "grad_norm": 0.111629121012831, "learning_rate": 0.0009860892827395103, "loss": 1.598, "step": 959 }, { "epoch": 0.1, "grad_norm": 0.12919976312893156, "learning_rate": 0.0009860484701988037, "loss": 1.4479, "step": 960 }, { "epoch": 0.1, "grad_norm": 0.11279528892079407, "learning_rate": 0.0009860075987230446, "loss": 1.5771, "step": 961 }, { "epoch": 0.1, "grad_norm": 0.1278424802958141, "learning_rate": 0.0009859666683171885, "loss": 1.5236, "step": 962 }, { "epoch": 0.1, "grad_norm": 0.11501125985783359, "learning_rate": 0.0009859256789861986, "loss": 1.4961, "step": 963 }, { "epoch": 0.1, "grad_norm": 0.09565466997074817, "learning_rate": 0.000985884630735045, "loss": 1.4487, "step": 964 }, { "epoch": 0.1, "grad_norm": 0.09876986226137081, "learning_rate": 0.0009858435235687044, "loss": 1.617, "step": 965 }, { "epoch": 0.1, "grad_norm": 0.10299888552267045, "learning_rate": 0.000985802357492162, "loss": 1.4978, "step": 966 }, { "epoch": 0.1, "grad_norm": 0.11351964792472877, "learning_rate": 0.0009857611325104088, "loss": 1.4704, "step": 967 }, { "epoch": 0.1, "grad_norm": 0.11014860613660067, "learning_rate": 0.0009857198486284435, "loss": 1.5225, "step": 968 }, { "epoch": 0.1, "grad_norm": 0.10712646048648043, "learning_rate": 0.0009856785058512721, "loss": 1.5689, "step": 969 }, { "epoch": 0.1, "grad_norm": 0.12399232842174956, "learning_rate": 0.0009856371041839075, "loss": 1.5708, "step": 970 }, { "epoch": 0.1, "grad_norm": 0.09681065676026035, "learning_rate": 0.00098559564363137, "loss": 1.6005, "step": 971 }, { "epoch": 0.1, "grad_norm": 0.10178011038545998, "learning_rate": 0.0009855541241986863, "loss": 1.512, "step": 972 }, { "epoch": 0.1, "grad_norm": 0.12471617013219558, "learning_rate": 0.000985512545890891, "loss": 1.6852, "step": 973 }, { "epoch": 0.1, "grad_norm": 0.10686030336674802, "learning_rate": 0.000985470908713026, "loss": 1.4567, "step": 974 }, { "epoch": 0.1, "grad_norm": 0.10697882274944155, "learning_rate": 0.0009854292126701397, "loss": 1.5434, "step": 975 }, { "epoch": 0.1, "grad_norm": 0.10325976456318522, "learning_rate": 0.0009853874577672875, "loss": 1.4225, "step": 976 }, { "epoch": 0.11, "grad_norm": 0.09506887103679255, "learning_rate": 0.0009853456440095327, "loss": 1.4139, "step": 977 }, { "epoch": 0.11, "grad_norm": 0.09578804705306913, "learning_rate": 0.0009853037714019454, "loss": 1.4377, "step": 978 }, { "epoch": 0.11, "grad_norm": 0.09398835151976992, "learning_rate": 0.000985261839949603, "loss": 1.5171, "step": 979 }, { "epoch": 0.11, "grad_norm": 0.1143179413310966, "learning_rate": 0.0009852198496575894, "loss": 1.5511, "step": 980 }, { "epoch": 0.11, "grad_norm": 0.10145389270154191, "learning_rate": 0.000985177800530996, "loss": 1.4236, "step": 981 }, { "epoch": 0.11, "grad_norm": 0.08855066022940801, "learning_rate": 0.0009851356925749217, "loss": 1.428, "step": 982 }, { "epoch": 0.11, "grad_norm": 0.10133105738476883, "learning_rate": 0.0009850935257944722, "loss": 1.3685, "step": 983 }, { "epoch": 0.11, "grad_norm": 0.10335410310098274, "learning_rate": 0.0009850513001947604, "loss": 1.464, "step": 984 }, { "epoch": 0.11, "grad_norm": 0.09331951297898144, "learning_rate": 0.0009850090157809061, "loss": 1.4766, "step": 985 }, { "epoch": 0.11, "grad_norm": 0.0851802762504621, "learning_rate": 0.0009849666725580367, "loss": 1.6014, "step": 986 }, { "epoch": 0.11, "grad_norm": 0.09555168236454595, "learning_rate": 0.0009849242705312863, "loss": 1.4576, "step": 987 }, { "epoch": 0.11, "grad_norm": 0.0921061434635937, "learning_rate": 0.000984881809705796, "loss": 1.4993, "step": 988 }, { "epoch": 0.11, "grad_norm": 0.08929323937431682, "learning_rate": 0.000984839290086715, "loss": 1.5275, "step": 989 }, { "epoch": 0.11, "grad_norm": 0.08698430865853823, "learning_rate": 0.0009847967116791985, "loss": 1.4085, "step": 990 }, { "epoch": 0.11, "grad_norm": 0.1106918708421856, "learning_rate": 0.0009847540744884094, "loss": 1.4351, "step": 991 }, { "epoch": 0.11, "grad_norm": 0.1198492958950366, "learning_rate": 0.0009847113785195175, "loss": 1.5471, "step": 992 }, { "epoch": 0.11, "grad_norm": 0.09043846175343777, "learning_rate": 0.0009846686237776998, "loss": 1.3394, "step": 993 }, { "epoch": 0.11, "grad_norm": 0.09081924089189229, "learning_rate": 0.0009846258102681406, "loss": 1.4382, "step": 994 }, { "epoch": 0.11, "grad_norm": 0.09804043302232386, "learning_rate": 0.0009845829379960312, "loss": 1.3546, "step": 995 }, { "epoch": 0.11, "grad_norm": 0.10160497776628699, "learning_rate": 0.00098454000696657, "loss": 1.425, "step": 996 }, { "epoch": 0.11, "grad_norm": 0.09855611576341153, "learning_rate": 0.0009844970171849624, "loss": 1.3847, "step": 997 }, { "epoch": 0.11, "grad_norm": 0.08069941782676368, "learning_rate": 0.000984453968656421, "loss": 1.4353, "step": 998 }, { "epoch": 0.11, "grad_norm": 0.10779230448674895, "learning_rate": 0.0009844108613861662, "loss": 1.5072, "step": 999 }, { "epoch": 0.11, "grad_norm": 0.10385625952467904, "learning_rate": 0.000984367695379424, "loss": 1.4946, "step": 1000 }, { "epoch": 0.11, "grad_norm": 0.10086289488325921, "learning_rate": 0.0009843244706414292, "loss": 1.4217, "step": 1001 }, { "epoch": 0.11, "grad_norm": 0.1119121824309659, "learning_rate": 0.0009842811871774225, "loss": 1.4483, "step": 1002 }, { "epoch": 0.11, "grad_norm": 0.1080440169974913, "learning_rate": 0.000984237844992652, "loss": 1.4309, "step": 1003 }, { "epoch": 0.11, "grad_norm": 0.10987133844288485, "learning_rate": 0.0009841944440923736, "loss": 1.6328, "step": 1004 }, { "epoch": 0.11, "grad_norm": 0.10625514927826236, "learning_rate": 0.0009841509844818496, "loss": 1.4141, "step": 1005 }, { "epoch": 0.11, "grad_norm": 0.10159122308468017, "learning_rate": 0.0009841074661663496, "loss": 1.4201, "step": 1006 }, { "epoch": 0.11, "grad_norm": 0.10218291648959085, "learning_rate": 0.0009840638891511504, "loss": 1.4711, "step": 1007 }, { "epoch": 0.11, "grad_norm": 0.09931046150217804, "learning_rate": 0.0009840202534415357, "loss": 1.5503, "step": 1008 }, { "epoch": 0.11, "grad_norm": 0.10118813706201002, "learning_rate": 0.0009839765590427968, "loss": 1.4787, "step": 1009 }, { "epoch": 0.11, "grad_norm": 0.11350892060283965, "learning_rate": 0.0009839328059602316, "loss": 1.4651, "step": 1010 }, { "epoch": 0.11, "grad_norm": 0.10192194476484162, "learning_rate": 0.000983888994199145, "loss": 1.3939, "step": 1011 }, { "epoch": 0.11, "grad_norm": 0.09806497870255045, "learning_rate": 0.0009838451237648498, "loss": 1.544, "step": 1012 }, { "epoch": 0.11, "grad_norm": 0.1076394976182956, "learning_rate": 0.0009838011946626652, "loss": 1.4081, "step": 1013 }, { "epoch": 0.11, "grad_norm": 0.10085057657545801, "learning_rate": 0.0009837572068979179, "loss": 1.4804, "step": 1014 }, { "epoch": 0.11, "grad_norm": 0.10371985397156486, "learning_rate": 0.0009837131604759416, "loss": 1.6341, "step": 1015 }, { "epoch": 0.11, "grad_norm": 0.11205728480425982, "learning_rate": 0.0009836690554020768, "loss": 1.4994, "step": 1016 }, { "epoch": 0.11, "grad_norm": 0.0986560095523421, "learning_rate": 0.0009836248916816716, "loss": 1.3913, "step": 1017 }, { "epoch": 0.11, "grad_norm": 0.11832071985582042, "learning_rate": 0.000983580669320081, "loss": 1.4542, "step": 1018 }, { "epoch": 0.11, "grad_norm": 0.11660566131058599, "learning_rate": 0.0009835363883226673, "loss": 1.3678, "step": 1019 }, { "epoch": 0.11, "grad_norm": 0.11717444386866575, "learning_rate": 0.0009834920486947994, "loss": 1.5154, "step": 1020 }, { "epoch": 0.11, "grad_norm": 0.11973506149009075, "learning_rate": 0.0009834476504418535, "loss": 1.5199, "step": 1021 }, { "epoch": 0.11, "grad_norm": 0.12117972897901545, "learning_rate": 0.0009834031935692135, "loss": 1.5541, "step": 1022 }, { "epoch": 0.11, "grad_norm": 0.09979961498083265, "learning_rate": 0.0009833586780822697, "loss": 1.4248, "step": 1023 }, { "epoch": 0.11, "grad_norm": 0.08245349674048885, "learning_rate": 0.0009833141039864198, "loss": 1.3094, "step": 1024 }, { "epoch": 0.11, "grad_norm": 0.09588735228913653, "learning_rate": 0.0009832694712870688, "loss": 1.4229, "step": 1025 }, { "epoch": 0.11, "grad_norm": 0.09828741670587042, "learning_rate": 0.000983224779989628, "loss": 1.569, "step": 1026 }, { "epoch": 0.11, "grad_norm": 0.11780202591373197, "learning_rate": 0.0009831800300995166, "loss": 1.418, "step": 1027 }, { "epoch": 0.11, "grad_norm": 0.09290873549896511, "learning_rate": 0.000983135221622161, "loss": 1.4123, "step": 1028 }, { "epoch": 0.11, "grad_norm": 0.10994217265314823, "learning_rate": 0.000983090354562994, "loss": 1.5884, "step": 1029 }, { "epoch": 0.11, "grad_norm": 0.11691382359685173, "learning_rate": 0.0009830454289274562, "loss": 1.4539, "step": 1030 }, { "epoch": 0.11, "grad_norm": 0.1054038050930932, "learning_rate": 0.0009830004447209948, "loss": 1.4914, "step": 1031 }, { "epoch": 0.11, "grad_norm": 0.093228195566531, "learning_rate": 0.0009829554019490643, "loss": 1.6054, "step": 1032 }, { "epoch": 0.11, "grad_norm": 0.09753879636453995, "learning_rate": 0.0009829103006171263, "loss": 1.5552, "step": 1033 }, { "epoch": 0.11, "grad_norm": 0.0990335731096939, "learning_rate": 0.0009828651407306494, "loss": 1.4238, "step": 1034 }, { "epoch": 0.11, "grad_norm": 0.10536520343116573, "learning_rate": 0.0009828199222951097, "loss": 1.5421, "step": 1035 }, { "epoch": 0.11, "grad_norm": 0.10416559849628501, "learning_rate": 0.0009827746453159897, "loss": 1.5409, "step": 1036 }, { "epoch": 0.11, "grad_norm": 0.11005172462656163, "learning_rate": 0.0009827293097987798, "loss": 1.4537, "step": 1037 }, { "epoch": 0.11, "grad_norm": 0.11247632552430167, "learning_rate": 0.0009826839157489767, "loss": 1.5894, "step": 1038 }, { "epoch": 0.11, "grad_norm": 0.10756012516652874, "learning_rate": 0.0009826384631720848, "loss": 1.3542, "step": 1039 }, { "epoch": 0.11, "grad_norm": 0.10824115615017481, "learning_rate": 0.0009825929520736155, "loss": 1.4874, "step": 1040 }, { "epoch": 0.11, "grad_norm": 0.1300208355066187, "learning_rate": 0.0009825473824590866, "loss": 1.4001, "step": 1041 }, { "epoch": 0.11, "grad_norm": 0.12730737924436886, "learning_rate": 0.0009825017543340245, "loss": 1.344, "step": 1042 }, { "epoch": 0.11, "grad_norm": 0.12974938372532713, "learning_rate": 0.000982456067703961, "loss": 1.507, "step": 1043 }, { "epoch": 0.11, "grad_norm": 0.1035502393897516, "learning_rate": 0.0009824103225744359, "loss": 1.5458, "step": 1044 }, { "epoch": 0.11, "grad_norm": 0.1141337981032992, "learning_rate": 0.000982364518950996, "loss": 1.5196, "step": 1045 }, { "epoch": 0.11, "grad_norm": 0.12199297553231467, "learning_rate": 0.0009823186568391955, "loss": 1.5455, "step": 1046 }, { "epoch": 0.11, "grad_norm": 0.11478738447697275, "learning_rate": 0.000982272736244595, "loss": 1.4725, "step": 1047 }, { "epoch": 0.11, "grad_norm": 0.1262147429809612, "learning_rate": 0.0009822267571727623, "loss": 1.3743, "step": 1048 }, { "epoch": 0.11, "grad_norm": 0.10300865953073311, "learning_rate": 0.000982180719629273, "loss": 1.3518, "step": 1049 }, { "epoch": 0.11, "grad_norm": 0.1337500675482, "learning_rate": 0.0009821346236197092, "loss": 1.5529, "step": 1050 }, { "epoch": 0.11, "grad_norm": 0.12392587654224337, "learning_rate": 0.00098208846914966, "loss": 1.3774, "step": 1051 }, { "epoch": 0.11, "grad_norm": 0.15327272162495567, "learning_rate": 0.000982042256224722, "loss": 1.5009, "step": 1052 }, { "epoch": 0.11, "grad_norm": 0.17463158449167768, "learning_rate": 0.0009819959848504985, "loss": 1.3791, "step": 1053 }, { "epoch": 0.11, "grad_norm": 0.10993680189395197, "learning_rate": 0.0009819496550326002, "loss": 1.3499, "step": 1054 }, { "epoch": 0.11, "grad_norm": 0.09452638170222592, "learning_rate": 0.0009819032667766445, "loss": 1.3927, "step": 1055 }, { "epoch": 0.11, "grad_norm": 0.11780194487896538, "learning_rate": 0.0009818568200882566, "loss": 1.5436, "step": 1056 }, { "epoch": 0.11, "grad_norm": 0.12564058560924768, "learning_rate": 0.0009818103149730679, "loss": 1.4736, "step": 1057 }, { "epoch": 0.11, "grad_norm": 0.12738094503349265, "learning_rate": 0.0009817637514367174, "loss": 1.5559, "step": 1058 }, { "epoch": 0.11, "grad_norm": 0.11203298042668797, "learning_rate": 0.0009817171294848514, "loss": 1.5329, "step": 1059 }, { "epoch": 0.11, "grad_norm": 0.09762718006430858, "learning_rate": 0.0009816704491231226, "loss": 1.5596, "step": 1060 }, { "epoch": 0.11, "grad_norm": 0.09700332763521427, "learning_rate": 0.0009816237103571913, "loss": 1.4408, "step": 1061 }, { "epoch": 0.11, "grad_norm": 0.1141120906538291, "learning_rate": 0.0009815769131927246, "loss": 1.5046, "step": 1062 }, { "epoch": 0.11, "grad_norm": 0.09951655888040634, "learning_rate": 0.0009815300576353969, "loss": 1.3907, "step": 1063 }, { "epoch": 0.11, "grad_norm": 0.11335129257836248, "learning_rate": 0.0009814831436908897, "loss": 1.46, "step": 1064 }, { "epoch": 0.11, "grad_norm": 0.12462823502348326, "learning_rate": 0.0009814361713648915, "loss": 1.5528, "step": 1065 }, { "epoch": 0.11, "grad_norm": 0.10207058518187534, "learning_rate": 0.0009813891406630975, "loss": 1.5344, "step": 1066 }, { "epoch": 0.11, "grad_norm": 0.11331609691483102, "learning_rate": 0.0009813420515912108, "loss": 1.3399, "step": 1067 }, { "epoch": 0.11, "grad_norm": 0.11008646442048181, "learning_rate": 0.0009812949041549408, "loss": 1.4344, "step": 1068 }, { "epoch": 0.11, "grad_norm": 0.10103273692277252, "learning_rate": 0.0009812476983600046, "loss": 1.4093, "step": 1069 }, { "epoch": 0.12, "grad_norm": 0.10079447646547779, "learning_rate": 0.0009812004342121257, "loss": 1.4924, "step": 1070 }, { "epoch": 0.12, "grad_norm": 0.10309180507533766, "learning_rate": 0.0009811531117170352, "loss": 1.4854, "step": 1071 }, { "epoch": 0.12, "grad_norm": 0.10701374638267942, "learning_rate": 0.000981105730880471, "loss": 1.5011, "step": 1072 }, { "epoch": 0.12, "grad_norm": 0.10438559121872394, "learning_rate": 0.0009810582917081786, "loss": 1.5557, "step": 1073 }, { "epoch": 0.12, "grad_norm": 0.08928725239161178, "learning_rate": 0.0009810107942059096, "loss": 1.5582, "step": 1074 }, { "epoch": 0.12, "grad_norm": 0.10607406200382605, "learning_rate": 0.0009809632383794237, "loss": 1.4577, "step": 1075 }, { "epoch": 0.12, "grad_norm": 0.10140866770455789, "learning_rate": 0.0009809156242344868, "loss": 1.587, "step": 1076 }, { "epoch": 0.12, "grad_norm": 0.11324183419584681, "learning_rate": 0.0009808679517768727, "loss": 1.3907, "step": 1077 }, { "epoch": 0.12, "grad_norm": 0.09985830953044832, "learning_rate": 0.0009808202210123615, "loss": 1.5259, "step": 1078 }, { "epoch": 0.12, "grad_norm": 0.10726443153559254, "learning_rate": 0.000980772431946741, "loss": 1.5433, "step": 1079 }, { "epoch": 0.12, "grad_norm": 0.09371546019627704, "learning_rate": 0.0009807245845858054, "loss": 1.4472, "step": 1080 }, { "epoch": 0.12, "grad_norm": 0.09099857207405426, "learning_rate": 0.000980676678935357, "loss": 1.5692, "step": 1081 }, { "epoch": 0.12, "grad_norm": 0.1026646049901164, "learning_rate": 0.000980628715001204, "loss": 1.4125, "step": 1082 }, { "epoch": 0.12, "grad_norm": 0.10668880279133305, "learning_rate": 0.000980580692789162, "loss": 1.3309, "step": 1083 }, { "epoch": 0.12, "grad_norm": 0.1119693933886867, "learning_rate": 0.0009805326123050544, "loss": 1.445, "step": 1084 }, { "epoch": 0.12, "grad_norm": 0.11640640857224897, "learning_rate": 0.000980484473554711, "loss": 1.4927, "step": 1085 }, { "epoch": 0.12, "grad_norm": 0.10229689293466915, "learning_rate": 0.0009804362765439688, "loss": 1.4752, "step": 1086 }, { "epoch": 0.12, "grad_norm": 0.10514504374029744, "learning_rate": 0.0009803880212786715, "loss": 1.6104, "step": 1087 }, { "epoch": 0.12, "grad_norm": 0.11079636321731676, "learning_rate": 0.0009803397077646704, "loss": 1.4712, "step": 1088 }, { "epoch": 0.12, "grad_norm": 0.10082080188781722, "learning_rate": 0.000980291336007824, "loss": 1.4789, "step": 1089 }, { "epoch": 0.12, "grad_norm": 0.10675297685692006, "learning_rate": 0.0009802429060139974, "loss": 1.5433, "step": 1090 }, { "epoch": 0.12, "grad_norm": 0.126227264307551, "learning_rate": 0.0009801944177890624, "loss": 1.4064, "step": 1091 }, { "epoch": 0.12, "grad_norm": 0.09845560036111164, "learning_rate": 0.000980145871338899, "loss": 1.4893, "step": 1092 }, { "epoch": 0.12, "grad_norm": 0.1273710597482172, "learning_rate": 0.0009800972666693935, "loss": 1.5066, "step": 1093 }, { "epoch": 0.12, "grad_norm": 0.10197482636295606, "learning_rate": 0.000980048603786439, "loss": 1.4119, "step": 1094 }, { "epoch": 0.12, "grad_norm": 0.09923180377874398, "learning_rate": 0.0009799998826959366, "loss": 1.4974, "step": 1095 }, { "epoch": 0.12, "grad_norm": 0.10551711580597985, "learning_rate": 0.0009799511034037933, "loss": 1.5412, "step": 1096 }, { "epoch": 0.12, "grad_norm": 0.10527622567768133, "learning_rate": 0.0009799022659159242, "loss": 1.5318, "step": 1097 }, { "epoch": 0.12, "grad_norm": 0.09689921924266803, "learning_rate": 0.000979853370238251, "loss": 1.5553, "step": 1098 }, { "epoch": 0.12, "grad_norm": 0.09900786363899416, "learning_rate": 0.0009798044163767023, "loss": 1.3221, "step": 1099 }, { "epoch": 0.12, "grad_norm": 0.11176675418735046, "learning_rate": 0.0009797554043372138, "loss": 1.7212, "step": 1100 }, { "epoch": 0.12, "grad_norm": 0.10447474360809943, "learning_rate": 0.000979706334125729, "loss": 1.4666, "step": 1101 }, { "epoch": 0.12, "grad_norm": 0.0931764807312025, "learning_rate": 0.0009796572057481968, "loss": 1.5391, "step": 1102 }, { "epoch": 0.12, "grad_norm": 0.09105412831210441, "learning_rate": 0.000979608019210575, "loss": 1.4852, "step": 1103 }, { "epoch": 0.12, "grad_norm": 0.11365343481767007, "learning_rate": 0.0009795587745188275, "loss": 1.5142, "step": 1104 }, { "epoch": 0.12, "grad_norm": 0.09720993573412151, "learning_rate": 0.0009795094716789252, "loss": 1.4296, "step": 1105 }, { "epoch": 0.12, "grad_norm": 0.10445259805972958, "learning_rate": 0.0009794601106968466, "loss": 1.4978, "step": 1106 }, { "epoch": 0.12, "grad_norm": 0.08713210273408448, "learning_rate": 0.0009794106915785763, "loss": 1.3949, "step": 1107 }, { "epoch": 0.12, "grad_norm": 0.09792767712858018, "learning_rate": 0.000979361214330107, "loss": 1.3676, "step": 1108 }, { "epoch": 0.12, "grad_norm": 0.10885611133876523, "learning_rate": 0.0009793116789574379, "loss": 1.372, "step": 1109 }, { "epoch": 0.12, "grad_norm": 0.0952070184084006, "learning_rate": 0.0009792620854665753, "loss": 1.475, "step": 1110 }, { "epoch": 0.12, "grad_norm": 0.09550091244632411, "learning_rate": 0.0009792124338635325, "loss": 1.5107, "step": 1111 }, { "epoch": 0.12, "grad_norm": 0.09616490303940983, "learning_rate": 0.00097916272415433, "loss": 1.5391, "step": 1112 }, { "epoch": 0.12, "grad_norm": 0.1050502168365546, "learning_rate": 0.0009791129563449952, "loss": 1.445, "step": 1113 }, { "epoch": 0.12, "grad_norm": 0.09273724100814855, "learning_rate": 0.0009790631304415628, "loss": 1.4769, "step": 1114 }, { "epoch": 0.12, "grad_norm": 0.09836197010370934, "learning_rate": 0.000979013246450074, "loss": 1.3792, "step": 1115 }, { "epoch": 0.12, "grad_norm": 0.10609141191664713, "learning_rate": 0.000978963304376578, "loss": 1.4917, "step": 1116 }, { "epoch": 0.12, "grad_norm": 0.08593793619652067, "learning_rate": 0.00097891330422713, "loss": 1.4464, "step": 1117 }, { "epoch": 0.12, "grad_norm": 0.09539697052300453, "learning_rate": 0.0009788632460077927, "loss": 1.399, "step": 1118 }, { "epoch": 0.12, "grad_norm": 0.10822664643859031, "learning_rate": 0.000978813129724636, "loss": 1.4785, "step": 1119 }, { "epoch": 0.12, "grad_norm": 0.10154289276419569, "learning_rate": 0.0009787629553837367, "loss": 1.4826, "step": 1120 }, { "epoch": 0.12, "grad_norm": 0.10862578787342332, "learning_rate": 0.0009787127229911783, "loss": 1.4277, "step": 1121 }, { "epoch": 0.12, "grad_norm": 0.10981816955085401, "learning_rate": 0.000978662432553052, "loss": 1.3717, "step": 1122 }, { "epoch": 0.12, "grad_norm": 0.1023940491468338, "learning_rate": 0.0009786120840754556, "loss": 1.6124, "step": 1123 }, { "epoch": 0.12, "grad_norm": 0.11702668759170876, "learning_rate": 0.0009785616775644938, "loss": 1.4212, "step": 1124 }, { "epoch": 0.12, "grad_norm": 0.12158573099045404, "learning_rate": 0.000978511213026279, "loss": 1.3692, "step": 1125 }, { "epoch": 0.12, "grad_norm": 0.10410236569465127, "learning_rate": 0.0009784606904669297, "loss": 1.4622, "step": 1126 }, { "epoch": 0.12, "grad_norm": 0.11484842201707575, "learning_rate": 0.0009784101098925723, "loss": 1.5727, "step": 1127 }, { "epoch": 0.12, "grad_norm": 0.10040496777521027, "learning_rate": 0.0009783594713093397, "loss": 1.4933, "step": 1128 }, { "epoch": 0.12, "grad_norm": 0.11033351716891086, "learning_rate": 0.000978308774723372, "loss": 1.5888, "step": 1129 }, { "epoch": 0.12, "grad_norm": 0.12154411199523346, "learning_rate": 0.0009782580201408164, "loss": 1.4452, "step": 1130 }, { "epoch": 0.12, "grad_norm": 0.11269562719555688, "learning_rate": 0.0009782072075678271, "loss": 1.4995, "step": 1131 }, { "epoch": 0.12, "grad_norm": 0.101394103359456, "learning_rate": 0.0009781563370105653, "loss": 1.5348, "step": 1132 }, { "epoch": 0.12, "grad_norm": 0.12337547846107601, "learning_rate": 0.0009781054084751991, "loss": 1.5912, "step": 1133 }, { "epoch": 0.12, "grad_norm": 0.10895999658008346, "learning_rate": 0.0009780544219679039, "loss": 1.5698, "step": 1134 }, { "epoch": 0.12, "grad_norm": 0.10875009724116637, "learning_rate": 0.000978003377494862, "loss": 1.418, "step": 1135 }, { "epoch": 0.12, "grad_norm": 0.08717663220170035, "learning_rate": 0.0009779522750622625, "loss": 1.5179, "step": 1136 }, { "epoch": 0.12, "grad_norm": 0.0901001609158058, "learning_rate": 0.0009779011146763019, "loss": 1.5814, "step": 1137 }, { "epoch": 0.12, "grad_norm": 0.10275116736092334, "learning_rate": 0.0009778498963431837, "loss": 1.4128, "step": 1138 }, { "epoch": 0.12, "grad_norm": 0.09762112462628392, "learning_rate": 0.000977798620069118, "loss": 1.4922, "step": 1139 }, { "epoch": 0.12, "grad_norm": 0.10060723320049322, "learning_rate": 0.0009777472858603226, "loss": 1.5383, "step": 1140 }, { "epoch": 0.12, "grad_norm": 0.1164092627557891, "learning_rate": 0.0009776958937230216, "loss": 1.5062, "step": 1141 }, { "epoch": 0.12, "grad_norm": 0.10534018168294584, "learning_rate": 0.0009776444436634466, "loss": 1.4397, "step": 1142 }, { "epoch": 0.12, "grad_norm": 0.10459737964269737, "learning_rate": 0.0009775929356878362, "loss": 1.4836, "step": 1143 }, { "epoch": 0.12, "grad_norm": 0.1129596993044548, "learning_rate": 0.0009775413698024358, "loss": 1.5252, "step": 1144 }, { "epoch": 0.12, "grad_norm": 0.0977021657766086, "learning_rate": 0.000977489746013498, "loss": 1.3986, "step": 1145 }, { "epoch": 0.12, "grad_norm": 0.10225167190944157, "learning_rate": 0.0009774380643272822, "loss": 1.6317, "step": 1146 }, { "epoch": 0.12, "grad_norm": 0.08850304042119721, "learning_rate": 0.0009773863247500554, "loss": 1.4967, "step": 1147 }, { "epoch": 0.12, "grad_norm": 0.11049263300762383, "learning_rate": 0.0009773345272880906, "loss": 1.5815, "step": 1148 }, { "epoch": 0.12, "grad_norm": 0.09691390291798195, "learning_rate": 0.000977282671947669, "loss": 1.4033, "step": 1149 }, { "epoch": 0.12, "grad_norm": 0.10663292694602702, "learning_rate": 0.000977230758735078, "loss": 1.5099, "step": 1150 }, { "epoch": 0.12, "grad_norm": 0.08971210350781093, "learning_rate": 0.000977178787656612, "loss": 1.4351, "step": 1151 }, { "epoch": 0.12, "grad_norm": 0.12450168045568234, "learning_rate": 0.000977126758718573, "loss": 1.5371, "step": 1152 }, { "epoch": 0.12, "grad_norm": 0.0948433794803075, "learning_rate": 0.0009770746719272696, "loss": 1.4678, "step": 1153 }, { "epoch": 0.12, "grad_norm": 0.10085071740419524, "learning_rate": 0.0009770225272890177, "loss": 1.4806, "step": 1154 }, { "epoch": 0.12, "grad_norm": 0.4484143809430015, "learning_rate": 0.0009769703248101397, "loss": 1.607, "step": 1155 }, { "epoch": 0.12, "grad_norm": 0.15922980601920514, "learning_rate": 0.0009769180644969653, "loss": 1.5416, "step": 1156 }, { "epoch": 0.12, "grad_norm": 0.11107798130456996, "learning_rate": 0.0009768657463558315, "loss": 1.6267, "step": 1157 }, { "epoch": 0.12, "grad_norm": 0.2946033526128502, "learning_rate": 0.0009768133703930819, "loss": 1.4602, "step": 1158 }, { "epoch": 0.12, "grad_norm": 0.15370026664096484, "learning_rate": 0.0009767609366150673, "loss": 1.5199, "step": 1159 }, { "epoch": 0.12, "grad_norm": 0.10375667791821207, "learning_rate": 0.0009767084450281456, "loss": 1.4155, "step": 1160 }, { "epoch": 0.12, "grad_norm": 0.12207675726090313, "learning_rate": 0.0009766558956386814, "loss": 1.3242, "step": 1161 }, { "epoch": 0.12, "grad_norm": 0.22605444819633985, "learning_rate": 0.0009766032884530465, "loss": 1.3588, "step": 1162 }, { "epoch": 0.13, "grad_norm": 0.12489025319914, "learning_rate": 0.00097655062347762, "loss": 1.5016, "step": 1163 }, { "epoch": 0.13, "grad_norm": 0.11854980300003969, "learning_rate": 0.0009764979007187873, "loss": 1.4542, "step": 1164 }, { "epoch": 0.13, "grad_norm": 0.26535399019300804, "learning_rate": 0.0009764451201829414, "loss": 1.5479, "step": 1165 }, { "epoch": 0.13, "grad_norm": 0.4479661893821317, "learning_rate": 0.0009763922818764822, "loss": 1.5013, "step": 1166 }, { "epoch": 0.13, "grad_norm": 0.17293052163018774, "learning_rate": 0.0009763393858058164, "loss": 1.4978, "step": 1167 }, { "epoch": 0.13, "grad_norm": 0.12328773343089187, "learning_rate": 0.0009762864319773579, "loss": 1.4241, "step": 1168 }, { "epoch": 0.13, "grad_norm": 0.10961795118064666, "learning_rate": 0.0009762334203975276, "loss": 1.5303, "step": 1169 }, { "epoch": 0.13, "grad_norm": 0.1175993556438544, "learning_rate": 0.0009761803510727531, "loss": 1.4783, "step": 1170 }, { "epoch": 0.13, "grad_norm": 0.11106899286073688, "learning_rate": 0.0009761272240094695, "loss": 1.458, "step": 1171 }, { "epoch": 0.13, "grad_norm": 0.13369412952321702, "learning_rate": 0.0009760740392141186, "loss": 1.4819, "step": 1172 }, { "epoch": 0.13, "grad_norm": 0.09580765095849292, "learning_rate": 0.0009760207966931489, "loss": 1.4705, "step": 1173 }, { "epoch": 0.13, "grad_norm": 0.11617828359160304, "learning_rate": 0.0009759674964530167, "loss": 1.4023, "step": 1174 }, { "epoch": 0.13, "grad_norm": 0.10434869245175268, "learning_rate": 0.0009759141385001847, "loss": 1.5545, "step": 1175 }, { "epoch": 0.13, "grad_norm": 0.1199786542618325, "learning_rate": 0.0009758607228411225, "loss": 1.5722, "step": 1176 }, { "epoch": 0.13, "grad_norm": 0.0964001059293876, "learning_rate": 0.0009758072494823072, "loss": 1.5165, "step": 1177 }, { "epoch": 0.13, "grad_norm": 0.11184263546413122, "learning_rate": 0.0009757537184302225, "loss": 1.4181, "step": 1178 }, { "epoch": 0.13, "grad_norm": 0.11404339323309447, "learning_rate": 0.0009757001296913593, "loss": 1.5288, "step": 1179 }, { "epoch": 0.13, "grad_norm": 0.11956579528844924, "learning_rate": 0.0009756464832722154, "loss": 1.4549, "step": 1180 }, { "epoch": 0.13, "grad_norm": 0.0978519457773404, "learning_rate": 0.0009755927791792956, "loss": 1.3303, "step": 1181 }, { "epoch": 0.13, "grad_norm": 0.17000533389215636, "learning_rate": 0.0009755390174191117, "loss": 1.4161, "step": 1182 }, { "epoch": 0.13, "grad_norm": 0.07996186824950127, "learning_rate": 0.0009754851979981826, "loss": 1.3548, "step": 1183 }, { "epoch": 0.13, "grad_norm": 0.09044166304385826, "learning_rate": 0.0009754313209230339, "loss": 1.5155, "step": 1184 }, { "epoch": 0.13, "grad_norm": 0.10621964304981515, "learning_rate": 0.0009753773862001985, "loss": 1.6404, "step": 1185 }, { "epoch": 0.13, "grad_norm": 0.08254025272480696, "learning_rate": 0.0009753233938362161, "loss": 1.4376, "step": 1186 }, { "epoch": 0.13, "grad_norm": 0.1106607801453522, "learning_rate": 0.0009752693438376336, "loss": 1.498, "step": 1187 }, { "epoch": 0.13, "grad_norm": 0.09876265807599625, "learning_rate": 0.0009752152362110045, "loss": 1.5122, "step": 1188 }, { "epoch": 0.13, "grad_norm": 0.10039646627078934, "learning_rate": 0.0009751610709628897, "loss": 1.3371, "step": 1189 }, { "epoch": 0.13, "grad_norm": 0.1005714762578938, "learning_rate": 0.000975106848099857, "loss": 1.4913, "step": 1190 }, { "epoch": 0.13, "grad_norm": 0.09409073411849112, "learning_rate": 0.0009750525676284811, "loss": 1.4359, "step": 1191 }, { "epoch": 0.13, "grad_norm": 0.1114939909639379, "learning_rate": 0.0009749982295553436, "loss": 1.4572, "step": 1192 }, { "epoch": 0.13, "grad_norm": 0.11159452971642979, "learning_rate": 0.0009749438338870331, "loss": 1.4944, "step": 1193 }, { "epoch": 0.13, "grad_norm": 0.10054333256733074, "learning_rate": 0.0009748893806301455, "loss": 1.4464, "step": 1194 }, { "epoch": 0.13, "grad_norm": 0.09895279648206912, "learning_rate": 0.0009748348697912832, "loss": 1.509, "step": 1195 }, { "epoch": 0.13, "grad_norm": 0.10061217401043886, "learning_rate": 0.000974780301377056, "loss": 1.4162, "step": 1196 }, { "epoch": 0.13, "grad_norm": 0.10417574356188845, "learning_rate": 0.0009747256753940803, "loss": 1.3959, "step": 1197 }, { "epoch": 0.13, "grad_norm": 0.10853524853515546, "learning_rate": 0.00097467099184898, "loss": 1.3809, "step": 1198 }, { "epoch": 0.13, "grad_norm": 0.10205080178365547, "learning_rate": 0.0009746162507483854, "loss": 1.5636, "step": 1199 }, { "epoch": 0.13, "grad_norm": 0.11428455096975486, "learning_rate": 0.0009745614520989341, "loss": 1.3596, "step": 1200 }, { "epoch": 0.13, "grad_norm": 0.10288778226541649, "learning_rate": 0.0009745065959072708, "loss": 1.5153, "step": 1201 }, { "epoch": 0.13, "grad_norm": 0.13135365823412376, "learning_rate": 0.0009744516821800469, "loss": 1.5387, "step": 1202 }, { "epoch": 0.13, "grad_norm": 0.10102362449138394, "learning_rate": 0.0009743967109239206, "loss": 1.4004, "step": 1203 }, { "epoch": 0.13, "grad_norm": 0.13512092208553644, "learning_rate": 0.0009743416821455577, "loss": 1.5138, "step": 1204 }, { "epoch": 0.13, "grad_norm": 0.1087914935718707, "learning_rate": 0.0009742865958516307, "loss": 1.5475, "step": 1205 }, { "epoch": 0.13, "grad_norm": 0.11166016102754622, "learning_rate": 0.0009742314520488187, "loss": 1.4857, "step": 1206 }, { "epoch": 0.13, "grad_norm": 0.10473240664950298, "learning_rate": 0.0009741762507438083, "loss": 1.4585, "step": 1207 }, { "epoch": 0.13, "grad_norm": 0.09850077005465968, "learning_rate": 0.0009741209919432928, "loss": 1.475, "step": 1208 }, { "epoch": 0.13, "grad_norm": 0.09199217175728568, "learning_rate": 0.0009740656756539723, "loss": 1.5659, "step": 1209 }, { "epoch": 0.13, "grad_norm": 0.09683493951065482, "learning_rate": 0.0009740103018825543, "loss": 1.4967, "step": 1210 }, { "epoch": 0.13, "grad_norm": 0.09121350319498756, "learning_rate": 0.0009739548706357532, "loss": 1.3673, "step": 1211 }, { "epoch": 0.13, "grad_norm": 0.10048743165141734, "learning_rate": 0.0009738993819202901, "loss": 1.5065, "step": 1212 }, { "epoch": 0.13, "grad_norm": 0.09127861055952981, "learning_rate": 0.0009738438357428929, "loss": 1.2944, "step": 1213 }, { "epoch": 0.13, "grad_norm": 0.09363988777597988, "learning_rate": 0.0009737882321102972, "loss": 1.4625, "step": 1214 }, { "epoch": 0.13, "grad_norm": 0.10479854745256931, "learning_rate": 0.000973732571029245, "loss": 1.5235, "step": 1215 }, { "epoch": 0.13, "grad_norm": 0.09677667521333407, "learning_rate": 0.0009736768525064851, "loss": 1.4624, "step": 1216 }, { "epoch": 0.13, "grad_norm": 0.09244710850250462, "learning_rate": 0.0009736210765487741, "loss": 1.5458, "step": 1217 }, { "epoch": 0.13, "grad_norm": 0.10714498510511104, "learning_rate": 0.0009735652431628747, "loss": 1.5138, "step": 1218 }, { "epoch": 0.13, "grad_norm": 0.10730277307980761, "learning_rate": 0.000973509352355557, "loss": 1.3659, "step": 1219 }, { "epoch": 0.13, "grad_norm": 0.11548224356442682, "learning_rate": 0.0009734534041335977, "loss": 1.4155, "step": 1220 }, { "epoch": 0.13, "grad_norm": 0.10400406071826049, "learning_rate": 0.000973397398503781, "loss": 1.4974, "step": 1221 }, { "epoch": 0.13, "grad_norm": 0.10990936584302333, "learning_rate": 0.0009733413354728977, "loss": 1.455, "step": 1222 }, { "epoch": 0.13, "grad_norm": 0.09334736281891305, "learning_rate": 0.0009732852150477456, "loss": 1.4218, "step": 1223 }, { "epoch": 0.13, "grad_norm": 0.10295858393774611, "learning_rate": 0.0009732290372351294, "loss": 1.5732, "step": 1224 }, { "epoch": 0.13, "grad_norm": 0.09349978355010372, "learning_rate": 0.0009731728020418611, "loss": 1.5234, "step": 1225 }, { "epoch": 0.13, "grad_norm": 0.10091993747486702, "learning_rate": 0.0009731165094747593, "loss": 1.4055, "step": 1226 }, { "epoch": 0.13, "grad_norm": 0.1048721686348737, "learning_rate": 0.0009730601595406496, "loss": 1.4747, "step": 1227 }, { "epoch": 0.13, "grad_norm": 0.09189084245289222, "learning_rate": 0.0009730037522463647, "loss": 1.4501, "step": 1228 }, { "epoch": 0.13, "grad_norm": 0.09610235865729654, "learning_rate": 0.0009729472875987439, "loss": 1.4709, "step": 1229 }, { "epoch": 0.13, "grad_norm": 0.09998948369758241, "learning_rate": 0.0009728907656046343, "loss": 1.4859, "step": 1230 }, { "epoch": 0.13, "grad_norm": 0.09704555535405772, "learning_rate": 0.0009728341862708889, "loss": 1.5897, "step": 1231 }, { "epoch": 0.13, "grad_norm": 0.11181733909784186, "learning_rate": 0.0009727775496043685, "loss": 1.4797, "step": 1232 }, { "epoch": 0.13, "grad_norm": 0.1078281133859294, "learning_rate": 0.0009727208556119401, "loss": 1.5068, "step": 1233 }, { "epoch": 0.13, "grad_norm": 0.12417520601082405, "learning_rate": 0.0009726641043004782, "loss": 1.4862, "step": 1234 }, { "epoch": 0.13, "grad_norm": 0.09808494647507954, "learning_rate": 0.0009726072956768643, "loss": 1.4989, "step": 1235 }, { "epoch": 0.13, "grad_norm": 0.11618660624020795, "learning_rate": 0.0009725504297479864, "loss": 1.3851, "step": 1236 }, { "epoch": 0.13, "grad_norm": 0.10189474305066351, "learning_rate": 0.0009724935065207399, "loss": 1.4341, "step": 1237 }, { "epoch": 0.13, "grad_norm": 0.099810201488847, "learning_rate": 0.0009724365260020267, "loss": 1.3454, "step": 1238 }, { "epoch": 0.13, "grad_norm": 0.10607882500756213, "learning_rate": 0.0009723794881987559, "loss": 1.4519, "step": 1239 }, { "epoch": 0.13, "grad_norm": 0.10124419381866068, "learning_rate": 0.0009723223931178438, "loss": 1.6662, "step": 1240 }, { "epoch": 0.13, "grad_norm": 0.08695360420074442, "learning_rate": 0.0009722652407662129, "loss": 1.4175, "step": 1241 }, { "epoch": 0.13, "grad_norm": 0.09549842078403618, "learning_rate": 0.0009722080311507937, "loss": 1.5201, "step": 1242 }, { "epoch": 0.13, "grad_norm": 0.08806136818436658, "learning_rate": 0.0009721507642785226, "loss": 1.4395, "step": 1243 }, { "epoch": 0.13, "grad_norm": 0.09556559977297367, "learning_rate": 0.0009720934401563437, "loss": 1.5403, "step": 1244 }, { "epoch": 0.13, "grad_norm": 0.08343873926288949, "learning_rate": 0.0009720360587912075, "loss": 1.4373, "step": 1245 }, { "epoch": 0.13, "grad_norm": 0.09441068917886046, "learning_rate": 0.0009719786201900719, "loss": 1.5036, "step": 1246 }, { "epoch": 0.13, "grad_norm": 0.09864797097988, "learning_rate": 0.0009719211243599014, "loss": 1.3718, "step": 1247 }, { "epoch": 0.13, "grad_norm": 0.08882461434600883, "learning_rate": 0.0009718635713076676, "loss": 1.473, "step": 1248 }, { "epoch": 0.13, "grad_norm": 0.08388976795441185, "learning_rate": 0.0009718059610403491, "loss": 1.4691, "step": 1249 }, { "epoch": 0.13, "grad_norm": 0.10270330807860907, "learning_rate": 0.0009717482935649312, "loss": 1.4669, "step": 1250 }, { "epoch": 0.13, "grad_norm": 0.096526175780071, "learning_rate": 0.0009716905688884063, "loss": 1.4292, "step": 1251 }, { "epoch": 0.13, "grad_norm": 0.07826161807267731, "learning_rate": 0.0009716327870177739, "loss": 1.4802, "step": 1252 }, { "epoch": 0.13, "grad_norm": 0.07991878427102167, "learning_rate": 0.00097157494796004, "loss": 1.4538, "step": 1253 }, { "epoch": 0.13, "grad_norm": 0.08512603268481847, "learning_rate": 0.000971517051722218, "loss": 1.3659, "step": 1254 }, { "epoch": 0.13, "grad_norm": 0.08429100786699598, "learning_rate": 0.0009714590983113279, "loss": 1.4721, "step": 1255 }, { "epoch": 0.14, "grad_norm": 0.08571040834333599, "learning_rate": 0.0009714010877343966, "loss": 1.5399, "step": 1256 }, { "epoch": 0.14, "grad_norm": 0.08559073310258722, "learning_rate": 0.0009713430199984583, "loss": 1.3743, "step": 1257 }, { "epoch": 0.14, "grad_norm": 0.09653287635713186, "learning_rate": 0.0009712848951105539, "loss": 1.4327, "step": 1258 }, { "epoch": 0.14, "grad_norm": 0.09277545525213768, "learning_rate": 0.0009712267130777312, "loss": 1.4792, "step": 1259 }, { "epoch": 0.14, "grad_norm": 0.08815163163035124, "learning_rate": 0.000971168473907045, "loss": 1.5024, "step": 1260 }, { "epoch": 0.14, "grad_norm": 0.08767532362959778, "learning_rate": 0.0009711101776055569, "loss": 1.4843, "step": 1261 }, { "epoch": 0.14, "grad_norm": 0.09195990998443754, "learning_rate": 0.0009710518241803356, "loss": 1.5151, "step": 1262 }, { "epoch": 0.14, "grad_norm": 0.09294957831798757, "learning_rate": 0.0009709934136384568, "loss": 1.5475, "step": 1263 }, { "epoch": 0.14, "grad_norm": 0.09036887036299439, "learning_rate": 0.0009709349459870027, "loss": 1.3581, "step": 1264 }, { "epoch": 0.14, "grad_norm": 0.10196282506888943, "learning_rate": 0.0009708764212330629, "loss": 1.3326, "step": 1265 }, { "epoch": 0.14, "grad_norm": 0.10355169189765306, "learning_rate": 0.0009708178393837336, "loss": 1.5029, "step": 1266 }, { "epoch": 0.14, "grad_norm": 0.09318762477278592, "learning_rate": 0.0009707592004461182, "loss": 1.5204, "step": 1267 }, { "epoch": 0.14, "grad_norm": 0.08563077439899368, "learning_rate": 0.0009707005044273267, "loss": 1.3752, "step": 1268 }, { "epoch": 0.14, "grad_norm": 0.08600170949975434, "learning_rate": 0.0009706417513344764, "loss": 1.5756, "step": 1269 }, { "epoch": 0.14, "grad_norm": 0.08164799944411805, "learning_rate": 0.0009705829411746911, "loss": 1.3627, "step": 1270 }, { "epoch": 0.14, "grad_norm": 0.07849971938700012, "learning_rate": 0.000970524073955102, "loss": 1.3703, "step": 1271 }, { "epoch": 0.14, "grad_norm": 0.07646692578878106, "learning_rate": 0.0009704651496828466, "loss": 1.4787, "step": 1272 }, { "epoch": 0.14, "grad_norm": 0.09464792546642692, "learning_rate": 0.00097040616836507, "loss": 1.3937, "step": 1273 }, { "epoch": 0.14, "grad_norm": 0.09949300784748961, "learning_rate": 0.0009703471300089236, "loss": 1.5593, "step": 1274 }, { "epoch": 0.14, "grad_norm": 0.08122148070582941, "learning_rate": 0.0009702880346215664, "loss": 1.3546, "step": 1275 }, { "epoch": 0.14, "grad_norm": 0.10318702598898441, "learning_rate": 0.0009702288822101634, "loss": 1.52, "step": 1276 }, { "epoch": 0.14, "grad_norm": 0.11343488674792428, "learning_rate": 0.0009701696727818874, "loss": 1.3261, "step": 1277 }, { "epoch": 0.14, "grad_norm": 0.10000234811603913, "learning_rate": 0.0009701104063439177, "loss": 1.4701, "step": 1278 }, { "epoch": 0.14, "grad_norm": 0.08002664638456945, "learning_rate": 0.0009700510829034404, "loss": 1.4196, "step": 1279 }, { "epoch": 0.14, "grad_norm": 0.08976865563700746, "learning_rate": 0.0009699917024676488, "loss": 1.4879, "step": 1280 }, { "epoch": 0.14, "grad_norm": 0.09486370384243133, "learning_rate": 0.0009699322650437432, "loss": 1.6916, "step": 1281 }, { "epoch": 0.14, "grad_norm": 0.09483802520074949, "learning_rate": 0.00096987277063893, "loss": 1.4527, "step": 1282 }, { "epoch": 0.14, "grad_norm": 0.10092644093150213, "learning_rate": 0.0009698132192604238, "loss": 1.4879, "step": 1283 }, { "epoch": 0.14, "grad_norm": 0.09844581657806153, "learning_rate": 0.0009697536109154449, "loss": 1.4846, "step": 1284 }, { "epoch": 0.14, "grad_norm": 0.09448314390473393, "learning_rate": 0.0009696939456112213, "loss": 1.318, "step": 1285 }, { "epoch": 0.14, "grad_norm": 0.11425995620933611, "learning_rate": 0.0009696342233549873, "loss": 1.3915, "step": 1286 }, { "epoch": 0.14, "grad_norm": 0.10553751796369051, "learning_rate": 0.0009695744441539849, "loss": 1.6516, "step": 1287 }, { "epoch": 0.14, "grad_norm": 0.09672534952850492, "learning_rate": 0.0009695146080154621, "loss": 1.4494, "step": 1288 }, { "epoch": 0.14, "grad_norm": 0.10035478781628406, "learning_rate": 0.0009694547149466745, "loss": 1.5295, "step": 1289 }, { "epoch": 0.14, "grad_norm": 0.08972138904342805, "learning_rate": 0.0009693947649548842, "loss": 1.474, "step": 1290 }, { "epoch": 0.14, "grad_norm": 0.08679630306391951, "learning_rate": 0.0009693347580473604, "loss": 1.4317, "step": 1291 }, { "epoch": 0.14, "grad_norm": 0.10871542085222537, "learning_rate": 0.0009692746942313792, "loss": 1.5484, "step": 1292 }, { "epoch": 0.14, "grad_norm": 0.08603429220157566, "learning_rate": 0.0009692145735142235, "loss": 1.4189, "step": 1293 }, { "epoch": 0.14, "grad_norm": 0.094369401781512, "learning_rate": 0.000969154395903183, "loss": 1.2444, "step": 1294 }, { "epoch": 0.14, "grad_norm": 0.08502135101989325, "learning_rate": 0.0009690941614055546, "loss": 1.4072, "step": 1295 }, { "epoch": 0.14, "grad_norm": 0.09178718049011819, "learning_rate": 0.0009690338700286421, "loss": 1.3757, "step": 1296 }, { "epoch": 0.14, "grad_norm": 0.08929961053282694, "learning_rate": 0.0009689735217797557, "loss": 1.423, "step": 1297 }, { "epoch": 0.14, "grad_norm": 0.0829406241969028, "learning_rate": 0.0009689131166662131, "loss": 1.4598, "step": 1298 }, { "epoch": 0.14, "grad_norm": 0.09831046552987227, "learning_rate": 0.0009688526546953384, "loss": 1.5143, "step": 1299 }, { "epoch": 0.14, "grad_norm": 0.10233533693634345, "learning_rate": 0.0009687921358744629, "loss": 1.5625, "step": 1300 }, { "epoch": 0.14, "grad_norm": 0.12009618350664676, "learning_rate": 0.0009687315602109248, "loss": 1.5813, "step": 1301 }, { "epoch": 0.14, "grad_norm": 0.09579390968775611, "learning_rate": 0.0009686709277120691, "loss": 1.5, "step": 1302 }, { "epoch": 0.14, "grad_norm": 0.09347070874904188, "learning_rate": 0.0009686102383852477, "loss": 1.4762, "step": 1303 }, { "epoch": 0.14, "grad_norm": 0.0974530705160833, "learning_rate": 0.0009685494922378193, "loss": 1.4729, "step": 1304 }, { "epoch": 0.14, "grad_norm": 0.09780466008302127, "learning_rate": 0.0009684886892771497, "loss": 1.5751, "step": 1305 }, { "epoch": 0.14, "grad_norm": 0.10446303843416181, "learning_rate": 0.0009684278295106112, "loss": 1.521, "step": 1306 }, { "epoch": 0.14, "grad_norm": 0.08583740614987581, "learning_rate": 0.0009683669129455836, "loss": 1.4352, "step": 1307 }, { "epoch": 0.14, "grad_norm": 0.09548449846071512, "learning_rate": 0.0009683059395894532, "loss": 1.6236, "step": 1308 }, { "epoch": 0.14, "grad_norm": 0.09317317974015707, "learning_rate": 0.0009682449094496129, "loss": 1.4718, "step": 1309 }, { "epoch": 0.14, "grad_norm": 0.09028344652051286, "learning_rate": 0.0009681838225334632, "loss": 1.5197, "step": 1310 }, { "epoch": 0.14, "grad_norm": 0.1010454477834947, "learning_rate": 0.0009681226788484107, "loss": 1.4355, "step": 1311 }, { "epoch": 0.14, "grad_norm": 0.10063424616961393, "learning_rate": 0.0009680614784018699, "loss": 1.5216, "step": 1312 }, { "epoch": 0.14, "grad_norm": 0.08847368543043868, "learning_rate": 0.000968000221201261, "loss": 1.4489, "step": 1313 }, { "epoch": 0.14, "grad_norm": 0.08917200687658379, "learning_rate": 0.0009679389072540118, "loss": 1.5095, "step": 1314 }, { "epoch": 0.14, "grad_norm": 0.08780084899229014, "learning_rate": 0.0009678775365675569, "loss": 1.2328, "step": 1315 }, { "epoch": 0.14, "grad_norm": 0.09952472380916254, "learning_rate": 0.0009678161091493377, "loss": 1.5445, "step": 1316 }, { "epoch": 0.14, "grad_norm": 0.0922044875770402, "learning_rate": 0.0009677546250068024, "loss": 1.4997, "step": 1317 }, { "epoch": 0.14, "grad_norm": 0.08679058782028819, "learning_rate": 0.0009676930841474063, "loss": 1.4818, "step": 1318 }, { "epoch": 0.14, "grad_norm": 0.09946020728542797, "learning_rate": 0.0009676314865786113, "loss": 1.541, "step": 1319 }, { "epoch": 0.14, "grad_norm": 0.09924599571927359, "learning_rate": 0.0009675698323078864, "loss": 1.4275, "step": 1320 }, { "epoch": 0.14, "grad_norm": 0.09736847437990914, "learning_rate": 0.0009675081213427075, "loss": 1.3534, "step": 1321 }, { "epoch": 0.14, "grad_norm": 0.08981117793178947, "learning_rate": 0.0009674463536905569, "loss": 1.5193, "step": 1322 }, { "epoch": 0.14, "grad_norm": 0.0892544928899613, "learning_rate": 0.0009673845293589245, "loss": 1.4088, "step": 1323 }, { "epoch": 0.14, "grad_norm": 0.09692218740693645, "learning_rate": 0.0009673226483553067, "loss": 1.4853, "step": 1324 }, { "epoch": 0.14, "grad_norm": 0.090931858503463, "learning_rate": 0.0009672607106872065, "loss": 1.5478, "step": 1325 }, { "epoch": 0.14, "grad_norm": 0.08677645972181737, "learning_rate": 0.0009671987163621343, "loss": 1.4351, "step": 1326 }, { "epoch": 0.14, "grad_norm": 0.0965811198069088, "learning_rate": 0.0009671366653876072, "loss": 1.44, "step": 1327 }, { "epoch": 0.14, "grad_norm": 0.10265572360163623, "learning_rate": 0.0009670745577711489, "loss": 1.4758, "step": 1328 }, { "epoch": 0.14, "grad_norm": 0.10813126200323721, "learning_rate": 0.0009670123935202901, "loss": 1.5154, "step": 1329 }, { "epoch": 0.14, "grad_norm": 0.0818442543578839, "learning_rate": 0.0009669501726425686, "loss": 1.5325, "step": 1330 }, { "epoch": 0.14, "grad_norm": 0.08476502614974389, "learning_rate": 0.0009668878951455289, "loss": 1.4566, "step": 1331 }, { "epoch": 0.14, "grad_norm": 0.0929172330057343, "learning_rate": 0.0009668255610367222, "loss": 1.4826, "step": 1332 }, { "epoch": 0.14, "grad_norm": 0.11067141747464249, "learning_rate": 0.0009667631703237069, "loss": 1.4778, "step": 1333 }, { "epoch": 0.14, "grad_norm": 0.09895618231426587, "learning_rate": 0.000966700723014048, "loss": 1.4972, "step": 1334 }, { "epoch": 0.14, "grad_norm": 0.09258309463650612, "learning_rate": 0.0009666382191153175, "loss": 1.5373, "step": 1335 }, { "epoch": 0.14, "grad_norm": 0.08478282889023635, "learning_rate": 0.0009665756586350942, "loss": 1.3624, "step": 1336 }, { "epoch": 0.14, "grad_norm": 0.1029379375379849, "learning_rate": 0.0009665130415809636, "loss": 1.5687, "step": 1337 }, { "epoch": 0.14, "grad_norm": 0.10999399674084075, "learning_rate": 0.0009664503679605186, "loss": 1.4364, "step": 1338 }, { "epoch": 0.14, "grad_norm": 0.1002629659584574, "learning_rate": 0.0009663876377813583, "loss": 1.4827, "step": 1339 }, { "epoch": 0.14, "grad_norm": 0.09189118197800461, "learning_rate": 0.000966324851051089, "loss": 1.5273, "step": 1340 }, { "epoch": 0.14, "grad_norm": 0.08846946549951583, "learning_rate": 0.0009662620077773239, "loss": 1.4333, "step": 1341 }, { "epoch": 0.14, "grad_norm": 0.09374605110911702, "learning_rate": 0.0009661991079676828, "loss": 1.547, "step": 1342 }, { "epoch": 0.14, "grad_norm": 0.08242169059262731, "learning_rate": 0.0009661361516297928, "loss": 1.4459, "step": 1343 }, { "epoch": 0.14, "grad_norm": 0.09832558372243909, "learning_rate": 0.0009660731387712873, "loss": 1.5649, "step": 1344 }, { "epoch": 0.14, "grad_norm": 0.10825358627457048, "learning_rate": 0.0009660100693998071, "loss": 1.4277, "step": 1345 }, { "epoch": 0.14, "grad_norm": 0.09313205123054559, "learning_rate": 0.0009659469435229992, "loss": 1.5624, "step": 1346 }, { "epoch": 0.14, "grad_norm": 0.07998615164356912, "learning_rate": 0.0009658837611485181, "loss": 1.4637, "step": 1347 }, { "epoch": 0.14, "grad_norm": 0.10552045428986864, "learning_rate": 0.0009658205222840249, "loss": 1.454, "step": 1348 }, { "epoch": 0.15, "grad_norm": 0.08907788707383209, "learning_rate": 0.0009657572269371873, "loss": 1.5136, "step": 1349 }, { "epoch": 0.15, "grad_norm": 0.08617641281827475, "learning_rate": 0.0009656938751156802, "loss": 1.4615, "step": 1350 }, { "epoch": 0.15, "grad_norm": 0.09921990951476314, "learning_rate": 0.0009656304668271854, "loss": 1.4423, "step": 1351 }, { "epoch": 0.15, "grad_norm": 0.10024579878753528, "learning_rate": 0.000965567002079391, "loss": 1.4746, "step": 1352 }, { "epoch": 0.15, "grad_norm": 0.09568464334273936, "learning_rate": 0.0009655034808799928, "loss": 1.5115, "step": 1353 }, { "epoch": 0.15, "grad_norm": 0.0948152667533343, "learning_rate": 0.0009654399032366925, "loss": 1.482, "step": 1354 }, { "epoch": 0.15, "grad_norm": 0.09802476493056436, "learning_rate": 0.0009653762691571995, "loss": 1.427, "step": 1355 }, { "epoch": 0.15, "grad_norm": 0.08709415251464686, "learning_rate": 0.0009653125786492294, "loss": 1.4658, "step": 1356 }, { "epoch": 0.15, "grad_norm": 0.08686335295193764, "learning_rate": 0.0009652488317205048, "loss": 1.3828, "step": 1357 }, { "epoch": 0.15, "grad_norm": 0.10936667844927866, "learning_rate": 0.0009651850283787555, "loss": 1.6204, "step": 1358 }, { "epoch": 0.15, "grad_norm": 0.08318505997719156, "learning_rate": 0.000965121168631718, "loss": 1.3626, "step": 1359 }, { "epoch": 0.15, "grad_norm": 0.0981715400028448, "learning_rate": 0.000965057252487135, "loss": 1.4695, "step": 1360 }, { "epoch": 0.15, "grad_norm": 0.0855374725173818, "learning_rate": 0.000964993279952757, "loss": 1.4296, "step": 1361 }, { "epoch": 0.15, "grad_norm": 0.09090872788305977, "learning_rate": 0.0009649292510363407, "loss": 1.4942, "step": 1362 }, { "epoch": 0.15, "grad_norm": 0.08903024982584848, "learning_rate": 0.0009648651657456497, "loss": 1.53, "step": 1363 }, { "epoch": 0.15, "grad_norm": 0.09651472004451829, "learning_rate": 0.0009648010240884549, "loss": 1.4141, "step": 1364 }, { "epoch": 0.15, "grad_norm": 0.0975116971187096, "learning_rate": 0.0009647368260725335, "loss": 1.6606, "step": 1365 }, { "epoch": 0.15, "grad_norm": 0.09109281987857183, "learning_rate": 0.0009646725717056696, "loss": 1.4704, "step": 1366 }, { "epoch": 0.15, "grad_norm": 0.08977013401911238, "learning_rate": 0.0009646082609956546, "loss": 1.4401, "step": 1367 }, { "epoch": 0.15, "grad_norm": 0.09619001203581924, "learning_rate": 0.0009645438939502862, "loss": 1.4719, "step": 1368 }, { "epoch": 0.15, "grad_norm": 0.0920101285961825, "learning_rate": 0.000964479470577369, "loss": 1.4291, "step": 1369 }, { "epoch": 0.15, "grad_norm": 0.10189576673060297, "learning_rate": 0.0009644149908847148, "loss": 1.3505, "step": 1370 }, { "epoch": 0.15, "grad_norm": 0.10141333070010058, "learning_rate": 0.0009643504548801418, "loss": 1.5281, "step": 1371 }, { "epoch": 0.15, "grad_norm": 0.10183136911830679, "learning_rate": 0.0009642858625714753, "loss": 1.617, "step": 1372 }, { "epoch": 0.15, "grad_norm": 0.08938428018230365, "learning_rate": 0.0009642212139665474, "loss": 1.5241, "step": 1373 }, { "epoch": 0.15, "grad_norm": 0.09867932384400704, "learning_rate": 0.0009641565090731968, "loss": 1.4051, "step": 1374 }, { "epoch": 0.15, "grad_norm": 0.08685976647015854, "learning_rate": 0.0009640917478992692, "loss": 1.4531, "step": 1375 }, { "epoch": 0.15, "grad_norm": 0.08388522860676766, "learning_rate": 0.0009640269304526175, "loss": 1.4059, "step": 1376 }, { "epoch": 0.15, "grad_norm": 0.08755849762519224, "learning_rate": 0.0009639620567411005, "loss": 1.4757, "step": 1377 }, { "epoch": 0.15, "grad_norm": 0.08967987306203287, "learning_rate": 0.0009638971267725846, "loss": 1.4917, "step": 1378 }, { "epoch": 0.15, "grad_norm": 0.09337617930404216, "learning_rate": 0.0009638321405549429, "loss": 1.4998, "step": 1379 }, { "epoch": 0.15, "grad_norm": 0.11438394374416985, "learning_rate": 0.0009637670980960549, "loss": 1.3926, "step": 1380 }, { "epoch": 0.15, "grad_norm": 0.08596327108077496, "learning_rate": 0.0009637019994038076, "loss": 1.4768, "step": 1381 }, { "epoch": 0.15, "grad_norm": 0.10081100976304277, "learning_rate": 0.0009636368444860941, "loss": 1.4218, "step": 1382 }, { "epoch": 0.15, "grad_norm": 0.08628745782550797, "learning_rate": 0.0009635716333508149, "loss": 1.5145, "step": 1383 }, { "epoch": 0.15, "grad_norm": 0.09973150834603343, "learning_rate": 0.000963506366005877, "loss": 1.5565, "step": 1384 }, { "epoch": 0.15, "grad_norm": 0.0915294973766297, "learning_rate": 0.0009634410424591941, "loss": 1.5018, "step": 1385 }, { "epoch": 0.15, "grad_norm": 0.09358282880478652, "learning_rate": 0.0009633756627186874, "loss": 1.4947, "step": 1386 }, { "epoch": 0.15, "grad_norm": 0.08854602719419898, "learning_rate": 0.0009633102267922838, "loss": 1.3823, "step": 1387 }, { "epoch": 0.15, "grad_norm": 0.09370252902472906, "learning_rate": 0.0009632447346879181, "loss": 1.372, "step": 1388 }, { "epoch": 0.15, "grad_norm": 0.07704849046364678, "learning_rate": 0.0009631791864135313, "loss": 1.3268, "step": 1389 }, { "epoch": 0.15, "grad_norm": 0.09768848364054729, "learning_rate": 0.0009631135819770711, "loss": 1.4281, "step": 1390 }, { "epoch": 0.15, "grad_norm": 0.10743879271717618, "learning_rate": 0.0009630479213864927, "loss": 1.6314, "step": 1391 }, { "epoch": 0.15, "grad_norm": 0.11841657156720103, "learning_rate": 0.0009629822046497573, "loss": 1.455, "step": 1392 }, { "epoch": 0.15, "grad_norm": 0.0871394521914947, "learning_rate": 0.0009629164317748335, "loss": 1.4534, "step": 1393 }, { "epoch": 0.15, "grad_norm": 0.10329392146592385, "learning_rate": 0.0009628506027696966, "loss": 1.4848, "step": 1394 }, { "epoch": 0.15, "grad_norm": 0.08307046208995565, "learning_rate": 0.0009627847176423282, "loss": 1.4573, "step": 1395 }, { "epoch": 0.15, "grad_norm": 0.10441134569877149, "learning_rate": 0.0009627187764007175, "loss": 1.4169, "step": 1396 }, { "epoch": 0.15, "grad_norm": 0.11076009082556416, "learning_rate": 0.0009626527790528599, "loss": 1.4463, "step": 1397 }, { "epoch": 0.15, "grad_norm": 0.09972177969927494, "learning_rate": 0.0009625867256067577, "loss": 1.4483, "step": 1398 }, { "epoch": 0.15, "grad_norm": 0.10232915510331024, "learning_rate": 0.0009625206160704203, "loss": 1.4843, "step": 1399 }, { "epoch": 0.15, "grad_norm": 0.10033949503613594, "learning_rate": 0.0009624544504518636, "loss": 1.4372, "step": 1400 }, { "epoch": 0.15, "grad_norm": 0.10470825877339096, "learning_rate": 0.0009623882287591106, "loss": 1.4166, "step": 1401 }, { "epoch": 0.15, "grad_norm": 0.09177089511218502, "learning_rate": 0.0009623219510001906, "loss": 1.1729, "step": 1402 }, { "epoch": 0.15, "grad_norm": 0.09485966877207715, "learning_rate": 0.0009622556171831403, "loss": 1.5652, "step": 1403 }, { "epoch": 0.15, "grad_norm": 0.0908345472809001, "learning_rate": 0.0009621892273160027, "loss": 1.4296, "step": 1404 }, { "epoch": 0.15, "grad_norm": 0.09258681147903781, "learning_rate": 0.0009621227814068281, "loss": 1.3104, "step": 1405 }, { "epoch": 0.15, "grad_norm": 0.09872348083382965, "learning_rate": 0.0009620562794636728, "loss": 1.4792, "step": 1406 }, { "epoch": 0.15, "grad_norm": 0.10549538108369423, "learning_rate": 0.0009619897214946007, "loss": 1.4798, "step": 1407 }, { "epoch": 0.15, "grad_norm": 0.10762569208689654, "learning_rate": 0.0009619231075076823, "loss": 1.5403, "step": 1408 }, { "epoch": 0.15, "grad_norm": 0.10595909019966, "learning_rate": 0.0009618564375109945, "loss": 1.4209, "step": 1409 }, { "epoch": 0.15, "grad_norm": 0.09530047576327778, "learning_rate": 0.0009617897115126215, "loss": 1.5453, "step": 1410 }, { "epoch": 0.15, "grad_norm": 0.09150305099250461, "learning_rate": 0.0009617229295206537, "loss": 1.4043, "step": 1411 }, { "epoch": 0.15, "grad_norm": 0.10734559081510649, "learning_rate": 0.0009616560915431891, "loss": 1.5386, "step": 1412 }, { "epoch": 0.15, "grad_norm": 0.1166936881059637, "learning_rate": 0.000961589197588332, "loss": 1.4432, "step": 1413 }, { "epoch": 0.15, "grad_norm": 0.09399762085255639, "learning_rate": 0.000961522247664193, "loss": 1.3782, "step": 1414 }, { "epoch": 0.15, "grad_norm": 0.10108051188315073, "learning_rate": 0.0009614552417788906, "loss": 1.5388, "step": 1415 }, { "epoch": 0.15, "grad_norm": 0.08425065039992434, "learning_rate": 0.0009613881799405491, "loss": 1.3643, "step": 1416 }, { "epoch": 0.15, "grad_norm": 0.08774005124641272, "learning_rate": 0.0009613210621573001, "loss": 1.4577, "step": 1417 }, { "epoch": 0.15, "grad_norm": 0.11048355058020695, "learning_rate": 0.0009612538884372821, "loss": 1.4588, "step": 1418 }, { "epoch": 0.15, "grad_norm": 0.08641151132612267, "learning_rate": 0.0009611866587886399, "loss": 1.5007, "step": 1419 }, { "epoch": 0.15, "grad_norm": 0.09339672697113094, "learning_rate": 0.0009611193732195254, "loss": 1.4057, "step": 1420 }, { "epoch": 0.15, "grad_norm": 0.08382803898484295, "learning_rate": 0.000961052031738097, "loss": 1.4925, "step": 1421 }, { "epoch": 0.15, "grad_norm": 0.13267703370359948, "learning_rate": 0.0009609846343525204, "loss": 1.3566, "step": 1422 }, { "epoch": 0.15, "grad_norm": 0.09158613485789965, "learning_rate": 0.0009609171810709676, "loss": 1.4777, "step": 1423 }, { "epoch": 0.15, "grad_norm": 0.09234863197344007, "learning_rate": 0.0009608496719016175, "loss": 1.4714, "step": 1424 }, { "epoch": 0.15, "grad_norm": 0.08860250240147277, "learning_rate": 0.0009607821068526559, "loss": 1.5249, "step": 1425 }, { "epoch": 0.15, "grad_norm": 0.09605535819946916, "learning_rate": 0.0009607144859322754, "loss": 1.4138, "step": 1426 }, { "epoch": 0.15, "grad_norm": 0.0910599326476764, "learning_rate": 0.000960646809148675, "loss": 1.393, "step": 1427 }, { "epoch": 0.15, "grad_norm": 0.08997170262147476, "learning_rate": 0.000960579076510061, "loss": 1.4954, "step": 1428 }, { "epoch": 0.15, "grad_norm": 0.09321262030864544, "learning_rate": 0.0009605112880246462, "loss": 1.4509, "step": 1429 }, { "epoch": 0.15, "grad_norm": 0.08691768022573092, "learning_rate": 0.00096044344370065, "loss": 1.6263, "step": 1430 }, { "epoch": 0.15, "grad_norm": 0.09869112519405952, "learning_rate": 0.0009603755435462989, "loss": 1.5487, "step": 1431 }, { "epoch": 0.15, "grad_norm": 0.0929891550387196, "learning_rate": 0.000960307587569826, "loss": 1.5137, "step": 1432 }, { "epoch": 0.15, "grad_norm": 0.09569271308352895, "learning_rate": 0.0009602395757794711, "loss": 1.4777, "step": 1433 }, { "epoch": 0.15, "grad_norm": 0.09755014615911316, "learning_rate": 0.000960171508183481, "loss": 1.4682, "step": 1434 }, { "epoch": 0.15, "grad_norm": 0.0972474147226531, "learning_rate": 0.0009601033847901091, "loss": 1.5142, "step": 1435 }, { "epoch": 0.15, "grad_norm": 0.08735796798182946, "learning_rate": 0.0009600352056076154, "loss": 1.4992, "step": 1436 }, { "epoch": 0.15, "grad_norm": 0.09119723664332861, "learning_rate": 0.0009599669706442672, "loss": 1.4638, "step": 1437 }, { "epoch": 0.15, "grad_norm": 0.12378601755710915, "learning_rate": 0.000959898679908338, "loss": 1.6005, "step": 1438 }, { "epoch": 0.15, "grad_norm": 0.10478109012333306, "learning_rate": 0.0009598303334081085, "loss": 1.4894, "step": 1439 }, { "epoch": 0.15, "grad_norm": 0.23570162511872436, "learning_rate": 0.0009597619311518657, "loss": 1.5644, "step": 1440 }, { "epoch": 0.15, "grad_norm": 0.10682124284391994, "learning_rate": 0.0009596934731479036, "loss": 1.5554, "step": 1441 }, { "epoch": 0.16, "grad_norm": 0.09295343956178324, "learning_rate": 0.0009596249594045232, "loss": 1.438, "step": 1442 }, { "epoch": 0.16, "grad_norm": 0.09713099725191914, "learning_rate": 0.0009595563899300319, "loss": 1.4028, "step": 1443 }, { "epoch": 0.16, "grad_norm": 0.08720151241947806, "learning_rate": 0.000959487764732744, "loss": 1.3532, "step": 1444 }, { "epoch": 0.16, "grad_norm": 0.0886950843743924, "learning_rate": 0.0009594190838209805, "loss": 1.5788, "step": 1445 }, { "epoch": 0.16, "grad_norm": 0.08793516619885679, "learning_rate": 0.0009593503472030692, "loss": 1.3038, "step": 1446 }, { "epoch": 0.16, "grad_norm": 0.09144261031291229, "learning_rate": 0.0009592815548873448, "loss": 1.3458, "step": 1447 }, { "epoch": 0.16, "grad_norm": 0.09311955286422781, "learning_rate": 0.0009592127068821484, "loss": 1.5537, "step": 1448 }, { "epoch": 0.16, "grad_norm": 0.08723657754682848, "learning_rate": 0.0009591438031958282, "loss": 1.3372, "step": 1449 }, { "epoch": 0.16, "grad_norm": 0.09217854048138731, "learning_rate": 0.0009590748438367388, "loss": 1.5481, "step": 1450 }, { "epoch": 0.16, "grad_norm": 0.08500728494003494, "learning_rate": 0.000959005828813242, "loss": 1.4458, "step": 1451 }, { "epoch": 0.16, "grad_norm": 0.0853018457118595, "learning_rate": 0.0009589367581337061, "loss": 1.424, "step": 1452 }, { "epoch": 0.16, "grad_norm": 0.09868598911419524, "learning_rate": 0.000958867631806506, "loss": 1.3293, "step": 1453 }, { "epoch": 0.16, "grad_norm": 0.08639367094360562, "learning_rate": 0.0009587984498400235, "loss": 1.3447, "step": 1454 }, { "epoch": 0.16, "grad_norm": 0.10359812768616228, "learning_rate": 0.0009587292122426475, "loss": 1.4611, "step": 1455 }, { "epoch": 0.16, "grad_norm": 0.11130702504760993, "learning_rate": 0.0009586599190227728, "loss": 1.4713, "step": 1456 }, { "epoch": 0.16, "grad_norm": 0.08535204266384723, "learning_rate": 0.0009585905701888018, "loss": 1.4174, "step": 1457 }, { "epoch": 0.16, "grad_norm": 0.09085953542296663, "learning_rate": 0.0009585211657491431, "loss": 1.4496, "step": 1458 }, { "epoch": 0.16, "grad_norm": 0.08848742414165559, "learning_rate": 0.0009584517057122122, "loss": 1.5021, "step": 1459 }, { "epoch": 0.16, "grad_norm": 0.08832297742063362, "learning_rate": 0.0009583821900864315, "loss": 1.494, "step": 1460 }, { "epoch": 0.16, "grad_norm": 0.08922415214466346, "learning_rate": 0.0009583126188802302, "loss": 1.5711, "step": 1461 }, { "epoch": 0.16, "grad_norm": 0.09999174702384374, "learning_rate": 0.0009582429921020436, "loss": 1.5518, "step": 1462 }, { "epoch": 0.16, "grad_norm": 0.09822083586247843, "learning_rate": 0.0009581733097603145, "loss": 1.3522, "step": 1463 }, { "epoch": 0.16, "grad_norm": 0.10384201828720639, "learning_rate": 0.0009581035718634919, "loss": 1.4244, "step": 1464 }, { "epoch": 0.16, "grad_norm": 0.09055085355297395, "learning_rate": 0.0009580337784200319, "loss": 1.5444, "step": 1465 }, { "epoch": 0.16, "grad_norm": 0.10518106325110177, "learning_rate": 0.0009579639294383973, "loss": 1.4364, "step": 1466 }, { "epoch": 0.16, "grad_norm": 0.0967908735277497, "learning_rate": 0.0009578940249270573, "loss": 1.457, "step": 1467 }, { "epoch": 0.16, "grad_norm": 0.09207038006235847, "learning_rate": 0.0009578240648944882, "loss": 1.4476, "step": 1468 }, { "epoch": 0.16, "grad_norm": 0.10043286509181094, "learning_rate": 0.000957754049349173, "loss": 1.4658, "step": 1469 }, { "epoch": 0.16, "grad_norm": 0.09054729227475117, "learning_rate": 0.0009576839782996012, "loss": 1.4334, "step": 1470 }, { "epoch": 0.16, "grad_norm": 0.08377663291202349, "learning_rate": 0.000957613851754269, "loss": 1.4492, "step": 1471 }, { "epoch": 0.16, "grad_norm": 0.11060837805665544, "learning_rate": 0.0009575436697216797, "loss": 1.4966, "step": 1472 }, { "epoch": 0.16, "grad_norm": 0.09969183136235889, "learning_rate": 0.0009574734322103431, "loss": 1.5009, "step": 1473 }, { "epoch": 0.16, "grad_norm": 0.09967141930559241, "learning_rate": 0.0009574031392287757, "loss": 1.4341, "step": 1474 }, { "epoch": 0.16, "grad_norm": 0.09799525072093489, "learning_rate": 0.0009573327907855006, "loss": 1.4278, "step": 1475 }, { "epoch": 0.16, "grad_norm": 0.08604866662176865, "learning_rate": 0.0009572623868890481, "loss": 1.4587, "step": 1476 }, { "epoch": 0.16, "grad_norm": 0.09265415277930036, "learning_rate": 0.0009571919275479548, "loss": 1.4699, "step": 1477 }, { "epoch": 0.16, "grad_norm": 0.10062297583552167, "learning_rate": 0.0009571214127707639, "loss": 1.4039, "step": 1478 }, { "epoch": 0.16, "grad_norm": 0.09118049223741401, "learning_rate": 0.000957050842566026, "loss": 1.4081, "step": 1479 }, { "epoch": 0.16, "grad_norm": 0.08501681321065166, "learning_rate": 0.0009569802169422976, "loss": 1.4726, "step": 1480 }, { "epoch": 0.16, "grad_norm": 0.08422437325947779, "learning_rate": 0.0009569095359081426, "loss": 1.4094, "step": 1481 }, { "epoch": 0.16, "grad_norm": 0.09366136573080257, "learning_rate": 0.000956838799472131, "loss": 1.3966, "step": 1482 }, { "epoch": 0.16, "grad_norm": 0.08511044239192846, "learning_rate": 0.00095676800764284, "loss": 1.464, "step": 1483 }, { "epoch": 0.16, "grad_norm": 0.09087661694984668, "learning_rate": 0.0009566971604288534, "loss": 1.4871, "step": 1484 }, { "epoch": 0.16, "grad_norm": 0.1192006425086519, "learning_rate": 0.0009566262578387617, "loss": 1.404, "step": 1485 }, { "epoch": 0.16, "grad_norm": 0.09748627055341871, "learning_rate": 0.000956555299881162, "loss": 1.3744, "step": 1486 }, { "epoch": 0.16, "grad_norm": 0.09993689583714448, "learning_rate": 0.000956484286564658, "loss": 1.5675, "step": 1487 }, { "epoch": 0.16, "grad_norm": 0.10586802308275939, "learning_rate": 0.0009564132178978606, "loss": 1.4087, "step": 1488 }, { "epoch": 0.16, "grad_norm": 0.09472377025775539, "learning_rate": 0.0009563420938893871, "loss": 1.4368, "step": 1489 }, { "epoch": 0.16, "grad_norm": 0.10012467647677623, "learning_rate": 0.0009562709145478615, "loss": 1.4937, "step": 1490 }, { "epoch": 0.16, "grad_norm": 0.10303077194874116, "learning_rate": 0.0009561996798819145, "loss": 1.4654, "step": 1491 }, { "epoch": 0.16, "grad_norm": 0.10113641879804244, "learning_rate": 0.0009561283899001835, "loss": 1.5285, "step": 1492 }, { "epoch": 0.16, "grad_norm": 0.09589102118147506, "learning_rate": 0.0009560570446113128, "loss": 1.4873, "step": 1493 }, { "epoch": 0.16, "grad_norm": 0.09126782272024617, "learning_rate": 0.000955985644023953, "loss": 1.5029, "step": 1494 }, { "epoch": 0.16, "grad_norm": 0.09001747858385097, "learning_rate": 0.0009559141881467619, "loss": 1.3982, "step": 1495 }, { "epoch": 0.16, "grad_norm": 0.10262554472649192, "learning_rate": 0.0009558426769884039, "loss": 1.5575, "step": 1496 }, { "epoch": 0.16, "grad_norm": 0.10556088502827525, "learning_rate": 0.0009557711105575496, "loss": 1.483, "step": 1497 }, { "epoch": 0.16, "grad_norm": 0.08980632344515092, "learning_rate": 0.0009556994888628769, "loss": 1.3076, "step": 1498 }, { "epoch": 0.16, "grad_norm": 0.090263236114018, "learning_rate": 0.0009556278119130701, "loss": 1.3918, "step": 1499 }, { "epoch": 0.16, "grad_norm": 0.09906411529902781, "learning_rate": 0.0009555560797168204, "loss": 1.5834, "step": 1500 }, { "epoch": 0.16, "grad_norm": 0.09241869135981694, "learning_rate": 0.0009554842922828254, "loss": 1.5699, "step": 1501 }, { "epoch": 0.16, "grad_norm": 0.09344833611604621, "learning_rate": 0.0009554124496197897, "loss": 1.3565, "step": 1502 }, { "epoch": 0.16, "grad_norm": 0.09286482726655389, "learning_rate": 0.0009553405517364244, "loss": 1.4, "step": 1503 }, { "epoch": 0.16, "grad_norm": 0.09532973376012242, "learning_rate": 0.0009552685986414475, "loss": 1.4654, "step": 1504 }, { "epoch": 0.16, "grad_norm": 0.09067680572772746, "learning_rate": 0.0009551965903435835, "loss": 1.4848, "step": 1505 }, { "epoch": 0.16, "grad_norm": 0.1040664046288145, "learning_rate": 0.0009551245268515636, "loss": 1.4488, "step": 1506 }, { "epoch": 0.16, "grad_norm": 0.09429619546311979, "learning_rate": 0.0009550524081741256, "loss": 1.5326, "step": 1507 }, { "epoch": 0.16, "grad_norm": 0.10532072284542995, "learning_rate": 0.0009549802343200145, "loss": 1.3992, "step": 1508 }, { "epoch": 0.16, "grad_norm": 0.08912863106155763, "learning_rate": 0.0009549080052979813, "loss": 1.4448, "step": 1509 }, { "epoch": 0.16, "grad_norm": 0.1003030251918555, "learning_rate": 0.0009548357211167841, "loss": 1.484, "step": 1510 }, { "epoch": 0.16, "grad_norm": 0.11243803152082767, "learning_rate": 0.000954763381785188, "loss": 1.4291, "step": 1511 }, { "epoch": 0.16, "grad_norm": 0.1001886187096508, "learning_rate": 0.0009546909873119636, "loss": 1.4383, "step": 1512 }, { "epoch": 0.16, "grad_norm": 0.10231401178866115, "learning_rate": 0.0009546185377058898, "loss": 1.5126, "step": 1513 }, { "epoch": 0.16, "grad_norm": 0.104553472069901, "learning_rate": 0.0009545460329757507, "loss": 1.6627, "step": 1514 }, { "epoch": 0.16, "grad_norm": 0.08907543138688107, "learning_rate": 0.0009544734731303382, "loss": 1.5044, "step": 1515 }, { "epoch": 0.16, "grad_norm": 0.12886965839182934, "learning_rate": 0.0009544008581784503, "loss": 1.4528, "step": 1516 }, { "epoch": 0.16, "grad_norm": 0.09209918402576728, "learning_rate": 0.0009543281881288918, "loss": 1.468, "step": 1517 }, { "epoch": 0.16, "grad_norm": 0.09475658742189953, "learning_rate": 0.0009542554629904741, "loss": 1.3359, "step": 1518 }, { "epoch": 0.16, "grad_norm": 0.08776235492227999, "learning_rate": 0.0009541826827720155, "loss": 1.4278, "step": 1519 }, { "epoch": 0.16, "grad_norm": 0.0973510550854703, "learning_rate": 0.0009541098474823408, "loss": 1.4128, "step": 1520 }, { "epoch": 0.16, "grad_norm": 0.08791160995249331, "learning_rate": 0.0009540369571302815, "loss": 1.4823, "step": 1521 }, { "epoch": 0.16, "grad_norm": 0.1104543848288305, "learning_rate": 0.0009539640117246759, "loss": 1.474, "step": 1522 }, { "epoch": 0.16, "grad_norm": 0.09377728378127773, "learning_rate": 0.0009538910112743687, "loss": 1.5595, "step": 1523 }, { "epoch": 0.16, "grad_norm": 0.08352231010091461, "learning_rate": 0.0009538179557882117, "loss": 1.4802, "step": 1524 }, { "epoch": 0.16, "grad_norm": 0.10346239673786903, "learning_rate": 0.000953744845275063, "loss": 1.5015, "step": 1525 }, { "epoch": 0.16, "grad_norm": 0.10087785904923345, "learning_rate": 0.0009536716797437875, "loss": 1.5315, "step": 1526 }, { "epoch": 0.16, "grad_norm": 0.09040805736131872, "learning_rate": 0.0009535984592032569, "loss": 1.4685, "step": 1527 }, { "epoch": 0.16, "grad_norm": 0.12036277893925264, "learning_rate": 0.0009535251836623491, "loss": 1.39, "step": 1528 }, { "epoch": 0.16, "grad_norm": 0.09447315081864598, "learning_rate": 0.0009534518531299494, "loss": 1.5542, "step": 1529 }, { "epoch": 0.16, "grad_norm": 0.09873458361328201, "learning_rate": 0.0009533784676149492, "loss": 1.4234, "step": 1530 }, { "epoch": 0.16, "grad_norm": 0.09101268943669456, "learning_rate": 0.0009533050271262467, "loss": 1.4116, "step": 1531 }, { "epoch": 0.16, "grad_norm": 0.08897558088863952, "learning_rate": 0.0009532315316727469, "loss": 1.4038, "step": 1532 }, { "epoch": 0.16, "grad_norm": 0.11018902277583942, "learning_rate": 0.0009531579812633615, "loss": 1.4236, "step": 1533 }, { "epoch": 0.16, "grad_norm": 0.09125555762503207, "learning_rate": 0.0009530843759070085, "loss": 1.5578, "step": 1534 }, { "epoch": 0.17, "grad_norm": 0.09385367834635039, "learning_rate": 0.0009530107156126129, "loss": 1.5099, "step": 1535 }, { "epoch": 0.17, "grad_norm": 0.10683817721639628, "learning_rate": 0.0009529370003891062, "loss": 1.6342, "step": 1536 }, { "epoch": 0.17, "grad_norm": 0.08967972247987649, "learning_rate": 0.0009528632302454268, "loss": 1.4171, "step": 1537 }, { "epoch": 0.17, "grad_norm": 0.09753788211751449, "learning_rate": 0.0009527894051905194, "loss": 1.4745, "step": 1538 }, { "epoch": 0.17, "grad_norm": 0.09304012816220279, "learning_rate": 0.0009527155252333357, "loss": 1.5435, "step": 1539 }, { "epoch": 0.17, "grad_norm": 0.09474919727112473, "learning_rate": 0.0009526415903828337, "loss": 1.3861, "step": 1540 }, { "epoch": 0.17, "grad_norm": 0.07778975111369331, "learning_rate": 0.0009525676006479784, "loss": 1.4173, "step": 1541 }, { "epoch": 0.17, "grad_norm": 0.09020032560229345, "learning_rate": 0.0009524935560377414, "loss": 1.4258, "step": 1542 }, { "epoch": 0.17, "grad_norm": 0.09665307246990602, "learning_rate": 0.0009524194565611006, "loss": 1.4578, "step": 1543 }, { "epoch": 0.17, "grad_norm": 0.09800227849364905, "learning_rate": 0.000952345302227041, "loss": 1.367, "step": 1544 }, { "epoch": 0.17, "grad_norm": 0.1039422730155937, "learning_rate": 0.0009522710930445541, "loss": 1.5948, "step": 1545 }, { "epoch": 0.17, "grad_norm": 0.08481742258534228, "learning_rate": 0.000952196829022638, "loss": 1.4945, "step": 1546 }, { "epoch": 0.17, "grad_norm": 0.08727988396404307, "learning_rate": 0.0009521225101702973, "loss": 1.4855, "step": 1547 }, { "epoch": 0.17, "grad_norm": 0.10107756346544976, "learning_rate": 0.0009520481364965435, "loss": 1.5414, "step": 1548 }, { "epoch": 0.17, "grad_norm": 0.08148558962045617, "learning_rate": 0.0009519737080103948, "loss": 1.4015, "step": 1549 }, { "epoch": 0.17, "grad_norm": 0.09117985257996772, "learning_rate": 0.0009518992247208758, "loss": 1.4934, "step": 1550 }, { "epoch": 0.17, "grad_norm": 0.10201481363275045, "learning_rate": 0.000951824686637018, "loss": 1.4628, "step": 1551 }, { "epoch": 0.17, "grad_norm": 0.09022581841205508, "learning_rate": 0.000951750093767859, "loss": 1.6013, "step": 1552 }, { "epoch": 0.17, "grad_norm": 0.0953335804179374, "learning_rate": 0.0009516754461224439, "loss": 1.6041, "step": 1553 }, { "epoch": 0.17, "grad_norm": 0.09639577286814874, "learning_rate": 0.0009516007437098238, "loss": 1.554, "step": 1554 }, { "epoch": 0.17, "grad_norm": 0.09708674309060862, "learning_rate": 0.0009515259865390564, "loss": 1.3906, "step": 1555 }, { "epoch": 0.17, "grad_norm": 0.09782023489932025, "learning_rate": 0.0009514511746192068, "loss": 1.4169, "step": 1556 }, { "epoch": 0.17, "grad_norm": 0.08847376964278968, "learning_rate": 0.0009513763079593456, "loss": 1.489, "step": 1557 }, { "epoch": 0.17, "grad_norm": 0.09418702831874794, "learning_rate": 0.0009513013865685511, "loss": 1.4692, "step": 1558 }, { "epoch": 0.17, "grad_norm": 0.0839674566660529, "learning_rate": 0.0009512264104559077, "loss": 1.4908, "step": 1559 }, { "epoch": 0.17, "grad_norm": 0.09666360133659271, "learning_rate": 0.0009511513796305062, "loss": 1.519, "step": 1560 }, { "epoch": 0.17, "grad_norm": 0.08317259532860914, "learning_rate": 0.0009510762941014446, "loss": 1.4297, "step": 1561 }, { "epoch": 0.17, "grad_norm": 0.09939011817545634, "learning_rate": 0.0009510011538778274, "loss": 1.5525, "step": 1562 }, { "epoch": 0.17, "grad_norm": 0.08154865062357083, "learning_rate": 0.0009509259589687653, "loss": 1.4989, "step": 1563 }, { "epoch": 0.17, "grad_norm": 0.09355040253719421, "learning_rate": 0.000950850709383376, "loss": 1.4582, "step": 1564 }, { "epoch": 0.17, "grad_norm": 0.09125277909364553, "learning_rate": 0.0009507754051307841, "loss": 1.5391, "step": 1565 }, { "epoch": 0.17, "grad_norm": 0.089502284335009, "learning_rate": 0.0009507000462201201, "loss": 1.543, "step": 1566 }, { "epoch": 0.17, "grad_norm": 0.08176070826278141, "learning_rate": 0.0009506246326605219, "loss": 1.5341, "step": 1567 }, { "epoch": 0.17, "grad_norm": 0.10665943107435298, "learning_rate": 0.0009505491644611333, "loss": 1.3918, "step": 1568 }, { "epoch": 0.17, "grad_norm": 0.09157527137567606, "learning_rate": 0.0009504736416311053, "loss": 1.5549, "step": 1569 }, { "epoch": 0.17, "grad_norm": 0.09586127992803163, "learning_rate": 0.0009503980641795952, "loss": 1.3776, "step": 1570 }, { "epoch": 0.17, "grad_norm": 0.09017513026010152, "learning_rate": 0.0009503224321157671, "loss": 1.4205, "step": 1571 }, { "epoch": 0.17, "grad_norm": 0.09741079955785971, "learning_rate": 0.0009502467454487915, "loss": 1.2827, "step": 1572 }, { "epoch": 0.17, "grad_norm": 0.10275736685666167, "learning_rate": 0.0009501710041878458, "loss": 1.4762, "step": 1573 }, { "epoch": 0.17, "grad_norm": 0.08569182696154201, "learning_rate": 0.0009500952083421139, "loss": 1.3994, "step": 1574 }, { "epoch": 0.17, "grad_norm": 0.09179001894163673, "learning_rate": 0.0009500193579207863, "loss": 1.5314, "step": 1575 }, { "epoch": 0.17, "grad_norm": 0.08676494976521502, "learning_rate": 0.0009499434529330602, "loss": 1.4571, "step": 1576 }, { "epoch": 0.17, "grad_norm": 0.09077194936057632, "learning_rate": 0.000949867493388139, "loss": 1.4957, "step": 1577 }, { "epoch": 0.17, "grad_norm": 0.10232739854820663, "learning_rate": 0.0009497914792952333, "loss": 1.5458, "step": 1578 }, { "epoch": 0.17, "grad_norm": 0.10029952697335129, "learning_rate": 0.0009497154106635604, "loss": 1.4446, "step": 1579 }, { "epoch": 0.17, "grad_norm": 0.1156402004405788, "learning_rate": 0.0009496392875023432, "loss": 1.4567, "step": 1580 }, { "epoch": 0.17, "grad_norm": 0.09278124851672967, "learning_rate": 0.0009495631098208124, "loss": 1.4469, "step": 1581 }, { "epoch": 0.17, "grad_norm": 0.10394690523142269, "learning_rate": 0.0009494868776282046, "loss": 1.3859, "step": 1582 }, { "epoch": 0.17, "grad_norm": 0.09395591900009802, "learning_rate": 0.0009494105909337633, "loss": 1.5942, "step": 1583 }, { "epoch": 0.17, "grad_norm": 0.10303144085226874, "learning_rate": 0.0009493342497467385, "loss": 1.4295, "step": 1584 }, { "epoch": 0.17, "grad_norm": 0.1037174632287724, "learning_rate": 0.000949257854076387, "loss": 1.4119, "step": 1585 }, { "epoch": 0.17, "grad_norm": 0.09553928851204532, "learning_rate": 0.0009491814039319716, "loss": 1.3801, "step": 1586 }, { "epoch": 0.17, "grad_norm": 0.09651654620253583, "learning_rate": 0.0009491048993227625, "loss": 1.4304, "step": 1587 }, { "epoch": 0.17, "grad_norm": 0.09455369631618026, "learning_rate": 0.000949028340258036, "loss": 1.5054, "step": 1588 }, { "epoch": 0.17, "grad_norm": 0.097039379644678, "learning_rate": 0.0009489517267470753, "loss": 1.5381, "step": 1589 }, { "epoch": 0.17, "grad_norm": 0.09872281104254445, "learning_rate": 0.00094887505879917, "loss": 1.4912, "step": 1590 }, { "epoch": 0.17, "grad_norm": 0.09155850201559067, "learning_rate": 0.0009487983364236162, "loss": 1.5075, "step": 1591 }, { "epoch": 0.17, "grad_norm": 0.08864721777779384, "learning_rate": 0.0009487215596297169, "loss": 1.5051, "step": 1592 }, { "epoch": 0.17, "grad_norm": 0.11145148010181871, "learning_rate": 0.0009486447284267816, "loss": 1.5501, "step": 1593 }, { "epoch": 0.17, "grad_norm": 0.09424231822009453, "learning_rate": 0.0009485678428241262, "loss": 1.4026, "step": 1594 }, { "epoch": 0.17, "grad_norm": 0.0936796798826941, "learning_rate": 0.0009484909028310734, "loss": 1.4845, "step": 1595 }, { "epoch": 0.17, "grad_norm": 0.09572362072102075, "learning_rate": 0.0009484139084569525, "loss": 1.5573, "step": 1596 }, { "epoch": 0.17, "grad_norm": 0.10489996329436806, "learning_rate": 0.0009483368597110992, "loss": 1.3578, "step": 1597 }, { "epoch": 0.17, "grad_norm": 0.09287949322524128, "learning_rate": 0.000948259756602856, "loss": 1.3336, "step": 1598 }, { "epoch": 0.17, "grad_norm": 0.10417263018902385, "learning_rate": 0.000948182599141572, "loss": 1.5492, "step": 1599 }, { "epoch": 0.17, "grad_norm": 0.09698735516601625, "learning_rate": 0.0009481053873366027, "loss": 1.6122, "step": 1600 }, { "epoch": 0.17, "grad_norm": 0.10366781086993937, "learning_rate": 0.0009480281211973103, "loss": 1.4094, "step": 1601 }, { "epoch": 0.17, "grad_norm": 0.08318527269695515, "learning_rate": 0.0009479508007330638, "loss": 1.395, "step": 1602 }, { "epoch": 0.17, "grad_norm": 0.09637176359968291, "learning_rate": 0.0009478734259532381, "loss": 1.4232, "step": 1603 }, { "epoch": 0.17, "grad_norm": 0.10117308070962983, "learning_rate": 0.0009477959968672156, "loss": 1.4984, "step": 1604 }, { "epoch": 0.17, "grad_norm": 0.08695311552966221, "learning_rate": 0.0009477185134843846, "loss": 1.6225, "step": 1605 }, { "epoch": 0.17, "grad_norm": 0.08627423777317476, "learning_rate": 0.0009476409758141405, "loss": 1.5029, "step": 1606 }, { "epoch": 0.17, "grad_norm": 0.07656034118664419, "learning_rate": 0.0009475633838658847, "loss": 1.3344, "step": 1607 }, { "epoch": 0.17, "grad_norm": 0.0928219643216207, "learning_rate": 0.0009474857376490257, "loss": 1.4961, "step": 1608 }, { "epoch": 0.17, "grad_norm": 0.10412888114786652, "learning_rate": 0.0009474080371729782, "loss": 1.3727, "step": 1609 }, { "epoch": 0.17, "grad_norm": 0.09068058658406214, "learning_rate": 0.0009473302824471637, "loss": 1.5552, "step": 1610 }, { "epoch": 0.17, "grad_norm": 0.08843922927390716, "learning_rate": 0.0009472524734810104, "loss": 1.5213, "step": 1611 }, { "epoch": 0.17, "grad_norm": 0.0915814113209529, "learning_rate": 0.0009471746102839527, "loss": 1.5118, "step": 1612 }, { "epoch": 0.17, "grad_norm": 0.06836751650965489, "learning_rate": 0.0009470966928654319, "loss": 1.4489, "step": 1613 }, { "epoch": 0.17, "grad_norm": 0.07747282566984308, "learning_rate": 0.0009470187212348957, "loss": 1.4894, "step": 1614 }, { "epoch": 0.17, "grad_norm": 0.07132050314074537, "learning_rate": 0.0009469406954017985, "loss": 1.5092, "step": 1615 }, { "epoch": 0.17, "grad_norm": 0.08277949038549247, "learning_rate": 0.000946862615375601, "loss": 1.5643, "step": 1616 }, { "epoch": 0.17, "grad_norm": 0.09289209657839693, "learning_rate": 0.000946784481165771, "loss": 1.4531, "step": 1617 }, { "epoch": 0.17, "grad_norm": 0.10295338032257036, "learning_rate": 0.0009467062927817822, "loss": 1.4354, "step": 1618 }, { "epoch": 0.17, "grad_norm": 0.09135645998922941, "learning_rate": 0.0009466280502331154, "loss": 1.4541, "step": 1619 }, { "epoch": 0.17, "grad_norm": 0.11184134475928127, "learning_rate": 0.0009465497535292579, "loss": 1.5205, "step": 1620 }, { "epoch": 0.17, "grad_norm": 0.09717044875080767, "learning_rate": 0.0009464714026797031, "loss": 1.4884, "step": 1621 }, { "epoch": 0.17, "grad_norm": 0.09757801259769658, "learning_rate": 0.0009463929976939515, "loss": 1.425, "step": 1622 }, { "epoch": 0.17, "grad_norm": 0.0922656442309496, "learning_rate": 0.0009463145385815102, "loss": 1.4102, "step": 1623 }, { "epoch": 0.17, "grad_norm": 0.10056802441985584, "learning_rate": 0.0009462360253518923, "loss": 1.5064, "step": 1624 }, { "epoch": 0.17, "grad_norm": 0.09757900282309952, "learning_rate": 0.0009461574580146179, "loss": 1.4064, "step": 1625 }, { "epoch": 0.17, "grad_norm": 0.09223895834044753, "learning_rate": 0.0009460788365792135, "loss": 1.4853, "step": 1626 }, { "epoch": 0.17, "grad_norm": 0.08517631548088779, "learning_rate": 0.0009460001610552124, "loss": 1.3385, "step": 1627 }, { "epoch": 0.18, "grad_norm": 0.09644877186160637, "learning_rate": 0.000945921431452154, "loss": 1.3694, "step": 1628 }, { "epoch": 0.18, "grad_norm": 0.10147088301807176, "learning_rate": 0.000945842647779585, "loss": 1.4762, "step": 1629 }, { "epoch": 0.18, "grad_norm": 0.10594360644469922, "learning_rate": 0.0009457638100470577, "loss": 1.5326, "step": 1630 }, { "epoch": 0.18, "grad_norm": 0.0964403054717145, "learning_rate": 0.0009456849182641317, "loss": 1.4083, "step": 1631 }, { "epoch": 0.18, "grad_norm": 0.08982800262769298, "learning_rate": 0.000945605972440373, "loss": 1.3163, "step": 1632 }, { "epoch": 0.18, "grad_norm": 0.09752836436861666, "learning_rate": 0.0009455269725853536, "loss": 1.3745, "step": 1633 }, { "epoch": 0.18, "grad_norm": 0.09479524516483495, "learning_rate": 0.000945447918708653, "loss": 1.3966, "step": 1634 }, { "epoch": 0.18, "grad_norm": 0.08660628978231161, "learning_rate": 0.0009453688108198566, "loss": 1.3603, "step": 1635 }, { "epoch": 0.18, "grad_norm": 0.08391899236342358, "learning_rate": 0.0009452896489285563, "loss": 1.4881, "step": 1636 }, { "epoch": 0.18, "grad_norm": 0.10847952104104855, "learning_rate": 0.0009452104330443511, "loss": 1.5492, "step": 1637 }, { "epoch": 0.18, "grad_norm": 0.08318537065982123, "learning_rate": 0.0009451311631768459, "loss": 1.5448, "step": 1638 }, { "epoch": 0.18, "grad_norm": 0.09468223699769081, "learning_rate": 0.0009450518393356527, "loss": 1.4535, "step": 1639 }, { "epoch": 0.18, "grad_norm": 0.10397951977441247, "learning_rate": 0.0009449724615303894, "loss": 1.4966, "step": 1640 }, { "epoch": 0.18, "grad_norm": 0.10389824321917117, "learning_rate": 0.0009448930297706813, "loss": 1.484, "step": 1641 }, { "epoch": 0.18, "grad_norm": 0.08499587782031137, "learning_rate": 0.0009448135440661595, "loss": 1.4936, "step": 1642 }, { "epoch": 0.18, "grad_norm": 0.08757048555474968, "learning_rate": 0.0009447340044264619, "loss": 1.4992, "step": 1643 }, { "epoch": 0.18, "grad_norm": 0.09261434742761426, "learning_rate": 0.0009446544108612331, "loss": 1.4285, "step": 1644 }, { "epoch": 0.18, "grad_norm": 0.08674281372806956, "learning_rate": 0.0009445747633801241, "loss": 1.4837, "step": 1645 }, { "epoch": 0.18, "grad_norm": 0.08584946897397816, "learning_rate": 0.0009444950619927924, "loss": 1.5338, "step": 1646 }, { "epoch": 0.18, "grad_norm": 0.11737706230601937, "learning_rate": 0.0009444153067089019, "loss": 1.5565, "step": 1647 }, { "epoch": 0.18, "grad_norm": 0.09096614220056518, "learning_rate": 0.0009443354975381234, "loss": 1.382, "step": 1648 }, { "epoch": 0.18, "grad_norm": 0.09032154151221224, "learning_rate": 0.0009442556344901339, "loss": 1.3891, "step": 1649 }, { "epoch": 0.18, "grad_norm": 0.1027879079616311, "learning_rate": 0.000944175717574617, "loss": 1.551, "step": 1650 }, { "epoch": 0.18, "grad_norm": 0.08719485311068445, "learning_rate": 0.0009440957468012632, "loss": 1.4096, "step": 1651 }, { "epoch": 0.18, "grad_norm": 0.09352779878047505, "learning_rate": 0.0009440157221797692, "loss": 1.4714, "step": 1652 }, { "epoch": 0.18, "grad_norm": 0.10486400709151468, "learning_rate": 0.0009439356437198379, "loss": 1.4014, "step": 1653 }, { "epoch": 0.18, "grad_norm": 0.08752718991568993, "learning_rate": 0.0009438555114311795, "loss": 1.3704, "step": 1654 }, { "epoch": 0.18, "grad_norm": 0.11686969419920402, "learning_rate": 0.00094377532532351, "loss": 1.3982, "step": 1655 }, { "epoch": 0.18, "grad_norm": 0.0973101634579299, "learning_rate": 0.0009436950854065524, "loss": 1.5212, "step": 1656 }, { "epoch": 0.18, "grad_norm": 0.09899858128911071, "learning_rate": 0.0009436147916900361, "loss": 1.5066, "step": 1657 }, { "epoch": 0.18, "grad_norm": 0.11649096433856834, "learning_rate": 0.0009435344441836969, "loss": 1.4407, "step": 1658 }, { "epoch": 0.18, "grad_norm": 0.0953453538897908, "learning_rate": 0.0009434540428972772, "loss": 1.4545, "step": 1659 }, { "epoch": 0.18, "grad_norm": 0.10133785841493663, "learning_rate": 0.0009433735878405261, "loss": 1.4837, "step": 1660 }, { "epoch": 0.18, "grad_norm": 0.10015520209464993, "learning_rate": 0.0009432930790231988, "loss": 1.5321, "step": 1661 }, { "epoch": 0.18, "grad_norm": 0.08948532164322566, "learning_rate": 0.0009432125164550576, "loss": 1.4165, "step": 1662 }, { "epoch": 0.18, "grad_norm": 0.09658583989362529, "learning_rate": 0.0009431319001458704, "loss": 1.4593, "step": 1663 }, { "epoch": 0.18, "grad_norm": 0.10145569880361127, "learning_rate": 0.000943051230105413, "loss": 1.426, "step": 1664 }, { "epoch": 0.18, "grad_norm": 0.09816258060491967, "learning_rate": 0.0009429705063434664, "loss": 1.4223, "step": 1665 }, { "epoch": 0.18, "grad_norm": 0.09250427904332187, "learning_rate": 0.0009428897288698188, "loss": 1.5038, "step": 1666 }, { "epoch": 0.18, "grad_norm": 0.0853389109743307, "learning_rate": 0.0009428088976942646, "loss": 1.5678, "step": 1667 }, { "epoch": 0.18, "grad_norm": 0.07695692774289016, "learning_rate": 0.0009427280128266049, "loss": 1.4343, "step": 1668 }, { "epoch": 0.18, "grad_norm": 0.10008107955257982, "learning_rate": 0.0009426470742766476, "loss": 1.4508, "step": 1669 }, { "epoch": 0.18, "grad_norm": 0.08818370379739952, "learning_rate": 0.0009425660820542062, "loss": 1.4621, "step": 1670 }, { "epoch": 0.18, "grad_norm": 0.0852469680143305, "learning_rate": 0.0009424850361691017, "loss": 1.5184, "step": 1671 }, { "epoch": 0.18, "grad_norm": 0.081114977796592, "learning_rate": 0.0009424039366311612, "loss": 1.4093, "step": 1672 }, { "epoch": 0.18, "grad_norm": 0.10402276311434469, "learning_rate": 0.0009423227834502181, "loss": 1.5449, "step": 1673 }, { "epoch": 0.18, "grad_norm": 0.08138128486557042, "learning_rate": 0.0009422415766361126, "loss": 1.4053, "step": 1674 }, { "epoch": 0.18, "grad_norm": 0.09250871509940908, "learning_rate": 0.0009421603161986913, "loss": 1.4838, "step": 1675 }, { "epoch": 0.18, "grad_norm": 0.09799638166091465, "learning_rate": 0.0009420790021478072, "loss": 1.3309, "step": 1676 }, { "epoch": 0.18, "grad_norm": 0.07799924026862029, "learning_rate": 0.00094199763449332, "loss": 1.3402, "step": 1677 }, { "epoch": 0.18, "grad_norm": 0.09607736488537287, "learning_rate": 0.0009419162132450961, "loss": 1.4766, "step": 1678 }, { "epoch": 0.18, "grad_norm": 0.08245856386036492, "learning_rate": 0.0009418347384130076, "loss": 1.3557, "step": 1679 }, { "epoch": 0.18, "grad_norm": 0.08653876091437132, "learning_rate": 0.000941753210006934, "loss": 1.4636, "step": 1680 }, { "epoch": 0.18, "grad_norm": 0.09659732334971687, "learning_rate": 0.0009416716280367606, "loss": 1.5785, "step": 1681 }, { "epoch": 0.18, "grad_norm": 0.09251463831839435, "learning_rate": 0.0009415899925123795, "loss": 1.4092, "step": 1682 }, { "epoch": 0.18, "grad_norm": 0.0885401579842873, "learning_rate": 0.0009415083034436895, "loss": 1.4988, "step": 1683 }, { "epoch": 0.18, "grad_norm": 0.08458310827754215, "learning_rate": 0.0009414265608405956, "loss": 1.453, "step": 1684 }, { "epoch": 0.18, "grad_norm": 0.09539966740345308, "learning_rate": 0.0009413447647130096, "loss": 1.5588, "step": 1685 }, { "epoch": 0.18, "grad_norm": 0.087828154350039, "learning_rate": 0.0009412629150708492, "loss": 1.4878, "step": 1686 }, { "epoch": 0.18, "grad_norm": 0.09293970968778822, "learning_rate": 0.0009411810119240389, "loss": 1.4289, "step": 1687 }, { "epoch": 0.18, "grad_norm": 0.09332098625509935, "learning_rate": 0.00094109905528251, "loss": 1.3876, "step": 1688 }, { "epoch": 0.18, "grad_norm": 0.09483816513762469, "learning_rate": 0.0009410170451562001, "loss": 1.4632, "step": 1689 }, { "epoch": 0.18, "grad_norm": 0.0878505960015256, "learning_rate": 0.000940934981555053, "loss": 1.4356, "step": 1690 }, { "epoch": 0.18, "grad_norm": 0.10200709452803336, "learning_rate": 0.000940852864489019, "loss": 1.3956, "step": 1691 }, { "epoch": 0.18, "grad_norm": 0.09522477733928604, "learning_rate": 0.0009407706939680556, "loss": 1.3232, "step": 1692 }, { "epoch": 0.18, "grad_norm": 0.08411835086532109, "learning_rate": 0.0009406884700021259, "loss": 1.5153, "step": 1693 }, { "epoch": 0.18, "grad_norm": 0.09014720566437971, "learning_rate": 0.0009406061926012, "loss": 1.546, "step": 1694 }, { "epoch": 0.18, "grad_norm": 0.07605071855611331, "learning_rate": 0.0009405238617752543, "loss": 1.4299, "step": 1695 }, { "epoch": 0.18, "grad_norm": 0.080623963055023, "learning_rate": 0.0009404414775342715, "loss": 1.4253, "step": 1696 }, { "epoch": 0.18, "grad_norm": 0.09427266407825832, "learning_rate": 0.0009403590398882411, "loss": 1.4959, "step": 1697 }, { "epoch": 0.18, "grad_norm": 0.07474659358381536, "learning_rate": 0.0009402765488471591, "loss": 1.3768, "step": 1698 }, { "epoch": 0.18, "grad_norm": 0.09176750550282939, "learning_rate": 0.0009401940044210276, "loss": 1.4923, "step": 1699 }, { "epoch": 0.18, "grad_norm": 0.10027187119937282, "learning_rate": 0.0009401114066198555, "loss": 1.4703, "step": 1700 }, { "epoch": 0.18, "grad_norm": 0.09729007735305473, "learning_rate": 0.000940028755453658, "loss": 1.4746, "step": 1701 }, { "epoch": 0.18, "grad_norm": 0.08153229489393464, "learning_rate": 0.000939946050932457, "loss": 1.4445, "step": 1702 }, { "epoch": 0.18, "grad_norm": 0.08757343940935985, "learning_rate": 0.0009398632930662805, "loss": 1.5132, "step": 1703 }, { "epoch": 0.18, "grad_norm": 0.08901895804515486, "learning_rate": 0.0009397804818651634, "loss": 1.2978, "step": 1704 }, { "epoch": 0.18, "grad_norm": 0.10619612933936191, "learning_rate": 0.0009396976173391466, "loss": 1.507, "step": 1705 }, { "epoch": 0.18, "grad_norm": 0.0917857472797286, "learning_rate": 0.000939614699498278, "loss": 1.4033, "step": 1706 }, { "epoch": 0.18, "grad_norm": 0.1134247857054161, "learning_rate": 0.0009395317283526113, "loss": 1.5523, "step": 1707 }, { "epoch": 0.18, "grad_norm": 0.09581787178579383, "learning_rate": 0.0009394487039122072, "loss": 1.4631, "step": 1708 }, { "epoch": 0.18, "grad_norm": 0.09259611299053887, "learning_rate": 0.0009393656261871328, "loss": 1.4405, "step": 1709 }, { "epoch": 0.18, "grad_norm": 0.09297130715435674, "learning_rate": 0.0009392824951874617, "loss": 1.6184, "step": 1710 }, { "epoch": 0.18, "grad_norm": 0.08872509563911614, "learning_rate": 0.0009391993109232735, "loss": 1.4219, "step": 1711 }, { "epoch": 0.18, "grad_norm": 0.08836517965035444, "learning_rate": 0.0009391160734046547, "loss": 1.5004, "step": 1712 }, { "epoch": 0.18, "grad_norm": 0.09390231142694527, "learning_rate": 0.000939032782641698, "loss": 1.4052, "step": 1713 }, { "epoch": 0.18, "grad_norm": 0.09998203205571347, "learning_rate": 0.000938949438644503, "loss": 1.4534, "step": 1714 }, { "epoch": 0.18, "grad_norm": 0.10024244141659862, "learning_rate": 0.0009388660414231751, "loss": 1.597, "step": 1715 }, { "epoch": 0.18, "grad_norm": 0.09070174363207298, "learning_rate": 0.0009387825909878269, "loss": 1.431, "step": 1716 }, { "epoch": 0.18, "grad_norm": 0.09080214985361508, "learning_rate": 0.0009386990873485767, "loss": 1.4065, "step": 1717 }, { "epoch": 0.18, "grad_norm": 0.1004759223908107, "learning_rate": 0.0009386155305155497, "loss": 1.4715, "step": 1718 }, { "epoch": 0.18, "grad_norm": 0.09826529128818194, "learning_rate": 0.0009385319204988776, "loss": 1.436, "step": 1719 }, { "epoch": 0.18, "grad_norm": 0.09212018460786485, "learning_rate": 0.0009384482573086983, "loss": 1.4487, "step": 1720 }, { "epoch": 0.19, "grad_norm": 0.09053519573952286, "learning_rate": 0.000938364540955156, "loss": 1.6079, "step": 1721 }, { "epoch": 0.19, "grad_norm": 0.09161427273443738, "learning_rate": 0.0009382807714484021, "loss": 1.5146, "step": 1722 }, { "epoch": 0.19, "grad_norm": 0.09193193868164781, "learning_rate": 0.0009381969487985935, "loss": 1.4547, "step": 1723 }, { "epoch": 0.19, "grad_norm": 0.08852240861982075, "learning_rate": 0.0009381130730158943, "loss": 1.5276, "step": 1724 }, { "epoch": 0.19, "grad_norm": 0.0914904802379876, "learning_rate": 0.0009380291441104747, "loss": 1.6143, "step": 1725 }, { "epoch": 0.19, "grad_norm": 0.08260988756714899, "learning_rate": 0.000937945162092511, "loss": 1.4669, "step": 1726 }, { "epoch": 0.19, "grad_norm": 0.10181657253275951, "learning_rate": 0.0009378611269721866, "loss": 1.4018, "step": 1727 }, { "epoch": 0.19, "grad_norm": 0.09565750562612672, "learning_rate": 0.0009377770387596911, "loss": 1.5622, "step": 1728 }, { "epoch": 0.19, "grad_norm": 0.08427791252695747, "learning_rate": 0.0009376928974652205, "loss": 1.494, "step": 1729 }, { "epoch": 0.19, "grad_norm": 0.08475458912711241, "learning_rate": 0.0009376087030989771, "loss": 1.4584, "step": 1730 }, { "epoch": 0.19, "grad_norm": 0.09136780435680898, "learning_rate": 0.0009375244556711695, "loss": 1.4727, "step": 1731 }, { "epoch": 0.19, "grad_norm": 0.08392899372822892, "learning_rate": 0.0009374401551920135, "loss": 1.4106, "step": 1732 }, { "epoch": 0.19, "grad_norm": 0.07494760250079167, "learning_rate": 0.0009373558016717306, "loss": 1.3448, "step": 1733 }, { "epoch": 0.19, "grad_norm": 0.10183183783607534, "learning_rate": 0.000937271395120549, "loss": 1.5347, "step": 1734 }, { "epoch": 0.19, "grad_norm": 0.09490611937533996, "learning_rate": 0.0009371869355487031, "loss": 1.5137, "step": 1735 }, { "epoch": 0.19, "grad_norm": 0.09095625419563282, "learning_rate": 0.0009371024229664341, "loss": 1.5858, "step": 1736 }, { "epoch": 0.19, "grad_norm": 0.09897520105592522, "learning_rate": 0.0009370178573839894, "loss": 1.502, "step": 1737 }, { "epoch": 0.19, "grad_norm": 0.08026803716653642, "learning_rate": 0.000936933238811623, "loss": 1.3948, "step": 1738 }, { "epoch": 0.19, "grad_norm": 0.08326463099403736, "learning_rate": 0.0009368485672595948, "loss": 1.3566, "step": 1739 }, { "epoch": 0.19, "grad_norm": 0.08141481156595314, "learning_rate": 0.000936763842738172, "loss": 1.4374, "step": 1740 }, { "epoch": 0.19, "grad_norm": 0.0986349039516981, "learning_rate": 0.0009366790652576274, "loss": 1.5395, "step": 1741 }, { "epoch": 0.19, "grad_norm": 0.10291774398160058, "learning_rate": 0.0009365942348282405, "loss": 1.5838, "step": 1742 }, { "epoch": 0.19, "grad_norm": 0.10217497765301305, "learning_rate": 0.0009365093514602978, "loss": 1.5123, "step": 1743 }, { "epoch": 0.19, "grad_norm": 0.08112289391270511, "learning_rate": 0.0009364244151640913, "loss": 1.4214, "step": 1744 }, { "epoch": 0.19, "grad_norm": 0.08252851448595519, "learning_rate": 0.0009363394259499197, "loss": 1.5329, "step": 1745 }, { "epoch": 0.19, "grad_norm": 0.10139278527828859, "learning_rate": 0.0009362543838280884, "loss": 1.5226, "step": 1746 }, { "epoch": 0.19, "grad_norm": 0.09091939187498209, "learning_rate": 0.0009361692888089092, "loss": 1.5196, "step": 1747 }, { "epoch": 0.19, "grad_norm": 0.08879705035046179, "learning_rate": 0.0009360841409027001, "loss": 1.3142, "step": 1748 }, { "epoch": 0.19, "grad_norm": 0.1033063834090903, "learning_rate": 0.0009359989401197852, "loss": 1.4918, "step": 1749 }, { "epoch": 0.19, "grad_norm": 0.08250221798770896, "learning_rate": 0.000935913686470496, "loss": 1.4754, "step": 1750 }, { "epoch": 0.19, "grad_norm": 0.09952553090757268, "learning_rate": 0.0009358283799651694, "loss": 1.5277, "step": 1751 }, { "epoch": 0.19, "grad_norm": 0.08770051236655776, "learning_rate": 0.0009357430206141492, "loss": 1.5348, "step": 1752 }, { "epoch": 0.19, "grad_norm": 0.08456069157036568, "learning_rate": 0.0009356576084277855, "loss": 1.594, "step": 1753 }, { "epoch": 0.19, "grad_norm": 0.08688679813984553, "learning_rate": 0.0009355721434164348, "loss": 1.4007, "step": 1754 }, { "epoch": 0.19, "grad_norm": 0.08459334076466057, "learning_rate": 0.0009354866255904602, "loss": 1.3808, "step": 1755 }, { "epoch": 0.19, "grad_norm": 0.0750770133707275, "learning_rate": 0.0009354010549602308, "loss": 1.4116, "step": 1756 }, { "epoch": 0.19, "grad_norm": 0.0810999858623209, "learning_rate": 0.0009353154315361223, "loss": 1.392, "step": 1757 }, { "epoch": 0.19, "grad_norm": 0.07561264712057969, "learning_rate": 0.0009352297553285172, "loss": 1.5725, "step": 1758 }, { "epoch": 0.19, "grad_norm": 0.07689189356736299, "learning_rate": 0.0009351440263478036, "loss": 1.3871, "step": 1759 }, { "epoch": 0.19, "grad_norm": 0.07656862328089326, "learning_rate": 0.0009350582446043767, "loss": 1.4561, "step": 1760 }, { "epoch": 0.19, "grad_norm": 0.07421858530277249, "learning_rate": 0.0009349724101086379, "loss": 1.3506, "step": 1761 }, { "epoch": 0.19, "grad_norm": 0.09125757959767364, "learning_rate": 0.0009348865228709947, "loss": 1.3664, "step": 1762 }, { "epoch": 0.19, "grad_norm": 0.08458607011462088, "learning_rate": 0.0009348005829018612, "loss": 1.3518, "step": 1763 }, { "epoch": 0.19, "grad_norm": 0.07846569043882619, "learning_rate": 0.0009347145902116582, "loss": 1.4436, "step": 1764 }, { "epoch": 0.19, "grad_norm": 0.07880792008907359, "learning_rate": 0.0009346285448108124, "loss": 1.3728, "step": 1765 }, { "epoch": 0.19, "grad_norm": 0.08379065827759669, "learning_rate": 0.0009345424467097572, "loss": 1.4667, "step": 1766 }, { "epoch": 0.19, "grad_norm": 0.07785121808069187, "learning_rate": 0.0009344562959189321, "loss": 1.4439, "step": 1767 }, { "epoch": 0.19, "grad_norm": 0.07867477807500672, "learning_rate": 0.0009343700924487835, "loss": 1.4389, "step": 1768 }, { "epoch": 0.19, "grad_norm": 0.0823946804001334, "learning_rate": 0.0009342838363097635, "loss": 1.5018, "step": 1769 }, { "epoch": 0.19, "grad_norm": 0.08063914021381803, "learning_rate": 0.0009341975275123313, "loss": 1.406, "step": 1770 }, { "epoch": 0.19, "grad_norm": 0.08547202214736771, "learning_rate": 0.0009341111660669519, "loss": 1.328, "step": 1771 }, { "epoch": 0.19, "grad_norm": 0.10723639210392595, "learning_rate": 0.0009340247519840969, "loss": 1.5986, "step": 1772 }, { "epoch": 0.19, "grad_norm": 0.08744751839005566, "learning_rate": 0.0009339382852742446, "loss": 1.4943, "step": 1773 }, { "epoch": 0.19, "grad_norm": 0.0947836846245832, "learning_rate": 0.0009338517659478791, "loss": 1.4841, "step": 1774 }, { "epoch": 0.19, "grad_norm": 0.07957752401569529, "learning_rate": 0.0009337651940154914, "loss": 1.415, "step": 1775 }, { "epoch": 0.19, "grad_norm": 0.09486280553870081, "learning_rate": 0.0009336785694875785, "loss": 1.4312, "step": 1776 }, { "epoch": 0.19, "grad_norm": 0.0946131450018491, "learning_rate": 0.0009335918923746438, "loss": 1.4232, "step": 1777 }, { "epoch": 0.19, "grad_norm": 0.0869396458702221, "learning_rate": 0.0009335051626871973, "loss": 1.416, "step": 1778 }, { "epoch": 0.19, "grad_norm": 0.09052244814090689, "learning_rate": 0.0009334183804357555, "loss": 1.4041, "step": 1779 }, { "epoch": 0.19, "grad_norm": 0.11469431378434027, "learning_rate": 0.0009333315456308407, "loss": 1.4579, "step": 1780 }, { "epoch": 0.19, "grad_norm": 0.0906512085860017, "learning_rate": 0.0009332446582829821, "loss": 1.5208, "step": 1781 }, { "epoch": 0.19, "grad_norm": 0.09416622905854574, "learning_rate": 0.0009331577184027149, "loss": 1.3891, "step": 1782 }, { "epoch": 0.19, "grad_norm": 0.09651543652014465, "learning_rate": 0.0009330707260005813, "loss": 1.6078, "step": 1783 }, { "epoch": 0.19, "grad_norm": 0.09579404336719885, "learning_rate": 0.000932983681087129, "loss": 1.52, "step": 1784 }, { "epoch": 0.19, "grad_norm": 0.09205179631972425, "learning_rate": 0.0009328965836729127, "loss": 1.4295, "step": 1785 }, { "epoch": 0.19, "grad_norm": 0.10099213256917071, "learning_rate": 0.0009328094337684932, "loss": 1.5516, "step": 1786 }, { "epoch": 0.19, "grad_norm": 0.08243984686364722, "learning_rate": 0.0009327222313844376, "loss": 1.3988, "step": 1787 }, { "epoch": 0.19, "grad_norm": 0.10671922892355516, "learning_rate": 0.0009326349765313199, "loss": 1.3028, "step": 1788 }, { "epoch": 0.19, "grad_norm": 0.10306132239894458, "learning_rate": 0.0009325476692197197, "loss": 1.4138, "step": 1789 }, { "epoch": 0.19, "grad_norm": 0.09456390248444875, "learning_rate": 0.0009324603094602232, "loss": 1.5466, "step": 1790 }, { "epoch": 0.19, "grad_norm": 0.08567679617373725, "learning_rate": 0.0009323728972634234, "loss": 1.4125, "step": 1791 }, { "epoch": 0.19, "grad_norm": 0.0866261102864471, "learning_rate": 0.0009322854326399192, "loss": 1.3749, "step": 1792 }, { "epoch": 0.19, "grad_norm": 0.10008267779048576, "learning_rate": 0.000932197915600316, "loss": 1.4912, "step": 1793 }, { "epoch": 0.19, "grad_norm": 0.09628776139314607, "learning_rate": 0.0009321103461552254, "loss": 1.5609, "step": 1794 }, { "epoch": 0.19, "grad_norm": 0.09887887458347153, "learning_rate": 0.0009320227243152657, "loss": 1.5033, "step": 1795 }, { "epoch": 0.19, "grad_norm": 0.08765493554091913, "learning_rate": 0.0009319350500910612, "loss": 1.4827, "step": 1796 }, { "epoch": 0.19, "grad_norm": 0.09325239873922055, "learning_rate": 0.0009318473234932428, "loss": 1.3376, "step": 1797 }, { "epoch": 0.19, "grad_norm": 0.08425381736541987, "learning_rate": 0.0009317595445324476, "loss": 1.4944, "step": 1798 }, { "epoch": 0.19, "grad_norm": 0.0966079305940004, "learning_rate": 0.0009316717132193192, "loss": 1.5316, "step": 1799 }, { "epoch": 0.19, "grad_norm": 0.08757180961711196, "learning_rate": 0.0009315838295645074, "loss": 1.4882, "step": 1800 }, { "epoch": 0.19, "grad_norm": 0.08662799707706155, "learning_rate": 0.0009314958935786683, "loss": 1.46, "step": 1801 }, { "epoch": 0.19, "grad_norm": 0.08311178903942941, "learning_rate": 0.0009314079052724644, "loss": 1.574, "step": 1802 }, { "epoch": 0.19, "grad_norm": 0.09580809932720677, "learning_rate": 0.0009313198646565648, "loss": 1.4954, "step": 1803 }, { "epoch": 0.19, "grad_norm": 0.09515422211680134, "learning_rate": 0.0009312317717416448, "loss": 1.4624, "step": 1804 }, { "epoch": 0.19, "grad_norm": 0.09528198931281466, "learning_rate": 0.0009311436265383856, "loss": 1.5618, "step": 1805 }, { "epoch": 0.19, "grad_norm": 0.1036689450583713, "learning_rate": 0.0009310554290574753, "loss": 1.4556, "step": 1806 }, { "epoch": 0.19, "grad_norm": 0.08480419301117718, "learning_rate": 0.0009309671793096082, "loss": 1.4717, "step": 1807 }, { "epoch": 0.19, "grad_norm": 0.08133107911744367, "learning_rate": 0.0009308788773054848, "loss": 1.3288, "step": 1808 }, { "epoch": 0.19, "grad_norm": 0.08016797148504663, "learning_rate": 0.0009307905230558121, "loss": 1.5397, "step": 1809 }, { "epoch": 0.19, "grad_norm": 0.0846677701501156, "learning_rate": 0.0009307021165713033, "loss": 1.4324, "step": 1810 }, { "epoch": 0.19, "grad_norm": 0.07570819301145869, "learning_rate": 0.000930613657862678, "loss": 1.5013, "step": 1811 }, { "epoch": 0.19, "grad_norm": 0.09220020986485801, "learning_rate": 0.0009305251469406622, "loss": 1.3811, "step": 1812 }, { "epoch": 0.19, "grad_norm": 0.08711057852439665, "learning_rate": 0.000930436583815988, "loss": 1.5304, "step": 1813 }, { "epoch": 0.2, "grad_norm": 0.0801584114606825, "learning_rate": 0.0009303479684993942, "loss": 1.4914, "step": 1814 }, { "epoch": 0.2, "grad_norm": 0.08411107652644935, "learning_rate": 0.0009302593010016254, "loss": 1.309, "step": 1815 }, { "epoch": 0.2, "grad_norm": 0.08740716260274044, "learning_rate": 0.0009301705813334331, "loss": 1.4615, "step": 1816 }, { "epoch": 0.2, "grad_norm": 0.08826543969180319, "learning_rate": 0.000930081809505575, "loss": 1.3672, "step": 1817 }, { "epoch": 0.2, "grad_norm": 0.08663114298057124, "learning_rate": 0.0009299929855288145, "loss": 1.3388, "step": 1818 }, { "epoch": 0.2, "grad_norm": 0.09701128888327752, "learning_rate": 0.0009299041094139222, "loss": 1.5865, "step": 1819 }, { "epoch": 0.2, "grad_norm": 0.07635149578067951, "learning_rate": 0.0009298151811716744, "loss": 1.5124, "step": 1820 }, { "epoch": 0.2, "grad_norm": 0.08728723564923248, "learning_rate": 0.0009297262008128543, "loss": 1.5027, "step": 1821 }, { "epoch": 0.2, "grad_norm": 0.11155604154795036, "learning_rate": 0.0009296371683482508, "loss": 1.3568, "step": 1822 }, { "epoch": 0.2, "grad_norm": 0.07719213570012251, "learning_rate": 0.0009295480837886594, "loss": 1.4425, "step": 1823 }, { "epoch": 0.2, "grad_norm": 0.0911625047888657, "learning_rate": 0.0009294589471448819, "loss": 1.4705, "step": 1824 }, { "epoch": 0.2, "grad_norm": 0.09613111465408736, "learning_rate": 0.0009293697584277265, "loss": 1.4979, "step": 1825 }, { "epoch": 0.2, "grad_norm": 0.08512139925092915, "learning_rate": 0.0009292805176480077, "loss": 1.3785, "step": 1826 }, { "epoch": 0.2, "grad_norm": 0.09262471827262757, "learning_rate": 0.0009291912248165461, "loss": 1.4475, "step": 1827 }, { "epoch": 0.2, "grad_norm": 0.09367459711998498, "learning_rate": 0.0009291018799441691, "loss": 1.4571, "step": 1828 }, { "epoch": 0.2, "grad_norm": 0.09291744975591652, "learning_rate": 0.0009290124830417096, "loss": 1.6161, "step": 1829 }, { "epoch": 0.2, "grad_norm": 0.091514229624702, "learning_rate": 0.0009289230341200075, "loss": 1.4075, "step": 1830 }, { "epoch": 0.2, "grad_norm": 0.09264404283002275, "learning_rate": 0.0009288335331899088, "loss": 1.4796, "step": 1831 }, { "epoch": 0.2, "grad_norm": 0.09373072460820865, "learning_rate": 0.0009287439802622659, "loss": 1.4221, "step": 1832 }, { "epoch": 0.2, "grad_norm": 0.09399036796231679, "learning_rate": 0.0009286543753479372, "loss": 1.3825, "step": 1833 }, { "epoch": 0.2, "grad_norm": 0.08289694956094026, "learning_rate": 0.0009285647184577877, "loss": 1.477, "step": 1834 }, { "epoch": 0.2, "grad_norm": 0.08546486711268557, "learning_rate": 0.0009284750096026887, "loss": 1.3694, "step": 1835 }, { "epoch": 0.2, "grad_norm": 0.09047798016590947, "learning_rate": 0.0009283852487935174, "loss": 1.4847, "step": 1836 }, { "epoch": 0.2, "grad_norm": 0.10009191077886122, "learning_rate": 0.0009282954360411577, "loss": 1.4234, "step": 1837 }, { "epoch": 0.2, "grad_norm": 0.0841123623143838, "learning_rate": 0.0009282055713565001, "loss": 1.5119, "step": 1838 }, { "epoch": 0.2, "grad_norm": 0.0915824218936105, "learning_rate": 0.0009281156547504408, "loss": 1.4167, "step": 1839 }, { "epoch": 0.2, "grad_norm": 0.08648630759897831, "learning_rate": 0.0009280256862338822, "loss": 1.3795, "step": 1840 }, { "epoch": 0.2, "grad_norm": 0.08526753456498552, "learning_rate": 0.0009279356658177336, "loss": 1.3847, "step": 1841 }, { "epoch": 0.2, "grad_norm": 0.09626763852163273, "learning_rate": 0.0009278455935129102, "loss": 1.5616, "step": 1842 }, { "epoch": 0.2, "grad_norm": 0.08816486594899263, "learning_rate": 0.0009277554693303337, "loss": 1.53, "step": 1843 }, { "epoch": 0.2, "grad_norm": 0.09226706367835856, "learning_rate": 0.0009276652932809315, "loss": 1.4491, "step": 1844 }, { "epoch": 0.2, "grad_norm": 0.10737839815995892, "learning_rate": 0.0009275750653756384, "loss": 1.3005, "step": 1845 }, { "epoch": 0.2, "grad_norm": 0.0827159786371698, "learning_rate": 0.0009274847856253945, "loss": 1.3935, "step": 1846 }, { "epoch": 0.2, "grad_norm": 0.08930362176980898, "learning_rate": 0.0009273944540411465, "loss": 1.4578, "step": 1847 }, { "epoch": 0.2, "grad_norm": 0.08293126307134922, "learning_rate": 0.0009273040706338476, "loss": 1.4298, "step": 1848 }, { "epoch": 0.2, "grad_norm": 0.09167982308662573, "learning_rate": 0.0009272136354144569, "loss": 1.4165, "step": 1849 }, { "epoch": 0.2, "grad_norm": 0.08142752813431287, "learning_rate": 0.0009271231483939402, "loss": 1.413, "step": 1850 }, { "epoch": 0.2, "grad_norm": 0.08088682067099412, "learning_rate": 0.0009270326095832691, "loss": 1.5194, "step": 1851 }, { "epoch": 0.2, "grad_norm": 0.080201882006416, "learning_rate": 0.0009269420189934219, "loss": 1.5184, "step": 1852 }, { "epoch": 0.2, "grad_norm": 0.08617551581904413, "learning_rate": 0.000926851376635383, "loss": 1.3672, "step": 1853 }, { "epoch": 0.2, "grad_norm": 0.08943700327583678, "learning_rate": 0.0009267606825201433, "loss": 1.4141, "step": 1854 }, { "epoch": 0.2, "grad_norm": 0.0788005263395767, "learning_rate": 0.0009266699366586993, "loss": 1.4555, "step": 1855 }, { "epoch": 0.2, "grad_norm": 0.08619240998799432, "learning_rate": 0.0009265791390620546, "loss": 1.4135, "step": 1856 }, { "epoch": 0.2, "grad_norm": 0.0801316495282694, "learning_rate": 0.0009264882897412188, "loss": 1.4703, "step": 1857 }, { "epoch": 0.2, "grad_norm": 0.08894081393129301, "learning_rate": 0.0009263973887072074, "loss": 1.5404, "step": 1858 }, { "epoch": 0.2, "grad_norm": 0.08415426907162747, "learning_rate": 0.0009263064359710427, "loss": 1.3856, "step": 1859 }, { "epoch": 0.2, "grad_norm": 0.08494026519042301, "learning_rate": 0.0009262154315437528, "loss": 1.3861, "step": 1860 }, { "epoch": 0.2, "grad_norm": 0.08459923609704402, "learning_rate": 0.0009261243754363725, "loss": 1.4656, "step": 1861 }, { "epoch": 0.2, "grad_norm": 0.08646099536983778, "learning_rate": 0.0009260332676599425, "loss": 1.4025, "step": 1862 }, { "epoch": 0.2, "grad_norm": 0.0833393446485166, "learning_rate": 0.0009259421082255103, "loss": 1.313, "step": 1863 }, { "epoch": 0.2, "grad_norm": 0.10105293160333415, "learning_rate": 0.0009258508971441288, "loss": 1.4142, "step": 1864 }, { "epoch": 0.2, "grad_norm": 0.07753588468155745, "learning_rate": 0.0009257596344268579, "loss": 1.4151, "step": 1865 }, { "epoch": 0.2, "grad_norm": 0.07843557406004462, "learning_rate": 0.0009256683200847637, "loss": 1.5059, "step": 1866 }, { "epoch": 0.2, "grad_norm": 0.09283600002740316, "learning_rate": 0.000925576954128918, "loss": 1.4546, "step": 1867 }, { "epoch": 0.2, "grad_norm": 0.08221451122832747, "learning_rate": 0.0009254855365703995, "loss": 1.4822, "step": 1868 }, { "epoch": 0.2, "grad_norm": 0.09696442404894645, "learning_rate": 0.0009253940674202929, "loss": 1.5163, "step": 1869 }, { "epoch": 0.2, "grad_norm": 0.09377079226474856, "learning_rate": 0.000925302546689689, "loss": 1.5331, "step": 1870 }, { "epoch": 0.2, "grad_norm": 0.08495042450853789, "learning_rate": 0.000925210974389685, "loss": 1.4877, "step": 1871 }, { "epoch": 0.2, "grad_norm": 0.09270980057823298, "learning_rate": 0.0009251193505313844, "loss": 1.4065, "step": 1872 }, { "epoch": 0.2, "grad_norm": 0.10013493564774836, "learning_rate": 0.0009250276751258972, "loss": 1.3734, "step": 1873 }, { "epoch": 0.2, "grad_norm": 0.08111322828236464, "learning_rate": 0.0009249359481843389, "loss": 1.4621, "step": 1874 }, { "epoch": 0.2, "grad_norm": 0.095044532703512, "learning_rate": 0.0009248441697178318, "loss": 1.3761, "step": 1875 }, { "epoch": 0.2, "grad_norm": 0.09643327975927989, "learning_rate": 0.0009247523397375047, "loss": 1.5059, "step": 1876 }, { "epoch": 0.2, "grad_norm": 0.09213762752747705, "learning_rate": 0.000924660458254492, "loss": 1.4399, "step": 1877 }, { "epoch": 0.2, "grad_norm": 0.096760198515979, "learning_rate": 0.0009245685252799345, "loss": 1.4854, "step": 1878 }, { "epoch": 0.2, "grad_norm": 0.08833643778084667, "learning_rate": 0.0009244765408249798, "loss": 1.3744, "step": 1879 }, { "epoch": 0.2, "grad_norm": 0.10338132621574833, "learning_rate": 0.0009243845049007811, "loss": 1.6115, "step": 1880 }, { "epoch": 0.2, "grad_norm": 0.09608860137761568, "learning_rate": 0.0009242924175184981, "loss": 1.4643, "step": 1881 }, { "epoch": 0.2, "grad_norm": 0.09499464003051669, "learning_rate": 0.0009242002786892967, "loss": 1.4931, "step": 1882 }, { "epoch": 0.2, "grad_norm": 0.10553838276741273, "learning_rate": 0.0009241080884243491, "loss": 1.4613, "step": 1883 }, { "epoch": 0.2, "grad_norm": 0.08971818800379484, "learning_rate": 0.0009240158467348337, "loss": 1.3961, "step": 1884 }, { "epoch": 0.2, "grad_norm": 0.09005172515294688, "learning_rate": 0.000923923553631935, "loss": 1.3789, "step": 1885 }, { "epoch": 0.2, "grad_norm": 0.09165971227928925, "learning_rate": 0.000923831209126844, "loss": 1.5329, "step": 1886 }, { "epoch": 0.2, "grad_norm": 0.08771485926445989, "learning_rate": 0.0009237388132307576, "loss": 1.3577, "step": 1887 }, { "epoch": 0.2, "grad_norm": 0.10489309578393058, "learning_rate": 0.0009236463659548793, "loss": 1.4723, "step": 1888 }, { "epoch": 0.2, "grad_norm": 0.08272777189492383, "learning_rate": 0.0009235538673104187, "loss": 1.3486, "step": 1889 }, { "epoch": 0.2, "grad_norm": 0.08734761554544256, "learning_rate": 0.0009234613173085913, "loss": 1.3613, "step": 1890 }, { "epoch": 0.2, "grad_norm": 0.0913984053355163, "learning_rate": 0.0009233687159606194, "loss": 1.3321, "step": 1891 }, { "epoch": 0.2, "grad_norm": 0.08114508390947732, "learning_rate": 0.000923276063277731, "loss": 1.5438, "step": 1892 }, { "epoch": 0.2, "grad_norm": 0.08283596903978362, "learning_rate": 0.0009231833592711609, "loss": 1.5123, "step": 1893 }, { "epoch": 0.2, "grad_norm": 0.08104943206708809, "learning_rate": 0.0009230906039521494, "loss": 1.4906, "step": 1894 }, { "epoch": 0.2, "grad_norm": 0.0813732064731777, "learning_rate": 0.0009229977973319436, "loss": 1.5399, "step": 1895 }, { "epoch": 0.2, "grad_norm": 0.07189004509962865, "learning_rate": 0.0009229049394217965, "loss": 1.4665, "step": 1896 }, { "epoch": 0.2, "grad_norm": 0.1086799985754889, "learning_rate": 0.0009228120302329677, "loss": 1.3363, "step": 1897 }, { "epoch": 0.2, "grad_norm": 0.08013314880463421, "learning_rate": 0.0009227190697767224, "loss": 1.5356, "step": 1898 }, { "epoch": 0.2, "grad_norm": 0.08018669322491434, "learning_rate": 0.0009226260580643326, "loss": 1.5013, "step": 1899 }, { "epoch": 0.2, "grad_norm": 0.09553634333447207, "learning_rate": 0.0009225329951070763, "loss": 1.4245, "step": 1900 }, { "epoch": 0.2, "grad_norm": 0.09467093564785904, "learning_rate": 0.0009224398809162376, "loss": 1.4783, "step": 1901 }, { "epoch": 0.2, "grad_norm": 0.08422828895427777, "learning_rate": 0.0009223467155031068, "loss": 1.4271, "step": 1902 }, { "epoch": 0.2, "grad_norm": 0.0876539354690251, "learning_rate": 0.000922253498878981, "loss": 1.2882, "step": 1903 }, { "epoch": 0.2, "grad_norm": 0.09326919056385669, "learning_rate": 0.0009221602310551625, "loss": 1.4323, "step": 1904 }, { "epoch": 0.2, "grad_norm": 0.093287013902574, "learning_rate": 0.0009220669120429608, "loss": 1.476, "step": 1905 }, { "epoch": 0.2, "grad_norm": 0.0839791046751122, "learning_rate": 0.0009219735418536908, "loss": 1.5382, "step": 1906 }, { "epoch": 0.2, "grad_norm": 0.0969289999285875, "learning_rate": 0.000921880120498674, "loss": 1.4541, "step": 1907 }, { "epoch": 0.21, "grad_norm": 0.10043740648290224, "learning_rate": 0.0009217866479892383, "loss": 1.5245, "step": 1908 }, { "epoch": 0.21, "grad_norm": 0.09340595530802943, "learning_rate": 0.0009216931243367173, "loss": 1.4604, "step": 1909 }, { "epoch": 0.21, "grad_norm": 0.09369494142636196, "learning_rate": 0.0009215995495524512, "loss": 1.4864, "step": 1910 }, { "epoch": 0.21, "grad_norm": 0.09596598542059517, "learning_rate": 0.0009215059236477863, "loss": 1.3945, "step": 1911 }, { "epoch": 0.21, "grad_norm": 0.09961180356748962, "learning_rate": 0.000921412246634075, "loss": 1.4319, "step": 1912 }, { "epoch": 0.21, "grad_norm": 0.09037393165223366, "learning_rate": 0.000921318518522676, "loss": 1.5074, "step": 1913 }, { "epoch": 0.21, "grad_norm": 0.08625277482368682, "learning_rate": 0.000921224739324954, "loss": 1.3883, "step": 1914 }, { "epoch": 0.21, "grad_norm": 0.09714422223326019, "learning_rate": 0.0009211309090522802, "loss": 1.5628, "step": 1915 }, { "epoch": 0.21, "grad_norm": 0.0909470499364441, "learning_rate": 0.0009210370277160319, "loss": 1.5049, "step": 1916 }, { "epoch": 0.21, "grad_norm": 0.09368988434281326, "learning_rate": 0.0009209430953275923, "loss": 1.5492, "step": 1917 }, { "epoch": 0.21, "grad_norm": 0.09939051407296741, "learning_rate": 0.0009208491118983514, "loss": 1.4222, "step": 1918 }, { "epoch": 0.21, "grad_norm": 0.11093468746461634, "learning_rate": 0.0009207550774397047, "loss": 1.5089, "step": 1919 }, { "epoch": 0.21, "grad_norm": 0.09192166444725798, "learning_rate": 0.0009206609919630542, "loss": 1.4475, "step": 1920 }, { "epoch": 0.21, "grad_norm": 0.08413223070653192, "learning_rate": 0.0009205668554798084, "loss": 1.3386, "step": 1921 }, { "epoch": 0.21, "grad_norm": 0.08125748886386505, "learning_rate": 0.0009204726680013813, "loss": 1.3634, "step": 1922 }, { "epoch": 0.21, "grad_norm": 0.08620903076980944, "learning_rate": 0.0009203784295391937, "loss": 1.401, "step": 1923 }, { "epoch": 0.21, "grad_norm": 0.10407022815151389, "learning_rate": 0.0009202841401046722, "loss": 1.4641, "step": 1924 }, { "epoch": 0.21, "grad_norm": 0.09106470624464104, "learning_rate": 0.0009201897997092496, "loss": 1.3364, "step": 1925 }, { "epoch": 0.21, "grad_norm": 0.0793647200463425, "learning_rate": 0.0009200954083643654, "loss": 1.3409, "step": 1926 }, { "epoch": 0.21, "grad_norm": 0.11996672867870423, "learning_rate": 0.0009200009660814645, "loss": 1.4471, "step": 1927 }, { "epoch": 0.21, "grad_norm": 0.07598152126075328, "learning_rate": 0.0009199064728719988, "loss": 1.3633, "step": 1928 }, { "epoch": 0.21, "grad_norm": 0.09030240320487572, "learning_rate": 0.0009198119287474254, "loss": 1.4453, "step": 1929 }, { "epoch": 0.21, "grad_norm": 0.09808317088255411, "learning_rate": 0.0009197173337192082, "loss": 1.4955, "step": 1930 }, { "epoch": 0.21, "grad_norm": 0.09680838880881662, "learning_rate": 0.0009196226877988174, "loss": 1.5606, "step": 1931 }, { "epoch": 0.21, "grad_norm": 0.08010659787429597, "learning_rate": 0.0009195279909977293, "loss": 1.3131, "step": 1932 }, { "epoch": 0.21, "grad_norm": 0.09152502912300714, "learning_rate": 0.0009194332433274256, "loss": 1.5883, "step": 1933 }, { "epoch": 0.21, "grad_norm": 0.09644413439022238, "learning_rate": 0.0009193384447993954, "loss": 1.4096, "step": 1934 }, { "epoch": 0.21, "grad_norm": 0.07964388791093453, "learning_rate": 0.0009192435954251328, "loss": 1.3457, "step": 1935 }, { "epoch": 0.21, "grad_norm": 0.08985149308714338, "learning_rate": 0.0009191486952161392, "loss": 1.4786, "step": 1936 }, { "epoch": 0.21, "grad_norm": 0.08504235679832424, "learning_rate": 0.0009190537441839211, "loss": 1.5413, "step": 1937 }, { "epoch": 0.21, "grad_norm": 0.08693547735099826, "learning_rate": 0.0009189587423399919, "loss": 1.3525, "step": 1938 }, { "epoch": 0.21, "grad_norm": 0.08671648762518341, "learning_rate": 0.0009188636896958707, "loss": 1.5021, "step": 1939 }, { "epoch": 0.21, "grad_norm": 0.0905281579702194, "learning_rate": 0.0009187685862630833, "loss": 1.4043, "step": 1940 }, { "epoch": 0.21, "grad_norm": 0.08064542998545053, "learning_rate": 0.0009186734320531609, "loss": 1.4366, "step": 1941 }, { "epoch": 0.21, "grad_norm": 0.08168312123459194, "learning_rate": 0.0009185782270776416, "loss": 1.433, "step": 1942 }, { "epoch": 0.21, "grad_norm": 0.07806262864437452, "learning_rate": 0.0009184829713480691, "loss": 1.4421, "step": 1943 }, { "epoch": 0.21, "grad_norm": 0.09621596143974015, "learning_rate": 0.0009183876648759937, "loss": 1.4011, "step": 1944 }, { "epoch": 0.21, "grad_norm": 0.08310495330480361, "learning_rate": 0.0009182923076729714, "loss": 1.4856, "step": 1945 }, { "epoch": 0.21, "grad_norm": 0.09214450573103178, "learning_rate": 0.0009181968997505649, "loss": 1.4795, "step": 1946 }, { "epoch": 0.21, "grad_norm": 0.09309818499806359, "learning_rate": 0.0009181014411203425, "loss": 1.4156, "step": 1947 }, { "epoch": 0.21, "grad_norm": 0.09404188405062014, "learning_rate": 0.0009180059317938789, "loss": 1.4638, "step": 1948 }, { "epoch": 0.21, "grad_norm": 0.08127932701261531, "learning_rate": 0.000917910371782755, "loss": 1.4105, "step": 1949 }, { "epoch": 0.21, "grad_norm": 0.06988438601558783, "learning_rate": 0.0009178147610985577, "loss": 1.4714, "step": 1950 }, { "epoch": 0.21, "grad_norm": 0.08197023756718551, "learning_rate": 0.0009177190997528803, "loss": 1.4166, "step": 1951 }, { "epoch": 0.21, "grad_norm": 0.08041723486575238, "learning_rate": 0.0009176233877573219, "loss": 1.4853, "step": 1952 }, { "epoch": 0.21, "grad_norm": 0.08324063551372075, "learning_rate": 0.000917527625123488, "loss": 1.3482, "step": 1953 }, { "epoch": 0.21, "grad_norm": 0.08663822722836335, "learning_rate": 0.00091743181186299, "loss": 1.4284, "step": 1954 }, { "epoch": 0.21, "grad_norm": 0.08043853843069798, "learning_rate": 0.000917335947987446, "loss": 1.5212, "step": 1955 }, { "epoch": 0.21, "grad_norm": 0.09342537247056798, "learning_rate": 0.0009172400335084792, "loss": 1.5348, "step": 1956 }, { "epoch": 0.21, "grad_norm": 0.08823417963705833, "learning_rate": 0.0009171440684377202, "loss": 1.4782, "step": 1957 }, { "epoch": 0.21, "grad_norm": 0.08640741433278323, "learning_rate": 0.0009170480527868045, "loss": 1.3428, "step": 1958 }, { "epoch": 0.21, "grad_norm": 0.08641452096333434, "learning_rate": 0.0009169519865673747, "loss": 1.4254, "step": 1959 }, { "epoch": 0.21, "grad_norm": 0.09594850199633233, "learning_rate": 0.0009168558697910792, "loss": 1.493, "step": 1960 }, { "epoch": 0.21, "grad_norm": 0.0810561582133138, "learning_rate": 0.0009167597024695722, "loss": 1.3539, "step": 1961 }, { "epoch": 0.21, "grad_norm": 0.07901539053041998, "learning_rate": 0.0009166634846145145, "loss": 1.4775, "step": 1962 }, { "epoch": 0.21, "grad_norm": 0.09014507355204333, "learning_rate": 0.000916567216237573, "loss": 1.5478, "step": 1963 }, { "epoch": 0.21, "grad_norm": 0.08634547230819684, "learning_rate": 0.0009164708973504204, "loss": 1.3436, "step": 1964 }, { "epoch": 0.21, "grad_norm": 0.09372835977458653, "learning_rate": 0.0009163745279647355, "loss": 1.4296, "step": 1965 }, { "epoch": 0.21, "grad_norm": 0.08489228357702569, "learning_rate": 0.0009162781080922038, "loss": 1.5168, "step": 1966 }, { "epoch": 0.21, "grad_norm": 0.08701353461899991, "learning_rate": 0.0009161816377445162, "loss": 1.5228, "step": 1967 }, { "epoch": 0.21, "grad_norm": 0.08607045368609934, "learning_rate": 0.0009160851169333704, "loss": 1.5066, "step": 1968 }, { "epoch": 0.21, "grad_norm": 0.07958188338929062, "learning_rate": 0.0009159885456704695, "loss": 1.3882, "step": 1969 }, { "epoch": 0.21, "grad_norm": 0.09109091693797759, "learning_rate": 0.0009158919239675235, "loss": 1.4687, "step": 1970 }, { "epoch": 0.21, "grad_norm": 0.08095901944672143, "learning_rate": 0.0009157952518362478, "loss": 1.4857, "step": 1971 }, { "epoch": 0.21, "grad_norm": 0.0809430968575337, "learning_rate": 0.0009156985292883645, "loss": 1.4591, "step": 1972 }, { "epoch": 0.21, "grad_norm": 0.09504100751905215, "learning_rate": 0.0009156017563356013, "loss": 1.4128, "step": 1973 }, { "epoch": 0.21, "grad_norm": 0.09268525098327988, "learning_rate": 0.0009155049329896923, "loss": 1.4546, "step": 1974 }, { "epoch": 0.21, "grad_norm": 0.09645365774327952, "learning_rate": 0.0009154080592623777, "loss": 1.3952, "step": 1975 }, { "epoch": 0.21, "grad_norm": 0.08312108781757994, "learning_rate": 0.000915311135165404, "loss": 1.3924, "step": 1976 }, { "epoch": 0.21, "grad_norm": 0.08589342731975778, "learning_rate": 0.0009152141607105231, "loss": 1.4111, "step": 1977 }, { "epoch": 0.21, "grad_norm": 0.08478583867251643, "learning_rate": 0.0009151171359094939, "loss": 1.4493, "step": 1978 }, { "epoch": 0.21, "grad_norm": 0.08228102758097187, "learning_rate": 0.0009150200607740809, "loss": 1.4154, "step": 1979 }, { "epoch": 0.21, "grad_norm": 0.08876973483667644, "learning_rate": 0.0009149229353160545, "loss": 1.4799, "step": 1980 }, { "epoch": 0.21, "grad_norm": 0.08620876609407999, "learning_rate": 0.0009148257595471919, "loss": 1.4748, "step": 1981 }, { "epoch": 0.21, "grad_norm": 0.08926651388447245, "learning_rate": 0.0009147285334792759, "loss": 1.5433, "step": 1982 }, { "epoch": 0.21, "grad_norm": 0.08686773533514985, "learning_rate": 0.0009146312571240953, "loss": 1.4099, "step": 1983 }, { "epoch": 0.21, "grad_norm": 0.08120790615360068, "learning_rate": 0.0009145339304934453, "loss": 1.5089, "step": 1984 }, { "epoch": 0.21, "grad_norm": 0.08751756915323748, "learning_rate": 0.0009144365535991273, "loss": 1.5103, "step": 1985 }, { "epoch": 0.21, "grad_norm": 0.08258101469539926, "learning_rate": 0.0009143391264529482, "loss": 1.4118, "step": 1986 }, { "epoch": 0.21, "grad_norm": 0.08068566310207773, "learning_rate": 0.0009142416490667217, "loss": 1.4599, "step": 1987 }, { "epoch": 0.21, "grad_norm": 0.09637149301141386, "learning_rate": 0.000914144121452267, "loss": 1.5734, "step": 1988 }, { "epoch": 0.21, "grad_norm": 0.08287725138258284, "learning_rate": 0.0009140465436214099, "loss": 1.5485, "step": 1989 }, { "epoch": 0.21, "grad_norm": 0.09651842539644462, "learning_rate": 0.000913948915585982, "loss": 1.4493, "step": 1990 }, { "epoch": 0.21, "grad_norm": 0.08806081162924627, "learning_rate": 0.0009138512373578209, "loss": 1.4323, "step": 1991 }, { "epoch": 0.21, "grad_norm": 0.07898507253280361, "learning_rate": 0.0009137535089487705, "loss": 1.4421, "step": 1992 }, { "epoch": 0.21, "grad_norm": 0.08991267965323817, "learning_rate": 0.0009136557303706808, "loss": 1.4571, "step": 1993 }, { "epoch": 0.21, "grad_norm": 0.08687067556252696, "learning_rate": 0.0009135579016354077, "loss": 1.5119, "step": 1994 }, { "epoch": 0.21, "grad_norm": 0.09059313463610325, "learning_rate": 0.0009134600227548132, "loss": 1.3702, "step": 1995 }, { "epoch": 0.21, "grad_norm": 0.08454581493932053, "learning_rate": 0.0009133620937407656, "loss": 1.3382, "step": 1996 }, { "epoch": 0.21, "grad_norm": 0.0915977469098984, "learning_rate": 0.0009132641146051391, "loss": 1.4972, "step": 1997 }, { "epoch": 0.21, "grad_norm": 0.07616506753971135, "learning_rate": 0.0009131660853598138, "loss": 1.4425, "step": 1998 }, { "epoch": 0.21, "grad_norm": 0.0853799975536159, "learning_rate": 0.0009130680060166763, "loss": 1.4682, "step": 1999 }, { "epoch": 0.21, "grad_norm": 0.08997371980517725, "learning_rate": 0.0009129698765876191, "loss": 1.4905, "step": 2000 }, { "epoch": 0.22, "grad_norm": 0.0893355318664062, "learning_rate": 0.0009128716970845406, "loss": 1.4148, "step": 2001 }, { "epoch": 0.22, "grad_norm": 0.0984766048951835, "learning_rate": 0.0009127734675193454, "loss": 1.4577, "step": 2002 }, { "epoch": 0.22, "grad_norm": 0.08685169232348386, "learning_rate": 0.0009126751879039441, "loss": 1.326, "step": 2003 }, { "epoch": 0.22, "grad_norm": 0.08337716785047515, "learning_rate": 0.0009125768582502539, "loss": 1.457, "step": 2004 }, { "epoch": 0.22, "grad_norm": 0.08013750991226204, "learning_rate": 0.0009124784785701969, "loss": 1.2967, "step": 2005 }, { "epoch": 0.22, "grad_norm": 0.08594967067757286, "learning_rate": 0.0009123800488757026, "loss": 1.5688, "step": 2006 }, { "epoch": 0.22, "grad_norm": 0.0777155592804685, "learning_rate": 0.0009122815691787055, "loss": 1.4152, "step": 2007 }, { "epoch": 0.22, "grad_norm": 0.10406910057370224, "learning_rate": 0.000912183039491147, "loss": 1.614, "step": 2008 }, { "epoch": 0.22, "grad_norm": 0.08189508655131475, "learning_rate": 0.0009120844598249737, "loss": 1.447, "step": 2009 }, { "epoch": 0.22, "grad_norm": 0.07712372854960725, "learning_rate": 0.0009119858301921391, "loss": 1.3347, "step": 2010 }, { "epoch": 0.22, "grad_norm": 0.08994235194617171, "learning_rate": 0.0009118871506046024, "loss": 1.5152, "step": 2011 }, { "epoch": 0.22, "grad_norm": 0.07797437990098123, "learning_rate": 0.0009117884210743286, "loss": 1.4365, "step": 2012 }, { "epoch": 0.22, "grad_norm": 0.08655098692860116, "learning_rate": 0.0009116896416132889, "loss": 1.4759, "step": 2013 }, { "epoch": 0.22, "grad_norm": 0.07745160520338816, "learning_rate": 0.000911590812233461, "loss": 1.3671, "step": 2014 }, { "epoch": 0.22, "grad_norm": 0.10139052107795801, "learning_rate": 0.0009114919329468282, "loss": 1.4782, "step": 2015 }, { "epoch": 0.22, "grad_norm": 0.09445383935773374, "learning_rate": 0.00091139300376538, "loss": 1.5765, "step": 2016 }, { "epoch": 0.22, "grad_norm": 0.0893852361489357, "learning_rate": 0.0009112940247011116, "loss": 1.4059, "step": 2017 }, { "epoch": 0.22, "grad_norm": 0.08429572837560623, "learning_rate": 0.0009111949957660248, "loss": 1.3523, "step": 2018 }, { "epoch": 0.22, "grad_norm": 0.08670828198710072, "learning_rate": 0.0009110959169721271, "loss": 1.3836, "step": 2019 }, { "epoch": 0.22, "grad_norm": 0.09593977547060985, "learning_rate": 0.0009109967883314323, "loss": 1.499, "step": 2020 }, { "epoch": 0.22, "grad_norm": 0.0893023259917977, "learning_rate": 0.00091089760985596, "loss": 1.4865, "step": 2021 }, { "epoch": 0.22, "grad_norm": 0.08920793652767317, "learning_rate": 0.0009107983815577359, "loss": 1.5379, "step": 2022 }, { "epoch": 0.22, "grad_norm": 0.10048196567007192, "learning_rate": 0.0009106991034487917, "loss": 1.5585, "step": 2023 }, { "epoch": 0.22, "grad_norm": 0.09363391762575825, "learning_rate": 0.0009105997755411655, "loss": 1.5651, "step": 2024 }, { "epoch": 0.22, "grad_norm": 0.10401149059455012, "learning_rate": 0.0009105003978469009, "loss": 1.4563, "step": 2025 }, { "epoch": 0.22, "grad_norm": 0.08535302742501824, "learning_rate": 0.0009104009703780478, "loss": 1.4701, "step": 2026 }, { "epoch": 0.22, "grad_norm": 0.08868145003485764, "learning_rate": 0.0009103014931466623, "loss": 1.4583, "step": 2027 }, { "epoch": 0.22, "grad_norm": 0.0879030351354358, "learning_rate": 0.000910201966164806, "loss": 1.3511, "step": 2028 }, { "epoch": 0.22, "grad_norm": 0.10253950527640521, "learning_rate": 0.0009101023894445472, "loss": 1.5729, "step": 2029 }, { "epoch": 0.22, "grad_norm": 0.08542785241162401, "learning_rate": 0.0009100027629979599, "loss": 1.4357, "step": 2030 }, { "epoch": 0.22, "grad_norm": 0.08579083004639929, "learning_rate": 0.0009099030868371241, "loss": 1.4863, "step": 2031 }, { "epoch": 0.22, "grad_norm": 0.09758998052839936, "learning_rate": 0.0009098033609741259, "loss": 1.5492, "step": 2032 }, { "epoch": 0.22, "grad_norm": 0.07844186539869633, "learning_rate": 0.0009097035854210573, "loss": 1.5178, "step": 2033 }, { "epoch": 0.22, "grad_norm": 0.08847073531592889, "learning_rate": 0.0009096037601900166, "loss": 1.474, "step": 2034 }, { "epoch": 0.22, "grad_norm": 0.08055118937273592, "learning_rate": 0.0009095038852931077, "loss": 1.3124, "step": 2035 }, { "epoch": 0.22, "grad_norm": 0.08954927397429793, "learning_rate": 0.000909403960742441, "loss": 1.3911, "step": 2036 }, { "epoch": 0.22, "grad_norm": 0.08871609293202781, "learning_rate": 0.0009093039865501327, "loss": 1.4989, "step": 2037 }, { "epoch": 0.22, "grad_norm": 0.08331819035983692, "learning_rate": 0.0009092039627283049, "loss": 1.5124, "step": 2038 }, { "epoch": 0.22, "grad_norm": 0.08617807710536765, "learning_rate": 0.0009091038892890859, "loss": 1.3432, "step": 2039 }, { "epoch": 0.22, "grad_norm": 0.08013091876615193, "learning_rate": 0.0009090037662446099, "loss": 1.4968, "step": 2040 }, { "epoch": 0.22, "grad_norm": 0.08088959403354601, "learning_rate": 0.0009089035936070171, "loss": 1.2544, "step": 2041 }, { "epoch": 0.22, "grad_norm": 0.07511260388633456, "learning_rate": 0.0009088033713884541, "loss": 1.5305, "step": 2042 }, { "epoch": 0.22, "grad_norm": 0.07857443163504174, "learning_rate": 0.0009087030996010728, "loss": 1.4741, "step": 2043 }, { "epoch": 0.22, "grad_norm": 0.08773673280492814, "learning_rate": 0.0009086027782570316, "loss": 1.4905, "step": 2044 }, { "epoch": 0.22, "grad_norm": 0.08617794607094241, "learning_rate": 0.0009085024073684951, "loss": 1.5055, "step": 2045 }, { "epoch": 0.22, "grad_norm": 0.0949855047620933, "learning_rate": 0.0009084019869476332, "loss": 1.5818, "step": 2046 }, { "epoch": 0.22, "grad_norm": 0.09609447475098014, "learning_rate": 0.0009083015170066224, "loss": 1.4573, "step": 2047 }, { "epoch": 0.22, "grad_norm": 0.08109159950480951, "learning_rate": 0.0009082009975576451, "loss": 1.3369, "step": 2048 }, { "epoch": 0.22, "grad_norm": 0.08835922201145058, "learning_rate": 0.0009081004286128895, "loss": 1.3451, "step": 2049 }, { "epoch": 0.22, "grad_norm": 0.08859070142962146, "learning_rate": 0.0009079998101845501, "loss": 1.3428, "step": 2050 }, { "epoch": 0.22, "grad_norm": 0.088189800018709, "learning_rate": 0.000907899142284827, "loss": 1.5434, "step": 2051 }, { "epoch": 0.22, "grad_norm": 0.08644531320243522, "learning_rate": 0.0009077984249259268, "loss": 1.4702, "step": 2052 }, { "epoch": 0.22, "grad_norm": 0.07668741919840479, "learning_rate": 0.0009076976581200615, "loss": 1.3968, "step": 2053 }, { "epoch": 0.22, "grad_norm": 0.09581406401109188, "learning_rate": 0.0009075968418794498, "loss": 1.4724, "step": 2054 }, { "epoch": 0.22, "grad_norm": 0.09246875649499241, "learning_rate": 0.0009074959762163157, "loss": 1.4314, "step": 2055 }, { "epoch": 0.22, "grad_norm": 0.07492645924876302, "learning_rate": 0.0009073950611428897, "loss": 1.2898, "step": 2056 }, { "epoch": 0.22, "grad_norm": 0.06928869437837035, "learning_rate": 0.000907294096671408, "loss": 1.3936, "step": 2057 }, { "epoch": 0.22, "grad_norm": 0.0832862559597914, "learning_rate": 0.0009071930828141128, "loss": 1.4466, "step": 2058 }, { "epoch": 0.22, "grad_norm": 0.08848451000796444, "learning_rate": 0.0009070920195832527, "loss": 1.4536, "step": 2059 }, { "epoch": 0.22, "grad_norm": 0.09506876450203977, "learning_rate": 0.0009069909069910816, "loss": 1.342, "step": 2060 }, { "epoch": 0.22, "grad_norm": 0.08569941811729877, "learning_rate": 0.0009068897450498602, "loss": 1.4359, "step": 2061 }, { "epoch": 0.22, "grad_norm": 0.08488207204085854, "learning_rate": 0.0009067885337718543, "loss": 1.5106, "step": 2062 }, { "epoch": 0.22, "grad_norm": 0.08690685472711807, "learning_rate": 0.0009066872731693361, "loss": 1.3516, "step": 2063 }, { "epoch": 0.22, "grad_norm": 0.08252793754556853, "learning_rate": 0.0009065859632545841, "loss": 1.496, "step": 2064 }, { "epoch": 0.22, "grad_norm": 0.09116501152269689, "learning_rate": 0.0009064846040398822, "loss": 1.3323, "step": 2065 }, { "epoch": 0.22, "grad_norm": 0.08508703996689969, "learning_rate": 0.0009063831955375209, "loss": 1.4011, "step": 2066 }, { "epoch": 0.22, "grad_norm": 0.08245437493180188, "learning_rate": 0.0009062817377597961, "loss": 1.5624, "step": 2067 }, { "epoch": 0.22, "grad_norm": 0.08352745314965869, "learning_rate": 0.0009061802307190098, "loss": 1.4388, "step": 2068 }, { "epoch": 0.22, "grad_norm": 0.08597478680811273, "learning_rate": 0.0009060786744274703, "loss": 1.4461, "step": 2069 }, { "epoch": 0.22, "grad_norm": 0.08312171106043391, "learning_rate": 0.0009059770688974915, "loss": 1.4689, "step": 2070 }, { "epoch": 0.22, "grad_norm": 0.09194463520309476, "learning_rate": 0.0009058754141413935, "loss": 1.4197, "step": 2071 }, { "epoch": 0.22, "grad_norm": 0.08150972185382112, "learning_rate": 0.0009057737101715024, "loss": 1.4696, "step": 2072 }, { "epoch": 0.22, "grad_norm": 0.0853710443666307, "learning_rate": 0.0009056719570001498, "loss": 1.5233, "step": 2073 }, { "epoch": 0.22, "grad_norm": 0.07523008570520828, "learning_rate": 0.000905570154639674, "loss": 1.5505, "step": 2074 }, { "epoch": 0.22, "grad_norm": 0.0847559640472571, "learning_rate": 0.0009054683031024187, "loss": 1.5916, "step": 2075 }, { "epoch": 0.22, "grad_norm": 0.08104363762554731, "learning_rate": 0.0009053664024007337, "loss": 1.4587, "step": 2076 }, { "epoch": 0.22, "grad_norm": 0.07962733245896239, "learning_rate": 0.0009052644525469751, "loss": 1.3431, "step": 2077 }, { "epoch": 0.22, "grad_norm": 0.08353363693288063, "learning_rate": 0.0009051624535535044, "loss": 1.4799, "step": 2078 }, { "epoch": 0.22, "grad_norm": 0.08825166774195803, "learning_rate": 0.0009050604054326893, "loss": 1.3508, "step": 2079 }, { "epoch": 0.22, "grad_norm": 0.07694662079941654, "learning_rate": 0.0009049583081969037, "loss": 1.3975, "step": 2080 }, { "epoch": 0.22, "grad_norm": 0.08547425993073962, "learning_rate": 0.0009048561618585269, "loss": 1.4588, "step": 2081 }, { "epoch": 0.22, "grad_norm": 0.07890146312971011, "learning_rate": 0.000904753966429945, "loss": 1.5806, "step": 2082 }, { "epoch": 0.22, "grad_norm": 0.0895410367058908, "learning_rate": 0.0009046517219235492, "loss": 1.428, "step": 2083 }, { "epoch": 0.22, "grad_norm": 0.07457533928827222, "learning_rate": 0.000904549428351737, "loss": 1.4553, "step": 2084 }, { "epoch": 0.22, "grad_norm": 0.08100017328753376, "learning_rate": 0.0009044470857269121, "loss": 1.3618, "step": 2085 }, { "epoch": 0.22, "grad_norm": 0.07129459326988942, "learning_rate": 0.0009043446940614835, "loss": 1.4317, "step": 2086 }, { "epoch": 0.22, "grad_norm": 0.07500535174736596, "learning_rate": 0.0009042422533678667, "loss": 1.4146, "step": 2087 }, { "epoch": 0.22, "grad_norm": 0.08011378389844677, "learning_rate": 0.0009041397636584831, "loss": 1.3876, "step": 2088 }, { "epoch": 0.22, "grad_norm": 0.07943960209069717, "learning_rate": 0.00090403722494576, "loss": 1.428, "step": 2089 }, { "epoch": 0.22, "grad_norm": 0.07076112195478479, "learning_rate": 0.0009039346372421304, "loss": 1.3577, "step": 2090 }, { "epoch": 0.22, "grad_norm": 0.09256678327668848, "learning_rate": 0.0009038320005600336, "loss": 1.4306, "step": 2091 }, { "epoch": 0.22, "grad_norm": 0.07157860379515876, "learning_rate": 0.0009037293149119144, "loss": 1.4065, "step": 2092 }, { "epoch": 0.22, "grad_norm": 0.09035990774616685, "learning_rate": 0.0009036265803102237, "loss": 1.3061, "step": 2093 }, { "epoch": 0.23, "grad_norm": 0.09038176456247499, "learning_rate": 0.0009035237967674188, "loss": 1.4361, "step": 2094 }, { "epoch": 0.23, "grad_norm": 0.10021661807035723, "learning_rate": 0.0009034209642959624, "loss": 1.3928, "step": 2095 }, { "epoch": 0.23, "grad_norm": 0.08500099881896636, "learning_rate": 0.0009033180829083232, "loss": 1.2741, "step": 2096 }, { "epoch": 0.23, "grad_norm": 0.09099960063293876, "learning_rate": 0.0009032151526169761, "loss": 1.3831, "step": 2097 }, { "epoch": 0.23, "grad_norm": 0.09257103776160713, "learning_rate": 0.0009031121734344016, "loss": 1.3861, "step": 2098 }, { "epoch": 0.23, "grad_norm": 0.09067821153143035, "learning_rate": 0.0009030091453730862, "loss": 1.5433, "step": 2099 }, { "epoch": 0.23, "grad_norm": 0.10061981038222514, "learning_rate": 0.0009029060684455228, "loss": 1.6066, "step": 2100 }, { "epoch": 0.23, "grad_norm": 0.08322155025349774, "learning_rate": 0.0009028029426642095, "loss": 1.44, "step": 2101 }, { "epoch": 0.23, "grad_norm": 0.08632501692592569, "learning_rate": 0.0009026997680416505, "loss": 1.5127, "step": 2102 }, { "epoch": 0.23, "grad_norm": 0.10646380692549054, "learning_rate": 0.0009025965445903565, "loss": 1.5835, "step": 2103 }, { "epoch": 0.23, "grad_norm": 0.0778664557671461, "learning_rate": 0.0009024932723228436, "loss": 1.4187, "step": 2104 }, { "epoch": 0.23, "grad_norm": 0.08218957602622107, "learning_rate": 0.0009023899512516336, "loss": 1.322, "step": 2105 }, { "epoch": 0.23, "grad_norm": 0.068440123958451, "learning_rate": 0.0009022865813892549, "loss": 1.4493, "step": 2106 }, { "epoch": 0.23, "grad_norm": 0.07736229830673369, "learning_rate": 0.0009021831627482413, "loss": 1.3777, "step": 2107 }, { "epoch": 0.23, "grad_norm": 0.07927886201598712, "learning_rate": 0.0009020796953411327, "loss": 1.4454, "step": 2108 }, { "epoch": 0.23, "grad_norm": 0.07994526360761076, "learning_rate": 0.0009019761791804748, "loss": 1.3731, "step": 2109 }, { "epoch": 0.23, "grad_norm": 0.07833679220856182, "learning_rate": 0.0009018726142788194, "loss": 1.3188, "step": 2110 }, { "epoch": 0.23, "grad_norm": 0.07289754206616753, "learning_rate": 0.000901769000648724, "loss": 1.4212, "step": 2111 }, { "epoch": 0.23, "grad_norm": 0.07449956522325085, "learning_rate": 0.0009016653383027522, "loss": 1.4958, "step": 2112 }, { "epoch": 0.23, "grad_norm": 0.07840040658702752, "learning_rate": 0.0009015616272534734, "loss": 1.3861, "step": 2113 }, { "epoch": 0.23, "grad_norm": 0.07392483160096817, "learning_rate": 0.0009014578675134628, "loss": 1.4716, "step": 2114 }, { "epoch": 0.23, "grad_norm": 0.06578630614556928, "learning_rate": 0.0009013540590953017, "loss": 1.5169, "step": 2115 }, { "epoch": 0.23, "grad_norm": 0.07803948013204608, "learning_rate": 0.0009012502020115776, "loss": 1.5624, "step": 2116 }, { "epoch": 0.23, "grad_norm": 0.076649100119888, "learning_rate": 0.0009011462962748829, "loss": 1.5829, "step": 2117 }, { "epoch": 0.23, "grad_norm": 0.07625429267408237, "learning_rate": 0.0009010423418978168, "loss": 1.4575, "step": 2118 }, { "epoch": 0.23, "grad_norm": 0.07959475385111861, "learning_rate": 0.0009009383388929842, "loss": 1.4098, "step": 2119 }, { "epoch": 0.23, "grad_norm": 0.07880013779769067, "learning_rate": 0.0009008342872729957, "loss": 1.4709, "step": 2120 }, { "epoch": 0.23, "grad_norm": 0.08272735817424194, "learning_rate": 0.0009007301870504681, "loss": 1.4538, "step": 2121 }, { "epoch": 0.23, "grad_norm": 0.07552018367416652, "learning_rate": 0.0009006260382380238, "loss": 1.3958, "step": 2122 }, { "epoch": 0.23, "grad_norm": 0.0773802621109197, "learning_rate": 0.0009005218408482911, "loss": 1.6479, "step": 2123 }, { "epoch": 0.23, "grad_norm": 0.0903702500400604, "learning_rate": 0.0009004175948939044, "loss": 1.4688, "step": 2124 }, { "epoch": 0.23, "grad_norm": 0.08143156259179056, "learning_rate": 0.000900313300387504, "loss": 1.3508, "step": 2125 }, { "epoch": 0.23, "grad_norm": 0.08345652362054003, "learning_rate": 0.0009002089573417356, "loss": 1.4798, "step": 2126 }, { "epoch": 0.23, "grad_norm": 0.08097286863224183, "learning_rate": 0.0009001045657692517, "loss": 1.4088, "step": 2127 }, { "epoch": 0.23, "grad_norm": 0.07816712018885351, "learning_rate": 0.0009000001256827095, "loss": 1.464, "step": 2128 }, { "epoch": 0.23, "grad_norm": 0.07995949220911897, "learning_rate": 0.0008998956370947733, "loss": 1.4584, "step": 2129 }, { "epoch": 0.23, "grad_norm": 0.08389083649029062, "learning_rate": 0.0008997911000181122, "loss": 1.4115, "step": 2130 }, { "epoch": 0.23, "grad_norm": 0.08706037657202785, "learning_rate": 0.0008996865144654023, "loss": 1.4264, "step": 2131 }, { "epoch": 0.23, "grad_norm": 0.08739980235747145, "learning_rate": 0.0008995818804493243, "loss": 1.4701, "step": 2132 }, { "epoch": 0.23, "grad_norm": 0.09140265561641635, "learning_rate": 0.0008994771979825658, "loss": 1.4218, "step": 2133 }, { "epoch": 0.23, "grad_norm": 0.07375929912993871, "learning_rate": 0.0008993724670778198, "loss": 1.4068, "step": 2134 }, { "epoch": 0.23, "grad_norm": 0.07953888777292227, "learning_rate": 0.0008992676877477854, "loss": 1.444, "step": 2135 }, { "epoch": 0.23, "grad_norm": 0.10877764802904497, "learning_rate": 0.0008991628600051673, "loss": 1.4785, "step": 2136 }, { "epoch": 0.23, "grad_norm": 0.07748952894124081, "learning_rate": 0.0008990579838626764, "loss": 1.4316, "step": 2137 }, { "epoch": 0.23, "grad_norm": 0.08527565495180588, "learning_rate": 0.0008989530593330291, "loss": 1.6002, "step": 2138 }, { "epoch": 0.23, "grad_norm": 0.08335708177946245, "learning_rate": 0.0008988480864289481, "loss": 1.3452, "step": 2139 }, { "epoch": 0.23, "grad_norm": 0.09759801081695489, "learning_rate": 0.0008987430651631613, "loss": 1.4563, "step": 2140 }, { "epoch": 0.23, "grad_norm": 0.07840457899332089, "learning_rate": 0.0008986379955484036, "loss": 1.4884, "step": 2141 }, { "epoch": 0.23, "grad_norm": 0.0876443419021953, "learning_rate": 0.0008985328775974142, "loss": 1.5695, "step": 2142 }, { "epoch": 0.23, "grad_norm": 0.08807857999714565, "learning_rate": 0.0008984277113229397, "loss": 1.5175, "step": 2143 }, { "epoch": 0.23, "grad_norm": 0.10090005538461481, "learning_rate": 0.0008983224967377315, "loss": 1.435, "step": 2144 }, { "epoch": 0.23, "grad_norm": 0.09627909221834423, "learning_rate": 0.0008982172338545474, "loss": 1.5002, "step": 2145 }, { "epoch": 0.23, "grad_norm": 0.0789701966323783, "learning_rate": 0.0008981119226861508, "loss": 1.6117, "step": 2146 }, { "epoch": 0.23, "grad_norm": 0.0862375515699889, "learning_rate": 0.0008980065632453111, "loss": 1.5657, "step": 2147 }, { "epoch": 0.23, "grad_norm": 0.07179033638664258, "learning_rate": 0.0008979011555448035, "loss": 1.4099, "step": 2148 }, { "epoch": 0.23, "grad_norm": 0.07114245906288379, "learning_rate": 0.0008977956995974089, "loss": 1.4637, "step": 2149 }, { "epoch": 0.23, "grad_norm": 0.07414337930145709, "learning_rate": 0.0008976901954159144, "loss": 1.4752, "step": 2150 }, { "epoch": 0.23, "grad_norm": 0.07132830095164981, "learning_rate": 0.0008975846430131127, "loss": 1.4056, "step": 2151 }, { "epoch": 0.23, "grad_norm": 0.0719025939859958, "learning_rate": 0.0008974790424018022, "loss": 1.4565, "step": 2152 }, { "epoch": 0.23, "grad_norm": 0.07354577248780032, "learning_rate": 0.0008973733935947877, "loss": 1.3983, "step": 2153 }, { "epoch": 0.23, "grad_norm": 0.0787391972157109, "learning_rate": 0.0008972676966048789, "loss": 1.3842, "step": 2154 }, { "epoch": 0.23, "grad_norm": 0.07914358659288476, "learning_rate": 0.0008971619514448927, "loss": 1.4485, "step": 2155 }, { "epoch": 0.23, "grad_norm": 0.09372633769515623, "learning_rate": 0.0008970561581276505, "loss": 1.4679, "step": 2156 }, { "epoch": 0.23, "grad_norm": 0.07041013581411883, "learning_rate": 0.0008969503166659803, "loss": 1.3479, "step": 2157 }, { "epoch": 0.23, "grad_norm": 0.08613639135343044, "learning_rate": 0.0008968444270727157, "loss": 1.4191, "step": 2158 }, { "epoch": 0.23, "grad_norm": 0.08240147616175394, "learning_rate": 0.0008967384893606962, "loss": 1.4829, "step": 2159 }, { "epoch": 0.23, "grad_norm": 0.08323090670400539, "learning_rate": 0.0008966325035427669, "loss": 1.4426, "step": 2160 }, { "epoch": 0.23, "grad_norm": 0.0830480211709058, "learning_rate": 0.0008965264696317795, "loss": 1.4769, "step": 2161 }, { "epoch": 0.23, "grad_norm": 0.08928546482396188, "learning_rate": 0.0008964203876405903, "loss": 1.3255, "step": 2162 }, { "epoch": 0.23, "grad_norm": 0.0822658434232799, "learning_rate": 0.0008963142575820626, "loss": 1.4945, "step": 2163 }, { "epoch": 0.23, "grad_norm": 0.08491975646767387, "learning_rate": 0.0008962080794690648, "loss": 1.5602, "step": 2164 }, { "epoch": 0.23, "grad_norm": 0.08414408557119114, "learning_rate": 0.0008961018533144716, "loss": 1.4698, "step": 2165 }, { "epoch": 0.23, "grad_norm": 0.0954940468086645, "learning_rate": 0.000895995579131163, "loss": 1.3421, "step": 2166 }, { "epoch": 0.23, "grad_norm": 0.08659721199046269, "learning_rate": 0.0008958892569320251, "loss": 1.446, "step": 2167 }, { "epoch": 0.23, "grad_norm": 0.08211464984956957, "learning_rate": 0.00089578288672995, "loss": 1.4428, "step": 2168 }, { "epoch": 0.23, "grad_norm": 0.08730211184796505, "learning_rate": 0.0008956764685378356, "loss": 1.5298, "step": 2169 }, { "epoch": 0.23, "grad_norm": 0.08028511736236937, "learning_rate": 0.0008955700023685851, "loss": 1.401, "step": 2170 }, { "epoch": 0.23, "grad_norm": 0.08607184275244723, "learning_rate": 0.000895463488235108, "loss": 1.2803, "step": 2171 }, { "epoch": 0.23, "grad_norm": 0.08878050364927839, "learning_rate": 0.0008953569261503198, "loss": 1.3938, "step": 2172 }, { "epoch": 0.23, "grad_norm": 0.09230301699295128, "learning_rate": 0.0008952503161271413, "loss": 1.5243, "step": 2173 }, { "epoch": 0.23, "grad_norm": 0.09530871727738145, "learning_rate": 0.000895143658178499, "loss": 1.4443, "step": 2174 }, { "epoch": 0.23, "grad_norm": 0.09034274785163224, "learning_rate": 0.0008950369523173263, "loss": 1.3993, "step": 2175 }, { "epoch": 0.23, "grad_norm": 0.08126098450813693, "learning_rate": 0.000894930198556561, "loss": 1.3772, "step": 2176 }, { "epoch": 0.23, "grad_norm": 0.09846129233960661, "learning_rate": 0.0008948233969091477, "loss": 1.4249, "step": 2177 }, { "epoch": 0.23, "grad_norm": 0.09346589113210531, "learning_rate": 0.0008947165473880363, "loss": 1.2923, "step": 2178 }, { "epoch": 0.23, "grad_norm": 0.07884971976680567, "learning_rate": 0.0008946096500061828, "loss": 1.5027, "step": 2179 }, { "epoch": 0.23, "grad_norm": 0.0815049356861313, "learning_rate": 0.0008945027047765488, "loss": 1.5911, "step": 2180 }, { "epoch": 0.23, "grad_norm": 0.08053472358349446, "learning_rate": 0.0008943957117121017, "loss": 1.3357, "step": 2181 }, { "epoch": 0.23, "grad_norm": 0.07124111934527916, "learning_rate": 0.0008942886708258148, "loss": 1.485, "step": 2182 }, { "epoch": 0.23, "grad_norm": 0.08208081331893921, "learning_rate": 0.0008941815821306674, "loss": 1.3862, "step": 2183 }, { "epoch": 0.23, "grad_norm": 0.08992051131410057, "learning_rate": 0.0008940744456396442, "loss": 1.4123, "step": 2184 }, { "epoch": 0.23, "grad_norm": 0.08292039307031486, "learning_rate": 0.0008939672613657359, "loss": 1.4377, "step": 2185 }, { "epoch": 0.23, "grad_norm": 0.07751252629536556, "learning_rate": 0.000893860029321939, "loss": 1.3624, "step": 2186 }, { "epoch": 0.24, "grad_norm": 0.07378227106710288, "learning_rate": 0.0008937527495212555, "loss": 1.5427, "step": 2187 }, { "epoch": 0.24, "grad_norm": 0.08362754852649955, "learning_rate": 0.0008936454219766938, "loss": 1.3911, "step": 2188 }, { "epoch": 0.24, "grad_norm": 0.08561863317364699, "learning_rate": 0.0008935380467012675, "loss": 1.4485, "step": 2189 }, { "epoch": 0.24, "grad_norm": 0.08022086430199703, "learning_rate": 0.0008934306237079963, "loss": 1.4691, "step": 2190 }, { "epoch": 0.24, "grad_norm": 0.10481201673736351, "learning_rate": 0.0008933231530099058, "loss": 1.4008, "step": 2191 }, { "epoch": 0.24, "grad_norm": 0.07735777171508976, "learning_rate": 0.0008932156346200268, "loss": 1.4511, "step": 2192 }, { "epoch": 0.24, "grad_norm": 0.07372539389115546, "learning_rate": 0.0008931080685513966, "loss": 1.5533, "step": 2193 }, { "epoch": 0.24, "grad_norm": 0.08699225540912171, "learning_rate": 0.0008930004548170577, "loss": 1.5613, "step": 2194 }, { "epoch": 0.24, "grad_norm": 0.08015320764450956, "learning_rate": 0.0008928927934300588, "loss": 1.4748, "step": 2195 }, { "epoch": 0.24, "grad_norm": 0.08026380063427321, "learning_rate": 0.0008927850844034544, "loss": 1.5011, "step": 2196 }, { "epoch": 0.24, "grad_norm": 0.08569620404860831, "learning_rate": 0.0008926773277503041, "loss": 1.4672, "step": 2197 }, { "epoch": 0.24, "grad_norm": 0.08527228975419182, "learning_rate": 0.0008925695234836742, "loss": 1.4531, "step": 2198 }, { "epoch": 0.24, "grad_norm": 0.08123604466644242, "learning_rate": 0.0008924616716166363, "loss": 1.4952, "step": 2199 }, { "epoch": 0.24, "grad_norm": 0.08556822527789404, "learning_rate": 0.0008923537721622674, "loss": 1.5915, "step": 2200 }, { "epoch": 0.24, "grad_norm": 0.09081981002504544, "learning_rate": 0.0008922458251336511, "loss": 1.5223, "step": 2201 }, { "epoch": 0.24, "grad_norm": 0.08036843164515005, "learning_rate": 0.0008921378305438763, "loss": 1.415, "step": 2202 }, { "epoch": 0.24, "grad_norm": 0.08294936113634996, "learning_rate": 0.0008920297884060376, "loss": 1.3729, "step": 2203 }, { "epoch": 0.24, "grad_norm": 0.09402295203612535, "learning_rate": 0.0008919216987332356, "loss": 1.3907, "step": 2204 }, { "epoch": 0.24, "grad_norm": 0.08251725989381521, "learning_rate": 0.0008918135615385763, "loss": 1.3759, "step": 2205 }, { "epoch": 0.24, "grad_norm": 0.07518200632745241, "learning_rate": 0.0008917053768351719, "loss": 1.3763, "step": 2206 }, { "epoch": 0.24, "grad_norm": 0.07912416047033925, "learning_rate": 0.0008915971446361404, "loss": 1.4037, "step": 2207 }, { "epoch": 0.24, "grad_norm": 0.08064078885414537, "learning_rate": 0.0008914888649546048, "loss": 1.5326, "step": 2208 }, { "epoch": 0.24, "grad_norm": 0.07408548298786542, "learning_rate": 0.0008913805378036948, "loss": 1.5165, "step": 2209 }, { "epoch": 0.24, "grad_norm": 0.08872131280264428, "learning_rate": 0.0008912721631965453, "loss": 1.4678, "step": 2210 }, { "epoch": 0.24, "grad_norm": 0.08249217488465536, "learning_rate": 0.0008911637411462969, "loss": 1.3006, "step": 2211 }, { "epoch": 0.24, "grad_norm": 0.08866808668942394, "learning_rate": 0.0008910552716660965, "loss": 1.4458, "step": 2212 }, { "epoch": 0.24, "grad_norm": 0.07822108916047592, "learning_rate": 0.0008909467547690962, "loss": 1.5045, "step": 2213 }, { "epoch": 0.24, "grad_norm": 0.07968046873758618, "learning_rate": 0.0008908381904684542, "loss": 1.3571, "step": 2214 }, { "epoch": 0.24, "grad_norm": 0.08621063906251096, "learning_rate": 0.0008907295787773339, "loss": 1.4686, "step": 2215 }, { "epoch": 0.24, "grad_norm": 0.09541846290904246, "learning_rate": 0.0008906209197089054, "loss": 1.4994, "step": 2216 }, { "epoch": 0.24, "grad_norm": 0.08307421917053688, "learning_rate": 0.0008905122132763437, "loss": 1.5249, "step": 2217 }, { "epoch": 0.24, "grad_norm": 0.07788724633936456, "learning_rate": 0.0008904034594928296, "loss": 1.4023, "step": 2218 }, { "epoch": 0.24, "grad_norm": 0.0816366690950349, "learning_rate": 0.0008902946583715503, "loss": 1.43, "step": 2219 }, { "epoch": 0.24, "grad_norm": 0.09982940037534067, "learning_rate": 0.0008901858099256981, "loss": 1.4255, "step": 2220 }, { "epoch": 0.24, "grad_norm": 0.08232288093656423, "learning_rate": 0.0008900769141684712, "loss": 1.3594, "step": 2221 }, { "epoch": 0.24, "grad_norm": 0.08357733265593364, "learning_rate": 0.0008899679711130737, "loss": 1.4477, "step": 2222 }, { "epoch": 0.24, "grad_norm": 0.08849502280367522, "learning_rate": 0.0008898589807727153, "loss": 1.3596, "step": 2223 }, { "epoch": 0.24, "grad_norm": 0.07174272902783962, "learning_rate": 0.0008897499431606116, "loss": 1.4807, "step": 2224 }, { "epoch": 0.24, "grad_norm": 0.08742620366231663, "learning_rate": 0.0008896408582899833, "loss": 1.4376, "step": 2225 }, { "epoch": 0.24, "grad_norm": 0.07675971108920426, "learning_rate": 0.0008895317261740579, "loss": 1.4947, "step": 2226 }, { "epoch": 0.24, "grad_norm": 0.07584998544369173, "learning_rate": 0.0008894225468260675, "loss": 1.5446, "step": 2227 }, { "epoch": 0.24, "grad_norm": 0.08675223323782724, "learning_rate": 0.000889313320259251, "loss": 1.5474, "step": 2228 }, { "epoch": 0.24, "grad_norm": 0.07847232947461194, "learning_rate": 0.000889204046486852, "loss": 1.3825, "step": 2229 }, { "epoch": 0.24, "grad_norm": 0.08410254193952434, "learning_rate": 0.0008890947255221209, "loss": 1.4741, "step": 2230 }, { "epoch": 0.24, "grad_norm": 0.08534168834441479, "learning_rate": 0.0008889853573783127, "loss": 1.359, "step": 2231 }, { "epoch": 0.24, "grad_norm": 0.07198918151582337, "learning_rate": 0.0008888759420686889, "loss": 1.3831, "step": 2232 }, { "epoch": 0.24, "grad_norm": 0.07677775674736524, "learning_rate": 0.0008887664796065165, "loss": 1.3148, "step": 2233 }, { "epoch": 0.24, "grad_norm": 0.08207106759233733, "learning_rate": 0.0008886569700050682, "loss": 1.468, "step": 2234 }, { "epoch": 0.24, "grad_norm": 0.07396556465325442, "learning_rate": 0.0008885474132776224, "loss": 1.3863, "step": 2235 }, { "epoch": 0.24, "grad_norm": 0.08104484567295037, "learning_rate": 0.0008884378094374632, "loss": 1.469, "step": 2236 }, { "epoch": 0.24, "grad_norm": 0.07786764881894506, "learning_rate": 0.0008883281584978804, "loss": 1.5114, "step": 2237 }, { "epoch": 0.24, "grad_norm": 0.07771523721695527, "learning_rate": 0.0008882184604721697, "loss": 1.5415, "step": 2238 }, { "epoch": 0.24, "grad_norm": 0.08084233888893828, "learning_rate": 0.000888108715373632, "loss": 1.5652, "step": 2239 }, { "epoch": 0.24, "grad_norm": 0.07311662878971846, "learning_rate": 0.0008879989232155748, "loss": 1.4416, "step": 2240 }, { "epoch": 0.24, "grad_norm": 0.07765587834196362, "learning_rate": 0.0008878890840113105, "loss": 1.3599, "step": 2241 }, { "epoch": 0.24, "grad_norm": 0.07468966830819058, "learning_rate": 0.0008877791977741575, "loss": 1.4502, "step": 2242 }, { "epoch": 0.24, "grad_norm": 0.0810774288676914, "learning_rate": 0.0008876692645174399, "loss": 1.5496, "step": 2243 }, { "epoch": 0.24, "grad_norm": 0.07578306736684104, "learning_rate": 0.0008875592842544875, "loss": 1.4876, "step": 2244 }, { "epoch": 0.24, "grad_norm": 0.07308565897661695, "learning_rate": 0.0008874492569986357, "loss": 1.4319, "step": 2245 }, { "epoch": 0.24, "grad_norm": 0.08661688941721231, "learning_rate": 0.0008873391827632258, "loss": 1.3202, "step": 2246 }, { "epoch": 0.24, "grad_norm": 0.08669507642175672, "learning_rate": 0.0008872290615616046, "loss": 1.5107, "step": 2247 }, { "epoch": 0.24, "grad_norm": 0.08289264655816533, "learning_rate": 0.0008871188934071246, "loss": 1.3426, "step": 2248 }, { "epoch": 0.24, "grad_norm": 0.08315127821419833, "learning_rate": 0.0008870086783131444, "loss": 1.3794, "step": 2249 }, { "epoch": 0.24, "grad_norm": 0.08843733636930243, "learning_rate": 0.0008868984162930275, "loss": 1.4677, "step": 2250 }, { "epoch": 0.24, "grad_norm": 0.09172413612371813, "learning_rate": 0.0008867881073601439, "loss": 1.5372, "step": 2251 }, { "epoch": 0.24, "grad_norm": 0.08078669573488897, "learning_rate": 0.0008866777515278688, "loss": 1.4151, "step": 2252 }, { "epoch": 0.24, "grad_norm": 0.08090166791860437, "learning_rate": 0.0008865673488095832, "loss": 1.5728, "step": 2253 }, { "epoch": 0.24, "grad_norm": 0.08000330075098684, "learning_rate": 0.0008864568992186739, "loss": 1.3872, "step": 2254 }, { "epoch": 0.24, "grad_norm": 0.0756398228234597, "learning_rate": 0.0008863464027685332, "loss": 1.2759, "step": 2255 }, { "epoch": 0.24, "grad_norm": 0.09501349605436056, "learning_rate": 0.0008862358594725595, "loss": 1.6111, "step": 2256 }, { "epoch": 0.24, "grad_norm": 0.08869060983281359, "learning_rate": 0.0008861252693441559, "loss": 1.5307, "step": 2257 }, { "epoch": 0.24, "grad_norm": 0.08322655242755857, "learning_rate": 0.0008860146323967324, "loss": 1.3653, "step": 2258 }, { "epoch": 0.24, "grad_norm": 0.09070182705927224, "learning_rate": 0.0008859039486437039, "loss": 1.465, "step": 2259 }, { "epoch": 0.24, "grad_norm": 0.08490542675613597, "learning_rate": 0.0008857932180984914, "loss": 1.3364, "step": 2260 }, { "epoch": 0.24, "grad_norm": 0.08572743355151981, "learning_rate": 0.000885682440774521, "loss": 1.39, "step": 2261 }, { "epoch": 0.24, "grad_norm": 0.08391700720736694, "learning_rate": 0.000885571616685225, "loss": 1.3427, "step": 2262 }, { "epoch": 0.24, "grad_norm": 0.07350389296018711, "learning_rate": 0.0008854607458440412, "loss": 1.3976, "step": 2263 }, { "epoch": 0.24, "grad_norm": 0.0914364941499127, "learning_rate": 0.000885349828264413, "loss": 1.4906, "step": 2264 }, { "epoch": 0.24, "grad_norm": 0.08023243376640941, "learning_rate": 0.0008852388639597897, "loss": 1.3761, "step": 2265 }, { "epoch": 0.24, "grad_norm": 0.07893137372515271, "learning_rate": 0.0008851278529436261, "loss": 1.5595, "step": 2266 }, { "epoch": 0.24, "grad_norm": 0.07561619587902403, "learning_rate": 0.0008850167952293825, "loss": 1.476, "step": 2267 }, { "epoch": 0.24, "grad_norm": 0.08126009628490852, "learning_rate": 0.0008849056908305252, "loss": 1.5766, "step": 2268 }, { "epoch": 0.24, "grad_norm": 0.0779068568614784, "learning_rate": 0.0008847945397605258, "loss": 1.4817, "step": 2269 }, { "epoch": 0.24, "grad_norm": 0.07932082446412245, "learning_rate": 0.0008846833420328619, "loss": 1.373, "step": 2270 }, { "epoch": 0.24, "grad_norm": 0.08156960018218874, "learning_rate": 0.0008845720976610168, "loss": 1.4695, "step": 2271 }, { "epoch": 0.24, "grad_norm": 0.07735279185733433, "learning_rate": 0.0008844608066584787, "loss": 1.3753, "step": 2272 }, { "epoch": 0.24, "grad_norm": 0.07733770886342596, "learning_rate": 0.0008843494690387426, "loss": 1.4613, "step": 2273 }, { "epoch": 0.24, "grad_norm": 0.07890085671660646, "learning_rate": 0.0008842380848153082, "loss": 1.3932, "step": 2274 }, { "epoch": 0.24, "grad_norm": 0.07782994223247774, "learning_rate": 0.0008841266540016813, "loss": 1.4201, "step": 2275 }, { "epoch": 0.24, "grad_norm": 0.0957307653994958, "learning_rate": 0.0008840151766113735, "loss": 1.3713, "step": 2276 }, { "epoch": 0.24, "grad_norm": 0.08676129615166901, "learning_rate": 0.0008839036526579014, "loss": 1.4288, "step": 2277 }, { "epoch": 0.24, "grad_norm": 0.08086343688253304, "learning_rate": 0.000883792082154788, "loss": 1.453, "step": 2278 }, { "epoch": 0.24, "grad_norm": 0.07603031386284381, "learning_rate": 0.0008836804651155617, "loss": 1.3063, "step": 2279 }, { "epoch": 0.25, "grad_norm": 0.08419241615367228, "learning_rate": 0.0008835688015537559, "loss": 1.3828, "step": 2280 }, { "epoch": 0.25, "grad_norm": 0.08829666758411504, "learning_rate": 0.0008834570914829108, "loss": 1.4098, "step": 2281 }, { "epoch": 0.25, "grad_norm": 0.08693258350713497, "learning_rate": 0.0008833453349165714, "loss": 1.3494, "step": 2282 }, { "epoch": 0.25, "grad_norm": 0.09007341118626776, "learning_rate": 0.0008832335318682883, "loss": 1.4282, "step": 2283 }, { "epoch": 0.25, "grad_norm": 0.09274387264986217, "learning_rate": 0.0008831216823516185, "loss": 1.4668, "step": 2284 }, { "epoch": 0.25, "grad_norm": 0.09646572654831478, "learning_rate": 0.0008830097863801238, "loss": 1.5571, "step": 2285 }, { "epoch": 0.25, "grad_norm": 0.08083030222951221, "learning_rate": 0.000882897843967372, "loss": 1.4943, "step": 2286 }, { "epoch": 0.25, "grad_norm": 0.08450865254218719, "learning_rate": 0.0008827858551269368, "loss": 1.3344, "step": 2287 }, { "epoch": 0.25, "grad_norm": 0.08067151928811613, "learning_rate": 0.0008826738198723967, "loss": 1.5423, "step": 2288 }, { "epoch": 0.25, "grad_norm": 0.08614124183323224, "learning_rate": 0.0008825617382173369, "loss": 1.4565, "step": 2289 }, { "epoch": 0.25, "grad_norm": 0.07814908550447222, "learning_rate": 0.0008824496101753473, "loss": 1.459, "step": 2290 }, { "epoch": 0.25, "grad_norm": 0.08058843744933337, "learning_rate": 0.0008823374357600241, "loss": 1.5628, "step": 2291 }, { "epoch": 0.25, "grad_norm": 0.08686365181873504, "learning_rate": 0.0008822252149849686, "loss": 1.4197, "step": 2292 }, { "epoch": 0.25, "grad_norm": 0.07392644100827063, "learning_rate": 0.000882112947863788, "loss": 1.3753, "step": 2293 }, { "epoch": 0.25, "grad_norm": 0.08686559375776513, "learning_rate": 0.0008820006344100953, "loss": 1.5226, "step": 2294 }, { "epoch": 0.25, "grad_norm": 0.0891901138257802, "learning_rate": 0.0008818882746375085, "loss": 1.516, "step": 2295 }, { "epoch": 0.25, "grad_norm": 0.09554588650132052, "learning_rate": 0.0008817758685596519, "loss": 1.4844, "step": 2296 }, { "epoch": 0.25, "grad_norm": 0.07631449918448031, "learning_rate": 0.0008816634161901552, "loss": 1.3256, "step": 2297 }, { "epoch": 0.25, "grad_norm": 0.08089428852812466, "learning_rate": 0.0008815509175426534, "loss": 1.3853, "step": 2298 }, { "epoch": 0.25, "grad_norm": 0.07814014705209726, "learning_rate": 0.0008814383726307876, "loss": 1.473, "step": 2299 }, { "epoch": 0.25, "grad_norm": 0.07405353182252997, "learning_rate": 0.0008813257814682038, "loss": 1.5659, "step": 2300 }, { "epoch": 0.25, "grad_norm": 0.07250750021173145, "learning_rate": 0.0008812131440685544, "loss": 1.4467, "step": 2301 }, { "epoch": 0.25, "grad_norm": 0.0717452676620836, "learning_rate": 0.0008811004604454973, "loss": 1.4328, "step": 2302 }, { "epoch": 0.25, "grad_norm": 0.07509376431463823, "learning_rate": 0.0008809877306126953, "loss": 1.4588, "step": 2303 }, { "epoch": 0.25, "grad_norm": 0.09604822190961418, "learning_rate": 0.0008808749545838176, "loss": 1.4542, "step": 2304 }, { "epoch": 0.25, "grad_norm": 0.08803782587082482, "learning_rate": 0.0008807621323725386, "loss": 1.4286, "step": 2305 }, { "epoch": 0.25, "grad_norm": 0.07727137861921227, "learning_rate": 0.0008806492639925383, "loss": 1.5215, "step": 2306 }, { "epoch": 0.25, "grad_norm": 0.08376997789258082, "learning_rate": 0.0008805363494575024, "loss": 1.3851, "step": 2307 }, { "epoch": 0.25, "grad_norm": 0.0819358725338881, "learning_rate": 0.0008804233887811223, "loss": 1.4812, "step": 2308 }, { "epoch": 0.25, "grad_norm": 0.0792579194627983, "learning_rate": 0.0008803103819770947, "loss": 1.5521, "step": 2309 }, { "epoch": 0.25, "grad_norm": 0.0915688602232919, "learning_rate": 0.0008801973290591223, "loss": 1.4422, "step": 2310 }, { "epoch": 0.25, "grad_norm": 0.09290993712517474, "learning_rate": 0.0008800842300409129, "loss": 1.4499, "step": 2311 }, { "epoch": 0.25, "grad_norm": 0.07260739372011728, "learning_rate": 0.0008799710849361803, "loss": 1.3542, "step": 2312 }, { "epoch": 0.25, "grad_norm": 0.07889876448184573, "learning_rate": 0.0008798578937586436, "loss": 1.5093, "step": 2313 }, { "epoch": 0.25, "grad_norm": 0.082364590057529, "learning_rate": 0.0008797446565220278, "loss": 1.3858, "step": 2314 }, { "epoch": 0.25, "grad_norm": 0.07912789012616007, "learning_rate": 0.0008796313732400634, "loss": 1.4733, "step": 2315 }, { "epoch": 0.25, "grad_norm": 0.08590092205671555, "learning_rate": 0.000879518043926486, "loss": 1.5081, "step": 2316 }, { "epoch": 0.25, "grad_norm": 0.07329552774435327, "learning_rate": 0.0008794046685950373, "loss": 1.3782, "step": 2317 }, { "epoch": 0.25, "grad_norm": 0.07799343415492194, "learning_rate": 0.0008792912472594647, "loss": 1.3976, "step": 2318 }, { "epoch": 0.25, "grad_norm": 0.08369021131091983, "learning_rate": 0.0008791777799335205, "loss": 1.5131, "step": 2319 }, { "epoch": 0.25, "grad_norm": 0.08178300989609327, "learning_rate": 0.0008790642666309637, "loss": 1.5082, "step": 2320 }, { "epoch": 0.25, "grad_norm": 0.08988780905708967, "learning_rate": 0.0008789507073655574, "loss": 1.3528, "step": 2321 }, { "epoch": 0.25, "grad_norm": 0.08569905270305181, "learning_rate": 0.0008788371021510713, "loss": 1.5646, "step": 2322 }, { "epoch": 0.25, "grad_norm": 0.08054189742708023, "learning_rate": 0.0008787234510012807, "loss": 1.3814, "step": 2323 }, { "epoch": 0.25, "grad_norm": 0.08068380039302207, "learning_rate": 0.000878609753929966, "loss": 1.4276, "step": 2324 }, { "epoch": 0.25, "grad_norm": 0.08722938708128018, "learning_rate": 0.0008784960109509133, "loss": 1.4702, "step": 2325 }, { "epoch": 0.25, "grad_norm": 0.08364451612686977, "learning_rate": 0.0008783822220779145, "loss": 1.4256, "step": 2326 }, { "epoch": 0.25, "grad_norm": 0.0754569829554669, "learning_rate": 0.0008782683873247667, "loss": 1.4047, "step": 2327 }, { "epoch": 0.25, "grad_norm": 0.0885670001296458, "learning_rate": 0.0008781545067052729, "loss": 1.4172, "step": 2328 }, { "epoch": 0.25, "grad_norm": 0.08249488016564695, "learning_rate": 0.0008780405802332415, "loss": 1.4541, "step": 2329 }, { "epoch": 0.25, "grad_norm": 0.08681960813369974, "learning_rate": 0.0008779266079224863, "loss": 1.3966, "step": 2330 }, { "epoch": 0.25, "grad_norm": 0.09003709379265666, "learning_rate": 0.0008778125897868272, "loss": 1.5273, "step": 2331 }, { "epoch": 0.25, "grad_norm": 0.08457069138096256, "learning_rate": 0.0008776985258400889, "loss": 1.5136, "step": 2332 }, { "epoch": 0.25, "grad_norm": 0.08639359952616797, "learning_rate": 0.0008775844160961023, "loss": 1.3153, "step": 2333 }, { "epoch": 0.25, "grad_norm": 0.08436050301536181, "learning_rate": 0.0008774702605687035, "loss": 1.4928, "step": 2334 }, { "epoch": 0.25, "grad_norm": 0.07796128523951937, "learning_rate": 0.0008773560592717343, "loss": 1.4667, "step": 2335 }, { "epoch": 0.25, "grad_norm": 0.08239972146783438, "learning_rate": 0.0008772418122190418, "loss": 1.4009, "step": 2336 }, { "epoch": 0.25, "grad_norm": 0.08335490168042457, "learning_rate": 0.0008771275194244792, "loss": 1.4612, "step": 2337 }, { "epoch": 0.25, "grad_norm": 0.0857843475703198, "learning_rate": 0.0008770131809019046, "loss": 1.4773, "step": 2338 }, { "epoch": 0.25, "grad_norm": 0.08868483682877011, "learning_rate": 0.0008768987966651822, "loss": 1.4839, "step": 2339 }, { "epoch": 0.25, "grad_norm": 0.07590656353342753, "learning_rate": 0.0008767843667281812, "loss": 1.3742, "step": 2340 }, { "epoch": 0.25, "grad_norm": 0.08180431782754888, "learning_rate": 0.0008766698911047768, "loss": 1.4264, "step": 2341 }, { "epoch": 0.25, "grad_norm": 0.0827417682256729, "learning_rate": 0.0008765553698088496, "loss": 1.3259, "step": 2342 }, { "epoch": 0.25, "grad_norm": 0.08122944950217337, "learning_rate": 0.0008764408028542854, "loss": 1.4616, "step": 2343 }, { "epoch": 0.25, "grad_norm": 0.07970569960246655, "learning_rate": 0.0008763261902549762, "loss": 1.4272, "step": 2344 }, { "epoch": 0.25, "grad_norm": 0.0831541958515407, "learning_rate": 0.0008762115320248192, "loss": 1.3979, "step": 2345 }, { "epoch": 0.25, "grad_norm": 0.06820614193604783, "learning_rate": 0.0008760968281777167, "loss": 1.4121, "step": 2346 }, { "epoch": 0.25, "grad_norm": 0.08231977544674345, "learning_rate": 0.0008759820787275773, "loss": 1.3942, "step": 2347 }, { "epoch": 0.25, "grad_norm": 0.07261045445751502, "learning_rate": 0.0008758672836883146, "loss": 1.4361, "step": 2348 }, { "epoch": 0.25, "grad_norm": 0.07834877807278877, "learning_rate": 0.0008757524430738479, "loss": 1.5091, "step": 2349 }, { "epoch": 0.25, "grad_norm": 0.08087675977722167, "learning_rate": 0.0008756375568981023, "loss": 1.5176, "step": 2350 }, { "epoch": 0.25, "grad_norm": 0.06467515099636481, "learning_rate": 0.0008755226251750077, "loss": 1.3714, "step": 2351 }, { "epoch": 0.25, "grad_norm": 0.07426388522565618, "learning_rate": 0.0008754076479185001, "loss": 1.518, "step": 2352 }, { "epoch": 0.25, "grad_norm": 0.09416405071741757, "learning_rate": 0.000875292625142521, "loss": 1.5035, "step": 2353 }, { "epoch": 0.25, "grad_norm": 0.08717906392381507, "learning_rate": 0.0008751775568610175, "loss": 1.5043, "step": 2354 }, { "epoch": 0.25, "grad_norm": 0.07416649126779296, "learning_rate": 0.0008750624430879416, "loss": 1.5297, "step": 2355 }, { "epoch": 0.25, "grad_norm": 0.07998119133842944, "learning_rate": 0.0008749472838372514, "loss": 1.3105, "step": 2356 }, { "epoch": 0.25, "grad_norm": 0.081112375757246, "learning_rate": 0.0008748320791229106, "loss": 1.7473, "step": 2357 }, { "epoch": 0.25, "grad_norm": 0.08904481652658924, "learning_rate": 0.0008747168289588879, "loss": 1.4059, "step": 2358 }, { "epoch": 0.25, "grad_norm": 0.08234605951979214, "learning_rate": 0.0008746015333591578, "loss": 1.5228, "step": 2359 }, { "epoch": 0.25, "grad_norm": 0.08955036857950854, "learning_rate": 0.0008744861923377001, "loss": 1.5783, "step": 2360 }, { "epoch": 0.25, "grad_norm": 0.0831734267365419, "learning_rate": 0.0008743708059085008, "loss": 1.63, "step": 2361 }, { "epoch": 0.25, "grad_norm": 0.08897953739111732, "learning_rate": 0.0008742553740855505, "loss": 1.3316, "step": 2362 }, { "epoch": 0.25, "grad_norm": 0.0982441775876888, "learning_rate": 0.000874139896882846, "loss": 1.4574, "step": 2363 }, { "epoch": 0.25, "grad_norm": 0.08749307153723353, "learning_rate": 0.000874024374314389, "loss": 1.4225, "step": 2364 }, { "epoch": 0.25, "grad_norm": 0.08421424247218816, "learning_rate": 0.0008739088063941874, "loss": 1.4224, "step": 2365 }, { "epoch": 0.25, "grad_norm": 0.09632762229707822, "learning_rate": 0.0008737931931362536, "loss": 1.4607, "step": 2366 }, { "epoch": 0.25, "grad_norm": 0.08000551688554444, "learning_rate": 0.0008736775345546066, "loss": 1.5745, "step": 2367 }, { "epoch": 0.25, "grad_norm": 0.08320750171271744, "learning_rate": 0.0008735618306632704, "loss": 1.409, "step": 2368 }, { "epoch": 0.25, "grad_norm": 0.07967367321090332, "learning_rate": 0.0008734460814762743, "loss": 1.4925, "step": 2369 }, { "epoch": 0.25, "grad_norm": 0.07739999356529328, "learning_rate": 0.0008733302870076534, "loss": 1.5329, "step": 2370 }, { "epoch": 0.25, "grad_norm": 0.0739284834248217, "learning_rate": 0.000873214447271448, "loss": 1.4682, "step": 2371 }, { "epoch": 0.25, "grad_norm": 0.08430494075436981, "learning_rate": 0.0008730985622817043, "loss": 1.5422, "step": 2372 }, { "epoch": 0.26, "grad_norm": 0.07628909438924128, "learning_rate": 0.0008729826320524736, "loss": 1.3732, "step": 2373 }, { "epoch": 0.26, "grad_norm": 0.08018562323859652, "learning_rate": 0.0008728666565978129, "loss": 1.3432, "step": 2374 }, { "epoch": 0.26, "grad_norm": 0.07884604930066987, "learning_rate": 0.0008727506359317847, "loss": 1.4342, "step": 2375 }, { "epoch": 0.26, "grad_norm": 0.07219376890047094, "learning_rate": 0.0008726345700684568, "loss": 1.5017, "step": 2376 }, { "epoch": 0.26, "grad_norm": 0.07842816450456827, "learning_rate": 0.0008725184590219026, "loss": 1.3689, "step": 2377 }, { "epoch": 0.26, "grad_norm": 0.08355168533148163, "learning_rate": 0.000872402302806201, "loss": 1.5077, "step": 2378 }, { "epoch": 0.26, "grad_norm": 0.07483659212043353, "learning_rate": 0.0008722861014354363, "loss": 1.4426, "step": 2379 }, { "epoch": 0.26, "grad_norm": 0.08414078995361736, "learning_rate": 0.0008721698549236982, "loss": 1.5652, "step": 2380 }, { "epoch": 0.26, "grad_norm": 0.07285448936483976, "learning_rate": 0.0008720535632850823, "loss": 1.4901, "step": 2381 }, { "epoch": 0.26, "grad_norm": 0.07770740691795891, "learning_rate": 0.0008719372265336892, "loss": 1.4684, "step": 2382 }, { "epoch": 0.26, "grad_norm": 0.09889862035074047, "learning_rate": 0.0008718208446836251, "loss": 1.4999, "step": 2383 }, { "epoch": 0.26, "grad_norm": 0.07299850297868295, "learning_rate": 0.0008717044177490017, "loss": 1.3581, "step": 2384 }, { "epoch": 0.26, "grad_norm": 0.07882407017544525, "learning_rate": 0.0008715879457439362, "loss": 1.3601, "step": 2385 }, { "epoch": 0.26, "grad_norm": 0.08112163967024738, "learning_rate": 0.0008714714286825511, "loss": 1.3724, "step": 2386 }, { "epoch": 0.26, "grad_norm": 0.09481491559845347, "learning_rate": 0.0008713548665789748, "loss": 1.6153, "step": 2387 }, { "epoch": 0.26, "grad_norm": 0.07735816148190601, "learning_rate": 0.0008712382594473404, "loss": 1.3866, "step": 2388 }, { "epoch": 0.26, "grad_norm": 0.08755764137667417, "learning_rate": 0.0008711216073017875, "loss": 1.4494, "step": 2389 }, { "epoch": 0.26, "grad_norm": 0.08229766736555363, "learning_rate": 0.0008710049101564601, "loss": 1.4153, "step": 2390 }, { "epoch": 0.26, "grad_norm": 0.0842704737549601, "learning_rate": 0.0008708881680255083, "loss": 1.4117, "step": 2391 }, { "epoch": 0.26, "grad_norm": 0.08135189581146728, "learning_rate": 0.0008707713809230875, "loss": 1.5633, "step": 2392 }, { "epoch": 0.26, "grad_norm": 0.0820669553456813, "learning_rate": 0.0008706545488633586, "loss": 1.4688, "step": 2393 }, { "epoch": 0.26, "grad_norm": 0.07755666930453603, "learning_rate": 0.0008705376718604877, "loss": 1.4559, "step": 2394 }, { "epoch": 0.26, "grad_norm": 0.0788686142056009, "learning_rate": 0.0008704207499286467, "loss": 1.4704, "step": 2395 }, { "epoch": 0.26, "grad_norm": 0.08433337387934393, "learning_rate": 0.0008703037830820127, "loss": 1.4576, "step": 2396 }, { "epoch": 0.26, "grad_norm": 0.07928307629309808, "learning_rate": 0.0008701867713347684, "loss": 1.4808, "step": 2397 }, { "epoch": 0.26, "grad_norm": 0.083526767428123, "learning_rate": 0.0008700697147011018, "loss": 1.4463, "step": 2398 }, { "epoch": 0.26, "grad_norm": 0.0837424016622009, "learning_rate": 0.0008699526131952064, "loss": 1.4302, "step": 2399 }, { "epoch": 0.26, "grad_norm": 0.09432955877287996, "learning_rate": 0.0008698354668312815, "loss": 1.5715, "step": 2400 }, { "epoch": 0.26, "grad_norm": 0.08495084110646854, "learning_rate": 0.0008697182756235311, "loss": 1.45, "step": 2401 }, { "epoch": 0.26, "grad_norm": 0.07433196345469577, "learning_rate": 0.0008696010395861651, "loss": 1.4465, "step": 2402 }, { "epoch": 0.26, "grad_norm": 0.07958498308048166, "learning_rate": 0.0008694837587333988, "loss": 1.4197, "step": 2403 }, { "epoch": 0.26, "grad_norm": 0.07695594597259321, "learning_rate": 0.000869366433079453, "loss": 1.3394, "step": 2404 }, { "epoch": 0.26, "grad_norm": 0.08917274057254963, "learning_rate": 0.0008692490626385538, "loss": 1.5033, "step": 2405 }, { "epoch": 0.26, "grad_norm": 0.07292600891886494, "learning_rate": 0.0008691316474249329, "loss": 1.446, "step": 2406 }, { "epoch": 0.26, "grad_norm": 0.07778910311002209, "learning_rate": 0.000869014187452827, "loss": 1.5116, "step": 2407 }, { "epoch": 0.26, "grad_norm": 0.0758528324629491, "learning_rate": 0.0008688966827364788, "loss": 1.4107, "step": 2408 }, { "epoch": 0.26, "grad_norm": 0.07499271508937456, "learning_rate": 0.000868779133290136, "loss": 1.417, "step": 2409 }, { "epoch": 0.26, "grad_norm": 0.09554549660511087, "learning_rate": 0.0008686615391280518, "loss": 1.4752, "step": 2410 }, { "epoch": 0.26, "grad_norm": 0.07605546223030824, "learning_rate": 0.0008685439002644851, "loss": 1.4271, "step": 2411 }, { "epoch": 0.26, "grad_norm": 0.087182582522994, "learning_rate": 0.0008684262167136998, "loss": 1.4356, "step": 2412 }, { "epoch": 0.26, "grad_norm": 0.07107863576670509, "learning_rate": 0.0008683084884899656, "loss": 1.3961, "step": 2413 }, { "epoch": 0.26, "grad_norm": 0.08099026302786344, "learning_rate": 0.0008681907156075577, "loss": 1.5513, "step": 2414 }, { "epoch": 0.26, "grad_norm": 0.09136275098009797, "learning_rate": 0.0008680728980807559, "loss": 1.5117, "step": 2415 }, { "epoch": 0.26, "grad_norm": 0.09153738020255894, "learning_rate": 0.0008679550359238464, "loss": 1.391, "step": 2416 }, { "epoch": 0.26, "grad_norm": 0.08700276520341825, "learning_rate": 0.0008678371291511202, "loss": 1.4262, "step": 2417 }, { "epoch": 0.26, "grad_norm": 0.0845217493208463, "learning_rate": 0.0008677191777768739, "loss": 1.3337, "step": 2418 }, { "epoch": 0.26, "grad_norm": 0.07806179350815017, "learning_rate": 0.0008676011818154097, "loss": 1.4307, "step": 2419 }, { "epoch": 0.26, "grad_norm": 0.08075539501899112, "learning_rate": 0.0008674831412810349, "loss": 1.3418, "step": 2420 }, { "epoch": 0.26, "grad_norm": 0.08976538106363947, "learning_rate": 0.0008673650561880622, "loss": 1.433, "step": 2421 }, { "epoch": 0.26, "grad_norm": 0.08419710209291338, "learning_rate": 0.0008672469265508099, "loss": 1.3991, "step": 2422 }, { "epoch": 0.26, "grad_norm": 0.07548679020927829, "learning_rate": 0.0008671287523836018, "loss": 1.3889, "step": 2423 }, { "epoch": 0.26, "grad_norm": 0.07968383779478568, "learning_rate": 0.0008670105337007667, "loss": 1.4236, "step": 2424 }, { "epoch": 0.26, "grad_norm": 0.07402167066513765, "learning_rate": 0.0008668922705166391, "loss": 1.3511, "step": 2425 }, { "epoch": 0.26, "grad_norm": 0.07791837249202221, "learning_rate": 0.0008667739628455591, "loss": 1.6978, "step": 2426 }, { "epoch": 0.26, "grad_norm": 0.07326641874026783, "learning_rate": 0.0008666556107018713, "loss": 1.3809, "step": 2427 }, { "epoch": 0.26, "grad_norm": 0.08249221016536444, "learning_rate": 0.0008665372140999268, "loss": 1.4819, "step": 2428 }, { "epoch": 0.26, "grad_norm": 0.08468100454038449, "learning_rate": 0.0008664187730540813, "loss": 1.3896, "step": 2429 }, { "epoch": 0.26, "grad_norm": 0.07235421408348185, "learning_rate": 0.0008663002875786965, "loss": 1.3804, "step": 2430 }, { "epoch": 0.26, "grad_norm": 0.07356618584967646, "learning_rate": 0.0008661817576881391, "loss": 1.4013, "step": 2431 }, { "epoch": 0.26, "grad_norm": 0.08129543864427304, "learning_rate": 0.0008660631833967809, "loss": 1.4612, "step": 2432 }, { "epoch": 0.26, "grad_norm": 0.07326509117595839, "learning_rate": 0.0008659445647189999, "loss": 1.4255, "step": 2433 }, { "epoch": 0.26, "grad_norm": 0.07271571725202336, "learning_rate": 0.0008658259016691786, "loss": 1.3477, "step": 2434 }, { "epoch": 0.26, "grad_norm": 0.07563391254650749, "learning_rate": 0.0008657071942617056, "loss": 1.5405, "step": 2435 }, { "epoch": 0.26, "grad_norm": 0.07088489797268319, "learning_rate": 0.0008655884425109747, "loss": 1.4278, "step": 2436 }, { "epoch": 0.26, "grad_norm": 0.07552308628678947, "learning_rate": 0.0008654696464313846, "loss": 1.4229, "step": 2437 }, { "epoch": 0.26, "grad_norm": 0.08332445254415345, "learning_rate": 0.0008653508060373399, "loss": 1.4936, "step": 2438 }, { "epoch": 0.26, "grad_norm": 0.07147667803000843, "learning_rate": 0.0008652319213432504, "loss": 1.4365, "step": 2439 }, { "epoch": 0.26, "grad_norm": 0.07498735151078668, "learning_rate": 0.0008651129923635314, "loss": 1.4489, "step": 2440 }, { "epoch": 0.26, "grad_norm": 0.08279578112131421, "learning_rate": 0.0008649940191126033, "loss": 1.4801, "step": 2441 }, { "epoch": 0.26, "grad_norm": 0.07408268276283134, "learning_rate": 0.0008648750016048921, "loss": 1.3708, "step": 2442 }, { "epoch": 0.26, "grad_norm": 0.08279177095879077, "learning_rate": 0.000864755939854829, "loss": 1.4605, "step": 2443 }, { "epoch": 0.26, "grad_norm": 0.07600695074651367, "learning_rate": 0.0008646368338768506, "loss": 1.3836, "step": 2444 }, { "epoch": 0.26, "grad_norm": 0.08935962208751888, "learning_rate": 0.0008645176836853992, "loss": 1.434, "step": 2445 }, { "epoch": 0.26, "grad_norm": 0.07414169923498534, "learning_rate": 0.0008643984892949217, "loss": 1.4505, "step": 2446 }, { "epoch": 0.26, "grad_norm": 0.088469364176592, "learning_rate": 0.0008642792507198713, "loss": 1.3818, "step": 2447 }, { "epoch": 0.26, "grad_norm": 0.0826174631745449, "learning_rate": 0.0008641599679747059, "loss": 1.5778, "step": 2448 }, { "epoch": 0.26, "grad_norm": 0.08789837591114688, "learning_rate": 0.0008640406410738888, "loss": 1.3421, "step": 2449 }, { "epoch": 0.26, "grad_norm": 0.07804401790078475, "learning_rate": 0.000863921270031889, "loss": 1.5028, "step": 2450 }, { "epoch": 0.26, "grad_norm": 0.07428320796104862, "learning_rate": 0.0008638018548631805, "loss": 1.4064, "step": 2451 }, { "epoch": 0.26, "grad_norm": 0.07560885338288993, "learning_rate": 0.000863682395582243, "loss": 1.3384, "step": 2452 }, { "epoch": 0.26, "grad_norm": 0.10187961286723403, "learning_rate": 0.0008635628922035613, "loss": 1.5161, "step": 2453 }, { "epoch": 0.26, "grad_norm": 0.09026702268572244, "learning_rate": 0.0008634433447416253, "loss": 1.4465, "step": 2454 }, { "epoch": 0.26, "grad_norm": 0.09035052032958152, "learning_rate": 0.000863323753210931, "loss": 1.441, "step": 2455 }, { "epoch": 0.26, "grad_norm": 0.0813296004930123, "learning_rate": 0.0008632041176259788, "loss": 1.4335, "step": 2456 }, { "epoch": 0.26, "grad_norm": 0.08422711412673596, "learning_rate": 0.0008630844380012754, "loss": 1.4123, "step": 2457 }, { "epoch": 0.26, "grad_norm": 0.07926370932506693, "learning_rate": 0.0008629647143513321, "loss": 1.3936, "step": 2458 }, { "epoch": 0.26, "grad_norm": 0.07404514764363598, "learning_rate": 0.0008628449466906658, "loss": 1.3632, "step": 2459 }, { "epoch": 0.26, "grad_norm": 0.07673003267922554, "learning_rate": 0.0008627251350337988, "loss": 1.4799, "step": 2460 }, { "epoch": 0.26, "grad_norm": 0.07558809531135767, "learning_rate": 0.0008626052793952588, "loss": 1.3333, "step": 2461 }, { "epoch": 0.26, "grad_norm": 0.07736495424535647, "learning_rate": 0.0008624853797895784, "loss": 1.4643, "step": 2462 }, { "epoch": 0.26, "grad_norm": 0.0781190229156017, "learning_rate": 0.0008623654362312961, "loss": 1.4538, "step": 2463 }, { "epoch": 0.26, "grad_norm": 0.08576676357567804, "learning_rate": 0.0008622454487349554, "loss": 1.4785, "step": 2464 }, { "epoch": 0.26, "grad_norm": 0.07382184959413687, "learning_rate": 0.0008621254173151052, "loss": 1.4344, "step": 2465 }, { "epoch": 0.27, "grad_norm": 0.08142733655694544, "learning_rate": 0.0008620053419862997, "loss": 1.4177, "step": 2466 }, { "epoch": 0.27, "grad_norm": 0.0752615574922961, "learning_rate": 0.0008618852227630985, "loss": 1.4311, "step": 2467 }, { "epoch": 0.27, "grad_norm": 0.09972312920589078, "learning_rate": 0.0008617650596600665, "loss": 1.4834, "step": 2468 }, { "epoch": 0.27, "grad_norm": 0.08568112647402251, "learning_rate": 0.0008616448526917736, "loss": 1.4549, "step": 2469 }, { "epoch": 0.27, "grad_norm": 0.0780979765408123, "learning_rate": 0.0008615246018727956, "loss": 1.4037, "step": 2470 }, { "epoch": 0.27, "grad_norm": 0.08294238148339915, "learning_rate": 0.0008614043072177135, "loss": 1.4814, "step": 2471 }, { "epoch": 0.27, "grad_norm": 0.09715355593692114, "learning_rate": 0.000861283968741113, "loss": 1.5874, "step": 2472 }, { "epoch": 0.27, "grad_norm": 0.07782506887676875, "learning_rate": 0.0008611635864575857, "loss": 1.381, "step": 2473 }, { "epoch": 0.27, "grad_norm": 0.0906288348611541, "learning_rate": 0.0008610431603817285, "loss": 1.3718, "step": 2474 }, { "epoch": 0.27, "grad_norm": 0.07870778480964968, "learning_rate": 0.0008609226905281433, "loss": 1.4466, "step": 2475 }, { "epoch": 0.27, "grad_norm": 0.08602254355096633, "learning_rate": 0.0008608021769114378, "loss": 1.4097, "step": 2476 }, { "epoch": 0.27, "grad_norm": 0.0854785814244712, "learning_rate": 0.0008606816195462243, "loss": 1.3345, "step": 2477 }, { "epoch": 0.27, "grad_norm": 0.08923277482885959, "learning_rate": 0.000860561018447121, "loss": 1.4728, "step": 2478 }, { "epoch": 0.27, "grad_norm": 0.09441553256745243, "learning_rate": 0.0008604403736287512, "loss": 1.4979, "step": 2479 }, { "epoch": 0.27, "grad_norm": 0.09169254563073491, "learning_rate": 0.0008603196851057434, "loss": 1.4615, "step": 2480 }, { "epoch": 0.27, "grad_norm": 0.09397528694611802, "learning_rate": 0.0008601989528927317, "loss": 1.3988, "step": 2481 }, { "epoch": 0.27, "grad_norm": 0.08594017810333246, "learning_rate": 0.0008600781770043551, "loss": 1.434, "step": 2482 }, { "epoch": 0.27, "grad_norm": 0.0857484672914396, "learning_rate": 0.0008599573574552582, "loss": 1.4412, "step": 2483 }, { "epoch": 0.27, "grad_norm": 0.09556263713315377, "learning_rate": 0.0008598364942600906, "loss": 1.5596, "step": 2484 }, { "epoch": 0.27, "grad_norm": 0.096404618723962, "learning_rate": 0.0008597155874335076, "loss": 1.4049, "step": 2485 }, { "epoch": 0.27, "grad_norm": 0.08729667242752806, "learning_rate": 0.0008595946369901696, "loss": 1.4342, "step": 2486 }, { "epoch": 0.27, "grad_norm": 0.08010480744400537, "learning_rate": 0.0008594736429447421, "loss": 1.4179, "step": 2487 }, { "epoch": 0.27, "grad_norm": 0.07866884276153974, "learning_rate": 0.0008593526053118961, "loss": 1.5207, "step": 2488 }, { "epoch": 0.27, "grad_norm": 0.08333403772066363, "learning_rate": 0.0008592315241063077, "loss": 1.4315, "step": 2489 }, { "epoch": 0.27, "grad_norm": 0.08300905086491174, "learning_rate": 0.0008591103993426588, "loss": 1.3496, "step": 2490 }, { "epoch": 0.27, "grad_norm": 0.0820231745923072, "learning_rate": 0.0008589892310356357, "loss": 1.4858, "step": 2491 }, { "epoch": 0.27, "grad_norm": 0.09302760868607428, "learning_rate": 0.0008588680191999308, "loss": 1.4802, "step": 2492 }, { "epoch": 0.27, "grad_norm": 0.08616521316743678, "learning_rate": 0.0008587467638502413, "loss": 1.4069, "step": 2493 }, { "epoch": 0.27, "grad_norm": 0.08199246375158696, "learning_rate": 0.0008586254650012699, "loss": 1.4902, "step": 2494 }, { "epoch": 0.27, "grad_norm": 0.08134043261911114, "learning_rate": 0.0008585041226677247, "loss": 1.4154, "step": 2495 }, { "epoch": 0.27, "grad_norm": 0.07958788079491519, "learning_rate": 0.0008583827368643185, "loss": 1.4978, "step": 2496 }, { "epoch": 0.27, "grad_norm": 0.07389846036126062, "learning_rate": 0.0008582613076057699, "loss": 1.4757, "step": 2497 }, { "epoch": 0.27, "grad_norm": 0.07637567948544043, "learning_rate": 0.0008581398349068028, "loss": 1.3928, "step": 2498 }, { "epoch": 0.27, "grad_norm": 0.07228880909881819, "learning_rate": 0.0008580183187821459, "loss": 1.496, "step": 2499 }, { "epoch": 0.27, "grad_norm": 0.07744674862621322, "learning_rate": 0.0008578967592465335, "loss": 1.409, "step": 2500 }, { "epoch": 0.27, "grad_norm": 0.07110313278941842, "learning_rate": 0.0008577751563147054, "loss": 1.4029, "step": 2501 }, { "epoch": 0.27, "grad_norm": 0.0699785168285317, "learning_rate": 0.000857653510001406, "loss": 1.365, "step": 2502 }, { "epoch": 0.27, "grad_norm": 0.07645102695193777, "learning_rate": 0.0008575318203213855, "loss": 1.3709, "step": 2503 }, { "epoch": 0.27, "grad_norm": 0.09179887617243095, "learning_rate": 0.0008574100872893992, "loss": 1.6029, "step": 2504 }, { "epoch": 0.27, "grad_norm": 0.08142107716187588, "learning_rate": 0.0008572883109202077, "loss": 1.4783, "step": 2505 }, { "epoch": 0.27, "grad_norm": 0.06824772265618469, "learning_rate": 0.0008571664912285766, "loss": 1.3282, "step": 2506 }, { "epoch": 0.27, "grad_norm": 0.07416330481288567, "learning_rate": 0.0008570446282292773, "loss": 1.439, "step": 2507 }, { "epoch": 0.27, "grad_norm": 0.07513943521999795, "learning_rate": 0.0008569227219370856, "loss": 1.4978, "step": 2508 }, { "epoch": 0.27, "grad_norm": 0.07361208809988612, "learning_rate": 0.0008568007723667837, "loss": 1.6253, "step": 2509 }, { "epoch": 0.27, "grad_norm": 0.08213288675158097, "learning_rate": 0.0008566787795331579, "loss": 1.4, "step": 2510 }, { "epoch": 0.27, "grad_norm": 0.07926099951067463, "learning_rate": 0.0008565567434510004, "loss": 1.4767, "step": 2511 }, { "epoch": 0.27, "grad_norm": 0.07761340571003827, "learning_rate": 0.0008564346641351087, "loss": 1.3751, "step": 2512 }, { "epoch": 0.27, "grad_norm": 0.07060565203198378, "learning_rate": 0.0008563125416002849, "loss": 1.4009, "step": 2513 }, { "epoch": 0.27, "grad_norm": 0.07997901091917972, "learning_rate": 0.0008561903758613372, "loss": 1.5378, "step": 2514 }, { "epoch": 0.27, "grad_norm": 0.08769052562520391, "learning_rate": 0.0008560681669330783, "loss": 1.3733, "step": 2515 }, { "epoch": 0.27, "grad_norm": 0.07068911164600497, "learning_rate": 0.0008559459148303268, "loss": 1.4424, "step": 2516 }, { "epoch": 0.27, "grad_norm": 0.06882590599628988, "learning_rate": 0.0008558236195679059, "loss": 1.4711, "step": 2517 }, { "epoch": 0.27, "grad_norm": 0.07503865848801855, "learning_rate": 0.0008557012811606444, "loss": 1.5057, "step": 2518 }, { "epoch": 0.27, "grad_norm": 0.07369198331548805, "learning_rate": 0.0008555788996233764, "loss": 1.3597, "step": 2519 }, { "epoch": 0.27, "grad_norm": 0.06915275711769295, "learning_rate": 0.0008554564749709408, "loss": 1.5348, "step": 2520 }, { "epoch": 0.27, "grad_norm": 0.08524842375728962, "learning_rate": 0.0008553340072181822, "loss": 1.2874, "step": 2521 }, { "epoch": 0.27, "grad_norm": 0.08088460709388172, "learning_rate": 0.0008552114963799502, "loss": 1.4587, "step": 2522 }, { "epoch": 0.27, "grad_norm": 0.07020129575793528, "learning_rate": 0.0008550889424710997, "loss": 1.494, "step": 2523 }, { "epoch": 0.27, "grad_norm": 0.07286098460203513, "learning_rate": 0.0008549663455064907, "loss": 1.4065, "step": 2524 }, { "epoch": 0.27, "grad_norm": 0.07224682131553291, "learning_rate": 0.0008548437055009886, "loss": 1.3713, "step": 2525 }, { "epoch": 0.27, "grad_norm": 0.07626714388252791, "learning_rate": 0.0008547210224694639, "loss": 1.4525, "step": 2526 }, { "epoch": 0.27, "grad_norm": 0.08518727889962867, "learning_rate": 0.0008545982964267922, "loss": 1.3893, "step": 2527 }, { "epoch": 0.27, "grad_norm": 0.08097460248924065, "learning_rate": 0.0008544755273878546, "loss": 1.3658, "step": 2528 }, { "epoch": 0.27, "grad_norm": 0.07863853475459574, "learning_rate": 0.0008543527153675374, "loss": 1.5665, "step": 2529 }, { "epoch": 0.27, "grad_norm": 0.07424216152167827, "learning_rate": 0.0008542298603807317, "loss": 1.3939, "step": 2530 }, { "epoch": 0.27, "grad_norm": 0.0711106261983796, "learning_rate": 0.0008541069624423343, "loss": 1.3313, "step": 2531 }, { "epoch": 0.27, "grad_norm": 0.0786625769759333, "learning_rate": 0.0008539840215672467, "loss": 1.4449, "step": 2532 }, { "epoch": 0.27, "grad_norm": 0.07540816364950761, "learning_rate": 0.0008538610377703764, "loss": 1.5184, "step": 2533 }, { "epoch": 0.27, "grad_norm": 0.07406561827332242, "learning_rate": 0.0008537380110666351, "loss": 1.4328, "step": 2534 }, { "epoch": 0.27, "grad_norm": 0.07747251536991673, "learning_rate": 0.0008536149414709404, "loss": 1.4349, "step": 2535 }, { "epoch": 0.27, "grad_norm": 0.07339786673715235, "learning_rate": 0.0008534918289982152, "loss": 1.3211, "step": 2536 }, { "epoch": 0.27, "grad_norm": 0.07446062835008409, "learning_rate": 0.0008533686736633868, "loss": 1.396, "step": 2537 }, { "epoch": 0.27, "grad_norm": 0.08408711492254073, "learning_rate": 0.0008532454754813886, "loss": 1.4873, "step": 2538 }, { "epoch": 0.27, "grad_norm": 0.06694151801394763, "learning_rate": 0.0008531222344671588, "loss": 1.5546, "step": 2539 }, { "epoch": 0.27, "grad_norm": 0.10107174239142706, "learning_rate": 0.0008529989506356406, "loss": 1.4372, "step": 2540 }, { "epoch": 0.27, "grad_norm": 0.07716872380123085, "learning_rate": 0.0008528756240017825, "loss": 1.3469, "step": 2541 }, { "epoch": 0.27, "grad_norm": 0.1048404291852175, "learning_rate": 0.0008527522545805385, "loss": 1.4357, "step": 2542 }, { "epoch": 0.27, "grad_norm": 0.06980478409748192, "learning_rate": 0.0008526288423868675, "loss": 1.3616, "step": 2543 }, { "epoch": 0.27, "grad_norm": 0.07265651507995816, "learning_rate": 0.0008525053874357338, "loss": 1.4286, "step": 2544 }, { "epoch": 0.27, "grad_norm": 0.07424307098433014, "learning_rate": 0.0008523818897421065, "loss": 1.4647, "step": 2545 }, { "epoch": 0.27, "grad_norm": 0.07441297327581105, "learning_rate": 0.0008522583493209603, "loss": 1.4718, "step": 2546 }, { "epoch": 0.27, "grad_norm": 0.07910137117621747, "learning_rate": 0.0008521347661872748, "loss": 1.5926, "step": 2547 }, { "epoch": 0.27, "grad_norm": 0.07998697396490068, "learning_rate": 0.0008520111403560349, "loss": 1.3263, "step": 2548 }, { "epoch": 0.27, "grad_norm": 0.0724968400988003, "learning_rate": 0.0008518874718422307, "loss": 1.3337, "step": 2549 }, { "epoch": 0.27, "grad_norm": 0.0798632264900906, "learning_rate": 0.0008517637606608573, "loss": 1.4341, "step": 2550 }, { "epoch": 0.27, "grad_norm": 0.07801198011377168, "learning_rate": 0.0008516400068269152, "loss": 1.5612, "step": 2551 }, { "epoch": 0.27, "grad_norm": 0.08604863144633705, "learning_rate": 0.00085151621035541, "loss": 1.4635, "step": 2552 }, { "epoch": 0.27, "grad_norm": 0.07636576667059033, "learning_rate": 0.0008513923712613525, "loss": 1.5159, "step": 2553 }, { "epoch": 0.27, "grad_norm": 0.10516077127557029, "learning_rate": 0.0008512684895597586, "loss": 1.4889, "step": 2554 }, { "epoch": 0.27, "grad_norm": 0.07562060657657407, "learning_rate": 0.0008511445652656494, "loss": 1.4407, "step": 2555 }, { "epoch": 0.27, "grad_norm": 0.07763797531070722, "learning_rate": 0.0008510205983940507, "loss": 1.5135, "step": 2556 }, { "epoch": 0.27, "grad_norm": 0.07375876092295945, "learning_rate": 0.0008508965889599947, "loss": 1.4025, "step": 2557 }, { "epoch": 0.27, "grad_norm": 0.07563084794765446, "learning_rate": 0.0008507725369785173, "loss": 1.3534, "step": 2558 }, { "epoch": 0.28, "grad_norm": 0.07692755695568271, "learning_rate": 0.0008506484424646606, "loss": 1.4164, "step": 2559 }, { "epoch": 0.28, "grad_norm": 0.0808219714314826, "learning_rate": 0.0008505243054334713, "loss": 1.4273, "step": 2560 }, { "epoch": 0.28, "grad_norm": 0.08065608841873591, "learning_rate": 0.0008504001259000016, "loss": 1.4571, "step": 2561 }, { "epoch": 0.28, "grad_norm": 0.08063335526960716, "learning_rate": 0.0008502759038793087, "loss": 1.3944, "step": 2562 }, { "epoch": 0.28, "grad_norm": 0.09265606382023213, "learning_rate": 0.0008501516393864548, "loss": 1.5381, "step": 2563 }, { "epoch": 0.28, "grad_norm": 0.08276084737543599, "learning_rate": 0.0008500273324365073, "loss": 1.4669, "step": 2564 }, { "epoch": 0.28, "grad_norm": 0.08159564795043535, "learning_rate": 0.0008499029830445393, "loss": 1.4467, "step": 2565 }, { "epoch": 0.28, "grad_norm": 0.08220391890474721, "learning_rate": 0.0008497785912256282, "loss": 1.3983, "step": 2566 }, { "epoch": 0.28, "grad_norm": 0.07864749205521121, "learning_rate": 0.000849654156994857, "loss": 1.538, "step": 2567 }, { "epoch": 0.28, "grad_norm": 0.08184544063144142, "learning_rate": 0.0008495296803673138, "loss": 1.4729, "step": 2568 }, { "epoch": 0.28, "grad_norm": 0.08469200046218774, "learning_rate": 0.0008494051613580918, "loss": 1.402, "step": 2569 }, { "epoch": 0.28, "grad_norm": 0.07779454288092019, "learning_rate": 0.0008492805999822897, "loss": 1.4113, "step": 2570 }, { "epoch": 0.28, "grad_norm": 0.08809251512257028, "learning_rate": 0.0008491559962550104, "loss": 1.4379, "step": 2571 }, { "epoch": 0.28, "grad_norm": 0.08325662907420392, "learning_rate": 0.0008490313501913629, "loss": 1.3634, "step": 2572 }, { "epoch": 0.28, "grad_norm": 0.08503557494839924, "learning_rate": 0.0008489066618064609, "loss": 1.482, "step": 2573 }, { "epoch": 0.28, "grad_norm": 0.08179961293993213, "learning_rate": 0.0008487819311154233, "loss": 1.3169, "step": 2574 }, { "epoch": 0.28, "grad_norm": 0.07767794072518784, "learning_rate": 0.0008486571581333742, "loss": 1.4895, "step": 2575 }, { "epoch": 0.28, "grad_norm": 0.07652551678602128, "learning_rate": 0.0008485323428754426, "loss": 1.4348, "step": 2576 }, { "epoch": 0.28, "grad_norm": 0.07414488818852104, "learning_rate": 0.0008484074853567629, "loss": 1.395, "step": 2577 }, { "epoch": 0.28, "grad_norm": 0.08519549165590558, "learning_rate": 0.0008482825855924743, "loss": 1.4378, "step": 2578 }, { "epoch": 0.28, "grad_norm": 0.08324947940809344, "learning_rate": 0.0008481576435977217, "loss": 1.3522, "step": 2579 }, { "epoch": 0.28, "grad_norm": 0.06811742710699656, "learning_rate": 0.0008480326593876544, "loss": 1.3793, "step": 2580 }, { "epoch": 0.28, "grad_norm": 0.0812754167751017, "learning_rate": 0.0008479076329774274, "loss": 1.4694, "step": 2581 }, { "epoch": 0.28, "grad_norm": 0.07372976296447055, "learning_rate": 0.0008477825643822003, "loss": 1.5546, "step": 2582 }, { "epoch": 0.28, "grad_norm": 0.08190879452155753, "learning_rate": 0.0008476574536171385, "loss": 1.3917, "step": 2583 }, { "epoch": 0.28, "grad_norm": 0.09192577655834207, "learning_rate": 0.0008475323006974116, "loss": 1.5549, "step": 2584 }, { "epoch": 0.28, "grad_norm": 0.07686509843963789, "learning_rate": 0.0008474071056381953, "loss": 1.4178, "step": 2585 }, { "epoch": 0.28, "grad_norm": 0.0905695216161556, "learning_rate": 0.0008472818684546697, "loss": 1.4021, "step": 2586 }, { "epoch": 0.28, "grad_norm": 0.07791879082984891, "learning_rate": 0.0008471565891620203, "loss": 1.5132, "step": 2587 }, { "epoch": 0.28, "grad_norm": 0.07101522886996688, "learning_rate": 0.0008470312677754377, "loss": 1.4741, "step": 2588 }, { "epoch": 0.28, "grad_norm": 0.08034711963413725, "learning_rate": 0.0008469059043101175, "loss": 1.4286, "step": 2589 }, { "epoch": 0.28, "grad_norm": 0.07548169925316262, "learning_rate": 0.0008467804987812603, "loss": 1.3944, "step": 2590 }, { "epoch": 0.28, "grad_norm": 0.07519177174592058, "learning_rate": 0.0008466550512040722, "loss": 1.4444, "step": 2591 }, { "epoch": 0.28, "grad_norm": 0.09383438703269041, "learning_rate": 0.0008465295615937641, "loss": 1.4118, "step": 2592 }, { "epoch": 0.28, "grad_norm": 0.08051542055976478, "learning_rate": 0.0008464040299655518, "loss": 1.4393, "step": 2593 }, { "epoch": 0.28, "grad_norm": 0.08233997317534307, "learning_rate": 0.0008462784563346567, "loss": 1.401, "step": 2594 }, { "epoch": 0.28, "grad_norm": 0.08144426653897043, "learning_rate": 0.000846152840716305, "loss": 1.4978, "step": 2595 }, { "epoch": 0.28, "grad_norm": 0.09101065818594047, "learning_rate": 0.000846027183125728, "loss": 1.4853, "step": 2596 }, { "epoch": 0.28, "grad_norm": 0.08392381563918748, "learning_rate": 0.0008459014835781621, "loss": 1.3675, "step": 2597 }, { "epoch": 0.28, "grad_norm": 0.0800643944017374, "learning_rate": 0.0008457757420888488, "loss": 1.4346, "step": 2598 }, { "epoch": 0.28, "grad_norm": 0.07982629773993684, "learning_rate": 0.0008456499586730346, "loss": 1.3716, "step": 2599 }, { "epoch": 0.28, "grad_norm": 0.0933833278071163, "learning_rate": 0.0008455241333459715, "loss": 1.4097, "step": 2600 }, { "epoch": 0.28, "grad_norm": 0.09264156229360837, "learning_rate": 0.0008453982661229158, "loss": 1.4631, "step": 2601 }, { "epoch": 0.28, "grad_norm": 0.09061343077073224, "learning_rate": 0.0008452723570191297, "loss": 1.5063, "step": 2602 }, { "epoch": 0.28, "grad_norm": 0.07478352133945661, "learning_rate": 0.0008451464060498799, "loss": 1.2421, "step": 2603 }, { "epoch": 0.28, "grad_norm": 0.08105557941037968, "learning_rate": 0.0008450204132304386, "loss": 1.4022, "step": 2604 }, { "epoch": 0.28, "grad_norm": 0.08207495978087463, "learning_rate": 0.0008448943785760826, "loss": 1.3719, "step": 2605 }, { "epoch": 0.28, "grad_norm": 0.08368550950846189, "learning_rate": 0.0008447683021020942, "loss": 1.5827, "step": 2606 }, { "epoch": 0.28, "grad_norm": 0.07688854298372594, "learning_rate": 0.0008446421838237605, "loss": 1.4621, "step": 2607 }, { "epoch": 0.28, "grad_norm": 0.09196343428155447, "learning_rate": 0.0008445160237563741, "loss": 1.5451, "step": 2608 }, { "epoch": 0.28, "grad_norm": 0.07903413930876052, "learning_rate": 0.0008443898219152319, "loss": 1.4598, "step": 2609 }, { "epoch": 0.28, "grad_norm": 0.07076947983689727, "learning_rate": 0.0008442635783156365, "loss": 1.3268, "step": 2610 }, { "epoch": 0.28, "grad_norm": 0.07636266741339315, "learning_rate": 0.0008441372929728954, "loss": 1.5277, "step": 2611 }, { "epoch": 0.28, "grad_norm": 0.07600285349782061, "learning_rate": 0.0008440109659023211, "loss": 1.4652, "step": 2612 }, { "epoch": 0.28, "grad_norm": 0.07201022852211071, "learning_rate": 0.0008438845971192313, "loss": 1.4532, "step": 2613 }, { "epoch": 0.28, "grad_norm": 0.07285683346924836, "learning_rate": 0.0008437581866389483, "loss": 1.4461, "step": 2614 }, { "epoch": 0.28, "grad_norm": 0.07768183722711053, "learning_rate": 0.0008436317344768003, "loss": 1.585, "step": 2615 }, { "epoch": 0.28, "grad_norm": 0.08838640887202365, "learning_rate": 0.0008435052406481196, "loss": 1.4553, "step": 2616 }, { "epoch": 0.28, "grad_norm": 0.08074211986383892, "learning_rate": 0.0008433787051682443, "loss": 1.5233, "step": 2617 }, { "epoch": 0.28, "grad_norm": 0.07530166795895882, "learning_rate": 0.0008432521280525174, "loss": 1.543, "step": 2618 }, { "epoch": 0.28, "grad_norm": 0.07064169571659298, "learning_rate": 0.0008431255093162864, "loss": 1.5052, "step": 2619 }, { "epoch": 0.28, "grad_norm": 0.07175033898234041, "learning_rate": 0.0008429988489749046, "loss": 1.5151, "step": 2620 }, { "epoch": 0.28, "grad_norm": 0.07320433910283142, "learning_rate": 0.0008428721470437296, "loss": 1.3737, "step": 2621 }, { "epoch": 0.28, "grad_norm": 0.07414426014352578, "learning_rate": 0.0008427454035381249, "loss": 1.5339, "step": 2622 }, { "epoch": 0.28, "grad_norm": 0.09143642921759464, "learning_rate": 0.0008426186184734585, "loss": 1.4326, "step": 2623 }, { "epoch": 0.28, "grad_norm": 0.0845165786223017, "learning_rate": 0.0008424917918651031, "loss": 1.4812, "step": 2624 }, { "epoch": 0.28, "grad_norm": 0.0759915730522228, "learning_rate": 0.0008423649237284376, "loss": 1.4493, "step": 2625 }, { "epoch": 0.28, "grad_norm": 0.0745678505883287, "learning_rate": 0.0008422380140788445, "loss": 1.36, "step": 2626 }, { "epoch": 0.28, "grad_norm": 0.08146179178868668, "learning_rate": 0.0008421110629317123, "loss": 1.4266, "step": 2627 }, { "epoch": 0.28, "grad_norm": 0.07628880445326346, "learning_rate": 0.0008419840703024344, "loss": 1.401, "step": 2628 }, { "epoch": 0.28, "grad_norm": 0.07686954348813545, "learning_rate": 0.0008418570362064091, "loss": 1.5006, "step": 2629 }, { "epoch": 0.28, "grad_norm": 0.07513837237629467, "learning_rate": 0.0008417299606590393, "loss": 1.4112, "step": 2630 }, { "epoch": 0.28, "grad_norm": 0.07439513015706296, "learning_rate": 0.0008416028436757339, "loss": 1.3948, "step": 2631 }, { "epoch": 0.28, "grad_norm": 0.07113589119232142, "learning_rate": 0.0008414756852719059, "loss": 1.3521, "step": 2632 }, { "epoch": 0.28, "grad_norm": 0.0746111940741349, "learning_rate": 0.0008413484854629739, "loss": 1.5365, "step": 2633 }, { "epoch": 0.28, "grad_norm": 0.08053469900948855, "learning_rate": 0.0008412212442643611, "loss": 1.317, "step": 2634 }, { "epoch": 0.28, "grad_norm": 0.07311809517893839, "learning_rate": 0.0008410939616914961, "loss": 1.3688, "step": 2635 }, { "epoch": 0.28, "grad_norm": 0.08879393398483074, "learning_rate": 0.0008409666377598124, "loss": 1.4062, "step": 2636 }, { "epoch": 0.28, "grad_norm": 0.06956310232444521, "learning_rate": 0.0008408392724847482, "loss": 1.3274, "step": 2637 }, { "epoch": 0.28, "grad_norm": 0.07211476766646407, "learning_rate": 0.0008407118658817474, "loss": 1.4828, "step": 2638 }, { "epoch": 0.28, "grad_norm": 0.07084769306180853, "learning_rate": 0.0008405844179662581, "loss": 1.46, "step": 2639 }, { "epoch": 0.28, "grad_norm": 0.08089545995057705, "learning_rate": 0.000840456928753734, "loss": 1.4635, "step": 2640 }, { "epoch": 0.28, "grad_norm": 0.09611046115627091, "learning_rate": 0.0008403293982596336, "loss": 1.505, "step": 2641 }, { "epoch": 0.28, "grad_norm": 0.07674825520262066, "learning_rate": 0.0008402018264994203, "loss": 1.4323, "step": 2642 }, { "epoch": 0.28, "grad_norm": 0.0753083169510489, "learning_rate": 0.0008400742134885627, "loss": 1.4169, "step": 2643 }, { "epoch": 0.28, "grad_norm": 0.07514595139372099, "learning_rate": 0.0008399465592425342, "loss": 1.3978, "step": 2644 }, { "epoch": 0.28, "grad_norm": 0.06846109808760739, "learning_rate": 0.0008398188637768136, "loss": 1.3855, "step": 2645 }, { "epoch": 0.28, "grad_norm": 0.08015966915623766, "learning_rate": 0.0008396911271068841, "loss": 1.4843, "step": 2646 }, { "epoch": 0.28, "grad_norm": 0.07651693334730639, "learning_rate": 0.0008395633492482344, "loss": 1.4069, "step": 2647 }, { "epoch": 0.28, "grad_norm": 0.08363488440862517, "learning_rate": 0.0008394355302163578, "loss": 1.5263, "step": 2648 }, { "epoch": 0.28, "grad_norm": 0.0910487532185436, "learning_rate": 0.0008393076700267532, "loss": 1.5277, "step": 2649 }, { "epoch": 0.28, "grad_norm": 0.08272526983826728, "learning_rate": 0.0008391797686949237, "loss": 1.4217, "step": 2650 }, { "epoch": 0.28, "grad_norm": 0.08459734947024608, "learning_rate": 0.000839051826236378, "loss": 1.3437, "step": 2651 }, { "epoch": 0.29, "grad_norm": 0.07350458824423961, "learning_rate": 0.0008389238426666294, "loss": 1.4984, "step": 2652 }, { "epoch": 0.29, "grad_norm": 0.07125971137650254, "learning_rate": 0.0008387958180011964, "loss": 1.329, "step": 2653 }, { "epoch": 0.29, "grad_norm": 0.0819044410255831, "learning_rate": 0.0008386677522556025, "loss": 1.372, "step": 2654 }, { "epoch": 0.29, "grad_norm": 0.07611520811269741, "learning_rate": 0.0008385396454453762, "loss": 1.6093, "step": 2655 }, { "epoch": 0.29, "grad_norm": 0.06776836737705744, "learning_rate": 0.0008384114975860507, "loss": 1.4628, "step": 2656 }, { "epoch": 0.29, "grad_norm": 0.07221637155692598, "learning_rate": 0.0008382833086931642, "loss": 1.2861, "step": 2657 }, { "epoch": 0.29, "grad_norm": 0.07581032939812374, "learning_rate": 0.0008381550787822605, "loss": 1.588, "step": 2658 }, { "epoch": 0.29, "grad_norm": 0.0767775626653725, "learning_rate": 0.0008380268078688877, "loss": 1.4529, "step": 2659 }, { "epoch": 0.29, "grad_norm": 0.07200170746516522, "learning_rate": 0.0008378984959685991, "loss": 1.4092, "step": 2660 }, { "epoch": 0.29, "grad_norm": 0.07172741580740591, "learning_rate": 0.0008377701430969528, "loss": 1.4825, "step": 2661 }, { "epoch": 0.29, "grad_norm": 0.06814074124177844, "learning_rate": 0.0008376417492695123, "loss": 1.4343, "step": 2662 }, { "epoch": 0.29, "grad_norm": 0.08848005244759329, "learning_rate": 0.0008375133145018457, "loss": 1.5059, "step": 2663 }, { "epoch": 0.29, "grad_norm": 0.07264186405227538, "learning_rate": 0.000837384838809526, "loss": 1.279, "step": 2664 }, { "epoch": 0.29, "grad_norm": 0.07900937324347093, "learning_rate": 0.0008372563222081316, "loss": 1.4473, "step": 2665 }, { "epoch": 0.29, "grad_norm": 0.08847583018877739, "learning_rate": 0.0008371277647132453, "loss": 1.5201, "step": 2666 }, { "epoch": 0.29, "grad_norm": 0.07547179396139557, "learning_rate": 0.0008369991663404555, "loss": 1.332, "step": 2667 }, { "epoch": 0.29, "grad_norm": 0.07603778101136066, "learning_rate": 0.0008368705271053547, "loss": 1.2928, "step": 2668 }, { "epoch": 0.29, "grad_norm": 0.06737944518535066, "learning_rate": 0.0008367418470235413, "loss": 1.3386, "step": 2669 }, { "epoch": 0.29, "grad_norm": 0.09022510747259038, "learning_rate": 0.0008366131261106179, "loss": 1.6164, "step": 2670 }, { "epoch": 0.29, "grad_norm": 0.07686893921895585, "learning_rate": 0.0008364843643821927, "loss": 1.4379, "step": 2671 }, { "epoch": 0.29, "grad_norm": 0.07283643227924404, "learning_rate": 0.000836355561853878, "loss": 1.4125, "step": 2672 }, { "epoch": 0.29, "grad_norm": 0.07205487947966797, "learning_rate": 0.0008362267185412919, "loss": 1.4272, "step": 2673 }, { "epoch": 0.29, "grad_norm": 0.07591904501548323, "learning_rate": 0.0008360978344600572, "loss": 1.3735, "step": 2674 }, { "epoch": 0.29, "grad_norm": 0.0840464697876995, "learning_rate": 0.0008359689096258011, "loss": 1.4788, "step": 2675 }, { "epoch": 0.29, "grad_norm": 0.07417724728902096, "learning_rate": 0.0008358399440541567, "loss": 1.4503, "step": 2676 }, { "epoch": 0.29, "grad_norm": 0.07269091407325959, "learning_rate": 0.0008357109377607611, "loss": 1.4989, "step": 2677 }, { "epoch": 0.29, "grad_norm": 0.08418319284393984, "learning_rate": 0.0008355818907612569, "loss": 1.557, "step": 2678 }, { "epoch": 0.29, "grad_norm": 0.07474993738977974, "learning_rate": 0.0008354528030712915, "loss": 1.4532, "step": 2679 }, { "epoch": 0.29, "grad_norm": 0.07500813692751027, "learning_rate": 0.0008353236747065174, "loss": 1.634, "step": 2680 }, { "epoch": 0.29, "grad_norm": 0.0829101197991892, "learning_rate": 0.0008351945056825917, "loss": 1.3884, "step": 2681 }, { "epoch": 0.29, "grad_norm": 0.07835371691603794, "learning_rate": 0.0008350652960151765, "loss": 1.4707, "step": 2682 }, { "epoch": 0.29, "grad_norm": 0.08405599478984883, "learning_rate": 0.0008349360457199391, "loss": 1.4975, "step": 2683 }, { "epoch": 0.29, "grad_norm": 0.07659122988719483, "learning_rate": 0.0008348067548125514, "loss": 1.2843, "step": 2684 }, { "epoch": 0.29, "grad_norm": 0.07639967954015899, "learning_rate": 0.0008346774233086904, "loss": 1.4632, "step": 2685 }, { "epoch": 0.29, "grad_norm": 0.08634703761278015, "learning_rate": 0.000834548051224038, "loss": 1.4512, "step": 2686 }, { "epoch": 0.29, "grad_norm": 0.07601354417275835, "learning_rate": 0.0008344186385742811, "loss": 1.455, "step": 2687 }, { "epoch": 0.29, "grad_norm": 0.07770416940481581, "learning_rate": 0.0008342891853751114, "loss": 1.4682, "step": 2688 }, { "epoch": 0.29, "grad_norm": 0.0738525816231597, "learning_rate": 0.0008341596916422254, "loss": 1.459, "step": 2689 }, { "epoch": 0.29, "grad_norm": 0.08505835511210237, "learning_rate": 0.0008340301573913249, "loss": 1.567, "step": 2690 }, { "epoch": 0.29, "grad_norm": 0.07871515325468663, "learning_rate": 0.0008339005826381161, "loss": 1.3747, "step": 2691 }, { "epoch": 0.29, "grad_norm": 0.07020707472104386, "learning_rate": 0.0008337709673983106, "loss": 1.4348, "step": 2692 }, { "epoch": 0.29, "grad_norm": 0.07336686807263738, "learning_rate": 0.0008336413116876245, "loss": 1.3568, "step": 2693 }, { "epoch": 0.29, "grad_norm": 0.07164094008232932, "learning_rate": 0.0008335116155217793, "loss": 1.4443, "step": 2694 }, { "epoch": 0.29, "grad_norm": 0.06686213937741443, "learning_rate": 0.0008333818789165008, "loss": 1.4464, "step": 2695 }, { "epoch": 0.29, "grad_norm": 0.07568116953259008, "learning_rate": 0.00083325210188752, "loss": 1.3801, "step": 2696 }, { "epoch": 0.29, "grad_norm": 0.07516356051721163, "learning_rate": 0.0008331222844505733, "loss": 1.2997, "step": 2697 }, { "epoch": 0.29, "grad_norm": 0.06746999225222641, "learning_rate": 0.000832992426621401, "loss": 1.4718, "step": 2698 }, { "epoch": 0.29, "grad_norm": 0.07642008615200645, "learning_rate": 0.000832862528415749, "loss": 1.4935, "step": 2699 }, { "epoch": 0.29, "grad_norm": 0.0784700225660804, "learning_rate": 0.0008327325898493677, "loss": 1.5099, "step": 2700 }, { "epoch": 0.29, "grad_norm": 0.07330172661899201, "learning_rate": 0.000832602610938013, "loss": 1.5399, "step": 2701 }, { "epoch": 0.29, "grad_norm": 0.0717009174794634, "learning_rate": 0.000832472591697445, "loss": 1.4603, "step": 2702 }, { "epoch": 0.29, "grad_norm": 0.07255700608309591, "learning_rate": 0.0008323425321434291, "loss": 1.3573, "step": 2703 }, { "epoch": 0.29, "grad_norm": 0.07581764931155754, "learning_rate": 0.0008322124322917353, "loss": 1.4361, "step": 2704 }, { "epoch": 0.29, "grad_norm": 0.07880780191271873, "learning_rate": 0.0008320822921581388, "loss": 1.4721, "step": 2705 }, { "epoch": 0.29, "grad_norm": 0.07603032304986217, "learning_rate": 0.0008319521117584194, "loss": 1.2977, "step": 2706 }, { "epoch": 0.29, "grad_norm": 0.06423068223877285, "learning_rate": 0.0008318218911083623, "loss": 1.4188, "step": 2707 }, { "epoch": 0.29, "grad_norm": 0.08181975268370313, "learning_rate": 0.0008316916302237568, "loss": 1.5171, "step": 2708 }, { "epoch": 0.29, "grad_norm": 0.0788819316295538, "learning_rate": 0.0008315613291203976, "loss": 1.4912, "step": 2709 }, { "epoch": 0.29, "grad_norm": 0.07534497094282523, "learning_rate": 0.0008314309878140842, "loss": 1.4276, "step": 2710 }, { "epoch": 0.29, "grad_norm": 0.07599726403491437, "learning_rate": 0.000831300606320621, "loss": 1.3766, "step": 2711 }, { "epoch": 0.29, "grad_norm": 0.08399386546384041, "learning_rate": 0.0008311701846558171, "loss": 1.4363, "step": 2712 }, { "epoch": 0.29, "grad_norm": 0.07194774780906135, "learning_rate": 0.0008310397228354864, "loss": 1.359, "step": 2713 }, { "epoch": 0.29, "grad_norm": 0.07170651795131391, "learning_rate": 0.0008309092208754483, "loss": 1.4945, "step": 2714 }, { "epoch": 0.29, "grad_norm": 0.09471088333737213, "learning_rate": 0.0008307786787915261, "loss": 1.6064, "step": 2715 }, { "epoch": 0.29, "grad_norm": 0.07001458324123662, "learning_rate": 0.0008306480965995489, "loss": 1.477, "step": 2716 }, { "epoch": 0.29, "grad_norm": 0.07331420219072145, "learning_rate": 0.0008305174743153499, "loss": 1.4956, "step": 2717 }, { "epoch": 0.29, "grad_norm": 0.08307822550827987, "learning_rate": 0.000830386811954768, "loss": 1.4908, "step": 2718 }, { "epoch": 0.29, "grad_norm": 0.10553037644370117, "learning_rate": 0.0008302561095336459, "loss": 1.502, "step": 2719 }, { "epoch": 0.29, "grad_norm": 0.07753527659679234, "learning_rate": 0.0008301253670678319, "loss": 1.3897, "step": 2720 }, { "epoch": 0.29, "grad_norm": 0.07645511236420256, "learning_rate": 0.0008299945845731792, "loss": 1.4402, "step": 2721 }, { "epoch": 0.29, "grad_norm": 0.07836695249123142, "learning_rate": 0.0008298637620655453, "loss": 1.3848, "step": 2722 }, { "epoch": 0.29, "grad_norm": 0.08857471945388738, "learning_rate": 0.0008297328995607932, "loss": 1.3904, "step": 2723 }, { "epoch": 0.29, "grad_norm": 0.08127929407000808, "learning_rate": 0.0008296019970747902, "loss": 1.2742, "step": 2724 }, { "epoch": 0.29, "grad_norm": 0.07769020055838141, "learning_rate": 0.0008294710546234086, "loss": 1.2634, "step": 2725 }, { "epoch": 0.29, "grad_norm": 0.07820145025470278, "learning_rate": 0.0008293400722225259, "loss": 1.4207, "step": 2726 }, { "epoch": 0.29, "grad_norm": 0.08332673123651665, "learning_rate": 0.0008292090498880241, "loss": 1.4028, "step": 2727 }, { "epoch": 0.29, "grad_norm": 0.08469613645080842, "learning_rate": 0.0008290779876357899, "loss": 1.507, "step": 2728 }, { "epoch": 0.29, "grad_norm": 0.07603799229175152, "learning_rate": 0.0008289468854817153, "loss": 1.4011, "step": 2729 }, { "epoch": 0.29, "grad_norm": 0.07966344010849837, "learning_rate": 0.0008288157434416967, "loss": 1.4921, "step": 2730 }, { "epoch": 0.29, "grad_norm": 0.07132344853654753, "learning_rate": 0.0008286845615316356, "loss": 1.3162, "step": 2731 }, { "epoch": 0.29, "grad_norm": 0.07104434671857382, "learning_rate": 0.0008285533397674382, "loss": 1.373, "step": 2732 }, { "epoch": 0.29, "grad_norm": 0.08032338161910683, "learning_rate": 0.0008284220781650158, "loss": 1.4925, "step": 2733 }, { "epoch": 0.29, "grad_norm": 0.0877688716941064, "learning_rate": 0.000828290776740284, "loss": 1.4924, "step": 2734 }, { "epoch": 0.29, "grad_norm": 0.07186097832409206, "learning_rate": 0.0008281594355091641, "loss": 1.3546, "step": 2735 }, { "epoch": 0.29, "grad_norm": 0.07622048007118681, "learning_rate": 0.0008280280544875811, "loss": 1.4452, "step": 2736 }, { "epoch": 0.29, "grad_norm": 0.07849727387690847, "learning_rate": 0.0008278966336914655, "loss": 1.4423, "step": 2737 }, { "epoch": 0.29, "grad_norm": 0.07222521947527058, "learning_rate": 0.0008277651731367528, "loss": 1.4843, "step": 2738 }, { "epoch": 0.29, "grad_norm": 0.07099999895203911, "learning_rate": 0.0008276336728393828, "loss": 1.4156, "step": 2739 }, { "epoch": 0.29, "grad_norm": 0.06822141452088458, "learning_rate": 0.0008275021328153006, "loss": 1.3963, "step": 2740 }, { "epoch": 0.29, "grad_norm": 0.0740043671559136, "learning_rate": 0.0008273705530804554, "loss": 1.4871, "step": 2741 }, { "epoch": 0.29, "grad_norm": 0.07094511989097507, "learning_rate": 0.0008272389336508022, "loss": 1.2901, "step": 2742 }, { "epoch": 0.29, "grad_norm": 0.07948023468384699, "learning_rate": 0.0008271072745423, "loss": 1.4011, "step": 2743 }, { "epoch": 0.29, "grad_norm": 0.08280207297295716, "learning_rate": 0.0008269755757709132, "loss": 1.5201, "step": 2744 }, { "epoch": 0.3, "grad_norm": 0.088306395939248, "learning_rate": 0.0008268438373526106, "loss": 1.517, "step": 2745 }, { "epoch": 0.3, "grad_norm": 0.07326544074639216, "learning_rate": 0.0008267120593033659, "loss": 1.4476, "step": 2746 }, { "epoch": 0.3, "grad_norm": 0.08336236489635436, "learning_rate": 0.0008265802416391577, "loss": 1.2915, "step": 2747 }, { "epoch": 0.3, "grad_norm": 0.08857755307752961, "learning_rate": 0.0008264483843759691, "loss": 1.4161, "step": 2748 }, { "epoch": 0.3, "grad_norm": 0.08720921111079, "learning_rate": 0.0008263164875297887, "loss": 1.3664, "step": 2749 }, { "epoch": 0.3, "grad_norm": 0.0849530419155793, "learning_rate": 0.0008261845511166092, "loss": 1.4203, "step": 2750 }, { "epoch": 0.3, "grad_norm": 0.08352948658753696, "learning_rate": 0.0008260525751524282, "loss": 1.4433, "step": 2751 }, { "epoch": 0.3, "grad_norm": 0.07117468986953664, "learning_rate": 0.0008259205596532484, "loss": 1.4106, "step": 2752 }, { "epoch": 0.3, "grad_norm": 0.09710061791847775, "learning_rate": 0.0008257885046350773, "loss": 1.3963, "step": 2753 }, { "epoch": 0.3, "grad_norm": 0.08773828788389773, "learning_rate": 0.0008256564101139266, "loss": 1.4722, "step": 2754 }, { "epoch": 0.3, "grad_norm": 0.0782548391737564, "learning_rate": 0.0008255242761058135, "loss": 1.3094, "step": 2755 }, { "epoch": 0.3, "grad_norm": 0.08430634648229657, "learning_rate": 0.0008253921026267599, "loss": 1.6252, "step": 2756 }, { "epoch": 0.3, "grad_norm": 0.08229486931029277, "learning_rate": 0.0008252598896927918, "loss": 1.4528, "step": 2757 }, { "epoch": 0.3, "grad_norm": 0.08570349190519552, "learning_rate": 0.0008251276373199408, "loss": 1.445, "step": 2758 }, { "epoch": 0.3, "grad_norm": 0.0823944896586127, "learning_rate": 0.0008249953455242429, "loss": 1.4586, "step": 2759 }, { "epoch": 0.3, "grad_norm": 0.078848461846731, "learning_rate": 0.000824863014321739, "loss": 1.3897, "step": 2760 }, { "epoch": 0.3, "grad_norm": 0.07616890999329672, "learning_rate": 0.0008247306437284747, "loss": 1.3357, "step": 2761 }, { "epoch": 0.3, "grad_norm": 0.07291692672694469, "learning_rate": 0.0008245982337605003, "loss": 1.4341, "step": 2762 }, { "epoch": 0.3, "grad_norm": 0.0799698621170182, "learning_rate": 0.0008244657844338708, "loss": 1.4776, "step": 2763 }, { "epoch": 0.3, "grad_norm": 0.07410913925115305, "learning_rate": 0.0008243332957646464, "loss": 1.4272, "step": 2764 }, { "epoch": 0.3, "grad_norm": 0.0725370622239953, "learning_rate": 0.0008242007677688918, "loss": 1.3854, "step": 2765 }, { "epoch": 0.3, "grad_norm": 0.07919036331246422, "learning_rate": 0.0008240682004626765, "loss": 1.4331, "step": 2766 }, { "epoch": 0.3, "grad_norm": 0.07625402550594935, "learning_rate": 0.0008239355938620745, "loss": 1.4987, "step": 2767 }, { "epoch": 0.3, "grad_norm": 0.07087029054031294, "learning_rate": 0.0008238029479831652, "loss": 1.5275, "step": 2768 }, { "epoch": 0.3, "grad_norm": 0.09117193245742936, "learning_rate": 0.0008236702628420319, "loss": 1.5647, "step": 2769 }, { "epoch": 0.3, "grad_norm": 0.07650468483821572, "learning_rate": 0.0008235375384547635, "loss": 1.4506, "step": 2770 }, { "epoch": 0.3, "grad_norm": 0.09424438605028439, "learning_rate": 0.000823404774837453, "loss": 1.4702, "step": 2771 }, { "epoch": 0.3, "grad_norm": 0.07393962832466713, "learning_rate": 0.0008232719720061987, "loss": 1.3698, "step": 2772 }, { "epoch": 0.3, "grad_norm": 0.07453913303088434, "learning_rate": 0.0008231391299771034, "loss": 1.3223, "step": 2773 }, { "epoch": 0.3, "grad_norm": 0.08888193416857225, "learning_rate": 0.0008230062487662745, "loss": 1.5193, "step": 2774 }, { "epoch": 0.3, "grad_norm": 0.08072388459301796, "learning_rate": 0.0008228733283898243, "loss": 1.4469, "step": 2775 }, { "epoch": 0.3, "grad_norm": 0.08710297536435438, "learning_rate": 0.00082274036886387, "loss": 1.4754, "step": 2776 }, { "epoch": 0.3, "grad_norm": 0.08359284400448978, "learning_rate": 0.0008226073702045333, "loss": 1.5685, "step": 2777 }, { "epoch": 0.3, "grad_norm": 0.08749874887891788, "learning_rate": 0.0008224743324279407, "loss": 1.4436, "step": 2778 }, { "epoch": 0.3, "grad_norm": 0.07760028764078293, "learning_rate": 0.0008223412555502236, "loss": 1.5412, "step": 2779 }, { "epoch": 0.3, "grad_norm": 0.08406358133209071, "learning_rate": 0.000822208139587518, "loss": 1.5003, "step": 2780 }, { "epoch": 0.3, "grad_norm": 0.08354272711327344, "learning_rate": 0.0008220749845559648, "loss": 1.4887, "step": 2781 }, { "epoch": 0.3, "grad_norm": 0.07861669011751445, "learning_rate": 0.0008219417904717091, "loss": 1.3924, "step": 2782 }, { "epoch": 0.3, "grad_norm": 0.06729249498959294, "learning_rate": 0.0008218085573509016, "loss": 1.4451, "step": 2783 }, { "epoch": 0.3, "grad_norm": 0.07110046725712708, "learning_rate": 0.0008216752852096969, "loss": 1.5831, "step": 2784 }, { "epoch": 0.3, "grad_norm": 0.07728581028416445, "learning_rate": 0.0008215419740642549, "loss": 1.4519, "step": 2785 }, { "epoch": 0.3, "grad_norm": 0.07445156605390331, "learning_rate": 0.0008214086239307401, "loss": 1.4988, "step": 2786 }, { "epoch": 0.3, "grad_norm": 0.0826710728733664, "learning_rate": 0.0008212752348253216, "loss": 1.3996, "step": 2787 }, { "epoch": 0.3, "grad_norm": 0.10887963866407403, "learning_rate": 0.0008211418067641734, "loss": 1.4847, "step": 2788 }, { "epoch": 0.3, "grad_norm": 0.08905891287841133, "learning_rate": 0.0008210083397634738, "loss": 1.4369, "step": 2789 }, { "epoch": 0.3, "grad_norm": 0.08396342066020057, "learning_rate": 0.0008208748338394064, "loss": 1.2997, "step": 2790 }, { "epoch": 0.3, "grad_norm": 0.08154082009916738, "learning_rate": 0.000820741289008159, "loss": 1.5037, "step": 2791 }, { "epoch": 0.3, "grad_norm": 0.0800821125968288, "learning_rate": 0.0008206077052859246, "loss": 1.5425, "step": 2792 }, { "epoch": 0.3, "grad_norm": 0.07327743953485256, "learning_rate": 0.0008204740826889008, "loss": 1.4202, "step": 2793 }, { "epoch": 0.3, "grad_norm": 0.06753263368726979, "learning_rate": 0.0008203404212332897, "loss": 1.4971, "step": 2794 }, { "epoch": 0.3, "grad_norm": 0.06862134633875801, "learning_rate": 0.0008202067209352979, "loss": 1.3466, "step": 2795 }, { "epoch": 0.3, "grad_norm": 0.06855119036723545, "learning_rate": 0.0008200729818111372, "loss": 1.466, "step": 2796 }, { "epoch": 0.3, "grad_norm": 0.08170769665034927, "learning_rate": 0.0008199392038770242, "loss": 1.4341, "step": 2797 }, { "epoch": 0.3, "grad_norm": 0.06737320855709193, "learning_rate": 0.0008198053871491797, "loss": 1.6141, "step": 2798 }, { "epoch": 0.3, "grad_norm": 0.0659153505087909, "learning_rate": 0.0008196715316438294, "loss": 1.4589, "step": 2799 }, { "epoch": 0.3, "grad_norm": 0.08287915047683615, "learning_rate": 0.0008195376373772039, "loss": 1.4108, "step": 2800 }, { "epoch": 0.3, "grad_norm": 0.08024241150674424, "learning_rate": 0.0008194037043655382, "loss": 1.2903, "step": 2801 }, { "epoch": 0.3, "grad_norm": 0.07159138451861921, "learning_rate": 0.0008192697326250722, "loss": 1.4705, "step": 2802 }, { "epoch": 0.3, "grad_norm": 0.07311652052094464, "learning_rate": 0.0008191357221720506, "loss": 1.4327, "step": 2803 }, { "epoch": 0.3, "grad_norm": 0.0764745512460257, "learning_rate": 0.0008190016730227224, "loss": 1.5245, "step": 2804 }, { "epoch": 0.3, "grad_norm": 0.07772607729105947, "learning_rate": 0.0008188675851933414, "loss": 1.5384, "step": 2805 }, { "epoch": 0.3, "grad_norm": 0.06669285839551875, "learning_rate": 0.0008187334587001664, "loss": 1.3745, "step": 2806 }, { "epoch": 0.3, "grad_norm": 0.08076034380977436, "learning_rate": 0.0008185992935594607, "loss": 1.4328, "step": 2807 }, { "epoch": 0.3, "grad_norm": 0.08093218565950956, "learning_rate": 0.0008184650897874923, "loss": 1.4824, "step": 2808 }, { "epoch": 0.3, "grad_norm": 0.07044639355943785, "learning_rate": 0.000818330847400534, "loss": 1.4994, "step": 2809 }, { "epoch": 0.3, "grad_norm": 0.07088399140907917, "learning_rate": 0.0008181965664148628, "loss": 1.4263, "step": 2810 }, { "epoch": 0.3, "grad_norm": 0.07363030280128995, "learning_rate": 0.0008180622468467611, "loss": 1.2771, "step": 2811 }, { "epoch": 0.3, "grad_norm": 0.08144611471453442, "learning_rate": 0.0008179278887125152, "loss": 1.4055, "step": 2812 }, { "epoch": 0.3, "grad_norm": 0.07736857460180284, "learning_rate": 0.000817793492028417, "loss": 1.4156, "step": 2813 }, { "epoch": 0.3, "grad_norm": 0.06555042851554732, "learning_rate": 0.0008176590568107622, "loss": 1.4743, "step": 2814 }, { "epoch": 0.3, "grad_norm": 0.07479045299224463, "learning_rate": 0.0008175245830758515, "loss": 1.49, "step": 2815 }, { "epoch": 0.3, "grad_norm": 0.0841811461554626, "learning_rate": 0.0008173900708399906, "loss": 1.4209, "step": 2816 }, { "epoch": 0.3, "grad_norm": 0.07004572589189338, "learning_rate": 0.0008172555201194894, "loss": 1.5112, "step": 2817 }, { "epoch": 0.3, "grad_norm": 0.07560307310975573, "learning_rate": 0.0008171209309306625, "loss": 1.4413, "step": 2818 }, { "epoch": 0.3, "grad_norm": 0.07308794549332547, "learning_rate": 0.0008169863032898296, "loss": 1.3943, "step": 2819 }, { "epoch": 0.3, "grad_norm": 0.07782205167088278, "learning_rate": 0.0008168516372133145, "loss": 1.4607, "step": 2820 }, { "epoch": 0.3, "grad_norm": 0.08067353235727805, "learning_rate": 0.0008167169327174459, "loss": 1.485, "step": 2821 }, { "epoch": 0.3, "grad_norm": 0.07605076294811133, "learning_rate": 0.0008165821898185576, "loss": 1.3621, "step": 2822 }, { "epoch": 0.3, "grad_norm": 0.07507805847597698, "learning_rate": 0.0008164474085329872, "loss": 1.4791, "step": 2823 }, { "epoch": 0.3, "grad_norm": 0.08074805704595682, "learning_rate": 0.0008163125888770776, "loss": 1.4916, "step": 2824 }, { "epoch": 0.3, "grad_norm": 0.08492739058974759, "learning_rate": 0.0008161777308671762, "loss": 1.529, "step": 2825 }, { "epoch": 0.3, "grad_norm": 0.06727666112619586, "learning_rate": 0.0008160428345196347, "loss": 1.3156, "step": 2826 }, { "epoch": 0.3, "grad_norm": 0.0688311847976553, "learning_rate": 0.00081590789985081, "loss": 1.5281, "step": 2827 }, { "epoch": 0.3, "grad_norm": 0.09836022791746445, "learning_rate": 0.0008157729268770635, "loss": 1.4562, "step": 2828 }, { "epoch": 0.3, "grad_norm": 0.06422326070659888, "learning_rate": 0.0008156379156147608, "loss": 1.3688, "step": 2829 }, { "epoch": 0.3, "grad_norm": 0.07043174146263807, "learning_rate": 0.0008155028660802728, "loss": 1.5884, "step": 2830 }, { "epoch": 0.3, "grad_norm": 0.083964177686042, "learning_rate": 0.0008153677782899745, "loss": 1.5427, "step": 2831 }, { "epoch": 0.3, "grad_norm": 0.06899335085260903, "learning_rate": 0.0008152326522602458, "loss": 1.4434, "step": 2832 }, { "epoch": 0.3, "grad_norm": 0.08238764103901412, "learning_rate": 0.0008150974880074713, "loss": 1.5017, "step": 2833 }, { "epoch": 0.3, "grad_norm": 0.07391969606608612, "learning_rate": 0.0008149622855480401, "loss": 1.3963, "step": 2834 }, { "epoch": 0.3, "grad_norm": 0.08318721513910766, "learning_rate": 0.000814827044898346, "loss": 1.5438, "step": 2835 }, { "epoch": 0.3, "grad_norm": 0.08554886321365836, "learning_rate": 0.0008146917660747872, "loss": 1.4779, "step": 2836 }, { "epoch": 0.3, "grad_norm": 0.07698488554302305, "learning_rate": 0.0008145564490937668, "loss": 1.4578, "step": 2837 }, { "epoch": 0.31, "grad_norm": 0.07037277585204749, "learning_rate": 0.0008144210939716927, "loss": 1.4635, "step": 2838 }, { "epoch": 0.31, "grad_norm": 0.07461934381938339, "learning_rate": 0.0008142857007249768, "loss": 1.4102, "step": 2839 }, { "epoch": 0.31, "grad_norm": 0.08392078326405981, "learning_rate": 0.0008141502693700363, "loss": 1.4993, "step": 2840 }, { "epoch": 0.31, "grad_norm": 0.06748677350552709, "learning_rate": 0.0008140147999232925, "loss": 1.5196, "step": 2841 }, { "epoch": 0.31, "grad_norm": 0.07883616211063142, "learning_rate": 0.0008138792924011717, "loss": 1.2979, "step": 2842 }, { "epoch": 0.31, "grad_norm": 0.06773196082861774, "learning_rate": 0.0008137437468201047, "loss": 1.3348, "step": 2843 }, { "epoch": 0.31, "grad_norm": 0.06988800909616495, "learning_rate": 0.0008136081631965267, "loss": 1.4625, "step": 2844 }, { "epoch": 0.31, "grad_norm": 0.07477987235145722, "learning_rate": 0.0008134725415468775, "loss": 1.4027, "step": 2845 }, { "epoch": 0.31, "grad_norm": 0.07010726064819425, "learning_rate": 0.0008133368818876022, "loss": 1.4468, "step": 2846 }, { "epoch": 0.31, "grad_norm": 0.08261004610399639, "learning_rate": 0.0008132011842351496, "loss": 1.3365, "step": 2847 }, { "epoch": 0.31, "grad_norm": 0.08736930521087224, "learning_rate": 0.0008130654486059737, "loss": 1.566, "step": 2848 }, { "epoch": 0.31, "grad_norm": 0.06642310720948336, "learning_rate": 0.0008129296750165329, "loss": 1.4061, "step": 2849 }, { "epoch": 0.31, "grad_norm": 0.08933889777362511, "learning_rate": 0.0008127938634832901, "loss": 1.3926, "step": 2850 }, { "epoch": 0.31, "grad_norm": 0.0724623698609669, "learning_rate": 0.0008126580140227131, "loss": 1.4211, "step": 2851 }, { "epoch": 0.31, "grad_norm": 0.07930876972688328, "learning_rate": 0.0008125221266512739, "loss": 1.4018, "step": 2852 }, { "epoch": 0.31, "grad_norm": 0.07774062520927012, "learning_rate": 0.0008123862013854495, "loss": 1.3741, "step": 2853 }, { "epoch": 0.31, "grad_norm": 0.07060780534531708, "learning_rate": 0.0008122502382417211, "loss": 1.507, "step": 2854 }, { "epoch": 0.31, "grad_norm": 0.07428692679133919, "learning_rate": 0.0008121142372365749, "loss": 1.4531, "step": 2855 }, { "epoch": 0.31, "grad_norm": 0.07165081198741169, "learning_rate": 0.0008119781983865013, "loss": 1.3349, "step": 2856 }, { "epoch": 0.31, "grad_norm": 0.07103201332670216, "learning_rate": 0.0008118421217079958, "loss": 1.4967, "step": 2857 }, { "epoch": 0.31, "grad_norm": 0.06745289271534266, "learning_rate": 0.0008117060072175578, "loss": 1.3217, "step": 2858 }, { "epoch": 0.31, "grad_norm": 0.07565056480064823, "learning_rate": 0.0008115698549316919, "loss": 1.3472, "step": 2859 }, { "epoch": 0.31, "grad_norm": 0.08917104470742244, "learning_rate": 0.0008114336648669068, "loss": 1.4251, "step": 2860 }, { "epoch": 0.31, "grad_norm": 0.07999502935604388, "learning_rate": 0.0008112974370397163, "loss": 1.4453, "step": 2861 }, { "epoch": 0.31, "grad_norm": 0.07927721882772928, "learning_rate": 0.0008111611714666382, "loss": 1.4793, "step": 2862 }, { "epoch": 0.31, "grad_norm": 0.07378233339106233, "learning_rate": 0.0008110248681641956, "loss": 1.3303, "step": 2863 }, { "epoch": 0.31, "grad_norm": 0.0716702577172704, "learning_rate": 0.0008108885271489153, "loss": 1.39, "step": 2864 }, { "epoch": 0.31, "grad_norm": 0.07223233377589355, "learning_rate": 0.0008107521484373292, "loss": 1.433, "step": 2865 }, { "epoch": 0.31, "grad_norm": 0.07618471336989437, "learning_rate": 0.000810615732045974, "loss": 1.4637, "step": 2866 }, { "epoch": 0.31, "grad_norm": 0.07505002942237572, "learning_rate": 0.0008104792779913903, "loss": 1.5149, "step": 2867 }, { "epoch": 0.31, "grad_norm": 0.09599552930508799, "learning_rate": 0.0008103427862901238, "loss": 1.453, "step": 2868 }, { "epoch": 0.31, "grad_norm": 0.08413359365818811, "learning_rate": 0.0008102062569587244, "loss": 1.299, "step": 2869 }, { "epoch": 0.31, "grad_norm": 0.08334424197676094, "learning_rate": 0.0008100696900137469, "loss": 1.5173, "step": 2870 }, { "epoch": 0.31, "grad_norm": 0.0681229530596204, "learning_rate": 0.0008099330854717508, "loss": 1.4892, "step": 2871 }, { "epoch": 0.31, "grad_norm": 0.0826144462608642, "learning_rate": 0.0008097964433492993, "loss": 1.5718, "step": 2872 }, { "epoch": 0.31, "grad_norm": 0.07647809806526784, "learning_rate": 0.0008096597636629612, "loss": 1.3691, "step": 2873 }, { "epoch": 0.31, "grad_norm": 0.07280620512949906, "learning_rate": 0.0008095230464293091, "loss": 1.416, "step": 2874 }, { "epoch": 0.31, "grad_norm": 0.0881966046359378, "learning_rate": 0.0008093862916649207, "loss": 1.4359, "step": 2875 }, { "epoch": 0.31, "grad_norm": 0.07054285637471477, "learning_rate": 0.0008092494993863775, "loss": 1.4334, "step": 2876 }, { "epoch": 0.31, "grad_norm": 0.07684187702267566, "learning_rate": 0.0008091126696102665, "loss": 1.3943, "step": 2877 }, { "epoch": 0.31, "grad_norm": 0.06921697707144545, "learning_rate": 0.0008089758023531788, "loss": 1.2889, "step": 2878 }, { "epoch": 0.31, "grad_norm": 0.0741571622600297, "learning_rate": 0.0008088388976317096, "loss": 1.4221, "step": 2879 }, { "epoch": 0.31, "grad_norm": 0.0770392792484649, "learning_rate": 0.0008087019554624595, "loss": 1.3828, "step": 2880 }, { "epoch": 0.31, "grad_norm": 0.07399574456270709, "learning_rate": 0.000808564975862033, "loss": 1.5514, "step": 2881 }, { "epoch": 0.31, "grad_norm": 0.08204482880103793, "learning_rate": 0.0008084279588470393, "loss": 1.4972, "step": 2882 }, { "epoch": 0.31, "grad_norm": 0.07952716032297752, "learning_rate": 0.0008082909044340924, "loss": 1.4671, "step": 2883 }, { "epoch": 0.31, "grad_norm": 0.08374489001130474, "learning_rate": 0.0008081538126398105, "loss": 1.4455, "step": 2884 }, { "epoch": 0.31, "grad_norm": 0.06973746802826421, "learning_rate": 0.0008080166834808165, "loss": 1.5941, "step": 2885 }, { "epoch": 0.31, "grad_norm": 0.07006537617171173, "learning_rate": 0.0008078795169737376, "loss": 1.4264, "step": 2886 }, { "epoch": 0.31, "grad_norm": 0.08481181694594125, "learning_rate": 0.0008077423131352059, "loss": 1.3789, "step": 2887 }, { "epoch": 0.31, "grad_norm": 0.09125579107905064, "learning_rate": 0.0008076050719818577, "loss": 1.4581, "step": 2888 }, { "epoch": 0.31, "grad_norm": 0.07959044783895391, "learning_rate": 0.0008074677935303342, "loss": 1.3779, "step": 2889 }, { "epoch": 0.31, "grad_norm": 0.0794410279675721, "learning_rate": 0.0008073304777972807, "loss": 1.4972, "step": 2890 }, { "epoch": 0.31, "grad_norm": 0.07737642113539102, "learning_rate": 0.0008071931247993472, "loss": 1.378, "step": 2891 }, { "epoch": 0.31, "grad_norm": 0.07837475551562528, "learning_rate": 0.0008070557345531881, "loss": 1.4374, "step": 2892 }, { "epoch": 0.31, "grad_norm": 0.0753630649514313, "learning_rate": 0.0008069183070754628, "loss": 1.4086, "step": 2893 }, { "epoch": 0.31, "grad_norm": 0.0896110774192796, "learning_rate": 0.0008067808423828347, "loss": 1.4894, "step": 2894 }, { "epoch": 0.31, "grad_norm": 0.0825189232098451, "learning_rate": 0.0008066433404919718, "loss": 1.5712, "step": 2895 }, { "epoch": 0.31, "grad_norm": 0.08237890387777524, "learning_rate": 0.0008065058014195465, "loss": 1.3692, "step": 2896 }, { "epoch": 0.31, "grad_norm": 0.0742473531492679, "learning_rate": 0.0008063682251822363, "loss": 1.3707, "step": 2897 }, { "epoch": 0.31, "grad_norm": 0.07624379455220504, "learning_rate": 0.0008062306117967225, "loss": 1.3805, "step": 2898 }, { "epoch": 0.31, "grad_norm": 0.0806083050065474, "learning_rate": 0.0008060929612796914, "loss": 1.3719, "step": 2899 }, { "epoch": 0.31, "grad_norm": 0.07716589591308537, "learning_rate": 0.0008059552736478333, "loss": 1.3963, "step": 2900 }, { "epoch": 0.31, "grad_norm": 0.09220293711869569, "learning_rate": 0.0008058175489178436, "loss": 1.4691, "step": 2901 }, { "epoch": 0.31, "grad_norm": 0.07788974238950698, "learning_rate": 0.0008056797871064216, "loss": 1.3894, "step": 2902 }, { "epoch": 0.31, "grad_norm": 0.10235235836865986, "learning_rate": 0.0008055419882302719, "loss": 1.5451, "step": 2903 }, { "epoch": 0.31, "grad_norm": 0.07739987925558103, "learning_rate": 0.0008054041523061026, "loss": 1.3031, "step": 2904 }, { "epoch": 0.31, "grad_norm": 0.09149966036697384, "learning_rate": 0.000805266279350627, "loss": 1.4051, "step": 2905 }, { "epoch": 0.31, "grad_norm": 0.0873485165013668, "learning_rate": 0.0008051283693805625, "loss": 1.3646, "step": 2906 }, { "epoch": 0.31, "grad_norm": 0.08345322596031957, "learning_rate": 0.0008049904224126312, "loss": 1.4502, "step": 2907 }, { "epoch": 0.31, "grad_norm": 0.08088555125381701, "learning_rate": 0.0008048524384635598, "loss": 1.3885, "step": 2908 }, { "epoch": 0.31, "grad_norm": 0.08576366605222421, "learning_rate": 0.0008047144175500794, "loss": 1.452, "step": 2909 }, { "epoch": 0.31, "grad_norm": 0.07420507574850772, "learning_rate": 0.0008045763596889253, "loss": 1.4566, "step": 2910 }, { "epoch": 0.31, "grad_norm": 0.0704001420236665, "learning_rate": 0.0008044382648968374, "loss": 1.3832, "step": 2911 }, { "epoch": 0.31, "grad_norm": 0.07461222698093337, "learning_rate": 0.0008043001331905604, "loss": 1.3567, "step": 2912 }, { "epoch": 0.31, "grad_norm": 0.08177185770423774, "learning_rate": 0.0008041619645868433, "loss": 1.4646, "step": 2913 }, { "epoch": 0.31, "grad_norm": 0.07573707444529373, "learning_rate": 0.0008040237591024393, "loss": 1.457, "step": 2914 }, { "epoch": 0.31, "grad_norm": 0.10223308961472137, "learning_rate": 0.0008038855167541064, "loss": 1.4352, "step": 2915 }, { "epoch": 0.31, "grad_norm": 0.08263870527187396, "learning_rate": 0.0008037472375586067, "loss": 1.2946, "step": 2916 }, { "epoch": 0.31, "grad_norm": 0.08385596883014022, "learning_rate": 0.0008036089215327076, "loss": 1.3963, "step": 2917 }, { "epoch": 0.31, "grad_norm": 0.0765236562605998, "learning_rate": 0.00080347056869318, "loss": 1.4262, "step": 2918 }, { "epoch": 0.31, "grad_norm": 0.08764321552227011, "learning_rate": 0.0008033321790567996, "loss": 1.5388, "step": 2919 }, { "epoch": 0.31, "grad_norm": 0.07062198820275128, "learning_rate": 0.0008031937526403469, "loss": 1.3124, "step": 2920 }, { "epoch": 0.31, "grad_norm": 0.07193907290002885, "learning_rate": 0.0008030552894606063, "loss": 1.4052, "step": 2921 }, { "epoch": 0.31, "grad_norm": 0.08627791606011874, "learning_rate": 0.0008029167895343671, "loss": 1.494, "step": 2922 }, { "epoch": 0.31, "grad_norm": 0.07689630376058164, "learning_rate": 0.0008027782528784228, "loss": 1.5321, "step": 2923 }, { "epoch": 0.31, "grad_norm": 0.07838402069101297, "learning_rate": 0.0008026396795095716, "loss": 1.4008, "step": 2924 }, { "epoch": 0.31, "grad_norm": 0.07890700473282818, "learning_rate": 0.000802501069444616, "loss": 1.4139, "step": 2925 }, { "epoch": 0.31, "grad_norm": 0.07602723921168046, "learning_rate": 0.0008023624227003626, "loss": 1.5312, "step": 2926 }, { "epoch": 0.31, "grad_norm": 0.09021190956086227, "learning_rate": 0.0008022237392936231, "loss": 1.4854, "step": 2927 }, { "epoch": 0.31, "grad_norm": 0.0772411567904348, "learning_rate": 0.0008020850192412135, "loss": 1.3465, "step": 2928 }, { "epoch": 0.31, "grad_norm": 0.07531806908392204, "learning_rate": 0.0008019462625599536, "loss": 1.493, "step": 2929 }, { "epoch": 0.31, "grad_norm": 0.07284679179896028, "learning_rate": 0.0008018074692666686, "loss": 1.4981, "step": 2930 }, { "epoch": 0.32, "grad_norm": 0.06456369250701985, "learning_rate": 0.0008016686393781874, "loss": 1.487, "step": 2931 }, { "epoch": 0.32, "grad_norm": 0.06991722508371778, "learning_rate": 0.0008015297729113436, "loss": 1.4999, "step": 2932 }, { "epoch": 0.32, "grad_norm": 0.08148887404697132, "learning_rate": 0.0008013908698829752, "loss": 1.3856, "step": 2933 }, { "epoch": 0.32, "grad_norm": 0.08756760737824867, "learning_rate": 0.000801251930309925, "loss": 1.5644, "step": 2934 }, { "epoch": 0.32, "grad_norm": 0.06420419497450133, "learning_rate": 0.0008011129542090397, "loss": 1.3941, "step": 2935 }, { "epoch": 0.32, "grad_norm": 0.0796356391212337, "learning_rate": 0.0008009739415971704, "loss": 1.4468, "step": 2936 }, { "epoch": 0.32, "grad_norm": 0.07887732694508226, "learning_rate": 0.0008008348924911732, "loss": 1.5729, "step": 2937 }, { "epoch": 0.32, "grad_norm": 0.07020543228364666, "learning_rate": 0.0008006958069079081, "loss": 1.3993, "step": 2938 }, { "epoch": 0.32, "grad_norm": 0.0671047003370231, "learning_rate": 0.0008005566848642399, "loss": 1.3402, "step": 2939 }, { "epoch": 0.32, "grad_norm": 0.07054047003240703, "learning_rate": 0.0008004175263770372, "loss": 1.4655, "step": 2940 }, { "epoch": 0.32, "grad_norm": 0.07740737220087766, "learning_rate": 0.0008002783314631738, "loss": 1.4764, "step": 2941 }, { "epoch": 0.32, "grad_norm": 0.0728072252685772, "learning_rate": 0.0008001391001395277, "loss": 1.357, "step": 2942 }, { "epoch": 0.32, "grad_norm": 0.07554078318188316, "learning_rate": 0.0007999998324229809, "loss": 1.4548, "step": 2943 }, { "epoch": 0.32, "grad_norm": 0.08128226314789191, "learning_rate": 0.0007998605283304201, "loss": 1.5187, "step": 2944 }, { "epoch": 0.32, "grad_norm": 0.07245701659717234, "learning_rate": 0.0007997211878787365, "loss": 1.4871, "step": 2945 }, { "epoch": 0.32, "grad_norm": 0.08310930694834401, "learning_rate": 0.0007995818110848256, "loss": 1.437, "step": 2946 }, { "epoch": 0.32, "grad_norm": 0.08043969912327549, "learning_rate": 0.0007994423979655872, "loss": 1.4586, "step": 2947 }, { "epoch": 0.32, "grad_norm": 0.07315357331256875, "learning_rate": 0.0007993029485379257, "loss": 1.3868, "step": 2948 }, { "epoch": 0.32, "grad_norm": 0.08625764575142641, "learning_rate": 0.0007991634628187499, "loss": 1.5691, "step": 2949 }, { "epoch": 0.32, "grad_norm": 0.06859030969791598, "learning_rate": 0.0007990239408249729, "loss": 1.4585, "step": 2950 }, { "epoch": 0.32, "grad_norm": 0.08095069758093375, "learning_rate": 0.0007988843825735121, "loss": 1.4382, "step": 2951 }, { "epoch": 0.32, "grad_norm": 0.08455585505928823, "learning_rate": 0.0007987447880812895, "loss": 1.4736, "step": 2952 }, { "epoch": 0.32, "grad_norm": 0.07840102281047322, "learning_rate": 0.0007986051573652315, "loss": 1.3839, "step": 2953 }, { "epoch": 0.32, "grad_norm": 0.0781115296791198, "learning_rate": 0.0007984654904422685, "loss": 1.5133, "step": 2954 }, { "epoch": 0.32, "grad_norm": 0.0743966495861294, "learning_rate": 0.0007983257873293362, "loss": 1.2965, "step": 2955 }, { "epoch": 0.32, "grad_norm": 0.0751376147423777, "learning_rate": 0.0007981860480433733, "loss": 1.4109, "step": 2956 }, { "epoch": 0.32, "grad_norm": 0.08181895305123754, "learning_rate": 0.0007980462726013245, "loss": 1.4691, "step": 2957 }, { "epoch": 0.32, "grad_norm": 0.08562272032257762, "learning_rate": 0.0007979064610201372, "loss": 1.3148, "step": 2958 }, { "epoch": 0.32, "grad_norm": 0.07781075518252807, "learning_rate": 0.0007977666133167647, "loss": 1.3035, "step": 2959 }, { "epoch": 0.32, "grad_norm": 0.09447045090218623, "learning_rate": 0.0007976267295081636, "loss": 1.3363, "step": 2960 }, { "epoch": 0.32, "grad_norm": 0.08987328702071136, "learning_rate": 0.0007974868096112957, "loss": 1.3668, "step": 2961 }, { "epoch": 0.32, "grad_norm": 0.07679392751255011, "learning_rate": 0.0007973468536431266, "loss": 1.2937, "step": 2962 }, { "epoch": 0.32, "grad_norm": 0.07944557506892269, "learning_rate": 0.000797206861620626, "loss": 1.376, "step": 2963 }, { "epoch": 0.32, "grad_norm": 0.0879601918515955, "learning_rate": 0.0007970668335607692, "loss": 1.4672, "step": 2964 }, { "epoch": 0.32, "grad_norm": 0.0875774791959148, "learning_rate": 0.0007969267694805344, "loss": 1.7008, "step": 2965 }, { "epoch": 0.32, "grad_norm": 0.07462856123424448, "learning_rate": 0.0007967866693969053, "loss": 1.4685, "step": 2966 }, { "epoch": 0.32, "grad_norm": 0.07553293553208955, "learning_rate": 0.0007966465333268692, "loss": 1.4977, "step": 2967 }, { "epoch": 0.32, "grad_norm": 0.07498275141260913, "learning_rate": 0.0007965063612874184, "loss": 1.4594, "step": 2968 }, { "epoch": 0.32, "grad_norm": 0.08857786850312585, "learning_rate": 0.0007963661532955491, "loss": 1.3205, "step": 2969 }, { "epoch": 0.32, "grad_norm": 0.0781016657672057, "learning_rate": 0.0007962259093682618, "loss": 1.5386, "step": 2970 }, { "epoch": 0.32, "grad_norm": 0.06611466290116325, "learning_rate": 0.0007960856295225618, "loss": 1.412, "step": 2971 }, { "epoch": 0.32, "grad_norm": 0.05980479298650208, "learning_rate": 0.0007959453137754586, "loss": 1.2784, "step": 2972 }, { "epoch": 0.32, "grad_norm": 0.08170204436084891, "learning_rate": 0.0007958049621439658, "loss": 1.3268, "step": 2973 }, { "epoch": 0.32, "grad_norm": 0.07901322029342102, "learning_rate": 0.0007956645746451014, "loss": 1.4302, "step": 2974 }, { "epoch": 0.32, "grad_norm": 0.07307847800484599, "learning_rate": 0.0007955241512958881, "loss": 1.3754, "step": 2975 }, { "epoch": 0.32, "grad_norm": 0.06798109224771459, "learning_rate": 0.0007953836921133526, "loss": 1.4458, "step": 2976 }, { "epoch": 0.32, "grad_norm": 0.07147885396357032, "learning_rate": 0.000795243197114526, "loss": 1.4256, "step": 2977 }, { "epoch": 0.32, "grad_norm": 0.08764899736377428, "learning_rate": 0.0007951026663164441, "loss": 1.4224, "step": 2978 }, { "epoch": 0.32, "grad_norm": 0.0789920953072474, "learning_rate": 0.0007949620997361464, "loss": 1.527, "step": 2979 }, { "epoch": 0.32, "grad_norm": 0.060860731754923306, "learning_rate": 0.0007948214973906773, "loss": 1.3997, "step": 2980 }, { "epoch": 0.32, "grad_norm": 0.0708414939041876, "learning_rate": 0.0007946808592970851, "loss": 1.3102, "step": 2981 }, { "epoch": 0.32, "grad_norm": 0.07718632769563154, "learning_rate": 0.0007945401854724231, "loss": 1.2831, "step": 2982 }, { "epoch": 0.32, "grad_norm": 0.08056455667858022, "learning_rate": 0.0007943994759337478, "loss": 1.3903, "step": 2983 }, { "epoch": 0.32, "grad_norm": 0.07101624686847008, "learning_rate": 0.0007942587306981213, "loss": 1.4499, "step": 2984 }, { "epoch": 0.32, "grad_norm": 0.06746511771239394, "learning_rate": 0.0007941179497826092, "loss": 1.5146, "step": 2985 }, { "epoch": 0.32, "grad_norm": 0.07859460093405395, "learning_rate": 0.0007939771332042817, "loss": 1.5021, "step": 2986 }, { "epoch": 0.32, "grad_norm": 0.08026433583333177, "learning_rate": 0.0007938362809802133, "loss": 1.5269, "step": 2987 }, { "epoch": 0.32, "grad_norm": 0.0804769915693462, "learning_rate": 0.0007936953931274827, "loss": 1.4494, "step": 2988 }, { "epoch": 0.32, "grad_norm": 0.07631714582546169, "learning_rate": 0.0007935544696631734, "loss": 1.4709, "step": 2989 }, { "epoch": 0.32, "grad_norm": 0.07816795866767308, "learning_rate": 0.0007934135106043725, "loss": 1.4538, "step": 2990 }, { "epoch": 0.32, "grad_norm": 0.07403291790022538, "learning_rate": 0.000793272515968172, "loss": 1.4281, "step": 2991 }, { "epoch": 0.32, "grad_norm": 0.07624985603510445, "learning_rate": 0.0007931314857716676, "loss": 1.5291, "step": 2992 }, { "epoch": 0.32, "grad_norm": 0.0742004553934557, "learning_rate": 0.0007929904200319602, "loss": 1.4579, "step": 2993 }, { "epoch": 0.32, "grad_norm": 0.08297138817412927, "learning_rate": 0.0007928493187661543, "loss": 1.478, "step": 2994 }, { "epoch": 0.32, "grad_norm": 0.07212656405189363, "learning_rate": 0.0007927081819913589, "loss": 1.3083, "step": 2995 }, { "epoch": 0.32, "grad_norm": 0.07193865619974371, "learning_rate": 0.0007925670097246871, "loss": 1.4224, "step": 2996 }, { "epoch": 0.32, "grad_norm": 0.08509412142880574, "learning_rate": 0.0007924258019832569, "loss": 1.4168, "step": 2997 }, { "epoch": 0.32, "grad_norm": 0.07326614306013109, "learning_rate": 0.00079228455878419, "loss": 1.4902, "step": 2998 }, { "epoch": 0.32, "grad_norm": 0.08526895309448661, "learning_rate": 0.0007921432801446127, "loss": 1.3673, "step": 2999 }, { "epoch": 0.32, "grad_norm": 0.06863352180449873, "learning_rate": 0.0007920019660816555, "loss": 1.347, "step": 3000 }, { "epoch": 0.32, "grad_norm": 0.07099094175406699, "learning_rate": 0.0007918606166124534, "loss": 1.4413, "step": 3001 }, { "epoch": 0.32, "grad_norm": 0.07331999460617844, "learning_rate": 0.000791719231754145, "loss": 1.3459, "step": 3002 }, { "epoch": 0.32, "grad_norm": 0.06742208138232067, "learning_rate": 0.0007915778115238743, "loss": 1.3944, "step": 3003 }, { "epoch": 0.32, "grad_norm": 0.07564835035452432, "learning_rate": 0.0007914363559387887, "loss": 1.4459, "step": 3004 }, { "epoch": 0.32, "grad_norm": 0.0775774753702783, "learning_rate": 0.0007912948650160404, "loss": 1.6455, "step": 3005 }, { "epoch": 0.32, "grad_norm": 0.06744949006518554, "learning_rate": 0.0007911533387727852, "loss": 1.377, "step": 3006 }, { "epoch": 0.32, "grad_norm": 0.07458417426910349, "learning_rate": 0.0007910117772261839, "loss": 1.3564, "step": 3007 }, { "epoch": 0.32, "grad_norm": 0.06923492807546411, "learning_rate": 0.0007908701803934013, "loss": 1.4969, "step": 3008 }, { "epoch": 0.32, "grad_norm": 0.0741115394912985, "learning_rate": 0.0007907285482916067, "loss": 1.5018, "step": 3009 }, { "epoch": 0.32, "grad_norm": 0.07936484472761873, "learning_rate": 0.0007905868809379734, "loss": 1.5695, "step": 3010 }, { "epoch": 0.32, "grad_norm": 0.07847917265702553, "learning_rate": 0.0007904451783496789, "loss": 1.3488, "step": 3011 }, { "epoch": 0.32, "grad_norm": 0.07361634529449362, "learning_rate": 0.0007903034405439051, "loss": 1.3476, "step": 3012 }, { "epoch": 0.32, "grad_norm": 0.06785097870413606, "learning_rate": 0.0007901616675378383, "loss": 1.4224, "step": 3013 }, { "epoch": 0.32, "grad_norm": 0.08175453562213789, "learning_rate": 0.0007900198593486691, "loss": 1.4744, "step": 3014 }, { "epoch": 0.32, "grad_norm": 0.07547507633226375, "learning_rate": 0.0007898780159935921, "loss": 1.4698, "step": 3015 }, { "epoch": 0.32, "grad_norm": 0.0743726151849371, "learning_rate": 0.0007897361374898062, "loss": 1.4479, "step": 3016 }, { "epoch": 0.32, "grad_norm": 0.08480416967435063, "learning_rate": 0.0007895942238545148, "loss": 1.385, "step": 3017 }, { "epoch": 0.32, "grad_norm": 0.07394595425252229, "learning_rate": 0.0007894522751049253, "loss": 1.4534, "step": 3018 }, { "epoch": 0.32, "grad_norm": 0.0714067033877472, "learning_rate": 0.0007893102912582497, "loss": 1.4485, "step": 3019 }, { "epoch": 0.32, "grad_norm": 0.07582505910196573, "learning_rate": 0.000789168272331704, "loss": 1.4941, "step": 3020 }, { "epoch": 0.32, "grad_norm": 0.0731420165751429, "learning_rate": 0.0007890262183425081, "loss": 1.4112, "step": 3021 }, { "epoch": 0.32, "grad_norm": 0.09305550460370213, "learning_rate": 0.0007888841293078868, "loss": 1.4552, "step": 3022 }, { "epoch": 0.32, "grad_norm": 0.06702964079143485, "learning_rate": 0.0007887420052450689, "loss": 1.4836, "step": 3023 }, { "epoch": 0.33, "grad_norm": 0.07074665516379978, "learning_rate": 0.0007885998461712875, "loss": 1.4678, "step": 3024 }, { "epoch": 0.33, "grad_norm": 0.07983639768866699, "learning_rate": 0.0007884576521037798, "loss": 1.4624, "step": 3025 }, { "epoch": 0.33, "grad_norm": 0.07033851096059493, "learning_rate": 0.0007883154230597873, "loss": 1.3853, "step": 3026 }, { "epoch": 0.33, "grad_norm": 0.08476954997112443, "learning_rate": 0.0007881731590565559, "loss": 1.4094, "step": 3027 }, { "epoch": 0.33, "grad_norm": 0.07706177346613348, "learning_rate": 0.0007880308601113352, "loss": 1.4241, "step": 3028 }, { "epoch": 0.33, "grad_norm": 0.08278153657107577, "learning_rate": 0.0007878885262413799, "loss": 1.3777, "step": 3029 }, { "epoch": 0.33, "grad_norm": 0.08990975215370425, "learning_rate": 0.0007877461574639484, "loss": 1.5666, "step": 3030 }, { "epoch": 0.33, "grad_norm": 0.08601960681426747, "learning_rate": 0.0007876037537963033, "loss": 1.3981, "step": 3031 }, { "epoch": 0.33, "grad_norm": 0.07788237507460331, "learning_rate": 0.0007874613152557113, "loss": 1.3502, "step": 3032 }, { "epoch": 0.33, "grad_norm": 0.104418842475275, "learning_rate": 0.0007873188418594438, "loss": 1.5997, "step": 3033 }, { "epoch": 0.33, "grad_norm": 0.08013070374938921, "learning_rate": 0.0007871763336247764, "loss": 1.481, "step": 3034 }, { "epoch": 0.33, "grad_norm": 0.07381930904128295, "learning_rate": 0.0007870337905689881, "loss": 1.3511, "step": 3035 }, { "epoch": 0.33, "grad_norm": 0.08219086071312165, "learning_rate": 0.0007868912127093638, "loss": 1.3737, "step": 3036 }, { "epoch": 0.33, "grad_norm": 0.08179829364428115, "learning_rate": 0.0007867486000631903, "loss": 1.595, "step": 3037 }, { "epoch": 0.33, "grad_norm": 0.09105328320067167, "learning_rate": 0.0007866059526477607, "loss": 1.5167, "step": 3038 }, { "epoch": 0.33, "grad_norm": 0.07385748224030242, "learning_rate": 0.0007864632704803711, "loss": 1.3882, "step": 3039 }, { "epoch": 0.33, "grad_norm": 0.08146182608657129, "learning_rate": 0.0007863205535783227, "loss": 1.4446, "step": 3040 }, { "epoch": 0.33, "grad_norm": 0.08225645645470177, "learning_rate": 0.0007861778019589198, "loss": 1.3429, "step": 3041 }, { "epoch": 0.33, "grad_norm": 0.07002468941388816, "learning_rate": 0.000786035015639472, "loss": 1.4645, "step": 3042 }, { "epoch": 0.33, "grad_norm": 0.08573645543248615, "learning_rate": 0.0007858921946372922, "loss": 1.352, "step": 3043 }, { "epoch": 0.33, "grad_norm": 0.07785179379080004, "learning_rate": 0.0007857493389696983, "loss": 1.4217, "step": 3044 }, { "epoch": 0.33, "grad_norm": 0.06999357815245667, "learning_rate": 0.0007856064486540119, "loss": 1.5484, "step": 3045 }, { "epoch": 0.33, "grad_norm": 0.07645852761959619, "learning_rate": 0.0007854635237075591, "loss": 1.382, "step": 3046 }, { "epoch": 0.33, "grad_norm": 0.07006480211273053, "learning_rate": 0.0007853205641476698, "loss": 1.409, "step": 3047 }, { "epoch": 0.33, "grad_norm": 0.07233610453690911, "learning_rate": 0.0007851775699916786, "loss": 1.5152, "step": 3048 }, { "epoch": 0.33, "grad_norm": 0.06938021129476968, "learning_rate": 0.0007850345412569236, "loss": 1.4692, "step": 3049 }, { "epoch": 0.33, "grad_norm": 0.07696153387851845, "learning_rate": 0.0007848914779607479, "loss": 1.3746, "step": 3050 }, { "epoch": 0.33, "grad_norm": 0.07222688770788331, "learning_rate": 0.0007847483801204984, "loss": 1.3984, "step": 3051 }, { "epoch": 0.33, "grad_norm": 0.07217459083677298, "learning_rate": 0.0007846052477535262, "loss": 1.4673, "step": 3052 }, { "epoch": 0.33, "grad_norm": 0.07665892010803753, "learning_rate": 0.0007844620808771864, "loss": 1.2931, "step": 3053 }, { "epoch": 0.33, "grad_norm": 0.0753925094097213, "learning_rate": 0.0007843188795088386, "loss": 1.5428, "step": 3054 }, { "epoch": 0.33, "grad_norm": 0.10489383050028697, "learning_rate": 0.0007841756436658464, "loss": 1.3772, "step": 3055 }, { "epoch": 0.33, "grad_norm": 0.07367833059294322, "learning_rate": 0.0007840323733655779, "loss": 1.4949, "step": 3056 }, { "epoch": 0.33, "grad_norm": 0.0720997223768572, "learning_rate": 0.0007838890686254048, "loss": 1.3626, "step": 3057 }, { "epoch": 0.33, "grad_norm": 0.07082156336721733, "learning_rate": 0.0007837457294627035, "loss": 1.4854, "step": 3058 }, { "epoch": 0.33, "grad_norm": 0.06935942782492194, "learning_rate": 0.0007836023558948542, "loss": 1.4135, "step": 3059 }, { "epoch": 0.33, "grad_norm": 0.08042147923845204, "learning_rate": 0.0007834589479392415, "loss": 1.2829, "step": 3060 }, { "epoch": 0.33, "grad_norm": 0.07681957831308812, "learning_rate": 0.0007833155056132543, "loss": 1.5114, "step": 3061 }, { "epoch": 0.33, "grad_norm": 0.06971247023737136, "learning_rate": 0.0007831720289342851, "loss": 1.2874, "step": 3062 }, { "epoch": 0.33, "grad_norm": 0.07685493671422122, "learning_rate": 0.0007830285179197313, "loss": 1.5248, "step": 3063 }, { "epoch": 0.33, "grad_norm": 0.08086964730881499, "learning_rate": 0.0007828849725869939, "loss": 1.575, "step": 3064 }, { "epoch": 0.33, "grad_norm": 0.07938799977856249, "learning_rate": 0.0007827413929534783, "loss": 1.4236, "step": 3065 }, { "epoch": 0.33, "grad_norm": 0.07732912826079967, "learning_rate": 0.000782597779036594, "loss": 1.5576, "step": 3066 }, { "epoch": 0.33, "grad_norm": 0.07555603999638347, "learning_rate": 0.0007824541308537547, "loss": 1.437, "step": 3067 }, { "epoch": 0.33, "grad_norm": 0.0723582025951719, "learning_rate": 0.0007823104484223784, "loss": 1.3081, "step": 3068 }, { "epoch": 0.33, "grad_norm": 0.07712330288020602, "learning_rate": 0.0007821667317598871, "loss": 1.4435, "step": 3069 }, { "epoch": 0.33, "grad_norm": 0.07022497073587027, "learning_rate": 0.0007820229808837065, "loss": 1.4977, "step": 3070 }, { "epoch": 0.33, "grad_norm": 0.08805211648596689, "learning_rate": 0.0007818791958112672, "loss": 1.4112, "step": 3071 }, { "epoch": 0.33, "grad_norm": 0.09148966969586016, "learning_rate": 0.0007817353765600038, "loss": 1.4515, "step": 3072 }, { "epoch": 0.33, "grad_norm": 0.06831467563512097, "learning_rate": 0.0007815915231473547, "loss": 1.4484, "step": 3073 }, { "epoch": 0.33, "grad_norm": 0.07058078689286881, "learning_rate": 0.0007814476355907625, "loss": 1.5195, "step": 3074 }, { "epoch": 0.33, "grad_norm": 0.07712659647369673, "learning_rate": 0.0007813037139076742, "loss": 1.5531, "step": 3075 }, { "epoch": 0.33, "grad_norm": 0.07372955770119158, "learning_rate": 0.0007811597581155407, "loss": 1.2755, "step": 3076 }, { "epoch": 0.33, "grad_norm": 0.06877276645781746, "learning_rate": 0.0007810157682318174, "loss": 1.4825, "step": 3077 }, { "epoch": 0.33, "grad_norm": 0.07132173224006651, "learning_rate": 0.0007808717442739634, "loss": 1.4652, "step": 3078 }, { "epoch": 0.33, "grad_norm": 0.07358607939108996, "learning_rate": 0.000780727686259442, "loss": 1.4582, "step": 3079 }, { "epoch": 0.33, "grad_norm": 0.07282302440663897, "learning_rate": 0.0007805835942057209, "loss": 1.4773, "step": 3080 }, { "epoch": 0.33, "grad_norm": 0.08644424392136274, "learning_rate": 0.0007804394681302716, "loss": 1.3965, "step": 3081 }, { "epoch": 0.33, "grad_norm": 0.07859450169273975, "learning_rate": 0.0007802953080505699, "loss": 1.5057, "step": 3082 }, { "epoch": 0.33, "grad_norm": 0.07110005546604672, "learning_rate": 0.000780151113984096, "loss": 1.4402, "step": 3083 }, { "epoch": 0.33, "grad_norm": 0.06966222966144436, "learning_rate": 0.0007800068859483335, "loss": 1.4002, "step": 3084 }, { "epoch": 0.33, "grad_norm": 0.07523096465564282, "learning_rate": 0.0007798626239607709, "loss": 1.4039, "step": 3085 }, { "epoch": 0.33, "grad_norm": 0.08023929078514579, "learning_rate": 0.0007797183280389002, "loss": 1.3024, "step": 3086 }, { "epoch": 0.33, "grad_norm": 0.06607105968539327, "learning_rate": 0.0007795739982002179, "loss": 1.4149, "step": 3087 }, { "epoch": 0.33, "grad_norm": 0.07990908371456909, "learning_rate": 0.0007794296344622245, "loss": 1.5067, "step": 3088 }, { "epoch": 0.33, "grad_norm": 0.07774648613968008, "learning_rate": 0.0007792852368424246, "loss": 1.3315, "step": 3089 }, { "epoch": 0.33, "grad_norm": 0.06540989487465651, "learning_rate": 0.0007791408053583269, "loss": 1.3436, "step": 3090 }, { "epoch": 0.33, "grad_norm": 0.07598791466657227, "learning_rate": 0.0007789963400274443, "loss": 1.4636, "step": 3091 }, { "epoch": 0.33, "grad_norm": 0.07035326107353247, "learning_rate": 0.0007788518408672934, "loss": 1.4288, "step": 3092 }, { "epoch": 0.33, "grad_norm": 0.07360792080801745, "learning_rate": 0.0007787073078953955, "loss": 1.4191, "step": 3093 }, { "epoch": 0.33, "grad_norm": 0.08166605972997906, "learning_rate": 0.0007785627411292757, "loss": 1.4531, "step": 3094 }, { "epoch": 0.33, "grad_norm": 0.08293183860962706, "learning_rate": 0.0007784181405864633, "loss": 1.4735, "step": 3095 }, { "epoch": 0.33, "grad_norm": 0.07091869532476382, "learning_rate": 0.0007782735062844914, "loss": 1.5312, "step": 3096 }, { "epoch": 0.33, "grad_norm": 0.07628603322010782, "learning_rate": 0.0007781288382408975, "loss": 1.4945, "step": 3097 }, { "epoch": 0.33, "grad_norm": 0.07645251008994298, "learning_rate": 0.0007779841364732231, "loss": 1.5767, "step": 3098 }, { "epoch": 0.33, "grad_norm": 0.07069182150804638, "learning_rate": 0.0007778394009990138, "loss": 1.3911, "step": 3099 }, { "epoch": 0.33, "grad_norm": 0.06384757627433746, "learning_rate": 0.0007776946318358193, "loss": 1.4573, "step": 3100 }, { "epoch": 0.33, "grad_norm": 0.06905081379306961, "learning_rate": 0.0007775498290011934, "loss": 1.4385, "step": 3101 }, { "epoch": 0.33, "grad_norm": 0.06830814690017548, "learning_rate": 0.0007774049925126938, "loss": 1.3747, "step": 3102 }, { "epoch": 0.33, "grad_norm": 0.07537641175106087, "learning_rate": 0.0007772601223878825, "loss": 1.4257, "step": 3103 }, { "epoch": 0.33, "grad_norm": 0.06944422593512861, "learning_rate": 0.0007771152186443254, "loss": 1.3538, "step": 3104 }, { "epoch": 0.33, "grad_norm": 0.06475440686857296, "learning_rate": 0.0007769702812995929, "loss": 1.4781, "step": 3105 }, { "epoch": 0.33, "grad_norm": 0.06935888065280987, "learning_rate": 0.0007768253103712588, "loss": 1.5099, "step": 3106 }, { "epoch": 0.33, "grad_norm": 0.0958797906744289, "learning_rate": 0.0007766803058769015, "loss": 1.423, "step": 3107 }, { "epoch": 0.33, "grad_norm": 0.07001471875357002, "learning_rate": 0.0007765352678341031, "loss": 1.3799, "step": 3108 }, { "epoch": 0.33, "grad_norm": 0.06829461879326418, "learning_rate": 0.0007763901962604502, "loss": 1.4027, "step": 3109 }, { "epoch": 0.33, "grad_norm": 0.07110317044671406, "learning_rate": 0.0007762450911735333, "loss": 1.3645, "step": 3110 }, { "epoch": 0.33, "grad_norm": 0.06932317253596268, "learning_rate": 0.0007760999525909465, "loss": 1.3177, "step": 3111 }, { "epoch": 0.33, "grad_norm": 0.07623931912800835, "learning_rate": 0.0007759547805302887, "loss": 1.5691, "step": 3112 }, { "epoch": 0.33, "grad_norm": 0.07738409838265192, "learning_rate": 0.0007758095750091624, "loss": 1.376, "step": 3113 }, { "epoch": 0.33, "grad_norm": 0.07384334853814246, "learning_rate": 0.0007756643360451742, "loss": 1.4941, "step": 3114 }, { "epoch": 0.33, "grad_norm": 0.07783517274774741, "learning_rate": 0.0007755190636559349, "loss": 1.5442, "step": 3115 }, { "epoch": 0.33, "grad_norm": 0.08431183660573531, "learning_rate": 0.0007753737578590594, "loss": 1.3991, "step": 3116 }, { "epoch": 0.34, "grad_norm": 0.07425496823855175, "learning_rate": 0.0007752284186721664, "loss": 1.4384, "step": 3117 }, { "epoch": 0.34, "grad_norm": 0.07768753925552281, "learning_rate": 0.0007750830461128786, "loss": 1.5313, "step": 3118 }, { "epoch": 0.34, "grad_norm": 0.0742130465574765, "learning_rate": 0.0007749376401988232, "loss": 1.4509, "step": 3119 }, { "epoch": 0.34, "grad_norm": 0.07106119474212085, "learning_rate": 0.0007747922009476311, "loss": 1.4476, "step": 3120 }, { "epoch": 0.34, "grad_norm": 0.08026943237402184, "learning_rate": 0.0007746467283769373, "loss": 1.4237, "step": 3121 }, { "epoch": 0.34, "grad_norm": 0.07251319491821662, "learning_rate": 0.0007745012225043808, "loss": 1.4031, "step": 3122 }, { "epoch": 0.34, "grad_norm": 0.0838468737335798, "learning_rate": 0.0007743556833476046, "loss": 1.3595, "step": 3123 }, { "epoch": 0.34, "grad_norm": 0.07248789364297477, "learning_rate": 0.0007742101109242561, "loss": 1.3831, "step": 3124 }, { "epoch": 0.34, "grad_norm": 0.06538596756504367, "learning_rate": 0.0007740645052519863, "loss": 1.4328, "step": 3125 }, { "epoch": 0.34, "grad_norm": 0.07321164447995468, "learning_rate": 0.0007739188663484504, "loss": 1.4778, "step": 3126 }, { "epoch": 0.34, "grad_norm": 0.06658563944433109, "learning_rate": 0.0007737731942313075, "loss": 1.4636, "step": 3127 }, { "epoch": 0.34, "grad_norm": 0.07882492086563908, "learning_rate": 0.000773627488918221, "loss": 1.4752, "step": 3128 }, { "epoch": 0.34, "grad_norm": 0.07703285975834412, "learning_rate": 0.0007734817504268581, "loss": 1.4747, "step": 3129 }, { "epoch": 0.34, "grad_norm": 0.07459758239690902, "learning_rate": 0.0007733359787748903, "loss": 1.3904, "step": 3130 }, { "epoch": 0.34, "grad_norm": 0.07884448125379544, "learning_rate": 0.0007731901739799926, "loss": 1.4475, "step": 3131 }, { "epoch": 0.34, "grad_norm": 0.07877035915224383, "learning_rate": 0.0007730443360598446, "loss": 1.2817, "step": 3132 }, { "epoch": 0.34, "grad_norm": 0.07954104659452825, "learning_rate": 0.0007728984650321294, "loss": 1.3287, "step": 3133 }, { "epoch": 0.34, "grad_norm": 0.07618254195620103, "learning_rate": 0.0007727525609145345, "loss": 1.3799, "step": 3134 }, { "epoch": 0.34, "grad_norm": 0.07133598285920512, "learning_rate": 0.0007726066237247513, "loss": 1.4582, "step": 3135 }, { "epoch": 0.34, "grad_norm": 0.08030773599573748, "learning_rate": 0.000772460653480475, "loss": 1.3284, "step": 3136 }, { "epoch": 0.34, "grad_norm": 0.07601094209678182, "learning_rate": 0.0007723146501994053, "loss": 1.4454, "step": 3137 }, { "epoch": 0.34, "grad_norm": 0.07237544124895776, "learning_rate": 0.0007721686138992456, "loss": 1.4829, "step": 3138 }, { "epoch": 0.34, "grad_norm": 0.07307223788740541, "learning_rate": 0.0007720225445977029, "loss": 1.5171, "step": 3139 }, { "epoch": 0.34, "grad_norm": 0.07334412315927522, "learning_rate": 0.000771876442312489, "loss": 1.3808, "step": 3140 }, { "epoch": 0.34, "grad_norm": 0.07045811403441511, "learning_rate": 0.0007717303070613192, "loss": 1.5331, "step": 3141 }, { "epoch": 0.34, "grad_norm": 0.06922844829063676, "learning_rate": 0.0007715841388619129, "loss": 1.3979, "step": 3142 }, { "epoch": 0.34, "grad_norm": 0.07247091145004228, "learning_rate": 0.0007714379377319933, "loss": 1.3762, "step": 3143 }, { "epoch": 0.34, "grad_norm": 0.08291167413964351, "learning_rate": 0.0007712917036892881, "loss": 1.5631, "step": 3144 }, { "epoch": 0.34, "grad_norm": 0.07776849587307075, "learning_rate": 0.0007711454367515284, "loss": 1.4031, "step": 3145 }, { "epoch": 0.34, "grad_norm": 0.0775914855157828, "learning_rate": 0.0007709991369364498, "loss": 1.2737, "step": 3146 }, { "epoch": 0.34, "grad_norm": 0.07038223844706518, "learning_rate": 0.0007708528042617915, "loss": 1.3721, "step": 3147 }, { "epoch": 0.34, "grad_norm": 0.06373412075036435, "learning_rate": 0.0007707064387452972, "loss": 1.4981, "step": 3148 }, { "epoch": 0.34, "grad_norm": 0.07680543115273483, "learning_rate": 0.0007705600404047135, "loss": 1.5106, "step": 3149 }, { "epoch": 0.34, "grad_norm": 0.07667822761501376, "learning_rate": 0.0007704136092577925, "loss": 1.4488, "step": 3150 }, { "epoch": 0.34, "grad_norm": 0.0650475144264375, "learning_rate": 0.000770267145322289, "loss": 1.5037, "step": 3151 }, { "epoch": 0.34, "grad_norm": 0.07429507685088853, "learning_rate": 0.0007701206486159622, "loss": 1.2532, "step": 3152 }, { "epoch": 0.34, "grad_norm": 0.06955586145840957, "learning_rate": 0.0007699741191565758, "loss": 1.4294, "step": 3153 }, { "epoch": 0.34, "grad_norm": 0.08122438166751517, "learning_rate": 0.0007698275569618965, "loss": 1.4821, "step": 3154 }, { "epoch": 0.34, "grad_norm": 0.08451174509670904, "learning_rate": 0.0007696809620496958, "loss": 1.4029, "step": 3155 }, { "epoch": 0.34, "grad_norm": 0.08009083722558016, "learning_rate": 0.0007695343344377485, "loss": 1.4404, "step": 3156 }, { "epoch": 0.34, "grad_norm": 0.07499676196986182, "learning_rate": 0.0007693876741438341, "loss": 1.383, "step": 3157 }, { "epoch": 0.34, "grad_norm": 0.0725984705093447, "learning_rate": 0.0007692409811857356, "loss": 1.5022, "step": 3158 }, { "epoch": 0.34, "grad_norm": 0.07422957294380073, "learning_rate": 0.0007690942555812397, "loss": 1.3875, "step": 3159 }, { "epoch": 0.34, "grad_norm": 0.06613298070529067, "learning_rate": 0.0007689474973481377, "loss": 1.4326, "step": 3160 }, { "epoch": 0.34, "grad_norm": 0.08919731981533549, "learning_rate": 0.0007688007065042245, "loss": 1.4545, "step": 3161 }, { "epoch": 0.34, "grad_norm": 0.07398471196751458, "learning_rate": 0.0007686538830672988, "loss": 1.4074, "step": 3162 }, { "epoch": 0.34, "grad_norm": 0.08089708897669948, "learning_rate": 0.0007685070270551638, "loss": 1.452, "step": 3163 }, { "epoch": 0.34, "grad_norm": 0.0789578097029579, "learning_rate": 0.000768360138485626, "loss": 1.4309, "step": 3164 }, { "epoch": 0.34, "grad_norm": 0.07689564974172196, "learning_rate": 0.000768213217376496, "loss": 1.4699, "step": 3165 }, { "epoch": 0.34, "grad_norm": 0.07618594956580693, "learning_rate": 0.0007680662637455889, "loss": 1.3925, "step": 3166 }, { "epoch": 0.34, "grad_norm": 0.07046309543070939, "learning_rate": 0.0007679192776107232, "loss": 1.4711, "step": 3167 }, { "epoch": 0.34, "grad_norm": 0.06065773332639102, "learning_rate": 0.0007677722589897214, "loss": 1.4663, "step": 3168 }, { "epoch": 0.34, "grad_norm": 0.06448888328620603, "learning_rate": 0.0007676252079004101, "loss": 1.2777, "step": 3169 }, { "epoch": 0.34, "grad_norm": 0.06919677462771456, "learning_rate": 0.0007674781243606197, "loss": 1.4472, "step": 3170 }, { "epoch": 0.34, "grad_norm": 0.07706927662533691, "learning_rate": 0.0007673310083881844, "loss": 1.4423, "step": 3171 }, { "epoch": 0.34, "grad_norm": 0.07029417746284516, "learning_rate": 0.0007671838600009429, "loss": 1.5247, "step": 3172 }, { "epoch": 0.34, "grad_norm": 0.07006293784244212, "learning_rate": 0.0007670366792167371, "loss": 1.3477, "step": 3173 }, { "epoch": 0.34, "grad_norm": 0.0665223617685133, "learning_rate": 0.0007668894660534135, "loss": 1.3575, "step": 3174 }, { "epoch": 0.34, "grad_norm": 0.07626888473910773, "learning_rate": 0.0007667422205288219, "loss": 1.4666, "step": 3175 }, { "epoch": 0.34, "grad_norm": 0.0680382115354333, "learning_rate": 0.0007665949426608164, "loss": 1.4727, "step": 3176 }, { "epoch": 0.34, "grad_norm": 0.0718190962160836, "learning_rate": 0.0007664476324672552, "loss": 1.5404, "step": 3177 }, { "epoch": 0.34, "grad_norm": 0.0793876214439945, "learning_rate": 0.0007663002899659999, "loss": 1.477, "step": 3178 }, { "epoch": 0.34, "grad_norm": 0.07195150887665572, "learning_rate": 0.0007661529151749163, "loss": 1.4443, "step": 3179 }, { "epoch": 0.34, "grad_norm": 0.07746803954967403, "learning_rate": 0.0007660055081118742, "loss": 1.5019, "step": 3180 }, { "epoch": 0.34, "grad_norm": 0.07756374186181363, "learning_rate": 0.0007658580687947473, "loss": 1.5107, "step": 3181 }, { "epoch": 0.34, "grad_norm": 0.084088607358134, "learning_rate": 0.0007657105972414128, "loss": 1.35, "step": 3182 }, { "epoch": 0.34, "grad_norm": 0.07775278682089769, "learning_rate": 0.0007655630934697524, "loss": 1.4803, "step": 3183 }, { "epoch": 0.34, "grad_norm": 0.07602799563639731, "learning_rate": 0.0007654155574976515, "loss": 1.5302, "step": 3184 }, { "epoch": 0.34, "grad_norm": 0.09250492590276259, "learning_rate": 0.0007652679893429993, "loss": 1.5617, "step": 3185 }, { "epoch": 0.34, "grad_norm": 0.07556500064817434, "learning_rate": 0.0007651203890236886, "loss": 1.3106, "step": 3186 }, { "epoch": 0.34, "grad_norm": 0.0823390739699376, "learning_rate": 0.0007649727565576168, "loss": 1.399, "step": 3187 }, { "epoch": 0.34, "grad_norm": 0.08353330942823384, "learning_rate": 0.0007648250919626849, "loss": 1.5233, "step": 3188 }, { "epoch": 0.34, "grad_norm": 0.07649777003122347, "learning_rate": 0.0007646773952567975, "loss": 1.4492, "step": 3189 }, { "epoch": 0.34, "grad_norm": 0.07280004721530052, "learning_rate": 0.0007645296664578635, "loss": 1.3165, "step": 3190 }, { "epoch": 0.34, "grad_norm": 0.09933336670129464, "learning_rate": 0.0007643819055837955, "loss": 1.3888, "step": 3191 }, { "epoch": 0.34, "grad_norm": 0.07779644484036152, "learning_rate": 0.0007642341126525099, "loss": 1.3763, "step": 3192 }, { "epoch": 0.34, "grad_norm": 0.0683306880772141, "learning_rate": 0.0007640862876819271, "loss": 1.3403, "step": 3193 }, { "epoch": 0.34, "grad_norm": 0.07300035128576285, "learning_rate": 0.0007639384306899716, "loss": 1.4203, "step": 3194 }, { "epoch": 0.34, "grad_norm": 0.07191331871436503, "learning_rate": 0.0007637905416945716, "loss": 1.3708, "step": 3195 }, { "epoch": 0.34, "grad_norm": 0.06748665275475779, "learning_rate": 0.0007636426207136587, "loss": 1.3958, "step": 3196 }, { "epoch": 0.34, "grad_norm": 0.06979899017349758, "learning_rate": 0.0007634946677651693, "loss": 1.5443, "step": 3197 }, { "epoch": 0.34, "grad_norm": 0.0665352354043053, "learning_rate": 0.000763346682867043, "loss": 1.4987, "step": 3198 }, { "epoch": 0.34, "grad_norm": 0.08238063923115757, "learning_rate": 0.0007631986660372234, "loss": 1.4835, "step": 3199 }, { "epoch": 0.34, "grad_norm": 0.06784751022670603, "learning_rate": 0.0007630506172936583, "loss": 1.5603, "step": 3200 }, { "epoch": 0.34, "grad_norm": 0.06156458689570208, "learning_rate": 0.000762902536654299, "loss": 1.4053, "step": 3201 }, { "epoch": 0.34, "grad_norm": 0.07874911875458783, "learning_rate": 0.0007627544241371005, "loss": 1.4591, "step": 3202 }, { "epoch": 0.34, "grad_norm": 0.06958839969062831, "learning_rate": 0.0007626062797600225, "loss": 1.43, "step": 3203 }, { "epoch": 0.34, "grad_norm": 0.07551242792555159, "learning_rate": 0.0007624581035410277, "loss": 1.539, "step": 3204 }, { "epoch": 0.34, "grad_norm": 0.0802738339351694, "learning_rate": 0.000762309895498083, "loss": 1.411, "step": 3205 }, { "epoch": 0.34, "grad_norm": 0.07610686443993148, "learning_rate": 0.0007621616556491591, "loss": 1.331, "step": 3206 }, { "epoch": 0.34, "grad_norm": 0.07973442227199408, "learning_rate": 0.0007620133840122305, "loss": 1.3919, "step": 3207 }, { "epoch": 0.34, "grad_norm": 0.06935731716024028, "learning_rate": 0.0007618650806052761, "loss": 1.468, "step": 3208 }, { "epoch": 0.34, "grad_norm": 0.07140926619478406, "learning_rate": 0.0007617167454462777, "loss": 1.3073, "step": 3209 }, { "epoch": 0.35, "grad_norm": 0.07628062933111013, "learning_rate": 0.0007615683785532217, "loss": 1.4563, "step": 3210 }, { "epoch": 0.35, "grad_norm": 0.06641544863858916, "learning_rate": 0.000761419979944098, "loss": 1.488, "step": 3211 }, { "epoch": 0.35, "grad_norm": 0.07750941731981588, "learning_rate": 0.0007612715496369006, "loss": 1.4531, "step": 3212 }, { "epoch": 0.35, "grad_norm": 0.06603884034822871, "learning_rate": 0.0007611230876496269, "loss": 1.3834, "step": 3213 }, { "epoch": 0.35, "grad_norm": 0.07814376352410252, "learning_rate": 0.0007609745940002785, "loss": 1.2924, "step": 3214 }, { "epoch": 0.35, "grad_norm": 0.06860503690532974, "learning_rate": 0.0007608260687068611, "loss": 1.3301, "step": 3215 }, { "epoch": 0.35, "grad_norm": 0.07411296420440955, "learning_rate": 0.0007606775117873836, "loss": 1.4608, "step": 3216 }, { "epoch": 0.35, "grad_norm": 0.0887118333530251, "learning_rate": 0.0007605289232598591, "loss": 1.4031, "step": 3217 }, { "epoch": 0.35, "grad_norm": 0.0749536165963251, "learning_rate": 0.0007603803031423045, "loss": 1.3985, "step": 3218 }, { "epoch": 0.35, "grad_norm": 0.08599613367738648, "learning_rate": 0.0007602316514527404, "loss": 1.3853, "step": 3219 }, { "epoch": 0.35, "grad_norm": 0.06959589644535209, "learning_rate": 0.0007600829682091915, "loss": 1.3899, "step": 3220 }, { "epoch": 0.35, "grad_norm": 0.08227183337119814, "learning_rate": 0.0007599342534296861, "loss": 1.573, "step": 3221 }, { "epoch": 0.35, "grad_norm": 0.07457434958035399, "learning_rate": 0.0007597855071322561, "loss": 1.4239, "step": 3222 }, { "epoch": 0.35, "grad_norm": 0.07725146361782721, "learning_rate": 0.000759636729334938, "loss": 1.4454, "step": 3223 }, { "epoch": 0.35, "grad_norm": 0.07127251988346796, "learning_rate": 0.0007594879200557711, "loss": 1.2921, "step": 3224 }, { "epoch": 0.35, "grad_norm": 0.08661988788655628, "learning_rate": 0.0007593390793127996, "loss": 1.5255, "step": 3225 }, { "epoch": 0.35, "grad_norm": 0.08081542170362471, "learning_rate": 0.0007591902071240705, "loss": 1.5518, "step": 3226 }, { "epoch": 0.35, "grad_norm": 0.07345692626207229, "learning_rate": 0.0007590413035076354, "loss": 1.3901, "step": 3227 }, { "epoch": 0.35, "grad_norm": 0.08021573419868515, "learning_rate": 0.0007588923684815489, "loss": 1.4691, "step": 3228 }, { "epoch": 0.35, "grad_norm": 0.07183007813806935, "learning_rate": 0.0007587434020638704, "loss": 1.474, "step": 3229 }, { "epoch": 0.35, "grad_norm": 0.09673214943620395, "learning_rate": 0.0007585944042726622, "loss": 1.4883, "step": 3230 }, { "epoch": 0.35, "grad_norm": 0.07943320951000708, "learning_rate": 0.0007584453751259911, "loss": 1.4071, "step": 3231 }, { "epoch": 0.35, "grad_norm": 0.07247765308432169, "learning_rate": 0.0007582963146419273, "loss": 1.4031, "step": 3232 }, { "epoch": 0.35, "grad_norm": 0.0768200140003289, "learning_rate": 0.0007581472228385447, "loss": 1.4768, "step": 3233 }, { "epoch": 0.35, "grad_norm": 0.086314346381171, "learning_rate": 0.0007579980997339215, "loss": 1.5756, "step": 3234 }, { "epoch": 0.35, "grad_norm": 0.08993807019188155, "learning_rate": 0.0007578489453461392, "loss": 1.4346, "step": 3235 }, { "epoch": 0.35, "grad_norm": 0.07458162456361211, "learning_rate": 0.0007576997596932833, "loss": 1.4711, "step": 3236 }, { "epoch": 0.35, "grad_norm": 0.0715283955335303, "learning_rate": 0.0007575505427934433, "loss": 1.4222, "step": 3237 }, { "epoch": 0.35, "grad_norm": 0.07396927269800081, "learning_rate": 0.000757401294664712, "loss": 1.3881, "step": 3238 }, { "epoch": 0.35, "grad_norm": 0.07757426845121163, "learning_rate": 0.0007572520153251861, "loss": 1.5357, "step": 3239 }, { "epoch": 0.35, "grad_norm": 0.07139276773731634, "learning_rate": 0.0007571027047929668, "loss": 1.3491, "step": 3240 }, { "epoch": 0.35, "grad_norm": 0.09502708923492989, "learning_rate": 0.0007569533630861578, "loss": 1.5269, "step": 3241 }, { "epoch": 0.35, "grad_norm": 0.07195223372318488, "learning_rate": 0.0007568039902228679, "loss": 1.4009, "step": 3242 }, { "epoch": 0.35, "grad_norm": 0.07778683580295905, "learning_rate": 0.000756654586221209, "loss": 1.3772, "step": 3243 }, { "epoch": 0.35, "grad_norm": 0.06633510179131384, "learning_rate": 0.0007565051510992964, "loss": 1.5195, "step": 3244 }, { "epoch": 0.35, "grad_norm": 0.07960888862929236, "learning_rate": 0.00075635568487525, "loss": 1.4633, "step": 3245 }, { "epoch": 0.35, "grad_norm": 0.07572686281344403, "learning_rate": 0.0007562061875671932, "loss": 1.4543, "step": 3246 }, { "epoch": 0.35, "grad_norm": 0.07078481527058693, "learning_rate": 0.0007560566591932526, "loss": 1.4283, "step": 3247 }, { "epoch": 0.35, "grad_norm": 0.07721076934087472, "learning_rate": 0.0007559070997715596, "loss": 1.4219, "step": 3248 }, { "epoch": 0.35, "grad_norm": 0.07709818422571885, "learning_rate": 0.0007557575093202483, "loss": 1.4719, "step": 3249 }, { "epoch": 0.35, "grad_norm": 0.08369733445571949, "learning_rate": 0.0007556078878574573, "loss": 1.3737, "step": 3250 }, { "epoch": 0.35, "grad_norm": 0.07127796374107943, "learning_rate": 0.0007554582354013287, "loss": 1.4437, "step": 3251 }, { "epoch": 0.35, "grad_norm": 0.06956043970136461, "learning_rate": 0.0007553085519700084, "loss": 1.4763, "step": 3252 }, { "epoch": 0.35, "grad_norm": 0.07552454140482719, "learning_rate": 0.0007551588375816461, "loss": 1.4528, "step": 3253 }, { "epoch": 0.35, "grad_norm": 0.07703097370477303, "learning_rate": 0.0007550090922543948, "loss": 1.432, "step": 3254 }, { "epoch": 0.35, "grad_norm": 0.08408237716390983, "learning_rate": 0.000754859316006412, "loss": 1.5888, "step": 3255 }, { "epoch": 0.35, "grad_norm": 0.07580679587042956, "learning_rate": 0.0007547095088558585, "loss": 1.4903, "step": 3256 }, { "epoch": 0.35, "grad_norm": 0.07470619755946706, "learning_rate": 0.0007545596708208991, "loss": 1.2541, "step": 3257 }, { "epoch": 0.35, "grad_norm": 0.07443437030109887, "learning_rate": 0.0007544098019197018, "loss": 1.3443, "step": 3258 }, { "epoch": 0.35, "grad_norm": 0.07786511617842874, "learning_rate": 0.000754259902170439, "loss": 1.4346, "step": 3259 }, { "epoch": 0.35, "grad_norm": 0.07049486481909144, "learning_rate": 0.0007541099715912867, "loss": 1.4424, "step": 3260 }, { "epoch": 0.35, "grad_norm": 0.08884401595716221, "learning_rate": 0.0007539600102004241, "loss": 1.4576, "step": 3261 }, { "epoch": 0.35, "grad_norm": 0.06963460525741602, "learning_rate": 0.0007538100180160347, "loss": 1.5296, "step": 3262 }, { "epoch": 0.35, "grad_norm": 0.07647861256873244, "learning_rate": 0.0007536599950563056, "loss": 1.4882, "step": 3263 }, { "epoch": 0.35, "grad_norm": 0.07582018739217335, "learning_rate": 0.0007535099413394276, "loss": 1.3487, "step": 3264 }, { "epoch": 0.35, "grad_norm": 0.0677356672438712, "learning_rate": 0.0007533598568835953, "loss": 1.3253, "step": 3265 }, { "epoch": 0.35, "grad_norm": 0.06655141971882164, "learning_rate": 0.0007532097417070069, "loss": 1.3951, "step": 3266 }, { "epoch": 0.35, "grad_norm": 0.0672083493114287, "learning_rate": 0.0007530595958278643, "loss": 1.4441, "step": 3267 }, { "epoch": 0.35, "grad_norm": 0.06966617093307825, "learning_rate": 0.0007529094192643733, "loss": 1.4569, "step": 3268 }, { "epoch": 0.35, "grad_norm": 0.07642161102040695, "learning_rate": 0.0007527592120347433, "loss": 1.4863, "step": 3269 }, { "epoch": 0.35, "grad_norm": 0.07535741721980958, "learning_rate": 0.0007526089741571875, "loss": 1.5901, "step": 3270 }, { "epoch": 0.35, "grad_norm": 0.08060712796678715, "learning_rate": 0.0007524587056499227, "loss": 1.4188, "step": 3271 }, { "epoch": 0.35, "grad_norm": 0.07216132537348179, "learning_rate": 0.0007523084065311694, "loss": 1.4918, "step": 3272 }, { "epoch": 0.35, "grad_norm": 0.06697670994819911, "learning_rate": 0.000752158076819152, "loss": 1.3135, "step": 3273 }, { "epoch": 0.35, "grad_norm": 0.09212419434556492, "learning_rate": 0.0007520077165320986, "loss": 1.5355, "step": 3274 }, { "epoch": 0.35, "grad_norm": 0.07964494763502676, "learning_rate": 0.0007518573256882406, "loss": 1.4749, "step": 3275 }, { "epoch": 0.35, "grad_norm": 0.06205358368642162, "learning_rate": 0.0007517069043058136, "loss": 1.4527, "step": 3276 }, { "epoch": 0.35, "grad_norm": 0.06997752615570801, "learning_rate": 0.0007515564524030567, "loss": 1.4434, "step": 3277 }, { "epoch": 0.35, "grad_norm": 0.06796929318797444, "learning_rate": 0.0007514059699982127, "loss": 1.4697, "step": 3278 }, { "epoch": 0.35, "grad_norm": 0.06739848065311337, "learning_rate": 0.0007512554571095281, "loss": 1.5002, "step": 3279 }, { "epoch": 0.35, "grad_norm": 0.07068450192460699, "learning_rate": 0.0007511049137552533, "loss": 1.387, "step": 3280 }, { "epoch": 0.35, "grad_norm": 0.06525573630565533, "learning_rate": 0.0007509543399536418, "loss": 1.347, "step": 3281 }, { "epoch": 0.35, "grad_norm": 0.0718387775523518, "learning_rate": 0.0007508037357229515, "loss": 1.5561, "step": 3282 }, { "epoch": 0.35, "grad_norm": 0.06705072625049899, "learning_rate": 0.0007506531010814435, "loss": 1.4128, "step": 3283 }, { "epoch": 0.35, "grad_norm": 0.06879282519736174, "learning_rate": 0.0007505024360473829, "loss": 1.481, "step": 3284 }, { "epoch": 0.35, "grad_norm": 0.07087927610595694, "learning_rate": 0.0007503517406390384, "loss": 1.5064, "step": 3285 }, { "epoch": 0.35, "grad_norm": 0.06462073478069816, "learning_rate": 0.000750201014874682, "loss": 1.3303, "step": 3286 }, { "epoch": 0.35, "grad_norm": 0.07515022238790801, "learning_rate": 0.0007500502587725903, "loss": 1.4377, "step": 3287 }, { "epoch": 0.35, "grad_norm": 0.06472988386041287, "learning_rate": 0.0007498994723510427, "loss": 1.4442, "step": 3288 }, { "epoch": 0.35, "grad_norm": 0.07501542260623986, "learning_rate": 0.0007497486556283222, "loss": 1.4461, "step": 3289 }, { "epoch": 0.35, "grad_norm": 0.07074110541629863, "learning_rate": 0.0007495978086227164, "loss": 1.3875, "step": 3290 }, { "epoch": 0.35, "grad_norm": 0.08067924371663614, "learning_rate": 0.0007494469313525158, "loss": 1.3703, "step": 3291 }, { "epoch": 0.35, "grad_norm": 0.07953533766463396, "learning_rate": 0.0007492960238360147, "loss": 1.3868, "step": 3292 }, { "epoch": 0.35, "grad_norm": 0.08099029033843955, "learning_rate": 0.0007491450860915114, "loss": 1.5645, "step": 3293 }, { "epoch": 0.35, "grad_norm": 0.06974776206856093, "learning_rate": 0.0007489941181373075, "loss": 1.39, "step": 3294 }, { "epoch": 0.35, "grad_norm": 0.0663906501580651, "learning_rate": 0.0007488431199917081, "loss": 1.3797, "step": 3295 }, { "epoch": 0.35, "grad_norm": 0.07187536093113295, "learning_rate": 0.0007486920916730228, "loss": 1.4383, "step": 3296 }, { "epoch": 0.35, "grad_norm": 0.07034086526155568, "learning_rate": 0.0007485410331995639, "loss": 1.4984, "step": 3297 }, { "epoch": 0.35, "grad_norm": 0.09952125204166053, "learning_rate": 0.0007483899445896478, "loss": 1.387, "step": 3298 }, { "epoch": 0.35, "grad_norm": 0.07855678239956934, "learning_rate": 0.0007482388258615946, "loss": 1.5485, "step": 3299 }, { "epoch": 0.35, "grad_norm": 0.06719366075858256, "learning_rate": 0.000748087677033728, "loss": 1.3693, "step": 3300 }, { "epoch": 0.35, "grad_norm": 0.07047823572228767, "learning_rate": 0.0007479364981243753, "loss": 1.5318, "step": 3301 }, { "epoch": 0.35, "grad_norm": 0.07007505720471473, "learning_rate": 0.0007477852891518675, "loss": 1.5427, "step": 3302 }, { "epoch": 0.36, "grad_norm": 0.07306598130413317, "learning_rate": 0.000747634050134539, "loss": 1.4338, "step": 3303 }, { "epoch": 0.36, "grad_norm": 0.073346910314022, "learning_rate": 0.0007474827810907283, "loss": 1.4186, "step": 3304 }, { "epoch": 0.36, "grad_norm": 0.08051731318932802, "learning_rate": 0.0007473314820387771, "loss": 1.3899, "step": 3305 }, { "epoch": 0.36, "grad_norm": 0.07628081552060612, "learning_rate": 0.0007471801529970311, "loss": 1.4434, "step": 3306 }, { "epoch": 0.36, "grad_norm": 0.07692904755262349, "learning_rate": 0.0007470287939838393, "loss": 1.4179, "step": 3307 }, { "epoch": 0.36, "grad_norm": 0.07629253080964134, "learning_rate": 0.0007468774050175547, "loss": 1.495, "step": 3308 }, { "epoch": 0.36, "grad_norm": 0.07601463459835986, "learning_rate": 0.0007467259861165335, "loss": 1.3929, "step": 3309 }, { "epoch": 0.36, "grad_norm": 0.07143316010707389, "learning_rate": 0.0007465745372991359, "loss": 1.4335, "step": 3310 }, { "epoch": 0.36, "grad_norm": 0.06589330547380333, "learning_rate": 0.0007464230585837257, "loss": 1.4047, "step": 3311 }, { "epoch": 0.36, "grad_norm": 0.07249848843228891, "learning_rate": 0.0007462715499886701, "loss": 1.4494, "step": 3312 }, { "epoch": 0.36, "grad_norm": 0.07137267226329203, "learning_rate": 0.00074612001153234, "loss": 1.2914, "step": 3313 }, { "epoch": 0.36, "grad_norm": 0.0871238453782044, "learning_rate": 0.00074596844323311, "loss": 1.3637, "step": 3314 }, { "epoch": 0.36, "grad_norm": 0.06963991804376052, "learning_rate": 0.0007458168451093582, "loss": 1.4503, "step": 3315 }, { "epoch": 0.36, "grad_norm": 0.08264139026068265, "learning_rate": 0.0007456652171794665, "loss": 1.4632, "step": 3316 }, { "epoch": 0.36, "grad_norm": 0.07188881114245928, "learning_rate": 0.0007455135594618204, "loss": 1.5282, "step": 3317 }, { "epoch": 0.36, "grad_norm": 0.06770134356378205, "learning_rate": 0.0007453618719748086, "loss": 1.423, "step": 3318 }, { "epoch": 0.36, "grad_norm": 0.07265567228128596, "learning_rate": 0.0007452101547368241, "loss": 1.2598, "step": 3319 }, { "epoch": 0.36, "grad_norm": 0.08608144387463132, "learning_rate": 0.0007450584077662628, "loss": 1.4486, "step": 3320 }, { "epoch": 0.36, "grad_norm": 0.08328290542930794, "learning_rate": 0.0007449066310815249, "loss": 1.3688, "step": 3321 }, { "epoch": 0.36, "grad_norm": 0.07610336074240623, "learning_rate": 0.0007447548247010137, "loss": 1.3053, "step": 3322 }, { "epoch": 0.36, "grad_norm": 0.07294867434292124, "learning_rate": 0.000744602988643136, "loss": 1.4548, "step": 3323 }, { "epoch": 0.36, "grad_norm": 0.07649839853752352, "learning_rate": 0.0007444511229263026, "loss": 1.3756, "step": 3324 }, { "epoch": 0.36, "grad_norm": 0.07738633235915615, "learning_rate": 0.0007442992275689281, "loss": 1.4607, "step": 3325 }, { "epoch": 0.36, "grad_norm": 0.0820756094066135, "learning_rate": 0.0007441473025894298, "loss": 1.3747, "step": 3326 }, { "epoch": 0.36, "grad_norm": 0.07086103194112217, "learning_rate": 0.0007439953480062294, "loss": 1.3998, "step": 3327 }, { "epoch": 0.36, "grad_norm": 0.07017241619996507, "learning_rate": 0.000743843363837752, "loss": 1.4109, "step": 3328 }, { "epoch": 0.36, "grad_norm": 0.07430812668013119, "learning_rate": 0.0007436913501024259, "loss": 1.4276, "step": 3329 }, { "epoch": 0.36, "grad_norm": 0.07826138304497253, "learning_rate": 0.0007435393068186835, "loss": 1.3875, "step": 3330 }, { "epoch": 0.36, "grad_norm": 0.07893852810631888, "learning_rate": 0.0007433872340049607, "loss": 1.417, "step": 3331 }, { "epoch": 0.36, "grad_norm": 0.0685915410461669, "learning_rate": 0.0007432351316796964, "loss": 1.4295, "step": 3332 }, { "epoch": 0.36, "grad_norm": 0.07565845102634473, "learning_rate": 0.0007430829998613342, "loss": 1.4313, "step": 3333 }, { "epoch": 0.36, "grad_norm": 0.07870178690082014, "learning_rate": 0.0007429308385683199, "loss": 1.3445, "step": 3334 }, { "epoch": 0.36, "grad_norm": 0.06969600272935284, "learning_rate": 0.0007427786478191041, "loss": 1.3896, "step": 3335 }, { "epoch": 0.36, "grad_norm": 0.07161250210757818, "learning_rate": 0.0007426264276321401, "loss": 1.5339, "step": 3336 }, { "epoch": 0.36, "grad_norm": 0.06567066213357739, "learning_rate": 0.0007424741780258855, "loss": 1.4913, "step": 3337 }, { "epoch": 0.36, "grad_norm": 0.07739750277858809, "learning_rate": 0.0007423218990188008, "loss": 1.5223, "step": 3338 }, { "epoch": 0.36, "grad_norm": 0.07139533267516067, "learning_rate": 0.0007421695906293504, "loss": 1.2608, "step": 3339 }, { "epoch": 0.36, "grad_norm": 0.07084136654668771, "learning_rate": 0.0007420172528760022, "loss": 1.4166, "step": 3340 }, { "epoch": 0.36, "grad_norm": 0.07461768305043502, "learning_rate": 0.0007418648857772279, "loss": 1.5376, "step": 3341 }, { "epoch": 0.36, "grad_norm": 0.07018413349605858, "learning_rate": 0.0007417124893515022, "loss": 1.2933, "step": 3342 }, { "epoch": 0.36, "grad_norm": 0.07748422851796735, "learning_rate": 0.0007415600636173039, "loss": 1.3764, "step": 3343 }, { "epoch": 0.36, "grad_norm": 0.06651878463581043, "learning_rate": 0.0007414076085931152, "loss": 1.274, "step": 3344 }, { "epoch": 0.36, "grad_norm": 0.0720113569348327, "learning_rate": 0.0007412551242974215, "loss": 1.4276, "step": 3345 }, { "epoch": 0.36, "grad_norm": 0.06866646064645342, "learning_rate": 0.0007411026107487123, "loss": 1.5271, "step": 3346 }, { "epoch": 0.36, "grad_norm": 0.07366727179489832, "learning_rate": 0.0007409500679654805, "loss": 1.4906, "step": 3347 }, { "epoch": 0.36, "grad_norm": 0.08059863399610857, "learning_rate": 0.0007407974959662222, "loss": 1.3035, "step": 3348 }, { "epoch": 0.36, "grad_norm": 0.08704144005558019, "learning_rate": 0.0007406448947694373, "loss": 1.4854, "step": 3349 }, { "epoch": 0.36, "grad_norm": 0.08134336654874601, "learning_rate": 0.0007404922643936294, "loss": 1.3461, "step": 3350 }, { "epoch": 0.36, "grad_norm": 0.07515621562261053, "learning_rate": 0.0007403396048573051, "loss": 1.4072, "step": 3351 }, { "epoch": 0.36, "grad_norm": 0.07475152903248071, "learning_rate": 0.0007401869161789753, "loss": 1.3376, "step": 3352 }, { "epoch": 0.36, "grad_norm": 0.07219916506306422, "learning_rate": 0.0007400341983771539, "loss": 1.5695, "step": 3353 }, { "epoch": 0.36, "grad_norm": 0.07830596989301176, "learning_rate": 0.0007398814514703585, "loss": 1.5121, "step": 3354 }, { "epoch": 0.36, "grad_norm": 0.07554099739341379, "learning_rate": 0.0007397286754771098, "loss": 1.3901, "step": 3355 }, { "epoch": 0.36, "grad_norm": 0.0806406228367244, "learning_rate": 0.000739575870415933, "loss": 1.4044, "step": 3356 }, { "epoch": 0.36, "grad_norm": 0.07450558110927982, "learning_rate": 0.0007394230363053558, "loss": 1.4013, "step": 3357 }, { "epoch": 0.36, "grad_norm": 0.07182394432047186, "learning_rate": 0.0007392701731639102, "loss": 1.5534, "step": 3358 }, { "epoch": 0.36, "grad_norm": 0.0755364633765582, "learning_rate": 0.0007391172810101311, "loss": 1.4123, "step": 3359 }, { "epoch": 0.36, "grad_norm": 0.07636094394024896, "learning_rate": 0.0007389643598625573, "loss": 1.4783, "step": 3360 }, { "epoch": 0.36, "grad_norm": 0.06831486564246876, "learning_rate": 0.0007388114097397311, "loss": 1.5466, "step": 3361 }, { "epoch": 0.36, "grad_norm": 0.08977748147634984, "learning_rate": 0.0007386584306601983, "loss": 1.4046, "step": 3362 }, { "epoch": 0.36, "grad_norm": 0.0672322704542431, "learning_rate": 0.0007385054226425077, "loss": 1.3721, "step": 3363 }, { "epoch": 0.36, "grad_norm": 0.06880509467787907, "learning_rate": 0.0007383523857052124, "loss": 1.3872, "step": 3364 }, { "epoch": 0.36, "grad_norm": 0.07513708330952967, "learning_rate": 0.0007381993198668688, "loss": 1.5438, "step": 3365 }, { "epoch": 0.36, "grad_norm": 0.07848633488508752, "learning_rate": 0.0007380462251460364, "loss": 1.5115, "step": 3366 }, { "epoch": 0.36, "grad_norm": 0.07274279733626927, "learning_rate": 0.0007378931015612786, "loss": 1.5028, "step": 3367 }, { "epoch": 0.36, "grad_norm": 0.06856498960103916, "learning_rate": 0.0007377399491311619, "loss": 1.4003, "step": 3368 }, { "epoch": 0.36, "grad_norm": 0.06815056499835738, "learning_rate": 0.0007375867678742567, "loss": 1.582, "step": 3369 }, { "epoch": 0.36, "grad_norm": 0.07130247554924865, "learning_rate": 0.0007374335578091371, "loss": 1.3157, "step": 3370 }, { "epoch": 0.36, "grad_norm": 0.0699724644388025, "learning_rate": 0.0007372803189543798, "loss": 1.4183, "step": 3371 }, { "epoch": 0.36, "grad_norm": 0.07859318932566481, "learning_rate": 0.0007371270513285659, "loss": 1.5631, "step": 3372 }, { "epoch": 0.36, "grad_norm": 0.07308852607081995, "learning_rate": 0.0007369737549502796, "loss": 1.576, "step": 3373 }, { "epoch": 0.36, "grad_norm": 0.06767306064322558, "learning_rate": 0.0007368204298381085, "loss": 1.4173, "step": 3374 }, { "epoch": 0.36, "grad_norm": 0.07610629235626565, "learning_rate": 0.0007366670760106438, "loss": 1.3515, "step": 3375 }, { "epoch": 0.36, "grad_norm": 0.07255829044756128, "learning_rate": 0.0007365136934864803, "loss": 1.3286, "step": 3376 }, { "epoch": 0.36, "grad_norm": 0.06113231650355408, "learning_rate": 0.0007363602822842159, "loss": 1.4304, "step": 3377 }, { "epoch": 0.36, "grad_norm": 0.07358452028485662, "learning_rate": 0.0007362068424224526, "loss": 1.3216, "step": 3378 }, { "epoch": 0.36, "grad_norm": 0.07125399818046416, "learning_rate": 0.0007360533739197952, "loss": 1.4562, "step": 3379 }, { "epoch": 0.36, "grad_norm": 0.09144438881226293, "learning_rate": 0.0007358998767948525, "loss": 1.478, "step": 3380 }, { "epoch": 0.36, "grad_norm": 0.09000168011921686, "learning_rate": 0.0007357463510662364, "loss": 1.5342, "step": 3381 }, { "epoch": 0.36, "grad_norm": 0.0706911701954772, "learning_rate": 0.0007355927967525626, "loss": 1.3949, "step": 3382 }, { "epoch": 0.36, "grad_norm": 0.07477870009184143, "learning_rate": 0.0007354392138724499, "loss": 1.4481, "step": 3383 }, { "epoch": 0.36, "grad_norm": 0.09106274263140833, "learning_rate": 0.0007352856024445208, "loss": 1.3723, "step": 3384 }, { "epoch": 0.36, "grad_norm": 0.07556958378953439, "learning_rate": 0.0007351319624874012, "loss": 1.5089, "step": 3385 }, { "epoch": 0.36, "grad_norm": 0.07314230545614694, "learning_rate": 0.0007349782940197207, "loss": 1.3267, "step": 3386 }, { "epoch": 0.36, "grad_norm": 0.07184969570633701, "learning_rate": 0.000734824597060112, "loss": 1.5058, "step": 3387 }, { "epoch": 0.36, "grad_norm": 0.0729652621412798, "learning_rate": 0.000734670871627211, "loss": 1.5736, "step": 3388 }, { "epoch": 0.36, "grad_norm": 0.0669490998865407, "learning_rate": 0.000734517117739658, "loss": 1.4002, "step": 3389 }, { "epoch": 0.36, "grad_norm": 0.08135789295051453, "learning_rate": 0.000734363335416096, "loss": 1.4905, "step": 3390 }, { "epoch": 0.36, "grad_norm": 0.07506628391924669, "learning_rate": 0.0007342095246751717, "loss": 1.4123, "step": 3391 }, { "epoch": 0.36, "grad_norm": 0.07631817394827138, "learning_rate": 0.000734055685535535, "loss": 1.4101, "step": 3392 }, { "epoch": 0.36, "grad_norm": 0.08451380581854086, "learning_rate": 0.0007339018180158394, "loss": 1.4673, "step": 3393 }, { "epoch": 0.36, "grad_norm": 0.08132207812962348, "learning_rate": 0.0007337479221347419, "loss": 1.5261, "step": 3394 }, { "epoch": 0.36, "grad_norm": 0.07423312430722277, "learning_rate": 0.0007335939979109032, "loss": 1.4681, "step": 3395 }, { "epoch": 0.37, "grad_norm": 0.08032224968211471, "learning_rate": 0.0007334400453629869, "loss": 1.518, "step": 3396 }, { "epoch": 0.37, "grad_norm": 0.07632194005433686, "learning_rate": 0.0007332860645096604, "loss": 1.4886, "step": 3397 }, { "epoch": 0.37, "grad_norm": 0.0864928518705132, "learning_rate": 0.0007331320553695942, "loss": 1.516, "step": 3398 }, { "epoch": 0.37, "grad_norm": 0.07020971282123498, "learning_rate": 0.0007329780179614624, "loss": 1.3371, "step": 3399 }, { "epoch": 0.37, "grad_norm": 0.06779508227886841, "learning_rate": 0.000732823952303943, "loss": 1.4788, "step": 3400 }, { "epoch": 0.37, "grad_norm": 0.08093791904028005, "learning_rate": 0.0007326698584157167, "loss": 1.5612, "step": 3401 }, { "epoch": 0.37, "grad_norm": 0.07297898365770618, "learning_rate": 0.0007325157363154678, "loss": 1.4382, "step": 3402 }, { "epoch": 0.37, "grad_norm": 0.07359410616916069, "learning_rate": 0.0007323615860218843, "loss": 1.4834, "step": 3403 }, { "epoch": 0.37, "grad_norm": 0.07731757154460116, "learning_rate": 0.0007322074075536574, "loss": 1.5044, "step": 3404 }, { "epoch": 0.37, "grad_norm": 0.0669375876227775, "learning_rate": 0.0007320532009294818, "loss": 1.5491, "step": 3405 }, { "epoch": 0.37, "grad_norm": 0.08319722191595673, "learning_rate": 0.0007318989661680556, "loss": 1.5788, "step": 3406 }, { "epoch": 0.37, "grad_norm": 0.07349318095347086, "learning_rate": 0.0007317447032880804, "loss": 1.4061, "step": 3407 }, { "epoch": 0.37, "grad_norm": 0.0815880060028677, "learning_rate": 0.0007315904123082608, "loss": 1.4521, "step": 3408 }, { "epoch": 0.37, "grad_norm": 0.07833449022531756, "learning_rate": 0.0007314360932473054, "loss": 1.301, "step": 3409 }, { "epoch": 0.37, "grad_norm": 0.06153440472323062, "learning_rate": 0.0007312817461239258, "loss": 1.4043, "step": 3410 }, { "epoch": 0.37, "grad_norm": 0.061429343476582014, "learning_rate": 0.000731127370956837, "loss": 1.3902, "step": 3411 }, { "epoch": 0.37, "grad_norm": 0.07111252029973611, "learning_rate": 0.0007309729677647579, "loss": 1.3566, "step": 3412 }, { "epoch": 0.37, "grad_norm": 0.07442921283389783, "learning_rate": 0.00073081853656641, "loss": 1.4205, "step": 3413 }, { "epoch": 0.37, "grad_norm": 0.06875680871458785, "learning_rate": 0.0007306640773805188, "loss": 1.5055, "step": 3414 }, { "epoch": 0.37, "grad_norm": 0.07011065801353429, "learning_rate": 0.000730509590225813, "loss": 1.4903, "step": 3415 }, { "epoch": 0.37, "grad_norm": 0.07315229939972583, "learning_rate": 0.0007303550751210247, "loss": 1.3406, "step": 3416 }, { "epoch": 0.37, "grad_norm": 0.06862713960228749, "learning_rate": 0.0007302005320848894, "loss": 1.487, "step": 3417 }, { "epoch": 0.37, "grad_norm": 0.06991335457962292, "learning_rate": 0.0007300459611361461, "loss": 1.3993, "step": 3418 }, { "epoch": 0.37, "grad_norm": 0.07522110372865594, "learning_rate": 0.0007298913622935366, "loss": 1.5515, "step": 3419 }, { "epoch": 0.37, "grad_norm": 0.08368471518379769, "learning_rate": 0.0007297367355758071, "loss": 1.4278, "step": 3420 }, { "epoch": 0.37, "grad_norm": 0.07070017111651242, "learning_rate": 0.0007295820810017064, "loss": 1.3956, "step": 3421 }, { "epoch": 0.37, "grad_norm": 0.07220151835057213, "learning_rate": 0.0007294273985899867, "loss": 1.4705, "step": 3422 }, { "epoch": 0.37, "grad_norm": 0.06758161171286328, "learning_rate": 0.0007292726883594042, "loss": 1.5548, "step": 3423 }, { "epoch": 0.37, "grad_norm": 0.08252864849353624, "learning_rate": 0.0007291179503287178, "loss": 1.4895, "step": 3424 }, { "epoch": 0.37, "grad_norm": 0.07274048956748501, "learning_rate": 0.0007289631845166897, "loss": 1.4906, "step": 3425 }, { "epoch": 0.37, "grad_norm": 0.0645889905677694, "learning_rate": 0.0007288083909420865, "loss": 1.506, "step": 3426 }, { "epoch": 0.37, "grad_norm": 0.07054697947372947, "learning_rate": 0.0007286535696236769, "loss": 1.3929, "step": 3427 }, { "epoch": 0.37, "grad_norm": 0.06771033703226476, "learning_rate": 0.0007284987205802338, "loss": 1.3862, "step": 3428 }, { "epoch": 0.37, "grad_norm": 0.07230953142493653, "learning_rate": 0.000728343843830533, "loss": 1.5308, "step": 3429 }, { "epoch": 0.37, "grad_norm": 0.07990829409950602, "learning_rate": 0.0007281889393933539, "loss": 1.3315, "step": 3430 }, { "epoch": 0.37, "grad_norm": 0.0744965070918347, "learning_rate": 0.0007280340072874791, "loss": 1.3883, "step": 3431 }, { "epoch": 0.37, "grad_norm": 0.06853594795545971, "learning_rate": 0.000727879047531695, "loss": 1.4513, "step": 3432 }, { "epoch": 0.37, "grad_norm": 0.07005750508920855, "learning_rate": 0.0007277240601447907, "loss": 1.429, "step": 3433 }, { "epoch": 0.37, "grad_norm": 0.0746780149046262, "learning_rate": 0.000727569045145559, "loss": 1.4085, "step": 3434 }, { "epoch": 0.37, "grad_norm": 0.07182195957106077, "learning_rate": 0.000727414002552796, "loss": 1.4943, "step": 3435 }, { "epoch": 0.37, "grad_norm": 0.0640184881855754, "learning_rate": 0.0007272589323853012, "loss": 1.4937, "step": 3436 }, { "epoch": 0.37, "grad_norm": 0.07500444842348676, "learning_rate": 0.0007271038346618774, "loss": 1.3062, "step": 3437 }, { "epoch": 0.37, "grad_norm": 0.07010662383143268, "learning_rate": 0.0007269487094013306, "loss": 1.5224, "step": 3438 }, { "epoch": 0.37, "grad_norm": 0.06754630061604039, "learning_rate": 0.0007267935566224704, "loss": 1.5214, "step": 3439 }, { "epoch": 0.37, "grad_norm": 0.07547928011898576, "learning_rate": 0.0007266383763441097, "loss": 1.5368, "step": 3440 }, { "epoch": 0.37, "grad_norm": 0.06936226935791721, "learning_rate": 0.0007264831685850645, "loss": 1.3726, "step": 3441 }, { "epoch": 0.37, "grad_norm": 0.07120368855493818, "learning_rate": 0.0007263279333641541, "loss": 1.5166, "step": 3442 }, { "epoch": 0.37, "grad_norm": 0.06496628037540313, "learning_rate": 0.0007261726707002016, "loss": 1.4256, "step": 3443 }, { "epoch": 0.37, "grad_norm": 0.07751887752981045, "learning_rate": 0.0007260173806120331, "loss": 1.4362, "step": 3444 }, { "epoch": 0.37, "grad_norm": 0.07593916993944279, "learning_rate": 0.0007258620631184781, "loss": 1.4158, "step": 3445 }, { "epoch": 0.37, "grad_norm": 0.06962702489132955, "learning_rate": 0.0007257067182383692, "loss": 1.4768, "step": 3446 }, { "epoch": 0.37, "grad_norm": 0.07130112366350376, "learning_rate": 0.0007255513459905425, "loss": 1.4573, "step": 3447 }, { "epoch": 0.37, "grad_norm": 0.07317457257010111, "learning_rate": 0.0007253959463938375, "loss": 1.5383, "step": 3448 }, { "epoch": 0.37, "grad_norm": 0.07639509895879555, "learning_rate": 0.0007252405194670972, "loss": 1.4555, "step": 3449 }, { "epoch": 0.37, "grad_norm": 0.08762753103718868, "learning_rate": 0.0007250850652291671, "loss": 1.4631, "step": 3450 }, { "epoch": 0.37, "grad_norm": 0.07417213080044456, "learning_rate": 0.000724929583698897, "loss": 1.4104, "step": 3451 }, { "epoch": 0.37, "grad_norm": 0.08182638136439328, "learning_rate": 0.0007247740748951393, "loss": 1.4581, "step": 3452 }, { "epoch": 0.37, "grad_norm": 0.07652625611553487, "learning_rate": 0.0007246185388367502, "loss": 1.495, "step": 3453 }, { "epoch": 0.37, "grad_norm": 0.0718289444464132, "learning_rate": 0.0007244629755425889, "loss": 1.3931, "step": 3454 }, { "epoch": 0.37, "grad_norm": 0.0776318362638091, "learning_rate": 0.0007243073850315179, "loss": 1.4296, "step": 3455 }, { "epoch": 0.37, "grad_norm": 0.07319195455280723, "learning_rate": 0.0007241517673224032, "loss": 1.4762, "step": 3456 }, { "epoch": 0.37, "grad_norm": 0.0704550126688549, "learning_rate": 0.000723996122434114, "loss": 1.5216, "step": 3457 }, { "epoch": 0.37, "grad_norm": 0.0773671225672999, "learning_rate": 0.0007238404503855227, "loss": 1.4112, "step": 3458 }, { "epoch": 0.37, "grad_norm": 0.09560707561440114, "learning_rate": 0.000723684751195505, "loss": 1.4316, "step": 3459 }, { "epoch": 0.37, "grad_norm": 0.08176097879461783, "learning_rate": 0.0007235290248829402, "loss": 1.4419, "step": 3460 }, { "epoch": 0.37, "grad_norm": 0.07671659208485172, "learning_rate": 0.0007233732714667104, "loss": 1.5268, "step": 3461 }, { "epoch": 0.37, "grad_norm": 0.07096830395444363, "learning_rate": 0.0007232174909657014, "loss": 1.4263, "step": 3462 }, { "epoch": 0.37, "grad_norm": 0.07937368779759209, "learning_rate": 0.0007230616833988021, "loss": 1.4219, "step": 3463 }, { "epoch": 0.37, "grad_norm": 0.07043297450143687, "learning_rate": 0.0007229058487849045, "loss": 1.5423, "step": 3464 }, { "epoch": 0.37, "grad_norm": 0.08169254273194243, "learning_rate": 0.0007227499871429046, "loss": 1.3532, "step": 3465 }, { "epoch": 0.37, "grad_norm": 0.06726101998135063, "learning_rate": 0.0007225940984917007, "loss": 1.5103, "step": 3466 }, { "epoch": 0.37, "grad_norm": 0.06720793619103706, "learning_rate": 0.0007224381828501947, "loss": 1.5341, "step": 3467 }, { "epoch": 0.37, "grad_norm": 0.08637904126374926, "learning_rate": 0.0007222822402372924, "loss": 1.4348, "step": 3468 }, { "epoch": 0.37, "grad_norm": 0.08033335965656004, "learning_rate": 0.000722126270671902, "loss": 1.3226, "step": 3469 }, { "epoch": 0.37, "grad_norm": 0.07863747479165964, "learning_rate": 0.0007219702741729357, "loss": 1.4313, "step": 3470 }, { "epoch": 0.37, "grad_norm": 0.08115522916601269, "learning_rate": 0.0007218142507593084, "loss": 1.5195, "step": 3471 }, { "epoch": 0.37, "grad_norm": 0.08244015470619837, "learning_rate": 0.0007216582004499384, "loss": 1.4175, "step": 3472 }, { "epoch": 0.37, "grad_norm": 0.08949002833198955, "learning_rate": 0.0007215021232637474, "loss": 1.4375, "step": 3473 }, { "epoch": 0.37, "grad_norm": 0.07748904819217635, "learning_rate": 0.0007213460192196602, "loss": 1.3619, "step": 3474 }, { "epoch": 0.37, "grad_norm": 0.09245287952959458, "learning_rate": 0.0007211898883366052, "loss": 1.5071, "step": 3475 }, { "epoch": 0.37, "grad_norm": 0.07437346670229164, "learning_rate": 0.0007210337306335137, "loss": 1.4178, "step": 3476 }, { "epoch": 0.37, "grad_norm": 0.07848104764122867, "learning_rate": 0.0007208775461293205, "loss": 1.4574, "step": 3477 }, { "epoch": 0.37, "grad_norm": 0.08088266548645225, "learning_rate": 0.0007207213348429629, "loss": 1.4889, "step": 3478 }, { "epoch": 0.37, "grad_norm": 0.08175567193562198, "learning_rate": 0.0007205650967933829, "loss": 1.5308, "step": 3479 }, { "epoch": 0.37, "grad_norm": 0.08083881794107464, "learning_rate": 0.0007204088319995245, "loss": 1.45, "step": 3480 }, { "epoch": 0.37, "grad_norm": 0.06983298049987251, "learning_rate": 0.0007202525404803352, "loss": 1.4103, "step": 3481 }, { "epoch": 0.37, "grad_norm": 0.08082054975206557, "learning_rate": 0.0007200962222547662, "loss": 1.3104, "step": 3482 }, { "epoch": 0.37, "grad_norm": 0.07562790021280924, "learning_rate": 0.0007199398773417713, "loss": 1.4955, "step": 3483 }, { "epoch": 0.37, "grad_norm": 0.06228637321505799, "learning_rate": 0.000719783505760308, "loss": 1.5076, "step": 3484 }, { "epoch": 0.37, "grad_norm": 0.07524344062108244, "learning_rate": 0.000719627107529337, "loss": 1.4938, "step": 3485 }, { "epoch": 0.37, "grad_norm": 0.06815009438550329, "learning_rate": 0.0007194706826678222, "loss": 1.5052, "step": 3486 }, { "epoch": 0.37, "grad_norm": 0.0670706317496325, "learning_rate": 0.0007193142311947302, "loss": 1.291, "step": 3487 }, { "epoch": 0.37, "grad_norm": 0.07554509041215911, "learning_rate": 0.0007191577531290318, "loss": 1.5673, "step": 3488 }, { "epoch": 0.38, "grad_norm": 0.06758859386555005, "learning_rate": 0.0007190012484897002, "loss": 1.4189, "step": 3489 }, { "epoch": 0.38, "grad_norm": 0.0725558291970073, "learning_rate": 0.0007188447172957121, "loss": 1.526, "step": 3490 }, { "epoch": 0.38, "grad_norm": 0.07233185809808401, "learning_rate": 0.0007186881595660478, "loss": 1.4618, "step": 3491 }, { "epoch": 0.38, "grad_norm": 0.06533375243840589, "learning_rate": 0.0007185315753196899, "loss": 1.4157, "step": 3492 }, { "epoch": 0.38, "grad_norm": 0.07160807457158654, "learning_rate": 0.0007183749645756253, "loss": 1.5502, "step": 3493 }, { "epoch": 0.38, "grad_norm": 0.0695540672338539, "learning_rate": 0.0007182183273528436, "loss": 1.3664, "step": 3494 }, { "epoch": 0.38, "grad_norm": 0.07960679599217649, "learning_rate": 0.000718061663670337, "loss": 1.4714, "step": 3495 }, { "epoch": 0.38, "grad_norm": 0.06495251596402488, "learning_rate": 0.0007179049735471021, "loss": 1.3737, "step": 3496 }, { "epoch": 0.38, "grad_norm": 0.06138238935894003, "learning_rate": 0.0007177482570021379, "loss": 1.4541, "step": 3497 }, { "epoch": 0.38, "grad_norm": 0.06824731668030849, "learning_rate": 0.0007175915140544469, "loss": 1.429, "step": 3498 }, { "epoch": 0.38, "grad_norm": 0.07191700712025918, "learning_rate": 0.0007174347447230346, "loss": 1.4168, "step": 3499 }, { "epoch": 0.38, "grad_norm": 0.06375599026585484, "learning_rate": 0.0007172779490269099, "loss": 1.422, "step": 3500 }, { "epoch": 0.38, "grad_norm": 0.0727035296847183, "learning_rate": 0.0007171211269850847, "loss": 1.4303, "step": 3501 }, { "epoch": 0.38, "grad_norm": 0.07744658478094242, "learning_rate": 0.0007169642786165746, "loss": 1.5019, "step": 3502 }, { "epoch": 0.38, "grad_norm": 0.07138038364485365, "learning_rate": 0.0007168074039403975, "loss": 1.3967, "step": 3503 }, { "epoch": 0.38, "grad_norm": 0.07553044790967374, "learning_rate": 0.0007166505029755752, "loss": 1.3925, "step": 3504 }, { "epoch": 0.38, "grad_norm": 0.08417883389077783, "learning_rate": 0.0007164935757411327, "loss": 1.4638, "step": 3505 }, { "epoch": 0.38, "grad_norm": 0.0700501690822233, "learning_rate": 0.0007163366222560976, "loss": 1.4004, "step": 3506 }, { "epoch": 0.38, "grad_norm": 0.07009352089361452, "learning_rate": 0.0007161796425395013, "loss": 1.4417, "step": 3507 }, { "epoch": 0.38, "grad_norm": 0.08437481489426124, "learning_rate": 0.0007160226366103781, "loss": 1.3978, "step": 3508 }, { "epoch": 0.38, "grad_norm": 0.07281127210582462, "learning_rate": 0.0007158656044877654, "loss": 1.4749, "step": 3509 }, { "epoch": 0.38, "grad_norm": 0.08738001079910843, "learning_rate": 0.000715708546190704, "loss": 1.3974, "step": 3510 }, { "epoch": 0.38, "grad_norm": 0.07474336566461999, "learning_rate": 0.0007155514617382377, "loss": 1.4416, "step": 3511 }, { "epoch": 0.38, "grad_norm": 0.07599929215831347, "learning_rate": 0.0007153943511494134, "loss": 1.5088, "step": 3512 }, { "epoch": 0.38, "grad_norm": 0.07441053895292685, "learning_rate": 0.0007152372144432817, "loss": 1.4735, "step": 3513 }, { "epoch": 0.38, "grad_norm": 0.06776065211117123, "learning_rate": 0.0007150800516388956, "loss": 1.4267, "step": 3514 }, { "epoch": 0.38, "grad_norm": 0.07725836370655137, "learning_rate": 0.0007149228627553117, "loss": 1.4947, "step": 3515 }, { "epoch": 0.38, "grad_norm": 0.08250426681685036, "learning_rate": 0.0007147656478115898, "loss": 1.4598, "step": 3516 }, { "epoch": 0.38, "grad_norm": 0.079833072983792, "learning_rate": 0.0007146084068267928, "loss": 1.3901, "step": 3517 }, { "epoch": 0.38, "grad_norm": 0.07133873587178743, "learning_rate": 0.0007144511398199865, "loss": 1.3933, "step": 3518 }, { "epoch": 0.38, "grad_norm": 0.06894999027593159, "learning_rate": 0.00071429384681024, "loss": 1.4166, "step": 3519 }, { "epoch": 0.38, "grad_norm": 0.06676758690851145, "learning_rate": 0.0007141365278166261, "loss": 1.3517, "step": 3520 }, { "epoch": 0.38, "grad_norm": 0.06575167661410714, "learning_rate": 0.0007139791828582196, "loss": 1.4837, "step": 3521 }, { "epoch": 0.38, "grad_norm": 0.08017489354969888, "learning_rate": 0.0007138218119540998, "loss": 1.4512, "step": 3522 }, { "epoch": 0.38, "grad_norm": 0.07228178571734656, "learning_rate": 0.000713664415123348, "loss": 1.3205, "step": 3523 }, { "epoch": 0.38, "grad_norm": 0.1026294128547175, "learning_rate": 0.0007135069923850493, "loss": 1.4643, "step": 3524 }, { "epoch": 0.38, "grad_norm": 0.08011786852797782, "learning_rate": 0.0007133495437582916, "loss": 1.442, "step": 3525 }, { "epoch": 0.38, "grad_norm": 0.07244582841262252, "learning_rate": 0.0007131920692621663, "loss": 1.2873, "step": 3526 }, { "epoch": 0.38, "grad_norm": 0.06822431369016423, "learning_rate": 0.0007130345689157676, "loss": 1.4352, "step": 3527 }, { "epoch": 0.38, "grad_norm": 0.07031653395975418, "learning_rate": 0.000712877042738193, "loss": 1.3986, "step": 3528 }, { "epoch": 0.38, "grad_norm": 0.07397680940283695, "learning_rate": 0.000712719490748543, "loss": 1.361, "step": 3529 }, { "epoch": 0.38, "grad_norm": 0.07489945680906114, "learning_rate": 0.0007125619129659214, "loss": 1.3775, "step": 3530 }, { "epoch": 0.38, "grad_norm": 0.06688488591400973, "learning_rate": 0.0007124043094094352, "loss": 1.4639, "step": 3531 }, { "epoch": 0.38, "grad_norm": 0.0706049908559832, "learning_rate": 0.0007122466800981939, "loss": 1.5427, "step": 3532 }, { "epoch": 0.38, "grad_norm": 0.0711990066874569, "learning_rate": 0.0007120890250513111, "loss": 1.515, "step": 3533 }, { "epoch": 0.38, "grad_norm": 0.07309355595657928, "learning_rate": 0.0007119313442879028, "loss": 1.5657, "step": 3534 }, { "epoch": 0.38, "grad_norm": 0.07203044835573408, "learning_rate": 0.0007117736378270885, "loss": 1.3643, "step": 3535 }, { "epoch": 0.38, "grad_norm": 0.07921231575470476, "learning_rate": 0.0007116159056879904, "loss": 1.3966, "step": 3536 }, { "epoch": 0.38, "grad_norm": 0.06516803459785417, "learning_rate": 0.0007114581478897342, "loss": 1.3938, "step": 3537 }, { "epoch": 0.38, "grad_norm": 0.07084548870515166, "learning_rate": 0.0007113003644514485, "loss": 1.4167, "step": 3538 }, { "epoch": 0.38, "grad_norm": 0.0754906872344934, "learning_rate": 0.0007111425553922653, "loss": 1.4765, "step": 3539 }, { "epoch": 0.38, "grad_norm": 0.07277782098342274, "learning_rate": 0.0007109847207313191, "loss": 1.4083, "step": 3540 }, { "epoch": 0.38, "grad_norm": 0.06913253674908561, "learning_rate": 0.0007108268604877483, "loss": 1.4447, "step": 3541 }, { "epoch": 0.38, "grad_norm": 0.06659941330711411, "learning_rate": 0.0007106689746806939, "loss": 1.4691, "step": 3542 }, { "epoch": 0.38, "grad_norm": 0.07232512460358181, "learning_rate": 0.0007105110633292999, "loss": 1.5318, "step": 3543 }, { "epoch": 0.38, "grad_norm": 0.06723843964590365, "learning_rate": 0.0007103531264527138, "loss": 1.4276, "step": 3544 }, { "epoch": 0.38, "grad_norm": 0.06852588979292723, "learning_rate": 0.000710195164070086, "loss": 1.3008, "step": 3545 }, { "epoch": 0.38, "grad_norm": 0.06904335986327417, "learning_rate": 0.0007100371762005697, "loss": 1.2505, "step": 3546 }, { "epoch": 0.38, "grad_norm": 0.07517395552253944, "learning_rate": 0.0007098791628633217, "loss": 1.5369, "step": 3547 }, { "epoch": 0.38, "grad_norm": 0.07166106998321647, "learning_rate": 0.0007097211240775018, "loss": 1.2608, "step": 3548 }, { "epoch": 0.38, "grad_norm": 0.06562667167612078, "learning_rate": 0.0007095630598622724, "loss": 1.4766, "step": 3549 }, { "epoch": 0.38, "grad_norm": 0.07500007910028895, "learning_rate": 0.0007094049702367997, "loss": 1.1845, "step": 3550 }, { "epoch": 0.38, "grad_norm": 0.07829807898182478, "learning_rate": 0.0007092468552202523, "loss": 1.2858, "step": 3551 }, { "epoch": 0.38, "grad_norm": 0.06925524966397749, "learning_rate": 0.0007090887148318023, "loss": 1.2332, "step": 3552 }, { "epoch": 0.38, "grad_norm": 0.08475475670099092, "learning_rate": 0.000708930549090625, "loss": 1.5682, "step": 3553 }, { "epoch": 0.38, "grad_norm": 0.07622082830610633, "learning_rate": 0.0007087723580158983, "loss": 1.3916, "step": 3554 }, { "epoch": 0.38, "grad_norm": 0.0766525857066531, "learning_rate": 0.0007086141416268033, "loss": 1.4084, "step": 3555 }, { "epoch": 0.38, "grad_norm": 0.08003974204009992, "learning_rate": 0.0007084558999425245, "loss": 1.4344, "step": 3556 }, { "epoch": 0.38, "grad_norm": 0.06920958308933454, "learning_rate": 0.0007082976329822491, "loss": 1.4378, "step": 3557 }, { "epoch": 0.38, "grad_norm": 0.07090901454865277, "learning_rate": 0.0007081393407651675, "loss": 1.4642, "step": 3558 }, { "epoch": 0.38, "grad_norm": 0.07078282523191834, "learning_rate": 0.0007079810233104734, "loss": 1.3724, "step": 3559 }, { "epoch": 0.38, "grad_norm": 0.06605378041470791, "learning_rate": 0.0007078226806373631, "loss": 1.4078, "step": 3560 }, { "epoch": 0.38, "grad_norm": 0.06343453006005209, "learning_rate": 0.0007076643127650366, "loss": 1.4076, "step": 3561 }, { "epoch": 0.38, "grad_norm": 0.07317786253087795, "learning_rate": 0.0007075059197126961, "loss": 1.5665, "step": 3562 }, { "epoch": 0.38, "grad_norm": 0.0654489556452131, "learning_rate": 0.0007073475014995472, "loss": 1.4653, "step": 3563 }, { "epoch": 0.38, "grad_norm": 0.07119377427290143, "learning_rate": 0.0007071890581447992, "loss": 1.4013, "step": 3564 }, { "epoch": 0.38, "grad_norm": 0.06589837136177674, "learning_rate": 0.0007070305896676634, "loss": 1.3597, "step": 3565 }, { "epoch": 0.38, "grad_norm": 0.07099054244663582, "learning_rate": 0.0007068720960873552, "loss": 1.4718, "step": 3566 }, { "epoch": 0.38, "grad_norm": 0.06627867168212025, "learning_rate": 0.0007067135774230919, "loss": 1.5182, "step": 3567 }, { "epoch": 0.38, "grad_norm": 0.06392067010017136, "learning_rate": 0.0007065550336940947, "loss": 1.5218, "step": 3568 }, { "epoch": 0.38, "grad_norm": 0.06383872794322432, "learning_rate": 0.0007063964649195874, "loss": 1.3602, "step": 3569 }, { "epoch": 0.38, "grad_norm": 0.07302909210052719, "learning_rate": 0.0007062378711187973, "loss": 1.4906, "step": 3570 }, { "epoch": 0.38, "grad_norm": 0.07202766521247028, "learning_rate": 0.0007060792523109544, "loss": 1.5076, "step": 3571 }, { "epoch": 0.38, "grad_norm": 0.06485625650547473, "learning_rate": 0.0007059206085152918, "loss": 1.3075, "step": 3572 }, { "epoch": 0.38, "grad_norm": 0.0715065717760381, "learning_rate": 0.0007057619397510453, "loss": 1.4296, "step": 3573 }, { "epoch": 0.38, "grad_norm": 0.08825733453760141, "learning_rate": 0.0007056032460374541, "loss": 1.4665, "step": 3574 }, { "epoch": 0.38, "grad_norm": 0.0701463361815818, "learning_rate": 0.0007054445273937609, "loss": 1.3857, "step": 3575 }, { "epoch": 0.38, "grad_norm": 0.07630369200437422, "learning_rate": 0.0007052857838392104, "loss": 1.4023, "step": 3576 }, { "epoch": 0.38, "grad_norm": 0.06817368849988802, "learning_rate": 0.0007051270153930506, "loss": 1.5429, "step": 3577 }, { "epoch": 0.38, "grad_norm": 0.07273743830308618, "learning_rate": 0.0007049682220745332, "loss": 1.5339, "step": 3578 }, { "epoch": 0.38, "grad_norm": 0.06237971196922428, "learning_rate": 0.0007048094039029122, "loss": 1.4023, "step": 3579 }, { "epoch": 0.38, "grad_norm": 0.07174054000231482, "learning_rate": 0.0007046505608974447, "loss": 1.4179, "step": 3580 }, { "epoch": 0.38, "grad_norm": 0.07079421703558056, "learning_rate": 0.0007044916930773915, "loss": 1.4336, "step": 3581 }, { "epoch": 0.39, "grad_norm": 0.06324002319659304, "learning_rate": 0.0007043328004620154, "loss": 1.4002, "step": 3582 }, { "epoch": 0.39, "grad_norm": 0.07273252464213828, "learning_rate": 0.0007041738830705827, "loss": 1.3021, "step": 3583 }, { "epoch": 0.39, "grad_norm": 0.0679012855779142, "learning_rate": 0.0007040149409223628, "loss": 1.5173, "step": 3584 }, { "epoch": 0.39, "grad_norm": 0.06706605104839712, "learning_rate": 0.0007038559740366281, "loss": 1.4245, "step": 3585 }, { "epoch": 0.39, "grad_norm": 0.07263886553942718, "learning_rate": 0.0007036969824326535, "loss": 1.5822, "step": 3586 }, { "epoch": 0.39, "grad_norm": 0.06272461509437303, "learning_rate": 0.0007035379661297179, "loss": 1.3431, "step": 3587 }, { "epoch": 0.39, "grad_norm": 0.07364340877937099, "learning_rate": 0.0007033789251471019, "loss": 1.3892, "step": 3588 }, { "epoch": 0.39, "grad_norm": 0.07060017034955898, "learning_rate": 0.0007032198595040901, "loss": 1.4748, "step": 3589 }, { "epoch": 0.39, "grad_norm": 0.06756244433398076, "learning_rate": 0.00070306076921997, "loss": 1.3551, "step": 3590 }, { "epoch": 0.39, "grad_norm": 0.07358338514614324, "learning_rate": 0.0007029016543140311, "loss": 1.407, "step": 3591 }, { "epoch": 0.39, "grad_norm": 0.09232691672724472, "learning_rate": 0.0007027425148055677, "loss": 1.4698, "step": 3592 }, { "epoch": 0.39, "grad_norm": 0.07306567061185729, "learning_rate": 0.000702583350713875, "loss": 1.3349, "step": 3593 }, { "epoch": 0.39, "grad_norm": 0.07152049501825936, "learning_rate": 0.0007024241620582527, "loss": 1.5199, "step": 3594 }, { "epoch": 0.39, "grad_norm": 0.07471021321320678, "learning_rate": 0.0007022649488580029, "loss": 1.4685, "step": 3595 }, { "epoch": 0.39, "grad_norm": 0.07674020487606688, "learning_rate": 0.0007021057111324307, "loss": 1.3409, "step": 3596 }, { "epoch": 0.39, "grad_norm": 0.08010512353343058, "learning_rate": 0.0007019464489008443, "loss": 1.4164, "step": 3597 }, { "epoch": 0.39, "grad_norm": 0.07294899590648436, "learning_rate": 0.0007017871621825549, "loss": 1.4573, "step": 3598 }, { "epoch": 0.39, "grad_norm": 0.0714024285774514, "learning_rate": 0.0007016278509968761, "loss": 1.3979, "step": 3599 }, { "epoch": 0.39, "grad_norm": 0.0715807774435257, "learning_rate": 0.0007014685153631255, "loss": 1.3887, "step": 3600 }, { "epoch": 0.39, "grad_norm": 0.07500788006512786, "learning_rate": 0.0007013091553006227, "loss": 1.4591, "step": 3601 }, { "epoch": 0.39, "grad_norm": 0.06521259328969803, "learning_rate": 0.0007011497708286909, "loss": 1.4541, "step": 3602 }, { "epoch": 0.39, "grad_norm": 0.0741904167001245, "learning_rate": 0.000700990361966656, "loss": 1.4057, "step": 3603 }, { "epoch": 0.39, "grad_norm": 0.06183999889715281, "learning_rate": 0.0007008309287338467, "loss": 1.3977, "step": 3604 }, { "epoch": 0.39, "grad_norm": 0.06681395641341766, "learning_rate": 0.0007006714711495949, "loss": 1.5082, "step": 3605 }, { "epoch": 0.39, "grad_norm": 0.08653655603945522, "learning_rate": 0.0007005119892332354, "loss": 1.4142, "step": 3606 }, { "epoch": 0.39, "grad_norm": 0.07028847770806987, "learning_rate": 0.0007003524830041059, "loss": 1.4125, "step": 3607 }, { "epoch": 0.39, "grad_norm": 0.08717202095751826, "learning_rate": 0.0007001929524815472, "loss": 1.4099, "step": 3608 }, { "epoch": 0.39, "grad_norm": 0.07358796039969515, "learning_rate": 0.0007000333976849028, "loss": 1.4611, "step": 3609 }, { "epoch": 0.39, "grad_norm": 0.07321829406105443, "learning_rate": 0.0006998738186335193, "loss": 1.2502, "step": 3610 }, { "epoch": 0.39, "grad_norm": 0.07212265150735724, "learning_rate": 0.0006997142153467461, "loss": 1.5196, "step": 3611 }, { "epoch": 0.39, "grad_norm": 0.06711496628227426, "learning_rate": 0.000699554587843936, "loss": 1.4971, "step": 3612 }, { "epoch": 0.39, "grad_norm": 0.0709609735167094, "learning_rate": 0.0006993949361444441, "loss": 1.3627, "step": 3613 }, { "epoch": 0.39, "grad_norm": 0.06737833681998814, "learning_rate": 0.0006992352602676287, "loss": 1.4435, "step": 3614 }, { "epoch": 0.39, "grad_norm": 0.08318606479339613, "learning_rate": 0.0006990755602328512, "loss": 1.3223, "step": 3615 }, { "epoch": 0.39, "grad_norm": 0.07678376698123786, "learning_rate": 0.0006989158360594756, "loss": 1.2798, "step": 3616 }, { "epoch": 0.39, "grad_norm": 0.07405997377374739, "learning_rate": 0.0006987560877668692, "loss": 1.4287, "step": 3617 }, { "epoch": 0.39, "grad_norm": 0.0764351613615117, "learning_rate": 0.0006985963153744019, "loss": 1.5333, "step": 3618 }, { "epoch": 0.39, "grad_norm": 0.08385446808915825, "learning_rate": 0.0006984365189014467, "loss": 1.4834, "step": 3619 }, { "epoch": 0.39, "grad_norm": 0.06263563035341106, "learning_rate": 0.0006982766983673795, "loss": 1.3152, "step": 3620 }, { "epoch": 0.39, "grad_norm": 0.08852794236399607, "learning_rate": 0.000698116853791579, "loss": 1.4673, "step": 3621 }, { "epoch": 0.39, "grad_norm": 0.06941771637584818, "learning_rate": 0.000697956985193427, "loss": 1.4864, "step": 3622 }, { "epoch": 0.39, "grad_norm": 0.08562998996198683, "learning_rate": 0.0006977970925923081, "loss": 1.4882, "step": 3623 }, { "epoch": 0.39, "grad_norm": 0.07531704894846612, "learning_rate": 0.0006976371760076099, "loss": 1.4987, "step": 3624 }, { "epoch": 0.39, "grad_norm": 0.0801714652638698, "learning_rate": 0.0006974772354587226, "loss": 1.4313, "step": 3625 }, { "epoch": 0.39, "grad_norm": 0.08020508619169321, "learning_rate": 0.0006973172709650397, "loss": 1.4755, "step": 3626 }, { "epoch": 0.39, "grad_norm": 0.07296454416739705, "learning_rate": 0.0006971572825459576, "loss": 1.4356, "step": 3627 }, { "epoch": 0.39, "grad_norm": 0.06951882359819238, "learning_rate": 0.000696997270220875, "loss": 1.4227, "step": 3628 }, { "epoch": 0.39, "grad_norm": 0.07644784282849344, "learning_rate": 0.0006968372340091946, "loss": 1.4035, "step": 3629 }, { "epoch": 0.39, "grad_norm": 0.07155938125174738, "learning_rate": 0.0006966771739303206, "loss": 1.4814, "step": 3630 }, { "epoch": 0.39, "grad_norm": 0.06280959644830418, "learning_rate": 0.0006965170900036613, "loss": 1.432, "step": 3631 }, { "epoch": 0.39, "grad_norm": 0.06420053763727787, "learning_rate": 0.0006963569822486276, "loss": 1.3966, "step": 3632 }, { "epoch": 0.39, "grad_norm": 0.06407629379194352, "learning_rate": 0.0006961968506846327, "loss": 1.4223, "step": 3633 }, { "epoch": 0.39, "grad_norm": 0.07341699164847276, "learning_rate": 0.0006960366953310931, "loss": 1.6418, "step": 3634 }, { "epoch": 0.39, "grad_norm": 0.07158103196668297, "learning_rate": 0.0006958765162074287, "loss": 1.3857, "step": 3635 }, { "epoch": 0.39, "grad_norm": 0.06656688307492928, "learning_rate": 0.0006957163133330611, "loss": 1.5035, "step": 3636 }, { "epoch": 0.39, "grad_norm": 0.07009898988268752, "learning_rate": 0.0006955560867274159, "loss": 1.4776, "step": 3637 }, { "epoch": 0.39, "grad_norm": 0.06893175987469181, "learning_rate": 0.0006953958364099208, "loss": 1.5117, "step": 3638 }, { "epoch": 0.39, "grad_norm": 0.07194928575269588, "learning_rate": 0.0006952355624000072, "loss": 1.4054, "step": 3639 }, { "epoch": 0.39, "grad_norm": 0.07005599291594371, "learning_rate": 0.0006950752647171086, "loss": 1.4454, "step": 3640 }, { "epoch": 0.39, "grad_norm": 0.06485794286202459, "learning_rate": 0.0006949149433806614, "loss": 1.2388, "step": 3641 }, { "epoch": 0.39, "grad_norm": 0.07321685193872544, "learning_rate": 0.0006947545984101053, "loss": 1.5212, "step": 3642 }, { "epoch": 0.39, "grad_norm": 0.06766799514535249, "learning_rate": 0.000694594229824883, "loss": 1.4703, "step": 3643 }, { "epoch": 0.39, "grad_norm": 0.07674140785209206, "learning_rate": 0.0006944338376444393, "loss": 1.4143, "step": 3644 }, { "epoch": 0.39, "grad_norm": 0.07113361066488795, "learning_rate": 0.0006942734218882225, "loss": 1.5061, "step": 3645 }, { "epoch": 0.39, "grad_norm": 0.07408116710866873, "learning_rate": 0.0006941129825756836, "loss": 1.3525, "step": 3646 }, { "epoch": 0.39, "grad_norm": 0.06834433253859368, "learning_rate": 0.0006939525197262762, "loss": 1.3372, "step": 3647 }, { "epoch": 0.39, "grad_norm": 0.07240980576151891, "learning_rate": 0.0006937920333594572, "loss": 1.3757, "step": 3648 }, { "epoch": 0.39, "grad_norm": 0.07630489881087277, "learning_rate": 0.0006936315234946861, "loss": 1.4788, "step": 3649 }, { "epoch": 0.39, "grad_norm": 0.07956854851302582, "learning_rate": 0.0006934709901514251, "loss": 1.4576, "step": 3650 }, { "epoch": 0.39, "grad_norm": 0.0670356740719808, "learning_rate": 0.0006933104333491398, "loss": 1.5165, "step": 3651 }, { "epoch": 0.39, "grad_norm": 0.07588722767607452, "learning_rate": 0.0006931498531072977, "loss": 1.3835, "step": 3652 }, { "epoch": 0.39, "grad_norm": 0.09334811732319619, "learning_rate": 0.0006929892494453703, "loss": 1.4464, "step": 3653 }, { "epoch": 0.39, "grad_norm": 0.061988691124392446, "learning_rate": 0.0006928286223828309, "loss": 1.3886, "step": 3654 }, { "epoch": 0.39, "grad_norm": 0.07921738004769585, "learning_rate": 0.0006926679719391562, "loss": 1.4586, "step": 3655 }, { "epoch": 0.39, "grad_norm": 0.08326077624240993, "learning_rate": 0.0006925072981338259, "loss": 1.3614, "step": 3656 }, { "epoch": 0.39, "grad_norm": 0.08352543499375753, "learning_rate": 0.0006923466009863218, "loss": 1.4544, "step": 3657 }, { "epoch": 0.39, "grad_norm": 0.06887853383184417, "learning_rate": 0.0006921858805161294, "loss": 1.3777, "step": 3658 }, { "epoch": 0.39, "grad_norm": 0.06923628331383389, "learning_rate": 0.0006920251367427361, "loss": 1.5486, "step": 3659 }, { "epoch": 0.39, "grad_norm": 0.06519640964192866, "learning_rate": 0.0006918643696856333, "loss": 1.4187, "step": 3660 }, { "epoch": 0.39, "grad_norm": 0.07237657442273564, "learning_rate": 0.0006917035793643141, "loss": 1.5045, "step": 3661 }, { "epoch": 0.39, "grad_norm": 0.0693432290313142, "learning_rate": 0.0006915427657982751, "loss": 1.3163, "step": 3662 }, { "epoch": 0.39, "grad_norm": 0.06416290954447852, "learning_rate": 0.0006913819290070153, "loss": 1.4348, "step": 3663 }, { "epoch": 0.39, "grad_norm": 0.07147056356269492, "learning_rate": 0.0006912210690100369, "loss": 1.4821, "step": 3664 }, { "epoch": 0.39, "grad_norm": 0.07969102834155034, "learning_rate": 0.0006910601858268444, "loss": 1.453, "step": 3665 }, { "epoch": 0.39, "grad_norm": 0.13718934471197883, "learning_rate": 0.000690899279476946, "loss": 1.4078, "step": 3666 }, { "epoch": 0.39, "grad_norm": 0.08114721434640836, "learning_rate": 0.0006907383499798516, "loss": 1.4907, "step": 3667 }, { "epoch": 0.39, "grad_norm": 0.07115178902771321, "learning_rate": 0.0006905773973550748, "loss": 1.4556, "step": 3668 }, { "epoch": 0.39, "grad_norm": 0.07942590621430361, "learning_rate": 0.0006904164216221314, "loss": 1.4596, "step": 3669 }, { "epoch": 0.39, "grad_norm": 0.07289576701035694, "learning_rate": 0.0006902554228005405, "loss": 1.4612, "step": 3670 }, { "epoch": 0.39, "grad_norm": 0.06528821995816773, "learning_rate": 0.0006900944009098238, "loss": 1.3585, "step": 3671 }, { "epoch": 0.39, "grad_norm": 0.07616160702579455, "learning_rate": 0.0006899333559695056, "loss": 1.3554, "step": 3672 }, { "epoch": 0.39, "grad_norm": 0.07769584578503212, "learning_rate": 0.0006897722879991131, "loss": 1.4175, "step": 3673 }, { "epoch": 0.39, "grad_norm": 0.07083227509463093, "learning_rate": 0.0006896111970181764, "loss": 1.4129, "step": 3674 }, { "epoch": 0.4, "grad_norm": 0.06563564523573756, "learning_rate": 0.0006894500830462285, "loss": 1.3298, "step": 3675 }, { "epoch": 0.4, "grad_norm": 0.0704221954251129, "learning_rate": 0.0006892889461028047, "loss": 1.4314, "step": 3676 }, { "epoch": 0.4, "grad_norm": 0.07517734381017364, "learning_rate": 0.0006891277862074439, "loss": 1.3131, "step": 3677 }, { "epoch": 0.4, "grad_norm": 0.07709956431855618, "learning_rate": 0.0006889666033796869, "loss": 1.4983, "step": 3678 }, { "epoch": 0.4, "grad_norm": 0.06641564320298735, "learning_rate": 0.0006888053976390776, "loss": 1.4659, "step": 3679 }, { "epoch": 0.4, "grad_norm": 0.07326313513970828, "learning_rate": 0.000688644169005163, "loss": 1.4576, "step": 3680 }, { "epoch": 0.4, "grad_norm": 0.06526229926022548, "learning_rate": 0.0006884829174974927, "loss": 1.5378, "step": 3681 }, { "epoch": 0.4, "grad_norm": 0.06510469605310103, "learning_rate": 0.0006883216431356187, "loss": 1.2951, "step": 3682 }, { "epoch": 0.4, "grad_norm": 0.06975087735626676, "learning_rate": 0.0006881603459390964, "loss": 1.535, "step": 3683 }, { "epoch": 0.4, "grad_norm": 0.07054312371923935, "learning_rate": 0.0006879990259274832, "loss": 1.6018, "step": 3684 }, { "epoch": 0.4, "grad_norm": 0.06413316601711448, "learning_rate": 0.0006878376831203401, "loss": 1.4806, "step": 3685 }, { "epoch": 0.4, "grad_norm": 0.06207448118614186, "learning_rate": 0.0006876763175372305, "loss": 1.4473, "step": 3686 }, { "epoch": 0.4, "grad_norm": 0.07947569981904305, "learning_rate": 0.00068751492919772, "loss": 1.2827, "step": 3687 }, { "epoch": 0.4, "grad_norm": 0.0667703783383479, "learning_rate": 0.0006873535181213784, "loss": 1.4303, "step": 3688 }, { "epoch": 0.4, "grad_norm": 0.06814443823594021, "learning_rate": 0.0006871920843277764, "loss": 1.3534, "step": 3689 }, { "epoch": 0.4, "grad_norm": 0.06858806164536586, "learning_rate": 0.000687030627836489, "loss": 1.4704, "step": 3690 }, { "epoch": 0.4, "grad_norm": 0.07516155321113553, "learning_rate": 0.0006868691486670932, "loss": 1.4027, "step": 3691 }, { "epoch": 0.4, "grad_norm": 0.06523038718461018, "learning_rate": 0.0006867076468391688, "loss": 1.4835, "step": 3692 }, { "epoch": 0.4, "grad_norm": 0.0641694726958727, "learning_rate": 0.0006865461223722986, "loss": 1.4408, "step": 3693 }, { "epoch": 0.4, "grad_norm": 0.07188965078622174, "learning_rate": 0.0006863845752860679, "loss": 1.509, "step": 3694 }, { "epoch": 0.4, "grad_norm": 0.06917751289317574, "learning_rate": 0.0006862230056000648, "loss": 1.4961, "step": 3695 }, { "epoch": 0.4, "grad_norm": 0.06763332815454019, "learning_rate": 0.0006860614133338804, "loss": 1.3402, "step": 3696 }, { "epoch": 0.4, "grad_norm": 0.06762636955642931, "learning_rate": 0.0006858997985071081, "loss": 1.5605, "step": 3697 }, { "epoch": 0.4, "grad_norm": 0.06856232217515264, "learning_rate": 0.0006857381611393445, "loss": 1.4544, "step": 3698 }, { "epoch": 0.4, "grad_norm": 0.06275521544299778, "learning_rate": 0.0006855765012501883, "loss": 1.4577, "step": 3699 }, { "epoch": 0.4, "grad_norm": 0.06899956083233556, "learning_rate": 0.0006854148188592418, "loss": 1.4359, "step": 3700 }, { "epoch": 0.4, "grad_norm": 0.06995023762873773, "learning_rate": 0.000685253113986109, "loss": 1.4413, "step": 3701 }, { "epoch": 0.4, "grad_norm": 0.06988588357833313, "learning_rate": 0.0006850913866503977, "loss": 1.3826, "step": 3702 }, { "epoch": 0.4, "grad_norm": 0.06786538775904234, "learning_rate": 0.0006849296368717176, "loss": 1.4923, "step": 3703 }, { "epoch": 0.4, "grad_norm": 0.07654915353921299, "learning_rate": 0.0006847678646696813, "loss": 1.3739, "step": 3704 }, { "epoch": 0.4, "grad_norm": 0.0627728148075214, "learning_rate": 0.0006846060700639046, "loss": 1.3529, "step": 3705 }, { "epoch": 0.4, "grad_norm": 0.06829333606625117, "learning_rate": 0.0006844442530740055, "loss": 1.5439, "step": 3706 }, { "epoch": 0.4, "grad_norm": 0.06632683279100844, "learning_rate": 0.0006842824137196046, "loss": 1.3775, "step": 3707 }, { "epoch": 0.4, "grad_norm": 0.0709408213687562, "learning_rate": 0.000684120552020326, "loss": 1.2745, "step": 3708 }, { "epoch": 0.4, "grad_norm": 0.07214752580069766, "learning_rate": 0.0006839586679957956, "loss": 1.5218, "step": 3709 }, { "epoch": 0.4, "grad_norm": 0.07159420982548895, "learning_rate": 0.0006837967616656425, "loss": 1.4378, "step": 3710 }, { "epoch": 0.4, "grad_norm": 0.07620806323467304, "learning_rate": 0.0006836348330494984, "loss": 1.4767, "step": 3711 }, { "epoch": 0.4, "grad_norm": 0.0712872441268927, "learning_rate": 0.0006834728821669977, "loss": 1.3746, "step": 3712 }, { "epoch": 0.4, "grad_norm": 0.06739384795782043, "learning_rate": 0.0006833109090377775, "loss": 1.3444, "step": 3713 }, { "epoch": 0.4, "grad_norm": 0.07259481271825156, "learning_rate": 0.0006831489136814777, "loss": 1.4627, "step": 3714 }, { "epoch": 0.4, "grad_norm": 0.08300629502199651, "learning_rate": 0.0006829868961177406, "loss": 1.3774, "step": 3715 }, { "epoch": 0.4, "grad_norm": 0.06620793761290668, "learning_rate": 0.0006828248563662116, "loss": 1.441, "step": 3716 }, { "epoch": 0.4, "grad_norm": 0.06753191484298786, "learning_rate": 0.0006826627944465383, "loss": 1.2897, "step": 3717 }, { "epoch": 0.4, "grad_norm": 0.06968868045330964, "learning_rate": 0.0006825007103783716, "loss": 1.3708, "step": 3718 }, { "epoch": 0.4, "grad_norm": 0.0664248081179177, "learning_rate": 0.0006823386041813647, "loss": 1.3146, "step": 3719 }, { "epoch": 0.4, "grad_norm": 0.06767190375786564, "learning_rate": 0.0006821764758751732, "loss": 1.404, "step": 3720 }, { "epoch": 0.4, "grad_norm": 0.07605015217317455, "learning_rate": 0.0006820143254794559, "loss": 1.5113, "step": 3721 }, { "epoch": 0.4, "grad_norm": 0.06485099522202713, "learning_rate": 0.0006818521530138743, "loss": 1.3003, "step": 3722 }, { "epoch": 0.4, "grad_norm": 0.07531942615179421, "learning_rate": 0.0006816899584980922, "loss": 1.4695, "step": 3723 }, { "epoch": 0.4, "grad_norm": 0.09489560161012517, "learning_rate": 0.000681527741951776, "loss": 1.3409, "step": 3724 }, { "epoch": 0.4, "grad_norm": 0.061931818640721746, "learning_rate": 0.0006813655033945956, "loss": 1.4266, "step": 3725 }, { "epoch": 0.4, "grad_norm": 0.08377945929406046, "learning_rate": 0.0006812032428462225, "loss": 1.4634, "step": 3726 }, { "epoch": 0.4, "grad_norm": 0.08635568739760313, "learning_rate": 0.0006810409603263314, "loss": 1.5708, "step": 3727 }, { "epoch": 0.4, "grad_norm": 0.07301256105687044, "learning_rate": 0.0006808786558546, "loss": 1.4444, "step": 3728 }, { "epoch": 0.4, "grad_norm": 0.07858240960791367, "learning_rate": 0.0006807163294507078, "loss": 1.4104, "step": 3729 }, { "epoch": 0.4, "grad_norm": 0.069090300634783, "learning_rate": 0.0006805539811343376, "loss": 1.3712, "step": 3730 }, { "epoch": 0.4, "grad_norm": 0.07759023420372646, "learning_rate": 0.0006803916109251748, "loss": 1.3338, "step": 3731 }, { "epoch": 0.4, "grad_norm": 0.07414115909337814, "learning_rate": 0.0006802292188429072, "loss": 1.5009, "step": 3732 }, { "epoch": 0.4, "grad_norm": 0.0818109136073175, "learning_rate": 0.0006800668049072256, "loss": 1.3017, "step": 3733 }, { "epoch": 0.4, "grad_norm": 0.07277609899484332, "learning_rate": 0.000679904369137823, "loss": 1.4741, "step": 3734 }, { "epoch": 0.4, "grad_norm": 0.06986648440783053, "learning_rate": 0.0006797419115543954, "loss": 1.3859, "step": 3735 }, { "epoch": 0.4, "grad_norm": 0.08023992915741134, "learning_rate": 0.0006795794321766415, "loss": 1.5204, "step": 3736 }, { "epoch": 0.4, "grad_norm": 0.07276280799914031, "learning_rate": 0.0006794169310242624, "loss": 1.4427, "step": 3737 }, { "epoch": 0.4, "grad_norm": 0.07170170736813007, "learning_rate": 0.0006792544081169616, "loss": 1.4303, "step": 3738 }, { "epoch": 0.4, "grad_norm": 0.06361279357416362, "learning_rate": 0.000679091863474446, "loss": 1.5411, "step": 3739 }, { "epoch": 0.4, "grad_norm": 0.07108682039307701, "learning_rate": 0.0006789292971164244, "loss": 1.2778, "step": 3740 }, { "epoch": 0.4, "grad_norm": 0.07580525556322194, "learning_rate": 0.0006787667090626089, "loss": 1.3907, "step": 3741 }, { "epoch": 0.4, "grad_norm": 0.06995561131507061, "learning_rate": 0.0006786040993327135, "loss": 1.4253, "step": 3742 }, { "epoch": 0.4, "grad_norm": 0.07492744663626666, "learning_rate": 0.0006784414679464552, "loss": 1.4251, "step": 3743 }, { "epoch": 0.4, "grad_norm": 0.06882834924705727, "learning_rate": 0.0006782788149235538, "loss": 1.4079, "step": 3744 }, { "epoch": 0.4, "grad_norm": 0.07609185882270524, "learning_rate": 0.0006781161402837316, "loss": 1.5241, "step": 3745 }, { "epoch": 0.4, "grad_norm": 0.0919088950874994, "learning_rate": 0.0006779534440467133, "loss": 1.4305, "step": 3746 }, { "epoch": 0.4, "grad_norm": 0.07213172396040231, "learning_rate": 0.0006777907262322262, "loss": 1.3415, "step": 3747 }, { "epoch": 0.4, "grad_norm": 0.06770604290970164, "learning_rate": 0.0006776279868600008, "loss": 1.4458, "step": 3748 }, { "epoch": 0.4, "grad_norm": 0.07331521526131282, "learning_rate": 0.0006774652259497696, "loss": 1.4466, "step": 3749 }, { "epoch": 0.4, "grad_norm": 0.06631965109609582, "learning_rate": 0.0006773024435212678, "loss": 1.45, "step": 3750 }, { "epoch": 0.4, "grad_norm": 0.06771060610435131, "learning_rate": 0.0006771396395942338, "loss": 1.3031, "step": 3751 }, { "epoch": 0.4, "grad_norm": 0.0725160661843529, "learning_rate": 0.0006769768141884074, "loss": 1.5257, "step": 3752 }, { "epoch": 0.4, "grad_norm": 0.0793938270762842, "learning_rate": 0.0006768139673235323, "loss": 1.5339, "step": 3753 }, { "epoch": 0.4, "grad_norm": 0.07151774981622631, "learning_rate": 0.0006766510990193541, "loss": 1.3757, "step": 3754 }, { "epoch": 0.4, "grad_norm": 0.06547987137883667, "learning_rate": 0.0006764882092956211, "loss": 1.3952, "step": 3755 }, { "epoch": 0.4, "grad_norm": 0.07314158842433856, "learning_rate": 0.0006763252981720844, "loss": 1.5099, "step": 3756 }, { "epoch": 0.4, "grad_norm": 0.07145510078013781, "learning_rate": 0.0006761623656684973, "loss": 1.3377, "step": 3757 }, { "epoch": 0.4, "grad_norm": 0.08644202316815978, "learning_rate": 0.0006759994118046161, "loss": 1.414, "step": 3758 }, { "epoch": 0.4, "grad_norm": 0.07282147693961499, "learning_rate": 0.0006758364366001994, "loss": 1.4244, "step": 3759 }, { "epoch": 0.4, "grad_norm": 0.07554452717292655, "learning_rate": 0.0006756734400750087, "loss": 1.3676, "step": 3760 }, { "epoch": 0.4, "grad_norm": 0.0666807039375581, "learning_rate": 0.0006755104222488076, "loss": 1.4265, "step": 3761 }, { "epoch": 0.4, "grad_norm": 0.09012510595806483, "learning_rate": 0.0006753473831413628, "loss": 1.3391, "step": 3762 }, { "epoch": 0.4, "grad_norm": 0.07835217913269821, "learning_rate": 0.0006751843227724432, "loss": 1.5525, "step": 3763 }, { "epoch": 0.4, "grad_norm": 0.06987950663692771, "learning_rate": 0.0006750212411618206, "loss": 1.3493, "step": 3764 }, { "epoch": 0.4, "grad_norm": 0.06996075079577234, "learning_rate": 0.000674858138329269, "loss": 1.457, "step": 3765 }, { "epoch": 0.4, "grad_norm": 0.07173072264270693, "learning_rate": 0.0006746950142945653, "loss": 1.326, "step": 3766 }, { "epoch": 0.4, "grad_norm": 0.06810115938826602, "learning_rate": 0.0006745318690774891, "loss": 1.2359, "step": 3767 }, { "epoch": 0.41, "grad_norm": 0.07757166620232256, "learning_rate": 0.0006743687026978219, "loss": 1.4865, "step": 3768 }, { "epoch": 0.41, "grad_norm": 0.08521148854272749, "learning_rate": 0.0006742055151753483, "loss": 1.5518, "step": 3769 }, { "epoch": 0.41, "grad_norm": 0.07341229077115634, "learning_rate": 0.0006740423065298556, "loss": 1.5728, "step": 3770 }, { "epoch": 0.41, "grad_norm": 0.07251757762711362, "learning_rate": 0.0006738790767811329, "loss": 1.4438, "step": 3771 }, { "epoch": 0.41, "grad_norm": 0.0754765370031017, "learning_rate": 0.0006737158259489729, "loss": 1.4546, "step": 3772 }, { "epoch": 0.41, "grad_norm": 0.06987041998936272, "learning_rate": 0.0006735525540531702, "loss": 1.4971, "step": 3773 }, { "epoch": 0.41, "grad_norm": 0.07846064142843433, "learning_rate": 0.0006733892611135217, "loss": 1.3602, "step": 3774 }, { "epoch": 0.41, "grad_norm": 0.08199997856494941, "learning_rate": 0.0006732259471498278, "loss": 1.5195, "step": 3775 }, { "epoch": 0.41, "grad_norm": 0.06705852406335323, "learning_rate": 0.0006730626121818906, "loss": 1.3127, "step": 3776 }, { "epoch": 0.41, "grad_norm": 0.07146928164080467, "learning_rate": 0.0006728992562295148, "loss": 1.4246, "step": 3777 }, { "epoch": 0.41, "grad_norm": 0.07467077035958926, "learning_rate": 0.0006727358793125084, "loss": 1.3603, "step": 3778 }, { "epoch": 0.41, "grad_norm": 0.06747803921749279, "learning_rate": 0.0006725724814506809, "loss": 1.401, "step": 3779 }, { "epoch": 0.41, "grad_norm": 0.07455585726671034, "learning_rate": 0.0006724090626638451, "loss": 1.3864, "step": 3780 }, { "epoch": 0.41, "grad_norm": 0.06935011523991552, "learning_rate": 0.0006722456229718162, "loss": 1.3373, "step": 3781 }, { "epoch": 0.41, "grad_norm": 0.07001330043357815, "learning_rate": 0.0006720821623944117, "loss": 1.3813, "step": 3782 }, { "epoch": 0.41, "grad_norm": 0.07006791244257965, "learning_rate": 0.0006719186809514516, "loss": 1.4948, "step": 3783 }, { "epoch": 0.41, "grad_norm": 0.08041409052706847, "learning_rate": 0.000671755178662759, "loss": 1.4941, "step": 3784 }, { "epoch": 0.41, "grad_norm": 0.07903064459799436, "learning_rate": 0.0006715916555481585, "loss": 1.3258, "step": 3785 }, { "epoch": 0.41, "grad_norm": 0.07790220948556671, "learning_rate": 0.0006714281116274783, "loss": 1.3577, "step": 3786 }, { "epoch": 0.41, "grad_norm": 0.0683831054506245, "learning_rate": 0.0006712645469205488, "loss": 1.3955, "step": 3787 }, { "epoch": 0.41, "grad_norm": 0.07375602721280128, "learning_rate": 0.0006711009614472022, "loss": 1.344, "step": 3788 }, { "epoch": 0.41, "grad_norm": 0.0753083239073712, "learning_rate": 0.0006709373552272744, "loss": 1.3137, "step": 3789 }, { "epoch": 0.41, "grad_norm": 0.08058980255270123, "learning_rate": 0.0006707737282806029, "loss": 1.4757, "step": 3790 }, { "epoch": 0.41, "grad_norm": 0.07188458605519603, "learning_rate": 0.000670610080627028, "loss": 1.4421, "step": 3791 }, { "epoch": 0.41, "grad_norm": 0.0745797053981257, "learning_rate": 0.0006704464122863928, "loss": 1.385, "step": 3792 }, { "epoch": 0.41, "grad_norm": 0.0809561549922701, "learning_rate": 0.0006702827232785425, "loss": 1.5045, "step": 3793 }, { "epoch": 0.41, "grad_norm": 0.07043268111949744, "learning_rate": 0.0006701190136233249, "loss": 1.4208, "step": 3794 }, { "epoch": 0.41, "grad_norm": 0.06464522265885646, "learning_rate": 0.0006699552833405903, "loss": 1.4705, "step": 3795 }, { "epoch": 0.41, "grad_norm": 0.06898157473720405, "learning_rate": 0.0006697915324501918, "loss": 1.3685, "step": 3796 }, { "epoch": 0.41, "grad_norm": 0.06720721473990293, "learning_rate": 0.0006696277609719845, "loss": 1.2577, "step": 3797 }, { "epoch": 0.41, "grad_norm": 0.069477039304262, "learning_rate": 0.0006694639689258265, "loss": 1.395, "step": 3798 }, { "epoch": 0.41, "grad_norm": 0.067690922335574, "learning_rate": 0.0006693001563315782, "loss": 1.4064, "step": 3799 }, { "epoch": 0.41, "grad_norm": 0.07169248794058057, "learning_rate": 0.0006691363232091019, "loss": 1.483, "step": 3800 }, { "epoch": 0.41, "grad_norm": 0.07878728528871834, "learning_rate": 0.0006689724695782635, "loss": 1.5265, "step": 3801 }, { "epoch": 0.41, "grad_norm": 0.07027898095463854, "learning_rate": 0.0006688085954589307, "loss": 1.4565, "step": 3802 }, { "epoch": 0.41, "grad_norm": 0.0671644222122429, "learning_rate": 0.0006686447008709735, "loss": 1.3886, "step": 3803 }, { "epoch": 0.41, "grad_norm": 0.10803177504795355, "learning_rate": 0.0006684807858342652, "loss": 1.4621, "step": 3804 }, { "epoch": 0.41, "grad_norm": 0.06890699521351634, "learning_rate": 0.0006683168503686806, "loss": 1.3289, "step": 3805 }, { "epoch": 0.41, "grad_norm": 0.06272061534057344, "learning_rate": 0.0006681528944940977, "loss": 1.4191, "step": 3806 }, { "epoch": 0.41, "grad_norm": 0.06759035500761486, "learning_rate": 0.0006679889182303966, "loss": 1.4501, "step": 3807 }, { "epoch": 0.41, "grad_norm": 0.07934872841569579, "learning_rate": 0.00066782492159746, "loss": 1.3587, "step": 3808 }, { "epoch": 0.41, "grad_norm": 0.07583621080731881, "learning_rate": 0.0006676609046151732, "loss": 1.2725, "step": 3809 }, { "epoch": 0.41, "grad_norm": 0.08046256366810259, "learning_rate": 0.0006674968673034235, "loss": 1.3842, "step": 3810 }, { "epoch": 0.41, "grad_norm": 0.08949644141491607, "learning_rate": 0.0006673328096821012, "loss": 1.3413, "step": 3811 }, { "epoch": 0.41, "grad_norm": 0.07477384583849257, "learning_rate": 0.0006671687317710989, "loss": 1.4916, "step": 3812 }, { "epoch": 0.41, "grad_norm": 0.07549342598106959, "learning_rate": 0.0006670046335903116, "loss": 1.3989, "step": 3813 }, { "epoch": 0.41, "grad_norm": 0.07054448270490725, "learning_rate": 0.0006668405151596367, "loss": 1.3175, "step": 3814 }, { "epoch": 0.41, "grad_norm": 0.06726660936523654, "learning_rate": 0.0006666763764989742, "loss": 1.5434, "step": 3815 }, { "epoch": 0.41, "grad_norm": 0.07343583098757128, "learning_rate": 0.0006665122176282264, "loss": 1.4393, "step": 3816 }, { "epoch": 0.41, "grad_norm": 0.07954843590714228, "learning_rate": 0.000666348038567298, "loss": 1.4805, "step": 3817 }, { "epoch": 0.41, "grad_norm": 0.0749554275646494, "learning_rate": 0.0006661838393360966, "loss": 1.3784, "step": 3818 }, { "epoch": 0.41, "grad_norm": 0.07420331079114773, "learning_rate": 0.0006660196199545317, "loss": 1.3676, "step": 3819 }, { "epoch": 0.41, "grad_norm": 0.06857050240511989, "learning_rate": 0.0006658553804425156, "loss": 1.4668, "step": 3820 }, { "epoch": 0.41, "grad_norm": 0.07062870193427842, "learning_rate": 0.0006656911208199627, "loss": 1.4805, "step": 3821 }, { "epoch": 0.41, "grad_norm": 0.07851558591594957, "learning_rate": 0.0006655268411067903, "loss": 1.4744, "step": 3822 }, { "epoch": 0.41, "grad_norm": 0.07572325313412566, "learning_rate": 0.0006653625413229177, "loss": 1.4193, "step": 3823 }, { "epoch": 0.41, "grad_norm": 0.07822957044039873, "learning_rate": 0.000665198221488267, "loss": 1.4386, "step": 3824 }, { "epoch": 0.41, "grad_norm": 0.07604473764552287, "learning_rate": 0.0006650338816227622, "loss": 1.3252, "step": 3825 }, { "epoch": 0.41, "grad_norm": 0.07524178279695189, "learning_rate": 0.0006648695217463304, "loss": 1.3825, "step": 3826 }, { "epoch": 0.41, "grad_norm": 0.08116651197896221, "learning_rate": 0.0006647051418789007, "loss": 1.3457, "step": 3827 }, { "epoch": 0.41, "grad_norm": 0.07209272417387406, "learning_rate": 0.0006645407420404047, "loss": 1.3494, "step": 3828 }, { "epoch": 0.41, "grad_norm": 0.07602965698815438, "learning_rate": 0.0006643763222507765, "loss": 1.4471, "step": 3829 }, { "epoch": 0.41, "grad_norm": 0.06920958645347326, "learning_rate": 0.0006642118825299526, "loss": 1.3819, "step": 3830 }, { "epoch": 0.41, "grad_norm": 0.06868419959482276, "learning_rate": 0.0006640474228978716, "loss": 1.3569, "step": 3831 }, { "epoch": 0.41, "grad_norm": 0.07442565049284244, "learning_rate": 0.0006638829433744753, "loss": 1.4242, "step": 3832 }, { "epoch": 0.41, "grad_norm": 0.07469968744098102, "learning_rate": 0.0006637184439797069, "loss": 1.3941, "step": 3833 }, { "epoch": 0.41, "grad_norm": 0.06954189557918407, "learning_rate": 0.0006635539247335128, "loss": 1.3249, "step": 3834 }, { "epoch": 0.41, "grad_norm": 0.0788039300618776, "learning_rate": 0.0006633893856558415, "loss": 1.5244, "step": 3835 }, { "epoch": 0.41, "grad_norm": 0.07227186871954694, "learning_rate": 0.000663224826766644, "loss": 1.4414, "step": 3836 }, { "epoch": 0.41, "grad_norm": 0.07364965898774103, "learning_rate": 0.0006630602480858734, "loss": 1.4668, "step": 3837 }, { "epoch": 0.41, "grad_norm": 0.0710617632092743, "learning_rate": 0.0006628956496334856, "loss": 1.4955, "step": 3838 }, { "epoch": 0.41, "grad_norm": 0.07483232033855866, "learning_rate": 0.0006627310314294385, "loss": 1.4276, "step": 3839 }, { "epoch": 0.41, "grad_norm": 0.07028946813885235, "learning_rate": 0.0006625663934936932, "loss": 1.4192, "step": 3840 }, { "epoch": 0.41, "grad_norm": 0.06907195642106281, "learning_rate": 0.0006624017358462122, "loss": 1.509, "step": 3841 }, { "epoch": 0.41, "grad_norm": 0.08082576245434056, "learning_rate": 0.0006622370585069604, "loss": 1.4527, "step": 3842 }, { "epoch": 0.41, "grad_norm": 0.07246371293472104, "learning_rate": 0.0006620723614959063, "loss": 1.4888, "step": 3843 }, { "epoch": 0.41, "grad_norm": 0.07731335402711258, "learning_rate": 0.0006619076448330197, "loss": 1.3546, "step": 3844 }, { "epoch": 0.41, "grad_norm": 0.09375234814524461, "learning_rate": 0.0006617429085382727, "loss": 1.5487, "step": 3845 }, { "epoch": 0.41, "grad_norm": 0.08419642597319515, "learning_rate": 0.0006615781526316406, "loss": 1.3322, "step": 3846 }, { "epoch": 0.41, "grad_norm": 0.07497704397986803, "learning_rate": 0.0006614133771331006, "loss": 1.3227, "step": 3847 }, { "epoch": 0.41, "grad_norm": 0.06673807622226641, "learning_rate": 0.0006612485820626317, "loss": 1.4786, "step": 3848 }, { "epoch": 0.41, "grad_norm": 0.06545079125780927, "learning_rate": 0.0006610837674402167, "loss": 1.4818, "step": 3849 }, { "epoch": 0.41, "grad_norm": 0.07666246902718891, "learning_rate": 0.0006609189332858394, "loss": 1.4476, "step": 3850 }, { "epoch": 0.41, "grad_norm": 0.07178457217246913, "learning_rate": 0.0006607540796194866, "loss": 1.4296, "step": 3851 }, { "epoch": 0.41, "grad_norm": 0.07540630488822511, "learning_rate": 0.0006605892064611477, "loss": 1.3576, "step": 3852 }, { "epoch": 0.41, "grad_norm": 0.06905586130396371, "learning_rate": 0.0006604243138308137, "loss": 1.4058, "step": 3853 }, { "epoch": 0.41, "grad_norm": 0.0762169441747839, "learning_rate": 0.0006602594017484785, "loss": 1.4798, "step": 3854 }, { "epoch": 0.41, "grad_norm": 0.07945689113659538, "learning_rate": 0.0006600944702341385, "loss": 1.4323, "step": 3855 }, { "epoch": 0.41, "grad_norm": 0.06695641836147707, "learning_rate": 0.0006599295193077922, "loss": 1.514, "step": 3856 }, { "epoch": 0.41, "grad_norm": 0.06348489376560794, "learning_rate": 0.0006597645489894399, "loss": 1.3441, "step": 3857 }, { "epoch": 0.41, "grad_norm": 0.07358283615988383, "learning_rate": 0.0006595995592990855, "loss": 1.3343, "step": 3858 }, { "epoch": 0.41, "grad_norm": 0.06725547686839599, "learning_rate": 0.0006594345502567342, "loss": 1.4087, "step": 3859 }, { "epoch": 0.41, "grad_norm": 0.08059962050387262, "learning_rate": 0.0006592695218823942, "loss": 1.4242, "step": 3860 }, { "epoch": 0.42, "grad_norm": 0.07206294434314071, "learning_rate": 0.0006591044741960754, "loss": 1.3445, "step": 3861 }, { "epoch": 0.42, "grad_norm": 0.06549271529864518, "learning_rate": 0.0006589394072177907, "loss": 1.3822, "step": 3862 }, { "epoch": 0.42, "grad_norm": 0.07173780240938447, "learning_rate": 0.0006587743209675549, "loss": 1.5295, "step": 3863 }, { "epoch": 0.42, "grad_norm": 0.07004691337394642, "learning_rate": 0.0006586092154653854, "loss": 1.4876, "step": 3864 }, { "epoch": 0.42, "grad_norm": 0.07900190551261445, "learning_rate": 0.0006584440907313016, "loss": 1.4928, "step": 3865 }, { "epoch": 0.42, "grad_norm": 0.07316898820714804, "learning_rate": 0.0006582789467853256, "loss": 1.3457, "step": 3866 }, { "epoch": 0.42, "grad_norm": 0.06359569667254837, "learning_rate": 0.0006581137836474816, "loss": 1.3676, "step": 3867 }, { "epoch": 0.42, "grad_norm": 0.06782660467001123, "learning_rate": 0.0006579486013377963, "loss": 1.3953, "step": 3868 }, { "epoch": 0.42, "grad_norm": 0.06696194015947708, "learning_rate": 0.0006577833998762985, "loss": 1.4628, "step": 3869 }, { "epoch": 0.42, "grad_norm": 0.0720934944292986, "learning_rate": 0.0006576181792830193, "loss": 1.395, "step": 3870 }, { "epoch": 0.42, "grad_norm": 0.05904143097036177, "learning_rate": 0.0006574529395779928, "loss": 1.2883, "step": 3871 }, { "epoch": 0.42, "grad_norm": 0.07144924842031333, "learning_rate": 0.0006572876807812545, "loss": 1.3083, "step": 3872 }, { "epoch": 0.42, "grad_norm": 0.07096535038028348, "learning_rate": 0.0006571224029128425, "loss": 1.4658, "step": 3873 }, { "epoch": 0.42, "grad_norm": 0.06858119737322739, "learning_rate": 0.0006569571059927977, "loss": 1.397, "step": 3874 }, { "epoch": 0.42, "grad_norm": 0.07465588152502295, "learning_rate": 0.0006567917900411625, "loss": 1.4628, "step": 3875 }, { "epoch": 0.42, "grad_norm": 0.07150645537132062, "learning_rate": 0.0006566264550779821, "loss": 1.3837, "step": 3876 }, { "epoch": 0.42, "grad_norm": 0.06856592343026248, "learning_rate": 0.0006564611011233043, "loss": 1.3578, "step": 3877 }, { "epoch": 0.42, "grad_norm": 0.06183930235911227, "learning_rate": 0.0006562957281971785, "loss": 1.3457, "step": 3878 }, { "epoch": 0.42, "grad_norm": 0.06662531178283607, "learning_rate": 0.0006561303363196568, "loss": 1.357, "step": 3879 }, { "epoch": 0.42, "grad_norm": 0.07518583399000245, "learning_rate": 0.0006559649255107935, "loss": 1.4672, "step": 3880 }, { "epoch": 0.42, "grad_norm": 0.0693085282173378, "learning_rate": 0.0006557994957906455, "loss": 1.3528, "step": 3881 }, { "epoch": 0.42, "grad_norm": 0.0728523880755816, "learning_rate": 0.0006556340471792712, "loss": 1.4816, "step": 3882 }, { "epoch": 0.42, "grad_norm": 0.06637118795337808, "learning_rate": 0.0006554685796967323, "loss": 1.4812, "step": 3883 }, { "epoch": 0.42, "grad_norm": 0.06861984917136406, "learning_rate": 0.0006553030933630921, "loss": 1.3983, "step": 3884 }, { "epoch": 0.42, "grad_norm": 0.06529551777213016, "learning_rate": 0.0006551375881984165, "loss": 1.4422, "step": 3885 }, { "epoch": 0.42, "grad_norm": 0.06858208005798797, "learning_rate": 0.0006549720642227735, "loss": 1.4688, "step": 3886 }, { "epoch": 0.42, "grad_norm": 0.07143616441393273, "learning_rate": 0.0006548065214562332, "loss": 1.3626, "step": 3887 }, { "epoch": 0.42, "grad_norm": 0.07404672141993965, "learning_rate": 0.0006546409599188686, "loss": 1.347, "step": 3888 }, { "epoch": 0.42, "grad_norm": 0.06567975995952656, "learning_rate": 0.0006544753796307547, "loss": 1.3886, "step": 3889 }, { "epoch": 0.42, "grad_norm": 0.06827963964218622, "learning_rate": 0.0006543097806119682, "loss": 1.3641, "step": 3890 }, { "epoch": 0.42, "grad_norm": 0.06502890786809211, "learning_rate": 0.000654144162882589, "loss": 1.5318, "step": 3891 }, { "epoch": 0.42, "grad_norm": 0.09549961420573892, "learning_rate": 0.0006539785264626985, "loss": 1.5141, "step": 3892 }, { "epoch": 0.42, "grad_norm": 0.0696505156579795, "learning_rate": 0.0006538128713723808, "loss": 1.4501, "step": 3893 }, { "epoch": 0.42, "grad_norm": 0.07241761729384924, "learning_rate": 0.0006536471976317223, "loss": 1.4871, "step": 3894 }, { "epoch": 0.42, "grad_norm": 0.07391803138958859, "learning_rate": 0.0006534815052608114, "loss": 1.4029, "step": 3895 }, { "epoch": 0.42, "grad_norm": 0.06593299094089043, "learning_rate": 0.0006533157942797387, "loss": 1.5064, "step": 3896 }, { "epoch": 0.42, "grad_norm": 0.07575361796363428, "learning_rate": 0.0006531500647085974, "loss": 1.3223, "step": 3897 }, { "epoch": 0.42, "grad_norm": 0.07197208892800534, "learning_rate": 0.0006529843165674828, "loss": 1.3693, "step": 3898 }, { "epoch": 0.42, "grad_norm": 0.0776799924076783, "learning_rate": 0.0006528185498764924, "loss": 1.4277, "step": 3899 }, { "epoch": 0.42, "grad_norm": 0.07989342835825704, "learning_rate": 0.0006526527646557261, "loss": 1.4741, "step": 3900 }, { "epoch": 0.42, "grad_norm": 0.07733258169149501, "learning_rate": 0.0006524869609252856, "loss": 1.4633, "step": 3901 }, { "epoch": 0.42, "grad_norm": 0.07556221365420827, "learning_rate": 0.0006523211387052755, "loss": 1.4267, "step": 3902 }, { "epoch": 0.42, "grad_norm": 0.07166604005031749, "learning_rate": 0.0006521552980158023, "loss": 1.4879, "step": 3903 }, { "epoch": 0.42, "grad_norm": 0.07005460949807596, "learning_rate": 0.0006519894388769744, "loss": 1.4087, "step": 3904 }, { "epoch": 0.42, "grad_norm": 0.08364807345793063, "learning_rate": 0.0006518235613089034, "loss": 1.461, "step": 3905 }, { "epoch": 0.42, "grad_norm": 0.07413803750026045, "learning_rate": 0.0006516576653317019, "loss": 1.3398, "step": 3906 }, { "epoch": 0.42, "grad_norm": 0.07896706543787335, "learning_rate": 0.0006514917509654857, "loss": 1.3738, "step": 3907 }, { "epoch": 0.42, "grad_norm": 0.0638598561453725, "learning_rate": 0.0006513258182303724, "loss": 1.4045, "step": 3908 }, { "epoch": 0.42, "grad_norm": 0.06952205296431396, "learning_rate": 0.000651159867146482, "loss": 1.42, "step": 3909 }, { "epoch": 0.42, "grad_norm": 0.06892081357631127, "learning_rate": 0.0006509938977339366, "loss": 1.4329, "step": 3910 }, { "epoch": 0.42, "grad_norm": 0.06937126649817552, "learning_rate": 0.0006508279100128605, "loss": 1.3472, "step": 3911 }, { "epoch": 0.42, "grad_norm": 0.06116336612759358, "learning_rate": 0.0006506619040033804, "loss": 1.4974, "step": 3912 }, { "epoch": 0.42, "grad_norm": 0.06792960629230754, "learning_rate": 0.0006504958797256249, "loss": 1.5023, "step": 3913 }, { "epoch": 0.42, "grad_norm": 0.07305324582122641, "learning_rate": 0.0006503298371997252, "loss": 1.3766, "step": 3914 }, { "epoch": 0.42, "grad_norm": 0.06663241510812803, "learning_rate": 0.0006501637764458145, "loss": 1.3254, "step": 3915 }, { "epoch": 0.42, "grad_norm": 0.0649843862468896, "learning_rate": 0.0006499976974840281, "loss": 1.4063, "step": 3916 }, { "epoch": 0.42, "grad_norm": 0.06951734575941149, "learning_rate": 0.0006498316003345039, "loss": 1.3195, "step": 3917 }, { "epoch": 0.42, "grad_norm": 0.062361862591796616, "learning_rate": 0.0006496654850173815, "loss": 1.3639, "step": 3918 }, { "epoch": 0.42, "grad_norm": 0.07616360631714768, "learning_rate": 0.0006494993515528031, "loss": 1.516, "step": 3919 }, { "epoch": 0.42, "grad_norm": 0.06141123114164976, "learning_rate": 0.0006493331999609132, "loss": 1.4371, "step": 3920 }, { "epoch": 0.42, "grad_norm": 0.06802623499257793, "learning_rate": 0.0006491670302618576, "loss": 1.4329, "step": 3921 }, { "epoch": 0.42, "grad_norm": 0.0772677461754373, "learning_rate": 0.0006490008424757855, "loss": 1.5025, "step": 3922 }, { "epoch": 0.42, "grad_norm": 0.0743215556309416, "learning_rate": 0.0006488346366228475, "loss": 1.3276, "step": 3923 }, { "epoch": 0.42, "grad_norm": 0.07205238649167922, "learning_rate": 0.0006486684127231967, "loss": 1.4187, "step": 3924 }, { "epoch": 0.42, "grad_norm": 0.0643706136749596, "learning_rate": 0.0006485021707969882, "loss": 1.42, "step": 3925 }, { "epoch": 0.42, "grad_norm": 0.06063894800539872, "learning_rate": 0.0006483359108643798, "loss": 1.3124, "step": 3926 }, { "epoch": 0.42, "grad_norm": 0.06558860865385341, "learning_rate": 0.0006481696329455307, "loss": 1.5174, "step": 3927 }, { "epoch": 0.42, "grad_norm": 0.07291103363275614, "learning_rate": 0.0006480033370606027, "loss": 1.5637, "step": 3928 }, { "epoch": 0.42, "grad_norm": 0.06763422148856137, "learning_rate": 0.0006478370232297599, "loss": 1.5378, "step": 3929 }, { "epoch": 0.42, "grad_norm": 0.06303247852882345, "learning_rate": 0.0006476706914731683, "loss": 1.3136, "step": 3930 }, { "epoch": 0.42, "grad_norm": 0.08152561088657095, "learning_rate": 0.0006475043418109965, "loss": 1.4433, "step": 3931 }, { "epoch": 0.42, "grad_norm": 0.06088011791680538, "learning_rate": 0.0006473379742634144, "loss": 1.3877, "step": 3932 }, { "epoch": 0.42, "grad_norm": 0.07318502818351857, "learning_rate": 0.0006471715888505951, "loss": 1.4242, "step": 3933 }, { "epoch": 0.42, "grad_norm": 0.06649810034662097, "learning_rate": 0.0006470051855927134, "loss": 1.4482, "step": 3934 }, { "epoch": 0.42, "grad_norm": 0.07058262099194323, "learning_rate": 0.000646838764509946, "loss": 1.4439, "step": 3935 }, { "epoch": 0.42, "grad_norm": 0.06514689463950991, "learning_rate": 0.0006466723256224723, "loss": 1.4394, "step": 3936 }, { "epoch": 0.42, "grad_norm": 0.08531249688658517, "learning_rate": 0.0006465058689504733, "loss": 1.4133, "step": 3937 }, { "epoch": 0.42, "grad_norm": 0.09075806648354802, "learning_rate": 0.0006463393945141327, "loss": 1.4447, "step": 3938 }, { "epoch": 0.42, "grad_norm": 0.06925448902528702, "learning_rate": 0.0006461729023336361, "loss": 1.4942, "step": 3939 }, { "epoch": 0.42, "grad_norm": 0.08057013127356427, "learning_rate": 0.0006460063924291709, "loss": 1.4033, "step": 3940 }, { "epoch": 0.42, "grad_norm": 0.0744006256177114, "learning_rate": 0.0006458398648209274, "loss": 1.4624, "step": 3941 }, { "epoch": 0.42, "grad_norm": 0.08683211100878094, "learning_rate": 0.0006456733195290976, "loss": 1.3895, "step": 3942 }, { "epoch": 0.42, "grad_norm": 0.07420107511822488, "learning_rate": 0.0006455067565738755, "loss": 1.4124, "step": 3943 }, { "epoch": 0.42, "grad_norm": 0.08515285186593229, "learning_rate": 0.0006453401759754574, "loss": 1.3782, "step": 3944 }, { "epoch": 0.42, "grad_norm": 0.07716068943132816, "learning_rate": 0.0006451735777540421, "loss": 1.4545, "step": 3945 }, { "epoch": 0.42, "grad_norm": 0.06785250313095319, "learning_rate": 0.0006450069619298299, "loss": 1.4342, "step": 3946 }, { "epoch": 0.42, "grad_norm": 0.08320823203786909, "learning_rate": 0.0006448403285230238, "loss": 1.3588, "step": 3947 }, { "epoch": 0.42, "grad_norm": 0.06851001898167915, "learning_rate": 0.0006446736775538284, "loss": 1.4696, "step": 3948 }, { "epoch": 0.42, "grad_norm": 0.07424712344584676, "learning_rate": 0.0006445070090424507, "loss": 1.368, "step": 3949 }, { "epoch": 0.42, "grad_norm": 0.08196924625145434, "learning_rate": 0.0006443403230091001, "loss": 1.5129, "step": 3950 }, { "epoch": 0.42, "grad_norm": 0.07066837376848595, "learning_rate": 0.0006441736194739878, "loss": 1.4107, "step": 3951 }, { "epoch": 0.42, "grad_norm": 0.07186081308313993, "learning_rate": 0.0006440068984573271, "loss": 1.3675, "step": 3952 }, { "epoch": 0.42, "grad_norm": 0.08857135342229146, "learning_rate": 0.0006438401599793333, "loss": 1.3819, "step": 3953 }, { "epoch": 0.43, "grad_norm": 0.07697014336064355, "learning_rate": 0.0006436734040602244, "loss": 1.4648, "step": 3954 }, { "epoch": 0.43, "grad_norm": 0.0697777333276736, "learning_rate": 0.0006435066307202197, "loss": 1.5152, "step": 3955 }, { "epoch": 0.43, "grad_norm": 0.07935233015115574, "learning_rate": 0.0006433398399795415, "loss": 1.3593, "step": 3956 }, { "epoch": 0.43, "grad_norm": 0.07245156010257515, "learning_rate": 0.0006431730318584135, "loss": 1.5383, "step": 3957 }, { "epoch": 0.43, "grad_norm": 0.06912143950929635, "learning_rate": 0.0006430062063770618, "loss": 1.3255, "step": 3958 }, { "epoch": 0.43, "grad_norm": 0.06871716581567094, "learning_rate": 0.0006428393635557146, "loss": 1.377, "step": 3959 }, { "epoch": 0.43, "grad_norm": 0.07122351216156715, "learning_rate": 0.000642672503414602, "loss": 1.4079, "step": 3960 }, { "epoch": 0.43, "grad_norm": 0.07245998086323546, "learning_rate": 0.0006425056259739566, "loss": 1.5169, "step": 3961 }, { "epoch": 0.43, "grad_norm": 0.07059710490678917, "learning_rate": 0.0006423387312540126, "loss": 1.3649, "step": 3962 }, { "epoch": 0.43, "grad_norm": 0.07049724857677844, "learning_rate": 0.0006421718192750069, "loss": 1.451, "step": 3963 }, { "epoch": 0.43, "grad_norm": 0.06736834343072567, "learning_rate": 0.000642004890057178, "loss": 1.4212, "step": 3964 }, { "epoch": 0.43, "grad_norm": 0.07409561524105795, "learning_rate": 0.0006418379436207664, "loss": 1.5065, "step": 3965 }, { "epoch": 0.43, "grad_norm": 0.07636154384442553, "learning_rate": 0.0006416709799860152, "loss": 1.3027, "step": 3966 }, { "epoch": 0.43, "grad_norm": 0.07598801731212228, "learning_rate": 0.0006415039991731694, "loss": 1.4313, "step": 3967 }, { "epoch": 0.43, "grad_norm": 0.07390579715074876, "learning_rate": 0.0006413370012024759, "loss": 1.3851, "step": 3968 }, { "epoch": 0.43, "grad_norm": 0.0717562024777435, "learning_rate": 0.0006411699860941835, "loss": 1.474, "step": 3969 }, { "epoch": 0.43, "grad_norm": 0.06909040510305756, "learning_rate": 0.0006410029538685438, "loss": 1.4227, "step": 3970 }, { "epoch": 0.43, "grad_norm": 0.06699697627532278, "learning_rate": 0.0006408359045458099, "loss": 1.5094, "step": 3971 }, { "epoch": 0.43, "grad_norm": 0.07224536624675192, "learning_rate": 0.0006406688381462367, "loss": 1.4341, "step": 3972 }, { "epoch": 0.43, "grad_norm": 0.07865011939575806, "learning_rate": 0.0006405017546900822, "loss": 1.4284, "step": 3973 }, { "epoch": 0.43, "grad_norm": 0.07174267385677884, "learning_rate": 0.0006403346541976056, "loss": 1.4537, "step": 3974 }, { "epoch": 0.43, "grad_norm": 0.06841466489928297, "learning_rate": 0.0006401675366890682, "loss": 1.4555, "step": 3975 }, { "epoch": 0.43, "grad_norm": 0.07168977043503581, "learning_rate": 0.0006400004021847338, "loss": 1.456, "step": 3976 }, { "epoch": 0.43, "grad_norm": 0.08316765009925761, "learning_rate": 0.000639833250704868, "loss": 1.5264, "step": 3977 }, { "epoch": 0.43, "grad_norm": 0.06598310254941155, "learning_rate": 0.0006396660822697383, "loss": 1.2632, "step": 3978 }, { "epoch": 0.43, "grad_norm": 0.06861447351985102, "learning_rate": 0.000639498896899615, "loss": 1.3635, "step": 3979 }, { "epoch": 0.43, "grad_norm": 0.06990865790262073, "learning_rate": 0.0006393316946147692, "loss": 1.3088, "step": 3980 }, { "epoch": 0.43, "grad_norm": 0.06726246864470845, "learning_rate": 0.0006391644754354751, "loss": 1.4502, "step": 3981 }, { "epoch": 0.43, "grad_norm": 0.07210289911724266, "learning_rate": 0.0006389972393820087, "loss": 1.4364, "step": 3982 }, { "epoch": 0.43, "grad_norm": 0.0708583088350892, "learning_rate": 0.0006388299864746477, "loss": 1.4931, "step": 3983 }, { "epoch": 0.43, "grad_norm": 0.06838091953553109, "learning_rate": 0.0006386627167336724, "loss": 1.3996, "step": 3984 }, { "epoch": 0.43, "grad_norm": 0.07333635634297311, "learning_rate": 0.0006384954301793647, "loss": 1.424, "step": 3985 }, { "epoch": 0.43, "grad_norm": 0.06392704323596084, "learning_rate": 0.0006383281268320083, "loss": 1.4805, "step": 3986 }, { "epoch": 0.43, "grad_norm": 0.07239214592386142, "learning_rate": 0.0006381608067118898, "loss": 1.4503, "step": 3987 }, { "epoch": 0.43, "grad_norm": 0.08377596451570307, "learning_rate": 0.0006379934698392972, "loss": 1.4575, "step": 3988 }, { "epoch": 0.43, "grad_norm": 0.06512746205390593, "learning_rate": 0.0006378261162345207, "loss": 1.4052, "step": 3989 }, { "epoch": 0.43, "grad_norm": 0.06566868198185125, "learning_rate": 0.0006376587459178525, "loss": 1.4508, "step": 3990 }, { "epoch": 0.43, "grad_norm": 0.07464547519704835, "learning_rate": 0.0006374913589095865, "loss": 1.3952, "step": 3991 }, { "epoch": 0.43, "grad_norm": 0.07711648505702222, "learning_rate": 0.0006373239552300194, "loss": 1.3761, "step": 3992 }, { "epoch": 0.43, "grad_norm": 0.06850586335147511, "learning_rate": 0.0006371565348994492, "loss": 1.4297, "step": 3993 }, { "epoch": 0.43, "grad_norm": 0.07199508377082078, "learning_rate": 0.0006369890979381765, "loss": 1.5062, "step": 3994 }, { "epoch": 0.43, "grad_norm": 0.06794921663514014, "learning_rate": 0.0006368216443665033, "loss": 1.3346, "step": 3995 }, { "epoch": 0.43, "grad_norm": 0.06926739185641112, "learning_rate": 0.000636654174204734, "loss": 1.48, "step": 3996 }, { "epoch": 0.43, "grad_norm": 0.06888232643275677, "learning_rate": 0.0006364866874731749, "loss": 1.2917, "step": 3997 }, { "epoch": 0.43, "grad_norm": 0.07729538672205612, "learning_rate": 0.0006363191841921344, "loss": 1.4689, "step": 3998 }, { "epoch": 0.43, "grad_norm": 0.0666543134995083, "learning_rate": 0.0006361516643819229, "loss": 1.3915, "step": 3999 }, { "epoch": 0.43, "grad_norm": 0.08126873599265884, "learning_rate": 0.0006359841280628529, "loss": 1.4609, "step": 4000 }, { "epoch": 0.43, "grad_norm": 0.0778814602771884, "learning_rate": 0.0006358165752552383, "loss": 1.5199, "step": 4001 }, { "epoch": 0.43, "grad_norm": 0.06526420166226415, "learning_rate": 0.0006356490059793959, "loss": 1.4386, "step": 4002 }, { "epoch": 0.43, "grad_norm": 0.07223692530289383, "learning_rate": 0.0006354814202556437, "loss": 1.3302, "step": 4003 }, { "epoch": 0.43, "grad_norm": 0.06565906223942776, "learning_rate": 0.0006353138181043024, "loss": 1.4046, "step": 4004 }, { "epoch": 0.43, "grad_norm": 0.06969485520321976, "learning_rate": 0.000635146199545694, "loss": 1.4483, "step": 4005 }, { "epoch": 0.43, "grad_norm": 0.0769829877285532, "learning_rate": 0.000634978564600143, "loss": 1.5446, "step": 4006 }, { "epoch": 0.43, "grad_norm": 0.06600204951040782, "learning_rate": 0.0006348109132879758, "loss": 1.4368, "step": 4007 }, { "epoch": 0.43, "grad_norm": 0.06277465263963822, "learning_rate": 0.0006346432456295206, "loss": 1.4953, "step": 4008 }, { "epoch": 0.43, "grad_norm": 0.070869092214546, "learning_rate": 0.0006344755616451075, "loss": 1.3439, "step": 4009 }, { "epoch": 0.43, "grad_norm": 0.0851992383775605, "learning_rate": 0.0006343078613550692, "loss": 1.2783, "step": 4010 }, { "epoch": 0.43, "grad_norm": 0.06873785217748689, "learning_rate": 0.0006341401447797395, "loss": 1.4184, "step": 4011 }, { "epoch": 0.43, "grad_norm": 0.08105491442820943, "learning_rate": 0.0006339724119394548, "loss": 1.4541, "step": 4012 }, { "epoch": 0.43, "grad_norm": 0.09058309410357024, "learning_rate": 0.0006338046628545533, "loss": 1.3389, "step": 4013 }, { "epoch": 0.43, "grad_norm": 0.07765474200223683, "learning_rate": 0.0006336368975453751, "loss": 1.3905, "step": 4014 }, { "epoch": 0.43, "grad_norm": 0.08691514341346585, "learning_rate": 0.0006334691160322625, "loss": 1.3661, "step": 4015 }, { "epoch": 0.43, "grad_norm": 0.07644850905371951, "learning_rate": 0.0006333013183355594, "loss": 1.4375, "step": 4016 }, { "epoch": 0.43, "grad_norm": 0.06857380019815038, "learning_rate": 0.0006331335044756118, "loss": 1.3521, "step": 4017 }, { "epoch": 0.43, "grad_norm": 0.08305252497824059, "learning_rate": 0.0006329656744727679, "loss": 1.3916, "step": 4018 }, { "epoch": 0.43, "grad_norm": 0.08124314368644833, "learning_rate": 0.0006327978283473775, "loss": 1.3291, "step": 4019 }, { "epoch": 0.43, "grad_norm": 0.0705597696963439, "learning_rate": 0.0006326299661197925, "loss": 1.3706, "step": 4020 }, { "epoch": 0.43, "grad_norm": 0.07269778983693065, "learning_rate": 0.000632462087810367, "loss": 1.3945, "step": 4021 }, { "epoch": 0.43, "grad_norm": 0.07146559700538933, "learning_rate": 0.0006322941934394568, "loss": 1.533, "step": 4022 }, { "epoch": 0.43, "grad_norm": 0.08857351618785715, "learning_rate": 0.0006321262830274192, "loss": 1.4558, "step": 4023 }, { "epoch": 0.43, "grad_norm": 0.07938377494020217, "learning_rate": 0.0006319583565946147, "loss": 1.3931, "step": 4024 }, { "epoch": 0.43, "grad_norm": 0.07280102266618659, "learning_rate": 0.0006317904141614043, "loss": 1.3522, "step": 4025 }, { "epoch": 0.43, "grad_norm": 0.07857482129301427, "learning_rate": 0.0006316224557481518, "loss": 1.4833, "step": 4026 }, { "epoch": 0.43, "grad_norm": 0.06592901984933465, "learning_rate": 0.0006314544813752229, "loss": 1.4067, "step": 4027 }, { "epoch": 0.43, "grad_norm": 0.07367040385418791, "learning_rate": 0.0006312864910629848, "loss": 1.4827, "step": 4028 }, { "epoch": 0.43, "grad_norm": 0.0735499496321778, "learning_rate": 0.0006311184848318071, "loss": 1.4299, "step": 4029 }, { "epoch": 0.43, "grad_norm": 0.07313825624311018, "learning_rate": 0.0006309504627020611, "loss": 1.4739, "step": 4030 }, { "epoch": 0.43, "grad_norm": 0.06941465489720243, "learning_rate": 0.0006307824246941199, "loss": 1.4239, "step": 4031 }, { "epoch": 0.43, "grad_norm": 0.07456618633064974, "learning_rate": 0.0006306143708283592, "loss": 1.3066, "step": 4032 }, { "epoch": 0.43, "grad_norm": 0.0651709204419574, "learning_rate": 0.0006304463011251554, "loss": 1.3473, "step": 4033 }, { "epoch": 0.43, "grad_norm": 0.06531237023428524, "learning_rate": 0.000630278215604888, "loss": 1.5217, "step": 4034 }, { "epoch": 0.43, "grad_norm": 0.08111952291273701, "learning_rate": 0.0006301101142879378, "loss": 1.302, "step": 4035 }, { "epoch": 0.43, "grad_norm": 0.07363733510463709, "learning_rate": 0.0006299419971946876, "loss": 1.446, "step": 4036 }, { "epoch": 0.43, "grad_norm": 0.07311494980959608, "learning_rate": 0.0006297738643455224, "loss": 1.4033, "step": 4037 }, { "epoch": 0.43, "grad_norm": 0.06715207919719457, "learning_rate": 0.0006296057157608287, "loss": 1.5365, "step": 4038 }, { "epoch": 0.43, "grad_norm": 0.06925573041694051, "learning_rate": 0.0006294375514609951, "loss": 1.3796, "step": 4039 }, { "epoch": 0.43, "grad_norm": 0.0819579049167436, "learning_rate": 0.0006292693714664122, "loss": 1.416, "step": 4040 }, { "epoch": 0.43, "grad_norm": 0.06194321911570573, "learning_rate": 0.0006291011757974722, "loss": 1.4414, "step": 4041 }, { "epoch": 0.43, "grad_norm": 0.07743332118321378, "learning_rate": 0.0006289329644745698, "loss": 1.4206, "step": 4042 }, { "epoch": 0.43, "grad_norm": 0.07286834756737166, "learning_rate": 0.0006287647375181009, "loss": 1.4865, "step": 4043 }, { "epoch": 0.43, "grad_norm": 0.06525179714561152, "learning_rate": 0.0006285964949484637, "loss": 1.2784, "step": 4044 }, { "epoch": 0.43, "grad_norm": 0.07796288896846361, "learning_rate": 0.0006284282367860579, "loss": 1.4092, "step": 4045 }, { "epoch": 0.43, "grad_norm": 0.07380537860259485, "learning_rate": 0.0006282599630512858, "loss": 1.5595, "step": 4046 }, { "epoch": 0.44, "grad_norm": 0.07290283264188771, "learning_rate": 0.000628091673764551, "loss": 1.4883, "step": 4047 }, { "epoch": 0.44, "grad_norm": 0.07557073453800928, "learning_rate": 0.0006279233689462591, "loss": 1.3976, "step": 4048 }, { "epoch": 0.44, "grad_norm": 0.07190307484813235, "learning_rate": 0.0006277550486168177, "loss": 1.4121, "step": 4049 }, { "epoch": 0.44, "grad_norm": 0.07124234491193457, "learning_rate": 0.0006275867127966363, "loss": 1.3455, "step": 4050 }, { "epoch": 0.44, "grad_norm": 0.07187076898229122, "learning_rate": 0.000627418361506126, "loss": 1.3166, "step": 4051 }, { "epoch": 0.44, "grad_norm": 0.06811580777354202, "learning_rate": 0.0006272499947657002, "loss": 1.5047, "step": 4052 }, { "epoch": 0.44, "grad_norm": 0.07519583887683878, "learning_rate": 0.000627081612595774, "loss": 1.4374, "step": 4053 }, { "epoch": 0.44, "grad_norm": 0.0658215338355946, "learning_rate": 0.0006269132150167638, "loss": 1.4198, "step": 4054 }, { "epoch": 0.44, "grad_norm": 0.06636046398423283, "learning_rate": 0.0006267448020490889, "loss": 1.5282, "step": 4055 }, { "epoch": 0.44, "grad_norm": 0.0756700595936872, "learning_rate": 0.0006265763737131698, "loss": 1.4572, "step": 4056 }, { "epoch": 0.44, "grad_norm": 0.09031156746298717, "learning_rate": 0.000626407930029429, "loss": 1.4109, "step": 4057 }, { "epoch": 0.44, "grad_norm": 0.07865278513159098, "learning_rate": 0.0006262394710182909, "loss": 1.3605, "step": 4058 }, { "epoch": 0.44, "grad_norm": 0.06988033127417924, "learning_rate": 0.0006260709967001816, "loss": 1.4146, "step": 4059 }, { "epoch": 0.44, "grad_norm": 0.07532677831427176, "learning_rate": 0.0006259025070955295, "loss": 1.4004, "step": 4060 }, { "epoch": 0.44, "grad_norm": 0.07109168814063885, "learning_rate": 0.0006257340022247643, "loss": 1.5781, "step": 4061 }, { "epoch": 0.44, "grad_norm": 0.06307341769528307, "learning_rate": 0.0006255654821083178, "loss": 1.3837, "step": 4062 }, { "epoch": 0.44, "grad_norm": 0.0657165372516695, "learning_rate": 0.0006253969467666238, "loss": 1.4479, "step": 4063 }, { "epoch": 0.44, "grad_norm": 0.0767516467964421, "learning_rate": 0.0006252283962201177, "loss": 1.3942, "step": 4064 }, { "epoch": 0.44, "grad_norm": 0.07047450174961416, "learning_rate": 0.0006250598304892368, "loss": 1.4335, "step": 4065 }, { "epoch": 0.44, "grad_norm": 0.0726381170938812, "learning_rate": 0.0006248912495944203, "loss": 1.4966, "step": 4066 }, { "epoch": 0.44, "grad_norm": 0.07488327336903305, "learning_rate": 0.0006247226535561092, "loss": 1.4666, "step": 4067 }, { "epoch": 0.44, "grad_norm": 0.06558892902830106, "learning_rate": 0.0006245540423947462, "loss": 1.4455, "step": 4068 }, { "epoch": 0.44, "grad_norm": 0.07251446239916863, "learning_rate": 0.0006243854161307765, "loss": 1.5224, "step": 4069 }, { "epoch": 0.44, "grad_norm": 0.08316498630023138, "learning_rate": 0.0006242167747846461, "loss": 1.5017, "step": 4070 }, { "epoch": 0.44, "grad_norm": 0.07975250717728095, "learning_rate": 0.0006240481183768036, "loss": 1.3195, "step": 4071 }, { "epoch": 0.44, "grad_norm": 0.07204884155011891, "learning_rate": 0.000623879446927699, "loss": 1.4155, "step": 4072 }, { "epoch": 0.44, "grad_norm": 0.07334104065864457, "learning_rate": 0.0006237107604577843, "loss": 1.3981, "step": 4073 }, { "epoch": 0.44, "grad_norm": 0.07842758263483142, "learning_rate": 0.0006235420589875136, "loss": 1.4745, "step": 4074 }, { "epoch": 0.44, "grad_norm": 0.0705062668239952, "learning_rate": 0.0006233733425373422, "loss": 1.4089, "step": 4075 }, { "epoch": 0.44, "grad_norm": 0.07756434672164429, "learning_rate": 0.0006232046111277277, "loss": 1.5183, "step": 4076 }, { "epoch": 0.44, "grad_norm": 0.07135757491952958, "learning_rate": 0.0006230358647791294, "loss": 1.5568, "step": 4077 }, { "epoch": 0.44, "grad_norm": 0.08626731930825417, "learning_rate": 0.0006228671035120082, "loss": 1.3864, "step": 4078 }, { "epoch": 0.44, "grad_norm": 0.07264549223156583, "learning_rate": 0.0006226983273468273, "loss": 1.4313, "step": 4079 }, { "epoch": 0.44, "grad_norm": 0.06987639541786198, "learning_rate": 0.0006225295363040511, "loss": 1.361, "step": 4080 }, { "epoch": 0.44, "grad_norm": 0.06641572646235297, "learning_rate": 0.0006223607304041462, "loss": 1.5163, "step": 4081 }, { "epoch": 0.44, "grad_norm": 0.06436519422690486, "learning_rate": 0.0006221919096675808, "loss": 1.4704, "step": 4082 }, { "epoch": 0.44, "grad_norm": 0.07388014001773682, "learning_rate": 0.0006220230741148253, "loss": 1.5568, "step": 4083 }, { "epoch": 0.44, "grad_norm": 0.06842339021724836, "learning_rate": 0.0006218542237663513, "loss": 1.4529, "step": 4084 }, { "epoch": 0.44, "grad_norm": 0.07737167494281082, "learning_rate": 0.0006216853586426325, "loss": 1.4532, "step": 4085 }, { "epoch": 0.44, "grad_norm": 0.08744856475360263, "learning_rate": 0.0006215164787641446, "loss": 1.3995, "step": 4086 }, { "epoch": 0.44, "grad_norm": 0.08103564337406777, "learning_rate": 0.0006213475841513646, "loss": 1.2311, "step": 4087 }, { "epoch": 0.44, "grad_norm": 0.07470751509790251, "learning_rate": 0.0006211786748247716, "loss": 1.4645, "step": 4088 }, { "epoch": 0.44, "grad_norm": 0.06984095772926356, "learning_rate": 0.0006210097508048466, "loss": 1.3588, "step": 4089 }, { "epoch": 0.44, "grad_norm": 0.07684632265692232, "learning_rate": 0.0006208408121120723, "loss": 1.432, "step": 4090 }, { "epoch": 0.44, "grad_norm": 0.0648317073790488, "learning_rate": 0.0006206718587669326, "loss": 1.3795, "step": 4091 }, { "epoch": 0.44, "grad_norm": 0.06610090338490285, "learning_rate": 0.0006205028907899143, "loss": 1.4399, "step": 4092 }, { "epoch": 0.44, "grad_norm": 0.06986369171000673, "learning_rate": 0.0006203339082015048, "loss": 1.3131, "step": 4093 }, { "epoch": 0.44, "grad_norm": 0.06672274588600151, "learning_rate": 0.0006201649110221943, "loss": 1.4506, "step": 4094 }, { "epoch": 0.44, "grad_norm": 0.07641605904306757, "learning_rate": 0.000619995899272474, "loss": 1.4011, "step": 4095 }, { "epoch": 0.44, "grad_norm": 0.07299058614515767, "learning_rate": 0.0006198268729728371, "loss": 1.3871, "step": 4096 }, { "epoch": 0.44, "grad_norm": 0.06910804786567874, "learning_rate": 0.0006196578321437789, "loss": 1.2873, "step": 4097 }, { "epoch": 0.44, "grad_norm": 0.0663791199619923, "learning_rate": 0.000619488776805796, "loss": 1.4373, "step": 4098 }, { "epoch": 0.44, "grad_norm": 0.06725867211121205, "learning_rate": 0.0006193197069793869, "loss": 1.4147, "step": 4099 }, { "epoch": 0.44, "grad_norm": 0.06213689170540425, "learning_rate": 0.000619150622685052, "loss": 1.4147, "step": 4100 }, { "epoch": 0.44, "grad_norm": 0.058435811328203476, "learning_rate": 0.0006189815239432935, "loss": 1.4477, "step": 4101 }, { "epoch": 0.44, "grad_norm": 0.06735167267567684, "learning_rate": 0.0006188124107746148, "loss": 1.3274, "step": 4102 }, { "epoch": 0.44, "grad_norm": 0.07419176458112457, "learning_rate": 0.0006186432831995218, "loss": 1.3537, "step": 4103 }, { "epoch": 0.44, "grad_norm": 0.07341471855142019, "learning_rate": 0.0006184741412385217, "loss": 1.5122, "step": 4104 }, { "epoch": 0.44, "grad_norm": 0.06830118546681271, "learning_rate": 0.0006183049849121233, "loss": 1.3571, "step": 4105 }, { "epoch": 0.44, "grad_norm": 0.0636175844946524, "learning_rate": 0.000618135814240838, "loss": 1.3388, "step": 4106 }, { "epoch": 0.44, "grad_norm": 0.06988407450517839, "learning_rate": 0.0006179666292451775, "loss": 1.4889, "step": 4107 }, { "epoch": 0.44, "grad_norm": 0.06755098834408893, "learning_rate": 0.0006177974299456568, "loss": 1.4158, "step": 4108 }, { "epoch": 0.44, "grad_norm": 0.07359010079927847, "learning_rate": 0.0006176282163627917, "loss": 1.4536, "step": 4109 }, { "epoch": 0.44, "grad_norm": 0.08025899963759743, "learning_rate": 0.0006174589885170995, "loss": 1.5464, "step": 4110 }, { "epoch": 0.44, "grad_norm": 0.08035685133786531, "learning_rate": 0.0006172897464291004, "loss": 1.3692, "step": 4111 }, { "epoch": 0.44, "grad_norm": 0.06498891842009204, "learning_rate": 0.000617120490119315, "loss": 1.5264, "step": 4112 }, { "epoch": 0.44, "grad_norm": 0.06996794393706789, "learning_rate": 0.0006169512196082663, "loss": 1.484, "step": 4113 }, { "epoch": 0.44, "grad_norm": 0.07656932570480635, "learning_rate": 0.0006167819349164791, "loss": 1.4086, "step": 4114 }, { "epoch": 0.44, "grad_norm": 0.07442145054353973, "learning_rate": 0.0006166126360644797, "loss": 1.3669, "step": 4115 }, { "epoch": 0.44, "grad_norm": 0.06996384412019334, "learning_rate": 0.0006164433230727962, "loss": 1.4832, "step": 4116 }, { "epoch": 0.44, "grad_norm": 0.07644256155086539, "learning_rate": 0.0006162739959619583, "loss": 1.4555, "step": 4117 }, { "epoch": 0.44, "grad_norm": 0.06486478925259731, "learning_rate": 0.0006161046547524976, "loss": 1.486, "step": 4118 }, { "epoch": 0.44, "grad_norm": 0.07282495629633286, "learning_rate": 0.0006159352994649469, "loss": 1.4068, "step": 4119 }, { "epoch": 0.44, "grad_norm": 0.0756036520808425, "learning_rate": 0.0006157659301198418, "loss": 1.5446, "step": 4120 }, { "epoch": 0.44, "grad_norm": 0.07928837539192085, "learning_rate": 0.0006155965467377186, "loss": 1.3833, "step": 4121 }, { "epoch": 0.44, "grad_norm": 0.07648852141287911, "learning_rate": 0.0006154271493391155, "loss": 1.4019, "step": 4122 }, { "epoch": 0.44, "grad_norm": 0.08129914315194162, "learning_rate": 0.0006152577379445725, "loss": 1.4999, "step": 4123 }, { "epoch": 0.44, "grad_norm": 0.06435353991840793, "learning_rate": 0.0006150883125746314, "loss": 1.429, "step": 4124 }, { "epoch": 0.44, "grad_norm": 0.07344269169809117, "learning_rate": 0.0006149188732498357, "loss": 1.4553, "step": 4125 }, { "epoch": 0.44, "grad_norm": 0.06969141501711071, "learning_rate": 0.0006147494199907305, "loss": 1.3447, "step": 4126 }, { "epoch": 0.44, "grad_norm": 0.07913855361793129, "learning_rate": 0.0006145799528178625, "loss": 1.4756, "step": 4127 }, { "epoch": 0.44, "grad_norm": 0.07453073475167775, "learning_rate": 0.0006144104717517802, "loss": 1.5231, "step": 4128 }, { "epoch": 0.44, "grad_norm": 0.07847315579126765, "learning_rate": 0.0006142409768130338, "loss": 1.5112, "step": 4129 }, { "epoch": 0.44, "grad_norm": 0.06889484864529667, "learning_rate": 0.0006140714680221749, "loss": 1.3361, "step": 4130 }, { "epoch": 0.44, "grad_norm": 0.07564516476238581, "learning_rate": 0.0006139019453997575, "loss": 1.3954, "step": 4131 }, { "epoch": 0.44, "grad_norm": 0.06694427175894813, "learning_rate": 0.0006137324089663365, "loss": 1.4801, "step": 4132 }, { "epoch": 0.44, "grad_norm": 0.06993820649694431, "learning_rate": 0.0006135628587424687, "loss": 1.3924, "step": 4133 }, { "epoch": 0.44, "grad_norm": 0.06788981217297224, "learning_rate": 0.0006133932947487129, "loss": 1.3172, "step": 4134 }, { "epoch": 0.44, "grad_norm": 0.07762909048019521, "learning_rate": 0.0006132237170056291, "loss": 1.3408, "step": 4135 }, { "epoch": 0.44, "grad_norm": 0.07190854220257663, "learning_rate": 0.0006130541255337791, "loss": 1.3979, "step": 4136 }, { "epoch": 0.44, "grad_norm": 0.07586277240340727, "learning_rate": 0.0006128845203537269, "loss": 1.3706, "step": 4137 }, { "epoch": 0.44, "grad_norm": 0.08398736072378425, "learning_rate": 0.0006127149014860374, "loss": 1.4149, "step": 4138 }, { "epoch": 0.44, "grad_norm": 0.07868961218092786, "learning_rate": 0.0006125452689512774, "loss": 1.4638, "step": 4139 }, { "epoch": 0.45, "grad_norm": 0.06358548319471856, "learning_rate": 0.0006123756227700155, "loss": 1.3557, "step": 4140 }, { "epoch": 0.45, "grad_norm": 0.06691413699984927, "learning_rate": 0.000612205962962822, "loss": 1.4534, "step": 4141 }, { "epoch": 0.45, "grad_norm": 0.06411817333259076, "learning_rate": 0.0006120362895502687, "loss": 1.4033, "step": 4142 }, { "epoch": 0.45, "grad_norm": 0.061015608518283186, "learning_rate": 0.0006118666025529292, "loss": 1.4078, "step": 4143 }, { "epoch": 0.45, "grad_norm": 0.06242891829313593, "learning_rate": 0.0006116969019913781, "loss": 1.6057, "step": 4144 }, { "epoch": 0.45, "grad_norm": 0.07128735272434375, "learning_rate": 0.0006115271878861928, "loss": 1.5334, "step": 4145 }, { "epoch": 0.45, "grad_norm": 0.06366021932361336, "learning_rate": 0.0006113574602579515, "loss": 1.376, "step": 4146 }, { "epoch": 0.45, "grad_norm": 0.07725408870783299, "learning_rate": 0.0006111877191272339, "loss": 1.3857, "step": 4147 }, { "epoch": 0.45, "grad_norm": 0.07624293185866599, "learning_rate": 0.0006110179645146224, "loss": 1.3786, "step": 4148 }, { "epoch": 0.45, "grad_norm": 0.06575853794546838, "learning_rate": 0.0006108481964407, "loss": 1.3347, "step": 4149 }, { "epoch": 0.45, "grad_norm": 0.07375952009354378, "learning_rate": 0.0006106784149260513, "loss": 1.4017, "step": 4150 }, { "epoch": 0.45, "grad_norm": 0.07059410446959481, "learning_rate": 0.0006105086199912635, "loss": 1.3164, "step": 4151 }, { "epoch": 0.45, "grad_norm": 0.07501949153880226, "learning_rate": 0.0006103388116569244, "loss": 1.5203, "step": 4152 }, { "epoch": 0.45, "grad_norm": 0.06371702168963504, "learning_rate": 0.000610168989943624, "loss": 1.4431, "step": 4153 }, { "epoch": 0.45, "grad_norm": 0.06411394548266706, "learning_rate": 0.0006099991548719539, "loss": 1.5797, "step": 4154 }, { "epoch": 0.45, "grad_norm": 0.07495407167224134, "learning_rate": 0.000609829306462507, "loss": 1.4326, "step": 4155 }, { "epoch": 0.45, "grad_norm": 0.06631718739285854, "learning_rate": 0.0006096594447358778, "loss": 1.4092, "step": 4156 }, { "epoch": 0.45, "grad_norm": 0.09938044865046552, "learning_rate": 0.0006094895697126631, "loss": 1.5385, "step": 4157 }, { "epoch": 0.45, "grad_norm": 0.07461912150158716, "learning_rate": 0.0006093196814134605, "loss": 1.4419, "step": 4158 }, { "epoch": 0.45, "grad_norm": 0.07312130807661461, "learning_rate": 0.0006091497798588699, "loss": 1.4032, "step": 4159 }, { "epoch": 0.45, "grad_norm": 0.07582986448445779, "learning_rate": 0.000608979865069492, "loss": 1.4466, "step": 4160 }, { "epoch": 0.45, "grad_norm": 0.07880974884557526, "learning_rate": 0.0006088099370659297, "loss": 1.4004, "step": 4161 }, { "epoch": 0.45, "grad_norm": 0.07458495001160687, "learning_rate": 0.0006086399958687875, "loss": 1.4518, "step": 4162 }, { "epoch": 0.45, "grad_norm": 0.06578258899976172, "learning_rate": 0.0006084700414986712, "loss": 1.3905, "step": 4163 }, { "epoch": 0.45, "grad_norm": 0.06810011832709272, "learning_rate": 0.0006083000739761884, "loss": 1.5071, "step": 4164 }, { "epoch": 0.45, "grad_norm": 0.07909807518347882, "learning_rate": 0.0006081300933219485, "loss": 1.5617, "step": 4165 }, { "epoch": 0.45, "grad_norm": 0.06909964402324427, "learning_rate": 0.0006079600995565617, "loss": 1.4311, "step": 4166 }, { "epoch": 0.45, "grad_norm": 0.07644740121314376, "learning_rate": 0.0006077900927006408, "loss": 1.5397, "step": 4167 }, { "epoch": 0.45, "grad_norm": 0.07791533648735414, "learning_rate": 0.0006076200727747994, "loss": 1.4011, "step": 4168 }, { "epoch": 0.45, "grad_norm": 0.07737760669772996, "learning_rate": 0.0006074500397996533, "loss": 1.3309, "step": 4169 }, { "epoch": 0.45, "grad_norm": 0.06895771259558378, "learning_rate": 0.0006072799937958195, "loss": 1.3748, "step": 4170 }, { "epoch": 0.45, "grad_norm": 0.07586645523731778, "learning_rate": 0.0006071099347839164, "loss": 1.5111, "step": 4171 }, { "epoch": 0.45, "grad_norm": 0.07236277970051411, "learning_rate": 0.0006069398627845645, "loss": 1.3702, "step": 4172 }, { "epoch": 0.45, "grad_norm": 0.07276198083117988, "learning_rate": 0.0006067697778183856, "loss": 1.3674, "step": 4173 }, { "epoch": 0.45, "grad_norm": 0.06649290069396598, "learning_rate": 0.0006065996799060031, "loss": 1.406, "step": 4174 }, { "epoch": 0.45, "grad_norm": 0.085528893791016, "learning_rate": 0.0006064295690680418, "loss": 1.4379, "step": 4175 }, { "epoch": 0.45, "grad_norm": 0.07056553332509218, "learning_rate": 0.0006062594453251285, "loss": 1.5733, "step": 4176 }, { "epoch": 0.45, "grad_norm": 0.06910344638946603, "learning_rate": 0.0006060893086978909, "loss": 1.5036, "step": 4177 }, { "epoch": 0.45, "grad_norm": 0.06867054468060886, "learning_rate": 0.0006059191592069589, "loss": 1.367, "step": 4178 }, { "epoch": 0.45, "grad_norm": 0.07692617680362644, "learning_rate": 0.0006057489968729638, "loss": 1.516, "step": 4179 }, { "epoch": 0.45, "grad_norm": 0.06656309146573545, "learning_rate": 0.0006055788217165383, "loss": 1.4344, "step": 4180 }, { "epoch": 0.45, "grad_norm": 0.06873108247286198, "learning_rate": 0.0006054086337583165, "loss": 1.4402, "step": 4181 }, { "epoch": 0.45, "grad_norm": 0.07460649744446407, "learning_rate": 0.0006052384330189346, "loss": 1.4741, "step": 4182 }, { "epoch": 0.45, "grad_norm": 0.07849888148581544, "learning_rate": 0.0006050682195190299, "loss": 1.3884, "step": 4183 }, { "epoch": 0.45, "grad_norm": 0.0652682809816125, "learning_rate": 0.0006048979932792413, "loss": 1.3857, "step": 4184 }, { "epoch": 0.45, "grad_norm": 0.07701868904645039, "learning_rate": 0.0006047277543202095, "loss": 1.4581, "step": 4185 }, { "epoch": 0.45, "grad_norm": 0.06849774253433671, "learning_rate": 0.0006045575026625762, "loss": 1.3599, "step": 4186 }, { "epoch": 0.45, "grad_norm": 0.09158825757368835, "learning_rate": 0.0006043872383269854, "loss": 1.3715, "step": 4187 }, { "epoch": 0.45, "grad_norm": 0.07279400872351754, "learning_rate": 0.000604216961334082, "loss": 1.3506, "step": 4188 }, { "epoch": 0.45, "grad_norm": 0.06912852463603696, "learning_rate": 0.0006040466717045127, "loss": 1.4299, "step": 4189 }, { "epoch": 0.45, "grad_norm": 0.06763599779398562, "learning_rate": 0.0006038763694589258, "loss": 1.3763, "step": 4190 }, { "epoch": 0.45, "grad_norm": 0.08951066042952609, "learning_rate": 0.0006037060546179712, "loss": 1.4625, "step": 4191 }, { "epoch": 0.45, "grad_norm": 0.06326259809844488, "learning_rate": 0.0006035357272022997, "loss": 1.4229, "step": 4192 }, { "epoch": 0.45, "grad_norm": 0.07451116529186678, "learning_rate": 0.0006033653872325644, "loss": 1.4435, "step": 4193 }, { "epoch": 0.45, "grad_norm": 0.07508265893694721, "learning_rate": 0.0006031950347294196, "loss": 1.4549, "step": 4194 }, { "epoch": 0.45, "grad_norm": 0.07826032094894797, "learning_rate": 0.0006030246697135209, "loss": 1.4891, "step": 4195 }, { "epoch": 0.45, "grad_norm": 0.07645274230350402, "learning_rate": 0.000602854292205526, "loss": 1.3029, "step": 4196 }, { "epoch": 0.45, "grad_norm": 0.06920033529496816, "learning_rate": 0.0006026839022260935, "loss": 1.412, "step": 4197 }, { "epoch": 0.45, "grad_norm": 0.06704934333095056, "learning_rate": 0.0006025134997958839, "loss": 1.2994, "step": 4198 }, { "epoch": 0.45, "grad_norm": 0.07423418599780926, "learning_rate": 0.000602343084935559, "loss": 1.3095, "step": 4199 }, { "epoch": 0.45, "grad_norm": 0.07390960821320829, "learning_rate": 0.0006021726576657822, "loss": 1.3535, "step": 4200 }, { "epoch": 0.45, "grad_norm": 0.07476816449840105, "learning_rate": 0.0006020022180072184, "loss": 1.4619, "step": 4201 }, { "epoch": 0.45, "grad_norm": 0.07002103516627521, "learning_rate": 0.0006018317659805341, "loss": 1.3879, "step": 4202 }, { "epoch": 0.45, "grad_norm": 0.0773920967736904, "learning_rate": 0.000601661301606397, "loss": 1.5078, "step": 4203 }, { "epoch": 0.45, "grad_norm": 0.07217551586812339, "learning_rate": 0.0006014908249054767, "loss": 1.4657, "step": 4204 }, { "epoch": 0.45, "grad_norm": 0.07543631118107905, "learning_rate": 0.000601320335898444, "loss": 1.4275, "step": 4205 }, { "epoch": 0.45, "grad_norm": 0.07223024017477711, "learning_rate": 0.0006011498346059712, "loss": 1.3861, "step": 4206 }, { "epoch": 0.45, "grad_norm": 0.07472220435048994, "learning_rate": 0.0006009793210487322, "loss": 1.4108, "step": 4207 }, { "epoch": 0.45, "grad_norm": 0.07639838356520745, "learning_rate": 0.0006008087952474024, "loss": 1.4983, "step": 4208 }, { "epoch": 0.45, "grad_norm": 0.0701511264724409, "learning_rate": 0.0006006382572226587, "loss": 1.3362, "step": 4209 }, { "epoch": 0.45, "grad_norm": 0.06770431572735086, "learning_rate": 0.0006004677069951792, "loss": 1.4401, "step": 4210 }, { "epoch": 0.45, "grad_norm": 0.07739188467731939, "learning_rate": 0.0006002971445856441, "loss": 1.4421, "step": 4211 }, { "epoch": 0.45, "grad_norm": 0.07539465339428007, "learning_rate": 0.0006001265700147344, "loss": 1.5035, "step": 4212 }, { "epoch": 0.45, "grad_norm": 0.07078364592406552, "learning_rate": 0.0005999559833031327, "loss": 1.3512, "step": 4213 }, { "epoch": 0.45, "grad_norm": 0.07865502303660855, "learning_rate": 0.0005997853844715237, "loss": 1.4592, "step": 4214 }, { "epoch": 0.45, "grad_norm": 0.06829634894502473, "learning_rate": 0.0005996147735405925, "loss": 1.3703, "step": 4215 }, { "epoch": 0.45, "grad_norm": 0.0781852996100699, "learning_rate": 0.0005994441505310269, "loss": 1.3338, "step": 4216 }, { "epoch": 0.45, "grad_norm": 0.07062610585794313, "learning_rate": 0.0005992735154635151, "loss": 1.4492, "step": 4217 }, { "epoch": 0.45, "grad_norm": 0.07909815760336844, "learning_rate": 0.0005991028683587471, "loss": 1.3787, "step": 4218 }, { "epoch": 0.45, "grad_norm": 0.07063792835909273, "learning_rate": 0.000598932209237415, "loss": 1.363, "step": 4219 }, { "epoch": 0.45, "grad_norm": 0.08213431406304056, "learning_rate": 0.0005987615381202112, "loss": 1.3396, "step": 4220 }, { "epoch": 0.45, "grad_norm": 0.07766459822155773, "learning_rate": 0.0005985908550278305, "loss": 1.4481, "step": 4221 }, { "epoch": 0.45, "grad_norm": 0.07358174371250564, "learning_rate": 0.0005984201599809689, "loss": 1.37, "step": 4222 }, { "epoch": 0.45, "grad_norm": 0.06590233723419557, "learning_rate": 0.0005982494530003233, "loss": 1.3558, "step": 4223 }, { "epoch": 0.45, "grad_norm": 0.0682442044026668, "learning_rate": 0.0005980787341065929, "loss": 1.4652, "step": 4224 }, { "epoch": 0.45, "grad_norm": 0.07891003356492753, "learning_rate": 0.000597908003320478, "loss": 1.4018, "step": 4225 }, { "epoch": 0.45, "grad_norm": 0.06639971946412793, "learning_rate": 0.00059773726066268, "loss": 1.3917, "step": 4226 }, { "epoch": 0.45, "grad_norm": 0.07335602496727649, "learning_rate": 0.0005975665061539022, "loss": 1.3812, "step": 4227 }, { "epoch": 0.45, "grad_norm": 0.07410909291067329, "learning_rate": 0.0005973957398148493, "loss": 1.6306, "step": 4228 }, { "epoch": 0.45, "grad_norm": 0.06263196785177541, "learning_rate": 0.000597224961666227, "loss": 1.4055, "step": 4229 }, { "epoch": 0.45, "grad_norm": 0.06865685568182275, "learning_rate": 0.0005970541717287431, "loss": 1.4365, "step": 4230 }, { "epoch": 0.45, "grad_norm": 0.07007647676666574, "learning_rate": 0.0005968833700231062, "loss": 1.564, "step": 4231 }, { "epoch": 0.45, "grad_norm": 0.0759327624640143, "learning_rate": 0.0005967125565700265, "loss": 1.3756, "step": 4232 }, { "epoch": 0.46, "grad_norm": 0.0683659235338103, "learning_rate": 0.0005965417313902162, "loss": 1.3516, "step": 4233 }, { "epoch": 0.46, "grad_norm": 0.059114031755854154, "learning_rate": 0.000596370894504388, "loss": 1.3317, "step": 4234 }, { "epoch": 0.46, "grad_norm": 0.06596578435593728, "learning_rate": 0.0005962000459332566, "loss": 1.3701, "step": 4235 }, { "epoch": 0.46, "grad_norm": 0.056174330967718004, "learning_rate": 0.000596029185697538, "loss": 1.4079, "step": 4236 }, { "epoch": 0.46, "grad_norm": 0.06556568495064191, "learning_rate": 0.0005958583138179494, "loss": 1.3345, "step": 4237 }, { "epoch": 0.46, "grad_norm": 0.07692922309310339, "learning_rate": 0.00059568743031521, "loss": 1.4097, "step": 4238 }, { "epoch": 0.46, "grad_norm": 0.06730920807440371, "learning_rate": 0.0005955165352100398, "loss": 1.3266, "step": 4239 }, { "epoch": 0.46, "grad_norm": 0.06838790689808874, "learning_rate": 0.0005953456285231602, "loss": 1.433, "step": 4240 }, { "epoch": 0.46, "grad_norm": 0.06915277142703034, "learning_rate": 0.0005951747102752946, "loss": 1.429, "step": 4241 }, { "epoch": 0.46, "grad_norm": 0.07277351313611133, "learning_rate": 0.0005950037804871673, "loss": 1.5549, "step": 4242 }, { "epoch": 0.46, "grad_norm": 0.06650552884193628, "learning_rate": 0.0005948328391795038, "loss": 1.3798, "step": 4243 }, { "epoch": 0.46, "grad_norm": 0.07550184977976046, "learning_rate": 0.000594661886373032, "loss": 1.3489, "step": 4244 }, { "epoch": 0.46, "grad_norm": 0.06434529850760629, "learning_rate": 0.00059449092208848, "loss": 1.5048, "step": 4245 }, { "epoch": 0.46, "grad_norm": 0.06134605286144569, "learning_rate": 0.0005943199463465779, "loss": 1.4264, "step": 4246 }, { "epoch": 0.46, "grad_norm": 0.07135379478202215, "learning_rate": 0.000594148959168057, "loss": 1.4708, "step": 4247 }, { "epoch": 0.46, "grad_norm": 0.0657081744632789, "learning_rate": 0.0005939779605736504, "loss": 1.4203, "step": 4248 }, { "epoch": 0.46, "grad_norm": 0.09721619161803492, "learning_rate": 0.0005938069505840919, "loss": 1.5514, "step": 4249 }, { "epoch": 0.46, "grad_norm": 0.06960076559949502, "learning_rate": 0.0005936359292201174, "loss": 1.4348, "step": 4250 }, { "epoch": 0.46, "grad_norm": 0.0681511654307289, "learning_rate": 0.0005934648965024636, "loss": 1.2808, "step": 4251 }, { "epoch": 0.46, "grad_norm": 0.0665974511558386, "learning_rate": 0.0005932938524518689, "loss": 1.548, "step": 4252 }, { "epoch": 0.46, "grad_norm": 0.06975298195353812, "learning_rate": 0.0005931227970890731, "loss": 1.5667, "step": 4253 }, { "epoch": 0.46, "grad_norm": 0.0671332270408465, "learning_rate": 0.000592951730434817, "loss": 1.3778, "step": 4254 }, { "epoch": 0.46, "grad_norm": 0.062144346033571184, "learning_rate": 0.0005927806525098431, "loss": 1.4931, "step": 4255 }, { "epoch": 0.46, "grad_norm": 0.06924031944664585, "learning_rate": 0.0005926095633348952, "loss": 1.4989, "step": 4256 }, { "epoch": 0.46, "grad_norm": 0.06798196988097031, "learning_rate": 0.0005924384629307184, "loss": 1.4264, "step": 4257 }, { "epoch": 0.46, "grad_norm": 0.07981962984856707, "learning_rate": 0.0005922673513180596, "loss": 1.379, "step": 4258 }, { "epoch": 0.46, "grad_norm": 0.07231588001633618, "learning_rate": 0.0005920962285176661, "loss": 1.3043, "step": 4259 }, { "epoch": 0.46, "grad_norm": 0.0742394001366356, "learning_rate": 0.0005919250945502874, "loss": 1.4649, "step": 4260 }, { "epoch": 0.46, "grad_norm": 0.0699345660413472, "learning_rate": 0.0005917539494366741, "loss": 1.3223, "step": 4261 }, { "epoch": 0.46, "grad_norm": 0.07213363643829698, "learning_rate": 0.0005915827931975782, "loss": 1.3135, "step": 4262 }, { "epoch": 0.46, "grad_norm": 0.07930439093632695, "learning_rate": 0.0005914116258537527, "loss": 1.423, "step": 4263 }, { "epoch": 0.46, "grad_norm": 0.07094944522631236, "learning_rate": 0.0005912404474259526, "loss": 1.4871, "step": 4264 }, { "epoch": 0.46, "grad_norm": 0.07994927967283738, "learning_rate": 0.0005910692579349336, "loss": 1.4013, "step": 4265 }, { "epoch": 0.46, "grad_norm": 0.0739574577729969, "learning_rate": 0.0005908980574014532, "loss": 1.3338, "step": 4266 }, { "epoch": 0.46, "grad_norm": 0.07033576100419545, "learning_rate": 0.00059072684584627, "loss": 1.3731, "step": 4267 }, { "epoch": 0.46, "grad_norm": 0.0693006408871248, "learning_rate": 0.0005905556232901437, "loss": 1.4151, "step": 4268 }, { "epoch": 0.46, "grad_norm": 0.0771275790905268, "learning_rate": 0.000590384389753836, "loss": 1.3806, "step": 4269 }, { "epoch": 0.46, "grad_norm": 0.08802543077110558, "learning_rate": 0.0005902131452581096, "loss": 1.4195, "step": 4270 }, { "epoch": 0.46, "grad_norm": 0.07815218988772281, "learning_rate": 0.0005900418898237281, "loss": 1.3446, "step": 4271 }, { "epoch": 0.46, "grad_norm": 0.07661324405037143, "learning_rate": 0.0005898706234714572, "loss": 1.5356, "step": 4272 }, { "epoch": 0.46, "grad_norm": 0.07920864004503088, "learning_rate": 0.0005896993462220633, "loss": 1.3312, "step": 4273 }, { "epoch": 0.46, "grad_norm": 0.07351431361264203, "learning_rate": 0.0005895280580963144, "loss": 1.3353, "step": 4274 }, { "epoch": 0.46, "grad_norm": 0.0829204899421725, "learning_rate": 0.0005893567591149799, "loss": 1.4148, "step": 4275 }, { "epoch": 0.46, "grad_norm": 0.0731711710914952, "learning_rate": 0.0005891854492988302, "loss": 1.4031, "step": 4276 }, { "epoch": 0.46, "grad_norm": 0.07697900216587132, "learning_rate": 0.0005890141286686375, "loss": 1.4344, "step": 4277 }, { "epoch": 0.46, "grad_norm": 0.08406706969300401, "learning_rate": 0.0005888427972451746, "loss": 1.4329, "step": 4278 }, { "epoch": 0.46, "grad_norm": 0.07000902617829333, "learning_rate": 0.0005886714550492163, "loss": 1.5284, "step": 4279 }, { "epoch": 0.46, "grad_norm": 0.07299706209596549, "learning_rate": 0.0005885001021015383, "loss": 1.5595, "step": 4280 }, { "epoch": 0.46, "grad_norm": 0.06511331156240723, "learning_rate": 0.0005883287384229181, "loss": 1.4026, "step": 4281 }, { "epoch": 0.46, "grad_norm": 0.08615882506688809, "learning_rate": 0.0005881573640341335, "loss": 1.3818, "step": 4282 }, { "epoch": 0.46, "grad_norm": 0.0665931298491308, "learning_rate": 0.0005879859789559649, "loss": 1.489, "step": 4283 }, { "epoch": 0.46, "grad_norm": 0.06736534693938794, "learning_rate": 0.0005878145832091929, "loss": 1.4695, "step": 4284 }, { "epoch": 0.46, "grad_norm": 0.07637517648261326, "learning_rate": 0.0005876431768145997, "loss": 1.4606, "step": 4285 }, { "epoch": 0.46, "grad_norm": 0.07295821930998996, "learning_rate": 0.0005874717597929696, "loss": 1.2971, "step": 4286 }, { "epoch": 0.46, "grad_norm": 0.06826614255036653, "learning_rate": 0.0005873003321650868, "loss": 1.4135, "step": 4287 }, { "epoch": 0.46, "grad_norm": 0.07459899080496685, "learning_rate": 0.0005871288939517377, "loss": 1.4705, "step": 4288 }, { "epoch": 0.46, "grad_norm": 0.07846593078913912, "learning_rate": 0.00058695744517371, "loss": 1.4887, "step": 4289 }, { "epoch": 0.46, "grad_norm": 0.07214161019843333, "learning_rate": 0.0005867859858517922, "loss": 1.3555, "step": 4290 }, { "epoch": 0.46, "grad_norm": 0.06799052813777913, "learning_rate": 0.0005866145160067745, "loss": 1.5964, "step": 4291 }, { "epoch": 0.46, "grad_norm": 0.06470531737409344, "learning_rate": 0.000586443035659448, "loss": 1.4667, "step": 4292 }, { "epoch": 0.46, "grad_norm": 0.06948444252266078, "learning_rate": 0.0005862715448306055, "loss": 1.4447, "step": 4293 }, { "epoch": 0.46, "grad_norm": 0.06420616680388852, "learning_rate": 0.0005861000435410407, "loss": 1.4562, "step": 4294 }, { "epoch": 0.46, "grad_norm": 0.07606806237403266, "learning_rate": 0.0005859285318115488, "loss": 1.4291, "step": 4295 }, { "epoch": 0.46, "grad_norm": 0.06970676132945107, "learning_rate": 0.0005857570096629262, "loss": 1.5096, "step": 4296 }, { "epoch": 0.46, "grad_norm": 0.07218587347413118, "learning_rate": 0.0005855854771159706, "loss": 1.4344, "step": 4297 }, { "epoch": 0.46, "grad_norm": 0.0869640303145645, "learning_rate": 0.0005854139341914808, "loss": 1.4323, "step": 4298 }, { "epoch": 0.46, "grad_norm": 0.06296273663509053, "learning_rate": 0.0005852423809102568, "loss": 1.3476, "step": 4299 }, { "epoch": 0.46, "grad_norm": 0.06801967942797818, "learning_rate": 0.0005850708172931005, "loss": 1.4381, "step": 4300 }, { "epoch": 0.46, "grad_norm": 0.07313413676647226, "learning_rate": 0.0005848992433608142, "loss": 1.4988, "step": 4301 }, { "epoch": 0.46, "grad_norm": 0.07855268328588744, "learning_rate": 0.0005847276591342021, "loss": 1.3393, "step": 4302 }, { "epoch": 0.46, "grad_norm": 0.07208810569830114, "learning_rate": 0.0005845560646340691, "loss": 1.4611, "step": 4303 }, { "epoch": 0.46, "grad_norm": 0.07485263168250995, "learning_rate": 0.0005843844598812218, "loss": 1.394, "step": 4304 }, { "epoch": 0.46, "grad_norm": 0.06973519268508456, "learning_rate": 0.0005842128448964676, "loss": 1.3173, "step": 4305 }, { "epoch": 0.46, "grad_norm": 0.06435508387831129, "learning_rate": 0.000584041219700616, "loss": 1.4103, "step": 4306 }, { "epoch": 0.46, "grad_norm": 0.0719765280377028, "learning_rate": 0.0005838695843144766, "loss": 1.3714, "step": 4307 }, { "epoch": 0.46, "grad_norm": 0.08143997254921763, "learning_rate": 0.000583697938758861, "loss": 1.5099, "step": 4308 }, { "epoch": 0.46, "grad_norm": 0.06992837500062335, "learning_rate": 0.0005835262830545816, "loss": 1.3621, "step": 4309 }, { "epoch": 0.46, "grad_norm": 0.0667326641216811, "learning_rate": 0.0005833546172224526, "loss": 1.3966, "step": 4310 }, { "epoch": 0.46, "grad_norm": 0.07025203654887667, "learning_rate": 0.0005831829412832888, "loss": 1.3464, "step": 4311 }, { "epoch": 0.46, "grad_norm": 0.07092819341479226, "learning_rate": 0.0005830112552579067, "loss": 1.4383, "step": 4312 }, { "epoch": 0.46, "grad_norm": 0.06969893660218596, "learning_rate": 0.0005828395591671238, "loss": 1.3822, "step": 4313 }, { "epoch": 0.46, "grad_norm": 0.07472487467182264, "learning_rate": 0.0005826678530317585, "loss": 1.3483, "step": 4314 }, { "epoch": 0.46, "grad_norm": 0.08045792356361967, "learning_rate": 0.0005824961368726311, "loss": 1.2365, "step": 4315 }, { "epoch": 0.46, "grad_norm": 0.07539767278084446, "learning_rate": 0.0005823244107105627, "loss": 1.4309, "step": 4316 }, { "epoch": 0.46, "grad_norm": 0.06989912203654702, "learning_rate": 0.0005821526745663758, "loss": 1.3415, "step": 4317 }, { "epoch": 0.46, "grad_norm": 0.07123258473151052, "learning_rate": 0.000581980928460894, "loss": 1.3155, "step": 4318 }, { "epoch": 0.46, "grad_norm": 0.07321426972149589, "learning_rate": 0.0005818091724149417, "loss": 1.5035, "step": 4319 }, { "epoch": 0.46, "grad_norm": 0.06861095456012398, "learning_rate": 0.0005816374064493453, "loss": 1.4533, "step": 4320 }, { "epoch": 0.46, "grad_norm": 0.062160527409850376, "learning_rate": 0.000581465630584932, "loss": 1.5065, "step": 4321 }, { "epoch": 0.46, "grad_norm": 0.08033920097956411, "learning_rate": 0.0005812938448425299, "loss": 1.4659, "step": 4322 }, { "epoch": 0.46, "grad_norm": 0.06743837135831846, "learning_rate": 0.0005811220492429692, "loss": 1.4259, "step": 4323 }, { "epoch": 0.46, "grad_norm": 0.07239672168722948, "learning_rate": 0.0005809502438070801, "loss": 1.3405, "step": 4324 }, { "epoch": 0.46, "grad_norm": 0.07073635590191434, "learning_rate": 0.000580778428555695, "loss": 1.3189, "step": 4325 }, { "epoch": 0.47, "grad_norm": 0.07614227248503913, "learning_rate": 0.0005806066035096468, "loss": 1.3165, "step": 4326 }, { "epoch": 0.47, "grad_norm": 0.06700790849180441, "learning_rate": 0.0005804347686897702, "loss": 1.3281, "step": 4327 }, { "epoch": 0.47, "grad_norm": 0.06791159995686306, "learning_rate": 0.0005802629241169005, "loss": 1.5144, "step": 4328 }, { "epoch": 0.47, "grad_norm": 0.06581161360888663, "learning_rate": 0.0005800910698118745, "loss": 1.6056, "step": 4329 }, { "epoch": 0.47, "grad_norm": 0.07091361131731538, "learning_rate": 0.0005799192057955303, "loss": 1.3676, "step": 4330 }, { "epoch": 0.47, "grad_norm": 0.06892622529489716, "learning_rate": 0.0005797473320887068, "loss": 1.3971, "step": 4331 }, { "epoch": 0.47, "grad_norm": 0.06963628707063342, "learning_rate": 0.0005795754487122444, "loss": 1.4121, "step": 4332 }, { "epoch": 0.47, "grad_norm": 0.07610530573128409, "learning_rate": 0.0005794035556869843, "loss": 1.4506, "step": 4333 }, { "epoch": 0.47, "grad_norm": 0.06066084045830262, "learning_rate": 0.0005792316530337696, "loss": 1.5658, "step": 4334 }, { "epoch": 0.47, "grad_norm": 0.0625241482545445, "learning_rate": 0.0005790597407734437, "loss": 1.3852, "step": 4335 }, { "epoch": 0.47, "grad_norm": 0.07533639979979685, "learning_rate": 0.0005788878189268516, "loss": 1.4158, "step": 4336 }, { "epoch": 0.47, "grad_norm": 0.063951064948048, "learning_rate": 0.0005787158875148397, "loss": 1.6017, "step": 4337 }, { "epoch": 0.47, "grad_norm": 0.07185074481823407, "learning_rate": 0.0005785439465582548, "loss": 1.3789, "step": 4338 }, { "epoch": 0.47, "grad_norm": 0.067738052717109, "learning_rate": 0.0005783719960779458, "loss": 1.2404, "step": 4339 }, { "epoch": 0.47, "grad_norm": 0.06693252733909952, "learning_rate": 0.000578200036094762, "loss": 1.4805, "step": 4340 }, { "epoch": 0.47, "grad_norm": 0.0707406767223583, "learning_rate": 0.0005780280666295543, "loss": 1.3044, "step": 4341 }, { "epoch": 0.47, "grad_norm": 0.07245024813674909, "learning_rate": 0.0005778560877031743, "loss": 1.4834, "step": 4342 }, { "epoch": 0.47, "grad_norm": 0.07076343834104014, "learning_rate": 0.0005776840993364754, "loss": 1.4862, "step": 4343 }, { "epoch": 0.47, "grad_norm": 0.0771692706601467, "learning_rate": 0.0005775121015503117, "loss": 1.5121, "step": 4344 }, { "epoch": 0.47, "grad_norm": 0.07575869372023414, "learning_rate": 0.0005773400943655385, "loss": 1.4231, "step": 4345 }, { "epoch": 0.47, "grad_norm": 0.08331985864502259, "learning_rate": 0.0005771680778030122, "loss": 1.4944, "step": 4346 }, { "epoch": 0.47, "grad_norm": 0.0978213404908466, "learning_rate": 0.0005769960518835902, "loss": 1.385, "step": 4347 }, { "epoch": 0.47, "grad_norm": 0.07030500514431032, "learning_rate": 0.0005768240166281317, "loss": 1.384, "step": 4348 }, { "epoch": 0.47, "grad_norm": 0.0731870634540761, "learning_rate": 0.0005766519720574963, "loss": 1.5135, "step": 4349 }, { "epoch": 0.47, "grad_norm": 0.07700046097610479, "learning_rate": 0.0005764799181925449, "loss": 1.468, "step": 4350 }, { "epoch": 0.47, "grad_norm": 0.0820989017585397, "learning_rate": 0.0005763078550541399, "loss": 1.4766, "step": 4351 }, { "epoch": 0.47, "grad_norm": 0.06727729877440962, "learning_rate": 0.0005761357826631443, "loss": 1.4807, "step": 4352 }, { "epoch": 0.47, "grad_norm": 0.0710779421387419, "learning_rate": 0.0005759637010404226, "loss": 1.4129, "step": 4353 }, { "epoch": 0.47, "grad_norm": 0.0792314675429011, "learning_rate": 0.0005757916102068402, "loss": 1.5648, "step": 4354 }, { "epoch": 0.47, "grad_norm": 0.0694154213276982, "learning_rate": 0.000575619510183264, "loss": 1.3538, "step": 4355 }, { "epoch": 0.47, "grad_norm": 0.0635064723999925, "learning_rate": 0.0005754474009905613, "loss": 1.3959, "step": 4356 }, { "epoch": 0.47, "grad_norm": 0.06970467537810597, "learning_rate": 0.0005752752826496012, "loss": 1.4013, "step": 4357 }, { "epoch": 0.47, "grad_norm": 0.07260049255850706, "learning_rate": 0.0005751031551812536, "loss": 1.3132, "step": 4358 }, { "epoch": 0.47, "grad_norm": 0.06955971587200312, "learning_rate": 0.0005749310186063892, "loss": 1.3798, "step": 4359 }, { "epoch": 0.47, "grad_norm": 0.0679171340858405, "learning_rate": 0.0005747588729458809, "loss": 1.526, "step": 4360 }, { "epoch": 0.47, "grad_norm": 0.0673009342150572, "learning_rate": 0.0005745867182206012, "loss": 1.4129, "step": 4361 }, { "epoch": 0.47, "grad_norm": 0.07326363249587493, "learning_rate": 0.000574414554451425, "loss": 1.4206, "step": 4362 }, { "epoch": 0.47, "grad_norm": 0.06410449649673688, "learning_rate": 0.0005742423816592274, "loss": 1.3288, "step": 4363 }, { "epoch": 0.47, "grad_norm": 0.06985286145175268, "learning_rate": 0.0005740701998648849, "loss": 1.3431, "step": 4364 }, { "epoch": 0.47, "grad_norm": 0.07041595467515156, "learning_rate": 0.0005738980090892757, "loss": 1.4546, "step": 4365 }, { "epoch": 0.47, "grad_norm": 0.07074849648123872, "learning_rate": 0.0005737258093532781, "loss": 1.4942, "step": 4366 }, { "epoch": 0.47, "grad_norm": 0.06451965099343698, "learning_rate": 0.0005735536006777716, "loss": 1.4386, "step": 4367 }, { "epoch": 0.47, "grad_norm": 0.07518999046838014, "learning_rate": 0.0005733813830836376, "loss": 1.5297, "step": 4368 }, { "epoch": 0.47, "grad_norm": 0.06507127189100723, "learning_rate": 0.000573209156591758, "loss": 1.4292, "step": 4369 }, { "epoch": 0.47, "grad_norm": 0.061908429397981614, "learning_rate": 0.0005730369212230157, "loss": 1.4905, "step": 4370 }, { "epoch": 0.47, "grad_norm": 0.07486397178116465, "learning_rate": 0.0005728646769982951, "loss": 1.3875, "step": 4371 }, { "epoch": 0.47, "grad_norm": 0.06995650744403155, "learning_rate": 0.0005726924239384809, "loss": 1.3832, "step": 4372 }, { "epoch": 0.47, "grad_norm": 0.06642041686352557, "learning_rate": 0.0005725201620644598, "loss": 1.3663, "step": 4373 }, { "epoch": 0.47, "grad_norm": 0.06838526746301918, "learning_rate": 0.0005723478913971191, "loss": 1.4206, "step": 4374 }, { "epoch": 0.47, "grad_norm": 0.06994726686310161, "learning_rate": 0.0005721756119573471, "loss": 1.3621, "step": 4375 }, { "epoch": 0.47, "grad_norm": 0.07335027824922698, "learning_rate": 0.0005720033237660334, "loss": 1.38, "step": 4376 }, { "epoch": 0.47, "grad_norm": 0.07132320370362646, "learning_rate": 0.0005718310268440683, "loss": 1.5475, "step": 4377 }, { "epoch": 0.47, "grad_norm": 0.07136276677595751, "learning_rate": 0.0005716587212123435, "loss": 1.4212, "step": 4378 }, { "epoch": 0.47, "grad_norm": 0.06487403418814532, "learning_rate": 0.0005714864068917519, "loss": 1.4789, "step": 4379 }, { "epoch": 0.47, "grad_norm": 0.0772161895483952, "learning_rate": 0.0005713140839031871, "loss": 1.4534, "step": 4380 }, { "epoch": 0.47, "grad_norm": 0.06981128959729192, "learning_rate": 0.0005711417522675437, "loss": 1.4698, "step": 4381 }, { "epoch": 0.47, "grad_norm": 0.07765049782121027, "learning_rate": 0.0005709694120057174, "loss": 1.3064, "step": 4382 }, { "epoch": 0.47, "grad_norm": 0.07241987185501046, "learning_rate": 0.0005707970631386054, "loss": 1.4083, "step": 4383 }, { "epoch": 0.47, "grad_norm": 0.07081369496009181, "learning_rate": 0.0005706247056871053, "loss": 1.4168, "step": 4384 }, { "epoch": 0.47, "grad_norm": 0.06703084002299411, "learning_rate": 0.0005704523396721162, "loss": 1.4372, "step": 4385 }, { "epoch": 0.47, "grad_norm": 0.08033733458464856, "learning_rate": 0.000570279965114538, "loss": 1.2662, "step": 4386 }, { "epoch": 0.47, "grad_norm": 0.0781908026380043, "learning_rate": 0.0005701075820352718, "loss": 1.4461, "step": 4387 }, { "epoch": 0.47, "grad_norm": 0.07027921368888441, "learning_rate": 0.0005699351904552196, "loss": 1.38, "step": 4388 }, { "epoch": 0.47, "grad_norm": 0.07984943706970755, "learning_rate": 0.0005697627903952844, "loss": 1.3059, "step": 4389 }, { "epoch": 0.47, "grad_norm": 0.07262529776501059, "learning_rate": 0.0005695903818763703, "loss": 1.439, "step": 4390 }, { "epoch": 0.47, "grad_norm": 0.06658673915041431, "learning_rate": 0.0005694179649193826, "loss": 1.2767, "step": 4391 }, { "epoch": 0.47, "grad_norm": 0.07230322011342759, "learning_rate": 0.0005692455395452272, "loss": 1.6068, "step": 4392 }, { "epoch": 0.47, "grad_norm": 0.07247693991750412, "learning_rate": 0.0005690731057748116, "loss": 1.4641, "step": 4393 }, { "epoch": 0.47, "grad_norm": 0.08824859279901634, "learning_rate": 0.0005689006636290436, "loss": 1.397, "step": 4394 }, { "epoch": 0.47, "grad_norm": 0.07435843692754146, "learning_rate": 0.0005687282131288326, "loss": 1.4981, "step": 4395 }, { "epoch": 0.47, "grad_norm": 0.06866404893479804, "learning_rate": 0.000568555754295089, "loss": 1.3269, "step": 4396 }, { "epoch": 0.47, "grad_norm": 0.06631490616956406, "learning_rate": 0.0005683832871487237, "loss": 1.3666, "step": 4397 }, { "epoch": 0.47, "grad_norm": 0.07461689282658565, "learning_rate": 0.0005682108117106492, "loss": 1.442, "step": 4398 }, { "epoch": 0.47, "grad_norm": 0.07629678720903356, "learning_rate": 0.0005680383280017785, "loss": 1.4704, "step": 4399 }, { "epoch": 0.47, "grad_norm": 0.10030040676241504, "learning_rate": 0.000567865836043026, "loss": 1.4009, "step": 4400 }, { "epoch": 0.47, "grad_norm": 0.07009548150800028, "learning_rate": 0.0005676933358553068, "loss": 1.3269, "step": 4401 }, { "epoch": 0.47, "grad_norm": 0.07800720543403114, "learning_rate": 0.0005675208274595376, "loss": 1.4252, "step": 4402 }, { "epoch": 0.47, "grad_norm": 0.06738892904108221, "learning_rate": 0.0005673483108766348, "loss": 1.3973, "step": 4403 }, { "epoch": 0.47, "grad_norm": 0.06857122139693983, "learning_rate": 0.0005671757861275175, "loss": 1.3929, "step": 4404 }, { "epoch": 0.47, "grad_norm": 0.07166471776508838, "learning_rate": 0.0005670032532331045, "loss": 1.4055, "step": 4405 }, { "epoch": 0.47, "grad_norm": 0.06517833141153938, "learning_rate": 0.0005668307122143159, "loss": 1.4336, "step": 4406 }, { "epoch": 0.47, "grad_norm": 0.07899339662584212, "learning_rate": 0.0005666581630920731, "loss": 1.428, "step": 4407 }, { "epoch": 0.47, "grad_norm": 0.0858859644599195, "learning_rate": 0.0005664856058872984, "loss": 1.3167, "step": 4408 }, { "epoch": 0.47, "grad_norm": 0.06573428062527853, "learning_rate": 0.0005663130406209144, "loss": 1.3983, "step": 4409 }, { "epoch": 0.47, "grad_norm": 0.07532366412662411, "learning_rate": 0.000566140467313846, "loss": 1.5081, "step": 4410 }, { "epoch": 0.47, "grad_norm": 0.07402368057331848, "learning_rate": 0.0005659678859870177, "loss": 1.2766, "step": 4411 }, { "epoch": 0.47, "grad_norm": 0.07084658555164398, "learning_rate": 0.0005657952966613558, "loss": 1.4809, "step": 4412 }, { "epoch": 0.47, "grad_norm": 0.07293649298192152, "learning_rate": 0.0005656226993577874, "loss": 1.3181, "step": 4413 }, { "epoch": 0.47, "grad_norm": 0.07343140003558507, "learning_rate": 0.0005654500940972404, "loss": 1.3817, "step": 4414 }, { "epoch": 0.47, "grad_norm": 0.06442382898080595, "learning_rate": 0.000565277480900644, "loss": 1.565, "step": 4415 }, { "epoch": 0.47, "grad_norm": 0.0758026294438705, "learning_rate": 0.0005651048597889277, "loss": 1.5896, "step": 4416 }, { "epoch": 0.47, "grad_norm": 0.07908556991375093, "learning_rate": 0.000564932230783023, "loss": 1.2737, "step": 4417 }, { "epoch": 0.47, "grad_norm": 0.06293706327093158, "learning_rate": 0.0005647595939038615, "loss": 1.4938, "step": 4418 }, { "epoch": 0.48, "grad_norm": 0.06813466656281142, "learning_rate": 0.0005645869491723757, "loss": 1.4309, "step": 4419 }, { "epoch": 0.48, "grad_norm": 0.07377659147668462, "learning_rate": 0.0005644142966094997, "loss": 1.4835, "step": 4420 }, { "epoch": 0.48, "grad_norm": 0.07262404263783771, "learning_rate": 0.0005642416362361683, "loss": 1.4194, "step": 4421 }, { "epoch": 0.48, "grad_norm": 0.06906843043045555, "learning_rate": 0.0005640689680733171, "loss": 1.4486, "step": 4422 }, { "epoch": 0.48, "grad_norm": 0.07374356166217189, "learning_rate": 0.0005638962921418824, "loss": 1.4561, "step": 4423 }, { "epoch": 0.48, "grad_norm": 0.07256179926993496, "learning_rate": 0.0005637236084628022, "loss": 1.4977, "step": 4424 }, { "epoch": 0.48, "grad_norm": 0.09589845080482406, "learning_rate": 0.0005635509170570148, "loss": 1.4129, "step": 4425 }, { "epoch": 0.48, "grad_norm": 0.074427280353466, "learning_rate": 0.0005633782179454594, "loss": 1.366, "step": 4426 }, { "epoch": 0.48, "grad_norm": 0.06692816156957947, "learning_rate": 0.0005632055111490767, "loss": 1.3508, "step": 4427 }, { "epoch": 0.48, "grad_norm": 0.07199780349642708, "learning_rate": 0.0005630327966888079, "loss": 1.4069, "step": 4428 }, { "epoch": 0.48, "grad_norm": 0.0683222669972921, "learning_rate": 0.0005628600745855952, "loss": 1.5312, "step": 4429 }, { "epoch": 0.48, "grad_norm": 0.07072452845567084, "learning_rate": 0.0005626873448603817, "loss": 1.2589, "step": 4430 }, { "epoch": 0.48, "grad_norm": 0.07226687346488332, "learning_rate": 0.0005625146075341115, "loss": 1.5328, "step": 4431 }, { "epoch": 0.48, "grad_norm": 0.06829504835227422, "learning_rate": 0.0005623418626277296, "loss": 1.5685, "step": 4432 }, { "epoch": 0.48, "grad_norm": 0.06920654184104114, "learning_rate": 0.000562169110162182, "loss": 1.3811, "step": 4433 }, { "epoch": 0.48, "grad_norm": 0.07101655341266463, "learning_rate": 0.0005619963501584154, "loss": 1.6274, "step": 4434 }, { "epoch": 0.48, "grad_norm": 0.0678822950684123, "learning_rate": 0.0005618235826373776, "loss": 1.4166, "step": 4435 }, { "epoch": 0.48, "grad_norm": 0.06572661972423154, "learning_rate": 0.0005616508076200174, "loss": 1.4391, "step": 4436 }, { "epoch": 0.48, "grad_norm": 0.06820610609290627, "learning_rate": 0.0005614780251272842, "loss": 1.5358, "step": 4437 }, { "epoch": 0.48, "grad_norm": 0.06550275046899529, "learning_rate": 0.0005613052351801283, "loss": 1.4511, "step": 4438 }, { "epoch": 0.48, "grad_norm": 0.08188025593494892, "learning_rate": 0.0005611324377995016, "loss": 1.4931, "step": 4439 }, { "epoch": 0.48, "grad_norm": 0.06581901611273966, "learning_rate": 0.0005609596330063558, "loss": 1.3614, "step": 4440 }, { "epoch": 0.48, "grad_norm": 0.06954853363417197, "learning_rate": 0.0005607868208216445, "loss": 1.3153, "step": 4441 }, { "epoch": 0.48, "grad_norm": 0.07447233452866256, "learning_rate": 0.0005606140012663214, "loss": 1.5368, "step": 4442 }, { "epoch": 0.48, "grad_norm": 0.06884509785274705, "learning_rate": 0.0005604411743613418, "loss": 1.51, "step": 4443 }, { "epoch": 0.48, "grad_norm": 0.07476522479204314, "learning_rate": 0.0005602683401276614, "loss": 1.3806, "step": 4444 }, { "epoch": 0.48, "grad_norm": 0.07323636031812013, "learning_rate": 0.0005600954985862373, "loss": 1.315, "step": 4445 }, { "epoch": 0.48, "grad_norm": 0.06912970850725218, "learning_rate": 0.0005599226497580264, "loss": 1.4559, "step": 4446 }, { "epoch": 0.48, "grad_norm": 0.0730926337836783, "learning_rate": 0.0005597497936639878, "loss": 1.4216, "step": 4447 }, { "epoch": 0.48, "grad_norm": 0.07234954230583959, "learning_rate": 0.0005595769303250809, "loss": 1.4416, "step": 4448 }, { "epoch": 0.48, "grad_norm": 0.0746131568968767, "learning_rate": 0.0005594040597622653, "loss": 1.3221, "step": 4449 }, { "epoch": 0.48, "grad_norm": 0.08148817831391064, "learning_rate": 0.0005592311819965032, "loss": 1.4065, "step": 4450 }, { "epoch": 0.48, "grad_norm": 0.07166491026144559, "learning_rate": 0.0005590582970487558, "loss": 1.345, "step": 4451 }, { "epoch": 0.48, "grad_norm": 0.07930429551915798, "learning_rate": 0.0005588854049399864, "loss": 1.4026, "step": 4452 }, { "epoch": 0.48, "grad_norm": 0.06973923917211951, "learning_rate": 0.0005587125056911586, "loss": 1.3695, "step": 4453 }, { "epoch": 0.48, "grad_norm": 0.07736874764036672, "learning_rate": 0.000558539599323237, "loss": 1.3889, "step": 4454 }, { "epoch": 0.48, "grad_norm": 0.07395876425197255, "learning_rate": 0.0005583666858571873, "loss": 1.4323, "step": 4455 }, { "epoch": 0.48, "grad_norm": 0.06956108570224902, "learning_rate": 0.0005581937653139757, "loss": 1.3301, "step": 4456 }, { "epoch": 0.48, "grad_norm": 0.070340521017195, "learning_rate": 0.0005580208377145693, "loss": 1.4881, "step": 4457 }, { "epoch": 0.48, "grad_norm": 0.07142190688929934, "learning_rate": 0.0005578479030799362, "loss": 1.3902, "step": 4458 }, { "epoch": 0.48, "grad_norm": 0.07158868756773161, "learning_rate": 0.0005576749614310456, "loss": 1.4741, "step": 4459 }, { "epoch": 0.48, "grad_norm": 0.06838868568645787, "learning_rate": 0.0005575020127888672, "loss": 1.4185, "step": 4460 }, { "epoch": 0.48, "grad_norm": 0.07183543922807431, "learning_rate": 0.0005573290571743714, "loss": 1.4307, "step": 4461 }, { "epoch": 0.48, "grad_norm": 0.06986236197336322, "learning_rate": 0.0005571560946085298, "loss": 1.4594, "step": 4462 }, { "epoch": 0.48, "grad_norm": 0.06696960142160002, "learning_rate": 0.0005569831251123145, "loss": 1.3465, "step": 4463 }, { "epoch": 0.48, "grad_norm": 0.07233568368627842, "learning_rate": 0.0005568101487066991, "loss": 1.307, "step": 4464 }, { "epoch": 0.48, "grad_norm": 0.0775921186659931, "learning_rate": 0.0005566371654126572, "loss": 1.439, "step": 4465 }, { "epoch": 0.48, "grad_norm": 0.07908711625273704, "learning_rate": 0.0005564641752511636, "loss": 1.3838, "step": 4466 }, { "epoch": 0.48, "grad_norm": 0.07270285235982457, "learning_rate": 0.0005562911782431943, "loss": 1.4833, "step": 4467 }, { "epoch": 0.48, "grad_norm": 0.07069593509582667, "learning_rate": 0.0005561181744097255, "loss": 1.4951, "step": 4468 }, { "epoch": 0.48, "grad_norm": 0.07302606485108576, "learning_rate": 0.0005559451637717345, "loss": 1.376, "step": 4469 }, { "epoch": 0.48, "grad_norm": 0.06534902828368297, "learning_rate": 0.0005557721463501997, "loss": 1.395, "step": 4470 }, { "epoch": 0.48, "grad_norm": 0.06380982214192189, "learning_rate": 0.0005555991221660998, "loss": 1.3312, "step": 4471 }, { "epoch": 0.48, "grad_norm": 0.06465927979215724, "learning_rate": 0.0005554260912404146, "loss": 1.3872, "step": 4472 }, { "epoch": 0.48, "grad_norm": 0.07193715282214726, "learning_rate": 0.0005552530535941248, "loss": 1.3708, "step": 4473 }, { "epoch": 0.48, "grad_norm": 0.06858753038473571, "learning_rate": 0.0005550800092482117, "loss": 1.4944, "step": 4474 }, { "epoch": 0.48, "grad_norm": 0.06901279556324783, "learning_rate": 0.0005549069582236576, "loss": 1.3406, "step": 4475 }, { "epoch": 0.48, "grad_norm": 0.08512978246349323, "learning_rate": 0.0005547339005414456, "loss": 1.5978, "step": 4476 }, { "epoch": 0.48, "grad_norm": 0.07861647095932271, "learning_rate": 0.0005545608362225594, "loss": 1.4021, "step": 4477 }, { "epoch": 0.48, "grad_norm": 0.06934085764688186, "learning_rate": 0.0005543877652879837, "loss": 1.3235, "step": 4478 }, { "epoch": 0.48, "grad_norm": 0.07068395348115039, "learning_rate": 0.0005542146877587041, "loss": 1.4409, "step": 4479 }, { "epoch": 0.48, "grad_norm": 0.07537414137102522, "learning_rate": 0.0005540416036557064, "loss": 1.4742, "step": 4480 }, { "epoch": 0.48, "grad_norm": 0.06280318869662363, "learning_rate": 0.0005538685129999782, "loss": 1.4793, "step": 4481 }, { "epoch": 0.48, "grad_norm": 0.06802012013643947, "learning_rate": 0.000553695415812507, "loss": 1.4218, "step": 4482 }, { "epoch": 0.48, "grad_norm": 0.07184833062274965, "learning_rate": 0.0005535223121142817, "loss": 1.5405, "step": 4483 }, { "epoch": 0.48, "grad_norm": 0.07048066216163036, "learning_rate": 0.0005533492019262913, "loss": 1.5284, "step": 4484 }, { "epoch": 0.48, "grad_norm": 0.07208127756365079, "learning_rate": 0.0005531760852695263, "loss": 1.4083, "step": 4485 }, { "epoch": 0.48, "grad_norm": 0.06955811505268919, "learning_rate": 0.0005530029621649777, "loss": 1.3813, "step": 4486 }, { "epoch": 0.48, "grad_norm": 0.07251905452428771, "learning_rate": 0.0005528298326336372, "loss": 1.47, "step": 4487 }, { "epoch": 0.48, "grad_norm": 0.0660393445159999, "learning_rate": 0.0005526566966964972, "loss": 1.4468, "step": 4488 }, { "epoch": 0.48, "grad_norm": 0.07447153416480336, "learning_rate": 0.0005524835543745515, "loss": 1.2929, "step": 4489 }, { "epoch": 0.48, "grad_norm": 0.07285995323219417, "learning_rate": 0.0005523104056887936, "loss": 1.3571, "step": 4490 }, { "epoch": 0.48, "grad_norm": 0.07066125440052619, "learning_rate": 0.0005521372506602187, "loss": 1.3609, "step": 4491 }, { "epoch": 0.48, "grad_norm": 0.06989235987621864, "learning_rate": 0.0005519640893098227, "loss": 1.4226, "step": 4492 }, { "epoch": 0.48, "grad_norm": 0.06912670079475867, "learning_rate": 0.0005517909216586015, "loss": 1.4072, "step": 4493 }, { "epoch": 0.48, "grad_norm": 0.08376149611071462, "learning_rate": 0.0005516177477275523, "loss": 1.4395, "step": 4494 }, { "epoch": 0.48, "grad_norm": 0.07540252701541866, "learning_rate": 0.0005514445675376735, "loss": 1.3781, "step": 4495 }, { "epoch": 0.48, "grad_norm": 0.07338801331012032, "learning_rate": 0.0005512713811099636, "loss": 1.4511, "step": 4496 }, { "epoch": 0.48, "grad_norm": 0.08017833620848996, "learning_rate": 0.0005510981884654217, "loss": 1.3373, "step": 4497 }, { "epoch": 0.48, "grad_norm": 0.07695481291241366, "learning_rate": 0.0005509249896250485, "loss": 1.3973, "step": 4498 }, { "epoch": 0.48, "grad_norm": 0.07799573575606975, "learning_rate": 0.0005507517846098447, "loss": 1.3026, "step": 4499 }, { "epoch": 0.48, "grad_norm": 0.06890484703699848, "learning_rate": 0.0005505785734408121, "loss": 1.4129, "step": 4500 }, { "epoch": 0.48, "grad_norm": 0.07170266528926741, "learning_rate": 0.0005504053561389531, "loss": 1.4825, "step": 4501 }, { "epoch": 0.48, "grad_norm": 0.08265522375488303, "learning_rate": 0.0005502321327252708, "loss": 1.413, "step": 4502 }, { "epoch": 0.48, "grad_norm": 0.06619748791951723, "learning_rate": 0.0005500589032207692, "loss": 1.375, "step": 4503 }, { "epoch": 0.48, "grad_norm": 0.07303920274619637, "learning_rate": 0.000549885667646453, "loss": 1.4716, "step": 4504 }, { "epoch": 0.48, "grad_norm": 0.08679445748778027, "learning_rate": 0.0005497124260233277, "loss": 1.5006, "step": 4505 }, { "epoch": 0.48, "grad_norm": 0.06787411422320562, "learning_rate": 0.0005495391783723994, "loss": 1.4461, "step": 4506 }, { "epoch": 0.48, "grad_norm": 0.0813816825415281, "learning_rate": 0.0005493659247146749, "loss": 1.6357, "step": 4507 }, { "epoch": 0.48, "grad_norm": 0.07138574670659505, "learning_rate": 0.0005491926650711619, "loss": 1.4653, "step": 4508 }, { "epoch": 0.48, "grad_norm": 0.06750733162437504, "learning_rate": 0.0005490193994628685, "loss": 1.4337, "step": 4509 }, { "epoch": 0.48, "grad_norm": 0.0987904441000795, "learning_rate": 0.000548846127910804, "loss": 1.2916, "step": 4510 }, { "epoch": 0.48, "grad_norm": 0.07504186207018469, "learning_rate": 0.000548672850435978, "loss": 1.3991, "step": 4511 }, { "epoch": 0.49, "grad_norm": 0.08576301042708652, "learning_rate": 0.0005484995670594012, "loss": 1.5069, "step": 4512 }, { "epoch": 0.49, "grad_norm": 0.07276167204073009, "learning_rate": 0.0005483262778020846, "loss": 1.4622, "step": 4513 }, { "epoch": 0.49, "grad_norm": 0.08992303325112609, "learning_rate": 0.0005481529826850403, "loss": 1.3602, "step": 4514 }, { "epoch": 0.49, "grad_norm": 0.07875033982616096, "learning_rate": 0.000547979681729281, "loss": 1.585, "step": 4515 }, { "epoch": 0.49, "grad_norm": 0.07207386003591792, "learning_rate": 0.0005478063749558195, "loss": 1.4695, "step": 4516 }, { "epoch": 0.49, "grad_norm": 0.0719171163854882, "learning_rate": 0.0005476330623856705, "loss": 1.2742, "step": 4517 }, { "epoch": 0.49, "grad_norm": 0.07184078857477177, "learning_rate": 0.0005474597440398483, "loss": 1.4122, "step": 4518 }, { "epoch": 0.49, "grad_norm": 0.06633376011230552, "learning_rate": 0.0005472864199393687, "loss": 1.308, "step": 4519 }, { "epoch": 0.49, "grad_norm": 0.07621419629579644, "learning_rate": 0.0005471130901052476, "loss": 1.5846, "step": 4520 }, { "epoch": 0.49, "grad_norm": 0.06731631619116417, "learning_rate": 0.0005469397545585019, "loss": 1.3949, "step": 4521 }, { "epoch": 0.49, "grad_norm": 0.07302884182715914, "learning_rate": 0.000546766413320149, "loss": 1.4557, "step": 4522 }, { "epoch": 0.49, "grad_norm": 0.06445838444376298, "learning_rate": 0.0005465930664112073, "loss": 1.4074, "step": 4523 }, { "epoch": 0.49, "grad_norm": 0.06288037927464722, "learning_rate": 0.000546419713852696, "loss": 1.3382, "step": 4524 }, { "epoch": 0.49, "grad_norm": 0.07343137466979509, "learning_rate": 0.000546246355665634, "loss": 1.4763, "step": 4525 }, { "epoch": 0.49, "grad_norm": 0.09605837351043238, "learning_rate": 0.000546072991871042, "loss": 1.4698, "step": 4526 }, { "epoch": 0.49, "grad_norm": 0.07278503081830355, "learning_rate": 0.0005458996224899409, "loss": 1.4603, "step": 4527 }, { "epoch": 0.49, "grad_norm": 0.06792443909696488, "learning_rate": 0.0005457262475433523, "loss": 1.3286, "step": 4528 }, { "epoch": 0.49, "grad_norm": 0.07308926990410777, "learning_rate": 0.0005455528670522987, "loss": 1.3996, "step": 4529 }, { "epoch": 0.49, "grad_norm": 0.07682204161835285, "learning_rate": 0.0005453794810378028, "loss": 1.367, "step": 4530 }, { "epoch": 0.49, "grad_norm": 0.06996761782406571, "learning_rate": 0.0005452060895208883, "loss": 1.3614, "step": 4531 }, { "epoch": 0.49, "grad_norm": 0.0725382634877934, "learning_rate": 0.0005450326925225798, "loss": 1.3161, "step": 4532 }, { "epoch": 0.49, "grad_norm": 0.06988327489990985, "learning_rate": 0.000544859290063902, "loss": 1.4512, "step": 4533 }, { "epoch": 0.49, "grad_norm": 0.08344302247555796, "learning_rate": 0.0005446858821658805, "loss": 1.4152, "step": 4534 }, { "epoch": 0.49, "grad_norm": 0.07425148455363119, "learning_rate": 0.000544512468849542, "loss": 1.3646, "step": 4535 }, { "epoch": 0.49, "grad_norm": 0.06737391959124295, "learning_rate": 0.0005443390501359129, "loss": 1.3049, "step": 4536 }, { "epoch": 0.49, "grad_norm": 0.06954957142687951, "learning_rate": 0.0005441656260460212, "loss": 1.366, "step": 4537 }, { "epoch": 0.49, "grad_norm": 0.07154336570704602, "learning_rate": 0.0005439921966008953, "loss": 1.5324, "step": 4538 }, { "epoch": 0.49, "grad_norm": 0.08572842758012887, "learning_rate": 0.0005438187618215636, "loss": 1.4391, "step": 4539 }, { "epoch": 0.49, "grad_norm": 0.06743260458188859, "learning_rate": 0.0005436453217290562, "loss": 1.3816, "step": 4540 }, { "epoch": 0.49, "grad_norm": 0.07155234240297914, "learning_rate": 0.000543471876344403, "loss": 1.4001, "step": 4541 }, { "epoch": 0.49, "grad_norm": 0.07525337751704271, "learning_rate": 0.0005432984256886347, "loss": 1.5117, "step": 4542 }, { "epoch": 0.49, "grad_norm": 0.07358363432443406, "learning_rate": 0.0005431249697827832, "loss": 1.335, "step": 4543 }, { "epoch": 0.49, "grad_norm": 0.07454610224380064, "learning_rate": 0.0005429515086478805, "loss": 1.3017, "step": 4544 }, { "epoch": 0.49, "grad_norm": 0.07046647316226497, "learning_rate": 0.0005427780423049593, "loss": 1.3929, "step": 4545 }, { "epoch": 0.49, "grad_norm": 0.0766116705717234, "learning_rate": 0.0005426045707750529, "loss": 1.4339, "step": 4546 }, { "epoch": 0.49, "grad_norm": 0.09166538682860578, "learning_rate": 0.0005424310940791953, "loss": 1.4755, "step": 4547 }, { "epoch": 0.49, "grad_norm": 0.07278076098680313, "learning_rate": 0.0005422576122384215, "loss": 1.5623, "step": 4548 }, { "epoch": 0.49, "grad_norm": 0.06308207348750028, "learning_rate": 0.0005420841252737664, "loss": 1.3334, "step": 4549 }, { "epoch": 0.49, "grad_norm": 0.07808545423036595, "learning_rate": 0.0005419106332062661, "loss": 1.5023, "step": 4550 }, { "epoch": 0.49, "grad_norm": 0.07914617981438024, "learning_rate": 0.000541737136056957, "loss": 1.4335, "step": 4551 }, { "epoch": 0.49, "grad_norm": 0.07101119935426858, "learning_rate": 0.0005415636338468762, "loss": 1.3554, "step": 4552 }, { "epoch": 0.49, "grad_norm": 0.07221781995588977, "learning_rate": 0.0005413901265970616, "loss": 1.3392, "step": 4553 }, { "epoch": 0.49, "grad_norm": 0.06859955402895859, "learning_rate": 0.0005412166143285514, "loss": 1.3689, "step": 4554 }, { "epoch": 0.49, "grad_norm": 0.06889021096031589, "learning_rate": 0.0005410430970623847, "loss": 1.4025, "step": 4555 }, { "epoch": 0.49, "grad_norm": 0.07536620068094364, "learning_rate": 0.0005408695748196009, "loss": 1.3711, "step": 4556 }, { "epoch": 0.49, "grad_norm": 0.07286775325340998, "learning_rate": 0.0005406960476212403, "loss": 1.4687, "step": 4557 }, { "epoch": 0.49, "grad_norm": 0.07270129112592764, "learning_rate": 0.0005405225154883435, "loss": 1.5237, "step": 4558 }, { "epoch": 0.49, "grad_norm": 0.07454308740712048, "learning_rate": 0.0005403489784419521, "loss": 1.5203, "step": 4559 }, { "epoch": 0.49, "grad_norm": 0.07490105304575796, "learning_rate": 0.000540175436503108, "loss": 1.3618, "step": 4560 }, { "epoch": 0.49, "grad_norm": 0.07055456412536586, "learning_rate": 0.0005400018896928537, "loss": 1.3952, "step": 4561 }, { "epoch": 0.49, "grad_norm": 0.07493601484219946, "learning_rate": 0.0005398283380322323, "loss": 1.4886, "step": 4562 }, { "epoch": 0.49, "grad_norm": 0.07994133908501737, "learning_rate": 0.0005396547815422877, "loss": 1.3913, "step": 4563 }, { "epoch": 0.49, "grad_norm": 0.07205633839504125, "learning_rate": 0.000539481220244064, "loss": 1.4247, "step": 4564 }, { "epoch": 0.49, "grad_norm": 0.08296967780452945, "learning_rate": 0.0005393076541586062, "loss": 1.4473, "step": 4565 }, { "epoch": 0.49, "grad_norm": 0.06787442123709032, "learning_rate": 0.0005391340833069601, "loss": 1.3086, "step": 4566 }, { "epoch": 0.49, "grad_norm": 0.08012266054574767, "learning_rate": 0.0005389605077101712, "loss": 1.4119, "step": 4567 }, { "epoch": 0.49, "grad_norm": 0.08161258295766774, "learning_rate": 0.0005387869273892865, "loss": 1.5046, "step": 4568 }, { "epoch": 0.49, "grad_norm": 0.07775070711351714, "learning_rate": 0.0005386133423653532, "loss": 1.3888, "step": 4569 }, { "epoch": 0.49, "grad_norm": 0.06586472603954369, "learning_rate": 0.0005384397526594189, "loss": 1.3614, "step": 4570 }, { "epoch": 0.49, "grad_norm": 0.08135731486330744, "learning_rate": 0.0005382661582925322, "loss": 1.3754, "step": 4571 }, { "epoch": 0.49, "grad_norm": 0.06833444123583289, "learning_rate": 0.000538092559285742, "loss": 1.5292, "step": 4572 }, { "epoch": 0.49, "grad_norm": 0.07780963232580691, "learning_rate": 0.0005379189556600974, "loss": 1.3701, "step": 4573 }, { "epoch": 0.49, "grad_norm": 0.06877199878994798, "learning_rate": 0.0005377453474366489, "loss": 1.3532, "step": 4574 }, { "epoch": 0.49, "grad_norm": 0.06300244430878904, "learning_rate": 0.0005375717346364468, "loss": 1.3087, "step": 4575 }, { "epoch": 0.49, "grad_norm": 0.08631255658908871, "learning_rate": 0.0005373981172805421, "loss": 1.5267, "step": 4576 }, { "epoch": 0.49, "grad_norm": 0.0741591794846563, "learning_rate": 0.0005372244953899872, "loss": 1.4152, "step": 4577 }, { "epoch": 0.49, "grad_norm": 0.07917543188100162, "learning_rate": 0.0005370508689858336, "loss": 1.5721, "step": 4578 }, { "epoch": 0.49, "grad_norm": 0.07534740443199708, "learning_rate": 0.0005368772380891345, "loss": 1.3377, "step": 4579 }, { "epoch": 0.49, "grad_norm": 0.06688936328416421, "learning_rate": 0.0005367036027209431, "loss": 1.4143, "step": 4580 }, { "epoch": 0.49, "grad_norm": 0.06971105161281534, "learning_rate": 0.0005365299629023133, "loss": 1.4915, "step": 4581 }, { "epoch": 0.49, "grad_norm": 0.07018375199107363, "learning_rate": 0.0005363563186542997, "loss": 1.3651, "step": 4582 }, { "epoch": 0.49, "grad_norm": 0.0783407287552422, "learning_rate": 0.000536182669997957, "loss": 1.4741, "step": 4583 }, { "epoch": 0.49, "grad_norm": 0.06865204082546987, "learning_rate": 0.0005360090169543409, "loss": 1.5494, "step": 4584 }, { "epoch": 0.49, "grad_norm": 0.07492596982291116, "learning_rate": 0.0005358353595445074, "loss": 1.4175, "step": 4585 }, { "epoch": 0.49, "grad_norm": 0.06726307354329226, "learning_rate": 0.0005356616977895129, "loss": 1.4331, "step": 4586 }, { "epoch": 0.49, "grad_norm": 0.09002694081447887, "learning_rate": 0.0005354880317104144, "loss": 1.4617, "step": 4587 }, { "epoch": 0.49, "grad_norm": 0.069512338457349, "learning_rate": 0.0005353143613282702, "loss": 1.4518, "step": 4588 }, { "epoch": 0.49, "grad_norm": 0.07455424036661215, "learning_rate": 0.0005351406866641377, "loss": 1.5088, "step": 4589 }, { "epoch": 0.49, "grad_norm": 0.07415562850466256, "learning_rate": 0.0005349670077390757, "loss": 1.3662, "step": 4590 }, { "epoch": 0.49, "grad_norm": 0.0715278258922769, "learning_rate": 0.0005347933245741435, "loss": 1.3138, "step": 4591 }, { "epoch": 0.49, "grad_norm": 0.07669012583344414, "learning_rate": 0.0005346196371904009, "loss": 1.488, "step": 4592 }, { "epoch": 0.49, "grad_norm": 0.06974715535155264, "learning_rate": 0.0005344459456089078, "loss": 1.5462, "step": 4593 }, { "epoch": 0.49, "grad_norm": 0.07338763101109938, "learning_rate": 0.0005342722498507251, "loss": 1.485, "step": 4594 }, { "epoch": 0.49, "grad_norm": 0.0694636156603943, "learning_rate": 0.0005340985499369137, "loss": 1.3388, "step": 4595 }, { "epoch": 0.49, "grad_norm": 0.0705713385482358, "learning_rate": 0.0005339248458885359, "loss": 1.4198, "step": 4596 }, { "epoch": 0.49, "grad_norm": 0.08029313414307719, "learning_rate": 0.0005337511377266535, "loss": 1.6143, "step": 4597 }, { "epoch": 0.49, "grad_norm": 0.06277611025631606, "learning_rate": 0.0005335774254723293, "loss": 1.3245, "step": 4598 }, { "epoch": 0.49, "grad_norm": 0.07068327245009445, "learning_rate": 0.0005334037091466264, "loss": 1.4243, "step": 4599 }, { "epoch": 0.49, "grad_norm": 0.07234677948360466, "learning_rate": 0.0005332299887706087, "loss": 1.4454, "step": 4600 }, { "epoch": 0.49, "grad_norm": 0.07374854687243138, "learning_rate": 0.0005330562643653401, "loss": 1.484, "step": 4601 }, { "epoch": 0.49, "grad_norm": 0.08347732349695075, "learning_rate": 0.0005328825359518858, "loss": 1.4723, "step": 4602 }, { "epoch": 0.49, "grad_norm": 0.06287568215439522, "learning_rate": 0.0005327088035513105, "loss": 1.4508, "step": 4603 }, { "epoch": 0.49, "grad_norm": 0.07172576192681832, "learning_rate": 0.00053253506718468, "loss": 1.3933, "step": 4604 }, { "epoch": 0.5, "grad_norm": 0.07005732728591357, "learning_rate": 0.0005323613268730605, "loss": 1.4256, "step": 4605 }, { "epoch": 0.5, "grad_norm": 0.0654226164057184, "learning_rate": 0.0005321875826375185, "loss": 1.464, "step": 4606 }, { "epoch": 0.5, "grad_norm": 0.06238379236280632, "learning_rate": 0.000532013834499121, "loss": 1.4955, "step": 4607 }, { "epoch": 0.5, "grad_norm": 0.06840929269433478, "learning_rate": 0.0005318400824789359, "loss": 1.4424, "step": 4608 }, { "epoch": 0.5, "grad_norm": 0.07194276160370738, "learning_rate": 0.0005316663265980309, "loss": 1.2864, "step": 4609 }, { "epoch": 0.5, "grad_norm": 0.05734857788180746, "learning_rate": 0.0005314925668774747, "loss": 1.3987, "step": 4610 }, { "epoch": 0.5, "grad_norm": 0.06876114472500308, "learning_rate": 0.0005313188033383363, "loss": 1.4162, "step": 4611 }, { "epoch": 0.5, "grad_norm": 0.06978076302819285, "learning_rate": 0.0005311450360016847, "loss": 1.4863, "step": 4612 }, { "epoch": 0.5, "grad_norm": 0.061183894124280415, "learning_rate": 0.0005309712648885904, "loss": 1.3364, "step": 4613 }, { "epoch": 0.5, "grad_norm": 0.07043102493486708, "learning_rate": 0.0005307974900201235, "loss": 1.5243, "step": 4614 }, { "epoch": 0.5, "grad_norm": 0.06643923473201709, "learning_rate": 0.0005306237114173545, "loss": 1.5084, "step": 4615 }, { "epoch": 0.5, "grad_norm": 0.07447149586592648, "learning_rate": 0.0005304499291013551, "loss": 1.3401, "step": 4616 }, { "epoch": 0.5, "grad_norm": 0.06198491339411245, "learning_rate": 0.0005302761430931968, "loss": 1.3923, "step": 4617 }, { "epoch": 0.5, "grad_norm": 0.07738412696592357, "learning_rate": 0.0005301023534139516, "loss": 1.3069, "step": 4618 }, { "epoch": 0.5, "grad_norm": 0.08466989307130728, "learning_rate": 0.0005299285600846926, "loss": 1.4284, "step": 4619 }, { "epoch": 0.5, "grad_norm": 0.06723599229505743, "learning_rate": 0.0005297547631264922, "loss": 1.347, "step": 4620 }, { "epoch": 0.5, "grad_norm": 0.07153350664015816, "learning_rate": 0.0005295809625604244, "loss": 1.4633, "step": 4621 }, { "epoch": 0.5, "grad_norm": 0.07407785833339331, "learning_rate": 0.0005294071584075629, "loss": 1.3834, "step": 4622 }, { "epoch": 0.5, "grad_norm": 0.0764897207373361, "learning_rate": 0.0005292333506889819, "loss": 1.3923, "step": 4623 }, { "epoch": 0.5, "grad_norm": 0.0747599974968833, "learning_rate": 0.0005290595394257564, "loss": 1.3799, "step": 4624 }, { "epoch": 0.5, "grad_norm": 0.07394829288250857, "learning_rate": 0.0005288857246389617, "loss": 1.357, "step": 4625 }, { "epoch": 0.5, "grad_norm": 0.07255830568972636, "learning_rate": 0.000528711906349673, "loss": 1.3896, "step": 4626 }, { "epoch": 0.5, "grad_norm": 0.08099083798541638, "learning_rate": 0.0005285380845789669, "loss": 1.4059, "step": 4627 }, { "epoch": 0.5, "grad_norm": 0.07223762794677366, "learning_rate": 0.0005283642593479197, "loss": 1.4438, "step": 4628 }, { "epoch": 0.5, "grad_norm": 0.07054943655122418, "learning_rate": 0.0005281904306776082, "loss": 1.3298, "step": 4629 }, { "epoch": 0.5, "grad_norm": 0.07290095631200455, "learning_rate": 0.0005280165985891098, "loss": 1.4432, "step": 4630 }, { "epoch": 0.5, "grad_norm": 0.06882239361849153, "learning_rate": 0.0005278427631035022, "loss": 1.3861, "step": 4631 }, { "epoch": 0.5, "grad_norm": 0.0774646267226411, "learning_rate": 0.0005276689242418635, "loss": 1.3935, "step": 4632 }, { "epoch": 0.5, "grad_norm": 0.07159465617312386, "learning_rate": 0.0005274950820252725, "loss": 1.4455, "step": 4633 }, { "epoch": 0.5, "grad_norm": 0.07941785759449224, "learning_rate": 0.000527321236474808, "loss": 1.457, "step": 4634 }, { "epoch": 0.5, "grad_norm": 0.08133325764192359, "learning_rate": 0.0005271473876115494, "loss": 1.4672, "step": 4635 }, { "epoch": 0.5, "grad_norm": 0.07566371625930349, "learning_rate": 0.0005269735354565763, "loss": 1.4521, "step": 4636 }, { "epoch": 0.5, "grad_norm": 0.07032893323259092, "learning_rate": 0.0005267996800309692, "loss": 1.5184, "step": 4637 }, { "epoch": 0.5, "grad_norm": 0.07951209008943733, "learning_rate": 0.0005266258213558084, "loss": 1.5179, "step": 4638 }, { "epoch": 0.5, "grad_norm": 0.0868690888705973, "learning_rate": 0.0005264519594521751, "loss": 1.4653, "step": 4639 }, { "epoch": 0.5, "grad_norm": 0.07875135983511622, "learning_rate": 0.0005262780943411504, "loss": 1.4295, "step": 4640 }, { "epoch": 0.5, "grad_norm": 0.07812734317245881, "learning_rate": 0.0005261042260438163, "loss": 1.3016, "step": 4641 }, { "epoch": 0.5, "grad_norm": 0.07367453442907974, "learning_rate": 0.0005259303545812546, "loss": 1.4366, "step": 4642 }, { "epoch": 0.5, "grad_norm": 0.07696299129620354, "learning_rate": 0.000525756479974548, "loss": 1.4112, "step": 4643 }, { "epoch": 0.5, "grad_norm": 0.0726616742047467, "learning_rate": 0.0005255826022447796, "loss": 1.5034, "step": 4644 }, { "epoch": 0.5, "grad_norm": 0.07007504723694576, "learning_rate": 0.0005254087214130324, "loss": 1.5622, "step": 4645 }, { "epoch": 0.5, "grad_norm": 0.09152270269567035, "learning_rate": 0.0005252348375003902, "loss": 1.4707, "step": 4646 }, { "epoch": 0.5, "grad_norm": 0.07062202199553434, "learning_rate": 0.0005250609505279369, "loss": 1.4475, "step": 4647 }, { "epoch": 0.5, "grad_norm": 0.07991628604235958, "learning_rate": 0.0005248870605167569, "loss": 1.5251, "step": 4648 }, { "epoch": 0.5, "grad_norm": 0.07321059260782213, "learning_rate": 0.000524713167487935, "loss": 1.3062, "step": 4649 }, { "epoch": 0.5, "grad_norm": 0.06430883856415529, "learning_rate": 0.0005245392714625564, "loss": 1.4268, "step": 4650 }, { "epoch": 0.5, "grad_norm": 0.07432342350797429, "learning_rate": 0.0005243653724617067, "loss": 1.3743, "step": 4651 }, { "epoch": 0.5, "grad_norm": 0.07084662663170704, "learning_rate": 0.0005241914705064713, "loss": 1.3945, "step": 4652 }, { "epoch": 0.5, "grad_norm": 0.07007986233491623, "learning_rate": 0.0005240175656179368, "loss": 1.4225, "step": 4653 }, { "epoch": 0.5, "grad_norm": 0.09277118002504049, "learning_rate": 0.0005238436578171898, "loss": 1.4031, "step": 4654 }, { "epoch": 0.5, "grad_norm": 0.07412570293831583, "learning_rate": 0.0005236697471253167, "loss": 1.4623, "step": 4655 }, { "epoch": 0.5, "grad_norm": 0.0703204816023259, "learning_rate": 0.0005234958335634057, "loss": 1.2797, "step": 4656 }, { "epoch": 0.5, "grad_norm": 0.06764260000870075, "learning_rate": 0.0005233219171525436, "loss": 1.5262, "step": 4657 }, { "epoch": 0.5, "grad_norm": 0.06908150282828376, "learning_rate": 0.0005231479979138186, "loss": 1.5008, "step": 4658 }, { "epoch": 0.5, "grad_norm": 0.06594842101397937, "learning_rate": 0.0005229740758683192, "loss": 1.5086, "step": 4659 }, { "epoch": 0.5, "grad_norm": 0.07234559595933199, "learning_rate": 0.0005228001510371337, "loss": 1.3922, "step": 4660 }, { "epoch": 0.5, "grad_norm": 0.07615374538885955, "learning_rate": 0.0005226262234413514, "loss": 1.3848, "step": 4661 }, { "epoch": 0.5, "grad_norm": 0.07029248323561711, "learning_rate": 0.0005224522931020616, "loss": 1.445, "step": 4662 }, { "epoch": 0.5, "grad_norm": 0.0714406791924088, "learning_rate": 0.0005222783600403536, "loss": 1.5777, "step": 4663 }, { "epoch": 0.5, "grad_norm": 0.0748912481646773, "learning_rate": 0.0005221044242773177, "loss": 1.2854, "step": 4664 }, { "epoch": 0.5, "grad_norm": 0.06734632544256124, "learning_rate": 0.0005219304858340443, "loss": 1.5012, "step": 4665 }, { "epoch": 0.5, "grad_norm": 0.06856504138644735, "learning_rate": 0.0005217565447316238, "loss": 1.4233, "step": 4666 }, { "epoch": 0.5, "grad_norm": 0.07015175624730764, "learning_rate": 0.0005215826009911474, "loss": 1.5598, "step": 4667 }, { "epoch": 0.5, "grad_norm": 0.07662387766996255, "learning_rate": 0.0005214086546337061, "loss": 1.4279, "step": 4668 }, { "epoch": 0.5, "grad_norm": 0.07579601306263993, "learning_rate": 0.0005212347056803916, "loss": 1.4488, "step": 4669 }, { "epoch": 0.5, "grad_norm": 0.08407512048240309, "learning_rate": 0.0005210607541522958, "loss": 1.4203, "step": 4670 }, { "epoch": 0.5, "grad_norm": 0.06957229687866279, "learning_rate": 0.000520886800070511, "loss": 1.3616, "step": 4671 }, { "epoch": 0.5, "grad_norm": 0.06557597435831303, "learning_rate": 0.0005207128434561297, "loss": 1.2343, "step": 4672 }, { "epoch": 0.5, "grad_norm": 0.10138332283543867, "learning_rate": 0.0005205388843302446, "loss": 1.3596, "step": 4673 }, { "epoch": 0.5, "grad_norm": 0.059370486462788453, "learning_rate": 0.0005203649227139491, "loss": 1.4954, "step": 4674 }, { "epoch": 0.5, "grad_norm": 0.06702999161881795, "learning_rate": 0.0005201909586283365, "loss": 1.419, "step": 4675 }, { "epoch": 0.5, "grad_norm": 0.07669954138606098, "learning_rate": 0.0005200169920945005, "loss": 1.344, "step": 4676 }, { "epoch": 0.5, "grad_norm": 0.07665471343138372, "learning_rate": 0.0005198430231335352, "loss": 1.3518, "step": 4677 }, { "epoch": 0.5, "grad_norm": 0.07485280276094071, "learning_rate": 0.000519669051766535, "loss": 1.3616, "step": 4678 }, { "epoch": 0.5, "grad_norm": 0.08512190117286093, "learning_rate": 0.0005194950780145945, "loss": 1.4598, "step": 4679 }, { "epoch": 0.5, "grad_norm": 0.07047653270509795, "learning_rate": 0.0005193211018988084, "loss": 1.3529, "step": 4680 }, { "epoch": 0.5, "grad_norm": 0.06881206519124212, "learning_rate": 0.0005191471234402723, "loss": 1.4288, "step": 4681 }, { "epoch": 0.5, "grad_norm": 0.0804348080365886, "learning_rate": 0.0005189731426600813, "loss": 1.3915, "step": 4682 }, { "epoch": 0.5, "grad_norm": 0.06573587273377841, "learning_rate": 0.0005187991595793314, "loss": 1.4739, "step": 4683 }, { "epoch": 0.5, "grad_norm": 0.06985488464953007, "learning_rate": 0.0005186251742191187, "loss": 1.4218, "step": 4684 }, { "epoch": 0.5, "grad_norm": 0.08365828393922413, "learning_rate": 0.0005184511866005392, "loss": 1.5171, "step": 4685 }, { "epoch": 0.5, "grad_norm": 0.06799405095670837, "learning_rate": 0.0005182771967446899, "loss": 1.4046, "step": 4686 }, { "epoch": 0.5, "grad_norm": 0.07745680866308548, "learning_rate": 0.0005181032046726674, "loss": 1.3601, "step": 4687 }, { "epoch": 0.5, "grad_norm": 0.07401868558341498, "learning_rate": 0.0005179292104055689, "loss": 1.408, "step": 4688 }, { "epoch": 0.5, "grad_norm": 0.08185325765888811, "learning_rate": 0.0005177552139644919, "loss": 1.3529, "step": 4689 }, { "epoch": 0.5, "grad_norm": 0.06555219557774031, "learning_rate": 0.0005175812153705339, "loss": 1.3481, "step": 4690 }, { "epoch": 0.5, "grad_norm": 0.07034421079868959, "learning_rate": 0.000517407214644793, "loss": 1.4121, "step": 4691 }, { "epoch": 0.5, "grad_norm": 0.06276669263952558, "learning_rate": 0.0005172332118083673, "loss": 1.3234, "step": 4692 }, { "epoch": 0.5, "grad_norm": 0.07723946949855989, "learning_rate": 0.0005170592068823553, "loss": 1.5377, "step": 4693 }, { "epoch": 0.5, "grad_norm": 0.08084112493527873, "learning_rate": 0.0005168851998878555, "loss": 1.3488, "step": 4694 }, { "epoch": 0.5, "grad_norm": 0.0727018555758983, "learning_rate": 0.0005167111908459672, "loss": 1.3023, "step": 4695 }, { "epoch": 0.5, "grad_norm": 0.07233243830653298, "learning_rate": 0.0005165371797777894, "loss": 1.4445, "step": 4696 }, { "epoch": 0.5, "grad_norm": 0.07291339507881155, "learning_rate": 0.0005163631667044213, "loss": 1.4742, "step": 4697 }, { "epoch": 0.51, "grad_norm": 0.07670065393263528, "learning_rate": 0.000516189151646963, "loss": 1.4396, "step": 4698 }, { "epoch": 0.51, "grad_norm": 0.07358001838784738, "learning_rate": 0.0005160151346265142, "loss": 1.409, "step": 4699 }, { "epoch": 0.51, "grad_norm": 0.08157498764671632, "learning_rate": 0.0005158411156641751, "loss": 1.3561, "step": 4700 }, { "epoch": 0.51, "grad_norm": 0.07501124171108461, "learning_rate": 0.0005156670947810462, "loss": 1.4614, "step": 4701 }, { "epoch": 0.51, "grad_norm": 0.06975465507140635, "learning_rate": 0.000515493071998228, "loss": 1.3463, "step": 4702 }, { "epoch": 0.51, "grad_norm": 0.07219506375827546, "learning_rate": 0.0005153190473368213, "loss": 1.4587, "step": 4703 }, { "epoch": 0.51, "grad_norm": 0.07223521378851384, "learning_rate": 0.0005151450208179276, "loss": 1.3898, "step": 4704 }, { "epoch": 0.51, "grad_norm": 0.08370766965759803, "learning_rate": 0.0005149709924626476, "loss": 1.3624, "step": 4705 }, { "epoch": 0.51, "grad_norm": 0.08079188900750653, "learning_rate": 0.0005147969622920832, "loss": 1.3803, "step": 4706 }, { "epoch": 0.51, "grad_norm": 0.07792965487429286, "learning_rate": 0.0005146229303273363, "loss": 1.4281, "step": 4707 }, { "epoch": 0.51, "grad_norm": 0.08238838852280057, "learning_rate": 0.0005144488965895084, "loss": 1.4642, "step": 4708 }, { "epoch": 0.51, "grad_norm": 0.0669417006150072, "learning_rate": 0.0005142748610997023, "loss": 1.484, "step": 4709 }, { "epoch": 0.51, "grad_norm": 0.07877637474591272, "learning_rate": 0.0005141008238790199, "loss": 1.3851, "step": 4710 }, { "epoch": 0.51, "grad_norm": 0.07109987453919168, "learning_rate": 0.0005139267849485639, "loss": 1.3506, "step": 4711 }, { "epoch": 0.51, "grad_norm": 0.07645449263811037, "learning_rate": 0.0005137527443294374, "loss": 1.4377, "step": 4712 }, { "epoch": 0.51, "grad_norm": 0.06840270491375017, "learning_rate": 0.0005135787020427432, "loss": 1.3547, "step": 4713 }, { "epoch": 0.51, "grad_norm": 0.07120138812630702, "learning_rate": 0.0005134046581095844, "loss": 1.4399, "step": 4714 }, { "epoch": 0.51, "grad_norm": 0.07151681302142116, "learning_rate": 0.0005132306125510648, "loss": 1.2813, "step": 4715 }, { "epoch": 0.51, "grad_norm": 0.06240065323117308, "learning_rate": 0.0005130565653882877, "loss": 1.3654, "step": 4716 }, { "epoch": 0.51, "grad_norm": 0.06194617156336227, "learning_rate": 0.0005128825166423569, "loss": 1.5099, "step": 4717 }, { "epoch": 0.51, "grad_norm": 0.059304667134885136, "learning_rate": 0.0005127084663343769, "loss": 1.4544, "step": 4718 }, { "epoch": 0.51, "grad_norm": 0.06483159155925523, "learning_rate": 0.0005125344144854513, "loss": 1.5357, "step": 4719 }, { "epoch": 0.51, "grad_norm": 0.06246551571893033, "learning_rate": 0.0005123603611166848, "loss": 1.5015, "step": 4720 }, { "epoch": 0.51, "grad_norm": 0.061595172916569754, "learning_rate": 0.0005121863062491818, "loss": 1.3058, "step": 4721 }, { "epoch": 0.51, "grad_norm": 0.06544023845066463, "learning_rate": 0.0005120122499040473, "loss": 1.392, "step": 4722 }, { "epoch": 0.51, "grad_norm": 0.0759664433784072, "learning_rate": 0.0005118381921023859, "loss": 1.4097, "step": 4723 }, { "epoch": 0.51, "grad_norm": 0.07274017908537568, "learning_rate": 0.0005116641328653031, "loss": 1.5345, "step": 4724 }, { "epoch": 0.51, "grad_norm": 0.06619000110896354, "learning_rate": 0.0005114900722139039, "loss": 1.4603, "step": 4725 }, { "epoch": 0.51, "grad_norm": 0.07000980439320045, "learning_rate": 0.0005113160101692938, "loss": 1.4563, "step": 4726 }, { "epoch": 0.51, "grad_norm": 0.0685154283152642, "learning_rate": 0.0005111419467525786, "loss": 1.4495, "step": 4727 }, { "epoch": 0.51, "grad_norm": 0.07039172540427986, "learning_rate": 0.0005109678819848637, "loss": 1.403, "step": 4728 }, { "epoch": 0.51, "grad_norm": 0.06839571480775747, "learning_rate": 0.0005107938158872554, "loss": 1.4476, "step": 4729 }, { "epoch": 0.51, "grad_norm": 0.06564357301738481, "learning_rate": 0.0005106197484808598, "loss": 1.368, "step": 4730 }, { "epoch": 0.51, "grad_norm": 0.06865827489358711, "learning_rate": 0.0005104456797867831, "loss": 1.3423, "step": 4731 }, { "epoch": 0.51, "grad_norm": 0.07352389360835015, "learning_rate": 0.0005102716098261315, "loss": 1.4653, "step": 4732 }, { "epoch": 0.51, "grad_norm": 0.07081825517552962, "learning_rate": 0.0005100975386200119, "loss": 1.3664, "step": 4733 }, { "epoch": 0.51, "grad_norm": 0.07545040261023422, "learning_rate": 0.000509923466189531, "loss": 1.5089, "step": 4734 }, { "epoch": 0.51, "grad_norm": 0.07863333546994716, "learning_rate": 0.0005097493925557956, "loss": 1.5012, "step": 4735 }, { "epoch": 0.51, "grad_norm": 0.06897139505039224, "learning_rate": 0.0005095753177399127, "loss": 1.4103, "step": 4736 }, { "epoch": 0.51, "grad_norm": 0.0709893551702056, "learning_rate": 0.0005094012417629895, "loss": 1.5152, "step": 4737 }, { "epoch": 0.51, "grad_norm": 0.07522261575946469, "learning_rate": 0.0005092271646461334, "loss": 1.3445, "step": 4738 }, { "epoch": 0.51, "grad_norm": 0.07113884668637967, "learning_rate": 0.0005090530864104517, "loss": 1.3651, "step": 4739 }, { "epoch": 0.51, "grad_norm": 0.07430397727410497, "learning_rate": 0.000508879007077052, "loss": 1.5024, "step": 4740 }, { "epoch": 0.51, "grad_norm": 0.06937378871027804, "learning_rate": 0.0005087049266670424, "loss": 1.4918, "step": 4741 }, { "epoch": 0.51, "grad_norm": 0.07196532177141281, "learning_rate": 0.0005085308452015301, "loss": 1.5521, "step": 4742 }, { "epoch": 0.51, "grad_norm": 0.07511206297719938, "learning_rate": 0.0005083567627016235, "loss": 1.5095, "step": 4743 }, { "epoch": 0.51, "grad_norm": 0.06208055254053003, "learning_rate": 0.0005081826791884307, "loss": 1.4788, "step": 4744 }, { "epoch": 0.51, "grad_norm": 0.06275328428534259, "learning_rate": 0.0005080085946830596, "loss": 1.2569, "step": 4745 }, { "epoch": 0.51, "grad_norm": 0.07479166475912431, "learning_rate": 0.0005078345092066191, "loss": 1.3658, "step": 4746 }, { "epoch": 0.51, "grad_norm": 0.06983578026027626, "learning_rate": 0.0005076604227802171, "loss": 1.506, "step": 4747 }, { "epoch": 0.51, "grad_norm": 0.059381228868428844, "learning_rate": 0.0005074863354249625, "loss": 1.5236, "step": 4748 }, { "epoch": 0.51, "grad_norm": 0.07064049104101132, "learning_rate": 0.000507312247161964, "loss": 1.2982, "step": 4749 }, { "epoch": 0.51, "grad_norm": 0.06537241706728257, "learning_rate": 0.0005071381580123302, "loss": 1.3971, "step": 4750 }, { "epoch": 0.51, "grad_norm": 0.0705342768829638, "learning_rate": 0.0005069640679971702, "loss": 1.556, "step": 4751 }, { "epoch": 0.51, "grad_norm": 0.07234045693835783, "learning_rate": 0.000506789977137593, "loss": 1.4512, "step": 4752 }, { "epoch": 0.51, "grad_norm": 0.07044135313910474, "learning_rate": 0.0005066158854547075, "loss": 1.4174, "step": 4753 }, { "epoch": 0.51, "grad_norm": 0.07391834794874395, "learning_rate": 0.0005064417929696232, "loss": 1.378, "step": 4754 }, { "epoch": 0.51, "grad_norm": 0.0717476739675131, "learning_rate": 0.0005062676997034493, "loss": 1.3867, "step": 4755 }, { "epoch": 0.51, "grad_norm": 0.067547995646433, "learning_rate": 0.0005060936056772951, "loss": 1.5415, "step": 4756 }, { "epoch": 0.51, "grad_norm": 0.0709070460555177, "learning_rate": 0.0005059195109122705, "loss": 1.4631, "step": 4757 }, { "epoch": 0.51, "grad_norm": 0.0732869766299112, "learning_rate": 0.0005057454154294846, "loss": 1.4731, "step": 4758 }, { "epoch": 0.51, "grad_norm": 0.07631628853739582, "learning_rate": 0.0005055713192500472, "loss": 1.3617, "step": 4759 }, { "epoch": 0.51, "grad_norm": 0.07557827019800746, "learning_rate": 0.0005053972223950682, "loss": 1.3886, "step": 4760 }, { "epoch": 0.51, "grad_norm": 0.07541011499079218, "learning_rate": 0.0005052231248856573, "loss": 1.4401, "step": 4761 }, { "epoch": 0.51, "grad_norm": 0.06878411120055702, "learning_rate": 0.0005050490267429246, "loss": 1.4652, "step": 4762 }, { "epoch": 0.51, "grad_norm": 0.06739762564942098, "learning_rate": 0.00050487492798798, "loss": 1.5507, "step": 4763 }, { "epoch": 0.51, "grad_norm": 0.08015595700952176, "learning_rate": 0.0005047008286419336, "loss": 1.3467, "step": 4764 }, { "epoch": 0.51, "grad_norm": 0.07001966186468692, "learning_rate": 0.0005045267287258953, "loss": 1.4121, "step": 4765 }, { "epoch": 0.51, "grad_norm": 0.07030643248207329, "learning_rate": 0.0005043526282609757, "loss": 1.4879, "step": 4766 }, { "epoch": 0.51, "grad_norm": 0.08827989390479461, "learning_rate": 0.0005041785272682849, "loss": 1.3802, "step": 4767 }, { "epoch": 0.51, "grad_norm": 0.06827504201270179, "learning_rate": 0.0005040044257689333, "loss": 1.2448, "step": 4768 }, { "epoch": 0.51, "grad_norm": 0.07915638860324421, "learning_rate": 0.0005038303237840314, "loss": 1.4055, "step": 4769 }, { "epoch": 0.51, "grad_norm": 0.06918765384054784, "learning_rate": 0.0005036562213346893, "loss": 1.538, "step": 4770 }, { "epoch": 0.51, "grad_norm": 0.08004483251964818, "learning_rate": 0.0005034821184420179, "loss": 1.279, "step": 4771 }, { "epoch": 0.51, "grad_norm": 0.06912086940038036, "learning_rate": 0.0005033080151271276, "loss": 1.3917, "step": 4772 }, { "epoch": 0.51, "grad_norm": 0.0714885845925155, "learning_rate": 0.000503133911411129, "loss": 1.394, "step": 4773 }, { "epoch": 0.51, "grad_norm": 0.06714730640721422, "learning_rate": 0.0005029598073151329, "loss": 1.4416, "step": 4774 }, { "epoch": 0.51, "grad_norm": 0.07827813518251049, "learning_rate": 0.00050278570286025, "loss": 1.3962, "step": 4775 }, { "epoch": 0.51, "grad_norm": 0.08034014575300731, "learning_rate": 0.0005026115980675908, "loss": 1.3307, "step": 4776 }, { "epoch": 0.51, "grad_norm": 0.0765192032903928, "learning_rate": 0.0005024374929582664, "loss": 1.4161, "step": 4777 }, { "epoch": 0.51, "grad_norm": 0.06608261104152828, "learning_rate": 0.0005022633875533879, "loss": 1.2704, "step": 4778 }, { "epoch": 0.51, "grad_norm": 0.07850238429821357, "learning_rate": 0.0005020892818740656, "loss": 1.4342, "step": 4779 }, { "epoch": 0.51, "grad_norm": 0.06403022438627028, "learning_rate": 0.0005019151759414107, "loss": 1.4186, "step": 4780 }, { "epoch": 0.51, "grad_norm": 0.06799933698291283, "learning_rate": 0.0005017410697765342, "loss": 1.362, "step": 4781 }, { "epoch": 0.51, "grad_norm": 0.0665150830459829, "learning_rate": 0.0005015669634005467, "loss": 1.4916, "step": 4782 }, { "epoch": 0.51, "grad_norm": 0.07236361062725231, "learning_rate": 0.0005013928568345597, "loss": 1.2738, "step": 4783 }, { "epoch": 0.51, "grad_norm": 0.06861181480465103, "learning_rate": 0.000501218750099684, "loss": 1.3728, "step": 4784 }, { "epoch": 0.51, "grad_norm": 0.06525807506942764, "learning_rate": 0.0005010446432170306, "loss": 1.5133, "step": 4785 }, { "epoch": 0.51, "grad_norm": 0.06543208409660094, "learning_rate": 0.0005008705362077108, "loss": 1.3821, "step": 4786 }, { "epoch": 0.51, "grad_norm": 0.07321867883420735, "learning_rate": 0.0005006964290928351, "loss": 1.3791, "step": 4787 }, { "epoch": 0.51, "grad_norm": 0.07359931820206309, "learning_rate": 0.0005005223218935152, "loss": 1.3444, "step": 4788 }, { "epoch": 0.51, "grad_norm": 0.07192584249908926, "learning_rate": 0.0005003482146308621, "loss": 1.3597, "step": 4789 }, { "epoch": 0.51, "grad_norm": 0.06602677971933513, "learning_rate": 0.0005001741073259866, "loss": 1.5547, "step": 4790 }, { "epoch": 0.52, "grad_norm": 0.07961813739728019, "learning_rate": 0.0005, "loss": 1.4441, "step": 4791 }, { "epoch": 0.52, "grad_norm": 0.07216239496007772, "learning_rate": 0.0004998258926740136, "loss": 1.3991, "step": 4792 }, { "epoch": 0.52, "grad_norm": 0.06996627259186475, "learning_rate": 0.0004996517853691379, "loss": 1.498, "step": 4793 }, { "epoch": 0.52, "grad_norm": 0.06657604028460089, "learning_rate": 0.0004994776781064847, "loss": 1.3081, "step": 4794 }, { "epoch": 0.52, "grad_norm": 0.06303048383434355, "learning_rate": 0.0004993035709071648, "loss": 1.2912, "step": 4795 }, { "epoch": 0.52, "grad_norm": 0.06521095894949946, "learning_rate": 0.0004991294637922893, "loss": 1.3, "step": 4796 }, { "epoch": 0.52, "grad_norm": 0.07399255311468003, "learning_rate": 0.0004989553567829695, "loss": 1.4204, "step": 4797 }, { "epoch": 0.52, "grad_norm": 0.08417173627975091, "learning_rate": 0.000498781249900316, "loss": 1.514, "step": 4798 }, { "epoch": 0.52, "grad_norm": 0.07708032383741255, "learning_rate": 0.0004986071431654404, "loss": 1.4721, "step": 4799 }, { "epoch": 0.52, "grad_norm": 0.06307820528877853, "learning_rate": 0.0004984330365994535, "loss": 1.373, "step": 4800 }, { "epoch": 0.52, "grad_norm": 0.06111579146187217, "learning_rate": 0.000498258930223466, "loss": 1.3943, "step": 4801 }, { "epoch": 0.52, "grad_norm": 0.06554523449766973, "learning_rate": 0.0004980848240585895, "loss": 1.3626, "step": 4802 }, { "epoch": 0.52, "grad_norm": 0.06604543741412207, "learning_rate": 0.0004979107181259345, "loss": 1.4796, "step": 4803 }, { "epoch": 0.52, "grad_norm": 0.06646732193706745, "learning_rate": 0.0004977366124466121, "loss": 1.337, "step": 4804 }, { "epoch": 0.52, "grad_norm": 0.07501224583003234, "learning_rate": 0.0004975625070417335, "loss": 1.4135, "step": 4805 }, { "epoch": 0.52, "grad_norm": 0.0737723782461909, "learning_rate": 0.0004973884019324092, "loss": 1.4717, "step": 4806 }, { "epoch": 0.52, "grad_norm": 0.07017623757888976, "learning_rate": 0.0004972142971397503, "loss": 1.2908, "step": 4807 }, { "epoch": 0.52, "grad_norm": 0.08090698115220855, "learning_rate": 0.0004970401926848673, "loss": 1.3435, "step": 4808 }, { "epoch": 0.52, "grad_norm": 0.07428142762072636, "learning_rate": 0.0004968660885888712, "loss": 1.3949, "step": 4809 }, { "epoch": 0.52, "grad_norm": 0.06726515819501672, "learning_rate": 0.0004966919848728726, "loss": 1.5103, "step": 4810 }, { "epoch": 0.52, "grad_norm": 0.07380195335443142, "learning_rate": 0.0004965178815579822, "loss": 1.3751, "step": 4811 }, { "epoch": 0.52, "grad_norm": 0.07761558508982735, "learning_rate": 0.0004963437786653108, "loss": 1.3809, "step": 4812 }, { "epoch": 0.52, "grad_norm": 0.07763719108725761, "learning_rate": 0.0004961696762159687, "loss": 1.4223, "step": 4813 }, { "epoch": 0.52, "grad_norm": 0.07189756416959095, "learning_rate": 0.0004959955742310667, "loss": 1.3861, "step": 4814 }, { "epoch": 0.52, "grad_norm": 0.08204004830906156, "learning_rate": 0.0004958214727317151, "loss": 1.3729, "step": 4815 }, { "epoch": 0.52, "grad_norm": 0.07748801457158774, "learning_rate": 0.0004956473717390242, "loss": 1.4368, "step": 4816 }, { "epoch": 0.52, "grad_norm": 0.07114876371367315, "learning_rate": 0.0004954732712741046, "loss": 1.3806, "step": 4817 }, { "epoch": 0.52, "grad_norm": 0.07188339735039269, "learning_rate": 0.0004952991713580667, "loss": 1.2483, "step": 4818 }, { "epoch": 0.52, "grad_norm": 0.07930257872447899, "learning_rate": 0.0004951250720120203, "loss": 1.3644, "step": 4819 }, { "epoch": 0.52, "grad_norm": 0.08348873172896026, "learning_rate": 0.0004949509732570756, "loss": 1.3957, "step": 4820 }, { "epoch": 0.52, "grad_norm": 0.08389458458452004, "learning_rate": 0.0004947768751143428, "loss": 1.4093, "step": 4821 }, { "epoch": 0.52, "grad_norm": 0.06612652000902616, "learning_rate": 0.000494602777604932, "loss": 1.3784, "step": 4822 }, { "epoch": 0.52, "grad_norm": 0.0741760050819813, "learning_rate": 0.0004944286807499529, "loss": 1.4181, "step": 4823 }, { "epoch": 0.52, "grad_norm": 0.07490795828156847, "learning_rate": 0.0004942545845705155, "loss": 1.3859, "step": 4824 }, { "epoch": 0.52, "grad_norm": 0.07099564390158039, "learning_rate": 0.0004940804890877297, "loss": 1.437, "step": 4825 }, { "epoch": 0.52, "grad_norm": 0.07139635992893732, "learning_rate": 0.0004939063943227048, "loss": 1.2719, "step": 4826 }, { "epoch": 0.52, "grad_norm": 0.07376969080532272, "learning_rate": 0.0004937323002965506, "loss": 1.4981, "step": 4827 }, { "epoch": 0.52, "grad_norm": 0.07045853122954072, "learning_rate": 0.0004935582070303767, "loss": 1.3566, "step": 4828 }, { "epoch": 0.52, "grad_norm": 0.0717218632271678, "learning_rate": 0.0004933841145452926, "loss": 1.2817, "step": 4829 }, { "epoch": 0.52, "grad_norm": 0.06912439103196728, "learning_rate": 0.0004932100228624072, "loss": 1.3627, "step": 4830 }, { "epoch": 0.52, "grad_norm": 0.06397379656010241, "learning_rate": 0.0004930359320028299, "loss": 1.3331, "step": 4831 }, { "epoch": 0.52, "grad_norm": 0.072065307732877, "learning_rate": 0.0004928618419876698, "loss": 1.4236, "step": 4832 }, { "epoch": 0.52, "grad_norm": 0.06265824186043788, "learning_rate": 0.0004926877528380362, "loss": 1.4034, "step": 4833 }, { "epoch": 0.52, "grad_norm": 0.07470555319601731, "learning_rate": 0.0004925136645750376, "loss": 1.3959, "step": 4834 }, { "epoch": 0.52, "grad_norm": 0.06716092261155211, "learning_rate": 0.0004923395772197829, "loss": 1.3977, "step": 4835 }, { "epoch": 0.52, "grad_norm": 0.07475858200018855, "learning_rate": 0.000492165490793381, "loss": 1.3418, "step": 4836 }, { "epoch": 0.52, "grad_norm": 0.07327276783550837, "learning_rate": 0.0004919914053169404, "loss": 1.5526, "step": 4837 }, { "epoch": 0.52, "grad_norm": 0.06446215898606858, "learning_rate": 0.0004918173208115694, "loss": 1.4026, "step": 4838 }, { "epoch": 0.52, "grad_norm": 0.06850668413178283, "learning_rate": 0.0004916432372983767, "loss": 1.4541, "step": 4839 }, { "epoch": 0.52, "grad_norm": 0.07883512917030402, "learning_rate": 0.0004914691547984701, "loss": 1.4152, "step": 4840 }, { "epoch": 0.52, "grad_norm": 0.06626183570546103, "learning_rate": 0.0004912950733329579, "loss": 1.5067, "step": 4841 }, { "epoch": 0.52, "grad_norm": 0.06982230220511904, "learning_rate": 0.000491120992922948, "loss": 1.4825, "step": 4842 }, { "epoch": 0.52, "grad_norm": 0.06640439609436137, "learning_rate": 0.0004909469135895484, "loss": 1.5727, "step": 4843 }, { "epoch": 0.52, "grad_norm": 0.07230302368111746, "learning_rate": 0.0004907728353538667, "loss": 1.3462, "step": 4844 }, { "epoch": 0.52, "grad_norm": 0.07980176876442475, "learning_rate": 0.0004905987582370106, "loss": 1.2809, "step": 4845 }, { "epoch": 0.52, "grad_norm": 0.07376211478806756, "learning_rate": 0.0004904246822600874, "loss": 1.3073, "step": 4846 }, { "epoch": 0.52, "grad_norm": 0.08760449272236642, "learning_rate": 0.0004902506074442044, "loss": 1.4225, "step": 4847 }, { "epoch": 0.52, "grad_norm": 0.06840610524758446, "learning_rate": 0.000490076533810469, "loss": 1.2817, "step": 4848 }, { "epoch": 0.52, "grad_norm": 0.07559520805663945, "learning_rate": 0.0004899024613799881, "loss": 1.5285, "step": 4849 }, { "epoch": 0.52, "grad_norm": 0.06808732086358632, "learning_rate": 0.0004897283901738686, "loss": 1.3411, "step": 4850 }, { "epoch": 0.52, "grad_norm": 0.06805750834540934, "learning_rate": 0.0004895543202132172, "loss": 1.431, "step": 4851 }, { "epoch": 0.52, "grad_norm": 0.08061022722716565, "learning_rate": 0.0004893802515191403, "loss": 1.4008, "step": 4852 }, { "epoch": 0.52, "grad_norm": 0.07093877908970345, "learning_rate": 0.0004892061841127446, "loss": 1.3412, "step": 4853 }, { "epoch": 0.52, "grad_norm": 0.07059607839069051, "learning_rate": 0.0004890321180151364, "loss": 1.3494, "step": 4854 }, { "epoch": 0.52, "grad_norm": 0.07822255808458586, "learning_rate": 0.0004888580532474216, "loss": 1.5409, "step": 4855 }, { "epoch": 0.52, "grad_norm": 0.07227079108820397, "learning_rate": 0.0004886839898307062, "loss": 1.473, "step": 4856 }, { "epoch": 0.52, "grad_norm": 0.07038880965726567, "learning_rate": 0.0004885099277860961, "loss": 1.3489, "step": 4857 }, { "epoch": 0.52, "grad_norm": 0.06991878687842776, "learning_rate": 0.0004883358671346968, "loss": 1.4336, "step": 4858 }, { "epoch": 0.52, "grad_norm": 0.06717727331715036, "learning_rate": 0.000488161807897614, "loss": 1.4411, "step": 4859 }, { "epoch": 0.52, "grad_norm": 0.0774671198594342, "learning_rate": 0.00048798775009595285, "loss": 1.4623, "step": 4860 }, { "epoch": 0.52, "grad_norm": 0.06980051958932858, "learning_rate": 0.0004878136937508183, "loss": 1.3775, "step": 4861 }, { "epoch": 0.52, "grad_norm": 0.06641487458265112, "learning_rate": 0.00048763963888331544, "loss": 1.4088, "step": 4862 }, { "epoch": 0.52, "grad_norm": 0.076824429835218, "learning_rate": 0.00048746558551454876, "loss": 1.3507, "step": 4863 }, { "epoch": 0.52, "grad_norm": 0.06712314946460401, "learning_rate": 0.00048729153366562324, "loss": 1.364, "step": 4864 }, { "epoch": 0.52, "grad_norm": 0.06919590987860398, "learning_rate": 0.00048711748335764305, "loss": 1.4172, "step": 4865 }, { "epoch": 0.52, "grad_norm": 0.07129027417864583, "learning_rate": 0.00048694343461171233, "loss": 1.5116, "step": 4866 }, { "epoch": 0.52, "grad_norm": 0.0732730577898096, "learning_rate": 0.0004867693874489353, "loss": 1.504, "step": 4867 }, { "epoch": 0.52, "grad_norm": 0.06935474063557696, "learning_rate": 0.0004865953418904156, "loss": 1.5817, "step": 4868 }, { "epoch": 0.52, "grad_norm": 0.06946055369077489, "learning_rate": 0.0004864212979572569, "loss": 1.5422, "step": 4869 }, { "epoch": 0.52, "grad_norm": 0.08287847765182309, "learning_rate": 0.0004862472556705626, "loss": 1.4387, "step": 4870 }, { "epoch": 0.52, "grad_norm": 0.06424473267777256, "learning_rate": 0.00048607321505143614, "loss": 1.3416, "step": 4871 }, { "epoch": 0.52, "grad_norm": 0.07389225955640676, "learning_rate": 0.0004858991761209803, "loss": 1.3793, "step": 4872 }, { "epoch": 0.52, "grad_norm": 0.07360082163492014, "learning_rate": 0.0004857251389002979, "loss": 1.344, "step": 4873 }, { "epoch": 0.52, "grad_norm": 0.07449052954350506, "learning_rate": 0.0004855511034104916, "loss": 1.3882, "step": 4874 }, { "epoch": 0.52, "grad_norm": 0.08073253698261196, "learning_rate": 0.0004853770696726638, "loss": 1.3821, "step": 4875 }, { "epoch": 0.52, "grad_norm": 0.07201609348875974, "learning_rate": 0.0004852030377079168, "loss": 1.3864, "step": 4876 }, { "epoch": 0.52, "grad_norm": 0.07729040284480578, "learning_rate": 0.00048502900753735246, "loss": 1.3482, "step": 4877 }, { "epoch": 0.52, "grad_norm": 0.08204100045657077, "learning_rate": 0.0004848549791820725, "loss": 1.3878, "step": 4878 }, { "epoch": 0.52, "grad_norm": 0.07915089902414485, "learning_rate": 0.0004846809526631786, "loss": 1.4156, "step": 4879 }, { "epoch": 0.52, "grad_norm": 0.06668874663977116, "learning_rate": 0.00048450692800177205, "loss": 1.4023, "step": 4880 }, { "epoch": 0.52, "grad_norm": 0.07506899782913949, "learning_rate": 0.00048433290521895375, "loss": 1.5963, "step": 4881 }, { "epoch": 0.52, "grad_norm": 0.08009982251249653, "learning_rate": 0.000484158884335825, "loss": 1.5212, "step": 4882 }, { "epoch": 0.52, "grad_norm": 0.07433691781861437, "learning_rate": 0.00048398486537348583, "loss": 1.4625, "step": 4883 }, { "epoch": 0.53, "grad_norm": 0.07517421414037016, "learning_rate": 0.0004838108483530371, "loss": 1.3501, "step": 4884 }, { "epoch": 0.53, "grad_norm": 0.07718074248971674, "learning_rate": 0.00048363683329557877, "loss": 1.449, "step": 4885 }, { "epoch": 0.53, "grad_norm": 0.06772550140183944, "learning_rate": 0.0004834628202222107, "loss": 1.4537, "step": 4886 }, { "epoch": 0.53, "grad_norm": 0.08030132699081302, "learning_rate": 0.00048328880915403285, "loss": 1.3553, "step": 4887 }, { "epoch": 0.53, "grad_norm": 0.08022333563213116, "learning_rate": 0.0004831148001121445, "loss": 1.3818, "step": 4888 }, { "epoch": 0.53, "grad_norm": 0.08425144353421923, "learning_rate": 0.0004829407931176447, "loss": 1.4234, "step": 4889 }, { "epoch": 0.53, "grad_norm": 0.07772699541497993, "learning_rate": 0.00048276678819163265, "loss": 1.275, "step": 4890 }, { "epoch": 0.53, "grad_norm": 0.07129243130758527, "learning_rate": 0.00048259278535520703, "loss": 1.3798, "step": 4891 }, { "epoch": 0.53, "grad_norm": 0.08053342693305993, "learning_rate": 0.0004824187846294662, "loss": 1.5514, "step": 4892 }, { "epoch": 0.53, "grad_norm": 0.08309886930824148, "learning_rate": 0.00048224478603550833, "loss": 1.538, "step": 4893 }, { "epoch": 0.53, "grad_norm": 0.06876084759596014, "learning_rate": 0.0004820707895944312, "loss": 1.4856, "step": 4894 }, { "epoch": 0.53, "grad_norm": 0.07354818557342893, "learning_rate": 0.00048189679532733274, "loss": 1.3469, "step": 4895 }, { "epoch": 0.53, "grad_norm": 0.07530483097981298, "learning_rate": 0.00048172280325531027, "loss": 1.3601, "step": 4896 }, { "epoch": 0.53, "grad_norm": 0.077507644997754, "learning_rate": 0.0004815488133994608, "loss": 1.4887, "step": 4897 }, { "epoch": 0.53, "grad_norm": 0.07757860383810837, "learning_rate": 0.0004813748257808814, "loss": 1.4837, "step": 4898 }, { "epoch": 0.53, "grad_norm": 0.0844479797681231, "learning_rate": 0.00048120084042066865, "loss": 1.4884, "step": 4899 }, { "epoch": 0.53, "grad_norm": 0.09641157890597221, "learning_rate": 0.0004810268573399187, "loss": 1.4827, "step": 4900 }, { "epoch": 0.53, "grad_norm": 0.07859998643895252, "learning_rate": 0.0004808528765597278, "loss": 1.4992, "step": 4901 }, { "epoch": 0.53, "grad_norm": 0.07276275280157116, "learning_rate": 0.00048067889810119157, "loss": 1.3915, "step": 4902 }, { "epoch": 0.53, "grad_norm": 0.08248731446994635, "learning_rate": 0.00048050492198540575, "loss": 1.5134, "step": 4903 }, { "epoch": 0.53, "grad_norm": 0.07469307310941618, "learning_rate": 0.00048033094823346517, "loss": 1.429, "step": 4904 }, { "epoch": 0.53, "grad_norm": 0.06984611564695324, "learning_rate": 0.00048015697686646486, "loss": 1.3318, "step": 4905 }, { "epoch": 0.53, "grad_norm": 0.07699548220008161, "learning_rate": 0.00047998300790549957, "loss": 1.4534, "step": 4906 }, { "epoch": 0.53, "grad_norm": 0.07689484583591935, "learning_rate": 0.0004798090413716636, "loss": 1.458, "step": 4907 }, { "epoch": 0.53, "grad_norm": 0.0694871817732402, "learning_rate": 0.00047963507728605105, "loss": 1.4913, "step": 4908 }, { "epoch": 0.53, "grad_norm": 0.07682175823997765, "learning_rate": 0.00047946111566975544, "loss": 1.4395, "step": 4909 }, { "epoch": 0.53, "grad_norm": 0.07852039121044253, "learning_rate": 0.00047928715654387043, "loss": 1.3262, "step": 4910 }, { "epoch": 0.53, "grad_norm": 0.07051721959880726, "learning_rate": 0.0004791131999294891, "loss": 1.5198, "step": 4911 }, { "epoch": 0.53, "grad_norm": 0.0830403180149037, "learning_rate": 0.00047893924584770423, "loss": 1.4268, "step": 4912 }, { "epoch": 0.53, "grad_norm": 0.07212490399033414, "learning_rate": 0.0004787652943196087, "loss": 1.3805, "step": 4913 }, { "epoch": 0.53, "grad_norm": 0.07701948410209183, "learning_rate": 0.0004785913453662941, "loss": 1.4523, "step": 4914 }, { "epoch": 0.53, "grad_norm": 0.08254302412735906, "learning_rate": 0.00047841739900885284, "loss": 1.3042, "step": 4915 }, { "epoch": 0.53, "grad_norm": 0.06816403715521224, "learning_rate": 0.0004782434552683763, "loss": 1.4307, "step": 4916 }, { "epoch": 0.53, "grad_norm": 0.07860013899326017, "learning_rate": 0.0004780695141659557, "loss": 1.6049, "step": 4917 }, { "epoch": 0.53, "grad_norm": 0.06599668818079388, "learning_rate": 0.0004778955757226823, "loss": 1.4359, "step": 4918 }, { "epoch": 0.53, "grad_norm": 0.0784233713723611, "learning_rate": 0.0004777216399596465, "loss": 1.2344, "step": 4919 }, { "epoch": 0.53, "grad_norm": 0.0788183292534905, "learning_rate": 0.0004775477068979385, "loss": 1.416, "step": 4920 }, { "epoch": 0.53, "grad_norm": 0.07366391835272944, "learning_rate": 0.00047737377655864867, "loss": 1.3873, "step": 4921 }, { "epoch": 0.53, "grad_norm": 0.0644438910372059, "learning_rate": 0.00047719984896286635, "loss": 1.2997, "step": 4922 }, { "epoch": 0.53, "grad_norm": 0.0819510510639513, "learning_rate": 0.0004770259241316809, "loss": 1.3432, "step": 4923 }, { "epoch": 0.53, "grad_norm": 0.07460646311584986, "learning_rate": 0.00047685200208618164, "loss": 1.4545, "step": 4924 }, { "epoch": 0.53, "grad_norm": 0.07142513170538935, "learning_rate": 0.00047667808284745656, "loss": 1.456, "step": 4925 }, { "epoch": 0.53, "grad_norm": 0.0638211680555277, "learning_rate": 0.0004765041664365945, "loss": 1.384, "step": 4926 }, { "epoch": 0.53, "grad_norm": 0.06456251777135609, "learning_rate": 0.00047633025287468323, "loss": 1.3933, "step": 4927 }, { "epoch": 0.53, "grad_norm": 0.07219441230096289, "learning_rate": 0.00047615634218281034, "loss": 1.4249, "step": 4928 }, { "epoch": 0.53, "grad_norm": 0.06770038061326396, "learning_rate": 0.0004759824343820632, "loss": 1.3451, "step": 4929 }, { "epoch": 0.53, "grad_norm": 0.06401901008264176, "learning_rate": 0.00047580852949352876, "loss": 1.3593, "step": 4930 }, { "epoch": 0.53, "grad_norm": 0.06572917995129061, "learning_rate": 0.0004756346275382934, "loss": 1.4683, "step": 4931 }, { "epoch": 0.53, "grad_norm": 0.06690009643906077, "learning_rate": 0.00047546072853744357, "loss": 1.4288, "step": 4932 }, { "epoch": 0.53, "grad_norm": 0.06554492012605238, "learning_rate": 0.00047528683251206493, "loss": 1.5643, "step": 4933 }, { "epoch": 0.53, "grad_norm": 0.0632941403255531, "learning_rate": 0.00047511293948324324, "loss": 1.2482, "step": 4934 }, { "epoch": 0.53, "grad_norm": 0.06731580775226245, "learning_rate": 0.0004749390494720633, "loss": 1.4638, "step": 4935 }, { "epoch": 0.53, "grad_norm": 0.07153188163916746, "learning_rate": 0.00047476516249960994, "loss": 1.4326, "step": 4936 }, { "epoch": 0.53, "grad_norm": 0.05895315648337665, "learning_rate": 0.00047459127858696763, "loss": 1.519, "step": 4937 }, { "epoch": 0.53, "grad_norm": 0.07150295093078468, "learning_rate": 0.00047441739775522045, "loss": 1.3454, "step": 4938 }, { "epoch": 0.53, "grad_norm": 0.06787792092572041, "learning_rate": 0.000474243520025452, "loss": 1.3111, "step": 4939 }, { "epoch": 0.53, "grad_norm": 0.06984567395943, "learning_rate": 0.00047406964541874544, "loss": 1.4355, "step": 4940 }, { "epoch": 0.53, "grad_norm": 0.07572599576818485, "learning_rate": 0.00047389577395618387, "loss": 1.5107, "step": 4941 }, { "epoch": 0.53, "grad_norm": 0.08604265411994148, "learning_rate": 0.0004737219056588497, "loss": 1.4844, "step": 4942 }, { "epoch": 0.53, "grad_norm": 0.06685170487349368, "learning_rate": 0.00047354804054782493, "loss": 1.3661, "step": 4943 }, { "epoch": 0.53, "grad_norm": 0.07421917272121109, "learning_rate": 0.0004733741786441916, "loss": 1.3045, "step": 4944 }, { "epoch": 0.53, "grad_norm": 0.08034262065898659, "learning_rate": 0.00047320031996903094, "loss": 1.3913, "step": 4945 }, { "epoch": 0.53, "grad_norm": 0.07111550377539654, "learning_rate": 0.0004730264645434238, "loss": 1.4248, "step": 4946 }, { "epoch": 0.53, "grad_norm": 0.07320459036647926, "learning_rate": 0.0004728526123884508, "loss": 1.378, "step": 4947 }, { "epoch": 0.53, "grad_norm": 0.08675173001306088, "learning_rate": 0.0004726787635251921, "loss": 1.3621, "step": 4948 }, { "epoch": 0.53, "grad_norm": 0.07183641238182674, "learning_rate": 0.00047250491797472754, "loss": 1.438, "step": 4949 }, { "epoch": 0.53, "grad_norm": 0.0757586168872166, "learning_rate": 0.00047233107575813657, "loss": 1.5881, "step": 4950 }, { "epoch": 0.53, "grad_norm": 0.07513930507667875, "learning_rate": 0.0004721572368964979, "loss": 1.5212, "step": 4951 }, { "epoch": 0.53, "grad_norm": 0.08132533337284426, "learning_rate": 0.0004719834014108903, "loss": 1.4006, "step": 4952 }, { "epoch": 0.53, "grad_norm": 0.07089111386192162, "learning_rate": 0.00047180956932239186, "loss": 1.3653, "step": 4953 }, { "epoch": 0.53, "grad_norm": 0.07935963141630312, "learning_rate": 0.00047163574065208034, "loss": 1.4387, "step": 4954 }, { "epoch": 0.53, "grad_norm": 0.07808586869134204, "learning_rate": 0.000471461915421033, "loss": 1.4203, "step": 4955 }, { "epoch": 0.53, "grad_norm": 0.07325513089570637, "learning_rate": 0.00047128809365032707, "loss": 1.4985, "step": 4956 }, { "epoch": 0.53, "grad_norm": 0.08026244304016605, "learning_rate": 0.0004711142753610385, "loss": 1.3556, "step": 4957 }, { "epoch": 0.53, "grad_norm": 0.07566667413783172, "learning_rate": 0.0004709404605742437, "loss": 1.3653, "step": 4958 }, { "epoch": 0.53, "grad_norm": 0.07429766224344789, "learning_rate": 0.0004707666493110182, "loss": 1.509, "step": 4959 }, { "epoch": 0.53, "grad_norm": 0.0794655393764005, "learning_rate": 0.00047059284159243727, "loss": 1.4117, "step": 4960 }, { "epoch": 0.53, "grad_norm": 0.07529539563266788, "learning_rate": 0.0004704190374395757, "loss": 1.3844, "step": 4961 }, { "epoch": 0.53, "grad_norm": 0.07016820804619064, "learning_rate": 0.00047024523687350773, "loss": 1.4303, "step": 4962 }, { "epoch": 0.53, "grad_norm": 0.07816173017864488, "learning_rate": 0.0004700714399153075, "loss": 1.3392, "step": 4963 }, { "epoch": 0.53, "grad_norm": 0.06755985966842121, "learning_rate": 0.0004698976465860483, "loss": 1.4734, "step": 4964 }, { "epoch": 0.53, "grad_norm": 0.07271241598715444, "learning_rate": 0.0004697238569068033, "loss": 1.3556, "step": 4965 }, { "epoch": 0.53, "grad_norm": 0.06993775956506902, "learning_rate": 0.0004695500708986451, "loss": 1.5511, "step": 4966 }, { "epoch": 0.53, "grad_norm": 0.07252382373357674, "learning_rate": 0.00046937628858264555, "loss": 1.4274, "step": 4967 }, { "epoch": 0.53, "grad_norm": 0.08483848022811791, "learning_rate": 0.0004692025099798767, "loss": 1.4444, "step": 4968 }, { "epoch": 0.53, "grad_norm": 0.0715220291276646, "learning_rate": 0.0004690287351114097, "loss": 1.3493, "step": 4969 }, { "epoch": 0.53, "grad_norm": 0.07550992035600171, "learning_rate": 0.00046885496399831536, "loss": 1.3109, "step": 4970 }, { "epoch": 0.53, "grad_norm": 0.06715757190667862, "learning_rate": 0.0004686811966616639, "loss": 1.4589, "step": 4971 }, { "epoch": 0.53, "grad_norm": 0.08206633402247311, "learning_rate": 0.00046850743312252537, "loss": 1.332, "step": 4972 }, { "epoch": 0.53, "grad_norm": 0.0812947283676224, "learning_rate": 0.00046833367340196915, "loss": 1.3324, "step": 4973 }, { "epoch": 0.53, "grad_norm": 0.08377563101034731, "learning_rate": 0.0004681599175210641, "loss": 1.4677, "step": 4974 }, { "epoch": 0.53, "grad_norm": 0.07932030491381303, "learning_rate": 0.000467986165500879, "loss": 1.3165, "step": 4975 }, { "epoch": 0.53, "grad_norm": 0.07764813651091716, "learning_rate": 0.0004678124173624816, "loss": 1.4508, "step": 4976 }, { "epoch": 0.54, "grad_norm": 0.06502997929922044, "learning_rate": 0.00046763867312693975, "loss": 1.4183, "step": 4977 }, { "epoch": 0.54, "grad_norm": 0.08892043190200546, "learning_rate": 0.0004674649328153202, "loss": 1.3739, "step": 4978 }, { "epoch": 0.54, "grad_norm": 0.07284899072991152, "learning_rate": 0.0004672911964486896, "loss": 1.2622, "step": 4979 }, { "epoch": 0.54, "grad_norm": 0.07680367290052509, "learning_rate": 0.00046711746404811435, "loss": 1.5053, "step": 4980 }, { "epoch": 0.54, "grad_norm": 0.07623392032659518, "learning_rate": 0.0004669437356346599, "loss": 1.3632, "step": 4981 }, { "epoch": 0.54, "grad_norm": 0.07287035493877488, "learning_rate": 0.0004667700112293913, "loss": 1.4439, "step": 4982 }, { "epoch": 0.54, "grad_norm": 0.07694402909619838, "learning_rate": 0.0004665962908533736, "loss": 1.2747, "step": 4983 }, { "epoch": 0.54, "grad_norm": 0.06519847555927114, "learning_rate": 0.00046642257452767085, "loss": 1.3187, "step": 4984 }, { "epoch": 0.54, "grad_norm": 0.0753700653266978, "learning_rate": 0.00046624886227334653, "loss": 1.4432, "step": 4985 }, { "epoch": 0.54, "grad_norm": 0.06877063017007487, "learning_rate": 0.0004660751541114641, "loss": 1.4588, "step": 4986 }, { "epoch": 0.54, "grad_norm": 0.06914165470466452, "learning_rate": 0.00046590145006308626, "loss": 1.3748, "step": 4987 }, { "epoch": 0.54, "grad_norm": 0.07154339906462596, "learning_rate": 0.0004657277501492751, "loss": 1.3536, "step": 4988 }, { "epoch": 0.54, "grad_norm": 0.061994530518439867, "learning_rate": 0.0004655540543910924, "loss": 1.3233, "step": 4989 }, { "epoch": 0.54, "grad_norm": 0.07749942945527018, "learning_rate": 0.0004653803628095992, "loss": 1.4189, "step": 4990 }, { "epoch": 0.54, "grad_norm": 0.06924609370873705, "learning_rate": 0.00046520667542585654, "loss": 1.363, "step": 4991 }, { "epoch": 0.54, "grad_norm": 0.0765738814212289, "learning_rate": 0.0004650329922609244, "loss": 1.3572, "step": 4992 }, { "epoch": 0.54, "grad_norm": 0.07121048885445819, "learning_rate": 0.0004648593133358624, "loss": 1.4481, "step": 4993 }, { "epoch": 0.54, "grad_norm": 0.06260534252699797, "learning_rate": 0.0004646856386717299, "loss": 1.4498, "step": 4994 }, { "epoch": 0.54, "grad_norm": 0.06744698219437496, "learning_rate": 0.0004645119682895855, "loss": 1.4486, "step": 4995 }, { "epoch": 0.54, "grad_norm": 0.07100221736713873, "learning_rate": 0.0004643383022104871, "loss": 1.4091, "step": 4996 }, { "epoch": 0.54, "grad_norm": 0.06257697855926746, "learning_rate": 0.00046416464045549266, "loss": 1.3761, "step": 4997 }, { "epoch": 0.54, "grad_norm": 0.0644591791026162, "learning_rate": 0.0004639909830456592, "loss": 1.3716, "step": 4998 }, { "epoch": 0.54, "grad_norm": 0.06888699844748095, "learning_rate": 0.0004638173300020431, "loss": 1.4956, "step": 4999 }, { "epoch": 0.54, "grad_norm": 0.06980960282827073, "learning_rate": 0.0004636436813457005, "loss": 1.3363, "step": 5000 }, { "epoch": 0.54, "grad_norm": 0.06117394784086422, "learning_rate": 0.0004634700370976867, "loss": 1.5449, "step": 5001 }, { "epoch": 0.54, "grad_norm": 0.07078852629655827, "learning_rate": 0.00046329639727905696, "loss": 1.3434, "step": 5002 }, { "epoch": 0.54, "grad_norm": 0.06698154268968336, "learning_rate": 0.00046312276191086567, "loss": 1.4607, "step": 5003 }, { "epoch": 0.54, "grad_norm": 0.06296613954642143, "learning_rate": 0.0004629491310141665, "loss": 1.391, "step": 5004 }, { "epoch": 0.54, "grad_norm": 0.07509008854360427, "learning_rate": 0.00046277550461001297, "loss": 1.4132, "step": 5005 }, { "epoch": 0.54, "grad_norm": 0.06856227972641615, "learning_rate": 0.00046260188271945784, "loss": 1.3535, "step": 5006 }, { "epoch": 0.54, "grad_norm": 0.07113060912731874, "learning_rate": 0.0004624282653635534, "loss": 1.4244, "step": 5007 }, { "epoch": 0.54, "grad_norm": 0.08050206389441429, "learning_rate": 0.00046225465256335117, "loss": 1.4556, "step": 5008 }, { "epoch": 0.54, "grad_norm": 0.06897136804433396, "learning_rate": 0.0004620810443399028, "loss": 1.3154, "step": 5009 }, { "epoch": 0.54, "grad_norm": 0.0757574536258104, "learning_rate": 0.0004619074407142582, "loss": 1.5634, "step": 5010 }, { "epoch": 0.54, "grad_norm": 0.07923391536996084, "learning_rate": 0.0004617338417074679, "loss": 1.4741, "step": 5011 }, { "epoch": 0.54, "grad_norm": 0.08502196581005937, "learning_rate": 0.00046156024734058114, "loss": 1.4186, "step": 5012 }, { "epoch": 0.54, "grad_norm": 0.07460264161817834, "learning_rate": 0.0004613866576346468, "loss": 1.3466, "step": 5013 }, { "epoch": 0.54, "grad_norm": 0.07965479727859294, "learning_rate": 0.0004612130726107135, "loss": 1.5325, "step": 5014 }, { "epoch": 0.54, "grad_norm": 0.07123399184880777, "learning_rate": 0.0004610394922898289, "loss": 1.36, "step": 5015 }, { "epoch": 0.54, "grad_norm": 0.07470424341472832, "learning_rate": 0.00046086591669303997, "loss": 1.2811, "step": 5016 }, { "epoch": 0.54, "grad_norm": 0.06729237043022075, "learning_rate": 0.0004606923458413937, "loss": 1.5231, "step": 5017 }, { "epoch": 0.54, "grad_norm": 0.07790469758464379, "learning_rate": 0.000460518779755936, "loss": 1.317, "step": 5018 }, { "epoch": 0.54, "grad_norm": 0.0712544564620146, "learning_rate": 0.00046034521845771256, "loss": 1.4068, "step": 5019 }, { "epoch": 0.54, "grad_norm": 0.08929737083626135, "learning_rate": 0.00046017166196776787, "loss": 1.3364, "step": 5020 }, { "epoch": 0.54, "grad_norm": 0.06544812497801969, "learning_rate": 0.00045999811030714643, "loss": 1.4243, "step": 5021 }, { "epoch": 0.54, "grad_norm": 0.060540401781816446, "learning_rate": 0.0004598245634968921, "loss": 1.3251, "step": 5022 }, { "epoch": 0.54, "grad_norm": 0.0753615240710179, "learning_rate": 0.000459651021558048, "loss": 1.3198, "step": 5023 }, { "epoch": 0.54, "grad_norm": 0.08191977033070631, "learning_rate": 0.0004594774845116565, "loss": 1.3843, "step": 5024 }, { "epoch": 0.54, "grad_norm": 0.07716411871829355, "learning_rate": 0.00045930395237875983, "loss": 1.2769, "step": 5025 }, { "epoch": 0.54, "grad_norm": 0.07458412119990497, "learning_rate": 0.0004591304251803992, "loss": 1.6653, "step": 5026 }, { "epoch": 0.54, "grad_norm": 0.06024873879839457, "learning_rate": 0.0004589569029376153, "loss": 1.2628, "step": 5027 }, { "epoch": 0.54, "grad_norm": 0.07220559994143401, "learning_rate": 0.00045878338567144854, "loss": 1.4473, "step": 5028 }, { "epoch": 0.54, "grad_norm": 0.08935952893918574, "learning_rate": 0.0004586098734029384, "loss": 1.4616, "step": 5029 }, { "epoch": 0.54, "grad_norm": 0.07031227068731213, "learning_rate": 0.0004584363661531239, "loss": 1.3846, "step": 5030 }, { "epoch": 0.54, "grad_norm": 0.07215927891814075, "learning_rate": 0.00045826286394304316, "loss": 1.4024, "step": 5031 }, { "epoch": 0.54, "grad_norm": 0.06502965527596266, "learning_rate": 0.00045808936679373396, "loss": 1.4927, "step": 5032 }, { "epoch": 0.54, "grad_norm": 0.07239411343302822, "learning_rate": 0.00045791587472623365, "loss": 1.3573, "step": 5033 }, { "epoch": 0.54, "grad_norm": 0.06741578912286643, "learning_rate": 0.0004577423877615786, "loss": 1.4525, "step": 5034 }, { "epoch": 0.54, "grad_norm": 0.08016804263999956, "learning_rate": 0.00045756890592080473, "loss": 1.6092, "step": 5035 }, { "epoch": 0.54, "grad_norm": 0.08362373973885047, "learning_rate": 0.0004573954292249471, "loss": 1.3283, "step": 5036 }, { "epoch": 0.54, "grad_norm": 0.07866004625026597, "learning_rate": 0.00045722195769504084, "loss": 1.4516, "step": 5037 }, { "epoch": 0.54, "grad_norm": 0.08537438521785196, "learning_rate": 0.0004570484913521196, "loss": 1.4975, "step": 5038 }, { "epoch": 0.54, "grad_norm": 0.07595030335723768, "learning_rate": 0.00045687503021721673, "loss": 1.4266, "step": 5039 }, { "epoch": 0.54, "grad_norm": 0.07736407471090755, "learning_rate": 0.00045670157431136545, "loss": 1.4982, "step": 5040 }, { "epoch": 0.54, "grad_norm": 0.0772172433915742, "learning_rate": 0.00045652812365559725, "loss": 1.4539, "step": 5041 }, { "epoch": 0.54, "grad_norm": 0.06913636508981376, "learning_rate": 0.00045635467827094404, "loss": 1.3728, "step": 5042 }, { "epoch": 0.54, "grad_norm": 0.07308174202558122, "learning_rate": 0.00045618123817843656, "loss": 1.2952, "step": 5043 }, { "epoch": 0.54, "grad_norm": 0.07119469079079288, "learning_rate": 0.00045600780339910487, "loss": 1.4836, "step": 5044 }, { "epoch": 0.54, "grad_norm": 0.07229097508085704, "learning_rate": 0.0004558343739539788, "loss": 1.3501, "step": 5045 }, { "epoch": 0.54, "grad_norm": 0.07533651801310617, "learning_rate": 0.00045566094986408716, "loss": 1.506, "step": 5046 }, { "epoch": 0.54, "grad_norm": 0.07069151684931094, "learning_rate": 0.0004554875311504581, "loss": 1.421, "step": 5047 }, { "epoch": 0.54, "grad_norm": 0.07104602711266829, "learning_rate": 0.0004553141178341195, "loss": 1.302, "step": 5048 }, { "epoch": 0.54, "grad_norm": 0.0678684179203935, "learning_rate": 0.00045514070993609806, "loss": 1.2245, "step": 5049 }, { "epoch": 0.54, "grad_norm": 0.06362739487382921, "learning_rate": 0.00045496730747742023, "loss": 1.4206, "step": 5050 }, { "epoch": 0.54, "grad_norm": 0.07856895251337907, "learning_rate": 0.00045479391047911186, "loss": 1.4577, "step": 5051 }, { "epoch": 0.54, "grad_norm": 0.07390180709879325, "learning_rate": 0.00045462051896219736, "loss": 1.2967, "step": 5052 }, { "epoch": 0.54, "grad_norm": 0.06240181363019398, "learning_rate": 0.0004544471329477015, "loss": 1.3616, "step": 5053 }, { "epoch": 0.54, "grad_norm": 0.0769563634912301, "learning_rate": 0.0004542737524566478, "loss": 1.3491, "step": 5054 }, { "epoch": 0.54, "grad_norm": 0.0772523406670056, "learning_rate": 0.00045410037751005916, "loss": 1.5153, "step": 5055 }, { "epoch": 0.54, "grad_norm": 0.08634109712464239, "learning_rate": 0.0004539270081289581, "loss": 1.4924, "step": 5056 }, { "epoch": 0.54, "grad_norm": 0.07431762471246574, "learning_rate": 0.0004537536443343662, "loss": 1.3931, "step": 5057 }, { "epoch": 0.54, "grad_norm": 0.0733649512462882, "learning_rate": 0.0004535802861473042, "loss": 1.3972, "step": 5058 }, { "epoch": 0.54, "grad_norm": 0.07901391203531107, "learning_rate": 0.0004534069335887926, "loss": 1.4817, "step": 5059 }, { "epoch": 0.54, "grad_norm": 0.07381995246703327, "learning_rate": 0.000453233586679851, "loss": 1.3819, "step": 5060 }, { "epoch": 0.54, "grad_norm": 0.07247418114870308, "learning_rate": 0.0004530602454414982, "loss": 1.3994, "step": 5061 }, { "epoch": 0.54, "grad_norm": 0.08511342964700407, "learning_rate": 0.00045288690989475264, "loss": 1.5179, "step": 5062 }, { "epoch": 0.54, "grad_norm": 0.07305640500166688, "learning_rate": 0.0004527135800606314, "loss": 1.352, "step": 5063 }, { "epoch": 0.54, "grad_norm": 0.07150911228214982, "learning_rate": 0.0004525402559601517, "loss": 1.4765, "step": 5064 }, { "epoch": 0.54, "grad_norm": 0.06880615891115242, "learning_rate": 0.0004523669376143296, "loss": 1.383, "step": 5065 }, { "epoch": 0.54, "grad_norm": 0.07549278756591758, "learning_rate": 0.00045219362504418057, "loss": 1.4114, "step": 5066 }, { "epoch": 0.54, "grad_norm": 0.08469046092663751, "learning_rate": 0.00045202031827071916, "loss": 1.458, "step": 5067 }, { "epoch": 0.54, "grad_norm": 0.06922994658984552, "learning_rate": 0.0004518470173149597, "loss": 1.4044, "step": 5068 }, { "epoch": 0.54, "grad_norm": 0.07163348102127615, "learning_rate": 0.00045167372219791544, "loss": 1.3636, "step": 5069 }, { "epoch": 0.55, "grad_norm": 0.07201028025488586, "learning_rate": 0.00045150043294059876, "loss": 1.4034, "step": 5070 }, { "epoch": 0.55, "grad_norm": 0.07383927628148199, "learning_rate": 0.000451327149564022, "loss": 1.4078, "step": 5071 }, { "epoch": 0.55, "grad_norm": 0.07201378668042176, "learning_rate": 0.00045115387208919625, "loss": 1.4128, "step": 5072 }, { "epoch": 0.55, "grad_norm": 0.07129619592093052, "learning_rate": 0.0004509806005371317, "loss": 1.458, "step": 5073 }, { "epoch": 0.55, "grad_norm": 0.07247629019072475, "learning_rate": 0.0004508073349288384, "loss": 1.3639, "step": 5074 }, { "epoch": 0.55, "grad_norm": 0.1810663332686453, "learning_rate": 0.0004506340752853252, "loss": 1.4445, "step": 5075 }, { "epoch": 0.55, "grad_norm": 0.07918478567312066, "learning_rate": 0.0004504608216276007, "loss": 1.3372, "step": 5076 }, { "epoch": 0.55, "grad_norm": 0.07803524195785769, "learning_rate": 0.0004502875739766724, "loss": 1.4658, "step": 5077 }, { "epoch": 0.55, "grad_norm": 0.07553645875301666, "learning_rate": 0.00045011433235354697, "loss": 1.372, "step": 5078 }, { "epoch": 0.55, "grad_norm": 0.06978572187693603, "learning_rate": 0.0004499410967792308, "loss": 1.4196, "step": 5079 }, { "epoch": 0.55, "grad_norm": 0.06894296278745332, "learning_rate": 0.00044976786727472936, "loss": 1.3758, "step": 5080 }, { "epoch": 0.55, "grad_norm": 0.07302702532691431, "learning_rate": 0.000449594643861047, "loss": 1.3609, "step": 5081 }, { "epoch": 0.55, "grad_norm": 0.0809007009229191, "learning_rate": 0.00044942142655918796, "loss": 1.3801, "step": 5082 }, { "epoch": 0.55, "grad_norm": 0.07892839586023646, "learning_rate": 0.0004492482153901554, "loss": 1.4028, "step": 5083 }, { "epoch": 0.55, "grad_norm": 0.06926018562785509, "learning_rate": 0.00044907501037495155, "loss": 1.4091, "step": 5084 }, { "epoch": 0.55, "grad_norm": 0.07193791827021002, "learning_rate": 0.0004489018115345784, "loss": 1.2789, "step": 5085 }, { "epoch": 0.55, "grad_norm": 0.07040347068886654, "learning_rate": 0.0004487286188900365, "loss": 1.3098, "step": 5086 }, { "epoch": 0.55, "grad_norm": 0.07605857094439195, "learning_rate": 0.00044855543246232653, "loss": 1.3199, "step": 5087 }, { "epoch": 0.55, "grad_norm": 0.07217955296294648, "learning_rate": 0.00044838225227244766, "loss": 1.4811, "step": 5088 }, { "epoch": 0.55, "grad_norm": 0.07890617853340841, "learning_rate": 0.0004482090783413986, "loss": 1.3277, "step": 5089 }, { "epoch": 0.55, "grad_norm": 0.0720778390267614, "learning_rate": 0.00044803591069017746, "loss": 1.5122, "step": 5090 }, { "epoch": 0.55, "grad_norm": 0.07516660854615728, "learning_rate": 0.0004478627493397813, "loss": 1.3591, "step": 5091 }, { "epoch": 0.55, "grad_norm": 0.07776960659986273, "learning_rate": 0.0004476895943112064, "loss": 1.504, "step": 5092 }, { "epoch": 0.55, "grad_norm": 0.06708427653195059, "learning_rate": 0.0004475164456254488, "loss": 1.3771, "step": 5093 }, { "epoch": 0.55, "grad_norm": 0.0806630681787127, "learning_rate": 0.0004473433033035028, "loss": 1.4429, "step": 5094 }, { "epoch": 0.55, "grad_norm": 0.06961576483572854, "learning_rate": 0.00044717016736636295, "loss": 1.3978, "step": 5095 }, { "epoch": 0.55, "grad_norm": 0.07131127279311827, "learning_rate": 0.0004469970378350224, "loss": 1.3899, "step": 5096 }, { "epoch": 0.55, "grad_norm": 0.06955600958029026, "learning_rate": 0.00044682391473047366, "loss": 1.4512, "step": 5097 }, { "epoch": 0.55, "grad_norm": 0.06367222085226946, "learning_rate": 0.0004466507980737087, "loss": 1.3487, "step": 5098 }, { "epoch": 0.55, "grad_norm": 0.08530236840783734, "learning_rate": 0.0004464776878857184, "loss": 1.3804, "step": 5099 }, { "epoch": 0.55, "grad_norm": 0.07515745169686079, "learning_rate": 0.000446304584187493, "loss": 1.409, "step": 5100 }, { "epoch": 0.55, "grad_norm": 0.059871998178720805, "learning_rate": 0.0004461314870000217, "loss": 1.3639, "step": 5101 }, { "epoch": 0.55, "grad_norm": 0.06668678886268802, "learning_rate": 0.0004459583963442935, "loss": 1.4565, "step": 5102 }, { "epoch": 0.55, "grad_norm": 0.06941757215145417, "learning_rate": 0.000445785312241296, "loss": 1.4375, "step": 5103 }, { "epoch": 0.55, "grad_norm": 0.0787820736556593, "learning_rate": 0.0004456122347120164, "loss": 1.4982, "step": 5104 }, { "epoch": 0.55, "grad_norm": 0.06498468989445924, "learning_rate": 0.0004454391637774408, "loss": 1.4231, "step": 5105 }, { "epoch": 0.55, "grad_norm": 0.07333559120436196, "learning_rate": 0.0004452660994585545, "loss": 1.4282, "step": 5106 }, { "epoch": 0.55, "grad_norm": 0.06590759319312026, "learning_rate": 0.00044509304177634245, "loss": 1.4366, "step": 5107 }, { "epoch": 0.55, "grad_norm": 0.07684068243714061, "learning_rate": 0.00044491999075178844, "loss": 1.4612, "step": 5108 }, { "epoch": 0.55, "grad_norm": 0.06923333908354005, "learning_rate": 0.0004447469464058753, "loss": 1.2806, "step": 5109 }, { "epoch": 0.55, "grad_norm": 0.07182215488774789, "learning_rate": 0.00044457390875958546, "loss": 1.6128, "step": 5110 }, { "epoch": 0.55, "grad_norm": 0.06531612126963693, "learning_rate": 0.0004444008778339003, "loss": 1.5235, "step": 5111 }, { "epoch": 0.55, "grad_norm": 0.06562690937863525, "learning_rate": 0.0004442278536498003, "loss": 1.4009, "step": 5112 }, { "epoch": 0.55, "grad_norm": 0.06630177290727778, "learning_rate": 0.00044405483622826544, "loss": 1.3705, "step": 5113 }, { "epoch": 0.55, "grad_norm": 0.07884292129674725, "learning_rate": 0.0004438818255902746, "loss": 1.4984, "step": 5114 }, { "epoch": 0.55, "grad_norm": 0.07161075616132401, "learning_rate": 0.00044370882175680585, "loss": 1.4684, "step": 5115 }, { "epoch": 0.55, "grad_norm": 0.07517122372057895, "learning_rate": 0.00044353582474883645, "loss": 1.3819, "step": 5116 }, { "epoch": 0.55, "grad_norm": 0.07602287354464461, "learning_rate": 0.0004433628345873429, "loss": 1.3158, "step": 5117 }, { "epoch": 0.55, "grad_norm": 0.0617182529028056, "learning_rate": 0.000443189851293301, "loss": 1.4243, "step": 5118 }, { "epoch": 0.55, "grad_norm": 0.07441545210439454, "learning_rate": 0.0004430168748876855, "loss": 1.3834, "step": 5119 }, { "epoch": 0.55, "grad_norm": 0.06804643338631677, "learning_rate": 0.00044284390539147024, "loss": 1.4772, "step": 5120 }, { "epoch": 0.55, "grad_norm": 0.06762244884350038, "learning_rate": 0.00044267094282562865, "loss": 1.3884, "step": 5121 }, { "epoch": 0.55, "grad_norm": 0.07151377791730641, "learning_rate": 0.00044249798721113286, "loss": 1.3676, "step": 5122 }, { "epoch": 0.55, "grad_norm": 0.06859534272623603, "learning_rate": 0.0004423250385689542, "loss": 1.4373, "step": 5123 }, { "epoch": 0.55, "grad_norm": 0.0777754907921359, "learning_rate": 0.0004421520969200636, "loss": 1.41, "step": 5124 }, { "epoch": 0.55, "grad_norm": 0.06955694265720186, "learning_rate": 0.0004419791622854308, "loss": 1.5175, "step": 5125 }, { "epoch": 0.55, "grad_norm": 0.06921231387512972, "learning_rate": 0.00044180623468602457, "loss": 1.3898, "step": 5126 }, { "epoch": 0.55, "grad_norm": 0.0718189778864757, "learning_rate": 0.00044163331414281287, "loss": 1.3719, "step": 5127 }, { "epoch": 0.55, "grad_norm": 0.07074598625511523, "learning_rate": 0.000441460400676763, "loss": 1.5005, "step": 5128 }, { "epoch": 0.55, "grad_norm": 0.06483544761867031, "learning_rate": 0.00044128749430884153, "loss": 1.5393, "step": 5129 }, { "epoch": 0.55, "grad_norm": 0.06688587191843826, "learning_rate": 0.00044111459506001373, "loss": 1.3699, "step": 5130 }, { "epoch": 0.55, "grad_norm": 0.07058261521922415, "learning_rate": 0.00044094170295124423, "loss": 1.3599, "step": 5131 }, { "epoch": 0.55, "grad_norm": 0.08377792244328604, "learning_rate": 0.0004407688180034968, "loss": 1.5726, "step": 5132 }, { "epoch": 0.55, "grad_norm": 0.08170757981847657, "learning_rate": 0.0004405959402377345, "loss": 1.4028, "step": 5133 }, { "epoch": 0.55, "grad_norm": 0.07094933529123704, "learning_rate": 0.00044042306967491927, "loss": 1.4782, "step": 5134 }, { "epoch": 0.55, "grad_norm": 0.07833739407165695, "learning_rate": 0.0004402502063360121, "loss": 1.439, "step": 5135 }, { "epoch": 0.55, "grad_norm": 0.07612811567792596, "learning_rate": 0.00044007735024197375, "loss": 1.4726, "step": 5136 }, { "epoch": 0.55, "grad_norm": 0.07233376447979718, "learning_rate": 0.0004399045014137629, "loss": 1.484, "step": 5137 }, { "epoch": 0.55, "grad_norm": 0.07426330129609558, "learning_rate": 0.00043973165987233853, "loss": 1.4103, "step": 5138 }, { "epoch": 0.55, "grad_norm": 0.0703597646707655, "learning_rate": 0.00043955882563865824, "loss": 1.406, "step": 5139 }, { "epoch": 0.55, "grad_norm": 0.0747228033894531, "learning_rate": 0.0004393859987336786, "loss": 1.3999, "step": 5140 }, { "epoch": 0.55, "grad_norm": 0.06645446667750779, "learning_rate": 0.0004392131791783556, "loss": 1.4284, "step": 5141 }, { "epoch": 0.55, "grad_norm": 0.06742755209718568, "learning_rate": 0.0004390403669936443, "loss": 1.5311, "step": 5142 }, { "epoch": 0.55, "grad_norm": 0.06452897694766252, "learning_rate": 0.0004388675622004985, "loss": 1.5793, "step": 5143 }, { "epoch": 0.55, "grad_norm": 0.07123286746462744, "learning_rate": 0.00043869476481987166, "loss": 1.3647, "step": 5144 }, { "epoch": 0.55, "grad_norm": 0.080404488096446, "learning_rate": 0.00043852197487271596, "loss": 1.3003, "step": 5145 }, { "epoch": 0.55, "grad_norm": 0.06632696267396859, "learning_rate": 0.00043834919237998275, "loss": 1.3213, "step": 5146 }, { "epoch": 0.55, "grad_norm": 0.072257797841883, "learning_rate": 0.0004381764173626225, "loss": 1.4167, "step": 5147 }, { "epoch": 0.55, "grad_norm": 0.07509748873673347, "learning_rate": 0.0004380036498415847, "loss": 1.3869, "step": 5148 }, { "epoch": 0.55, "grad_norm": 0.0757740088264892, "learning_rate": 0.0004378308898378181, "loss": 1.5651, "step": 5149 }, { "epoch": 0.55, "grad_norm": 0.08298039825124719, "learning_rate": 0.0004376581373722705, "loss": 1.5168, "step": 5150 }, { "epoch": 0.55, "grad_norm": 0.06581738179640959, "learning_rate": 0.0004374853924658886, "loss": 1.4759, "step": 5151 }, { "epoch": 0.55, "grad_norm": 0.07360615892941527, "learning_rate": 0.00043731265513961837, "loss": 1.4205, "step": 5152 }, { "epoch": 0.55, "grad_norm": 0.0761582288989777, "learning_rate": 0.00043713992541440495, "loss": 1.4479, "step": 5153 }, { "epoch": 0.55, "grad_norm": 0.07168122590048633, "learning_rate": 0.0004369672033111921, "loss": 1.2982, "step": 5154 }, { "epoch": 0.55, "grad_norm": 0.06899343353227715, "learning_rate": 0.0004367944888509233, "loss": 1.4615, "step": 5155 }, { "epoch": 0.55, "grad_norm": 0.0851060919458039, "learning_rate": 0.00043662178205454064, "loss": 1.416, "step": 5156 }, { "epoch": 0.55, "grad_norm": 0.07129039991770322, "learning_rate": 0.0004364490829429855, "loss": 1.4104, "step": 5157 }, { "epoch": 0.55, "grad_norm": 0.07358697165011492, "learning_rate": 0.000436276391537198, "loss": 1.4107, "step": 5158 }, { "epoch": 0.55, "grad_norm": 0.06873354857206569, "learning_rate": 0.0004361037078581176, "loss": 1.4866, "step": 5159 }, { "epoch": 0.55, "grad_norm": 0.06921469555044571, "learning_rate": 0.00043593103192668306, "loss": 1.2982, "step": 5160 }, { "epoch": 0.55, "grad_norm": 0.08636862001391342, "learning_rate": 0.00043575836376383173, "loss": 1.3917, "step": 5161 }, { "epoch": 0.55, "grad_norm": 0.0737680093296688, "learning_rate": 0.0004355857033905003, "loss": 1.3104, "step": 5162 }, { "epoch": 0.56, "grad_norm": 0.07065380230712737, "learning_rate": 0.0004354130508276243, "loss": 1.412, "step": 5163 }, { "epoch": 0.56, "grad_norm": 0.0777158993181712, "learning_rate": 0.0004352404060961387, "loss": 1.368, "step": 5164 }, { "epoch": 0.56, "grad_norm": 0.07163302597716577, "learning_rate": 0.00043506776921697703, "loss": 1.3967, "step": 5165 }, { "epoch": 0.56, "grad_norm": 0.07395361060929702, "learning_rate": 0.0004348951402110721, "loss": 1.4889, "step": 5166 }, { "epoch": 0.56, "grad_norm": 0.07668834064910103, "learning_rate": 0.0004347225190993563, "loss": 1.3945, "step": 5167 }, { "epoch": 0.56, "grad_norm": 0.07379835994602744, "learning_rate": 0.00043454990590275966, "loss": 1.2941, "step": 5168 }, { "epoch": 0.56, "grad_norm": 0.07353437475217274, "learning_rate": 0.00043437730064221274, "loss": 1.4592, "step": 5169 }, { "epoch": 0.56, "grad_norm": 0.07180642897850834, "learning_rate": 0.00043420470333864437, "loss": 1.5081, "step": 5170 }, { "epoch": 0.56, "grad_norm": 0.07480628068562017, "learning_rate": 0.0004340321140129824, "loss": 1.4688, "step": 5171 }, { "epoch": 0.56, "grad_norm": 0.08326786901526848, "learning_rate": 0.0004338595326861542, "loss": 1.3402, "step": 5172 }, { "epoch": 0.56, "grad_norm": 0.0776630677788923, "learning_rate": 0.00043368695937908564, "loss": 1.396, "step": 5173 }, { "epoch": 0.56, "grad_norm": 0.0858497428936398, "learning_rate": 0.00043351439411270175, "loss": 1.6149, "step": 5174 }, { "epoch": 0.56, "grad_norm": 0.07842818825426176, "learning_rate": 0.00043334183690792687, "loss": 1.3343, "step": 5175 }, { "epoch": 0.56, "grad_norm": 0.07189766698764516, "learning_rate": 0.00043316928778568413, "loss": 1.4638, "step": 5176 }, { "epoch": 0.56, "grad_norm": 0.08069092707873271, "learning_rate": 0.0004329967467668955, "loss": 1.6077, "step": 5177 }, { "epoch": 0.56, "grad_norm": 0.06730626948761761, "learning_rate": 0.00043282421387248266, "loss": 1.4001, "step": 5178 }, { "epoch": 0.56, "grad_norm": 0.09400270596405078, "learning_rate": 0.0004326516891233652, "loss": 1.4182, "step": 5179 }, { "epoch": 0.56, "grad_norm": 0.07152190135429132, "learning_rate": 0.00043247917254046265, "loss": 1.4702, "step": 5180 }, { "epoch": 0.56, "grad_norm": 0.07839367829975089, "learning_rate": 0.0004323066641446932, "loss": 1.3874, "step": 5181 }, { "epoch": 0.56, "grad_norm": 0.08025327287685355, "learning_rate": 0.00043213416395697406, "loss": 1.4811, "step": 5182 }, { "epoch": 0.56, "grad_norm": 0.0675459205855955, "learning_rate": 0.0004319616719982216, "loss": 1.5319, "step": 5183 }, { "epoch": 0.56, "grad_norm": 0.08674747999573565, "learning_rate": 0.00043178918828935093, "loss": 1.3552, "step": 5184 }, { "epoch": 0.56, "grad_norm": 0.06783046466875754, "learning_rate": 0.0004316167128512763, "loss": 1.4993, "step": 5185 }, { "epoch": 0.56, "grad_norm": 0.0766712737981568, "learning_rate": 0.000431444245704911, "loss": 1.3712, "step": 5186 }, { "epoch": 0.56, "grad_norm": 0.07620524476335197, "learning_rate": 0.00043127178687116734, "loss": 1.2, "step": 5187 }, { "epoch": 0.56, "grad_norm": 0.0698091025429192, "learning_rate": 0.0004310993363709563, "loss": 1.451, "step": 5188 }, { "epoch": 0.56, "grad_norm": 0.07871163624673415, "learning_rate": 0.0004309268942251887, "loss": 1.4297, "step": 5189 }, { "epoch": 0.56, "grad_norm": 0.07820467834592452, "learning_rate": 0.0004307544604547728, "loss": 1.3821, "step": 5190 }, { "epoch": 0.56, "grad_norm": 0.07426071313960025, "learning_rate": 0.00043058203508061755, "loss": 1.5387, "step": 5191 }, { "epoch": 0.56, "grad_norm": 0.07152198924937515, "learning_rate": 0.00043040961812362984, "loss": 1.4379, "step": 5192 }, { "epoch": 0.56, "grad_norm": 0.07643513013877934, "learning_rate": 0.00043023720960471567, "loss": 1.3828, "step": 5193 }, { "epoch": 0.56, "grad_norm": 0.07377944276619808, "learning_rate": 0.0004300648095447805, "loss": 1.3577, "step": 5194 }, { "epoch": 0.56, "grad_norm": 0.07677807667832921, "learning_rate": 0.0004298924179647283, "loss": 1.4854, "step": 5195 }, { "epoch": 0.56, "grad_norm": 0.07254987146980628, "learning_rate": 0.00042972003488546206, "loss": 1.3919, "step": 5196 }, { "epoch": 0.56, "grad_norm": 0.07606417273762252, "learning_rate": 0.00042954766032788386, "loss": 1.4481, "step": 5197 }, { "epoch": 0.56, "grad_norm": 0.06574538650509663, "learning_rate": 0.00042937529431289476, "loss": 1.4083, "step": 5198 }, { "epoch": 0.56, "grad_norm": 0.08056330530383395, "learning_rate": 0.00042920293686139483, "loss": 1.3452, "step": 5199 }, { "epoch": 0.56, "grad_norm": 0.07254927775620691, "learning_rate": 0.00042903058799428277, "loss": 1.354, "step": 5200 }, { "epoch": 0.56, "grad_norm": 0.08042786998418212, "learning_rate": 0.0004288582477324566, "loss": 1.4644, "step": 5201 }, { "epoch": 0.56, "grad_norm": 0.07551326119414072, "learning_rate": 0.0004286859160968131, "loss": 1.4498, "step": 5202 }, { "epoch": 0.56, "grad_norm": 0.0716710195185741, "learning_rate": 0.0004285135931082481, "loss": 1.3936, "step": 5203 }, { "epoch": 0.56, "grad_norm": 0.07607372161962338, "learning_rate": 0.0004283412787876565, "loss": 1.4397, "step": 5204 }, { "epoch": 0.56, "grad_norm": 0.0767839216278687, "learning_rate": 0.0004281689731559318, "loss": 1.5554, "step": 5205 }, { "epoch": 0.56, "grad_norm": 0.08170307596491168, "learning_rate": 0.00042799667623396676, "loss": 1.4555, "step": 5206 }, { "epoch": 0.56, "grad_norm": 0.07293635890780277, "learning_rate": 0.000427824388042653, "loss": 1.5309, "step": 5207 }, { "epoch": 0.56, "grad_norm": 0.06361479379989111, "learning_rate": 0.00042765210860288097, "loss": 1.3286, "step": 5208 }, { "epoch": 0.56, "grad_norm": 0.06701607782127969, "learning_rate": 0.0004274798379355402, "loss": 1.3905, "step": 5209 }, { "epoch": 0.56, "grad_norm": 0.08632887914379973, "learning_rate": 0.00042730757606151927, "loss": 1.4785, "step": 5210 }, { "epoch": 0.56, "grad_norm": 0.06647493250126521, "learning_rate": 0.0004271353230017052, "loss": 1.2555, "step": 5211 }, { "epoch": 0.56, "grad_norm": 0.06693961197696197, "learning_rate": 0.0004269630787769845, "loss": 1.4008, "step": 5212 }, { "epoch": 0.56, "grad_norm": 0.0692409003412069, "learning_rate": 0.0004267908434082421, "loss": 1.4041, "step": 5213 }, { "epoch": 0.56, "grad_norm": 0.06737205948239505, "learning_rate": 0.0004266186169163624, "loss": 1.3695, "step": 5214 }, { "epoch": 0.56, "grad_norm": 0.06829870460828749, "learning_rate": 0.0004264463993222285, "loss": 1.3776, "step": 5215 }, { "epoch": 0.56, "grad_norm": 0.06568111411168952, "learning_rate": 0.0004262741906467221, "loss": 1.5051, "step": 5216 }, { "epoch": 0.56, "grad_norm": 0.07099711966396452, "learning_rate": 0.0004261019909107243, "loss": 1.4215, "step": 5217 }, { "epoch": 0.56, "grad_norm": 0.06926480274268498, "learning_rate": 0.000425929800135115, "loss": 1.4874, "step": 5218 }, { "epoch": 0.56, "grad_norm": 0.07346867907033706, "learning_rate": 0.0004257576183407726, "loss": 1.4229, "step": 5219 }, { "epoch": 0.56, "grad_norm": 0.06602439589330132, "learning_rate": 0.0004255854455485753, "loss": 1.5083, "step": 5220 }, { "epoch": 0.56, "grad_norm": 0.08114794312950216, "learning_rate": 0.0004254132817793989, "loss": 1.3613, "step": 5221 }, { "epoch": 0.56, "grad_norm": 0.07671248645303533, "learning_rate": 0.0004252411270541193, "loss": 1.5036, "step": 5222 }, { "epoch": 0.56, "grad_norm": 0.07326874251575916, "learning_rate": 0.0004250689813936108, "loss": 1.4248, "step": 5223 }, { "epoch": 0.56, "grad_norm": 0.06407015535943295, "learning_rate": 0.00042489684481874655, "loss": 1.3891, "step": 5224 }, { "epoch": 0.56, "grad_norm": 0.06590779720525618, "learning_rate": 0.00042472471735039894, "loss": 1.2699, "step": 5225 }, { "epoch": 0.56, "grad_norm": 0.07001647041392903, "learning_rate": 0.0004245525990094388, "loss": 1.4988, "step": 5226 }, { "epoch": 0.56, "grad_norm": 0.06950346761352938, "learning_rate": 0.00042438048981673613, "loss": 1.5613, "step": 5227 }, { "epoch": 0.56, "grad_norm": 0.0681780359829946, "learning_rate": 0.00042420838979315975, "loss": 1.3695, "step": 5228 }, { "epoch": 0.56, "grad_norm": 0.07072671929945357, "learning_rate": 0.0004240362989595774, "loss": 1.3276, "step": 5229 }, { "epoch": 0.56, "grad_norm": 0.07027533413197294, "learning_rate": 0.00042386421733685574, "loss": 1.414, "step": 5230 }, { "epoch": 0.56, "grad_norm": 0.07273910038980894, "learning_rate": 0.00042369214494586025, "loss": 1.3791, "step": 5231 }, { "epoch": 0.56, "grad_norm": 0.07673259671575565, "learning_rate": 0.00042352008180745527, "loss": 1.4342, "step": 5232 }, { "epoch": 0.56, "grad_norm": 0.08629006705556348, "learning_rate": 0.0004233480279425039, "loss": 1.4249, "step": 5233 }, { "epoch": 0.56, "grad_norm": 0.07021380822979276, "learning_rate": 0.00042317598337186845, "loss": 1.3425, "step": 5234 }, { "epoch": 0.56, "grad_norm": 0.0696246874174527, "learning_rate": 0.0004230039481164099, "loss": 1.396, "step": 5235 }, { "epoch": 0.56, "grad_norm": 0.0771553120990635, "learning_rate": 0.00042283192219698797, "loss": 1.3683, "step": 5236 }, { "epoch": 0.56, "grad_norm": 0.06880356851494933, "learning_rate": 0.00042265990563446166, "loss": 1.3027, "step": 5237 }, { "epoch": 0.56, "grad_norm": 0.07126039016589009, "learning_rate": 0.0004224878984496884, "loss": 1.4568, "step": 5238 }, { "epoch": 0.56, "grad_norm": 0.07841400172673146, "learning_rate": 0.00042231590066352454, "loss": 1.353, "step": 5239 }, { "epoch": 0.56, "grad_norm": 0.0691154552365852, "learning_rate": 0.00042214391229682564, "loss": 1.3609, "step": 5240 }, { "epoch": 0.56, "grad_norm": 0.08822877788777062, "learning_rate": 0.0004219719333704458, "loss": 1.5357, "step": 5241 }, { "epoch": 0.56, "grad_norm": 0.07321921465130389, "learning_rate": 0.00042179996390523817, "loss": 1.4246, "step": 5242 }, { "epoch": 0.56, "grad_norm": 0.07663401686520896, "learning_rate": 0.0004216280039220544, "loss": 1.4507, "step": 5243 }, { "epoch": 0.56, "grad_norm": 0.06902686680633362, "learning_rate": 0.00042145605344174524, "loss": 1.3971, "step": 5244 }, { "epoch": 0.56, "grad_norm": 0.06877425014001001, "learning_rate": 0.0004212841124851605, "loss": 1.4818, "step": 5245 }, { "epoch": 0.56, "grad_norm": 0.0796041285678449, "learning_rate": 0.00042111218107314846, "loss": 1.4862, "step": 5246 }, { "epoch": 0.56, "grad_norm": 0.07232671407705622, "learning_rate": 0.00042094025922655636, "loss": 1.493, "step": 5247 }, { "epoch": 0.56, "grad_norm": 0.06841978145994511, "learning_rate": 0.0004207683469662305, "loss": 1.61, "step": 5248 }, { "epoch": 0.56, "grad_norm": 0.08341542608065328, "learning_rate": 0.0004205964443130157, "loss": 1.4736, "step": 5249 }, { "epoch": 0.56, "grad_norm": 0.08253205503423554, "learning_rate": 0.0004204245512877557, "loss": 1.4058, "step": 5250 }, { "epoch": 0.56, "grad_norm": 0.07622013336203287, "learning_rate": 0.00042025266791129325, "loss": 1.3469, "step": 5251 }, { "epoch": 0.56, "grad_norm": 0.07590685646308382, "learning_rate": 0.00042008079420446985, "loss": 1.4038, "step": 5252 }, { "epoch": 0.56, "grad_norm": 0.06613348327522989, "learning_rate": 0.0004199089301881256, "loss": 1.2766, "step": 5253 }, { "epoch": 0.56, "grad_norm": 0.06636202703584451, "learning_rate": 0.0004197370758830997, "loss": 1.4253, "step": 5254 }, { "epoch": 0.56, "grad_norm": 0.08224651639354623, "learning_rate": 0.0004195652313102299, "loss": 1.3336, "step": 5255 }, { "epoch": 0.57, "grad_norm": 0.07657486580957353, "learning_rate": 0.00041939339649035325, "loss": 1.4397, "step": 5256 }, { "epoch": 0.57, "grad_norm": 0.07238429465194317, "learning_rate": 0.0004192215714443052, "loss": 1.3522, "step": 5257 }, { "epoch": 0.57, "grad_norm": 0.07108040706690585, "learning_rate": 0.00041904975619292003, "loss": 1.4461, "step": 5258 }, { "epoch": 0.57, "grad_norm": 0.07753429464243641, "learning_rate": 0.00041887795075703095, "loss": 1.4958, "step": 5259 }, { "epoch": 0.57, "grad_norm": 0.0755302483106443, "learning_rate": 0.00041870615515747007, "loss": 1.4597, "step": 5260 }, { "epoch": 0.57, "grad_norm": 0.06787790634763187, "learning_rate": 0.00041853436941506817, "loss": 1.3877, "step": 5261 }, { "epoch": 0.57, "grad_norm": 0.0661392999583233, "learning_rate": 0.00041836259355065473, "loss": 1.3623, "step": 5262 }, { "epoch": 0.57, "grad_norm": 0.07417584421347115, "learning_rate": 0.00041819082758505857, "loss": 1.4985, "step": 5263 }, { "epoch": 0.57, "grad_norm": 0.0684843530224598, "learning_rate": 0.00041801907153910623, "loss": 1.3694, "step": 5264 }, { "epoch": 0.57, "grad_norm": 0.0726643145095135, "learning_rate": 0.0004178473254336242, "loss": 1.4311, "step": 5265 }, { "epoch": 0.57, "grad_norm": 0.07855844880744575, "learning_rate": 0.00041767558928943734, "loss": 1.5054, "step": 5266 }, { "epoch": 0.57, "grad_norm": 0.06463843675977816, "learning_rate": 0.0004175038631273689, "loss": 1.4252, "step": 5267 }, { "epoch": 0.57, "grad_norm": 0.07200443421609361, "learning_rate": 0.0004173321469682415, "loss": 1.5673, "step": 5268 }, { "epoch": 0.57, "grad_norm": 0.07157206049559349, "learning_rate": 0.0004171604408328764, "loss": 1.4307, "step": 5269 }, { "epoch": 0.57, "grad_norm": 0.07888986817590185, "learning_rate": 0.00041698874474209327, "loss": 1.355, "step": 5270 }, { "epoch": 0.57, "grad_norm": 0.08408446086913257, "learning_rate": 0.0004168170587167111, "loss": 1.5721, "step": 5271 }, { "epoch": 0.57, "grad_norm": 0.07150037519993824, "learning_rate": 0.0004166453827775474, "loss": 1.4548, "step": 5272 }, { "epoch": 0.57, "grad_norm": 0.0723367901238978, "learning_rate": 0.00041647371694541845, "loss": 1.3577, "step": 5273 }, { "epoch": 0.57, "grad_norm": 0.0668665999900835, "learning_rate": 0.00041630206124113923, "loss": 1.3156, "step": 5274 }, { "epoch": 0.57, "grad_norm": 0.07303698085136547, "learning_rate": 0.0004161304156855235, "loss": 1.3796, "step": 5275 }, { "epoch": 0.57, "grad_norm": 0.07114217269771465, "learning_rate": 0.00041595878029938415, "loss": 1.4391, "step": 5276 }, { "epoch": 0.57, "grad_norm": 0.07038735670023649, "learning_rate": 0.0004157871551035324, "loss": 1.3433, "step": 5277 }, { "epoch": 0.57, "grad_norm": 0.0765105156773969, "learning_rate": 0.0004156155401187783, "loss": 1.3778, "step": 5278 }, { "epoch": 0.57, "grad_norm": 0.06795495327597158, "learning_rate": 0.00041544393536593096, "loss": 1.3867, "step": 5279 }, { "epoch": 0.57, "grad_norm": 0.07040727125995247, "learning_rate": 0.000415272340865798, "loss": 1.419, "step": 5280 }, { "epoch": 0.57, "grad_norm": 0.07715273193285668, "learning_rate": 0.0004151007566391857, "loss": 1.3902, "step": 5281 }, { "epoch": 0.57, "grad_norm": 0.07115238949389142, "learning_rate": 0.0004149291827068995, "loss": 1.3634, "step": 5282 }, { "epoch": 0.57, "grad_norm": 0.06990145449700264, "learning_rate": 0.00041475761908974315, "loss": 1.3727, "step": 5283 }, { "epoch": 0.57, "grad_norm": 0.07417513717261448, "learning_rate": 0.00041458606580851943, "loss": 1.3928, "step": 5284 }, { "epoch": 0.57, "grad_norm": 0.07342873182353658, "learning_rate": 0.0004144145228840296, "loss": 1.3938, "step": 5285 }, { "epoch": 0.57, "grad_norm": 0.07729370889778275, "learning_rate": 0.00041424299033707384, "loss": 1.3694, "step": 5286 }, { "epoch": 0.57, "grad_norm": 0.07807411980809978, "learning_rate": 0.00041407146818845124, "loss": 1.4838, "step": 5287 }, { "epoch": 0.57, "grad_norm": 0.07769125995931969, "learning_rate": 0.0004138999564589594, "loss": 1.3846, "step": 5288 }, { "epoch": 0.57, "grad_norm": 0.07203153523933757, "learning_rate": 0.00041372845516939456, "loss": 1.4529, "step": 5289 }, { "epoch": 0.57, "grad_norm": 0.0852466033918914, "learning_rate": 0.000413556964340552, "loss": 1.435, "step": 5290 }, { "epoch": 0.57, "grad_norm": 0.07876953151957321, "learning_rate": 0.0004133854839932256, "loss": 1.3949, "step": 5291 }, { "epoch": 0.57, "grad_norm": 0.07922352701188885, "learning_rate": 0.00041321401414820785, "loss": 1.3702, "step": 5292 }, { "epoch": 0.57, "grad_norm": 0.07789017134336326, "learning_rate": 0.00041304255482628997, "loss": 1.399, "step": 5293 }, { "epoch": 0.57, "grad_norm": 0.07120922513834405, "learning_rate": 0.00041287110604826233, "loss": 1.2416, "step": 5294 }, { "epoch": 0.57, "grad_norm": 0.09864215287276547, "learning_rate": 0.0004126996678349133, "loss": 1.4479, "step": 5295 }, { "epoch": 0.57, "grad_norm": 0.07540022268253116, "learning_rate": 0.0004125282402070306, "loss": 1.5072, "step": 5296 }, { "epoch": 0.57, "grad_norm": 0.07117404624697435, "learning_rate": 0.0004123568231854003, "loss": 1.4258, "step": 5297 }, { "epoch": 0.57, "grad_norm": 0.07632072817859777, "learning_rate": 0.00041218541679080724, "loss": 1.3239, "step": 5298 }, { "epoch": 0.57, "grad_norm": 0.07545200184891443, "learning_rate": 0.00041201402104403516, "loss": 1.4645, "step": 5299 }, { "epoch": 0.57, "grad_norm": 0.0755586931749317, "learning_rate": 0.00041184263596586644, "loss": 1.3638, "step": 5300 }, { "epoch": 0.57, "grad_norm": 0.07534667284843548, "learning_rate": 0.00041167126157708194, "loss": 1.2639, "step": 5301 }, { "epoch": 0.57, "grad_norm": 0.07371800282548627, "learning_rate": 0.0004114998978984616, "loss": 1.3628, "step": 5302 }, { "epoch": 0.57, "grad_norm": 0.06979442007264192, "learning_rate": 0.0004113285449507837, "loss": 1.5124, "step": 5303 }, { "epoch": 0.57, "grad_norm": 0.07518472070533627, "learning_rate": 0.00041115720275482535, "loss": 1.3375, "step": 5304 }, { "epoch": 0.57, "grad_norm": 0.07145575246092586, "learning_rate": 0.0004109858713313628, "loss": 1.268, "step": 5305 }, { "epoch": 0.57, "grad_norm": 0.07227095780800502, "learning_rate": 0.0004108145507011698, "loss": 1.4299, "step": 5306 }, { "epoch": 0.57, "grad_norm": 0.06962460632342404, "learning_rate": 0.0004106432408850202, "loss": 1.3067, "step": 5307 }, { "epoch": 0.57, "grad_norm": 0.06967858569287429, "learning_rate": 0.0004104719419036856, "loss": 1.4634, "step": 5308 }, { "epoch": 0.57, "grad_norm": 0.07452803560588936, "learning_rate": 0.00041030065377793673, "loss": 1.2936, "step": 5309 }, { "epoch": 0.57, "grad_norm": 0.07198736708156497, "learning_rate": 0.0004101293765285429, "loss": 1.399, "step": 5310 }, { "epoch": 0.57, "grad_norm": 0.07088761102492468, "learning_rate": 0.00040995811017627195, "loss": 1.5214, "step": 5311 }, { "epoch": 0.57, "grad_norm": 0.08037863075518545, "learning_rate": 0.0004097868547418905, "loss": 1.538, "step": 5312 }, { "epoch": 0.57, "grad_norm": 0.06702118564346261, "learning_rate": 0.00040961561024616393, "loss": 1.4452, "step": 5313 }, { "epoch": 0.57, "grad_norm": 0.07138907507596554, "learning_rate": 0.00040944437670985636, "loss": 1.4375, "step": 5314 }, { "epoch": 0.57, "grad_norm": 0.0713287767088554, "learning_rate": 0.00040927315415373013, "loss": 1.3658, "step": 5315 }, { "epoch": 0.57, "grad_norm": 0.07246596365203058, "learning_rate": 0.00040910194259854705, "loss": 1.3858, "step": 5316 }, { "epoch": 0.57, "grad_norm": 0.0638642019848533, "learning_rate": 0.00040893074206506646, "loss": 1.449, "step": 5317 }, { "epoch": 0.57, "grad_norm": 0.07467248996983068, "learning_rate": 0.0004087595525740475, "loss": 1.45, "step": 5318 }, { "epoch": 0.57, "grad_norm": 0.06856809835411172, "learning_rate": 0.0004085883741462474, "loss": 1.4768, "step": 5319 }, { "epoch": 0.57, "grad_norm": 0.07499617753743705, "learning_rate": 0.00040841720680242187, "loss": 1.4109, "step": 5320 }, { "epoch": 0.57, "grad_norm": 0.07075707467749452, "learning_rate": 0.0004082460505633259, "loss": 1.5193, "step": 5321 }, { "epoch": 0.57, "grad_norm": 0.08615689422162827, "learning_rate": 0.0004080749054497126, "loss": 1.3668, "step": 5322 }, { "epoch": 0.57, "grad_norm": 0.08010696554699227, "learning_rate": 0.00040790377148233406, "loss": 1.5512, "step": 5323 }, { "epoch": 0.57, "grad_norm": 0.06867799500138569, "learning_rate": 0.00040773264868194047, "loss": 1.5605, "step": 5324 }, { "epoch": 0.57, "grad_norm": 0.07358887873840544, "learning_rate": 0.0004075615370692815, "loss": 1.3062, "step": 5325 }, { "epoch": 0.57, "grad_norm": 0.07190535444709516, "learning_rate": 0.0004073904366651049, "loss": 1.5042, "step": 5326 }, { "epoch": 0.57, "grad_norm": 0.07395900114719453, "learning_rate": 0.0004072193474901571, "loss": 1.4081, "step": 5327 }, { "epoch": 0.57, "grad_norm": 0.07036662502059775, "learning_rate": 0.0004070482695651832, "loss": 1.4774, "step": 5328 }, { "epoch": 0.57, "grad_norm": 0.0837011412853149, "learning_rate": 0.000406877202910927, "loss": 1.4898, "step": 5329 }, { "epoch": 0.57, "grad_norm": 0.06325539977822217, "learning_rate": 0.00040670614754813105, "loss": 1.3729, "step": 5330 }, { "epoch": 0.57, "grad_norm": 0.06890633970155305, "learning_rate": 0.0004065351034975364, "loss": 1.3829, "step": 5331 }, { "epoch": 0.57, "grad_norm": 0.06839743814713234, "learning_rate": 0.0004063640707798826, "loss": 1.5331, "step": 5332 }, { "epoch": 0.57, "grad_norm": 0.07358862957787274, "learning_rate": 0.00040619304941590806, "loss": 1.3844, "step": 5333 }, { "epoch": 0.57, "grad_norm": 0.08022228572519535, "learning_rate": 0.00040602203942634974, "loss": 1.4749, "step": 5334 }, { "epoch": 0.57, "grad_norm": 0.07053418115513409, "learning_rate": 0.00040585104083194296, "loss": 1.5135, "step": 5335 }, { "epoch": 0.57, "grad_norm": 0.07580591745380308, "learning_rate": 0.0004056800536534222, "loss": 1.476, "step": 5336 }, { "epoch": 0.57, "grad_norm": 0.0740733400567423, "learning_rate": 0.0004055090779115202, "loss": 1.4435, "step": 5337 }, { "epoch": 0.57, "grad_norm": 0.07623581006510774, "learning_rate": 0.0004053381136269682, "loss": 1.3887, "step": 5338 }, { "epoch": 0.57, "grad_norm": 0.07726090719594393, "learning_rate": 0.0004051671608204962, "loss": 1.3204, "step": 5339 }, { "epoch": 0.57, "grad_norm": 0.07643636380776819, "learning_rate": 0.00040499621951283285, "loss": 1.5598, "step": 5340 }, { "epoch": 0.57, "grad_norm": 0.06916237569138638, "learning_rate": 0.0004048252897247054, "loss": 1.2734, "step": 5341 }, { "epoch": 0.57, "grad_norm": 0.07440005449676713, "learning_rate": 0.00040465437147683985, "loss": 1.477, "step": 5342 }, { "epoch": 0.57, "grad_norm": 0.0768591188002704, "learning_rate": 0.0004044834647899603, "loss": 1.4269, "step": 5343 }, { "epoch": 0.57, "grad_norm": 0.07706826003310004, "learning_rate": 0.00040431256968479, "loss": 1.3852, "step": 5344 }, { "epoch": 0.57, "grad_norm": 0.07365496429342674, "learning_rate": 0.00040414168618205053, "loss": 1.5324, "step": 5345 }, { "epoch": 0.57, "grad_norm": 0.06961222207303024, "learning_rate": 0.000403970814302462, "loss": 1.4392, "step": 5346 }, { "epoch": 0.57, "grad_norm": 0.07663822923497113, "learning_rate": 0.0004037999540667436, "loss": 1.3213, "step": 5347 }, { "epoch": 0.57, "grad_norm": 0.08041052042443397, "learning_rate": 0.00040362910549561216, "loss": 1.4161, "step": 5348 }, { "epoch": 0.58, "grad_norm": 0.07464715250262671, "learning_rate": 0.000403458268609784, "loss": 1.5022, "step": 5349 }, { "epoch": 0.58, "grad_norm": 0.06798489895346704, "learning_rate": 0.0004032874434299735, "loss": 1.329, "step": 5350 }, { "epoch": 0.58, "grad_norm": 0.07330991474639785, "learning_rate": 0.0004031166299768939, "loss": 1.4582, "step": 5351 }, { "epoch": 0.58, "grad_norm": 0.07128785978243318, "learning_rate": 0.000402945828271257, "loss": 1.3826, "step": 5352 }, { "epoch": 0.58, "grad_norm": 0.07672704144785629, "learning_rate": 0.000402775038333773, "loss": 1.3935, "step": 5353 }, { "epoch": 0.58, "grad_norm": 0.06971968315018112, "learning_rate": 0.00040260426018515086, "loss": 1.3436, "step": 5354 }, { "epoch": 0.58, "grad_norm": 0.08551247843667656, "learning_rate": 0.0004024334938460978, "loss": 1.5431, "step": 5355 }, { "epoch": 0.58, "grad_norm": 0.06496244821036254, "learning_rate": 0.00040226273933732006, "loss": 1.4034, "step": 5356 }, { "epoch": 0.58, "grad_norm": 0.06880854364756596, "learning_rate": 0.0004020919966795221, "loss": 1.3686, "step": 5357 }, { "epoch": 0.58, "grad_norm": 0.061631643039155914, "learning_rate": 0.0004019212658934073, "loss": 1.4865, "step": 5358 }, { "epoch": 0.58, "grad_norm": 0.0876656022645856, "learning_rate": 0.0004017505469996769, "loss": 1.4607, "step": 5359 }, { "epoch": 0.58, "grad_norm": 0.07698134695084392, "learning_rate": 0.0004015798400190314, "loss": 1.3628, "step": 5360 }, { "epoch": 0.58, "grad_norm": 0.07046999280271365, "learning_rate": 0.0004014091449721696, "loss": 1.3769, "step": 5361 }, { "epoch": 0.58, "grad_norm": 0.07410553795969785, "learning_rate": 0.00040123846187978897, "loss": 1.337, "step": 5362 }, { "epoch": 0.58, "grad_norm": 0.07474813390239679, "learning_rate": 0.00040106779076258515, "loss": 1.3158, "step": 5363 }, { "epoch": 0.58, "grad_norm": 0.07356400677418831, "learning_rate": 0.00040089713164125285, "loss": 1.4531, "step": 5364 }, { "epoch": 0.58, "grad_norm": 0.08282254384195048, "learning_rate": 0.00040072648453648506, "loss": 1.4346, "step": 5365 }, { "epoch": 0.58, "grad_norm": 0.08105180775455986, "learning_rate": 0.00040055584946897317, "loss": 1.5177, "step": 5366 }, { "epoch": 0.58, "grad_norm": 0.06655886062838566, "learning_rate": 0.00040038522645940744, "loss": 1.3714, "step": 5367 }, { "epoch": 0.58, "grad_norm": 0.0762914733486254, "learning_rate": 0.0004002146155284764, "loss": 1.355, "step": 5368 }, { "epoch": 0.58, "grad_norm": 0.07308623399730424, "learning_rate": 0.0004000440166968674, "loss": 1.3991, "step": 5369 }, { "epoch": 0.58, "grad_norm": 0.07344200850911864, "learning_rate": 0.0003998734299852659, "loss": 1.4345, "step": 5370 }, { "epoch": 0.58, "grad_norm": 0.07950989021718434, "learning_rate": 0.00039970285541435604, "loss": 1.2364, "step": 5371 }, { "epoch": 0.58, "grad_norm": 0.06865838341036416, "learning_rate": 0.0003995322930048208, "loss": 1.3799, "step": 5372 }, { "epoch": 0.58, "grad_norm": 0.08467264426498419, "learning_rate": 0.0003993617427773415, "loss": 1.4482, "step": 5373 }, { "epoch": 0.58, "grad_norm": 0.07733714287405516, "learning_rate": 0.0003991912047525976, "loss": 1.4416, "step": 5374 }, { "epoch": 0.58, "grad_norm": 0.11476822294757576, "learning_rate": 0.0003990206789512679, "loss": 1.4573, "step": 5375 }, { "epoch": 0.58, "grad_norm": 0.07610655086519266, "learning_rate": 0.00039885016539402895, "loss": 1.3066, "step": 5376 }, { "epoch": 0.58, "grad_norm": 0.07716286349323345, "learning_rate": 0.0003986796641015561, "loss": 1.3914, "step": 5377 }, { "epoch": 0.58, "grad_norm": 0.08078118508279136, "learning_rate": 0.0003985091750945233, "loss": 1.3834, "step": 5378 }, { "epoch": 0.58, "grad_norm": 0.08079701422701666, "learning_rate": 0.0003983386983936031, "loss": 1.3173, "step": 5379 }, { "epoch": 0.58, "grad_norm": 0.07174180247292715, "learning_rate": 0.0003981682340194661, "loss": 1.6485, "step": 5380 }, { "epoch": 0.58, "grad_norm": 0.08102967123260112, "learning_rate": 0.00039799778199278176, "loss": 1.4069, "step": 5381 }, { "epoch": 0.58, "grad_norm": 0.08146881326489869, "learning_rate": 0.0003978273423342179, "loss": 1.3601, "step": 5382 }, { "epoch": 0.58, "grad_norm": 0.07227396353091131, "learning_rate": 0.00039765691506444113, "loss": 1.5108, "step": 5383 }, { "epoch": 0.58, "grad_norm": 0.07519482559814357, "learning_rate": 0.00039748650020411625, "loss": 1.3298, "step": 5384 }, { "epoch": 0.58, "grad_norm": 0.06780383496089704, "learning_rate": 0.00039731609777390647, "loss": 1.4425, "step": 5385 }, { "epoch": 0.58, "grad_norm": 0.06902960397709067, "learning_rate": 0.000397145707794474, "loss": 1.3888, "step": 5386 }, { "epoch": 0.58, "grad_norm": 0.06986637026932582, "learning_rate": 0.0003969753302864791, "loss": 1.3675, "step": 5387 }, { "epoch": 0.58, "grad_norm": 0.0691980178959223, "learning_rate": 0.00039680496527058054, "loss": 1.3784, "step": 5388 }, { "epoch": 0.58, "grad_norm": 0.08156045851765133, "learning_rate": 0.00039663461276743557, "loss": 1.4175, "step": 5389 }, { "epoch": 0.58, "grad_norm": 0.0704137566422376, "learning_rate": 0.00039646427279770035, "loss": 1.4708, "step": 5390 }, { "epoch": 0.58, "grad_norm": 0.06468213372241707, "learning_rate": 0.000396293945382029, "loss": 1.3275, "step": 5391 }, { "epoch": 0.58, "grad_norm": 0.07230454038232008, "learning_rate": 0.00039612363054107423, "loss": 1.4686, "step": 5392 }, { "epoch": 0.58, "grad_norm": 0.06080188424261516, "learning_rate": 0.00039595332829548736, "loss": 1.3506, "step": 5393 }, { "epoch": 0.58, "grad_norm": 0.08481811022519306, "learning_rate": 0.00039578303866591804, "loss": 1.4125, "step": 5394 }, { "epoch": 0.58, "grad_norm": 0.07164820257648974, "learning_rate": 0.00039561276167301475, "loss": 1.398, "step": 5395 }, { "epoch": 0.58, "grad_norm": 0.07295058056378592, "learning_rate": 0.0003954424973374239, "loss": 1.4414, "step": 5396 }, { "epoch": 0.58, "grad_norm": 0.07617537325382101, "learning_rate": 0.0003952722456797906, "loss": 1.5086, "step": 5397 }, { "epoch": 0.58, "grad_norm": 0.0723477940325076, "learning_rate": 0.0003951020067207587, "loss": 1.4446, "step": 5398 }, { "epoch": 0.58, "grad_norm": 0.08723825363550239, "learning_rate": 0.0003949317804809701, "loss": 1.3639, "step": 5399 }, { "epoch": 0.58, "grad_norm": 0.06936075965065762, "learning_rate": 0.00039476156698106554, "loss": 1.3929, "step": 5400 }, { "epoch": 0.58, "grad_norm": 0.07934707933846326, "learning_rate": 0.0003945913662416836, "loss": 1.3018, "step": 5401 }, { "epoch": 0.58, "grad_norm": 0.07516365654439944, "learning_rate": 0.0003944211782834618, "loss": 1.4766, "step": 5402 }, { "epoch": 0.58, "grad_norm": 0.07511497051173129, "learning_rate": 0.00039425100312703626, "loss": 1.446, "step": 5403 }, { "epoch": 0.58, "grad_norm": 0.07399958481633905, "learning_rate": 0.00039408084079304114, "loss": 1.4446, "step": 5404 }, { "epoch": 0.58, "grad_norm": 0.07322395813105882, "learning_rate": 0.0003939106913021091, "loss": 1.4424, "step": 5405 }, { "epoch": 0.58, "grad_norm": 0.08006397128046831, "learning_rate": 0.00039374055467487163, "loss": 1.4953, "step": 5406 }, { "epoch": 0.58, "grad_norm": 0.08113213797120367, "learning_rate": 0.00039357043093195824, "loss": 1.4554, "step": 5407 }, { "epoch": 0.58, "grad_norm": 0.08034824832299563, "learning_rate": 0.0003934003200939969, "loss": 1.2838, "step": 5408 }, { "epoch": 0.58, "grad_norm": 0.07582701660283922, "learning_rate": 0.00039323022218161436, "loss": 1.3473, "step": 5409 }, { "epoch": 0.58, "grad_norm": 0.07262946774124066, "learning_rate": 0.0003930601372154355, "loss": 1.4852, "step": 5410 }, { "epoch": 0.58, "grad_norm": 0.0816848756450506, "learning_rate": 0.00039289006521608374, "loss": 1.3998, "step": 5411 }, { "epoch": 0.58, "grad_norm": 0.08026040007883284, "learning_rate": 0.00039272000620418077, "loss": 1.3904, "step": 5412 }, { "epoch": 0.58, "grad_norm": 0.08005161253565878, "learning_rate": 0.0003925499602003467, "loss": 1.3568, "step": 5413 }, { "epoch": 0.58, "grad_norm": 0.08971571870062349, "learning_rate": 0.00039237992722520064, "loss": 1.3311, "step": 5414 }, { "epoch": 0.58, "grad_norm": 0.0753830626442861, "learning_rate": 0.00039220990729935937, "loss": 1.4081, "step": 5415 }, { "epoch": 0.58, "grad_norm": 0.08683335516423724, "learning_rate": 0.0003920399004434383, "loss": 1.4205, "step": 5416 }, { "epoch": 0.58, "grad_norm": 0.08114337234622578, "learning_rate": 0.0003918699066780517, "loss": 1.4885, "step": 5417 }, { "epoch": 0.58, "grad_norm": 0.07527136672140505, "learning_rate": 0.0003916999260238115, "loss": 1.3366, "step": 5418 }, { "epoch": 0.58, "grad_norm": 0.07529646089819533, "learning_rate": 0.0003915299585013288, "loss": 1.4195, "step": 5419 }, { "epoch": 0.58, "grad_norm": 0.08565027017623841, "learning_rate": 0.00039136000413121246, "loss": 1.4588, "step": 5420 }, { "epoch": 0.58, "grad_norm": 0.07864361897315884, "learning_rate": 0.00039119006293407025, "loss": 1.4351, "step": 5421 }, { "epoch": 0.58, "grad_norm": 0.07915941892313927, "learning_rate": 0.0003910201349305081, "loss": 1.3672, "step": 5422 }, { "epoch": 0.58, "grad_norm": 0.07156264135526873, "learning_rate": 0.0003908502201411303, "loss": 1.4315, "step": 5423 }, { "epoch": 0.58, "grad_norm": 0.07618419764564519, "learning_rate": 0.0003906803185865395, "loss": 1.5126, "step": 5424 }, { "epoch": 0.58, "grad_norm": 0.0814665018963305, "learning_rate": 0.0003905104302873369, "loss": 1.467, "step": 5425 }, { "epoch": 0.58, "grad_norm": 0.06795989374856158, "learning_rate": 0.0003903405552641222, "loss": 1.4223, "step": 5426 }, { "epoch": 0.58, "grad_norm": 0.07673542464248548, "learning_rate": 0.00039017069353749324, "loss": 1.3443, "step": 5427 }, { "epoch": 0.58, "grad_norm": 0.07803117959879066, "learning_rate": 0.00039000084512804615, "loss": 1.4296, "step": 5428 }, { "epoch": 0.58, "grad_norm": 0.07549394873728246, "learning_rate": 0.000389831010056376, "loss": 1.4681, "step": 5429 }, { "epoch": 0.58, "grad_norm": 0.06315628829607588, "learning_rate": 0.00038966118834307564, "loss": 1.4331, "step": 5430 }, { "epoch": 0.58, "grad_norm": 0.07803465268274827, "learning_rate": 0.0003894913800087365, "loss": 1.3954, "step": 5431 }, { "epoch": 0.58, "grad_norm": 0.0775945281904031, "learning_rate": 0.00038932158507394885, "loss": 1.5557, "step": 5432 }, { "epoch": 0.58, "grad_norm": 0.07374023221278247, "learning_rate": 0.0003891518035593002, "loss": 1.4375, "step": 5433 }, { "epoch": 0.58, "grad_norm": 0.06745322580856865, "learning_rate": 0.00038898203548537767, "loss": 1.4085, "step": 5434 }, { "epoch": 0.58, "grad_norm": 0.07268110095101987, "learning_rate": 0.0003888122808727661, "loss": 1.2798, "step": 5435 }, { "epoch": 0.58, "grad_norm": 0.06780737778028098, "learning_rate": 0.00038864253974204866, "loss": 1.42, "step": 5436 }, { "epoch": 0.58, "grad_norm": 0.06232751201032706, "learning_rate": 0.0003884728121138073, "loss": 1.368, "step": 5437 }, { "epoch": 0.58, "grad_norm": 0.07425964477371508, "learning_rate": 0.00038830309800862194, "loss": 1.3845, "step": 5438 }, { "epoch": 0.58, "grad_norm": 0.0696927104312303, "learning_rate": 0.00038813339744707095, "loss": 1.5026, "step": 5439 }, { "epoch": 0.58, "grad_norm": 0.08829232052578292, "learning_rate": 0.0003879637104497313, "loss": 1.459, "step": 5440 }, { "epoch": 0.58, "grad_norm": 0.0800197842374922, "learning_rate": 0.00038779403703717797, "loss": 1.3374, "step": 5441 }, { "epoch": 0.59, "grad_norm": 0.08461196112841203, "learning_rate": 0.0003876243772299843, "loss": 1.4803, "step": 5442 }, { "epoch": 0.59, "grad_norm": 0.07591094969169548, "learning_rate": 0.00038745473104872277, "loss": 1.4347, "step": 5443 }, { "epoch": 0.59, "grad_norm": 0.06951993132870146, "learning_rate": 0.00038728509851396274, "loss": 1.3894, "step": 5444 }, { "epoch": 0.59, "grad_norm": 0.0709955709325333, "learning_rate": 0.0003871154796462732, "loss": 1.4263, "step": 5445 }, { "epoch": 0.59, "grad_norm": 0.07217695702237546, "learning_rate": 0.00038694587446622093, "loss": 1.4771, "step": 5446 }, { "epoch": 0.59, "grad_norm": 0.07624851858056204, "learning_rate": 0.000386776282994371, "loss": 1.5054, "step": 5447 }, { "epoch": 0.59, "grad_norm": 0.0805471238267498, "learning_rate": 0.0003866067052512872, "loss": 1.4343, "step": 5448 }, { "epoch": 0.59, "grad_norm": 0.07193135020200961, "learning_rate": 0.0003864371412575314, "loss": 1.4548, "step": 5449 }, { "epoch": 0.59, "grad_norm": 0.07357020438933615, "learning_rate": 0.00038626759103366367, "loss": 1.4873, "step": 5450 }, { "epoch": 0.59, "grad_norm": 0.08159699049650707, "learning_rate": 0.0003860980546002425, "loss": 1.4111, "step": 5451 }, { "epoch": 0.59, "grad_norm": 0.08174122049659675, "learning_rate": 0.00038592853197782503, "loss": 1.3474, "step": 5452 }, { "epoch": 0.59, "grad_norm": 0.07962770380661251, "learning_rate": 0.0003857590231869664, "loss": 1.3244, "step": 5453 }, { "epoch": 0.59, "grad_norm": 0.07448555257556383, "learning_rate": 0.00038558952824821994, "loss": 1.4534, "step": 5454 }, { "epoch": 0.59, "grad_norm": 0.07277064642875133, "learning_rate": 0.0003854200471821376, "loss": 1.4508, "step": 5455 }, { "epoch": 0.59, "grad_norm": 0.08151762819863291, "learning_rate": 0.0003852505800092695, "loss": 1.3559, "step": 5456 }, { "epoch": 0.59, "grad_norm": 0.08049679696768172, "learning_rate": 0.0003850811267501643, "loss": 1.3082, "step": 5457 }, { "epoch": 0.59, "grad_norm": 0.06983411242009606, "learning_rate": 0.0003849116874253686, "loss": 1.4051, "step": 5458 }, { "epoch": 0.59, "grad_norm": 0.07159626918035832, "learning_rate": 0.0003847422620554276, "loss": 1.4298, "step": 5459 }, { "epoch": 0.59, "grad_norm": 0.08503770511535662, "learning_rate": 0.00038457285066088463, "loss": 1.3354, "step": 5460 }, { "epoch": 0.59, "grad_norm": 0.0790162568405594, "learning_rate": 0.00038440345326228156, "loss": 1.3853, "step": 5461 }, { "epoch": 0.59, "grad_norm": 0.0782423315532537, "learning_rate": 0.0003842340698801581, "loss": 1.4141, "step": 5462 }, { "epoch": 0.59, "grad_norm": 0.07828919322816909, "learning_rate": 0.0003840647005350529, "loss": 1.3817, "step": 5463 }, { "epoch": 0.59, "grad_norm": 0.07866537991857481, "learning_rate": 0.00038389534524750266, "loss": 1.2861, "step": 5464 }, { "epoch": 0.59, "grad_norm": 0.07442515151685897, "learning_rate": 0.00038372600403804185, "loss": 1.4331, "step": 5465 }, { "epoch": 0.59, "grad_norm": 0.08202431853846086, "learning_rate": 0.000383556676927204, "loss": 1.3602, "step": 5466 }, { "epoch": 0.59, "grad_norm": 0.08207556685486656, "learning_rate": 0.0003833873639355203, "loss": 1.4111, "step": 5467 }, { "epoch": 0.59, "grad_norm": 0.07216498981129676, "learning_rate": 0.000383218065083521, "loss": 1.3615, "step": 5468 }, { "epoch": 0.59, "grad_norm": 0.07135005849767023, "learning_rate": 0.0003830487803917338, "loss": 1.4001, "step": 5469 }, { "epoch": 0.59, "grad_norm": 0.07557193610406127, "learning_rate": 0.00038287950988068507, "loss": 1.4354, "step": 5470 }, { "epoch": 0.59, "grad_norm": 0.07651272647178654, "learning_rate": 0.0003827102535708997, "loss": 1.4316, "step": 5471 }, { "epoch": 0.59, "grad_norm": 0.07205282353825856, "learning_rate": 0.00038254101148290043, "loss": 1.6366, "step": 5472 }, { "epoch": 0.59, "grad_norm": 0.08323470409611829, "learning_rate": 0.00038237178363720835, "loss": 1.413, "step": 5473 }, { "epoch": 0.59, "grad_norm": 0.06608968374186841, "learning_rate": 0.00038220257005434314, "loss": 1.3479, "step": 5474 }, { "epoch": 0.59, "grad_norm": 0.08621171799268833, "learning_rate": 0.00038203337075482245, "loss": 1.4297, "step": 5475 }, { "epoch": 0.59, "grad_norm": 0.06968222782720418, "learning_rate": 0.00038186418575916227, "loss": 1.3863, "step": 5476 }, { "epoch": 0.59, "grad_norm": 0.08547720995611516, "learning_rate": 0.0003816950150878768, "loss": 1.4868, "step": 5477 }, { "epoch": 0.59, "grad_norm": 0.07746098530633695, "learning_rate": 0.0003815258587614785, "loss": 1.3578, "step": 5478 }, { "epoch": 0.59, "grad_norm": 0.07851996940602321, "learning_rate": 0.00038135671680047836, "loss": 1.3269, "step": 5479 }, { "epoch": 0.59, "grad_norm": 0.0671031884146776, "learning_rate": 0.00038118758922538534, "loss": 1.3328, "step": 5480 }, { "epoch": 0.59, "grad_norm": 0.07086563839326887, "learning_rate": 0.0003810184760567066, "loss": 1.3801, "step": 5481 }, { "epoch": 0.59, "grad_norm": 0.06829227406937718, "learning_rate": 0.00038084937731494795, "loss": 1.3466, "step": 5482 }, { "epoch": 0.59, "grad_norm": 0.07644260647676682, "learning_rate": 0.0003806802930206131, "loss": 1.463, "step": 5483 }, { "epoch": 0.59, "grad_norm": 0.07327912352959912, "learning_rate": 0.00038051122319420405, "loss": 1.5011, "step": 5484 }, { "epoch": 0.59, "grad_norm": 0.07474131903712519, "learning_rate": 0.00038034216785622126, "loss": 1.4398, "step": 5485 }, { "epoch": 0.59, "grad_norm": 0.06789310016224517, "learning_rate": 0.0003801731270271629, "loss": 1.4834, "step": 5486 }, { "epoch": 0.59, "grad_norm": 0.07304893429551268, "learning_rate": 0.0003800041007275261, "loss": 1.4962, "step": 5487 }, { "epoch": 0.59, "grad_norm": 0.06520647255990622, "learning_rate": 0.00037983508897780583, "loss": 1.3565, "step": 5488 }, { "epoch": 0.59, "grad_norm": 0.06924010423700398, "learning_rate": 0.00037966609179849526, "loss": 1.3497, "step": 5489 }, { "epoch": 0.59, "grad_norm": 0.07253469655081643, "learning_rate": 0.0003794971092100858, "loss": 1.363, "step": 5490 }, { "epoch": 0.59, "grad_norm": 0.06834466384222009, "learning_rate": 0.00037932814123306735, "loss": 1.4292, "step": 5491 }, { "epoch": 0.59, "grad_norm": 0.07204827925530317, "learning_rate": 0.00037915918788792793, "loss": 1.5018, "step": 5492 }, { "epoch": 0.59, "grad_norm": 0.07344716634947808, "learning_rate": 0.00037899024919515334, "loss": 1.38, "step": 5493 }, { "epoch": 0.59, "grad_norm": 0.07802721400721604, "learning_rate": 0.0003788213251752284, "loss": 1.4881, "step": 5494 }, { "epoch": 0.59, "grad_norm": 0.07188704680601309, "learning_rate": 0.00037865241584863547, "loss": 1.3633, "step": 5495 }, { "epoch": 0.59, "grad_norm": 0.06597149494787036, "learning_rate": 0.0003784835212358556, "loss": 1.4876, "step": 5496 }, { "epoch": 0.59, "grad_norm": 0.07149421228145474, "learning_rate": 0.00037831464135736764, "loss": 1.4034, "step": 5497 }, { "epoch": 0.59, "grad_norm": 0.07525825575415172, "learning_rate": 0.00037814577623364884, "loss": 1.2948, "step": 5498 }, { "epoch": 0.59, "grad_norm": 0.08479029685348394, "learning_rate": 0.00037797692588517487, "loss": 1.4546, "step": 5499 }, { "epoch": 0.59, "grad_norm": 0.07051587577136423, "learning_rate": 0.00037780809033241926, "loss": 1.5106, "step": 5500 }, { "epoch": 0.59, "grad_norm": 0.07380593539742977, "learning_rate": 0.00037763926959585387, "loss": 1.3892, "step": 5501 }, { "epoch": 0.59, "grad_norm": 0.06881571512378142, "learning_rate": 0.000377470463695949, "loss": 1.4178, "step": 5502 }, { "epoch": 0.59, "grad_norm": 0.07028405289575519, "learning_rate": 0.0003773016726531728, "loss": 1.3714, "step": 5503 }, { "epoch": 0.59, "grad_norm": 0.0666545859612911, "learning_rate": 0.00037713289648799177, "loss": 1.4037, "step": 5504 }, { "epoch": 0.59, "grad_norm": 0.06767247145129425, "learning_rate": 0.00037696413522087067, "loss": 1.2454, "step": 5505 }, { "epoch": 0.59, "grad_norm": 0.07342524302568901, "learning_rate": 0.00037679538887227246, "loss": 1.3988, "step": 5506 }, { "epoch": 0.59, "grad_norm": 0.07264381277887294, "learning_rate": 0.000376626657462658, "loss": 1.5017, "step": 5507 }, { "epoch": 0.59, "grad_norm": 0.06619640197119608, "learning_rate": 0.00037645794101248665, "loss": 1.3296, "step": 5508 }, { "epoch": 0.59, "grad_norm": 0.08066585644997017, "learning_rate": 0.00037628923954221574, "loss": 1.4821, "step": 5509 }, { "epoch": 0.59, "grad_norm": 0.0779357570182373, "learning_rate": 0.00037612055307230114, "loss": 1.3817, "step": 5510 }, { "epoch": 0.59, "grad_norm": 0.08662510505643409, "learning_rate": 0.0003759518816231966, "loss": 1.2358, "step": 5511 }, { "epoch": 0.59, "grad_norm": 0.06984981321199445, "learning_rate": 0.0003757832252153539, "loss": 1.2271, "step": 5512 }, { "epoch": 0.59, "grad_norm": 0.07931974426531138, "learning_rate": 0.00037561458386922356, "loss": 1.4736, "step": 5513 }, { "epoch": 0.59, "grad_norm": 0.08119674665971423, "learning_rate": 0.0003754459576052537, "loss": 1.399, "step": 5514 }, { "epoch": 0.59, "grad_norm": 0.07910822175099083, "learning_rate": 0.0003752773464438909, "loss": 1.2959, "step": 5515 }, { "epoch": 0.59, "grad_norm": 0.07694259855361689, "learning_rate": 0.0003751087504055797, "loss": 1.4247, "step": 5516 }, { "epoch": 0.59, "grad_norm": 0.07256522449985622, "learning_rate": 0.0003749401695107634, "loss": 1.397, "step": 5517 }, { "epoch": 0.59, "grad_norm": 0.08607831710188796, "learning_rate": 0.00037477160377988246, "loss": 1.5007, "step": 5518 }, { "epoch": 0.59, "grad_norm": 0.07415689538573572, "learning_rate": 0.0003746030532333763, "loss": 1.4612, "step": 5519 }, { "epoch": 0.59, "grad_norm": 0.07758296899328197, "learning_rate": 0.0003744345178916823, "loss": 1.3624, "step": 5520 }, { "epoch": 0.59, "grad_norm": 0.08082820232713231, "learning_rate": 0.00037426599777523573, "loss": 1.4306, "step": 5521 }, { "epoch": 0.59, "grad_norm": 0.07400004575923684, "learning_rate": 0.0003740974929044706, "loss": 1.3545, "step": 5522 }, { "epoch": 0.59, "grad_norm": 0.07599643071172713, "learning_rate": 0.00037392900329981843, "loss": 1.3584, "step": 5523 }, { "epoch": 0.59, "grad_norm": 0.07895934453748053, "learning_rate": 0.00037376052898170913, "loss": 1.4622, "step": 5524 }, { "epoch": 0.59, "grad_norm": 0.0668384332444495, "learning_rate": 0.00037359206997057107, "loss": 1.5848, "step": 5525 }, { "epoch": 0.59, "grad_norm": 0.07121712503947927, "learning_rate": 0.0003734236262868303, "loss": 1.3717, "step": 5526 }, { "epoch": 0.59, "grad_norm": 0.0732117144452143, "learning_rate": 0.00037325519795091127, "loss": 1.5614, "step": 5527 }, { "epoch": 0.59, "grad_norm": 0.08251413632425897, "learning_rate": 0.00037308678498323647, "loss": 1.3482, "step": 5528 }, { "epoch": 0.59, "grad_norm": 0.07988610447393804, "learning_rate": 0.00037291838740422623, "loss": 1.441, "step": 5529 }, { "epoch": 0.59, "grad_norm": 0.07343160375426805, "learning_rate": 0.0003727500052342999, "loss": 1.4126, "step": 5530 }, { "epoch": 0.59, "grad_norm": 0.0750280771624314, "learning_rate": 0.00037258163849387407, "loss": 1.3603, "step": 5531 }, { "epoch": 0.59, "grad_norm": 0.0699588962331154, "learning_rate": 0.0003724132872033638, "loss": 1.4122, "step": 5532 }, { "epoch": 0.59, "grad_norm": 0.08618571728366133, "learning_rate": 0.0003722449513831823, "loss": 1.3199, "step": 5533 }, { "epoch": 0.59, "grad_norm": 0.0752992863299164, "learning_rate": 0.00037207663105374094, "loss": 1.3892, "step": 5534 }, { "epoch": 0.6, "grad_norm": 0.0756774846041659, "learning_rate": 0.00037190832623544903, "loss": 1.4252, "step": 5535 }, { "epoch": 0.6, "grad_norm": 0.0755846484269021, "learning_rate": 0.0003717400369487142, "loss": 1.4436, "step": 5536 }, { "epoch": 0.6, "grad_norm": 0.07183736040402938, "learning_rate": 0.0003715717632139421, "loss": 1.2719, "step": 5537 }, { "epoch": 0.6, "grad_norm": 0.07633084282336201, "learning_rate": 0.0003714035050515366, "loss": 1.4026, "step": 5538 }, { "epoch": 0.6, "grad_norm": 0.07675837417693088, "learning_rate": 0.0003712352624818993, "loss": 1.2591, "step": 5539 }, { "epoch": 0.6, "grad_norm": 0.07676462023153384, "learning_rate": 0.00037106703552543024, "loss": 1.3136, "step": 5540 }, { "epoch": 0.6, "grad_norm": 0.07689170885756248, "learning_rate": 0.0003708988242025277, "loss": 1.3616, "step": 5541 }, { "epoch": 0.6, "grad_norm": 0.07474625872774958, "learning_rate": 0.0003707306285335879, "loss": 1.4878, "step": 5542 }, { "epoch": 0.6, "grad_norm": 0.08136785858544164, "learning_rate": 0.0003705624485390049, "loss": 1.3011, "step": 5543 }, { "epoch": 0.6, "grad_norm": 0.08019714407726865, "learning_rate": 0.0003703942842391714, "loss": 1.4349, "step": 5544 }, { "epoch": 0.6, "grad_norm": 0.06931800845539068, "learning_rate": 0.0003702261356544777, "loss": 1.336, "step": 5545 }, { "epoch": 0.6, "grad_norm": 0.06865776028434824, "learning_rate": 0.00037005800280531245, "loss": 1.3829, "step": 5546 }, { "epoch": 0.6, "grad_norm": 0.07110683293273809, "learning_rate": 0.0003698898857120622, "loss": 1.3449, "step": 5547 }, { "epoch": 0.6, "grad_norm": 0.06752517336614956, "learning_rate": 0.00036972178439511206, "loss": 1.4429, "step": 5548 }, { "epoch": 0.6, "grad_norm": 0.07369625011178438, "learning_rate": 0.00036955369887484477, "loss": 1.3135, "step": 5549 }, { "epoch": 0.6, "grad_norm": 0.08217739104019585, "learning_rate": 0.00036938562917164107, "loss": 1.44, "step": 5550 }, { "epoch": 0.6, "grad_norm": 0.0679176740185262, "learning_rate": 0.00036921757530588016, "loss": 1.4109, "step": 5551 }, { "epoch": 0.6, "grad_norm": 0.08183989458372486, "learning_rate": 0.000369049537297939, "loss": 1.5463, "step": 5552 }, { "epoch": 0.6, "grad_norm": 0.08068697173407725, "learning_rate": 0.00036888151516819304, "loss": 1.3758, "step": 5553 }, { "epoch": 0.6, "grad_norm": 0.0916066824594436, "learning_rate": 0.0003687135089370153, "loss": 1.4083, "step": 5554 }, { "epoch": 0.6, "grad_norm": 0.07203913084540307, "learning_rate": 0.0003685455186247772, "loss": 1.3663, "step": 5555 }, { "epoch": 0.6, "grad_norm": 0.07810221503711978, "learning_rate": 0.0003683775442518482, "loss": 1.337, "step": 5556 }, { "epoch": 0.6, "grad_norm": 0.07378044550397307, "learning_rate": 0.0003682095858385958, "loss": 1.4417, "step": 5557 }, { "epoch": 0.6, "grad_norm": 0.07662962879711044, "learning_rate": 0.0003680416434053854, "loss": 1.4428, "step": 5558 }, { "epoch": 0.6, "grad_norm": 0.07391624067869233, "learning_rate": 0.00036787371697258087, "loss": 1.5021, "step": 5559 }, { "epoch": 0.6, "grad_norm": 0.08018611758276063, "learning_rate": 0.0003677058065605434, "loss": 1.4745, "step": 5560 }, { "epoch": 0.6, "grad_norm": 0.07141145070323125, "learning_rate": 0.00036753791218963305, "loss": 1.5691, "step": 5561 }, { "epoch": 0.6, "grad_norm": 0.07200974918384846, "learning_rate": 0.00036737003388020756, "loss": 1.3799, "step": 5562 }, { "epoch": 0.6, "grad_norm": 0.07893684170114153, "learning_rate": 0.0003672021716526226, "loss": 1.4226, "step": 5563 }, { "epoch": 0.6, "grad_norm": 0.08330290597126691, "learning_rate": 0.0003670343255272322, "loss": 1.556, "step": 5564 }, { "epoch": 0.6, "grad_norm": 0.07058247997252769, "learning_rate": 0.00036686649552438827, "loss": 1.4039, "step": 5565 }, { "epoch": 0.6, "grad_norm": 0.07577260612687944, "learning_rate": 0.00036669868166444065, "loss": 1.3556, "step": 5566 }, { "epoch": 0.6, "grad_norm": 0.07442078259679677, "learning_rate": 0.0003665308839677375, "loss": 1.4111, "step": 5567 }, { "epoch": 0.6, "grad_norm": 0.09062648799718757, "learning_rate": 0.00036636310245462484, "loss": 1.3685, "step": 5568 }, { "epoch": 0.6, "grad_norm": 0.07404072406825808, "learning_rate": 0.00036619533714544664, "loss": 1.2866, "step": 5569 }, { "epoch": 0.6, "grad_norm": 0.07341209572066199, "learning_rate": 0.00036602758806054535, "loss": 1.3663, "step": 5570 }, { "epoch": 0.6, "grad_norm": 0.08367994768627389, "learning_rate": 0.00036585985522026067, "loss": 1.3816, "step": 5571 }, { "epoch": 0.6, "grad_norm": 0.06888392014168349, "learning_rate": 0.000365692138644931, "loss": 1.4769, "step": 5572 }, { "epoch": 0.6, "grad_norm": 0.07446670991451924, "learning_rate": 0.0003655244383548926, "loss": 1.4686, "step": 5573 }, { "epoch": 0.6, "grad_norm": 0.07197126237743647, "learning_rate": 0.00036535675437047955, "loss": 1.3757, "step": 5574 }, { "epoch": 0.6, "grad_norm": 0.07647613896270093, "learning_rate": 0.00036518908671202426, "loss": 1.5409, "step": 5575 }, { "epoch": 0.6, "grad_norm": 0.06842253526012758, "learning_rate": 0.00036502143539985705, "loss": 1.3703, "step": 5576 }, { "epoch": 0.6, "grad_norm": 0.07301037980837188, "learning_rate": 0.00036485380045430597, "loss": 1.4469, "step": 5577 }, { "epoch": 0.6, "grad_norm": 0.07007991133973272, "learning_rate": 0.0003646861818956977, "loss": 1.4505, "step": 5578 }, { "epoch": 0.6, "grad_norm": 0.06574887026402759, "learning_rate": 0.0003645185797443563, "loss": 1.2006, "step": 5579 }, { "epoch": 0.6, "grad_norm": 0.07588016531671705, "learning_rate": 0.0003643509940206043, "loss": 1.3754, "step": 5580 }, { "epoch": 0.6, "grad_norm": 0.06515398225680037, "learning_rate": 0.00036418342474476184, "loss": 1.368, "step": 5581 }, { "epoch": 0.6, "grad_norm": 0.07544085970434565, "learning_rate": 0.00036401587193714724, "loss": 1.5863, "step": 5582 }, { "epoch": 0.6, "grad_norm": 0.07153631958666246, "learning_rate": 0.00036384833561807706, "loss": 1.4028, "step": 5583 }, { "epoch": 0.6, "grad_norm": 0.06872766857481105, "learning_rate": 0.0003636808158078656, "loss": 1.3447, "step": 5584 }, { "epoch": 0.6, "grad_norm": 0.08987840989416099, "learning_rate": 0.00036351331252682515, "loss": 1.4119, "step": 5585 }, { "epoch": 0.6, "grad_norm": 0.06816450340215846, "learning_rate": 0.0003633458257952661, "loss": 1.4812, "step": 5586 }, { "epoch": 0.6, "grad_norm": 0.0737825306184352, "learning_rate": 0.0003631783556334968, "loss": 1.4151, "step": 5587 }, { "epoch": 0.6, "grad_norm": 0.07504967730605744, "learning_rate": 0.00036301090206182366, "loss": 1.4566, "step": 5588 }, { "epoch": 0.6, "grad_norm": 0.08464044928871382, "learning_rate": 0.00036284346510055064, "loss": 1.3867, "step": 5589 }, { "epoch": 0.6, "grad_norm": 0.07468777452003433, "learning_rate": 0.0003626760447699806, "loss": 1.5272, "step": 5590 }, { "epoch": 0.6, "grad_norm": 0.07626542283005774, "learning_rate": 0.0003625086410904136, "loss": 1.3815, "step": 5591 }, { "epoch": 0.6, "grad_norm": 0.06636087185478588, "learning_rate": 0.0003623412540821478, "loss": 1.2893, "step": 5592 }, { "epoch": 0.6, "grad_norm": 0.06748116009133115, "learning_rate": 0.0003621738837654795, "loss": 1.4307, "step": 5593 }, { "epoch": 0.6, "grad_norm": 0.08014808265191209, "learning_rate": 0.00036200653016070283, "loss": 1.2923, "step": 5594 }, { "epoch": 0.6, "grad_norm": 0.07630050936118145, "learning_rate": 0.0003618391932881102, "loss": 1.5177, "step": 5595 }, { "epoch": 0.6, "grad_norm": 0.08900857906661022, "learning_rate": 0.0003616718731679918, "loss": 1.421, "step": 5596 }, { "epoch": 0.6, "grad_norm": 0.07500532846595925, "learning_rate": 0.0003615045698206355, "loss": 1.3891, "step": 5597 }, { "epoch": 0.6, "grad_norm": 0.07810709867424619, "learning_rate": 0.00036133728326632764, "loss": 1.4612, "step": 5598 }, { "epoch": 0.6, "grad_norm": 0.06859701862319084, "learning_rate": 0.0003611700135253523, "loss": 1.3761, "step": 5599 }, { "epoch": 0.6, "grad_norm": 0.07414205880381541, "learning_rate": 0.0003610027606179913, "loss": 1.4306, "step": 5600 }, { "epoch": 0.6, "grad_norm": 0.06934922386528766, "learning_rate": 0.00036083552456452484, "loss": 1.463, "step": 5601 }, { "epoch": 0.6, "grad_norm": 0.0737091137736752, "learning_rate": 0.00036066830538523096, "loss": 1.3899, "step": 5602 }, { "epoch": 0.6, "grad_norm": 0.07854348024840235, "learning_rate": 0.0003605011031003853, "loss": 1.348, "step": 5603 }, { "epoch": 0.6, "grad_norm": 0.08047178520802252, "learning_rate": 0.0003603339177302618, "loss": 1.5244, "step": 5604 }, { "epoch": 0.6, "grad_norm": 0.07429033202974966, "learning_rate": 0.00036016674929513216, "loss": 1.4901, "step": 5605 }, { "epoch": 0.6, "grad_norm": 0.07099137795592182, "learning_rate": 0.00035999959781526637, "loss": 1.4711, "step": 5606 }, { "epoch": 0.6, "grad_norm": 0.07344959139200635, "learning_rate": 0.00035983246331093197, "loss": 1.4571, "step": 5607 }, { "epoch": 0.6, "grad_norm": 0.08400809429915737, "learning_rate": 0.00035966534580239454, "loss": 1.2612, "step": 5608 }, { "epoch": 0.6, "grad_norm": 0.08538467421828809, "learning_rate": 0.0003594982453099178, "loss": 1.3764, "step": 5609 }, { "epoch": 0.6, "grad_norm": 0.07206064549666952, "learning_rate": 0.00035933116185376325, "loss": 1.4262, "step": 5610 }, { "epoch": 0.6, "grad_norm": 0.07958752096332246, "learning_rate": 0.00035916409545419027, "loss": 1.3995, "step": 5611 }, { "epoch": 0.6, "grad_norm": 0.07919682179856065, "learning_rate": 0.00035899704613145635, "loss": 1.5493, "step": 5612 }, { "epoch": 0.6, "grad_norm": 0.07986030378083334, "learning_rate": 0.00035883001390581647, "loss": 1.4705, "step": 5613 }, { "epoch": 0.6, "grad_norm": 0.07799866116922917, "learning_rate": 0.0003586629987975243, "loss": 1.4285, "step": 5614 }, { "epoch": 0.6, "grad_norm": 0.07339482852672152, "learning_rate": 0.00035849600082683066, "loss": 1.4108, "step": 5615 }, { "epoch": 0.6, "grad_norm": 0.07719796136377283, "learning_rate": 0.00035832902001398483, "loss": 1.5008, "step": 5616 }, { "epoch": 0.6, "grad_norm": 0.08039852916277493, "learning_rate": 0.0003581620563792336, "loss": 1.4794, "step": 5617 }, { "epoch": 0.6, "grad_norm": 0.08287709546388787, "learning_rate": 0.00035799510994282216, "loss": 1.3035, "step": 5618 }, { "epoch": 0.6, "grad_norm": 0.07496917924553367, "learning_rate": 0.0003578281807249931, "loss": 1.4986, "step": 5619 }, { "epoch": 0.6, "grad_norm": 0.06504525578999058, "learning_rate": 0.0003576612687459873, "loss": 1.6326, "step": 5620 }, { "epoch": 0.6, "grad_norm": 0.07543080011759193, "learning_rate": 0.00035749437402604346, "loss": 1.4127, "step": 5621 }, { "epoch": 0.6, "grad_norm": 0.06900592630400436, "learning_rate": 0.00035732749658539797, "loss": 1.4264, "step": 5622 }, { "epoch": 0.6, "grad_norm": 0.0689469569944877, "learning_rate": 0.00035716063644428565, "loss": 1.2965, "step": 5623 }, { "epoch": 0.6, "grad_norm": 0.07147758440559308, "learning_rate": 0.00035699379362293836, "loss": 1.3985, "step": 5624 }, { "epoch": 0.6, "grad_norm": 0.07056189553778609, "learning_rate": 0.00035682696814158657, "loss": 1.333, "step": 5625 }, { "epoch": 0.6, "grad_norm": 0.07538426411309675, "learning_rate": 0.00035666016002045854, "loss": 1.3421, "step": 5626 }, { "epoch": 0.6, "grad_norm": 0.06937029736035574, "learning_rate": 0.0003564933692797803, "loss": 1.4397, "step": 5627 }, { "epoch": 0.6, "grad_norm": 0.06838536960364613, "learning_rate": 0.0003563265959397757, "loss": 1.4764, "step": 5628 }, { "epoch": 0.61, "grad_norm": 0.06858569045988618, "learning_rate": 0.0003561598400206667, "loss": 1.3769, "step": 5629 }, { "epoch": 0.61, "grad_norm": 0.0821419094374213, "learning_rate": 0.00035599310154267307, "loss": 1.4455, "step": 5630 }, { "epoch": 0.61, "grad_norm": 0.07978858517243766, "learning_rate": 0.00035582638052601223, "loss": 1.4309, "step": 5631 }, { "epoch": 0.61, "grad_norm": 0.07710280608060323, "learning_rate": 0.00035565967699089984, "loss": 1.412, "step": 5632 }, { "epoch": 0.61, "grad_norm": 0.07006371462775733, "learning_rate": 0.00035549299095754937, "loss": 1.3961, "step": 5633 }, { "epoch": 0.61, "grad_norm": 0.07673372395440631, "learning_rate": 0.0003553263224461718, "loss": 1.2666, "step": 5634 }, { "epoch": 0.61, "grad_norm": 0.07617606707799483, "learning_rate": 0.00035515967147697647, "loss": 1.3182, "step": 5635 }, { "epoch": 0.61, "grad_norm": 0.07299159334036456, "learning_rate": 0.0003549930380701701, "loss": 1.3535, "step": 5636 }, { "epoch": 0.61, "grad_norm": 0.07548869479560834, "learning_rate": 0.000354826422245958, "loss": 1.3972, "step": 5637 }, { "epoch": 0.61, "grad_norm": 0.07528600549226311, "learning_rate": 0.00035465982402454267, "loss": 1.3542, "step": 5638 }, { "epoch": 0.61, "grad_norm": 0.0761655413610927, "learning_rate": 0.0003544932434261246, "loss": 1.5579, "step": 5639 }, { "epoch": 0.61, "grad_norm": 0.07328638809355852, "learning_rate": 0.0003543266804709026, "loss": 1.4611, "step": 5640 }, { "epoch": 0.61, "grad_norm": 0.0684025976932677, "learning_rate": 0.00035416013517907264, "loss": 1.3723, "step": 5641 }, { "epoch": 0.61, "grad_norm": 0.0709444446488586, "learning_rate": 0.00035399360757082913, "loss": 1.3982, "step": 5642 }, { "epoch": 0.61, "grad_norm": 0.07902829727346929, "learning_rate": 0.000353827097666364, "loss": 1.3444, "step": 5643 }, { "epoch": 0.61, "grad_norm": 0.07138351900963495, "learning_rate": 0.00035366060548586744, "loss": 1.3536, "step": 5644 }, { "epoch": 0.61, "grad_norm": 0.06664446570362624, "learning_rate": 0.00035349413104952685, "loss": 1.4987, "step": 5645 }, { "epoch": 0.61, "grad_norm": 0.06565668811270635, "learning_rate": 0.0003533276743775279, "loss": 1.3504, "step": 5646 }, { "epoch": 0.61, "grad_norm": 0.06867503022652852, "learning_rate": 0.00035316123549005416, "loss": 1.3162, "step": 5647 }, { "epoch": 0.61, "grad_norm": 0.08049531458815742, "learning_rate": 0.0003529948144072867, "loss": 1.3726, "step": 5648 }, { "epoch": 0.61, "grad_norm": 0.06401788263416966, "learning_rate": 0.0003528284111494049, "loss": 1.3049, "step": 5649 }, { "epoch": 0.61, "grad_norm": 0.08055355952171174, "learning_rate": 0.00035266202573658567, "loss": 1.5669, "step": 5650 }, { "epoch": 0.61, "grad_norm": 0.07453158758644808, "learning_rate": 0.00035249565818900366, "loss": 1.4813, "step": 5651 }, { "epoch": 0.61, "grad_norm": 0.07887708489653913, "learning_rate": 0.00035232930852683165, "loss": 1.4236, "step": 5652 }, { "epoch": 0.61, "grad_norm": 0.07109618467705087, "learning_rate": 0.0003521629767702401, "loss": 1.2988, "step": 5653 }, { "epoch": 0.61, "grad_norm": 0.06710743003825625, "learning_rate": 0.00035199666293939726, "loss": 1.4079, "step": 5654 }, { "epoch": 0.61, "grad_norm": 0.07155522294010239, "learning_rate": 0.0003518303670544696, "loss": 1.4105, "step": 5655 }, { "epoch": 0.61, "grad_norm": 0.07196979769357402, "learning_rate": 0.00035166408913562034, "loss": 1.4136, "step": 5656 }, { "epoch": 0.61, "grad_norm": 0.07745496675046756, "learning_rate": 0.00035149782920301174, "loss": 1.4632, "step": 5657 }, { "epoch": 0.61, "grad_norm": 0.07447542178913955, "learning_rate": 0.0003513315872768035, "loss": 1.4284, "step": 5658 }, { "epoch": 0.61, "grad_norm": 0.07836930328423564, "learning_rate": 0.00035116536337715255, "loss": 1.4194, "step": 5659 }, { "epoch": 0.61, "grad_norm": 0.08966749202790163, "learning_rate": 0.00035099915752421465, "loss": 1.4606, "step": 5660 }, { "epoch": 0.61, "grad_norm": 0.06903241945632177, "learning_rate": 0.0003508329697381425, "loss": 1.4138, "step": 5661 }, { "epoch": 0.61, "grad_norm": 0.08028060766204789, "learning_rate": 0.0003506668000390869, "loss": 1.4581, "step": 5662 }, { "epoch": 0.61, "grad_norm": 0.07138617804983953, "learning_rate": 0.00035050064844719677, "loss": 1.3281, "step": 5663 }, { "epoch": 0.61, "grad_norm": 0.09922730576053841, "learning_rate": 0.0003503345149826185, "loss": 1.343, "step": 5664 }, { "epoch": 0.61, "grad_norm": 0.07817632658073502, "learning_rate": 0.00035016839966549627, "loss": 1.5559, "step": 5665 }, { "epoch": 0.61, "grad_norm": 0.07861593935008956, "learning_rate": 0.000350002302515972, "loss": 1.3796, "step": 5666 }, { "epoch": 0.61, "grad_norm": 0.07251164645828607, "learning_rate": 0.0003498362235541856, "loss": 1.4127, "step": 5667 }, { "epoch": 0.61, "grad_norm": 0.06707603220109654, "learning_rate": 0.0003496701628002749, "loss": 1.3143, "step": 5668 }, { "epoch": 0.61, "grad_norm": 0.06988053951437045, "learning_rate": 0.00034950412027437525, "loss": 1.4305, "step": 5669 }, { "epoch": 0.61, "grad_norm": 0.07867145829080913, "learning_rate": 0.0003493380959966197, "loss": 1.2806, "step": 5670 }, { "epoch": 0.61, "grad_norm": 0.07429734651699726, "learning_rate": 0.00034917208998713956, "loss": 1.4044, "step": 5671 }, { "epoch": 0.61, "grad_norm": 0.07446972862489869, "learning_rate": 0.0003490061022660634, "loss": 1.4015, "step": 5672 }, { "epoch": 0.61, "grad_norm": 0.07585596482806031, "learning_rate": 0.00034884013285351796, "loss": 1.4996, "step": 5673 }, { "epoch": 0.61, "grad_norm": 0.0765452008993846, "learning_rate": 0.0003486741817696275, "loss": 1.3428, "step": 5674 }, { "epoch": 0.61, "grad_norm": 0.07743629189736943, "learning_rate": 0.0003485082490345143, "loss": 1.3471, "step": 5675 }, { "epoch": 0.61, "grad_norm": 0.07962414669280672, "learning_rate": 0.00034834233466829825, "loss": 1.5078, "step": 5676 }, { "epoch": 0.61, "grad_norm": 0.06823095793714402, "learning_rate": 0.0003481764386910968, "loss": 1.2538, "step": 5677 }, { "epoch": 0.61, "grad_norm": 0.07430885125530548, "learning_rate": 0.00034801056112302554, "loss": 1.5397, "step": 5678 }, { "epoch": 0.61, "grad_norm": 0.07967890542328465, "learning_rate": 0.0003478447019841978, "loss": 1.4244, "step": 5679 }, { "epoch": 0.61, "grad_norm": 0.08165222831567905, "learning_rate": 0.00034767886129472453, "loss": 1.4542, "step": 5680 }, { "epoch": 0.61, "grad_norm": 0.08037753942606386, "learning_rate": 0.0003475130390747144, "loss": 1.5213, "step": 5681 }, { "epoch": 0.61, "grad_norm": 0.07534567859413022, "learning_rate": 0.000347347235344274, "loss": 1.3967, "step": 5682 }, { "epoch": 0.61, "grad_norm": 0.08780878813456622, "learning_rate": 0.0003471814501235076, "loss": 1.2945, "step": 5683 }, { "epoch": 0.61, "grad_norm": 0.07526528057162522, "learning_rate": 0.00034701568343251723, "loss": 1.4243, "step": 5684 }, { "epoch": 0.61, "grad_norm": 0.0877269611831134, "learning_rate": 0.0003468499352914026, "loss": 1.2562, "step": 5685 }, { "epoch": 0.61, "grad_norm": 0.07211662248051867, "learning_rate": 0.00034668420572026156, "loss": 1.3204, "step": 5686 }, { "epoch": 0.61, "grad_norm": 0.0785050832257113, "learning_rate": 0.00034651849473918883, "loss": 1.3089, "step": 5687 }, { "epoch": 0.61, "grad_norm": 0.0733152327279244, "learning_rate": 0.00034635280236827785, "loss": 1.4598, "step": 5688 }, { "epoch": 0.61, "grad_norm": 0.07122292618843426, "learning_rate": 0.0003461871286276194, "loss": 1.4084, "step": 5689 }, { "epoch": 0.61, "grad_norm": 0.07521042733808453, "learning_rate": 0.0003460214735373016, "loss": 1.2403, "step": 5690 }, { "epoch": 0.61, "grad_norm": 0.08669744108413878, "learning_rate": 0.00034585583711741114, "loss": 1.3522, "step": 5691 }, { "epoch": 0.61, "grad_norm": 0.07735992970438209, "learning_rate": 0.00034569021938803184, "loss": 1.3652, "step": 5692 }, { "epoch": 0.61, "grad_norm": 0.08321736056147003, "learning_rate": 0.0003455246203692454, "loss": 1.4371, "step": 5693 }, { "epoch": 0.61, "grad_norm": 0.07046318299439341, "learning_rate": 0.0003453590400811313, "loss": 1.2962, "step": 5694 }, { "epoch": 0.61, "grad_norm": 0.08303814502094009, "learning_rate": 0.0003451934785437668, "loss": 1.3947, "step": 5695 }, { "epoch": 0.61, "grad_norm": 0.0778596641434968, "learning_rate": 0.00034502793577722657, "loss": 1.4846, "step": 5696 }, { "epoch": 0.61, "grad_norm": 0.07305095025031742, "learning_rate": 0.00034486241180158375, "loss": 1.4616, "step": 5697 }, { "epoch": 0.61, "grad_norm": 0.07711619596739502, "learning_rate": 0.00034469690663690793, "loss": 1.244, "step": 5698 }, { "epoch": 0.61, "grad_norm": 0.08701736627740074, "learning_rate": 0.0003445314203032678, "loss": 1.5445, "step": 5699 }, { "epoch": 0.61, "grad_norm": 0.08825112294005741, "learning_rate": 0.0003443659528207289, "loss": 1.2481, "step": 5700 }, { "epoch": 0.61, "grad_norm": 0.08846932338421327, "learning_rate": 0.0003442005042093547, "loss": 1.3274, "step": 5701 }, { "epoch": 0.61, "grad_norm": 0.07205817936514314, "learning_rate": 0.00034403507448920655, "loss": 1.433, "step": 5702 }, { "epoch": 0.61, "grad_norm": 0.07069231060608115, "learning_rate": 0.00034386966368034333, "loss": 1.2406, "step": 5703 }, { "epoch": 0.61, "grad_norm": 0.07557644180661727, "learning_rate": 0.0003437042718028215, "loss": 1.4779, "step": 5704 }, { "epoch": 0.61, "grad_norm": 0.07339080355677123, "learning_rate": 0.00034353889887669574, "loss": 1.4528, "step": 5705 }, { "epoch": 0.61, "grad_norm": 0.06957718608936084, "learning_rate": 0.00034337354492201784, "loss": 1.5388, "step": 5706 }, { "epoch": 0.61, "grad_norm": 0.07314609581938167, "learning_rate": 0.0003432082099588377, "loss": 1.349, "step": 5707 }, { "epoch": 0.61, "grad_norm": 0.0818507302919073, "learning_rate": 0.0003430428940072026, "loss": 1.3628, "step": 5708 }, { "epoch": 0.61, "grad_norm": 0.0648690866111131, "learning_rate": 0.0003428775970871575, "loss": 1.4247, "step": 5709 }, { "epoch": 0.61, "grad_norm": 0.06938785217931223, "learning_rate": 0.0003427123192187456, "loss": 1.5578, "step": 5710 }, { "epoch": 0.61, "grad_norm": 0.07535029207051505, "learning_rate": 0.00034254706042200725, "loss": 1.3668, "step": 5711 }, { "epoch": 0.61, "grad_norm": 0.07028672868584891, "learning_rate": 0.00034238182071698065, "loss": 1.4355, "step": 5712 }, { "epoch": 0.61, "grad_norm": 0.08299347809832416, "learning_rate": 0.0003422166001237016, "loss": 1.4188, "step": 5713 }, { "epoch": 0.61, "grad_norm": 0.06277772175495448, "learning_rate": 0.0003420513986622038, "loss": 1.2739, "step": 5714 }, { "epoch": 0.61, "grad_norm": 0.06737514638829561, "learning_rate": 0.0003418862163525185, "loss": 1.3447, "step": 5715 }, { "epoch": 0.61, "grad_norm": 0.07349604640587126, "learning_rate": 0.0003417210532146744, "loss": 1.3473, "step": 5716 }, { "epoch": 0.61, "grad_norm": 0.0861956852391659, "learning_rate": 0.00034155590926869837, "loss": 1.1675, "step": 5717 }, { "epoch": 0.61, "grad_norm": 0.06819916582304868, "learning_rate": 0.0003413907845346147, "loss": 1.418, "step": 5718 }, { "epoch": 0.61, "grad_norm": 0.07113808982321314, "learning_rate": 0.0003412256790324452, "loss": 1.3437, "step": 5719 }, { "epoch": 0.61, "grad_norm": 0.08113355490818232, "learning_rate": 0.00034106059278220935, "loss": 1.3868, "step": 5720 }, { "epoch": 0.61, "grad_norm": 0.09610388012345854, "learning_rate": 0.00034089552580392456, "loss": 1.4229, "step": 5721 }, { "epoch": 0.62, "grad_norm": 0.07795794477824468, "learning_rate": 0.00034073047811760586, "loss": 1.4547, "step": 5722 }, { "epoch": 0.62, "grad_norm": 0.09967082596439263, "learning_rate": 0.0003405654497432658, "loss": 1.4848, "step": 5723 }, { "epoch": 0.62, "grad_norm": 0.06998698314030954, "learning_rate": 0.0003404004407009145, "loss": 1.4644, "step": 5724 }, { "epoch": 0.62, "grad_norm": 0.08424361964264927, "learning_rate": 0.0003402354510105601, "loss": 1.468, "step": 5725 }, { "epoch": 0.62, "grad_norm": 0.07588249803140369, "learning_rate": 0.00034007048069220803, "loss": 1.2903, "step": 5726 }, { "epoch": 0.62, "grad_norm": 0.0718749317137767, "learning_rate": 0.00033990552976586143, "loss": 1.4871, "step": 5727 }, { "epoch": 0.62, "grad_norm": 0.06527144687140929, "learning_rate": 0.0003397405982515214, "loss": 1.3258, "step": 5728 }, { "epoch": 0.62, "grad_norm": 0.08734474868450842, "learning_rate": 0.0003395756861691864, "loss": 1.3422, "step": 5729 }, { "epoch": 0.62, "grad_norm": 0.07129891373227273, "learning_rate": 0.0003394107935388525, "loss": 1.5485, "step": 5730 }, { "epoch": 0.62, "grad_norm": 0.0688040837256214, "learning_rate": 0.00033924592038051346, "loss": 1.4304, "step": 5731 }, { "epoch": 0.62, "grad_norm": 0.07444968184588249, "learning_rate": 0.0003390810667141606, "loss": 1.3824, "step": 5732 }, { "epoch": 0.62, "grad_norm": 0.07877965685992819, "learning_rate": 0.0003389162325597834, "loss": 1.4078, "step": 5733 }, { "epoch": 0.62, "grad_norm": 0.07229117315627312, "learning_rate": 0.0003387514179373683, "loss": 1.3603, "step": 5734 }, { "epoch": 0.62, "grad_norm": 0.07622358969159525, "learning_rate": 0.00033858662286689954, "loss": 1.4646, "step": 5735 }, { "epoch": 0.62, "grad_norm": 0.07074157195742369, "learning_rate": 0.0003384218473683594, "loss": 1.4831, "step": 5736 }, { "epoch": 0.62, "grad_norm": 0.07407245287057423, "learning_rate": 0.0003382570914617273, "loss": 1.519, "step": 5737 }, { "epoch": 0.62, "grad_norm": 0.07466035597162812, "learning_rate": 0.0003380923551669804, "loss": 1.228, "step": 5738 }, { "epoch": 0.62, "grad_norm": 0.0683061545829768, "learning_rate": 0.0003379276385040938, "loss": 1.4295, "step": 5739 }, { "epoch": 0.62, "grad_norm": 0.07171623879287245, "learning_rate": 0.00033776294149303953, "loss": 1.5231, "step": 5740 }, { "epoch": 0.62, "grad_norm": 0.07017306983889705, "learning_rate": 0.0003375982641537881, "loss": 1.2489, "step": 5741 }, { "epoch": 0.62, "grad_norm": 0.06908949184279577, "learning_rate": 0.0003374336065063069, "loss": 1.5955, "step": 5742 }, { "epoch": 0.62, "grad_norm": 0.07588952795946573, "learning_rate": 0.0003372689685705614, "loss": 1.5576, "step": 5743 }, { "epoch": 0.62, "grad_norm": 0.0807730430692002, "learning_rate": 0.00033710435036651446, "loss": 1.422, "step": 5744 }, { "epoch": 0.62, "grad_norm": 0.07615707250575719, "learning_rate": 0.0003369397519141267, "loss": 1.4909, "step": 5745 }, { "epoch": 0.62, "grad_norm": 0.07415125469436308, "learning_rate": 0.00033677517323335614, "loss": 1.4658, "step": 5746 }, { "epoch": 0.62, "grad_norm": 0.07714066587006227, "learning_rate": 0.00033661061434415844, "loss": 1.423, "step": 5747 }, { "epoch": 0.62, "grad_norm": 0.06975556296096408, "learning_rate": 0.00033644607526648717, "loss": 1.4794, "step": 5748 }, { "epoch": 0.62, "grad_norm": 0.07700941531030474, "learning_rate": 0.0003362815560202931, "loss": 1.4553, "step": 5749 }, { "epoch": 0.62, "grad_norm": 0.07152268833698286, "learning_rate": 0.00033611705662552494, "loss": 1.4017, "step": 5750 }, { "epoch": 0.62, "grad_norm": 0.07145733194757649, "learning_rate": 0.0003359525771021285, "loss": 1.3974, "step": 5751 }, { "epoch": 0.62, "grad_norm": 0.07285528831823655, "learning_rate": 0.0003357881174700476, "loss": 1.3365, "step": 5752 }, { "epoch": 0.62, "grad_norm": 0.07203425116451612, "learning_rate": 0.0003356236777492236, "loss": 1.4425, "step": 5753 }, { "epoch": 0.62, "grad_norm": 0.07216624969608204, "learning_rate": 0.00033545925795959544, "loss": 1.4025, "step": 5754 }, { "epoch": 0.62, "grad_norm": 0.07809662601678845, "learning_rate": 0.00033529485812109933, "loss": 1.4063, "step": 5755 }, { "epoch": 0.62, "grad_norm": 0.07661840340526746, "learning_rate": 0.0003351304782536697, "loss": 1.4225, "step": 5756 }, { "epoch": 0.62, "grad_norm": 0.0756729672262736, "learning_rate": 0.0003349661183772379, "loss": 1.3925, "step": 5757 }, { "epoch": 0.62, "grad_norm": 0.0788412033149621, "learning_rate": 0.0003348017785117331, "loss": 1.5799, "step": 5758 }, { "epoch": 0.62, "grad_norm": 0.07957797898161738, "learning_rate": 0.0003346374586770823, "loss": 1.3948, "step": 5759 }, { "epoch": 0.62, "grad_norm": 0.07042844223390973, "learning_rate": 0.00033447315889320985, "loss": 1.2962, "step": 5760 }, { "epoch": 0.62, "grad_norm": 0.07623292390015404, "learning_rate": 0.0003343088791800374, "loss": 1.4195, "step": 5761 }, { "epoch": 0.62, "grad_norm": 0.07479667738963493, "learning_rate": 0.0003341446195574846, "loss": 1.5432, "step": 5762 }, { "epoch": 0.62, "grad_norm": 0.09130577475740195, "learning_rate": 0.0003339803800454684, "loss": 1.3322, "step": 5763 }, { "epoch": 0.62, "grad_norm": 0.0830672023590636, "learning_rate": 0.00033381616066390353, "loss": 1.3991, "step": 5764 }, { "epoch": 0.62, "grad_norm": 0.07167158802173361, "learning_rate": 0.00033365196143270203, "loss": 1.5648, "step": 5765 }, { "epoch": 0.62, "grad_norm": 0.08324039636311231, "learning_rate": 0.0003334877823717737, "loss": 1.1953, "step": 5766 }, { "epoch": 0.62, "grad_norm": 0.07364194494627035, "learning_rate": 0.0003333236235010259, "loss": 1.3725, "step": 5767 }, { "epoch": 0.62, "grad_norm": 0.08352686220297945, "learning_rate": 0.0003331594848403634, "loss": 1.2813, "step": 5768 }, { "epoch": 0.62, "grad_norm": 0.0782586913324151, "learning_rate": 0.0003329953664096884, "loss": 1.4421, "step": 5769 }, { "epoch": 0.62, "grad_norm": 0.07344286447573398, "learning_rate": 0.00033283126822890107, "loss": 1.4148, "step": 5770 }, { "epoch": 0.62, "grad_norm": 0.07640927867015894, "learning_rate": 0.00033266719031789895, "loss": 1.3939, "step": 5771 }, { "epoch": 0.62, "grad_norm": 0.07444837850894832, "learning_rate": 0.0003325031326965767, "loss": 1.344, "step": 5772 }, { "epoch": 0.62, "grad_norm": 0.07453338538015851, "learning_rate": 0.0003323390953848271, "loss": 1.5043, "step": 5773 }, { "epoch": 0.62, "grad_norm": 0.07563824140788851, "learning_rate": 0.0003321750784025401, "loss": 1.4378, "step": 5774 }, { "epoch": 0.62, "grad_norm": 0.07733704126648222, "learning_rate": 0.00033201108176960346, "loss": 1.3634, "step": 5775 }, { "epoch": 0.62, "grad_norm": 0.07477323086087968, "learning_rate": 0.00033184710550590243, "loss": 1.3377, "step": 5776 }, { "epoch": 0.62, "grad_norm": 0.0673227413865871, "learning_rate": 0.00033168314963131953, "loss": 1.3578, "step": 5777 }, { "epoch": 0.62, "grad_norm": 0.07476933458770825, "learning_rate": 0.00033151921416573485, "loss": 1.3904, "step": 5778 }, { "epoch": 0.62, "grad_norm": 0.06899281005764235, "learning_rate": 0.0003313552991290264, "loss": 1.3827, "step": 5779 }, { "epoch": 0.62, "grad_norm": 0.07602161019541753, "learning_rate": 0.0003311914045410694, "loss": 1.4342, "step": 5780 }, { "epoch": 0.62, "grad_norm": 0.06935786602316016, "learning_rate": 0.00033102753042173644, "loss": 1.2842, "step": 5781 }, { "epoch": 0.62, "grad_norm": 0.07221675602626097, "learning_rate": 0.00033086367679089826, "loss": 1.3586, "step": 5782 }, { "epoch": 0.62, "grad_norm": 0.08951430196528462, "learning_rate": 0.00033069984366842207, "loss": 1.3451, "step": 5783 }, { "epoch": 0.62, "grad_norm": 0.06737689928701268, "learning_rate": 0.00033053603107417365, "loss": 1.3806, "step": 5784 }, { "epoch": 0.62, "grad_norm": 0.0926594824874406, "learning_rate": 0.0003303722390280156, "loss": 1.3205, "step": 5785 }, { "epoch": 0.62, "grad_norm": 0.06461257789980361, "learning_rate": 0.00033020846754980824, "loss": 1.3715, "step": 5786 }, { "epoch": 0.62, "grad_norm": 0.08945684857302136, "learning_rate": 0.0003300447166594097, "loss": 1.4229, "step": 5787 }, { "epoch": 0.62, "grad_norm": 0.06326462043703998, "learning_rate": 0.0003298809863766752, "loss": 1.3268, "step": 5788 }, { "epoch": 0.62, "grad_norm": 0.07313020967684405, "learning_rate": 0.0003297172767214576, "loss": 1.4367, "step": 5789 }, { "epoch": 0.62, "grad_norm": 0.07901312968141494, "learning_rate": 0.0003295535877136072, "loss": 1.3237, "step": 5790 }, { "epoch": 0.62, "grad_norm": 0.07933674541674322, "learning_rate": 0.00032938991937297193, "loss": 1.3319, "step": 5791 }, { "epoch": 0.62, "grad_norm": 0.06740982353930541, "learning_rate": 0.0003292262717193972, "loss": 1.3509, "step": 5792 }, { "epoch": 0.62, "grad_norm": 0.07754239914727032, "learning_rate": 0.00032906264477272575, "loss": 1.3317, "step": 5793 }, { "epoch": 0.62, "grad_norm": 0.07521240547901578, "learning_rate": 0.0003288990385527978, "loss": 1.4327, "step": 5794 }, { "epoch": 0.62, "grad_norm": 0.0807145346435453, "learning_rate": 0.00032873545307945143, "loss": 1.4121, "step": 5795 }, { "epoch": 0.62, "grad_norm": 0.0873071269093502, "learning_rate": 0.0003285718883725217, "loss": 1.346, "step": 5796 }, { "epoch": 0.62, "grad_norm": 0.07939894137269075, "learning_rate": 0.00032840834445184154, "loss": 1.4587, "step": 5797 }, { "epoch": 0.62, "grad_norm": 0.08248088922264714, "learning_rate": 0.0003282448213372412, "loss": 1.3405, "step": 5798 }, { "epoch": 0.62, "grad_norm": 0.08439168880904907, "learning_rate": 0.0003280813190485484, "loss": 1.3494, "step": 5799 }, { "epoch": 0.62, "grad_norm": 0.08123391440953981, "learning_rate": 0.00032791783760558835, "loss": 1.4026, "step": 5800 }, { "epoch": 0.62, "grad_norm": 0.07908771534976788, "learning_rate": 0.00032775437702818377, "loss": 1.5283, "step": 5801 }, { "epoch": 0.62, "grad_norm": 0.07532811513448483, "learning_rate": 0.0003275909373361548, "loss": 1.4235, "step": 5802 }, { "epoch": 0.62, "grad_norm": 0.07166058099901026, "learning_rate": 0.0003274275185493192, "loss": 1.4699, "step": 5803 }, { "epoch": 0.62, "grad_norm": 0.08297468674472518, "learning_rate": 0.0003272641206874918, "loss": 1.4871, "step": 5804 }, { "epoch": 0.62, "grad_norm": 0.07243487395107884, "learning_rate": 0.0003271007437704852, "loss": 1.3383, "step": 5805 }, { "epoch": 0.62, "grad_norm": 0.0805152716343893, "learning_rate": 0.00032693738781810957, "loss": 1.482, "step": 5806 }, { "epoch": 0.62, "grad_norm": 0.09221296792088401, "learning_rate": 0.00032677405285017226, "loss": 1.4894, "step": 5807 }, { "epoch": 0.62, "grad_norm": 0.07764307331973927, "learning_rate": 0.0003266107388864783, "loss": 1.4943, "step": 5808 }, { "epoch": 0.62, "grad_norm": 0.0771348222959278, "learning_rate": 0.0003264474459468299, "loss": 1.395, "step": 5809 }, { "epoch": 0.62, "grad_norm": 0.07434205741667296, "learning_rate": 0.00032628417405102704, "loss": 1.3975, "step": 5810 }, { "epoch": 0.62, "grad_norm": 0.08459239119791959, "learning_rate": 0.0003261209232188671, "loss": 1.4852, "step": 5811 }, { "epoch": 0.62, "grad_norm": 0.08710954280161412, "learning_rate": 0.00032595769347014446, "loss": 1.2992, "step": 5812 }, { "epoch": 0.62, "grad_norm": 0.07702098501175848, "learning_rate": 0.0003257944848246519, "loss": 1.384, "step": 5813 }, { "epoch": 0.62, "grad_norm": 0.0729400817671885, "learning_rate": 0.00032563129730217826, "loss": 1.3524, "step": 5814 }, { "epoch": 0.63, "grad_norm": 0.07949284921131701, "learning_rate": 0.0003254681309225111, "loss": 1.4447, "step": 5815 }, { "epoch": 0.63, "grad_norm": 0.08413069338126254, "learning_rate": 0.00032530498570543476, "loss": 1.5163, "step": 5816 }, { "epoch": 0.63, "grad_norm": 0.07414571013437149, "learning_rate": 0.000325141861670731, "loss": 1.2736, "step": 5817 }, { "epoch": 0.63, "grad_norm": 0.0709935359677738, "learning_rate": 0.0003249787588381795, "loss": 1.4867, "step": 5818 }, { "epoch": 0.63, "grad_norm": 0.08271727344452846, "learning_rate": 0.0003248156772275569, "loss": 1.4364, "step": 5819 }, { "epoch": 0.63, "grad_norm": 0.07447507008238005, "learning_rate": 0.00032465261685863723, "loss": 1.3646, "step": 5820 }, { "epoch": 0.63, "grad_norm": 0.07074186981804943, "learning_rate": 0.0003244895777511925, "loss": 1.2683, "step": 5821 }, { "epoch": 0.63, "grad_norm": 0.08099285892882745, "learning_rate": 0.0003243265599249914, "loss": 1.3554, "step": 5822 }, { "epoch": 0.63, "grad_norm": 0.0766455672893839, "learning_rate": 0.00032416356339980053, "loss": 1.3961, "step": 5823 }, { "epoch": 0.63, "grad_norm": 0.07939556315878743, "learning_rate": 0.00032400058819538407, "loss": 1.3909, "step": 5824 }, { "epoch": 0.63, "grad_norm": 0.07740960550571895, "learning_rate": 0.00032383763433150274, "loss": 1.5367, "step": 5825 }, { "epoch": 0.63, "grad_norm": 0.07881442246987015, "learning_rate": 0.0003236747018279157, "loss": 1.6059, "step": 5826 }, { "epoch": 0.63, "grad_norm": 0.07457194940604828, "learning_rate": 0.000323511790704379, "loss": 1.4712, "step": 5827 }, { "epoch": 0.63, "grad_norm": 0.07818352176339892, "learning_rate": 0.00032334890098064593, "loss": 1.3353, "step": 5828 }, { "epoch": 0.63, "grad_norm": 0.07577655989104184, "learning_rate": 0.00032318603267646775, "loss": 1.4101, "step": 5829 }, { "epoch": 0.63, "grad_norm": 0.08063222118709179, "learning_rate": 0.0003230231858115927, "loss": 1.4651, "step": 5830 }, { "epoch": 0.63, "grad_norm": 0.07871062903426618, "learning_rate": 0.0003228603604057664, "loss": 1.54, "step": 5831 }, { "epoch": 0.63, "grad_norm": 0.07753991143364851, "learning_rate": 0.00032269755647873217, "loss": 1.4145, "step": 5832 }, { "epoch": 0.63, "grad_norm": 0.07769271192950013, "learning_rate": 0.00032253477405023046, "loss": 1.2744, "step": 5833 }, { "epoch": 0.63, "grad_norm": 0.07149459574934523, "learning_rate": 0.00032237201313999926, "loss": 1.4112, "step": 5834 }, { "epoch": 0.63, "grad_norm": 0.06967019720018568, "learning_rate": 0.0003222092737677739, "loss": 1.4218, "step": 5835 }, { "epoch": 0.63, "grad_norm": 0.07366431502138024, "learning_rate": 0.0003220465559532869, "loss": 1.3923, "step": 5836 }, { "epoch": 0.63, "grad_norm": 0.0774173907387302, "learning_rate": 0.0003218838597162685, "loss": 1.3827, "step": 5837 }, { "epoch": 0.63, "grad_norm": 0.09568525653553758, "learning_rate": 0.0003217211850764462, "loss": 1.4002, "step": 5838 }, { "epoch": 0.63, "grad_norm": 0.06820415005290484, "learning_rate": 0.0003215585320535449, "loss": 1.5259, "step": 5839 }, { "epoch": 0.63, "grad_norm": 0.07099253896133205, "learning_rate": 0.0003213959006672866, "loss": 1.4814, "step": 5840 }, { "epoch": 0.63, "grad_norm": 0.10048590704557143, "learning_rate": 0.0003212332909373912, "loss": 1.3643, "step": 5841 }, { "epoch": 0.63, "grad_norm": 0.0868215430127107, "learning_rate": 0.0003210707028835755, "loss": 1.2828, "step": 5842 }, { "epoch": 0.63, "grad_norm": 0.07111722341191054, "learning_rate": 0.00032090813652555395, "loss": 1.2882, "step": 5843 }, { "epoch": 0.63, "grad_norm": 0.072419215989912, "learning_rate": 0.00032074559188303834, "loss": 1.4059, "step": 5844 }, { "epoch": 0.63, "grad_norm": 0.07626815207305306, "learning_rate": 0.00032058306897573787, "loss": 1.3845, "step": 5845 }, { "epoch": 0.63, "grad_norm": 0.07593411084596641, "learning_rate": 0.0003204205678233586, "loss": 1.2355, "step": 5846 }, { "epoch": 0.63, "grad_norm": 0.07671474226352402, "learning_rate": 0.00032025808844560465, "loss": 1.2905, "step": 5847 }, { "epoch": 0.63, "grad_norm": 0.07194390806509542, "learning_rate": 0.000320095630862177, "loss": 1.3861, "step": 5848 }, { "epoch": 0.63, "grad_norm": 0.0882906222537737, "learning_rate": 0.0003199331950927745, "loss": 1.4485, "step": 5849 }, { "epoch": 0.63, "grad_norm": 0.07800170587048255, "learning_rate": 0.0003197707811570928, "loss": 1.5706, "step": 5850 }, { "epoch": 0.63, "grad_norm": 0.0798895187757382, "learning_rate": 0.0003196083890748252, "loss": 1.3286, "step": 5851 }, { "epoch": 0.63, "grad_norm": 0.08321227249490748, "learning_rate": 0.0003194460188656624, "loss": 1.3371, "step": 5852 }, { "epoch": 0.63, "grad_norm": 0.07770505707512657, "learning_rate": 0.0003192836705492923, "loss": 1.5892, "step": 5853 }, { "epoch": 0.63, "grad_norm": 0.08257359787550428, "learning_rate": 0.00031912134414540007, "loss": 1.4664, "step": 5854 }, { "epoch": 0.63, "grad_norm": 0.07573547661484602, "learning_rate": 0.00031895903967366844, "loss": 1.3355, "step": 5855 }, { "epoch": 0.63, "grad_norm": 0.07977638149685118, "learning_rate": 0.00031879675715377765, "loss": 1.5056, "step": 5856 }, { "epoch": 0.63, "grad_norm": 0.0700197292003201, "learning_rate": 0.00031863449660540457, "loss": 1.4674, "step": 5857 }, { "epoch": 0.63, "grad_norm": 0.07645749377104584, "learning_rate": 0.00031847225804822405, "loss": 1.4547, "step": 5858 }, { "epoch": 0.63, "grad_norm": 0.07689507595670635, "learning_rate": 0.00031831004150190796, "loss": 1.4252, "step": 5859 }, { "epoch": 0.63, "grad_norm": 0.06998294349242434, "learning_rate": 0.0003181478469861259, "loss": 1.4104, "step": 5860 }, { "epoch": 0.63, "grad_norm": 0.07176385236960926, "learning_rate": 0.00031798567452054414, "loss": 1.4554, "step": 5861 }, { "epoch": 0.63, "grad_norm": 0.07609553056940516, "learning_rate": 0.0003178235241248269, "loss": 1.3244, "step": 5862 }, { "epoch": 0.63, "grad_norm": 0.07696676290756585, "learning_rate": 0.0003176613958186355, "loss": 1.3908, "step": 5863 }, { "epoch": 0.63, "grad_norm": 0.08404653627393327, "learning_rate": 0.00031749928962162844, "loss": 1.4277, "step": 5864 }, { "epoch": 0.63, "grad_norm": 0.07363258110080857, "learning_rate": 0.00031733720555346157, "loss": 1.5204, "step": 5865 }, { "epoch": 0.63, "grad_norm": 0.07463614990723165, "learning_rate": 0.0003171751436337886, "loss": 1.4818, "step": 5866 }, { "epoch": 0.63, "grad_norm": 0.07347277265972793, "learning_rate": 0.00031701310388225945, "loss": 1.5231, "step": 5867 }, { "epoch": 0.63, "grad_norm": 0.07243901069441412, "learning_rate": 0.0003168510863185224, "loss": 1.368, "step": 5868 }, { "epoch": 0.63, "grad_norm": 0.07148451498935104, "learning_rate": 0.00031668909096222255, "loss": 1.4055, "step": 5869 }, { "epoch": 0.63, "grad_norm": 0.07683946813743962, "learning_rate": 0.0003165271178330023, "loss": 1.304, "step": 5870 }, { "epoch": 0.63, "grad_norm": 0.08296014033921659, "learning_rate": 0.00031636516695050164, "loss": 1.3915, "step": 5871 }, { "epoch": 0.63, "grad_norm": 0.07000354267739249, "learning_rate": 0.0003162032383343576, "loss": 1.3169, "step": 5872 }, { "epoch": 0.63, "grad_norm": 0.07802272002326763, "learning_rate": 0.0003160413320042045, "loss": 1.47, "step": 5873 }, { "epoch": 0.63, "grad_norm": 0.0732789511602166, "learning_rate": 0.000315879447979674, "loss": 1.3223, "step": 5874 }, { "epoch": 0.63, "grad_norm": 0.07027730292619676, "learning_rate": 0.0003157175862803953, "loss": 1.3835, "step": 5875 }, { "epoch": 0.63, "grad_norm": 0.07682569968171925, "learning_rate": 0.0003155557469259946, "loss": 1.359, "step": 5876 }, { "epoch": 0.63, "grad_norm": 0.08350094563625435, "learning_rate": 0.00031539392993609554, "loss": 1.3508, "step": 5877 }, { "epoch": 0.63, "grad_norm": 0.08295722149013737, "learning_rate": 0.0003152321353303188, "loss": 1.4293, "step": 5878 }, { "epoch": 0.63, "grad_norm": 0.07254010025148988, "learning_rate": 0.0003150703631282826, "loss": 1.3245, "step": 5879 }, { "epoch": 0.63, "grad_norm": 0.07751813189871887, "learning_rate": 0.00031490861334960247, "loss": 1.2882, "step": 5880 }, { "epoch": 0.63, "grad_norm": 0.07379579097767187, "learning_rate": 0.00031474688601389113, "loss": 1.5361, "step": 5881 }, { "epoch": 0.63, "grad_norm": 0.06670928281533853, "learning_rate": 0.00031458518114075836, "loss": 1.3023, "step": 5882 }, { "epoch": 0.63, "grad_norm": 0.06800302796922687, "learning_rate": 0.00031442349874981167, "loss": 1.4567, "step": 5883 }, { "epoch": 0.63, "grad_norm": 0.07874237147537874, "learning_rate": 0.0003142618388606556, "loss": 1.3324, "step": 5884 }, { "epoch": 0.63, "grad_norm": 0.07217062766502823, "learning_rate": 0.0003141002014928918, "loss": 1.4494, "step": 5885 }, { "epoch": 0.63, "grad_norm": 0.07671074428154255, "learning_rate": 0.0003139385866661196, "loss": 1.4268, "step": 5886 }, { "epoch": 0.63, "grad_norm": 0.08152511966680674, "learning_rate": 0.0003137769943999352, "loss": 1.3887, "step": 5887 }, { "epoch": 0.63, "grad_norm": 0.0857558783134707, "learning_rate": 0.00031361542471393226, "loss": 1.373, "step": 5888 }, { "epoch": 0.63, "grad_norm": 0.08265072365117745, "learning_rate": 0.00031345387762770163, "loss": 1.4538, "step": 5889 }, { "epoch": 0.63, "grad_norm": 0.08161194391076494, "learning_rate": 0.00031329235316083126, "loss": 1.3585, "step": 5890 }, { "epoch": 0.63, "grad_norm": 0.07593695011204729, "learning_rate": 0.00031313085133290695, "loss": 1.4407, "step": 5891 }, { "epoch": 0.63, "grad_norm": 0.07210716065409313, "learning_rate": 0.00031296937216351113, "loss": 1.4562, "step": 5892 }, { "epoch": 0.63, "grad_norm": 0.0850661492304202, "learning_rate": 0.0003128079156722236, "loss": 1.572, "step": 5893 }, { "epoch": 0.63, "grad_norm": 0.09330568226061443, "learning_rate": 0.0003126464818786218, "loss": 1.3642, "step": 5894 }, { "epoch": 0.63, "grad_norm": 0.0737289442763714, "learning_rate": 0.0003124850708022799, "loss": 1.3718, "step": 5895 }, { "epoch": 0.63, "grad_norm": 0.08218771057410153, "learning_rate": 0.00031232368246276955, "loss": 1.4054, "step": 5896 }, { "epoch": 0.63, "grad_norm": 0.07967506567629058, "learning_rate": 0.00031216231687965977, "loss": 1.4413, "step": 5897 }, { "epoch": 0.63, "grad_norm": 0.079496248186044, "learning_rate": 0.00031200097407251687, "loss": 1.5249, "step": 5898 }, { "epoch": 0.63, "grad_norm": 0.07693598790618682, "learning_rate": 0.0003118396540609038, "loss": 1.4197, "step": 5899 }, { "epoch": 0.63, "grad_norm": 0.08543054581020419, "learning_rate": 0.0003116783568643814, "loss": 1.3692, "step": 5900 }, { "epoch": 0.63, "grad_norm": 0.0774464397249776, "learning_rate": 0.00031151708250250735, "loss": 1.349, "step": 5901 }, { "epoch": 0.63, "grad_norm": 0.06921752872907357, "learning_rate": 0.000311355830994837, "loss": 1.4459, "step": 5902 }, { "epoch": 0.63, "grad_norm": 0.07514722249298769, "learning_rate": 0.00031119460236092247, "loss": 1.4358, "step": 5903 }, { "epoch": 0.63, "grad_norm": 0.08242771723457627, "learning_rate": 0.00031103339662031325, "loss": 1.3992, "step": 5904 }, { "epoch": 0.63, "grad_norm": 0.0757734027343892, "learning_rate": 0.00031087221379255616, "loss": 1.3503, "step": 5905 }, { "epoch": 0.63, "grad_norm": 0.07672806483686527, "learning_rate": 0.0003107110538971952, "loss": 1.4053, "step": 5906 }, { "epoch": 0.63, "grad_norm": 0.08680615262647434, "learning_rate": 0.00031054991695377156, "loss": 1.5851, "step": 5907 }, { "epoch": 0.64, "grad_norm": 0.08822808648220977, "learning_rate": 0.0003103888029818235, "loss": 1.4323, "step": 5908 }, { "epoch": 0.64, "grad_norm": 0.07106740585924771, "learning_rate": 0.00031022771200088706, "loss": 1.4376, "step": 5909 }, { "epoch": 0.64, "grad_norm": 0.07779044400772091, "learning_rate": 0.0003100666440304946, "loss": 1.3308, "step": 5910 }, { "epoch": 0.64, "grad_norm": 0.07194910372531178, "learning_rate": 0.00030990559909017636, "loss": 1.4664, "step": 5911 }, { "epoch": 0.64, "grad_norm": 0.07745473453560854, "learning_rate": 0.00030974457719945954, "loss": 1.37, "step": 5912 }, { "epoch": 0.64, "grad_norm": 0.0743549948835792, "learning_rate": 0.00030958357837786854, "loss": 1.5214, "step": 5913 }, { "epoch": 0.64, "grad_norm": 0.07378607983212841, "learning_rate": 0.00030942260264492534, "loss": 1.59, "step": 5914 }, { "epoch": 0.64, "grad_norm": 0.08354354717365467, "learning_rate": 0.0003092616500201485, "loss": 1.3002, "step": 5915 }, { "epoch": 0.64, "grad_norm": 0.0759387023706967, "learning_rate": 0.0003091007205230541, "loss": 1.3571, "step": 5916 }, { "epoch": 0.64, "grad_norm": 0.06915812880321831, "learning_rate": 0.00030893981417315553, "loss": 1.4155, "step": 5917 }, { "epoch": 0.64, "grad_norm": 0.08602623259978609, "learning_rate": 0.00030877893098996324, "loss": 1.2942, "step": 5918 }, { "epoch": 0.64, "grad_norm": 0.07686753605428095, "learning_rate": 0.0003086180709929849, "loss": 1.2931, "step": 5919 }, { "epoch": 0.64, "grad_norm": 0.0743510939165553, "learning_rate": 0.0003084572342017251, "loss": 1.3802, "step": 5920 }, { "epoch": 0.64, "grad_norm": 0.08302234915061892, "learning_rate": 0.00030829642063568595, "loss": 1.4475, "step": 5921 }, { "epoch": 0.64, "grad_norm": 0.08139252633012196, "learning_rate": 0.00030813563031436674, "loss": 1.3602, "step": 5922 }, { "epoch": 0.64, "grad_norm": 0.07148548640627475, "learning_rate": 0.0003079748632572639, "loss": 1.3305, "step": 5923 }, { "epoch": 0.64, "grad_norm": 0.0792567664336374, "learning_rate": 0.0003078141194838707, "loss": 1.3468, "step": 5924 }, { "epoch": 0.64, "grad_norm": 0.07455553638782846, "learning_rate": 0.00030765339901367824, "loss": 1.4425, "step": 5925 }, { "epoch": 0.64, "grad_norm": 0.0762782607168731, "learning_rate": 0.00030749270186617426, "loss": 1.4555, "step": 5926 }, { "epoch": 0.64, "grad_norm": 0.07274576676044348, "learning_rate": 0.0003073320280608437, "loss": 1.4174, "step": 5927 }, { "epoch": 0.64, "grad_norm": 0.0684112547807106, "learning_rate": 0.00030717137761716916, "loss": 1.539, "step": 5928 }, { "epoch": 0.64, "grad_norm": 0.07220831738070271, "learning_rate": 0.0003070107505546298, "loss": 1.2818, "step": 5929 }, { "epoch": 0.64, "grad_norm": 0.0973184931653835, "learning_rate": 0.00030685014689270243, "loss": 1.4565, "step": 5930 }, { "epoch": 0.64, "grad_norm": 0.07731877758281028, "learning_rate": 0.0003066895666508605, "loss": 1.3536, "step": 5931 }, { "epoch": 0.64, "grad_norm": 0.07415858374192567, "learning_rate": 0.0003065290098485749, "loss": 1.4543, "step": 5932 }, { "epoch": 0.64, "grad_norm": 0.07258684303449475, "learning_rate": 0.000306368476505314, "loss": 1.3418, "step": 5933 }, { "epoch": 0.64, "grad_norm": 0.0692859825650207, "learning_rate": 0.0003062079666405429, "loss": 1.4166, "step": 5934 }, { "epoch": 0.64, "grad_norm": 0.07029306454398922, "learning_rate": 0.00030604748027372394, "loss": 1.4104, "step": 5935 }, { "epoch": 0.64, "grad_norm": 0.0733708023287787, "learning_rate": 0.0003058870174243165, "loss": 1.368, "step": 5936 }, { "epoch": 0.64, "grad_norm": 0.07324475363368584, "learning_rate": 0.00030572657811177756, "loss": 1.5348, "step": 5937 }, { "epoch": 0.64, "grad_norm": 0.07459659298410051, "learning_rate": 0.0003055661623555608, "loss": 1.5159, "step": 5938 }, { "epoch": 0.64, "grad_norm": 0.07507360349738781, "learning_rate": 0.000305405770175117, "loss": 1.4729, "step": 5939 }, { "epoch": 0.64, "grad_norm": 0.0633251934623615, "learning_rate": 0.0003052454015898948, "loss": 1.3679, "step": 5940 }, { "epoch": 0.64, "grad_norm": 0.0793842984745501, "learning_rate": 0.00030508505661933874, "loss": 1.3561, "step": 5941 }, { "epoch": 0.64, "grad_norm": 0.06819289917335115, "learning_rate": 0.0003049247352828917, "loss": 1.4199, "step": 5942 }, { "epoch": 0.64, "grad_norm": 0.06660602563715051, "learning_rate": 0.00030476443759999293, "loss": 1.4692, "step": 5943 }, { "epoch": 0.64, "grad_norm": 0.0742100503490787, "learning_rate": 0.00030460416359007913, "loss": 1.3488, "step": 5944 }, { "epoch": 0.64, "grad_norm": 0.07296538978034156, "learning_rate": 0.00030444391327258424, "loss": 1.4411, "step": 5945 }, { "epoch": 0.64, "grad_norm": 0.07363651934129214, "learning_rate": 0.00030428368666693905, "loss": 1.4599, "step": 5946 }, { "epoch": 0.64, "grad_norm": 0.08639448348123929, "learning_rate": 0.0003041234837925715, "loss": 1.3221, "step": 5947 }, { "epoch": 0.64, "grad_norm": 0.0759787051112038, "learning_rate": 0.0003039633046689069, "loss": 1.4504, "step": 5948 }, { "epoch": 0.64, "grad_norm": 0.08473623915638286, "learning_rate": 0.0003038031493153675, "loss": 1.4055, "step": 5949 }, { "epoch": 0.64, "grad_norm": 0.09560174827623097, "learning_rate": 0.00030364301775137245, "loss": 1.4658, "step": 5950 }, { "epoch": 0.64, "grad_norm": 0.06961875392765221, "learning_rate": 0.00030348290999633875, "loss": 1.3609, "step": 5951 }, { "epoch": 0.64, "grad_norm": 0.0714538707164538, "learning_rate": 0.0003033228260696795, "loss": 1.3151, "step": 5952 }, { "epoch": 0.64, "grad_norm": 0.07808335058336303, "learning_rate": 0.00030316276599080566, "loss": 1.4946, "step": 5953 }, { "epoch": 0.64, "grad_norm": 0.08282668322888084, "learning_rate": 0.0003030027297791251, "loss": 1.3759, "step": 5954 }, { "epoch": 0.64, "grad_norm": 0.06800510962454319, "learning_rate": 0.00030284271745404257, "loss": 1.4049, "step": 5955 }, { "epoch": 0.64, "grad_norm": 0.07475879845301409, "learning_rate": 0.00030268272903496036, "loss": 1.4277, "step": 5956 }, { "epoch": 0.64, "grad_norm": 0.07642167347690558, "learning_rate": 0.0003025227645412775, "loss": 1.3954, "step": 5957 }, { "epoch": 0.64, "grad_norm": 0.07643834144422731, "learning_rate": 0.0003023628239923902, "loss": 1.3185, "step": 5958 }, { "epoch": 0.64, "grad_norm": 0.07685637721938932, "learning_rate": 0.0003022029074076919, "loss": 1.4371, "step": 5959 }, { "epoch": 0.64, "grad_norm": 0.07909995683759365, "learning_rate": 0.000302043014806573, "loss": 1.4715, "step": 5960 }, { "epoch": 0.64, "grad_norm": 0.07349604482190411, "learning_rate": 0.00030188314620842094, "loss": 1.3706, "step": 5961 }, { "epoch": 0.64, "grad_norm": 0.07233447503471593, "learning_rate": 0.0003017233016326207, "loss": 1.3405, "step": 5962 }, { "epoch": 0.64, "grad_norm": 0.0720093425925684, "learning_rate": 0.0003015634810985534, "loss": 1.5037, "step": 5963 }, { "epoch": 0.64, "grad_norm": 0.0726984904284892, "learning_rate": 0.0003014036846255982, "loss": 1.1959, "step": 5964 }, { "epoch": 0.64, "grad_norm": 0.07403128407492846, "learning_rate": 0.0003012439122331309, "loss": 1.4965, "step": 5965 }, { "epoch": 0.64, "grad_norm": 0.07709803247439713, "learning_rate": 0.0003010841639405244, "loss": 1.5168, "step": 5966 }, { "epoch": 0.64, "grad_norm": 0.07602607776353369, "learning_rate": 0.0003009244397671489, "loss": 1.5096, "step": 5967 }, { "epoch": 0.64, "grad_norm": 0.07978329522099706, "learning_rate": 0.0003007647397323714, "loss": 1.5163, "step": 5968 }, { "epoch": 0.64, "grad_norm": 0.08888699106540596, "learning_rate": 0.00030060506385555595, "loss": 1.479, "step": 5969 }, { "epoch": 0.64, "grad_norm": 0.07168143621723064, "learning_rate": 0.000300445412156064, "loss": 1.3272, "step": 5970 }, { "epoch": 0.64, "grad_norm": 0.07329373929619436, "learning_rate": 0.0003002857846532538, "loss": 1.3277, "step": 5971 }, { "epoch": 0.64, "grad_norm": 0.07654288914163611, "learning_rate": 0.00030012618136648087, "loss": 1.3359, "step": 5972 }, { "epoch": 0.64, "grad_norm": 0.07354952702168815, "learning_rate": 0.0002999666023150974, "loss": 1.4215, "step": 5973 }, { "epoch": 0.64, "grad_norm": 0.06871219507940302, "learning_rate": 0.000299807047518453, "loss": 1.3821, "step": 5974 }, { "epoch": 0.64, "grad_norm": 0.06497905023784828, "learning_rate": 0.00029964751699589423, "loss": 1.403, "step": 5975 }, { "epoch": 0.64, "grad_norm": 0.07652963092713426, "learning_rate": 0.00029948801076676475, "loss": 1.3289, "step": 5976 }, { "epoch": 0.64, "grad_norm": 0.071199491483903, "learning_rate": 0.00029932852885040527, "loss": 1.3271, "step": 5977 }, { "epoch": 0.64, "grad_norm": 0.0745442586868254, "learning_rate": 0.0002991690712661534, "loss": 1.4917, "step": 5978 }, { "epoch": 0.64, "grad_norm": 0.07328338974119428, "learning_rate": 0.00029900963803334413, "loss": 1.5249, "step": 5979 }, { "epoch": 0.64, "grad_norm": 0.07679011633383984, "learning_rate": 0.0002988502291713091, "loss": 1.4964, "step": 5980 }, { "epoch": 0.64, "grad_norm": 0.06510401298584284, "learning_rate": 0.0002986908446993772, "loss": 1.4489, "step": 5981 }, { "epoch": 0.64, "grad_norm": 0.07088074772151598, "learning_rate": 0.0002985314846368745, "loss": 1.4252, "step": 5982 }, { "epoch": 0.64, "grad_norm": 0.08126290418323827, "learning_rate": 0.00029837214900312395, "loss": 1.3525, "step": 5983 }, { "epoch": 0.64, "grad_norm": 0.06975498384895287, "learning_rate": 0.00029821283781744537, "loss": 1.5507, "step": 5984 }, { "epoch": 0.64, "grad_norm": 0.07945659664328315, "learning_rate": 0.0002980535510991558, "loss": 1.3528, "step": 5985 }, { "epoch": 0.64, "grad_norm": 0.07123058023593087, "learning_rate": 0.0002978942888675693, "loss": 1.3405, "step": 5986 }, { "epoch": 0.64, "grad_norm": 0.07698558265674868, "learning_rate": 0.0002977350511419972, "loss": 1.432, "step": 5987 }, { "epoch": 0.64, "grad_norm": 0.06750769980073211, "learning_rate": 0.0002975758379417474, "loss": 1.5532, "step": 5988 }, { "epoch": 0.64, "grad_norm": 0.07374373847083314, "learning_rate": 0.00029741664928612503, "loss": 1.3748, "step": 5989 }, { "epoch": 0.64, "grad_norm": 0.06784693456068619, "learning_rate": 0.00029725748519443247, "loss": 1.3238, "step": 5990 }, { "epoch": 0.64, "grad_norm": 0.0844336706395431, "learning_rate": 0.0002970983456859687, "loss": 1.3847, "step": 5991 }, { "epoch": 0.64, "grad_norm": 0.07079001459931357, "learning_rate": 0.00029693923078003004, "loss": 1.2528, "step": 5992 }, { "epoch": 0.64, "grad_norm": 0.07149040354390375, "learning_rate": 0.00029678014049590995, "loss": 1.3724, "step": 5993 }, { "epoch": 0.64, "grad_norm": 0.06824266996087566, "learning_rate": 0.00029662107485289814, "loss": 1.4637, "step": 5994 }, { "epoch": 0.64, "grad_norm": 0.0774125335188962, "learning_rate": 0.0002964620338702823, "loss": 1.38, "step": 5995 }, { "epoch": 0.64, "grad_norm": 0.07150515374202804, "learning_rate": 0.00029630301756734643, "loss": 1.524, "step": 5996 }, { "epoch": 0.64, "grad_norm": 0.07467047959655457, "learning_rate": 0.000296144025963372, "loss": 1.3389, "step": 5997 }, { "epoch": 0.64, "grad_norm": 0.07005420069909853, "learning_rate": 0.00029598505907763717, "loss": 1.2697, "step": 5998 }, { "epoch": 0.64, "grad_norm": 0.07941624953171082, "learning_rate": 0.00029582611692941733, "loss": 1.5686, "step": 5999 }, { "epoch": 0.64, "grad_norm": 0.08267345828706943, "learning_rate": 0.0002956671995379847, "loss": 1.4291, "step": 6000 }, { "epoch": 0.65, "grad_norm": 0.09815036580786829, "learning_rate": 0.0002955083069226086, "loss": 1.3402, "step": 6001 }, { "epoch": 0.65, "grad_norm": 0.07314143721806532, "learning_rate": 0.0002953494391025552, "loss": 1.3167, "step": 6002 }, { "epoch": 0.65, "grad_norm": 0.07718320633480043, "learning_rate": 0.0002951905960970879, "loss": 1.3187, "step": 6003 }, { "epoch": 0.65, "grad_norm": 0.07022848258771092, "learning_rate": 0.000295031777925467, "loss": 1.388, "step": 6004 }, { "epoch": 0.65, "grad_norm": 0.0880795025536751, "learning_rate": 0.0002948729846069496, "loss": 1.3403, "step": 6005 }, { "epoch": 0.65, "grad_norm": 0.0701067624869438, "learning_rate": 0.00029471421616078985, "loss": 1.2425, "step": 6006 }, { "epoch": 0.65, "grad_norm": 0.09666753220487535, "learning_rate": 0.0002945554726062392, "loss": 1.4792, "step": 6007 }, { "epoch": 0.65, "grad_norm": 0.08171114169072571, "learning_rate": 0.0002943967539625458, "loss": 1.5026, "step": 6008 }, { "epoch": 0.65, "grad_norm": 0.11522314251288446, "learning_rate": 0.0002942380602489547, "loss": 1.5163, "step": 6009 }, { "epoch": 0.65, "grad_norm": 0.0769067408158239, "learning_rate": 0.0002940793914847083, "loss": 1.3969, "step": 6010 }, { "epoch": 0.65, "grad_norm": 0.07605757451663893, "learning_rate": 0.0002939207476890456, "loss": 1.3257, "step": 6011 }, { "epoch": 0.65, "grad_norm": 0.08573115295190666, "learning_rate": 0.00029376212888120257, "loss": 1.4362, "step": 6012 }, { "epoch": 0.65, "grad_norm": 0.09276575884914588, "learning_rate": 0.00029360353508041257, "loss": 1.503, "step": 6013 }, { "epoch": 0.65, "grad_norm": 0.07993298408856686, "learning_rate": 0.00029344496630590543, "loss": 1.4253, "step": 6014 }, { "epoch": 0.65, "grad_norm": 0.08552281071719056, "learning_rate": 0.00029328642257690833, "loss": 1.3724, "step": 6015 }, { "epoch": 0.65, "grad_norm": 0.07851945938289058, "learning_rate": 0.0002931279039126451, "loss": 1.314, "step": 6016 }, { "epoch": 0.65, "grad_norm": 0.07843057357746579, "learning_rate": 0.0002929694103323366, "loss": 1.475, "step": 6017 }, { "epoch": 0.65, "grad_norm": 0.08650642474253452, "learning_rate": 0.00029281094185520094, "loss": 1.4325, "step": 6018 }, { "epoch": 0.65, "grad_norm": 0.07755758493084858, "learning_rate": 0.00029265249850045285, "loss": 1.4469, "step": 6019 }, { "epoch": 0.65, "grad_norm": 0.08462709644270011, "learning_rate": 0.00029249408028730406, "loss": 1.4838, "step": 6020 }, { "epoch": 0.65, "grad_norm": 0.08276090223466218, "learning_rate": 0.0002923356872349635, "loss": 1.4411, "step": 6021 }, { "epoch": 0.65, "grad_norm": 0.08773444524959222, "learning_rate": 0.00029217731936263683, "loss": 1.3605, "step": 6022 }, { "epoch": 0.65, "grad_norm": 0.08218699242748785, "learning_rate": 0.0002920189766895265, "loss": 1.3351, "step": 6023 }, { "epoch": 0.65, "grad_norm": 0.07040731094288417, "learning_rate": 0.00029186065923483243, "loss": 1.3763, "step": 6024 }, { "epoch": 0.65, "grad_norm": 0.0740188467934851, "learning_rate": 0.0002917023670177511, "loss": 1.3715, "step": 6025 }, { "epoch": 0.65, "grad_norm": 0.07362205859410367, "learning_rate": 0.00029154410005747584, "loss": 1.3693, "step": 6026 }, { "epoch": 0.65, "grad_norm": 0.07984353221296256, "learning_rate": 0.0002913858583731969, "loss": 1.4406, "step": 6027 }, { "epoch": 0.65, "grad_norm": 0.07519072202814801, "learning_rate": 0.0002912276419841019, "loss": 1.4988, "step": 6028 }, { "epoch": 0.65, "grad_norm": 0.07631248401472579, "learning_rate": 0.0002910694509093752, "loss": 1.4252, "step": 6029 }, { "epoch": 0.65, "grad_norm": 0.08027426610180229, "learning_rate": 0.00029091128516819763, "loss": 1.2927, "step": 6030 }, { "epoch": 0.65, "grad_norm": 0.10664126095564695, "learning_rate": 0.00029075314477974764, "loss": 1.3158, "step": 6031 }, { "epoch": 0.65, "grad_norm": 0.07485328411133679, "learning_rate": 0.0002905950297632004, "loss": 1.3941, "step": 6032 }, { "epoch": 0.65, "grad_norm": 0.08601281945619063, "learning_rate": 0.0002904369401377275, "loss": 1.4172, "step": 6033 }, { "epoch": 0.65, "grad_norm": 0.07099566029910874, "learning_rate": 0.00029027887592249815, "loss": 1.3107, "step": 6034 }, { "epoch": 0.65, "grad_norm": 0.07243682422839058, "learning_rate": 0.00029012083713667833, "loss": 1.3992, "step": 6035 }, { "epoch": 0.65, "grad_norm": 0.07818980658292189, "learning_rate": 0.0002899628237994305, "loss": 1.3995, "step": 6036 }, { "epoch": 0.65, "grad_norm": 0.07345641595125825, "learning_rate": 0.00028980483592991427, "loss": 1.4564, "step": 6037 }, { "epoch": 0.65, "grad_norm": 0.07378658076318882, "learning_rate": 0.0002896468735472863, "loss": 1.3621, "step": 6038 }, { "epoch": 0.65, "grad_norm": 0.07730154495877888, "learning_rate": 0.0002894889366707002, "loss": 1.3741, "step": 6039 }, { "epoch": 0.65, "grad_norm": 0.07867744297535843, "learning_rate": 0.0002893310253193062, "loss": 1.4205, "step": 6040 }, { "epoch": 0.65, "grad_norm": 0.0806104655157845, "learning_rate": 0.0002891731395122516, "loss": 1.3632, "step": 6041 }, { "epoch": 0.65, "grad_norm": 0.09214380580505434, "learning_rate": 0.0002890152792686809, "loss": 1.7369, "step": 6042 }, { "epoch": 0.65, "grad_norm": 0.09003660892734845, "learning_rate": 0.0002888574446077348, "loss": 1.3157, "step": 6043 }, { "epoch": 0.65, "grad_norm": 0.08107202269847597, "learning_rate": 0.0002886996355485514, "loss": 1.3031, "step": 6044 }, { "epoch": 0.65, "grad_norm": 0.07863112211660843, "learning_rate": 0.0002885418521102658, "loss": 1.4732, "step": 6045 }, { "epoch": 0.65, "grad_norm": 0.08105230040654554, "learning_rate": 0.00028838409431200974, "loss": 1.2731, "step": 6046 }, { "epoch": 0.65, "grad_norm": 0.08080853361750891, "learning_rate": 0.0002882263621729116, "loss": 1.4831, "step": 6047 }, { "epoch": 0.65, "grad_norm": 0.06849354451778983, "learning_rate": 0.0002880686557120973, "loss": 1.3394, "step": 6048 }, { "epoch": 0.65, "grad_norm": 0.0843146464274864, "learning_rate": 0.00028791097494868895, "loss": 1.3068, "step": 6049 }, { "epoch": 0.65, "grad_norm": 0.07860656896213726, "learning_rate": 0.0002877533199018061, "loss": 1.4533, "step": 6050 }, { "epoch": 0.65, "grad_norm": 0.07718407963474753, "learning_rate": 0.00028759569059056507, "loss": 1.4552, "step": 6051 }, { "epoch": 0.65, "grad_norm": 0.07045024717197383, "learning_rate": 0.00028743808703407867, "loss": 1.4094, "step": 6052 }, { "epoch": 0.65, "grad_norm": 0.08392898263167461, "learning_rate": 0.000287280509251457, "loss": 1.4199, "step": 6053 }, { "epoch": 0.65, "grad_norm": 0.07135753984364539, "learning_rate": 0.00028712295726180715, "loss": 1.3966, "step": 6054 }, { "epoch": 0.65, "grad_norm": 0.07971585134038697, "learning_rate": 0.00028696543108423247, "loss": 1.3936, "step": 6055 }, { "epoch": 0.65, "grad_norm": 0.07298002690430236, "learning_rate": 0.0002868079307378336, "loss": 1.4275, "step": 6056 }, { "epoch": 0.65, "grad_norm": 0.07606117292151965, "learning_rate": 0.0002866504562417086, "loss": 1.4009, "step": 6057 }, { "epoch": 0.65, "grad_norm": 0.07673914296478794, "learning_rate": 0.0002864930076149509, "loss": 1.4826, "step": 6058 }, { "epoch": 0.65, "grad_norm": 0.07935188856224078, "learning_rate": 0.00028633558487665213, "loss": 1.3802, "step": 6059 }, { "epoch": 0.65, "grad_norm": 0.07771790506095733, "learning_rate": 0.00028617818804590046, "loss": 1.3848, "step": 6060 }, { "epoch": 0.65, "grad_norm": 0.08481704582222165, "learning_rate": 0.0002860208171417804, "loss": 1.4468, "step": 6061 }, { "epoch": 0.65, "grad_norm": 0.08220689565548146, "learning_rate": 0.0002858634721833741, "loss": 1.4044, "step": 6062 }, { "epoch": 0.65, "grad_norm": 0.08349805126907142, "learning_rate": 0.00028570615318976013, "loss": 1.4339, "step": 6063 }, { "epoch": 0.65, "grad_norm": 0.0782694576272315, "learning_rate": 0.0002855488601800137, "loss": 1.377, "step": 6064 }, { "epoch": 0.65, "grad_norm": 0.07431275992276852, "learning_rate": 0.0002853915931732073, "loss": 1.3656, "step": 6065 }, { "epoch": 0.65, "grad_norm": 0.09453173639235407, "learning_rate": 0.00028523435218841026, "loss": 1.3901, "step": 6066 }, { "epoch": 0.65, "grad_norm": 0.07031269040346856, "learning_rate": 0.00028507713724468844, "loss": 1.4042, "step": 6067 }, { "epoch": 0.65, "grad_norm": 0.0882233685502084, "learning_rate": 0.0002849199483611045, "loss": 1.3748, "step": 6068 }, { "epoch": 0.65, "grad_norm": 0.07604641853251956, "learning_rate": 0.0002847627855567183, "loss": 1.36, "step": 6069 }, { "epoch": 0.65, "grad_norm": 0.08338095020434803, "learning_rate": 0.0002846056488505866, "loss": 1.4305, "step": 6070 }, { "epoch": 0.65, "grad_norm": 0.07695158760839388, "learning_rate": 0.0002844485382617624, "loss": 1.3133, "step": 6071 }, { "epoch": 0.65, "grad_norm": 0.07082354574245085, "learning_rate": 0.000284291453809296, "loss": 1.3982, "step": 6072 }, { "epoch": 0.65, "grad_norm": 0.07190282632214344, "learning_rate": 0.0002841343955122347, "loss": 1.3965, "step": 6073 }, { "epoch": 0.65, "grad_norm": 0.08003261627937998, "learning_rate": 0.00028397736338962187, "loss": 1.4751, "step": 6074 }, { "epoch": 0.65, "grad_norm": 0.07217113093699555, "learning_rate": 0.0002838203574604986, "loss": 1.4419, "step": 6075 }, { "epoch": 0.65, "grad_norm": 0.09568634177982653, "learning_rate": 0.00028366337774390237, "loss": 1.3784, "step": 6076 }, { "epoch": 0.65, "grad_norm": 0.08143677906916136, "learning_rate": 0.0002835064242588673, "loss": 1.3997, "step": 6077 }, { "epoch": 0.65, "grad_norm": 0.06714182759584644, "learning_rate": 0.00028334949702442483, "loss": 1.3418, "step": 6078 }, { "epoch": 0.65, "grad_norm": 0.07817525588333529, "learning_rate": 0.0002831925960596027, "loss": 1.4515, "step": 6079 }, { "epoch": 0.65, "grad_norm": 0.07766148184272453, "learning_rate": 0.0002830357213834256, "loss": 1.4837, "step": 6080 }, { "epoch": 0.65, "grad_norm": 0.08646731671686747, "learning_rate": 0.0002828788730149152, "loss": 1.4054, "step": 6081 }, { "epoch": 0.65, "grad_norm": 0.08325901309195903, "learning_rate": 0.0002827220509730903, "loss": 1.3141, "step": 6082 }, { "epoch": 0.65, "grad_norm": 0.07714246322339714, "learning_rate": 0.0002825652552769655, "loss": 1.3832, "step": 6083 }, { "epoch": 0.65, "grad_norm": 0.08684031433009541, "learning_rate": 0.0002824084859455531, "loss": 1.3973, "step": 6084 }, { "epoch": 0.65, "grad_norm": 0.06920637783187342, "learning_rate": 0.0002822517429978622, "loss": 1.3132, "step": 6085 }, { "epoch": 0.65, "grad_norm": 0.07746410524265271, "learning_rate": 0.0002820950264528979, "loss": 1.3978, "step": 6086 }, { "epoch": 0.65, "grad_norm": 0.07727541915447587, "learning_rate": 0.00028193833632966293, "loss": 1.3852, "step": 6087 }, { "epoch": 0.65, "grad_norm": 0.07721133474886398, "learning_rate": 0.0002817816726471566, "loss": 1.4555, "step": 6088 }, { "epoch": 0.65, "grad_norm": 0.07974176592718676, "learning_rate": 0.0002816250354243748, "loss": 1.4801, "step": 6089 }, { "epoch": 0.65, "grad_norm": 0.06787223694888275, "learning_rate": 0.00028146842468031007, "loss": 1.4043, "step": 6090 }, { "epoch": 0.65, "grad_norm": 0.08216216293404127, "learning_rate": 0.00028131184043395254, "loss": 1.3816, "step": 6091 }, { "epoch": 0.65, "grad_norm": 0.07311183455847245, "learning_rate": 0.000281155282704288, "loss": 1.5151, "step": 6092 }, { "epoch": 0.65, "grad_norm": 0.08228569645390815, "learning_rate": 0.00028099875151029987, "loss": 1.4022, "step": 6093 }, { "epoch": 0.66, "grad_norm": 0.08468156268757646, "learning_rate": 0.0002808422468709684, "loss": 1.3635, "step": 6094 }, { "epoch": 0.66, "grad_norm": 0.084115782220857, "learning_rate": 0.00028068576880526985, "loss": 1.5106, "step": 6095 }, { "epoch": 0.66, "grad_norm": 0.0939884815741877, "learning_rate": 0.0002805293173321779, "loss": 1.4746, "step": 6096 }, { "epoch": 0.66, "grad_norm": 0.08867327787743164, "learning_rate": 0.00028037289247066305, "loss": 1.3817, "step": 6097 }, { "epoch": 0.66, "grad_norm": 0.0810061217356107, "learning_rate": 0.000280216494239692, "loss": 1.3271, "step": 6098 }, { "epoch": 0.66, "grad_norm": 0.08379496101255957, "learning_rate": 0.0002800601226582289, "loss": 1.3737, "step": 6099 }, { "epoch": 0.66, "grad_norm": 0.08031308779705486, "learning_rate": 0.00027990377774523396, "loss": 1.5601, "step": 6100 }, { "epoch": 0.66, "grad_norm": 0.07753119254590604, "learning_rate": 0.00027974745951966494, "loss": 1.3183, "step": 6101 }, { "epoch": 0.66, "grad_norm": 0.08531571995244437, "learning_rate": 0.00027959116800047566, "loss": 1.3538, "step": 6102 }, { "epoch": 0.66, "grad_norm": 0.0776622313440044, "learning_rate": 0.00027943490320661704, "loss": 1.4598, "step": 6103 }, { "epoch": 0.66, "grad_norm": 0.09492248497676697, "learning_rate": 0.000279278665157037, "loss": 1.3709, "step": 6104 }, { "epoch": 0.66, "grad_norm": 0.08248818947863316, "learning_rate": 0.0002791224538706797, "loss": 1.3884, "step": 6105 }, { "epoch": 0.66, "grad_norm": 0.07468677694976233, "learning_rate": 0.0002789662693664862, "loss": 1.3133, "step": 6106 }, { "epoch": 0.66, "grad_norm": 0.08673493539274889, "learning_rate": 0.00027881011166339483, "loss": 1.3215, "step": 6107 }, { "epoch": 0.66, "grad_norm": 0.08082725029580298, "learning_rate": 0.00027865398078033977, "loss": 1.4312, "step": 6108 }, { "epoch": 0.66, "grad_norm": 0.08379905097619719, "learning_rate": 0.0002784978767362528, "loss": 1.419, "step": 6109 }, { "epoch": 0.66, "grad_norm": 0.07778141120838956, "learning_rate": 0.0002783417995500619, "loss": 1.4632, "step": 6110 }, { "epoch": 0.66, "grad_norm": 0.07634657455999405, "learning_rate": 0.00027818574924069186, "loss": 1.3575, "step": 6111 }, { "epoch": 0.66, "grad_norm": 0.08029962788812359, "learning_rate": 0.0002780297258270643, "loss": 1.3785, "step": 6112 }, { "epoch": 0.66, "grad_norm": 0.08130136262534324, "learning_rate": 0.00027787372932809806, "loss": 1.5693, "step": 6113 }, { "epoch": 0.66, "grad_norm": 0.0788973332314687, "learning_rate": 0.0002777177597627077, "loss": 1.3804, "step": 6114 }, { "epoch": 0.66, "grad_norm": 0.09548515561076097, "learning_rate": 0.00027756181714980524, "loss": 1.3901, "step": 6115 }, { "epoch": 0.66, "grad_norm": 0.07356430507555382, "learning_rate": 0.0002774059015082996, "loss": 1.3691, "step": 6116 }, { "epoch": 0.66, "grad_norm": 0.0895079629964563, "learning_rate": 0.0002772500128570955, "loss": 1.3651, "step": 6117 }, { "epoch": 0.66, "grad_norm": 0.0756789159867376, "learning_rate": 0.00027709415121509527, "loss": 1.3808, "step": 6118 }, { "epoch": 0.66, "grad_norm": 0.08174272585535419, "learning_rate": 0.00027693831660119796, "loss": 1.4582, "step": 6119 }, { "epoch": 0.66, "grad_norm": 0.07347513506484865, "learning_rate": 0.00027678250903429876, "loss": 1.3427, "step": 6120 }, { "epoch": 0.66, "grad_norm": 0.07628590655891802, "learning_rate": 0.0002766267285332896, "loss": 1.3316, "step": 6121 }, { "epoch": 0.66, "grad_norm": 0.0805266209828289, "learning_rate": 0.00027647097511706, "loss": 1.4766, "step": 6122 }, { "epoch": 0.66, "grad_norm": 0.07804202650616353, "learning_rate": 0.0002763152488044951, "loss": 1.3473, "step": 6123 }, { "epoch": 0.66, "grad_norm": 0.07563813059212712, "learning_rate": 0.0002761595496144773, "loss": 1.4365, "step": 6124 }, { "epoch": 0.66, "grad_norm": 0.07205259007008297, "learning_rate": 0.00027600387756588615, "loss": 1.4753, "step": 6125 }, { "epoch": 0.66, "grad_norm": 0.0768814719100409, "learning_rate": 0.00027584823267759684, "loss": 1.2608, "step": 6126 }, { "epoch": 0.66, "grad_norm": 0.07656294739845501, "learning_rate": 0.000275692614968482, "loss": 1.3841, "step": 6127 }, { "epoch": 0.66, "grad_norm": 0.0785779592286055, "learning_rate": 0.00027553702445741126, "loss": 1.5065, "step": 6128 }, { "epoch": 0.66, "grad_norm": 0.07947757326516629, "learning_rate": 0.0002753814611632498, "loss": 1.4559, "step": 6129 }, { "epoch": 0.66, "grad_norm": 0.07434673630299542, "learning_rate": 0.0002752259251048606, "loss": 1.3781, "step": 6130 }, { "epoch": 0.66, "grad_norm": 0.08122447339583906, "learning_rate": 0.00027507041630110305, "loss": 1.5396, "step": 6131 }, { "epoch": 0.66, "grad_norm": 0.07766472263427851, "learning_rate": 0.0002749149347708331, "loss": 1.3898, "step": 6132 }, { "epoch": 0.66, "grad_norm": 0.0875456038166925, "learning_rate": 0.000274759480532903, "loss": 1.3486, "step": 6133 }, { "epoch": 0.66, "grad_norm": 0.08330838816033348, "learning_rate": 0.00027460405360616244, "loss": 1.37, "step": 6134 }, { "epoch": 0.66, "grad_norm": 0.07743418564873557, "learning_rate": 0.00027444865400945765, "loss": 1.2946, "step": 6135 }, { "epoch": 0.66, "grad_norm": 0.07452191764881552, "learning_rate": 0.0002742932817616309, "loss": 1.4033, "step": 6136 }, { "epoch": 0.66, "grad_norm": 0.08104030355111651, "learning_rate": 0.0002741379368815219, "loss": 1.3705, "step": 6137 }, { "epoch": 0.66, "grad_norm": 0.08512209537619303, "learning_rate": 0.0002739826193879669, "loss": 1.4029, "step": 6138 }, { "epoch": 0.66, "grad_norm": 0.08401661215344652, "learning_rate": 0.0002738273292997983, "loss": 1.3075, "step": 6139 }, { "epoch": 0.66, "grad_norm": 0.08496479321642669, "learning_rate": 0.00027367206663584574, "loss": 1.412, "step": 6140 }, { "epoch": 0.66, "grad_norm": 0.07883745123661512, "learning_rate": 0.00027351683141493567, "loss": 1.3398, "step": 6141 }, { "epoch": 0.66, "grad_norm": 0.07946198965375008, "learning_rate": 0.00027336162365589045, "loss": 1.3921, "step": 6142 }, { "epoch": 0.66, "grad_norm": 0.06745687764508318, "learning_rate": 0.00027320644337752964, "loss": 1.4423, "step": 6143 }, { "epoch": 0.66, "grad_norm": 0.08892866120135398, "learning_rate": 0.00027305129059866954, "loss": 1.2575, "step": 6144 }, { "epoch": 0.66, "grad_norm": 0.06875335376318778, "learning_rate": 0.00027289616533812276, "loss": 1.4766, "step": 6145 }, { "epoch": 0.66, "grad_norm": 0.08030815203424742, "learning_rate": 0.00027274106761469884, "loss": 1.3601, "step": 6146 }, { "epoch": 0.66, "grad_norm": 0.07478088113386552, "learning_rate": 0.00027258599744720414, "loss": 1.4551, "step": 6147 }, { "epoch": 0.66, "grad_norm": 0.07733691783191772, "learning_rate": 0.000272430954854441, "loss": 1.4606, "step": 6148 }, { "epoch": 0.66, "grad_norm": 0.07488938900948164, "learning_rate": 0.0002722759398552093, "loss": 1.315, "step": 6149 }, { "epoch": 0.66, "grad_norm": 0.07264299787776964, "learning_rate": 0.00027212095246830506, "loss": 1.4117, "step": 6150 }, { "epoch": 0.66, "grad_norm": 0.08146802376307108, "learning_rate": 0.00027196599271252074, "loss": 1.3426, "step": 6151 }, { "epoch": 0.66, "grad_norm": 0.07965692665262414, "learning_rate": 0.00027181106060664617, "loss": 1.5066, "step": 6152 }, { "epoch": 0.66, "grad_norm": 0.08155946793072419, "learning_rate": 0.0002716561561694673, "loss": 1.3994, "step": 6153 }, { "epoch": 0.66, "grad_norm": 0.07316359754028685, "learning_rate": 0.00027150127941976635, "loss": 1.395, "step": 6154 }, { "epoch": 0.66, "grad_norm": 0.08202388565748747, "learning_rate": 0.0002713464303763231, "loss": 1.3386, "step": 6155 }, { "epoch": 0.66, "grad_norm": 0.07804125380153318, "learning_rate": 0.0002711916090579137, "loss": 1.4255, "step": 6156 }, { "epoch": 0.66, "grad_norm": 0.07175148851830035, "learning_rate": 0.00027103681548331027, "loss": 1.3847, "step": 6157 }, { "epoch": 0.66, "grad_norm": 0.07387603354527797, "learning_rate": 0.00027088204967128235, "loss": 1.4942, "step": 6158 }, { "epoch": 0.66, "grad_norm": 0.06952552748421588, "learning_rate": 0.00027072731164059594, "loss": 1.4904, "step": 6159 }, { "epoch": 0.66, "grad_norm": 0.07546195447701311, "learning_rate": 0.00027057260141001327, "loss": 1.3508, "step": 6160 }, { "epoch": 0.66, "grad_norm": 0.07874233894984566, "learning_rate": 0.0002704179189982936, "loss": 1.3431, "step": 6161 }, { "epoch": 0.66, "grad_norm": 0.07561417364598048, "learning_rate": 0.00027026326442419296, "loss": 1.3988, "step": 6162 }, { "epoch": 0.66, "grad_norm": 0.06969834031693835, "learning_rate": 0.00027010863770646356, "loss": 1.4481, "step": 6163 }, { "epoch": 0.66, "grad_norm": 0.06957347570850005, "learning_rate": 0.0002699540388638542, "loss": 1.5234, "step": 6164 }, { "epoch": 0.66, "grad_norm": 0.08021592576713846, "learning_rate": 0.0002697994679151106, "loss": 1.4236, "step": 6165 }, { "epoch": 0.66, "grad_norm": 0.07249343492297182, "learning_rate": 0.0002696449248789754, "loss": 1.4235, "step": 6166 }, { "epoch": 0.66, "grad_norm": 0.08347570438059976, "learning_rate": 0.000269490409774187, "loss": 1.4639, "step": 6167 }, { "epoch": 0.66, "grad_norm": 0.06813592066830779, "learning_rate": 0.0002693359226194812, "loss": 1.4131, "step": 6168 }, { "epoch": 0.66, "grad_norm": 0.088520289977426, "learning_rate": 0.0002691814634335902, "loss": 1.4653, "step": 6169 }, { "epoch": 0.66, "grad_norm": 0.07927725480495933, "learning_rate": 0.00026902703223524217, "loss": 1.3032, "step": 6170 }, { "epoch": 0.66, "grad_norm": 0.07476030815511266, "learning_rate": 0.0002688726290431629, "loss": 1.4893, "step": 6171 }, { "epoch": 0.66, "grad_norm": 0.07592562154301802, "learning_rate": 0.0002687182538760743, "loss": 1.547, "step": 6172 }, { "epoch": 0.66, "grad_norm": 0.07741661330328563, "learning_rate": 0.0002685639067526948, "loss": 1.432, "step": 6173 }, { "epoch": 0.66, "grad_norm": 0.07284139646621345, "learning_rate": 0.0002684095876917393, "loss": 1.4776, "step": 6174 }, { "epoch": 0.66, "grad_norm": 0.08675181858083143, "learning_rate": 0.00026825529671191986, "loss": 1.3249, "step": 6175 }, { "epoch": 0.66, "grad_norm": 0.08411753433275078, "learning_rate": 0.0002681010338319444, "loss": 1.4295, "step": 6176 }, { "epoch": 0.66, "grad_norm": 0.07837071521163565, "learning_rate": 0.00026794679907051817, "loss": 1.4625, "step": 6177 }, { "epoch": 0.66, "grad_norm": 0.07019930824849117, "learning_rate": 0.0002677925924463427, "loss": 1.3814, "step": 6178 }, { "epoch": 0.66, "grad_norm": 0.07544575050760528, "learning_rate": 0.00026763841397811573, "loss": 1.6822, "step": 6179 }, { "epoch": 0.66, "grad_norm": 0.0711653064704804, "learning_rate": 0.0002674842636845322, "loss": 1.2985, "step": 6180 }, { "epoch": 0.66, "grad_norm": 0.08226687927820503, "learning_rate": 0.0002673301415842835, "loss": 1.2929, "step": 6181 }, { "epoch": 0.66, "grad_norm": 0.08082723301222589, "learning_rate": 0.00026717604769605695, "loss": 1.4702, "step": 6182 }, { "epoch": 0.66, "grad_norm": 0.0771809400828876, "learning_rate": 0.0002670219820385374, "loss": 1.4699, "step": 6183 }, { "epoch": 0.66, "grad_norm": 0.07712962428203328, "learning_rate": 0.0002668679446304061, "loss": 1.3207, "step": 6184 }, { "epoch": 0.66, "grad_norm": 0.07212775681925493, "learning_rate": 0.00026671393549033983, "loss": 1.4173, "step": 6185 }, { "epoch": 0.66, "grad_norm": 0.08585221084325963, "learning_rate": 0.0002665599546370131, "loss": 1.4432, "step": 6186 }, { "epoch": 0.67, "grad_norm": 0.07161579555511431, "learning_rate": 0.0002664060020890969, "loss": 1.3093, "step": 6187 }, { "epoch": 0.67, "grad_norm": 0.07166138838974309, "learning_rate": 0.00026625207786525805, "loss": 1.3763, "step": 6188 }, { "epoch": 0.67, "grad_norm": 0.08179686758556896, "learning_rate": 0.0002660981819841607, "loss": 1.3632, "step": 6189 }, { "epoch": 0.67, "grad_norm": 0.07687496958224152, "learning_rate": 0.00026594431446446526, "loss": 1.2778, "step": 6190 }, { "epoch": 0.67, "grad_norm": 0.08429720606973103, "learning_rate": 0.00026579047532482845, "loss": 1.4235, "step": 6191 }, { "epoch": 0.67, "grad_norm": 0.07874039330715418, "learning_rate": 0.00026563666458390394, "loss": 1.468, "step": 6192 }, { "epoch": 0.67, "grad_norm": 0.0849153270188568, "learning_rate": 0.00026548288226034203, "loss": 1.3426, "step": 6193 }, { "epoch": 0.67, "grad_norm": 0.07719925522577803, "learning_rate": 0.00026532912837278893, "loss": 1.3349, "step": 6194 }, { "epoch": 0.67, "grad_norm": 0.07244606222045334, "learning_rate": 0.0002651754029398883, "loss": 1.4874, "step": 6195 }, { "epoch": 0.67, "grad_norm": 0.07435186661080587, "learning_rate": 0.0002650217059802794, "loss": 1.4572, "step": 6196 }, { "epoch": 0.67, "grad_norm": 0.07718559468867621, "learning_rate": 0.0002648680375125989, "loss": 1.4856, "step": 6197 }, { "epoch": 0.67, "grad_norm": 0.08202214417553845, "learning_rate": 0.0002647143975554793, "loss": 1.3457, "step": 6198 }, { "epoch": 0.67, "grad_norm": 0.0760250326747222, "learning_rate": 0.0002645607861275502, "loss": 1.439, "step": 6199 }, { "epoch": 0.67, "grad_norm": 0.07603471814525921, "learning_rate": 0.00026440720324743763, "loss": 1.3743, "step": 6200 }, { "epoch": 0.67, "grad_norm": 0.0828742228552649, "learning_rate": 0.0002642536489337636, "loss": 1.4984, "step": 6201 }, { "epoch": 0.67, "grad_norm": 0.08352744199976005, "learning_rate": 0.00026410012320514744, "loss": 1.5592, "step": 6202 }, { "epoch": 0.67, "grad_norm": 0.07460991618324651, "learning_rate": 0.0002639466260802048, "loss": 1.4728, "step": 6203 }, { "epoch": 0.67, "grad_norm": 0.08349369977006431, "learning_rate": 0.0002637931575775474, "loss": 1.3287, "step": 6204 }, { "epoch": 0.67, "grad_norm": 0.08921844889478096, "learning_rate": 0.00026363971771578413, "loss": 1.4241, "step": 6205 }, { "epoch": 0.67, "grad_norm": 0.07896415452446431, "learning_rate": 0.00026348630651352, "loss": 1.3908, "step": 6206 }, { "epoch": 0.67, "grad_norm": 0.08815108621019546, "learning_rate": 0.00026333292398935635, "loss": 1.4188, "step": 6207 }, { "epoch": 0.67, "grad_norm": 0.0785662676339109, "learning_rate": 0.00026317957016189155, "loss": 1.4875, "step": 6208 }, { "epoch": 0.67, "grad_norm": 0.07435226352357563, "learning_rate": 0.0002630262450497205, "loss": 1.4019, "step": 6209 }, { "epoch": 0.67, "grad_norm": 0.07869236534132566, "learning_rate": 0.0002628729486714341, "loss": 1.5194, "step": 6210 }, { "epoch": 0.67, "grad_norm": 0.08111495196318634, "learning_rate": 0.0002627196810456201, "loss": 1.4171, "step": 6211 }, { "epoch": 0.67, "grad_norm": 0.0752417116249629, "learning_rate": 0.000262566442190863, "loss": 1.1826, "step": 6212 }, { "epoch": 0.67, "grad_norm": 0.07090451927739674, "learning_rate": 0.0002624132321257432, "loss": 1.4379, "step": 6213 }, { "epoch": 0.67, "grad_norm": 0.072209257295845, "learning_rate": 0.00026226005086883806, "loss": 1.2822, "step": 6214 }, { "epoch": 0.67, "grad_norm": 0.0775073866558998, "learning_rate": 0.0002621068984387216, "loss": 1.6329, "step": 6215 }, { "epoch": 0.67, "grad_norm": 0.07276027526755828, "learning_rate": 0.00026195377485396375, "loss": 1.4094, "step": 6216 }, { "epoch": 0.67, "grad_norm": 0.07660187872068078, "learning_rate": 0.0002618006801331313, "loss": 1.4048, "step": 6217 }, { "epoch": 0.67, "grad_norm": 0.07561214597012043, "learning_rate": 0.00026164761429478767, "loss": 1.3556, "step": 6218 }, { "epoch": 0.67, "grad_norm": 0.07608383237940829, "learning_rate": 0.00026149457735749235, "loss": 1.4997, "step": 6219 }, { "epoch": 0.67, "grad_norm": 0.08117197065650657, "learning_rate": 0.00026134156933980187, "loss": 1.5299, "step": 6220 }, { "epoch": 0.67, "grad_norm": 0.07667811374754589, "learning_rate": 0.000261188590260269, "loss": 1.5114, "step": 6221 }, { "epoch": 0.67, "grad_norm": 0.07236230591516954, "learning_rate": 0.0002610356401374427, "loss": 1.4631, "step": 6222 }, { "epoch": 0.67, "grad_norm": 0.0729862517775749, "learning_rate": 0.0002608827189898688, "loss": 1.4073, "step": 6223 }, { "epoch": 0.67, "grad_norm": 0.08487372745622469, "learning_rate": 0.0002607298268360899, "loss": 1.5601, "step": 6224 }, { "epoch": 0.67, "grad_norm": 0.0895758449211697, "learning_rate": 0.0002605769636946441, "loss": 1.2656, "step": 6225 }, { "epoch": 0.67, "grad_norm": 0.07973668237947351, "learning_rate": 0.00026042412958406715, "loss": 1.2544, "step": 6226 }, { "epoch": 0.67, "grad_norm": 0.07568869419754443, "learning_rate": 0.00026027132452289013, "loss": 1.4048, "step": 6227 }, { "epoch": 0.67, "grad_norm": 0.0744278379703313, "learning_rate": 0.0002601185485296418, "loss": 1.4452, "step": 6228 }, { "epoch": 0.67, "grad_norm": 0.07385615820726067, "learning_rate": 0.00025996580162284614, "loss": 1.3883, "step": 6229 }, { "epoch": 0.67, "grad_norm": 0.07289747818726597, "learning_rate": 0.0002598130838210246, "loss": 1.5465, "step": 6230 }, { "epoch": 0.67, "grad_norm": 0.07867781806652333, "learning_rate": 0.0002596603951426949, "loss": 1.4572, "step": 6231 }, { "epoch": 0.67, "grad_norm": 0.07007763050297192, "learning_rate": 0.00025950773560637076, "loss": 1.5115, "step": 6232 }, { "epoch": 0.67, "grad_norm": 0.0781571173215587, "learning_rate": 0.00025935510523056267, "loss": 1.2784, "step": 6233 }, { "epoch": 0.67, "grad_norm": 0.08274854533054618, "learning_rate": 0.00025920250403377787, "loss": 1.4113, "step": 6234 }, { "epoch": 0.67, "grad_norm": 0.0769569700994162, "learning_rate": 0.0002590499320345195, "loss": 1.4623, "step": 6235 }, { "epoch": 0.67, "grad_norm": 0.07322203756357455, "learning_rate": 0.0002588973892512875, "loss": 1.3969, "step": 6236 }, { "epoch": 0.67, "grad_norm": 0.07285908482504222, "learning_rate": 0.00025874487570257864, "loss": 1.3235, "step": 6237 }, { "epoch": 0.67, "grad_norm": 0.0824245182399957, "learning_rate": 0.0002585923914068851, "loss": 1.3926, "step": 6238 }, { "epoch": 0.67, "grad_norm": 0.08538154374456672, "learning_rate": 0.00025843993638269616, "loss": 1.3852, "step": 6239 }, { "epoch": 0.67, "grad_norm": 0.08303180796543792, "learning_rate": 0.000258287510648498, "loss": 1.291, "step": 6240 }, { "epoch": 0.67, "grad_norm": 0.0744008611060297, "learning_rate": 0.00025813511422277224, "loss": 1.4407, "step": 6241 }, { "epoch": 0.67, "grad_norm": 0.07491379607406161, "learning_rate": 0.0002579827471239978, "loss": 1.5728, "step": 6242 }, { "epoch": 0.67, "grad_norm": 0.07224802257116374, "learning_rate": 0.0002578304093706497, "loss": 1.4437, "step": 6243 }, { "epoch": 0.67, "grad_norm": 0.08108850715756206, "learning_rate": 0.00025767810098119927, "loss": 1.527, "step": 6244 }, { "epoch": 0.67, "grad_norm": 0.07409277926113383, "learning_rate": 0.00025752582197411446, "loss": 1.5389, "step": 6245 }, { "epoch": 0.67, "grad_norm": 0.08538872621157978, "learning_rate": 0.00025737357236785984, "loss": 1.3672, "step": 6246 }, { "epoch": 0.67, "grad_norm": 0.08357544942655332, "learning_rate": 0.0002572213521808959, "loss": 1.5069, "step": 6247 }, { "epoch": 0.67, "grad_norm": 0.07945973711646714, "learning_rate": 0.0002570691614316802, "loss": 1.368, "step": 6248 }, { "epoch": 0.67, "grad_norm": 0.08676847576563627, "learning_rate": 0.00025691700013866616, "loss": 1.3731, "step": 6249 }, { "epoch": 0.67, "grad_norm": 0.08102362106338495, "learning_rate": 0.00025676486832030366, "loss": 1.4159, "step": 6250 }, { "epoch": 0.67, "grad_norm": 0.08972372454953455, "learning_rate": 0.00025661276599503945, "loss": 1.3462, "step": 6251 }, { "epoch": 0.67, "grad_norm": 0.08439968518474841, "learning_rate": 0.0002564606931813166, "loss": 1.3473, "step": 6252 }, { "epoch": 0.67, "grad_norm": 0.07589797298359928, "learning_rate": 0.0002563086498975742, "loss": 1.4392, "step": 6253 }, { "epoch": 0.67, "grad_norm": 0.07431464211955395, "learning_rate": 0.00025615663616224805, "loss": 1.3391, "step": 6254 }, { "epoch": 0.67, "grad_norm": 0.07807770877253503, "learning_rate": 0.00025600465199377066, "loss": 1.3193, "step": 6255 }, { "epoch": 0.67, "grad_norm": 0.07428484878269796, "learning_rate": 0.00025585269741057026, "loss": 1.4478, "step": 6256 }, { "epoch": 0.67, "grad_norm": 0.07081123718795393, "learning_rate": 0.00025570077243107194, "loss": 1.4212, "step": 6257 }, { "epoch": 0.67, "grad_norm": 0.08312259339625878, "learning_rate": 0.00025554887707369733, "loss": 1.3672, "step": 6258 }, { "epoch": 0.67, "grad_norm": 0.09350706453268828, "learning_rate": 0.0002553970113568642, "loss": 1.5117, "step": 6259 }, { "epoch": 0.67, "grad_norm": 0.07624662839361142, "learning_rate": 0.0002552451752989865, "loss": 1.4194, "step": 6260 }, { "epoch": 0.67, "grad_norm": 0.08381231697762678, "learning_rate": 0.0002550933689184751, "loss": 1.4533, "step": 6261 }, { "epoch": 0.67, "grad_norm": 0.0744415792079469, "learning_rate": 0.00025494159223373727, "loss": 1.4775, "step": 6262 }, { "epoch": 0.67, "grad_norm": 0.07773631383404493, "learning_rate": 0.00025478984526317597, "loss": 1.3141, "step": 6263 }, { "epoch": 0.67, "grad_norm": 0.07366855260516596, "learning_rate": 0.0002546381280251913, "loss": 1.4199, "step": 6264 }, { "epoch": 0.67, "grad_norm": 0.0801259194771756, "learning_rate": 0.0002544864405381797, "loss": 1.2572, "step": 6265 }, { "epoch": 0.67, "grad_norm": 0.07759226269944282, "learning_rate": 0.0002543347828205335, "loss": 1.3919, "step": 6266 }, { "epoch": 0.67, "grad_norm": 0.07901655886822204, "learning_rate": 0.0002541831548906417, "loss": 1.4355, "step": 6267 }, { "epoch": 0.67, "grad_norm": 0.08200143516361498, "learning_rate": 0.0002540315567668901, "loss": 1.3433, "step": 6268 }, { "epoch": 0.67, "grad_norm": 0.0860479874628173, "learning_rate": 0.00025387998846766014, "loss": 1.3361, "step": 6269 }, { "epoch": 0.67, "grad_norm": 0.0750332981935864, "learning_rate": 0.00025372845001132995, "loss": 1.39, "step": 6270 }, { "epoch": 0.67, "grad_norm": 0.08091321341472489, "learning_rate": 0.0002535769414162744, "loss": 1.3943, "step": 6271 }, { "epoch": 0.67, "grad_norm": 0.08131512343994637, "learning_rate": 0.0002534254627008641, "loss": 1.3901, "step": 6272 }, { "epoch": 0.67, "grad_norm": 0.08340487879640397, "learning_rate": 0.0002532740138834665, "loss": 1.4814, "step": 6273 }, { "epoch": 0.67, "grad_norm": 0.0823885510183785, "learning_rate": 0.00025312259498244547, "loss": 1.4846, "step": 6274 }, { "epoch": 0.67, "grad_norm": 0.08451185254200341, "learning_rate": 0.00025297120601616073, "loss": 1.2977, "step": 6275 }, { "epoch": 0.67, "grad_norm": 0.08059700760409108, "learning_rate": 0.0002528198470029689, "loss": 1.3714, "step": 6276 }, { "epoch": 0.67, "grad_norm": 0.08329022693965066, "learning_rate": 0.00025266851796122296, "loss": 1.3995, "step": 6277 }, { "epoch": 0.67, "grad_norm": 0.07771378342062599, "learning_rate": 0.0002525172189092717, "loss": 1.4348, "step": 6278 }, { "epoch": 0.67, "grad_norm": 0.08460287024642418, "learning_rate": 0.00025236594986546113, "loss": 1.4236, "step": 6279 }, { "epoch": 0.68, "grad_norm": 0.07704392579140935, "learning_rate": 0.00025221471084813275, "loss": 1.3786, "step": 6280 }, { "epoch": 0.68, "grad_norm": 0.08446989165175656, "learning_rate": 0.0002520635018756248, "loss": 1.3692, "step": 6281 }, { "epoch": 0.68, "grad_norm": 0.08354359803524838, "learning_rate": 0.00025191232296627197, "loss": 1.569, "step": 6282 }, { "epoch": 0.68, "grad_norm": 0.08001104967843899, "learning_rate": 0.0002517611741384055, "loss": 1.3756, "step": 6283 }, { "epoch": 0.68, "grad_norm": 0.0799008608461087, "learning_rate": 0.0002516100554103523, "loss": 1.4743, "step": 6284 }, { "epoch": 0.68, "grad_norm": 0.08895399103852052, "learning_rate": 0.0002514589668004362, "loss": 1.4824, "step": 6285 }, { "epoch": 0.68, "grad_norm": 0.08389049233996097, "learning_rate": 0.00025130790832697737, "loss": 1.4368, "step": 6286 }, { "epoch": 0.68, "grad_norm": 0.08167409917869137, "learning_rate": 0.0002511568800082919, "loss": 1.4084, "step": 6287 }, { "epoch": 0.68, "grad_norm": 0.0820738335616238, "learning_rate": 0.00025100588186269257, "loss": 1.272, "step": 6288 }, { "epoch": 0.68, "grad_norm": 0.07262380780001264, "learning_rate": 0.0002508549139084887, "loss": 1.5007, "step": 6289 }, { "epoch": 0.68, "grad_norm": 0.08250263142633718, "learning_rate": 0.00025070397616398543, "loss": 1.291, "step": 6290 }, { "epoch": 0.68, "grad_norm": 0.07324180126088345, "learning_rate": 0.00025055306864748433, "loss": 1.3554, "step": 6291 }, { "epoch": 0.68, "grad_norm": 0.07491530886846616, "learning_rate": 0.0002504021913772836, "loss": 1.3406, "step": 6292 }, { "epoch": 0.68, "grad_norm": 0.07998014625838289, "learning_rate": 0.00025025134437167793, "loss": 1.522, "step": 6293 }, { "epoch": 0.68, "grad_norm": 0.08076320367760288, "learning_rate": 0.0002501005276489575, "loss": 1.3689, "step": 6294 }, { "epoch": 0.68, "grad_norm": 0.0781089542719641, "learning_rate": 0.00024994974122740965, "loss": 1.4426, "step": 6295 }, { "epoch": 0.68, "grad_norm": 0.07999448507874525, "learning_rate": 0.00024979898512531793, "loss": 1.4671, "step": 6296 }, { "epoch": 0.68, "grad_norm": 0.07443936163831828, "learning_rate": 0.00024964825936096163, "loss": 1.4709, "step": 6297 }, { "epoch": 0.68, "grad_norm": 0.07752927506986254, "learning_rate": 0.00024949756395261703, "loss": 1.4224, "step": 6298 }, { "epoch": 0.68, "grad_norm": 0.07297892064768649, "learning_rate": 0.00024934689891855657, "loss": 1.4106, "step": 6299 }, { "epoch": 0.68, "grad_norm": 0.0705016410192572, "learning_rate": 0.00024919626427704874, "loss": 1.4373, "step": 6300 }, { "epoch": 0.68, "grad_norm": 0.06998077427911564, "learning_rate": 0.0002490456600463583, "loss": 1.3348, "step": 6301 }, { "epoch": 0.68, "grad_norm": 0.08936338441734996, "learning_rate": 0.0002488950862447469, "loss": 1.4434, "step": 6302 }, { "epoch": 0.68, "grad_norm": 0.08152473768427583, "learning_rate": 0.00024874454289047196, "loss": 1.3254, "step": 6303 }, { "epoch": 0.68, "grad_norm": 0.0811328676071586, "learning_rate": 0.0002485940300017873, "loss": 1.2318, "step": 6304 }, { "epoch": 0.68, "grad_norm": 0.07536323950532108, "learning_rate": 0.00024844354759694337, "loss": 1.3747, "step": 6305 }, { "epoch": 0.68, "grad_norm": 0.08189574613290967, "learning_rate": 0.0002482930956941865, "loss": 1.3824, "step": 6306 }, { "epoch": 0.68, "grad_norm": 0.08466969026863551, "learning_rate": 0.0002481426743117594, "loss": 1.4511, "step": 6307 }, { "epoch": 0.68, "grad_norm": 0.07700482866848125, "learning_rate": 0.00024799228346790154, "loss": 1.3756, "step": 6308 }, { "epoch": 0.68, "grad_norm": 0.07394423034126002, "learning_rate": 0.00024784192318084796, "loss": 1.3997, "step": 6309 }, { "epoch": 0.68, "grad_norm": 0.08163148393060057, "learning_rate": 0.0002476915934688305, "loss": 1.3569, "step": 6310 }, { "epoch": 0.68, "grad_norm": 0.08245747676419808, "learning_rate": 0.00024754129435007756, "loss": 1.4508, "step": 6311 }, { "epoch": 0.68, "grad_norm": 0.08316884689132627, "learning_rate": 0.00024739102584281266, "loss": 1.5506, "step": 6312 }, { "epoch": 0.68, "grad_norm": 0.08981091998302579, "learning_rate": 0.0002472407879652567, "loss": 1.3847, "step": 6313 }, { "epoch": 0.68, "grad_norm": 0.07069410558333868, "learning_rate": 0.00024709058073562684, "loss": 1.3207, "step": 6314 }, { "epoch": 0.68, "grad_norm": 0.07217099140842033, "learning_rate": 0.0002469404041721358, "loss": 1.2995, "step": 6315 }, { "epoch": 0.68, "grad_norm": 0.0819390025189319, "learning_rate": 0.0002467902582929931, "loss": 1.3276, "step": 6316 }, { "epoch": 0.68, "grad_norm": 0.09764517482898454, "learning_rate": 0.0002466401431164048, "loss": 1.3504, "step": 6317 }, { "epoch": 0.68, "grad_norm": 0.08663621031922736, "learning_rate": 0.0002464900586605724, "loss": 1.4582, "step": 6318 }, { "epoch": 0.68, "grad_norm": 0.07135499230948997, "learning_rate": 0.0002463400049436944, "loss": 1.3456, "step": 6319 }, { "epoch": 0.68, "grad_norm": 0.07806358456612215, "learning_rate": 0.0002461899819839654, "loss": 1.4174, "step": 6320 }, { "epoch": 0.68, "grad_norm": 0.08049798140545715, "learning_rate": 0.000246039989799576, "loss": 1.3761, "step": 6321 }, { "epoch": 0.68, "grad_norm": 0.08031384734125688, "learning_rate": 0.00024589002840871355, "loss": 1.374, "step": 6322 }, { "epoch": 0.68, "grad_norm": 0.07909678000250613, "learning_rate": 0.00024574009782956096, "loss": 1.6276, "step": 6323 }, { "epoch": 0.68, "grad_norm": 0.07584212527403288, "learning_rate": 0.0002455901980802983, "loss": 1.428, "step": 6324 }, { "epoch": 0.68, "grad_norm": 0.08462804630783761, "learning_rate": 0.000245440329179101, "loss": 1.3807, "step": 6325 }, { "epoch": 0.68, "grad_norm": 0.09435281069530776, "learning_rate": 0.0002452904911441414, "loss": 1.3654, "step": 6326 }, { "epoch": 0.68, "grad_norm": 0.08026059661730983, "learning_rate": 0.0002451406839935881, "loss": 1.2393, "step": 6327 }, { "epoch": 0.68, "grad_norm": 0.09463728059653936, "learning_rate": 0.00024499090774560524, "loss": 1.3196, "step": 6328 }, { "epoch": 0.68, "grad_norm": 0.08561772489183278, "learning_rate": 0.00024484116241835403, "loss": 1.4476, "step": 6329 }, { "epoch": 0.68, "grad_norm": 0.08344976913598492, "learning_rate": 0.00024469144802999164, "loss": 1.3839, "step": 6330 }, { "epoch": 0.68, "grad_norm": 0.0855239337159454, "learning_rate": 0.0002445417645986713, "loss": 1.5262, "step": 6331 }, { "epoch": 0.68, "grad_norm": 0.08926849614318069, "learning_rate": 0.00024439211214254277, "loss": 1.3539, "step": 6332 }, { "epoch": 0.68, "grad_norm": 0.08822555963908468, "learning_rate": 0.0002442424906797519, "loss": 1.3936, "step": 6333 }, { "epoch": 0.68, "grad_norm": 0.08578391466405638, "learning_rate": 0.0002440929002284406, "loss": 1.4229, "step": 6334 }, { "epoch": 0.68, "grad_norm": 0.086223295049479, "learning_rate": 0.00024394334080674734, "loss": 1.3489, "step": 6335 }, { "epoch": 0.68, "grad_norm": 0.10149756749252606, "learning_rate": 0.00024379381243280703, "loss": 1.5394, "step": 6336 }, { "epoch": 0.68, "grad_norm": 0.08212011780136454, "learning_rate": 0.00024364431512475, "loss": 1.4554, "step": 6337 }, { "epoch": 0.68, "grad_norm": 0.09010571016682901, "learning_rate": 0.00024349484890070355, "loss": 1.4116, "step": 6338 }, { "epoch": 0.68, "grad_norm": 0.10287841772251785, "learning_rate": 0.00024334541377879116, "loss": 1.4102, "step": 6339 }, { "epoch": 0.68, "grad_norm": 0.08646469129338254, "learning_rate": 0.00024319600977713203, "loss": 1.3129, "step": 6340 }, { "epoch": 0.68, "grad_norm": 0.07477706387936801, "learning_rate": 0.00024304663691384205, "loss": 1.3683, "step": 6341 }, { "epoch": 0.68, "grad_norm": 0.0742561087884464, "learning_rate": 0.00024289729520703335, "loss": 1.4076, "step": 6342 }, { "epoch": 0.68, "grad_norm": 0.0727554495538966, "learning_rate": 0.00024274798467481396, "loss": 1.391, "step": 6343 }, { "epoch": 0.68, "grad_norm": 0.09096214796504422, "learning_rate": 0.00024259870533528815, "loss": 1.3449, "step": 6344 }, { "epoch": 0.68, "grad_norm": 0.07170963499794096, "learning_rate": 0.00024244945720655693, "loss": 1.3732, "step": 6345 }, { "epoch": 0.68, "grad_norm": 0.07006967689524883, "learning_rate": 0.0002423002403067167, "loss": 1.5636, "step": 6346 }, { "epoch": 0.68, "grad_norm": 0.07720905952192304, "learning_rate": 0.00024215105465386078, "loss": 1.2022, "step": 6347 }, { "epoch": 0.68, "grad_norm": 0.07092099425334585, "learning_rate": 0.0002420019002660786, "loss": 1.3869, "step": 6348 }, { "epoch": 0.68, "grad_norm": 0.07731471823323678, "learning_rate": 0.0002418527771614553, "loss": 1.375, "step": 6349 }, { "epoch": 0.68, "grad_norm": 0.0670229260877233, "learning_rate": 0.00024170368535807274, "loss": 1.3853, "step": 6350 }, { "epoch": 0.68, "grad_norm": 0.07526592555668116, "learning_rate": 0.00024155462487400898, "loss": 1.4127, "step": 6351 }, { "epoch": 0.68, "grad_norm": 0.0824513831130822, "learning_rate": 0.00024140559572733778, "loss": 1.4122, "step": 6352 }, { "epoch": 0.68, "grad_norm": 0.06022553289796199, "learning_rate": 0.0002412565979361298, "loss": 1.4234, "step": 6353 }, { "epoch": 0.68, "grad_norm": 0.06856769172839336, "learning_rate": 0.00024110763151845112, "loss": 1.3135, "step": 6354 }, { "epoch": 0.68, "grad_norm": 0.07706925472267376, "learning_rate": 0.00024095869649236491, "loss": 1.295, "step": 6355 }, { "epoch": 0.68, "grad_norm": 0.08128807781499314, "learning_rate": 0.00024080979287592952, "loss": 1.4854, "step": 6356 }, { "epoch": 0.68, "grad_norm": 0.07997548107576101, "learning_rate": 0.0002406609206872004, "loss": 1.4402, "step": 6357 }, { "epoch": 0.68, "grad_norm": 0.0812986548596618, "learning_rate": 0.00024051207994422885, "loss": 1.3249, "step": 6358 }, { "epoch": 0.68, "grad_norm": 0.07668872546913742, "learning_rate": 0.00024036327066506203, "loss": 1.4075, "step": 6359 }, { "epoch": 0.68, "grad_norm": 0.07576029951246802, "learning_rate": 0.00024021449286774378, "loss": 1.3767, "step": 6360 }, { "epoch": 0.68, "grad_norm": 0.0705989781571333, "learning_rate": 0.00024006574657031406, "loss": 1.4834, "step": 6361 }, { "epoch": 0.68, "grad_norm": 0.08410816163192895, "learning_rate": 0.00023991703179080848, "loss": 1.3311, "step": 6362 }, { "epoch": 0.68, "grad_norm": 0.0744909882298907, "learning_rate": 0.00023976834854725943, "loss": 1.4475, "step": 6363 }, { "epoch": 0.68, "grad_norm": 0.07417287239926322, "learning_rate": 0.00023961969685769568, "loss": 1.4801, "step": 6364 }, { "epoch": 0.68, "grad_norm": 0.08508771858692672, "learning_rate": 0.00023947107674014096, "loss": 1.3167, "step": 6365 }, { "epoch": 0.68, "grad_norm": 0.08704235220539557, "learning_rate": 0.00023932248821261637, "loss": 1.4221, "step": 6366 }, { "epoch": 0.68, "grad_norm": 0.06998956050843806, "learning_rate": 0.00023917393129313896, "loss": 1.4133, "step": 6367 }, { "epoch": 0.68, "grad_norm": 0.07324472581915419, "learning_rate": 0.00023902540599972144, "loss": 1.378, "step": 6368 }, { "epoch": 0.68, "grad_norm": 0.07294277583373579, "learning_rate": 0.00023887691235037313, "loss": 1.4382, "step": 6369 }, { "epoch": 0.68, "grad_norm": 0.08534875004191578, "learning_rate": 0.0002387284503630996, "loss": 1.4318, "step": 6370 }, { "epoch": 0.68, "grad_norm": 0.07220491552722837, "learning_rate": 0.00023858002005590202, "loss": 1.4859, "step": 6371 }, { "epoch": 0.68, "grad_norm": 0.08203133231113481, "learning_rate": 0.00023843162144677828, "loss": 1.4108, "step": 6372 }, { "epoch": 0.69, "grad_norm": 0.07399688753489576, "learning_rate": 0.00023828325455372236, "loss": 1.4796, "step": 6373 }, { "epoch": 0.69, "grad_norm": 0.08070783051161995, "learning_rate": 0.00023813491939472393, "loss": 1.344, "step": 6374 }, { "epoch": 0.69, "grad_norm": 0.08277727679169404, "learning_rate": 0.0002379866159877695, "loss": 1.2994, "step": 6375 }, { "epoch": 0.69, "grad_norm": 0.07044262154113139, "learning_rate": 0.00023783834435084117, "loss": 1.3152, "step": 6376 }, { "epoch": 0.69, "grad_norm": 0.07933013616591182, "learning_rate": 0.0002376901045019172, "loss": 1.4583, "step": 6377 }, { "epoch": 0.69, "grad_norm": 0.08308805766312702, "learning_rate": 0.0002375418964589724, "loss": 1.2326, "step": 6378 }, { "epoch": 0.69, "grad_norm": 0.08249281840241653, "learning_rate": 0.00023739372023997763, "loss": 1.4402, "step": 6379 }, { "epoch": 0.69, "grad_norm": 0.09484665425518647, "learning_rate": 0.00023724557586289946, "loss": 1.1794, "step": 6380 }, { "epoch": 0.69, "grad_norm": 0.0739011722621357, "learning_rate": 0.0002370974633457011, "loss": 1.3369, "step": 6381 }, { "epoch": 0.69, "grad_norm": 0.07765413859608447, "learning_rate": 0.00023694938270634188, "loss": 1.3658, "step": 6382 }, { "epoch": 0.69, "grad_norm": 0.08727919704600484, "learning_rate": 0.0002368013339627766, "loss": 1.3588, "step": 6383 }, { "epoch": 0.69, "grad_norm": 0.07282288372917742, "learning_rate": 0.00023665331713295702, "loss": 1.3357, "step": 6384 }, { "epoch": 0.69, "grad_norm": 0.07995318342105713, "learning_rate": 0.00023650533223483085, "loss": 1.4342, "step": 6385 }, { "epoch": 0.69, "grad_norm": 0.07350102385403665, "learning_rate": 0.00023635737928634148, "loss": 1.4507, "step": 6386 }, { "epoch": 0.69, "grad_norm": 0.07565174355512315, "learning_rate": 0.0002362094583054286, "loss": 1.4482, "step": 6387 }, { "epoch": 0.69, "grad_norm": 0.07992476149419737, "learning_rate": 0.00023606156931002838, "loss": 1.4335, "step": 6388 }, { "epoch": 0.69, "grad_norm": 0.08573538817338362, "learning_rate": 0.00023591371231807296, "loss": 1.397, "step": 6389 }, { "epoch": 0.69, "grad_norm": 0.08571710420595538, "learning_rate": 0.00023576588734749022, "loss": 1.4763, "step": 6390 }, { "epoch": 0.69, "grad_norm": 0.0731916157652849, "learning_rate": 0.00023561809441620458, "loss": 1.3818, "step": 6391 }, { "epoch": 0.69, "grad_norm": 0.08771184646456208, "learning_rate": 0.00023547033354213658, "loss": 1.3579, "step": 6392 }, { "epoch": 0.69, "grad_norm": 0.077798234525272, "learning_rate": 0.0002353226047432025, "loss": 1.3273, "step": 6393 }, { "epoch": 0.69, "grad_norm": 0.07815607317243291, "learning_rate": 0.00023517490803731507, "loss": 1.4548, "step": 6394 }, { "epoch": 0.69, "grad_norm": 0.08263717397214161, "learning_rate": 0.00023502724344238318, "loss": 1.4002, "step": 6395 }, { "epoch": 0.69, "grad_norm": 0.08569366911481649, "learning_rate": 0.00023487961097631155, "loss": 1.3903, "step": 6396 }, { "epoch": 0.69, "grad_norm": 0.07121023055466971, "learning_rate": 0.0002347320106570009, "loss": 1.3606, "step": 6397 }, { "epoch": 0.69, "grad_norm": 0.08475099857112675, "learning_rate": 0.00023458444250234868, "loss": 1.461, "step": 6398 }, { "epoch": 0.69, "grad_norm": 0.08412237390244266, "learning_rate": 0.00023443690653024763, "loss": 1.3305, "step": 6399 }, { "epoch": 0.69, "grad_norm": 0.08095037113122881, "learning_rate": 0.0002342894027585872, "loss": 1.2791, "step": 6400 }, { "epoch": 0.69, "grad_norm": 0.06950633340592942, "learning_rate": 0.0002341419312052529, "loss": 1.2907, "step": 6401 }, { "epoch": 0.69, "grad_norm": 0.08264516966426638, "learning_rate": 0.00023399449188812584, "loss": 1.2311, "step": 6402 }, { "epoch": 0.69, "grad_norm": 0.08734564167313208, "learning_rate": 0.00023384708482508364, "loss": 1.4628, "step": 6403 }, { "epoch": 0.69, "grad_norm": 0.08726129525601546, "learning_rate": 0.0002336997100340002, "loss": 1.3995, "step": 6404 }, { "epoch": 0.69, "grad_norm": 0.07588282327103045, "learning_rate": 0.00023355236753274484, "loss": 1.4151, "step": 6405 }, { "epoch": 0.69, "grad_norm": 0.08370642493906598, "learning_rate": 0.00023340505733918365, "loss": 1.4016, "step": 6406 }, { "epoch": 0.69, "grad_norm": 0.08108362721094939, "learning_rate": 0.0002332577794711783, "loss": 1.598, "step": 6407 }, { "epoch": 0.69, "grad_norm": 0.08135817340718471, "learning_rate": 0.0002331105339465867, "loss": 1.2881, "step": 6408 }, { "epoch": 0.69, "grad_norm": 0.09012388270227195, "learning_rate": 0.00023296332078326292, "loss": 1.3892, "step": 6409 }, { "epoch": 0.69, "grad_norm": 0.07793560241657278, "learning_rate": 0.00023281613999905732, "loss": 1.4816, "step": 6410 }, { "epoch": 0.69, "grad_norm": 0.08867833354510514, "learning_rate": 0.00023266899161181564, "loss": 1.4143, "step": 6411 }, { "epoch": 0.69, "grad_norm": 0.07688612344754879, "learning_rate": 0.00023252187563938044, "loss": 1.4396, "step": 6412 }, { "epoch": 0.69, "grad_norm": 0.07695243857532634, "learning_rate": 0.00023237479209959006, "loss": 1.4831, "step": 6413 }, { "epoch": 0.69, "grad_norm": 0.10597533406334113, "learning_rate": 0.0002322277410102786, "loss": 1.3586, "step": 6414 }, { "epoch": 0.69, "grad_norm": 0.08486673254253517, "learning_rate": 0.00023208072238927675, "loss": 1.4086, "step": 6415 }, { "epoch": 0.69, "grad_norm": 0.09352301771338838, "learning_rate": 0.00023193373625441112, "loss": 1.3706, "step": 6416 }, { "epoch": 0.69, "grad_norm": 0.09267212329917103, "learning_rate": 0.0002317867826235041, "loss": 1.4183, "step": 6417 }, { "epoch": 0.69, "grad_norm": 0.08024382166748611, "learning_rate": 0.00023163986151437423, "loss": 1.4594, "step": 6418 }, { "epoch": 0.69, "grad_norm": 0.07312178808860083, "learning_rate": 0.0002314929729448363, "loss": 1.3003, "step": 6419 }, { "epoch": 0.69, "grad_norm": 0.1003750783018416, "learning_rate": 0.0002313461169327013, "loss": 1.3099, "step": 6420 }, { "epoch": 0.69, "grad_norm": 0.08298443226564493, "learning_rate": 0.00023119929349577555, "loss": 1.4246, "step": 6421 }, { "epoch": 0.69, "grad_norm": 0.08834680875376852, "learning_rate": 0.00023105250265186223, "loss": 1.5498, "step": 6422 }, { "epoch": 0.69, "grad_norm": 0.08112147320182733, "learning_rate": 0.00023090574441876033, "loss": 1.531, "step": 6423 }, { "epoch": 0.69, "grad_norm": 0.08887189548723178, "learning_rate": 0.00023075901881426447, "loss": 1.4392, "step": 6424 }, { "epoch": 0.69, "grad_norm": 0.07775527371255965, "learning_rate": 0.00023061232585616577, "loss": 1.2903, "step": 6425 }, { "epoch": 0.69, "grad_norm": 0.07631985491448418, "learning_rate": 0.00023046566556225145, "loss": 1.366, "step": 6426 }, { "epoch": 0.69, "grad_norm": 0.07884367654719883, "learning_rate": 0.0002303190379503043, "loss": 1.4406, "step": 6427 }, { "epoch": 0.69, "grad_norm": 0.07647140083236327, "learning_rate": 0.00023017244303810363, "loss": 1.5203, "step": 6428 }, { "epoch": 0.69, "grad_norm": 0.0755226169127557, "learning_rate": 0.0002300258808434245, "loss": 1.4164, "step": 6429 }, { "epoch": 0.69, "grad_norm": 0.07622607302032058, "learning_rate": 0.00022987935138403786, "loss": 1.4841, "step": 6430 }, { "epoch": 0.69, "grad_norm": 0.0824646838031537, "learning_rate": 0.00022973285467771116, "loss": 1.5279, "step": 6431 }, { "epoch": 0.69, "grad_norm": 0.0886875422328421, "learning_rate": 0.0002295863907422077, "loss": 1.545, "step": 6432 }, { "epoch": 0.69, "grad_norm": 0.08848464683625346, "learning_rate": 0.00022943995959528652, "loss": 1.3529, "step": 6433 }, { "epoch": 0.69, "grad_norm": 0.08132183333693678, "learning_rate": 0.00022929356125470297, "loss": 1.36, "step": 6434 }, { "epoch": 0.69, "grad_norm": 0.0879109504827401, "learning_rate": 0.0002291471957382085, "loss": 1.4193, "step": 6435 }, { "epoch": 0.69, "grad_norm": 0.07435601021309238, "learning_rate": 0.00022900086306355022, "loss": 1.4843, "step": 6436 }, { "epoch": 0.69, "grad_norm": 0.07947785083610934, "learning_rate": 0.00022885456324847153, "loss": 1.4676, "step": 6437 }, { "epoch": 0.69, "grad_norm": 0.08881617486248049, "learning_rate": 0.00022870829631071204, "loss": 1.385, "step": 6438 }, { "epoch": 0.69, "grad_norm": 0.08314651513325921, "learning_rate": 0.00022856206226800686, "loss": 1.4345, "step": 6439 }, { "epoch": 0.69, "grad_norm": 0.08039152643497237, "learning_rate": 0.00022841586113808726, "loss": 1.4642, "step": 6440 }, { "epoch": 0.69, "grad_norm": 0.08254667264623641, "learning_rate": 0.00022826969293868098, "loss": 1.3568, "step": 6441 }, { "epoch": 0.69, "grad_norm": 0.0850751659769357, "learning_rate": 0.00022812355768751102, "loss": 1.4961, "step": 6442 }, { "epoch": 0.69, "grad_norm": 0.07569920317359677, "learning_rate": 0.00022797745540229704, "loss": 1.3281, "step": 6443 }, { "epoch": 0.69, "grad_norm": 0.08527834837940627, "learning_rate": 0.00022783138610075454, "loss": 1.5263, "step": 6444 }, { "epoch": 0.69, "grad_norm": 0.09203558938704211, "learning_rate": 0.00022768534980059464, "loss": 1.3848, "step": 6445 }, { "epoch": 0.69, "grad_norm": 0.08002778832140865, "learning_rate": 0.00022753934651952484, "loss": 1.4289, "step": 6446 }, { "epoch": 0.69, "grad_norm": 0.07691283557052742, "learning_rate": 0.0002273933762752488, "loss": 1.4839, "step": 6447 }, { "epoch": 0.69, "grad_norm": 0.08555435524639886, "learning_rate": 0.00022724743908546552, "loss": 1.3288, "step": 6448 }, { "epoch": 0.69, "grad_norm": 0.07359411407844321, "learning_rate": 0.00022710153496787074, "loss": 1.3386, "step": 6449 }, { "epoch": 0.69, "grad_norm": 0.08535186262318026, "learning_rate": 0.0002269556639401555, "loss": 1.3845, "step": 6450 }, { "epoch": 0.69, "grad_norm": 0.08081806643440836, "learning_rate": 0.00022680982602000748, "loss": 1.3093, "step": 6451 }, { "epoch": 0.69, "grad_norm": 0.08042330242595397, "learning_rate": 0.00022666402122510976, "loss": 1.2788, "step": 6452 }, { "epoch": 0.69, "grad_norm": 0.0770073548322996, "learning_rate": 0.00022651824957314176, "loss": 1.3039, "step": 6453 }, { "epoch": 0.69, "grad_norm": 0.07566086212385112, "learning_rate": 0.00022637251108177902, "loss": 1.4038, "step": 6454 }, { "epoch": 0.69, "grad_norm": 0.09810806066509348, "learning_rate": 0.0002262268057686925, "loss": 1.393, "step": 6455 }, { "epoch": 0.69, "grad_norm": 0.07640142270989021, "learning_rate": 0.0002260811336515496, "loss": 1.5287, "step": 6456 }, { "epoch": 0.69, "grad_norm": 0.08288360938472895, "learning_rate": 0.00022593549474801377, "loss": 1.3465, "step": 6457 }, { "epoch": 0.69, "grad_norm": 0.08416800605991567, "learning_rate": 0.00022578988907574388, "loss": 1.375, "step": 6458 }, { "epoch": 0.69, "grad_norm": 0.08118995759959549, "learning_rate": 0.00022564431665239544, "loss": 1.4158, "step": 6459 }, { "epoch": 0.69, "grad_norm": 0.09327858003512501, "learning_rate": 0.00022549877749561943, "loss": 1.4409, "step": 6460 }, { "epoch": 0.69, "grad_norm": 0.08104347386981663, "learning_rate": 0.00022535327162306285, "loss": 1.3011, "step": 6461 }, { "epoch": 0.69, "grad_norm": 0.08079496316961529, "learning_rate": 0.00022520779905236892, "loss": 1.3472, "step": 6462 }, { "epoch": 0.69, "grad_norm": 0.09930812169108252, "learning_rate": 0.00022506235980117697, "loss": 1.3556, "step": 6463 }, { "epoch": 0.69, "grad_norm": 0.081827074933239, "learning_rate": 0.00022491695388712147, "loss": 1.4348, "step": 6464 }, { "epoch": 0.69, "grad_norm": 0.08327321510981332, "learning_rate": 0.0002247715813278337, "loss": 1.2857, "step": 6465 }, { "epoch": 0.7, "grad_norm": 0.07800325727916337, "learning_rate": 0.00022462624214094075, "loss": 1.4488, "step": 6466 }, { "epoch": 0.7, "grad_norm": 0.09429196943285079, "learning_rate": 0.00022448093634406507, "loss": 1.483, "step": 6467 }, { "epoch": 0.7, "grad_norm": 0.09258873648328031, "learning_rate": 0.00022433566395482573, "loss": 1.3856, "step": 6468 }, { "epoch": 0.7, "grad_norm": 0.08195780122541721, "learning_rate": 0.0002241904249908377, "loss": 1.288, "step": 6469 }, { "epoch": 0.7, "grad_norm": 0.07988131648227399, "learning_rate": 0.0002240452194697115, "loss": 1.4128, "step": 6470 }, { "epoch": 0.7, "grad_norm": 0.0894159323508057, "learning_rate": 0.00022390004740905362, "loss": 1.5256, "step": 6471 }, { "epoch": 0.7, "grad_norm": 0.08880130190892369, "learning_rate": 0.000223754908826467, "loss": 1.2916, "step": 6472 }, { "epoch": 0.7, "grad_norm": 0.09106245882443116, "learning_rate": 0.00022360980373954987, "loss": 1.4184, "step": 6473 }, { "epoch": 0.7, "grad_norm": 0.08358404758333761, "learning_rate": 0.0002234647321658969, "loss": 1.4161, "step": 6474 }, { "epoch": 0.7, "grad_norm": 0.08990776461075271, "learning_rate": 0.00022331969412309877, "loss": 1.3321, "step": 6475 }, { "epoch": 0.7, "grad_norm": 0.08028040754728914, "learning_rate": 0.00022317468962874132, "loss": 1.3376, "step": 6476 }, { "epoch": 0.7, "grad_norm": 0.09033607654277113, "learning_rate": 0.00022302971870040718, "loss": 1.4686, "step": 6477 }, { "epoch": 0.7, "grad_norm": 0.09633659162771828, "learning_rate": 0.00022288478135567465, "loss": 1.4402, "step": 6478 }, { "epoch": 0.7, "grad_norm": 0.07054694812027362, "learning_rate": 0.00022273987761211756, "loss": 1.4753, "step": 6479 }, { "epoch": 0.7, "grad_norm": 0.07971226310945649, "learning_rate": 0.00022259500748730637, "loss": 1.4936, "step": 6480 }, { "epoch": 0.7, "grad_norm": 0.08270479882148428, "learning_rate": 0.00022245017099880665, "loss": 1.3579, "step": 6481 }, { "epoch": 0.7, "grad_norm": 0.0943209601689459, "learning_rate": 0.0002223053681641808, "loss": 1.4191, "step": 6482 }, { "epoch": 0.7, "grad_norm": 0.08834209518549241, "learning_rate": 0.00022216059900098624, "loss": 1.445, "step": 6483 }, { "epoch": 0.7, "grad_norm": 0.08388454303429033, "learning_rate": 0.00022201586352677688, "loss": 1.4445, "step": 6484 }, { "epoch": 0.7, "grad_norm": 0.07909092713902374, "learning_rate": 0.0002218711617591026, "loss": 1.3958, "step": 6485 }, { "epoch": 0.7, "grad_norm": 0.06978073299876707, "learning_rate": 0.00022172649371550863, "loss": 1.4058, "step": 6486 }, { "epoch": 0.7, "grad_norm": 0.08679179635542227, "learning_rate": 0.0002215818594135367, "loss": 1.3989, "step": 6487 }, { "epoch": 0.7, "grad_norm": 0.07993985077411585, "learning_rate": 0.0002214372588707243, "loss": 1.4374, "step": 6488 }, { "epoch": 0.7, "grad_norm": 0.07427798279179676, "learning_rate": 0.00022129269210460444, "loss": 1.4993, "step": 6489 }, { "epoch": 0.7, "grad_norm": 0.08155449054888644, "learning_rate": 0.00022114815913270653, "loss": 1.4904, "step": 6490 }, { "epoch": 0.7, "grad_norm": 0.07970364164040267, "learning_rate": 0.00022100365997255601, "loss": 1.2852, "step": 6491 }, { "epoch": 0.7, "grad_norm": 0.08682486023092911, "learning_rate": 0.00022085919464167326, "loss": 1.6039, "step": 6492 }, { "epoch": 0.7, "grad_norm": 0.07331684440190417, "learning_rate": 0.00022071476315757544, "loss": 1.3076, "step": 6493 }, { "epoch": 0.7, "grad_norm": 0.0757239710866081, "learning_rate": 0.00022057036553777565, "loss": 1.3685, "step": 6494 }, { "epoch": 0.7, "grad_norm": 0.07585935488051394, "learning_rate": 0.00022042600179978216, "loss": 1.4274, "step": 6495 }, { "epoch": 0.7, "grad_norm": 0.08444732219970214, "learning_rate": 0.00022028167196109981, "loss": 1.4211, "step": 6496 }, { "epoch": 0.7, "grad_norm": 0.07518887569695178, "learning_rate": 0.0002201373760392293, "loss": 1.3419, "step": 6497 }, { "epoch": 0.7, "grad_norm": 0.07301302504211518, "learning_rate": 0.0002199931140516665, "loss": 1.5196, "step": 6498 }, { "epoch": 0.7, "grad_norm": 0.07598122555463341, "learning_rate": 0.00021984888601590404, "loss": 1.4913, "step": 6499 }, { "epoch": 0.7, "grad_norm": 0.07442679390168067, "learning_rate": 0.0002197046919494301, "loss": 1.4397, "step": 6500 }, { "epoch": 0.7, "grad_norm": 0.10135433982914047, "learning_rate": 0.0002195605318697284, "loss": 1.3109, "step": 6501 }, { "epoch": 0.7, "grad_norm": 0.07208256394451946, "learning_rate": 0.00021941640579427928, "loss": 1.4048, "step": 6502 }, { "epoch": 0.7, "grad_norm": 0.0787725168679611, "learning_rate": 0.00021927231374055823, "loss": 1.4708, "step": 6503 }, { "epoch": 0.7, "grad_norm": 0.08301111523284674, "learning_rate": 0.00021912825572603678, "loss": 1.2329, "step": 6504 }, { "epoch": 0.7, "grad_norm": 0.08558231206577362, "learning_rate": 0.00021898423176818266, "loss": 1.5087, "step": 6505 }, { "epoch": 0.7, "grad_norm": 0.08352462729960797, "learning_rate": 0.0002188402418844594, "loss": 1.3946, "step": 6506 }, { "epoch": 0.7, "grad_norm": 0.07363711061252484, "learning_rate": 0.00021869628609232596, "loss": 1.3682, "step": 6507 }, { "epoch": 0.7, "grad_norm": 0.0805971289346462, "learning_rate": 0.0002185523644092376, "loss": 1.3311, "step": 6508 }, { "epoch": 0.7, "grad_norm": 0.07032069350557803, "learning_rate": 0.00021840847685264555, "loss": 1.3778, "step": 6509 }, { "epoch": 0.7, "grad_norm": 0.0744760329733795, "learning_rate": 0.00021826462343999627, "loss": 1.4149, "step": 6510 }, { "epoch": 0.7, "grad_norm": 0.09067490689524893, "learning_rate": 0.0002181208041887327, "loss": 1.442, "step": 6511 }, { "epoch": 0.7, "grad_norm": 0.0837501444944338, "learning_rate": 0.0002179770191162936, "loss": 1.4889, "step": 6512 }, { "epoch": 0.7, "grad_norm": 0.08007588034484431, "learning_rate": 0.00021783326824011324, "loss": 1.4214, "step": 6513 }, { "epoch": 0.7, "grad_norm": 0.0799023651814438, "learning_rate": 0.00021768955157762165, "loss": 1.4904, "step": 6514 }, { "epoch": 0.7, "grad_norm": 0.09065512534636933, "learning_rate": 0.00021754586914624524, "loss": 1.4548, "step": 6515 }, { "epoch": 0.7, "grad_norm": 0.08477293649906326, "learning_rate": 0.0002174022209634061, "loss": 1.4235, "step": 6516 }, { "epoch": 0.7, "grad_norm": 0.0779585839119935, "learning_rate": 0.0002172586070465218, "loss": 1.3522, "step": 6517 }, { "epoch": 0.7, "grad_norm": 0.07615724676838401, "learning_rate": 0.0002171150274130061, "loss": 1.4947, "step": 6518 }, { "epoch": 0.7, "grad_norm": 0.07558094107947531, "learning_rate": 0.0002169714820802688, "loss": 1.5059, "step": 6519 }, { "epoch": 0.7, "grad_norm": 0.09027457729867946, "learning_rate": 0.0002168279710657149, "loss": 1.4962, "step": 6520 }, { "epoch": 0.7, "grad_norm": 0.07694453291924619, "learning_rate": 0.0002166844943867457, "loss": 1.5161, "step": 6521 }, { "epoch": 0.7, "grad_norm": 0.08320596409149107, "learning_rate": 0.00021654105206075848, "loss": 1.3651, "step": 6522 }, { "epoch": 0.7, "grad_norm": 0.09290290757932727, "learning_rate": 0.0002163976441051459, "loss": 1.3206, "step": 6523 }, { "epoch": 0.7, "grad_norm": 0.07738617365116893, "learning_rate": 0.00021625427053729656, "loss": 1.3863, "step": 6524 }, { "epoch": 0.7, "grad_norm": 0.08379385652143403, "learning_rate": 0.0002161109313745953, "loss": 1.3935, "step": 6525 }, { "epoch": 0.7, "grad_norm": 0.08494754800851972, "learning_rate": 0.00021596762663442215, "loss": 1.4045, "step": 6526 }, { "epoch": 0.7, "grad_norm": 0.08370244101996513, "learning_rate": 0.0002158243563341535, "loss": 1.3563, "step": 6527 }, { "epoch": 0.7, "grad_norm": 0.08251734877414564, "learning_rate": 0.00021568112049116152, "loss": 1.1581, "step": 6528 }, { "epoch": 0.7, "grad_norm": 0.07399029848646638, "learning_rate": 0.00021553791912281369, "loss": 1.2488, "step": 6529 }, { "epoch": 0.7, "grad_norm": 0.08615102440250381, "learning_rate": 0.00021539475224647382, "loss": 1.3296, "step": 6530 }, { "epoch": 0.7, "grad_norm": 0.08760288742801585, "learning_rate": 0.00021525161987950164, "loss": 1.4621, "step": 6531 }, { "epoch": 0.7, "grad_norm": 0.08548065697127558, "learning_rate": 0.00021510852203925206, "loss": 1.4295, "step": 6532 }, { "epoch": 0.7, "grad_norm": 0.08830861858417012, "learning_rate": 0.0002149654587430765, "loss": 1.3621, "step": 6533 }, { "epoch": 0.7, "grad_norm": 0.07901395402583786, "learning_rate": 0.00021482243000832158, "loss": 1.3396, "step": 6534 }, { "epoch": 0.7, "grad_norm": 0.07521402912545687, "learning_rate": 0.00021467943585233036, "loss": 1.4412, "step": 6535 }, { "epoch": 0.7, "grad_norm": 0.0757548604259532, "learning_rate": 0.000214536476292441, "loss": 1.35, "step": 6536 }, { "epoch": 0.7, "grad_norm": 0.08152821401632664, "learning_rate": 0.0002143935513459882, "loss": 1.4062, "step": 6537 }, { "epoch": 0.7, "grad_norm": 0.09936944029433495, "learning_rate": 0.0002142506610303017, "loss": 1.4571, "step": 6538 }, { "epoch": 0.7, "grad_norm": 0.07181111096163625, "learning_rate": 0.00021410780536270779, "loss": 1.3778, "step": 6539 }, { "epoch": 0.7, "grad_norm": 0.07568757215933702, "learning_rate": 0.00021396498436052826, "loss": 1.3805, "step": 6540 }, { "epoch": 0.7, "grad_norm": 0.08252191822278969, "learning_rate": 0.0002138221980410802, "loss": 1.4755, "step": 6541 }, { "epoch": 0.7, "grad_norm": 0.0965810583333691, "learning_rate": 0.00021367944642167736, "loss": 1.4406, "step": 6542 }, { "epoch": 0.7, "grad_norm": 0.08218602214053825, "learning_rate": 0.00021353672951962888, "loss": 1.3727, "step": 6543 }, { "epoch": 0.7, "grad_norm": 0.07903607477438594, "learning_rate": 0.0002133940473522395, "loss": 1.493, "step": 6544 }, { "epoch": 0.7, "grad_norm": 0.09434836139967438, "learning_rate": 0.00021325139993680982, "loss": 1.489, "step": 6545 }, { "epoch": 0.7, "grad_norm": 0.09415498817850494, "learning_rate": 0.00021310878729063643, "loss": 1.4413, "step": 6546 }, { "epoch": 0.7, "grad_norm": 0.0837639102330669, "learning_rate": 0.00021296620943101185, "loss": 1.4946, "step": 6547 }, { "epoch": 0.7, "grad_norm": 0.08977086282659386, "learning_rate": 0.00021282366637522378, "loss": 1.3019, "step": 6548 }, { "epoch": 0.7, "grad_norm": 0.0778087682911158, "learning_rate": 0.00021268115814055617, "loss": 1.333, "step": 6549 }, { "epoch": 0.7, "grad_norm": 0.0750368541827475, "learning_rate": 0.0002125386847442889, "loss": 1.5137, "step": 6550 }, { "epoch": 0.7, "grad_norm": 0.07990480483989432, "learning_rate": 0.00021239624620369692, "loss": 1.4183, "step": 6551 }, { "epoch": 0.7, "grad_norm": 0.08691069242823043, "learning_rate": 0.00021225384253605156, "loss": 1.3846, "step": 6552 }, { "epoch": 0.7, "grad_norm": 0.08922151650826089, "learning_rate": 0.00021211147375862005, "loss": 1.5065, "step": 6553 }, { "epoch": 0.7, "grad_norm": 0.08532353831500455, "learning_rate": 0.00021196913988866467, "loss": 1.2723, "step": 6554 }, { "epoch": 0.7, "grad_norm": 0.08102466561548363, "learning_rate": 0.00021182684094344422, "loss": 1.3856, "step": 6555 }, { "epoch": 0.7, "grad_norm": 0.07457371890788876, "learning_rate": 0.00021168457694021282, "loss": 1.3585, "step": 6556 }, { "epoch": 0.7, "grad_norm": 0.07968114957329238, "learning_rate": 0.00021154234789622024, "loss": 1.4162, "step": 6557 }, { "epoch": 0.7, "grad_norm": 0.08118810789452972, "learning_rate": 0.00021140015382871248, "loss": 1.4492, "step": 6558 }, { "epoch": 0.71, "grad_norm": 0.0803685591148541, "learning_rate": 0.00021125799475493114, "loss": 1.3808, "step": 6559 }, { "epoch": 0.71, "grad_norm": 0.08168756775809427, "learning_rate": 0.00021111587069211324, "loss": 1.306, "step": 6560 }, { "epoch": 0.71, "grad_norm": 0.08662437531664363, "learning_rate": 0.00021097378165749192, "loss": 1.3639, "step": 6561 }, { "epoch": 0.71, "grad_norm": 0.07911911522419161, "learning_rate": 0.00021083172766829623, "loss": 1.4021, "step": 6562 }, { "epoch": 0.71, "grad_norm": 0.08142696269384453, "learning_rate": 0.00021068970874175025, "loss": 1.3587, "step": 6563 }, { "epoch": 0.71, "grad_norm": 0.08014034479548356, "learning_rate": 0.00021054772489507452, "loss": 1.3174, "step": 6564 }, { "epoch": 0.71, "grad_norm": 0.08382442401757927, "learning_rate": 0.00021040577614548522, "loss": 1.4604, "step": 6565 }, { "epoch": 0.71, "grad_norm": 0.07901248508081173, "learning_rate": 0.0002102638625101939, "loss": 1.4022, "step": 6566 }, { "epoch": 0.71, "grad_norm": 0.08222010138581641, "learning_rate": 0.00021012198400640803, "loss": 1.4083, "step": 6567 }, { "epoch": 0.71, "grad_norm": 0.08063227244132427, "learning_rate": 0.0002099801406513311, "loss": 1.3365, "step": 6568 }, { "epoch": 0.71, "grad_norm": 0.08005206480715796, "learning_rate": 0.00020983833246216178, "loss": 1.421, "step": 6569 }, { "epoch": 0.71, "grad_norm": 0.0734211309304561, "learning_rate": 0.00020969655945609494, "loss": 1.299, "step": 6570 }, { "epoch": 0.71, "grad_norm": 0.07864468033355397, "learning_rate": 0.00020955482165032136, "loss": 1.4178, "step": 6571 }, { "epoch": 0.71, "grad_norm": 0.07951495019839461, "learning_rate": 0.0002094131190620267, "loss": 1.2936, "step": 6572 }, { "epoch": 0.71, "grad_norm": 0.0797927878475767, "learning_rate": 0.00020927145170839323, "loss": 1.4509, "step": 6573 }, { "epoch": 0.71, "grad_norm": 0.07822452330414903, "learning_rate": 0.00020912981960659872, "loss": 1.4728, "step": 6574 }, { "epoch": 0.71, "grad_norm": 0.09082590234638262, "learning_rate": 0.00020898822277381612, "loss": 1.57, "step": 6575 }, { "epoch": 0.71, "grad_norm": 0.07487868864298572, "learning_rate": 0.00020884666122721502, "loss": 1.4102, "step": 6576 }, { "epoch": 0.71, "grad_norm": 0.08487261642892924, "learning_rate": 0.00020870513498395978, "loss": 1.3731, "step": 6577 }, { "epoch": 0.71, "grad_norm": 0.07665208224280295, "learning_rate": 0.00020856364406121136, "loss": 1.4139, "step": 6578 }, { "epoch": 0.71, "grad_norm": 0.0736129009710678, "learning_rate": 0.0002084221884761257, "loss": 1.4054, "step": 6579 }, { "epoch": 0.71, "grad_norm": 0.08456473210977278, "learning_rate": 0.00020828076824585485, "loss": 1.2447, "step": 6580 }, { "epoch": 0.71, "grad_norm": 0.07277321479264692, "learning_rate": 0.0002081393833875468, "loss": 1.5035, "step": 6581 }, { "epoch": 0.71, "grad_norm": 0.08135520280681077, "learning_rate": 0.00020799803391834443, "loss": 1.3139, "step": 6582 }, { "epoch": 0.71, "grad_norm": 0.08097736134436409, "learning_rate": 0.00020785671985538724, "loss": 1.3363, "step": 6583 }, { "epoch": 0.71, "grad_norm": 0.08425630719100732, "learning_rate": 0.00020771544121581003, "loss": 1.4049, "step": 6584 }, { "epoch": 0.71, "grad_norm": 0.08781705002009797, "learning_rate": 0.0002075741980167431, "loss": 1.4498, "step": 6585 }, { "epoch": 0.71, "grad_norm": 0.07521461031735634, "learning_rate": 0.000207432990275313, "loss": 1.4064, "step": 6586 }, { "epoch": 0.71, "grad_norm": 0.08087613506815886, "learning_rate": 0.00020729181800864145, "loss": 1.4177, "step": 6587 }, { "epoch": 0.71, "grad_norm": 0.08082117128976071, "learning_rate": 0.00020715068123384588, "loss": 1.4106, "step": 6588 }, { "epoch": 0.71, "grad_norm": 0.0746232658644306, "learning_rate": 0.00020700957996803982, "loss": 1.4639, "step": 6589 }, { "epoch": 0.71, "grad_norm": 0.07719550175977072, "learning_rate": 0.00020686851422833247, "loss": 1.4654, "step": 6590 }, { "epoch": 0.71, "grad_norm": 0.08352707823350032, "learning_rate": 0.00020672748403182818, "loss": 1.4165, "step": 6591 }, { "epoch": 0.71, "grad_norm": 0.08250365019800493, "learning_rate": 0.00020658648939562752, "loss": 1.4926, "step": 6592 }, { "epoch": 0.71, "grad_norm": 0.08343810025009954, "learning_rate": 0.00020644553033682673, "loss": 1.4649, "step": 6593 }, { "epoch": 0.71, "grad_norm": 0.08558698583672085, "learning_rate": 0.00020630460687251723, "loss": 1.4754, "step": 6594 }, { "epoch": 0.71, "grad_norm": 0.0862044285372647, "learning_rate": 0.0002061637190197867, "loss": 1.4983, "step": 6595 }, { "epoch": 0.71, "grad_norm": 0.08385121801130181, "learning_rate": 0.0002060228667957184, "loss": 1.4094, "step": 6596 }, { "epoch": 0.71, "grad_norm": 0.08458614443447766, "learning_rate": 0.000205882050217391, "loss": 1.4314, "step": 6597 }, { "epoch": 0.71, "grad_norm": 0.0940062094037127, "learning_rate": 0.00020574126930187882, "loss": 1.3113, "step": 6598 }, { "epoch": 0.71, "grad_norm": 0.08044626858810763, "learning_rate": 0.00020560052406625235, "loss": 1.2947, "step": 6599 }, { "epoch": 0.71, "grad_norm": 0.10262807774306441, "learning_rate": 0.00020545981452757718, "loss": 1.4088, "step": 6600 }, { "epoch": 0.71, "grad_norm": 0.08041627362609009, "learning_rate": 0.00020531914070291485, "loss": 1.3947, "step": 6601 }, { "epoch": 0.71, "grad_norm": 0.08017300590515987, "learning_rate": 0.00020517850260932285, "loss": 1.3487, "step": 6602 }, { "epoch": 0.71, "grad_norm": 0.09120867994933178, "learning_rate": 0.00020503790026385365, "loss": 1.3817, "step": 6603 }, { "epoch": 0.71, "grad_norm": 0.08010760497374898, "learning_rate": 0.00020489733368355588, "loss": 1.4757, "step": 6604 }, { "epoch": 0.71, "grad_norm": 0.07122969441559135, "learning_rate": 0.00020475680288547398, "loss": 1.2778, "step": 6605 }, { "epoch": 0.71, "grad_norm": 0.08403794664807253, "learning_rate": 0.00020461630788664743, "loss": 1.5009, "step": 6606 }, { "epoch": 0.71, "grad_norm": 0.08299400551352507, "learning_rate": 0.0002044758487041119, "loss": 1.3146, "step": 6607 }, { "epoch": 0.71, "grad_norm": 0.07976714523904847, "learning_rate": 0.0002043354253548987, "loss": 1.4699, "step": 6608 }, { "epoch": 0.71, "grad_norm": 0.08026151620792248, "learning_rate": 0.00020419503785603445, "loss": 1.3158, "step": 6609 }, { "epoch": 0.71, "grad_norm": 0.08797413778999093, "learning_rate": 0.00020405468622454155, "loss": 1.3516, "step": 6610 }, { "epoch": 0.71, "grad_norm": 0.07741596819135142, "learning_rate": 0.00020391437047743817, "loss": 1.4267, "step": 6611 }, { "epoch": 0.71, "grad_norm": 0.0867998747324431, "learning_rate": 0.0002037740906317383, "loss": 1.3826, "step": 6612 }, { "epoch": 0.71, "grad_norm": 0.09645908165108294, "learning_rate": 0.000203633846704451, "loss": 1.4439, "step": 6613 }, { "epoch": 0.71, "grad_norm": 0.07676929477174242, "learning_rate": 0.0002034936387125816, "loss": 1.3473, "step": 6614 }, { "epoch": 0.71, "grad_norm": 0.08455784449172907, "learning_rate": 0.0002033534666731308, "loss": 1.2772, "step": 6615 }, { "epoch": 0.71, "grad_norm": 0.08107012147918617, "learning_rate": 0.00020321333060309478, "loss": 1.286, "step": 6616 }, { "epoch": 0.71, "grad_norm": 0.08464678556549356, "learning_rate": 0.00020307323051946553, "loss": 1.4273, "step": 6617 }, { "epoch": 0.71, "grad_norm": 0.08124259630730062, "learning_rate": 0.0002029331664392311, "loss": 1.2747, "step": 6618 }, { "epoch": 0.71, "grad_norm": 0.09432309785978557, "learning_rate": 0.0002027931383793741, "loss": 1.4427, "step": 6619 }, { "epoch": 0.71, "grad_norm": 0.07536259226364693, "learning_rate": 0.0002026531463568736, "loss": 1.3773, "step": 6620 }, { "epoch": 0.71, "grad_norm": 0.10086324922011485, "learning_rate": 0.00020251319038870448, "loss": 1.5472, "step": 6621 }, { "epoch": 0.71, "grad_norm": 0.09653867084761454, "learning_rate": 0.0002023732704918364, "loss": 1.5599, "step": 6622 }, { "epoch": 0.71, "grad_norm": 0.084699031110335, "learning_rate": 0.00020223338668323533, "loss": 1.4765, "step": 6623 }, { "epoch": 0.71, "grad_norm": 0.07554415347563273, "learning_rate": 0.0002020935389798629, "loss": 1.4258, "step": 6624 }, { "epoch": 0.71, "grad_norm": 0.07818526536382409, "learning_rate": 0.00020195372739867569, "loss": 1.3709, "step": 6625 }, { "epoch": 0.71, "grad_norm": 0.08850803610170471, "learning_rate": 0.00020181395195662655, "loss": 1.5076, "step": 6626 }, { "epoch": 0.71, "grad_norm": 0.0876713653675245, "learning_rate": 0.00020167421267066392, "loss": 1.3318, "step": 6627 }, { "epoch": 0.71, "grad_norm": 0.08688217151281227, "learning_rate": 0.0002015345095577314, "loss": 1.4213, "step": 6628 }, { "epoch": 0.71, "grad_norm": 0.08836116928923096, "learning_rate": 0.00020139484263476866, "loss": 1.2419, "step": 6629 }, { "epoch": 0.71, "grad_norm": 0.0895517491670006, "learning_rate": 0.00020125521191871054, "loss": 1.433, "step": 6630 }, { "epoch": 0.71, "grad_norm": 0.08227120369012335, "learning_rate": 0.00020111561742648803, "loss": 1.4532, "step": 6631 }, { "epoch": 0.71, "grad_norm": 0.08709107947772152, "learning_rate": 0.0002009760591750272, "loss": 1.4795, "step": 6632 }, { "epoch": 0.71, "grad_norm": 0.08139579162806306, "learning_rate": 0.00020083653718125027, "loss": 1.4581, "step": 6633 }, { "epoch": 0.71, "grad_norm": 0.08475824957944199, "learning_rate": 0.0002006970514620744, "loss": 1.4174, "step": 6634 }, { "epoch": 0.71, "grad_norm": 0.08621342413318868, "learning_rate": 0.00020055760203441288, "loss": 1.4104, "step": 6635 }, { "epoch": 0.71, "grad_norm": 0.08288494862554391, "learning_rate": 0.0002004181889151746, "loss": 1.3434, "step": 6636 }, { "epoch": 0.71, "grad_norm": 0.08305143444288314, "learning_rate": 0.0002002788121212636, "loss": 1.3572, "step": 6637 }, { "epoch": 0.71, "grad_norm": 0.07601786290155091, "learning_rate": 0.00020013947166957992, "loss": 1.3279, "step": 6638 }, { "epoch": 0.71, "grad_norm": 0.07653362087750513, "learning_rate": 0.00020000016757701922, "loss": 1.4184, "step": 6639 }, { "epoch": 0.71, "grad_norm": 0.07932924099857178, "learning_rate": 0.00019986089986047246, "loss": 1.4887, "step": 6640 }, { "epoch": 0.71, "grad_norm": 0.07901853454265233, "learning_rate": 0.00019972166853682617, "loss": 1.4399, "step": 6641 }, { "epoch": 0.71, "grad_norm": 0.06553771047838028, "learning_rate": 0.00019958247362296278, "loss": 1.3047, "step": 6642 }, { "epoch": 0.71, "grad_norm": 0.07917463053808547, "learning_rate": 0.00019944331513576036, "loss": 1.5311, "step": 6643 }, { "epoch": 0.71, "grad_norm": 0.0745452152182659, "learning_rate": 0.00019930419309209196, "loss": 1.4733, "step": 6644 }, { "epoch": 0.71, "grad_norm": 0.07259087329080621, "learning_rate": 0.00019916510750882683, "loss": 1.3976, "step": 6645 }, { "epoch": 0.71, "grad_norm": 0.08174348493303095, "learning_rate": 0.00019902605840282968, "loss": 1.3222, "step": 6646 }, { "epoch": 0.71, "grad_norm": 0.07719419124593137, "learning_rate": 0.0001988870457909604, "loss": 1.4798, "step": 6647 }, { "epoch": 0.71, "grad_norm": 0.09086005817838735, "learning_rate": 0.00019874806969007492, "loss": 1.4071, "step": 6648 }, { "epoch": 0.71, "grad_norm": 0.07637012035806491, "learning_rate": 0.00019860913011702475, "loss": 1.4735, "step": 6649 }, { "epoch": 0.71, "grad_norm": 0.08008763937136829, "learning_rate": 0.0001984702270886566, "loss": 1.4013, "step": 6650 }, { "epoch": 0.71, "grad_norm": 0.07633349271039644, "learning_rate": 0.0001983313606218128, "loss": 1.4385, "step": 6651 }, { "epoch": 0.72, "grad_norm": 0.0922809276923969, "learning_rate": 0.00019819253073333166, "loss": 1.5445, "step": 6652 }, { "epoch": 0.72, "grad_norm": 0.08152360957588603, "learning_rate": 0.0001980537374400465, "loss": 1.3338, "step": 6653 }, { "epoch": 0.72, "grad_norm": 0.0858820904568059, "learning_rate": 0.00019791498075878662, "loss": 1.2776, "step": 6654 }, { "epoch": 0.72, "grad_norm": 0.07880393375507283, "learning_rate": 0.000197776260706377, "loss": 1.3785, "step": 6655 }, { "epoch": 0.72, "grad_norm": 0.07789571614399886, "learning_rate": 0.00019763757729963745, "loss": 1.2272, "step": 6656 }, { "epoch": 0.72, "grad_norm": 0.08571963301433981, "learning_rate": 0.0001974989305553841, "loss": 1.3977, "step": 6657 }, { "epoch": 0.72, "grad_norm": 0.08812834195849592, "learning_rate": 0.00019736032049042846, "loss": 1.3836, "step": 6658 }, { "epoch": 0.72, "grad_norm": 0.08479265010733626, "learning_rate": 0.00019722174712157714, "loss": 1.4593, "step": 6659 }, { "epoch": 0.72, "grad_norm": 0.08609566303687616, "learning_rate": 0.00019708321046563298, "loss": 1.2265, "step": 6660 }, { "epoch": 0.72, "grad_norm": 0.0765427963656903, "learning_rate": 0.0001969447105393937, "loss": 1.3816, "step": 6661 }, { "epoch": 0.72, "grad_norm": 0.07992363192285609, "learning_rate": 0.00019680624735965324, "loss": 1.3869, "step": 6662 }, { "epoch": 0.72, "grad_norm": 0.08216831699566847, "learning_rate": 0.00019666782094320042, "loss": 1.3774, "step": 6663 }, { "epoch": 0.72, "grad_norm": 0.08439452976795346, "learning_rate": 0.00019652943130682015, "loss": 1.385, "step": 6664 }, { "epoch": 0.72, "grad_norm": 0.08737819487197597, "learning_rate": 0.00019639107846729242, "loss": 1.5652, "step": 6665 }, { "epoch": 0.72, "grad_norm": 0.08022829112347864, "learning_rate": 0.00019625276244139317, "loss": 1.3677, "step": 6666 }, { "epoch": 0.72, "grad_norm": 0.09083698232520176, "learning_rate": 0.00019611448324589377, "loss": 1.3274, "step": 6667 }, { "epoch": 0.72, "grad_norm": 0.09574330590908853, "learning_rate": 0.00019597624089756076, "loss": 1.3743, "step": 6668 }, { "epoch": 0.72, "grad_norm": 0.07621982941746845, "learning_rate": 0.0001958380354131567, "loss": 1.4548, "step": 6669 }, { "epoch": 0.72, "grad_norm": 0.08335567959546465, "learning_rate": 0.00019569986680943957, "loss": 1.381, "step": 6670 }, { "epoch": 0.72, "grad_norm": 0.08064847869524633, "learning_rate": 0.00019556173510316271, "loss": 1.4698, "step": 6671 }, { "epoch": 0.72, "grad_norm": 0.08671777975446042, "learning_rate": 0.00019542364031107485, "loss": 1.3249, "step": 6672 }, { "epoch": 0.72, "grad_norm": 0.0753294442038951, "learning_rate": 0.0001952855824499206, "loss": 1.3026, "step": 6673 }, { "epoch": 0.72, "grad_norm": 0.0810641059229283, "learning_rate": 0.00019514756153644027, "loss": 1.4282, "step": 6674 }, { "epoch": 0.72, "grad_norm": 0.08289829139487187, "learning_rate": 0.0001950095775873688, "loss": 1.4603, "step": 6675 }, { "epoch": 0.72, "grad_norm": 0.09110856841308793, "learning_rate": 0.00019487163061943758, "loss": 1.3966, "step": 6676 }, { "epoch": 0.72, "grad_norm": 0.08034250256655166, "learning_rate": 0.00019473372064937323, "loss": 1.4295, "step": 6677 }, { "epoch": 0.72, "grad_norm": 0.08049880862150381, "learning_rate": 0.0001945958476938975, "loss": 1.426, "step": 6678 }, { "epoch": 0.72, "grad_norm": 0.08222994978626211, "learning_rate": 0.0001944580117697281, "loss": 1.3235, "step": 6679 }, { "epoch": 0.72, "grad_norm": 0.09559967814473865, "learning_rate": 0.00019432021289357833, "loss": 1.4478, "step": 6680 }, { "epoch": 0.72, "grad_norm": 0.08036374699327789, "learning_rate": 0.00019418245108215637, "loss": 1.422, "step": 6681 }, { "epoch": 0.72, "grad_norm": 0.08060853174720173, "learning_rate": 0.00019404472635216674, "loss": 1.4803, "step": 6682 }, { "epoch": 0.72, "grad_norm": 0.08451954416817149, "learning_rate": 0.00019390703872030886, "loss": 1.3489, "step": 6683 }, { "epoch": 0.72, "grad_norm": 0.0786560087602854, "learning_rate": 0.0001937693882032776, "loss": 1.4111, "step": 6684 }, { "epoch": 0.72, "grad_norm": 0.09707523238495702, "learning_rate": 0.00019363177481776373, "loss": 1.5404, "step": 6685 }, { "epoch": 0.72, "grad_norm": 0.08754859574303579, "learning_rate": 0.0001934941985804536, "loss": 1.3307, "step": 6686 }, { "epoch": 0.72, "grad_norm": 0.07550925047472697, "learning_rate": 0.0001933566595080284, "loss": 1.3903, "step": 6687 }, { "epoch": 0.72, "grad_norm": 0.09006506803931079, "learning_rate": 0.00019321915761716534, "loss": 1.5876, "step": 6688 }, { "epoch": 0.72, "grad_norm": 0.08481310995440604, "learning_rate": 0.00019308169292453725, "loss": 1.4452, "step": 6689 }, { "epoch": 0.72, "grad_norm": 0.08923095224891851, "learning_rate": 0.00019294426544681182, "loss": 1.384, "step": 6690 }, { "epoch": 0.72, "grad_norm": 0.07069536368701973, "learning_rate": 0.00019280687520065282, "loss": 1.3548, "step": 6691 }, { "epoch": 0.72, "grad_norm": 0.07602239730273473, "learning_rate": 0.00019266952220271937, "loss": 1.3895, "step": 6692 }, { "epoch": 0.72, "grad_norm": 0.07398666072791947, "learning_rate": 0.00019253220646966597, "loss": 1.2796, "step": 6693 }, { "epoch": 0.72, "grad_norm": 0.08773848956447536, "learning_rate": 0.0001923949280181423, "loss": 1.4594, "step": 6694 }, { "epoch": 0.72, "grad_norm": 0.07650409822997449, "learning_rate": 0.00019225768686479428, "loss": 1.2462, "step": 6695 }, { "epoch": 0.72, "grad_norm": 0.07586459619579579, "learning_rate": 0.0001921204830262625, "loss": 1.4254, "step": 6696 }, { "epoch": 0.72, "grad_norm": 0.08147345080849903, "learning_rate": 0.0001919833165191836, "loss": 1.4551, "step": 6697 }, { "epoch": 0.72, "grad_norm": 0.07772700515033043, "learning_rate": 0.0001918461873601896, "loss": 1.2775, "step": 6698 }, { "epoch": 0.72, "grad_norm": 0.07878468764229728, "learning_rate": 0.0001917090955659076, "loss": 1.4245, "step": 6699 }, { "epoch": 0.72, "grad_norm": 0.0856986791366906, "learning_rate": 0.0001915720411529606, "loss": 1.4059, "step": 6700 }, { "epoch": 0.72, "grad_norm": 0.08232357102950424, "learning_rate": 0.0001914350241379671, "loss": 1.3602, "step": 6701 }, { "epoch": 0.72, "grad_norm": 0.08037236283367581, "learning_rate": 0.00019129804453754052, "loss": 1.3657, "step": 6702 }, { "epoch": 0.72, "grad_norm": 0.08306951753793287, "learning_rate": 0.0001911611023682905, "loss": 1.2233, "step": 6703 }, { "epoch": 0.72, "grad_norm": 0.0731465844221435, "learning_rate": 0.00019102419764682133, "loss": 1.4167, "step": 6704 }, { "epoch": 0.72, "grad_norm": 0.07859228132072026, "learning_rate": 0.0001908873303897336, "loss": 1.3531, "step": 6705 }, { "epoch": 0.72, "grad_norm": 0.07670616178704094, "learning_rate": 0.00019075050061362252, "loss": 1.4396, "step": 6706 }, { "epoch": 0.72, "grad_norm": 0.07452515984590537, "learning_rate": 0.00019061370833507946, "loss": 1.4069, "step": 6707 }, { "epoch": 0.72, "grad_norm": 0.09091438656192163, "learning_rate": 0.000190476953570691, "loss": 1.357, "step": 6708 }, { "epoch": 0.72, "grad_norm": 0.0885594352489358, "learning_rate": 0.00019034023633703883, "loss": 1.4248, "step": 6709 }, { "epoch": 0.72, "grad_norm": 0.07470022744007065, "learning_rate": 0.0001902035566507006, "loss": 1.4364, "step": 6710 }, { "epoch": 0.72, "grad_norm": 0.07785651744361902, "learning_rate": 0.00019006691452824932, "loss": 1.4123, "step": 6711 }, { "epoch": 0.72, "grad_norm": 0.08260984881365113, "learning_rate": 0.00018993030998625294, "loss": 1.4369, "step": 6712 }, { "epoch": 0.72, "grad_norm": 0.08785121882037714, "learning_rate": 0.00018979374304127567, "loss": 1.4798, "step": 6713 }, { "epoch": 0.72, "grad_norm": 0.08974738369160999, "learning_rate": 0.00018965721370987649, "loss": 1.3725, "step": 6714 }, { "epoch": 0.72, "grad_norm": 0.07439597985191031, "learning_rate": 0.00018952072200860987, "loss": 1.3802, "step": 6715 }, { "epoch": 0.72, "grad_norm": 0.07989917071160745, "learning_rate": 0.00018938426795402614, "loss": 1.4656, "step": 6716 }, { "epoch": 0.72, "grad_norm": 0.07990885134710889, "learning_rate": 0.00018924785156267088, "loss": 1.3247, "step": 6717 }, { "epoch": 0.72, "grad_norm": 0.0920282425858599, "learning_rate": 0.0001891114728510848, "loss": 1.5226, "step": 6718 }, { "epoch": 0.72, "grad_norm": 0.08098133988211124, "learning_rate": 0.00018897513183580445, "loss": 1.3289, "step": 6719 }, { "epoch": 0.72, "grad_norm": 0.08236995237832075, "learning_rate": 0.00018883882853336183, "loss": 1.4082, "step": 6720 }, { "epoch": 0.72, "grad_norm": 0.08109450367713297, "learning_rate": 0.00018870256296028376, "loss": 1.3274, "step": 6721 }, { "epoch": 0.72, "grad_norm": 0.08854925614558622, "learning_rate": 0.00018856633513309313, "loss": 1.4633, "step": 6722 }, { "epoch": 0.72, "grad_norm": 0.08776537527339491, "learning_rate": 0.00018843014506830823, "loss": 1.4247, "step": 6723 }, { "epoch": 0.72, "grad_norm": 0.08403123751330256, "learning_rate": 0.0001882939927824424, "loss": 1.3254, "step": 6724 }, { "epoch": 0.72, "grad_norm": 0.07484992936906583, "learning_rate": 0.00018815787829200436, "loss": 1.4866, "step": 6725 }, { "epoch": 0.72, "grad_norm": 0.08493873152531263, "learning_rate": 0.0001880218016134987, "loss": 1.2764, "step": 6726 }, { "epoch": 0.72, "grad_norm": 0.08016073948899083, "learning_rate": 0.00018788576276342528, "loss": 1.5368, "step": 6727 }, { "epoch": 0.72, "grad_norm": 0.08021313491786318, "learning_rate": 0.00018774976175827895, "loss": 1.4752, "step": 6728 }, { "epoch": 0.72, "grad_norm": 0.08468752473482426, "learning_rate": 0.00018761379861455076, "loss": 1.4151, "step": 6729 }, { "epoch": 0.72, "grad_norm": 0.07521467300223297, "learning_rate": 0.00018747787334872618, "loss": 1.4668, "step": 6730 }, { "epoch": 0.72, "grad_norm": 0.07685387563084291, "learning_rate": 0.00018734198597728698, "loss": 1.4331, "step": 6731 }, { "epoch": 0.72, "grad_norm": 0.07505611286286995, "learning_rate": 0.00018720613651670997, "loss": 1.4281, "step": 6732 }, { "epoch": 0.72, "grad_norm": 0.07502769080720458, "learning_rate": 0.0001870703249834671, "loss": 1.3482, "step": 6733 }, { "epoch": 0.72, "grad_norm": 0.07203431096058605, "learning_rate": 0.0001869345513940262, "loss": 1.3121, "step": 6734 }, { "epoch": 0.72, "grad_norm": 0.07909327996419319, "learning_rate": 0.00018679881576485043, "loss": 1.4207, "step": 6735 }, { "epoch": 0.72, "grad_norm": 0.09281906210695522, "learning_rate": 0.00018666311811239795, "loss": 1.3021, "step": 6736 }, { "epoch": 0.72, "grad_norm": 0.10364312066748778, "learning_rate": 0.00018652745845312252, "loss": 1.3919, "step": 6737 }, { "epoch": 0.72, "grad_norm": 0.07723214415961016, "learning_rate": 0.00018639183680347344, "loss": 1.4411, "step": 6738 }, { "epoch": 0.72, "grad_norm": 0.07938462403198333, "learning_rate": 0.00018625625317989547, "loss": 1.5324, "step": 6739 }, { "epoch": 0.72, "grad_norm": 0.0817394915072993, "learning_rate": 0.00018612070759882826, "loss": 1.4159, "step": 6740 }, { "epoch": 0.72, "grad_norm": 0.07546117695110775, "learning_rate": 0.00018598520007670743, "loss": 1.4377, "step": 6741 }, { "epoch": 0.72, "grad_norm": 0.07981149695058505, "learning_rate": 0.00018584973062996379, "loss": 1.3169, "step": 6742 }, { "epoch": 0.72, "grad_norm": 0.09914212403092261, "learning_rate": 0.0001857142992750232, "loss": 1.3235, "step": 6743 }, { "epoch": 0.72, "grad_norm": 0.08197515988292878, "learning_rate": 0.0001855789060283073, "loss": 1.4592, "step": 6744 }, { "epoch": 0.73, "grad_norm": 0.10267448225101261, "learning_rate": 0.00018544355090623337, "loss": 1.3162, "step": 6745 }, { "epoch": 0.73, "grad_norm": 0.07850174588448537, "learning_rate": 0.00018530823392521302, "loss": 1.3222, "step": 6746 }, { "epoch": 0.73, "grad_norm": 0.08075899888732171, "learning_rate": 0.00018517295510165417, "loss": 1.4816, "step": 6747 }, { "epoch": 0.73, "grad_norm": 0.08032012562669273, "learning_rate": 0.00018503771445196006, "loss": 1.3729, "step": 6748 }, { "epoch": 0.73, "grad_norm": 0.07918002511033893, "learning_rate": 0.00018490251199252878, "loss": 1.267, "step": 6749 }, { "epoch": 0.73, "grad_norm": 0.07957204447098715, "learning_rate": 0.0001847673477397542, "loss": 1.4327, "step": 6750 }, { "epoch": 0.73, "grad_norm": 0.07659661414310863, "learning_rate": 0.00018463222171002564, "loss": 1.4241, "step": 6751 }, { "epoch": 0.73, "grad_norm": 0.08910841479830099, "learning_rate": 0.00018449713391972732, "loss": 1.4116, "step": 6752 }, { "epoch": 0.73, "grad_norm": 0.0708037129156938, "learning_rate": 0.00018436208438523915, "loss": 1.3844, "step": 6753 }, { "epoch": 0.73, "grad_norm": 0.080526008014301, "learning_rate": 0.00018422707312293663, "loss": 1.4397, "step": 6754 }, { "epoch": 0.73, "grad_norm": 0.09423939921649865, "learning_rate": 0.00018409210014918992, "loss": 1.3331, "step": 6755 }, { "epoch": 0.73, "grad_norm": 0.08920464006608342, "learning_rate": 0.0001839571654803654, "loss": 1.43, "step": 6756 }, { "epoch": 0.73, "grad_norm": 0.08378333881364608, "learning_rate": 0.0001838222691328239, "loss": 1.4928, "step": 6757 }, { "epoch": 0.73, "grad_norm": 0.07805182934549852, "learning_rate": 0.00018368741112292252, "loss": 1.3854, "step": 6758 }, { "epoch": 0.73, "grad_norm": 0.0854097806654915, "learning_rate": 0.0001835525914670128, "loss": 1.3806, "step": 6759 }, { "epoch": 0.73, "grad_norm": 0.08040018273693553, "learning_rate": 0.00018341781018144253, "loss": 1.3825, "step": 6760 }, { "epoch": 0.73, "grad_norm": 0.0936813686907315, "learning_rate": 0.00018328306728255405, "loss": 1.5222, "step": 6761 }, { "epoch": 0.73, "grad_norm": 0.07344171050358542, "learning_rate": 0.00018314836278668557, "loss": 1.3782, "step": 6762 }, { "epoch": 0.73, "grad_norm": 0.07684534780565848, "learning_rate": 0.00018301369671017058, "loss": 1.4264, "step": 6763 }, { "epoch": 0.73, "grad_norm": 0.07826953273917311, "learning_rate": 0.00018287906906933754, "loss": 1.3789, "step": 6764 }, { "epoch": 0.73, "grad_norm": 0.08928722109970684, "learning_rate": 0.00018274447988051064, "loss": 1.3112, "step": 6765 }, { "epoch": 0.73, "grad_norm": 0.08674179911378348, "learning_rate": 0.00018260992916000946, "loss": 1.4021, "step": 6766 }, { "epoch": 0.73, "grad_norm": 0.08089963246558474, "learning_rate": 0.00018247541692414858, "loss": 1.489, "step": 6767 }, { "epoch": 0.73, "grad_norm": 0.09059576891894354, "learning_rate": 0.00018234094318923794, "loss": 1.2786, "step": 6768 }, { "epoch": 0.73, "grad_norm": 0.0848582732695057, "learning_rate": 0.000182206507971583, "loss": 1.3481, "step": 6769 }, { "epoch": 0.73, "grad_norm": 0.07842148758676074, "learning_rate": 0.0001820721112874848, "loss": 1.3778, "step": 6770 }, { "epoch": 0.73, "grad_norm": 0.08192393685903324, "learning_rate": 0.000181937753153239, "loss": 1.431, "step": 6771 }, { "epoch": 0.73, "grad_norm": 0.08794094605478588, "learning_rate": 0.00018180343358513713, "loss": 1.5262, "step": 6772 }, { "epoch": 0.73, "grad_norm": 0.07989683360465649, "learning_rate": 0.00018166915259946616, "loss": 1.4531, "step": 6773 }, { "epoch": 0.73, "grad_norm": 0.07895430276687004, "learning_rate": 0.00018153491021250762, "loss": 1.3966, "step": 6774 }, { "epoch": 0.73, "grad_norm": 0.07984445806504739, "learning_rate": 0.0001814007064405392, "loss": 1.3677, "step": 6775 }, { "epoch": 0.73, "grad_norm": 0.08715921957027024, "learning_rate": 0.00018126654129983367, "loss": 1.3904, "step": 6776 }, { "epoch": 0.73, "grad_norm": 0.0923525775887198, "learning_rate": 0.00018113241480665883, "loss": 1.4055, "step": 6777 }, { "epoch": 0.73, "grad_norm": 0.08056941965998873, "learning_rate": 0.00018099832697727786, "loss": 1.3418, "step": 6778 }, { "epoch": 0.73, "grad_norm": 0.07920305560868664, "learning_rate": 0.00018086427782794962, "loss": 1.3279, "step": 6779 }, { "epoch": 0.73, "grad_norm": 0.07615284040014661, "learning_rate": 0.00018073026737492782, "loss": 1.3265, "step": 6780 }, { "epoch": 0.73, "grad_norm": 0.07508902500142603, "learning_rate": 0.00018059629563446173, "loss": 1.4519, "step": 6781 }, { "epoch": 0.73, "grad_norm": 0.09785702332279825, "learning_rate": 0.00018046236262279615, "loss": 1.4866, "step": 6782 }, { "epoch": 0.73, "grad_norm": 0.09289516193260337, "learning_rate": 0.00018032846835617055, "loss": 1.1696, "step": 6783 }, { "epoch": 0.73, "grad_norm": 0.09300543988745133, "learning_rate": 0.00018019461285082023, "loss": 1.4885, "step": 6784 }, { "epoch": 0.73, "grad_norm": 0.0857508580574828, "learning_rate": 0.00018006079612297582, "loss": 1.3232, "step": 6785 }, { "epoch": 0.73, "grad_norm": 0.07557541283565075, "learning_rate": 0.0001799270181888627, "loss": 1.3176, "step": 6786 }, { "epoch": 0.73, "grad_norm": 0.0937229554586484, "learning_rate": 0.00017979327906470204, "loss": 1.4221, "step": 6787 }, { "epoch": 0.73, "grad_norm": 0.08422026801156707, "learning_rate": 0.00017965957876671046, "loss": 1.3148, "step": 6788 }, { "epoch": 0.73, "grad_norm": 0.0870791699339773, "learning_rate": 0.0001795259173110993, "loss": 1.1682, "step": 6789 }, { "epoch": 0.73, "grad_norm": 0.08035752064799632, "learning_rate": 0.0001793922947140753, "loss": 1.381, "step": 6790 }, { "epoch": 0.73, "grad_norm": 0.07755396439121148, "learning_rate": 0.0001792587109918411, "loss": 1.3554, "step": 6791 }, { "epoch": 0.73, "grad_norm": 0.07669787633054774, "learning_rate": 0.00017912516616059378, "loss": 1.347, "step": 6792 }, { "epoch": 0.73, "grad_norm": 0.0838443620302552, "learning_rate": 0.00017899166023652624, "loss": 1.3293, "step": 6793 }, { "epoch": 0.73, "grad_norm": 0.08594325030118602, "learning_rate": 0.0001788581932358268, "loss": 1.5661, "step": 6794 }, { "epoch": 0.73, "grad_norm": 0.0741443300137945, "learning_rate": 0.0001787247651746784, "loss": 1.3332, "step": 6795 }, { "epoch": 0.73, "grad_norm": 0.08201836586996483, "learning_rate": 0.0001785913760692598, "loss": 1.4351, "step": 6796 }, { "epoch": 0.73, "grad_norm": 0.08571123279116978, "learning_rate": 0.0001784580259357451, "loss": 1.3921, "step": 6797 }, { "epoch": 0.73, "grad_norm": 0.08420236015565297, "learning_rate": 0.00017832471479030328, "loss": 1.4287, "step": 6798 }, { "epoch": 0.73, "grad_norm": 0.08673883035937369, "learning_rate": 0.0001781914426490986, "loss": 1.3989, "step": 6799 }, { "epoch": 0.73, "grad_norm": 0.08328479946706482, "learning_rate": 0.00017805820952829094, "loss": 1.3287, "step": 6800 }, { "epoch": 0.73, "grad_norm": 0.0911264911348588, "learning_rate": 0.00017792501544403546, "loss": 1.2763, "step": 6801 }, { "epoch": 0.73, "grad_norm": 0.07562969503527785, "learning_rate": 0.00017779186041248202, "loss": 1.3719, "step": 6802 }, { "epoch": 0.73, "grad_norm": 0.08436317460320578, "learning_rate": 0.00017765874444977637, "loss": 1.5092, "step": 6803 }, { "epoch": 0.73, "grad_norm": 0.08201803089533123, "learning_rate": 0.00017752566757205934, "loss": 1.3735, "step": 6804 }, { "epoch": 0.73, "grad_norm": 0.08212245968024402, "learning_rate": 0.0001773926297954667, "loss": 1.437, "step": 6805 }, { "epoch": 0.73, "grad_norm": 0.08936482521224416, "learning_rate": 0.00017725963113612996, "loss": 1.3278, "step": 6806 }, { "epoch": 0.73, "grad_norm": 0.08005483146062434, "learning_rate": 0.0001771266716101757, "loss": 1.5125, "step": 6807 }, { "epoch": 0.73, "grad_norm": 0.08323718244792362, "learning_rate": 0.00017699375123372553, "loss": 1.3158, "step": 6808 }, { "epoch": 0.73, "grad_norm": 0.08374696265235664, "learning_rate": 0.0001768608700228967, "loss": 1.4074, "step": 6809 }, { "epoch": 0.73, "grad_norm": 0.08846882203118381, "learning_rate": 0.0001767280279938014, "loss": 1.465, "step": 6810 }, { "epoch": 0.73, "grad_norm": 0.08056026493132028, "learning_rate": 0.00017659522516254707, "loss": 1.3597, "step": 6811 }, { "epoch": 0.73, "grad_norm": 0.09657117745998345, "learning_rate": 0.0001764624615452366, "loss": 1.4254, "step": 6812 }, { "epoch": 0.73, "grad_norm": 0.08434230471203434, "learning_rate": 0.00017632973715796824, "loss": 1.3593, "step": 6813 }, { "epoch": 0.73, "grad_norm": 0.07279166739385243, "learning_rate": 0.00017619705201683494, "loss": 1.5091, "step": 6814 }, { "epoch": 0.73, "grad_norm": 0.08449448457832491, "learning_rate": 0.00017606440613792546, "loss": 1.4328, "step": 6815 }, { "epoch": 0.73, "grad_norm": 0.0848450788789903, "learning_rate": 0.00017593179953732363, "loss": 1.4954, "step": 6816 }, { "epoch": 0.73, "grad_norm": 0.08389551707213994, "learning_rate": 0.00017579923223110815, "loss": 1.5587, "step": 6817 }, { "epoch": 0.73, "grad_norm": 0.08551329662455956, "learning_rate": 0.00017566670423535346, "loss": 1.4431, "step": 6818 }, { "epoch": 0.73, "grad_norm": 0.0845378310088562, "learning_rate": 0.00017553421556612924, "loss": 1.4607, "step": 6819 }, { "epoch": 0.73, "grad_norm": 0.08248996628222881, "learning_rate": 0.0001754017662395, "loss": 1.35, "step": 6820 }, { "epoch": 0.73, "grad_norm": 0.09292398961034305, "learning_rate": 0.00017526935627152542, "loss": 1.354, "step": 6821 }, { "epoch": 0.73, "grad_norm": 0.09073753070746698, "learning_rate": 0.00017513698567826096, "loss": 1.3038, "step": 6822 }, { "epoch": 0.73, "grad_norm": 0.09641388034069218, "learning_rate": 0.0001750046544757571, "loss": 1.3099, "step": 6823 }, { "epoch": 0.73, "grad_norm": 0.08378939811932765, "learning_rate": 0.00017487236268005918, "loss": 1.4058, "step": 6824 }, { "epoch": 0.73, "grad_norm": 0.08011359281231487, "learning_rate": 0.00017474011030720832, "loss": 1.3095, "step": 6825 }, { "epoch": 0.73, "grad_norm": 0.07936773141110776, "learning_rate": 0.00017460789737324024, "loss": 1.4565, "step": 6826 }, { "epoch": 0.73, "grad_norm": 0.08898734259408628, "learning_rate": 0.00017447572389418643, "loss": 1.4878, "step": 6827 }, { "epoch": 0.73, "grad_norm": 0.08102931349353243, "learning_rate": 0.0001743435898860735, "loss": 1.3488, "step": 6828 }, { "epoch": 0.73, "grad_norm": 0.08001499042331985, "learning_rate": 0.00017421149536492282, "loss": 1.4098, "step": 6829 }, { "epoch": 0.73, "grad_norm": 0.08157457664209204, "learning_rate": 0.0001740794403467517, "loss": 1.3322, "step": 6830 }, { "epoch": 0.73, "grad_norm": 0.07722036111521809, "learning_rate": 0.00017394742484757187, "loss": 1.3963, "step": 6831 }, { "epoch": 0.73, "grad_norm": 0.08352673258439874, "learning_rate": 0.000173815448883391, "loss": 1.2971, "step": 6832 }, { "epoch": 0.73, "grad_norm": 0.07859349149790394, "learning_rate": 0.00017368351247021136, "loss": 1.4829, "step": 6833 }, { "epoch": 0.73, "grad_norm": 0.07105629459606275, "learning_rate": 0.00017355161562403076, "loss": 1.304, "step": 6834 }, { "epoch": 0.73, "grad_norm": 0.08610324427789169, "learning_rate": 0.00017341975836084245, "loss": 1.2876, "step": 6835 }, { "epoch": 0.73, "grad_norm": 0.0754237529009342, "learning_rate": 0.0001732879406966341, "loss": 1.3283, "step": 6836 }, { "epoch": 0.73, "grad_norm": 0.07922581197977313, "learning_rate": 0.0001731561626473893, "loss": 1.3629, "step": 6837 }, { "epoch": 0.74, "grad_norm": 0.08558328737606986, "learning_rate": 0.00017302442422908676, "loss": 1.5443, "step": 6838 }, { "epoch": 0.74, "grad_norm": 0.07826006509172885, "learning_rate": 0.00017289272545769986, "loss": 1.3043, "step": 6839 }, { "epoch": 0.74, "grad_norm": 0.09022868404772783, "learning_rate": 0.00017276106634919774, "loss": 1.4351, "step": 6840 }, { "epoch": 0.74, "grad_norm": 0.09189036230143136, "learning_rate": 0.0001726294469195448, "loss": 1.2811, "step": 6841 }, { "epoch": 0.74, "grad_norm": 0.08050200243446126, "learning_rate": 0.00017249786718469967, "loss": 1.5167, "step": 6842 }, { "epoch": 0.74, "grad_norm": 0.07399654613968369, "learning_rate": 0.00017236632716061728, "loss": 1.4766, "step": 6843 }, { "epoch": 0.74, "grad_norm": 0.0806654913246944, "learning_rate": 0.00017223482686324736, "loss": 1.4564, "step": 6844 }, { "epoch": 0.74, "grad_norm": 0.09296418958185575, "learning_rate": 0.0001721033663085345, "loss": 1.4995, "step": 6845 }, { "epoch": 0.74, "grad_norm": 0.08405262508188, "learning_rate": 0.00017197194551241897, "loss": 1.4281, "step": 6846 }, { "epoch": 0.74, "grad_norm": 0.09050133536256577, "learning_rate": 0.00017184056449083603, "loss": 1.452, "step": 6847 }, { "epoch": 0.74, "grad_norm": 0.10443701802852487, "learning_rate": 0.00017170922325971584, "loss": 1.5564, "step": 6848 }, { "epoch": 0.74, "grad_norm": 0.08562641911936063, "learning_rate": 0.00017157792183498412, "loss": 1.2908, "step": 6849 }, { "epoch": 0.74, "grad_norm": 0.07636600524567706, "learning_rate": 0.00017144666023256178, "loss": 1.3667, "step": 6850 }, { "epoch": 0.74, "grad_norm": 0.08478261331322622, "learning_rate": 0.00017131543846836457, "loss": 1.3854, "step": 6851 }, { "epoch": 0.74, "grad_norm": 0.08098833976518655, "learning_rate": 0.00017118425655830344, "loss": 1.5091, "step": 6852 }, { "epoch": 0.74, "grad_norm": 0.08677585467877516, "learning_rate": 0.0001710531145182848, "loss": 1.5628, "step": 6853 }, { "epoch": 0.74, "grad_norm": 0.07471384846368484, "learning_rate": 0.0001709220123642103, "loss": 1.4041, "step": 6854 }, { "epoch": 0.74, "grad_norm": 0.09541745514219427, "learning_rate": 0.00017079095011197608, "loss": 1.4971, "step": 6855 }, { "epoch": 0.74, "grad_norm": 0.08623794035116948, "learning_rate": 0.0001706599277774743, "loss": 1.3316, "step": 6856 }, { "epoch": 0.74, "grad_norm": 0.08054776192726237, "learning_rate": 0.00017052894537659147, "loss": 1.43, "step": 6857 }, { "epoch": 0.74, "grad_norm": 0.07521154256399776, "learning_rate": 0.00017039800292520995, "loss": 1.3866, "step": 6858 }, { "epoch": 0.74, "grad_norm": 0.09193438771336371, "learning_rate": 0.00017026710043920702, "loss": 1.3083, "step": 6859 }, { "epoch": 0.74, "grad_norm": 0.08977963139511945, "learning_rate": 0.0001701362379344547, "loss": 1.3806, "step": 6860 }, { "epoch": 0.74, "grad_norm": 0.07698085395458001, "learning_rate": 0.00017000541542682086, "loss": 1.4348, "step": 6861 }, { "epoch": 0.74, "grad_norm": 0.08198947779570069, "learning_rate": 0.00016987463293216814, "loss": 1.4138, "step": 6862 }, { "epoch": 0.74, "grad_norm": 0.07855413193058092, "learning_rate": 0.0001697438904663543, "loss": 1.4153, "step": 6863 }, { "epoch": 0.74, "grad_norm": 0.08950865101868916, "learning_rate": 0.00016961318804523218, "loss": 1.5416, "step": 6864 }, { "epoch": 0.74, "grad_norm": 0.0831439080639515, "learning_rate": 0.00016948252568465, "loss": 1.4049, "step": 6865 }, { "epoch": 0.74, "grad_norm": 0.08513958963759437, "learning_rate": 0.0001693519034004512, "loss": 1.4206, "step": 6866 }, { "epoch": 0.74, "grad_norm": 0.07840923965012238, "learning_rate": 0.00016922132120847388, "loss": 1.3569, "step": 6867 }, { "epoch": 0.74, "grad_norm": 0.07532709899874017, "learning_rate": 0.0001690907791245518, "loss": 1.2623, "step": 6868 }, { "epoch": 0.74, "grad_norm": 0.08043137997388435, "learning_rate": 0.00016896027716451363, "loss": 1.3917, "step": 6869 }, { "epoch": 0.74, "grad_norm": 0.07859670126561226, "learning_rate": 0.00016882981534418302, "loss": 1.495, "step": 6870 }, { "epoch": 0.74, "grad_norm": 0.08174506145645243, "learning_rate": 0.00016869939367937896, "loss": 1.447, "step": 6871 }, { "epoch": 0.74, "grad_norm": 0.07206520273087716, "learning_rate": 0.00016856901218591596, "loss": 1.5492, "step": 6872 }, { "epoch": 0.74, "grad_norm": 0.08019987669464876, "learning_rate": 0.00016843867087960252, "loss": 1.4361, "step": 6873 }, { "epoch": 0.74, "grad_norm": 0.07809580316950983, "learning_rate": 0.00016830836977624325, "loss": 1.2727, "step": 6874 }, { "epoch": 0.74, "grad_norm": 0.07459830181974891, "learning_rate": 0.00016817810889163787, "loss": 1.425, "step": 6875 }, { "epoch": 0.74, "grad_norm": 0.08806181104637921, "learning_rate": 0.00016804788824158057, "loss": 1.5296, "step": 6876 }, { "epoch": 0.74, "grad_norm": 0.07800614211582017, "learning_rate": 0.00016791770784186128, "loss": 1.5628, "step": 6877 }, { "epoch": 0.74, "grad_norm": 0.08281656842314608, "learning_rate": 0.00016778756770826486, "loss": 1.3046, "step": 6878 }, { "epoch": 0.74, "grad_norm": 0.08666547575815937, "learning_rate": 0.00016765746785657104, "loss": 1.4894, "step": 6879 }, { "epoch": 0.74, "grad_norm": 0.07603211449722727, "learning_rate": 0.00016752740830255504, "loss": 1.4852, "step": 6880 }, { "epoch": 0.74, "grad_norm": 0.08116015501105417, "learning_rate": 0.0001673973890619871, "loss": 1.5455, "step": 6881 }, { "epoch": 0.74, "grad_norm": 0.08366331426875613, "learning_rate": 0.00016726741015063223, "loss": 1.4234, "step": 6882 }, { "epoch": 0.74, "grad_norm": 0.09604489598470241, "learning_rate": 0.00016713747158425118, "loss": 1.3777, "step": 6883 }, { "epoch": 0.74, "grad_norm": 0.08394195627592127, "learning_rate": 0.00016700757337859907, "loss": 1.4061, "step": 6884 }, { "epoch": 0.74, "grad_norm": 0.08751800146513225, "learning_rate": 0.00016687771554942688, "loss": 1.4041, "step": 6885 }, { "epoch": 0.74, "grad_norm": 0.08575353248678774, "learning_rate": 0.00016674789811247992, "loss": 1.3437, "step": 6886 }, { "epoch": 0.74, "grad_norm": 0.08504085235306681, "learning_rate": 0.0001666181210834994, "loss": 1.4633, "step": 6887 }, { "epoch": 0.74, "grad_norm": 0.09564690373909444, "learning_rate": 0.00016648838447822084, "loss": 1.4409, "step": 6888 }, { "epoch": 0.74, "grad_norm": 0.08571014076959972, "learning_rate": 0.0001663586883123755, "loss": 1.3986, "step": 6889 }, { "epoch": 0.74, "grad_norm": 0.07663223113335221, "learning_rate": 0.00016622903260168955, "loss": 1.4524, "step": 6890 }, { "epoch": 0.74, "grad_norm": 0.07946817775922309, "learning_rate": 0.00016609941736188394, "loss": 1.3571, "step": 6891 }, { "epoch": 0.74, "grad_norm": 0.08637460712979757, "learning_rate": 0.00016596984260867516, "loss": 1.4576, "step": 6892 }, { "epoch": 0.74, "grad_norm": 0.07903291672994443, "learning_rate": 0.00016584030835777464, "loss": 1.4884, "step": 6893 }, { "epoch": 0.74, "grad_norm": 0.07466860169220521, "learning_rate": 0.00016571081462488874, "loss": 1.5506, "step": 6894 }, { "epoch": 0.74, "grad_norm": 0.06936360532492965, "learning_rate": 0.0001655813614257189, "loss": 1.3753, "step": 6895 }, { "epoch": 0.74, "grad_norm": 0.09451161532387511, "learning_rate": 0.00016545194877596193, "loss": 1.5499, "step": 6896 }, { "epoch": 0.74, "grad_norm": 0.08750817216023275, "learning_rate": 0.00016532257669130967, "loss": 1.4725, "step": 6897 }, { "epoch": 0.74, "grad_norm": 0.08644183462372003, "learning_rate": 0.00016519324518744866, "loss": 1.4153, "step": 6898 }, { "epoch": 0.74, "grad_norm": 0.07856152367805538, "learning_rate": 0.00016506395428006088, "loss": 1.4271, "step": 6899 }, { "epoch": 0.74, "grad_norm": 0.08313932519241506, "learning_rate": 0.00016493470398482352, "loss": 1.4276, "step": 6900 }, { "epoch": 0.74, "grad_norm": 0.07758330661107049, "learning_rate": 0.00016480549431740831, "loss": 1.4259, "step": 6901 }, { "epoch": 0.74, "grad_norm": 0.07890737430673259, "learning_rate": 0.00016467632529348247, "loss": 1.5194, "step": 6902 }, { "epoch": 0.74, "grad_norm": 0.09159297058951059, "learning_rate": 0.0001645471969287084, "loss": 1.487, "step": 6903 }, { "epoch": 0.74, "grad_norm": 0.09770573554325404, "learning_rate": 0.00016441810923874318, "loss": 1.4068, "step": 6904 }, { "epoch": 0.74, "grad_norm": 0.09916837391473986, "learning_rate": 0.00016428906223923902, "loss": 1.3563, "step": 6905 }, { "epoch": 0.74, "grad_norm": 0.08312448828582723, "learning_rate": 0.00016416005594584355, "loss": 1.4346, "step": 6906 }, { "epoch": 0.74, "grad_norm": 0.0777976180163894, "learning_rate": 0.00016403109037419893, "loss": 1.3848, "step": 6907 }, { "epoch": 0.74, "grad_norm": 0.07890686457460733, "learning_rate": 0.00016390216553994292, "loss": 1.3671, "step": 6908 }, { "epoch": 0.74, "grad_norm": 0.07973944756595773, "learning_rate": 0.00016377328145870823, "loss": 1.3963, "step": 6909 }, { "epoch": 0.74, "grad_norm": 0.07806363228733787, "learning_rate": 0.00016364443814612207, "loss": 1.3893, "step": 6910 }, { "epoch": 0.74, "grad_norm": 0.07209223731006845, "learning_rate": 0.00016351563561780742, "loss": 1.4204, "step": 6911 }, { "epoch": 0.74, "grad_norm": 0.07909441403477985, "learning_rate": 0.00016338687388938217, "loss": 1.3775, "step": 6912 }, { "epoch": 0.74, "grad_norm": 0.07972099827853533, "learning_rate": 0.00016325815297645873, "loss": 1.5274, "step": 6913 }, { "epoch": 0.74, "grad_norm": 0.08052821554655319, "learning_rate": 0.00016312947289464518, "loss": 1.5595, "step": 6914 }, { "epoch": 0.74, "grad_norm": 0.08023420829022035, "learning_rate": 0.0001630008336595446, "loss": 1.3303, "step": 6915 }, { "epoch": 0.74, "grad_norm": 0.09799506539759276, "learning_rate": 0.00016287223528675476, "loss": 1.2331, "step": 6916 }, { "epoch": 0.74, "grad_norm": 0.08491064095612154, "learning_rate": 0.00016274367779186844, "loss": 1.3602, "step": 6917 }, { "epoch": 0.74, "grad_norm": 0.08807116482937358, "learning_rate": 0.00016261516119047393, "loss": 1.4781, "step": 6918 }, { "epoch": 0.74, "grad_norm": 0.10870127860147914, "learning_rate": 0.00016248668549815443, "loss": 1.5202, "step": 6919 }, { "epoch": 0.74, "grad_norm": 0.10562154840968066, "learning_rate": 0.0001623582507304877, "loss": 1.5252, "step": 6920 }, { "epoch": 0.74, "grad_norm": 0.08298775907519637, "learning_rate": 0.0001622298569030473, "loss": 1.4743, "step": 6921 }, { "epoch": 0.74, "grad_norm": 0.08203093210794524, "learning_rate": 0.000162101504031401, "loss": 1.4706, "step": 6922 }, { "epoch": 0.74, "grad_norm": 0.08380641907542187, "learning_rate": 0.00016197319213111233, "loss": 1.335, "step": 6923 }, { "epoch": 0.74, "grad_norm": 0.08283572450475699, "learning_rate": 0.0001618449212177396, "loss": 1.3608, "step": 6924 }, { "epoch": 0.74, "grad_norm": 0.09123974004311679, "learning_rate": 0.00016171669130683592, "loss": 1.305, "step": 6925 }, { "epoch": 0.74, "grad_norm": 0.09077853862321274, "learning_rate": 0.00016158850241394958, "loss": 1.4741, "step": 6926 }, { "epoch": 0.74, "grad_norm": 0.08266347964611022, "learning_rate": 0.00016146035455462393, "loss": 1.4007, "step": 6927 }, { "epoch": 0.74, "grad_norm": 0.07728062995794181, "learning_rate": 0.0001613322477443976, "loss": 1.3946, "step": 6928 }, { "epoch": 0.74, "grad_norm": 0.09516214966689458, "learning_rate": 0.00016120418199880367, "loss": 1.4611, "step": 6929 }, { "epoch": 0.74, "grad_norm": 0.08415646166248951, "learning_rate": 0.0001610761573333706, "loss": 1.3602, "step": 6930 }, { "epoch": 0.75, "grad_norm": 0.08683899922320816, "learning_rate": 0.00016094817376362215, "loss": 1.4251, "step": 6931 }, { "epoch": 0.75, "grad_norm": 0.08047309008486432, "learning_rate": 0.00016082023130507627, "loss": 1.3802, "step": 6932 }, { "epoch": 0.75, "grad_norm": 0.08534330070542163, "learning_rate": 0.0001606923299732468, "loss": 1.347, "step": 6933 }, { "epoch": 0.75, "grad_norm": 0.08466164032891992, "learning_rate": 0.00016056446978364214, "loss": 1.3549, "step": 6934 }, { "epoch": 0.75, "grad_norm": 0.10321861631811807, "learning_rate": 0.00016043665075176562, "loss": 1.4485, "step": 6935 }, { "epoch": 0.75, "grad_norm": 0.0927153279870653, "learning_rate": 0.000160308872893116, "loss": 1.3688, "step": 6936 }, { "epoch": 0.75, "grad_norm": 0.09696757316712741, "learning_rate": 0.00016018113622318664, "loss": 1.3439, "step": 6937 }, { "epoch": 0.75, "grad_norm": 0.07852402945748814, "learning_rate": 0.00016005344075746585, "loss": 1.4648, "step": 6938 }, { "epoch": 0.75, "grad_norm": 0.07922286942853338, "learning_rate": 0.0001599257865114374, "loss": 1.233, "step": 6939 }, { "epoch": 0.75, "grad_norm": 0.0913938351396083, "learning_rate": 0.0001597981735005799, "loss": 1.4028, "step": 6940 }, { "epoch": 0.75, "grad_norm": 0.08728248486685027, "learning_rate": 0.0001596706017403665, "loss": 1.3228, "step": 6941 }, { "epoch": 0.75, "grad_norm": 0.08034521864864075, "learning_rate": 0.000159543071246266, "loss": 1.3307, "step": 6942 }, { "epoch": 0.75, "grad_norm": 0.08437406293251533, "learning_rate": 0.00015941558203374197, "loss": 1.3317, "step": 6943 }, { "epoch": 0.75, "grad_norm": 0.07271689555275264, "learning_rate": 0.00015928813411825266, "loss": 1.388, "step": 6944 }, { "epoch": 0.75, "grad_norm": 0.09175529197602372, "learning_rate": 0.00015916072751525167, "loss": 1.5004, "step": 6945 }, { "epoch": 0.75, "grad_norm": 0.08084840838382006, "learning_rate": 0.0001590333622401877, "loss": 1.3225, "step": 6946 }, { "epoch": 0.75, "grad_norm": 0.0839105076082594, "learning_rate": 0.00015890603830850402, "loss": 1.4301, "step": 6947 }, { "epoch": 0.75, "grad_norm": 0.10145040433206401, "learning_rate": 0.000158778755735639, "loss": 1.4455, "step": 6948 }, { "epoch": 0.75, "grad_norm": 0.07785525394316473, "learning_rate": 0.0001586515145370262, "loss": 1.3215, "step": 6949 }, { "epoch": 0.75, "grad_norm": 0.07590586417847282, "learning_rate": 0.00015852431472809426, "loss": 1.4979, "step": 6950 }, { "epoch": 0.75, "grad_norm": 0.07980851983434818, "learning_rate": 0.0001583971563242662, "loss": 1.3844, "step": 6951 }, { "epoch": 0.75, "grad_norm": 0.07922936787588304, "learning_rate": 0.0001582700393409608, "loss": 1.4856, "step": 6952 }, { "epoch": 0.75, "grad_norm": 0.08281630274525655, "learning_rate": 0.00015814296379359106, "loss": 1.3173, "step": 6953 }, { "epoch": 0.75, "grad_norm": 0.08092482775318731, "learning_rate": 0.00015801592969756555, "loss": 1.3588, "step": 6954 }, { "epoch": 0.75, "grad_norm": 0.07710543321208346, "learning_rate": 0.00015788893706828773, "loss": 1.4724, "step": 6955 }, { "epoch": 0.75, "grad_norm": 0.07537722490115532, "learning_rate": 0.00015776198592115553, "loss": 1.4065, "step": 6956 }, { "epoch": 0.75, "grad_norm": 0.07354587379308565, "learning_rate": 0.00015763507627156265, "loss": 1.4229, "step": 6957 }, { "epoch": 0.75, "grad_norm": 0.08239350486018063, "learning_rate": 0.00015750820813489685, "loss": 1.3505, "step": 6958 }, { "epoch": 0.75, "grad_norm": 0.08386481585345223, "learning_rate": 0.00015738138152654175, "loss": 1.3849, "step": 6959 }, { "epoch": 0.75, "grad_norm": 0.09620737533851652, "learning_rate": 0.00015725459646187518, "loss": 1.3966, "step": 6960 }, { "epoch": 0.75, "grad_norm": 0.08601988560556079, "learning_rate": 0.00015712785295627035, "loss": 1.4484, "step": 6961 }, { "epoch": 0.75, "grad_norm": 0.07897105164507949, "learning_rate": 0.0001570011510250956, "loss": 1.4333, "step": 6962 }, { "epoch": 0.75, "grad_norm": 0.07358390406887073, "learning_rate": 0.00015687449068371367, "loss": 1.429, "step": 6963 }, { "epoch": 0.75, "grad_norm": 0.0924152537075702, "learning_rate": 0.00015674787194748264, "loss": 1.3773, "step": 6964 }, { "epoch": 0.75, "grad_norm": 0.08221188593201989, "learning_rate": 0.00015662129483175568, "loss": 1.3499, "step": 6965 }, { "epoch": 0.75, "grad_norm": 0.08749139899956103, "learning_rate": 0.00015649475935188033, "loss": 1.4698, "step": 6966 }, { "epoch": 0.75, "grad_norm": 0.08393889404422417, "learning_rate": 0.00015636826552319972, "loss": 1.4097, "step": 6967 }, { "epoch": 0.75, "grad_norm": 0.07418914646855072, "learning_rate": 0.00015624181336105187, "loss": 1.2507, "step": 6968 }, { "epoch": 0.75, "grad_norm": 0.08998017862926071, "learning_rate": 0.0001561154028807689, "loss": 1.4847, "step": 6969 }, { "epoch": 0.75, "grad_norm": 0.07646907923162406, "learning_rate": 0.00015598903409767896, "loss": 1.3316, "step": 6970 }, { "epoch": 0.75, "grad_norm": 0.10029053480803479, "learning_rate": 0.00015586270702710474, "loss": 1.5359, "step": 6971 }, { "epoch": 0.75, "grad_norm": 0.08308434884812765, "learning_rate": 0.00015573642168436358, "loss": 1.504, "step": 6972 }, { "epoch": 0.75, "grad_norm": 0.10198312095249795, "learning_rate": 0.00015561017808476813, "loss": 1.4175, "step": 6973 }, { "epoch": 0.75, "grad_norm": 0.0819281330153054, "learning_rate": 0.00015548397624362605, "loss": 1.371, "step": 6974 }, { "epoch": 0.75, "grad_norm": 0.08491787970373423, "learning_rate": 0.0001553578161762394, "loss": 1.4177, "step": 6975 }, { "epoch": 0.75, "grad_norm": 0.08401926419336586, "learning_rate": 0.00015523169789790576, "loss": 1.3111, "step": 6976 }, { "epoch": 0.75, "grad_norm": 0.08999804666215713, "learning_rate": 0.00015510562142391742, "loss": 1.3743, "step": 6977 }, { "epoch": 0.75, "grad_norm": 0.08523286268070919, "learning_rate": 0.0001549795867695616, "loss": 1.4312, "step": 6978 }, { "epoch": 0.75, "grad_norm": 0.08185701371966983, "learning_rate": 0.00015485359395012011, "loss": 1.3798, "step": 6979 }, { "epoch": 0.75, "grad_norm": 0.0701710856775418, "learning_rate": 0.00015472764298087027, "loss": 1.4199, "step": 6980 }, { "epoch": 0.75, "grad_norm": 0.08314221104473174, "learning_rate": 0.00015460173387708427, "loss": 1.3538, "step": 6981 }, { "epoch": 0.75, "grad_norm": 0.08420556211767127, "learning_rate": 0.00015447586665402857, "loss": 1.391, "step": 6982 }, { "epoch": 0.75, "grad_norm": 0.07528567612802567, "learning_rate": 0.00015435004132696546, "loss": 1.4486, "step": 6983 }, { "epoch": 0.75, "grad_norm": 0.09387052135404927, "learning_rate": 0.0001542242579111513, "loss": 1.3717, "step": 6984 }, { "epoch": 0.75, "grad_norm": 0.07264079271114803, "learning_rate": 0.0001540985164218379, "loss": 1.4776, "step": 6985 }, { "epoch": 0.75, "grad_norm": 0.0687592679653029, "learning_rate": 0.0001539728168742721, "loss": 1.4902, "step": 6986 }, { "epoch": 0.75, "grad_norm": 0.08720354161857613, "learning_rate": 0.00015384715928369502, "loss": 1.45, "step": 6987 }, { "epoch": 0.75, "grad_norm": 0.08356950973283157, "learning_rate": 0.00015372154366534324, "loss": 1.4589, "step": 6988 }, { "epoch": 0.75, "grad_norm": 0.08640405818330486, "learning_rate": 0.00015359597003444824, "loss": 1.4614, "step": 6989 }, { "epoch": 0.75, "grad_norm": 0.07591193807593741, "learning_rate": 0.00015347043840623615, "loss": 1.3445, "step": 6990 }, { "epoch": 0.75, "grad_norm": 0.07509872422742482, "learning_rate": 0.00015334494879592787, "loss": 1.5149, "step": 6991 }, { "epoch": 0.75, "grad_norm": 0.08250599953346692, "learning_rate": 0.00015321950121873967, "loss": 1.3254, "step": 6992 }, { "epoch": 0.75, "grad_norm": 0.07975103285921857, "learning_rate": 0.00015309409568988263, "loss": 1.3806, "step": 6993 }, { "epoch": 0.75, "grad_norm": 0.08522385169066489, "learning_rate": 0.0001529687322245623, "loss": 1.3766, "step": 6994 }, { "epoch": 0.75, "grad_norm": 0.08078623532738036, "learning_rate": 0.0001528434108379796, "loss": 1.3841, "step": 6995 }, { "epoch": 0.75, "grad_norm": 0.09287538197642288, "learning_rate": 0.00015271813154533033, "loss": 1.397, "step": 6996 }, { "epoch": 0.75, "grad_norm": 0.07227532782369564, "learning_rate": 0.00015259289436180467, "loss": 1.3578, "step": 6997 }, { "epoch": 0.75, "grad_norm": 0.08275454676238209, "learning_rate": 0.0001524676993025883, "loss": 1.4225, "step": 6998 }, { "epoch": 0.75, "grad_norm": 0.0787200646752553, "learning_rate": 0.00015234254638286183, "loss": 1.433, "step": 6999 }, { "epoch": 0.75, "grad_norm": 0.08559670118586739, "learning_rate": 0.00015221743561779987, "loss": 1.4289, "step": 7000 }, { "epoch": 0.75, "grad_norm": 0.08685366803895994, "learning_rate": 0.00015209236702257278, "loss": 1.4789, "step": 7001 }, { "epoch": 0.75, "grad_norm": 0.08398812238516672, "learning_rate": 0.0001519673406123458, "loss": 1.3169, "step": 7002 }, { "epoch": 0.75, "grad_norm": 0.07951981075675485, "learning_rate": 0.00015184235640227845, "loss": 1.46, "step": 7003 }, { "epoch": 0.75, "grad_norm": 0.07797865721296189, "learning_rate": 0.00015171741440752568, "loss": 1.5121, "step": 7004 }, { "epoch": 0.75, "grad_norm": 0.08033960308925113, "learning_rate": 0.00015159251464323732, "loss": 1.4325, "step": 7005 }, { "epoch": 0.75, "grad_norm": 0.07822679950394763, "learning_rate": 0.00015146765712455745, "loss": 1.4887, "step": 7006 }, { "epoch": 0.75, "grad_norm": 0.07221658273320822, "learning_rate": 0.00015134284186662584, "loss": 1.5121, "step": 7007 }, { "epoch": 0.75, "grad_norm": 0.0851519454515554, "learning_rate": 0.00015121806888457673, "loss": 1.4916, "step": 7008 }, { "epoch": 0.75, "grad_norm": 0.07553393733953745, "learning_rate": 0.00015109333819353905, "loss": 1.3452, "step": 7009 }, { "epoch": 0.75, "grad_norm": 0.08710936906626386, "learning_rate": 0.00015096864980863718, "loss": 1.3347, "step": 7010 }, { "epoch": 0.75, "grad_norm": 0.08725477793257659, "learning_rate": 0.00015084400374498964, "loss": 1.412, "step": 7011 }, { "epoch": 0.75, "grad_norm": 0.08923549232165805, "learning_rate": 0.0001507194000177105, "loss": 1.2951, "step": 7012 }, { "epoch": 0.75, "grad_norm": 0.0796244829660177, "learning_rate": 0.00015059483864190816, "loss": 1.2852, "step": 7013 }, { "epoch": 0.75, "grad_norm": 0.08487704673509294, "learning_rate": 0.00015047031963268614, "loss": 1.4351, "step": 7014 }, { "epoch": 0.75, "grad_norm": 0.08299755706265552, "learning_rate": 0.0001503458430051431, "loss": 1.2572, "step": 7015 }, { "epoch": 0.75, "grad_norm": 0.09293429207949322, "learning_rate": 0.00015022140877437185, "loss": 1.4802, "step": 7016 }, { "epoch": 0.75, "grad_norm": 0.07406152365690835, "learning_rate": 0.0001500970169554608, "loss": 1.3775, "step": 7017 }, { "epoch": 0.75, "grad_norm": 0.08114305241044793, "learning_rate": 0.00014997266756349264, "loss": 1.3368, "step": 7018 }, { "epoch": 0.75, "grad_norm": 0.07629530714726956, "learning_rate": 0.00014984836061354524, "loss": 1.4962, "step": 7019 }, { "epoch": 0.75, "grad_norm": 0.08980752121203324, "learning_rate": 0.00014972409612069138, "loss": 1.3905, "step": 7020 }, { "epoch": 0.75, "grad_norm": 0.08575581179496382, "learning_rate": 0.00014959987409999853, "loss": 1.3924, "step": 7021 }, { "epoch": 0.75, "grad_norm": 0.08198613603708774, "learning_rate": 0.00014947569456652876, "loss": 1.339, "step": 7022 }, { "epoch": 0.75, "grad_norm": 0.09454705124999215, "learning_rate": 0.00014935155753533947, "loss": 1.4268, "step": 7023 }, { "epoch": 0.76, "grad_norm": 0.09143985910869612, "learning_rate": 0.00014922746302148282, "loss": 1.5904, "step": 7024 }, { "epoch": 0.76, "grad_norm": 0.08156496168263828, "learning_rate": 0.00014910341104000546, "loss": 1.4352, "step": 7025 }, { "epoch": 0.76, "grad_norm": 0.08359413915614876, "learning_rate": 0.00014897940160594925, "loss": 1.4386, "step": 7026 }, { "epoch": 0.76, "grad_norm": 0.08953163494531653, "learning_rate": 0.00014885543473435088, "loss": 1.4843, "step": 7027 }, { "epoch": 0.76, "grad_norm": 0.07495695660289363, "learning_rate": 0.00014873151044024146, "loss": 1.2414, "step": 7028 }, { "epoch": 0.76, "grad_norm": 0.10177289279918775, "learning_rate": 0.00014860762873864742, "loss": 1.4096, "step": 7029 }, { "epoch": 0.76, "grad_norm": 0.07368892484521711, "learning_rate": 0.00014848378964458999, "loss": 1.3566, "step": 7030 }, { "epoch": 0.76, "grad_norm": 0.08689439609994665, "learning_rate": 0.0001483599931730849, "loss": 1.4542, "step": 7031 }, { "epoch": 0.76, "grad_norm": 0.08443024292684892, "learning_rate": 0.00014823623933914276, "loss": 1.3873, "step": 7032 }, { "epoch": 0.76, "grad_norm": 0.08822916976981438, "learning_rate": 0.00014811252815776955, "loss": 1.5633, "step": 7033 }, { "epoch": 0.76, "grad_norm": 0.0887025471292702, "learning_rate": 0.0001479888596439652, "loss": 1.4117, "step": 7034 }, { "epoch": 0.76, "grad_norm": 0.08246689872933267, "learning_rate": 0.0001478652338127252, "loss": 1.4453, "step": 7035 }, { "epoch": 0.76, "grad_norm": 0.07093731196022528, "learning_rate": 0.00014774165067903982, "loss": 1.3772, "step": 7036 }, { "epoch": 0.76, "grad_norm": 0.08363424689449472, "learning_rate": 0.00014761811025789352, "loss": 1.327, "step": 7037 }, { "epoch": 0.76, "grad_norm": 0.07842351016801305, "learning_rate": 0.00014749461256426615, "loss": 1.3652, "step": 7038 }, { "epoch": 0.76, "grad_norm": 0.0824205239126465, "learning_rate": 0.00014737115761313246, "loss": 1.4238, "step": 7039 }, { "epoch": 0.76, "grad_norm": 0.08687263476544752, "learning_rate": 0.00014724774541946146, "loss": 1.3724, "step": 7040 }, { "epoch": 0.76, "grad_norm": 0.0920673100075393, "learning_rate": 0.00014712437599821742, "loss": 1.2506, "step": 7041 }, { "epoch": 0.76, "grad_norm": 0.09222049188447305, "learning_rate": 0.00014700104936435953, "loss": 1.4085, "step": 7042 }, { "epoch": 0.76, "grad_norm": 0.07736454871275107, "learning_rate": 0.00014687776553284137, "loss": 1.4622, "step": 7043 }, { "epoch": 0.76, "grad_norm": 0.08255409541057177, "learning_rate": 0.00014675452451861138, "loss": 1.5405, "step": 7044 }, { "epoch": 0.76, "grad_norm": 0.08612416866292971, "learning_rate": 0.00014663132633661313, "loss": 1.417, "step": 7045 }, { "epoch": 0.76, "grad_norm": 0.09432241903711268, "learning_rate": 0.00014650817100178492, "loss": 1.4246, "step": 7046 }, { "epoch": 0.76, "grad_norm": 0.11601460726127812, "learning_rate": 0.00014638505852905954, "loss": 1.3404, "step": 7047 }, { "epoch": 0.76, "grad_norm": 0.0966656462751512, "learning_rate": 0.00014626198893336506, "loss": 1.461, "step": 7048 }, { "epoch": 0.76, "grad_norm": 0.07949808600283965, "learning_rate": 0.00014613896222962375, "loss": 1.3932, "step": 7049 }, { "epoch": 0.76, "grad_norm": 0.09250961738605959, "learning_rate": 0.00014601597843275327, "loss": 1.4166, "step": 7050 }, { "epoch": 0.76, "grad_norm": 0.07636058338566189, "learning_rate": 0.00014589303755766587, "loss": 1.4146, "step": 7051 }, { "epoch": 0.76, "grad_norm": 0.08105055959370543, "learning_rate": 0.0001457701396192685, "loss": 1.3308, "step": 7052 }, { "epoch": 0.76, "grad_norm": 0.08548635220894996, "learning_rate": 0.00014564728463246275, "loss": 1.3975, "step": 7053 }, { "epoch": 0.76, "grad_norm": 0.07965255750151616, "learning_rate": 0.00014552447261214534, "loss": 1.4911, "step": 7054 }, { "epoch": 0.76, "grad_norm": 0.08537985574986948, "learning_rate": 0.00014540170357320786, "loss": 1.4963, "step": 7055 }, { "epoch": 0.76, "grad_norm": 0.09078007237282826, "learning_rate": 0.0001452789775305362, "loss": 1.4235, "step": 7056 }, { "epoch": 0.76, "grad_norm": 0.08402632342143757, "learning_rate": 0.0001451562944990114, "loss": 1.4207, "step": 7057 }, { "epoch": 0.76, "grad_norm": 0.08301706262546693, "learning_rate": 0.00014503365449350936, "loss": 1.2685, "step": 7058 }, { "epoch": 0.76, "grad_norm": 0.08203445439664891, "learning_rate": 0.00014491105752890033, "loss": 1.3933, "step": 7059 }, { "epoch": 0.76, "grad_norm": 0.09063623215892826, "learning_rate": 0.00014478850362004974, "loss": 1.486, "step": 7060 }, { "epoch": 0.76, "grad_norm": 0.09475208951182909, "learning_rate": 0.00014466599278181787, "loss": 1.4044, "step": 7061 }, { "epoch": 0.76, "grad_norm": 0.09177737333236773, "learning_rate": 0.0001445435250290592, "loss": 1.4223, "step": 7062 }, { "epoch": 0.76, "grad_norm": 0.09652109094250604, "learning_rate": 0.00014442110037662375, "loss": 1.4139, "step": 7063 }, { "epoch": 0.76, "grad_norm": 0.09218566946256986, "learning_rate": 0.00014429871883935575, "loss": 1.2943, "step": 7064 }, { "epoch": 0.76, "grad_norm": 0.08224360335861525, "learning_rate": 0.0001441763804320942, "loss": 1.4995, "step": 7065 }, { "epoch": 0.76, "grad_norm": 0.08621259301000005, "learning_rate": 0.00014405408516967328, "loss": 1.381, "step": 7066 }, { "epoch": 0.76, "grad_norm": 0.08733699795943922, "learning_rate": 0.00014393183306692176, "loss": 1.4354, "step": 7067 }, { "epoch": 0.76, "grad_norm": 0.0859822893466842, "learning_rate": 0.00014380962413866288, "loss": 1.4137, "step": 7068 }, { "epoch": 0.76, "grad_norm": 0.08013451149466999, "learning_rate": 0.00014368745839971509, "loss": 1.2975, "step": 7069 }, { "epoch": 0.76, "grad_norm": 0.07670849214756494, "learning_rate": 0.00014356533586489152, "loss": 1.4086, "step": 7070 }, { "epoch": 0.76, "grad_norm": 0.08379391904018833, "learning_rate": 0.00014344325654899964, "loss": 1.5488, "step": 7071 }, { "epoch": 0.76, "grad_norm": 0.08446287225279143, "learning_rate": 0.0001433212204668421, "loss": 1.3633, "step": 7072 }, { "epoch": 0.76, "grad_norm": 0.08655495607730566, "learning_rate": 0.00014319922763321642, "loss": 1.4289, "step": 7073 }, { "epoch": 0.76, "grad_norm": 0.09076922400021534, "learning_rate": 0.0001430772780629145, "loss": 1.3839, "step": 7074 }, { "epoch": 0.76, "grad_norm": 0.07944001534849933, "learning_rate": 0.00014295537177072288, "loss": 1.3809, "step": 7075 }, { "epoch": 0.76, "grad_norm": 0.09329551688513704, "learning_rate": 0.00014283350877142343, "loss": 1.3861, "step": 7076 }, { "epoch": 0.76, "grad_norm": 0.07042640493257381, "learning_rate": 0.00014271168907979248, "loss": 1.3829, "step": 7077 }, { "epoch": 0.76, "grad_norm": 0.07897438905434498, "learning_rate": 0.00014258991271060085, "loss": 1.4558, "step": 7078 }, { "epoch": 0.76, "grad_norm": 0.09126203160664402, "learning_rate": 0.00014246817967861463, "loss": 1.3902, "step": 7079 }, { "epoch": 0.76, "grad_norm": 0.09630423913710359, "learning_rate": 0.00014234648999859412, "loss": 1.3629, "step": 7080 }, { "epoch": 0.76, "grad_norm": 0.08349720636262284, "learning_rate": 0.0001422248436852947, "loss": 1.3694, "step": 7081 }, { "epoch": 0.76, "grad_norm": 0.08293949990701834, "learning_rate": 0.00014210324075346654, "loss": 1.3749, "step": 7082 }, { "epoch": 0.76, "grad_norm": 0.07570143074146496, "learning_rate": 0.00014198168121785416, "loss": 1.4489, "step": 7083 }, { "epoch": 0.76, "grad_norm": 0.08376068026895099, "learning_rate": 0.0001418601650931974, "loss": 1.3698, "step": 7084 }, { "epoch": 0.76, "grad_norm": 0.08051201255768421, "learning_rate": 0.0001417386923942301, "loss": 1.3801, "step": 7085 }, { "epoch": 0.76, "grad_norm": 0.08361829214712761, "learning_rate": 0.00014161726313568162, "loss": 1.3365, "step": 7086 }, { "epoch": 0.76, "grad_norm": 0.07539311701974652, "learning_rate": 0.00014149587733227543, "loss": 1.5533, "step": 7087 }, { "epoch": 0.76, "grad_norm": 0.07636646791153252, "learning_rate": 0.00014137453499873, "loss": 1.3016, "step": 7088 }, { "epoch": 0.76, "grad_norm": 0.07726830815322598, "learning_rate": 0.00014125323614975878, "loss": 1.4055, "step": 7089 }, { "epoch": 0.76, "grad_norm": 0.07852283896570228, "learning_rate": 0.00014113198080006927, "loss": 1.3658, "step": 7090 }, { "epoch": 0.76, "grad_norm": 0.08455046614695923, "learning_rate": 0.00014101076896436428, "loss": 1.4775, "step": 7091 }, { "epoch": 0.76, "grad_norm": 0.0870258879015629, "learning_rate": 0.00014088960065734136, "loss": 1.4282, "step": 7092 }, { "epoch": 0.76, "grad_norm": 0.08600593545883693, "learning_rate": 0.00014076847589369223, "loss": 1.4818, "step": 7093 }, { "epoch": 0.76, "grad_norm": 0.0742503862653404, "learning_rate": 0.00014064739468810388, "loss": 1.4268, "step": 7094 }, { "epoch": 0.76, "grad_norm": 0.08703341486513806, "learning_rate": 0.00014052635705525814, "loss": 1.3743, "step": 7095 }, { "epoch": 0.76, "grad_norm": 0.0916770967788742, "learning_rate": 0.00014040536300983052, "loss": 1.4698, "step": 7096 }, { "epoch": 0.76, "grad_norm": 0.0784670687340215, "learning_rate": 0.00014028441256649238, "loss": 1.4073, "step": 7097 }, { "epoch": 0.76, "grad_norm": 0.08774542735944259, "learning_rate": 0.00014016350573990948, "loss": 1.4194, "step": 7098 }, { "epoch": 0.76, "grad_norm": 0.09906949969586148, "learning_rate": 0.0001400426425447419, "loss": 1.3304, "step": 7099 }, { "epoch": 0.76, "grad_norm": 0.09401233175045126, "learning_rate": 0.00013992182299564493, "loss": 1.4606, "step": 7100 }, { "epoch": 0.76, "grad_norm": 0.07694095237545967, "learning_rate": 0.00013980104710726844, "loss": 1.4595, "step": 7101 }, { "epoch": 0.76, "grad_norm": 0.1054928297352277, "learning_rate": 0.00013968031489425658, "loss": 1.3676, "step": 7102 }, { "epoch": 0.76, "grad_norm": 0.08347109928024152, "learning_rate": 0.0001395596263712488, "loss": 1.3819, "step": 7103 }, { "epoch": 0.76, "grad_norm": 0.07712119034782984, "learning_rate": 0.00013943898155287904, "loss": 1.3646, "step": 7104 }, { "epoch": 0.76, "grad_norm": 0.08194542905625728, "learning_rate": 0.00013931838045377586, "loss": 1.4872, "step": 7105 }, { "epoch": 0.76, "grad_norm": 0.08432223869123434, "learning_rate": 0.00013919782308856232, "loss": 1.4337, "step": 7106 }, { "epoch": 0.76, "grad_norm": 0.09573328871170617, "learning_rate": 0.00013907730947185665, "loss": 1.4155, "step": 7107 }, { "epoch": 0.76, "grad_norm": 0.08869671452675124, "learning_rate": 0.00013895683961827167, "loss": 1.3511, "step": 7108 }, { "epoch": 0.76, "grad_norm": 0.07279819542175914, "learning_rate": 0.00013883641354241438, "loss": 1.4694, "step": 7109 }, { "epoch": 0.76, "grad_norm": 0.07300922537855549, "learning_rate": 0.00013871603125888704, "loss": 1.3933, "step": 7110 }, { "epoch": 0.76, "grad_norm": 0.0893723766297854, "learning_rate": 0.00013859569278228668, "loss": 1.2866, "step": 7111 }, { "epoch": 0.76, "grad_norm": 0.07385807095451379, "learning_rate": 0.00013847539812720435, "loss": 1.3117, "step": 7112 }, { "epoch": 0.76, "grad_norm": 0.08154523440885406, "learning_rate": 0.00013835514730822646, "loss": 1.4402, "step": 7113 }, { "epoch": 0.76, "grad_norm": 0.07631693608060588, "learning_rate": 0.00013823494033993362, "loss": 1.2876, "step": 7114 }, { "epoch": 0.76, "grad_norm": 0.07501813680429896, "learning_rate": 0.00013811477723690147, "loss": 1.5077, "step": 7115 }, { "epoch": 0.76, "grad_norm": 0.09582028831990151, "learning_rate": 0.0001379946580137003, "loss": 1.1807, "step": 7116 }, { "epoch": 0.77, "grad_norm": 0.08195412649055485, "learning_rate": 0.0001378745826848949, "loss": 1.3206, "step": 7117 }, { "epoch": 0.77, "grad_norm": 0.08577541487377897, "learning_rate": 0.00013775455126504465, "loss": 1.4594, "step": 7118 }, { "epoch": 0.77, "grad_norm": 0.08293162734543053, "learning_rate": 0.00013763456376870387, "loss": 1.3694, "step": 7119 }, { "epoch": 0.77, "grad_norm": 0.08782077768957836, "learning_rate": 0.00013751462021042166, "loss": 1.5043, "step": 7120 }, { "epoch": 0.77, "grad_norm": 0.07999103741806815, "learning_rate": 0.0001373947206047413, "loss": 1.292, "step": 7121 }, { "epoch": 0.77, "grad_norm": 0.08470840165153588, "learning_rate": 0.00013727486496620112, "loss": 1.376, "step": 7122 }, { "epoch": 0.77, "grad_norm": 0.08698455156845768, "learning_rate": 0.00013715505330933427, "loss": 1.4579, "step": 7123 }, { "epoch": 0.77, "grad_norm": 0.08819929465794887, "learning_rate": 0.00013703528564866792, "loss": 1.3595, "step": 7124 }, { "epoch": 0.77, "grad_norm": 0.0860304979498625, "learning_rate": 0.0001369155619987245, "loss": 1.3969, "step": 7125 }, { "epoch": 0.77, "grad_norm": 0.08037149086224887, "learning_rate": 0.0001367958823740213, "loss": 1.5538, "step": 7126 }, { "epoch": 0.77, "grad_norm": 0.07933503079166238, "learning_rate": 0.0001366762467890692, "loss": 1.3866, "step": 7127 }, { "epoch": 0.77, "grad_norm": 0.0927014586146838, "learning_rate": 0.00013655665525837474, "loss": 1.2759, "step": 7128 }, { "epoch": 0.77, "grad_norm": 0.08431397953477916, "learning_rate": 0.00013643710779643892, "loss": 1.4357, "step": 7129 }, { "epoch": 0.77, "grad_norm": 0.09579874268870016, "learning_rate": 0.00013631760441775703, "loss": 1.4489, "step": 7130 }, { "epoch": 0.77, "grad_norm": 0.08347123456391652, "learning_rate": 0.00013619814513681945, "loss": 1.3502, "step": 7131 }, { "epoch": 0.77, "grad_norm": 0.08037094478894655, "learning_rate": 0.0001360787299681111, "loss": 1.4554, "step": 7132 }, { "epoch": 0.77, "grad_norm": 0.08713009992952041, "learning_rate": 0.00013595935892611122, "loss": 1.3501, "step": 7133 }, { "epoch": 0.77, "grad_norm": 0.079963043140441, "learning_rate": 0.00013584003202529415, "loss": 1.3216, "step": 7134 }, { "epoch": 0.77, "grad_norm": 0.08654644726903905, "learning_rate": 0.00013572074928012878, "loss": 1.3006, "step": 7135 }, { "epoch": 0.77, "grad_norm": 0.08684088272894674, "learning_rate": 0.00013560151070507825, "loss": 1.4253, "step": 7136 }, { "epoch": 0.77, "grad_norm": 0.08305476767947234, "learning_rate": 0.00013548231631460095, "loss": 1.3423, "step": 7137 }, { "epoch": 0.77, "grad_norm": 0.07840906222655904, "learning_rate": 0.00013536316612314936, "loss": 1.316, "step": 7138 }, { "epoch": 0.77, "grad_norm": 0.08868368034508109, "learning_rate": 0.00013524406014517115, "loss": 1.4035, "step": 7139 }, { "epoch": 0.77, "grad_norm": 0.0825493183209432, "learning_rate": 0.00013512499839510794, "loss": 1.4327, "step": 7140 }, { "epoch": 0.77, "grad_norm": 0.07142852180236557, "learning_rate": 0.00013500598088739664, "loss": 1.2882, "step": 7141 }, { "epoch": 0.77, "grad_norm": 0.07839573055539671, "learning_rate": 0.00013488700763646862, "loss": 1.3449, "step": 7142 }, { "epoch": 0.77, "grad_norm": 0.0762490819632461, "learning_rate": 0.0001347680786567495, "loss": 1.446, "step": 7143 }, { "epoch": 0.77, "grad_norm": 0.08253038492998804, "learning_rate": 0.00013464919396266017, "loss": 1.3892, "step": 7144 }, { "epoch": 0.77, "grad_norm": 0.08080981648970774, "learning_rate": 0.00013453035356861544, "loss": 1.4137, "step": 7145 }, { "epoch": 0.77, "grad_norm": 0.07895680489359269, "learning_rate": 0.00013441155748902535, "loss": 1.3648, "step": 7146 }, { "epoch": 0.77, "grad_norm": 0.09131009564719973, "learning_rate": 0.00013429280573829438, "loss": 1.3353, "step": 7147 }, { "epoch": 0.77, "grad_norm": 0.08148140496357519, "learning_rate": 0.00013417409833082155, "loss": 1.3986, "step": 7148 }, { "epoch": 0.77, "grad_norm": 0.09545610989391617, "learning_rate": 0.0001340554352810003, "loss": 1.5625, "step": 7149 }, { "epoch": 0.77, "grad_norm": 0.09783883668758428, "learning_rate": 0.00013393681660321915, "loss": 1.439, "step": 7150 }, { "epoch": 0.77, "grad_norm": 0.07799550589752935, "learning_rate": 0.00013381824231186113, "loss": 1.4207, "step": 7151 }, { "epoch": 0.77, "grad_norm": 0.0795862542261532, "learning_rate": 0.00013369971242130352, "loss": 1.404, "step": 7152 }, { "epoch": 0.77, "grad_norm": 0.08548654694614052, "learning_rate": 0.00013358122694591862, "loss": 1.3907, "step": 7153 }, { "epoch": 0.77, "grad_norm": 0.08134899653490377, "learning_rate": 0.00013346278590007334, "loss": 1.3212, "step": 7154 }, { "epoch": 0.77, "grad_norm": 0.08104358016743386, "learning_rate": 0.0001333443892981287, "loss": 1.4092, "step": 7155 }, { "epoch": 0.77, "grad_norm": 0.08339197096891865, "learning_rate": 0.00013322603715444097, "loss": 1.4359, "step": 7156 }, { "epoch": 0.77, "grad_norm": 0.07981284471191327, "learning_rate": 0.00013310772948336085, "loss": 1.512, "step": 7157 }, { "epoch": 0.77, "grad_norm": 0.07761084452308815, "learning_rate": 0.00013298946629923335, "loss": 1.3671, "step": 7158 }, { "epoch": 0.77, "grad_norm": 0.0854348960861969, "learning_rate": 0.00013287124761639823, "loss": 1.494, "step": 7159 }, { "epoch": 0.77, "grad_norm": 0.09161357129893245, "learning_rate": 0.0001327530734491902, "loss": 1.4476, "step": 7160 }, { "epoch": 0.77, "grad_norm": 0.08312868545947096, "learning_rate": 0.0001326349438119379, "loss": 1.2806, "step": 7161 }, { "epoch": 0.77, "grad_norm": 0.09452434467284239, "learning_rate": 0.0001325168587189652, "loss": 1.5183, "step": 7162 }, { "epoch": 0.77, "grad_norm": 0.08587057449972695, "learning_rate": 0.00013239881818459043, "loss": 1.4184, "step": 7163 }, { "epoch": 0.77, "grad_norm": 0.08793194779558704, "learning_rate": 0.0001322808222231261, "loss": 1.386, "step": 7164 }, { "epoch": 0.77, "grad_norm": 0.08722167548904788, "learning_rate": 0.00013216287084887984, "loss": 1.3514, "step": 7165 }, { "epoch": 0.77, "grad_norm": 0.08853779388704727, "learning_rate": 0.00013204496407615373, "loss": 1.4281, "step": 7166 }, { "epoch": 0.77, "grad_norm": 0.09532120705919452, "learning_rate": 0.00013192710191924412, "loss": 1.4111, "step": 7167 }, { "epoch": 0.77, "grad_norm": 0.09657694916039453, "learning_rate": 0.00013180928439244233, "loss": 1.3998, "step": 7168 }, { "epoch": 0.77, "grad_norm": 0.07677970663766127, "learning_rate": 0.00013169151151003433, "loss": 1.2778, "step": 7169 }, { "epoch": 0.77, "grad_norm": 0.09092227790364336, "learning_rate": 0.00013157378328630025, "loss": 1.4506, "step": 7170 }, { "epoch": 0.77, "grad_norm": 0.09163414959090961, "learning_rate": 0.00013145609973551502, "loss": 1.4888, "step": 7171 }, { "epoch": 0.77, "grad_norm": 0.089725966578204, "learning_rate": 0.00013133846087194824, "loss": 1.4996, "step": 7172 }, { "epoch": 0.77, "grad_norm": 0.08845121892261157, "learning_rate": 0.0001312208667098642, "loss": 1.4485, "step": 7173 }, { "epoch": 0.77, "grad_norm": 0.08463233673164806, "learning_rate": 0.00013110331726352132, "loss": 1.4001, "step": 7174 }, { "epoch": 0.77, "grad_norm": 0.08616388370330381, "learning_rate": 0.00013098581254717312, "loss": 1.4804, "step": 7175 }, { "epoch": 0.77, "grad_norm": 0.08580067427689526, "learning_rate": 0.00013086835257506717, "loss": 1.4569, "step": 7176 }, { "epoch": 0.77, "grad_norm": 0.08164815485831177, "learning_rate": 0.00013075093736144612, "loss": 1.4586, "step": 7177 }, { "epoch": 0.77, "grad_norm": 0.09324023988053676, "learning_rate": 0.000130633566920547, "loss": 1.4063, "step": 7178 }, { "epoch": 0.77, "grad_norm": 0.08436554667047103, "learning_rate": 0.00013051624126660132, "loss": 1.4858, "step": 7179 }, { "epoch": 0.77, "grad_norm": 0.08268273314661022, "learning_rate": 0.00013039896041383504, "loss": 1.4565, "step": 7180 }, { "epoch": 0.77, "grad_norm": 0.08402105295671775, "learning_rate": 0.000130281724376469, "loss": 1.4243, "step": 7181 }, { "epoch": 0.77, "grad_norm": 0.0714282284349483, "learning_rate": 0.00013016453316871867, "loss": 1.3911, "step": 7182 }, { "epoch": 0.77, "grad_norm": 0.08425509199398158, "learning_rate": 0.00013004738680479354, "loss": 1.3734, "step": 7183 }, { "epoch": 0.77, "grad_norm": 0.09272718405902941, "learning_rate": 0.00012993028529889816, "loss": 1.4259, "step": 7184 }, { "epoch": 0.77, "grad_norm": 0.0790776375510767, "learning_rate": 0.00012981322866523171, "loss": 1.3094, "step": 7185 }, { "epoch": 0.77, "grad_norm": 0.07733944744848811, "learning_rate": 0.00012969621691798734, "loss": 1.3611, "step": 7186 }, { "epoch": 0.77, "grad_norm": 0.07745412806827585, "learning_rate": 0.0001295792500713533, "loss": 1.2361, "step": 7187 }, { "epoch": 0.77, "grad_norm": 0.08395765539333197, "learning_rate": 0.00012946232813951236, "loss": 1.4544, "step": 7188 }, { "epoch": 0.77, "grad_norm": 0.08265912082983201, "learning_rate": 0.00012934545113664142, "loss": 1.4492, "step": 7189 }, { "epoch": 0.77, "grad_norm": 0.08664702529416787, "learning_rate": 0.00012922861907691257, "loss": 1.3912, "step": 7190 }, { "epoch": 0.77, "grad_norm": 0.08828986408607989, "learning_rate": 0.00012911183197449183, "loss": 1.3833, "step": 7191 }, { "epoch": 0.77, "grad_norm": 0.0783447810072099, "learning_rate": 0.00012899508984354002, "loss": 1.5175, "step": 7192 }, { "epoch": 0.77, "grad_norm": 0.09375213580877549, "learning_rate": 0.00012887839269821262, "loss": 1.3619, "step": 7193 }, { "epoch": 0.77, "grad_norm": 0.09220600644539853, "learning_rate": 0.00012876174055265966, "loss": 1.4331, "step": 7194 }, { "epoch": 0.77, "grad_norm": 0.07795700921437147, "learning_rate": 0.00012864513342102535, "loss": 1.4297, "step": 7195 }, { "epoch": 0.77, "grad_norm": 0.08299473491200346, "learning_rate": 0.00012852857131744888, "loss": 1.469, "step": 7196 }, { "epoch": 0.77, "grad_norm": 0.09896839999352933, "learning_rate": 0.00012841205425606395, "loss": 1.3864, "step": 7197 }, { "epoch": 0.77, "grad_norm": 0.0817748274050442, "learning_rate": 0.00012829558225099834, "loss": 1.4143, "step": 7198 }, { "epoch": 0.77, "grad_norm": 0.12473542368934352, "learning_rate": 0.0001281791553163749, "loss": 1.3604, "step": 7199 }, { "epoch": 0.77, "grad_norm": 0.09029268877199315, "learning_rate": 0.00012806277346631085, "loss": 1.4519, "step": 7200 }, { "epoch": 0.77, "grad_norm": 0.09018864980005668, "learning_rate": 0.0001279464367149178, "loss": 1.3292, "step": 7201 }, { "epoch": 0.77, "grad_norm": 0.07386791132315604, "learning_rate": 0.00012783014507630175, "loss": 1.5217, "step": 7202 }, { "epoch": 0.77, "grad_norm": 0.08689566481818276, "learning_rate": 0.00012771389856456371, "loss": 1.5659, "step": 7203 }, { "epoch": 0.77, "grad_norm": 0.07796769099863722, "learning_rate": 0.0001275976971937991, "loss": 1.3743, "step": 7204 }, { "epoch": 0.77, "grad_norm": 0.08347089844634027, "learning_rate": 0.00012748154097809745, "loss": 1.403, "step": 7205 }, { "epoch": 0.77, "grad_norm": 0.08194139361623523, "learning_rate": 0.00012736542993154316, "loss": 1.5831, "step": 7206 }, { "epoch": 0.77, "grad_norm": 0.08814386072544067, "learning_rate": 0.00012724936406821537, "loss": 1.4637, "step": 7207 }, { "epoch": 0.77, "grad_norm": 0.09305664303495668, "learning_rate": 0.000127133343402187, "loss": 1.3209, "step": 7208 }, { "epoch": 0.77, "grad_norm": 0.09063551416325358, "learning_rate": 0.00012701736794752644, "loss": 1.3946, "step": 7209 }, { "epoch": 0.78, "grad_norm": 0.09015549472437932, "learning_rate": 0.0001269014377182957, "loss": 1.3488, "step": 7210 }, { "epoch": 0.78, "grad_norm": 0.0804346799482095, "learning_rate": 0.00012678555272855208, "loss": 1.4834, "step": 7211 }, { "epoch": 0.78, "grad_norm": 0.07872396579005872, "learning_rate": 0.00012666971299234668, "loss": 1.3482, "step": 7212 }, { "epoch": 0.78, "grad_norm": 0.08578567070288019, "learning_rate": 0.00012655391852372584, "loss": 1.3363, "step": 7213 }, { "epoch": 0.78, "grad_norm": 0.08786717737868875, "learning_rate": 0.00012643816933672968, "loss": 1.3873, "step": 7214 }, { "epoch": 0.78, "grad_norm": 0.10304717691086623, "learning_rate": 0.00012632246544539333, "loss": 1.4498, "step": 7215 }, { "epoch": 0.78, "grad_norm": 0.08021184395395602, "learning_rate": 0.00012620680686374646, "loss": 1.3443, "step": 7216 }, { "epoch": 0.78, "grad_norm": 0.10865624373995214, "learning_rate": 0.00012609119360581277, "loss": 1.4498, "step": 7217 }, { "epoch": 0.78, "grad_norm": 0.07778178613046131, "learning_rate": 0.00012597562568561095, "loss": 1.5317, "step": 7218 }, { "epoch": 0.78, "grad_norm": 0.07738530338405038, "learning_rate": 0.00012586010311715408, "loss": 1.4558, "step": 7219 }, { "epoch": 0.78, "grad_norm": 0.08931510175039695, "learning_rate": 0.0001257446259144494, "loss": 1.503, "step": 7220 }, { "epoch": 0.78, "grad_norm": 0.09255034325625278, "learning_rate": 0.00012562919409149915, "loss": 1.3232, "step": 7221 }, { "epoch": 0.78, "grad_norm": 0.079332226562438, "learning_rate": 0.00012551380766230003, "loss": 1.4021, "step": 7222 }, { "epoch": 0.78, "grad_norm": 0.08365566359520343, "learning_rate": 0.00012539846664084248, "loss": 1.4243, "step": 7223 }, { "epoch": 0.78, "grad_norm": 0.09729578487033033, "learning_rate": 0.00012528317104111225, "loss": 1.3776, "step": 7224 }, { "epoch": 0.78, "grad_norm": 0.08689847549229128, "learning_rate": 0.00012516792087708962, "loss": 1.4569, "step": 7225 }, { "epoch": 0.78, "grad_norm": 0.08986204287640165, "learning_rate": 0.0001250527161627486, "loss": 1.3315, "step": 7226 }, { "epoch": 0.78, "grad_norm": 0.08902530175684555, "learning_rate": 0.00012493755691205845, "loss": 1.3933, "step": 7227 }, { "epoch": 0.78, "grad_norm": 0.07783856035461831, "learning_rate": 0.00012482244313898267, "loss": 1.3352, "step": 7228 }, { "epoch": 0.78, "grad_norm": 0.07579194127448878, "learning_rate": 0.0001247073748574789, "loss": 1.3036, "step": 7229 }, { "epoch": 0.78, "grad_norm": 0.07552103458438016, "learning_rate": 0.00012459235208149984, "loss": 1.4446, "step": 7230 }, { "epoch": 0.78, "grad_norm": 0.07717100398872971, "learning_rate": 0.00012447737482499245, "loss": 1.4675, "step": 7231 }, { "epoch": 0.78, "grad_norm": 0.08179823386986428, "learning_rate": 0.000124362443101898, "loss": 1.2272, "step": 7232 }, { "epoch": 0.78, "grad_norm": 0.08450782049483559, "learning_rate": 0.00012424755692615213, "loss": 1.4358, "step": 7233 }, { "epoch": 0.78, "grad_norm": 0.08644532397598073, "learning_rate": 0.00012413271631168545, "loss": 1.4029, "step": 7234 }, { "epoch": 0.78, "grad_norm": 0.07850539908313922, "learning_rate": 0.00012401792127242285, "loss": 1.2961, "step": 7235 }, { "epoch": 0.78, "grad_norm": 0.0896043126589486, "learning_rate": 0.00012390317182228334, "loss": 1.405, "step": 7236 }, { "epoch": 0.78, "grad_norm": 0.07587555720819515, "learning_rate": 0.0001237884679751809, "loss": 1.4865, "step": 7237 }, { "epoch": 0.78, "grad_norm": 0.08624531471226798, "learning_rate": 0.00012367380974502383, "loss": 1.4715, "step": 7238 }, { "epoch": 0.78, "grad_norm": 0.08904974913087878, "learning_rate": 0.00012355919714571458, "loss": 1.5391, "step": 7239 }, { "epoch": 0.78, "grad_norm": 0.09677429940240297, "learning_rate": 0.0001234446301911506, "loss": 1.3389, "step": 7240 }, { "epoch": 0.78, "grad_norm": 0.09073539193834754, "learning_rate": 0.00012333010889522327, "loss": 1.4164, "step": 7241 }, { "epoch": 0.78, "grad_norm": 0.08293798819734584, "learning_rate": 0.00012321563327181883, "loss": 1.3885, "step": 7242 }, { "epoch": 0.78, "grad_norm": 0.07925689642566201, "learning_rate": 0.00012310120333481795, "loss": 1.3638, "step": 7243 }, { "epoch": 0.78, "grad_norm": 0.0848046515098787, "learning_rate": 0.0001229868190980955, "loss": 1.3759, "step": 7244 }, { "epoch": 0.78, "grad_norm": 0.09296734522311166, "learning_rate": 0.00012287248057552092, "loss": 1.3025, "step": 7245 }, { "epoch": 0.78, "grad_norm": 0.08184586378841201, "learning_rate": 0.0001227581877809582, "loss": 1.4481, "step": 7246 }, { "epoch": 0.78, "grad_norm": 0.07882572428683679, "learning_rate": 0.0001226439407282659, "loss": 1.3656, "step": 7247 }, { "epoch": 0.78, "grad_norm": 0.09208007004167677, "learning_rate": 0.0001225297394312966, "loss": 1.4169, "step": 7248 }, { "epoch": 0.78, "grad_norm": 0.08227884585316046, "learning_rate": 0.00012241558390389769, "loss": 1.4895, "step": 7249 }, { "epoch": 0.78, "grad_norm": 0.0897848075866568, "learning_rate": 0.00012230147415991116, "loss": 1.4633, "step": 7250 }, { "epoch": 0.78, "grad_norm": 0.07979829089445617, "learning_rate": 0.0001221874102131728, "loss": 1.39, "step": 7251 }, { "epoch": 0.78, "grad_norm": 0.08325287183551135, "learning_rate": 0.00012207339207751354, "loss": 1.3288, "step": 7252 }, { "epoch": 0.78, "grad_norm": 0.08921350734918522, "learning_rate": 0.00012195941976675867, "loss": 1.3689, "step": 7253 }, { "epoch": 0.78, "grad_norm": 0.09011140635333832, "learning_rate": 0.00012184549329472717, "loss": 1.4063, "step": 7254 }, { "epoch": 0.78, "grad_norm": 0.08625806008476061, "learning_rate": 0.0001217316126752333, "loss": 1.445, "step": 7255 }, { "epoch": 0.78, "grad_norm": 0.07562931560165986, "learning_rate": 0.00012161777792208562, "loss": 1.5065, "step": 7256 }, { "epoch": 0.78, "grad_norm": 0.08207305626274926, "learning_rate": 0.00012150398904908672, "loss": 1.2579, "step": 7257 }, { "epoch": 0.78, "grad_norm": 0.08553307505785233, "learning_rate": 0.00012139024607003402, "loss": 1.3923, "step": 7258 }, { "epoch": 0.78, "grad_norm": 0.07733318986471323, "learning_rate": 0.00012127654899871936, "loss": 1.439, "step": 7259 }, { "epoch": 0.78, "grad_norm": 0.09377725522902389, "learning_rate": 0.0001211628978489287, "loss": 1.348, "step": 7260 }, { "epoch": 0.78, "grad_norm": 0.09011338811288713, "learning_rate": 0.00012104929263444269, "loss": 1.4032, "step": 7261 }, { "epoch": 0.78, "grad_norm": 0.09418809745410185, "learning_rate": 0.00012093573336903651, "loss": 1.3855, "step": 7262 }, { "epoch": 0.78, "grad_norm": 0.09075476347236389, "learning_rate": 0.00012082222006647942, "loss": 1.4853, "step": 7263 }, { "epoch": 0.78, "grad_norm": 0.08586052880258192, "learning_rate": 0.0001207087527405355, "loss": 1.4392, "step": 7264 }, { "epoch": 0.78, "grad_norm": 0.09258361143240627, "learning_rate": 0.00012059533140496276, "loss": 1.4554, "step": 7265 }, { "epoch": 0.78, "grad_norm": 0.09197353044257664, "learning_rate": 0.0001204819560735142, "loss": 1.4223, "step": 7266 }, { "epoch": 0.78, "grad_norm": 0.08122398178814307, "learning_rate": 0.00012036862675993677, "loss": 1.4581, "step": 7267 }, { "epoch": 0.78, "grad_norm": 0.08569140445620367, "learning_rate": 0.00012025534347797212, "loss": 1.557, "step": 7268 }, { "epoch": 0.78, "grad_norm": 0.08442336639975227, "learning_rate": 0.00012014210624135641, "loss": 1.3068, "step": 7269 }, { "epoch": 0.78, "grad_norm": 0.09359124396229755, "learning_rate": 0.00012002891506381974, "loss": 1.4421, "step": 7270 }, { "epoch": 0.78, "grad_norm": 0.09664875074993715, "learning_rate": 0.0001199157699590872, "loss": 1.51, "step": 7271 }, { "epoch": 0.78, "grad_norm": 0.08256153160798978, "learning_rate": 0.00011980267094087777, "loss": 1.4581, "step": 7272 }, { "epoch": 0.78, "grad_norm": 0.08356266546750146, "learning_rate": 0.00011968961802290523, "loss": 1.3983, "step": 7273 }, { "epoch": 0.78, "grad_norm": 0.07715962500880093, "learning_rate": 0.0001195766112188778, "loss": 1.3396, "step": 7274 }, { "epoch": 0.78, "grad_norm": 0.07775746783057781, "learning_rate": 0.00011946365054249775, "loss": 1.41, "step": 7275 }, { "epoch": 0.78, "grad_norm": 0.08050408481614375, "learning_rate": 0.00011935073600746182, "loss": 1.4984, "step": 7276 }, { "epoch": 0.78, "grad_norm": 0.08051706956456427, "learning_rate": 0.00011923786762746148, "loss": 1.3049, "step": 7277 }, { "epoch": 0.78, "grad_norm": 0.09187755184619545, "learning_rate": 0.00011912504541618252, "loss": 1.3603, "step": 7278 }, { "epoch": 0.78, "grad_norm": 0.0945150395456591, "learning_rate": 0.00011901226938730469, "loss": 1.5084, "step": 7279 }, { "epoch": 0.78, "grad_norm": 0.07896225931399797, "learning_rate": 0.00011889953955450272, "loss": 1.5249, "step": 7280 }, { "epoch": 0.78, "grad_norm": 0.07849848551878082, "learning_rate": 0.00011878685593144556, "loss": 1.4698, "step": 7281 }, { "epoch": 0.78, "grad_norm": 0.08101879037358578, "learning_rate": 0.00011867421853179622, "loss": 1.3817, "step": 7282 }, { "epoch": 0.78, "grad_norm": 0.10727296608575486, "learning_rate": 0.0001185616273692125, "loss": 1.2219, "step": 7283 }, { "epoch": 0.78, "grad_norm": 0.08454663697420552, "learning_rate": 0.00011844908245734659, "loss": 1.396, "step": 7284 }, { "epoch": 0.78, "grad_norm": 0.09072240043086682, "learning_rate": 0.00011833658380984491, "loss": 1.2478, "step": 7285 }, { "epoch": 0.78, "grad_norm": 0.08550914434220919, "learning_rate": 0.00011822413144034805, "loss": 1.4036, "step": 7286 }, { "epoch": 0.78, "grad_norm": 0.08168062993549902, "learning_rate": 0.0001181117253624916, "loss": 1.3707, "step": 7287 }, { "epoch": 0.78, "grad_norm": 0.08925027465919966, "learning_rate": 0.00011799936558990482, "loss": 1.3877, "step": 7288 }, { "epoch": 0.78, "grad_norm": 0.09254528526528016, "learning_rate": 0.00011788705213621198, "loss": 1.4509, "step": 7289 }, { "epoch": 0.78, "grad_norm": 0.11415106012704615, "learning_rate": 0.00011777478501503152, "loss": 1.4539, "step": 7290 }, { "epoch": 0.78, "grad_norm": 0.0838403721688183, "learning_rate": 0.00011766256423997602, "loss": 1.2205, "step": 7291 }, { "epoch": 0.78, "grad_norm": 0.07991379938968937, "learning_rate": 0.00011755038982465266, "loss": 1.4549, "step": 7292 }, { "epoch": 0.78, "grad_norm": 0.08065051687823484, "learning_rate": 0.0001174382617826632, "loss": 1.3445, "step": 7293 }, { "epoch": 0.78, "grad_norm": 0.08678497683129696, "learning_rate": 0.00011732618012760327, "loss": 1.492, "step": 7294 }, { "epoch": 0.78, "grad_norm": 0.08293945639486879, "learning_rate": 0.00011721414487306326, "loss": 1.3688, "step": 7295 }, { "epoch": 0.78, "grad_norm": 0.08227444119004665, "learning_rate": 0.00011710215603262797, "loss": 1.3773, "step": 7296 }, { "epoch": 0.78, "grad_norm": 0.08266583007232876, "learning_rate": 0.00011699021361987633, "loss": 1.3837, "step": 7297 }, { "epoch": 0.78, "grad_norm": 0.09906936808598413, "learning_rate": 0.00011687831764838158, "loss": 1.429, "step": 7298 }, { "epoch": 0.78, "grad_norm": 0.08195428558569683, "learning_rate": 0.00011676646813171166, "loss": 1.4555, "step": 7299 }, { "epoch": 0.78, "grad_norm": 0.08268397919617312, "learning_rate": 0.00011665466508342876, "loss": 1.358, "step": 7300 }, { "epoch": 0.78, "grad_norm": 0.08430853926059426, "learning_rate": 0.0001165429085170892, "loss": 1.3842, "step": 7301 }, { "epoch": 0.78, "grad_norm": 0.09381193980846146, "learning_rate": 0.000116431198446244, "loss": 1.365, "step": 7302 }, { "epoch": 0.79, "grad_norm": 0.08280165821756859, "learning_rate": 0.00011631953488443847, "loss": 1.3367, "step": 7303 }, { "epoch": 0.79, "grad_norm": 0.09674682116615263, "learning_rate": 0.00011620791784521195, "loss": 1.5064, "step": 7304 }, { "epoch": 0.79, "grad_norm": 0.0895578939531404, "learning_rate": 0.00011609634734209867, "loss": 1.3573, "step": 7305 }, { "epoch": 0.79, "grad_norm": 0.09195433336912709, "learning_rate": 0.00011598482338862676, "loss": 1.316, "step": 7306 }, { "epoch": 0.79, "grad_norm": 0.0850041748062091, "learning_rate": 0.00011587334599831877, "loss": 1.3207, "step": 7307 }, { "epoch": 0.79, "grad_norm": 0.09066164990814109, "learning_rate": 0.00011576191518469192, "loss": 1.4068, "step": 7308 }, { "epoch": 0.79, "grad_norm": 0.08737458422241964, "learning_rate": 0.0001156505309612576, "loss": 1.3028, "step": 7309 }, { "epoch": 0.79, "grad_norm": 0.0818355985921247, "learning_rate": 0.00011553919334152135, "loss": 1.4178, "step": 7310 }, { "epoch": 0.79, "grad_norm": 0.09412720167056654, "learning_rate": 0.00011542790233898331, "loss": 1.4519, "step": 7311 }, { "epoch": 0.79, "grad_norm": 0.07815617669426021, "learning_rate": 0.00011531665796713814, "loss": 1.4109, "step": 7312 }, { "epoch": 0.79, "grad_norm": 0.08461422716107501, "learning_rate": 0.0001152054602394742, "loss": 1.4821, "step": 7313 }, { "epoch": 0.79, "grad_norm": 0.09249266123132792, "learning_rate": 0.00011509430916947483, "loss": 1.4866, "step": 7314 }, { "epoch": 0.79, "grad_norm": 0.08558705248643976, "learning_rate": 0.00011498320477061758, "loss": 1.462, "step": 7315 }, { "epoch": 0.79, "grad_norm": 0.08300356854080107, "learning_rate": 0.00011487214705637395, "loss": 1.2952, "step": 7316 }, { "epoch": 0.79, "grad_norm": 0.08004935332127502, "learning_rate": 0.00011476113604021038, "loss": 1.3786, "step": 7317 }, { "epoch": 0.79, "grad_norm": 0.07554824940526937, "learning_rate": 0.00011465017173558717, "loss": 1.4424, "step": 7318 }, { "epoch": 0.79, "grad_norm": 0.08169386017284468, "learning_rate": 0.00011453925415595901, "loss": 1.4019, "step": 7319 }, { "epoch": 0.79, "grad_norm": 0.08875579848412374, "learning_rate": 0.00011442838331477513, "loss": 1.4455, "step": 7320 }, { "epoch": 0.79, "grad_norm": 0.08377514812003137, "learning_rate": 0.00011431755922547921, "loss": 1.3962, "step": 7321 }, { "epoch": 0.79, "grad_norm": 0.08782221330386418, "learning_rate": 0.00011420678190150879, "loss": 1.4341, "step": 7322 }, { "epoch": 0.79, "grad_norm": 0.07954298875210211, "learning_rate": 0.00011409605135629603, "loss": 1.5656, "step": 7323 }, { "epoch": 0.79, "grad_norm": 0.0961529478173863, "learning_rate": 0.00011398536760326761, "loss": 1.3828, "step": 7324 }, { "epoch": 0.79, "grad_norm": 0.0792818090967388, "learning_rate": 0.00011387473065584403, "loss": 1.3704, "step": 7325 }, { "epoch": 0.79, "grad_norm": 0.09990077427687467, "learning_rate": 0.00011376414052744055, "loss": 1.4352, "step": 7326 }, { "epoch": 0.79, "grad_norm": 0.09129573736611461, "learning_rate": 0.00011365359723146673, "loss": 1.3839, "step": 7327 }, { "epoch": 0.79, "grad_norm": 0.08876545291340723, "learning_rate": 0.00011354310078132618, "loss": 1.2871, "step": 7328 }, { "epoch": 0.79, "grad_norm": 0.0826376515378444, "learning_rate": 0.00011343265119041685, "loss": 1.4464, "step": 7329 }, { "epoch": 0.79, "grad_norm": 0.08897338013882695, "learning_rate": 0.00011332224847213124, "loss": 1.4832, "step": 7330 }, { "epoch": 0.79, "grad_norm": 0.08554350555655972, "learning_rate": 0.00011321189263985621, "loss": 1.3448, "step": 7331 }, { "epoch": 0.79, "grad_norm": 0.08963501695769718, "learning_rate": 0.00011310158370697254, "loss": 1.4981, "step": 7332 }, { "epoch": 0.79, "grad_norm": 0.08891139005332287, "learning_rate": 0.00011299132168685566, "loss": 1.5208, "step": 7333 }, { "epoch": 0.79, "grad_norm": 0.07985887227024256, "learning_rate": 0.00011288110659287543, "loss": 1.2565, "step": 7334 }, { "epoch": 0.79, "grad_norm": 0.08249018502823845, "learning_rate": 0.00011277093843839548, "loss": 1.3601, "step": 7335 }, { "epoch": 0.79, "grad_norm": 0.0882205268492248, "learning_rate": 0.00011266081723677434, "loss": 1.343, "step": 7336 }, { "epoch": 0.79, "grad_norm": 0.08568573371670497, "learning_rate": 0.00011255074300136437, "loss": 1.3917, "step": 7337 }, { "epoch": 0.79, "grad_norm": 0.07816830269456879, "learning_rate": 0.00011244071574551268, "loss": 1.3174, "step": 7338 }, { "epoch": 0.79, "grad_norm": 0.08957362122918865, "learning_rate": 0.00011233073548256018, "loss": 1.354, "step": 7339 }, { "epoch": 0.79, "grad_norm": 0.08878251511658643, "learning_rate": 0.00011222080222584263, "loss": 1.3677, "step": 7340 }, { "epoch": 0.79, "grad_norm": 0.08375678858047989, "learning_rate": 0.00011211091598868956, "loss": 1.4903, "step": 7341 }, { "epoch": 0.79, "grad_norm": 0.0887016822378597, "learning_rate": 0.00011200107678442517, "loss": 1.2804, "step": 7342 }, { "epoch": 0.79, "grad_norm": 0.0869215161095677, "learning_rate": 0.000111891284626368, "loss": 1.3929, "step": 7343 }, { "epoch": 0.79, "grad_norm": 0.08806012815888258, "learning_rate": 0.00011178153952783043, "loss": 1.3918, "step": 7344 }, { "epoch": 0.79, "grad_norm": 0.08770599045346396, "learning_rate": 0.00011167184150211962, "loss": 1.3323, "step": 7345 }, { "epoch": 0.79, "grad_norm": 0.08611854804897985, "learning_rate": 0.00011156219056253691, "loss": 1.3703, "step": 7346 }, { "epoch": 0.79, "grad_norm": 0.08968488237086665, "learning_rate": 0.00011145258672237762, "loss": 1.4883, "step": 7347 }, { "epoch": 0.79, "grad_norm": 0.09790986626447239, "learning_rate": 0.00011134302999493173, "loss": 1.3519, "step": 7348 }, { "epoch": 0.79, "grad_norm": 0.08092278640901887, "learning_rate": 0.0001112335203934836, "loss": 1.278, "step": 7349 }, { "epoch": 0.79, "grad_norm": 0.07777105411353137, "learning_rate": 0.00011112405793131114, "loss": 1.3237, "step": 7350 }, { "epoch": 0.79, "grad_norm": 0.09088764719653418, "learning_rate": 0.00011101464262168731, "loss": 1.4631, "step": 7351 }, { "epoch": 0.79, "grad_norm": 0.08000448230902753, "learning_rate": 0.00011090527447787924, "loss": 1.3473, "step": 7352 }, { "epoch": 0.79, "grad_norm": 0.08583186983779009, "learning_rate": 0.00011079595351314791, "loss": 1.4034, "step": 7353 }, { "epoch": 0.79, "grad_norm": 0.0960634586034175, "learning_rate": 0.00011068667974074903, "loss": 1.4432, "step": 7354 }, { "epoch": 0.79, "grad_norm": 0.08076190348815085, "learning_rate": 0.00011057745317393252, "loss": 1.334, "step": 7355 }, { "epoch": 0.79, "grad_norm": 0.09802013313693342, "learning_rate": 0.00011046827382594227, "loss": 1.3139, "step": 7356 }, { "epoch": 0.79, "grad_norm": 0.08870505251066106, "learning_rate": 0.00011035914171001665, "loss": 1.369, "step": 7357 }, { "epoch": 0.79, "grad_norm": 0.10760046353869862, "learning_rate": 0.0001102500568393886, "loss": 1.4748, "step": 7358 }, { "epoch": 0.79, "grad_norm": 0.08482909523650954, "learning_rate": 0.0001101410192272848, "loss": 1.309, "step": 7359 }, { "epoch": 0.79, "grad_norm": 0.08967836852723367, "learning_rate": 0.00011003202888692632, "loss": 1.3521, "step": 7360 }, { "epoch": 0.79, "grad_norm": 0.08971932596246342, "learning_rate": 0.00010992308583152877, "loss": 1.3055, "step": 7361 }, { "epoch": 0.79, "grad_norm": 0.08965578968467429, "learning_rate": 0.00010981419007430199, "loss": 1.496, "step": 7362 }, { "epoch": 0.79, "grad_norm": 0.08924314469159167, "learning_rate": 0.00010970534162844975, "loss": 1.495, "step": 7363 }, { "epoch": 0.79, "grad_norm": 0.08709222636542864, "learning_rate": 0.00010959654050717032, "loss": 1.2642, "step": 7364 }, { "epoch": 0.79, "grad_norm": 0.08930747249209584, "learning_rate": 0.00010948778672365644, "loss": 1.3918, "step": 7365 }, { "epoch": 0.79, "grad_norm": 0.08647523289589773, "learning_rate": 0.00010937908029109461, "loss": 1.4539, "step": 7366 }, { "epoch": 0.79, "grad_norm": 0.08336240679126437, "learning_rate": 0.00010927042122266611, "loss": 1.3636, "step": 7367 }, { "epoch": 0.79, "grad_norm": 0.07905964491305716, "learning_rate": 0.00010916180953154592, "loss": 1.3862, "step": 7368 }, { "epoch": 0.79, "grad_norm": 0.09007504611985878, "learning_rate": 0.00010905324523090377, "loss": 1.4469, "step": 7369 }, { "epoch": 0.79, "grad_norm": 0.08221245517814316, "learning_rate": 0.00010894472833390357, "loss": 1.4572, "step": 7370 }, { "epoch": 0.79, "grad_norm": 0.08384087412285039, "learning_rate": 0.00010883625885370319, "loss": 1.5368, "step": 7371 }, { "epoch": 0.79, "grad_norm": 0.09317832358790074, "learning_rate": 0.00010872783680345489, "loss": 1.4228, "step": 7372 }, { "epoch": 0.79, "grad_norm": 0.10753007879502102, "learning_rate": 0.00010861946219630525, "loss": 1.3968, "step": 7373 }, { "epoch": 0.79, "grad_norm": 0.09197775161563887, "learning_rate": 0.00010851113504539528, "loss": 1.3543, "step": 7374 }, { "epoch": 0.79, "grad_norm": 0.08071650914079001, "learning_rate": 0.00010840285536385968, "loss": 1.421, "step": 7375 }, { "epoch": 0.79, "grad_norm": 0.09167372528944248, "learning_rate": 0.00010829462316482797, "loss": 1.5266, "step": 7376 }, { "epoch": 0.79, "grad_norm": 0.09098539935181792, "learning_rate": 0.00010818643846142373, "loss": 1.3709, "step": 7377 }, { "epoch": 0.79, "grad_norm": 0.08429483207935262, "learning_rate": 0.00010807830126676443, "loss": 1.3496, "step": 7378 }, { "epoch": 0.79, "grad_norm": 0.08474915241120738, "learning_rate": 0.0001079702115939623, "loss": 1.3521, "step": 7379 }, { "epoch": 0.79, "grad_norm": 0.08776848704159057, "learning_rate": 0.0001078621694561237, "loss": 1.3723, "step": 7380 }, { "epoch": 0.79, "grad_norm": 0.08454882765125812, "learning_rate": 0.00010775417486634893, "loss": 1.4085, "step": 7381 }, { "epoch": 0.79, "grad_norm": 0.08768259170723725, "learning_rate": 0.00010764622783773259, "loss": 1.5293, "step": 7382 }, { "epoch": 0.79, "grad_norm": 0.08057887240979167, "learning_rate": 0.00010753832838336397, "loss": 1.4617, "step": 7383 }, { "epoch": 0.79, "grad_norm": 0.08991989764932008, "learning_rate": 0.00010743047651632588, "loss": 1.359, "step": 7384 }, { "epoch": 0.79, "grad_norm": 0.08712173949360097, "learning_rate": 0.00010732267224969588, "loss": 1.3348, "step": 7385 }, { "epoch": 0.79, "grad_norm": 0.09148073926024225, "learning_rate": 0.00010721491559654579, "loss": 1.3145, "step": 7386 }, { "epoch": 0.79, "grad_norm": 0.09115660354014754, "learning_rate": 0.00010710720656994117, "loss": 1.3405, "step": 7387 }, { "epoch": 0.79, "grad_norm": 0.09285950683691585, "learning_rate": 0.00010699954518294224, "loss": 1.351, "step": 7388 }, { "epoch": 0.79, "grad_norm": 0.08496458358175875, "learning_rate": 0.00010689193144860354, "loss": 1.3887, "step": 7389 }, { "epoch": 0.79, "grad_norm": 0.08814196543992106, "learning_rate": 0.00010678436537997321, "loss": 1.3929, "step": 7390 }, { "epoch": 0.79, "grad_norm": 0.08392941262208066, "learning_rate": 0.00010667684699009439, "loss": 1.4186, "step": 7391 }, { "epoch": 0.79, "grad_norm": 0.0825169692685744, "learning_rate": 0.00010656937629200369, "loss": 1.5325, "step": 7392 }, { "epoch": 0.79, "grad_norm": 0.0969018358700212, "learning_rate": 0.00010646195329873259, "loss": 1.5803, "step": 7393 }, { "epoch": 0.79, "grad_norm": 0.08785792908215653, "learning_rate": 0.00010635457802330628, "loss": 1.3293, "step": 7394 }, { "epoch": 0.79, "grad_norm": 0.08970546446133174, "learning_rate": 0.0001062472504787445, "loss": 1.4592, "step": 7395 }, { "epoch": 0.8, "grad_norm": 0.09689624451528016, "learning_rate": 0.0001061399706780612, "loss": 1.3814, "step": 7396 }, { "epoch": 0.8, "grad_norm": 0.08661583703872869, "learning_rate": 0.00010603273863426411, "loss": 1.4196, "step": 7397 }, { "epoch": 0.8, "grad_norm": 0.08672305211358776, "learning_rate": 0.00010592555436035573, "loss": 1.4574, "step": 7398 }, { "epoch": 0.8, "grad_norm": 0.07671987734255735, "learning_rate": 0.00010581841786933261, "loss": 1.471, "step": 7399 }, { "epoch": 0.8, "grad_norm": 0.07563795205528116, "learning_rate": 0.00010571132917418508, "loss": 1.4617, "step": 7400 }, { "epoch": 0.8, "grad_norm": 0.09722906137813156, "learning_rate": 0.00010560428828789837, "loss": 1.2705, "step": 7401 }, { "epoch": 0.8, "grad_norm": 0.09142567018690823, "learning_rate": 0.00010549729522345142, "loss": 1.4441, "step": 7402 }, { "epoch": 0.8, "grad_norm": 0.09043229441252876, "learning_rate": 0.0001053903499938173, "loss": 1.4887, "step": 7403 }, { "epoch": 0.8, "grad_norm": 0.08808126569357126, "learning_rate": 0.0001052834526119637, "loss": 1.5165, "step": 7404 }, { "epoch": 0.8, "grad_norm": 0.10853347121607891, "learning_rate": 0.00010517660309085242, "loss": 1.3782, "step": 7405 }, { "epoch": 0.8, "grad_norm": 0.08195414140573754, "learning_rate": 0.00010506980144343898, "loss": 1.3016, "step": 7406 }, { "epoch": 0.8, "grad_norm": 0.09994610269025854, "learning_rate": 0.00010496304768267373, "loss": 1.3852, "step": 7407 }, { "epoch": 0.8, "grad_norm": 0.09477202194767692, "learning_rate": 0.00010485634182150089, "loss": 1.3728, "step": 7408 }, { "epoch": 0.8, "grad_norm": 0.08841337933183288, "learning_rate": 0.00010474968387285882, "loss": 1.3434, "step": 7409 }, { "epoch": 0.8, "grad_norm": 0.0959761214605188, "learning_rate": 0.00010464307384968019, "loss": 1.5121, "step": 7410 }, { "epoch": 0.8, "grad_norm": 0.08672942952818297, "learning_rate": 0.00010453651176489198, "loss": 1.4939, "step": 7411 }, { "epoch": 0.8, "grad_norm": 0.08689856792950482, "learning_rate": 0.00010442999763141509, "loss": 1.4176, "step": 7412 }, { "epoch": 0.8, "grad_norm": 0.08847913538326554, "learning_rate": 0.00010432353146216456, "loss": 1.3859, "step": 7413 }, { "epoch": 0.8, "grad_norm": 0.08940969838357604, "learning_rate": 0.00010421711327005013, "loss": 1.3339, "step": 7414 }, { "epoch": 0.8, "grad_norm": 0.08584386698958232, "learning_rate": 0.00010411074306797502, "loss": 1.2584, "step": 7415 }, { "epoch": 0.8, "grad_norm": 0.09041104984604122, "learning_rate": 0.00010400442086883715, "loss": 1.5023, "step": 7416 }, { "epoch": 0.8, "grad_norm": 0.08328881943694692, "learning_rate": 0.0001038981466855286, "loss": 1.2562, "step": 7417 }, { "epoch": 0.8, "grad_norm": 0.07626411694127093, "learning_rate": 0.00010379192053093522, "loss": 1.3377, "step": 7418 }, { "epoch": 0.8, "grad_norm": 0.08720457355500504, "learning_rate": 0.00010368574241793738, "loss": 1.364, "step": 7419 }, { "epoch": 0.8, "grad_norm": 0.07654734453604108, "learning_rate": 0.00010357961235940977, "loss": 1.3639, "step": 7420 }, { "epoch": 0.8, "grad_norm": 0.08657449519324938, "learning_rate": 0.0001034735303682206, "loss": 1.3308, "step": 7421 }, { "epoch": 0.8, "grad_norm": 0.08250237320381912, "learning_rate": 0.00010336749645723298, "loss": 1.4285, "step": 7422 }, { "epoch": 0.8, "grad_norm": 0.08605554787724745, "learning_rate": 0.00010326151063930395, "loss": 1.3979, "step": 7423 }, { "epoch": 0.8, "grad_norm": 0.09436166424350222, "learning_rate": 0.0001031555729272845, "loss": 1.4827, "step": 7424 }, { "epoch": 0.8, "grad_norm": 0.0873621789720717, "learning_rate": 0.00010304968333401982, "loss": 1.4897, "step": 7425 }, { "epoch": 0.8, "grad_norm": 0.08852806729006432, "learning_rate": 0.00010294384187234951, "loss": 1.4744, "step": 7426 }, { "epoch": 0.8, "grad_norm": 0.09802979552309042, "learning_rate": 0.00010283804855510742, "loss": 1.3337, "step": 7427 }, { "epoch": 0.8, "grad_norm": 0.08249683463009905, "learning_rate": 0.00010273230339512102, "loss": 1.3253, "step": 7428 }, { "epoch": 0.8, "grad_norm": 0.08737069761111654, "learning_rate": 0.00010262660640521242, "loss": 1.4135, "step": 7429 }, { "epoch": 0.8, "grad_norm": 0.07672002832446094, "learning_rate": 0.00010252095759819785, "loss": 1.3387, "step": 7430 }, { "epoch": 0.8, "grad_norm": 0.08146901689082577, "learning_rate": 0.00010241535698688743, "loss": 1.4587, "step": 7431 }, { "epoch": 0.8, "grad_norm": 0.08357617514349239, "learning_rate": 0.00010230980458408573, "loss": 1.3892, "step": 7432 }, { "epoch": 0.8, "grad_norm": 0.09688593678688902, "learning_rate": 0.00010220430040259116, "loss": 1.4337, "step": 7433 }, { "epoch": 0.8, "grad_norm": 0.0949961436992429, "learning_rate": 0.00010209884445519668, "loss": 1.479, "step": 7434 }, { "epoch": 0.8, "grad_norm": 0.09217958997274633, "learning_rate": 0.00010199343675468897, "loss": 1.4274, "step": 7435 }, { "epoch": 0.8, "grad_norm": 0.0927875344960079, "learning_rate": 0.0001018880773138493, "loss": 1.4548, "step": 7436 }, { "epoch": 0.8, "grad_norm": 0.10240453114593467, "learning_rate": 0.00010178276614545267, "loss": 1.3366, "step": 7437 }, { "epoch": 0.8, "grad_norm": 0.07930711688295726, "learning_rate": 0.00010167750326226848, "loss": 1.3864, "step": 7438 }, { "epoch": 0.8, "grad_norm": 0.08988242509021517, "learning_rate": 0.00010157228867706041, "loss": 1.3858, "step": 7439 }, { "epoch": 0.8, "grad_norm": 0.08546466142893103, "learning_rate": 0.00010146712240258577, "loss": 1.337, "step": 7440 }, { "epoch": 0.8, "grad_norm": 0.07476124167921687, "learning_rate": 0.00010136200445159649, "loss": 1.3619, "step": 7441 }, { "epoch": 0.8, "grad_norm": 0.08503840192574158, "learning_rate": 0.00010125693483683862, "loss": 1.5063, "step": 7442 }, { "epoch": 0.8, "grad_norm": 0.08121488722572119, "learning_rate": 0.00010115191357105192, "loss": 1.3646, "step": 7443 }, { "epoch": 0.8, "grad_norm": 0.08638910819398816, "learning_rate": 0.00010104694066697089, "loss": 1.4189, "step": 7444 }, { "epoch": 0.8, "grad_norm": 0.07556675074773149, "learning_rate": 0.00010094201613732373, "loss": 1.4163, "step": 7445 }, { "epoch": 0.8, "grad_norm": 0.09294527602966572, "learning_rate": 0.00010083713999483269, "loss": 1.3499, "step": 7446 }, { "epoch": 0.8, "grad_norm": 0.09451147367174063, "learning_rate": 0.00010073231225221457, "loss": 1.3585, "step": 7447 }, { "epoch": 0.8, "grad_norm": 0.09048973763769977, "learning_rate": 0.00010062753292218025, "loss": 1.4001, "step": 7448 }, { "epoch": 0.8, "grad_norm": 0.09350501774740308, "learning_rate": 0.00010052280201743425, "loss": 1.376, "step": 7449 }, { "epoch": 0.8, "grad_norm": 0.08746862295951763, "learning_rate": 0.0001004181195506757, "loss": 1.4711, "step": 7450 }, { "epoch": 0.8, "grad_norm": 0.08986402754420884, "learning_rate": 0.00010031348553459785, "loss": 1.5504, "step": 7451 }, { "epoch": 0.8, "grad_norm": 0.08757788063869364, "learning_rate": 0.00010020889998188771, "loss": 1.3495, "step": 7452 }, { "epoch": 0.8, "grad_norm": 0.0786814505136955, "learning_rate": 0.00010010436290522673, "loss": 1.3102, "step": 7453 }, { "epoch": 0.8, "grad_norm": 0.08204954091010624, "learning_rate": 9.999987431729051e-05, "loss": 1.6017, "step": 7454 }, { "epoch": 0.8, "grad_norm": 0.08699570930256927, "learning_rate": 9.989543423074854e-05, "loss": 1.4445, "step": 7455 }, { "epoch": 0.8, "grad_norm": 0.08730952432960949, "learning_rate": 9.979104265826438e-05, "loss": 1.409, "step": 7456 }, { "epoch": 0.8, "grad_norm": 0.08541075113090651, "learning_rate": 9.96866996124961e-05, "loss": 1.3488, "step": 7457 }, { "epoch": 0.8, "grad_norm": 0.09227061283218439, "learning_rate": 9.958240510609568e-05, "loss": 1.3261, "step": 7458 }, { "epoch": 0.8, "grad_norm": 0.07891602228455197, "learning_rate": 9.947815915170894e-05, "loss": 1.4237, "step": 7459 }, { "epoch": 0.8, "grad_norm": 0.10392044416795326, "learning_rate": 9.937396176197621e-05, "loss": 1.373, "step": 7460 }, { "epoch": 0.8, "grad_norm": 0.08909632543444393, "learning_rate": 9.926981294953191e-05, "loss": 1.4048, "step": 7461 }, { "epoch": 0.8, "grad_norm": 0.08711148193189107, "learning_rate": 9.91657127270042e-05, "loss": 1.301, "step": 7462 }, { "epoch": 0.8, "grad_norm": 0.09677062189294994, "learning_rate": 9.906166110701587e-05, "loss": 1.4211, "step": 7463 }, { "epoch": 0.8, "grad_norm": 0.08012618245248758, "learning_rate": 9.895765810218322e-05, "loss": 1.3605, "step": 7464 }, { "epoch": 0.8, "grad_norm": 0.07881596378929731, "learning_rate": 9.885370372511727e-05, "loss": 1.2357, "step": 7465 }, { "epoch": 0.8, "grad_norm": 0.09099731322497107, "learning_rate": 9.874979798842255e-05, "loss": 1.3286, "step": 7466 }, { "epoch": 0.8, "grad_norm": 0.08318317206545245, "learning_rate": 9.86459409046983e-05, "loss": 1.3099, "step": 7467 }, { "epoch": 0.8, "grad_norm": 0.08834905278247804, "learning_rate": 9.854213248653721e-05, "loss": 1.3114, "step": 7468 }, { "epoch": 0.8, "grad_norm": 0.08740069078768421, "learning_rate": 9.843837274652667e-05, "loss": 1.5023, "step": 7469 }, { "epoch": 0.8, "grad_norm": 0.08485296530187857, "learning_rate": 9.833466169724792e-05, "loss": 1.3516, "step": 7470 }, { "epoch": 0.8, "grad_norm": 0.09309838507104544, "learning_rate": 9.823099935127605e-05, "loss": 1.4752, "step": 7471 }, { "epoch": 0.8, "grad_norm": 0.08699771252470198, "learning_rate": 9.812738572118063e-05, "loss": 1.467, "step": 7472 }, { "epoch": 0.8, "grad_norm": 0.08672188155001476, "learning_rate": 9.802382081952527e-05, "loss": 1.3471, "step": 7473 }, { "epoch": 0.8, "grad_norm": 0.0881050276317137, "learning_rate": 9.792030465886736e-05, "loss": 1.3859, "step": 7474 }, { "epoch": 0.8, "grad_norm": 0.0867622769936228, "learning_rate": 9.781683725175866e-05, "loss": 1.542, "step": 7475 }, { "epoch": 0.8, "grad_norm": 0.08473741654209976, "learning_rate": 9.771341861074523e-05, "loss": 1.3428, "step": 7476 }, { "epoch": 0.8, "grad_norm": 0.1022488836419732, "learning_rate": 9.761004874836644e-05, "loss": 1.3613, "step": 7477 }, { "epoch": 0.8, "grad_norm": 0.09639605063432713, "learning_rate": 9.750672767715651e-05, "loss": 1.4877, "step": 7478 }, { "epoch": 0.8, "grad_norm": 0.09102505887751455, "learning_rate": 9.740345540964357e-05, "loss": 1.3897, "step": 7479 }, { "epoch": 0.8, "grad_norm": 0.09344953204952636, "learning_rate": 9.730023195834947e-05, "loss": 1.5112, "step": 7480 }, { "epoch": 0.8, "grad_norm": 0.09225774918138216, "learning_rate": 9.71970573357906e-05, "loss": 1.2466, "step": 7481 }, { "epoch": 0.8, "grad_norm": 0.08174545024210766, "learning_rate": 9.709393155447732e-05, "loss": 1.4278, "step": 7482 }, { "epoch": 0.8, "grad_norm": 0.08691964908390155, "learning_rate": 9.699085462691376e-05, "loss": 1.3324, "step": 7483 }, { "epoch": 0.8, "grad_norm": 0.08499423352944359, "learning_rate": 9.688782656559842e-05, "loss": 1.3487, "step": 7484 }, { "epoch": 0.8, "grad_norm": 0.1086986268584967, "learning_rate": 9.6784847383024e-05, "loss": 1.3336, "step": 7485 }, { "epoch": 0.8, "grad_norm": 0.10390079226716396, "learning_rate": 9.66819170916769e-05, "loss": 1.4299, "step": 7486 }, { "epoch": 0.8, "grad_norm": 0.08179398408151457, "learning_rate": 9.657903570403769e-05, "loss": 1.3214, "step": 7487 }, { "epoch": 0.8, "grad_norm": 0.09271074862383373, "learning_rate": 9.647620323258121e-05, "loss": 1.3402, "step": 7488 }, { "epoch": 0.81, "grad_norm": 0.09774757883056379, "learning_rate": 9.637341968977636e-05, "loss": 1.4131, "step": 7489 }, { "epoch": 0.81, "grad_norm": 0.07832701611547217, "learning_rate": 9.627068508808579e-05, "loss": 1.3784, "step": 7490 }, { "epoch": 0.81, "grad_norm": 0.1041386239978518, "learning_rate": 9.616799943996651e-05, "loss": 1.295, "step": 7491 }, { "epoch": 0.81, "grad_norm": 0.08705483512520137, "learning_rate": 9.606536275786965e-05, "loss": 1.3471, "step": 7492 }, { "epoch": 0.81, "grad_norm": 0.08664530820976736, "learning_rate": 9.596277505423994e-05, "loss": 1.4664, "step": 7493 }, { "epoch": 0.81, "grad_norm": 0.10276841331323949, "learning_rate": 9.586023634151674e-05, "loss": 1.4516, "step": 7494 }, { "epoch": 0.81, "grad_norm": 0.09129386033307563, "learning_rate": 9.575774663213327e-05, "loss": 1.4629, "step": 7495 }, { "epoch": 0.81, "grad_norm": 0.09433687443593726, "learning_rate": 9.565530593851656e-05, "loss": 1.478, "step": 7496 }, { "epoch": 0.81, "grad_norm": 0.08781641900491051, "learning_rate": 9.55529142730881e-05, "loss": 1.2935, "step": 7497 }, { "epoch": 0.81, "grad_norm": 0.10812828620908439, "learning_rate": 9.545057164826315e-05, "loss": 1.4357, "step": 7498 }, { "epoch": 0.81, "grad_norm": 0.10200044996981032, "learning_rate": 9.534827807645091e-05, "loss": 1.229, "step": 7499 }, { "epoch": 0.81, "grad_norm": 0.09240236105764514, "learning_rate": 9.524603357005501e-05, "loss": 1.4634, "step": 7500 }, { "epoch": 0.81, "grad_norm": 0.09454120611142644, "learning_rate": 9.514383814147309e-05, "loss": 1.4214, "step": 7501 }, { "epoch": 0.81, "grad_norm": 0.09548820233824558, "learning_rate": 9.50416918030964e-05, "loss": 1.3984, "step": 7502 }, { "epoch": 0.81, "grad_norm": 0.07630747179285953, "learning_rate": 9.493959456731072e-05, "loss": 1.4153, "step": 7503 }, { "epoch": 0.81, "grad_norm": 0.0819944969222753, "learning_rate": 9.483754644649573e-05, "loss": 1.2477, "step": 7504 }, { "epoch": 0.81, "grad_norm": 0.08982939320379192, "learning_rate": 9.473554745302492e-05, "loss": 1.4237, "step": 7505 }, { "epoch": 0.81, "grad_norm": 0.0941844203191723, "learning_rate": 9.463359759926615e-05, "loss": 1.405, "step": 7506 }, { "epoch": 0.81, "grad_norm": 0.09325934826499595, "learning_rate": 9.453169689758134e-05, "loss": 1.5425, "step": 7507 }, { "epoch": 0.81, "grad_norm": 0.09148103195006312, "learning_rate": 9.442984536032612e-05, "loss": 1.4171, "step": 7508 }, { "epoch": 0.81, "grad_norm": 0.09186090103298966, "learning_rate": 9.43280429998502e-05, "loss": 1.3543, "step": 7509 }, { "epoch": 0.81, "grad_norm": 0.08035881723942448, "learning_rate": 9.422628982849785e-05, "loss": 1.4375, "step": 7510 }, { "epoch": 0.81, "grad_norm": 0.09344417265322892, "learning_rate": 9.412458585860656e-05, "loss": 1.4238, "step": 7511 }, { "epoch": 0.81, "grad_norm": 0.0960705210553254, "learning_rate": 9.402293110250853e-05, "loss": 1.5747, "step": 7512 }, { "epoch": 0.81, "grad_norm": 0.1033151773139312, "learning_rate": 9.392132557252986e-05, "loss": 1.3517, "step": 7513 }, { "epoch": 0.81, "grad_norm": 0.09385641709122178, "learning_rate": 9.381976928099029e-05, "loss": 1.3943, "step": 7514 }, { "epoch": 0.81, "grad_norm": 0.0779487282752614, "learning_rate": 9.371826224020397e-05, "loss": 1.4504, "step": 7515 }, { "epoch": 0.81, "grad_norm": 0.081718534217225, "learning_rate": 9.361680446247922e-05, "loss": 1.3773, "step": 7516 }, { "epoch": 0.81, "grad_norm": 0.07810061118795375, "learning_rate": 9.351539596011776e-05, "loss": 1.336, "step": 7517 }, { "epoch": 0.81, "grad_norm": 0.08383963624184462, "learning_rate": 9.341403674541605e-05, "loss": 1.4401, "step": 7518 }, { "epoch": 0.81, "grad_norm": 0.08943783242052575, "learning_rate": 9.331272683066399e-05, "loss": 1.401, "step": 7519 }, { "epoch": 0.81, "grad_norm": 0.08459384581972972, "learning_rate": 9.321146622814597e-05, "loss": 1.6098, "step": 7520 }, { "epoch": 0.81, "grad_norm": 0.08989778220774186, "learning_rate": 9.311025495013997e-05, "loss": 1.4655, "step": 7521 }, { "epoch": 0.81, "grad_norm": 0.08350938557351134, "learning_rate": 9.30090930089183e-05, "loss": 1.4446, "step": 7522 }, { "epoch": 0.81, "grad_norm": 0.09453361809735741, "learning_rate": 9.290798041674736e-05, "loss": 1.3595, "step": 7523 }, { "epoch": 0.81, "grad_norm": 0.08402181500544711, "learning_rate": 9.280691718588713e-05, "loss": 1.4212, "step": 7524 }, { "epoch": 0.81, "grad_norm": 0.08162461265250069, "learning_rate": 9.270590332859202e-05, "loss": 1.4145, "step": 7525 }, { "epoch": 0.81, "grad_norm": 0.07636153927084101, "learning_rate": 9.260493885711035e-05, "loss": 1.3644, "step": 7526 }, { "epoch": 0.81, "grad_norm": 0.08638875647764933, "learning_rate": 9.250402378368427e-05, "loss": 1.5009, "step": 7527 }, { "epoch": 0.81, "grad_norm": 0.0784996066786888, "learning_rate": 9.240315812055028e-05, "loss": 1.3155, "step": 7528 }, { "epoch": 0.81, "grad_norm": 0.09020209891163362, "learning_rate": 9.230234187993858e-05, "loss": 1.3745, "step": 7529 }, { "epoch": 0.81, "grad_norm": 0.07728842140450685, "learning_rate": 9.220157507407334e-05, "loss": 1.473, "step": 7530 }, { "epoch": 0.81, "grad_norm": 0.09999693012740897, "learning_rate": 9.210085771517296e-05, "loss": 1.3881, "step": 7531 }, { "epoch": 0.81, "grad_norm": 0.07475659418648158, "learning_rate": 9.200018981544999e-05, "loss": 1.4635, "step": 7532 }, { "epoch": 0.81, "grad_norm": 0.08610114317683623, "learning_rate": 9.189957138711053e-05, "loss": 1.3655, "step": 7533 }, { "epoch": 0.81, "grad_norm": 0.08629125966598605, "learning_rate": 9.179900244235489e-05, "loss": 1.4349, "step": 7534 }, { "epoch": 0.81, "grad_norm": 0.08839588239342566, "learning_rate": 9.169848299337764e-05, "loss": 1.3432, "step": 7535 }, { "epoch": 0.81, "grad_norm": 0.09113204431680144, "learning_rate": 9.159801305236687e-05, "loss": 1.4708, "step": 7536 }, { "epoch": 0.81, "grad_norm": 0.08841340566615136, "learning_rate": 9.149759263150493e-05, "loss": 1.3131, "step": 7537 }, { "epoch": 0.81, "grad_norm": 0.09119900971580401, "learning_rate": 9.139722174296838e-05, "loss": 1.4045, "step": 7538 }, { "epoch": 0.81, "grad_norm": 0.08828684717343677, "learning_rate": 9.129690039892735e-05, "loss": 1.3373, "step": 7539 }, { "epoch": 0.81, "grad_norm": 0.09411904346839678, "learning_rate": 9.119662861154598e-05, "loss": 1.4342, "step": 7540 }, { "epoch": 0.81, "grad_norm": 0.09128939712399425, "learning_rate": 9.109640639298294e-05, "loss": 1.3869, "step": 7541 }, { "epoch": 0.81, "grad_norm": 0.0786323938770777, "learning_rate": 9.099623375539017e-05, "loss": 1.3981, "step": 7542 }, { "epoch": 0.81, "grad_norm": 0.08550467735039376, "learning_rate": 9.089611071091414e-05, "loss": 1.2798, "step": 7543 }, { "epoch": 0.81, "grad_norm": 0.0956798289127353, "learning_rate": 9.079603727169521e-05, "loss": 1.4181, "step": 7544 }, { "epoch": 0.81, "grad_norm": 0.09370505228997768, "learning_rate": 9.069601344986733e-05, "loss": 1.4377, "step": 7545 }, { "epoch": 0.81, "grad_norm": 0.09234717974288324, "learning_rate": 9.059603925755894e-05, "loss": 1.308, "step": 7546 }, { "epoch": 0.81, "grad_norm": 0.07823660374355815, "learning_rate": 9.049611470689234e-05, "loss": 1.5184, "step": 7547 }, { "epoch": 0.81, "grad_norm": 0.08104361176940131, "learning_rate": 9.039623980998346e-05, "loss": 1.4144, "step": 7548 }, { "epoch": 0.81, "grad_norm": 0.08313514255907395, "learning_rate": 9.029641457894266e-05, "loss": 1.558, "step": 7549 }, { "epoch": 0.81, "grad_norm": 0.08348448293389067, "learning_rate": 9.019663902587416e-05, "loss": 1.4278, "step": 7550 }, { "epoch": 0.81, "grad_norm": 0.09062830049572031, "learning_rate": 9.009691316287599e-05, "loss": 1.2734, "step": 7551 }, { "epoch": 0.81, "grad_norm": 0.09077517992882414, "learning_rate": 8.99972370020401e-05, "loss": 1.3934, "step": 7552 }, { "epoch": 0.81, "grad_norm": 0.08704980419159235, "learning_rate": 8.989761055545276e-05, "loss": 1.4072, "step": 7553 }, { "epoch": 0.81, "grad_norm": 0.09613577933462986, "learning_rate": 8.979803383519408e-05, "loss": 1.2797, "step": 7554 }, { "epoch": 0.81, "grad_norm": 0.08722762807612891, "learning_rate": 8.969850685333786e-05, "loss": 1.4651, "step": 7555 }, { "epoch": 0.81, "grad_norm": 0.08697344594357496, "learning_rate": 8.959902962195221e-05, "loss": 1.2314, "step": 7556 }, { "epoch": 0.81, "grad_norm": 0.08732520594131644, "learning_rate": 8.949960215309921e-05, "loss": 1.3147, "step": 7557 }, { "epoch": 0.81, "grad_norm": 0.08586624155147374, "learning_rate": 8.94002244588345e-05, "loss": 1.297, "step": 7558 }, { "epoch": 0.81, "grad_norm": 0.0888351209656821, "learning_rate": 8.930089655120832e-05, "loss": 1.4383, "step": 7559 }, { "epoch": 0.81, "grad_norm": 0.08601421622050333, "learning_rate": 8.920161844226416e-05, "loss": 1.454, "step": 7560 }, { "epoch": 0.81, "grad_norm": 0.09618707225930943, "learning_rate": 8.910239014404015e-05, "loss": 1.4776, "step": 7561 }, { "epoch": 0.81, "grad_norm": 0.08193952890730456, "learning_rate": 8.900321166856773e-05, "loss": 1.5756, "step": 7562 }, { "epoch": 0.81, "grad_norm": 0.08770806903706792, "learning_rate": 8.8904083027873e-05, "loss": 1.4326, "step": 7563 }, { "epoch": 0.81, "grad_norm": 0.084513768626814, "learning_rate": 8.88050042339753e-05, "loss": 1.4, "step": 7564 }, { "epoch": 0.81, "grad_norm": 0.09441648259709004, "learning_rate": 8.870597529888847e-05, "loss": 1.4048, "step": 7565 }, { "epoch": 0.81, "grad_norm": 0.09472993962668384, "learning_rate": 8.86069962346202e-05, "loss": 1.4668, "step": 7566 }, { "epoch": 0.81, "grad_norm": 0.08358205795408, "learning_rate": 8.850806705317183e-05, "loss": 1.4674, "step": 7567 }, { "epoch": 0.81, "grad_norm": 0.09200422224464193, "learning_rate": 8.840918776653889e-05, "loss": 1.3779, "step": 7568 }, { "epoch": 0.81, "grad_norm": 0.07949303886065458, "learning_rate": 8.831035838671108e-05, "loss": 1.3128, "step": 7569 }, { "epoch": 0.81, "grad_norm": 0.09294930528202666, "learning_rate": 8.82115789256715e-05, "loss": 1.4005, "step": 7570 }, { "epoch": 0.81, "grad_norm": 0.09418408164473525, "learning_rate": 8.811284939539771e-05, "loss": 1.3714, "step": 7571 }, { "epoch": 0.81, "grad_norm": 0.08533122362657032, "learning_rate": 8.801416980786098e-05, "loss": 1.3943, "step": 7572 }, { "epoch": 0.81, "grad_norm": 0.08479975478034155, "learning_rate": 8.791554017502635e-05, "loss": 1.4126, "step": 7573 }, { "epoch": 0.81, "grad_norm": 0.08617405006978586, "learning_rate": 8.781696050885313e-05, "loss": 1.2658, "step": 7574 }, { "epoch": 0.81, "grad_norm": 0.08868157737014712, "learning_rate": 8.771843082129455e-05, "loss": 1.3586, "step": 7575 }, { "epoch": 0.81, "grad_norm": 0.09198408870723747, "learning_rate": 8.761995112429749e-05, "loss": 1.3592, "step": 7576 }, { "epoch": 0.81, "grad_norm": 0.09868921187630147, "learning_rate": 8.752152142980308e-05, "loss": 1.339, "step": 7577 }, { "epoch": 0.81, "grad_norm": 0.0973691738128529, "learning_rate": 8.74231417497463e-05, "loss": 1.299, "step": 7578 }, { "epoch": 0.81, "grad_norm": 0.09481197124866866, "learning_rate": 8.732481209605587e-05, "loss": 1.3499, "step": 7579 }, { "epoch": 0.81, "grad_norm": 0.10127492341278146, "learning_rate": 8.722653248065466e-05, "loss": 1.4342, "step": 7580 }, { "epoch": 0.81, "grad_norm": 0.0879516972178395, "learning_rate": 8.712830291545953e-05, "loss": 1.3871, "step": 7581 }, { "epoch": 0.82, "grad_norm": 0.08331806460834346, "learning_rate": 8.703012341238109e-05, "loss": 1.3958, "step": 7582 }, { "epoch": 0.82, "grad_norm": 0.09854127039810365, "learning_rate": 8.69319939833238e-05, "loss": 1.4561, "step": 7583 }, { "epoch": 0.82, "grad_norm": 0.08925024465869981, "learning_rate": 8.683391464018625e-05, "loss": 1.4291, "step": 7584 }, { "epoch": 0.82, "grad_norm": 0.09092135858413694, "learning_rate": 8.673588539486116e-05, "loss": 1.4605, "step": 7585 }, { "epoch": 0.82, "grad_norm": 0.08755962376538613, "learning_rate": 8.663790625923452e-05, "loss": 1.502, "step": 7586 }, { "epoch": 0.82, "grad_norm": 0.08256713069946808, "learning_rate": 8.653997724518681e-05, "loss": 1.4174, "step": 7587 }, { "epoch": 0.82, "grad_norm": 0.09290712035854404, "learning_rate": 8.644209836459244e-05, "loss": 1.4073, "step": 7588 }, { "epoch": 0.82, "grad_norm": 0.0882259607720163, "learning_rate": 8.634426962931924e-05, "loss": 1.3124, "step": 7589 }, { "epoch": 0.82, "grad_norm": 0.08602143518147524, "learning_rate": 8.62464910512294e-05, "loss": 1.3498, "step": 7590 }, { "epoch": 0.82, "grad_norm": 0.08533184505192618, "learning_rate": 8.614876264217913e-05, "loss": 1.4247, "step": 7591 }, { "epoch": 0.82, "grad_norm": 0.08913080708015374, "learning_rate": 8.60510844140181e-05, "loss": 1.4991, "step": 7592 }, { "epoch": 0.82, "grad_norm": 0.09800999230765522, "learning_rate": 8.595345637859009e-05, "loss": 1.3599, "step": 7593 }, { "epoch": 0.82, "grad_norm": 0.08656762358628595, "learning_rate": 8.585587854773308e-05, "loss": 1.3821, "step": 7594 }, { "epoch": 0.82, "grad_norm": 0.07657721840844864, "learning_rate": 8.575835093327844e-05, "loss": 1.4007, "step": 7595 }, { "epoch": 0.82, "grad_norm": 0.08925546459390414, "learning_rate": 8.566087354705182e-05, "loss": 1.4172, "step": 7596 }, { "epoch": 0.82, "grad_norm": 0.09137356710770007, "learning_rate": 8.556344640087288e-05, "loss": 1.4559, "step": 7597 }, { "epoch": 0.82, "grad_norm": 0.0977920327352257, "learning_rate": 8.546606950655467e-05, "loss": 1.2935, "step": 7598 }, { "epoch": 0.82, "grad_norm": 0.09795370695417718, "learning_rate": 8.536874287590468e-05, "loss": 1.3887, "step": 7599 }, { "epoch": 0.82, "grad_norm": 0.09176662763448601, "learning_rate": 8.527146652072421e-05, "loss": 1.5284, "step": 7600 }, { "epoch": 0.82, "grad_norm": 0.08372935816698124, "learning_rate": 8.517424045280808e-05, "loss": 1.3019, "step": 7601 }, { "epoch": 0.82, "grad_norm": 0.08514073546626989, "learning_rate": 8.507706468394544e-05, "loss": 1.3655, "step": 7602 }, { "epoch": 0.82, "grad_norm": 0.09309189623499317, "learning_rate": 8.497993922591934e-05, "loss": 1.3931, "step": 7603 }, { "epoch": 0.82, "grad_norm": 0.10140416177523041, "learning_rate": 8.488286409050622e-05, "loss": 1.3656, "step": 7604 }, { "epoch": 0.82, "grad_norm": 0.09603451991030429, "learning_rate": 8.478583928947692e-05, "loss": 1.5697, "step": 7605 }, { "epoch": 0.82, "grad_norm": 0.0977670160586023, "learning_rate": 8.46888648345962e-05, "loss": 1.5035, "step": 7606 }, { "epoch": 0.82, "grad_norm": 0.08551839072309805, "learning_rate": 8.459194073762227e-05, "loss": 1.4294, "step": 7607 }, { "epoch": 0.82, "grad_norm": 0.08037591290539492, "learning_rate": 8.449506701030774e-05, "loss": 1.5657, "step": 7608 }, { "epoch": 0.82, "grad_norm": 0.09590980900355255, "learning_rate": 8.439824366439885e-05, "loss": 1.2933, "step": 7609 }, { "epoch": 0.82, "grad_norm": 0.08713666532241826, "learning_rate": 8.430147071163558e-05, "loss": 1.4531, "step": 7610 }, { "epoch": 0.82, "grad_norm": 0.08992186550468441, "learning_rate": 8.420474816375212e-05, "loss": 1.4069, "step": 7611 }, { "epoch": 0.82, "grad_norm": 0.08338253904896663, "learning_rate": 8.410807603247655e-05, "loss": 1.496, "step": 7612 }, { "epoch": 0.82, "grad_norm": 0.09154435310147199, "learning_rate": 8.401145432953045e-05, "loss": 1.4009, "step": 7613 }, { "epoch": 0.82, "grad_norm": 0.11182926553803102, "learning_rate": 8.391488306662976e-05, "loss": 1.4552, "step": 7614 }, { "epoch": 0.82, "grad_norm": 0.0862731974075149, "learning_rate": 8.381836225548383e-05, "loss": 1.2472, "step": 7615 }, { "epoch": 0.82, "grad_norm": 0.08422895721651501, "learning_rate": 8.372189190779639e-05, "loss": 1.439, "step": 7616 }, { "epoch": 0.82, "grad_norm": 0.09190535096534928, "learning_rate": 8.362547203526455e-05, "loss": 1.3751, "step": 7617 }, { "epoch": 0.82, "grad_norm": 0.08301888840775173, "learning_rate": 8.352910264957969e-05, "loss": 1.351, "step": 7618 }, { "epoch": 0.82, "grad_norm": 0.09395473446856016, "learning_rate": 8.343278376242708e-05, "loss": 1.426, "step": 7619 }, { "epoch": 0.82, "grad_norm": 0.09327444144541883, "learning_rate": 8.333651538548542e-05, "loss": 1.4021, "step": 7620 }, { "epoch": 0.82, "grad_norm": 0.09002012453941054, "learning_rate": 8.324029753042773e-05, "loss": 1.4536, "step": 7621 }, { "epoch": 0.82, "grad_norm": 0.11071663575181982, "learning_rate": 8.314413020892086e-05, "loss": 1.4495, "step": 7622 }, { "epoch": 0.82, "grad_norm": 0.0961113671788749, "learning_rate": 8.304801343262525e-05, "loss": 1.4342, "step": 7623 }, { "epoch": 0.82, "grad_norm": 0.09501998230845707, "learning_rate": 8.295194721319555e-05, "loss": 1.2294, "step": 7624 }, { "epoch": 0.82, "grad_norm": 0.08577388799800784, "learning_rate": 8.285593156228005e-05, "loss": 1.4448, "step": 7625 }, { "epoch": 0.82, "grad_norm": 0.08585778313120833, "learning_rate": 8.275996649152085e-05, "loss": 1.4738, "step": 7626 }, { "epoch": 0.82, "grad_norm": 0.09073679170236165, "learning_rate": 8.266405201255411e-05, "loss": 1.3423, "step": 7627 }, { "epoch": 0.82, "grad_norm": 0.08366332122612558, "learning_rate": 8.256818813701e-05, "loss": 1.3645, "step": 7628 }, { "epoch": 0.82, "grad_norm": 0.1010036314167468, "learning_rate": 8.247237487651204e-05, "loss": 1.2581, "step": 7629 }, { "epoch": 0.82, "grad_norm": 0.08561960263147947, "learning_rate": 8.237661224267806e-05, "loss": 1.3947, "step": 7630 }, { "epoch": 0.82, "grad_norm": 0.08579164829357397, "learning_rate": 8.228090024711976e-05, "loss": 1.585, "step": 7631 }, { "epoch": 0.82, "grad_norm": 0.07982612565350453, "learning_rate": 8.218523890144225e-05, "loss": 1.3644, "step": 7632 }, { "epoch": 0.82, "grad_norm": 0.09154428443060891, "learning_rate": 8.208962821724497e-05, "loss": 1.4524, "step": 7633 }, { "epoch": 0.82, "grad_norm": 0.10977900239432357, "learning_rate": 8.199406820612116e-05, "loss": 1.4807, "step": 7634 }, { "epoch": 0.82, "grad_norm": 0.087843255682426, "learning_rate": 8.189855887965763e-05, "loss": 1.3473, "step": 7635 }, { "epoch": 0.82, "grad_norm": 0.0944862488161355, "learning_rate": 8.180310024943515e-05, "loss": 1.4448, "step": 7636 }, { "epoch": 0.82, "grad_norm": 0.08936470015321014, "learning_rate": 8.170769232702868e-05, "loss": 1.4059, "step": 7637 }, { "epoch": 0.82, "grad_norm": 0.0942866875546708, "learning_rate": 8.16123351240064e-05, "loss": 1.46, "step": 7638 }, { "epoch": 0.82, "grad_norm": 0.09433441619949548, "learning_rate": 8.15170286519309e-05, "loss": 1.3914, "step": 7639 }, { "epoch": 0.82, "grad_norm": 0.08536000362791818, "learning_rate": 8.142177292235853e-05, "loss": 1.4205, "step": 7640 }, { "epoch": 0.82, "grad_norm": 0.1131150345766374, "learning_rate": 8.132656794683912e-05, "loss": 1.3373, "step": 7641 }, { "epoch": 0.82, "grad_norm": 0.09912843864926806, "learning_rate": 8.123141373691678e-05, "loss": 1.4338, "step": 7642 }, { "epoch": 0.82, "grad_norm": 0.09231463345028104, "learning_rate": 8.113631030412932e-05, "loss": 1.4434, "step": 7643 }, { "epoch": 0.82, "grad_norm": 0.09127964871657997, "learning_rate": 8.104125766000814e-05, "loss": 1.4054, "step": 7644 }, { "epoch": 0.82, "grad_norm": 0.09058060725166724, "learning_rate": 8.094625581607901e-05, "loss": 1.3209, "step": 7645 }, { "epoch": 0.82, "grad_norm": 0.09059867682124947, "learning_rate": 8.085130478386088e-05, "loss": 1.4915, "step": 7646 }, { "epoch": 0.82, "grad_norm": 0.08947413283209918, "learning_rate": 8.075640457486722e-05, "loss": 1.2977, "step": 7647 }, { "epoch": 0.82, "grad_norm": 0.08459648939665679, "learning_rate": 8.066155520060475e-05, "loss": 1.2952, "step": 7648 }, { "epoch": 0.82, "grad_norm": 0.08603049389030244, "learning_rate": 8.05667566725744e-05, "loss": 1.378, "step": 7649 }, { "epoch": 0.82, "grad_norm": 0.0853412982084942, "learning_rate": 8.047200900227092e-05, "loss": 1.3794, "step": 7650 }, { "epoch": 0.82, "grad_norm": 0.10203544145492183, "learning_rate": 8.037731220118256e-05, "loss": 1.4407, "step": 7651 }, { "epoch": 0.82, "grad_norm": 0.09023196890129039, "learning_rate": 8.028266628079173e-05, "loss": 1.4527, "step": 7652 }, { "epoch": 0.82, "grad_norm": 0.0808947281733063, "learning_rate": 8.018807125257476e-05, "loss": 1.4643, "step": 7653 }, { "epoch": 0.82, "grad_norm": 0.09435279095402682, "learning_rate": 8.009352712800128e-05, "loss": 1.4356, "step": 7654 }, { "epoch": 0.82, "grad_norm": 0.08363094023395468, "learning_rate": 7.999903391853547e-05, "loss": 1.4149, "step": 7655 }, { "epoch": 0.82, "grad_norm": 0.09201333593543766, "learning_rate": 7.990459163563474e-05, "loss": 1.4506, "step": 7656 }, { "epoch": 0.82, "grad_norm": 0.08267699262829642, "learning_rate": 7.981020029075043e-05, "loss": 1.3957, "step": 7657 }, { "epoch": 0.82, "grad_norm": 0.09365508246569157, "learning_rate": 7.971585989532792e-05, "loss": 1.2511, "step": 7658 }, { "epoch": 0.82, "grad_norm": 0.08658708993034162, "learning_rate": 7.962157046080648e-05, "loss": 1.4408, "step": 7659 }, { "epoch": 0.82, "grad_norm": 0.09438322216968405, "learning_rate": 7.952733199861878e-05, "loss": 1.439, "step": 7660 }, { "epoch": 0.82, "grad_norm": 0.0874301144366475, "learning_rate": 7.943314452019168e-05, "loss": 1.4711, "step": 7661 }, { "epoch": 0.82, "grad_norm": 0.08256679231136793, "learning_rate": 7.933900803694576e-05, "loss": 1.4718, "step": 7662 }, { "epoch": 0.82, "grad_norm": 0.08325944584328684, "learning_rate": 7.92449225602953e-05, "loss": 1.3529, "step": 7663 }, { "epoch": 0.82, "grad_norm": 0.08691118535626666, "learning_rate": 7.915088810164855e-05, "loss": 1.4396, "step": 7664 }, { "epoch": 0.82, "grad_norm": 0.08063120692409934, "learning_rate": 7.905690467240762e-05, "loss": 1.5015, "step": 7665 }, { "epoch": 0.82, "grad_norm": 0.080837406638755, "learning_rate": 7.89629722839682e-05, "loss": 1.2455, "step": 7666 }, { "epoch": 0.82, "grad_norm": 0.08510788864340407, "learning_rate": 7.886909094771982e-05, "loss": 1.3956, "step": 7667 }, { "epoch": 0.82, "grad_norm": 0.08059477667817433, "learning_rate": 7.877526067504614e-05, "loss": 1.4848, "step": 7668 }, { "epoch": 0.82, "grad_norm": 0.08384953530723206, "learning_rate": 7.868148147732413e-05, "loss": 1.3094, "step": 7669 }, { "epoch": 0.82, "grad_norm": 0.08851835803646642, "learning_rate": 7.858775336592505e-05, "loss": 1.3807, "step": 7670 }, { "epoch": 0.82, "grad_norm": 0.09268354025660072, "learning_rate": 7.849407635221379e-05, "loss": 1.4575, "step": 7671 }, { "epoch": 0.82, "grad_norm": 0.07953833744699162, "learning_rate": 7.840045044754879e-05, "loss": 1.4784, "step": 7672 }, { "epoch": 0.82, "grad_norm": 0.09061781935977607, "learning_rate": 7.830687566328265e-05, "loss": 1.493, "step": 7673 }, { "epoch": 0.82, "grad_norm": 0.09061908733465755, "learning_rate": 7.82133520107618e-05, "loss": 1.2913, "step": 7674 }, { "epoch": 0.83, "grad_norm": 0.08607373087290072, "learning_rate": 7.811987950132599e-05, "loss": 1.2954, "step": 7675 }, { "epoch": 0.83, "grad_norm": 0.09028855772629082, "learning_rate": 7.802645814630921e-05, "loss": 1.3465, "step": 7676 }, { "epoch": 0.83, "grad_norm": 0.08628410645460714, "learning_rate": 7.793308795703929e-05, "loss": 1.3811, "step": 7677 }, { "epoch": 0.83, "grad_norm": 0.09710158064190848, "learning_rate": 7.783976894483752e-05, "loss": 1.4639, "step": 7678 }, { "epoch": 0.83, "grad_norm": 0.08846974972943605, "learning_rate": 7.774650112101911e-05, "loss": 1.3841, "step": 7679 }, { "epoch": 0.83, "grad_norm": 0.07809311245800762, "learning_rate": 7.765328449689312e-05, "loss": 1.5403, "step": 7680 }, { "epoch": 0.83, "grad_norm": 0.08931552275490337, "learning_rate": 7.756011908376254e-05, "loss": 1.5256, "step": 7681 }, { "epoch": 0.83, "grad_norm": 0.0891463968433547, "learning_rate": 7.74670048929238e-05, "loss": 1.3094, "step": 7682 }, { "epoch": 0.83, "grad_norm": 0.08977675661275883, "learning_rate": 7.737394193566744e-05, "loss": 1.4532, "step": 7683 }, { "epoch": 0.83, "grad_norm": 0.08825867071922235, "learning_rate": 7.72809302232777e-05, "loss": 1.3373, "step": 7684 }, { "epoch": 0.83, "grad_norm": 0.09372314922944172, "learning_rate": 7.71879697670324e-05, "loss": 1.3349, "step": 7685 }, { "epoch": 0.83, "grad_norm": 0.09768865167003908, "learning_rate": 7.709506057820343e-05, "loss": 1.4697, "step": 7686 }, { "epoch": 0.83, "grad_norm": 0.08895694737402585, "learning_rate": 7.700220266805647e-05, "loss": 1.3731, "step": 7687 }, { "epoch": 0.83, "grad_norm": 0.09179791236859439, "learning_rate": 7.690939604785069e-05, "loss": 1.4724, "step": 7688 }, { "epoch": 0.83, "grad_norm": 0.0897147299222315, "learning_rate": 7.681664072883914e-05, "loss": 1.4539, "step": 7689 }, { "epoch": 0.83, "grad_norm": 0.09394049537942313, "learning_rate": 7.672393672226901e-05, "loss": 1.2418, "step": 7690 }, { "epoch": 0.83, "grad_norm": 0.08350359871804075, "learning_rate": 7.663128403938063e-05, "loss": 1.5139, "step": 7691 }, { "epoch": 0.83, "grad_norm": 0.08732287161011305, "learning_rate": 7.653868269140869e-05, "loss": 1.306, "step": 7692 }, { "epoch": 0.83, "grad_norm": 0.0896616196238533, "learning_rate": 7.644613268958144e-05, "loss": 1.3701, "step": 7693 }, { "epoch": 0.83, "grad_norm": 0.08764433658911641, "learning_rate": 7.635363404512069e-05, "loss": 1.4636, "step": 7694 }, { "epoch": 0.83, "grad_norm": 0.1030645066169904, "learning_rate": 7.626118676924237e-05, "loss": 1.2745, "step": 7695 }, { "epoch": 0.83, "grad_norm": 0.09333495190715421, "learning_rate": 7.616879087315614e-05, "loss": 1.3622, "step": 7696 }, { "epoch": 0.83, "grad_norm": 0.10195538862682457, "learning_rate": 7.607644636806505e-05, "loss": 1.4147, "step": 7697 }, { "epoch": 0.83, "grad_norm": 0.08923282440460135, "learning_rate": 7.598415326516638e-05, "loss": 1.4762, "step": 7698 }, { "epoch": 0.83, "grad_norm": 0.08931760167995045, "learning_rate": 7.5891911575651e-05, "loss": 1.3004, "step": 7699 }, { "epoch": 0.83, "grad_norm": 0.08938475556208395, "learning_rate": 7.579972131070334e-05, "loss": 1.2865, "step": 7700 }, { "epoch": 0.83, "grad_norm": 0.07974469882035637, "learning_rate": 7.570758248150194e-05, "loss": 1.32, "step": 7701 }, { "epoch": 0.83, "grad_norm": 0.11124565390940556, "learning_rate": 7.5615495099219e-05, "loss": 1.3306, "step": 7702 }, { "epoch": 0.83, "grad_norm": 0.09465556704118003, "learning_rate": 7.552345917502023e-05, "loss": 1.4847, "step": 7703 }, { "epoch": 0.83, "grad_norm": 0.08633690644820705, "learning_rate": 7.543147472006545e-05, "loss": 1.3595, "step": 7704 }, { "epoch": 0.83, "grad_norm": 0.07694845369969425, "learning_rate": 7.533954174550817e-05, "loss": 1.4188, "step": 7705 }, { "epoch": 0.83, "grad_norm": 0.0888851522271176, "learning_rate": 7.524766026249536e-05, "loss": 1.4088, "step": 7706 }, { "epoch": 0.83, "grad_norm": 0.08604335226277512, "learning_rate": 7.51558302821681e-05, "loss": 1.3517, "step": 7707 }, { "epoch": 0.83, "grad_norm": 0.0916267922007084, "learning_rate": 7.506405181566123e-05, "loss": 1.507, "step": 7708 }, { "epoch": 0.83, "grad_norm": 0.0858819386378702, "learning_rate": 7.497232487410299e-05, "loss": 1.4484, "step": 7709 }, { "epoch": 0.83, "grad_norm": 0.08443971498772733, "learning_rate": 7.488064946861555e-05, "loss": 1.3468, "step": 7710 }, { "epoch": 0.83, "grad_norm": 0.08820651296219477, "learning_rate": 7.478902561031503e-05, "loss": 1.3766, "step": 7711 }, { "epoch": 0.83, "grad_norm": 0.08569244916818314, "learning_rate": 7.469745331031114e-05, "loss": 1.4528, "step": 7712 }, { "epoch": 0.83, "grad_norm": 0.09377223486281296, "learning_rate": 7.460593257970716e-05, "loss": 1.426, "step": 7713 }, { "epoch": 0.83, "grad_norm": 0.09446187498251976, "learning_rate": 7.451446342960044e-05, "loss": 1.3474, "step": 7714 }, { "epoch": 0.83, "grad_norm": 0.08327545046965788, "learning_rate": 7.4423045871082e-05, "loss": 1.3567, "step": 7715 }, { "epoch": 0.83, "grad_norm": 0.08832129555517432, "learning_rate": 7.433167991523631e-05, "loss": 1.4224, "step": 7716 }, { "epoch": 0.83, "grad_norm": 0.08522053430819553, "learning_rate": 7.424036557314196e-05, "loss": 1.5376, "step": 7717 }, { "epoch": 0.83, "grad_norm": 0.09278789708741045, "learning_rate": 7.414910285587117e-05, "loss": 1.4763, "step": 7718 }, { "epoch": 0.83, "grad_norm": 0.09264503427204793, "learning_rate": 7.405789177448985e-05, "loss": 1.2921, "step": 7719 }, { "epoch": 0.83, "grad_norm": 0.0856871779296312, "learning_rate": 7.39667323400574e-05, "loss": 1.3306, "step": 7720 }, { "epoch": 0.83, "grad_norm": 0.08823520611675048, "learning_rate": 7.387562456362762e-05, "loss": 1.345, "step": 7721 }, { "epoch": 0.83, "grad_norm": 0.08796941351682677, "learning_rate": 7.378456845624725e-05, "loss": 1.5243, "step": 7722 }, { "epoch": 0.83, "grad_norm": 0.08979097920858628, "learning_rate": 7.369356402895738e-05, "loss": 1.3536, "step": 7723 }, { "epoch": 0.83, "grad_norm": 0.08436809874166219, "learning_rate": 7.360261129279272e-05, "loss": 1.3238, "step": 7724 }, { "epoch": 0.83, "grad_norm": 0.10352532323355235, "learning_rate": 7.351171025878128e-05, "loss": 1.4435, "step": 7725 }, { "epoch": 0.83, "grad_norm": 0.07711372998127677, "learning_rate": 7.342086093794531e-05, "loss": 1.4107, "step": 7726 }, { "epoch": 0.83, "grad_norm": 0.09640807472724792, "learning_rate": 7.333006334130077e-05, "loss": 1.3328, "step": 7727 }, { "epoch": 0.83, "grad_norm": 0.08914874349546113, "learning_rate": 7.32393174798568e-05, "loss": 1.3379, "step": 7728 }, { "epoch": 0.83, "grad_norm": 0.08496885016196233, "learning_rate": 7.314862336461692e-05, "loss": 1.3343, "step": 7729 }, { "epoch": 0.83, "grad_norm": 0.07735334418737376, "learning_rate": 7.305798100657823e-05, "loss": 1.4266, "step": 7730 }, { "epoch": 0.83, "grad_norm": 0.08938125918239577, "learning_rate": 7.2967390416731e-05, "loss": 1.4782, "step": 7731 }, { "epoch": 0.83, "grad_norm": 0.09278179808639771, "learning_rate": 7.287685160605995e-05, "loss": 1.3485, "step": 7732 }, { "epoch": 0.83, "grad_norm": 0.08403083852248348, "learning_rate": 7.278636458554322e-05, "loss": 1.4589, "step": 7733 }, { "epoch": 0.83, "grad_norm": 0.09068179591128482, "learning_rate": 7.26959293661525e-05, "loss": 1.5567, "step": 7734 }, { "epoch": 0.83, "grad_norm": 0.0762056740331929, "learning_rate": 7.26055459588535e-05, "loss": 1.2854, "step": 7735 }, { "epoch": 0.83, "grad_norm": 0.07528963922741068, "learning_rate": 7.251521437460562e-05, "loss": 1.3909, "step": 7736 }, { "epoch": 0.83, "grad_norm": 0.0942168001290263, "learning_rate": 7.242493462436161e-05, "loss": 1.2895, "step": 7737 }, { "epoch": 0.83, "grad_norm": 0.10240321129164565, "learning_rate": 7.233470671906844e-05, "loss": 1.2489, "step": 7738 }, { "epoch": 0.83, "grad_norm": 0.08504284002213647, "learning_rate": 7.224453066966652e-05, "loss": 1.3601, "step": 7739 }, { "epoch": 0.83, "grad_norm": 0.09587235474791764, "learning_rate": 7.215440648708982e-05, "loss": 1.4656, "step": 7740 }, { "epoch": 0.83, "grad_norm": 0.08536226344078945, "learning_rate": 7.206433418226649e-05, "loss": 1.4795, "step": 7741 }, { "epoch": 0.83, "grad_norm": 0.09140896762573765, "learning_rate": 7.197431376611785e-05, "loss": 1.4325, "step": 7742 }, { "epoch": 0.83, "grad_norm": 0.09015041988015379, "learning_rate": 7.188434524955939e-05, "loss": 1.4109, "step": 7743 }, { "epoch": 0.83, "grad_norm": 0.08271750495957778, "learning_rate": 7.179442864349988e-05, "loss": 1.4846, "step": 7744 }, { "epoch": 0.83, "grad_norm": 0.08829217021420534, "learning_rate": 7.170456395884217e-05, "loss": 1.4766, "step": 7745 }, { "epoch": 0.83, "grad_norm": 0.08833891796417223, "learning_rate": 7.161475120648276e-05, "loss": 1.3419, "step": 7746 }, { "epoch": 0.83, "grad_norm": 0.08900580755952446, "learning_rate": 7.152499039731153e-05, "loss": 1.5181, "step": 7747 }, { "epoch": 0.83, "grad_norm": 0.0860664905985196, "learning_rate": 7.143528154221235e-05, "loss": 1.4304, "step": 7748 }, { "epoch": 0.83, "grad_norm": 0.08884608101931617, "learning_rate": 7.13456246520629e-05, "loss": 1.4872, "step": 7749 }, { "epoch": 0.83, "grad_norm": 0.08909893472267622, "learning_rate": 7.125601973773416e-05, "loss": 1.3473, "step": 7750 }, { "epoch": 0.83, "grad_norm": 0.08642569234811122, "learning_rate": 7.11664668100912e-05, "loss": 1.5026, "step": 7751 }, { "epoch": 0.83, "grad_norm": 0.0867109188311294, "learning_rate": 7.107696587999263e-05, "loss": 1.3332, "step": 7752 }, { "epoch": 0.83, "grad_norm": 0.07775756140298237, "learning_rate": 7.098751695829048e-05, "loss": 1.2844, "step": 7753 }, { "epoch": 0.83, "grad_norm": 0.08664325787505547, "learning_rate": 7.089812005583096e-05, "loss": 1.4412, "step": 7754 }, { "epoch": 0.83, "grad_norm": 0.08714207866053697, "learning_rate": 7.080877518345385e-05, "loss": 1.5046, "step": 7755 }, { "epoch": 0.83, "grad_norm": 0.09008286495715372, "learning_rate": 7.071948235199228e-05, "loss": 1.4347, "step": 7756 }, { "epoch": 0.83, "grad_norm": 0.09963396647707887, "learning_rate": 7.063024157227344e-05, "loss": 1.411, "step": 7757 }, { "epoch": 0.83, "grad_norm": 0.08125783843505932, "learning_rate": 7.054105285511814e-05, "loss": 1.4923, "step": 7758 }, { "epoch": 0.83, "grad_norm": 0.09825938535932652, "learning_rate": 7.045191621134068e-05, "loss": 1.4293, "step": 7759 }, { "epoch": 0.83, "grad_norm": 0.08176546166160557, "learning_rate": 7.036283165174923e-05, "loss": 1.4709, "step": 7760 }, { "epoch": 0.83, "grad_norm": 0.07799311174622484, "learning_rate": 7.027379918714577e-05, "loss": 1.3462, "step": 7761 }, { "epoch": 0.83, "grad_norm": 0.08215288669420732, "learning_rate": 7.018481882832561e-05, "loss": 1.3547, "step": 7762 }, { "epoch": 0.83, "grad_norm": 0.0971949111065358, "learning_rate": 7.00958905860779e-05, "loss": 1.3067, "step": 7763 }, { "epoch": 0.83, "grad_norm": 0.0810382090903955, "learning_rate": 7.000701447118563e-05, "loss": 1.4154, "step": 7764 }, { "epoch": 0.83, "grad_norm": 0.08624335475689453, "learning_rate": 6.991819049442516e-05, "loss": 1.4382, "step": 7765 }, { "epoch": 0.83, "grad_norm": 0.0923274614359002, "learning_rate": 6.982941866656684e-05, "loss": 1.4152, "step": 7766 }, { "epoch": 0.83, "grad_norm": 0.08995040104069521, "learning_rate": 6.974069899837465e-05, "loss": 1.5865, "step": 7767 }, { "epoch": 0.84, "grad_norm": 0.0899147874974617, "learning_rate": 6.965203150060589e-05, "loss": 1.5105, "step": 7768 }, { "epoch": 0.84, "grad_norm": 0.0835013673745431, "learning_rate": 6.956341618401196e-05, "loss": 1.3654, "step": 7769 }, { "epoch": 0.84, "grad_norm": 0.09350769325031699, "learning_rate": 6.947485305933787e-05, "loss": 1.3866, "step": 7770 }, { "epoch": 0.84, "grad_norm": 0.07761428841872348, "learning_rate": 6.938634213732198e-05, "loss": 1.3952, "step": 7771 }, { "epoch": 0.84, "grad_norm": 0.10393913360882924, "learning_rate": 6.929788342869676e-05, "loss": 1.4183, "step": 7772 }, { "epoch": 0.84, "grad_norm": 0.09154838170195079, "learning_rate": 6.920947694418789e-05, "loss": 1.2753, "step": 7773 }, { "epoch": 0.84, "grad_norm": 0.09637707494862725, "learning_rate": 6.912112269451526e-05, "loss": 1.3974, "step": 7774 }, { "epoch": 0.84, "grad_norm": 0.09508845670840485, "learning_rate": 6.903282069039185e-05, "loss": 1.4752, "step": 7775 }, { "epoch": 0.84, "grad_norm": 0.11155591005942683, "learning_rate": 6.894457094252471e-05, "loss": 1.3607, "step": 7776 }, { "epoch": 0.84, "grad_norm": 0.10317484976555591, "learning_rate": 6.885637346161449e-05, "loss": 1.3526, "step": 7777 }, { "epoch": 0.84, "grad_norm": 0.08389106407279763, "learning_rate": 6.876822825835527e-05, "loss": 1.2543, "step": 7778 }, { "epoch": 0.84, "grad_norm": 0.09115262148193395, "learning_rate": 6.86801353434351e-05, "loss": 1.422, "step": 7779 }, { "epoch": 0.84, "grad_norm": 0.0919312396699346, "learning_rate": 6.859209472753559e-05, "loss": 1.3834, "step": 7780 }, { "epoch": 0.84, "grad_norm": 0.0771367133563575, "learning_rate": 6.850410642133176e-05, "loss": 1.3485, "step": 7781 }, { "epoch": 0.84, "grad_norm": 0.08852606976685708, "learning_rate": 6.841617043549259e-05, "loss": 1.337, "step": 7782 }, { "epoch": 0.84, "grad_norm": 0.09033201320751114, "learning_rate": 6.832828678068092e-05, "loss": 1.4328, "step": 7783 }, { "epoch": 0.84, "grad_norm": 0.08586344860911486, "learning_rate": 6.824045546755242e-05, "loss": 1.5029, "step": 7784 }, { "epoch": 0.84, "grad_norm": 0.09016545117776988, "learning_rate": 6.815267650675721e-05, "loss": 1.5326, "step": 7785 }, { "epoch": 0.84, "grad_norm": 0.09318420049557376, "learning_rate": 6.80649499089389e-05, "loss": 1.3503, "step": 7786 }, { "epoch": 0.84, "grad_norm": 0.09915799008577442, "learning_rate": 6.797727568473444e-05, "loss": 1.4098, "step": 7787 }, { "epoch": 0.84, "grad_norm": 0.0998684888213455, "learning_rate": 6.788965384477463e-05, "loss": 1.3458, "step": 7788 }, { "epoch": 0.84, "grad_norm": 0.09354345098880852, "learning_rate": 6.780208439968417e-05, "loss": 1.4528, "step": 7789 }, { "epoch": 0.84, "grad_norm": 0.10346740823288775, "learning_rate": 6.771456736008086e-05, "loss": 1.4361, "step": 7790 }, { "epoch": 0.84, "grad_norm": 0.08885901539295292, "learning_rate": 6.762710273657658e-05, "loss": 1.389, "step": 7791 }, { "epoch": 0.84, "grad_norm": 0.09811897324623484, "learning_rate": 6.753969053977683e-05, "loss": 1.4192, "step": 7792 }, { "epoch": 0.84, "grad_norm": 0.08673617423121921, "learning_rate": 6.745233078028041e-05, "loss": 1.2513, "step": 7793 }, { "epoch": 0.84, "grad_norm": 0.0912880836237642, "learning_rate": 6.736502346868018e-05, "loss": 1.2771, "step": 7794 }, { "epoch": 0.84, "grad_norm": 0.08520826836546021, "learning_rate": 6.727776861556239e-05, "loss": 1.2978, "step": 7795 }, { "epoch": 0.84, "grad_norm": 0.09253343356505377, "learning_rate": 6.719056623150687e-05, "loss": 1.4179, "step": 7796 }, { "epoch": 0.84, "grad_norm": 0.09727907298470896, "learning_rate": 6.710341632708733e-05, "loss": 1.2541, "step": 7797 }, { "epoch": 0.84, "grad_norm": 0.10455922957410266, "learning_rate": 6.701631891287108e-05, "loss": 1.4294, "step": 7798 }, { "epoch": 0.84, "grad_norm": 0.09565500343844599, "learning_rate": 6.692927399941878e-05, "loss": 1.3228, "step": 7799 }, { "epoch": 0.84, "grad_norm": 0.09670216486916945, "learning_rate": 6.684228159728501e-05, "loss": 1.337, "step": 7800 }, { "epoch": 0.84, "grad_norm": 0.09031993166332863, "learning_rate": 6.675534171701802e-05, "loss": 1.4904, "step": 7801 }, { "epoch": 0.84, "grad_norm": 0.1106867657427879, "learning_rate": 6.666845436915941e-05, "loss": 1.4883, "step": 7802 }, { "epoch": 0.84, "grad_norm": 0.08750518151251388, "learning_rate": 6.658161956424457e-05, "loss": 1.5734, "step": 7803 }, { "epoch": 0.84, "grad_norm": 0.09277619307591722, "learning_rate": 6.64948373128027e-05, "loss": 1.4289, "step": 7804 }, { "epoch": 0.84, "grad_norm": 0.09255864978715336, "learning_rate": 6.640810762535637e-05, "loss": 1.4721, "step": 7805 }, { "epoch": 0.84, "grad_norm": 0.08307761174818809, "learning_rate": 6.632143051242168e-05, "loss": 1.3225, "step": 7806 }, { "epoch": 0.84, "grad_norm": 0.08957267637082313, "learning_rate": 6.623480598450866e-05, "loss": 1.262, "step": 7807 }, { "epoch": 0.84, "grad_norm": 0.10565226536610126, "learning_rate": 6.61482340521209e-05, "loss": 1.3611, "step": 7808 }, { "epoch": 0.84, "grad_norm": 0.09842970799267935, "learning_rate": 6.606171472575539e-05, "loss": 1.3758, "step": 7809 }, { "epoch": 0.84, "grad_norm": 0.09138816156285817, "learning_rate": 6.597524801590299e-05, "loss": 1.3514, "step": 7810 }, { "epoch": 0.84, "grad_norm": 0.08304644485072452, "learning_rate": 6.588883393304818e-05, "loss": 1.4504, "step": 7811 }, { "epoch": 0.84, "grad_norm": 0.08179421892153053, "learning_rate": 6.580247248766875e-05, "loss": 1.4556, "step": 7812 }, { "epoch": 0.84, "grad_norm": 0.0905917159466832, "learning_rate": 6.571616369023642e-05, "loss": 1.343, "step": 7813 }, { "epoch": 0.84, "grad_norm": 0.08782451845628722, "learning_rate": 6.562990755121661e-05, "loss": 1.4379, "step": 7814 }, { "epoch": 0.84, "grad_norm": 0.08681317029442595, "learning_rate": 6.554370408106796e-05, "loss": 1.3353, "step": 7815 }, { "epoch": 0.84, "grad_norm": 0.09018112284327645, "learning_rate": 6.545755329024295e-05, "loss": 1.4735, "step": 7816 }, { "epoch": 0.84, "grad_norm": 0.08596032825622608, "learning_rate": 6.537145518918774e-05, "loss": 1.4837, "step": 7817 }, { "epoch": 0.84, "grad_norm": 0.0842586560156261, "learning_rate": 6.528540978834191e-05, "loss": 1.3523, "step": 7818 }, { "epoch": 0.84, "grad_norm": 0.09000204358620255, "learning_rate": 6.519941709813881e-05, "loss": 1.3803, "step": 7819 }, { "epoch": 0.84, "grad_norm": 0.09290775564210622, "learning_rate": 6.511347712900545e-05, "loss": 1.375, "step": 7820 }, { "epoch": 0.84, "grad_norm": 0.09291821777416866, "learning_rate": 6.502758989136221e-05, "loss": 1.3519, "step": 7821 }, { "epoch": 0.84, "grad_norm": 0.10079947970251664, "learning_rate": 6.494175539562325e-05, "loss": 1.4085, "step": 7822 }, { "epoch": 0.84, "grad_norm": 0.08932330902539856, "learning_rate": 6.485597365219648e-05, "loss": 1.4194, "step": 7823 }, { "epoch": 0.84, "grad_norm": 0.08012412657276556, "learning_rate": 6.477024467148291e-05, "loss": 1.4567, "step": 7824 }, { "epoch": 0.84, "grad_norm": 0.08880866528917783, "learning_rate": 6.468456846387777e-05, "loss": 1.3814, "step": 7825 }, { "epoch": 0.84, "grad_norm": 0.08893574046009894, "learning_rate": 6.459894503976943e-05, "loss": 1.3846, "step": 7826 }, { "epoch": 0.84, "grad_norm": 0.08956078096009155, "learning_rate": 6.451337440953998e-05, "loss": 1.3439, "step": 7827 }, { "epoch": 0.84, "grad_norm": 0.10529417583896636, "learning_rate": 6.442785658356525e-05, "loss": 1.4773, "step": 7828 }, { "epoch": 0.84, "grad_norm": 0.09718365997826772, "learning_rate": 6.434239157221466e-05, "loss": 1.3271, "step": 7829 }, { "epoch": 0.84, "grad_norm": 0.08864347101686207, "learning_rate": 6.42569793858509e-05, "loss": 1.3864, "step": 7830 }, { "epoch": 0.84, "grad_norm": 0.09400814744697399, "learning_rate": 6.417162003483063e-05, "loss": 1.5692, "step": 7831 }, { "epoch": 0.84, "grad_norm": 0.08719216814926357, "learning_rate": 6.408631352950406e-05, "loss": 1.3842, "step": 7832 }, { "epoch": 0.84, "grad_norm": 0.07677562030215503, "learning_rate": 6.40010598802147e-05, "loss": 1.3368, "step": 7833 }, { "epoch": 0.84, "grad_norm": 0.07755566060205164, "learning_rate": 6.391585909729997e-05, "loss": 1.4292, "step": 7834 }, { "epoch": 0.84, "grad_norm": 0.08532107826874637, "learning_rate": 6.383071119109079e-05, "loss": 1.4152, "step": 7835 }, { "epoch": 0.84, "grad_norm": 0.0802070942760828, "learning_rate": 6.374561617191166e-05, "loss": 1.3902, "step": 7836 }, { "epoch": 0.84, "grad_norm": 0.07747124635800962, "learning_rate": 6.366057405008042e-05, "loss": 1.1828, "step": 7837 }, { "epoch": 0.84, "grad_norm": 0.09197460885363941, "learning_rate": 6.357558483590887e-05, "loss": 1.2937, "step": 7838 }, { "epoch": 0.84, "grad_norm": 0.08686641796307093, "learning_rate": 6.349064853970232e-05, "loss": 1.4106, "step": 7839 }, { "epoch": 0.84, "grad_norm": 0.09014370721732225, "learning_rate": 6.340576517175939e-05, "loss": 1.4875, "step": 7840 }, { "epoch": 0.84, "grad_norm": 0.08256481623816217, "learning_rate": 6.332093474237265e-05, "loss": 1.3485, "step": 7841 }, { "epoch": 0.84, "grad_norm": 0.08533178161354073, "learning_rate": 6.323615726182813e-05, "loss": 1.4926, "step": 7842 }, { "epoch": 0.84, "grad_norm": 0.09430168667379107, "learning_rate": 6.315143274040519e-05, "loss": 1.4384, "step": 7843 }, { "epoch": 0.84, "grad_norm": 0.11100609742165347, "learning_rate": 6.306676118837707e-05, "loss": 1.365, "step": 7844 }, { "epoch": 0.84, "grad_norm": 0.08113856887278259, "learning_rate": 6.29821426160106e-05, "loss": 1.3599, "step": 7845 }, { "epoch": 0.84, "grad_norm": 0.0783171748843241, "learning_rate": 6.289757703356597e-05, "loss": 1.3204, "step": 7846 }, { "epoch": 0.84, "grad_norm": 0.10432054223441194, "learning_rate": 6.281306445129697e-05, "loss": 1.4898, "step": 7847 }, { "epoch": 0.84, "grad_norm": 0.08632182749912745, "learning_rate": 6.27286048794512e-05, "loss": 1.3361, "step": 7848 }, { "epoch": 0.84, "grad_norm": 0.09476579758108704, "learning_rate": 6.264419832826945e-05, "loss": 1.425, "step": 7849 }, { "epoch": 0.84, "grad_norm": 0.08215841019664528, "learning_rate": 6.255984480798649e-05, "loss": 1.4258, "step": 7850 }, { "epoch": 0.84, "grad_norm": 0.08563993560179116, "learning_rate": 6.247554432883052e-05, "loss": 1.3825, "step": 7851 }, { "epoch": 0.84, "grad_norm": 0.08548292176570288, "learning_rate": 6.239129690102307e-05, "loss": 1.3595, "step": 7852 }, { "epoch": 0.84, "grad_norm": 0.07864817887270605, "learning_rate": 6.230710253477956e-05, "loss": 1.4026, "step": 7853 }, { "epoch": 0.84, "grad_norm": 0.07912513593576152, "learning_rate": 6.222296124030891e-05, "loss": 1.4491, "step": 7854 }, { "epoch": 0.84, "grad_norm": 0.0775400683256103, "learning_rate": 6.213887302781335e-05, "loss": 1.3045, "step": 7855 }, { "epoch": 0.84, "grad_norm": 0.08471158998301948, "learning_rate": 6.2054837907489e-05, "loss": 1.3708, "step": 7856 }, { "epoch": 0.84, "grad_norm": 0.07866617830977912, "learning_rate": 6.197085588952556e-05, "loss": 1.379, "step": 7857 }, { "epoch": 0.84, "grad_norm": 0.08669780094760313, "learning_rate": 6.188692698410575e-05, "loss": 1.2851, "step": 7858 }, { "epoch": 0.84, "grad_norm": 0.09045076299727618, "learning_rate": 6.180305120140644e-05, "loss": 1.3331, "step": 7859 }, { "epoch": 0.84, "grad_norm": 0.0852228463127037, "learning_rate": 6.1719228551598e-05, "loss": 1.4925, "step": 7860 }, { "epoch": 0.85, "grad_norm": 0.08271086957993047, "learning_rate": 6.163545904484397e-05, "loss": 1.3317, "step": 7861 }, { "epoch": 0.85, "grad_norm": 0.07976940004119311, "learning_rate": 6.155174269130182e-05, "loss": 1.37, "step": 7862 }, { "epoch": 0.85, "grad_norm": 0.07844337767876314, "learning_rate": 6.146807950112248e-05, "loss": 1.5441, "step": 7863 }, { "epoch": 0.85, "grad_norm": 0.08755021665953842, "learning_rate": 6.13844694844503e-05, "loss": 1.3235, "step": 7864 }, { "epoch": 0.85, "grad_norm": 0.11060847383411719, "learning_rate": 6.130091265142329e-05, "loss": 1.424, "step": 7865 }, { "epoch": 0.85, "grad_norm": 0.0794293051223916, "learning_rate": 6.12174090121732e-05, "loss": 1.4387, "step": 7866 }, { "epoch": 0.85, "grad_norm": 0.08712838702078128, "learning_rate": 6.113395857682485e-05, "loss": 1.3756, "step": 7867 }, { "epoch": 0.85, "grad_norm": 0.09304835093342179, "learning_rate": 6.105056135549714e-05, "loss": 1.4444, "step": 7868 }, { "epoch": 0.85, "grad_norm": 0.08206255704216997, "learning_rate": 6.096721735830202e-05, "loss": 1.4388, "step": 7869 }, { "epoch": 0.85, "grad_norm": 0.0805377403471389, "learning_rate": 6.0883926595345475e-05, "loss": 1.3054, "step": 7870 }, { "epoch": 0.85, "grad_norm": 0.09047074750357005, "learning_rate": 6.080068907672659e-05, "loss": 1.3322, "step": 7871 }, { "epoch": 0.85, "grad_norm": 0.10712564001781039, "learning_rate": 6.071750481253835e-05, "loss": 1.4827, "step": 7872 }, { "epoch": 0.85, "grad_norm": 0.08000036424175803, "learning_rate": 6.0634373812867185e-05, "loss": 1.4809, "step": 7873 }, { "epoch": 0.85, "grad_norm": 0.08355315667819273, "learning_rate": 6.055129608779275e-05, "loss": 1.3978, "step": 7874 }, { "epoch": 0.85, "grad_norm": 0.0897445519805741, "learning_rate": 6.046827164738872e-05, "loss": 1.4598, "step": 7875 }, { "epoch": 0.85, "grad_norm": 0.08593164499884852, "learning_rate": 6.03853005017222e-05, "loss": 1.2729, "step": 7876 }, { "epoch": 0.85, "grad_norm": 0.09280406295835326, "learning_rate": 6.0302382660853416e-05, "loss": 1.3568, "step": 7877 }, { "epoch": 0.85, "grad_norm": 0.09287445817382776, "learning_rate": 6.0219518134836726e-05, "loss": 1.3667, "step": 7878 }, { "epoch": 0.85, "grad_norm": 0.08942747992632392, "learning_rate": 6.0136706933719586e-05, "loss": 1.3309, "step": 7879 }, { "epoch": 0.85, "grad_norm": 0.09044881308433124, "learning_rate": 6.0053949067543077e-05, "loss": 1.333, "step": 7880 }, { "epoch": 0.85, "grad_norm": 0.08218485919758337, "learning_rate": 5.997124454634201e-05, "loss": 1.5086, "step": 7881 }, { "epoch": 0.85, "grad_norm": 0.08150958759929276, "learning_rate": 5.9888593380144574e-05, "loss": 1.4831, "step": 7882 }, { "epoch": 0.85, "grad_norm": 0.09238224876183417, "learning_rate": 5.9805995578972426e-05, "loss": 1.3159, "step": 7883 }, { "epoch": 0.85, "grad_norm": 0.08132944733289485, "learning_rate": 5.972345115284095e-05, "loss": 1.4739, "step": 7884 }, { "epoch": 0.85, "grad_norm": 0.09200855967945543, "learning_rate": 5.96409601117589e-05, "loss": 1.3772, "step": 7885 }, { "epoch": 0.85, "grad_norm": 0.10121485467365761, "learning_rate": 5.955852246572851e-05, "loss": 1.317, "step": 7886 }, { "epoch": 0.85, "grad_norm": 0.08068215984602292, "learning_rate": 5.9476138224745734e-05, "loss": 1.3589, "step": 7887 }, { "epoch": 0.85, "grad_norm": 0.08866123553653318, "learning_rate": 5.939380739880002e-05, "loss": 1.4508, "step": 7888 }, { "epoch": 0.85, "grad_norm": 0.08589646053077514, "learning_rate": 5.9311529997874095e-05, "loss": 1.3438, "step": 7889 }, { "epoch": 0.85, "grad_norm": 0.09725805400582763, "learning_rate": 5.922930603194437e-05, "loss": 1.4084, "step": 7890 }, { "epoch": 0.85, "grad_norm": 0.09455544461619558, "learning_rate": 5.9147135510981e-05, "loss": 1.4086, "step": 7891 }, { "epoch": 0.85, "grad_norm": 0.10467580678385532, "learning_rate": 5.906501844494716e-05, "loss": 1.3407, "step": 7892 }, { "epoch": 0.85, "grad_norm": 0.07795243427227122, "learning_rate": 5.898295484379995e-05, "loss": 1.3612, "step": 7893 }, { "epoch": 0.85, "grad_norm": 0.1038394375264948, "learning_rate": 5.890094471749002e-05, "loss": 1.4229, "step": 7894 }, { "epoch": 0.85, "grad_norm": 0.09720640404759297, "learning_rate": 5.881898807596114e-05, "loss": 1.3402, "step": 7895 }, { "epoch": 0.85, "grad_norm": 0.07723043481387958, "learning_rate": 5.873708492915092e-05, "loss": 1.4209, "step": 7896 }, { "epoch": 0.85, "grad_norm": 0.09067326993878297, "learning_rate": 5.8655235286990513e-05, "loss": 1.4408, "step": 7897 }, { "epoch": 0.85, "grad_norm": 0.09774190841467567, "learning_rate": 5.857343915940433e-05, "loss": 1.452, "step": 7898 }, { "epoch": 0.85, "grad_norm": 0.09164211233774362, "learning_rate": 5.8491696556310535e-05, "loss": 1.4819, "step": 7899 }, { "epoch": 0.85, "grad_norm": 0.08938979443232763, "learning_rate": 5.841000748762054e-05, "loss": 1.2762, "step": 7900 }, { "epoch": 0.85, "grad_norm": 0.08276635984258755, "learning_rate": 5.832837196323964e-05, "loss": 1.4339, "step": 7901 }, { "epoch": 0.85, "grad_norm": 0.10140530865368988, "learning_rate": 5.824678999306621e-05, "loss": 1.4236, "step": 7902 }, { "epoch": 0.85, "grad_norm": 0.08952137652557425, "learning_rate": 5.816526158699243e-05, "loss": 1.3607, "step": 7903 }, { "epoch": 0.85, "grad_norm": 0.08598514612393836, "learning_rate": 5.8083786754904e-05, "loss": 1.3682, "step": 7904 }, { "epoch": 0.85, "grad_norm": 0.08180567616052078, "learning_rate": 5.800236550667992e-05, "loss": 1.3125, "step": 7905 }, { "epoch": 0.85, "grad_norm": 0.08795388382397808, "learning_rate": 5.7920997852192735e-05, "loss": 1.44, "step": 7906 }, { "epoch": 0.85, "grad_norm": 0.07932353663760587, "learning_rate": 5.7839683801308775e-05, "loss": 1.3103, "step": 7907 }, { "epoch": 0.85, "grad_norm": 0.08941791390098239, "learning_rate": 5.775842336388742e-05, "loss": 1.2789, "step": 7908 }, { "epoch": 0.85, "grad_norm": 0.0943055727960341, "learning_rate": 5.767721654978186e-05, "loss": 1.4448, "step": 7909 }, { "epoch": 0.85, "grad_norm": 0.08605694259527719, "learning_rate": 5.759606336883893e-05, "loss": 1.3103, "step": 7910 }, { "epoch": 0.85, "grad_norm": 0.09528400159248888, "learning_rate": 5.7514963830898324e-05, "loss": 1.4157, "step": 7911 }, { "epoch": 0.85, "grad_norm": 0.0838022572347359, "learning_rate": 5.7433917945793776e-05, "loss": 1.3387, "step": 7912 }, { "epoch": 0.85, "grad_norm": 0.11424160907650843, "learning_rate": 5.73529257233526e-05, "loss": 1.3327, "step": 7913 }, { "epoch": 0.85, "grad_norm": 0.09560274070572669, "learning_rate": 5.72719871733951e-05, "loss": 1.4464, "step": 7914 }, { "epoch": 0.85, "grad_norm": 0.08509483829820473, "learning_rate": 5.719110230573543e-05, "loss": 1.3786, "step": 7915 }, { "epoch": 0.85, "grad_norm": 0.10596939753078925, "learning_rate": 5.7110271130181356e-05, "loss": 1.523, "step": 7916 }, { "epoch": 0.85, "grad_norm": 0.09238716068882286, "learning_rate": 5.7029493656533646e-05, "loss": 1.3509, "step": 7917 }, { "epoch": 0.85, "grad_norm": 0.09918986128963173, "learning_rate": 5.694876989458697e-05, "loss": 1.3125, "step": 7918 }, { "epoch": 0.85, "grad_norm": 0.09547700248487656, "learning_rate": 5.6868099854129493e-05, "loss": 1.4866, "step": 7919 }, { "epoch": 0.85, "grad_norm": 0.10238214475114867, "learning_rate": 5.678748354494251e-05, "loss": 1.511, "step": 7920 }, { "epoch": 0.85, "grad_norm": 0.09238523148440415, "learning_rate": 5.6706920976801246e-05, "loss": 1.4352, "step": 7921 }, { "epoch": 0.85, "grad_norm": 0.08533477612675754, "learning_rate": 5.662641215947406e-05, "loss": 1.4433, "step": 7922 }, { "epoch": 0.85, "grad_norm": 0.0923241212621198, "learning_rate": 5.654595710272287e-05, "loss": 1.4694, "step": 7923 }, { "epoch": 0.85, "grad_norm": 0.098711665661809, "learning_rate": 5.6465555816303196e-05, "loss": 1.3371, "step": 7924 }, { "epoch": 0.85, "grad_norm": 0.09113188080489218, "learning_rate": 5.638520830996402e-05, "loss": 1.5073, "step": 7925 }, { "epoch": 0.85, "grad_norm": 0.09294883936559913, "learning_rate": 5.630491459344766e-05, "loss": 1.4731, "step": 7926 }, { "epoch": 0.85, "grad_norm": 0.0958511718237879, "learning_rate": 5.622467467649006e-05, "loss": 1.5133, "step": 7927 }, { "epoch": 0.85, "grad_norm": 0.09150286778743173, "learning_rate": 5.614448856882065e-05, "loss": 1.3905, "step": 7928 }, { "epoch": 0.85, "grad_norm": 0.09388354098673922, "learning_rate": 5.60643562801621e-05, "loss": 1.433, "step": 7929 }, { "epoch": 0.85, "grad_norm": 0.1002396530061073, "learning_rate": 5.5984277820230855e-05, "loss": 1.3662, "step": 7930 }, { "epoch": 0.85, "grad_norm": 0.10274461624460365, "learning_rate": 5.5904253198736824e-05, "loss": 1.5483, "step": 7931 }, { "epoch": 0.85, "grad_norm": 0.08530544037612882, "learning_rate": 5.582428242538307e-05, "loss": 1.467, "step": 7932 }, { "epoch": 0.85, "grad_norm": 0.09574392115798785, "learning_rate": 5.574436550986628e-05, "loss": 1.3484, "step": 7933 }, { "epoch": 0.85, "grad_norm": 0.07777008187231625, "learning_rate": 5.5664502461876706e-05, "loss": 1.287, "step": 7934 }, { "epoch": 0.85, "grad_norm": 0.09424269991976277, "learning_rate": 5.558469329109822e-05, "loss": 1.4066, "step": 7935 }, { "epoch": 0.85, "grad_norm": 0.08766616531253907, "learning_rate": 5.550493800720774e-05, "loss": 1.2889, "step": 7936 }, { "epoch": 0.85, "grad_norm": 0.08486592252459829, "learning_rate": 5.5425236619875866e-05, "loss": 1.3519, "step": 7937 }, { "epoch": 0.85, "grad_norm": 0.08143108081202038, "learning_rate": 5.5345589138766864e-05, "loss": 1.4174, "step": 7938 }, { "epoch": 0.85, "grad_norm": 0.08634015059967805, "learning_rate": 5.5265995573538066e-05, "loss": 1.3629, "step": 7939 }, { "epoch": 0.85, "grad_norm": 0.09101367306765057, "learning_rate": 5.5186455933840476e-05, "loss": 1.3984, "step": 7940 }, { "epoch": 0.85, "grad_norm": 0.08940959291798563, "learning_rate": 5.5106970229318765e-05, "loss": 1.3313, "step": 7941 }, { "epoch": 0.85, "grad_norm": 0.08414515779509103, "learning_rate": 5.502753846961067e-05, "loss": 1.387, "step": 7942 }, { "epoch": 0.85, "grad_norm": 0.08772764724912127, "learning_rate": 5.494816066434749e-05, "loss": 1.4057, "step": 7943 }, { "epoch": 0.85, "grad_norm": 0.08757481261207309, "learning_rate": 5.4868836823154246e-05, "loss": 1.2524, "step": 7944 }, { "epoch": 0.85, "grad_norm": 0.09430417292165912, "learning_rate": 5.478956695564902e-05, "loss": 1.4384, "step": 7945 }, { "epoch": 0.85, "grad_norm": 0.0910526631063559, "learning_rate": 5.471035107144373e-05, "loss": 1.4247, "step": 7946 }, { "epoch": 0.85, "grad_norm": 0.09675309496202968, "learning_rate": 5.4631189180143596e-05, "loss": 1.4936, "step": 7947 }, { "epoch": 0.85, "grad_norm": 0.07853317251477367, "learning_rate": 5.4552081291347035e-05, "loss": 1.4164, "step": 7948 }, { "epoch": 0.85, "grad_norm": 0.09942725188704955, "learning_rate": 5.4473027414646384e-05, "loss": 1.5146, "step": 7949 }, { "epoch": 0.85, "grad_norm": 0.08892933014210838, "learning_rate": 5.439402755962719e-05, "loss": 1.4987, "step": 7950 }, { "epoch": 0.85, "grad_norm": 0.1075372531467069, "learning_rate": 5.43150817358683e-05, "loss": 1.3922, "step": 7951 }, { "epoch": 0.85, "grad_norm": 0.09669611531183489, "learning_rate": 5.423618995294238e-05, "loss": 1.2791, "step": 7952 }, { "epoch": 0.85, "grad_norm": 0.08838658799987743, "learning_rate": 5.415735222041518e-05, "loss": 1.2887, "step": 7953 }, { "epoch": 0.86, "grad_norm": 0.08859874586580094, "learning_rate": 5.407856854784598e-05, "loss": 1.4271, "step": 7954 }, { "epoch": 0.86, "grad_norm": 0.09346223291532334, "learning_rate": 5.3999838944787713e-05, "loss": 1.3762, "step": 7955 }, { "epoch": 0.86, "grad_norm": 0.08402497812656093, "learning_rate": 5.392116342078662e-05, "loss": 1.4116, "step": 7956 }, { "epoch": 0.86, "grad_norm": 0.08561235386678215, "learning_rate": 5.3842541985382244e-05, "loss": 1.3021, "step": 7957 }, { "epoch": 0.86, "grad_norm": 0.08016569594538953, "learning_rate": 5.3763974648107786e-05, "loss": 1.3236, "step": 7958 }, { "epoch": 0.86, "grad_norm": 0.08856641914868289, "learning_rate": 5.36854614184899e-05, "loss": 1.556, "step": 7959 }, { "epoch": 0.86, "grad_norm": 0.09532590526560956, "learning_rate": 5.360700230604842e-05, "loss": 1.408, "step": 7960 }, { "epoch": 0.86, "grad_norm": 0.08393650575881223, "learning_rate": 5.3528597320296855e-05, "loss": 1.2279, "step": 7961 }, { "epoch": 0.86, "grad_norm": 0.08808394205662512, "learning_rate": 5.3450246470742204e-05, "loss": 1.3444, "step": 7962 }, { "epoch": 0.86, "grad_norm": 0.08187518003995406, "learning_rate": 5.337194976688464e-05, "loss": 1.4492, "step": 7963 }, { "epoch": 0.86, "grad_norm": 0.09074287291952292, "learning_rate": 5.3293707218217805e-05, "loss": 1.3961, "step": 7964 }, { "epoch": 0.86, "grad_norm": 0.09922091033979089, "learning_rate": 5.3215518834229036e-05, "loss": 1.4014, "step": 7965 }, { "epoch": 0.86, "grad_norm": 0.09558377560461183, "learning_rate": 5.313738462439899e-05, "loss": 1.4919, "step": 7966 }, { "epoch": 0.86, "grad_norm": 0.07975403401656565, "learning_rate": 5.3059304598201576e-05, "loss": 1.374, "step": 7967 }, { "epoch": 0.86, "grad_norm": 0.09718667621815387, "learning_rate": 5.298127876510428e-05, "loss": 1.4082, "step": 7968 }, { "epoch": 0.86, "grad_norm": 0.0912549963123792, "learning_rate": 5.290330713456809e-05, "loss": 1.4846, "step": 7969 }, { "epoch": 0.86, "grad_norm": 0.08168961849298514, "learning_rate": 5.282538971604728e-05, "loss": 1.366, "step": 7970 }, { "epoch": 0.86, "grad_norm": 0.08900406927437467, "learning_rate": 5.274752651898956e-05, "loss": 1.3303, "step": 7971 }, { "epoch": 0.86, "grad_norm": 0.09610658260467948, "learning_rate": 5.266971755283628e-05, "loss": 1.5749, "step": 7972 }, { "epoch": 0.86, "grad_norm": 0.08865058880800436, "learning_rate": 5.259196282702178e-05, "loss": 1.3821, "step": 7973 }, { "epoch": 0.86, "grad_norm": 0.09517903021205283, "learning_rate": 5.251426235097439e-05, "loss": 1.4055, "step": 7974 }, { "epoch": 0.86, "grad_norm": 0.08493156034832718, "learning_rate": 5.243661613411543e-05, "loss": 1.3444, "step": 7975 }, { "epoch": 0.86, "grad_norm": 0.0838612191297965, "learning_rate": 5.2359024185859585e-05, "loss": 1.4417, "step": 7976 }, { "epoch": 0.86, "grad_norm": 0.09421372340720958, "learning_rate": 5.2281486515615375e-05, "loss": 1.4135, "step": 7977 }, { "epoch": 0.86, "grad_norm": 0.09153333076851533, "learning_rate": 5.220400313278451e-05, "loss": 1.4349, "step": 7978 }, { "epoch": 0.86, "grad_norm": 0.09161799711452127, "learning_rate": 5.212657404676191e-05, "loss": 1.4369, "step": 7979 }, { "epoch": 0.86, "grad_norm": 0.08816016774243185, "learning_rate": 5.2049199266936355e-05, "loss": 1.3312, "step": 7980 }, { "epoch": 0.86, "grad_norm": 0.07536067522168258, "learning_rate": 5.197187880268972e-05, "loss": 1.4213, "step": 7981 }, { "epoch": 0.86, "grad_norm": 0.0926666875343112, "learning_rate": 5.1894612663397345e-05, "loss": 1.3562, "step": 7982 }, { "epoch": 0.86, "grad_norm": 0.09777436980649994, "learning_rate": 5.1817400858427956e-05, "loss": 1.4365, "step": 7983 }, { "epoch": 0.86, "grad_norm": 0.0811904250377712, "learning_rate": 5.1740243397144e-05, "loss": 1.4611, "step": 7984 }, { "epoch": 0.86, "grad_norm": 0.08800265668953564, "learning_rate": 5.16631402889009e-05, "loss": 1.3533, "step": 7985 }, { "epoch": 0.86, "grad_norm": 0.0904440816356616, "learning_rate": 5.158609154304761e-05, "loss": 1.4972, "step": 7986 }, { "epoch": 0.86, "grad_norm": 0.08685858702675321, "learning_rate": 5.150909716892671e-05, "loss": 1.3354, "step": 7987 }, { "epoch": 0.86, "grad_norm": 0.09362833005578333, "learning_rate": 5.143215717587385e-05, "loss": 1.3281, "step": 7988 }, { "epoch": 0.86, "grad_norm": 0.07966025286356111, "learning_rate": 5.13552715732184e-05, "loss": 1.5392, "step": 7989 }, { "epoch": 0.86, "grad_norm": 0.08725214418209264, "learning_rate": 5.127844037028312e-05, "loss": 1.4684, "step": 7990 }, { "epoch": 0.86, "grad_norm": 0.09737399318771994, "learning_rate": 5.120166357638378e-05, "loss": 1.4776, "step": 7991 }, { "epoch": 0.86, "grad_norm": 0.08768025693285349, "learning_rate": 5.112494120083e-05, "loss": 1.3234, "step": 7992 }, { "epoch": 0.86, "grad_norm": 0.08323505095792275, "learning_rate": 5.104827325292466e-05, "loss": 1.5446, "step": 7993 }, { "epoch": 0.86, "grad_norm": 0.09839799519704864, "learning_rate": 5.0971659741963883e-05, "loss": 1.3734, "step": 7994 }, { "epoch": 0.86, "grad_norm": 0.0963408838494438, "learning_rate": 5.0895100677237515e-05, "loss": 1.374, "step": 7995 }, { "epoch": 0.86, "grad_norm": 0.10273257116177338, "learning_rate": 5.081859606802841e-05, "loss": 1.3963, "step": 7996 }, { "epoch": 0.86, "grad_norm": 0.09690958321962514, "learning_rate": 5.074214592361315e-05, "loss": 1.4474, "step": 7997 }, { "epoch": 0.86, "grad_norm": 0.09055382203489995, "learning_rate": 5.066575025326148e-05, "loss": 1.2824, "step": 7998 }, { "epoch": 0.86, "grad_norm": 0.09214018733037252, "learning_rate": 5.05894090662366e-05, "loss": 1.3172, "step": 7999 }, { "epoch": 0.86, "grad_norm": 0.08020416377684451, "learning_rate": 5.05131223717954e-05, "loss": 1.4465, "step": 8000 }, { "epoch": 0.86, "grad_norm": 0.10471508281926799, "learning_rate": 5.043689017918757e-05, "loss": 1.3915, "step": 8001 }, { "epoch": 0.86, "grad_norm": 0.10116000957392209, "learning_rate": 5.0360712497656725e-05, "loss": 1.2644, "step": 8002 }, { "epoch": 0.86, "grad_norm": 0.09559909241116268, "learning_rate": 5.028458933643976e-05, "loss": 1.3338, "step": 8003 }, { "epoch": 0.86, "grad_norm": 0.09002643649763474, "learning_rate": 5.020852070476656e-05, "loss": 1.4668, "step": 8004 }, { "epoch": 0.86, "grad_norm": 0.09528584400679911, "learning_rate": 5.013250661186103e-05, "loss": 1.33, "step": 8005 }, { "epoch": 0.86, "grad_norm": 0.11374398792014954, "learning_rate": 5.005654706694002e-05, "loss": 1.3475, "step": 8006 }, { "epoch": 0.86, "grad_norm": 0.09495489508659871, "learning_rate": 4.9980642079213764e-05, "loss": 1.4521, "step": 8007 }, { "epoch": 0.86, "grad_norm": 0.09302443756883642, "learning_rate": 4.9904791657886085e-05, "loss": 1.4601, "step": 8008 }, { "epoch": 0.86, "grad_norm": 0.09287663041146321, "learning_rate": 4.982899581215428e-05, "loss": 1.3386, "step": 8009 }, { "epoch": 0.86, "grad_norm": 0.08839803430724054, "learning_rate": 4.975325455120855e-05, "loss": 1.4648, "step": 8010 }, { "epoch": 0.86, "grad_norm": 0.09007925416026948, "learning_rate": 4.9677567884233e-05, "loss": 1.5249, "step": 8011 }, { "epoch": 0.86, "grad_norm": 0.1024334249851111, "learning_rate": 4.96019358204049e-05, "loss": 1.3792, "step": 8012 }, { "epoch": 0.86, "grad_norm": 0.09493434008827761, "learning_rate": 4.952635836889474e-05, "loss": 1.4692, "step": 8013 }, { "epoch": 0.86, "grad_norm": 0.10177317419169603, "learning_rate": 4.94508355388667e-05, "loss": 1.3567, "step": 8014 }, { "epoch": 0.86, "grad_norm": 0.10359395185727091, "learning_rate": 4.937536733947817e-05, "loss": 1.4361, "step": 8015 }, { "epoch": 0.86, "grad_norm": 0.1014764345449319, "learning_rate": 4.9299953779879894e-05, "loss": 1.4492, "step": 8016 }, { "epoch": 0.86, "grad_norm": 0.09141219987822603, "learning_rate": 4.9224594869215934e-05, "loss": 1.3856, "step": 8017 }, { "epoch": 0.86, "grad_norm": 0.10701698487475056, "learning_rate": 4.914929061662399e-05, "loss": 1.3098, "step": 8018 }, { "epoch": 0.86, "grad_norm": 0.08421051966495124, "learning_rate": 4.9074041031234804e-05, "loss": 1.2876, "step": 8019 }, { "epoch": 0.86, "grad_norm": 0.10280735588357584, "learning_rate": 4.89988461221727e-05, "loss": 1.2543, "step": 8020 }, { "epoch": 0.86, "grad_norm": 0.0871700644770041, "learning_rate": 4.8923705898555496e-05, "loss": 1.4419, "step": 8021 }, { "epoch": 0.86, "grad_norm": 0.09208405244887743, "learning_rate": 4.884862036949389e-05, "loss": 1.4242, "step": 8022 }, { "epoch": 0.86, "grad_norm": 0.09519872895834376, "learning_rate": 4.877358954409245e-05, "loss": 1.4272, "step": 8023 }, { "epoch": 0.86, "grad_norm": 0.08160045804385208, "learning_rate": 4.8698613431448934e-05, "loss": 1.442, "step": 8024 }, { "epoch": 0.86, "grad_norm": 0.08917162958217983, "learning_rate": 4.862369204065437e-05, "loss": 1.4528, "step": 8025 }, { "epoch": 0.86, "grad_norm": 0.09584081971903959, "learning_rate": 4.8548825380793304e-05, "loss": 1.4295, "step": 8026 }, { "epoch": 0.86, "grad_norm": 0.08745885865269687, "learning_rate": 4.84740134609436e-05, "loss": 1.4092, "step": 8027 }, { "epoch": 0.86, "grad_norm": 0.08685155962456509, "learning_rate": 4.839925629017639e-05, "loss": 1.3759, "step": 8028 }, { "epoch": 0.86, "grad_norm": 0.10410625722723307, "learning_rate": 4.8324553877556186e-05, "loss": 1.4921, "step": 8029 }, { "epoch": 0.86, "grad_norm": 0.09053463510410303, "learning_rate": 4.8249906232140984e-05, "loss": 1.3488, "step": 8030 }, { "epoch": 0.86, "grad_norm": 0.08590497883548834, "learning_rate": 4.8175313362982195e-05, "loss": 1.4228, "step": 8031 }, { "epoch": 0.86, "grad_norm": 0.10821065446154124, "learning_rate": 4.810077527912421e-05, "loss": 1.3144, "step": 8032 }, { "epoch": 0.86, "grad_norm": 0.10917917244641281, "learning_rate": 4.8026291989605174e-05, "loss": 1.3937, "step": 8033 }, { "epoch": 0.86, "grad_norm": 0.09423165223915081, "learning_rate": 4.7951863503456485e-05, "loss": 1.3646, "step": 8034 }, { "epoch": 0.86, "grad_norm": 0.088316397400894, "learning_rate": 4.787748982970275e-05, "loss": 1.3682, "step": 8035 }, { "epoch": 0.86, "grad_norm": 0.10998794053022858, "learning_rate": 4.780317097736203e-05, "loss": 1.3058, "step": 8036 }, { "epoch": 0.86, "grad_norm": 0.09088548506112062, "learning_rate": 4.7728906955446015e-05, "loss": 1.4925, "step": 8037 }, { "epoch": 0.86, "grad_norm": 0.08992202733297892, "learning_rate": 4.765469777295906e-05, "loss": 1.5035, "step": 8038 }, { "epoch": 0.86, "grad_norm": 0.08358952526085513, "learning_rate": 4.7580543438899446e-05, "loss": 1.3281, "step": 8039 }, { "epoch": 0.86, "grad_norm": 0.08568139006226679, "learning_rate": 4.750644396225873e-05, "loss": 1.5197, "step": 8040 }, { "epoch": 0.86, "grad_norm": 0.08993280314740992, "learning_rate": 4.743239935202165e-05, "loss": 1.3513, "step": 8041 }, { "epoch": 0.86, "grad_norm": 0.09889419546611622, "learning_rate": 4.7358409617166307e-05, "loss": 1.3172, "step": 8042 }, { "epoch": 0.86, "grad_norm": 0.09847518037940564, "learning_rate": 4.728447476666442e-05, "loss": 1.3748, "step": 8043 }, { "epoch": 0.86, "grad_norm": 0.09646547794616295, "learning_rate": 4.7210594809480645e-05, "loss": 1.4414, "step": 8044 }, { "epoch": 0.86, "grad_norm": 0.08558448303419687, "learning_rate": 4.71367697545732e-05, "loss": 1.3872, "step": 8045 }, { "epoch": 0.86, "grad_norm": 0.08697043289257039, "learning_rate": 4.706299961089383e-05, "loss": 1.338, "step": 8046 }, { "epoch": 0.87, "grad_norm": 0.08883717181982263, "learning_rate": 4.698928438738714e-05, "loss": 1.4657, "step": 8047 }, { "epoch": 0.87, "grad_norm": 0.09612420925192143, "learning_rate": 4.691562409299161e-05, "loss": 1.3923, "step": 8048 }, { "epoch": 0.87, "grad_norm": 0.0866917810947192, "learning_rate": 4.684201873663868e-05, "loss": 1.5458, "step": 8049 }, { "epoch": 0.87, "grad_norm": 0.10058692936269363, "learning_rate": 4.676846832725312e-05, "loss": 1.294, "step": 8050 }, { "epoch": 0.87, "grad_norm": 0.09684992742639126, "learning_rate": 4.6694972873753296e-05, "loss": 1.4402, "step": 8051 }, { "epoch": 0.87, "grad_norm": 0.11670958899864131, "learning_rate": 4.66215323850509e-05, "loss": 1.3937, "step": 8052 }, { "epoch": 0.87, "grad_norm": 0.08558904787337474, "learning_rate": 4.6548146870050656e-05, "loss": 1.4114, "step": 8053 }, { "epoch": 0.87, "grad_norm": 0.08350040748116296, "learning_rate": 4.647481633765088e-05, "loss": 1.2515, "step": 8054 }, { "epoch": 0.87, "grad_norm": 0.08270056183524263, "learning_rate": 4.640154079674325e-05, "loss": 1.3878, "step": 8055 }, { "epoch": 0.87, "grad_norm": 0.12140269918508494, "learning_rate": 4.632832025621253e-05, "loss": 1.369, "step": 8056 }, { "epoch": 0.87, "grad_norm": 0.0967211345492668, "learning_rate": 4.625515472493697e-05, "loss": 1.4863, "step": 8057 }, { "epoch": 0.87, "grad_norm": 0.09594188315943698, "learning_rate": 4.6182044211788343e-05, "loss": 1.3982, "step": 8058 }, { "epoch": 0.87, "grad_norm": 0.10442851702577187, "learning_rate": 4.610898872563135e-05, "loss": 1.4728, "step": 8059 }, { "epoch": 0.87, "grad_norm": 0.09089308048752692, "learning_rate": 4.6035988275324236e-05, "loss": 1.4235, "step": 8060 }, { "epoch": 0.87, "grad_norm": 0.10029383253191723, "learning_rate": 4.596304286971853e-05, "loss": 1.4527, "step": 8061 }, { "epoch": 0.87, "grad_norm": 0.09245414374315496, "learning_rate": 4.5890152517659326e-05, "loss": 1.4476, "step": 8062 }, { "epoch": 0.87, "grad_norm": 0.08749737182144246, "learning_rate": 4.58173172279846e-05, "loss": 1.4544, "step": 8063 }, { "epoch": 0.87, "grad_norm": 0.08128681609621158, "learning_rate": 4.574453700952591e-05, "loss": 1.315, "step": 8064 }, { "epoch": 0.87, "grad_norm": 0.0977477408035849, "learning_rate": 4.56718118711083e-05, "loss": 1.3868, "step": 8065 }, { "epoch": 0.87, "grad_norm": 0.08346194411051658, "learning_rate": 4.559914182154967e-05, "loss": 1.3092, "step": 8066 }, { "epoch": 0.87, "grad_norm": 0.08313207581438073, "learning_rate": 4.5526526869661686e-05, "loss": 1.3348, "step": 8067 }, { "epoch": 0.87, "grad_norm": 0.09434227073134552, "learning_rate": 4.545396702424926e-05, "loss": 1.2725, "step": 8068 }, { "epoch": 0.87, "grad_norm": 0.09172296973103769, "learning_rate": 4.538146229411033e-05, "loss": 1.5323, "step": 8069 }, { "epoch": 0.87, "grad_norm": 0.09822663625867462, "learning_rate": 4.5309012688036334e-05, "loss": 1.4093, "step": 8070 }, { "epoch": 0.87, "grad_norm": 0.0931555383209421, "learning_rate": 4.5236618214812234e-05, "loss": 1.307, "step": 8071 }, { "epoch": 0.87, "grad_norm": 0.09686713999156278, "learning_rate": 4.5164278883215834e-05, "loss": 1.4408, "step": 8072 }, { "epoch": 0.87, "grad_norm": 0.09891740183537313, "learning_rate": 4.509199470201869e-05, "loss": 1.3064, "step": 8073 }, { "epoch": 0.87, "grad_norm": 0.08416754685939414, "learning_rate": 4.501976567998561e-05, "loss": 1.2797, "step": 8074 }, { "epoch": 0.87, "grad_norm": 0.09197596824111855, "learning_rate": 4.494759182587438e-05, "loss": 1.4559, "step": 8075 }, { "epoch": 0.87, "grad_norm": 0.09727193551485228, "learning_rate": 4.487547314843643e-05, "loss": 1.466, "step": 8076 }, { "epoch": 0.87, "grad_norm": 0.09239804210902398, "learning_rate": 4.480340965641655e-05, "loss": 1.4517, "step": 8077 }, { "epoch": 0.87, "grad_norm": 0.09649305439725554, "learning_rate": 4.473140135855247e-05, "loss": 1.4156, "step": 8078 }, { "epoch": 0.87, "grad_norm": 0.09236065359889135, "learning_rate": 4.4659448263575544e-05, "loss": 1.4829, "step": 8079 }, { "epoch": 0.87, "grad_norm": 0.0994382561845817, "learning_rate": 4.4587550380210284e-05, "loss": 1.3684, "step": 8080 }, { "epoch": 0.87, "grad_norm": 0.09209726648563313, "learning_rate": 4.451570771717467e-05, "loss": 1.3426, "step": 8081 }, { "epoch": 0.87, "grad_norm": 0.09843478293680565, "learning_rate": 4.444392028317967e-05, "loss": 1.3716, "step": 8082 }, { "epoch": 0.87, "grad_norm": 0.10283688142537882, "learning_rate": 4.437218808693e-05, "loss": 1.3521, "step": 8083 }, { "epoch": 0.87, "grad_norm": 0.09645222990375216, "learning_rate": 4.4300511137123186e-05, "loss": 1.4416, "step": 8084 }, { "epoch": 0.87, "grad_norm": 0.08737555151807626, "learning_rate": 4.422888944245046e-05, "loss": 1.3301, "step": 8085 }, { "epoch": 0.87, "grad_norm": 0.09666379041694352, "learning_rate": 4.415732301159625e-05, "loss": 1.5381, "step": 8086 }, { "epoch": 0.87, "grad_norm": 0.09304119796433967, "learning_rate": 4.408581185323807e-05, "loss": 1.4706, "step": 8087 }, { "epoch": 0.87, "grad_norm": 0.11082626671337686, "learning_rate": 4.401435597604697e-05, "loss": 1.3994, "step": 8088 }, { "epoch": 0.87, "grad_norm": 0.08799149367663399, "learning_rate": 4.394295538868731e-05, "loss": 1.5273, "step": 8089 }, { "epoch": 0.87, "grad_norm": 0.09010079273368712, "learning_rate": 4.387161009981661e-05, "loss": 1.3633, "step": 8090 }, { "epoch": 0.87, "grad_norm": 0.08354802566216252, "learning_rate": 4.3800320118085605e-05, "loss": 1.4994, "step": 8091 }, { "epoch": 0.87, "grad_norm": 0.08402331275744471, "learning_rate": 4.3729085452138496e-05, "loss": 1.376, "step": 8092 }, { "epoch": 0.87, "grad_norm": 0.10391735563408136, "learning_rate": 4.365790611061293e-05, "loss": 1.4819, "step": 8093 }, { "epoch": 0.87, "grad_norm": 0.09007821896954256, "learning_rate": 4.3586782102139344e-05, "loss": 1.3756, "step": 8094 }, { "epoch": 0.87, "grad_norm": 0.08903422612703692, "learning_rate": 4.3515713435342e-05, "loss": 1.4106, "step": 8095 }, { "epoch": 0.87, "grad_norm": 0.09313008870552218, "learning_rate": 4.344470011883817e-05, "loss": 1.402, "step": 8096 }, { "epoch": 0.87, "grad_norm": 0.08543654211563571, "learning_rate": 4.33737421612384e-05, "loss": 1.2531, "step": 8097 }, { "epoch": 0.87, "grad_norm": 0.09067470275876136, "learning_rate": 4.330283957114656e-05, "loss": 1.3704, "step": 8098 }, { "epoch": 0.87, "grad_norm": 0.08563274293716956, "learning_rate": 4.323199235716002e-05, "loss": 1.3759, "step": 8099 }, { "epoch": 0.87, "grad_norm": 0.08905782530462407, "learning_rate": 4.316120052786904e-05, "loss": 1.3192, "step": 8100 }, { "epoch": 0.87, "grad_norm": 0.09857674223026314, "learning_rate": 4.309046409185757e-05, "loss": 1.3845, "step": 8101 }, { "epoch": 0.87, "grad_norm": 0.08694920472324444, "learning_rate": 4.301978305770249e-05, "loss": 1.4144, "step": 8102 }, { "epoch": 0.87, "grad_norm": 0.08527994623935795, "learning_rate": 4.294915743397409e-05, "loss": 1.4158, "step": 8103 }, { "epoch": 0.87, "grad_norm": 0.09263598147216909, "learning_rate": 4.287858722923604e-05, "loss": 1.5114, "step": 8104 }, { "epoch": 0.87, "grad_norm": 0.09202117731423061, "learning_rate": 4.2808072452045374e-05, "loss": 1.429, "step": 8105 }, { "epoch": 0.87, "grad_norm": 0.08638915131619937, "learning_rate": 4.273761311095192e-05, "loss": 1.4321, "step": 8106 }, { "epoch": 0.87, "grad_norm": 0.09522134929831542, "learning_rate": 4.2667209214499346e-05, "loss": 1.3787, "step": 8107 }, { "epoch": 0.87, "grad_norm": 0.10469829873713113, "learning_rate": 4.259686077122443e-05, "loss": 1.3612, "step": 8108 }, { "epoch": 0.87, "grad_norm": 0.0855733834471551, "learning_rate": 4.2526567789656965e-05, "loss": 1.4306, "step": 8109 }, { "epoch": 0.87, "grad_norm": 0.09614067627908375, "learning_rate": 4.2456330278320245e-05, "loss": 1.4499, "step": 8110 }, { "epoch": 0.87, "grad_norm": 0.08390308759988215, "learning_rate": 4.2386148245731e-05, "loss": 1.3943, "step": 8111 }, { "epoch": 0.87, "grad_norm": 0.09902179964016455, "learning_rate": 4.2316021700398944e-05, "loss": 1.4578, "step": 8112 }, { "epoch": 0.87, "grad_norm": 0.09883714745917478, "learning_rate": 4.224595065082703e-05, "loss": 1.4541, "step": 8113 }, { "epoch": 0.87, "grad_norm": 0.08727422039676933, "learning_rate": 4.217593510551176e-05, "loss": 1.4615, "step": 8114 }, { "epoch": 0.87, "grad_norm": 0.09334590367762645, "learning_rate": 4.210597507294267e-05, "loss": 1.4209, "step": 8115 }, { "epoch": 0.87, "grad_norm": 0.09456309243317293, "learning_rate": 4.203607056160269e-05, "loss": 1.3918, "step": 8116 }, { "epoch": 0.87, "grad_norm": 0.09896588270561626, "learning_rate": 4.19662215799681e-05, "loss": 1.4424, "step": 8117 }, { "epoch": 0.87, "grad_norm": 0.08847654465807804, "learning_rate": 4.1896428136508125e-05, "loss": 1.3337, "step": 8118 }, { "epoch": 0.87, "grad_norm": 0.11081100920646882, "learning_rate": 4.182669023968561e-05, "loss": 1.4485, "step": 8119 }, { "epoch": 0.87, "grad_norm": 0.09416340783753362, "learning_rate": 4.17570078979565e-05, "loss": 1.414, "step": 8120 }, { "epoch": 0.87, "grad_norm": 0.09604121115206532, "learning_rate": 4.168738111976989e-05, "loss": 1.4759, "step": 8121 }, { "epoch": 0.87, "grad_norm": 0.08494053518936755, "learning_rate": 4.161780991356845e-05, "loss": 1.2911, "step": 8122 }, { "epoch": 0.87, "grad_norm": 0.09232432805630465, "learning_rate": 4.154829428778778e-05, "loss": 1.4698, "step": 8123 }, { "epoch": 0.87, "grad_norm": 0.08560962063353268, "learning_rate": 4.147883425085702e-05, "loss": 1.3934, "step": 8124 }, { "epoch": 0.87, "grad_norm": 0.08548953603826323, "learning_rate": 4.140942981119833e-05, "loss": 1.3536, "step": 8125 }, { "epoch": 0.87, "grad_norm": 0.09186465089721559, "learning_rate": 4.1340080977227244e-05, "loss": 1.3503, "step": 8126 }, { "epoch": 0.87, "grad_norm": 0.09138641763789578, "learning_rate": 4.127078775735266e-05, "loss": 1.4755, "step": 8127 }, { "epoch": 0.87, "grad_norm": 0.09084712166052157, "learning_rate": 4.1201550159976455e-05, "loss": 1.3593, "step": 8128 }, { "epoch": 0.87, "grad_norm": 0.09212969623548035, "learning_rate": 4.113236819349403e-05, "loss": 1.5437, "step": 8129 }, { "epoch": 0.87, "grad_norm": 0.09237647458246168, "learning_rate": 4.106324186629396e-05, "loss": 1.3442, "step": 8130 }, { "epoch": 0.87, "grad_norm": 0.09251388456673824, "learning_rate": 4.0994171186757966e-05, "loss": 1.586, "step": 8131 }, { "epoch": 0.87, "grad_norm": 0.09511611784151426, "learning_rate": 4.0925156163261256e-05, "loss": 1.4247, "step": 8132 }, { "epoch": 0.87, "grad_norm": 0.09355509883336248, "learning_rate": 4.085619680417196e-05, "loss": 1.4298, "step": 8133 }, { "epoch": 0.87, "grad_norm": 0.08637365018956814, "learning_rate": 4.0787293117851674e-05, "loss": 1.3321, "step": 8134 }, { "epoch": 0.87, "grad_norm": 0.08396638217296636, "learning_rate": 4.071844511265527e-05, "loss": 1.5422, "step": 8135 }, { "epoch": 0.87, "grad_norm": 0.08649007647069878, "learning_rate": 4.064965279693083e-05, "loss": 1.3918, "step": 8136 }, { "epoch": 0.87, "grad_norm": 0.09236978127469905, "learning_rate": 4.058091617901949e-05, "loss": 1.4447, "step": 8137 }, { "epoch": 0.87, "grad_norm": 0.08843730477667042, "learning_rate": 4.0512235267256e-05, "loss": 1.3427, "step": 8138 }, { "epoch": 0.87, "grad_norm": 0.10048906860747468, "learning_rate": 4.044361006996811e-05, "loss": 1.3218, "step": 8139 }, { "epoch": 0.88, "grad_norm": 0.09575911752372211, "learning_rate": 4.0375040595476754e-05, "loss": 1.3955, "step": 8140 }, { "epoch": 0.88, "grad_norm": 0.09332874770342339, "learning_rate": 4.0306526852096296e-05, "loss": 1.2804, "step": 8141 }, { "epoch": 0.88, "grad_norm": 0.089037521052994, "learning_rate": 4.023806884813436e-05, "loss": 1.4618, "step": 8142 }, { "epoch": 0.88, "grad_norm": 0.08529572187836744, "learning_rate": 4.016966659189158e-05, "loss": 1.394, "step": 8143 }, { "epoch": 0.88, "grad_norm": 0.09280673609000385, "learning_rate": 4.010132009166195e-05, "loss": 1.281, "step": 8144 }, { "epoch": 0.88, "grad_norm": 0.08763093597932838, "learning_rate": 4.0033029355732886e-05, "loss": 1.344, "step": 8145 }, { "epoch": 0.88, "grad_norm": 0.09000825851517992, "learning_rate": 3.996479439238465e-05, "loss": 1.4322, "step": 8146 }, { "epoch": 0.88, "grad_norm": 0.10024569194799243, "learning_rate": 3.989661520989102e-05, "loss": 1.5138, "step": 8147 }, { "epoch": 0.88, "grad_norm": 0.0913436778896483, "learning_rate": 3.982849181651915e-05, "loss": 1.3464, "step": 8148 }, { "epoch": 0.88, "grad_norm": 0.0862936421392638, "learning_rate": 3.9760424220529004e-05, "loss": 1.3939, "step": 8149 }, { "epoch": 0.88, "grad_norm": 0.10394172085424071, "learning_rate": 3.969241243017413e-05, "loss": 1.5113, "step": 8150 }, { "epoch": 0.88, "grad_norm": 0.0917900252830298, "learning_rate": 3.962445645370122e-05, "loss": 1.3708, "step": 8151 }, { "epoch": 0.88, "grad_norm": 0.08331465806789863, "learning_rate": 3.955655629935007e-05, "loss": 1.2775, "step": 8152 }, { "epoch": 0.88, "grad_norm": 0.0838333563003349, "learning_rate": 3.948871197535386e-05, "loss": 1.4261, "step": 8153 }, { "epoch": 0.88, "grad_norm": 0.09130774844385221, "learning_rate": 3.942092348993903e-05, "loss": 1.384, "step": 8154 }, { "epoch": 0.88, "grad_norm": 0.09102809118679295, "learning_rate": 3.935319085132505e-05, "loss": 1.4354, "step": 8155 }, { "epoch": 0.88, "grad_norm": 0.09312677606946752, "learning_rate": 3.928551406772468e-05, "loss": 1.4494, "step": 8156 }, { "epoch": 0.88, "grad_norm": 0.08760962335416297, "learning_rate": 3.921789314734409e-05, "loss": 1.3796, "step": 8157 }, { "epoch": 0.88, "grad_norm": 0.1140094609269149, "learning_rate": 3.915032809838259e-05, "loss": 1.502, "step": 8158 }, { "epoch": 0.88, "grad_norm": 0.08082863729081557, "learning_rate": 3.908281892903254e-05, "loss": 1.4221, "step": 8159 }, { "epoch": 0.88, "grad_norm": 0.08881742661475506, "learning_rate": 3.901536564747965e-05, "loss": 1.4697, "step": 8160 }, { "epoch": 0.88, "grad_norm": 0.09152808811324147, "learning_rate": 3.8947968261903054e-05, "loss": 1.3648, "step": 8161 }, { "epoch": 0.88, "grad_norm": 0.09256609786251456, "learning_rate": 3.888062678047472e-05, "loss": 1.4266, "step": 8162 }, { "epoch": 0.88, "grad_norm": 0.07859820908539289, "learning_rate": 3.88133412113601e-05, "loss": 1.3956, "step": 8163 }, { "epoch": 0.88, "grad_norm": 0.09774279455050242, "learning_rate": 3.874611156271801e-05, "loss": 1.4649, "step": 8164 }, { "epoch": 0.88, "grad_norm": 0.09097510907480971, "learning_rate": 3.867893784269988e-05, "loss": 1.4925, "step": 8165 }, { "epoch": 0.88, "grad_norm": 0.08421560686125693, "learning_rate": 3.861182005945091e-05, "loss": 1.4309, "step": 8166 }, { "epoch": 0.88, "grad_norm": 0.09794270345444765, "learning_rate": 3.854475822110953e-05, "loss": 1.3782, "step": 8167 }, { "epoch": 0.88, "grad_norm": 0.10010697158639269, "learning_rate": 3.8477752335807027e-05, "loss": 1.3843, "step": 8168 }, { "epoch": 0.88, "grad_norm": 0.09122969411040488, "learning_rate": 3.841080241166811e-05, "loss": 1.4884, "step": 8169 }, { "epoch": 0.88, "grad_norm": 0.0879295421905876, "learning_rate": 3.8343908456810905e-05, "loss": 1.4185, "step": 8170 }, { "epoch": 0.88, "grad_norm": 0.10266989127410892, "learning_rate": 3.82770704793462e-05, "loss": 1.4521, "step": 8171 }, { "epoch": 0.88, "grad_norm": 0.09692602731626357, "learning_rate": 3.8210288487378566e-05, "loss": 1.5678, "step": 8172 }, { "epoch": 0.88, "grad_norm": 0.0856255659833879, "learning_rate": 3.8143562489005525e-05, "loss": 1.4182, "step": 8173 }, { "epoch": 0.88, "grad_norm": 0.08637972321065973, "learning_rate": 3.8076892492317713e-05, "loss": 1.4223, "step": 8174 }, { "epoch": 0.88, "grad_norm": 0.085387579620603, "learning_rate": 3.801027850539929e-05, "loss": 1.3246, "step": 8175 }, { "epoch": 0.88, "grad_norm": 0.07966648182833903, "learning_rate": 3.794372053632722e-05, "loss": 1.3599, "step": 8176 }, { "epoch": 0.88, "grad_norm": 0.0839450905097096, "learning_rate": 3.7877218593172057e-05, "loss": 1.3715, "step": 8177 }, { "epoch": 0.88, "grad_norm": 0.08710525678299814, "learning_rate": 3.781077268399724e-05, "loss": 1.5138, "step": 8178 }, { "epoch": 0.88, "grad_norm": 0.0909527791884506, "learning_rate": 3.7744382816859765e-05, "loss": 1.2932, "step": 8179 }, { "epoch": 0.88, "grad_norm": 0.08725990644653168, "learning_rate": 3.7678048999809365e-05, "loss": 1.4722, "step": 8180 }, { "epoch": 0.88, "grad_norm": 0.08838118634523105, "learning_rate": 3.761177124088943e-05, "loss": 1.3409, "step": 8181 }, { "epoch": 0.88, "grad_norm": 0.0817359643411812, "learning_rate": 3.7545549548136435e-05, "loss": 1.3198, "step": 8182 }, { "epoch": 0.88, "grad_norm": 0.0910296821176027, "learning_rate": 3.747938392957972e-05, "loss": 1.406, "step": 8183 }, { "epoch": 0.88, "grad_norm": 0.08337072181587418, "learning_rate": 3.741327439324232e-05, "loss": 1.2309, "step": 8184 }, { "epoch": 0.88, "grad_norm": 0.08964416503075122, "learning_rate": 3.734722094714027e-05, "loss": 1.4342, "step": 8185 }, { "epoch": 0.88, "grad_norm": 0.08834670597246252, "learning_rate": 3.7281223599282655e-05, "loss": 1.5451, "step": 8186 }, { "epoch": 0.88, "grad_norm": 0.09156897718666505, "learning_rate": 3.72152823576718e-05, "loss": 1.3623, "step": 8187 }, { "epoch": 0.88, "grad_norm": 0.0897607277218215, "learning_rate": 3.714939723030347e-05, "loss": 1.4662, "step": 8188 }, { "epoch": 0.88, "grad_norm": 0.09034869839292647, "learning_rate": 3.7083568225166454e-05, "loss": 1.2751, "step": 8189 }, { "epoch": 0.88, "grad_norm": 0.09641389241523105, "learning_rate": 3.701779535024269e-05, "loss": 1.3636, "step": 8190 }, { "epoch": 0.88, "grad_norm": 0.09110058458879819, "learning_rate": 3.69520786135073e-05, "loss": 1.415, "step": 8191 }, { "epoch": 0.88, "grad_norm": 0.09185325382635562, "learning_rate": 3.688641802292891e-05, "loss": 1.4223, "step": 8192 }, { "epoch": 0.88, "grad_norm": 0.08397425268962305, "learning_rate": 3.682081358646883e-05, "loss": 1.4142, "step": 8193 }, { "epoch": 0.88, "grad_norm": 0.07977066022161716, "learning_rate": 3.67552653120819e-05, "loss": 1.414, "step": 8194 }, { "epoch": 0.88, "grad_norm": 0.07986806002290193, "learning_rate": 3.668977320771616e-05, "loss": 1.3467, "step": 8195 }, { "epoch": 0.88, "grad_norm": 0.0937341798408163, "learning_rate": 3.66243372813127e-05, "loss": 1.3114, "step": 8196 }, { "epoch": 0.88, "grad_norm": 0.09905772226978368, "learning_rate": 3.655895754080579e-05, "loss": 1.2811, "step": 8197 }, { "epoch": 0.88, "grad_norm": 0.0976774300018668, "learning_rate": 3.649363399412309e-05, "loss": 1.395, "step": 8198 }, { "epoch": 0.88, "grad_norm": 0.09179719331361488, "learning_rate": 3.6428366649185084e-05, "loss": 1.3635, "step": 8199 }, { "epoch": 0.88, "grad_norm": 0.0824569558000461, "learning_rate": 3.636315551390584e-05, "loss": 1.4963, "step": 8200 }, { "epoch": 0.88, "grad_norm": 0.08942944820307387, "learning_rate": 3.6298000596192485e-05, "loss": 1.4102, "step": 8201 }, { "epoch": 0.88, "grad_norm": 0.0899389335487092, "learning_rate": 3.623290190394507e-05, "loss": 1.3796, "step": 8202 }, { "epoch": 0.88, "grad_norm": 0.0912806741082675, "learning_rate": 3.616785944505713e-05, "loss": 1.3977, "step": 8203 }, { "epoch": 0.88, "grad_norm": 0.09622140600547263, "learning_rate": 3.61028732274154e-05, "loss": 1.3994, "step": 8204 }, { "epoch": 0.88, "grad_norm": 0.10230214814948685, "learning_rate": 3.603794325889953e-05, "loss": 1.3927, "step": 8205 }, { "epoch": 0.88, "grad_norm": 0.08549966685330587, "learning_rate": 3.597306954738255e-05, "loss": 1.4888, "step": 8206 }, { "epoch": 0.88, "grad_norm": 0.0797934896092907, "learning_rate": 3.590825210073073e-05, "loss": 1.3104, "step": 8207 }, { "epoch": 0.88, "grad_norm": 0.08264588594248158, "learning_rate": 3.584349092680328e-05, "loss": 1.5409, "step": 8208 }, { "epoch": 0.88, "grad_norm": 0.08128287757448842, "learning_rate": 3.577878603345269e-05, "loss": 1.3391, "step": 8209 }, { "epoch": 0.88, "grad_norm": 0.09632212209637667, "learning_rate": 3.5714137428524754e-05, "loss": 1.3682, "step": 8210 }, { "epoch": 0.88, "grad_norm": 0.08181637197782007, "learning_rate": 3.564954511985824e-05, "loss": 1.478, "step": 8211 }, { "epoch": 0.88, "grad_norm": 0.08614079720602566, "learning_rate": 3.5585009115285226e-05, "loss": 1.2792, "step": 8212 }, { "epoch": 0.88, "grad_norm": 0.09936056430723951, "learning_rate": 3.552052942263101e-05, "loss": 1.4256, "step": 8213 }, { "epoch": 0.88, "grad_norm": 0.08886464083868731, "learning_rate": 3.545610604971383e-05, "loss": 1.361, "step": 8214 }, { "epoch": 0.88, "grad_norm": 0.09233698701338725, "learning_rate": 3.5391739004345335e-05, "loss": 1.4835, "step": 8215 }, { "epoch": 0.88, "grad_norm": 0.08301256671081278, "learning_rate": 3.5327428294330336e-05, "loss": 1.4588, "step": 8216 }, { "epoch": 0.88, "grad_norm": 0.09239690186282472, "learning_rate": 3.5263173927466584e-05, "loss": 1.3954, "step": 8217 }, { "epoch": 0.88, "grad_norm": 0.09754474472861756, "learning_rate": 3.5198975911545136e-05, "loss": 1.3358, "step": 8218 }, { "epoch": 0.88, "grad_norm": 0.08706581069856931, "learning_rate": 3.513483425435021e-05, "loss": 1.3275, "step": 8219 }, { "epoch": 0.88, "grad_norm": 0.09382740937626291, "learning_rate": 3.507074896365942e-05, "loss": 1.3643, "step": 8220 }, { "epoch": 0.88, "grad_norm": 0.08549684695802111, "learning_rate": 3.500672004724303e-05, "loss": 1.3358, "step": 8221 }, { "epoch": 0.88, "grad_norm": 0.11311319233591514, "learning_rate": 3.494274751286497e-05, "loss": 1.4496, "step": 8222 }, { "epoch": 0.88, "grad_norm": 0.08135780208806964, "learning_rate": 3.4878831368282126e-05, "loss": 1.2636, "step": 8223 }, { "epoch": 0.88, "grad_norm": 0.09552206092091241, "learning_rate": 3.4814971621244415e-05, "loss": 1.4177, "step": 8224 }, { "epoch": 0.88, "grad_norm": 0.09716090579217876, "learning_rate": 3.4751168279495095e-05, "loss": 1.288, "step": 8225 }, { "epoch": 0.88, "grad_norm": 0.08658930830747578, "learning_rate": 3.468742135077069e-05, "loss": 1.3388, "step": 8226 }, { "epoch": 0.88, "grad_norm": 0.09649678229661654, "learning_rate": 3.462373084280057e-05, "loss": 1.3767, "step": 8227 }, { "epoch": 0.88, "grad_norm": 0.09611441101312547, "learning_rate": 3.456009676330751e-05, "loss": 1.4051, "step": 8228 }, { "epoch": 0.88, "grad_norm": 0.09029750550796355, "learning_rate": 3.449651912000734e-05, "loss": 1.3124, "step": 8229 }, { "epoch": 0.88, "grad_norm": 0.10382301198139691, "learning_rate": 3.4432997920609e-05, "loss": 1.4823, "step": 8230 }, { "epoch": 0.88, "grad_norm": 0.08153783473681955, "learning_rate": 3.436953317281472e-05, "loss": 1.4889, "step": 8231 }, { "epoch": 0.88, "grad_norm": 0.09764009699745096, "learning_rate": 3.430612488431989e-05, "loss": 1.3913, "step": 8232 }, { "epoch": 0.89, "grad_norm": 0.08631013956906346, "learning_rate": 3.4242773062812815e-05, "loss": 1.4285, "step": 8233 }, { "epoch": 0.89, "grad_norm": 0.08769483348443738, "learning_rate": 3.4179477715975237e-05, "loss": 1.4148, "step": 8234 }, { "epoch": 0.89, "grad_norm": 0.09050904791758085, "learning_rate": 3.411623885148202e-05, "loss": 1.2897, "step": 8235 }, { "epoch": 0.89, "grad_norm": 0.08907965528334316, "learning_rate": 3.405305647700085e-05, "loss": 1.3697, "step": 8236 }, { "epoch": 0.89, "grad_norm": 0.08801888670287704, "learning_rate": 3.398993060019295e-05, "loss": 1.3306, "step": 8237 }, { "epoch": 0.89, "grad_norm": 0.08602038634493521, "learning_rate": 3.392686122871263e-05, "loss": 1.4508, "step": 8238 }, { "epoch": 0.89, "grad_norm": 0.08606685174750452, "learning_rate": 3.3863848370207226e-05, "loss": 1.4797, "step": 8239 }, { "epoch": 0.89, "grad_norm": 0.10244754385585404, "learning_rate": 3.380089203231712e-05, "loss": 1.4539, "step": 8240 }, { "epoch": 0.89, "grad_norm": 0.09051226085948132, "learning_rate": 3.373799222267615e-05, "loss": 1.4318, "step": 8241 }, { "epoch": 0.89, "grad_norm": 0.10024875889437078, "learning_rate": 3.3675148948911e-05, "loss": 1.3614, "step": 8242 }, { "epoch": 0.89, "grad_norm": 0.09779553374981109, "learning_rate": 3.361236221864172e-05, "loss": 1.272, "step": 8243 }, { "epoch": 0.89, "grad_norm": 0.08717187939794446, "learning_rate": 3.354963203948147e-05, "loss": 1.423, "step": 8244 }, { "epoch": 0.89, "grad_norm": 0.09053686476661967, "learning_rate": 3.348695841903637e-05, "loss": 1.3593, "step": 8245 }, { "epoch": 0.89, "grad_norm": 0.08330610617134679, "learning_rate": 3.342434136490585e-05, "loss": 1.4082, "step": 8246 }, { "epoch": 0.89, "grad_norm": 0.08656762493107648, "learning_rate": 3.3361780884682615e-05, "loss": 1.3553, "step": 8247 }, { "epoch": 0.89, "grad_norm": 0.10053769825211262, "learning_rate": 3.329927698595203e-05, "loss": 1.3354, "step": 8248 }, { "epoch": 0.89, "grad_norm": 0.10128003625082153, "learning_rate": 3.3236829676293215e-05, "loss": 1.3203, "step": 8249 }, { "epoch": 0.89, "grad_norm": 0.08681193791874181, "learning_rate": 3.3174438963277875e-05, "loss": 1.3869, "step": 8250 }, { "epoch": 0.89, "grad_norm": 0.09209269431223971, "learning_rate": 3.311210485447125e-05, "loss": 1.3276, "step": 8251 }, { "epoch": 0.89, "grad_norm": 0.09032137653302173, "learning_rate": 3.30498273574315e-05, "loss": 1.3161, "step": 8252 }, { "epoch": 0.89, "grad_norm": 0.08830919203330095, "learning_rate": 3.298760647970994e-05, "loss": 1.4686, "step": 8253 }, { "epoch": 0.89, "grad_norm": 0.09601488594029912, "learning_rate": 3.292544222885124e-05, "loss": 1.4203, "step": 8254 }, { "epoch": 0.89, "grad_norm": 0.0936075049034287, "learning_rate": 3.286333461239288e-05, "loss": 1.453, "step": 8255 }, { "epoch": 0.89, "grad_norm": 0.08604891420228421, "learning_rate": 3.280128363786561e-05, "loss": 1.2659, "step": 8256 }, { "epoch": 0.89, "grad_norm": 0.08944258325837522, "learning_rate": 3.273928931279346e-05, "loss": 1.4166, "step": 8257 }, { "epoch": 0.89, "grad_norm": 0.09903855128508778, "learning_rate": 3.267735164469332e-05, "loss": 1.2721, "step": 8258 }, { "epoch": 0.89, "grad_norm": 0.09662137777758281, "learning_rate": 3.261547064107551e-05, "loss": 1.2992, "step": 8259 }, { "epoch": 0.89, "grad_norm": 0.08389210761394819, "learning_rate": 3.255364630944313e-05, "loss": 1.3329, "step": 8260 }, { "epoch": 0.89, "grad_norm": 0.08569831100242886, "learning_rate": 3.249187865729264e-05, "loss": 1.3118, "step": 8261 }, { "epoch": 0.89, "grad_norm": 0.0990971080529239, "learning_rate": 3.24301676921136e-05, "loss": 1.338, "step": 8262 }, { "epoch": 0.89, "grad_norm": 0.10539828259931236, "learning_rate": 3.236851342138874e-05, "loss": 1.3873, "step": 8263 }, { "epoch": 0.89, "grad_norm": 0.09089848271652821, "learning_rate": 3.2306915852593713e-05, "loss": 1.3069, "step": 8264 }, { "epoch": 0.89, "grad_norm": 0.08731495361645467, "learning_rate": 3.224537499319757e-05, "loss": 1.451, "step": 8265 }, { "epoch": 0.89, "grad_norm": 0.0963751591762757, "learning_rate": 3.218389085066237e-05, "loss": 1.4377, "step": 8266 }, { "epoch": 0.89, "grad_norm": 0.08766836169616739, "learning_rate": 3.2122463432443125e-05, "loss": 1.3519, "step": 8267 }, { "epoch": 0.89, "grad_norm": 0.0944698914392843, "learning_rate": 3.206109274598817e-05, "loss": 1.3163, "step": 8268 }, { "epoch": 0.89, "grad_norm": 0.08410850904393825, "learning_rate": 3.199977879873906e-05, "loss": 1.3856, "step": 8269 }, { "epoch": 0.89, "grad_norm": 0.08900858429825506, "learning_rate": 3.193852159813021e-05, "loss": 1.4729, "step": 8270 }, { "epoch": 0.89, "grad_norm": 0.08340149953121077, "learning_rate": 3.18773211515892e-05, "loss": 1.4201, "step": 8271 }, { "epoch": 0.89, "grad_norm": 0.08810900737089097, "learning_rate": 3.181617746653687e-05, "loss": 1.3035, "step": 8272 }, { "epoch": 0.89, "grad_norm": 0.09909487833329844, "learning_rate": 3.1755090550387165e-05, "loss": 1.3747, "step": 8273 }, { "epoch": 0.89, "grad_norm": 0.09206263771365691, "learning_rate": 3.169406041054695e-05, "loss": 1.4318, "step": 8274 }, { "epoch": 0.89, "grad_norm": 0.08936295451925626, "learning_rate": 3.163308705441648e-05, "loss": 1.354, "step": 8275 }, { "epoch": 0.89, "grad_norm": 0.10108582918313291, "learning_rate": 3.157217048938882e-05, "loss": 1.3638, "step": 8276 }, { "epoch": 0.89, "grad_norm": 0.09810973020134794, "learning_rate": 3.151131072285041e-05, "loss": 1.3389, "step": 8277 }, { "epoch": 0.89, "grad_norm": 0.09990108657933389, "learning_rate": 3.145050776218078e-05, "loss": 1.5351, "step": 8278 }, { "epoch": 0.89, "grad_norm": 0.09063262414102946, "learning_rate": 3.138976161475238e-05, "loss": 1.4391, "step": 8279 }, { "epoch": 0.89, "grad_norm": 0.08380459100263056, "learning_rate": 3.132907228793086e-05, "loss": 1.3581, "step": 8280 }, { "epoch": 0.89, "grad_norm": 0.08742457245686931, "learning_rate": 3.126843978907518e-05, "loss": 1.2955, "step": 8281 }, { "epoch": 0.89, "grad_norm": 0.09016049568453172, "learning_rate": 3.1207864125537165e-05, "loss": 1.2513, "step": 8282 }, { "epoch": 0.89, "grad_norm": 0.08375867746186022, "learning_rate": 3.1147345304661734e-05, "loss": 1.3385, "step": 8283 }, { "epoch": 0.89, "grad_norm": 0.10576397254106343, "learning_rate": 3.108688333378701e-05, "loss": 1.4306, "step": 8284 }, { "epoch": 0.89, "grad_norm": 0.10166972259158993, "learning_rate": 3.10264782202444e-05, "loss": 1.4401, "step": 8285 }, { "epoch": 0.89, "grad_norm": 0.0816827777590841, "learning_rate": 3.0966129971358005e-05, "loss": 1.3993, "step": 8286 }, { "epoch": 0.89, "grad_norm": 0.10865251945702202, "learning_rate": 3.0905838594445346e-05, "loss": 1.4408, "step": 8287 }, { "epoch": 0.89, "grad_norm": 0.09242901362551612, "learning_rate": 3.0845604096817024e-05, "loss": 1.4811, "step": 8288 }, { "epoch": 0.89, "grad_norm": 0.09310275405757654, "learning_rate": 3.078542648577659e-05, "loss": 1.427, "step": 8289 }, { "epoch": 0.89, "grad_norm": 0.07919899361904145, "learning_rate": 3.072530576862081e-05, "loss": 1.3695, "step": 8290 }, { "epoch": 0.89, "grad_norm": 0.09071826200447604, "learning_rate": 3.06652419526397e-05, "loss": 1.3786, "step": 8291 }, { "epoch": 0.89, "grad_norm": 0.09205784757595566, "learning_rate": 3.060523504511587e-05, "loss": 1.4101, "step": 8292 }, { "epoch": 0.89, "grad_norm": 0.09121367166574848, "learning_rate": 3.05452850533256e-05, "loss": 1.4315, "step": 8293 }, { "epoch": 0.89, "grad_norm": 0.08840960290934788, "learning_rate": 3.048539198453798e-05, "loss": 1.1353, "step": 8294 }, { "epoch": 0.89, "grad_norm": 0.10546852151528087, "learning_rate": 3.0425555846015196e-05, "loss": 1.3073, "step": 8295 }, { "epoch": 0.89, "grad_norm": 0.09881277238387846, "learning_rate": 3.0365776645012666e-05, "loss": 1.3881, "step": 8296 }, { "epoch": 0.89, "grad_norm": 0.0854003085075494, "learning_rate": 3.0306054388778814e-05, "loss": 1.3781, "step": 8297 }, { "epoch": 0.89, "grad_norm": 0.09904759952136048, "learning_rate": 3.0246389084555127e-05, "loss": 1.5183, "step": 8298 }, { "epoch": 0.89, "grad_norm": 0.08909127911291063, "learning_rate": 3.0186780739576202e-05, "loss": 1.4428, "step": 8299 }, { "epoch": 0.89, "grad_norm": 0.11642320664644766, "learning_rate": 3.012722936106993e-05, "loss": 1.3976, "step": 8300 }, { "epoch": 0.89, "grad_norm": 0.09675931420968514, "learning_rate": 3.0067734956256863e-05, "loss": 1.4935, "step": 8301 }, { "epoch": 0.89, "grad_norm": 0.09474977351473926, "learning_rate": 3.0008297532351182e-05, "loss": 1.365, "step": 8302 }, { "epoch": 0.89, "grad_norm": 0.10272168744723374, "learning_rate": 2.9948917096559615e-05, "loss": 1.2695, "step": 8303 }, { "epoch": 0.89, "grad_norm": 0.08298955604398278, "learning_rate": 2.9889593656082404e-05, "loss": 1.4956, "step": 8304 }, { "epoch": 0.89, "grad_norm": 0.09179600755809245, "learning_rate": 2.983032721811263e-05, "loss": 1.4941, "step": 8305 }, { "epoch": 0.89, "grad_norm": 0.10682099436596237, "learning_rate": 2.977111778983671e-05, "loss": 1.3633, "step": 8306 }, { "epoch": 0.89, "grad_norm": 0.0867515300642767, "learning_rate": 2.9711965378433793e-05, "loss": 1.3357, "step": 8307 }, { "epoch": 0.89, "grad_norm": 0.08754850931104234, "learning_rate": 2.9652869991076413e-05, "loss": 1.4155, "step": 8308 }, { "epoch": 0.89, "grad_norm": 0.09424777427849733, "learning_rate": 2.9593831634930123e-05, "loss": 1.3722, "step": 8309 }, { "epoch": 0.89, "grad_norm": 0.09294745036970158, "learning_rate": 2.9534850317153415e-05, "loss": 1.3322, "step": 8310 }, { "epoch": 0.89, "grad_norm": 0.09331836902204226, "learning_rate": 2.947592604489807e-05, "loss": 1.3521, "step": 8311 }, { "epoch": 0.89, "grad_norm": 0.09313847602316322, "learning_rate": 2.9417058825308875e-05, "loss": 1.3364, "step": 8312 }, { "epoch": 0.89, "grad_norm": 0.09053115548644763, "learning_rate": 2.9358248665523667e-05, "loss": 1.4487, "step": 8313 }, { "epoch": 0.89, "grad_norm": 0.09041572250814107, "learning_rate": 2.9299495572673307e-05, "loss": 1.4212, "step": 8314 }, { "epoch": 0.89, "grad_norm": 0.0904359258699773, "learning_rate": 2.9240799553881814e-05, "loss": 1.3782, "step": 8315 }, { "epoch": 0.89, "grad_norm": 0.08136115391502427, "learning_rate": 2.918216061626644e-05, "loss": 1.415, "step": 8316 }, { "epoch": 0.89, "grad_norm": 0.08998781295143174, "learning_rate": 2.912357876693711e-05, "loss": 1.3324, "step": 8317 }, { "epoch": 0.89, "grad_norm": 0.09399137616736783, "learning_rate": 2.9065054012997305e-05, "loss": 1.4648, "step": 8318 }, { "epoch": 0.89, "grad_norm": 0.08235405096329945, "learning_rate": 2.900658636154324e-05, "loss": 1.3484, "step": 8319 }, { "epoch": 0.89, "grad_norm": 0.10489144653783913, "learning_rate": 2.8948175819664357e-05, "loss": 1.5477, "step": 8320 }, { "epoch": 0.89, "grad_norm": 0.09116817963939151, "learning_rate": 2.8889822394443043e-05, "loss": 1.3189, "step": 8321 }, { "epoch": 0.89, "grad_norm": 0.09584664025860186, "learning_rate": 2.883152609295503e-05, "loss": 1.4975, "step": 8322 }, { "epoch": 0.89, "grad_norm": 0.08977636111226732, "learning_rate": 2.8773286922268883e-05, "loss": 1.3452, "step": 8323 }, { "epoch": 0.89, "grad_norm": 0.07654042445721576, "learning_rate": 2.871510488944612e-05, "loss": 1.4143, "step": 8324 }, { "epoch": 0.89, "grad_norm": 0.08818203775801058, "learning_rate": 2.8656980001541765e-05, "loss": 1.4636, "step": 8325 }, { "epoch": 0.9, "grad_norm": 0.08432117009970455, "learning_rate": 2.859891226560346e-05, "loss": 1.3565, "step": 8326 }, { "epoch": 0.9, "grad_norm": 0.09712799846018604, "learning_rate": 2.854090168867224e-05, "loss": 1.4048, "step": 8327 }, { "epoch": 0.9, "grad_norm": 0.10385353675645356, "learning_rate": 2.848294827778214e-05, "loss": 1.347, "step": 8328 }, { "epoch": 0.9, "grad_norm": 0.0959182760982914, "learning_rate": 2.8425052039959987e-05, "loss": 1.3536, "step": 8329 }, { "epoch": 0.9, "grad_norm": 0.09054068618274394, "learning_rate": 2.836721298222611e-05, "loss": 1.3895, "step": 8330 }, { "epoch": 0.9, "grad_norm": 0.08613128379624303, "learning_rate": 2.8309431111593675e-05, "loss": 1.329, "step": 8331 }, { "epoch": 0.9, "grad_norm": 0.09153174477614746, "learning_rate": 2.8251706435068803e-05, "loss": 1.3473, "step": 8332 }, { "epoch": 0.9, "grad_norm": 0.0895519519633704, "learning_rate": 2.8194038959650892e-05, "loss": 1.38, "step": 8333 }, { "epoch": 0.9, "grad_norm": 0.09474284361504132, "learning_rate": 2.813642869233235e-05, "loss": 1.3456, "step": 8334 }, { "epoch": 0.9, "grad_norm": 0.08310663147369579, "learning_rate": 2.8078875640098646e-05, "loss": 1.4512, "step": 8335 }, { "epoch": 0.9, "grad_norm": 0.09109971946210933, "learning_rate": 2.802137980992814e-05, "loss": 1.3312, "step": 8336 }, { "epoch": 0.9, "grad_norm": 0.08391879676313042, "learning_rate": 2.796394120879259e-05, "loss": 1.3833, "step": 8337 }, { "epoch": 0.9, "grad_norm": 0.07946902228691093, "learning_rate": 2.790655984365642e-05, "loss": 1.4357, "step": 8338 }, { "epoch": 0.9, "grad_norm": 0.10628108602411328, "learning_rate": 2.7849235721477406e-05, "loss": 1.3521, "step": 8339 }, { "epoch": 0.9, "grad_norm": 0.10121675475351374, "learning_rate": 2.7791968849206427e-05, "loss": 1.4077, "step": 8340 }, { "epoch": 0.9, "grad_norm": 0.08187516155572779, "learning_rate": 2.7734759233787045e-05, "loss": 1.3724, "step": 8341 }, { "epoch": 0.9, "grad_norm": 0.0986900611957909, "learning_rate": 2.7677606882156314e-05, "loss": 1.369, "step": 8342 }, { "epoch": 0.9, "grad_norm": 0.08405140486248891, "learning_rate": 2.7620511801244143e-05, "loss": 1.4385, "step": 8343 }, { "epoch": 0.9, "grad_norm": 0.09081596189763722, "learning_rate": 2.7563473997973433e-05, "loss": 1.3842, "step": 8344 }, { "epoch": 0.9, "grad_norm": 0.08228297439170708, "learning_rate": 2.750649347926021e-05, "loss": 1.3592, "step": 8345 }, { "epoch": 0.9, "grad_norm": 0.08391568513119509, "learning_rate": 2.7449570252013556e-05, "loss": 1.3608, "step": 8346 }, { "epoch": 0.9, "grad_norm": 0.08954952617668473, "learning_rate": 2.7392704323135677e-05, "loss": 1.3169, "step": 8347 }, { "epoch": 0.9, "grad_norm": 0.0917650342566164, "learning_rate": 2.733589569952172e-05, "loss": 1.3512, "step": 8348 }, { "epoch": 0.9, "grad_norm": 0.09179354595681569, "learning_rate": 2.7279144388059896e-05, "loss": 1.5651, "step": 8349 }, { "epoch": 0.9, "grad_norm": 0.08588776856338311, "learning_rate": 2.7222450395631592e-05, "loss": 1.3745, "step": 8350 }, { "epoch": 0.9, "grad_norm": 0.0909950628609119, "learning_rate": 2.7165813729111032e-05, "loss": 1.3828, "step": 8351 }, { "epoch": 0.9, "grad_norm": 0.10197866780298187, "learning_rate": 2.7109234395365667e-05, "loss": 1.364, "step": 8352 }, { "epoch": 0.9, "grad_norm": 0.0879616622479054, "learning_rate": 2.7052712401256006e-05, "loss": 1.4415, "step": 8353 }, { "epoch": 0.9, "grad_norm": 0.09835874727160945, "learning_rate": 2.6996247753635404e-05, "loss": 1.3447, "step": 8354 }, { "epoch": 0.9, "grad_norm": 0.0963073590732697, "learning_rate": 2.6939840459350496e-05, "loss": 1.339, "step": 8355 }, { "epoch": 0.9, "grad_norm": 0.08902332715993876, "learning_rate": 2.6883490525240804e-05, "loss": 1.3539, "step": 8356 }, { "epoch": 0.9, "grad_norm": 0.08103360913138252, "learning_rate": 2.6827197958138928e-05, "loss": 1.2907, "step": 8357 }, { "epoch": 0.9, "grad_norm": 0.09731155956807074, "learning_rate": 2.677096276487062e-05, "loss": 1.4633, "step": 8358 }, { "epoch": 0.9, "grad_norm": 0.0887681860239562, "learning_rate": 2.6714784952254544e-05, "loss": 1.4055, "step": 8359 }, { "epoch": 0.9, "grad_norm": 0.08898853045298434, "learning_rate": 2.6658664527102417e-05, "loss": 1.258, "step": 8360 }, { "epoch": 0.9, "grad_norm": 0.08523520098416243, "learning_rate": 2.660260149621907e-05, "loss": 1.3498, "step": 8361 }, { "epoch": 0.9, "grad_norm": 0.08728423609774599, "learning_rate": 2.6546595866402403e-05, "loss": 1.3685, "step": 8362 }, { "epoch": 0.9, "grad_norm": 0.08238661659369678, "learning_rate": 2.6490647644443143e-05, "loss": 1.4899, "step": 8363 }, { "epoch": 0.9, "grad_norm": 0.09252426038134721, "learning_rate": 2.6434756837125317e-05, "loss": 1.5046, "step": 8364 }, { "epoch": 0.9, "grad_norm": 0.08778490489889275, "learning_rate": 2.6378923451225888e-05, "loss": 1.3537, "step": 8365 }, { "epoch": 0.9, "grad_norm": 0.10171854887120627, "learning_rate": 2.6323147493514833e-05, "loss": 1.3695, "step": 8366 }, { "epoch": 0.9, "grad_norm": 0.09384589952775692, "learning_rate": 2.6267428970755125e-05, "loss": 1.3111, "step": 8367 }, { "epoch": 0.9, "grad_norm": 0.0795961798737711, "learning_rate": 2.621176788970281e-05, "loss": 1.3767, "step": 8368 }, { "epoch": 0.9, "grad_norm": 0.10107183568789642, "learning_rate": 2.6156164257107097e-05, "loss": 1.2705, "step": 8369 }, { "epoch": 0.9, "grad_norm": 0.09333781313598113, "learning_rate": 2.6100618079710037e-05, "loss": 1.37, "step": 8370 }, { "epoch": 0.9, "grad_norm": 0.08833430413490173, "learning_rate": 2.6045129364246856e-05, "loss": 1.4379, "step": 8371 }, { "epoch": 0.9, "grad_norm": 0.08838142607452992, "learning_rate": 2.5989698117445615e-05, "loss": 1.5307, "step": 8372 }, { "epoch": 0.9, "grad_norm": 0.09304373165717744, "learning_rate": 2.593432434602766e-05, "loss": 1.4593, "step": 8373 }, { "epoch": 0.9, "grad_norm": 0.08477827831395272, "learning_rate": 2.5879008056707286e-05, "loss": 1.3841, "step": 8374 }, { "epoch": 0.9, "grad_norm": 0.09963160622039322, "learning_rate": 2.5823749256191687e-05, "loss": 1.3131, "step": 8375 }, { "epoch": 0.9, "grad_norm": 0.09050444438851227, "learning_rate": 2.5768547951181277e-05, "loss": 1.4336, "step": 8376 }, { "epoch": 0.9, "grad_norm": 0.09180191842894916, "learning_rate": 2.571340414836931e-05, "loss": 1.37, "step": 8377 }, { "epoch": 0.9, "grad_norm": 0.0813612383349236, "learning_rate": 2.565831785444228e-05, "loss": 1.4256, "step": 8378 }, { "epoch": 0.9, "grad_norm": 0.10347883114896525, "learning_rate": 2.5603289076079394e-05, "loss": 1.4034, "step": 8379 }, { "epoch": 0.9, "grad_norm": 0.08899944676178057, "learning_rate": 2.5548317819953203e-05, "loss": 1.3462, "step": 8380 }, { "epoch": 0.9, "grad_norm": 0.09492130882777211, "learning_rate": 2.5493404092729267e-05, "loss": 1.4667, "step": 8381 }, { "epoch": 0.9, "grad_norm": 0.08012737818017668, "learning_rate": 2.5438547901065866e-05, "loss": 1.3719, "step": 8382 }, { "epoch": 0.9, "grad_norm": 0.09302578161515027, "learning_rate": 2.538374925161463e-05, "loss": 1.4519, "step": 8383 }, { "epoch": 0.9, "grad_norm": 0.08547204393981939, "learning_rate": 2.5329008151020072e-05, "loss": 1.5377, "step": 8384 }, { "epoch": 0.9, "grad_norm": 0.07712752399790215, "learning_rate": 2.5274324605919664e-05, "loss": 1.3414, "step": 8385 }, { "epoch": 0.9, "grad_norm": 0.08767049163112096, "learning_rate": 2.521969862294404e-05, "loss": 1.4337, "step": 8386 }, { "epoch": 0.9, "grad_norm": 0.08594081612171055, "learning_rate": 2.5165130208716914e-05, "loss": 1.3469, "step": 8387 }, { "epoch": 0.9, "grad_norm": 0.09468859963832922, "learning_rate": 2.5110619369854594e-05, "loss": 1.3351, "step": 8388 }, { "epoch": 0.9, "grad_norm": 0.09186319861919497, "learning_rate": 2.505616611296685e-05, "loss": 1.3704, "step": 8389 }, { "epoch": 0.9, "grad_norm": 0.08461677792305342, "learning_rate": 2.5001770444656457e-05, "loss": 1.4311, "step": 8390 }, { "epoch": 0.9, "grad_norm": 0.1280313908368673, "learning_rate": 2.494743237151892e-05, "loss": 1.3177, "step": 8391 }, { "epoch": 0.9, "grad_norm": 0.11643983133341222, "learning_rate": 2.4893151900142906e-05, "loss": 1.3947, "step": 8392 }, { "epoch": 0.9, "grad_norm": 0.10473989822954327, "learning_rate": 2.4838929037110268e-05, "loss": 1.5387, "step": 8393 }, { "epoch": 0.9, "grad_norm": 0.0843967030128136, "learning_rate": 2.4784763788995523e-05, "loss": 1.4826, "step": 8394 }, { "epoch": 0.9, "grad_norm": 0.08342362278756797, "learning_rate": 2.473065616236647e-05, "loss": 1.3826, "step": 8395 }, { "epoch": 0.9, "grad_norm": 0.08230065911414464, "learning_rate": 2.4676606163783978e-05, "loss": 1.395, "step": 8396 }, { "epoch": 0.9, "grad_norm": 0.08570971292153445, "learning_rate": 2.462261379980163e-05, "loss": 1.4211, "step": 8397 }, { "epoch": 0.9, "grad_norm": 0.09686187113928392, "learning_rate": 2.4568679076966194e-05, "loss": 1.4408, "step": 8398 }, { "epoch": 0.9, "grad_norm": 0.0853459619893161, "learning_rate": 2.4514802001817438e-05, "loss": 1.4611, "step": 8399 }, { "epoch": 0.9, "grad_norm": 0.08748074962675632, "learning_rate": 2.4460982580888303e-05, "loss": 1.4843, "step": 8400 }, { "epoch": 0.9, "grad_norm": 0.09367239616088394, "learning_rate": 2.4407220820704402e-05, "loss": 1.371, "step": 8401 }, { "epoch": 0.9, "grad_norm": 0.10033912608729492, "learning_rate": 2.435351672778463e-05, "loss": 1.4728, "step": 8402 }, { "epoch": 0.9, "grad_norm": 0.08858671836179179, "learning_rate": 2.4299870308640726e-05, "loss": 1.4597, "step": 8403 }, { "epoch": 0.9, "grad_norm": 0.09235237842112576, "learning_rate": 2.4246281569777485e-05, "loss": 1.4919, "step": 8404 }, { "epoch": 0.9, "grad_norm": 0.103872125039485, "learning_rate": 2.4192750517692873e-05, "loss": 1.4196, "step": 8405 }, { "epoch": 0.9, "grad_norm": 0.09659718234578298, "learning_rate": 2.4139277158877538e-05, "loss": 1.3337, "step": 8406 }, { "epoch": 0.9, "grad_norm": 0.08205692848700044, "learning_rate": 2.4085861499815398e-05, "loss": 1.2939, "step": 8407 }, { "epoch": 0.9, "grad_norm": 0.10968793439303617, "learning_rate": 2.4032503546983332e-05, "loss": 1.4226, "step": 8408 }, { "epoch": 0.9, "grad_norm": 0.08408076692975283, "learning_rate": 2.397920330685116e-05, "loss": 1.5346, "step": 8409 }, { "epoch": 0.9, "grad_norm": 0.08412734327092307, "learning_rate": 2.392596078588155e-05, "loss": 1.4113, "step": 8410 }, { "epoch": 0.9, "grad_norm": 0.0865364726324679, "learning_rate": 2.3872775990530504e-05, "loss": 1.351, "step": 8411 }, { "epoch": 0.9, "grad_norm": 0.08882596689561814, "learning_rate": 2.3819648927246916e-05, "loss": 1.4098, "step": 8412 }, { "epoch": 0.9, "grad_norm": 0.09977260706687908, "learning_rate": 2.376657960247247e-05, "loss": 1.4846, "step": 8413 }, { "epoch": 0.9, "grad_norm": 0.08832253323200623, "learning_rate": 2.371356802264202e-05, "loss": 1.3652, "step": 8414 }, { "epoch": 0.9, "grad_norm": 0.09015004280653494, "learning_rate": 2.3660614194183584e-05, "loss": 1.5453, "step": 8415 }, { "epoch": 0.9, "grad_norm": 0.09993461728963231, "learning_rate": 2.3607718123517753e-05, "loss": 1.3951, "step": 8416 }, { "epoch": 0.9, "grad_norm": 0.09386030180420961, "learning_rate": 2.3554879817058504e-05, "loss": 1.2722, "step": 8417 }, { "epoch": 0.9, "grad_norm": 0.08852572128057404, "learning_rate": 2.3502099281212774e-05, "loss": 1.3115, "step": 8418 }, { "epoch": 0.91, "grad_norm": 0.08987717291858999, "learning_rate": 2.3449376522380107e-05, "loss": 1.3226, "step": 8419 }, { "epoch": 0.91, "grad_norm": 0.10203851051114089, "learning_rate": 2.3396711546953442e-05, "loss": 1.4404, "step": 8420 }, { "epoch": 0.91, "grad_norm": 0.09785692101309608, "learning_rate": 2.3344104361318675e-05, "loss": 1.2662, "step": 8421 }, { "epoch": 0.91, "grad_norm": 0.09629320552610036, "learning_rate": 2.3291554971854477e-05, "loss": 1.3061, "step": 8422 }, { "epoch": 0.91, "grad_norm": 0.09022413330869145, "learning_rate": 2.32390633849327e-05, "loss": 1.3738, "step": 8423 }, { "epoch": 0.91, "grad_norm": 0.09209515044576586, "learning_rate": 2.3186629606918197e-05, "loss": 1.4061, "step": 8424 }, { "epoch": 0.91, "grad_norm": 0.09556573011727089, "learning_rate": 2.31342536441686e-05, "loss": 1.3278, "step": 8425 }, { "epoch": 0.91, "grad_norm": 0.09352813983137584, "learning_rate": 2.3081935503034777e-05, "loss": 1.5637, "step": 8426 }, { "epoch": 0.91, "grad_norm": 0.09343439987112688, "learning_rate": 2.3029675189860544e-05, "loss": 1.4015, "step": 8427 }, { "epoch": 0.91, "grad_norm": 0.10169587181273342, "learning_rate": 2.297747271098244e-05, "loss": 1.3474, "step": 8428 }, { "epoch": 0.91, "grad_norm": 0.10179498660670094, "learning_rate": 2.29253280727304e-05, "loss": 1.4285, "step": 8429 }, { "epoch": 0.91, "grad_norm": 0.09330307333224806, "learning_rate": 2.2873241281427038e-05, "loss": 1.3877, "step": 8430 }, { "epoch": 0.91, "grad_norm": 0.10039279736866197, "learning_rate": 2.2821212343388075e-05, "loss": 1.4382, "step": 8431 }, { "epoch": 0.91, "grad_norm": 0.10346327438587212, "learning_rate": 2.2769241264922193e-05, "loss": 1.3383, "step": 8432 }, { "epoch": 0.91, "grad_norm": 0.08989579051513284, "learning_rate": 2.2717328052331122e-05, "loss": 1.3829, "step": 8433 }, { "epoch": 0.91, "grad_norm": 0.09469033069439596, "learning_rate": 2.2665472711909385e-05, "loss": 1.4191, "step": 8434 }, { "epoch": 0.91, "grad_norm": 0.08302585181396888, "learning_rate": 2.2613675249944676e-05, "loss": 1.552, "step": 8435 }, { "epoch": 0.91, "grad_norm": 0.07639788909277988, "learning_rate": 2.256193567271775e-05, "loss": 1.5736, "step": 8436 }, { "epoch": 0.91, "grad_norm": 0.10245714094055836, "learning_rate": 2.2510253986502026e-05, "loss": 1.4083, "step": 8437 }, { "epoch": 0.91, "grad_norm": 0.10338545883531776, "learning_rate": 2.2458630197564222e-05, "loss": 1.3372, "step": 8438 }, { "epoch": 0.91, "grad_norm": 0.10610452397828928, "learning_rate": 2.2407064312163827e-05, "loss": 1.4028, "step": 8439 }, { "epoch": 0.91, "grad_norm": 0.07244559862137315, "learning_rate": 2.235555633655345e-05, "loss": 1.3914, "step": 8440 }, { "epoch": 0.91, "grad_norm": 0.09405091787870902, "learning_rate": 2.230410627697843e-05, "loss": 1.3263, "step": 8441 }, { "epoch": 0.91, "grad_norm": 0.09015438680566427, "learning_rate": 2.2252714139677444e-05, "loss": 1.4548, "step": 8442 }, { "epoch": 0.91, "grad_norm": 0.08618967083701604, "learning_rate": 2.2201379930882006e-05, "loss": 1.3307, "step": 8443 }, { "epoch": 0.91, "grad_norm": 0.09062863447448566, "learning_rate": 2.2150103656816356e-05, "loss": 1.3327, "step": 8444 }, { "epoch": 0.91, "grad_norm": 0.08555145550793038, "learning_rate": 2.2098885323698027e-05, "loss": 1.3822, "step": 8445 }, { "epoch": 0.91, "grad_norm": 0.10137210592380712, "learning_rate": 2.2047724937737546e-05, "loss": 1.5497, "step": 8446 }, { "epoch": 0.91, "grad_norm": 0.09072992995784526, "learning_rate": 2.1996622505138065e-05, "loss": 1.5256, "step": 8447 }, { "epoch": 0.91, "grad_norm": 0.08917769482057666, "learning_rate": 2.1945578032096015e-05, "loss": 1.4008, "step": 8448 }, { "epoch": 0.91, "grad_norm": 0.08259821066802236, "learning_rate": 2.1894591524800832e-05, "loss": 1.3465, "step": 8449 }, { "epoch": 0.91, "grad_norm": 0.09017995327516512, "learning_rate": 2.1843662989434688e-05, "loss": 1.535, "step": 8450 }, { "epoch": 0.91, "grad_norm": 0.09148728765294482, "learning_rate": 2.179279243217286e-05, "loss": 1.2695, "step": 8451 }, { "epoch": 0.91, "grad_norm": 0.09181262449336586, "learning_rate": 2.174197985918358e-05, "loss": 1.3005, "step": 8452 }, { "epoch": 0.91, "grad_norm": 0.09799199206773658, "learning_rate": 2.169122527662798e-05, "loss": 1.4007, "step": 8453 }, { "epoch": 0.91, "grad_norm": 0.08347453430194898, "learning_rate": 2.1640528690660298e-05, "loss": 1.3682, "step": 8454 }, { "epoch": 0.91, "grad_norm": 0.08692980607633988, "learning_rate": 2.158989010742779e-05, "loss": 1.4219, "step": 8455 }, { "epoch": 0.91, "grad_norm": 0.08937756989442294, "learning_rate": 2.1539309533070316e-05, "loss": 1.3182, "step": 8456 }, { "epoch": 0.91, "grad_norm": 0.08836818598437016, "learning_rate": 2.1488786973721085e-05, "loss": 1.3727, "step": 8457 }, { "epoch": 0.91, "grad_norm": 0.0946721247446865, "learning_rate": 2.1438322435506196e-05, "loss": 1.4795, "step": 8458 }, { "epoch": 0.91, "grad_norm": 0.10350896930890328, "learning_rate": 2.1387915924544475e-05, "loss": 1.3853, "step": 8459 }, { "epoch": 0.91, "grad_norm": 0.10058033450733786, "learning_rate": 2.133756744694798e-05, "loss": 1.417, "step": 8460 }, { "epoch": 0.91, "grad_norm": 0.09827699073225621, "learning_rate": 2.128727700882166e-05, "loss": 1.4018, "step": 8461 }, { "epoch": 0.91, "grad_norm": 0.09041865873232152, "learning_rate": 2.1237044616263412e-05, "loss": 1.424, "step": 8462 }, { "epoch": 0.91, "grad_norm": 0.0924230597271768, "learning_rate": 2.1186870275363977e-05, "loss": 1.3944, "step": 8463 }, { "epoch": 0.91, "grad_norm": 0.0873473916377948, "learning_rate": 2.1136753992207268e-05, "loss": 1.3969, "step": 8464 }, { "epoch": 0.91, "grad_norm": 0.08325181748499406, "learning_rate": 2.108669577287009e-05, "loss": 1.4619, "step": 8465 }, { "epoch": 0.91, "grad_norm": 0.0866440412000076, "learning_rate": 2.103669562342203e-05, "loss": 1.4301, "step": 8466 }, { "epoch": 0.91, "grad_norm": 0.08572274163520682, "learning_rate": 2.098675354992596e-05, "loss": 1.347, "step": 8467 }, { "epoch": 0.91, "grad_norm": 0.11805279277002773, "learning_rate": 2.093686955843732e-05, "loss": 1.283, "step": 8468 }, { "epoch": 0.91, "grad_norm": 0.09658742123983585, "learning_rate": 2.088704365500482e-05, "loss": 1.2494, "step": 8469 }, { "epoch": 0.91, "grad_norm": 0.08822907080986658, "learning_rate": 2.0837275845670135e-05, "loss": 1.3896, "step": 8470 }, { "epoch": 0.91, "grad_norm": 0.08932206436251997, "learning_rate": 2.0787566136467705e-05, "loss": 1.393, "step": 8471 }, { "epoch": 0.91, "grad_norm": 0.09641547295931442, "learning_rate": 2.0737914533424885e-05, "loss": 1.3631, "step": 8472 }, { "epoch": 0.91, "grad_norm": 0.09249433521253747, "learning_rate": 2.0688321042562186e-05, "loss": 1.4559, "step": 8473 }, { "epoch": 0.91, "grad_norm": 0.08820267488822597, "learning_rate": 2.0638785669893024e-05, "loss": 1.4652, "step": 8474 }, { "epoch": 0.91, "grad_norm": 0.08816880733726157, "learning_rate": 2.0589308421423704e-05, "loss": 1.3932, "step": 8475 }, { "epoch": 0.91, "grad_norm": 0.09785846516443422, "learning_rate": 2.053988930315348e-05, "loss": 1.3651, "step": 8476 }, { "epoch": 0.91, "grad_norm": 0.09819695747342767, "learning_rate": 2.049052832107473e-05, "loss": 1.3235, "step": 8477 }, { "epoch": 0.91, "grad_norm": 0.09234628217230027, "learning_rate": 2.0441225481172443e-05, "loss": 1.4419, "step": 8478 }, { "epoch": 0.91, "grad_norm": 0.10311619376021568, "learning_rate": 2.039198078942489e-05, "loss": 1.3974, "step": 8479 }, { "epoch": 0.91, "grad_norm": 0.09331694810071177, "learning_rate": 2.0342794251803188e-05, "loss": 1.4798, "step": 8480 }, { "epoch": 0.91, "grad_norm": 0.09020593321897641, "learning_rate": 2.029366587427123e-05, "loss": 1.3993, "step": 8481 }, { "epoch": 0.91, "grad_norm": 0.09205109315945209, "learning_rate": 2.0244595662786136e-05, "loss": 1.4413, "step": 8482 }, { "epoch": 0.91, "grad_norm": 0.08248355323819702, "learning_rate": 2.019558362329782e-05, "loss": 1.4507, "step": 8483 }, { "epoch": 0.91, "grad_norm": 0.09469002579477122, "learning_rate": 2.014662976174908e-05, "loss": 1.496, "step": 8484 }, { "epoch": 0.91, "grad_norm": 0.09997222647885515, "learning_rate": 2.0097734084075723e-05, "loss": 1.3537, "step": 8485 }, { "epoch": 0.91, "grad_norm": 0.09214343688886159, "learning_rate": 2.0048896596206677e-05, "loss": 1.4185, "step": 8486 }, { "epoch": 0.91, "grad_norm": 0.08602997561822767, "learning_rate": 2.000011730406348e-05, "loss": 1.3327, "step": 8487 }, { "epoch": 0.91, "grad_norm": 0.08411014919679415, "learning_rate": 1.99513962135609e-05, "loss": 1.3495, "step": 8488 }, { "epoch": 0.91, "grad_norm": 0.09021699901169272, "learning_rate": 1.9902733330606604e-05, "loss": 1.4673, "step": 8489 }, { "epoch": 0.91, "grad_norm": 0.0894410838431081, "learning_rate": 1.9854128661100925e-05, "loss": 1.4469, "step": 8490 }, { "epoch": 0.91, "grad_norm": 0.09476249544207571, "learning_rate": 1.980558221093748e-05, "loss": 1.4729, "step": 8491 }, { "epoch": 0.91, "grad_norm": 0.08738490149073329, "learning_rate": 1.9757093986002728e-05, "loss": 1.4347, "step": 8492 }, { "epoch": 0.91, "grad_norm": 0.08126371645001917, "learning_rate": 1.9708663992175968e-05, "loss": 1.4183, "step": 8493 }, { "epoch": 0.91, "grad_norm": 0.09864540184993932, "learning_rate": 1.9660292235329446e-05, "loss": 1.4789, "step": 8494 }, { "epoch": 0.91, "grad_norm": 0.08836207440306233, "learning_rate": 1.961197872132847e-05, "loss": 1.3434, "step": 8495 }, { "epoch": 0.91, "grad_norm": 0.08445418161336433, "learning_rate": 1.95637234560313e-05, "loss": 1.2764, "step": 8496 }, { "epoch": 0.91, "grad_norm": 0.0985059807913652, "learning_rate": 1.951552644528892e-05, "loss": 1.455, "step": 8497 }, { "epoch": 0.91, "grad_norm": 0.08635710557765165, "learning_rate": 1.946738769494555e-05, "loss": 1.4062, "step": 8498 }, { "epoch": 0.91, "grad_norm": 0.0912158272982496, "learning_rate": 1.9419307210837954e-05, "loss": 1.4382, "step": 8499 }, { "epoch": 0.91, "grad_norm": 0.08528304563372603, "learning_rate": 1.9371284998796147e-05, "loss": 1.3473, "step": 8500 }, { "epoch": 0.91, "grad_norm": 0.09228094156924532, "learning_rate": 1.9323321064643128e-05, "loss": 1.5235, "step": 8501 }, { "epoch": 0.91, "grad_norm": 0.08233938925848544, "learning_rate": 1.9275415414194476e-05, "loss": 1.376, "step": 8502 }, { "epoch": 0.91, "grad_norm": 0.08850689703770695, "learning_rate": 1.922756805325909e-05, "loss": 1.4039, "step": 8503 }, { "epoch": 0.91, "grad_norm": 0.09958912606213574, "learning_rate": 1.9179778987638508e-05, "loss": 1.4, "step": 8504 }, { "epoch": 0.91, "grad_norm": 0.09810559605964786, "learning_rate": 1.913204822312742e-05, "loss": 1.3595, "step": 8505 }, { "epoch": 0.91, "grad_norm": 0.09463852301076034, "learning_rate": 1.9084375765513197e-05, "loss": 1.4617, "step": 8506 }, { "epoch": 0.91, "grad_norm": 0.11013757373202628, "learning_rate": 1.9036761620576436e-05, "loss": 1.4929, "step": 8507 }, { "epoch": 0.91, "grad_norm": 0.12099079593085553, "learning_rate": 1.898920579409047e-05, "loss": 1.4703, "step": 8508 }, { "epoch": 0.91, "grad_norm": 0.08528369129827018, "learning_rate": 1.894170829182157e-05, "loss": 1.4153, "step": 8509 }, { "epoch": 0.91, "grad_norm": 0.08461384742941523, "learning_rate": 1.889426911952896e-05, "loss": 1.3631, "step": 8510 }, { "epoch": 0.91, "grad_norm": 0.09677086610219493, "learning_rate": 1.8846888282964937e-05, "loss": 1.3896, "step": 8511 }, { "epoch": 0.92, "grad_norm": 0.09429266082090436, "learning_rate": 1.8799565787874394e-05, "loss": 1.3049, "step": 8512 }, { "epoch": 0.92, "grad_norm": 0.08097096508478885, "learning_rate": 1.875230163999553e-05, "loss": 1.2469, "step": 8513 }, { "epoch": 0.92, "grad_norm": 0.09900252527706288, "learning_rate": 1.870509584505925e-05, "loss": 1.3851, "step": 8514 }, { "epoch": 0.92, "grad_norm": 0.08627629957622081, "learning_rate": 1.8657948408789262e-05, "loss": 1.3561, "step": 8515 }, { "epoch": 0.92, "grad_norm": 0.10019881440339429, "learning_rate": 1.8610859336902486e-05, "loss": 1.3432, "step": 8516 }, { "epoch": 0.92, "grad_norm": 0.09158849272181173, "learning_rate": 1.8563828635108692e-05, "loss": 1.3539, "step": 8517 }, { "epoch": 0.92, "grad_norm": 0.09867557457914743, "learning_rate": 1.8516856309110375e-05, "loss": 1.4684, "step": 8518 }, { "epoch": 0.92, "grad_norm": 0.08122593100233426, "learning_rate": 1.8469942364603142e-05, "loss": 1.4484, "step": 8519 }, { "epoch": 0.92, "grad_norm": 0.0842885239004775, "learning_rate": 1.8423086807275557e-05, "loss": 1.3915, "step": 8520 }, { "epoch": 0.92, "grad_norm": 0.08729296238568429, "learning_rate": 1.8376289642808854e-05, "loss": 1.2997, "step": 8521 }, { "epoch": 0.92, "grad_norm": 0.08963895604227475, "learning_rate": 1.8329550876877487e-05, "loss": 1.5127, "step": 8522 }, { "epoch": 0.92, "grad_norm": 0.08888042005595129, "learning_rate": 1.8282870515148652e-05, "loss": 1.3661, "step": 8523 }, { "epoch": 0.92, "grad_norm": 0.08759784012714594, "learning_rate": 1.8236248563282542e-05, "loss": 1.4205, "step": 8524 }, { "epoch": 0.92, "grad_norm": 0.09167737995294478, "learning_rate": 1.8189685026932136e-05, "loss": 1.4306, "step": 8525 }, { "epoch": 0.92, "grad_norm": 0.08403460807889258, "learning_rate": 1.8143179911743414e-05, "loss": 1.357, "step": 8526 }, { "epoch": 0.92, "grad_norm": 0.08519773533614822, "learning_rate": 1.8096733223355476e-05, "loss": 1.3824, "step": 8527 }, { "epoch": 0.92, "grad_norm": 0.09879834853603861, "learning_rate": 1.805034496739988e-05, "loss": 1.4378, "step": 8528 }, { "epoch": 0.92, "grad_norm": 0.09648184208305852, "learning_rate": 1.8004015149501563e-05, "loss": 1.3694, "step": 8529 }, { "epoch": 0.92, "grad_norm": 0.08839735081030436, "learning_rate": 1.795774377527809e-05, "loss": 1.3559, "step": 8530 }, { "epoch": 0.92, "grad_norm": 0.08930280050423725, "learning_rate": 1.7911530850339976e-05, "loss": 1.3384, "step": 8531 }, { "epoch": 0.92, "grad_norm": 0.09363327856588406, "learning_rate": 1.7865376380290842e-05, "loss": 1.3748, "step": 8532 }, { "epoch": 0.92, "grad_norm": 0.08800254556910388, "learning_rate": 1.7819280370726944e-05, "loss": 1.3577, "step": 8533 }, { "epoch": 0.92, "grad_norm": 0.09220582337447861, "learning_rate": 1.7773242827237634e-05, "loss": 1.3874, "step": 8534 }, { "epoch": 0.92, "grad_norm": 0.09027578303790597, "learning_rate": 1.7727263755405176e-05, "loss": 1.5059, "step": 8535 }, { "epoch": 0.92, "grad_norm": 0.08826799704652526, "learning_rate": 1.7681343160804608e-05, "loss": 1.4667, "step": 8536 }, { "epoch": 0.92, "grad_norm": 0.0898786378584817, "learning_rate": 1.7635481049003975e-05, "loss": 1.4077, "step": 8537 }, { "epoch": 0.92, "grad_norm": 0.09251862718568461, "learning_rate": 1.7589677425564222e-05, "loss": 1.4512, "step": 8538 }, { "epoch": 0.92, "grad_norm": 0.08630638711926578, "learning_rate": 1.7543932296039232e-05, "loss": 1.4314, "step": 8539 }, { "epoch": 0.92, "grad_norm": 0.08552853445385915, "learning_rate": 1.749824566597569e-05, "loss": 1.3791, "step": 8540 }, { "epoch": 0.92, "grad_norm": 0.0998202597307536, "learning_rate": 1.7452617540913264e-05, "loss": 1.3047, "step": 8541 }, { "epoch": 0.92, "grad_norm": 0.08909459675523979, "learning_rate": 1.7407047926384644e-05, "loss": 1.3275, "step": 8542 }, { "epoch": 0.92, "grad_norm": 0.08463440139479447, "learning_rate": 1.7361536827915137e-05, "loss": 1.3971, "step": 8543 }, { "epoch": 0.92, "grad_norm": 0.094601329627551, "learning_rate": 1.7316084251023213e-05, "loss": 1.395, "step": 8544 }, { "epoch": 0.92, "grad_norm": 0.09133703125568057, "learning_rate": 1.7270690201220242e-05, "loss": 1.2725, "step": 8545 }, { "epoch": 0.92, "grad_norm": 0.11459907690898916, "learning_rate": 1.7225354684010208e-05, "loss": 1.395, "step": 8546 }, { "epoch": 0.92, "grad_norm": 0.09232465716072903, "learning_rate": 1.7180077704890274e-05, "loss": 1.3476, "step": 8547 }, { "epoch": 0.92, "grad_norm": 0.09444109804027652, "learning_rate": 1.7134859269350543e-05, "loss": 1.3807, "step": 8548 }, { "epoch": 0.92, "grad_norm": 0.09289326082336746, "learning_rate": 1.7089699382873746e-05, "loss": 1.4472, "step": 8549 }, { "epoch": 0.92, "grad_norm": 0.08862007020517378, "learning_rate": 1.7044598050935724e-05, "loss": 1.4563, "step": 8550 }, { "epoch": 0.92, "grad_norm": 0.0919549410530479, "learning_rate": 1.6999555279005263e-05, "loss": 1.4505, "step": 8551 }, { "epoch": 0.92, "grad_norm": 0.09517584637890769, "learning_rate": 1.6954571072543777e-05, "loss": 1.3477, "step": 8552 }, { "epoch": 0.92, "grad_norm": 0.0937890999439233, "learning_rate": 1.6909645437005905e-05, "loss": 1.3643, "step": 8553 }, { "epoch": 0.92, "grad_norm": 0.08987385955355238, "learning_rate": 1.686477837783906e-05, "loss": 1.4256, "step": 8554 }, { "epoch": 0.92, "grad_norm": 0.09477203791651483, "learning_rate": 1.6819969900483345e-05, "loss": 1.3805, "step": 8555 }, { "epoch": 0.92, "grad_norm": 0.09633659162771828, "learning_rate": 1.6775220010372182e-05, "loss": 1.4013, "step": 8556 }, { "epoch": 0.92, "grad_norm": 0.09878142652040257, "learning_rate": 1.6730528712931404e-05, "loss": 1.4194, "step": 8557 }, { "epoch": 0.92, "grad_norm": 0.09332498556478858, "learning_rate": 1.6685896013580226e-05, "loss": 1.4105, "step": 8558 }, { "epoch": 0.92, "grad_norm": 0.08565145674426411, "learning_rate": 1.6641321917730268e-05, "loss": 1.4823, "step": 8559 }, { "epoch": 0.92, "grad_norm": 0.1172115003327829, "learning_rate": 1.6596806430786425e-05, "loss": 1.3143, "step": 8560 }, { "epoch": 0.92, "grad_norm": 0.09783235169089964, "learning_rate": 1.655234955814644e-05, "loss": 1.4258, "step": 8561 }, { "epoch": 0.92, "grad_norm": 0.1005044359265623, "learning_rate": 1.650795130520072e-05, "loss": 1.4208, "step": 8562 }, { "epoch": 0.92, "grad_norm": 0.08384326929780318, "learning_rate": 1.646361167733279e-05, "loss": 1.2791, "step": 8563 }, { "epoch": 0.92, "grad_norm": 0.09126815563432995, "learning_rate": 1.6419330679918855e-05, "loss": 1.4265, "step": 8564 }, { "epoch": 0.92, "grad_norm": 0.0946576406020799, "learning_rate": 1.637510831832828e-05, "loss": 1.2326, "step": 8565 }, { "epoch": 0.92, "grad_norm": 0.0927423035932863, "learning_rate": 1.633094459792317e-05, "loss": 1.2565, "step": 8566 }, { "epoch": 0.92, "grad_norm": 0.09451555563163766, "learning_rate": 1.6286839524058463e-05, "loss": 1.4538, "step": 8567 }, { "epoch": 0.92, "grad_norm": 0.08899655462628817, "learning_rate": 1.6242793102082043e-05, "loss": 1.3785, "step": 8568 }, { "epoch": 0.92, "grad_norm": 0.09187045471105075, "learning_rate": 1.6198805337334756e-05, "loss": 1.3574, "step": 8569 }, { "epoch": 0.92, "grad_norm": 0.08860419206793055, "learning_rate": 1.6154876235150273e-05, "loss": 1.3431, "step": 8570 }, { "epoch": 0.92, "grad_norm": 0.08899383899424537, "learning_rate": 1.611100580085506e-05, "loss": 1.5686, "step": 8571 }, { "epoch": 0.92, "grad_norm": 0.10070643337059408, "learning_rate": 1.6067194039768584e-05, "loss": 1.2775, "step": 8572 }, { "epoch": 0.92, "grad_norm": 0.09644479465809108, "learning_rate": 1.6023440957203262e-05, "loss": 1.3781, "step": 8573 }, { "epoch": 0.92, "grad_norm": 0.07905816945435647, "learning_rate": 1.5979746558464236e-05, "loss": 1.2644, "step": 8574 }, { "epoch": 0.92, "grad_norm": 0.10817332670827906, "learning_rate": 1.593611084884955e-05, "loss": 1.4592, "step": 8575 }, { "epoch": 0.92, "grad_norm": 0.08316527046152339, "learning_rate": 1.5892533833650356e-05, "loss": 1.305, "step": 8576 }, { "epoch": 0.92, "grad_norm": 0.11415884426723807, "learning_rate": 1.5849015518150377e-05, "loss": 1.4799, "step": 8577 }, { "epoch": 0.92, "grad_norm": 0.083268649141457, "learning_rate": 1.5805555907626334e-05, "loss": 1.2927, "step": 8578 }, { "epoch": 0.92, "grad_norm": 0.0914170798789987, "learning_rate": 1.5762155007347956e-05, "loss": 1.3428, "step": 8579 }, { "epoch": 0.92, "grad_norm": 0.10408842767047184, "learning_rate": 1.57188128225777e-05, "loss": 1.4778, "step": 8580 }, { "epoch": 0.92, "grad_norm": 0.08966895390199751, "learning_rate": 1.5675529358570916e-05, "loss": 1.4545, "step": 8581 }, { "epoch": 0.92, "grad_norm": 0.09387839113739459, "learning_rate": 1.5632304620575965e-05, "loss": 1.3898, "step": 8582 }, { "epoch": 0.92, "grad_norm": 0.08271804328233938, "learning_rate": 1.558913861383393e-05, "loss": 1.2611, "step": 8583 }, { "epoch": 0.92, "grad_norm": 0.09070946609853438, "learning_rate": 1.554603134357879e-05, "loss": 1.4368, "step": 8584 }, { "epoch": 0.92, "grad_norm": 0.0922460182899588, "learning_rate": 1.5502982815037648e-05, "loss": 1.4203, "step": 8585 }, { "epoch": 0.92, "grad_norm": 0.09415164483339611, "learning_rate": 1.545999303343004e-05, "loss": 1.43, "step": 8586 }, { "epoch": 0.92, "grad_norm": 0.07968818770023066, "learning_rate": 1.5417062003968808e-05, "loss": 1.3758, "step": 8587 }, { "epoch": 0.92, "grad_norm": 0.0973453065604615, "learning_rate": 1.537418973185939e-05, "loss": 1.4526, "step": 8588 }, { "epoch": 0.92, "grad_norm": 0.09681877333037021, "learning_rate": 1.53313762223003e-05, "loss": 1.3711, "step": 8589 }, { "epoch": 0.92, "grad_norm": 0.10431149269563894, "learning_rate": 1.5288621480482657e-05, "loss": 1.3885, "step": 8590 }, { "epoch": 0.92, "grad_norm": 0.086045199743274, "learning_rate": 1.5245925511590708e-05, "loss": 1.4141, "step": 8591 }, { "epoch": 0.92, "grad_norm": 0.08735933257811349, "learning_rate": 1.5203288320801589e-05, "loss": 1.3915, "step": 8592 }, { "epoch": 0.92, "grad_norm": 0.09089960974137015, "learning_rate": 1.5160709913284998e-05, "loss": 1.3794, "step": 8593 }, { "epoch": 0.92, "grad_norm": 0.10346871676245731, "learning_rate": 1.5118190294203916e-05, "loss": 1.2933, "step": 8594 }, { "epoch": 0.92, "grad_norm": 0.09863144571471517, "learning_rate": 1.5075729468713828e-05, "loss": 1.4533, "step": 8595 }, { "epoch": 0.92, "grad_norm": 0.08689322248559195, "learning_rate": 1.5033327441963395e-05, "loss": 1.4257, "step": 8596 }, { "epoch": 0.92, "grad_norm": 0.09689504666913185, "learning_rate": 1.4990984219093895e-05, "loss": 1.308, "step": 8597 }, { "epoch": 0.92, "grad_norm": 0.1088708301464488, "learning_rate": 1.4948699805239719e-05, "loss": 1.3454, "step": 8598 }, { "epoch": 0.92, "grad_norm": 0.09341805770506104, "learning_rate": 1.490647420552782e-05, "loss": 1.4011, "step": 8599 }, { "epoch": 0.92, "grad_norm": 0.09385136498735533, "learning_rate": 1.4864307425078328e-05, "loss": 1.4219, "step": 8600 }, { "epoch": 0.92, "grad_norm": 0.0821232435993565, "learning_rate": 1.4822199469004094e-05, "loss": 1.4294, "step": 8601 }, { "epoch": 0.92, "grad_norm": 0.08799735582406523, "learning_rate": 1.4780150342410814e-05, "loss": 1.395, "step": 8602 }, { "epoch": 0.92, "grad_norm": 0.09669252271897247, "learning_rate": 1.4738160050397132e-05, "loss": 1.3807, "step": 8603 }, { "epoch": 0.92, "grad_norm": 0.09423337439377573, "learning_rate": 1.4696228598054473e-05, "loss": 1.3328, "step": 8604 }, { "epoch": 0.93, "grad_norm": 0.087042175661016, "learning_rate": 1.4654355990467216e-05, "loss": 1.2981, "step": 8605 }, { "epoch": 0.93, "grad_norm": 0.08860134615702277, "learning_rate": 1.4612542232712522e-05, "loss": 1.5621, "step": 8606 }, { "epoch": 0.93, "grad_norm": 0.08828791523559235, "learning_rate": 1.4570787329860502e-05, "loss": 1.3671, "step": 8607 }, { "epoch": 0.93, "grad_norm": 0.08608911561738074, "learning_rate": 1.4529091286973995e-05, "loss": 1.2861, "step": 8608 }, { "epoch": 0.93, "grad_norm": 0.08437715352144702, "learning_rate": 1.44874541091089e-05, "loss": 1.3159, "step": 8609 }, { "epoch": 0.93, "grad_norm": 0.08595818844939189, "learning_rate": 1.444587580131379e-05, "loss": 1.4947, "step": 8610 }, { "epoch": 0.93, "grad_norm": 0.11629088640945023, "learning_rate": 1.4404356368630133e-05, "loss": 1.3563, "step": 8611 }, { "epoch": 0.93, "grad_norm": 0.09872012121874528, "learning_rate": 1.4362895816092403e-05, "loss": 1.2898, "step": 8612 }, { "epoch": 0.93, "grad_norm": 0.09723751176966317, "learning_rate": 1.4321494148727854e-05, "loss": 1.3872, "step": 8613 }, { "epoch": 0.93, "grad_norm": 0.08164864248624706, "learning_rate": 1.4280151371556471e-05, "loss": 1.3384, "step": 8614 }, { "epoch": 0.93, "grad_norm": 0.0825178834649068, "learning_rate": 1.4238867489591301e-05, "loss": 1.3563, "step": 8615 }, { "epoch": 0.93, "grad_norm": 0.09765191426189007, "learning_rate": 1.4197642507838115e-05, "loss": 1.2718, "step": 8616 }, { "epoch": 0.93, "grad_norm": 0.09370234390805968, "learning_rate": 1.4156476431295584e-05, "loss": 1.3856, "step": 8617 }, { "epoch": 0.93, "grad_norm": 0.08484573748097803, "learning_rate": 1.4115369264955213e-05, "loss": 1.4273, "step": 8618 }, { "epoch": 0.93, "grad_norm": 0.09953841084466902, "learning_rate": 1.407432101380146e-05, "loss": 1.3876, "step": 8619 }, { "epoch": 0.93, "grad_norm": 0.10160114969677866, "learning_rate": 1.4033331682811568e-05, "loss": 1.3629, "step": 8620 }, { "epoch": 0.93, "grad_norm": 0.08803516529663949, "learning_rate": 1.3992401276955502e-05, "loss": 1.4832, "step": 8621 }, { "epoch": 0.93, "grad_norm": 0.09318963980304762, "learning_rate": 1.3951529801196294e-05, "loss": 1.4404, "step": 8622 }, { "epoch": 0.93, "grad_norm": 0.08645141350772045, "learning_rate": 1.3910717260489869e-05, "loss": 1.411, "step": 8623 }, { "epoch": 0.93, "grad_norm": 0.0894926625646689, "learning_rate": 1.3869963659784657e-05, "loss": 1.3681, "step": 8624 }, { "epoch": 0.93, "grad_norm": 0.09056101206876106, "learning_rate": 1.3829269004022427e-05, "loss": 1.4809, "step": 8625 }, { "epoch": 0.93, "grad_norm": 0.09887299470866391, "learning_rate": 1.3788633298137287e-05, "loss": 1.4831, "step": 8626 }, { "epoch": 0.93, "grad_norm": 0.13513774910578263, "learning_rate": 1.3748056547056632e-05, "loss": 1.5056, "step": 8627 }, { "epoch": 0.93, "grad_norm": 0.10581823456619087, "learning_rate": 1.3707538755700521e-05, "loss": 1.4962, "step": 8628 }, { "epoch": 0.93, "grad_norm": 0.084364216444337, "learning_rate": 1.3667079928981807e-05, "loss": 1.3145, "step": 8629 }, { "epoch": 0.93, "grad_norm": 0.08748428403689872, "learning_rate": 1.362668007180634e-05, "loss": 1.4716, "step": 8630 }, { "epoch": 0.93, "grad_norm": 0.0791241119174475, "learning_rate": 1.3586339189072649e-05, "loss": 1.2485, "step": 8631 }, { "epoch": 0.93, "grad_norm": 0.08759161498800068, "learning_rate": 1.354605728567232e-05, "loss": 1.3997, "step": 8632 }, { "epoch": 0.93, "grad_norm": 0.09708298148600865, "learning_rate": 1.35058343664895e-05, "loss": 1.4377, "step": 8633 }, { "epoch": 0.93, "grad_norm": 0.09798526414450459, "learning_rate": 1.3465670436401512e-05, "loss": 1.4938, "step": 8634 }, { "epoch": 0.93, "grad_norm": 0.10297317798540645, "learning_rate": 1.3425565500278347e-05, "loss": 1.377, "step": 8635 }, { "epoch": 0.93, "grad_norm": 0.09540758310047924, "learning_rate": 1.338551956298284e-05, "loss": 1.4806, "step": 8636 }, { "epoch": 0.93, "grad_norm": 0.09294268328813406, "learning_rate": 1.334553262937066e-05, "loss": 1.5293, "step": 8637 }, { "epoch": 0.93, "grad_norm": 0.09074500638078706, "learning_rate": 1.3305604704290431e-05, "loss": 1.5192, "step": 8638 }, { "epoch": 0.93, "grad_norm": 0.08647215940428625, "learning_rate": 1.3265735792583499e-05, "loss": 1.3124, "step": 8639 }, { "epoch": 0.93, "grad_norm": 0.08799218164916595, "learning_rate": 1.3225925899084169e-05, "loss": 1.5912, "step": 8640 }, { "epoch": 0.93, "grad_norm": 0.09268037997654895, "learning_rate": 1.3186175028619518e-05, "loss": 1.4546, "step": 8641 }, { "epoch": 0.93, "grad_norm": 0.09057608833206314, "learning_rate": 1.3146483186009417e-05, "loss": 1.414, "step": 8642 }, { "epoch": 0.93, "grad_norm": 0.09004670263797164, "learning_rate": 1.3106850376066626e-05, "loss": 1.3588, "step": 8643 }, { "epoch": 0.93, "grad_norm": 0.11473962684426484, "learning_rate": 1.3067276603596856e-05, "loss": 1.5842, "step": 8644 }, { "epoch": 0.93, "grad_norm": 0.10836472932270103, "learning_rate": 1.3027761873398436e-05, "loss": 1.2715, "step": 8645 }, { "epoch": 0.93, "grad_norm": 0.09167158169420705, "learning_rate": 1.2988306190262755e-05, "loss": 1.3965, "step": 8646 }, { "epoch": 0.93, "grad_norm": 0.08991242846948891, "learning_rate": 1.2948909558974042e-05, "loss": 1.4294, "step": 8647 }, { "epoch": 0.93, "grad_norm": 0.09468638044701046, "learning_rate": 1.2909571984309032e-05, "loss": 1.4412, "step": 8648 }, { "epoch": 0.93, "grad_norm": 0.08676751377745191, "learning_rate": 1.2870293471037741e-05, "loss": 1.4942, "step": 8649 }, { "epoch": 0.93, "grad_norm": 0.0916862759987886, "learning_rate": 1.283107402392275e-05, "loss": 1.4295, "step": 8650 }, { "epoch": 0.93, "grad_norm": 0.09650520392353956, "learning_rate": 1.2791913647719589e-05, "loss": 1.3463, "step": 8651 }, { "epoch": 0.93, "grad_norm": 0.08393566526589911, "learning_rate": 1.2752812347176512e-05, "loss": 1.3341, "step": 8652 }, { "epoch": 0.93, "grad_norm": 0.09170932221535763, "learning_rate": 1.271377012703473e-05, "loss": 1.4208, "step": 8653 }, { "epoch": 0.93, "grad_norm": 0.08959861151947758, "learning_rate": 1.2674786992028287e-05, "loss": 1.3821, "step": 8654 }, { "epoch": 0.93, "grad_norm": 0.0848972599871631, "learning_rate": 1.2635862946883957e-05, "loss": 1.3334, "step": 8655 }, { "epoch": 0.93, "grad_norm": 0.0993068814281763, "learning_rate": 1.2596997996321469e-05, "loss": 1.2615, "step": 8656 }, { "epoch": 0.93, "grad_norm": 0.09646313429632936, "learning_rate": 1.2558192145053326e-05, "loss": 1.3858, "step": 8657 }, { "epoch": 0.93, "grad_norm": 0.09286391088063424, "learning_rate": 1.2519445397784769e-05, "loss": 1.3417, "step": 8658 }, { "epoch": 0.93, "grad_norm": 0.08902398492787911, "learning_rate": 1.2480757759214145e-05, "loss": 1.498, "step": 8659 }, { "epoch": 0.93, "grad_norm": 0.08915222091918729, "learning_rate": 1.2442129234032373e-05, "loss": 1.2159, "step": 8660 }, { "epoch": 0.93, "grad_norm": 0.10864732924798791, "learning_rate": 1.2403559826923205e-05, "loss": 1.448, "step": 8661 }, { "epoch": 0.93, "grad_norm": 0.0973476935490254, "learning_rate": 1.236504954256351e-05, "loss": 1.3799, "step": 8662 }, { "epoch": 0.93, "grad_norm": 0.08686513943637508, "learning_rate": 1.2326598385622723e-05, "loss": 1.4464, "step": 8663 }, { "epoch": 0.93, "grad_norm": 0.08930018544005622, "learning_rate": 1.2288206360763055e-05, "loss": 1.3085, "step": 8664 }, { "epoch": 0.93, "grad_norm": 0.08339424642042234, "learning_rate": 1.2249873472639782e-05, "loss": 1.4383, "step": 8665 }, { "epoch": 0.93, "grad_norm": 0.09666134438716192, "learning_rate": 1.2211599725900913e-05, "loss": 1.3808, "step": 8666 }, { "epoch": 0.93, "grad_norm": 0.09117203971501421, "learning_rate": 1.2173385125187175e-05, "loss": 1.2604, "step": 8667 }, { "epoch": 0.93, "grad_norm": 0.08962445862990973, "learning_rate": 1.213522967513231e-05, "loss": 1.3797, "step": 8668 }, { "epoch": 0.93, "grad_norm": 0.0963226720097203, "learning_rate": 1.2097133380362835e-05, "loss": 1.3911, "step": 8669 }, { "epoch": 0.93, "grad_norm": 0.08829865843109752, "learning_rate": 1.2059096245497946e-05, "loss": 1.5422, "step": 8670 }, { "epoch": 0.93, "grad_norm": 0.09592687958075631, "learning_rate": 1.202111827514979e-05, "loss": 1.3628, "step": 8671 }, { "epoch": 0.93, "grad_norm": 0.07879187528901951, "learning_rate": 1.1983199473923456e-05, "loss": 1.5028, "step": 8672 }, { "epoch": 0.93, "grad_norm": 0.10046305256324412, "learning_rate": 1.1945339846416603e-05, "loss": 1.5293, "step": 8673 }, { "epoch": 0.93, "grad_norm": 0.08752446327580514, "learning_rate": 1.1907539397219835e-05, "loss": 1.3339, "step": 8674 }, { "epoch": 0.93, "grad_norm": 0.09815530338289363, "learning_rate": 1.1869798130916654e-05, "loss": 1.3674, "step": 8675 }, { "epoch": 0.93, "grad_norm": 0.0921843523158939, "learning_rate": 1.1832116052083231e-05, "loss": 1.2522, "step": 8676 }, { "epoch": 0.93, "grad_norm": 0.09920247734847988, "learning_rate": 1.1794493165288745e-05, "loss": 1.4375, "step": 8677 }, { "epoch": 0.93, "grad_norm": 0.10373638226182667, "learning_rate": 1.1756929475095102e-05, "loss": 1.3585, "step": 8678 }, { "epoch": 0.93, "grad_norm": 0.11289756227251307, "learning_rate": 1.1719424986056936e-05, "loss": 1.398, "step": 8679 }, { "epoch": 0.93, "grad_norm": 0.08615684017355264, "learning_rate": 1.168197970272189e-05, "loss": 1.4001, "step": 8680 }, { "epoch": 0.93, "grad_norm": 0.08292246245321147, "learning_rate": 1.1644593629630274e-05, "loss": 1.3374, "step": 8681 }, { "epoch": 0.93, "grad_norm": 0.09281794835484808, "learning_rate": 1.1607266771315295e-05, "loss": 1.4318, "step": 8682 }, { "epoch": 0.93, "grad_norm": 0.09380792402917534, "learning_rate": 1.1569999132302999e-05, "loss": 1.3245, "step": 8683 }, { "epoch": 0.93, "grad_norm": 0.08317011645192439, "learning_rate": 1.1532790717112162e-05, "loss": 1.4646, "step": 8684 }, { "epoch": 0.93, "grad_norm": 0.10464656535720604, "learning_rate": 1.1495641530254452e-05, "loss": 1.4131, "step": 8685 }, { "epoch": 0.93, "grad_norm": 0.10344553432386078, "learning_rate": 1.1458551576234322e-05, "loss": 1.3669, "step": 8686 }, { "epoch": 0.93, "grad_norm": 0.08569223859657236, "learning_rate": 1.1421520859549062e-05, "loss": 1.386, "step": 8687 }, { "epoch": 0.93, "grad_norm": 0.09514608590411519, "learning_rate": 1.1384549384688803e-05, "loss": 1.4228, "step": 8688 }, { "epoch": 0.93, "grad_norm": 0.09771515023676958, "learning_rate": 1.1347637156136459e-05, "loss": 1.395, "step": 8689 }, { "epoch": 0.93, "grad_norm": 0.0772270936421878, "learning_rate": 1.1310784178367729e-05, "loss": 1.2789, "step": 8690 }, { "epoch": 0.93, "grad_norm": 0.0911160570848233, "learning_rate": 1.1273990455851203e-05, "loss": 1.3026, "step": 8691 }, { "epoch": 0.93, "grad_norm": 0.0914312510350616, "learning_rate": 1.12372559930482e-05, "loss": 1.4928, "step": 8692 }, { "epoch": 0.93, "grad_norm": 0.09252646977879138, "learning_rate": 1.1200580794412995e-05, "loss": 1.4267, "step": 8693 }, { "epoch": 0.93, "grad_norm": 0.09167015810592967, "learning_rate": 1.1163964864392472e-05, "loss": 1.3711, "step": 8694 }, { "epoch": 0.93, "grad_norm": 0.08638462067121223, "learning_rate": 1.1127408207426471e-05, "loss": 1.4772, "step": 8695 }, { "epoch": 0.93, "grad_norm": 0.08894168565959135, "learning_rate": 1.109091082794761e-05, "loss": 1.4653, "step": 8696 }, { "epoch": 0.93, "grad_norm": 0.08528838348096536, "learning_rate": 1.1054472730381403e-05, "loss": 1.5519, "step": 8697 }, { "epoch": 0.94, "grad_norm": 0.11797144062561185, "learning_rate": 1.1018093919145988e-05, "loss": 1.3926, "step": 8698 }, { "epoch": 0.94, "grad_norm": 0.09838257455982573, "learning_rate": 1.0981774398652444e-05, "loss": 1.3492, "step": 8699 }, { "epoch": 0.94, "grad_norm": 0.0912813666237488, "learning_rate": 1.0945514173304693e-05, "loss": 1.3605, "step": 8700 }, { "epoch": 0.94, "grad_norm": 0.08843196545973088, "learning_rate": 1.090931324749933e-05, "loss": 1.3818, "step": 8701 }, { "epoch": 0.94, "grad_norm": 0.09639414610841635, "learning_rate": 1.0873171625625899e-05, "loss": 1.5414, "step": 8702 }, { "epoch": 0.94, "grad_norm": 0.09300844643291478, "learning_rate": 1.0837089312066728e-05, "loss": 1.4713, "step": 8703 }, { "epoch": 0.94, "grad_norm": 0.0935356886871057, "learning_rate": 1.0801066311196872e-05, "loss": 1.3597, "step": 8704 }, { "epoch": 0.94, "grad_norm": 0.08624681569480633, "learning_rate": 1.0765102627384226e-05, "loss": 1.4299, "step": 8705 }, { "epoch": 0.94, "grad_norm": 0.09823549145427711, "learning_rate": 1.0729198264989582e-05, "loss": 1.4004, "step": 8706 }, { "epoch": 0.94, "grad_norm": 0.08638494814697543, "learning_rate": 1.0693353228366342e-05, "loss": 1.486, "step": 8707 }, { "epoch": 0.94, "grad_norm": 0.08204185326485428, "learning_rate": 1.0657567521860977e-05, "loss": 1.4409, "step": 8708 }, { "epoch": 0.94, "grad_norm": 0.09744301403720242, "learning_rate": 1.0621841149812572e-05, "loss": 1.3364, "step": 8709 }, { "epoch": 0.94, "grad_norm": 0.10000582529959977, "learning_rate": 1.0586174116552993e-05, "loss": 1.4559, "step": 8710 }, { "epoch": 0.94, "grad_norm": 0.09744949985617589, "learning_rate": 1.0550566426407115e-05, "loss": 1.3111, "step": 8711 }, { "epoch": 0.94, "grad_norm": 0.08195953839293012, "learning_rate": 1.0515018083692484e-05, "loss": 1.443, "step": 8712 }, { "epoch": 0.94, "grad_norm": 0.09149345640565289, "learning_rate": 1.0479529092719375e-05, "loss": 1.307, "step": 8713 }, { "epoch": 0.94, "grad_norm": 0.09314773244510169, "learning_rate": 1.0444099457791012e-05, "loss": 1.3025, "step": 8714 }, { "epoch": 0.94, "grad_norm": 0.07480106948838042, "learning_rate": 1.0408729183203403e-05, "loss": 1.3991, "step": 8715 }, { "epoch": 0.94, "grad_norm": 0.0877240164790989, "learning_rate": 1.0373418273245228e-05, "loss": 1.3505, "step": 8716 }, { "epoch": 0.94, "grad_norm": 0.09580152668794165, "learning_rate": 1.0338166732198062e-05, "loss": 1.4759, "step": 8717 }, { "epoch": 0.94, "grad_norm": 0.09232784222569064, "learning_rate": 1.0302974564336265e-05, "loss": 1.333, "step": 8718 }, { "epoch": 0.94, "grad_norm": 0.09517950722743784, "learning_rate": 1.026784177392709e-05, "loss": 1.4041, "step": 8719 }, { "epoch": 0.94, "grad_norm": 0.0870973350453796, "learning_rate": 1.023276836523046e-05, "loss": 1.4549, "step": 8720 }, { "epoch": 0.94, "grad_norm": 0.08866760352849476, "learning_rate": 1.0197754342499199e-05, "loss": 1.3589, "step": 8721 }, { "epoch": 0.94, "grad_norm": 0.09513227232510038, "learning_rate": 1.0162799709978743e-05, "loss": 1.3897, "step": 8722 }, { "epoch": 0.94, "grad_norm": 0.09371045515382317, "learning_rate": 1.0127904471907589e-05, "loss": 1.4099, "step": 8723 }, { "epoch": 0.94, "grad_norm": 0.08614066611489142, "learning_rate": 1.0093068632516855e-05, "loss": 1.3689, "step": 8724 }, { "epoch": 0.94, "grad_norm": 0.10384855286320732, "learning_rate": 1.0058292196030549e-05, "loss": 1.4494, "step": 8725 }, { "epoch": 0.94, "grad_norm": 0.08381647067968226, "learning_rate": 1.0023575166665355e-05, "loss": 1.4584, "step": 8726 }, { "epoch": 0.94, "grad_norm": 0.10300897484233143, "learning_rate": 9.98891754863085e-06, "loss": 1.3709, "step": 8727 }, { "epoch": 0.94, "grad_norm": 0.10475793362232905, "learning_rate": 9.954319346129503e-06, "loss": 1.2983, "step": 8728 }, { "epoch": 0.94, "grad_norm": 0.08644278946240058, "learning_rate": 9.919780563356295e-06, "loss": 1.3854, "step": 8729 }, { "epoch": 0.94, "grad_norm": 0.09194633055680919, "learning_rate": 9.88530120449932e-06, "loss": 1.4767, "step": 8730 }, { "epoch": 0.94, "grad_norm": 0.09169105507664722, "learning_rate": 9.850881273739231e-06, "loss": 1.5221, "step": 8731 }, { "epoch": 0.94, "grad_norm": 0.0917619755532548, "learning_rate": 9.816520775249583e-06, "loss": 1.4147, "step": 8732 }, { "epoch": 0.94, "grad_norm": 0.08201196705016786, "learning_rate": 9.782219713196705e-06, "loss": 1.329, "step": 8733 }, { "epoch": 0.94, "grad_norm": 0.09267725476057265, "learning_rate": 9.747978091739774e-06, "loss": 1.4925, "step": 8734 }, { "epoch": 0.94, "grad_norm": 0.08967739621306049, "learning_rate": 9.713795915030577e-06, "loss": 1.4223, "step": 8735 }, { "epoch": 0.94, "grad_norm": 0.08957521459912487, "learning_rate": 9.679673187214022e-06, "loss": 1.4221, "step": 8736 }, { "epoch": 0.94, "grad_norm": 0.09355925735987494, "learning_rate": 9.64560991242741e-06, "loss": 1.5056, "step": 8737 }, { "epoch": 0.94, "grad_norm": 0.08319093196250256, "learning_rate": 9.611606094801052e-06, "loss": 1.5389, "step": 8738 }, { "epoch": 0.94, "grad_norm": 0.07375547083039735, "learning_rate": 9.577661738458143e-06, "loss": 1.4192, "step": 8739 }, { "epoch": 0.94, "grad_norm": 0.08483871899049837, "learning_rate": 9.543776847514507e-06, "loss": 1.5044, "step": 8740 }, { "epoch": 0.94, "grad_norm": 0.09976069792945877, "learning_rate": 9.509951426078745e-06, "loss": 1.4148, "step": 8741 }, { "epoch": 0.94, "grad_norm": 0.08695437000305326, "learning_rate": 9.476185478252352e-06, "loss": 1.367, "step": 8742 }, { "epoch": 0.94, "grad_norm": 0.09822966313776028, "learning_rate": 9.442479008129557e-06, "loss": 1.479, "step": 8743 }, { "epoch": 0.94, "grad_norm": 0.0886525612017647, "learning_rate": 9.40883201979742e-06, "loss": 1.4471, "step": 8744 }, { "epoch": 0.94, "grad_norm": 0.08808052951426312, "learning_rate": 9.37524451733568e-06, "loss": 1.2375, "step": 8745 }, { "epoch": 0.94, "grad_norm": 0.09043626699549577, "learning_rate": 9.341716504817021e-06, "loss": 1.42, "step": 8746 }, { "epoch": 0.94, "grad_norm": 0.09434528279859916, "learning_rate": 9.308247986306862e-06, "loss": 1.4542, "step": 8747 }, { "epoch": 0.94, "grad_norm": 0.09964885817378788, "learning_rate": 9.274838965863174e-06, "loss": 1.3686, "step": 8748 }, { "epoch": 0.94, "grad_norm": 0.09345635978911934, "learning_rate": 9.241489447537111e-06, "loss": 1.4048, "step": 8749 }, { "epoch": 0.94, "grad_norm": 0.0984638899196532, "learning_rate": 9.208199435372377e-06, "loss": 1.3195, "step": 8750 }, { "epoch": 0.94, "grad_norm": 0.08103394530212814, "learning_rate": 9.174968933405414e-06, "loss": 1.4755, "step": 8751 }, { "epoch": 0.94, "grad_norm": 0.10063030829037273, "learning_rate": 9.141797945665609e-06, "loss": 1.3979, "step": 8752 }, { "epoch": 0.94, "grad_norm": 0.10028878781779615, "learning_rate": 9.108686476175133e-06, "loss": 1.5094, "step": 8753 }, { "epoch": 0.94, "grad_norm": 0.1027783191091369, "learning_rate": 9.075634528948717e-06, "loss": 1.3391, "step": 8754 }, { "epoch": 0.94, "grad_norm": 0.08672950066908128, "learning_rate": 9.042642107994104e-06, "loss": 1.397, "step": 8755 }, { "epoch": 0.94, "grad_norm": 0.08870879929526027, "learning_rate": 9.0097092173117e-06, "loss": 1.364, "step": 8756 }, { "epoch": 0.94, "grad_norm": 0.09341416581700006, "learning_rate": 8.976835860894761e-06, "loss": 1.3041, "step": 8757 }, { "epoch": 0.94, "grad_norm": 0.11196489344792297, "learning_rate": 8.944022042729317e-06, "loss": 1.4337, "step": 8758 }, { "epoch": 0.94, "grad_norm": 0.10451525028124231, "learning_rate": 8.91126776679413e-06, "loss": 1.3766, "step": 8759 }, { "epoch": 0.94, "grad_norm": 0.08593762869175031, "learning_rate": 8.87857303706069e-06, "loss": 1.4982, "step": 8760 }, { "epoch": 0.94, "grad_norm": 0.07904295751893473, "learning_rate": 8.845937857493491e-06, "loss": 1.3474, "step": 8761 }, { "epoch": 0.94, "grad_norm": 0.08276173777838187, "learning_rate": 8.813362232049592e-06, "loss": 1.4948, "step": 8762 }, { "epoch": 0.94, "grad_norm": 0.0953114667332252, "learning_rate": 8.780846164678836e-06, "loss": 1.4323, "step": 8763 }, { "epoch": 0.94, "grad_norm": 0.08730452540721823, "learning_rate": 8.748389659324008e-06, "loss": 1.4438, "step": 8764 }, { "epoch": 0.94, "grad_norm": 0.09130547003069196, "learning_rate": 8.715992719920574e-06, "loss": 1.5007, "step": 8765 }, { "epoch": 0.94, "grad_norm": 0.10602039868255145, "learning_rate": 8.683655350396724e-06, "loss": 1.4545, "step": 8766 }, { "epoch": 0.94, "grad_norm": 0.08236441196708506, "learning_rate": 8.651377554673434e-06, "loss": 1.4182, "step": 8767 }, { "epoch": 0.94, "grad_norm": 0.08527216278876118, "learning_rate": 8.619159336664683e-06, "loss": 1.4018, "step": 8768 }, { "epoch": 0.94, "grad_norm": 0.08511637576566533, "learning_rate": 8.587000700276792e-06, "loss": 1.5263, "step": 8769 }, { "epoch": 0.94, "grad_norm": 0.08838260890517773, "learning_rate": 8.554901649409252e-06, "loss": 1.4725, "step": 8770 }, { "epoch": 0.94, "grad_norm": 0.08090486237576237, "learning_rate": 8.522862187954172e-06, "loss": 1.438, "step": 8771 }, { "epoch": 0.94, "grad_norm": 0.09265052659328142, "learning_rate": 8.490882319796389e-06, "loss": 1.4177, "step": 8772 }, { "epoch": 0.94, "grad_norm": 0.0914917526601237, "learning_rate": 8.458962048813634e-06, "loss": 1.3132, "step": 8773 }, { "epoch": 0.94, "grad_norm": 0.08859690630090684, "learning_rate": 8.427101378876367e-06, "loss": 1.6076, "step": 8774 }, { "epoch": 0.94, "grad_norm": 0.08901781244111316, "learning_rate": 8.39530031384772e-06, "loss": 1.4397, "step": 8775 }, { "epoch": 0.94, "grad_norm": 0.10086813132432688, "learning_rate": 8.36355885758372e-06, "loss": 1.449, "step": 8776 }, { "epoch": 0.94, "grad_norm": 0.09140538883618672, "learning_rate": 8.331877013933176e-06, "loss": 1.1962, "step": 8777 }, { "epoch": 0.94, "grad_norm": 0.08401749894727048, "learning_rate": 8.300254786737627e-06, "loss": 1.4226, "step": 8778 }, { "epoch": 0.94, "grad_norm": 0.09632136067271782, "learning_rate": 8.268692179831228e-06, "loss": 1.3028, "step": 8779 }, { "epoch": 0.94, "grad_norm": 0.08786148939407268, "learning_rate": 8.237189197041195e-06, "loss": 1.5339, "step": 8780 }, { "epoch": 0.94, "grad_norm": 0.08665394568612342, "learning_rate": 8.205745842187361e-06, "loss": 1.4943, "step": 8781 }, { "epoch": 0.94, "grad_norm": 0.10345725784687435, "learning_rate": 8.17436211908229e-06, "loss": 1.3102, "step": 8782 }, { "epoch": 0.94, "grad_norm": 0.08766312295144872, "learning_rate": 8.14303803153138e-06, "loss": 1.5278, "step": 8783 }, { "epoch": 0.94, "grad_norm": 0.08302435920388034, "learning_rate": 8.111773583332872e-06, "loss": 1.3101, "step": 8784 }, { "epoch": 0.94, "grad_norm": 0.08086046970243763, "learning_rate": 8.080568778277509e-06, "loss": 1.3628, "step": 8785 }, { "epoch": 0.94, "grad_norm": 0.07696214120226756, "learning_rate": 8.049423620149154e-06, "loss": 1.3877, "step": 8786 }, { "epoch": 0.94, "grad_norm": 0.08412889864646508, "learning_rate": 8.018338112724178e-06, "loss": 1.3764, "step": 8787 }, { "epoch": 0.94, "grad_norm": 0.11455424686467591, "learning_rate": 7.987312259771839e-06, "loss": 1.4572, "step": 8788 }, { "epoch": 0.94, "grad_norm": 0.09261780785683074, "learning_rate": 7.956346065054132e-06, "loss": 1.5225, "step": 8789 }, { "epoch": 0.94, "grad_norm": 0.09033658557442632, "learning_rate": 7.925439532325772e-06, "loss": 1.4358, "step": 8790 }, { "epoch": 0.95, "grad_norm": 0.09358776230596985, "learning_rate": 7.894592665334265e-06, "loss": 1.4481, "step": 8791 }, { "epoch": 0.95, "grad_norm": 0.10293383897592415, "learning_rate": 7.863805467820006e-06, "loss": 1.2969, "step": 8792 }, { "epoch": 0.95, "grad_norm": 0.09437810328651862, "learning_rate": 7.833077943515955e-06, "loss": 1.3577, "step": 8793 }, { "epoch": 0.95, "grad_norm": 0.09550001282233132, "learning_rate": 7.80241009614796e-06, "loss": 1.362, "step": 8794 }, { "epoch": 0.95, "grad_norm": 0.103722456788033, "learning_rate": 7.771801929434608e-06, "loss": 1.5316, "step": 8795 }, { "epoch": 0.95, "grad_norm": 0.09672039793209836, "learning_rate": 7.74125344708726e-06, "loss": 1.4404, "step": 8796 }, { "epoch": 0.95, "grad_norm": 0.11381378663777032, "learning_rate": 7.71076465281001e-06, "loss": 1.3989, "step": 8797 }, { "epoch": 0.95, "grad_norm": 0.0943974351742112, "learning_rate": 7.680335550299678e-06, "loss": 1.5955, "step": 8798 }, { "epoch": 0.95, "grad_norm": 0.08554720301543454, "learning_rate": 7.649966143245979e-06, "loss": 1.3918, "step": 8799 }, { "epoch": 0.95, "grad_norm": 0.10710528712328721, "learning_rate": 7.619656435331301e-06, "loss": 1.494, "step": 8800 }, { "epoch": 0.95, "grad_norm": 0.09189467724009155, "learning_rate": 7.589406430230705e-06, "loss": 1.3021, "step": 8801 }, { "epoch": 0.95, "grad_norm": 0.10057691654011505, "learning_rate": 7.559216131612256e-06, "loss": 1.3472, "step": 8802 }, { "epoch": 0.95, "grad_norm": 0.0821028096426198, "learning_rate": 7.529085543136472e-06, "loss": 1.4207, "step": 8803 }, { "epoch": 0.95, "grad_norm": 0.0782184005263043, "learning_rate": 7.499014668456872e-06, "loss": 1.3374, "step": 8804 }, { "epoch": 0.95, "grad_norm": 0.0876206795684908, "learning_rate": 7.469003511219707e-06, "loss": 1.4071, "step": 8805 }, { "epoch": 0.95, "grad_norm": 0.08655313629027807, "learning_rate": 7.4390520750638455e-06, "loss": 1.3533, "step": 8806 }, { "epoch": 0.95, "grad_norm": 0.0862730449274471, "learning_rate": 7.409160363621048e-06, "loss": 1.3673, "step": 8807 }, { "epoch": 0.95, "grad_norm": 0.08486986348752033, "learning_rate": 7.379328380515804e-06, "loss": 1.3241, "step": 8808 }, { "epoch": 0.95, "grad_norm": 0.09760545363166366, "learning_rate": 7.349556129365276e-06, "loss": 1.3747, "step": 8809 }, { "epoch": 0.95, "grad_norm": 0.10666679386119536, "learning_rate": 7.31984361377952e-06, "loss": 1.3719, "step": 8810 }, { "epoch": 0.95, "grad_norm": 0.10084244049618121, "learning_rate": 7.29019083736121e-06, "loss": 1.3092, "step": 8811 }, { "epoch": 0.95, "grad_norm": 0.09202231083032954, "learning_rate": 7.260597803705971e-06, "loss": 1.2991, "step": 8812 }, { "epoch": 0.95, "grad_norm": 0.10290993988751386, "learning_rate": 7.23106451640193e-06, "loss": 1.4151, "step": 8813 }, { "epoch": 0.95, "grad_norm": 0.08561006665964292, "learning_rate": 7.201590979030115e-06, "loss": 1.4943, "step": 8814 }, { "epoch": 0.95, "grad_norm": 0.0865490043036091, "learning_rate": 7.172177195164386e-06, "loss": 1.3253, "step": 8815 }, { "epoch": 0.95, "grad_norm": 0.09230136981204798, "learning_rate": 7.1428231683711705e-06, "loss": 1.3461, "step": 8816 }, { "epoch": 0.95, "grad_norm": 0.09945844805144954, "learning_rate": 7.113528902209787e-06, "loss": 1.2755, "step": 8817 }, { "epoch": 0.95, "grad_norm": 0.09948172641397596, "learning_rate": 7.084294400232283e-06, "loss": 1.3729, "step": 8818 }, { "epoch": 0.95, "grad_norm": 0.0992359215039714, "learning_rate": 7.055119665983378e-06, "loss": 1.4723, "step": 8819 }, { "epoch": 0.95, "grad_norm": 0.10929611354225881, "learning_rate": 7.026004703000688e-06, "loss": 1.389, "step": 8820 }, { "epoch": 0.95, "grad_norm": 0.08294067051563485, "learning_rate": 6.996949514814499e-06, "loss": 1.2973, "step": 8821 }, { "epoch": 0.95, "grad_norm": 0.11065562311316486, "learning_rate": 6.967954104947771e-06, "loss": 1.4557, "step": 8822 }, { "epoch": 0.95, "grad_norm": 0.10295369467271562, "learning_rate": 6.939018476916359e-06, "loss": 1.3141, "step": 8823 }, { "epoch": 0.95, "grad_norm": 0.09378386174057356, "learning_rate": 6.910142634228789e-06, "loss": 1.3767, "step": 8824 }, { "epoch": 0.95, "grad_norm": 0.09977427325258244, "learning_rate": 6.8813265803863715e-06, "loss": 1.5256, "step": 8825 }, { "epoch": 0.95, "grad_norm": 0.10057069838924039, "learning_rate": 6.852570318883145e-06, "loss": 1.4013, "step": 8826 }, { "epoch": 0.95, "grad_norm": 0.09187264561880268, "learning_rate": 6.8238738532059306e-06, "loss": 1.4982, "step": 8827 }, { "epoch": 0.95, "grad_norm": 0.11291029430421218, "learning_rate": 6.795237186834169e-06, "loss": 1.3988, "step": 8828 }, { "epoch": 0.95, "grad_norm": 0.08897163991167156, "learning_rate": 6.766660323240303e-06, "loss": 1.5356, "step": 8829 }, { "epoch": 0.95, "grad_norm": 0.09496027710275812, "learning_rate": 6.738143265889285e-06, "loss": 1.4172, "step": 8830 }, { "epoch": 0.95, "grad_norm": 0.08272734550929048, "learning_rate": 6.709686018238958e-06, "loss": 1.3842, "step": 8831 }, { "epoch": 0.95, "grad_norm": 0.102791559328205, "learning_rate": 6.681288583739786e-06, "loss": 1.4251, "step": 8832 }, { "epoch": 0.95, "grad_norm": 0.108585952617785, "learning_rate": 6.652950965835181e-06, "loss": 1.3605, "step": 8833 }, { "epoch": 0.95, "grad_norm": 0.10555789963522569, "learning_rate": 6.624673167961004e-06, "loss": 1.4318, "step": 8834 }, { "epoch": 0.95, "grad_norm": 0.0852695401594645, "learning_rate": 6.59645519354618e-06, "loss": 1.3621, "step": 8835 }, { "epoch": 0.95, "grad_norm": 0.09853582405605046, "learning_rate": 6.568297046012195e-06, "loss": 1.3444, "step": 8836 }, { "epoch": 0.95, "grad_norm": 0.08919377889013902, "learning_rate": 6.540198728773262e-06, "loss": 1.3925, "step": 8837 }, { "epoch": 0.95, "grad_norm": 0.08643002957608505, "learning_rate": 6.512160245236431e-06, "loss": 1.3932, "step": 8838 }, { "epoch": 0.95, "grad_norm": 0.08706342127575102, "learning_rate": 6.484181598801541e-06, "loss": 1.4577, "step": 8839 }, { "epoch": 0.95, "grad_norm": 0.0957071682990628, "learning_rate": 6.4562627928610455e-06, "loss": 1.3636, "step": 8840 }, { "epoch": 0.95, "grad_norm": 0.09110174617138717, "learning_rate": 6.4284038308001224e-06, "loss": 1.3166, "step": 8841 }, { "epoch": 0.95, "grad_norm": 0.09374793989183004, "learning_rate": 6.400604715996905e-06, "loss": 1.3368, "step": 8842 }, { "epoch": 0.95, "grad_norm": 0.11342827940082034, "learning_rate": 6.372865451822085e-06, "loss": 1.2744, "step": 8843 }, { "epoch": 0.95, "grad_norm": 0.09499693290134015, "learning_rate": 6.345186041639028e-06, "loss": 1.4215, "step": 8844 }, { "epoch": 0.95, "grad_norm": 0.08185468984627872, "learning_rate": 6.317566488804105e-06, "loss": 1.4175, "step": 8845 }, { "epoch": 0.95, "grad_norm": 0.09568579794257903, "learning_rate": 6.290006796666248e-06, "loss": 1.5329, "step": 8846 }, { "epoch": 0.95, "grad_norm": 0.08883975464927474, "learning_rate": 6.262506968567061e-06, "loss": 1.3307, "step": 8847 }, { "epoch": 0.95, "grad_norm": 0.08740844984942242, "learning_rate": 6.2350670078411555e-06, "loss": 1.3654, "step": 8848 }, { "epoch": 0.95, "grad_norm": 0.09062562990586284, "learning_rate": 6.207686917815592e-06, "loss": 1.4515, "step": 8849 }, { "epoch": 0.95, "grad_norm": 0.08741639466532415, "learning_rate": 6.1803667018103805e-06, "loss": 1.4691, "step": 8850 }, { "epoch": 0.95, "grad_norm": 0.10116002798678095, "learning_rate": 6.15310636313815e-06, "loss": 1.3397, "step": 8851 }, { "epoch": 0.95, "grad_norm": 0.09585867618812836, "learning_rate": 6.125905905104368e-06, "loss": 1.3576, "step": 8852 }, { "epoch": 0.95, "grad_norm": 0.11018206546669015, "learning_rate": 6.098765331007061e-06, "loss": 1.3525, "step": 8853 }, { "epoch": 0.95, "grad_norm": 0.08725015747575635, "learning_rate": 6.0716846441372655e-06, "loss": 1.3746, "step": 8854 }, { "epoch": 0.95, "grad_norm": 0.09572859468974974, "learning_rate": 6.04466384777852e-06, "loss": 1.4461, "step": 8855 }, { "epoch": 0.95, "grad_norm": 0.09604637593854443, "learning_rate": 6.017702945207149e-06, "loss": 1.529, "step": 8856 }, { "epoch": 0.95, "grad_norm": 0.09035469492567368, "learning_rate": 5.990801939692314e-06, "loss": 1.4351, "step": 8857 }, { "epoch": 0.95, "grad_norm": 0.0945266969821737, "learning_rate": 5.963960834495907e-06, "loss": 1.3995, "step": 8858 }, { "epoch": 0.95, "grad_norm": 0.08392027138389029, "learning_rate": 5.937179632872436e-06, "loss": 1.3874, "step": 8859 }, { "epoch": 0.95, "grad_norm": 0.09700625587597556, "learning_rate": 5.9104583380691914e-06, "loss": 1.4869, "step": 8860 }, { "epoch": 0.95, "grad_norm": 0.08440155967829839, "learning_rate": 5.883796953326359e-06, "loss": 1.3654, "step": 8861 }, { "epoch": 0.95, "grad_norm": 0.08299105068275839, "learning_rate": 5.85719548187652e-06, "loss": 1.4762, "step": 8862 }, { "epoch": 0.95, "grad_norm": 0.09878299275346496, "learning_rate": 5.8306539269453725e-06, "loss": 1.4466, "step": 8863 }, { "epoch": 0.95, "grad_norm": 0.08433678896475492, "learning_rate": 5.804172291751064e-06, "loss": 1.3548, "step": 8864 }, { "epoch": 0.95, "grad_norm": 0.09183326720148738, "learning_rate": 5.777750579504581e-06, "loss": 1.3366, "step": 8865 }, { "epoch": 0.95, "grad_norm": 0.08709653574788266, "learning_rate": 5.7513887934096954e-06, "loss": 1.4318, "step": 8866 }, { "epoch": 0.95, "grad_norm": 0.09201497815107694, "learning_rate": 5.725086936662905e-06, "loss": 1.3497, "step": 8867 }, { "epoch": 0.95, "grad_norm": 0.11283563450980807, "learning_rate": 5.698845012453324e-06, "loss": 1.4845, "step": 8868 }, { "epoch": 0.95, "grad_norm": 0.09315366127124443, "learning_rate": 5.672663023962854e-06, "loss": 1.4852, "step": 8869 }, { "epoch": 0.95, "grad_norm": 0.08316494710549423, "learning_rate": 5.646540974366287e-06, "loss": 1.4266, "step": 8870 }, { "epoch": 0.95, "grad_norm": 0.09282115286451564, "learning_rate": 5.620478866830814e-06, "loss": 1.4259, "step": 8871 }, { "epoch": 0.95, "grad_norm": 0.10159676345240197, "learning_rate": 5.594476704516738e-06, "loss": 1.3527, "step": 8872 }, { "epoch": 0.95, "grad_norm": 0.09893435119118751, "learning_rate": 5.5685344905768156e-06, "loss": 1.5138, "step": 8873 }, { "epoch": 0.95, "grad_norm": 0.09329564540974669, "learning_rate": 5.542652228156697e-06, "loss": 1.464, "step": 8874 }, { "epoch": 0.95, "grad_norm": 0.09485592930638007, "learning_rate": 5.516829920394595e-06, "loss": 1.4764, "step": 8875 }, { "epoch": 0.95, "grad_norm": 0.09133805600317245, "learning_rate": 5.4910675704216154e-06, "loss": 1.5208, "step": 8876 }, { "epoch": 0.95, "grad_norm": 0.08495275003072635, "learning_rate": 5.4653651813615366e-06, "loss": 1.388, "step": 8877 }, { "epoch": 0.95, "grad_norm": 0.08760989309934672, "learning_rate": 5.439722756330812e-06, "loss": 1.4567, "step": 8878 }, { "epoch": 0.95, "grad_norm": 0.09048100636363733, "learning_rate": 5.41414029843873e-06, "loss": 1.4263, "step": 8879 }, { "epoch": 0.95, "grad_norm": 0.09197041411794825, "learning_rate": 5.388617810787255e-06, "loss": 1.3345, "step": 8880 }, { "epoch": 0.95, "grad_norm": 0.10020996050813048, "learning_rate": 5.3631552964710784e-06, "loss": 1.4928, "step": 8881 }, { "epoch": 0.95, "grad_norm": 0.09509674597731703, "learning_rate": 5.337752758577563e-06, "loss": 1.5502, "step": 8882 }, { "epoch": 0.95, "grad_norm": 0.09385515688158769, "learning_rate": 5.312410200186857e-06, "loss": 1.4702, "step": 8883 }, { "epoch": 0.96, "grad_norm": 0.08914837524362412, "learning_rate": 5.287127624371946e-06, "loss": 1.4887, "step": 8884 }, { "epoch": 0.96, "grad_norm": 0.09664353503901052, "learning_rate": 5.2619050341982665e-06, "loss": 1.5038, "step": 8885 }, { "epoch": 0.96, "grad_norm": 0.09159510331470239, "learning_rate": 5.236742432724262e-06, "loss": 1.5056, "step": 8886 }, { "epoch": 0.96, "grad_norm": 0.08356282150014831, "learning_rate": 5.2116398230009355e-06, "loss": 1.3525, "step": 8887 }, { "epoch": 0.96, "grad_norm": 0.08806156912501846, "learning_rate": 5.1865972080720195e-06, "loss": 1.4004, "step": 8888 }, { "epoch": 0.96, "grad_norm": 0.09457677127411936, "learning_rate": 5.16161459097414e-06, "loss": 1.4904, "step": 8889 }, { "epoch": 0.96, "grad_norm": 0.09236016076443747, "learning_rate": 5.136691974736429e-06, "loss": 1.4403, "step": 8890 }, { "epoch": 0.96, "grad_norm": 0.09057456912244687, "learning_rate": 5.11182936238086e-06, "loss": 1.2834, "step": 8891 }, { "epoch": 0.96, "grad_norm": 0.09408635409281225, "learning_rate": 5.087026756922187e-06, "loss": 1.4948, "step": 8892 }, { "epoch": 0.96, "grad_norm": 0.08715396491466379, "learning_rate": 5.062284161367669e-06, "loss": 1.2594, "step": 8893 }, { "epoch": 0.96, "grad_norm": 0.09798288199490669, "learning_rate": 5.037601578717521e-06, "loss": 1.3734, "step": 8894 }, { "epoch": 0.96, "grad_norm": 0.09873923966438046, "learning_rate": 5.01297901196468e-06, "loss": 1.5297, "step": 8895 }, { "epoch": 0.96, "grad_norm": 0.09524621935613473, "learning_rate": 4.9884164640944806e-06, "loss": 1.3661, "step": 8896 }, { "epoch": 0.96, "grad_norm": 0.08793095352213838, "learning_rate": 4.963913938085374e-06, "loss": 1.3534, "step": 8897 }, { "epoch": 0.96, "grad_norm": 0.08738144033221784, "learning_rate": 4.939471436908427e-06, "loss": 1.4034, "step": 8898 }, { "epoch": 0.96, "grad_norm": 0.08983226630169384, "learning_rate": 4.915088963527214e-06, "loss": 1.2978, "step": 8899 }, { "epoch": 0.96, "grad_norm": 0.09660666651070206, "learning_rate": 4.890766520898315e-06, "loss": 1.3553, "step": 8900 }, { "epoch": 0.96, "grad_norm": 0.0857590426342619, "learning_rate": 4.866504111970871e-06, "loss": 1.4055, "step": 8901 }, { "epoch": 0.96, "grad_norm": 0.09299660118242754, "learning_rate": 4.8423017396868055e-06, "loss": 1.3946, "step": 8902 }, { "epoch": 0.96, "grad_norm": 0.09265501470007696, "learning_rate": 4.818159406980715e-06, "loss": 1.424, "step": 8903 }, { "epoch": 0.96, "grad_norm": 0.0873024345487018, "learning_rate": 4.794077116779927e-06, "loss": 1.2523, "step": 8904 }, { "epoch": 0.96, "grad_norm": 0.0941279091102085, "learning_rate": 4.770054872004548e-06, "loss": 1.3808, "step": 8905 }, { "epoch": 0.96, "grad_norm": 0.10765797937551637, "learning_rate": 4.74609267556736e-06, "loss": 1.3262, "step": 8906 }, { "epoch": 0.96, "grad_norm": 0.10585056616671595, "learning_rate": 4.72219053037376e-06, "loss": 1.2806, "step": 8907 }, { "epoch": 0.96, "grad_norm": 0.10284106309103908, "learning_rate": 4.6983484393220974e-06, "loss": 1.3896, "step": 8908 }, { "epoch": 0.96, "grad_norm": 0.0927126007379314, "learning_rate": 4.674566405303227e-06, "loss": 1.3906, "step": 8909 }, { "epoch": 0.96, "grad_norm": 0.09260180558774815, "learning_rate": 4.65084443120084e-06, "loss": 1.3763, "step": 8910 }, { "epoch": 0.96, "grad_norm": 0.09414960587863648, "learning_rate": 4.627182519891304e-06, "loss": 1.4103, "step": 8911 }, { "epoch": 0.96, "grad_norm": 0.09675803044725435, "learning_rate": 4.603580674243657e-06, "loss": 1.3207, "step": 8912 }, { "epoch": 0.96, "grad_norm": 0.09529069911101469, "learning_rate": 4.580038897119776e-06, "loss": 1.5118, "step": 8913 }, { "epoch": 0.96, "grad_norm": 0.07988399140996678, "learning_rate": 4.5565571913741e-06, "loss": 1.2666, "step": 8914 }, { "epoch": 0.96, "grad_norm": 0.08536402815347709, "learning_rate": 4.533135559853962e-06, "loss": 1.4486, "step": 8915 }, { "epoch": 0.96, "grad_norm": 0.08185123235807608, "learning_rate": 4.509774005399314e-06, "loss": 1.388, "step": 8916 }, { "epoch": 0.96, "grad_norm": 0.09593022780045278, "learning_rate": 4.486472530842723e-06, "loss": 1.5757, "step": 8917 }, { "epoch": 0.96, "grad_norm": 0.08176639351464489, "learning_rate": 4.463231139009649e-06, "loss": 1.4924, "step": 8918 }, { "epoch": 0.96, "grad_norm": 0.07809646949759338, "learning_rate": 4.440049832718174e-06, "loss": 1.3543, "step": 8919 }, { "epoch": 0.96, "grad_norm": 0.09367404408882468, "learning_rate": 4.416928614779103e-06, "loss": 1.4076, "step": 8920 }, { "epoch": 0.96, "grad_norm": 0.08363712680509189, "learning_rate": 4.3938674879959726e-06, "loss": 1.364, "step": 8921 }, { "epoch": 0.96, "grad_norm": 0.0999487851449133, "learning_rate": 4.370866455165046e-06, "loss": 1.3693, "step": 8922 }, { "epoch": 0.96, "grad_norm": 0.09668059541295229, "learning_rate": 4.3479255190752574e-06, "loss": 1.3714, "step": 8923 }, { "epoch": 0.96, "grad_norm": 0.08567231889954466, "learning_rate": 4.3250446825082166e-06, "loss": 1.4526, "step": 8924 }, { "epoch": 0.96, "grad_norm": 0.08982678310195207, "learning_rate": 4.302223948238426e-06, "loss": 1.4021, "step": 8925 }, { "epoch": 0.96, "grad_norm": 0.09531371167655645, "learning_rate": 4.279463319032894e-06, "loss": 1.321, "step": 8926 }, { "epoch": 0.96, "grad_norm": 0.08096601838384176, "learning_rate": 4.256762797651414e-06, "loss": 1.2234, "step": 8927 }, { "epoch": 0.96, "grad_norm": 0.09513159682892831, "learning_rate": 4.2341223868465615e-06, "loss": 1.4223, "step": 8928 }, { "epoch": 0.96, "grad_norm": 0.10007520400593102, "learning_rate": 4.2115420893635295e-06, "loss": 1.4104, "step": 8929 }, { "epoch": 0.96, "grad_norm": 0.07827562474694724, "learning_rate": 4.189021907940238e-06, "loss": 1.3046, "step": 8930 }, { "epoch": 0.96, "grad_norm": 0.0964464664312945, "learning_rate": 4.166561845307393e-06, "loss": 1.4174, "step": 8931 }, { "epoch": 0.96, "grad_norm": 0.09301449302503279, "learning_rate": 4.144161904188315e-06, "loss": 1.4113, "step": 8932 }, { "epoch": 0.96, "grad_norm": 0.09578933773852154, "learning_rate": 4.1218220872990546e-06, "loss": 1.3976, "step": 8933 }, { "epoch": 0.96, "grad_norm": 0.08666688349596204, "learning_rate": 4.099542397348444e-06, "loss": 1.4173, "step": 8934 }, { "epoch": 0.96, "grad_norm": 0.07548291457470417, "learning_rate": 4.077322837037933e-06, "loss": 1.2473, "step": 8935 }, { "epoch": 0.96, "grad_norm": 0.08139822299567276, "learning_rate": 4.0551634090617575e-06, "loss": 1.3223, "step": 8936 }, { "epoch": 0.96, "grad_norm": 0.08647643102992054, "learning_rate": 4.033064116106766e-06, "loss": 1.3527, "step": 8937 }, { "epoch": 0.96, "grad_norm": 0.09598127034315897, "learning_rate": 4.01102496085265e-06, "loss": 1.4437, "step": 8938 }, { "epoch": 0.96, "grad_norm": 0.0811119581027814, "learning_rate": 3.989045945971659e-06, "loss": 1.3237, "step": 8939 }, { "epoch": 0.96, "grad_norm": 0.10572601439524844, "learning_rate": 3.967127074128885e-06, "loss": 1.3066, "step": 8940 }, { "epoch": 0.96, "grad_norm": 0.08475692276958388, "learning_rate": 3.945268347981979e-06, "loss": 1.2337, "step": 8941 }, { "epoch": 0.96, "grad_norm": 0.08313401249782255, "learning_rate": 3.923469770181543e-06, "loss": 1.387, "step": 8942 }, { "epoch": 0.96, "grad_norm": 0.08968561187112413, "learning_rate": 3.901731343370574e-06, "loss": 1.4705, "step": 8943 }, { "epoch": 0.96, "grad_norm": 0.08126894441730576, "learning_rate": 3.880053070184964e-06, "loss": 1.4054, "step": 8944 }, { "epoch": 0.96, "grad_norm": 0.08610883749970781, "learning_rate": 3.858434953253332e-06, "loss": 1.4202, "step": 8945 }, { "epoch": 0.96, "grad_norm": 0.0860092367603009, "learning_rate": 3.836876995196914e-06, "loss": 1.478, "step": 8946 }, { "epoch": 0.96, "grad_norm": 0.10382181892590997, "learning_rate": 3.815379198629732e-06, "loss": 1.4004, "step": 8947 }, { "epoch": 0.96, "grad_norm": 0.08284626907086795, "learning_rate": 3.793941566158421e-06, "loss": 1.3705, "step": 8948 }, { "epoch": 0.96, "grad_norm": 0.08119746209465538, "learning_rate": 3.7725641003823476e-06, "loss": 1.437, "step": 8949 }, { "epoch": 0.96, "grad_norm": 0.16574799900803355, "learning_rate": 3.75124680389366e-06, "loss": 1.4948, "step": 8950 }, { "epoch": 0.96, "grad_norm": 0.08457090061547087, "learning_rate": 3.7299896792771236e-06, "loss": 1.3338, "step": 8951 }, { "epoch": 0.96, "grad_norm": 0.08507021365850548, "learning_rate": 3.708792729110233e-06, "loss": 1.2983, "step": 8952 }, { "epoch": 0.96, "grad_norm": 0.09508738545345005, "learning_rate": 3.687655955963154e-06, "loss": 1.3152, "step": 8953 }, { "epoch": 0.96, "grad_norm": 0.08769842563914661, "learning_rate": 3.666579362398892e-06, "loss": 1.3787, "step": 8954 }, { "epoch": 0.96, "grad_norm": 0.09713108114963706, "learning_rate": 3.645562950973014e-06, "loss": 1.5562, "step": 8955 }, { "epoch": 0.96, "grad_norm": 0.09295817353089024, "learning_rate": 3.624606724233759e-06, "loss": 1.3034, "step": 8956 }, { "epoch": 0.96, "grad_norm": 0.09973446153914593, "learning_rate": 3.6037106847223168e-06, "loss": 1.4841, "step": 8957 }, { "epoch": 0.96, "grad_norm": 0.08914007872724189, "learning_rate": 3.5828748349722164e-06, "loss": 1.4014, "step": 8958 }, { "epoch": 0.96, "grad_norm": 0.09706649081960167, "learning_rate": 3.5620991775099363e-06, "loss": 1.4964, "step": 8959 }, { "epoch": 0.96, "grad_norm": 0.08729316376152077, "learning_rate": 3.5413837148546847e-06, "loss": 1.4441, "step": 8960 }, { "epoch": 0.96, "grad_norm": 0.07420659828466358, "learning_rate": 3.520728449518118e-06, "loss": 1.4565, "step": 8961 }, { "epoch": 0.96, "grad_norm": 0.10539629443071484, "learning_rate": 3.5001333840049e-06, "loss": 1.4997, "step": 8962 }, { "epoch": 0.96, "grad_norm": 0.08586868055644019, "learning_rate": 3.4795985208121994e-06, "loss": 1.3828, "step": 8963 }, { "epoch": 0.96, "grad_norm": 0.10015038752842724, "learning_rate": 3.459123862429969e-06, "loss": 1.3812, "step": 8964 }, { "epoch": 0.96, "grad_norm": 0.10194468458724672, "learning_rate": 3.438709411340779e-06, "loss": 1.3115, "step": 8965 }, { "epoch": 0.96, "grad_norm": 0.09206340021786165, "learning_rate": 3.4183551700199823e-06, "loss": 1.3946, "step": 8966 }, { "epoch": 0.96, "grad_norm": 0.08262825186118691, "learning_rate": 3.3980611409356044e-06, "loss": 1.3658, "step": 8967 }, { "epoch": 0.96, "grad_norm": 0.08863719060179712, "learning_rate": 3.377827326548344e-06, "loss": 1.5557, "step": 8968 }, { "epoch": 0.96, "grad_norm": 0.08816582721504075, "learning_rate": 3.3576537293116825e-06, "loss": 1.3183, "step": 8969 }, { "epoch": 0.96, "grad_norm": 0.0976051244422402, "learning_rate": 3.337540351671664e-06, "loss": 1.375, "step": 8970 }, { "epoch": 0.96, "grad_norm": 0.09067808571609655, "learning_rate": 3.317487196067115e-06, "loss": 1.4822, "step": 8971 }, { "epoch": 0.96, "grad_norm": 0.09566472825698379, "learning_rate": 3.2974942649295904e-06, "loss": 1.475, "step": 8972 }, { "epoch": 0.96, "grad_norm": 0.08622749394764569, "learning_rate": 3.277561560683262e-06, "loss": 1.4134, "step": 8973 }, { "epoch": 0.96, "grad_norm": 0.08993542829439416, "learning_rate": 3.257689085745086e-06, "loss": 1.3753, "step": 8974 }, { "epoch": 0.96, "grad_norm": 0.09586665598997336, "learning_rate": 3.237876842524634e-06, "loss": 1.4037, "step": 8975 }, { "epoch": 0.96, "grad_norm": 0.08857577090180915, "learning_rate": 3.2181248334242076e-06, "loss": 1.3484, "step": 8976 }, { "epoch": 0.97, "grad_norm": 0.09287873616748278, "learning_rate": 3.1984330608387793e-06, "loss": 1.344, "step": 8977 }, { "epoch": 0.97, "grad_norm": 0.09875558892977225, "learning_rate": 3.1788015271561053e-06, "loss": 1.4483, "step": 8978 }, { "epoch": 0.97, "grad_norm": 0.09041828728366473, "learning_rate": 3.1592302347565605e-06, "loss": 1.3532, "step": 8979 }, { "epoch": 0.97, "grad_norm": 0.07800414751561797, "learning_rate": 3.1397191860132456e-06, "loss": 1.3968, "step": 8980 }, { "epoch": 0.97, "grad_norm": 0.08232355971655188, "learning_rate": 3.1202683832918797e-06, "loss": 1.3541, "step": 8981 }, { "epoch": 0.97, "grad_norm": 0.0951746586827946, "learning_rate": 3.1008778289509654e-06, "loss": 1.6037, "step": 8982 }, { "epoch": 0.97, "grad_norm": 0.08960082679565905, "learning_rate": 3.0815475253417325e-06, "loss": 1.4719, "step": 8983 }, { "epoch": 0.97, "grad_norm": 0.11251667581254801, "learning_rate": 3.0622774748079175e-06, "loss": 1.3597, "step": 8984 }, { "epoch": 0.97, "grad_norm": 0.09527485741233266, "learning_rate": 3.043067679686262e-06, "loss": 1.3548, "step": 8985 }, { "epoch": 0.97, "grad_norm": 0.0836889698103457, "learning_rate": 3.0239181423058483e-06, "loss": 1.4547, "step": 8986 }, { "epoch": 0.97, "grad_norm": 0.0796557012099375, "learning_rate": 3.004828864988707e-06, "loss": 1.277, "step": 8987 }, { "epoch": 0.97, "grad_norm": 0.09149789312059337, "learning_rate": 2.9857998500494866e-06, "loss": 1.3898, "step": 8988 }, { "epoch": 0.97, "grad_norm": 0.08092008180043826, "learning_rate": 2.9668310997955083e-06, "loss": 1.3283, "step": 8989 }, { "epoch": 0.97, "grad_norm": 0.09915783393849893, "learning_rate": 2.9479226165268215e-06, "loss": 1.4008, "step": 8990 }, { "epoch": 0.97, "grad_norm": 0.08577745745175479, "learning_rate": 2.9290744025360915e-06, "loss": 1.3352, "step": 8991 }, { "epoch": 0.97, "grad_norm": 0.08612397942263612, "learning_rate": 2.910286460108713e-06, "loss": 1.4729, "step": 8992 }, { "epoch": 0.97, "grad_norm": 0.07870393466324724, "learning_rate": 2.891558791522864e-06, "loss": 1.4015, "step": 8993 }, { "epoch": 0.97, "grad_norm": 0.08394931460604015, "learning_rate": 2.872891399049338e-06, "loss": 1.2502, "step": 8994 }, { "epoch": 0.97, "grad_norm": 0.10427009072509341, "learning_rate": 2.854284284951547e-06, "loss": 1.428, "step": 8995 }, { "epoch": 0.97, "grad_norm": 0.08846241604213603, "learning_rate": 2.8357374514856872e-06, "loss": 1.2706, "step": 8996 }, { "epoch": 0.97, "grad_norm": 0.09327150463271378, "learning_rate": 2.817250900900681e-06, "loss": 1.3801, "step": 8997 }, { "epoch": 0.97, "grad_norm": 0.11128758227648324, "learning_rate": 2.798824635438069e-06, "loss": 1.3507, "step": 8998 }, { "epoch": 0.97, "grad_norm": 0.08923177417576252, "learning_rate": 2.780458657332008e-06, "loss": 1.3579, "step": 8999 }, { "epoch": 0.97, "grad_norm": 0.08947199378843616, "learning_rate": 2.762152968809606e-06, "loss": 1.4248, "step": 9000 }, { "epoch": 0.97, "grad_norm": 0.09648030606466207, "learning_rate": 2.743907572090365e-06, "loss": 1.4384, "step": 9001 }, { "epoch": 0.97, "grad_norm": 0.08206314363811901, "learning_rate": 2.7257224693866266e-06, "loss": 1.4017, "step": 9002 }, { "epoch": 0.97, "grad_norm": 0.09363077823329279, "learning_rate": 2.7075976629033495e-06, "loss": 1.3157, "step": 9003 }, { "epoch": 0.97, "grad_norm": 0.09298672001535016, "learning_rate": 2.689533154838386e-06, "loss": 1.3577, "step": 9004 }, { "epoch": 0.97, "grad_norm": 0.11480343177435744, "learning_rate": 2.671528947381929e-06, "loss": 1.5121, "step": 9005 }, { "epoch": 0.97, "grad_norm": 0.0905443724363777, "learning_rate": 2.653585042717177e-06, "loss": 1.3501, "step": 9006 }, { "epoch": 0.97, "grad_norm": 0.0913932632121586, "learning_rate": 2.635701443019889e-06, "loss": 1.2952, "step": 9007 }, { "epoch": 0.97, "grad_norm": 0.10332073678504636, "learning_rate": 2.617878150458386e-06, "loss": 1.4585, "step": 9008 }, { "epoch": 0.97, "grad_norm": 0.09795339200836412, "learning_rate": 2.600115167193995e-06, "loss": 1.4621, "step": 9009 }, { "epoch": 0.97, "grad_norm": 0.08852957559404895, "learning_rate": 2.582412495380382e-06, "loss": 1.4335, "step": 9010 }, { "epoch": 0.97, "grad_norm": 0.09455078692555385, "learning_rate": 2.5647701371641075e-06, "loss": 1.4161, "step": 9011 }, { "epoch": 0.97, "grad_norm": 0.09596235942780901, "learning_rate": 2.547188094684405e-06, "loss": 1.4151, "step": 9012 }, { "epoch": 0.97, "grad_norm": 0.08374721842744182, "learning_rate": 2.5296663700731247e-06, "loss": 1.3216, "step": 9013 }, { "epoch": 0.97, "grad_norm": 0.09278676414140465, "learning_rate": 2.5122049654547897e-06, "loss": 1.456, "step": 9014 }, { "epoch": 0.97, "grad_norm": 0.08964255210780839, "learning_rate": 2.494803882946761e-06, "loss": 1.3075, "step": 9015 }, { "epoch": 0.97, "grad_norm": 0.09620974578886025, "learning_rate": 2.4774631246589074e-06, "loss": 1.4034, "step": 9016 }, { "epoch": 0.97, "grad_norm": 0.11383840109527507, "learning_rate": 2.4601826926938242e-06, "loss": 1.5086, "step": 9017 }, { "epoch": 0.97, "grad_norm": 0.09144766054964178, "learning_rate": 2.442962589146891e-06, "loss": 1.3065, "step": 9018 }, { "epoch": 0.97, "grad_norm": 0.08185228768258924, "learning_rate": 2.425802816106104e-06, "loss": 1.3369, "step": 9019 }, { "epoch": 0.97, "grad_norm": 0.0840524727285538, "learning_rate": 2.4087033756521328e-06, "loss": 1.4063, "step": 9020 }, { "epoch": 0.97, "grad_norm": 0.08907504192099573, "learning_rate": 2.391664269858318e-06, "loss": 1.3916, "step": 9021 }, { "epoch": 0.97, "grad_norm": 0.09370670461297587, "learning_rate": 2.374685500790785e-06, "loss": 1.4576, "step": 9022 }, { "epoch": 0.97, "grad_norm": 0.08615449445440274, "learning_rate": 2.3577670705081654e-06, "loss": 1.4466, "step": 9023 }, { "epoch": 0.97, "grad_norm": 0.08779102881285784, "learning_rate": 2.3409089810618734e-06, "loss": 1.3638, "step": 9024 }, { "epoch": 0.97, "grad_norm": 0.08879645514225343, "learning_rate": 2.324111234496107e-06, "loss": 1.477, "step": 9025 }, { "epoch": 0.97, "grad_norm": 0.08444548732237621, "learning_rate": 2.3073738328476255e-06, "loss": 1.4513, "step": 9026 }, { "epoch": 0.97, "grad_norm": 0.09078258966657334, "learning_rate": 2.2906967781458065e-06, "loss": 1.5172, "step": 9027 }, { "epoch": 0.97, "grad_norm": 0.09488873511558187, "learning_rate": 2.2740800724129206e-06, "loss": 1.4049, "step": 9028 }, { "epoch": 0.97, "grad_norm": 0.09875341751463869, "learning_rate": 2.257523717663745e-06, "loss": 1.4016, "step": 9029 }, { "epoch": 0.97, "grad_norm": 0.11312923220297097, "learning_rate": 2.2410277159057858e-06, "loss": 1.2807, "step": 9030 }, { "epoch": 0.97, "grad_norm": 0.09081658104439949, "learning_rate": 2.2245920691392753e-06, "loss": 1.3431, "step": 9031 }, { "epoch": 0.97, "grad_norm": 0.09947730055048168, "learning_rate": 2.208216779357064e-06, "loss": 1.3545, "step": 9032 }, { "epoch": 0.97, "grad_norm": 0.0918733894258067, "learning_rate": 2.1919018485446753e-06, "loss": 1.4349, "step": 9033 }, { "epoch": 0.97, "grad_norm": 0.09406473803075574, "learning_rate": 2.175647278680415e-06, "loss": 1.4259, "step": 9034 }, { "epoch": 0.97, "grad_norm": 0.08127694221980779, "learning_rate": 2.1594530717352068e-06, "loss": 1.3125, "step": 9035 }, { "epoch": 0.97, "grad_norm": 0.09175464617559018, "learning_rate": 2.1433192296725912e-06, "loss": 1.3946, "step": 9036 }, { "epoch": 0.97, "grad_norm": 0.09641222973699672, "learning_rate": 2.127245754448892e-06, "loss": 1.4488, "step": 9037 }, { "epoch": 0.97, "grad_norm": 0.08726870720260296, "learning_rate": 2.111232648013106e-06, "loss": 1.3105, "step": 9038 }, { "epoch": 0.97, "grad_norm": 0.11137866349602377, "learning_rate": 2.0952799123068466e-06, "loss": 1.3899, "step": 9039 }, { "epoch": 0.97, "grad_norm": 0.08429603496474124, "learning_rate": 2.0793875492644e-06, "loss": 1.3003, "step": 9040 }, { "epoch": 0.97, "grad_norm": 0.10461075575496355, "learning_rate": 2.0635555608128354e-06, "loss": 1.3635, "step": 9041 }, { "epoch": 0.97, "grad_norm": 0.09558409348647286, "learning_rate": 2.04778394887184e-06, "loss": 1.3797, "step": 9042 }, { "epoch": 0.97, "grad_norm": 0.09213081325338919, "learning_rate": 2.0320727153537165e-06, "loss": 1.4851, "step": 9043 }, { "epoch": 0.97, "grad_norm": 0.10022093578171336, "learning_rate": 2.016421862163498e-06, "loss": 1.3951, "step": 9044 }, { "epoch": 0.97, "grad_norm": 0.09523644080123797, "learning_rate": 2.0008313911989986e-06, "loss": 1.3995, "step": 9045 }, { "epoch": 0.97, "grad_norm": 0.09133021078387467, "learning_rate": 1.9853013043504844e-06, "loss": 1.4518, "step": 9046 }, { "epoch": 0.97, "grad_norm": 0.08688534304999465, "learning_rate": 1.969831603501171e-06, "loss": 1.3965, "step": 9047 }, { "epoch": 0.97, "grad_norm": 0.08773971157963967, "learning_rate": 1.954422290526725e-06, "loss": 1.3931, "step": 9048 }, { "epoch": 0.97, "grad_norm": 0.09246961888491326, "learning_rate": 1.9390733672955406e-06, "loss": 1.3227, "step": 9049 }, { "epoch": 0.97, "grad_norm": 0.10713648382422544, "learning_rate": 1.9237848356688514e-06, "loss": 1.4001, "step": 9050 }, { "epoch": 0.97, "grad_norm": 0.08792536366905146, "learning_rate": 1.9085566975003963e-06, "loss": 1.4077, "step": 9051 }, { "epoch": 0.97, "grad_norm": 0.07995517162388631, "learning_rate": 1.893388954636588e-06, "loss": 1.5223, "step": 9052 }, { "epoch": 0.97, "grad_norm": 0.10201954934078061, "learning_rate": 1.878281608916621e-06, "loss": 1.4578, "step": 9053 }, { "epoch": 0.97, "grad_norm": 0.08765267905443119, "learning_rate": 1.8632346621723085e-06, "loss": 1.4235, "step": 9054 }, { "epoch": 0.97, "grad_norm": 0.09134361289135108, "learning_rate": 1.8482481162280795e-06, "loss": 1.3734, "step": 9055 }, { "epoch": 0.97, "grad_norm": 0.09517979343552131, "learning_rate": 1.833321972901203e-06, "loss": 1.4759, "step": 9056 }, { "epoch": 0.97, "grad_norm": 0.09392044720740886, "learning_rate": 1.8184562340014532e-06, "loss": 1.3799, "step": 9057 }, { "epoch": 0.97, "grad_norm": 0.08945528953340533, "learning_rate": 1.8036509013313884e-06, "loss": 1.4674, "step": 9058 }, { "epoch": 0.97, "grad_norm": 0.09660244996835152, "learning_rate": 1.7889059766862392e-06, "loss": 1.4577, "step": 9059 }, { "epoch": 0.97, "grad_norm": 0.08955683843704676, "learning_rate": 1.7742214618537977e-06, "loss": 1.3246, "step": 9060 }, { "epoch": 0.97, "grad_norm": 0.09367894047592056, "learning_rate": 1.7595973586145842e-06, "loss": 1.3879, "step": 9061 }, { "epoch": 0.97, "grad_norm": 0.09055649217132475, "learning_rate": 1.7450336687420131e-06, "loss": 1.3893, "step": 9062 }, { "epoch": 0.97, "grad_norm": 0.09954889766272536, "learning_rate": 1.7305303940017836e-06, "loss": 1.3906, "step": 9063 }, { "epoch": 0.97, "grad_norm": 0.08601609477859878, "learning_rate": 1.7160875361525441e-06, "loss": 1.41, "step": 9064 }, { "epoch": 0.97, "grad_norm": 0.09150624684421146, "learning_rate": 1.701705096945505e-06, "loss": 1.4518, "step": 9065 }, { "epoch": 0.97, "grad_norm": 0.08989981788436734, "learning_rate": 1.6873830781246601e-06, "loss": 1.4331, "step": 9066 }, { "epoch": 0.97, "grad_norm": 0.08875228716445183, "learning_rate": 1.673121481426565e-06, "loss": 1.374, "step": 9067 }, { "epoch": 0.97, "grad_norm": 0.08737529837010975, "learning_rate": 1.6589203085804472e-06, "loss": 1.4335, "step": 9068 }, { "epoch": 0.97, "grad_norm": 0.10109421859275902, "learning_rate": 1.6447795613083183e-06, "loss": 1.4097, "step": 9069 }, { "epoch": 0.98, "grad_norm": 0.09946451803794594, "learning_rate": 1.6306992413247512e-06, "loss": 1.3549, "step": 9070 }, { "epoch": 0.98, "grad_norm": 0.08896478727278188, "learning_rate": 1.6166793503370469e-06, "loss": 1.3679, "step": 9071 }, { "epoch": 0.98, "grad_norm": 0.10638462313903883, "learning_rate": 1.602719890045179e-06, "loss": 1.3335, "step": 9072 }, { "epoch": 0.98, "grad_norm": 0.089120932283575, "learning_rate": 1.5888208621417376e-06, "loss": 1.3602, "step": 9073 }, { "epoch": 0.98, "grad_norm": 0.1012988002695788, "learning_rate": 1.5749822683120419e-06, "loss": 1.3864, "step": 9074 }, { "epoch": 0.98, "grad_norm": 0.10437337636041694, "learning_rate": 1.561204110234138e-06, "loss": 1.433, "step": 9075 }, { "epoch": 0.98, "grad_norm": 0.09601976601444809, "learning_rate": 1.547486389578523e-06, "loss": 1.5101, "step": 9076 }, { "epoch": 0.98, "grad_norm": 0.09522304132825528, "learning_rate": 1.5338291080086441e-06, "loss": 1.3575, "step": 9077 }, { "epoch": 0.98, "grad_norm": 0.09616477229666578, "learning_rate": 1.5202322671805103e-06, "loss": 1.5156, "step": 9078 }, { "epoch": 0.98, "grad_norm": 0.09378474058753226, "learning_rate": 1.5066958687426914e-06, "loss": 1.4158, "step": 9079 }, { "epoch": 0.98, "grad_norm": 0.08739425978756746, "learning_rate": 1.493219914336541e-06, "loss": 1.4265, "step": 9080 }, { "epoch": 0.98, "grad_norm": 0.098000838767873, "learning_rate": 1.4798044055961414e-06, "loss": 1.3646, "step": 9081 }, { "epoch": 0.98, "grad_norm": 0.09244726842890279, "learning_rate": 1.4664493441480797e-06, "loss": 1.3537, "step": 9082 }, { "epoch": 0.98, "grad_norm": 0.09582741121293425, "learning_rate": 1.4531547316117832e-06, "loss": 1.219, "step": 9083 }, { "epoch": 0.98, "grad_norm": 0.10720997957465893, "learning_rate": 1.4399205695991847e-06, "loss": 1.4641, "step": 9084 }, { "epoch": 0.98, "grad_norm": 0.08406612664504096, "learning_rate": 1.4267468597150558e-06, "loss": 1.3459, "step": 9085 }, { "epoch": 0.98, "grad_norm": 0.09440844125905276, "learning_rate": 1.4136336035566744e-06, "loss": 1.5147, "step": 9086 }, { "epoch": 0.98, "grad_norm": 0.09102998393092081, "learning_rate": 1.4005808027141576e-06, "loss": 1.4601, "step": 9087 }, { "epoch": 0.98, "grad_norm": 0.10331120527508215, "learning_rate": 1.3875884587700727e-06, "loss": 1.3414, "step": 9088 }, { "epoch": 0.98, "grad_norm": 0.09685017624372445, "learning_rate": 1.3746565732999372e-06, "loss": 1.4713, "step": 9089 }, { "epoch": 0.98, "grad_norm": 0.09074395569266269, "learning_rate": 1.3617851478716637e-06, "loss": 1.3245, "step": 9090 }, { "epoch": 0.98, "grad_norm": 0.09620486567097974, "learning_rate": 1.348974184046059e-06, "loss": 1.366, "step": 9091 }, { "epoch": 0.98, "grad_norm": 0.10708772859844796, "learning_rate": 1.3362236833763808e-06, "loss": 1.4079, "step": 9092 }, { "epoch": 0.98, "grad_norm": 0.0812319976321557, "learning_rate": 1.3235336474087812e-06, "loss": 1.3993, "step": 9093 }, { "epoch": 0.98, "grad_norm": 0.0775704183224788, "learning_rate": 1.3109040776819182e-06, "loss": 1.3545, "step": 9094 }, { "epoch": 0.98, "grad_norm": 0.0848197503815719, "learning_rate": 1.2983349757271778e-06, "loss": 1.3832, "step": 9095 }, { "epoch": 0.98, "grad_norm": 0.08434932724362751, "learning_rate": 1.285826343068619e-06, "loss": 1.4432, "step": 9096 }, { "epoch": 0.98, "grad_norm": 0.09075226208293427, "learning_rate": 1.2733781812229729e-06, "loss": 1.5216, "step": 9097 }, { "epoch": 0.98, "grad_norm": 0.1065687767191086, "learning_rate": 1.260990491699532e-06, "loss": 1.4738, "step": 9098 }, { "epoch": 0.98, "grad_norm": 0.08992583345049047, "learning_rate": 1.248663276000428e-06, "loss": 1.4144, "step": 9099 }, { "epoch": 0.98, "grad_norm": 0.08435920582959416, "learning_rate": 1.2363965356204099e-06, "loss": 1.4694, "step": 9100 }, { "epoch": 0.98, "grad_norm": 0.10098857061162941, "learning_rate": 1.2241902720467324e-06, "loss": 1.4237, "step": 9101 }, { "epoch": 0.98, "grad_norm": 0.09986073903862418, "learning_rate": 1.2120444867596003e-06, "loss": 1.4478, "step": 9102 }, { "epoch": 0.98, "grad_norm": 0.08558072461227653, "learning_rate": 1.1999591812316135e-06, "loss": 1.5613, "step": 9103 }, { "epoch": 0.98, "grad_norm": 0.09939211521634202, "learning_rate": 1.1879343569282109e-06, "loss": 1.383, "step": 9104 }, { "epoch": 0.98, "grad_norm": 0.09749991658975933, "learning_rate": 1.1759700153073927e-06, "loss": 1.4085, "step": 9105 }, { "epoch": 0.98, "grad_norm": 0.08925710668430854, "learning_rate": 1.1640661578199986e-06, "loss": 1.3, "step": 9106 }, { "epoch": 0.98, "grad_norm": 0.08628724330928919, "learning_rate": 1.1522227859092627e-06, "loss": 1.5635, "step": 9107 }, { "epoch": 0.98, "grad_norm": 0.09571820500346655, "learning_rate": 1.1404399010113696e-06, "loss": 1.4007, "step": 9108 }, { "epoch": 0.98, "grad_norm": 0.08511360335320821, "learning_rate": 1.1287175045548992e-06, "loss": 1.3939, "step": 9109 }, { "epoch": 0.98, "grad_norm": 0.093149927054585, "learning_rate": 1.117055597961325e-06, "loss": 1.3354, "step": 9110 }, { "epoch": 0.98, "grad_norm": 0.08879778321147576, "learning_rate": 1.105454182644683e-06, "loss": 1.4454, "step": 9111 }, { "epoch": 0.98, "grad_norm": 0.09563179301500772, "learning_rate": 1.0939132600116808e-06, "loss": 1.4794, "step": 9112 }, { "epoch": 0.98, "grad_norm": 0.08586217819702109, "learning_rate": 1.0824328314616994e-06, "loss": 1.4134, "step": 9113 }, { "epoch": 0.98, "grad_norm": 0.08860707467489773, "learning_rate": 1.0710128983867362e-06, "loss": 1.4085, "step": 9114 }, { "epoch": 0.98, "grad_norm": 0.09383682611238597, "learning_rate": 1.0596534621715171e-06, "loss": 1.4335, "step": 9115 }, { "epoch": 0.98, "grad_norm": 0.0889687494892466, "learning_rate": 1.04835452419344e-06, "loss": 1.4469, "step": 9116 }, { "epoch": 0.98, "grad_norm": 0.09343696027937952, "learning_rate": 1.037116085822576e-06, "loss": 1.5305, "step": 9117 }, { "epoch": 0.98, "grad_norm": 0.10193965533313602, "learning_rate": 1.0259381484215013e-06, "loss": 1.3598, "step": 9118 }, { "epoch": 0.98, "grad_norm": 0.08673396632150293, "learning_rate": 1.0148207133456878e-06, "loss": 1.3471, "step": 9119 }, { "epoch": 0.98, "grad_norm": 0.09320776936813663, "learning_rate": 1.0037637819431121e-06, "loss": 1.1593, "step": 9120 }, { "epoch": 0.98, "grad_norm": 0.09137352888592001, "learning_rate": 9.927673555544804e-07, "loss": 1.315, "step": 9121 }, { "epoch": 0.98, "grad_norm": 0.08717919879493985, "learning_rate": 9.818314355131697e-07, "loss": 1.4096, "step": 9122 }, { "epoch": 0.98, "grad_norm": 0.0928947483788626, "learning_rate": 9.709560231451198e-07, "loss": 1.452, "step": 9123 }, { "epoch": 0.98, "grad_norm": 0.08813190059284774, "learning_rate": 9.60141119769109e-07, "loss": 1.4183, "step": 9124 }, { "epoch": 0.98, "grad_norm": 0.08887987124924661, "learning_rate": 9.493867266964218e-07, "loss": 1.4121, "step": 9125 }, { "epoch": 0.98, "grad_norm": 0.09395453002384421, "learning_rate": 9.386928452310706e-07, "loss": 1.426, "step": 9126 }, { "epoch": 0.98, "grad_norm": 0.08377161772511613, "learning_rate": 9.280594766697959e-07, "loss": 1.4216, "step": 9127 }, { "epoch": 0.98, "grad_norm": 0.09163942047825116, "learning_rate": 9.174866223018441e-07, "loss": 1.4048, "step": 9128 }, { "epoch": 0.98, "grad_norm": 0.08910281911488153, "learning_rate": 9.069742834092454e-07, "loss": 1.3467, "step": 9129 }, { "epoch": 0.98, "grad_norm": 0.09118313763931776, "learning_rate": 8.965224612665912e-07, "loss": 1.5055, "step": 9130 }, { "epoch": 0.98, "grad_norm": 0.09895577173828514, "learning_rate": 8.861311571413122e-07, "loss": 1.4136, "step": 9131 }, { "epoch": 0.98, "grad_norm": 0.09511323789429267, "learning_rate": 8.75800372293345e-07, "loss": 1.2273, "step": 9132 }, { "epoch": 0.98, "grad_norm": 0.09517490821029329, "learning_rate": 8.655301079752987e-07, "loss": 1.3734, "step": 9133 }, { "epoch": 0.98, "grad_norm": 0.08708118193105326, "learning_rate": 8.553203654325103e-07, "loss": 1.4706, "step": 9134 }, { "epoch": 0.98, "grad_norm": 0.09081075474479096, "learning_rate": 8.451711459029343e-07, "loss": 1.2992, "step": 9135 }, { "epoch": 0.98, "grad_norm": 0.10093740980687556, "learning_rate": 8.350824506172527e-07, "loss": 1.3703, "step": 9136 }, { "epoch": 0.98, "grad_norm": 0.080606655701831, "learning_rate": 8.250542807986538e-07, "loss": 1.4454, "step": 9137 }, { "epoch": 0.98, "grad_norm": 0.08759808465816735, "learning_rate": 8.150866376631649e-07, "loss": 1.4234, "step": 9138 }, { "epoch": 0.98, "grad_norm": 0.09199899630041176, "learning_rate": 8.051795224193748e-07, "loss": 1.5712, "step": 9139 }, { "epoch": 0.98, "grad_norm": 0.08938543020630885, "learning_rate": 7.953329362685447e-07, "loss": 1.4132, "step": 9140 }, { "epoch": 0.98, "grad_norm": 0.09203648239831115, "learning_rate": 7.855468804046084e-07, "loss": 1.4387, "step": 9141 }, { "epoch": 0.98, "grad_norm": 0.0975507858137972, "learning_rate": 7.758213560141725e-07, "loss": 1.3312, "step": 9142 }, { "epoch": 0.98, "grad_norm": 0.09787399811432872, "learning_rate": 7.661563642765157e-07, "loss": 1.3107, "step": 9143 }, { "epoch": 0.98, "grad_norm": 0.10078879497252471, "learning_rate": 7.565519063634785e-07, "loss": 1.3406, "step": 9144 }, { "epoch": 0.98, "grad_norm": 0.09294027961434816, "learning_rate": 7.470079834396848e-07, "loss": 1.3681, "step": 9145 }, { "epoch": 0.98, "grad_norm": 0.09205734553283813, "learning_rate": 7.375245966623756e-07, "loss": 1.3883, "step": 9146 }, { "epoch": 0.98, "grad_norm": 0.08730325996375386, "learning_rate": 7.281017471814089e-07, "loss": 1.3657, "step": 9147 }, { "epoch": 0.98, "grad_norm": 0.08832469615914386, "learning_rate": 7.187394361393707e-07, "loss": 1.3283, "step": 9148 }, { "epoch": 0.98, "grad_norm": 0.08891798495898455, "learning_rate": 7.09437664671464e-07, "loss": 1.4239, "step": 9149 }, { "epoch": 0.98, "grad_norm": 0.08509761132201243, "learning_rate": 7.001964339055645e-07, "loss": 1.2972, "step": 9150 }, { "epoch": 0.98, "grad_norm": 0.09860938568263267, "learning_rate": 6.910157449621646e-07, "loss": 1.3948, "step": 9151 }, { "epoch": 0.98, "grad_norm": 0.09700345844785578, "learning_rate": 6.818955989545406e-07, "loss": 1.4834, "step": 9152 }, { "epoch": 0.98, "grad_norm": 0.10282320662162178, "learning_rate": 6.72835996988419e-07, "loss": 1.3405, "step": 9153 }, { "epoch": 0.98, "grad_norm": 0.09532553767319768, "learning_rate": 6.638369401624212e-07, "loss": 1.3601, "step": 9154 }, { "epoch": 0.98, "grad_norm": 0.09944816708848722, "learning_rate": 6.548984295676741e-07, "loss": 1.3718, "step": 9155 }, { "epoch": 0.98, "grad_norm": 0.08797403192670447, "learning_rate": 6.460204662879776e-07, "loss": 1.467, "step": 9156 }, { "epoch": 0.98, "grad_norm": 0.09782593171572332, "learning_rate": 6.37203051399804e-07, "loss": 1.3464, "step": 9157 }, { "epoch": 0.98, "grad_norm": 0.09783108557900717, "learning_rate": 6.284461859723534e-07, "loss": 1.3776, "step": 9158 }, { "epoch": 0.98, "grad_norm": 0.08536515596852107, "learning_rate": 6.197498710673876e-07, "loss": 1.3792, "step": 9159 }, { "epoch": 0.98, "grad_norm": 0.08102996000876958, "learning_rate": 6.111141077393967e-07, "loss": 1.3457, "step": 9160 }, { "epoch": 0.98, "grad_norm": 0.08950933513385094, "learning_rate": 6.025388970354872e-07, "loss": 1.5071, "step": 9161 }, { "epoch": 0.98, "grad_norm": 0.09077163258171603, "learning_rate": 5.940242399953832e-07, "loss": 1.5238, "step": 9162 }, { "epoch": 0.99, "grad_norm": 0.08107218496059505, "learning_rate": 5.855701376515921e-07, "loss": 1.4836, "step": 9163 }, { "epoch": 0.99, "grad_norm": 0.09402355006480244, "learning_rate": 5.771765910291271e-07, "loss": 1.3273, "step": 9164 }, { "epoch": 0.99, "grad_norm": 0.08419332875007346, "learning_rate": 5.688436011457854e-07, "loss": 1.2936, "step": 9165 }, { "epoch": 0.99, "grad_norm": 0.09582927476497023, "learning_rate": 5.605711690119808e-07, "loss": 1.4482, "step": 9166 }, { "epoch": 0.99, "grad_norm": 0.08145256942098168, "learning_rate": 5.523592956307444e-07, "loss": 1.3393, "step": 9167 }, { "epoch": 0.99, "grad_norm": 0.08945519192985447, "learning_rate": 5.442079819977797e-07, "loss": 1.4419, "step": 9168 }, { "epoch": 0.99, "grad_norm": 0.08389369232847793, "learning_rate": 5.361172291014627e-07, "loss": 1.4232, "step": 9169 }, { "epoch": 0.99, "grad_norm": 0.09665716756692867, "learning_rate": 5.280870379228975e-07, "loss": 1.502, "step": 9170 }, { "epoch": 0.99, "grad_norm": 0.09107272005780782, "learning_rate": 5.201174094356942e-07, "loss": 1.374, "step": 9171 }, { "epoch": 0.99, "grad_norm": 0.08525633295064047, "learning_rate": 5.122083446062464e-07, "loss": 1.2464, "step": 9172 }, { "epoch": 0.99, "grad_norm": 0.09145092325910216, "learning_rate": 5.043598443935094e-07, "loss": 1.3063, "step": 9173 }, { "epoch": 0.99, "grad_norm": 0.09121429449434486, "learning_rate": 4.965719097491661e-07, "loss": 1.317, "step": 9174 }, { "epoch": 0.99, "grad_norm": 0.08106699097565444, "learning_rate": 4.888445416175724e-07, "loss": 1.3662, "step": 9175 }, { "epoch": 0.99, "grad_norm": 0.09235616001095853, "learning_rate": 4.8117774093559e-07, "loss": 1.4448, "step": 9176 }, { "epoch": 0.99, "grad_norm": 0.08632917581540081, "learning_rate": 4.735715086329195e-07, "loss": 1.4205, "step": 9177 }, { "epoch": 0.99, "grad_norm": 0.09459405410772538, "learning_rate": 4.6602584563187886e-07, "loss": 1.3787, "step": 9178 }, { "epoch": 0.99, "grad_norm": 0.08651932809943559, "learning_rate": 4.5854075284729177e-07, "loss": 1.4209, "step": 9179 }, { "epoch": 0.99, "grad_norm": 0.08827714040145299, "learning_rate": 4.511162311868766e-07, "loss": 1.3239, "step": 9180 }, { "epoch": 0.99, "grad_norm": 0.08381943113452264, "learning_rate": 4.4375228155085766e-07, "loss": 1.4219, "step": 9181 }, { "epoch": 0.99, "grad_norm": 0.08236325719668838, "learning_rate": 4.3644890483202084e-07, "loss": 1.3715, "step": 9182 }, { "epoch": 0.99, "grad_norm": 0.09243341173187043, "learning_rate": 4.292061019160465e-07, "loss": 1.4119, "step": 9183 }, { "epoch": 0.99, "grad_norm": 0.10070109950559669, "learning_rate": 4.2202387368112104e-07, "loss": 1.472, "step": 9184 }, { "epoch": 0.99, "grad_norm": 0.09517079092290456, "learning_rate": 4.149022209981035e-07, "loss": 1.3448, "step": 9185 }, { "epoch": 0.99, "grad_norm": 0.09084655013556947, "learning_rate": 4.0784114473052526e-07, "loss": 1.2689, "step": 9186 }, { "epoch": 0.99, "grad_norm": 0.08422060321187015, "learning_rate": 4.008406457345903e-07, "loss": 1.3204, "step": 9187 }, { "epoch": 0.99, "grad_norm": 0.08559323844255416, "learning_rate": 3.9390072485906424e-07, "loss": 1.3425, "step": 9188 }, { "epoch": 0.99, "grad_norm": 0.08430437320924258, "learning_rate": 3.8702138294549605e-07, "loss": 1.3853, "step": 9189 }, { "epoch": 0.99, "grad_norm": 0.09296172134493086, "learning_rate": 3.8020262082799626e-07, "loss": 1.3051, "step": 9190 }, { "epoch": 0.99, "grad_norm": 0.08668309232549491, "learning_rate": 3.7344443933340355e-07, "loss": 1.3997, "step": 9191 }, { "epoch": 0.99, "grad_norm": 0.08495748652651433, "learning_rate": 3.6674683928111795e-07, "loss": 1.3981, "step": 9192 }, { "epoch": 0.99, "grad_norm": 0.11190443758993558, "learning_rate": 3.6010982148332314e-07, "loss": 1.4863, "step": 9193 }, { "epoch": 0.99, "grad_norm": 0.08562198339929697, "learning_rate": 3.535333867447088e-07, "loss": 1.2775, "step": 9194 }, { "epoch": 0.99, "grad_norm": 0.08887863740413854, "learning_rate": 3.4701753586269257e-07, "loss": 1.3826, "step": 9195 }, { "epoch": 0.99, "grad_norm": 0.0911839138816826, "learning_rate": 3.405622696274202e-07, "loss": 1.4951, "step": 9196 }, { "epoch": 0.99, "grad_norm": 0.10007743747550958, "learning_rate": 3.341675888215434e-07, "loss": 1.3479, "step": 9197 }, { "epoch": 0.99, "grad_norm": 0.08820855203603718, "learning_rate": 3.278334942204419e-07, "loss": 1.3686, "step": 9198 }, { "epoch": 0.99, "grad_norm": 0.09489741230917534, "learning_rate": 3.215599865921126e-07, "loss": 1.3376, "step": 9199 }, { "epoch": 0.99, "grad_norm": 0.09132514257961939, "learning_rate": 3.1534706669733573e-07, "loss": 1.4629, "step": 9200 }, { "epoch": 0.99, "grad_norm": 0.09654225348968377, "learning_rate": 3.0919473528939754e-07, "loss": 1.3531, "step": 9201 }, { "epoch": 0.99, "grad_norm": 0.09488757695139786, "learning_rate": 3.0310299311431256e-07, "loss": 1.4028, "step": 9202 }, { "epoch": 0.99, "grad_norm": 0.10174093481738378, "learning_rate": 2.9707184091071204e-07, "loss": 1.4304, "step": 9203 }, { "epoch": 0.99, "grad_norm": 0.08821629876713558, "learning_rate": 2.9110127940984445e-07, "loss": 1.3028, "step": 9204 }, { "epoch": 0.99, "grad_norm": 0.08766097557322772, "learning_rate": 2.851913093357417e-07, "loss": 1.4125, "step": 9205 }, { "epoch": 0.99, "grad_norm": 0.0901572714979954, "learning_rate": 2.793419314049972e-07, "loss": 1.4407, "step": 9206 }, { "epoch": 0.99, "grad_norm": 0.08821420180605707, "learning_rate": 2.73553146326766e-07, "loss": 1.432, "step": 9207 }, { "epoch": 0.99, "grad_norm": 0.08633075490026776, "learning_rate": 2.6782495480309756e-07, "loss": 1.35, "step": 9208 }, { "epoch": 0.99, "grad_norm": 0.08272348964502109, "learning_rate": 2.621573575284919e-07, "loss": 1.3314, "step": 9209 }, { "epoch": 0.99, "grad_norm": 0.08738865558697523, "learning_rate": 2.565503551901216e-07, "loss": 1.3905, "step": 9210 }, { "epoch": 0.99, "grad_norm": 0.08583764484613975, "learning_rate": 2.510039484679427e-07, "loss": 1.4012, "step": 9211 }, { "epoch": 0.99, "grad_norm": 0.09265865578712432, "learning_rate": 2.4551813803441734e-07, "loss": 1.2547, "step": 9212 }, { "epoch": 0.99, "grad_norm": 0.07892496874291217, "learning_rate": 2.400929245546801e-07, "loss": 1.3251, "step": 9213 }, { "epoch": 0.99, "grad_norm": 0.08624414306385945, "learning_rate": 2.3472830868670469e-07, "loss": 1.3862, "step": 9214 }, { "epoch": 0.99, "grad_norm": 0.09302196091866992, "learning_rate": 2.2942429108080422e-07, "loss": 1.2674, "step": 9215 }, { "epoch": 0.99, "grad_norm": 0.08821491575497864, "learning_rate": 2.2418087238018637e-07, "loss": 1.4064, "step": 9216 }, { "epoch": 0.99, "grad_norm": 0.08949986370580101, "learning_rate": 2.189980532206759e-07, "loss": 1.4349, "step": 9217 }, { "epoch": 0.99, "grad_norm": 0.08278441450527144, "learning_rate": 2.1387583423060354e-07, "loss": 1.2998, "step": 9218 }, { "epoch": 0.99, "grad_norm": 0.0840801051048805, "learning_rate": 2.0881421603113904e-07, "loss": 1.263, "step": 9219 }, { "epoch": 0.99, "grad_norm": 0.08056200911342393, "learning_rate": 2.0381319923595821e-07, "loss": 1.4156, "step": 9220 }, { "epoch": 0.99, "grad_norm": 0.08724772973658486, "learning_rate": 1.9887278445152036e-07, "loss": 1.2512, "step": 9221 }, { "epoch": 0.99, "grad_norm": 0.09455867642372127, "learning_rate": 1.9399297227684633e-07, "loss": 1.4142, "step": 9222 }, { "epoch": 0.99, "grad_norm": 0.09343959412492665, "learning_rate": 1.8917376330357395e-07, "loss": 1.3958, "step": 9223 }, { "epoch": 0.99, "grad_norm": 0.08794548124148231, "learning_rate": 1.8441515811612464e-07, "loss": 1.3576, "step": 9224 }, { "epoch": 0.99, "grad_norm": 0.09691778279436149, "learning_rate": 1.797171572914813e-07, "loss": 1.5349, "step": 9225 }, { "epoch": 0.99, "grad_norm": 0.07983284987363899, "learning_rate": 1.7507976139924385e-07, "loss": 1.4171, "step": 9226 }, { "epoch": 0.99, "grad_norm": 0.09287129310883346, "learning_rate": 1.7050297100174028e-07, "loss": 1.3165, "step": 9227 }, { "epoch": 0.99, "grad_norm": 0.08646817433793243, "learning_rate": 1.6598678665397104e-07, "loss": 1.4993, "step": 9228 }, { "epoch": 0.99, "grad_norm": 0.07929013433839835, "learning_rate": 1.6153120890344265e-07, "loss": 1.5507, "step": 9229 }, { "epoch": 0.99, "grad_norm": 0.08850803873232053, "learning_rate": 1.5713623829050062e-07, "loss": 1.4476, "step": 9230 }, { "epoch": 0.99, "grad_norm": 0.0888883518336363, "learning_rate": 1.5280187534794098e-07, "loss": 1.3417, "step": 9231 }, { "epoch": 0.99, "grad_norm": 0.09379458112499682, "learning_rate": 1.4852812060145438e-07, "loss": 1.4362, "step": 9232 }, { "epoch": 0.99, "grad_norm": 0.08832514165492539, "learning_rate": 1.4431497456918186e-07, "loss": 1.4955, "step": 9233 }, { "epoch": 0.99, "grad_norm": 0.10270631070515934, "learning_rate": 1.4016243776193706e-07, "loss": 1.314, "step": 9234 }, { "epoch": 0.99, "grad_norm": 0.08689177286241863, "learning_rate": 1.360705106833171e-07, "loss": 1.4187, "step": 9235 }, { "epoch": 0.99, "grad_norm": 0.0904049295681052, "learning_rate": 1.320391938294252e-07, "loss": 1.4229, "step": 9236 }, { "epoch": 0.99, "grad_norm": 0.08826812497826056, "learning_rate": 1.280684876890925e-07, "loss": 1.3759, "step": 9237 }, { "epoch": 0.99, "grad_norm": 0.09454769152128116, "learning_rate": 1.2415839274376728e-07, "loss": 1.4044, "step": 9238 }, { "epoch": 0.99, "grad_norm": 0.12649764184311058, "learning_rate": 1.2030890946757022e-07, "loss": 1.4513, "step": 9239 }, { "epoch": 0.99, "grad_norm": 0.08893899583982746, "learning_rate": 1.1652003832729464e-07, "loss": 1.4767, "step": 9240 }, { "epoch": 0.99, "grad_norm": 0.09708840021285432, "learning_rate": 1.1279177978229527e-07, "loss": 1.2564, "step": 9241 }, { "epoch": 0.99, "grad_norm": 0.08160711737594363, "learning_rate": 1.0912413428471046e-07, "loss": 1.2285, "step": 9242 }, { "epoch": 0.99, "grad_norm": 0.0830542224283073, "learning_rate": 1.0551710227912903e-07, "loss": 1.3295, "step": 9243 }, { "epoch": 0.99, "grad_norm": 0.10843619771485967, "learning_rate": 1.0197068420308986e-07, "loss": 1.4789, "step": 9244 }, { "epoch": 0.99, "grad_norm": 0.08399511285269524, "learning_rate": 9.848488048647131e-08, "loss": 1.4328, "step": 9245 }, { "epoch": 0.99, "grad_norm": 0.08369868710316501, "learning_rate": 9.505969155193528e-08, "loss": 1.4427, "step": 9246 }, { "epoch": 0.99, "grad_norm": 0.08815787268710067, "learning_rate": 9.169511781492724e-08, "loss": 1.284, "step": 9247 }, { "epoch": 0.99, "grad_norm": 0.10365706578408941, "learning_rate": 8.839115968328759e-08, "loss": 1.4171, "step": 9248 }, { "epoch": 0.99, "grad_norm": 0.0868752961016487, "learning_rate": 8.514781755769585e-08, "loss": 1.3582, "step": 9249 }, { "epoch": 0.99, "grad_norm": 0.09414347762024002, "learning_rate": 8.1965091831393e-08, "loss": 1.3758, "step": 9250 }, { "epoch": 0.99, "grad_norm": 0.08954872276660882, "learning_rate": 7.884298289029258e-08, "loss": 1.3475, "step": 9251 }, { "epoch": 0.99, "grad_norm": 0.10030726142452168, "learning_rate": 7.57814911129806e-08, "loss": 1.3205, "step": 9252 }, { "epoch": 0.99, "grad_norm": 0.09762209771924428, "learning_rate": 7.278061687066018e-08, "loss": 1.3829, "step": 9253 }, { "epoch": 0.99, "grad_norm": 0.08836513485757565, "learning_rate": 6.984036052720688e-08, "loss": 1.3295, "step": 9254 }, { "epoch": 0.99, "grad_norm": 0.0871339758958895, "learning_rate": 6.696072243911333e-08, "loss": 1.3928, "step": 9255 }, { "epoch": 1.0, "grad_norm": 0.09098880914087802, "learning_rate": 6.414170295560017e-08, "loss": 1.357, "step": 9256 }, { "epoch": 1.0, "grad_norm": 0.09077999286470577, "learning_rate": 6.138330241839407e-08, "loss": 1.3801, "step": 9257 }, { "epoch": 1.0, "grad_norm": 0.10122837068892225, "learning_rate": 5.868552116206072e-08, "loss": 1.3623, "step": 9258 }, { "epoch": 1.0, "grad_norm": 0.08517700022337132, "learning_rate": 5.604835951367182e-08, "loss": 1.4786, "step": 9259 }, { "epoch": 1.0, "grad_norm": 0.08585211456743395, "learning_rate": 5.3471817792971614e-08, "loss": 1.4968, "step": 9260 }, { "epoch": 1.0, "grad_norm": 0.10044181669808067, "learning_rate": 5.095589631237685e-08, "loss": 1.2755, "step": 9261 }, { "epoch": 1.0, "grad_norm": 0.08811673378812793, "learning_rate": 4.8500595376976816e-08, "loss": 1.2943, "step": 9262 }, { "epoch": 1.0, "grad_norm": 0.08452180165981107, "learning_rate": 4.610591528447783e-08, "loss": 1.3082, "step": 9263 }, { "epoch": 1.0, "grad_norm": 0.09509764207121812, "learning_rate": 4.3771856325203196e-08, "loss": 1.3976, "step": 9264 }, { "epoch": 1.0, "grad_norm": 0.0905369920640337, "learning_rate": 4.1498418782259795e-08, "loss": 1.34, "step": 9265 }, { "epoch": 1.0, "grad_norm": 0.08517538334754213, "learning_rate": 3.928560293120498e-08, "loss": 1.3423, "step": 9266 }, { "epoch": 1.0, "grad_norm": 0.0853860335389513, "learning_rate": 3.713340904043516e-08, "loss": 1.4828, "step": 9267 }, { "epoch": 1.0, "grad_norm": 0.10931919049201298, "learning_rate": 3.504183737085276e-08, "loss": 1.3513, "step": 9268 }, { "epoch": 1.0, "grad_norm": 0.08736777021378683, "learning_rate": 3.301088817608822e-08, "loss": 1.4117, "step": 9269 }, { "epoch": 1.0, "grad_norm": 0.10028110998630704, "learning_rate": 3.104056170244451e-08, "loss": 1.4898, "step": 9270 }, { "epoch": 1.0, "grad_norm": 0.09773062139045051, "learning_rate": 2.9130858188730626e-08, "loss": 1.3318, "step": 9271 }, { "epoch": 1.0, "grad_norm": 0.08128181127641702, "learning_rate": 2.7281777866594582e-08, "loss": 1.4327, "step": 9272 }, { "epoch": 1.0, "grad_norm": 0.0943358489444862, "learning_rate": 2.5493320960190413e-08, "loss": 1.3043, "step": 9273 }, { "epoch": 1.0, "grad_norm": 0.09785602520380977, "learning_rate": 2.3765487686400188e-08, "loss": 1.4111, "step": 9274 }, { "epoch": 1.0, "grad_norm": 0.09533372939648309, "learning_rate": 2.2098278254722993e-08, "loss": 1.4909, "step": 9275 }, { "epoch": 1.0, "grad_norm": 0.0801968457699735, "learning_rate": 2.0491692867330434e-08, "loss": 1.4355, "step": 9276 }, { "epoch": 1.0, "grad_norm": 0.09429240525483946, "learning_rate": 1.894573171901115e-08, "loss": 1.4304, "step": 9277 }, { "epoch": 1.0, "grad_norm": 0.10266163176098933, "learning_rate": 1.746039499717078e-08, "loss": 1.4492, "step": 9278 }, { "epoch": 1.0, "grad_norm": 0.09383290941084661, "learning_rate": 1.6035682881998527e-08, "loss": 1.4451, "step": 9279 }, { "epoch": 1.0, "grad_norm": 0.080437028926394, "learning_rate": 1.4671595546245086e-08, "loss": 1.4012, "step": 9280 }, { "epoch": 1.0, "grad_norm": 0.08351402759362497, "learning_rate": 1.3368133155222672e-08, "loss": 1.3904, "step": 9281 }, { "epoch": 1.0, "grad_norm": 0.091617721380892, "learning_rate": 1.2125295867027042e-08, "loss": 1.3431, "step": 9282 }, { "epoch": 1.0, "grad_norm": 0.10262114214274354, "learning_rate": 1.094308383237097e-08, "loss": 1.5308, "step": 9283 }, { "epoch": 1.0, "grad_norm": 0.10109618312260171, "learning_rate": 9.82149719458425e-09, "loss": 1.3743, "step": 9284 }, { "epoch": 1.0, "grad_norm": 0.09909394437365746, "learning_rate": 8.760536089724714e-09, "loss": 1.435, "step": 9285 }, { "epoch": 1.0, "grad_norm": 0.08764241854659703, "learning_rate": 7.760200646300675e-09, "loss": 1.3543, "step": 9286 }, { "epoch": 1.0, "grad_norm": 0.10072587519427252, "learning_rate": 6.820490985715022e-09, "loss": 1.3164, "step": 9287 }, { "epoch": 1.0, "grad_norm": 0.09365884377453729, "learning_rate": 5.941407221932149e-09, "loss": 1.3535, "step": 9288 }, { "epoch": 1.0, "grad_norm": 0.09650653930095349, "learning_rate": 5.122949461422444e-09, "loss": 1.3542, "step": 9289 }, { "epoch": 1.0, "grad_norm": 0.08379342583541467, "learning_rate": 4.365117803550866e-09, "loss": 1.4495, "step": 9290 }, { "epoch": 1.0, "grad_norm": 0.09115901593403421, "learning_rate": 3.667912340132862e-09, "loss": 1.3161, "step": 9291 }, { "epoch": 1.0, "grad_norm": 0.10871669726957177, "learning_rate": 3.031333155767424e-09, "loss": 1.5187, "step": 9292 }, { "epoch": 1.0, "grad_norm": 0.08912092052721272, "learning_rate": 2.455380327559542e-09, "loss": 1.3403, "step": 9293 }, { "epoch": 1.0, "grad_norm": 0.08373270056508372, "learning_rate": 1.9400539253977557e-09, "loss": 1.3308, "step": 9294 }, { "epoch": 1.0, "grad_norm": 0.09699578700835747, "learning_rate": 1.4853540117321095e-09, "loss": 1.3618, "step": 9295 }, { "epoch": 1.0, "grad_norm": 0.08636922925763955, "learning_rate": 1.0912806417961995e-09, "loss": 1.3992, "step": 9296 }, { "epoch": 1.0, "grad_norm": 0.09516627466784042, "learning_rate": 7.578338632741044e-10, "loss": 1.423, "step": 9297 }, { "epoch": 1.0, "grad_norm": 0.10331085370003165, "learning_rate": 4.850137165779422e-10, "loss": 1.3025, "step": 9298 }, { "epoch": 1.0, "grad_norm": 0.08985317351843392, "learning_rate": 2.728202349033815e-10, "loss": 1.3703, "step": 9299 }, { "epoch": 1.0, "grad_norm": 0.09341887519280219, "learning_rate": 1.2125344384106285e-10, "loss": 1.4087, "step": 9300 }, { "epoch": 1.0, "grad_norm": 0.08905643836734109, "learning_rate": 3.031336187619971e-11, "loss": 1.4227, "step": 9301 }, { "epoch": 1.0, "grad_norm": 0.08366057997777612, "learning_rate": 0.0, "loss": 1.5995, "step": 9302 }, { "epoch": 1.0, "step": 9302, "total_flos": 1.5268155381671526e+18, "train_loss": 1.4317849794912534, "train_runtime": 52932.8416, "train_samples_per_second": 11.248, "train_steps_per_second": 0.176 } ], "logging_steps": 1.0, "max_steps": 9302, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 1.5268155381671526e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }